twox-hash-2.1.2/.cargo_vcs_info.json0000644000000001360000000000100127600ustar { "git": { "sha1": "bc5bb80b4857707e0372d2386157b1d31e4441d3" }, "path_in_vcs": "" }twox-hash-2.1.2/.github/dependabot.yml000064400000000000000000000002211046102023000157330ustar 00000000000000version: 2 updates: - package-ecosystem: cargo directory: "/" schedule: interval: daily time: "10:00" open-pull-requests-limit: 10 twox-hash-2.1.2/.github/workflows/ci.yml000064400000000000000000000101201046102023000162550ustar 00000000000000on: [push, pull_request] name: Continuous integration env: RUSTFLAGS: -D warnings RUSTDOCFLAGS: -D warnings jobs: library: strategy: matrix: platform: - ubuntu-latest rust: - stable - beta - nightly - 1.81.0 # MSRV include: - platform: macos-latest # This serves as our aarch64 / arm64 runner rust: stable - platform: windows-latest rust: stable runs-on: ${{ matrix.platform }} steps: - uses: actions/checkout@v4 - run: git submodule update --init --recursive - uses: dtolnay/rust-toolchain@master with: toolchain: ${{ matrix.rust }} - name: Unit Tests run: cargo test --all-features - name: Property Tests run: cargo test -p comparison --all-features miri: runs-on: ubuntu-latest env: MIRIFLAGS: --cfg _internal_xxhash3_force_scalar steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@master with: toolchain: nightly components: miri - name: Unsafe Code run: cargo miri test --all-features - name: Big Endian Platform run: cargo miri test --all-features --target s390x-unknown-linux-gnu lints: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - run: git submodule update --init --recursive - uses: dtolnay/rust-toolchain@master with: toolchain: stable components: rustfmt, clippy - run: cargo fmt --check --all - run: cargo clippy --all --all-targets --all-features - run: cargo doc --all-features no-std: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@master with: toolchain: stable targets: thumbv6m-none-eabi - run: 
> cargo build --no-default-features --features=xxhash32,xxhash64,xxhash3_64 --target thumbv6m-none-eabi features: runs-on: ubuntu-latest env: IMPLEMENTATIONS: xxhash32 xxhash64 xxhash3_64 xxhash3_128 FEATURE_SET: random serialize std alloc steps: - uses: actions/checkout@v4 - run: git submodule update --init --recursive - uses: dtolnay/rust-toolchain@master with: toolchain: stable - name: Compute Powerset shell: "ruby {0}" run: | features = ENV['FEATURE_SET'] .split(' ') .reduce([[]]) { |ps, i| ps + ps.map { |e| e + [i] } } .map { |s| s.join(',') } .join(" ") File.open(ENV['GITHUB_ENV'], 'a') { |f| f.write("FEATURES=#{features}") } - name: Check implementations with features run: | for impl in ${IMPLEMENTATIONS}; do echo "::group::Implementation ${impl}" # Check the implementation by itself cargo check --no-default-features --features="${impl}" # And with extra features for feature in ${FEATURES}; do echo "::group::Features ${feature}" cargo check --no-default-features --features="${impl},${feature}" echo "::endgroup::" done echo ::endgroup:: done minimal-versions: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - run: git submodule update --init --recursive - uses: dtolnay/rust-toolchain@master with: toolchain: 1.81.0 # MSRV - uses: dtolnay/rust-toolchain@master with: toolchain: nightly - name: Remove non-essential dependencies run: | # Remove workspace dependencies sed -i '/\[workspace]/,/#END-\[workspace]/d' Cargo.toml # Remove dev-dependencies sed -i '/\[dev-dependencies]/,/#END-\[dev-dependencies]/d' Cargo.toml - name: Downgrade to minimal dependencies run: | cargo +nightly -Z minimal-versions update - run: cargo +1.81.0 build --all-features twox-hash-2.1.2/.gitignore000064400000000000000000000000241046102023000135340ustar 00000000000000/Cargo.lock /target twox-hash-2.1.2/.gitmodules000064400000000000000000000001451046102023000137250ustar 00000000000000[submodule "xxHash"] path = xx_hash-sys/xxHash url = https://github.com/Cyan4973/xxHash.git 
twox-hash-2.1.2/CHANGELOG.md000064400000000000000000000075021046102023000133650ustar 00000000000000# Changelog All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [2.1.2] - 2025-09-03 [2.1.2]: https://github.com/shepmaster/twox-hash/tree/v2.1.2 ### Changed - The documentation has been updated to account for `XxHash3_128`. ## [2.1.1] - 2025-06-09 [2.1.1]: https://github.com/shepmaster/twox-hash/tree/v2.1.1 ### Changed - The version range for the optional `rand` dependency is now 0.9. ## [2.1.0] - 2024-12-09 [2.1.0]: https://github.com/shepmaster/twox-hash/tree/v2.1.0 ### Added - The XXH3 128-bit algorithm is implemented via `XxHash3_128` and the `xxhash3_128` module. ## [2.0.1] - 2024-11-04 [2.0.1]: https://github.com/shepmaster/twox-hash/tree/v2.0.1 ### Fixed - Removed a panic that could occur when using `XxHash3_64` to hash 1 to 3 bytes of data in debug mode. Release mode and different lengths of data are unaffected. ## [2.0.0] - 2024-10-18 [2.0.0]: https://github.com/shepmaster/twox-hash/tree/v2.0.0 This release is a complete rewrite of the crate, including reorganization of the code. The XXH3 algorithm now matches the 0.8 release of the reference C xxHash implementation. ### Added - `XxHash32::oneshot` and `XxHash64::oneshot` can perform hashing with zero allocation and generally improved performance. If you have code that creates a hasher and hashes a slice of bytes exactly once, you are strongly encouraged to use the new functions. This might look like: ```rust // Before let mut hasher = XxHash64::new(); // or XxHash32, or with seeds some_bytes.hash(&mut hasher); let hash = hasher.finish(); // After let hash = XxHash64::oneshot(some_bytes); ``` - There is a feature flag for each hashing implementation. 
It is recommended that you opt-out of the crate's default features and only select the implementations you need to improve compile speed. ### Changed - The crates minimum supported Rust version (MSRV) is now 1.81. - Functional and performance comparisons are made against the reference C xxHash library version 0.8.2, which includes a stable XXH3 algorithm. - Support for randomly-generated hasher instances is now behind the `random` feature flag. It was previously combined with the `std` feature flag. ### Removed - The deprecated type aliases `XxHash` and `RandomXxHashBuilder` have been removed. Replace them with `XxHash64` and `xxhash64::RandomState` respectively. - `RandomXxHashBuilder32` and `RandomXxHashBuilder64` are no longer available at the top-level of the crate. Replace them with `xxhash32::RandomState` and ``xxhash64::RandomState` respectively. - `Xxh3Hash64` and `xx3::Hash64` have been renamed to `XxHash3_64` and `xxhash3_64::Hasher` respectively. - The free functions `xxh3::hash64`, `xxh3::hash64_with_seed`, and `xxh3::hash64_with_secret` are now associated functions of `xxhash3_64::Hasher`: `oneshot`, `oneshot_with_seed` and `oneshot_with_secret`. Note that the argument order has changed. - Support for the [digest][] crate has been removed. The digest crate is for **cryptographic** hash functions and xxHash is **non-cryptographic**. - `XxHash32` and `XxHash64` no longer implement `Copy`. This prevents accidentally mutating a duplicate instance of the state instead of the original state. `Clone` is still implemented so you can make deliberate duplicates. - The XXH3 128-bit variant is not yet re-written. Work is in progress for this. - We no longer provide support for randomly-generated instances of the XXH3 64-bit variant. The XXH3 algorithm takes both a seed and a secret as input and deciding what to randomize is non-trivial and can have negative impacts on performance. 
[digest]: https://docs.rs/digest/latest/digest/ twox-hash-2.1.2/Cargo.lock0000644000000117320000000000100107370ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 4 [[package]] name = "cfg-if" version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" [[package]] name = "getrandom" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", "libc", "r-efi", "wasi", ] [[package]] name = "itoa" version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "libc" version = "0.2.175" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" [[package]] name = "memchr" version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" [[package]] name = "ppv-lite86" version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ "zerocopy", ] [[package]] name = "proc-macro2" version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] [[package]] name = "r-efi" version = "5.3.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" [[package]] name = "rand" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" dependencies = [ "rand_chacha", "rand_core", ] [[package]] name = "rand_chacha" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", "rand_core", ] [[package]] name = "rand_core" version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" dependencies = [ "getrandom", ] [[package]] name = "ryu" version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "serde" version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.143" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" dependencies = [ "itoa", "memchr", "ryu", "serde", ] [[package]] name = "syn" version = "2.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] 
name = "twox-hash" version = "2.1.2" dependencies = [ "rand", "serde", "serde_json", ] [[package]] name = "unicode-ident" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "wasi" version = "0.14.3+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a51ae83037bdd272a9e28ce236db8c07016dd0d50c27038b3f407533c030c95" dependencies = [ "wit-bindgen", ] [[package]] name = "wit-bindgen" version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "052283831dbae3d879dc7f51f3d92703a316ca49f91540417d38591826127814" [[package]] name = "zerocopy" version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" dependencies = [ "proc-macro2", "quote", "syn", ] twox-hash-2.1.2/Cargo.toml0000644000000035130000000000100107600ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package] edition = "2021" rust-version = "1.81" name = "twox-hash" version = "2.1.2" authors = ["Jake Goulding "] build = false autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "A Rust implementation of the XXHash and XXH3 algorithms" documentation = "https://docs.rs/twox-hash/" readme = "README.md" keywords = [ "hash", "hasher", "xxhash", "xxh3", ] categories = ["algorithms"] license = "MIT" repository = "https://github.com/shepmaster/twox-hash" [package.metadata.docs.rs] all-features = true [features] alloc = [] default = [ "random", "xxhash32", "xxhash64", "xxhash3_64", "xxhash3_128", "std", ] random = ["dep:rand"] serialize = ["dep:serde"] std = ["alloc"] xxhash32 = [] xxhash3_128 = [] xxhash3_64 = [] xxhash64 = [] [lib] name = "twox_hash" path = "src/lib.rs" [dependencies.rand] version = "0.9.0" features = ["thread_rng"] optional = true default-features = false [dependencies.serde] version = "1.0.0" features = ["derive"] optional = true default-features = false [dev-dependencies.serde_json] version = "1.0.117" [lints.rust.unexpected_cfgs] level = "warn" priority = 0 check-cfg = [ "cfg(_internal_xxhash3_force_scalar)", "cfg(_internal_xxhash3_force_neon)", "cfg(_internal_xxhash3_force_sse2)", "cfg(_internal_xxhash3_force_avx2)", ] twox-hash-2.1.2/Cargo.toml.orig000064400000000000000000000024621046102023000144430ustar 00000000000000[package] name = "twox-hash" version = "2.1.2" authors = ["Jake Goulding "] edition = "2021" rust-version = "1.81" description = "A Rust implementation of the XXHash and XXH3 algorithms" readme = "README.md" keywords = ["hash", "hasher", "xxhash", "xxh3"] categories = ["algorithms"] repository = "https://github.com/shepmaster/twox-hash" documentation = "https://docs.rs/twox-hash/" license = "MIT" [workspace] members = [ "asmasm", "comparison", "twox-hash-sum", "xx_hash-sys", ] #END-[workspace] [features] default = ["random", "xxhash32", "xxhash64", "xxhash3_64", "xxhash3_128", 
"std"] random = ["dep:rand"] serialize = ["dep:serde"] xxhash32 = [] xxhash64 = [] xxhash3_64 = [] xxhash3_128 = [] std = ["alloc"] alloc = [] [lints.rust.unexpected_cfgs] level = "warn" check-cfg = [ 'cfg(_internal_xxhash3_force_scalar)', 'cfg(_internal_xxhash3_force_neon)', 'cfg(_internal_xxhash3_force_sse2)', 'cfg(_internal_xxhash3_force_avx2)', ] [dependencies] rand = { version = "0.9.0", optional = true, default-features = false, features = ["thread_rng"] } serde = { version = "1.0.0", optional = true, default-features = false, features = ["derive"] } [dev-dependencies] serde_json = "1.0.117" #END-[dev-dependencies] [package.metadata.docs.rs] all-features = true twox-hash-2.1.2/LICENSE.txt000064400000000000000000000020701046102023000133720ustar 00000000000000The MIT License (MIT) Copyright (c) 2015 Jake Goulding Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. twox-hash-2.1.2/README.md000064400000000000000000000101521046102023000130260ustar 00000000000000A Rust implementation of the [xxHash] algorithm. 
[![Crates.io][crates-badge]][crates-url] [![Documentation][docs-badge]][docs-url] [![Build Status][actions-badge]][actions-url] [xxHash]: https://github.com/Cyan4973/xxHash [crates-badge]: https://img.shields.io/crates/v/twox-hash.svg [crates-url]: https://crates.io/crates/twox-hash [docs-badge]: https://img.shields.io/docsrs/twox-hash [docs-url]: https://docs.rs/twox-hash/ [actions-badge]: https://github.com/shepmaster/twox-hash/actions/workflows/ci.yml/badge.svg?branch=main [actions-url]: https://github.com/shepmaster/twox-hash/actions/workflows/ci.yml?query=branch%3Amain # Examples These examples use [`XxHash64`][] but the same ideas can be used for [`XxHash32`][], [`XxHash3_64`][], or [`XxHash3_128`][]. ## Hashing arbitrary data ### When all the data is available at once ```rust use twox_hash::XxHash64; let seed = 1234; let hash = XxHash64::oneshot(seed, b"some bytes"); assert_eq!(0xeab5_5659_a496_d78b, hash); ``` ### When the data is streaming ```rust use std::hash::Hasher as _; use twox_hash::XxHash64; let seed = 1234; let mut hasher = XxHash64::with_seed(seed); hasher.write(b"some"); hasher.write(b" "); hasher.write(b"bytes"); let hash = hasher.finish(); assert_eq!(0xeab5_5659_a496_d78b, hash); ``` ## In a [`HashMap`][] ### With a default seed ```rust use std::{collections::HashMap, hash::BuildHasherDefault}; use twox_hash::XxHash64; let mut hash = HashMap::<_, _, BuildHasherDefault>::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); ``` ### With a random seed ```rust use std::collections::HashMap; use twox_hash::xxhash64; let mut hash = HashMap::<_, _, xxhash64::RandomState>::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); ``` ### With a fixed seed ```rust use std::collections::HashMap; use twox_hash::xxhash64; let mut hash = HashMap::with_hasher(xxhash64::State::with_seed(0xdead_cafe)); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); ``` # 
Feature Flags | name | description | |-------------|-------------------------------------------------------------------------------------------------------------------------------| | xxhash32 | Include the [`XxHash32`][] algorithm | | xxhash64 | Include the [`XxHash64`][] algorithm | | xxhash3_64 | Include the [`XxHash3_64`][] algorithm | | xxhash3_128 | Include the [`XxHash3_128`][] algorithm | | random | Create random instances of the hashers | | serialize | Serialize and deserialize hasher state with Serde | | std | Use the Rust standard library. Enable this if you want SIMD support in [`XxHash3_64`][] or [`XxHash3_128`][] | | alloc | Use the Rust allocator library. Enable this if you want to create [`XxHash3_64`][] or [`XxHash3_128`][] with dynamic secrets | # Benchmarks See benchmarks in the [comparison][] README. [comparison]: https://github.com/shepmaster/twox-hash/tree/main/comparison # Contributing 1. Fork it () 2. Create your feature branch (`git checkout -b my-new-feature`) 3. Add a failing test. 4. Add code to pass the test. 5. Commit your changes (`git commit -am 'Add some feature'`) 6. Ensure tests pass. 7. Push to the branch (`git push origin my-new-feature`) 8. 
Create a new Pull Request [`Hashmap`]: std::collections::HashMap [`XxHash32`]: crate::XxHash32 [`XxHash64`]: crate::XxHash64 [`XxHash3_64`]: crate::XxHash3_64 [`XxHash3_128`]: crate::XxHash3_128 twox-hash-2.1.2/clippy.toml000064400000000000000000000000331046102023000137410ustar 00000000000000check-private-items = true twox-hash-2.1.2/src/lib.rs000064400000000000000000000042071046102023000134560ustar 00000000000000#![doc = include_str!("../README.md")] #![deny(rust_2018_idioms)] #![deny(missing_docs)] #![deny(unnameable_types)] #![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(docsrs, feature(doc_cfg))] #[cfg(all( feature = "alloc", any(feature = "xxhash3_64", feature = "xxhash3_128") ))] extern crate alloc; #[cfg(any(feature = "std", doc, test))] extern crate std; #[cfg(feature = "xxhash32")] #[cfg_attr(docsrs, doc(cfg(feature = "xxhash32")))] pub mod xxhash32; #[cfg(feature = "xxhash32")] #[cfg_attr(docsrs, doc(cfg(feature = "xxhash32")))] pub use xxhash32::Hasher as XxHash32; #[cfg(feature = "xxhash64")] #[cfg_attr(docsrs, doc(cfg(feature = "xxhash64")))] pub mod xxhash64; #[cfg(feature = "xxhash64")] #[cfg_attr(docsrs, doc(cfg(feature = "xxhash64")))] pub use xxhash64::Hasher as XxHash64; #[cfg(any(feature = "xxhash3_64", feature = "xxhash3_128"))] mod xxhash3; #[cfg(feature = "xxhash3_64")] #[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] pub mod xxhash3_64; #[cfg(feature = "xxhash3_64")] #[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_64")))] pub use xxhash3_64::Hasher as XxHash3_64; #[cfg(feature = "xxhash3_128")] #[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_128")))] pub mod xxhash3_128; #[cfg(feature = "xxhash3_128")] #[cfg_attr(docsrs, doc(cfg(feature = "xxhash3_128")))] pub use xxhash3_128::Hasher as XxHash3_128; #[allow(dead_code, reason = "Too lazy to cfg-gate these")] trait IntoU32 { fn into_u32(self) -> u32; } impl IntoU32 for u8 { fn into_u32(self) -> u32 { self.into() } } #[allow(dead_code, reason = "Too lazy to cfg-gate these")] trait 
IntoU64 { fn into_u64(self) -> u64; } impl IntoU64 for u8 { fn into_u64(self) -> u64 { self.into() } } impl IntoU64 for u32 { fn into_u64(self) -> u64 { self.into() } } #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] impl IntoU64 for usize { fn into_u64(self) -> u64 { self as u64 } } #[allow(dead_code, reason = "Too lazy to cfg-gate these")] trait IntoU128 { fn into_u128(self) -> u128; } impl IntoU128 for u64 { fn into_u128(self) -> u128 { u128::from(self) } } twox-hash-2.1.2/src/xxhash3/large/avx2.rs000064400000000000000000000055331046102023000162530ustar 00000000000000use core::arch::x86_64::*; use super::{scalar, Vector}; #[derive(Copy, Clone)] pub struct Impl(()); impl Impl { /// # Safety /// /// You must ensure that the CPU has the AVX2 feature #[inline] #[cfg(feature = "std")] pub unsafe fn new_unchecked() -> Impl { Impl(()) } } impl Vector for Impl { #[inline] fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { // Safety: Type can only be constructed when AVX2 feature is present unsafe { round_scramble_avx2(acc, secret_end) } } #[inline] fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { // Safety: Type can only be constructed when AVX2 feature is present unsafe { accumulate_avx2(acc, stripe, secret) } } } /// # Safety /// /// You must ensure that the CPU has the AVX2 feature #[inline] #[target_feature(enable = "avx2")] unsafe fn round_scramble_avx2(acc: &mut [u64; 8], secret_end: &[u8; 64]) { // The scalar implementation is autovectorized nicely enough scalar::Impl.round_scramble(acc, secret_end) } /// # Safety /// /// You must ensure that the CPU has the AVX2 feature #[inline] #[target_feature(enable = "avx2")] unsafe fn accumulate_avx2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { let acc = acc.as_mut_ptr().cast::<__m256i>(); let stripe = stripe.as_ptr().cast::<__m256i>(); let secret = secret.as_ptr().cast::<__m256i>(); // Safety: The caller has ensured we have the AVX2 // 
feature. We load from and store to references so we // know that data is valid. We use unaligned loads / // stores. Data manipulation is otherwise done on // intermediate values. unsafe { for i in 0..2 { // [align-acc]: The C code aligns the accumulator to avoid // the unaligned load and store here, but that doesn't // seem to be a big performance loss. let mut acc_0 = _mm256_loadu_si256(acc.add(i)); let stripe_0 = _mm256_loadu_si256(stripe.add(i)); let secret_0 = _mm256_loadu_si256(secret.add(i)); // let value[i] = stripe[i] ^ secret[i]; let value_0 = _mm256_xor_si256(stripe_0, secret_0); // stripe_swap[i] = stripe[i ^ 1] let stripe_swap_0 = _mm256_shuffle_epi32::<0b01_00_11_10>(stripe_0); // acc[i] += stripe_swap[i] acc_0 = _mm256_add_epi64(acc_0, stripe_swap_0); // value_shift[i] = value[i] >> 32 let value_shift_0 = _mm256_srli_epi64::<32>(value_0); // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) let product_0 = _mm256_mul_epu32(value_0, value_shift_0); // acc[i] += product[i] acc_0 = _mm256_add_epi64(acc_0, product_0); _mm256_storeu_si256(acc.add(i), acc_0); } } } twox-hash-2.1.2/src/xxhash3/large/neon.rs000064400000000000000000000156531046102023000163360ustar 00000000000000use core::arch::aarch64::*; use super::Vector; use crate::xxhash3::{primes::PRIME32_1, SliceBackport as _}; #[derive(Copy, Clone)] pub struct Impl(()); impl Impl { /// # Safety /// /// You must ensure that the CPU has the NEON feature #[inline] #[cfg(feature = "std")] pub unsafe fn new_unchecked() -> Self { Self(()) } } impl Vector for Impl { #[inline] fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { // Safety: Type can only be constructed when NEON feature is present unsafe { round_scramble_neon(acc, secret_end) } } #[inline] fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { // Safety: Type can only be constructed when NEON feature is present unsafe { accumulate_neon(acc, stripe, secret) } } } /// # Safety /// /// You 
must ensure that the CPU has the NEON feature #[target_feature(enable = "neon")] #[inline] unsafe fn round_scramble_neon(acc: &mut [u64; 8], secret_end: &[u8; 64]) { let secret_base = secret_end.as_ptr().cast::(); let (acc, _) = acc.bp_as_chunks_mut::<2>(); for (i, acc) in acc.iter_mut().enumerate() { // Safety: The caller has ensured we have the NEON // feature. We load from and store to references so we // know that data is valid. We use unaligned loads / // stores. Data manipulation is otherwise done on // intermediate values. unsafe { let mut accv = vld1q_u64(acc.as_ptr()); let secret = vld1q_u64(secret_base.add(i * 2)); // tmp[i] = acc[i] >> 47 let shifted = vshrq_n_u64::<47>(accv); // acc[i] ^= tmp[i] accv = veorq_u64(accv, shifted); // acc[i] ^= secret[i] accv = veorq_u64(accv, secret); // acc[i] *= PRIME32_1 accv = xx_vmulq_u32_u64(accv, PRIME32_1 as u32); vst1q_u64(acc.as_mut_ptr(), accv); } } } /// We process 4x u64 at a time as that allows us to completely /// fill a `uint64x2_t` with useful values when performing the /// multiplication. /// /// # Safety /// /// You must ensure that the CPU has the NEON feature #[target_feature(enable = "neon")] #[inline] unsafe fn accumulate_neon(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { let (acc2, _) = acc.bp_as_chunks_mut::<4>(); for (i, acc) in acc2.iter_mut().enumerate() { // Safety: The caller has ensured we have the NEON // feature. We load from and store to references so we // know that data is valid. We use unaligned loads / // stores. Data manipulation is otherwise done on // intermediate values. 
unsafe { let mut accv_0 = vld1q_u64(acc.as_ptr().cast::()); let mut accv_1 = vld1q_u64(acc.as_ptr().cast::().add(2)); let stripe_0 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4)); let stripe_1 = vld1q_u64(stripe.as_ptr().cast::().add(i * 4 + 2)); let secret_0 = vld1q_u64(secret.as_ptr().cast::().add(i * 4)); let secret_1 = vld1q_u64(secret.as_ptr().cast::().add(i * 4 + 2)); // stripe_rot[i ^ 1] = stripe[i]; let stripe_rot_0 = vextq_u64::<1>(stripe_0, stripe_0); let stripe_rot_1 = vextq_u64::<1>(stripe_1, stripe_1); // value[i] = stripe[i] ^ secret[i]; let value_0 = veorq_u64(stripe_0, secret_0); let value_1 = veorq_u64(stripe_1, secret_1); // sum[i] = value[i] * (value[i] >> 32) + stripe_rot[i] // // Each vector has 64-bit values, but we treat them as // 32-bit and then unzip them. This naturally splits // the upper and lower 32 bits. let parts_0 = vreinterpretq_u32_u64(value_0); let parts_1 = vreinterpretq_u32_u64(value_1); let hi = vuzp1q_u32(parts_0, parts_1); let lo = vuzp2q_u32(parts_0, parts_1); let sum_0 = vmlal_u32(stripe_rot_0, vget_low_u32(hi), vget_low_u32(lo)); let sum_1 = vmlal_high_u32(stripe_rot_1, hi, lo); reordering_barrier(sum_0); reordering_barrier(sum_1); // acc[i] += sum[i] accv_0 = vaddq_u64(accv_0, sum_0); accv_1 = vaddq_u64(accv_1, sum_1); vst1q_u64(acc.as_mut_ptr().cast::(), accv_0); vst1q_u64(acc.as_mut_ptr().cast::().add(2), accv_1); }; } } // There is no `vmulq_u64` (multiply 64-bit by 64-bit, keeping the // lower 64 bits of the result) operation, so we have to make our // own out of 32-bit operations . We can simplify by realizing // that we are always multiplying by a 32-bit number. // // The basic algorithm is traditional long multiplication. `[]` // denotes groups of 32 bits. 
// // [AAAA][BBBB] // x [CCCC] // -------------------- // [BCBC][BCBC] // + [ACAC][ACAC] // -------------------- // [ACBC][BCBC] // 64-bit truncation occurs // // This can be written in NEON as a vectorwise wrapping // multiplication of the high-order chunk of the input (`A`) // against the constant and then a multiply-widen-and-accumulate // of the low-order chunk of the input and the constant: // // 1. High-order, vectorwise // // [AAAA][BBBB] // x [CCCC][0000] // -------------------- // [ACAC][0000] // // 2. Low-order, widening // // [BBBB] // x [CCCC] // widening // -------------------- // [BCBC][BCBC] // // 3. Accumulation // // [ACAC][0000] // + [BCBC][BCBC] // vectorwise // -------------------- // [ACBC][BCBC] // // Thankfully, NEON has a single multiply-widen-and-accumulate // operation. #[inline] pub fn xx_vmulq_u32_u64(input: uint64x2_t, og_factor: u32) -> uint64x2_t { // Safety: We only compute using our argument values and do // not change memory. unsafe { let input_as_u32 = vreinterpretq_u32_u64(input); let factor = vmov_n_u32(og_factor); let factor_striped = vmovq_n_u64(u64::from(og_factor) << 32); let factor_striped = vreinterpretq_u32_u64(factor_striped); let high_shifted_as_32 = vmulq_u32(input_as_u32, factor_striped); let high_shifted = vreinterpretq_u64_u32(high_shifted_as_32); let input_lo = vmovn_u64(input); vmlal_u32(high_shifted, input_lo, factor) } } /// # Safety /// /// You must ensure that the CPU has the NEON feature // // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5312-L5323 #[inline] #[target_feature(enable = "neon")] unsafe fn reordering_barrier(r: uint64x2_t) { // Safety: The caller has ensured we have the NEON feature. We // aren't doing anything with the argument, so we shouldn't be // able to cause unsafety! 
unsafe { core::arch::asm!( "/* {r:v} */", r = in(vreg) r, options(nomem, nostack), ) } } twox-hash-2.1.2/src/xxhash3/large/scalar.rs000064400000000000000000000037671046102023000166470ustar 00000000000000use super::Vector; use crate::xxhash3::{primes::PRIME32_1, SliceBackport as _}; #[derive(Copy, Clone)] pub struct Impl; impl Vector for Impl { #[inline] fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { let (last, _) = secret_end.bp_as_chunks(); let last = last.iter().copied().map(u64::from_le_bytes); for (acc, secret) in acc.iter_mut().zip(last) { *acc ^= *acc >> 47; *acc ^= secret; *acc = acc.wrapping_mul(PRIME32_1); } } #[inline] fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { let (stripe, _) = stripe.bp_as_chunks(); let (secret, _) = secret.bp_as_chunks(); for i in 0..8 { let stripe = u64::from_le_bytes(stripe[i]); let secret = u64::from_le_bytes(secret[i]); let value = stripe ^ secret; acc[i ^ 1] = acc[i ^ 1].wrapping_add(stripe); acc[i] = multiply_64_as_32_and_add(value, value >> 32, acc[i]); } } } #[inline] #[cfg(any(miri, not(target_arch = "aarch64")))] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { use super::IntoU64; let lhs = (lhs as u32).into_u64(); let rhs = (rhs as u32).into_u64(); let product = lhs.wrapping_mul(rhs); acc.wrapping_add(product) } #[inline] // https://github.com/Cyan4973/xxHash/blob/d5fe4f54c47bc8b8e76c6da9146c32d5c720cd79/xxhash.h#L5595-L5610 // https://github.com/llvm/llvm-project/issues/98481 #[cfg(all(not(miri), target_arch = "aarch64"))] fn multiply_64_as_32_and_add(lhs: u64, rhs: u64, acc: u64) -> u64 { let res; // Safety: We only compute using our argument values and do // not change memory. 
unsafe { core::arch::asm!( "umaddl {res}, {lhs:w}, {rhs:w}, {acc}", lhs = in(reg) lhs, rhs = in(reg) rhs, acc = in(reg) acc, res = out(reg) res, options(pure, nomem, nostack), ) } res } twox-hash-2.1.2/src/xxhash3/large/sse2.rs000064400000000000000000000052411046102023000162430ustar 00000000000000use core::arch::x86_64::*; use super::{scalar, Vector}; #[derive(Copy, Clone)] pub struct Impl(()); impl Impl { /// # Safety /// /// You must ensure that the CPU has the SSE2 feature #[inline] #[cfg(feature = "std")] pub unsafe fn new_unchecked() -> Impl { Impl(()) } } impl Vector for Impl { #[inline] fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]) { // Safety: Type can only be constructed when SSE2 feature is present unsafe { round_scramble_sse2(acc, secret_end) } } #[inline] fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { // Safety: Type can only be constructed when SSE2 feature is present unsafe { accumulate_sse2(acc, stripe, secret) } } } /// # Safety /// /// You must ensure that the CPU has the SSE2 feature #[inline] #[target_feature(enable = "sse2")] unsafe fn round_scramble_sse2(acc: &mut [u64; 8], secret_end: &[u8; 64]) { // The scalar implementation is autovectorized nicely enough scalar::Impl.round_scramble(acc, secret_end) } /// # Safety /// /// You must ensure that the CPU has the SSE2 feature #[inline] #[target_feature(enable = "sse2")] unsafe fn accumulate_sse2(acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]) { let acc = acc.as_mut_ptr().cast::<__m128i>(); let stripe = stripe.as_ptr().cast::<__m128i>(); let secret = secret.as_ptr().cast::<__m128i>(); // Safety: The caller has ensured we have the SSE2 // feature. We load from and store to references so we // know that data is valid. We use unaligned loads / // stores. Data manipulation is otherwise done on // intermediate values. unsafe { for i in 0..4 { // See [align-acc]. 
let mut acc_0 = _mm_loadu_si128(acc.add(i)); let stripe_0 = _mm_loadu_si128(stripe.add(i)); let secret_0 = _mm_loadu_si128(secret.add(i)); // let value[i] = stripe[i] ^ secret[i]; let value_0 = _mm_xor_si128(stripe_0, secret_0); // stripe_swap[i] = stripe[i ^ 1] let stripe_swap_0 = _mm_shuffle_epi32::<0b01_00_11_10>(stripe_0); // acc[i] += stripe_swap[i] acc_0 = _mm_add_epi64(acc_0, stripe_swap_0); // value_shift[i] = value[i] >> 32 let value_shift_0 = _mm_srli_epi64::<32>(value_0); // product[i] = lower_32_bit(value[i]) * lower_32_bit(value_shift[i]) let product_0 = _mm_mul_epu32(value_0, value_shift_0); // acc[i] += product[i] acc_0 = _mm_add_epi64(acc_0, product_0); _mm_storeu_si128(acc.add(i), acc_0); } } } twox-hash-2.1.2/src/xxhash3/large.rs000064400000000000000000000221601046102023000153660ustar 00000000000000use super::{ assert_input_range, avalanche, primes::*, stripes_with_tail, Halves, Secret, SliceBackport as _, }; #[cfg(feature = "xxhash3_128")] use super::X128; use crate::{IntoU128, IntoU64}; // This module is not `cfg`-gated because it is used by some of the // SIMD implementations. pub mod scalar; #[cfg(all(target_arch = "aarch64", feature = "std"))] pub mod neon; #[cfg(all(target_arch = "x86_64", feature = "std"))] pub mod avx2; #[cfg(all(target_arch = "x86_64", feature = "std"))] pub mod sse2; macro_rules! dispatch { ( fn $fn_name:ident<$($gen:ident),*>($($arg_name:ident : $arg_ty:ty),*) $(-> $ret_ty:ty)? [$($wheres:tt)*] ) => { #[inline] fn do_scalar<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? where $($wheres)* { $fn_name($crate::xxhash3::large::scalar::Impl, $($arg_name),*) } /// # Safety /// /// You must ensure that the CPU has the NEON feature #[inline] #[target_feature(enable = "neon")] #[cfg(all(target_arch = "aarch64", feature = "std"))] unsafe fn do_neon<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? 
where $($wheres)* { // Safety: The caller has ensured we have the NEON feature unsafe { $fn_name($crate::xxhash3::large::neon::Impl::new_unchecked(), $($arg_name),*) } } /// # Safety /// /// You must ensure that the CPU has the AVX2 feature #[inline] #[target_feature(enable = "avx2")] #[cfg(all(target_arch = "x86_64", feature = "std"))] unsafe fn do_avx2<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? where $($wheres)* { // Safety: The caller has ensured we have the AVX2 feature unsafe { $fn_name($crate::xxhash3::large::avx2::Impl::new_unchecked(), $($arg_name),*) } } /// # Safety /// /// You must ensure that the CPU has the SSE2 feature #[inline] #[target_feature(enable = "sse2")] #[cfg(all(target_arch = "x86_64", feature = "std"))] unsafe fn do_sse2<$($gen),*>($($arg_name : $arg_ty),*) $(-> $ret_ty)? where $($wheres)* { // Safety: The caller has ensured we have the SSE2 feature unsafe { $fn_name($crate::xxhash3::large::sse2::Impl::new_unchecked(), $($arg_name),*) } } // Now we invoke the right function #[cfg(_internal_xxhash3_force_neon)] return unsafe { do_neon($($arg_name),*) }; #[cfg(_internal_xxhash3_force_avx2)] return unsafe { do_avx2($($arg_name),*) }; #[cfg(_internal_xxhash3_force_sse2)] return unsafe { do_sse2($($arg_name),*) }; #[cfg(_internal_xxhash3_force_scalar)] return do_scalar($($arg_name),*); // This code can be unreachable if one of the `*_force_*` cfgs // are set above, but that's the point. 
#[allow(unreachable_code)] { #[cfg(all(target_arch = "aarch64", feature = "std"))] { if std::arch::is_aarch64_feature_detected!("neon") { // Safety: We just ensured we have the NEON feature return unsafe { do_neon($($arg_name),*) }; } } #[cfg(all(target_arch = "x86_64", feature = "std"))] { if is_x86_feature_detected!("avx2") { // Safety: We just ensured we have the AVX2 feature return unsafe { do_avx2($($arg_name),*) }; } else if is_x86_feature_detected!("sse2") { // Safety: We just ensured we have the SSE2 feature return unsafe { do_sse2($($arg_name),*) }; } } do_scalar($($arg_name),*) } }; } pub(crate) use dispatch; pub trait Vector: Copy { fn round_scramble(&self, acc: &mut [u64; 8], secret_end: &[u8; 64]); fn accumulate(&self, acc: &mut [u64; 8], stripe: &[u8; 64], secret: &[u8; 64]); } #[rustfmt::skip] pub const INITIAL_ACCUMULATORS: [u64; 8] = [ PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3, PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1, ]; pub struct Algorithm(pub V); impl Algorithm where V: Vector, { #[inline] pub fn oneshot(&self, secret: &Secret, input: &[u8], finalize: F) -> F::Output where F: super::Finalize, { assert_input_range!(241.., input.len()); let mut acc = INITIAL_ACCUMULATORS; let stripes_per_block = (secret.len() - 64) / 8; let block_size = 64 * stripes_per_block; let mut blocks = input.chunks_exact(block_size); let last_block = if blocks.remainder().is_empty() { // Safety: We know that `input` is non-empty, which means // that either there will be a remainder or one or more // full blocks. That info isn't flowing to the optimizer, // so we use `unwrap_unchecked`. 
unsafe { blocks.next_back().unwrap_unchecked() } } else { blocks.remainder() }; self.rounds(&mut acc, blocks, secret); let len = input.len(); let last_stripe = input.last_chunk().unwrap(); finalize.large(self.0, acc, last_block, last_stripe, secret, len) } #[inline] fn rounds<'a>( &self, acc: &mut [u64; 8], blocks: impl IntoIterator, secret: &Secret, ) { for block in blocks { let (stripes, _) = block.bp_as_chunks(); self.round(acc, stripes, secret); } } #[inline] fn round(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &Secret) { let secret_end = secret.last_stripe(); self.round_accumulate(acc, stripes, secret); self.0.round_scramble(acc, secret_end); } #[inline] fn round_accumulate(&self, acc: &mut [u64; 8], stripes: &[[u8; 64]], secret: &Secret) { let secrets = (0..stripes.len()).map(|i| { // Safety: The number of stripes is determined by the // block size, which is determined by the secret size. unsafe { secret.stripe(i) } }); for (stripe, secret) in stripes.iter().zip(secrets) { self.0.accumulate(acc, stripe, secret); } } #[inline(always)] #[cfg(feature = "xxhash3_64")] pub fn finalize_64( &self, mut acc: [u64; 8], last_block: &[u8], last_stripe: &[u8; 64], secret: &Secret, len: usize, ) -> u64 { debug_assert!(!last_block.is_empty()); self.last_round(&mut acc, last_block, last_stripe, secret); let low = len.into_u64().wrapping_mul(PRIME64_1); self.final_merge(&acc, low, secret.final_secret()) } #[inline] #[cfg(feature = "xxhash3_128")] pub fn finalize_128( &self, mut acc: [u64; 8], last_block: &[u8], last_stripe: &[u8; 64], secret: &Secret, len: usize, ) -> u128 { debug_assert!(!last_block.is_empty()); self.last_round(&mut acc, last_block, last_stripe, secret); let len = len.into_u64(); let low = len.wrapping_mul(PRIME64_1); let low = self.final_merge(&acc, low, secret.final_secret()); let high = !len.wrapping_mul(PRIME64_2); let high = self.final_merge(&acc, high, secret.for_128().final_secret()); X128 { low, high }.into() } #[inline] fn last_round( 
&self, acc: &mut [u64; 8], block: &[u8], last_stripe: &[u8; 64], secret: &Secret, ) { // Accumulation steps are run for the stripes in the last block, // except for the last stripe (whether it is full or not) let (stripes, _) = stripes_with_tail(block); let secrets = (0..stripes.len()).map(|i| { // Safety: The number of stripes is determined by the // block size, which is determined by the secret size. unsafe { secret.stripe(i) } }); for (stripe, secret) in stripes.iter().zip(secrets) { self.0.accumulate(acc, stripe, secret); } let last_stripe_secret = secret.last_stripe_secret_better_name(); self.0.accumulate(acc, last_stripe, last_stripe_secret); } #[inline] fn final_merge(&self, acc: &[u64; 8], init_value: u64, secret: &[u8; 64]) -> u64 { let (secrets, _) = secret.bp_as_chunks(); let mut result = init_value; for i in 0..4 { // 64-bit by 64-bit multiplication to 128-bit full result let mul_result = { let sa = u64::from_le_bytes(secrets[i * 2]); let sb = u64::from_le_bytes(secrets[i * 2 + 1]); let a = (acc[i * 2] ^ sa).into_u128(); let b = (acc[i * 2 + 1] ^ sb).into_u128(); a.wrapping_mul(b) }; result = result.wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); } avalanche(result) } } twox-hash-2.1.2/src/xxhash3/secret.rs000064400000000000000000000150531046102023000155640ustar 00000000000000use core::{hint::assert_unchecked, mem}; use super::SliceBackport as _; #[cfg(feature = "xxhash3_128")] use super::pairs_of_u64_bytes; /// The minimum length of a secret. pub const SECRET_MINIMUM_LENGTH: usize = 136; #[repr(transparent)] pub struct Secret([u8]); impl Secret { #[inline] pub fn new(bytes: &[u8]) -> Result<&Self, Error> { // Safety: We check for validity before returning. unsafe { let this = Self::new_unchecked(bytes); if this.is_valid() { Ok(this) } else { Err(Error(())) } } } /// # Safety /// /// You must ensure that the secret byte length is >= /// SECRET_MINIMUM_LENGTH. 
#[inline] pub const unsafe fn new_unchecked(bytes: &[u8]) -> &Self { // Safety: We are `#[repr(transparent)]`. It's up to the // caller to ensure the length unsafe { mem::transmute(bytes) } } #[inline] #[cfg(feature = "xxhash3_64")] pub fn for_64(&self) -> Secret64BitView<'_> { Secret64BitView(self) } #[inline] #[cfg(feature = "xxhash3_128")] pub fn for_128(&self) -> Secret128BitView<'_> { Secret128BitView(self) } #[inline] pub fn words_for_17_to_128(&self) -> &[[u8; 16]] { self.reassert_preconditions(); let (words, _) = self.0.bp_as_chunks(); words } /// # Safety /// /// `i` must be less than the number of stripes in the secret /// ([`Self::n_stripes`][]). #[inline] pub unsafe fn stripe(&self, i: usize) -> &[u8; 64] { self.reassert_preconditions(); // Safety: The caller has ensured that `i` is // in-bounds. `&[u8]` and `&[u8; 64]` have the same alignment. unsafe { debug_assert!(i < self.n_stripes()); &*self.0.get_unchecked(i * 8..).as_ptr().cast() } } #[inline] pub fn last_stripe(&self) -> &[u8; 64] { self.reassert_preconditions(); self.0.last_chunk().unwrap() } #[inline] pub fn last_stripe_secret_better_name(&self) -> &[u8; 64] { self.reassert_preconditions(); self.0[self.0.len() - 71..].first_chunk().unwrap() } #[inline] pub fn final_secret(&self) -> &[u8; 64] { self.reassert_preconditions(); self.0[11..].first_chunk().unwrap() } #[inline] pub fn len(&self) -> usize { self.0.len() } #[inline] pub fn n_stripes(&self) -> usize { // stripes_per_block (self.len() - 64) / 8 } #[inline(always)] fn reassert_preconditions(&self) { // Safety: The length of the bytes was checked at value // construction time. 
unsafe { debug_assert!(self.is_valid()); assert_unchecked(self.is_valid()); } } #[inline(always)] pub fn is_valid(&self) -> bool { self.0.len() >= SECRET_MINIMUM_LENGTH } } #[derive(Copy, Clone)] #[cfg(feature = "xxhash3_64")] pub struct Secret64BitView<'a>(&'a Secret); #[cfg(feature = "xxhash3_64")] impl<'a> Secret64BitView<'a> { #[inline] pub fn words_for_0(self) -> [u64; 2] { self.0.reassert_preconditions(); let (q, _) = self.b()[56..].bp_as_chunks(); [q[0], q[1]].map(u64::from_le_bytes) } #[inline] pub fn words_for_1_to_3(self) -> [u32; 2] { self.0.reassert_preconditions(); let (q, _) = self.b().bp_as_chunks(); [q[0], q[1]].map(u32::from_le_bytes) } #[inline] pub fn words_for_4_to_8(self) -> [u64; 2] { self.0.reassert_preconditions(); let (q, _) = self.b()[8..].bp_as_chunks(); [q[0], q[1]].map(u64::from_le_bytes) } #[inline] pub fn words_for_9_to_16(self) -> [u64; 4] { self.0.reassert_preconditions(); let (q, _) = self.b()[24..].bp_as_chunks(); [q[0], q[1], q[2], q[3]].map(u64::from_le_bytes) } #[inline] pub fn words_for_127_to_240_part1(self) -> &'a [[u8; 16]] { self.0.reassert_preconditions(); let (ss, _) = self.b().bp_as_chunks(); ss } #[inline] pub fn words_for_127_to_240_part2(self) -> &'a [[u8; 16]] { self.0.reassert_preconditions(); let (ss, _) = self.b()[3..].bp_as_chunks(); ss } #[inline] pub fn words_for_127_to_240_part3(self) -> &'a [u8; 16] { self.0.reassert_preconditions(); self.b()[119..].first_chunk().unwrap() } fn b(self) -> &'a [u8] { &(self.0).0 } } #[derive(Copy, Clone)] #[cfg(feature = "xxhash3_128")] pub struct Secret128BitView<'a>(&'a Secret); #[cfg(feature = "xxhash3_128")] impl<'a> Secret128BitView<'a> { #[inline] pub fn words_for_0(self) -> [u64; 4] { self.0.reassert_preconditions(); let (q, _) = self.b()[64..].bp_as_chunks(); [q[0], q[1], q[2], q[3]].map(u64::from_le_bytes) } #[inline] pub fn words_for_1_to_3(self) -> [u32; 4] { self.0.reassert_preconditions(); let (q, _) = self.b().bp_as_chunks(); [q[0], q[1], q[2], 
q[3]].map(u32::from_le_bytes) } #[inline] pub fn words_for_4_to_8(self) -> [u64; 2] { self.0.reassert_preconditions(); let (q, _) = self.b()[16..].bp_as_chunks(); [q[0], q[1]].map(u64::from_le_bytes) } #[inline] pub fn words_for_9_to_16(self) -> [u64; 4] { self.0.reassert_preconditions(); let (q, _) = self.b()[32..].bp_as_chunks(); [q[0], q[1], q[2], q[3]].map(u64::from_le_bytes) } #[inline] pub fn words_for_127_to_240_part1(self) -> &'a [[[u8; 16]; 2]] { self.0.reassert_preconditions(); pairs_of_u64_bytes(self.b()) } #[inline] pub fn words_for_127_to_240_part2(self) -> &'a [[[u8; 16]; 2]] { self.0.reassert_preconditions(); pairs_of_u64_bytes(&self.b()[3..]) } #[inline] pub fn words_for_127_to_240_part3(self) -> &'a [[u8; 16]; 2] { self.0.reassert_preconditions(); pairs_of_u64_bytes(&self.b()[103..]).first().unwrap() } #[inline] pub fn final_secret(self) -> &'a [u8; 64] { self.0.reassert_preconditions(); let b = self.b(); b[b.len() - 75..].first_chunk().unwrap() } fn b(self) -> &'a [u8] { &(self.0).0 } } #[derive(Debug)] pub struct Error(()); impl core::error::Error for Error {} impl core::fmt::Display for Error { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!( f, "The secret must have at least {SECRET_MINIMUM_LENGTH} bytes" ) } } twox-hash-2.1.2/src/xxhash3/streaming.rs000064400000000000000000000364161046102023000162760ustar 00000000000000use core::hint::assert_unchecked; use super::{large::INITIAL_ACCUMULATORS, *}; /// A buffer containing the secret bytes. /// /// # Safety /// /// Must always return a slice with the same number of elements. pub unsafe trait FixedBuffer: AsRef<[u8]> {} /// A mutable buffer to contain the secret bytes. /// /// # Safety /// /// Must always return a slice with the same number of elements. The /// slice must always be the same as that returned from /// [`AsRef::as_ref`][]. pub unsafe trait FixedMutBuffer: FixedBuffer + AsMut<[u8]> {} // Safety: An array will never change size. 
unsafe impl FixedBuffer for [u8; N] {} // Safety: An array will never change size. unsafe impl FixedMutBuffer for [u8; N] {} // Safety: An array will never change size. unsafe impl FixedBuffer for &[u8; N] {} // Safety: An array will never change size. unsafe impl FixedBuffer for &mut [u8; N] {} // Safety: An array will never change size. unsafe impl FixedMutBuffer for &mut [u8; N] {} const STRIPE_BYTES: usize = 64; const BUFFERED_STRIPES: usize = 4; const BUFFERED_BYTES: usize = STRIPE_BYTES * BUFFERED_STRIPES; type Buffer = [u8; BUFFERED_BYTES]; // Ensure that a full buffer always implies we are in the 241+ byte case. const _: () = assert!(BUFFERED_BYTES > CUTOFF); /// Holds secret and temporary buffers that are ensured to be /// appropriately sized. #[derive(Clone)] pub struct SecretBuffer { seed: u64, secret: S, buffer: Buffer, } impl SecretBuffer { /// Returns the secret. pub fn into_secret(self) -> S { self.secret } } impl SecretBuffer where S: FixedBuffer, { /// Takes the seed, secret, and buffer and performs no /// modifications to them, only validating that the sizes are /// appropriate. pub fn new(seed: u64, secret: S) -> Result> { match Secret::new(secret.as_ref()) { Ok(_) => Ok(Self { seed, secret, buffer: [0; BUFFERED_BYTES], }), Err(e) => Err(SecretTooShortError(e, secret)), } } #[inline(always)] #[cfg(test)] fn is_valid(&self) -> bool { let secret = self.secret.as_ref(); secret.len() >= SECRET_MINIMUM_LENGTH } #[inline] fn n_stripes(&self) -> usize { Self::secret(&self.secret).n_stripes() } #[inline] fn parts(&self) -> (u64, &Secret, &Buffer) { (self.seed, Self::secret(&self.secret), &self.buffer) } #[inline] fn parts_mut(&mut self) -> (u64, &Secret, &mut Buffer) { (self.seed, Self::secret(&self.secret), &mut self.buffer) } fn secret(secret: &S) -> &Secret { let secret = secret.as_ref(); // Safety: We established the length at construction and the // length is not allowed to change. 
unsafe { Secret::new_unchecked(secret) } } } impl SecretBuffer where S: FixedMutBuffer, { /// Fills the secret buffer with a secret derived from the seed /// and the default secret. The secret must be exactly /// [`DEFAULT_SECRET_LENGTH`][] bytes long. pub fn with_seed(seed: u64, mut secret: S) -> Result> { match <&mut DefaultSecret>::try_from(secret.as_mut()) { Ok(secret_slice) => { *secret_slice = DEFAULT_SECRET_RAW; derive_secret(seed, secret_slice); Ok(Self { seed, secret, buffer: [0; BUFFERED_BYTES], }) } Err(_) => Err(SecretWithSeedError(secret)), } } } impl SecretBuffer<&'static [u8; DEFAULT_SECRET_LENGTH]> { /// Use the default seed and secret values while allocating nothing. #[inline] pub const fn default() -> Self { SecretBuffer { seed: DEFAULT_SEED, secret: &DEFAULT_SECRET_RAW, buffer: [0; BUFFERED_BYTES], } } } #[derive(Clone)] pub struct RawHasherCore { secret_buffer: SecretBuffer, buffer_usage: usize, stripe_accumulator: StripeAccumulator, total_bytes: usize, } impl RawHasherCore { pub fn new(secret_buffer: SecretBuffer) -> Self { Self { secret_buffer, buffer_usage: 0, stripe_accumulator: StripeAccumulator::new(), total_bytes: 0, } } pub fn into_secret(self) -> S { self.secret_buffer.into_secret() } } impl RawHasherCore where S: FixedBuffer, { #[inline] pub fn write(&mut self, input: &[u8]) { let this = self; dispatch! { fn write_impl(this: &mut RawHasherCore, input: &[u8]) [S: FixedBuffer] } } #[inline] pub fn finish(&self, finalize: F) -> F::Output where F: Finalize, { let this = self; dispatch! { fn finish_impl(this: &RawHasherCore, finalize: F) -> F::Output [S: FixedBuffer, F: Finalize] } } } #[inline(always)] fn write_impl(vector: impl Vector, this: &mut RawHasherCore, mut input: &[u8]) where S: FixedBuffer, { if input.is_empty() { return; } let RawHasherCore { secret_buffer, buffer_usage, stripe_accumulator, total_bytes, .. 
} = this; let n_stripes = secret_buffer.n_stripes(); let (_, secret, buffer) = secret_buffer.parts_mut(); *total_bytes += input.len(); // Safety: This is an invariant of the buffer. unsafe { debug_assert!(*buffer_usage <= buffer.len()); assert_unchecked(*buffer_usage <= buffer.len()) }; // We have some previous data saved; try to fill it up and process it first if !buffer.is_empty() { let remaining = &mut buffer[*buffer_usage..]; let n_to_copy = usize::min(remaining.len(), input.len()); let (remaining_head, remaining_tail) = remaining.split_at_mut(n_to_copy); let (input_head, input_tail) = input.split_at(n_to_copy); remaining_head.copy_from_slice(input_head); *buffer_usage += n_to_copy; input = input_tail; // We did not fill up the buffer if !remaining_tail.is_empty() { return; } // We don't know this isn't the last of the data if input.is_empty() { return; } let (stripes, _) = buffer.bp_as_chunks(); for stripe in stripes { stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret); } *buffer_usage = 0; } debug_assert!(*buffer_usage == 0); // Process as much of the input data in-place as possible, // while leaving at least one full stripe for the // finalization. if let Some(len) = input.len().checked_sub(STRIPE_BYTES) { let full_block_point = (len / STRIPE_BYTES) * STRIPE_BYTES; // Safety: We know that `full_block_point` must be less than // `input.len()` as we subtracted and then integer-divided // (which rounds down) and then multiplied back. That's not // evident to the compiler and `split_at` results in a // potential panic. // // https://github.com/llvm/llvm-project/issues/104827 let (stripes, remainder) = unsafe { input.split_at_unchecked(full_block_point) }; let (stripes, _) = stripes.bp_as_chunks(); for stripe in stripes { stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret) } input = remainder; } // Any remaining data has to be less than the buffer, and the // buffer is empty so just fill up the buffer. 
debug_assert!(*buffer_usage == 0); debug_assert!(!input.is_empty()); // Safety: We have parsed all the full blocks of input except one // and potentially a full block minus one byte. That amount of // data must be less than the buffer. let buffer_head = unsafe { debug_assert!(input.len() < 2 * STRIPE_BYTES); debug_assert!(2 * STRIPE_BYTES < buffer.len()); buffer.get_unchecked_mut(..input.len()) }; buffer_head.copy_from_slice(input); *buffer_usage = input.len(); } #[inline(always)] fn finish_impl(vector: impl Vector, this: &RawHasherCore, finalize: F) -> F::Output where S: FixedBuffer, F: Finalize, { let RawHasherCore { ref secret_buffer, buffer_usage, mut stripe_accumulator, total_bytes, } = *this; let n_stripes = secret_buffer.n_stripes(); let (seed, secret, buffer) = secret_buffer.parts(); // Safety: This is an invariant of the buffer. unsafe { debug_assert!(buffer_usage <= buffer.len()); assert_unchecked(buffer_usage <= buffer.len()) }; if total_bytes > CUTOFF { let input = &buffer[..buffer_usage]; // Ingest final stripes let (stripes, remainder) = stripes_with_tail(input); for stripe in stripes { stripe_accumulator.process_stripe(vector, stripe, n_stripes, secret); } let mut temp = [0; 64]; let last_stripe = match input.last_chunk() { Some(chunk) => chunk, None => { let n_to_reuse = 64 - input.len(); let to_reuse = buffer.len() - n_to_reuse; let (temp_head, temp_tail) = temp.split_at_mut(n_to_reuse); temp_head.copy_from_slice(&buffer[to_reuse..]); temp_tail.copy_from_slice(input); &temp } }; finalize.large( vector, stripe_accumulator.accumulator, remainder, last_stripe, secret, total_bytes, ) } else { finalize.small(DEFAULT_SECRET, seed, &buffer[..total_bytes]) } } pub trait Finalize { type Output; fn small(&self, secret: &Secret, seed: u64, input: &[u8]) -> Self::Output; fn large( &self, vector: impl Vector, acc: [u64; 8], last_block: &[u8], last_stripe: &[u8; 64], secret: &Secret, len: usize, ) -> Self::Output; } #[cfg(feature = "alloc")] #[cfg_attr(docsrs, 
doc(cfg(feature = "alloc")))] pub mod with_alloc { use ::alloc::boxed::Box; use super::*; // Safety: A plain slice will never change size. unsafe impl FixedBuffer for Box<[u8]> {} // Safety: A plain slice will never change size. unsafe impl FixedMutBuffer for Box<[u8]> {} type AllocSecretBuffer = SecretBuffer>; impl AllocSecretBuffer { /// Allocates the secret and temporary buffers and fills them /// with the default seed and secret values. pub fn allocate_default() -> Self { Self { seed: DEFAULT_SEED, secret: DEFAULT_SECRET_RAW.to_vec().into(), buffer: [0; BUFFERED_BYTES], } } /// Allocates the secret and temporary buffers and uses the /// provided seed to construct the secret value. pub fn allocate_with_seed(seed: u64) -> Self { let mut secret = DEFAULT_SECRET_RAW; derive_secret(seed, &mut secret); Self { seed, secret: secret.to_vec().into(), buffer: [0; BUFFERED_BYTES], } } /// Allocates the temporary buffer and uses the provided seed /// and secret buffer. pub fn allocate_with_seed_and_secret( seed: u64, secret: impl Into>, ) -> Result>> { Self::new(seed, secret.into()) } } pub type AllocRawHasher = RawHasherCore>; impl AllocRawHasher { pub fn allocate_default() -> Self { Self::new(SecretBuffer::allocate_default()) } pub fn allocate_with_seed(seed: u64) -> Self { Self::new(SecretBuffer::allocate_with_seed(seed)) } pub fn allocate_with_seed_and_secret( seed: u64, secret: impl Into>, ) -> Result>> { SecretBuffer::allocate_with_seed_and_secret(seed, secret).map(Self::new) } } } #[cfg(feature = "alloc")] pub use with_alloc::AllocRawHasher; /// Tracks which stripe we are currently on to know which part of the /// secret we should be using. 
#[derive(Copy, Clone)] pub struct StripeAccumulator { pub accumulator: [u64; 8], current_stripe: usize, } impl StripeAccumulator { pub fn new() -> Self { Self { accumulator: INITIAL_ACCUMULATORS, current_stripe: 0, } } #[inline] pub fn process_stripe( &mut self, vector: impl Vector, stripe: &[u8; 64], n_stripes: usize, secret: &Secret, ) { let Self { accumulator, current_stripe, .. } = self; // For each stripe // Safety: The number of stripes is determined by the // block size, which is determined by the secret size. let secret_stripe = unsafe { secret.stripe(*current_stripe) }; vector.accumulate(accumulator, stripe, secret_stripe); *current_stripe += 1; // After a full block's worth if *current_stripe == n_stripes { let secret_end = secret.last_stripe(); vector.round_scramble(accumulator, secret_end); *current_stripe = 0; } } } /// The provided secret was not exactly [`DEFAULT_SECRET_LENGTH`][] /// bytes. pub struct SecretWithSeedError(S); impl SecretWithSeedError { /// Returns the secret. pub fn into_secret(self) -> S { self.0 } } impl core::error::Error for SecretWithSeedError {} impl core::fmt::Debug for SecretWithSeedError { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.debug_tuple("SecretWithSeedError").finish() } } impl core::fmt::Display for SecretWithSeedError { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { write!( f, "The secret must be exactly {DEFAULT_SECRET_LENGTH} bytes" ) } } /// The provided secret was not at least [`SECRET_MINIMUM_LENGTH`][] /// bytes. pub struct SecretTooShortError(secret::Error, S); impl SecretTooShortError { /// Returns the secret. 
pub fn into_secret(self) -> S { self.1 } } impl core::error::Error for SecretTooShortError {} impl core::fmt::Debug for SecretTooShortError { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { f.debug_tuple("SecretTooShortError").finish() } } impl core::fmt::Display for SecretTooShortError { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { self.0.fmt(f) } } #[cfg(test)] mod test { use super::*; #[test] fn secret_buffer_default_is_valid() { assert!(SecretBuffer::default().is_valid()); } #[test] fn secret_buffer_allocate_default_is_valid() { assert!(SecretBuffer::allocate_default().is_valid()) } #[test] fn secret_buffer_allocate_with_seed_is_valid() { assert!(SecretBuffer::allocate_with_seed(0xdead_beef).is_valid()) } } twox-hash-2.1.2/src/xxhash3.rs000064400000000000000000000277501046102023000143060ustar 00000000000000use core::slice; use crate::{IntoU128 as _, IntoU32 as _}; pub mod large; pub(crate) use large::dispatch; pub use large::{Algorithm, Vector}; pub mod secret; pub use secret::{Secret, SECRET_MINIMUM_LENGTH}; mod streaming; pub use streaming::{ Finalize, FixedBuffer, FixedMutBuffer, RawHasherCore, SecretBuffer, SecretTooShortError, SecretWithSeedError, }; #[cfg(feature = "alloc")] pub use streaming::AllocRawHasher; pub mod primes { pub const PRIME32_1: u64 = 0x9E3779B1; pub const PRIME32_2: u64 = 0x85EBCA77; pub const PRIME32_3: u64 = 0xC2B2AE3D; pub const PRIME64_1: u64 = 0x9E3779B185EBCA87; pub const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F; pub const PRIME64_3: u64 = 0x165667B19E3779F9; pub const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; pub const PRIME64_5: u64 = 0x27D4EB2F165667C5; pub const PRIME_MX1: u64 = 0x165667919E3779F9; pub const PRIME_MX2: u64 = 0x9FB21C651E98DF25; } pub const CUTOFF: usize = 240; pub const DEFAULT_SEED: u64 = 0; /// The length of the default secret. 
pub const DEFAULT_SECRET_LENGTH: usize = 192; pub type DefaultSecret = [u8; DEFAULT_SECRET_LENGTH]; pub const DEFAULT_SECRET_RAW: DefaultSecret = [ 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c, 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f, 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21, 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c, 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3, 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8, 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d, 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64, 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb, 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e, 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce, 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e, ]; // Safety: The default secret is long enough pub const DEFAULT_SECRET: &Secret = unsafe { Secret::new_unchecked(&DEFAULT_SECRET_RAW) }; /// # Correctness /// /// This function assumes that the incoming buffer has been populated /// with the default secret. 
#[inline] pub fn derive_secret(seed: u64, secret: &mut DefaultSecret) { if seed == DEFAULT_SEED { return; } let (words, _) = secret.bp_as_chunks_mut(); let (pairs, _) = words.bp_as_chunks_mut(); for [a_p, b_p] in pairs { let a = u64::from_le_bytes(*a_p); let b = u64::from_le_bytes(*b_p); let a = a.wrapping_add(seed); let b = b.wrapping_sub(seed); *a_p = a.to_le_bytes(); *b_p = b.to_le_bytes(); } } /// The provided secret was not at least [`SECRET_MINIMUM_LENGTH`][] /// bytes. #[derive(Debug)] pub struct OneshotWithSecretError(pub(crate) secret::Error); impl core::error::Error for OneshotWithSecretError {} impl core::fmt::Display for OneshotWithSecretError { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { self.0.fmt(f) } } macro_rules! assert_input_range { ($min:literal.., $len:expr) => { assert!($min <= $len); }; ($min:literal..=$max:literal, $len:expr) => { assert!($min <= $len); assert!($len <= $max); }; } pub(crate) use assert_input_range; #[inline(always)] pub fn impl_1_to_3_bytes_combined(input: &[u8]) -> u32 { assert_input_range!(1..=3, input.len()); let input_length = input.len() as u8; // OK as we checked that the length fits input[input.len() - 1].into_u32() | input_length.into_u32() << 8 | input[0].into_u32() << 16 | input[input.len() >> 1].into_u32() << 24 } #[inline] pub fn impl_17_to_128_bytes_iter( secret: &Secret, input: &[u8], mut f: impl FnMut(&[u8; 16], &[u8; 16], &[[u8; 16]; 2]), ) { let secret = secret.words_for_17_to_128(); let (secret, _) = secret.bp_as_chunks::<2>(); let (fwd, _) = input.bp_as_chunks(); let (_, bwd) = input.bp_as_rchunks(); let q = bwd.len(); if input.len() > 32 { if input.len() > 64 { if input.len() > 96 { f(&fwd[3], &bwd[q - 4], &secret[3]); } f(&fwd[2], &bwd[q - 3], &secret[2]); } f(&fwd[1], &bwd[q - 2], &secret[1]); } f(&fwd[0], &bwd[q - 1], &secret[0]); } #[inline] pub fn mix_step(data: &[u8; 16], secret: &[u8; 16], seed: u64) -> u64 { let data_words = to_u64s(data); let secret_words = 
to_u64s(secret); let mul_result = { let a = (data_words[0] ^ secret_words[0].wrapping_add(seed)).into_u128(); let b = (data_words[1] ^ secret_words[1].wrapping_sub(seed)).into_u128(); a.wrapping_mul(b) }; mul_result.lower_half() ^ mul_result.upper_half() } #[inline] pub fn to_u64s(bytes: &[u8; 16]) -> [u64; 2] { let (pair, _) = bytes.bp_as_chunks::<8>(); [pair[0], pair[1]].map(u64::from_le_bytes) } #[inline] #[cfg(feature = "xxhash3_128")] pub fn pairs_of_u64_bytes(bytes: &[u8]) -> &[[[u8; 16]; 2]] { let (u64_bytes, _) = bytes.bp_as_chunks::<16>(); let (pairs, _) = u64_bytes.bp_as_chunks::<2>(); pairs } #[inline] pub fn avalanche(mut x: u64) -> u64 { x ^= x >> 37; x = x.wrapping_mul(primes::PRIME_MX1); x ^= x >> 32; x } #[inline] pub fn avalanche_xxh64(mut x: u64) -> u64 { x ^= x >> 33; x = x.wrapping_mul(primes::PRIME64_2); x ^= x >> 29; x = x.wrapping_mul(primes::PRIME64_3); x ^= x >> 32; x } #[inline] pub fn stripes_with_tail(block: &[u8]) -> (&[[u8; 64]], &[u8]) { match block.bp_as_chunks() { ([stripes @ .., last], []) => (stripes, last), (stripes, last) => (stripes, last), } } /// THis exists just to easily map the XXH3 algorithm to Rust as the /// algorithm describes 128-bit results as a pair of high and low u64 /// values. 
#[derive(Copy, Clone)] pub(crate) struct X128 { pub low: u64, pub high: u64, } impl From for u128 { fn from(value: X128) -> Self { value.high.into_u128() << 64 | value.low.into_u128() } } impl crate::IntoU128 for X128 { fn into_u128(self) -> u128 { self.into() } } pub trait Halves { type Output; fn upper_half(self) -> Self::Output; fn lower_half(self) -> Self::Output; } impl Halves for u64 { type Output = u32; #[inline] fn upper_half(self) -> Self::Output { (self >> 32) as _ } #[inline] fn lower_half(self) -> Self::Output { self as _ } } impl Halves for u128 { type Output = u64; #[inline] fn upper_half(self) -> Self::Output { (self >> 64) as _ } #[inline] fn lower_half(self) -> Self::Output { self as _ } } pub trait U8SliceExt { fn first_u32(&self) -> Option; fn last_u32(&self) -> Option; fn first_u64(&self) -> Option; fn last_u64(&self) -> Option; } impl U8SliceExt for [u8] { #[inline] fn first_u32(&self) -> Option { self.first_chunk().copied().map(u32::from_le_bytes) } #[inline] fn last_u32(&self) -> Option { self.last_chunk().copied().map(u32::from_le_bytes) } #[inline] fn first_u64(&self) -> Option { self.first_chunk().copied().map(u64::from_le_bytes) } #[inline] fn last_u64(&self) -> Option { self.last_chunk().copied().map(u64::from_le_bytes) } } pub trait SliceBackport { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]); fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]); fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]); } impl SliceBackport for [T] { fn bp_as_chunks(&self) -> (&[[T; N]], &[T]) { assert_ne!(N, 0); let len = self.len() / N; // Safety: `(len / N) * N` has to be less-than-or-equal to `len` let (head, tail) = unsafe { self.split_at_unchecked(len * N) }; // Safety: (1) `head` points to valid data, (2) the alignment // of an array and the individual type are the same, (3) the // valid elements are less-than-or-equal to the original // slice. 
let head = unsafe { slice::from_raw_parts(head.as_ptr().cast(), len) }; (head, tail) } fn bp_as_chunks_mut(&mut self) -> (&mut [[T; N]], &mut [T]) { assert_ne!(N, 0); let len = self.len() / N; // Safety: `(len / N) * N` has to be less than or equal to `len` let (head, tail) = unsafe { self.split_at_mut_unchecked(len * N) }; // Safety: (1) `head` points to valid data, (2) the alignment // of an array and the individual type are the same, (3) the // valid elements are less-than-or-equal to the original // slice. let head = unsafe { slice::from_raw_parts_mut(head.as_mut_ptr().cast(), len) }; (head, tail) } fn bp_as_rchunks(&self) -> (&[T], &[[T; N]]) { assert_ne!(N, 0); let len = self.len() / N; // Safety: `(len / N) * N` has to be less than or equal to `len` let (head, tail) = unsafe { self.split_at_unchecked(self.len() - len * N) }; // Safety: (1) `tail` points to valid data, (2) the alignment // of an array and the individual type are the same, (3) the // valid elements are less-than-or-equal to the original // slice. let tail = unsafe { slice::from_raw_parts(tail.as_ptr().cast(), len) }; (head, tail) } } #[cfg(test)] pub mod test { use std::array; use super::*; macro_rules! bytes { ($($n: literal),* $(,)?) => { &[$(&crate::xxhash3::test::gen_bytes::<$n>() as &[u8],)*] as &[&[u8]] }; } pub(crate) use bytes; pub fn gen_bytes() -> [u8; N] { // Picking 251 as it's a prime number, which will hopefully // help avoid incidental power-of-two alignment. 
array::from_fn(|i| (i % 251) as u8) } #[test] fn default_secret_is_valid() { assert!(DEFAULT_SECRET.is_valid()) } #[test] fn backported_as_chunks() { let x = [1, 2, 3, 4, 5]; let (a, b) = x.bp_as_chunks::<1>(); assert_eq!(a, &[[1], [2], [3], [4], [5]]); assert_eq!(b, &[] as &[i32]); let (a, b) = x.bp_as_chunks::<2>(); assert_eq!(a, &[[1, 2], [3, 4]]); assert_eq!(b, &[5]); let (a, b) = x.bp_as_chunks::<3>(); assert_eq!(a, &[[1, 2, 3]]); assert_eq!(b, &[4, 5]); let (a, b) = x.bp_as_chunks::<4>(); assert_eq!(a, &[[1, 2, 3, 4]]); assert_eq!(b, &[5]); let (a, b) = x.bp_as_chunks::<5>(); assert_eq!(a, &[[1, 2, 3, 4, 5]]); assert_eq!(b, &[] as &[i32]); let (a, b) = x.bp_as_chunks::<6>(); assert_eq!(a, &[] as &[[i32; 6]]); assert_eq!(b, &[1, 2, 3, 4, 5]); } #[test] fn backported_as_rchunks() { let x = [1, 2, 3, 4, 5]; let (a, b) = x.bp_as_rchunks::<1>(); assert_eq!(a, &[] as &[i32]); assert_eq!(b, &[[1], [2], [3], [4], [5]]); let (a, b) = x.bp_as_rchunks::<2>(); assert_eq!(a, &[1]); assert_eq!(b, &[[2, 3], [4, 5]]); let (a, b) = x.bp_as_rchunks::<3>(); assert_eq!(a, &[1, 2]); assert_eq!(b, &[[3, 4, 5]]); let (a, b) = x.bp_as_rchunks::<4>(); assert_eq!(a, &[1]); assert_eq!(b, &[[2, 3, 4, 5]]); let (a, b) = x.bp_as_rchunks::<5>(); assert_eq!(a, &[] as &[i32]); assert_eq!(b, &[[1, 2, 3, 4, 5]]); let (a, b) = x.bp_as_rchunks::<6>(); assert_eq!(a, &[1, 2, 3, 4, 5]); assert_eq!(b, &[] as &[[i32; 6]]); } } twox-hash-2.1.2/src/xxhash32.rs000064400000000000000000000453561046102023000143720ustar 00000000000000//! The implementation of XXH32. use core::{ fmt, hash::{self, BuildHasher}, mem, }; use crate::{IntoU32, IntoU64}; // Keeping these constants in this form to match the C code. 
const PRIME32_1: u32 = 0x9E3779B1; const PRIME32_2: u32 = 0x85EBCA77; const PRIME32_3: u32 = 0xC2B2AE3D; const PRIME32_4: u32 = 0x27D4EB2F; const PRIME32_5: u32 = 0x165667B1; type Lane = u32; type Lanes = [Lane; 4]; type Bytes = [u8; 16]; const BYTES_IN_LANE: usize = mem::size_of::(); #[derive(Clone, PartialEq)] struct BufferData(Lanes); impl BufferData { const fn new() -> Self { Self([0; 4]) } const fn bytes(&self) -> &Bytes { const _: () = assert!(mem::align_of::() <= mem::align_of::()); // SAFETY[bytes]: The alignment of `u32` is at least that of // `u8` and all the values are initialized. unsafe { &*self.0.as_ptr().cast() } } fn bytes_mut(&mut self) -> &mut Bytes { // SAFETY: See SAFETY[bytes] unsafe { &mut *self.0.as_mut_ptr().cast() } } } impl fmt::Debug for BufferData { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.0.iter()).finish() } } #[derive(Debug, Clone, PartialEq)] struct Buffer { offset: usize, data: BufferData, } impl Buffer { const fn new() -> Self { Self { offset: 0, data: BufferData::new(), } } // RATIONALE: See RATIONALE[inline] #[inline] fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&Lanes>, &'d [u8]) { // Most of the slice methods we use here have `_unchecked` variants, but // // 1. this method is called one time per `Hasher::write` call // 2. this method early exits if we don't have anything in the buffer // // Because of this, removing the panics via `unsafe` doesn't // have much benefit other than reducing code size by a tiny // bit. 
// Continuation of `Buffer::extend`: nothing is buffered, so hand the
// caller's data straight back without copying.
if self.offset == 0 {
    return (None, data);
};

let bytes = self.data.bytes_mut();
debug_assert!(self.offset <= bytes.len());

// Copy as much of `data` as fits into the unused tail of the buffer.
let empty = &mut bytes[self.offset..];
let n_to_copy = usize::min(empty.len(), data.len());
let dst = &mut empty[..n_to_copy];

let (src, rest) = data.split_at(n_to_copy);

dst.copy_from_slice(src);
self.offset += n_to_copy;

debug_assert!(self.offset <= bytes.len());

// Only when the buffer is completely full do we expose the lanes to
// the caller; otherwise keep accumulating for a later call.
if self.offset == bytes.len() {
    self.offset = 0;
    (Some(&self.data.0), rest)
} else {
    (None, rest)
}
}

// RATIONALE: See RATIONALE[inline]
/// Stores leftover input (less than one full lane group) for the next
/// call. Must only be called while the buffer is empty (`offset == 0`).
#[inline]
fn set(&mut self, data: &[u8]) {
    if data.is_empty() {
        return;
    }

    debug_assert_eq!(self.offset, 0);

    let n_to_copy = data.len();
    let bytes = self.data.bytes_mut();
    // Callers only ever pass less than a full buffer's worth of data.
    debug_assert!(n_to_copy < bytes.len());

    bytes[..n_to_copy].copy_from_slice(data);

    self.offset = data.len();
}

// RATIONALE: See RATIONALE[inline]
/// The bytes currently buffered and not yet hashed.
#[inline]
fn remaining(&self) -> &[u8] {
    &self.data.bytes()[..self.offset]
}
}

/// The four per-lane accumulators of the XXH32 streaming state.
#[derive(Clone, PartialEq)]
struct Accumulators(Lanes);

impl Accumulators {
    /// Initial accumulator values as specified by the XXH32 algorithm
    /// (step 1).
    const fn new(seed: u32) -> Self {
        Self([
            seed.wrapping_add(PRIME32_1).wrapping_add(PRIME32_2),
            seed.wrapping_add(PRIME32_2),
            seed,
            seed.wrapping_sub(PRIME32_1),
        ])
    }

    // RATIONALE: See RATIONALE[inline]
    #[inline]
    fn write(&mut self, lanes: Lanes) {
        let [acc1, acc2, acc3, acc4] = &mut self.0;
        let [lane1, lane2, lane3, lane4] = lanes;

        // The lanes were loaded as native-endian `u32`s; `to_le` makes
        // the mixing independent of host byte order.
        *acc1 = round(*acc1, lane1.to_le());
        *acc2 = round(*acc2, lane2.to_le());
        *acc3 = round(*acc3, lane3.to_le());
        *acc4 = round(*acc4, lane4.to_le());
    }

    // RATIONALE: See RATIONALE[inline]
    /// Consumes as many full lane groups from `data` as possible,
    /// returning the unconsumed tail.
    #[inline]
    fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] {
        while let Some((chunk, rest)) = data.split_first_chunk::() {
            // SAFETY: We have the right number of bytes and are
            // handling the unaligned case.
let lanes = unsafe { chunk.as_ptr().cast::().read_unaligned() };
self.write(lanes);

data = rest;
}
data
}

// RATIONALE: See RATIONALE[inline]
/// Folds the four accumulators into a single `u32` via the rotations
/// and additions of XXH32's accumulator-convergence step.
#[inline]
const fn finish(&self) -> u32 {
    let [acc1, acc2, acc3, acc4] = self.0;

    let acc1 = acc1.rotate_left(1);
    let acc2 = acc2.rotate_left(7);
    let acc3 = acc3.rotate_left(12);
    let acc4 = acc4.rotate_left(18);

    acc1.wrapping_add(acc2)
        .wrapping_add(acc3)
        .wrapping_add(acc4)
}
}

impl fmt::Debug for Accumulators {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let [acc1, acc2, acc3, acc4] = self.0;
        f.debug_struct("Accumulators")
            .field("acc1", &acc1)
            .field("acc2", &acc2)
            .field("acc3", &acc3)
            .field("acc4", &acc4)
            .finish()
    }
}

/// Calculates the 32-bit hash.
///
/// ### Caution
///
/// Although this struct implements [`hash::Hasher`][], it only calculates a
/// 32-bit number, leaving the upper bits as 0. This means it is
/// unlikely to be correct to use this in places like a [`HashMap`][std::collections::HashMap].
#[derive(Debug, Clone, PartialEq)]
pub struct Hasher {
    seed: u32,
    accumulators: Accumulators,
    buffer: Buffer,
    length: u64,
}

impl Default for Hasher {
    fn default() -> Self {
        Self::with_seed(0)
    }
}

impl Hasher {
    /// Hash all data at once. If you can use this function, you may
    /// see noticeable speed gains for certain types of input.
    #[must_use]
    // RATIONALE[inline]: Keeping parallel to the 64-bit
    // implementation, even though the performance gains for the
    // 32-bit version haven't been tested.
    #[inline]
    pub fn oneshot(seed: u32, data: &[u8]) -> u32 {
        let len = data.len();

        // Since we know that there's no more data coming, we don't
        // need to construct the intermediate buffers or copy data to
        // or from the buffers.
        let mut accumulators = Accumulators::new(seed);

        let data = accumulators.write_many(data);

        Self::finish_with(seed, len.into_u64(), &accumulators, data)
    }

    /// Constructs the hasher with an initial seed.
    #[must_use]
    pub const fn with_seed(seed: u32) -> Self {
        // Step 1.
Initialize internal accumulators Self { seed, accumulators: Accumulators::new(seed), buffer: Buffer::new(), length: 0, } } /// The seed this hasher was created with. pub const fn seed(&self) -> u32 { self.seed } /// The total number of bytes hashed. pub const fn total_len(&self) -> u64 { self.length } /// The total number of bytes hashed, truncated to 32 bits. /// /// For the full 64-bit byte count, use [`total_len`](Self::total_len). pub const fn total_len_32(&self) -> u32 { self.length as u32 } /// Returns the hash value for the values written so far. Unlike /// [`hash::Hasher::finish`][], this method returns the actual 32-bit /// value calculated, not a 64-bit value. #[must_use] // RATIONALE: See RATIONALE[inline] #[inline] pub fn finish_32(&self) -> u32 { Self::finish_with( self.seed, self.length, &self.accumulators, self.buffer.remaining(), ) } #[must_use] // RATIONALE: See RATIONALE[inline] #[inline] fn finish_with(seed: u32, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u32 { // Step 3. Accumulator convergence let mut acc = if len < BYTES_IN_LANE.into_u64() { seed.wrapping_add(PRIME32_5) } else { accumulators.finish() }; // Step 4. Add input length // // "Note that, if input length is so large that it requires // more than 32-bits, only the lower 32-bits are added to the // accumulator." acc += len as u32; // Step 5. Consume remaining input while let Some((chunk, rest)) = remaining.split_first_chunk() { let lane = u32::from_ne_bytes(*chunk).to_le(); acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_3)); acc = acc.rotate_left(17).wrapping_mul(PRIME32_4); remaining = rest; } for &byte in remaining { let lane = byte.into_u32(); acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_5)); acc = acc.rotate_left(11).wrapping_mul(PRIME32_1); } // Step 6. 
// Final mix (avalanche)
acc ^= acc >> 15;
acc = acc.wrapping_mul(PRIME32_2);
acc ^= acc >> 13;
acc = acc.wrapping_mul(PRIME32_3);
acc ^= acc >> 16;

acc
}
}

impl hash::Hasher for Hasher {
    // RATIONALE: See RATIONALE[inline]
    #[inline]
    fn write(&mut self, data: &[u8]) {
        let len = data.len();

        // Step 2. Process stripes
        let (buffered_lanes, data) = self.buffer.extend(data);

        if let Some(&lanes) = buffered_lanes {
            self.accumulators.write(lanes);
        }

        let data = self.accumulators.write_many(data);

        self.buffer.set(data);

        self.length += len.into_u64();
    }

    // RATIONALE: See RATIONALE[inline]
    #[inline]
    fn finish(&self) -> u64 {
        // Only the lower 32 bits are meaningful; see the type-level
        // caution about using this hasher where a full 64-bit hash is
        // expected.
        Hasher::finish_32(self).into()
    }
}

// RATIONALE: See RATIONALE[inline]
// One round of the XXH32 per-lane mixing primitive.
#[inline]
const fn round(mut acc: u32, lane: u32) -> u32 {
    acc = acc.wrapping_add(lane.wrapping_mul(PRIME32_2));
    acc = acc.rotate_left(13);
    acc.wrapping_mul(PRIME32_1)
}

/// Constructs [`Hasher`][] for multiple hasher instances. See
/// the [usage warning][Hasher#caution].
#[derive(Clone)]
pub struct State(u32);

impl State {
    /// Constructs the hasher with an initial seed.
pub fn with_seed(seed: u32) -> Self { Self(seed) } } impl BuildHasher for State { type Hasher = Hasher; fn build_hasher(&self) -> Self::Hasher { Hasher::with_seed(self.0) } } #[cfg(test)] mod test { use core::{ array, hash::{BuildHasherDefault, Hasher as _}, }; use std::collections::HashMap; use super::*; const _TRAITS: () = { const fn is_clone() {} is_clone::(); is_clone::(); }; const EMPTY_BYTES: [u8; 0] = []; #[test] fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() { let bytes = [0; 32]; let mut byte_by_byte = Hasher::with_seed(0); for byte in bytes.chunks(1) { byte_by_byte.write(byte); } let byte_by_byte = byte_by_byte.finish(); let mut one_chunk = Hasher::with_seed(0); one_chunk.write(&bytes); let one_chunk = one_chunk.finish(); assert_eq!(byte_by_byte, one_chunk); } #[test] fn hash_of_nothing_matches_c_implementation() { let mut hasher = Hasher::with_seed(0); hasher.write(&EMPTY_BYTES); assert_eq!(hasher.finish(), 0x02cc_5d05); } #[test] fn hash_of_single_byte_matches_c_implementation() { let mut hasher = Hasher::with_seed(0); hasher.write(&[42]); assert_eq!(hasher.finish(), 0xe0fe_705f); } #[test] fn hash_of_multiple_bytes_matches_c_implementation() { let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); assert_eq!(hasher.finish(), 0x9e5e_7e93); } #[test] fn hash_of_multiple_chunks_matches_c_implementation() { let bytes: [u8; 100] = array::from_fn(|i| i as u8); let mut hasher = Hasher::with_seed(0); hasher.write(&bytes); assert_eq!(hasher.finish(), 0x7f89_ba44); } #[test] fn hash_with_different_seed_matches_c_implementation() { let mut hasher = Hasher::with_seed(0x42c9_1977); hasher.write(&EMPTY_BYTES); assert_eq!(hasher.finish(), 0xd6bf_8459); } #[test] fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() { let bytes: [u8; 100] = array::from_fn(|i| i as u8); let mut hasher = Hasher::with_seed(0x42c9_1977); hasher.write(&bytes); assert_eq!(hasher.finish(), 0x6d2f_6c17); } #[test] fn 
hashes_with_different_offsets_are_the_same() { let bytes = [0x7c; 4096]; let expected = Hasher::oneshot(0, &[0x7c; 64]); let the_same = bytes .windows(64) .map(|w| { let mut hasher = Hasher::with_seed(0); hasher.write(w); hasher.finish_32() }) .all(|h| h == expected); assert!(the_same); } // This test validates wraparound/truncation behavior for very // large inputs of a 32-bit hash, but runs very slowly in the // normal "cargo test" build config since it hashes 4.3GB of // data. It runs reasonably quick under "cargo test --release". #[ignore] #[test] fn length_overflows_32bit() { // Hash 4.3 billion (4_300_000_000) bytes, which overflows a u32. let bytes200: [u8; 200] = array::from_fn(|i| i as _); let mut hasher = Hasher::with_seed(0); for _ in 0..(4_300_000_000 / bytes200.len()) { hasher.write(&bytes200); } assert_eq!(hasher.total_len(), 0x0000_0001_004c_cb00); assert_eq!(hasher.total_len_32(), 0x004c_cb00); // compared against the C implementation assert_eq!(hasher.finish(), 0x1522_4ca7); } #[test] fn can_be_used_in_a_hashmap_with_a_default_seed() { let mut hash: HashMap<_, _, BuildHasherDefault> = Default::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); } } #[cfg(feature = "random")] #[cfg_attr(docsrs, doc(cfg(feature = "random")))] mod random_impl { use super::*; /// Constructs a randomized seed and reuses it for multiple hasher /// instances. See the [usage warning][Hasher#caution]. 
#[derive(Clone)] pub struct RandomState(State); impl Default for RandomState { fn default() -> Self { Self::new() } } impl RandomState { fn new() -> Self { Self(State::with_seed(rand::random())) } } impl BuildHasher for RandomState { type Hasher = Hasher; fn build_hasher(&self) -> Self::Hasher { self.0.build_hasher() } } #[cfg(test)] mod test { use std::collections::HashMap; use super::*; const _: () = { const fn is_clone() {} is_clone::(); is_clone::(); }; #[test] fn can_be_used_in_a_hashmap_with_a_random_seed() { let mut hash: HashMap<_, _, RandomState> = Default::default(); hash.insert(42, "the answer"); assert_eq!(hash.get(&42), Some(&"the answer")); } } } #[cfg(feature = "random")] #[cfg_attr(docsrs, doc(cfg(feature = "random")))] pub use random_impl::*; #[cfg(feature = "serialize")] #[cfg_attr(docsrs, doc(cfg(feature = "serialize")))] mod serialize_impl { use serde::{Deserialize, Serialize}; use super::*; impl<'de> Deserialize<'de> for Hasher { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { let shim = Deserialize::deserialize(deserializer)?; let Shim { total_len, seed, core, buffer, buffer_usage, } = shim; let Core { v1, v2, v3, v4 } = core; let mut buffer_data = BufferData::new(); buffer_data.bytes_mut().copy_from_slice(&buffer); Ok(Hasher { seed, accumulators: Accumulators([v1, v2, v3, v4]), buffer: Buffer { offset: buffer_usage, data: buffer_data, }, length: total_len, }) } } impl Serialize for Hasher { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { let Hasher { seed, ref accumulators, ref buffer, length, } = *self; let [v1, v2, v3, v4] = accumulators.0; let Buffer { offset, ref data } = *buffer; let buffer = *data.bytes(); let shim = Shim { total_len: length, seed, core: Core { v1, v2, v3, v4 }, buffer, buffer_usage: offset, }; shim.serialize(serializer) } } #[derive(Serialize, Deserialize)] struct Shim { total_len: u64, seed: u32, core: Core, buffer: [u8; 16], buffer_usage: usize, } 
#[derive(Serialize, Deserialize)] struct Core { v1: u32, v2: u32, v3: u32, v4: u32, } #[cfg(test)] mod test { use std::hash::Hasher as _; use super::*; type Result = core::result::Result; #[test] fn test_serialization_cycle() -> Result { let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); let _ = hasher.finish(); let serialized = serde_json::to_string(&hasher)?; let unserialized: Hasher = serde_json::from_str(&serialized)?; assert_eq!(hasher, unserialized); Ok(()) } #[test] fn test_serialization_stability() -> Result { let mut hasher = Hasher::with_seed(0); hasher.write(b"Hello, world!\0"); let _ = hasher.finish(); let expected_serialized = r#"{ "total_len": 14, "seed": 0, "core": { "v1": 606290984, "v2": 2246822519, "v3": 0, "v4": 1640531535 }, "buffer": [ 72, 101, 108, 108, 111, 44, 32, 119, 111, 114, 108, 100, 33, 0, 0, 0 ], "buffer_usage": 14 }"#; let unserialized: Hasher = serde_json::from_str(expected_serialized)?; assert_eq!(hasher, unserialized); let expected_value: serde_json::Value = serde_json::from_str(expected_serialized)?; let actual_value = serde_json::to_value(&hasher)?; assert_eq!(expected_value, actual_value); Ok(()) } } } twox-hash-2.1.2/src/xxhash3_128.rs000064400000000000000000000462311046102023000146730ustar 00000000000000//! The implementation of XXH3_128. #![deny( clippy::missing_safety_doc, clippy::undocumented_unsafe_blocks, unsafe_op_in_unsafe_fn )] use crate::{ xxhash3::{primes::*, *}, IntoU128 as _, IntoU64 as _, }; pub use crate::xxhash3::{ FixedBuffer, FixedMutBuffer, OneshotWithSecretError, SecretBuffer, SecretTooShortError, SecretWithSeedError, DEFAULT_SECRET_LENGTH, SECRET_MINIMUM_LENGTH, }; /// Calculates the 128-bit hash. /// /// This type does not implement [`std::hash::Hasher`] as that trait /// requires a 64-bit result while this computes a 128-bit result. #[derive(Clone)] pub struct Hasher { #[cfg(feature = "alloc")] inner: AllocRawHasher, _private: (), } impl Hasher { /// Hash all data at once. 
If you can use this function, you may /// see noticable speed gains for certain types of input. #[must_use] #[inline] pub fn oneshot(input: &[u8]) -> u128 { impl_oneshot(DEFAULT_SECRET, DEFAULT_SEED, input) } /// Hash all data at once using the provided seed and a secret /// derived from the seed. If you can use this function, you may /// see noticable speed gains for certain types of input. #[must_use] #[inline] pub fn oneshot_with_seed(seed: u64, input: &[u8]) -> u128 { let mut secret = DEFAULT_SECRET_RAW; // We know that the secret will only be used if we have more // than 240 bytes, so don't waste time computing it otherwise. if input.len() > CUTOFF { derive_secret(seed, &mut secret); } let secret = Secret::new(&secret).expect("The default secret length is invalid"); impl_oneshot(secret, seed, input) } /// Hash all data at once using the provided secret and the /// default seed. If you can use this function, you may see /// noticable speed gains for certain types of input. #[inline] pub fn oneshot_with_secret( secret: &[u8], input: &[u8], ) -> Result { let secret = Secret::new(secret).map_err(OneshotWithSecretError)?; Ok(impl_oneshot(secret, DEFAULT_SEED, input)) } /// Hash all data at once using the provided seed and secret. If /// you can use this function, you may see noticable speed gains /// for certain types of input. #[inline] pub fn oneshot_with_seed_and_secret( seed: u64, secret: &[u8], input: &[u8], ) -> Result { let secret = if input.len() > CUTOFF { Secret::new(secret).map_err(OneshotWithSecretError)? } else { DEFAULT_SECRET }; Ok(impl_oneshot(secret, seed, input)) } } #[cfg(feature = "alloc")] #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] mod with_alloc { use ::alloc::boxed::Box; use super::*; impl Hasher { /// Constructs the hasher using the default seed and secret values. 
pub fn new() -> Self { Self { inner: RawHasherCore::allocate_default(), _private: (), } } /// Constructs the hasher using the provided seed and a secret /// derived from the seed. pub fn with_seed(seed: u64) -> Self { Self { inner: RawHasherCore::allocate_with_seed(seed), _private: (), } } /// Constructs the hasher using the provided seed and secret. pub fn with_seed_and_secret( seed: u64, secret: impl Into>, ) -> Result>> { Ok(Self { inner: RawHasherCore::allocate_with_seed_and_secret(seed, secret)?, _private: (), }) } /// Returns the secret. pub fn into_secret(self) -> Box<[u8]> { self.inner.into_secret() } /// Writes some data into this `Hasher`. #[inline] pub fn write(&mut self, input: &[u8]) { self.inner.write(input); } /// Returns the hash value for the values written so /// far. Unlike [`std::hash::Hasher::finish`][], this method /// returns the complete 128-bit value calculated, not a /// 64-bit value. #[inline] pub fn finish_128(&self) -> u128 { self.inner.finish(Finalize128) } } impl Default for Hasher { fn default() -> Self { Self::new() } } } #[derive(Clone)] /// A lower-level interface for computing a hash from streaming data. /// /// The algorithm requires a secret which can be a reasonably large /// piece of data. [`Hasher`][] makes one concrete implementation /// decision that uses dynamic memory allocation, but specialized /// usages may desire more flexibility. This type, combined with /// [`SecretBuffer`][], offer that flexibility at the cost of a /// generic type. pub struct RawHasher(RawHasherCore); impl RawHasher { /// Construct the hasher with the provided seed, secret, and /// temporary buffer. pub fn new(secret_buffer: SecretBuffer) -> Self { Self(RawHasherCore::new(secret_buffer)) } /// Returns the secret. pub fn into_secret(self) -> S { self.0.into_secret() } } impl RawHasher where S: FixedBuffer, { /// Writes some data into this `Hasher`. 
// NOTE(review): the first two methods continue an `impl` block whose header
// is above this chunk (a streaming hasher wrapping a `RawHasherCore`-style
// tuple field `self.0`) — confirm against the full file.

/// Writes some data into the hasher.
#[inline]
pub fn write(&mut self, input: &[u8]) {
    self.0.write(input);
}

/// Returns the hash value for the values written so
/// far. Unlike [`std::hash::Hasher::finish`][], this method
/// returns the complete 128-bit value calculated, not a
/// 64-bit value.
#[inline]
pub fn finish_128(&self) -> u128 {
    self.0.finish(Finalize128)
}
}

/// Finalization strategy that produces the full 128-bit hash value.
struct Finalize128;

impl Finalize for Finalize128 {
    type Output = u128;

    // Inputs of 240 bytes or fewer are hashed by the small-input paths.
    #[inline]
    fn small(&self, secret: &Secret, seed: u64, input: &[u8]) -> Self::Output {
        impl_oneshot(secret, seed, input)
    }

    // Larger inputs finish from the 8-lane accumulator state.
    #[inline]
    fn large(
        &self,
        vector: impl Vector,
        acc: [u64; 8],
        last_block: &[u8],
        last_stripe: &[u8; 64],
        secret: &Secret,
        len: usize,
    ) -> Self::Output {
        Algorithm(vector).finalize_128(acc, last_block, last_stripe, secret, len)
    }
}

/// Dispatches to the length-specialized implementations of XXH3-128.
#[inline(always)]
fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u128 {
    match input.len() {
        241.. => impl_241_plus_bytes(secret, input),

        129..=240 => impl_129_to_240_bytes(secret, seed, input),

        17..=128 => impl_17_to_128_bytes(secret, seed, input),

        9..=16 => impl_9_to_16_bytes(secret, seed, input),

        4..=8 => impl_4_to_8_bytes(secret, seed, input),

        1..=3 => impl_1_to_3_bytes(secret, seed, input),

        0 => impl_0_bytes(secret, seed),
    }
}

/// Empty input: both halves are avalanched mixes of the seed and secret words.
#[inline(always)]
fn impl_0_bytes(secret: &Secret, seed: u64) -> u128 {
    let secret_words = secret.for_128().words_for_0();

    let low = avalanche_xxh64(seed ^ secret_words[0] ^ secret_words[1]);
    let high = avalanche_xxh64(seed ^ secret_words[2] ^ secret_words[3]);

    X128 { low, high }.into()
}

#[inline(always)]
fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 {
    assert_input_range!(1..=3, input.len());

    let combined = impl_1_to_3_bytes_combined(input);
    let secret_words = secret.for_128().words_for_1_to_3();

    let low = {
        let secret = (secret_words[0] ^ secret_words[1]).into_u64();
        secret.wrapping_add(seed) ^ combined.into_u64()
    };
    // The high half uses a byte-swapped, rotated copy of the combined input.
    let high = {
        let secret = (secret_words[2] ^ secret_words[3]).into_u64();
        secret.wrapping_sub(seed) ^ combined.swap_bytes().rotate_left(13).into_u64()
    };

    let low = avalanche_xxh64(low);
    let high = avalanche_xxh64(high);

    X128 { low, high }.into()
}

#[inline(always)]
fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 {
    assert_input_range!(4..=8, input.len());
    let input_first = input.first_u32().unwrap();
    let input_last = input.last_u32().unwrap();

    // The low 32 bits of the seed are byte-swapped into the high half.
    let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32);
    let secret_words = secret.for_128().words_for_4_to_8();

    let combined = input_first.into_u64() | (input_last.into_u64() << 32);
    let lhs = {
        let a = secret_words[0] ^ secret_words[1];
        let b = a.wrapping_add(modified_seed);
        b ^ combined
    };
    let rhs = PRIME64_1.wrapping_add(input.len().into_u64() << 2);
    // Full 64x64 -> 128 multiply; both halves feed the result.
    let mul_result = lhs.into_u128().wrapping_mul(rhs.into_u128());

    let mut high = mul_result.upper_half();
    let mut low = mul_result.lower_half();

    high = high.wrapping_add(low << 1);

    low ^= high >> 3;
    low ^= low >> 35;
    low = low.wrapping_mul(PRIME_MX2);
    low ^= low >> 28;

    high = avalanche(high);

    X128 { low, high }.into()
}

#[inline(always)]
fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 {
    assert_input_range!(9..=16, input.len());
    let input_first = input.first_u64().unwrap();
    let input_last = input.last_u64().unwrap();

    let secret_words = secret.for_128().words_for_9_to_16();

    let val1 =
        ((secret_words[0] ^ secret_words[1]).wrapping_sub(seed)) ^ input_first ^ input_last;
    let val2 = ((secret_words[2] ^ secret_words[3]).wrapping_add(seed)) ^ input_last;

    let mul_result = val1.into_u128().wrapping_mul(PRIME64_1.into_u128());
    let low = mul_result
        .lower_half()
        .wrapping_add((input.len() - 1).into_u64() << 54);

    // Algorithm describes this in two ways
    let high = mul_result
        .upper_half()
        .wrapping_add(val2.upper_half().into_u64() << 32)
        .wrapping_add(val2.lower_half().into_u64().wrapping_mul(PRIME32_2));

    let low = low ^ high.swap_bytes();

    // Algorithm describes this multiplication in two ways.
    let q = X128 { low, high }
        .into_u128()
        .wrapping_mul(PRIME64_2.into_u128());

    let low = avalanche(q.lower_half());
    let high = avalanche(q.upper_half());

    X128 { low, high }.into()
}

/// Mixes 16-byte chunk pairs from both ends of the input into a
/// two-lane accumulator, then converges via [`finalize_medium`].
#[inline]
fn impl_17_to_128_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 {
    assert_input_range!(17..=128, input.len());
    let input_len = input.len().into_u64();
    let mut acc = [input_len.wrapping_mul(PRIME64_1), 0];

    impl_17_to_128_bytes_iter(secret, input, |fwd, bwd, secret| {
        mix_two_chunks(&mut acc, fwd, bwd, secret, seed);
    });

    finalize_medium(acc, input_len, seed)
}

#[inline]
fn impl_129_to_240_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u128 {
    assert_input_range!(129..=240, input.len());
    let input_len = input.len().into_u64();
    let mut acc = [input_len.wrapping_mul(PRIME64_1), 0];

    let head = pairs_of_u64_bytes(input);
    let mut head = head.iter();

    // First four chunk pairs use the part-1 secret words...
    let ss = secret.for_128().words_for_127_to_240_part1();
    for (input, secret) in head.by_ref().zip(ss).take(4) {
        mix_two_chunks(&mut acc, &input[0], &input[1], secret, seed);
    }

    // ...then the accumulator is avalanched before the remaining pairs.
    let mut acc = acc.map(avalanche);

    let ss = secret.for_128().words_for_127_to_240_part2();
    for (input, secret) in head.zip(ss) {
        mix_two_chunks(&mut acc, &input[0], &input[1], secret, seed);
    }

    // Grab the final pair of 16-byte chunks from the end of the input.
    let (_, tail) = input.bp_as_rchunks::<16>();
    let (_, tail) = tail.bp_as_rchunks::<2>();
    let tail = tail.last().unwrap();

    let ss = secret.for_128().words_for_127_to_240_part3();

    // note that the half-chunk order and the seed is different here
    mix_two_chunks(&mut acc, &tail[1], &tail[0], ss, seed.wrapping_neg());

    finalize_medium(acc, input_len, seed)
}

/// Folds one forward and one backward 16-byte chunk into the two
/// accumulator lanes, cross-mixing the raw chunk words.
#[inline]
fn mix_two_chunks(
    acc: &mut [u64; 2],
    data1: &[u8; 16],
    data2: &[u8; 16],
    secret: &[[u8; 16]; 2],
    seed: u64,
) {
    let data_words1 = to_u64s(data1);
    let data_words2 = to_u64s(data2);

    acc[0] = acc[0].wrapping_add(mix_step(data1, &secret[0], seed));
    acc[1] = acc[1].wrapping_add(mix_step(data2, &secret[1], seed));
    // Each lane is XORed with the *other* chunk's word sum.
    acc[0] ^= data_words2[0].wrapping_add(data_words2[1]);
    acc[1] ^= data_words1[0].wrapping_add(data_words1[1]);
}

/// Converges the two-lane accumulator of the 17..=240 byte paths
/// into the final 128-bit value.
#[inline]
fn finalize_medium(acc: [u64; 2], input_len: u64, seed: u64) -> u128 {
    let low = acc[0].wrapping_add(acc[1]);
    let high = acc[0]
        .wrapping_mul(PRIME64_1)
        .wrapping_add(acc[1].wrapping_mul(PRIME64_4))
        .wrapping_add((input_len.wrapping_sub(seed)).wrapping_mul(PRIME64_2));

    let low = avalanche(low);
    let high = avalanche(high).wrapping_neg();

    X128 { low, high }.into()
}

/// Large-input path; `dispatch!` selects the best SIMD implementation.
#[inline]
fn impl_241_plus_bytes(secret: &Secret, input: &[u8]) -> u128 {
    assert_input_range!(241.., input.len());
    dispatch! {
        fn oneshot_impl<>(secret: &Secret, input: &[u8]) -> u128
        []
    }
}

#[inline]
fn oneshot_impl(vector: impl Vector, secret: &Secret, input: &[u8]) -> u128 {
    Algorithm(vector).oneshot(secret, input, Finalize128)
}

#[cfg(test)]
mod test {
    use crate::xxhash3::test::bytes;

    use super::*;

    // Compile-time check that `Hasher` is `Clone`.
    // (Restored generic parameters that were lost in extraction.)
    const _: () = {
        const fn is_clone<T: Clone>() {}
        is_clone::<Hasher>();
    };

    const EMPTY_BYTES: [u8; 0] = [];

    /// Feeds the input one byte at a time to exercise the streaming path.
    fn hash_byte_by_byte(input: &[u8]) -> u128 {
        let mut hasher = Hasher::new();
        for byte in input.chunks(1) {
            hasher.write(byte)
        }
        hasher.finish_128()
    }

    #[test]
    fn oneshot_empty() {
        let hash = Hasher::oneshot(&EMPTY_BYTES);
        assert_eq!(hash, 0x99aa_06d3_0147_98d8_6001_c324_468d_497f);
    }

    #[test]
    fn streaming_empty() {
        let hash = hash_byte_by_byte(&EMPTY_BYTES);
        assert_eq!(hash, 0x99aa_06d3_0147_98d8_6001_c324_468d_497f);
    }

    #[test]
    fn oneshot_1_to_3_bytes() {
        test_1_to_3_bytes(Hasher::oneshot)
    }

    #[test]
    fn streaming_1_to_3_bytes() {
        test_1_to_3_bytes(hash_byte_by_byte)
    }

    #[track_caller]
    fn test_1_to_3_bytes(mut f: impl FnMut(&[u8]) -> u128) {
        let inputs = bytes![1, 2, 3];

        let expected = [
            0xa6cd_5e93_9200_0f6a_c44b_dff4_074e_ecdb,
            0x6a4a_5274_c1b0_d3ad_d664_5fc3_051a_9457,
            0xe3b5_5f57_945a_17cf_5f42_99fc_161c_9cbb,
        ];

        for (input, expected) in inputs.iter().zip(expected) {
            let hash = f(input);
            assert_eq!(hash, expected, "input was {} bytes", input.len());
        }
    }

    #[test]
    fn oneshot_4_to_8_bytes() {
        test_4_to_8_bytes(Hasher::oneshot)
    }

    #[test]
    fn streaming_4_to_8_bytes() {
        test_4_to_8_bytes(hash_byte_by_byte)
    }

    #[track_caller]
    fn test_4_to_8_bytes(mut f: impl FnMut(&[u8]) -> u128) {
        let inputs = bytes![4, 5, 6, 7, 8];

        let expected = [
            0xeb70_bf5f_c779_e9e6_a611_1d53_e80a_3db5,
            0x9434_5321_06a7_c141_c920_d234_7a85_929b,
            0x545f_093d_32b1_68fe_a6b5_2f4d_ea38_96a3,
            0x61ce_291b_c3a4_357d_dbb2_0782_1e6d_5efe,
            0xe1e4_432a_6221_7fe4_cfd5_0c61_c8bb_98c1,
        ];

        for (input, expected) in inputs.iter().zip(expected) {
            let hash = f(input);
            assert_eq!(hash, expected, "input was {} bytes", input.len());
        }
    }

    #[test]
    fn oneshot_9_to_16_bytes() {
        test_9_to_16_bytes(Hasher::oneshot)
    }

    #[test]
    fn streaming_9_to_16_bytes() {
        test_9_to_16_bytes(hash_byte_by_byte)
    }

    #[track_caller]
    fn test_9_to_16_bytes(mut f: impl FnMut(&[u8]) -> u128) {
        let inputs = bytes![9, 10, 11, 12, 13, 14, 15, 16];

        let expected = [
            0x16c7_69d8_3e4a_ebce_9079_3197_9dca_3746,
            0xbd93_0669_a87b_4b37_e67b_f1ad_8dcf_73a8,
            0xacad_8071_8f47_d494_7d67_cfc1_730f_22a3,
            0x38f9_2247_a7f7_3cc5_7780_eb31_198f_13ca,
            0xae92_e123_e947_2408_bd79_5526_1902_66c0,
            0x5f91_e6bf_7418_cfaa_55d6_5715_e2a5_7c31,
            0x301a_9f75_4e8f_569a_0017_ea4b_e19b_c787,
            0x7295_0631_8276_07e2_8428_12cc_870d_cae2,
        ];

        for (input, expected) in inputs.iter().zip(expected) {
            let hash = f(input);
            assert_eq!(hash, expected, "input was {} bytes", input.len());
        }
    }

    #[test]
    fn oneshot_17_to_128_bytes() {
        test_17_to_128_bytes(Hasher::oneshot)
    }

    #[test]
    fn streaming_17_to_128_bytes() {
        test_17_to_128_bytes(hash_byte_by_byte)
    }

    #[track_caller]
    fn test_17_to_128_bytes(mut f: impl FnMut(&[u8]) -> u128) {
        let lower_boundary = bytes![17, 18, 19];
        let chunk_boundary = bytes![31, 32, 33];
        let upper_boundary = bytes![126, 127, 128];

        let inputs = lower_boundary
            .iter()
            .chain(chunk_boundary)
            .chain(upper_boundary);

        let expected = [
            // lower_boundary
            0x685b_c458_b37d_057f_c06e_233d_f772_9217,
            0x87ce_996b_b557_6d8d_e3a3_c96b_b0af_2c23,
            0x7619_bcef_2e31_1cd8_c47d_dc58_8737_93df,
            // chunk_boundary
            0x4ed3_946d_393b_687b_b54d_e399_3874_ed20,
            0x25e7_c9b3_424c_eed2_457d_9566_b6fc_d697,
            0x0217_5c3a_abb0_0637_e08d_8495_1339_de86,
            // upper_boundary
            0x0abc_2062_87ce_2afe_5181_0be2_9323_2106,
            0xd5ad_d870_c9c9_e00f_060c_2e3d_df0f_2fb9,
            0x1479_2fc3_af88_dc6c_0532_1a0b_64d6_7b41,
        ];

        for (input, expected) in inputs.zip(expected) {
            let hash = f(input);
            assert_eq!(hash, expected, "input was {} bytes", input.len());
        }
    }

    #[test]
    fn oneshot_129_to_240_bytes() {
        test_129_to_240_bytes(Hasher::oneshot)
    }

    #[test]
    fn streaming_129_to_240_bytes() {
        test_129_to_240_bytes(hash_byte_by_byte)
    }

    #[track_caller]
    fn test_129_to_240_bytes(mut f: impl FnMut(&[u8]) -> u128) {
        let lower_boundary = bytes![129, 130, 131];
        let upper_boundary = bytes![238, 239, 240];

        let inputs = lower_boundary.iter().chain(upper_boundary);

        let expected = [
            // lower_boundary
            0xdd5e_74ac_6b45_f54e_bc30_b633_82b0_9a3b,
            0x6cd2_e56a_10f1_e707_3ec5_f135_d0a7_d28f,
            0x6da7_92f1_702d_4494_5609_cfc7_9dba_18fd,
            // upper_boundary
            0x73a9_e8f7_bd32_83c8_2a9b_ddd0_e5c4_014c,
            0x9843_ab31_a06b_e0df_fe21_3746_28fc_c539,
            0x65b5_be86_da55_40e7_c92b_68e1_6f83_bbb6,
        ];

        for (input, expected) in inputs.zip(expected) {
            let hash = f(input);
            assert_eq!(hash, expected, "input was {} bytes", input.len());
        }
    }

    #[test]
    fn oneshot_241_plus_bytes() {
        test_241_plus_bytes(Hasher::oneshot)
    }

    #[test]
    fn streaming_241_plus_bytes() {
        test_241_plus_bytes(hash_byte_by_byte)
    }

    #[track_caller]
    fn test_241_plus_bytes(mut f: impl FnMut(&[u8]) -> u128) {
        let inputs = bytes![241, 242, 243, 244, 1024, 10240];

        let expected = [
            0x1da1_cb61_bcb8_a2a1_02e8_cd95_421c_6d02,
            0x1623_84cb_44d1_d806_ddcb_33c4_9405_1832,
            0xbd2e_9fcf_378c_35e9_8835_f952_9193_e3dc,
            0x3ff4_93d7_a813_7ab6_bc17_c91e_c3cf_8d7f,
            0xd0ac_1f7b_93bf_57b9_e5d7_8baf_a45b_2aa5,
            0x4f63_75cc_a7ec_e1e1_bcd6_3266_df6e_2244,
        ];

        for (input, expected) in inputs.iter().zip(expected) {
            let hash = f(input);
            eprintln!("{hash:032x}\n{expected:032x}");
            assert_eq!(hash, expected, "input was {} bytes", input.len());
        }
    }
}
twox-hash-2.1.2/src/xxhash3_64.rs000064400000000000000000000414461046102023000146150ustar 00000000000000//! The implementation of XXH3_64. #![deny( clippy::missing_safety_doc, clippy::undocumented_unsafe_blocks, unsafe_op_in_unsafe_fn )] use core::hash; use crate::{ xxhash3::{primes::*, *}, IntoU128 as _, IntoU64 as _, }; pub use crate::xxhash3::{ FixedBuffer, FixedMutBuffer, OneshotWithSecretError, SecretBuffer, SecretTooShortError, SecretWithSeedError, DEFAULT_SECRET_LENGTH, SECRET_MINIMUM_LENGTH, }; /// Calculates the 64-bit hash. #[derive(Clone)] pub struct Hasher { #[cfg(feature = "alloc")] inner: AllocRawHasher, _private: (), } impl Hasher { /// Hash all data at once. If you can use this function, you may /// see noticable speed gains for certain types of input. #[must_use] #[inline] pub fn oneshot(input: &[u8]) -> u64 { impl_oneshot(DEFAULT_SECRET, DEFAULT_SEED, input) } /// Hash all data at once using the provided seed and a secret /// derived from the seed. If you can use this function, you may /// see noticable speed gains for certain types of input. #[must_use] #[inline] pub fn oneshot_with_seed(seed: u64, input: &[u8]) -> u64 { let mut secret = DEFAULT_SECRET_RAW; // We know that the secret will only be used if we have more // than 240 bytes, so don't waste time computing it otherwise. if input.len() > CUTOFF { derive_secret(seed, &mut secret); } let secret = Secret::new(&secret).expect("The default secret length is invalid"); impl_oneshot(secret, seed, input) } /// Hash all data at once using the provided secret and the /// default seed. If you can use this function, you may see /// noticable speed gains for certain types of input. #[inline] pub fn oneshot_with_secret(secret: &[u8], input: &[u8]) -> Result { let secret = Secret::new(secret).map_err(OneshotWithSecretError)?; Ok(impl_oneshot(secret, DEFAULT_SEED, input)) } /// Hash all data at once using the provided seed and secret. 
If /// you can use this function, you may see noticable speed gains /// for certain types of input. #[inline] pub fn oneshot_with_seed_and_secret( seed: u64, secret: &[u8], input: &[u8], ) -> Result { let secret = if input.len() > CUTOFF { Secret::new(secret).map_err(OneshotWithSecretError)? } else { DEFAULT_SECRET }; Ok(impl_oneshot(secret, seed, input)) } } #[cfg(feature = "alloc")] #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] mod with_alloc { use ::alloc::boxed::Box; use super::*; impl Hasher { /// Constructs the hasher using the default seed and secret values. pub fn new() -> Self { Self { inner: RawHasherCore::allocate_default(), _private: (), } } /// Constructs the hasher using the provided seed and a secret /// derived from the seed. pub fn with_seed(seed: u64) -> Self { Self { inner: RawHasherCore::allocate_with_seed(seed), _private: (), } } /// Constructs the hasher using the provided seed and secret. pub fn with_seed_and_secret( seed: u64, secret: impl Into>, ) -> Result>> { Ok(Self { inner: RawHasherCore::allocate_with_seed_and_secret(seed, secret)?, _private: (), }) } /// Returns the secret. pub fn into_secret(self) -> Box<[u8]> { self.inner.into_secret() } } impl Default for Hasher { fn default() -> Self { Self::new() } } impl hash::Hasher for Hasher { #[inline] fn write(&mut self, input: &[u8]) { self.inner.write(input) } #[inline] fn finish(&self) -> u64 { self.inner.finish(Finalize64) } } } #[derive(Clone)] /// A lower-level interface for computing a hash from streaming data. /// /// The algorithm requires a secret which can be a reasonably large /// piece of data. [`Hasher`][] makes one concrete implementation /// decision that uses dynamic memory allocation, but specialized /// usages may desire more flexibility. This type, combined with /// [`SecretBuffer`][], offer that flexibility at the cost of a /// generic type. 
pub struct RawHasher(RawHasherCore); impl RawHasher { /// Construct the hasher with the provided seed, secret, and /// temporary buffer. pub fn new(secret_buffer: SecretBuffer) -> Self { Self(RawHasherCore::new(secret_buffer)) } /// Returns the secret. pub fn into_secret(self) -> S { self.0.into_secret() } } impl hash::Hasher for RawHasher where S: FixedBuffer, { #[inline] fn write(&mut self, input: &[u8]) { self.0.write(input); } #[inline] fn finish(&self) -> u64 { self.0.finish(Finalize64) } } struct Finalize64; impl Finalize for Finalize64 { type Output = u64; #[inline(always)] fn small(&self, secret: &Secret, seed: u64, input: &[u8]) -> Self::Output { impl_oneshot(secret, seed, input) } #[inline(always)] fn large( &self, vector: impl Vector, acc: [u64; 8], last_block: &[u8], last_stripe: &[u8; 64], secret: &Secret, len: usize, ) -> Self::Output { Algorithm(vector).finalize_64(acc, last_block, last_stripe, secret, len) } } #[inline(always)] fn impl_oneshot(secret: &Secret, seed: u64, input: &[u8]) -> u64 { match input.len() { 241.. 
=> impl_241_plus_bytes(secret, input), 129..=240 => impl_129_to_240_bytes(secret, seed, input), 17..=128 => impl_17_to_128_bytes(secret, seed, input), 9..=16 => impl_9_to_16_bytes(secret, seed, input), 4..=8 => impl_4_to_8_bytes(secret, seed, input), 1..=3 => impl_1_to_3_bytes(secret, seed, input), 0 => impl_0_bytes(secret, seed), } } #[inline(always)] fn impl_0_bytes(secret: &Secret, seed: u64) -> u64 { let secret_words = secret.for_64().words_for_0(); avalanche_xxh64(seed ^ secret_words[0] ^ secret_words[1]) } #[inline(always)] fn impl_1_to_3_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { assert_input_range!(1..=3, input.len()); let combined = impl_1_to_3_bytes_combined(input); let secret_words = secret.for_64().words_for_1_to_3(); let value = { let secret = (secret_words[0] ^ secret_words[1]).into_u64(); secret.wrapping_add(seed) ^ combined.into_u64() }; // FUTURE: TEST: "Note that the XXH3-64 result is the lower half of XXH3-128 result." avalanche_xxh64(value) } #[inline(always)] fn impl_4_to_8_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { assert_input_range!(4..=8, input.len()); let input_first = input.first_u32().unwrap(); let input_last = input.last_u32().unwrap(); let modified_seed = seed ^ (seed.lower_half().swap_bytes().into_u64() << 32); let secret_words = secret.for_64().words_for_4_to_8(); let combined = input_last.into_u64() | (input_first.into_u64() << 32); let mut value = { let a = secret_words[0] ^ secret_words[1]; let b = a.wrapping_sub(modified_seed); b ^ combined }; value ^= value.rotate_left(49) ^ value.rotate_left(24); value = value.wrapping_mul(PRIME_MX2); value ^= (value >> 35).wrapping_add(input.len().into_u64()); value = value.wrapping_mul(PRIME_MX2); value ^= value >> 28; value } #[inline(always)] fn impl_9_to_16_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { assert_input_range!(9..=16, input.len()); let input_first = input.first_u64().unwrap(); let input_last = input.last_u64().unwrap(); let secret_words = 
secret.for_64().words_for_9_to_16(); let low = ((secret_words[0] ^ secret_words[1]).wrapping_add(seed)) ^ input_first; let high = ((secret_words[2] ^ secret_words[3]).wrapping_sub(seed)) ^ input_last; let mul_result = low.into_u128().wrapping_mul(high.into_u128()); let value = input .len() .into_u64() .wrapping_add(low.swap_bytes()) .wrapping_add(high) .wrapping_add(mul_result.lower_half() ^ mul_result.upper_half()); avalanche(value) } #[inline] fn impl_17_to_128_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { assert_input_range!(17..=128, input.len()); let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); impl_17_to_128_bytes_iter(secret, input, |fwd, bwd, secret| { acc = acc.wrapping_add(mix_step(fwd, &secret[0], seed)); acc = acc.wrapping_add(mix_step(bwd, &secret[1], seed)); }); avalanche(acc) } #[inline] fn impl_129_to_240_bytes(secret: &Secret, seed: u64, input: &[u8]) -> u64 { assert_input_range!(129..=240, input.len()); let mut acc = input.len().into_u64().wrapping_mul(PRIME64_1); let (head, _) = input.bp_as_chunks(); let mut head = head.iter(); let ss = secret.for_64().words_for_127_to_240_part1(); for (chunk, secret) in head.by_ref().zip(ss).take(8) { acc = acc.wrapping_add(mix_step(chunk, secret, seed)); } acc = avalanche(acc); let ss = secret.for_64().words_for_127_to_240_part2(); for (chunk, secret) in head.zip(ss) { acc = acc.wrapping_add(mix_step(chunk, secret, seed)); } let last_chunk = input.last_chunk().unwrap(); let ss = secret.for_64().words_for_127_to_240_part3(); acc = acc.wrapping_add(mix_step(last_chunk, ss, seed)); avalanche(acc) } #[inline] fn impl_241_plus_bytes(secret: &Secret, input: &[u8]) -> u64 { assert_input_range!(241.., input.len()); dispatch! 
{ fn oneshot_impl<>(secret: &Secret, input: &[u8]) -> u64 [] } } #[inline] fn oneshot_impl(vector: impl Vector, secret: &Secret, input: &[u8]) -> u64 { Algorithm(vector).oneshot(secret, input, Finalize64) } #[cfg(test)] mod test { use std::hash::Hasher as _; use crate::xxhash3::test::bytes; use super::*; const _: () = { const fn is_clone() {} is_clone::(); }; const EMPTY_BYTES: [u8; 0] = []; fn hash_byte_by_byte(input: &[u8]) -> u64 { let mut hasher = Hasher::new(); for byte in input.chunks(1) { hasher.write(byte) } hasher.finish() } fn hash_byte_by_byte_with_seed(seed: u64, input: &[u8]) -> u64 { let mut hasher = Hasher::with_seed(seed); for byte in input.chunks(1) { hasher.write(byte) } hasher.finish() } #[test] fn oneshot_empty() { let hash = Hasher::oneshot(&EMPTY_BYTES); assert_eq!(hash, 0x2d06_8005_38d3_94c2); } #[test] fn streaming_empty() { let hash = hash_byte_by_byte(&EMPTY_BYTES); assert_eq!(hash, 0x2d06_8005_38d3_94c2); } #[test] fn oneshot_1_to_3_bytes() { test_1_to_3_bytes(Hasher::oneshot) } #[test] fn streaming_1_to_3_bytes() { test_1_to_3_bytes(hash_byte_by_byte) } #[track_caller] fn test_1_to_3_bytes(mut f: impl FnMut(&[u8]) -> u64) { let inputs = bytes![1, 2, 3]; let expected = [ 0xc44b_dff4_074e_ecdb, 0xd664_5fc3_051a_9457, 0x5f42_99fc_161c_9cbb, ]; for (input, expected) in inputs.iter().zip(expected) { let hash = f(input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] fn oneshot_4_to_8_bytes() { test_4_to_8_bytes(Hasher::oneshot) } #[test] fn streaming_4_to_8_bytes() { test_4_to_8_bytes(hash_byte_by_byte) } #[track_caller] fn test_4_to_8_bytes(mut f: impl FnMut(&[u8]) -> u64) { let inputs = bytes![4, 5, 6, 7, 8]; let expected = [ 0x60da_b036_a582_11f2, 0xb075_753a_84ca_0fbe, 0xa658_4d1d_9a6a_e704, 0x0cd2_084a_6240_6b69, 0x3a1c_2d7c_85af_88f8, ]; for (input, expected) in inputs.iter().zip(expected) { let hash = f(input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] fn 
oneshot_9_to_16_bytes() { test_9_to_16_bytes(Hasher::oneshot) } #[test] fn streaming_9_to_16_bytes() { test_9_to_16_bytes(hash_byte_by_byte) } #[track_caller] fn test_9_to_16_bytes(mut f: impl FnMut(&[u8]) -> u64) { let inputs = bytes![9, 10, 11, 12, 13, 14, 15, 16]; let expected = [ 0xe961_2598_145b_b9dc, 0xab69_a08e_f83d_8f77, 0x1cf3_96aa_4de6_198d, 0x5ace_6a51_1c10_894b, 0xb7a5_d8a8_309a_2cb9, 0x4cf4_5c94_4a9a_2237, 0x55ec_edc2_b87b_b042, 0x8355_e3a6_f617_70db, ]; for (input, expected) in inputs.iter().zip(expected) { let hash = f(input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] fn oneshot_17_to_128_bytes() { test_17_to_128_bytes(Hasher::oneshot) } #[test] fn streaming_17_to_128_bytes() { test_17_to_128_bytes(hash_byte_by_byte) } #[track_caller] fn test_17_to_128_bytes(mut f: impl FnMut(&[u8]) -> u64) { let lower_boundary = bytes![17, 18, 19]; let chunk_boundary = bytes![31, 32, 33]; let upper_boundary = bytes![126, 127, 128]; let inputs = lower_boundary .iter() .chain(chunk_boundary) .chain(upper_boundary); let expected = [ // lower_boundary 0x9ef3_41a9_9de3_7328, 0xf691_2490_d4c0_eed5, 0x60e7_2614_3cf5_0312, // chunk_boundary 0x4f36_db8e_4df3_78fd, 0x3523_581f_e96e_4c05, 0xe68c_56ba_8899_1e58, // upper_boundary 0x6c2a_9eb7_459c_dc61, 0x120b_9787_f842_5f2f, 0x85c6_174c_7ff4_c46b, ]; for (input, expected) in inputs.zip(expected) { let hash = f(input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] fn oneshot_129_to_240_bytes() { test_129_to_240_bytes(Hasher::oneshot) } #[test] fn streaming_129_to_240_bytes() { test_129_to_240_bytes(hash_byte_by_byte) } #[track_caller] fn test_129_to_240_bytes(mut f: impl FnMut(&[u8]) -> u64) { let lower_boundary = bytes![129, 130, 131]; let upper_boundary = bytes![238, 239, 240]; let inputs = lower_boundary.iter().chain(upper_boundary); let expected = [ // lower_boundary 0xec76_42b4_31ba_3e5a, 0x4d32_24b1_0090_8a87, 0xe57f_7ea6_741f_e3a0, // upper_boundary 
0x3044_9a0b_4899_dee9, 0x972b_14e3_c46f_214b, 0x375a_384d_957f_e865, ]; for (input, expected) in inputs.zip(expected) { let hash = f(input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] fn oneshot_241_plus_bytes() { test_241_plus_bytes(Hasher::oneshot) } #[test] fn streaming_241_plus_bytes() { test_241_plus_bytes(hash_byte_by_byte) } #[track_caller] fn test_241_plus_bytes(mut f: impl FnMut(&[u8]) -> u64) { let inputs = bytes![241, 242, 243, 244, 1024, 10240]; let expected = [ 0x02e8_cd95_421c_6d02, 0xddcb_33c4_9405_1832, 0x8835_f952_9193_e3dc, 0xbc17_c91e_c3cf_8d7f, 0xe5d7_8baf_a45b_2aa5, 0xbcd6_3266_df6e_2244, ]; for (input, expected) in inputs.iter().zip(expected) { let hash = f(input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } #[test] fn oneshot_with_seed() { test_with_seed(Hasher::oneshot_with_seed) } #[test] fn streaming_with_seed() { test_with_seed(hash_byte_by_byte_with_seed) } #[track_caller] fn test_with_seed(mut f: impl FnMut(u64, &[u8]) -> u64) { let inputs = bytes![0, 1, 4, 9, 17, 129, 241, 1024]; let expected = [ 0x4aed_e683_89c0_e311, 0x78fc_079a_75aa_f3c0, 0x1b73_06b8_9f25_4507, 0x7df7_627f_d1f9_39b6, 0x49ca_0fff_0950_1622, 0x2bfd_caec_30ff_3000, 0xf984_56bc_25be_0901, 0x2483_9f0f_cdf4_d078, ]; for (input, expected) in inputs.iter().zip(expected) { let hash = f(0xdead_cafe, input); assert_eq!(hash, expected, "input was {} bytes", input.len()); } } } twox-hash-2.1.2/src/xxhash64.rs000064400000000000000000000445031046102023000143700ustar 00000000000000//! The implementation of XXH64. use core::{ fmt, hash::{self, BuildHasher}, mem, }; use crate::IntoU64; // Keeping these constants in this form to match the C code. 
const PRIME64_1: u64 = 0x9E3779B185EBCA87; const PRIME64_2: u64 = 0xC2B2AE3D27D4EB4F; const PRIME64_3: u64 = 0x165667B19E3779F9; const PRIME64_4: u64 = 0x85EBCA77C2B2AE63; const PRIME64_5: u64 = 0x27D4EB2F165667C5; type Lane = u64; type Lanes = [Lane; 4]; type Bytes = [u8; 32]; const BYTES_IN_LANE: usize = mem::size_of::(); #[derive(Clone, PartialEq)] struct BufferData(Lanes); impl BufferData { const fn new() -> Self { Self([0; 4]) } const fn bytes(&self) -> &Bytes { const _: () = assert!(mem::align_of::() <= mem::align_of::()); // SAFETY[bytes]: The alignment of `u64` is at least that of // `u8` and all the values are initialized. unsafe { &*self.0.as_ptr().cast() } } fn bytes_mut(&mut self) -> &mut Bytes { // SAFETY: See SAFETY[bytes] unsafe { &mut *self.0.as_mut_ptr().cast() } } } impl fmt::Debug for BufferData { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_list().entries(self.0.iter()).finish() } } #[derive(Debug, Clone, PartialEq)] struct Buffer { offset: usize, data: BufferData, } impl Buffer { const fn new() -> Self { Self { offset: 0, data: BufferData::new(), } } // RATIONALE: See RATIONALE[inline] #[inline] fn extend<'d>(&mut self, data: &'d [u8]) -> (Option<&Lanes>, &'d [u8]) { // Most of the slice methods we use here have `_unchecked` variants, but // // 1. this method is called one time per `Hasher::write` call // 2. this method early exits if we don't have anything in the buffer // // Because of this, removing the panics via `unsafe` doesn't // have much benefit other than reducing code size by a tiny // bit. 
if self.offset == 0 { return (None, data); }; let bytes = self.data.bytes_mut(); debug_assert!(self.offset <= bytes.len()); let empty = &mut bytes[self.offset..]; let n_to_copy = usize::min(empty.len(), data.len()); let dst = &mut empty[..n_to_copy]; let (src, rest) = data.split_at(n_to_copy); dst.copy_from_slice(src); self.offset += n_to_copy; debug_assert!(self.offset <= bytes.len()); if self.offset == bytes.len() { self.offset = 0; (Some(&self.data.0), rest) } else { (None, rest) } } // RATIONALE: See RATIONALE[inline] #[inline] fn set(&mut self, data: &[u8]) { if data.is_empty() { return; } debug_assert_eq!(self.offset, 0); let n_to_copy = data.len(); let bytes = self.data.bytes_mut(); debug_assert!(n_to_copy < bytes.len()); bytes[..n_to_copy].copy_from_slice(data); self.offset = data.len(); } // RATIONALE: See RATIONALE[inline] #[inline] fn remaining(&self) -> &[u8] { &self.data.bytes()[..self.offset] } } #[derive(Clone, PartialEq)] struct Accumulators(Lanes); impl Accumulators { const fn new(seed: u64) -> Self { Self([ seed.wrapping_add(PRIME64_1).wrapping_add(PRIME64_2), seed.wrapping_add(PRIME64_2), seed, seed.wrapping_sub(PRIME64_1), ]) } // RATIONALE: See RATIONALE[inline] #[inline] fn write(&mut self, lanes: Lanes) { let [acc1, acc2, acc3, acc4] = &mut self.0; let [lane1, lane2, lane3, lane4] = lanes; *acc1 = round(*acc1, lane1.to_le()); *acc2 = round(*acc2, lane2.to_le()); *acc3 = round(*acc3, lane3.to_le()); *acc4 = round(*acc4, lane4.to_le()); } // RATIONALE: See RATIONALE[inline] #[inline] fn write_many<'d>(&mut self, mut data: &'d [u8]) -> &'d [u8] { while let Some((chunk, rest)) = data.split_first_chunk::() { // SAFETY: We have the right number of bytes and are // handling the unaligned case. 
let lanes = unsafe { chunk.as_ptr().cast::().read_unaligned() }; self.write(lanes); data = rest; } data } // RATIONALE: See RATIONALE[inline] #[inline] const fn finish(&self) -> u64 { let [acc1, acc2, acc3, acc4] = self.0; let mut acc = { let acc1 = acc1.rotate_left(1); let acc2 = acc2.rotate_left(7); let acc3 = acc3.rotate_left(12); let acc4 = acc4.rotate_left(18); acc1.wrapping_add(acc2) .wrapping_add(acc3) .wrapping_add(acc4) }; acc = Self::merge_accumulator(acc, acc1); acc = Self::merge_accumulator(acc, acc2); acc = Self::merge_accumulator(acc, acc3); acc = Self::merge_accumulator(acc, acc4); acc } // RATIONALE: See RATIONALE[inline] #[inline] const fn merge_accumulator(mut acc: u64, acc_n: u64) -> u64 { acc ^= round(0, acc_n); acc = acc.wrapping_mul(PRIME64_1); acc.wrapping_add(PRIME64_4) } } impl fmt::Debug for Accumulators { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let [acc1, acc2, acc3, acc4] = self.0; f.debug_struct("Accumulators") .field("acc1", &acc1) .field("acc2", &acc2) .field("acc3", &acc3) .field("acc4", &acc4) .finish() } } /// Calculates the 64-bit hash. #[derive(Debug, Clone, PartialEq)] pub struct Hasher { seed: u64, accumulators: Accumulators, buffer: Buffer, length: u64, } impl Default for Hasher { fn default() -> Self { Self::with_seed(0) } } impl Hasher { /// Hash all data at once. If you can use this function, you may /// see noticable speed gains for certain types of input. #[must_use] // RATIONALE[inline]: // // These `inline`s help unlock a speedup in one benchmark [1] from // ~900µs to ~200µs. // // Further inspection of the disassembly showed that various // helper functions were not being inlined. Avoiding these few // function calls wins us the tiniest performance increase, just // enough so that we are neck-and-neck with (or slightly faster // than!) the C code. // // This results in the entire hash computation being inlined at // the call site. 
    //
    // [1]: https://github.com/apache/datafusion-comet/pull/575
    #[inline]
    pub fn oneshot(seed: u64, data: &[u8]) -> u64 {
        let len = data.len();

        // Since we know that there's no more data coming, we don't
        // need to construct the intermediate buffers or copy data to
        // or from the buffers.
        let mut accumulators = Accumulators::new(seed);

        // Consume all full stripes; the leftover tail is folded in by
        // `finish_with`.
        let data = accumulators.write_many(data);

        Self::finish_with(seed, len.into_u64(), &accumulators, data)
    }

    /// Constructs the hasher with an initial seed.
    #[must_use]
    pub const fn with_seed(seed: u64) -> Self {
        // Step 1. Initialize internal accumulators
        Self {
            seed,
            accumulators: Accumulators::new(seed),
            buffer: Buffer::new(),
            length: 0,
        }
    }

    /// The seed this hasher was created with.
    pub const fn seed(&self) -> u64 {
        self.seed
    }

    /// The total number of bytes hashed.
    pub const fn total_len(&self) -> u64 {
        self.length
    }

    // Shared epilogue for both the streaming and oneshot paths:
    // performs steps 3-6 of the algorithm on the converged (or
    // seed-only) state plus the unconsumed tail bytes.
    #[must_use]
    // RATIONALE: See RATIONALE[inline]
    #[inline]
    fn finish_with(seed: u64, len: u64, accumulators: &Accumulators, mut remaining: &[u8]) -> u64 {
        // Step 3. Accumulator convergence
        //
        // Inputs shorter than one stripe never touched the lane
        // accumulators, so the special seed-derived base is used.
        let mut acc = if len < BYTES_IN_LANE.into_u64() {
            seed.wrapping_add(PRIME64_5)
        } else {
            accumulators.finish()
        };

        // Step 4. Add input length
        //
        // NOTE(review): non-wrapping add; overflowing it would require
        // more than u64::MAX bytes of input — confirm acceptable.
        acc += len;

        // Step 5. Consume remaining input
        //
        // First fold in whole 8-byte words... (the chunk size is
        // inferred from `u64::from_ne_bytes`)
        while let Some((chunk, rest)) = remaining.split_first_chunk() {
            let lane = u64::from_ne_bytes(*chunk).to_le();

            acc ^= round(0, lane);
            acc = acc.rotate_left(27).wrapping_mul(PRIME64_1);
            acc = acc.wrapping_add(PRIME64_4);
            remaining = rest;
        }

        // ...then a possible 4-byte word... (inferred from
        // `u32::from_ne_bytes`)
        while let Some((chunk, rest)) = remaining.split_first_chunk() {
            let lane = u32::from_ne_bytes(*chunk).to_le().into_u64();

            acc ^= lane.wrapping_mul(PRIME64_1);
            acc = acc.rotate_left(23).wrapping_mul(PRIME64_2);
            acc = acc.wrapping_add(PRIME64_3);

            remaining = rest;
        }

        // ...then the final 0-7 single bytes.
        for &byte in remaining {
            let lane = byte.into_u64();

            acc ^= lane.wrapping_mul(PRIME64_5);
            acc = acc.rotate_left(11).wrapping_mul(PRIME64_1);
        }

        // Step 6. Final mix (avalanche)
        acc ^= acc >> 33;
        acc = acc.wrapping_mul(PRIME64_2);
        acc ^= acc >> 29;
        acc = acc.wrapping_mul(PRIME64_3);
        acc ^= acc >> 32;

        acc
    }
}

impl hash::Hasher for Hasher {
    // RATIONALE: See RATIONALE[inline]
    #[inline]
    fn write(&mut self, data: &[u8]) {
        let len = data.len();

        // Step 2. Process stripes
        //
        // Top up the carry-over buffer first; if that completes a
        // stripe, fold it in.
        let (buffered_lanes, data) = self.buffer.extend(data);

        if let Some(&lanes) = buffered_lanes {
            self.accumulators.write(lanes);
        }

        // Consume all full stripes directly from the input...
        let data = self.accumulators.write_many(data);

        // ...and stash whatever partial stripe is left.
        self.buffer.set(data);

        self.length += len.into_u64();
    }

    // RATIONALE: See RATIONALE[inline]
    #[inline]
    fn finish(&self) -> u64 {
        Self::finish_with(
            self.seed,
            self.length,
            &self.accumulators,
            self.buffer.remaining(),
        )
    }
}

// The core xxHash64 round function: mix one lane into an accumulator.
// RATIONALE: See RATIONALE[inline]
#[inline]
const fn round(mut acc: u64, lane: u64) -> u64 {
    acc = acc.wrapping_add(lane.wrapping_mul(PRIME64_2));
    acc = acc.rotate_left(31);
    acc.wrapping_mul(PRIME64_1)
}

/// Constructs [`Hasher`][] for multiple hasher instances.
#[derive(Clone)]
pub struct State(u64);

impl State {
    /// Constructs the hasher with an initial seed.
// (Methods of `impl State`, opened just above this chunk.)
    pub fn with_seed(seed: u64) -> Self {
        Self(seed)
    }
}

impl BuildHasher for State {
    type Hasher = Hasher;

    fn build_hasher(&self) -> Self::Hasher {
        Hasher::with_seed(self.0)
    }
}

#[cfg(test)]
mod test {
    use core::{
        array,
        hash::{BuildHasherDefault, Hasher as _},
    };
    use std::collections::HashMap;

    use super::*;

    // Compile-time check that the public types stay `Clone`.
    const _TRAITS: () = {
        const fn is_clone<T: Clone>() {}
        is_clone::<Hasher>();
        is_clone::<State>();
    };

    const EMPTY_BYTES: [u8; 0] = [];

    #[test]
    fn ingesting_byte_by_byte_is_equivalent_to_large_chunks() {
        let bytes = [0x9c; 32];

        let mut byte_by_byte = Hasher::with_seed(0);
        for byte in bytes.chunks(1) {
            byte_by_byte.write(byte);
        }
        let byte_by_byte = byte_by_byte.finish();

        let mut one_chunk = Hasher::with_seed(0);
        one_chunk.write(&bytes);
        let one_chunk = one_chunk.finish();

        assert_eq!(byte_by_byte, one_chunk);
    }

    #[test]
    fn hash_of_nothing_matches_c_implementation() {
        let mut hasher = Hasher::with_seed(0);
        hasher.write(&EMPTY_BYTES);
        assert_eq!(hasher.finish(), 0xef46_db37_51d8_e999);
    }

    #[test]
    fn hash_of_single_byte_matches_c_implementation() {
        let mut hasher = Hasher::with_seed(0);
        hasher.write(&[42]);
        assert_eq!(hasher.finish(), 0x0a9e_dece_beb0_3ae4);
    }

    #[test]
    fn hash_of_multiple_bytes_matches_c_implementation() {
        let mut hasher = Hasher::with_seed(0);
        hasher.write(b"Hello, world!\0");
        assert_eq!(hasher.finish(), 0x7b06_c531_ea43_e89f);
    }

    #[test]
    fn hash_of_multiple_chunks_matches_c_implementation() {
        let bytes: [u8; 100] = array::from_fn(|i| i as u8);
        let mut hasher = Hasher::with_seed(0);
        hasher.write(&bytes);
        assert_eq!(hasher.finish(), 0x6ac1_e580_3216_6597);
    }

    #[test]
    fn hash_with_different_seed_matches_c_implementation() {
        let mut hasher = Hasher::with_seed(0xae05_4331_1b70_2d91);
        hasher.write(&EMPTY_BYTES);
        assert_eq!(hasher.finish(), 0x4b6a_04fc_df7a_4672);
    }

    #[test]
    fn hash_with_different_seed_and_multiple_chunks_matches_c_implementation() {
        let bytes: [u8; 100] = array::from_fn(|i| i as u8);
        let mut hasher = Hasher::with_seed(0xae05_4331_1b70_2d91);
        hasher.write(&bytes);
        assert_eq!(hasher.finish(), 0x567e_355e_0682_e1f1);
    }

    // Exercises the carry-over buffer: every 64-byte window of a
    // larger allocation must hash the same regardless of alignment.
    #[test]
    fn hashes_with_different_offsets_are_the_same() {
        let bytes = [0x7c; 4096];
        let expected = Hasher::oneshot(0, &[0x7c; 64]);

        let the_same = bytes
            .windows(64)
            .map(|w| {
                let mut hasher = Hasher::with_seed(0);
                hasher.write(w);
                hasher.finish()
            })
            .all(|h| h == expected);
        assert!(the_same);
    }

    #[test]
    fn can_be_used_in_a_hashmap_with_a_default_seed() {
        let mut hash: HashMap<_, _, BuildHasherDefault<Hasher>> = Default::default();
        hash.insert(42, "the answer");
        assert_eq!(hash.get(&42), Some(&"the answer"));
    }
}

#[cfg(feature = "random")]
#[cfg_attr(docsrs, doc(cfg(feature = "random")))]
mod random_impl {
    use super::*;

    /// Constructs a randomized seed and reuses it for multiple hasher
    /// instances.
    #[derive(Clone)]
    pub struct RandomState(State);

    impl Default for RandomState {
        fn default() -> Self {
            Self::new()
        }
    }

    impl RandomState {
        fn new() -> Self {
            Self(State::with_seed(rand::random()))
        }
    }

    impl BuildHasher for RandomState {
        type Hasher = Hasher;

        fn build_hasher(&self) -> Self::Hasher {
            self.0.build_hasher()
        }
    }

    #[cfg(test)]
    mod test {
        use std::collections::HashMap;

        use super::*;

        // Compile-time check that `RandomState` stays `Clone`.
        const _TRAITS: () = {
            const fn is_clone<T: Clone>() {}
            is_clone::<RandomState>();
        };

        #[test]
        fn can_be_used_in_a_hashmap_with_a_random_seed() {
            let mut hash: HashMap<_, _, RandomState> = Default::default();
            hash.insert(42, "the answer");
            assert_eq!(hash.get(&42), Some(&"the answer"));
        }
    }
}

#[cfg(feature = "random")]
#[cfg_attr(docsrs, doc(cfg(feature = "random")))]
pub use random_impl::*;

#[cfg(feature = "serialize")]
#[cfg_attr(docsrs, doc(cfg(feature = "serialize")))]
mod serialize_impl {
    use serde::{Deserialize, Serialize};

    use super::*;

    // `Hasher` is (de)serialized through the flat `Shim`/`Core`
    // structs below so the on-wire format stays stable even if the
    // in-memory layout changes.
    impl<'de> Deserialize<'de> for Hasher {
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: serde::Deserializer<'de>,
        {
            let shim = Deserialize::deserialize(deserializer)?;
            let Shim {
                total_len,
                seed,
                core,
                buffer,
                buffer_usage,
            } = shim;
            let Core { v1, v2, v3, v4 } = core;

            let mut buffer_data = BufferData::new();
            buffer_data.bytes_mut().copy_from_slice(&buffer);

            Ok(Hasher {
                seed,
                accumulators: Accumulators([v1, v2, v3, v4]),
                buffer: Buffer {
                    offset: buffer_usage,
                    data: buffer_data,
                },
                length: total_len,
            })
        }
    }

    impl Serialize for Hasher {
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: serde::Serializer,
        {
            let Hasher {
                seed,
                ref accumulators,
                ref buffer,
                length,
            } = *self;
            let [v1, v2, v3, v4] = accumulators.0;
            let Buffer { offset, ref data } = *buffer;
            let buffer = *data.bytes();

            let shim = Shim {
                total_len: length,
                seed,
                core: Core { v1, v2, v3, v4 },
                buffer,
                buffer_usage: offset,
            };

            shim.serialize(serializer)
        }
    }

    // Stable serialized representation of `Hasher`.
    #[derive(Serialize, Deserialize)]
    struct Shim {
        total_len: u64,
        seed: u64,
        core: Core,
        buffer: [u8; 32],
        buffer_usage: usize,
    }

    // The four lane accumulators.
    #[derive(Serialize, Deserialize)]
    struct Core {
        v1: u64,
        v2: u64,
        v3: u64,
        v4: u64,
    }

    #[cfg(test)]
    mod test {
        use std::hash::Hasher as _;

        use super::*;

        type Result<T = (), E = serde_json::Error> = core::result::Result<T, E>;

        #[test]
        fn test_serialization_cycle() -> Result {
            let mut hasher = Hasher::with_seed(0);
            hasher.write(b"Hello, world!\0");
            let _ = hasher.finish();

            let serialized = serde_json::to_string(&hasher)?;
            let unserialized: Hasher = serde_json::from_str(&serialized)?;
            assert_eq!(hasher, unserialized);
            Ok(())
        }

        #[test]
        fn test_serialization_stability() -> Result {
            let mut hasher = Hasher::with_seed(0);
            hasher.write(b"Hello, world!\0");
            let _ = hasher.finish();

            // Comparison goes through `serde_json::Value`, so the
            // whitespace inside this literal is not significant.
            let expected_serialized = r#"{
                "total_len": 14,
                "seed": 0,
                "core": {
                    "v1": 6983438078262162902,
                    "v2": 14029467366897019727,
                    "v3": 0,
                    "v4": 7046029288634856825
                },
                "buffer": [
                    72, 101, 108, 108, 111, 44, 32, 119,
                    111, 114, 108, 100, 33, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0
                ],
                "buffer_usage": 14
            }"#;

            let unserialized: Hasher = serde_json::from_str(expected_serialized)?;
            assert_eq!(hasher, unserialized);

            let expected_value: serde_json::Value = serde_json::from_str(expected_serialized)?;
            let actual_value = serde_json::to_value(&hasher)?;
            assert_eq!(expected_value, actual_value);

            Ok(())
        }
    }
}