foldhash-0.2.0/.cargo_vcs_info.json0000644000000001360000000000100126230ustar { "git": { "sha1": "8f878c636fda9c9e93384824ea45e06d03f009f5" }, "path_in_vcs": "" }foldhash-0.2.0/.gitignore000064400000000000000000000000611046102023000134000ustar 00000000000000/target /Cargo.lock /bench_results /out .DS_Storefoldhash-0.2.0/Cargo.lock0000644000000602310000000000100106000ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "ahash" version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", "getrandom 0.3.3", "once_cell", "version_check", "zerocopy", ] [[package]] name = "aho-corasick" version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] [[package]] name = "allocator-api2" version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" [[package]] name = "android-tzdata" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" [[package]] name = "android_system_properties" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" dependencies = [ "libc", ] [[package]] name = "anes" version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstyle" version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" 
[[package]] name = "autocfg" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "bitflags" version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" [[package]] name = "bumpalo" version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "byteorder" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "cast" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" version = "1.2.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3a42d84bb6b69d3a8b3eaacf0d88f179e1929695e1ad012b6cf64d9caaa5fd2" dependencies = [ "shlex", ] [[package]] name = "cfg-if" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" [[package]] name = "chrono" version = "0.4.41" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" dependencies = [ "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "wasm-bindgen", "windows-link", ] [[package]] name = "ciborium" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" dependencies = [ "ciborium-io", "ciborium-ll", "serde", ] [[package]] name = "ciborium-io" version = "0.2.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" [[package]] name = "ciborium-ll" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" dependencies = [ "ciborium-io", "half", ] [[package]] name = "clap" version = "4.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb690e81c7840c0d7aade59f242ea3b41b9bc27bcd5997890e7702ae4b32e487" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" version = "4.3.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ed2e96bc16d8d740f6f48d663eddf4b8a0983e79210fd55479b7bcd0a69860e" dependencies = [ "anstyle", "clap_lex", ] [[package]] name = "clap_lex" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961" [[package]] name = "core-foundation-sys" version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "criterion" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" dependencies = [ "anes", "cast", "ciborium", "clap", "criterion-plot", "is-terminal", "itertools", "num-traits", "once_cell", "oorandom", "plotters", "rayon", "regex", "serde", "serde_derive", "serde_json", "tinytemplate", "walkdir", ] [[package]] name = "criterion-plot" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", "itertools", ] [[package]] name = "crossbeam-deque" version = "0.8.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" dependencies = [ "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-utils" version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "crunchy" version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "either" version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] name = "equivalent" version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "foldhash" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" [[package]] name = "foldhash" version = "0.2.0" dependencies = [ "ahash", "chrono", "criterion", "fxhash", "hashbrown", "rand", "rapidhash", "uuid", ] [[package]] name = "fxhash" version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" dependencies = [ "byteorder", ] [[package]] name = "getrandom" version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", "libc", "wasi 
0.11.1+wasi-snapshot-preview1", ] [[package]] name = "getrandom" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" dependencies = [ "cfg-if", "libc", "r-efi", "wasi 0.14.2+wasi-0.2.4", ] [[package]] name = "half" version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" dependencies = [ "cfg-if", "crunchy", ] [[package]] name = "hashbrown" version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ "allocator-api2", "equivalent", "foldhash 0.1.5", ] [[package]] name = "hermit-abi" version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" [[package]] name = "iana-time-zone" version = "0.1.63" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" dependencies = [ "android_system_properties", "core-foundation-sys", "iana-time-zone-haiku", "js-sys", "log", "wasm-bindgen", "windows-core", ] [[package]] name = "iana-time-zone-haiku" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" dependencies = [ "cc", ] [[package]] name = "is-terminal" version = "0.4.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9" dependencies = [ "hermit-abi", "libc", "windows-sys", ] [[package]] name = "itertools" version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" 
dependencies = [ "either", ] [[package]] name = "itoa" version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "js-sys" version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" dependencies = [ "once_cell", "wasm-bindgen", ] [[package]] name = "libc" version = "0.2.174" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" [[package]] name = "log" version = "0.4.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" [[package]] name = "memchr" version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" [[package]] name = "num-traits" version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", ] [[package]] name = "once_cell" version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "oorandom" version = "11.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" [[package]] name = "plotters" version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" dependencies = [ "num-traits", "plotters-backend", "plotters-svg", "wasm-bindgen", "web-sys", ] [[package]] name = "plotters-backend" version = "0.3.7" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" [[package]] name = "plotters-svg" version = "0.3.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" dependencies = [ "plotters-backend", ] [[package]] name = "ppv-lite86" version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" dependencies = [ "zerocopy", ] [[package]] name = "proc-macro2" version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] [[package]] name = "r-efi" version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" [[package]] name = "rand" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", "rand_chacha", "rand_core", ] [[package]] name = "rand_chacha" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", "rand_core", ] [[package]] name = "rand_core" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ "getrandom 0.2.16", ] [[package]] name = "rapidhash" version = "3.1.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "efee4b7317469c6c6e7fdeee3d094313af846a97678d6ed971d83a852d730083" [[package]] name = "rayon" version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" dependencies = [ "either", "rayon-core", ] [[package]] name = "rayon-core" version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", "crossbeam-utils", ] [[package]] name = "regex" version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", "regex-automata", "regex-syntax", ] [[package]] name = "regex-automata" version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] [[package]] name = "regex-syntax" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "rustversion" version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" [[package]] name = "ryu" version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "same-file" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" dependencies = [ "winapi-util", ] [[package]] name = "serde" version = 
"1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.142" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "030fedb782600dcbd6f02d479bf0d817ac3bb40d644745b769d6a96bc3afc5a7" dependencies = [ "itoa", "memchr", "ryu", "serde", ] [[package]] name = "shlex" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "syn" version = "2.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "tinytemplate" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" dependencies = [ "serde", "serde_json", ] [[package]] name = "unicode-ident" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "uuid" version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" dependencies = [ "js-sys", "wasm-bindgen", ] [[package]] name = "version_check" version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name 
= "walkdir" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", ] [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasi" version = "0.14.2+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" dependencies = [ "wit-bindgen-rt", ] [[package]] name = "wasm-bindgen" version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" dependencies = [ "cfg-if", "once_cell", "rustversion", "wasm-bindgen-macro", ] [[package]] name = "wasm-bindgen-backend" version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" dependencies = [ "bumpalo", "log", "proc-macro2", "quote", "syn", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" dependencies = [ "quote", "wasm-bindgen-macro-support", ] [[package]] name = "wasm-bindgen-macro-support" version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" version = "0.2.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" dependencies = [ "unicode-ident", ] [[package]] name = "web-sys" version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" dependencies = [ "js-sys", "wasm-bindgen", ] [[package]] name = "winapi-util" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ "windows-sys", ] [[package]] name = "windows-core" version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" dependencies = [ "windows-implement", "windows-interface", "windows-link", "windows-result", "windows-strings", ] [[package]] name = "windows-implement" version = "0.60.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "windows-interface" version = "0.59.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "windows-link" version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" [[package]] name = "windows-result" version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" dependencies = [ "windows-link", ] [[package]] name = "windows-strings" version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" dependencies = [ "windows-link", ] [[package]] name = "windows-sys" version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ "windows-targets", ] [[package]] name = "windows-targets" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", "windows_i686_gnullvm", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = 
"windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "wit-bindgen-rt" version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ "bitflags", ] [[package]] name = "zerocopy" version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" dependencies = [ "proc-macro2", "quote", "syn", ] foldhash-0.2.0/Cargo.toml0000644000000030170000000000100106220ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.60" name = "foldhash" version = "0.2.0" authors = ["Orson Peters "] build = false exclude = [ "benches", "tools", "assets", ] autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "A fast, non-cryptographic, minimally DoS-resistant hashing algorithm." 
readme = "README.md" keywords = [ "hash", "hasher", "no-std", ] categories = [ "algorithms", "no-std", ] license = "Zlib" repository = "https://github.com/orlp/foldhash" [features] default = ["std"] nightly = [] std = [] [lib] name = "foldhash" path = "src/lib.rs" bench = false [dependencies] [dev-dependencies.ahash] version = "0.8" [dev-dependencies.chrono] version = "0.4" [dev-dependencies.criterion] version = "0.5" [dev-dependencies.fxhash] version = "0.2" [dev-dependencies.hashbrown] version = "0.15" [dev-dependencies.rand] version = "0.8" [dev-dependencies.rapidhash] version = "3.1.0" [dev-dependencies.uuid] version = "1.8" [profile.release] lto = "thin" foldhash-0.2.0/Cargo.toml.orig000064400000000000000000000015071046102023000143050ustar 00000000000000[package] name = "foldhash" version = "0.2.0" authors = ["Orson Peters "] license = "Zlib" repository = "https://github.com/orlp/foldhash" readme = "README.md" keywords = ["hash", "hasher", "no-std"] categories = ["algorithms", "no-std"] description = "A fast, non-cryptographic, minimally DoS-resistant hashing algorithm." edition = "2021" exclude = ["benches", "tools", "assets"] rust-version = "1.60" [features] default = ["std"] std = [] nightly = [] [dependencies] [dev-dependencies] criterion = "0.5" hashbrown = "0.15" uuid = "1.8" rand = "0.8" ahash = "0.8" fxhash = "0.2" rapidhash = "3.1.0" chrono = "0.4" [lib] bench = false [[bench]] name = "bench" path = "benches/bench.rs" harness = false [[bench]] name = "avalanche" path = "benches/avalanche.rs" harness = false [profile.release] lto = "thin" foldhash-0.2.0/LICENSE000064400000000000000000000015301046102023000124170ustar 00000000000000Copyright (c) 2024 Orson Peters This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. 
Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution.foldhash-0.2.0/README.md000064400000000000000000000432761046102023000127060ustar 00000000000000# Foldhash This repository contains foldhash, a fast, non-cryptographic, minimally DoS-resistant hashing algorithm implemented in Rust designed for computational uses such as hash maps, bloom filters, count sketching, etc. When should you **not** use foldhash: - You are afraid of people studying your long-running program's behavior to reverse engineer its internal random state and using this knowledge to create many colliding inputs for computational complexity attacks. For more details see the section "HashDoS resistance". - You expect foldhash to have a consistent output across versions or platforms, such as for persistent file formats or communication protocols. - You are relying on foldhash's properties for any kind of security. Foldhash is **not appropriate for any cryptographic purpose**. Foldhash has two variants, one optimized for speed which is ideal for data structures such as hash maps and bloom filters, and one optimized for statistical quality which is ideal for algorithms such as [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) and [MinHash](https://en.wikipedia.org/wiki/MinHash). Foldhash can be used in a `#![no_std]` environment by disabling its default `"std"` feature. 
## Performance We evaluated foldhash against three commonly used hashes in Rust: [aHash](https://github.com/tkaitchuck/aHash) v0.8.11, [fxhash](https://github.com/cbreeden/fxhash) v0.2.1, and [SipHash-1-3](https://en.wikipedia.org/wiki/SipHash), the default hash algorithm in Rust at the time of writing. We evaluated both variants foldhash provides, `foldhash-f` and `foldhash-q`, which correspond to `foldhash::fast` and `foldhash::quality` in the crate respectively. First we note that hashers with random state inflate the size of your `HashMap`, which may or may not be important for your performance: ```rust std::mem::size_of::<foldhash::HashMap<u32, u32>>() = 40 // (both variants) std::mem::size_of::<ahash::HashMap<u32, u32>>() = 64 std::mem::size_of::<fxhash::FxHashMap<u32, u32>>() = 32 std::mem::size_of::<std::collections::HashMap<u32, u32>>() = 48 ``` We tested runtime performance on two machines, one with a 2023 Apple M2 CPU, one with a 2023 Intel Xeon Platinum 8481C server CPU, both with stable Rust 1.80.1. Since one of our competitors (aHash) is reliant on AES-based instructions for optimal performance we have included both a benchmark with and without `-C target-cpu=native` for the Intel machine. 
We tested across a wide variety of data types we consider representative of types / distributions one might hash in the real world, in the context of a hash table key: - `u32` - random 32-bit unsigned integers, - `u32pair` - pairs of random 32-bit unsigned integers, - `u64` - random 64-bit unsigned integers, - `u64pair` - pairs of random 64-bit unsigned integers, - `u64lobits` - 64-bit unsigned integers where only the bottom 16 bits vary, - `u64hibits` - 64-bit unsigned integers where only the top 16 bits vary, - `ipv4` - [`std::net::Ipv4Addr`](https://doc.rust-lang.org/std/net/struct.Ipv4Addr.html), which is equivalent to `[u8; 4]`, - `ipv6` - [`std::net::Ipv6Addr`](https://doc.rust-lang.org/std/net/struct.Ipv6Addr.html), which is equivalent to `[u8; 16]`, - `rgba` - random `(u8, u8, u8, u8)` tuples, - `strenglishword` - strings containing words sampled uniformly from the top 10,000 most common English words, - `struuid` - random UUIDs, hashed in string representation, - `strurl` - strings containing URLs sampled uniformly from a corpus of 10,000 URLs, - `strdate` - random `YYYY-MM-DD` date strings, - `accesslog` - `(u128, u32, chrono::NaiveDate, bool)`, meant to simulate a typical larger compound type, in this case `(resource_id, user_id, date, success)` for an access log. - `kilobyte` - random bytestrings one kilobyte in length, - `tenkilobyte` - random bytestrings ten kilobytes in length. 
We tested the performance of hashing the above data types in the following four contexts: - `hashonly` - only the time it takes to hash the value, - `lookupmiss` - the time it takes to do a lookup in a 1,000 element hash map of random elements, only sampling keys of which we know that are not in the hash map, - `lookuphit` - similar to `lookupmiss`, except the keys are sampled from keys known to be in the hash map, - `setbuild` - the time it takes to construct a `HashSet` of 1,000 elements from 1,000 randomly sampled elements each repeated 10 times (so 10,000 inserts, with ~90% duplicates). All times are reported as expected time per operation, so one hash, one lookup, or one insert respectively. The full results [can be found here](https://gist.github.com/orlp/1271ad5b8b775c651cc55773888858eb). To summarize, we will only show the results for `u64` and `strenglishword` here, as well as the observed geometric mean and average rank over the full benchmark. ``` Xeon 8481c ┌────────────────┬────────────┬────────────┬────────────┬─────────┬─────────┬─────────┐ │ avg_rank ┆ 1.58 ┆ 2.66 ┆ 2.09 ┆ 3.70 ┆ 4.97 │ │ geometric_mean ┆ 6.21 ┆ 7.01 ┆ 7.56 ┆ 8.74 ┆ 28.70 │ ╞════════════════╪════════════╪════════════╪════════════╪═════════╪═════════╪═════════╡ │ distr ┆ bench ┆ foldhash-f ┆ foldhash-q ┆ fxhash ┆ ahash ┆ siphash │ ╞════════════════╪════════════╪════════════╪════════════╪═════════╪═════════╪═════════╡ │ u64 ┆ hashonly ┆ 0.79 ┆ 1.03 ┆ 0.67 ┆ 1.23 ┆ 9.09 │ │ u64 ┆ lookupmiss ┆ 2.01 ┆ 2.44 ┆ 1.73 ┆ 2.73 ┆ 12.03 │ │ u64 ┆ lookuphit ┆ 3.04 ┆ 3.59 ┆ 2.64 ┆ 3.84 ┆ 12.65 │ │ u64 ┆ setbuild ┆ 6.13 ┆ 6.52 ┆ 4.88 ┆ 6.66 ┆ 17.80 │ | ... ┆ ... ┆ ... ┆ ... ┆ ... ┆ ... ┆ ... 
| │ strenglishword ┆ hashonly ┆ 2.63 ┆ 2.98 ┆ 3.24 ┆ 3.57 ┆ 11.87 │ │ strenglishword ┆ lookupmiss ┆ 4.63 ┆ 5.03 ┆ 4.51 ┆ 5.86 ┆ 15.19 │ │ strenglishword ┆ lookuphit ┆ 8.62 ┆ 9.25 ┆ 8.28 ┆ 10.06 ┆ 21.35 │ │ strenglishword ┆ setbuild ┆ 14.77 ┆ 15.57 ┆ 18.86 ┆ 15.72 ┆ 35.36 │ └────────────────┴────────────┴────────────┴────────────┴─────────┴─────────┴─────────┘ Xeon 8481c with RUSTFLAGS="-C target-cpu=native" ┌────────────────┬────────────┬────────────┬────────────┬─────────┬─────────┬─────────┐ │ avg_rank ┆ 1.89 ┆ 3.12 ┆ 2.25 ┆ 2.77 ┆ 4.97 │ │ geometric_mean ┆ 6.00 ┆ 6.82 ┆ 7.39 ┆ 6.94 ┆ 29.49 │ ╞════════════════╪════════════╪════════════╪════════════╪═════════╪═════════╪═════════╡ │ distr ┆ bench ┆ foldhash-f ┆ foldhash-q ┆ fxhash ┆ ahash ┆ siphash │ ╞════════════════╪════════════╪════════════╪════════════╪═════════╪═════════╪═════════╡ │ u64 ┆ hashonly ┆ 0.79 ┆ 1.01 ┆ 0.67 ┆ 1.34 ┆ 9.24 │ │ u64 ┆ lookupmiss ┆ 1.68 ┆ 2.12 ┆ 1.62 ┆ 1.96 ┆ 12.04 │ │ u64 ┆ lookuphit ┆ 2.68 ┆ 3.19 ┆ 2.28 ┆ 3.16 ┆ 13.09 │ │ u64 ┆ setbuild ┆ 6.16 ┆ 6.42 ┆ 4.75 ┆ 7.03 ┆ 18.88 │ | ... ┆ ... ┆ ... ┆ ... ┆ ... ┆ ... ┆ ... 
| │ strenglishword ┆ hashonly ┆ 2.60 ┆ 2.97 ┆ 3.25 ┆ 3.04 ┆ 11.58 │ │ strenglishword ┆ lookupmiss ┆ 4.41 ┆ 4.96 ┆ 4.82 ┆ 4.79 ┆ 32.31 │ │ strenglishword ┆ lookuphit ┆ 8.68 ┆ 9.35 ┆ 8.46 ┆ 8.63 ┆ 21.48 │ │ strenglishword ┆ setbuild ┆ 15.01 ┆ 16.34 ┆ 19.34 ┆ 15.37 ┆ 35.22 │ └────────────────┴────────────┴────────────┴────────────┴─────────┴─────────┴─────────┘ Apple M2 ┌────────────────┬────────────┬────────────┬────────────┬─────────┬─────────┬─────────┐ │ avg_rank ┆ 1.62 ┆ 2.81 ┆ 2.02 ┆ 3.58 ┆ 4.97 │ │ geometric_mean ┆ 4.41 ┆ 4.86 ┆ 5.39 ┆ 5.71 ┆ 21.94 │ ╞════════════════╪════════════╪════════════╪════════════╪═════════╪═════════╪═════════╡ │ distr ┆ bench ┆ foldhash-f ┆ foldhash-q ┆ fxhash ┆ ahash ┆ siphash │ ╞════════════════╪════════════╪════════════╪════════════╪═════════╪═════════╪═════════╡ │ u64 ┆ hashonly ┆ 0.60 ┆ 0.70 ┆ 0.41 ┆ 0.78 ┆ 6.61 │ │ u64 ┆ lookupmiss ┆ 1.50 ┆ 1.61 ┆ 1.23 ┆ 1.65 ┆ 8.28 │ │ u64 ┆ lookuphit ┆ 1.78 ┆ 2.10 ┆ 1.57 ┆ 2.25 ┆ 8.53 │ │ u64 ┆ setbuild ┆ 4.74 ┆ 5.19 ┆ 3.61 ┆ 5.38 ┆ 15.36 │ | ... ┆ ... ┆ ... ┆ ... ┆ ... ┆ ... ┆ ... | │ strenglishword ┆ hashonly ┆ 1.84 ┆ 2.13 ┆ 1.85 ┆ 2.13 ┆ 11.61 │ │ strenglishword ┆ lookupmiss ┆ 2.71 ┆ 2.96 ┆ 2.47 ┆ 2.99 ┆ 9.27 │ │ strenglishword ┆ lookuphit ┆ 7.54 ┆ 8.77 ┆ 7.83 ┆ 8.77 ┆ 18.65 │ │ strenglishword ┆ setbuild ┆ 16.61 ┆ 17.09 ┆ 14.83 ┆ 16.52 ┆ 26.42 │ └────────────────┴────────────┴────────────┴────────────┴─────────┴─────────┴─────────┘ ``` We note from the above benchmark that for hash table performance the extra quality that `foldhash-q` provides is almost never actually worth the small but also non-negligible computational overhead it has over `foldhash-f`. This is our justification for providing `foldhash::fast` as a default choice for hash tables, even though it has measurable biases (see also the "Quality" section). 
fxhash generally does fairly well for small inputs on the benchmarks, however it has structural weaknesses as a hash which makes it ill-advised to use as a general-purpose hash function in our opinion. For example the `lookuphit` benchmark on Apple M2 for `u64hibits` takes 1.77 nanoseconds per lookup for foldhash, but 67.72 nanoseconds for fxhash (due to everything colliding - the effects would be even worse with a larger hash map). In our opinion foldhash-f strikes the right balance between quality and performance for hash tables, whereas fxhash flies a bit too close to the sun. aHash is faster than foldhash for medium-long strings when compiled with AES instruction support, but is slower in almost every other scenario or when AES instructions are unavailable. ## Quality Foldhash-f is a fairly strong hash in terms of collisions *on its full 64-bit output*. However, statistical tests such as [SMHasher3](https://gitlab.com/fwojcik/smhasher3) can distinguish it from an ideal hash function in tests that focus on the relationship between individual input/output bits. One such property is avalanching: changing a single bit in the input does not flip every other bit with 50% probability when using foldhash-f like it should if it behaved like a proper random oracle. As the benchmarks above show, spending more effort in foldhash-f to improve the hash quality does not lead to better hash table performance. However, there are also use cases for hash functions where it is important that (each bit of) the hash is unbiased and a random function of all bits of the input, such as in algorithms as HyperLogLog or MinHash. For this purpose we also provide foldhash-q, which is simply a post-processed version of foldhash-f to properly avalanche all the bits. Foldhash-q passes the [SMHasher3](https://gitlab.com/fwojcik/smhasher3) test suite [without any failures](https://github.com/orlp/foldhash_smhasher3). 
You can also plot the worst-case probability (where 50% is ideal) that any particular output bit flips if you flip an input bit, which nicely visualizes how fxhash and foldhash-f fail this avalanche property but foldhash-q and SipHash-1-3 pass: | FxHash | Foldhash-f | Foldhash-q | SipHash-1-3 | |--------|------------|------------|-------------| | | | | ## Background The name foldhash is derived from the *folded multiply*. This technique compresses two 64-bit words into a single 64-bit word while simultaneously thoroughly mixing the bits. It does this using a 64 x 64 bit -> 128 bit multiplication followed by folding the two halves of the 128-bit product together using a XOR operation: ```rust let full = (x as u128) * (y as u128); let lo = full as u64; let hi = (full >> 64) as u64; let folded = lo ^ hi; ``` We're not aware of a formal analysis of this operation, but empirically it works very well. An informal intuition for why it should work is that multiplication can be seen as the sum of many shifted copies of one of the arguments, only including those shifted copies where the other argument has set bits, e.g. for multiplying 4-bit words `abcd` and `efgh`: ``` abcd * efgh = abcd * e abcd * f abcd * g abcd * h --------------- + ``` Note that the middle bits of the product are a function of many of the input bits, whereas the top-most and bottom-most bits are impacted by fewer of the input bits. By folding the top half back onto the bottom half these effects compensate each other, making all the output bits affected by much of the input. We did not invent the folded multiply, it was previously used in essentially the same way in [aHash](https://github.com/tkaitchuck/aHash), [wyhash](https://github.com/wangyi-fudan/wyhash), and [xxhash3](https://github.com/Cyan4973/xxHash). The operation was also used in [mum-hash](https://github.com/vnmakarov/mum-hash), and probably others. 
We do not know who originally invented it, the earliest reference we could find was Steven Fuerst [blogging about it](https://web.archive.org/web/20121213174842/http://locklessinc.com/articles/crypto_hash/) in 2012. ## HashDoS resistance The folded multiply has a fairly glaring flaw: if one of the halves is zero, the output is zero. This makes it trivial to create a large number of hash collisions (even by accident, as zeroes are a common input to hashes). To combat this, every folded multiply in foldhash has the following form: ```rust folded_multiply(input1 ^ secret1, input2 ^ secret2) ``` Here `secret1` or `secret2` are either secret random numbers generated by foldhash beforehand, or partial hash results influenced by such a secret prior. This (plus other careful design throughout the hash function) ensures that it is not possible to create a list of inputs that collide for every instance of foldhash, and also prevents certain access patterns on hash tables going quadratic by ensuring that each hash table uses a different seed and thus a different access pattern. It is these two properties that we refer to when we claim foldhash is "minimally DoS-resistant": it does the bare minimum to defeat very simple attacks. However, to be crystal clear, **foldhash does not claim to provide HashDoS resistance against interactive attackers**. For a student of cryptography it should be trivial to derive the secret values from direct observation of hash outputs, and feasible to derive the secret values from indirect observation of hashes, such as through timing attacks or hash table iteration. Once an attacker knows the secret values, they can once again create infinite hash collisions with ease. ## Acknowledgements We thank Liam Gray for their suggestions on improving string hashing performance. 
foldhash-0.2.0/src/convenience.rs000064400000000000000000000043011046102023000150420ustar 00000000000000use super::fast::{FixedState, RandomState}; /// Type alias for [`std::collections::HashMap`]. pub type HashMap = std::collections::HashMap; /// Type alias for [`std::collections::HashSet`]. pub type HashSet = std::collections::HashSet; /// A convenience extension trait to enable [`HashMap::new`] for hash maps that use `foldhash`. pub trait HashMapExt { /// Creates an empty `HashMap`. fn new() -> Self; /// Creates an empty `HashMap` with at least the specified capacity. fn with_capacity(capacity: usize) -> Self; } impl HashMapExt for std::collections::HashMap { #[inline(always)] fn new() -> Self { Self::with_hasher(RandomState::default()) } #[inline(always)] fn with_capacity(capacity: usize) -> Self { Self::with_capacity_and_hasher(capacity, RandomState::default()) } } impl HashMapExt for std::collections::HashMap { #[inline(always)] fn new() -> Self { Self::with_hasher(FixedState::default()) } #[inline(always)] fn with_capacity(capacity: usize) -> Self { Self::with_capacity_and_hasher(capacity, FixedState::default()) } } /// A convenience extension trait to enable [`HashSet::new`] for hash sets that use `foldhash`. pub trait HashSetExt { /// Creates an empty `HashSet`. fn new() -> Self; /// Creates an empty `HashSet` with at least the specified capacity. 
fn with_capacity(capacity: usize) -> Self; } impl HashSetExt for std::collections::HashSet { #[inline(always)] fn new() -> Self { Self::with_hasher(RandomState::default()) } #[inline(always)] fn with_capacity(capacity: usize) -> Self { Self::with_capacity_and_hasher(capacity, RandomState::default()) } } impl HashSetExt for std::collections::HashSet { #[inline(always)] fn new() -> Self { Self::with_hasher(FixedState::default()) } #[inline(always)] fn with_capacity(capacity: usize) -> Self { Self::with_capacity_and_hasher(capacity, FixedState::default()) } } foldhash-0.2.0/src/fast.rs000064400000000000000000000160521046102023000135110ustar 00000000000000//! The foldhash implementation optimized for speed. use core::hash::{BuildHasher, Hasher}; use crate::seed::{gen_per_hasher_seed, GlobalSeed, SharedSeed}; use crate::{folded_multiply, hash_bytes_long, hash_bytes_short, rotate_right, ARBITRARY3}; /// A [`Hasher`] instance implementing foldhash, optimized for speed. /// /// While you can create one directly with [`FoldHasher::with_seed`], you /// most likely want to use [`RandomState`], [`SeedableRandomState`] or /// [`FixedState`] to create [`FoldHasher`]s. #[derive(Clone)] pub struct FoldHasher<'a> { accumulator: u64, sponge: u128, sponge_len: u8, seeds: &'a [u64; 6], } impl<'a> FoldHasher<'a> { /// Initializes this [`FoldHasher`] with the given per-hasher seed and /// [`SharedSeed`]. 
#[inline] pub const fn with_seed(per_hasher_seed: u64, shared_seed: &'a SharedSeed) -> FoldHasher<'a> { FoldHasher { accumulator: per_hasher_seed, sponge: 0, sponge_len: 0, seeds: &shared_seed.seeds, } } #[inline(always)] fn write_num>(&mut self, x: T) { let bits: usize = 8 * core::mem::size_of::(); if self.sponge_len as usize + bits > 128 { let lo = self.sponge as u64; let hi = (self.sponge >> 64) as u64; self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.seeds[0]); self.sponge = x.into(); self.sponge_len = bits as u8; } else { self.sponge |= x.into() << self.sponge_len; self.sponge_len += bits as u8; } } } impl<'a> Hasher for FoldHasher<'a> { #[inline(always)] fn write(&mut self, bytes: &[u8]) { // We perform overlapping reads in the byte hash which could lead to // trivial length-extension attacks. These should be defeated by // adding a length-dependent rotation on our unpredictable seed // which costs only a single cycle (or none if executed with // instruction-level parallelism). let len = bytes.len(); self.accumulator = rotate_right(self.accumulator, len as u32); if len <= 16 { self.accumulator = hash_bytes_short(bytes, self.accumulator, self.seeds); } else { unsafe { // SAFETY: we checked that the length is > 16 bytes. self.accumulator = hash_bytes_long(bytes, self.accumulator, self.seeds); } } } #[inline(always)] fn write_u8(&mut self, i: u8) { self.write_num(i); } #[inline(always)] fn write_u16(&mut self, i: u16) { self.write_num(i); } #[inline(always)] fn write_u32(&mut self, i: u32) { self.write_num(i); } #[inline(always)] fn write_u64(&mut self, i: u64) { self.write_num(i); } #[inline(always)] fn write_u128(&mut self, i: u128) { let lo = i as u64; let hi = (i >> 64) as u64; self.accumulator = folded_multiply(lo ^ self.accumulator, hi ^ self.seeds[0]); } #[inline(always)] fn write_usize(&mut self, i: usize) { // u128 doesn't implement From. 
#[cfg(target_pointer_width = "32")] self.write_num(i as u32); #[cfg(target_pointer_width = "64")] self.write_num(i as u64); } #[cfg(feature = "nightly")] #[inline(always)] fn write_str(&mut self, s: &str) { // Our write function already handles length differences. self.write(s.as_bytes()) } #[inline(always)] fn finish(&self) -> u64 { if self.sponge_len > 0 { let lo = self.sponge as u64; let hi = (self.sponge >> 64) as u64; folded_multiply(lo ^ self.accumulator, hi ^ self.seeds[0]) } else { self.accumulator } } } /// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that is randomly initialized. #[derive(Clone, Debug)] pub struct RandomState { per_hasher_seed: u64, global_seed: GlobalSeed, } impl Default for RandomState { #[inline(always)] fn default() -> Self { Self { per_hasher_seed: gen_per_hasher_seed(), global_seed: GlobalSeed::new(), } } } impl BuildHasher for RandomState { type Hasher = FoldHasher<'static>; #[inline(always)] fn build_hasher(&self) -> FoldHasher<'static> { FoldHasher::with_seed(self.per_hasher_seed, self.global_seed.get()) } } /// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that is randomly /// initialized by default, but can also be initialized with a specific seed. /// /// This can be useful for e.g. testing, but the downside is that this type /// has a size of 16 bytes rather than the 8 bytes [`RandomState`] is. #[derive(Clone, Debug)] pub struct SeedableRandomState { per_hasher_seed: u64, shared_seed: &'static SharedSeed, } impl Default for SeedableRandomState { #[inline(always)] fn default() -> Self { Self::random() } } impl SeedableRandomState { /// Generates a random [`SeedableRandomState`], similar to [`RandomState`]. #[inline(always)] pub fn random() -> Self { Self { per_hasher_seed: gen_per_hasher_seed(), shared_seed: SharedSeed::global_random(), } } /// Generates a fixed [`SeedableRandomState`], similar to [`FixedState`]. 
#[inline(always)] pub fn fixed() -> Self { Self { per_hasher_seed: ARBITRARY3, shared_seed: SharedSeed::global_fixed(), } } /// Generates a [`SeedableRandomState`] with the given per-hasher seed /// and [`SharedSeed`]. #[inline(always)] pub fn with_seed(per_hasher_seed: u64, shared_seed: &'static SharedSeed) -> Self { // XOR with ARBITRARY3 such that with_seed(0) matches default. Self { per_hasher_seed: per_hasher_seed ^ ARBITRARY3, shared_seed, } } } impl BuildHasher for SeedableRandomState { type Hasher = FoldHasher<'static>; #[inline(always)] fn build_hasher(&self) -> FoldHasher<'static> { FoldHasher::with_seed(self.per_hasher_seed, self.shared_seed) } } /// A [`BuildHasher`] for [`fast::FoldHasher`](FoldHasher) that always has the same fixed seed. /// /// Not recommended unless you absolutely need determinism. #[derive(Clone, Debug)] pub struct FixedState { per_hasher_seed: u64, } impl FixedState { /// Creates a [`FixedState`] with the given per-hasher-seed. #[inline(always)] pub const fn with_seed(per_hasher_seed: u64) -> Self { // XOR with ARBITRARY3 such that with_seed(0) matches default. Self { per_hasher_seed: per_hasher_seed ^ ARBITRARY3, } } } impl Default for FixedState { #[inline(always)] fn default() -> Self { Self { per_hasher_seed: ARBITRARY3, } } } impl BuildHasher for FixedState { type Hasher = FoldHasher<'static>; #[inline(always)] fn build_hasher(&self) -> FoldHasher<'static> { FoldHasher::with_seed(self.per_hasher_seed, SharedSeed::global_fixed()) } } foldhash-0.2.0/src/lib.rs000064400000000000000000000332111046102023000133160ustar 00000000000000//! This crate provides foldhash, a fast, non-cryptographic, minimally //! DoS-resistant hashing algorithm designed for computational uses such as //! hashmaps, bloom filters, count sketching, etc. //! //! When should you **not** use foldhash: //! //! - You are afraid of people studying your long-running program's behavior //! to reverse engineer its internal random state and using this knowledge to //! 
create many colliding inputs for computational complexity attacks. //! //! - You expect foldhash to have a consistent output across versions or //! platforms, such as for persistent file formats or communication protocols. //! //! - You are relying on foldhash's properties for any kind of security. //! Foldhash is **not appropriate for any cryptographic purpose**. //! //! Foldhash has two variants, one optimized for speed which is ideal for data //! structures such as hash maps and bloom filters, and one optimized for //! statistical quality which is ideal for algorithms such as //! [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog) and //! [MinHash](https://en.wikipedia.org/wiki/MinHash). //! //! Foldhash can be used in a `#![no_std]` environment by disabling its default //! `"std"` feature. //! //! # Usage //! //! The easiest way to use this crate with the standard library [`HashMap`] or //! [`HashSet`] is to import them from `foldhash` instead, along with the //! extension traits to make [`HashMap::new`] and [`HashMap::with_capacity`] //! work out-of-the-box: //! //! ```rust //! use foldhash::{HashMap, HashMapExt}; //! //! let mut hm = HashMap::new(); //! hm.insert(42, "hello"); //! ``` //! //! You can also avoid the convenience types and do it manually by initializing //! a [`RandomState`](fast::RandomState), for example if you are using a different hash map //! implementation like [`hashbrown`](https://docs.rs/hashbrown/): //! //! ```rust //! use hashbrown::HashMap; //! use foldhash::fast::RandomState; //! //! let mut hm = HashMap::with_hasher(RandomState::default()); //! hm.insert("foo", "bar"); //! ``` //! //! The above methods are the recommended way to use foldhash, which will //! automatically generate a randomly generated hasher instance for you. If you //! absolutely must have determinism you can use [`FixedState`](fast::FixedState) //! instead, but note that this makes you trivially vulnerable to HashDoS //! 
attacks and might lead to quadratic runtime when moving data from one //! hashmap/set into another: //! //! ```rust //! use std::collections::HashSet; //! use foldhash::fast::FixedState; //! //! let mut hm = HashSet::with_hasher(FixedState::with_seed(42)); //! hm.insert([1, 10, 100]); //! ``` //! //! If you rely on statistical properties of the hash for the correctness of //! your algorithm, such as in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog), //! it is suggested to use the [`RandomState`](quality::RandomState) //! or [`FixedState`](quality::FixedState) from the [`quality`] module instead //! of the [`fast`] module. The latter is optimized purely for speed in hash //! tables and has known statistical imperfections. //! //! Finally, you can also directly use the [`RandomState`](quality::RandomState) //! or [`FixedState`](quality::FixedState) to manually hash items using the //! [`BuildHasher`](std::hash::BuildHasher) trait: //! ```rust //! use std::hash::BuildHasher; //! use foldhash::quality::RandomState; //! //! let random_state = RandomState::default(); //! let hash = random_state.hash_one("hello world"); //! ``` //! //! ## Seeding //! //! Foldhash relies on a single 8-byte per-hasher seed which should ideally //! be different from instance to instance, and also a larger //! [`SharedSeed`] which may be shared by many different instances. //! //! To reduce overhead, this [`SharedSeed`] is typically initialized once and //! stored. To prevent each hashmap unnecessarily containing a reference to this //! value there are three kinds of [`BuildHasher`](core::hash::BuildHasher)s //! foldhash provides (both for [`fast`] and [`quality`]): //! //! 1. [`RandomState`](fast::RandomState), which always generates a //! random per-hasher seed and implicitly stores a reference to [`SharedSeed::global_random`]. //! 2. [`FixedState`](fast::FixedState), which by default uses a fixed //! 
per-hasher seed and implicitly stores a reference to [`SharedSeed::global_fixed`]. //! 3. [`SeedableRandomState`](fast::SeedableRandomState), which works like //! [`RandomState`](fast::RandomState) by default but can be seeded in any manner. //! This state must include an explicit reference to a [`SharedSeed`], and thus //! this struct is 16 bytes as opposed to just 8 bytes for the previous two. //! //! ## Features //! //! This crate has the following features: //! - `nightly`, this feature improves string hashing performance //! slightly using the nightly-only Rust feature //! [`hasher_prefixfree_extras`](https://github.com/rust-lang/rust/issues/96762), //! - `std`, this enabled-by-default feature offers convenient aliases for `std` //! containers, but can be turned off for `#![no_std]` crates. #![cfg_attr(all(not(test), not(feature = "std")), no_std)] #![cfg_attr(feature = "nightly", feature(hasher_prefixfree_extras))] #![warn(missing_docs)] pub mod fast; pub mod quality; mod seed; pub use seed::SharedSeed; #[cfg(feature = "std")] mod convenience; #[cfg(feature = "std")] pub use convenience::*; // Arbitrary constants with high entropy. Hexadecimal digits of pi were used. const ARBITRARY0: u64 = 0x243f6a8885a308d3; const ARBITRARY1: u64 = 0x13198a2e03707344; const ARBITRARY2: u64 = 0xa4093822299f31d0; const ARBITRARY3: u64 = 0x082efa98ec4e6c89; const ARBITRARY4: u64 = 0x452821e638d01377; const ARBITRARY5: u64 = 0xbe5466cf34e90c6c; const ARBITRARY6: u64 = 0xc0ac29b7c97c50dd; const ARBITRARY7: u64 = 0x3f84d5b5b5470917; const ARBITRARY8: u64 = 0x9216d5d98979fb1b; const ARBITRARY9: u64 = 0xd1310ba698dfb5ac; const ARBITRARY10: u64 = 0x2ffd72dbd01adfb7; const ARBITRARY11: u64 = 0xb8e1afed6a267e96; #[inline(always)] const fn folded_multiply(x: u64, y: u64) -> u64 { // The following code path is only fast if 64-bit to 128-bit widening // multiplication is supported by the architecture. Most 64-bit // architectures except SPARC64 and Wasm64 support it. 
However, the target // pointer width doesn't always indicate that we are dealing with a 64-bit // architecture, as there are ABIs that reduce the pointer width, especially // on AArch64 and x86-64. WebAssembly (regardless of pointer width) supports // 64-bit to 128-bit widening multiplication with the `wide-arithmetic` // proposal. #[cfg(any( all( target_pointer_width = "64", not(any(target_arch = "sparc64", target_arch = "wasm64")), ), target_arch = "aarch64", target_arch = "x86_64", all(target_family = "wasm", target_feature = "wide-arithmetic"), ))] { // We compute the full u64 x u64 -> u128 product, this is a single mul // instruction on x86-64, one mul plus one mulhi on ARM64. let full = (x as u128).wrapping_mul(y as u128); let lo = full as u64; let hi = (full >> 64) as u64; // The middle bits of the full product fluctuate the most with small // changes in the input. This is the top bits of lo and the bottom bits // of hi. We can thus make the entire output fluctuate with small // changes to the input by XOR'ing these two halves. lo ^ hi } #[cfg(not(any( all( target_pointer_width = "64", not(any(target_arch = "sparc64", target_arch = "wasm64")), ), target_arch = "aarch64", target_arch = "x86_64", all(target_family = "wasm", target_feature = "wide-arithmetic"), )))] { // u64 x u64 -> u128 product is quite expensive on 32-bit. 
// We approximate it by expanding the multiplication and eliminating // carries by replacing additions with XORs: // (2^32 hx + lx)*(2^32 hy + ly) = // 2^64 hx*hy + 2^32 (hx*ly + lx*hy) + lx*ly ~= // 2^64 hx*hy ^ 2^32 (hx*ly ^ lx*hy) ^ lx*ly // Which when folded becomes: // (hx*hy ^ lx*ly) ^ (hx*ly ^ lx*hy).rotate_right(32) let lx = x as u32; let ly = y as u32; let hx = (x >> 32) as u32; let hy = (y >> 32) as u32; let ll = (lx as u64).wrapping_mul(ly as u64); let lh = (lx as u64).wrapping_mul(hy as u64); let hl = (hx as u64).wrapping_mul(ly as u64); let hh = (hx as u64).wrapping_mul(hy as u64); (hh ^ ll) ^ (hl ^ lh).rotate_right(32) } } #[inline(always)] const fn rotate_right(x: u64, r: u32) -> u64 { #[cfg(any( target_pointer_width = "64", target_arch = "aarch64", target_arch = "x86_64", target_family = "wasm", ))] { x.rotate_right(r) } #[cfg(not(any( target_pointer_width = "64", target_arch = "aarch64", target_arch = "x86_64", target_family = "wasm", )))] { // On platforms without 64-bit arithmetic rotation can be slow, rotate // each 32-bit half independently. let lo = (x as u32).rotate_right(r); let hi = ((x >> 32) as u32).rotate_right(r); ((hi as u64) << 32) | lo as u64 } } #[cold] fn cold_path() {} /// Hashes strings <= 16 bytes, has unspecified behavior when bytes.len() > 16. #[inline(always)] fn hash_bytes_short(bytes: &[u8], accumulator: u64, seeds: &[u64; 6]) -> u64 { let len = bytes.len(); let mut s0 = accumulator; let mut s1 = seeds[1]; // XOR the input into s0, s1, then multiply and fold. 
if len >= 8 { s0 ^= u64::from_ne_bytes(bytes[0..8].try_into().unwrap()); s1 ^= u64::from_ne_bytes(bytes[len - 8..].try_into().unwrap()); } else if len >= 4 { s0 ^= u32::from_ne_bytes(bytes[0..4].try_into().unwrap()) as u64; s1 ^= u32::from_ne_bytes(bytes[len - 4..].try_into().unwrap()) as u64; } else if len > 0 { let lo = bytes[0]; let mid = bytes[len / 2]; let hi = bytes[len - 1]; s0 ^= lo as u64; s1 ^= ((hi as u64) << 8) | mid as u64; } folded_multiply(s0, s1) } /// Load 8 bytes into a u64 word at the given offset. /// /// # Safety /// You must ensure that offset + 8 <= bytes.len(). #[inline(always)] unsafe fn load(bytes: &[u8], offset: usize) -> u64 { // In most (but not all) cases this unsafe code is not necessary to avoid // the bounds checks in the below code, but the register allocation became // worse if I replaced those calls which could be replaced with safe code. unsafe { bytes.as_ptr().add(offset).cast::().read_unaligned() } } /// Hashes strings > 16 bytes. /// /// # Safety /// v.len() must be > 16 bytes. #[cold] #[inline(never)] unsafe fn hash_bytes_long(mut v: &[u8], accumulator: u64, seeds: &[u64; 6]) -> u64 { let mut s0 = accumulator; let mut s1 = s0.wrapping_add(seeds[1]); if v.len() > 128 { cold_path(); let mut s2 = s0.wrapping_add(seeds[2]); let mut s3 = s0.wrapping_add(seeds[3]); if v.len() > 256 { cold_path(); let mut s4 = s0.wrapping_add(seeds[4]); let mut s5 = s0.wrapping_add(seeds[5]); loop { unsafe { // SAFETY: we checked the length is > 256, we index at most v[..96]. 
s0 = folded_multiply(load(v, 0) ^ s0, load(v, 48) ^ seeds[0]); s1 = folded_multiply(load(v, 8) ^ s1, load(v, 56) ^ seeds[0]); s2 = folded_multiply(load(v, 16) ^ s2, load(v, 64) ^ seeds[0]); s3 = folded_multiply(load(v, 24) ^ s3, load(v, 72) ^ seeds[0]); s4 = folded_multiply(load(v, 32) ^ s4, load(v, 80) ^ seeds[0]); s5 = folded_multiply(load(v, 40) ^ s5, load(v, 88) ^ seeds[0]); } v = &v[96..]; if v.len() <= 256 { break; } } s0 ^= s4; s1 ^= s5; } loop { unsafe { // SAFETY: we checked the length is > 128, we index at most v[..64]. s0 = folded_multiply(load(v, 0) ^ s0, load(v, 32) ^ seeds[0]); s1 = folded_multiply(load(v, 8) ^ s1, load(v, 40) ^ seeds[0]); s2 = folded_multiply(load(v, 16) ^ s2, load(v, 48) ^ seeds[0]); s3 = folded_multiply(load(v, 24) ^ s3, load(v, 56) ^ seeds[0]); } v = &v[64..]; if v.len() <= 128 { break; } } s0 ^= s2; s1 ^= s3; } let len = v.len(); unsafe { // SAFETY: our precondition ensures our length is at least 16, and the // above loops do not reduce the length under that. This protects our // first iteration of this loop, the further iterations are protected // directly by the checks on len. s0 = folded_multiply(load(v, 0) ^ s0, load(v, len - 16) ^ seeds[0]); s1 = folded_multiply(load(v, 8) ^ s1, load(v, len - 8) ^ seeds[0]); if len >= 32 { s0 = folded_multiply(load(v, 16) ^ s0, load(v, len - 32) ^ seeds[0]); s1 = folded_multiply(load(v, 24) ^ s1, load(v, len - 24) ^ seeds[0]); if len >= 64 { s0 = folded_multiply(load(v, 32) ^ s0, load(v, len - 48) ^ seeds[0]); s1 = folded_multiply(load(v, 40) ^ s1, load(v, len - 40) ^ seeds[0]); if len >= 96 { s0 = folded_multiply(load(v, 48) ^ s0, load(v, len - 64) ^ seeds[0]); s1 = folded_multiply(load(v, 56) ^ s1, load(v, len - 56) ^ seeds[0]); } } } } s0 ^ s1 } foldhash-0.2.0/src/quality.rs000064400000000000000000000117311046102023000142430ustar 00000000000000//! The foldhash implementation optimized for quality. 
use core::hash::{BuildHasher, Hasher};

use crate::seed::SharedSeed;
use crate::{fast, folded_multiply, ARBITRARY0, ARBITRARY4};

/// The quality-optimized foldhash [`Hasher`].
///
/// This is a thin wrapper around the fast hasher that applies one extra
/// folded multiply when the hash is finished. Rather than building one by
/// hand through [`FoldHasher::with_seed`], you will usually obtain it from
/// [`RandomState`], [`SeedableRandomState`] or [`FixedState`].
#[derive(Clone)]
pub struct FoldHasher<'a> {
    pub(crate) inner: fast::FoldHasher<'a>,
}

impl<'a> FoldHasher<'a> {
    /// Builds a [`FoldHasher`] from a per-hasher seed and a [`SharedSeed`].
    ///
    /// Both arguments are passed unchanged to the wrapped fast hasher.
    #[inline(always)]
    pub const fn with_seed(per_hasher_seed: u64, shared_seed: &'a SharedSeed) -> Self {
        let inner = fast::FoldHasher::with_seed(per_hasher_seed, shared_seed);
        Self { inner }
    }
}

// Every `write*` method delegates straight to the wrapped fast hasher; only
// `finish` differs, adding a final mixing step.
impl Hasher for FoldHasher<'_> {
    #[inline(always)]
    fn write(&mut self, bytes: &[u8]) {
        self.inner.write(bytes)
    }

    #[inline(always)]
    fn write_u8(&mut self, i: u8) {
        self.inner.write_u8(i)
    }

    #[inline(always)]
    fn write_u16(&mut self, i: u16) {
        self.inner.write_u16(i)
    }

    #[inline(always)]
    fn write_u32(&mut self, i: u32) {
        self.inner.write_u32(i)
    }

    #[inline(always)]
    fn write_u64(&mut self, i: u64) {
        self.inner.write_u64(i)
    }

    #[inline(always)]
    fn write_u128(&mut self, i: u128) {
        self.inner.write_u128(i)
    }

    #[inline(always)]
    fn write_usize(&mut self, i: usize) {
        self.inner.write_usize(i)
    }

    #[cfg(feature = "nightly")]
    #[inline(always)]
    fn write_str(&mut self, s: &str) {
        self.inner.write_str(s)
    }

    /// Returns the fast hash passed through one more folded multiply.
    #[inline(always)]
    fn finish(&self) -> u64 {
        let fast_hash = self.inner.finish();
        folded_multiply(fast_hash, ARBITRARY0)
    }
}

/// A [`BuildHasher`] for [`quality::FoldHasher`](FoldHasher) that is randomly initialized.
#[derive(Clone, Default, Debug)]
pub struct RandomState {
    inner: fast::RandomState,
}

impl BuildHasher for RandomState {
    type Hasher = FoldHasher<'static>;

    /// Wraps a hasher built by the inner fast state.
    #[inline(always)]
    fn build_hasher(&self) -> Self::Hasher {
        let inner = self.inner.build_hasher();
        FoldHasher { inner }
    }
}

/// A [`BuildHasher`] for [`quality::FoldHasher`](FoldHasher) which is random
/// by default, yet may also be constructed from an explicit seed.
///
/// Handy for things like tests. The trade-off is size: this type takes up
/// 16 bytes, whereas [`RandomState`] only needs 8.
#[derive(Clone, Default, Debug)]
pub struct SeedableRandomState {
    inner: fast::SeedableRandomState,
}

impl SeedableRandomState {
    /// Creates a randomly seeded [`SeedableRandomState`], akin to [`RandomState`].
    #[inline(always)]
    pub fn random() -> Self {
        let inner = fast::SeedableRandomState::random();
        Self { inner }
    }

    /// Creates a [`SeedableRandomState`] with a fixed seed, akin to [`FixedState`].
    #[inline(always)]
    pub fn fixed() -> Self {
        let inner = fast::SeedableRandomState::fixed();
        Self { inner }
    }

    /// Creates a [`SeedableRandomState`] from the supplied per-hasher seed
    /// and [`SharedSeed`].
    #[inline(always)]
    pub fn with_seed(per_hasher_seed: u64, shared_seed: &'static SharedSeed) -> Self {
        // The quality hash runs the seed through one more folded multiply
        // before handing it on, so that seed and hash are more independent.
        let mixed_seed = folded_multiply(per_hasher_seed, ARBITRARY4);
        Self {
            inner: fast::SeedableRandomState::with_seed(mixed_seed, shared_seed),
        }
    }
}

impl BuildHasher for SeedableRandomState {
    type Hasher = FoldHasher<'static>;

    /// Wraps a hasher built by the inner fast state.
    #[inline(always)]
    fn build_hasher(&self) -> Self::Hasher {
        let inner = self.inner.build_hasher();
        FoldHasher { inner }
    }
}

/// A [`BuildHasher`] for [`quality::FoldHasher`](FoldHasher) that always has the same fixed seed.
///
/// Not recommended unless you absolutely need determinism.
#[derive(Clone, Default, Debug)] pub struct FixedState { inner: fast::FixedState, } impl FixedState { /// Creates a [`FixedState`] with the given per-hasher seed. #[inline(always)] pub const fn with_seed(per_hasher_seed: u64) -> Self { Self { // We do an additional folded multiply with the seed here for // the quality hash to ensure better independence between seed // and hash. If the seed is zero the folded multiply is zero, // preserving with_seed(0) == default(). inner: fast::FixedState::with_seed(folded_multiply(per_hasher_seed, ARBITRARY4)), } } } impl BuildHasher for FixedState { type Hasher = FoldHasher<'static>; #[inline(always)] fn build_hasher(&self) -> FoldHasher<'static> { FoldHasher { inner: self.inner.build_hasher(), } } } foldhash-0.2.0/src/seed.rs000064400000000000000000000236711046102023000135010ustar 00000000000000// These constants may end up unused depending on platform support. #[allow(unused)] use crate::{ARBITRARY1, ARBITRARY5}; use super::{ folded_multiply, ARBITRARY10, ARBITRARY11, ARBITRARY2, ARBITRARY6, ARBITRARY7, ARBITRARY8, ARBITRARY9, }; /// Used for FixedState, and RandomState if atomics for dynamic init are unavailable. const FIXED_GLOBAL_SEED: SharedSeed = SharedSeed { seeds: [ ARBITRARY6, ARBITRARY7, ARBITRARY8, ARBITRARY9, ARBITRARY10, ARBITRARY11, ], }; pub(crate) fn gen_per_hasher_seed() -> u64 { // We initialize the per-hasher seed with the stack pointer to ensure // different threads have different seeds, with as side benefit that // stack address randomization gives us further non-determinism. let mut per_hasher_seed = 0; let stack_ptr = core::ptr::addr_of!(per_hasher_seed) as u64; per_hasher_seed = stack_ptr; // If we have the standard library available we use a thread-local // state to ensure RandomStates are different with high probability, // even if the call stack is the same. #[cfg(feature = "std")] { use std::cell::Cell; thread_local! 
{ static PER_HASHER_NONDETERMINISM: Cell = const { Cell::new(0) }; } PER_HASHER_NONDETERMINISM.with(|cell| { let nondeterminism = cell.get(); per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism); cell.set(per_hasher_seed); }) }; // If we don't have the standard library we instead use a global // atomic instead of a thread-local state. // // PER_HASHER_NONDETERMINISM is loaded and updated in a racy manner, // but this doesn't matter in practice - it is impossible that two // different threads have the same stack location, so they'll almost // surely generate different seeds, and provide a different possible // update for PER_HASHER_NONDETERMINISM. If we would use a proper // fetch_add atomic update then there is a larger chance of // problematic contention. // // We use usize instead of 64-bit atomics for best platform support. #[cfg(not(feature = "std"))] { use core::sync::atomic::{AtomicUsize, Ordering}; static PER_HASHER_NONDETERMINISM: AtomicUsize = AtomicUsize::new(0); let nondeterminism = PER_HASHER_NONDETERMINISM.load(Ordering::Relaxed) as u64; per_hasher_seed = folded_multiply(per_hasher_seed, ARBITRARY1 ^ nondeterminism); PER_HASHER_NONDETERMINISM.store(per_hasher_seed as usize, Ordering::Relaxed); } // One extra mixing step to ensure good random bits. folded_multiply(per_hasher_seed, ARBITRARY2) } /// A random seed intended to be shared by many different foldhash instances. /// /// This seed is consumed by [`FoldHasher::with_seed`](crate::fast::FoldHasher::with_seed), /// and [`SeedableRandomState::with_seed`](crate::fast::SeedableRandomState::with_seed). #[derive(Clone, Debug)] pub struct SharedSeed { pub(crate) seeds: [u64; 6], } impl SharedSeed { /// Returns the globally shared randomly initialized [`SharedSeed`] as used /// by [`RandomState`](crate::fast::RandomState). 
#[inline(always)] pub fn global_random() -> &'static SharedSeed { global::GlobalSeed::new().get() } /// Returns the globally shared fixed [`SharedSeed`] as used /// by [`FixedState`](crate::fast::FixedState). #[inline(always)] pub const fn global_fixed() -> &'static SharedSeed { &FIXED_GLOBAL_SEED } /// Generates a new [`SharedSeed`] from a single 64-bit seed. /// /// Note that this is somewhat expensive so it is suggested to re-use the /// [`SharedSeed`] as much as possible, using the per-hasher seed to /// differentiate between hash instances. pub const fn from_u64(seed: u64) -> Self { macro_rules! mix { ($x: expr) => { folded_multiply($x, ARBITRARY5) }; } let seed_a = mix!(mix!(mix!(seed))); let seed_b = mix!(mix!(mix!(seed_a))); let seed_c = mix!(mix!(mix!(seed_b))); let seed_d = mix!(mix!(mix!(seed_c))); let seed_e = mix!(mix!(mix!(seed_d))); let seed_f = mix!(mix!(mix!(seed_e))); // Zeroes form a weak-point for the multiply-mix, and zeroes tend to be // a common input. So we want our global seeds that are XOR'ed with the // input to always be non-zero. To also ensure there is always a good spread // of bits, we give up 3 bits of entropy and simply force some bits on. const FORCED_ONES: u64 = (1 << 63) | (1 << 31) | 1; Self { seeds: [ seed_a | FORCED_ONES, seed_b | FORCED_ONES, seed_c | FORCED_ONES, seed_d | FORCED_ONES, seed_e | FORCED_ONES, seed_f | FORCED_ONES, ], } } } #[cfg(target_has_atomic = "8")] mod global { use super::*; use core::cell::UnsafeCell; use core::sync::atomic::{AtomicU8, Ordering}; fn generate_global_seed() -> SharedSeed { let mix = |seed: u64, x: u64| folded_multiply(seed ^ x, ARBITRARY5); // Use address space layout randomization as our main randomness source. // This isn't great, but we don't advertise HashDoS resistance in the first // place. This is a whole lot better than nothing, at near zero cost with // no dependencies. 
let mut seed = 0; let stack_ptr = &seed as *const _; let func_ptr = generate_global_seed; let static_ptr = &GLOBAL_SEED_STORAGE as *const _; seed = mix(seed, stack_ptr as usize as u64); seed = mix(seed, func_ptr as usize as u64); seed = mix(seed, static_ptr as usize as u64); // If we have the standard library available, augment entropy with the // current time and an address from the allocator. #[cfg(feature = "std")] { #[cfg(not(any( miri, all(target_family = "wasm", target_os = "unknown"), target_os = "zkvm" )))] if let Ok(duration) = std::time::UNIX_EPOCH.elapsed() { seed = mix(seed, duration.subsec_nanos() as u64); seed = mix(seed, duration.as_secs()); } let box_ptr = &*Box::new(0u8) as *const _; seed = mix(seed, box_ptr as usize as u64); } SharedSeed::from_u64(seed) } // Now all the below code purely exists to cache the above seed as // efficiently as possible. Even if we weren't a no_std crate and had access to // OnceLock, we don't want to check whether the global is set each time we // hash an object, so we hand-roll a global storage where type safety allows us // to assume the storage is initialized after construction. struct GlobalSeedStorage { state: AtomicU8, seed: UnsafeCell, } const UNINIT: u8 = 0; const LOCKED: u8 = 1; const INIT: u8 = 2; // SAFETY: we only mutate the UnsafeCells when state is in the thread-exclusive // LOCKED state, and only read the UnsafeCells when state is in the // once-achieved-eternally-preserved state INIT. unsafe impl Sync for GlobalSeedStorage {} static GLOBAL_SEED_STORAGE: GlobalSeedStorage = GlobalSeedStorage { state: AtomicU8::new(UNINIT), seed: UnsafeCell::new(SharedSeed { seeds: [0; 6] }), }; /// An object representing an initialized global seed. /// /// Does not actually store the seed inside itself, it is a zero-sized type. /// This prevents inflating the RandomState size and in turn HashMap's size. 
#[derive(Copy, Clone, Debug)] pub struct GlobalSeed { // So we can't accidentally type GlobalSeed { } within this crate. _no_accidental_unsafe_init: (), } impl GlobalSeed { #[inline(always)] pub fn new() -> Self { if GLOBAL_SEED_STORAGE.state.load(Ordering::Acquire) != INIT { Self::init_slow() } Self { _no_accidental_unsafe_init: (), } } #[cold] #[inline(never)] fn init_slow() { // Generate seed outside of critical section. let seed = generate_global_seed(); loop { match GLOBAL_SEED_STORAGE.state.compare_exchange_weak( UNINIT, LOCKED, Ordering::Acquire, Ordering::Acquire, ) { Ok(_) => unsafe { // SAFETY: we just acquired an exclusive lock. *GLOBAL_SEED_STORAGE.seed.get() = seed; GLOBAL_SEED_STORAGE.state.store(INIT, Ordering::Release); return; }, Err(INIT) => return, // Yes, it's a spin loop. We need to support no_std (so no easy // access to proper locks), this is a one-time-per-program // initialization, and the critical section is only a few // store instructions, so it'll be fine. _ => core::hint::spin_loop(), } } } #[inline(always)] pub fn get(self) -> &'static SharedSeed { // SAFETY: our constructor ensured we are in the INIT state and thus // this raw read does not race with any write. unsafe { &*GLOBAL_SEED_STORAGE.seed.get() } } } } #[cfg(not(target_has_atomic = "8"))] mod global { use super::*; #[derive(Copy, Clone, Debug)] pub struct GlobalSeed {} impl GlobalSeed { #[inline(always)] pub fn new() -> Self { Self {} } #[inline(always)] pub fn get(self) -> &'static SharedSeed { &super::FIXED_GLOBAL_SEED } } } pub(crate) use global::GlobalSeed;