space-0.18.0/.cargo/config000064400000000000000000000000570072674642500134000ustar 00000000000000[build] rustflags = ["-C", "target-cpu=native"]space-0.18.0/.cargo_vcs_info.json0000644000000001120000000000100122070ustar { "git": { "sha1": "bd7318781f9e12e2654ac626b61233d2461fb4eb" } } space-0.18.0/.github/workflows/lints.yml000064400000000000000000000022140072674642500163050ustar 00000000000000on: [push, pull_request] name: lints jobs: rustfmt: name: rustfmt runs-on: ubuntu-latest steps: - name: Checkout sources uses: actions/checkout@v2 - name: Install nightly toolchain uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: nightly override: true components: rustfmt - name: Run cargo fmt uses: actions-rs/cargo@v1 with: command: fmt args: --all -- --check clippy: name: clippy runs-on: ubuntu-latest strategy: matrix: features: - --all-features - --no-default-features --features alloc - --no-default-features steps: - name: Checkout sources uses: actions/checkout@v2 - name: Install nightly toolchain uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: nightly override: true components: clippy - name: Run cargo clippy uses: actions-rs/cargo@v1 with: command: clippy args: ${{ matrix.features }} -- -D warningsspace-0.18.0/.github/workflows/no-std.yml000064400000000000000000000011200072674642500163530ustar 00000000000000# This builds for armv7a-none-eabi to ensure we can build with no-std. # It will fail if there is a dependency on std, as armv7a-none-eabi has no std. on: [push, pull_request] name: no-std jobs: build: name: no-std runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: toolchain: nightly target: armv7a-none-eabi override: true - uses: actions-rs/cargo@v1 with: command: rustc args: --target=armv7a-none-eabi --manifest-path=ensure_no_std/Cargo.tomlspace-0.18.0/.github/workflows/tests.yml000064400000000000000000000007370072674642500163260ustar 00000000000000on: [push, pull_request] name: tests jobs: tests: name: tests runs-on: ubuntu-latest steps: - name: Checkout sources uses: actions/checkout@v2 - name: Install nightly toolchain uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: nightly override: true - name: Run cargo test uses: actions-rs/cargo@v1 with: command: test args: --all-featuresspace-0.18.0/.gitignore000064400000000000000000000000370072674642500130250ustar 00000000000000/target /Cargo.lock **/*.rs.bk space-0.18.0/Cargo.toml0000644000000030360000000000100102150ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2018" name = "space" version = "0.18.0" authors = ["Geordon Worley ", "Yuhan Liin "] description = "A library providing abstractions for spatial datastructures and search" documentation = "https://docs.rs/space/" readme = "README.md" keywords = ["space", "knn", "ann", "search"] categories = ["no-std", "computer-vision", "data-structures"] license = "MIT" repository = "https://github.com/rust-cv/space" [package.metadata.docs.rs] all-features = true [profile.bench] lto = true codegen-units = 1 [[test]] name = "linear_knn" required-features = ["alloc"] [[bench]] name = "knn" harness = false required-features = ["simd-hamming", "alloc"] [dependencies.doc-comment] version = "0.3.3" [dependencies.num-traits] version = "0.2.14" default-features = false [dependencies.serde] version = "1.0.125" features = ["derive"] optional = true [dev-dependencies.criterion] version = "0.3.4" [dev-dependencies.rand_core] version = "0.6.2" [dev-dependencies.rand_pcg] version = "0.3.0" [features] alloc = [] default = ["alloc"] space-0.18.0/Cargo.toml.orig000064400000000000000000000020540072674642500137250ustar 00000000000000[package] name = "space" version = "0.18.0" authors = ["Geordon Worley ", "Yuhan Liin "] edition = "2018" description = "A library providing abstractions for spatial datastructures and search" documentation = "https://docs.rs/space/" repository = "https://github.com/rust-cv/space" keywords = ["space", "knn", "ann", "search"] categories = ["no-std", "computer-vision", "data-structures"] license = "MIT" readme = "README.md" [features] default = ["alloc"] # Enables the usage of LinearKnn, which requires Vec. alloc = [] [dependencies] serde = { version = "1.0.125", features = ["derive"], optional = true } num-traits = { version = "0.2.14", default-features = false } doc-comment = "0.3.3" [dev-dependencies] criterion = "0.3.4" rand_core = "0.6.2" rand_pcg = "0.3.0" [[bench]] name = "knn" harness = false required-features = ["simd-hamming", "alloc"] [[test]] name = "linear_knn" required-features = ["alloc"] [profile.bench] lto = true codegen-units = 1 [package.metadata.docs.rs] all-features = true space-0.18.0/LICENSE000064400000000000000000000020500072674642500120370ustar 00000000000000MIT License Copyright (c) 2021 Rust CV Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. space-0.18.0/README.md000064400000000000000000000056510072674642500123230ustar 00000000000000# space [![Discord][dci]][dcl] [![Crates.io][ci]][cl] ![MIT/Apache][li] [![docs.rs][di]][dl] ![LoC][lo] ![Tests][btl] ![Lints][bll] ![no_std][bnl] [ci]: https://img.shields.io/crates/v/space.svg [cl]: https://crates.io/crates/space/ [li]: https://img.shields.io/crates/l/specs.svg?maxAge=2592000 [di]: https://docs.rs/space/badge.svg [dl]: https://docs.rs/space/ [lo]: https://tokei.rs/b1/github/rust-cv/space?category=code [dci]: https://img.shields.io/discord/550706294311485440.svg?logo=discord&colorB=7289DA [dcl]: https://discord.gg/d32jaam [btl]: https://github.com/rust-cv/space/workflows/tests/badge.svg [bll]: https://github.com/rust-cv/space/workflows/lints/badge.svg [bnl]: https://github.com/rust-cv/space/workflows/no-std/badge.svg A library providing abstractions for spatial datastructures and search If you use a kNN datastructure library and would like to have the `Knn` trait implemented on its types natively, please raise an issue on that library. Similarly, crates which define datapoints with specific distance metrics, and not general linear algebra crates, can implement the `MetricPoint` trait. See the [bitarray](https://crates.io/crates/bitarray) crate for an implementation of `MetricPoint` using hamming distance (with optional, though unstable, 512-bit SIMD support, and always-on 64-bit popcnt instruction support). ## Usage ```rust use space::Metric; struct Hamming; impl Metric for Hamming { type Unit = u8; fn distance(&self, &a: &u8, &b: &u8) -> Self::Unit { (a ^ b).count_ones() as u8 } } ``` ```rust use space::{Knn, KnnFromBatch, LinearKnn, Metric, Neighbor}; #[derive(Default)] struct Hamming; impl Metric for Hamming { type Unit = u8; fn distance(&self, &a: &u8, &b: &u8) -> Self::Unit { (a ^ b).count_ones() as u8 } } let data = vec![ (0b1010_1010, 12), (0b1111_1111, 13), (0b0000_0000, 14), (0b1111_0000, 16), (0b0000_1111, 10), ]; let search: LinearKnn = KnnFromBatch::from_batch(data.iter()); assert_eq!( &search.knn(&0b0101_0000, 3), &[ ( Neighbor { index: 2, distance: 2 }, &data[2].0, &data[2].1 ), ( Neighbor { index: 3, distance: 2 }, &data[3].0, &data[3].1 ), ( Neighbor { index: 0, distance: 6 }, &data[0].0, &data[0].1 ) ] ); ``` ## Benchmarks To run the benchmarks, use the following command: ```bash cargo bench --all-features ``` If you do not pass `--all-features`, the benchmark wont run. Due to [this issue](https://github.com/rust-lang/cargo/issues/2911), the SIMD feature must be enabled. Cargo offers no way to automatically bring the SIMD feature in for the benchmark, and thus it must be passed at the command line. space-0.18.0/benches/knn.rs000064400000000000000000000020000072674642500135700ustar 00000000000000use criterion::{criterion_group, criterion_main, Criterion}; use rand_core::{RngCore, SeedableRng}; use rand_pcg::Pcg64; use space::{Bits512, Knn, MetricPoint}; fn criterion_benchmark(c: &mut Criterion) { let mut rng = Pcg64::from_seed([1; 32]); let mut gen = || { let mut feature = Bits512([0; 64]); rng.fill_bytes(&mut *feature); feature }; let search = gen(); let data = (0..16384).map(|_| gen()).collect::>(); c.bench_function("space: 4-nn in 16384", |b| { b.iter(|| space::LinearKnn(data.iter()).knn(&search, 4).len()) }) .bench_function("min_by_key: 1-nn in 16384", |b| { b.iter(|| { data.iter() .map(|f| f.distance(&search)) .enumerate() .min_by_key(|&(_, d)| d) }) }) .bench_function("space: 1-nn in 16384", |b| { b.iter(|| space::LinearKnn(data.iter()).nn(&search)) }); } criterion_group!(benches, criterion_benchmark); criterion_main!(benches); space-0.18.0/rustfmt.toml000064400000000000000000000000350072674642500134340ustar 00000000000000imports_granularity = "Crate"space-0.18.0/src/lib.rs000064400000000000000000000261520072674642500127460ustar 00000000000000//! See the [Crates.io page](https://crates.io/crates/space) for the README. #![no_std] doc_comment::doctest!("../README.md"); #[cfg(feature = "alloc")] extern crate alloc; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; #[cfg(feature = "alloc")] use alloc::vec::Vec; use num_traits::Zero; /// This trait is implemented for metrics that form a metric space. /// It is primarily used for keys in nearest neighbor searches. /// When implementing this trait, it is recommended to choose the smallest unsigned integer that /// represents your metric space, but you may also use a float so long as you wrap it in /// a newtype that enforces the `Ord + Zero + Copy` trait bounds. /// It is recommended to use /// [`NoisyFloat`](https://docs.rs/noisy_float/0.2.0/noisy_float/struct.NoisyFloat.html) /// for this purpose, as it implements the trait bound. /// /// It is important that all metrics that implement this trait satisfy /// the [triangle inequality](https://en.wikipedia.org/wiki/Triangle_inequality). /// This requirement basically means that the sum of distances that start /// at a point A and end at a point B can never be less than the distance /// from A to B directly. Note that the metric is required to be an unsigned integer, /// as distances can only be positive and must be fully ordered. /// It is also required that two overlapping points (the same point in space) must return /// a distance of [`Zero::zero`]. /// /// Floating point numbers can be converted to integer metrics by being interpreted as integers by design, /// although some special patterns (like NaN) do not fit into this model. To be interpreted as an unsigned /// integer, the float must be positive zero, subnormal, normal, or positive infinity. Any NaN needs /// to be dealt with before converting into a metric, as they do NOT satisfy the triangle inequality, /// and will lead to errors. You may want to check for positive infinity as well depending on your use case. /// You must remove NaNs if you convert to integers, but you must also remove NaNs if you use an ordered /// wrapper like [`NoisyFloat`](https://docs.rs/noisy_float/0.2.0/noisy_float/struct.NoisyFloat.html). /// Be careful if you use a wrapper like /// [`FloatOrd`](https://docs.rs/float-ord/0.3.2/float_ord/struct.FloatOrd.html) which does not /// force you to remove NaNs. When implementing a metric, you must be sure that NaNs are not allowed, because /// they may cause nearest neighbor algorithms to panic. /// /// ## Example /// /// ``` /// struct AbsDiff; /// /// impl space::Metric for AbsDiff { /// type Unit = u64; /// /// fn distance(&self, &a: &f64, &b: &f64) -> Self::Unit { /// let delta = (a - b).abs(); /// debug_assert!(!delta.is_nan()); /// delta.to_bits() /// } /// } /// ``` pub trait Metric

{ type Unit: Ord + Zero + Copy; fn distance(&self, a: &P, b: &P) -> Self::Unit; } /// For k-NN algorithms to return neighbors. #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct Neighbor { /// Index of the neighbor in the search space. pub index: Ix, /// The distance of the neighbor from the search feature. pub distance: Unit, } /// Implement this trait on data structures (or wrappers) which perform KNN searches. /// The data structure should maintain a key-value mapping between neighbour points and data /// values. /// /// The lifetime on the trait will be removed once GATs are stabilized. pub trait Knn<'a> { type Ix: Copy; type Point: 'a; type Value: 'a; type Metric: Metric; type KnnIter: IntoIterator< Item = ( Neighbor<>::Unit, Self::Ix>, &'a Self::Point, &'a Self::Value, ), >; /// Get a point using a neighbor index returned by [`Knn::knn`] or [`Knn::nn`]. /// /// This should only be used directly after one of the mentioned methods are called to retrieve /// a point associated with a neighbor, and will panic if the index is incorrect due to /// mutating the data structure thereafter. The index is only valid up until the next mutation. fn point(&self, index: Self::Ix) -> &'a Self::Point; /// Get a value using a neighbor index returned by [`Knn::knn`] or [`Knn::nn`]. /// /// This should only be used directly after one of the mentioned methods are called to retrieve /// a value associated with a neighbor, and will panic if the index is incorrect due to /// mutating the data structure thereafter. The index is only valid up until the next mutation. fn value(&self, index: Self::Ix) -> &'a Self::Value; /// Get `num` nearest neighbor keys and values of `target`. /// /// For many KNN search algorithms, the returned neighbors are approximate, and may not /// be the actual nearest neighbors. fn knn(&'a self, query: &Self::Point, num: usize) -> Self::KnnIter; /// Get the nearest neighbor key and values of `target`. /// /// For many KNN search algorithms, the returned neighbors are approximate, and may not /// be the actual nearest neighbors. #[allow(clippy::type_complexity)] fn nn( &'a self, query: &Self::Point, ) -> Option<( Neighbor<>::Unit, Self::Ix>, &'a Self::Point, &'a Self::Value, )>; } /// Implement this trait on data structures (or wrappers) which perform range queries. /// The data structure should maintain a key-value mapping between neighbour points and data /// values. /// /// The lifetime on the trait will be removed once GATs are stabilized. pub trait RangeQuery<'a>: Knn<'a> { type RangeIter: IntoIterator< Item = ( Neighbor<>::Unit, Self::Ix>, &'a Self::Point, &'a Self::Value, ), >; /// Get all the points in the data structure that lie within a specified range of the query /// point. The points may or may not be sorted by distance. #[allow(clippy::type_complexity)] fn range_query( &self, query: &Self::Point, range: >::Unit, ) -> Self::RangeIter; } /// Implement this trait on KNN search data structures that map keys to values and which you can /// insert new (key, value) pairs. pub trait KnnInsert<'a>: Knn<'a> { /// Insert a (key, value) pair to the [`KnnMap`]. /// /// Returns the index type fn insert(&mut self, key: Self::Point, value: Self::Value) -> Self::Ix; } /// Create a data structure from a metric and a batch of data points, such as a vector. /// For many algorithms, using batch initialization yields better results than inserting the points /// one at a time. pub trait KnnFromMetricAndBatch { fn from_metric_and_batch(metric: M, batch: B) -> Self; } /// Create a data structure from a batch of data points, such as a vector. /// For many algorithms, using batch initialization yields better results than inserting the points /// one at a time. pub trait KnnFromBatch: KnnFromMetricAndBatch { fn from_batch(batch: B) -> Self; } impl KnnFromBatch for T where T: KnnFromMetricAndBatch, M: Default, { fn from_batch(batch: B) -> Self { Self::from_metric_and_batch(M::default(), batch) } } /// Performs a linear knn search by iterating over everything in the space /// and performing a binary search on running set of neighbors. /// /// ## Example /// /// ``` /// use space::{Knn, LinearKnn, Metric, Neighbor, KnnFromBatch}; /// /// #[derive(Default)] /// struct Hamming; /// /// impl Metric for Hamming { /// type Unit = u8; /// /// fn distance(&self, &a: &u8, &b: &u8) -> Self::Unit { /// (a ^ b).count_ones() as u8 /// } /// } /// /// let data = vec![ /// (0b1010_1010, 12), /// (0b1111_1111, 13), /// (0b0000_0000, 14), /// (0b1111_0000, 16), /// (0b0000_1111, 10), /// ]; /// /// let search: LinearKnn = KnnFromBatch::from_batch(data.iter()); /// /// assert_eq!( /// &search.knn(&0b0101_0000, 2), /// &[ /// (Neighbor { index: 2, distance: 2 }, &data[2].0, &data[2].1), /// (Neighbor { index: 3, distance: 2 }, &data[3].0, &data[3].1), /// ] /// ); /// ``` #[cfg(feature = "alloc")] pub struct LinearKnn { pub metric: M, pub points: I, } #[cfg(feature = "alloc")] impl<'a, M: Metric

, I, P: 'a, V: 'a> Knn<'a> for LinearKnn where I: Iterator + Clone, { type Ix = usize; type Metric = M; type Point = P; type Value = V; type KnnIter = Vec<(Neighbor, &'a P, &'a V)>; fn point(&self, index: Self::Ix) -> &'a Self::Point { &self.points.clone().nth(index).unwrap().0 } fn value(&self, index: Self::Ix) -> &'a Self::Value { &self.points.clone().nth(index).unwrap().1 } fn knn(&'a self, query: &Self::Point, num: usize) -> Self::KnnIter { // Create an iterator mapping the dataset into `Neighbor`. let mut dataset = self.points.clone().enumerate().map(|(index, (pt, val))| { ( Neighbor { index, distance: self.metric.distance(pt, query), }, pt, val, ) }); // Create a vector with the correct capacity in advance. let mut neighbors = Vec::with_capacity(num); // Extend the vector with the first `num` neighbors. neighbors.extend((&mut dataset).take(num)); // Sort the vector by the neighbor distance. neighbors.sort_unstable_by_key(|n| n.0.distance); // Iterate over each additional neighbor. for point in dataset { // Find the position at which it would be inserted. let position = neighbors.partition_point(|n| n.0.distance <= point.0.distance); // If the point is closer than at least one of the points already in `neighbors`, add it // into its sorted position. if position != num { neighbors.pop(); neighbors.insert(position, point); } } neighbors } #[allow(clippy::type_complexity)] fn nn( &self, query: &Self::Point, ) -> Option<( Neighbor<>::Unit, Self::Ix>, &'a Self::Point, &'a Self::Value, )> { // Map the input iterator into neighbors and then find the smallest one by distance. self.points .clone() .enumerate() .map(|(index, (pt, val))| { ( Neighbor { index, distance: self.metric.distance(pt, query), }, pt, val, ) }) .min_by_key(|n| n.0.distance) } } #[cfg(feature = "alloc")] impl<'a, M, I> KnnFromMetricAndBatch for LinearKnn where M: Default, { fn from_metric_and_batch(metric: M, points: I) -> Self { Self { metric, points } } } space-0.18.0/tests/linear_knn.rs000064400000000000000000000022400072674642500146630ustar 00000000000000use space::{Knn, KnnFromBatch, LinearKnn, Metric, Neighbor}; #[derive(Default)] struct Hamming; impl Metric for Hamming { type Unit = u8; fn distance(&self, &a: &u8, &b: &u8) -> Self::Unit { (a ^ b).count_ones() as u8 } } #[test] fn test_linear_knn() { let data = vec![ (0b1010_1010, 12), (0b1111_1111, 13), (0b0000_0000, 14), (0b1111_0000, 16), (0b0000_1111, 10), ]; let search: LinearKnn = KnnFromBatch::from_batch(data.iter()); assert_eq!( &search.knn(&0b0101_0000, 3), &[ ( Neighbor { index: 2, distance: 2 }, &data[2].0, &data[2].1 ), ( Neighbor { index: 3, distance: 2 }, &data[3].0, &data[3].1 ), ( Neighbor { index: 0, distance: 6 }, &data[0].0, &data[0].1 ) ] ); }