tree-edit-distance-0.4.0/.cargo_vcs_info.json0000644000000001360000000000100145070ustar { "git": { "sha1": "872c9cb1b44612b009b2f1d342fcd84efe1ba2c0" }, "path_in_vcs": "" }tree-edit-distance-0.4.0/.github/dependabot.yml000064400000000000000000000002021046102023000174610ustar 00000000000000version: 2 updates: - package-ecosystem: "cargo" directory: "/" schedule: interval: "weekly" day: "saturday"tree-edit-distance-0.4.0/.github/workflows/cd.yml000064400000000000000000000006051046102023000200060ustar 00000000000000name: CD on: release: types: [published] jobs: publish: runs-on: ubuntu-latest steps: - uses: actions/checkout@master - uses: actions-rs/toolchain@v1 with: override: true profile: minimal toolchain: stable - uses: actions-rs/cargo@v1 with: command: publish args: --token ${{ secrets.CARGO_REGISTRY_TOKEN }} tree-edit-distance-0.4.0/.github/workflows/ci.yml000064400000000000000000000072441046102023000200210ustar 00000000000000name: CI on: push: branches: - master pull_request: branches: - master schedule: - cron: '0 6 * * SAT' jobs: fmt: runs-on: ubuntu-latest steps: - uses: actions/checkout@master - uses: actions-rs/toolchain@v1 with: override: true profile: minimal toolchain: stable components: rustfmt - uses: actions-rs/cargo@v1 with: command: fmt args: --all -- --check clippy: runs-on: ubuntu-latest steps: - uses: actions/checkout@master - uses: actions-rs/toolchain@v1 with: override: true profile: minimal toolchain: stable components: clippy - uses: actions-rs/clippy-check@v1 with: token: ${{ secrets.GITHUB_TOKEN }} args: --all-targets -- -D warnings audit: runs-on: ubuntu-latest steps: - uses: actions/checkout@master - uses: actions-rs/audit-check@v1 with: token: ${{ secrets.GITHUB_TOKEN }} check: runs-on: ubuntu-latest steps: - uses: actions/checkout@master - uses: actions-rs/toolchain@v1 with: override: true profile: minimal toolchain: stable - uses: actions-rs/cargo@v1 with: command: check args: --all doc: runs-on: ubuntu-latest steps: - uses: 
actions/checkout@master - uses: actions-rs/toolchain@v1 with: override: true profile: minimal toolchain: stable - uses: actions-rs/cargo@v1 with: command: doc args: --all-features --no-deps test: needs: [fmt, clippy, audit, check, doc] runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macOS-latest] rust: [stable, nightly] steps: - uses: actions/checkout@master - uses: actions-rs/toolchain@v1 with: override: true profile: minimal toolchain: ${{ matrix.rust }} - uses: actions-rs/cargo@v1 with: command: test - uses: actions-rs/cargo@v1 with: command: test args: --no-default-features miri: needs: [test] runs-on: ubuntu-latest steps: - uses: actions/checkout@master - uses: actions-rs/toolchain@v1 with: override: true profile: minimal toolchain: nightly components: miri, rust-src - uses: actions-rs/cargo@v1 with: command: miri args: setup - uses: actions-rs/cargo@v1 with: command: miri args: test --all-features env: MIRIFLAGS: "-Zmiri-disable-isolation" PROPTEST_CASES: 1 sanitize: needs: [test] runs-on: ubuntu-latest strategy: fail-fast: false matrix: sanitizer: [address, leak, memory, thread] steps: - uses: actions/checkout@master - uses: actions-rs/toolchain@v1 with: override: true profile: minimal toolchain: nightly components: rust-src - uses: actions-rs/cargo@v1 with: command: test args: --tests --lib --all-features --target x86_64-unknown-linux-gnu -Z build-std env: RUSTFLAGS: "-Z sanitizer=${{ matrix.sanitizer }}" coverage: needs: [test] runs-on: ubuntu-latest steps: - uses: actions/checkout@master - uses: actions-rs/toolchain@v1 with: override: true profile: minimal toolchain: stable - uses: actions-rs/tarpaulin@v0.1 with: args: --all-features --avoid-cfg-tarpaulin - uses: codecov/codecov-action@v2 with: token: ${{secrets.CODECOV_TOKEN}} fail_ci_if_error: true tree-edit-distance-0.4.0/.gitignore000064400000000000000000000000771046102023000152730ustar 00000000000000/target /proptest-regressions Cargo.lock 
cobertura.xml .vscode tree-edit-distance-0.4.0/CONTRIBUTING.md000064400000000000000000000017251046102023000155350ustar 00000000000000## Guidelines * All code submitted to TreeEditDistance via pull requests is assumed to be [licensed under the MIT][LICENSE]. * Every code change must be covered by unit tests, use [tarpaulin] to generate the code coverage report: + `cargo +nightly tarpaulin -v --all-features` * Besides `cargo test`, make sure [Clippy] and [rustfmt] checks also pass before submitting a pull request: + `cargo clippy --all-targets -- -D warnings` + `cargo fmt --all -- --check` * Follow [rustsec.org] advisories when introducing new dependencies, use [cargo-audit] to verify: + `cargo audit -D` [LICENSE]: https://github.com/brunocodutra/tree-edit-distance/blob/master/LICENSE [rustsec.org]: https://rustsec.org/advisories/ [Clippy]: https://github.com/rust-lang/rust-clippy#usage [rustfmt]: https://github.com/rust-lang/rustfmt#quick-start [tarpaulin]: https://github.com/xd009642/tarpaulin#usage [cargo-audit]: https://github.com/RustSec/cargo-audit#installation tree-edit-distance-0.4.0/Cargo.toml0000644000000033160000000000100125100ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package] edition = "2021" name = "tree-edit-distance" version = "0.4.0" authors = ["Bruno Dutra "] description = "Find the lowest cost sequence of edits between two trees" readme = "README.md" keywords = [ "diff", "tree", "distance", "levenshtein", "lcs", ] categories = ["algorithms"] license = "MIT" repository = "https://github.com/brunocodutra/tree-edit-distance.git" [lib] bench = false [[bench]] name = "diff" harness = false [dependencies.arrayvec] version = "0.7.2" features = ["std"] default-features = false [dependencies.derive_more] version = "0.99.17" features = [ "add", "from", ] default-features = false [dependencies.itertools] version = "0.10.3" features = ["use_std"] default-features = false [dependencies.pathfinding] version = "3.0.9" default-features = false [dev-dependencies.assert_matches] version = "1.5.0" default-features = false [dev-dependencies.criterion] version = "0.4.0" features = ["rayon"] default-features = false [dev-dependencies.proptest] version = "1.0.0" features = ["std"] default-features = false [dev-dependencies.serde_json] version = "1.0.85" features = ["std"] default-features = false [dev-dependencies.test-strategy] version = "0.2.0" default-features = false tree-edit-distance-0.4.0/Cargo.toml.orig000064400000000000000000000022361046102023000161710ustar 00000000000000[package] name = "tree-edit-distance" version = "0.4.0" authors = ["Bruno Dutra "] edition = "2021" description = "Find the lowest cost sequence of edits between two trees" repository = "https://github.com/brunocodutra/tree-edit-distance.git" license = "MIT" readme = "README.md" keywords = ["diff", "tree", "distance", "levenshtein", "lcs"] categories = ["algorithms"] [dependencies] arrayvec = { version = "0.7.2", default-features = false, features = ["std"] } derive_more = { version = "0.99.17", default-features = false, features = ["add", "from"] } itertools = { version = "0.10.3", default-features = false, features = ["use_std"] } pathfinding = { version = "3.0.9", 
default-features = false } [dev-dependencies] assert_matches = { version = "1.5.0", default-features = false } criterion = { version = "0.4.0", default-features = false, features = ["rayon"] } proptest = { version = "1.0.0", default-features = false, features = ["std"] } serde_json = { version = "1.0.85", default-features = false, features = ["std"] } test-strategy = { version = "0.2.0", default-features = false } [lib] bench = false [[bench]] name = "diff" harness = false tree-edit-distance-0.4.0/LICENSE000064400000000000000000000020541046102023000143050ustar 00000000000000MIT License Copyright (c) 2020 Bruno Dutra Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. tree-edit-distance-0.4.0/README.md000064400000000000000000000032531046102023000145610ustar 00000000000000# TreeEditDistance [![docs.badge]][docs.home] [![codecov.badge]][codecov.home] This crate provides an algorithm to compute the lowest cost sequence of edits between two trees. 
It is based on a recursive generalized version of the [Levenshtein distance][levenshtein] for arbitrary sequences, where inserting/deleting nodes may have an arbitrary user-defined cost. ## Using TreeEditDistance TreeEditDistance is available on [crates.io], simply add it as a dependency in your `Cargo.toml`: ``` [dependencies] tree-edit-distance = "0.4" ``` The full API documentation is available on [docs.rs][docs.home] ## Contribution TreeEditDistance is an open source project and you're very welcome to contribute to this project by opening [issues] and/or [pull requests][pulls], see [CONTRIBUTING] for general guidelines. ## License TreeEditDistance is distributed under the terms of the MIT license, see [LICENSE] for details. [crates.io]: https://crates.io/crates/tree-edit-distance [docs.home]: https://docs.rs/tree-edit-distance [docs.badge]: https://docs.rs/tree-edit-distance/badge.svg [codecov.home]: https://codecov.io/gh/brunocodutra/tree-edit-distance [codecov.badge]: https://codecov.io/gh/brunocodutra/tree-edit-distance/branch/master/graph/badge.svg [issues]: https://github.com/brunocodutra/tree-edit-distance/issues [pulls]: https://github.com/brunocodutra/tree-edit-distance/pulls [LICENSE]: https://github.com/brunocodutra/tree-edit-distance/blob/master/LICENSE [CONTRIBUTING]: https://github.com/brunocodutra/tree-edit-distance/blob/master/CONTRIBUTING.md [levenshtein]: https://en.wikipedia.org/wiki/Levenshtein_distance tree-edit-distance-0.4.0/benches/diff.rs000064400000000000000000000031121046102023000161610ustar 00000000000000use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; use derive_more::From; use proptest::strategy::{LazyJust, ValueTree}; use proptest::{collection::vec, prelude::*, test_runner::TestRunner}; use tree_edit_distance::{diff, Node, Tree}; #[derive(Debug, From)] struct TreeNode { weight: u8, children: Vec, } impl Node for TreeNode { type Kind = (); fn kind(&self) -> Self::Kind {} type Weight = u32; fn weight(&self) -> 
Self::Weight { self.weight.into() } } impl Tree for TreeNode { type Children<'c> = &'c [Self]; fn children(&self) -> Self::Children<'_> { &self.children } } fn tree(depth: u32, breadth: u32) -> impl Strategy { let size = (breadth.pow(depth + 1) - 1) / (breadth - 1) / 2; // half the maximum number of nodes (1u8.., LazyJust::new(Vec::new)) .prop_map_into() .prop_recursive(depth, size, breadth, move |inner| { (1u8.., vec(inner, ..=breadth as usize)).prop_map_into() }) } fn bench(c: &mut Criterion) { let mut runner = TestRunner::default(); let mut group = c.benchmark_group("n-ary tree diff"); for (d, b) in [(7, 2), (3, 6), (2, 15), (1, 255)] { group.bench_with_input(format!("depth={}/breadth={}", d, b), &tree(d, b), |b, s| { b.iter_batched_ref( || (s, s).new_tree(&mut runner).unwrap().current(), |(a, b)| diff(a, b), BatchSize::SmallInput, ) }); } group.finish(); } criterion_group!(benches, bench); criterion_main!(benches); tree-edit-distance-0.4.0/src/cost.rs000064400000000000000000000015061046102023000154060ustar 00000000000000use std::ops::Add; pub(crate) trait Cost { type Output: Default + Copy + Add; fn cost(&self) -> Self::Output; } impl, V: Default + Copy + Add> Cost for [C] { type Output = C::Output; fn cost(&self) -> Self::Output { self.iter().map(C::cost).reduce(V::add).unwrap_or_default() } } impl> Cost for V { type Output = V; #[inline] fn cost(&self) -> Self::Output { *self } } #[cfg(test)] mod tests { use super::*; use proptest::collection::{vec, SizeRange}; use test_strategy::proptest; #[proptest] fn cost_of_slice_equals_sum_of_costs( #[strategy(vec(..32u32, SizeRange::default()))] s: Vec, ) { assert_eq!(s.cost(), s.iter().sum::()); } } tree-edit-distance-0.4.0/src/diff.rs000064400000000000000000000131631046102023000153500ustar 00000000000000use crate::{memoize, Cost, Edit, Tree}; use arrayvec::ArrayVec; use derive_more::{Add, From}; use itertools::Itertools; use pathfinding::{num_traits::Zero, prelude::*}; use std::{collections::HashMap, ops::Add}; 
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, From, Add)] struct WholeNumber(T); impl> Zero for WholeNumber { fn zero() -> Self { Self::default() } fn is_zero(&self) -> bool { *self == Self::zero() } } fn levenshtein<'c, T>(a: &'c [T], b: &'c [T]) -> (Box<[Edit]>, T::Weight) where T: Tree = &'c [T]> + Cost, { let mut edges = HashMap::new(); let (path, WholeNumber(cost)) = astar( &(0, 0), |&(i, j)| { let x = a.get(i); let y = b.get(j); let mut successors = ArrayVec::<_, 3>::new(); if let Some(x) = x { let next = (i + 1, j); let none = edges.insert(((i, j), next), Edit::Remove); debug_assert!(none.is_none()); successors.push((next, x.cost().into())); } if let Some(y) = y { let next = (i, j + 1); let none = edges.insert(((i, j), next), Edit::Insert); debug_assert!(none.is_none()); successors.push((next, y.cost().into())); } if let (Some(x), Some(y)) = (x, y) { if x.kind() == y.kind() { let next = (i + 1, j + 1); let (inner, cost) = levenshtein(x.children(), y.children()); let none = edges.insert(((i, j), next), Edit::Replace(inner)); debug_assert!(none.is_none()); successors.push((next, cost.into())); } } successors }, |&(i, j)| match (&a[i..], &b[j..]) { (&[], rest) | (rest, &[]) => rest.cost().into(), (a, b) if a.len() != b.len() => { let rest = if a.len() > b.len() { a } else { b }; let nth = a.len().max(b.len()) - a.len().min(b.len()); let mut costs: Box<[_]> = rest.iter().map(T::cost).collect(); let (cheapest, _, _) = costs.select_nth_unstable(nth); cheapest.cost().into() } _ => WholeNumber::default(), }, |&p| p == (a.len(), b.len()), ) .unwrap(); let patches = path .into_iter() .tuple_windows() .flat_map(move |e| edges.remove(&e)) .collect(); (patches, cost) } /// Finds the lowest cost sequence of [Edit]s that transforms one [Tree] into the other. /// /// The sequence of [Edit]s is understood to apply to the left-hand side so it becomes the /// right-hand side. 
pub fn diff(a: &T, b: &T) -> (Box<[Edit]>, T::Weight) { levenshtein(&[memoize(a)], &[memoize(b)]) } #[cfg(test)] mod tests { use super::*; use crate::{Fold, MockTree, Tree}; use assert_matches::assert_matches; use proptest::collection::size_range; use test_strategy::{proptest, Arbitrary}; #[derive(Debug, Default, Copy, Clone, Eq, PartialEq, Arbitrary)] struct Eq; #[derive(Debug, Default, Copy, Clone, Arbitrary)] struct NotEq; impl PartialEq for NotEq { fn eq(&self, _: &Self) -> bool { false } } #[proptest] fn the_number_of_edits_is_at_most_equal_to_the_total_number_of_nodes( a: MockTree, b: MockTree, ) { let (e, _) = diff(&a, &b); assert_matches!((e.count(), a.count() + b.count()), (x, y) if x <= y); } #[proptest] fn the_cost_is_at_most_equal_to_the_sum_of_costs(a: MockTree, b: MockTree) { let (_, c) = diff(&a, &b); assert_matches!((c, a.cost() + b.cost()), (x, y) if x <= y); } #[proptest] fn the_cost_between_identical_trees_is_zero(a: MockTree) { let (e, c) = diff(&a, &a); assert_eq!(e.count(), a.count()); assert_eq!(c, 0); } #[proptest] fn nodes_of_different_kinds_cannot_be_replaced(a: MockTree, b: MockTree) { use Edit::*; let (e, _) = diff(&a, &b); assert_matches!(&e[..], [Remove, Insert] | [Insert, Remove]); } #[proptest] fn nodes_of_equal_kinds_can_be_replaced(a: MockTree, b: MockTree) { let (e, _) = diff(&a, &b); let (i, _) = levenshtein(a.children(), b.children()); assert_matches!(&e[..], [Edit::Replace(x)] => { assert_eq!(x, &i); }); } #[proptest] fn the_cost_of_swapping_nodes_is_equal_to_the_sum_of_their_costs( a: MockTree, b: MockTree, ) { let (_, c) = diff(&a, &b); assert_eq!(c, a.cost() + b.cost()); } #[proptest] fn the_cost_of_replacing_nodes_does_not_depend_on_their_weights( a: MockTree, b: MockTree, ) { let (_, c) = diff(&a, &b); let (_, d) = levenshtein(a.children(), b.children()); assert_eq!(c, d); } #[proptest] fn the_cost_is_always_minimized( #[any(size_range(1..8).lift())] a: Vec>, #[any(size_range(1..8).lift())] b: Vec>, 
#[strategy(0..#a.len())] i: usize, #[strategy(0..#b.len())] j: usize, ) { let mut x = a.clone(); let mut y = b.clone(); let m = x.remove(i); let n = y.remove(j); let (_, c) = levenshtein(&a, &b); let (_, d) = levenshtein(&x, &y); assert_matches!((c, d + m.cost() + n.cost()), (x, y) if x <= y); } } tree-edit-distance-0.4.0/src/edit.rs000064400000000000000000000012231046102023000153570ustar 00000000000000use crate::Fold; /// A single operation between two [Node][crate::Node]s. #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub enum Edit { /// Swap the [Node][crate::Node]s and edit their children. Replace(Box<[Edit]>), /// Insert the incoming [Node][crate::Node] along with its children in place. Insert, /// Remove the existing [Node][crate::Node] along with its children. Remove, } impl Fold for Edit { #[inline] fn fold R>(&self, init: R, f: &mut Fn) -> R { if let Edit::Replace(c) = self { c.fold(f(init, self), f) } else { f(init, self) } } } tree-edit-distance-0.4.0/src/fold.rs000064400000000000000000000010511046102023000153550ustar 00000000000000use std::ops::Add; pub(crate) trait Fold { fn fold R>(&self, init: R, f: &mut Fn) -> R; #[inline] fn sum, Fn: FnMut(&I) -> N>(&self, mut f: Fn) -> N { self.fold(N::default(), &mut |n, i| n + f(i)) } #[inline] fn count(&self) -> usize { self.sum(|_| 1) } } impl, I: ?Sized> Fold for [F] { fn fold R>(&self, init: R, f: &mut Fn) -> R { self.iter().fold(init, |r, i| i.fold(r, f)) } } tree-edit-distance-0.4.0/src/lib.rs000064400000000000000000000064231046102023000152070ustar 00000000000000//! # Overview //! //! This crate provides an implementation of a recursive generalized version of the //! [Levenshtein distance][levenshtein] for arbitrary sequences that finds the smallest possible //! diff between two trees, according to a user-defined measure for the cost of inserting and //! removing nodes. The smallest possible diff is defined by the lowest cost sequence of edits //! that transforms one tree into the other. //! //! 
[levenshtein]: https://en.wikipedia.org/wiki/Levenshtein_distance //! //! # Example //! //! ```rust //! use tree_edit_distance::*; //! use std::mem::{discriminant, Discriminant}; //! use std::iter::empty; //! //! enum Json { //! Null, //! Bool(bool), //! Number(f64), //! String(String), //! Array(Vec), //! Map(Vec<(String, Json)>), //! } //! //! impl Node for Json { //! type Kind = Discriminant; //! fn kind(&self) -> Self::Kind { //! discriminant(self) //! } //! //! type Weight = u64; //! fn weight(&self) -> Self::Weight { //! 1 //! } //! } //! //! impl Tree for Json { //! type Children<'c> = Box + 'c> //! where //! Self: 'c; //! //! fn children(&self) -> Self::Children<'_> { //! match self { //! Json::Array(a) => Box::new(a.iter()), //! Json::Map(m) => Box::new(m.iter().map(|(_, v)| v)), //! _ => Box::new(empty()), //! } //! } //! } //! # //! # impl From for Json { //! # fn from(obj: serde_json::Value) -> Self { //! # use serde_json::Value::*; //! # match obj { //! # Null => Json::Null, //! # Bool(b) => Json::Bool(b), //! # Number(n) => Json::Number(n.as_i64().unwrap() as f64), //! # String(s) => Json::String(s), //! # Array(a) => Json::Array(a.into_iter().map(Into::into).collect()), //! # Object(m) => Json::Map( //! # m.into_iter() //! # .map(|(k, v)| (k, v.into())) //! # .collect(), //! # ), //! # } //! # } //! # } //! //! macro_rules! json { //! ($( $tokens:tt )*) => { //! // ... //! # Json::from(::serde_json::json!({$($tokens)*})) //! }; //! } //! //! let john = json! { //! "name": "John Doe", //! "age": 43, //! "phones": [ //! "+44 1234567", //! "+44 2345678" //! ] //! }; //! //! let jane = json! { //! "name": "Jane Doe", //! "maiden name": "Smith", //! "age": 40, //! "phones": [ //! "+44 7654321", //! ] //! }; //! //! let (edits, cost) = diff(&john, &jane); //! //! assert_eq!(cost, 2); //! //! assert_eq!(&*edits, &[ //! Edit::Replace(Box::new([ //! Edit::Replace(Box::default()), // "name" //! Edit::Insert, // "maiden name" //! 
Edit::Replace(Box::default()), // "age" //! Edit::Replace(Box::new([ // "phones" //! Edit::Remove, //! Edit::Replace(Box::default()), //! ])), //! ])) //! ]); //! ``` mod diff; mod edit; mod tree; pub use diff::*; pub use edit::*; pub use tree::*; mod cost; mod fold; mod memoize; pub(crate) use cost::*; pub(crate) use fold::*; pub(crate) use memoize::*; tree-edit-distance-0.4.0/src/memoize.rs000064400000000000000000000030641046102023000161040ustar 00000000000000use crate::{Cost, Node, Tree}; #[derive(Debug, Clone, Eq, PartialEq, Hash)] pub(crate) struct Memoized<'t, T: 't + Tree> { tree: &'t T, cost: T::Weight, children: Box<[Self]>, } impl<'t, T: 't + Tree> Node for Memoized<'t, T> { type Kind = T::Kind; #[inline] fn kind(&self) -> Self::Kind { T::kind(self.tree) } type Weight = T::Weight; #[inline] fn weight(&self) -> Self::Weight { T::weight(self.tree) } } impl<'t, T: 't + Tree> Tree for Memoized<'t, T> { type Children<'c> = &'c [Self] where Self: 'c; #[inline] fn children(&self) -> Self::Children<'_> { &self.children } } impl<'t, T: 't + Tree> Cost for Memoized<'t, T> { type Output = T::Weight; #[inline] fn cost(&self) -> Self::Output { self.cost } } pub(crate) fn memoize(t: &T) -> Memoized { let children: Box<[_]> = t.children().into_iter().map(memoize).collect(); Memoized { tree: t, cost: t.weight() + children.cost(), children, } } #[cfg(test)] mod tests { use super::*; use crate::MockTree; use test_strategy::proptest; #[proptest] fn kind_is_preserved(t: MockTree) { assert_eq!(memoize(&t).kind(), t.kind()); } #[proptest] fn weight_is_preserved(t: MockTree) { assert_eq!(memoize(&t).weight(), t.weight()); } #[proptest] fn cost_is_memoized(t: MockTree) { let u = memoize(&t); assert_eq!(u.cost, t.cost()); assert_eq!(u.cost, u.cost()); } } tree-edit-distance-0.4.0/src/tree.rs000064400000000000000000000075551046102023000154070ustar 00000000000000use std::ops::Add; /// An abstraction for a generic tree node. 
pub trait Node {
    /// The type of this [Node]'s [kind][Node::kind].
    ///
    /// Only [Node]s of equal _kind_ can replace each other.
    type Kind: PartialEq;

    /// Returns this [Node]'s _kind_.
    fn kind(&self) -> Self::Kind;

    /// The type of this [Node]'s [weight][Node::weight].
    ///
    /// The default value of this type is assumed to be the additive identity (i.e. _zero_).
    type Weight: Default + Copy + Ord + Add<Output = Self::Weight>;

    /// Returns the cost of inserting or deleting this [Node].
    ///
    /// A [Node]'s weight should be independent of the weight of its children.
    fn weight(&self) -> Self::Weight;
}

/// An abstraction for a recursive tree.
pub trait Tree: Node {
    /// A type that can iterate over this [Tree]'s [children][Tree::children].
    type Children<'c>: 'c + IntoIterator<Item = &'c Self>
    where
        Self: 'c;

    /// Returns this [Tree]'s immediate children.
    fn children(&self) -> Self::Children<'_>;
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Cost, Fold};
    use derive_more::From;
    use proptest::{collection::vec, prelude::*, strategy::LazyJust};
    use test_strategy::proptest;

    /// Bounds on the shape of the trees generated for tests.
    #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, From)]
    pub struct Size {
        depth: usize,
        breadth: usize,
    }

    impl Default for Size {
        /// Uses smaller trees when running under miri.
        fn default() -> Self {
            #[cfg(not(miri))]
            {
                (5, 5).into()
            }

            #[cfg(miri)]
            {
                (2, 2).into()
            }
        }
    }

    /// Builds a strategy that generates [MockTree]s bounded by `size`.
    ///
    /// NOTE(review): the node budget divides by `breadth - 1`, so `size.breadth` is assumed to
    /// be at least 2; both defaults of [Size] satisfy this.
    fn tree<K>(size: Size) -> impl Strategy<Value = MockTree<K>>
    where
        K: 'static + Copy + PartialEq + Arbitrary,
    {
        let depth = size.depth as u32;
        let breadth = size.breadth as u32;
        let size = (breadth.pow(depth + 1) - 1) / (breadth - 1) / 2; // half the maximum number of nodes

        // Leaves have no children; inner nodes hold up to `breadth` recursively generated ones.
        (any::<K>(), any::<u8>(), LazyJust::new(Vec::new))
            .prop_map_into()
            .prop_recursive(depth, size, breadth, move |inner| {
                (any::<K>(), any::<u8>(), vec(inner, ..=breadth as usize)).prop_map_into()
            })
    }

    /// A minimal owned tree used to exercise the crate in tests.
    #[derive(Debug, Default, Clone, PartialEq, Eq, Hash, From)]
    pub(crate) struct MockTree<K> {
        kind: K,
        weight: u8,
        children: Vec<Self>,
    }

    impl<K: 'static + Copy + PartialEq + Arbitrary> Arbitrary for MockTree<K> {
        type Parameters = Size;
        type Strategy = BoxedStrategy<Self>;

        fn arbitrary_with(size: Size) -> Self::Strategy {
            tree(size).boxed()
        }
    }

    impl<K: Copy + PartialEq> Node for MockTree<K> {
        type Kind = K;
        fn kind(&self) -> Self::Kind {
            self.kind
        }

        type Weight = u64;
        fn weight(&self) -> Self::Weight {
            self.weight.into()
        }
    }

    impl<K: Copy + PartialEq> Tree for MockTree<K> {
        type Children<'c> = &'c [Self]
        where
            Self: 'c;

        fn children(&self) -> Self::Children<'_> {
            &self.children
        }
    }

    impl<K: Copy + PartialEq> Fold for MockTree<K> {
        /// Visits this node first, then folds over its children.
        fn fold<R, Fn: FnMut(R, &Self) -> R>(&self, init: R, f: &mut Fn) -> R {
            self.children().fold(f(init, self), f)
        }
    }

    impl<K: Copy + PartialEq> Cost for MockTree<K> {
        type Output = <Self as Node>::Weight;

        /// Sums the weights of every node visited by [Fold::fold].
        #[inline]
        fn cost(&self) -> Self::Output {
            self.sum(|c| c.weight())
        }
    }

    #[proptest]
    fn count_equals_one_plus_sum_of_count_of_children(t: MockTree<()>) {
        assert_eq!(t.count(), 1 + t.children().count());
    }

    #[proptest]
    fn cost_equals_weight_plus_sum_of_costs_of_children(t: MockTree<()>) {
        assert_eq!(t.cost(), t.weight() + t.children().cost());
    }
}

#[cfg(test)]
pub(crate) use tests::MockTree;