diffy-imara-0.3.2/.cargo_vcs_info.json0000644000000001360000000000100132260ustar { "git": { "sha1": "059cb784afa4380dc3b818c521a059dbad695ff8" }, "path_in_vcs": "" }diffy-imara-0.3.2/.gitignore000064400000000000000000000000341046102023000140030ustar 00000000000000/target Cargo.lock .direnv diffy-imara-0.3.2/CHANGELOG.md000064400000000000000000000030111046102023000136220ustar 00000000000000# Changelog ## [0.3.2] - 2025-02-26 ### Fixed - #9 The doctest to match the new (old) API. This should've been a part of 0.3.0 as well, so yank the previous release... ## [0.3.1] - 2025-02-20 ### Changed - The fields of `MergeOptions` are all private again. This should've been a part of the previous release, so yank that. ## [0.3.0] - 2025-02-18 ### Changed - The fields of `DiffOptions` are all private again ### Upstream - [#36](https://github.com/bmwill/diffy/pull/36) Add ability to configure filenames when creating a patch with `DiffOptions`. - [#37](https://github.com/bmwill/diffy/pull/37) Allow configuring the "No newline at end of file" message from being printed when formatting a patch. - [#38](https://github.com/bmwill/diffy/pull/38) Add support for configuring `suppress_blank_empty`. ## [0.2.0] - 2025-01-30 ### Added - #8 correctly render conflicts in files without the final newline ### Changed - realize that adding imara-diff necessitated an MSRV bump to 1.63.0 ## [0.1.1] - 2025-01-11 ### Changed - the repository URL ## [0.1.0] - 2024-12-27 Fork! ### Added - [#4](https://codeberg.org/ada4a/diffy-imara) switch to imara-diff backend. 
Allows using histogram diff [0.3.1]: https://codeberg.org/ada4a/diffy_imara/releases/tag/v0.3.1 [0.3.0]: https://codeberg.org/ada4a/diffy_imara/releases/tag/v0.3.0 [0.2.0]: https://codeberg.org/ada4a/diffy_imara/releases/tag/v0.2.0 [0.1.1]: https://codeberg.org/ada4a/diffy_imara/releases/tag/0.1.1 [0.1.0]: https://codeberg.org/ada4a/diffy_imara/releases/tag/0.1.0 diffy-imara-0.3.2/Cargo.lock0000644000000136410000000000100112060ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "ahash" version = "0.8.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", "getrandom", "once_cell", "version_check", "zerocopy", ] [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "diffy-imara" version = "0.3.2" dependencies = [ "imara-diff", "nu-ansi-term", ] [[package]] name = "getrandom" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" dependencies = [ "cfg-if", "libc", "wasi", ] [[package]] name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" [[package]] name = "imara-diff" version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc9da1a252bd44cd341657203722352efc9bc0c847d06ea6d2dc1cd1135e0a01" dependencies = [ "ahash", "hashbrown", ] [[package]] name = "libc" version = "0.2.169" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" [[package]] name = "nu-ansi-term" version = 
"0.50.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" dependencies = [ "windows-sys", ] [[package]] name = "once_cell" version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "proc-macro2" version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" dependencies = [ "proc-macro2", ] [[package]] name = "syn" version = "2.0.96" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "unicode-ident" version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" [[package]] name = "version_check" version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "windows-sys" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ "windows-targets", ] [[package]] name = "windows-targets" version = "0.52.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", "windows_i686_gnullvm", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "zerocopy" 
version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", "syn", ] diffy-imara-0.3.2/Cargo.toml0000644000000023430000000000100112260ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.63.0" name = "diffy-imara" version = "0.3.2" authors = ["Ada Alakbarova "] build = false exclude = [ ".forgejo", "flake.nix", "flake.lock", ".envrc", ] autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "Tools for finding and manipulating differences between files" documentation = "https://docs.rs/diffy-imara" readme = "README.md" keywords = [ "diff", "patch", "merge", ] categories = ["text-processing"] license = "MIT OR Apache-2.0" repository = "https://codeberg.org/ada4a/diffy-imara" [lib] name = "diffy_imara" path = "src/lib.rs" [dependencies.imara-diff] version = "0.1.7" [dependencies.nu-ansi-term] version = "0.50" diffy-imara-0.3.2/Cargo.toml.orig000064400000000000000000000010641046102023000147060ustar 00000000000000[package] name = "diffy-imara" version = "0.3.2" authors = ["Ada Alakbarova "] license = "MIT OR Apache-2.0" description = "Tools for finding and manipulating differences between 
files" documentation = "https://docs.rs/diffy-imara" repository = "https://codeberg.org/ada4a/diffy-imara" readme = "README.md" keywords = ["diff", "patch", "merge"] categories = ["text-processing"] rust-version = "1.63.0" edition = "2021" exclude = [".forgejo", "flake.nix", "flake.lock", ".envrc"] [dependencies] imara-diff = "0.1.7" nu-ansi-term = "0.50" diffy-imara-0.3.2/LICENSE-APACHE000064400000000000000000000251111046102023000137420ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright 2025 ada4a Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. diffy-imara-0.3.2/LICENSE-MIT000064400000000000000000000017771046102023000134660ustar 00000000000000Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diffy-imara-0.3.2/README.md000064400000000000000000000011371046102023000132770ustar 00000000000000# diffy-imara [![diffy-imara on crates.io](https://img.shields.io/crates/v/diffy-imara)](https://crates.io/crates/diffy-imara) [![License](https://img.shields.io/badge/license-Apache-green.svg)](LICENSE-APACHE) [![License](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE-MIT) Tools for finding and manipulating differences between files. Fork of [diffy](https://crates.io/crates/diffy) with [imara-diff](https://crates.io/crates/imara-diff) backend. ## License This project is available under the terms of either the [Apache 2.0 license](LICENSE-APACHE) or the [MIT license](LICENSE-MIT). diffy-imara-0.3.2/src/apply.rs000064400000000000000000000140421046102023000143010ustar 00000000000000use crate::{ patch::{Hunk, Line, Patch}, utils::LineIter, }; use std::{fmt, iter}; /// An error returned when [`apply`]ing a `Patch` fails /// /// [`apply`]: fn.apply.html #[derive(Debug)] pub struct ApplyError(usize); impl fmt::Display for ApplyError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "error applying hunk #{}", self.0) } } impl std::error::Error for ApplyError {} #[derive(Debug)] enum ImageLine<'a, T: ?Sized> { Unpatched(&'a T), Patched(&'a T), } impl<'a, T: ?Sized> ImageLine<'a, T> { fn inner(&self) -> &'a T { match self { ImageLine::Unpatched(inner) | ImageLine::Patched(inner) => inner, } } fn into_inner(self) -> &'a T { self.inner() } fn is_patched(&self) -> bool { match self { ImageLine::Unpatched(_) => false, ImageLine::Patched(_) => true, } } } impl Copy for ImageLine<'_, T> {} impl Clone for ImageLine<'_, T> { fn clone(&self) -> Self { *self } } /// Apply a `Patch` to a base image /// /// ``` /// use diffy_imara::{apply, Patch}; /// /// let s = "\ /// --- a/ideals /// +++ b/ideals /// @@ -1,4 +1,6 @@ /// First: /// Life before death, /// strength before weakness, /// journey before destination. 
/// +Second: /// + I will protect those who cannot protect themselves. /// "; /// /// let patch = Patch::from_str(s).unwrap(); /// /// let base_image = "\ /// First: /// Life before death, /// strength before weakness, /// journey before destination. /// "; /// /// let expected = "\ /// First: /// Life before death, /// strength before weakness, /// journey before destination. /// Second: /// I will protect those who cannot protect themselves. /// "; /// /// assert_eq!(apply(base_image, &patch).unwrap(), expected); /// ``` pub fn apply(base_image: &str, patch: &Patch<'_, str>) -> Result { let mut image: Vec<_> = LineIter::new(base_image) .map(ImageLine::Unpatched) .collect(); for (i, hunk) in patch.hunks().iter().enumerate() { apply_hunk(&mut image, hunk).map_err(|_| ApplyError(i + 1))?; } Ok(image.into_iter().map(ImageLine::into_inner).collect()) } /// Apply a non-utf8 `Patch` to a base image pub fn apply_bytes(base_image: &[u8], patch: &Patch<'_, [u8]>) -> Result, ApplyError> { let mut image: Vec<_> = LineIter::new(base_image) .map(ImageLine::Unpatched) .collect(); for (i, hunk) in patch.hunks().iter().enumerate() { apply_hunk(&mut image, hunk).map_err(|_| ApplyError(i + 1))?; } Ok(image .into_iter() .flat_map(ImageLine::into_inner) .copied() .collect()) } fn apply_hunk<'a, T: PartialEq + ?Sized>( image: &mut Vec>, hunk: &Hunk<'a, T>, ) -> Result<(), ()> { // Find position let pos = find_position(image, hunk).ok_or(())?; // update image image.splice( pos..pos + pre_image_line_count(hunk.lines()), post_image(hunk.lines()).map(ImageLine::Patched), ); Ok(()) } // Search in `image` for a place to apply hunk. // This follows the general algorithm (minus fuzzy-matching context lines) described in GNU patch's // man page. 
// // It might be worth looking into other possible positions to apply the hunk to as described here: // https://neil.fraser.name/writing/patch/ fn find_position( image: &[ImageLine], hunk: &Hunk<'_, T>, ) -> Option { // In order to avoid searching through positions which are out of bounds of the image, // clamp the starting position based on the length of the image let pos = std::cmp::min(hunk.new_range().start().saturating_sub(1), image.len()); // Create an iterator that starts with 'pos' and then interleaves // moving pos backward/forward by one. let backward = (0..pos).rev(); let forward = pos + 1..image.len(); iter::once(pos) .chain(interleave(backward, forward)) .find(|&pos| match_fragment(image, hunk.lines(), pos)) } fn pre_image_line_count(lines: &[Line<'_, T>]) -> usize { pre_image(lines).count() } fn post_image<'a, 'b, T: ?Sized>(lines: &'b [Line<'a, T>]) -> impl Iterator + 'b { lines.iter().filter_map(|line| match line { Line::Context(l) | Line::Insert(l) => Some(*l), Line::Delete(_) => None, }) } fn pre_image<'a, 'b, T: ?Sized>(lines: &'b [Line<'a, T>]) -> impl Iterator + 'b { lines.iter().filter_map(|line| match line { Line::Context(l) | Line::Delete(l) => Some(*l), Line::Insert(_) => None, }) } fn match_fragment( image: &[ImageLine], lines: &[Line<'_, T>], pos: usize, ) -> bool { let len = pre_image_line_count(lines); let image = if let Some(image) = image.get(pos..pos + len) { image } else { return false; }; // If any of these lines have already been patched then we can't match at this position if image.iter().any(ImageLine::is_patched) { return false; } pre_image(lines).eq(image.iter().map(ImageLine::inner)) } #[derive(Debug)] struct Interleave { a: iter::Fuse, b: iter::Fuse, flag: bool, } fn interleave( i: I, j: J, ) -> Interleave<::IntoIter, ::IntoIter> where I: IntoIterator, J: IntoIterator, { Interleave { a: i.into_iter().fuse(), b: j.into_iter().fuse(), flag: false, } } impl Iterator for Interleave where I: Iterator, J: Iterator, { type Item = 
I::Item; fn next(&mut self) -> Option { self.flag = !self.flag; if self.flag { match self.a.next() { None => self.b.next(), item => item, } } else { match self.b.next() { None => self.a.next(), item => item, } } } } diffy-imara-0.3.2/src/diff/cleanup.rs000064400000000000000000000214751046102023000155230ustar 00000000000000use crate::range::{DiffRange, SliceLike}; // Walks through all edits and shifts them up and then down, trying to see if they run into similar // edits which can be merged #[allow(clippy::needless_lifetimes)] pub fn compact<'a, 'b, T: ?Sized + SliceLike>(diffs: &mut Vec>) { // First attempt to compact all Deletions let mut pointer = 0; while let Some(&diff) = diffs.get(pointer) { if let DiffRange::Delete(_) = diff { pointer = shift_diff_up(diffs, pointer); pointer = shift_diff_down(diffs, pointer); } pointer += 1; } // TODO maybe able to merge these and do them in the same pass? // Then attempt to compact all Insertions let mut pointer = 0; while let Some(&diff) = diffs.get(pointer) { if let DiffRange::Insert(_) = diff { pointer = shift_diff_up(diffs, pointer); pointer = shift_diff_down(diffs, pointer); } pointer += 1; } } // Attempts to shift the Insertion or Deletion at location `pointer` as far upwards as possible. 
#[allow(clippy::needless_lifetimes)] fn shift_diff_up<'a, 'b, T: ?Sized + SliceLike>( diffs: &mut Vec>, mut pointer: usize, ) -> usize { while let Some(&prev_diff) = pointer.checked_sub(1).and_then(|idx| diffs.get(idx)) { match (diffs[pointer], prev_diff) { // // Shift Inserts Upwards // (DiffRange::Insert(this_diff), DiffRange::Equal(prev_diff1, _)) => { // check common suffix for the amount we can shift let suffix_len = this_diff.common_suffix_len(prev_diff1); if suffix_len != 0 { if let Some(DiffRange::Equal(..)) = diffs.get(pointer + 1) { diffs[pointer + 1].grow_up(suffix_len); } else { diffs.insert( pointer + 1, DiffRange::Equal( prev_diff1.slice(prev_diff1.len() - suffix_len..), this_diff.slice(this_diff.len() - suffix_len..), ), ); } diffs[pointer].shift_up(suffix_len); diffs[pointer - 1].shrink_back(suffix_len); if diffs[pointer - 1].is_empty() { diffs.remove(pointer - 1); pointer -= 1; } } else if diffs[pointer - 1].is_empty() { diffs.remove(pointer - 1); pointer -= 1; } else { // We can't shift upwards anymore break; } } // // Shift Deletions Upwards // (DiffRange::Delete(this_diff), DiffRange::Equal(_, prev_diff2)) => { // check common suffix for the amount we can shift let suffix_len = this_diff.common_suffix_len(prev_diff2); if suffix_len != 0 { if let Some(DiffRange::Equal(..)) = diffs.get(pointer + 1) { diffs[pointer + 1].grow_up(suffix_len); } else { diffs.insert( pointer + 1, DiffRange::Equal( this_diff.slice(this_diff.len() - suffix_len..), prev_diff2.slice(prev_diff2.len() - suffix_len..), ), ); } diffs[pointer].shift_up(suffix_len); diffs[pointer - 1].shrink_back(suffix_len); if diffs[pointer - 1].is_empty() { diffs.remove(pointer - 1); pointer -= 1; } } else if diffs[pointer - 1].is_empty() { diffs.remove(pointer - 1); pointer -= 1; } else { // We can't shift upwards anymore break; } } // // Swap the Delete and Insert // (DiffRange::Insert(_), DiffRange::Delete(_)) | (DiffRange::Delete(_), DiffRange::Insert(_)) => { diffs.swap(pointer - 1, 
pointer); pointer -= 1; } // // Merge the two ranges // (this_diff @ DiffRange::Insert(_), DiffRange::Insert(_)) | (this_diff @ DiffRange::Delete(_), DiffRange::Delete(_)) => { diffs[pointer - 1].grow_down(this_diff.len()); diffs.remove(pointer); pointer -= 1; } _ => panic!("range to shift must be either Insert or Delete"), } } pointer } // Attempts to shift the Insertion or Deletion at location `pointer` as far downwards as possible. #[allow(clippy::needless_lifetimes)] fn shift_diff_down<'a, 'b, T: ?Sized + SliceLike>( diffs: &mut Vec>, mut pointer: usize, ) -> usize { while let Some(&next_diff) = pointer.checked_add(1).and_then(|idx| diffs.get(idx)) { match (diffs[pointer], next_diff) { // // Shift Insert Downward // (DiffRange::Insert(this_diff), DiffRange::Equal(next_diff1, _)) => { // check common prefix for the amount we can shift let prefix_len = this_diff.common_prefix_len(next_diff1); if prefix_len != 0 { if let Some(DiffRange::Equal(..)) = pointer.checked_sub(1).and_then(|idx| diffs.get(idx)) { diffs[pointer - 1].grow_down(prefix_len); } else { diffs.insert( pointer, DiffRange::Equal( next_diff1.slice(..prefix_len), this_diff.slice(..prefix_len), ), ); pointer += 1; } diffs[pointer].shift_down(prefix_len); diffs[pointer + 1].shrink_front(prefix_len); if diffs[pointer + 1].is_empty() { diffs.remove(pointer + 1); } } else if diffs[pointer + 1].is_empty() { diffs.remove(pointer + 1); } else { // We can't shift downwards anymore break; } } // // Shift Deletion Downward // (DiffRange::Delete(this_diff), DiffRange::Equal(_, next_diff2)) => { // check common prefix for the amount we can shift let prefix_len = this_diff.common_prefix_len(next_diff2); if prefix_len != 0 { if let Some(DiffRange::Equal(..)) = pointer.checked_sub(1).and_then(|idx| diffs.get(idx)) { diffs[pointer - 1].grow_down(prefix_len); } else { diffs.insert( pointer, DiffRange::Equal( this_diff.slice(..prefix_len), next_diff2.slice(..prefix_len), ), ); pointer += 1; } 
diffs[pointer].shift_down(prefix_len); diffs[pointer + 1].shrink_front(prefix_len); if diffs[pointer + 1].is_empty() { diffs.remove(pointer + 1); } } else if diffs[pointer + 1].is_empty() { diffs.remove(pointer + 1); } else { // We can't shift downwards anymore break; } } // // Swap the Delete and Insert // (DiffRange::Insert(_), DiffRange::Delete(_)) | (DiffRange::Delete(_), DiffRange::Insert(_)) => { diffs.swap(pointer, pointer + 1); pointer += 1; } // // Merge the two ranges // (DiffRange::Insert(_), next_diff @ DiffRange::Insert(_)) | (DiffRange::Delete(_), next_diff @ DiffRange::Delete(_)) => { diffs[pointer].grow_down(next_diff.len()); diffs.remove(pointer + 1); } _ => panic!("range to shift must be either Insert or Delete"), } } pointer } diffy-imara-0.3.2/src/diff/mod.rs000064400000000000000000000332221046102023000146440ustar 00000000000000use imara_diff::{ intern::{InternedInput, Token}, sources::{byte_lines_with_terminator, lines_with_terminator}, }; use crate::{ patch::{Hunk, HunkRange, Line, Patch}, range::{DiffRange, SliceLike}, sink::DiffyDiffRangeBuilder, Algorithm, }; use std::{borrow::Cow, cmp, hash::Hash, ops}; mod cleanup; mod myers; #[cfg(test)] mod tests; // TODO determine if this should be exposed in the public API #[allow(dead_code)] #[derive(Debug, PartialEq, Eq)] enum Diff<'a, T: ?Sized> { Equal(&'a T), Delete(&'a T), Insert(&'a T), } impl Copy for Diff<'_, T> {} impl Clone for Diff<'_, T> { fn clone(&self) -> Self { *self } } impl<'a, T> From> for Diff<'a, T> where T: ?Sized + SliceLike, { fn from(diff: DiffRange<'a, 'a, T>) -> Self { match diff { DiffRange::Equal(range, _) => Diff::Equal(range.as_slice()), DiffRange::Delete(range) => Diff::Delete(range.as_slice()), DiffRange::Insert(range) => Diff::Insert(range.as_slice()), } } } /// A collection of options for modifying the way a diff is performed #[derive(Debug)] pub struct DiffOptions { compact: bool, context_len: usize, algorithm: Algorithm, original_filename: Option>, 
modified_filename: Option>, } impl DiffOptions { /// Construct a new `DiffOptions` with default settings /// /// ## Defaults /// * context_len = 3 /// * algorithm = Algorithm::Histogram pub fn new() -> Self { Self { compact: true, context_len: 3, algorithm: Algorithm::Histogram, original_filename: Some("original".into()), modified_filename: Some("modified".into()), } } /// Set the number of context lines that should be used when producing a patch pub fn set_context_len(&mut self, context_len: usize) -> &mut Self { self.context_len = context_len; self } /// Enable/Disable diff compaction. Compaction is a post-processing step which attempts to /// produce a prettier diff by reducing the number of edited blocks by shifting and merging /// edit blocks. // TODO determine if this should be exposed in the public API #[allow(dead_code)] fn set_compact(&mut self, compact: bool) -> &mut Self { self.compact = compact; self } /// Set the algorithm used to perform the diff. pub fn set_algorithm(&mut self, algorithm: Algorithm) -> &mut Self { self.algorithm = algorithm; self } /// Set the filename to be used in the patch for the original text /// /// If not set, the default value is "original". pub fn set_original_filename(&mut self, filename: T) -> &mut Self where T: Into>, { self.original_filename = Some(filename.into()); self } /// Set the filename to be used in the patch for the modified text /// /// If not set, the default value is "modified". 
pub fn set_modified_filename(&mut self, filename: T) -> &mut Self where T: Into>, { self.modified_filename = Some(filename.into()); self } // TODO determine if this should be exposed in the public API #[allow(dead_code)] fn diff<'a>(&self, original: &'a str, modified: &'a str) -> Vec> { let solution = myers::diff(original.as_bytes(), modified.as_bytes()); let mut solution = solution .into_iter() .map(|diff_range| diff_range.to_str(original, modified)) .collect(); if self.compact { cleanup::compact(&mut solution); } solution.into_iter().map(Diff::from).collect() } /// Produce a Patch between two texts based on the configured options pub fn create_patch<'a>(&self, original: &'a str, modified: &'a str) -> Patch<'a, str> { let input = InternedInput::new(original, modified); let old_lines: Vec<_> = lines_with_terminator(original).collect(); let new_lines: Vec<_> = lines_with_terminator(modified).collect(); let solution = self.diff_interned(&input); let hunks = to_hunks(&old_lines, &new_lines, &solution, self.context_len); Patch::new( self.original_filename.clone(), self.modified_filename.clone(), hunks, ) } /// Create a patch between two potentially non-utf8 texts pub fn create_patch_bytes<'a>( &self, original: &'a [u8], modified: &'a [u8], ) -> Patch<'a, [u8]> { let input = InternedInput::new(original, modified); let old_lines: Vec<_> = byte_lines_with_terminator(original).collect(); let new_lines: Vec<_> = byte_lines_with_terminator(modified).collect(); let solution = self.diff_interned(&input); let hunks = to_hunks(&old_lines, &new_lines, &solution, self.context_len); // helper function to convert a utf8 cow to a bytes cow fn cow_str_to_bytes(cow: Cow<'static, str>) -> Cow<'static, [u8]> { match cow { Cow::Borrowed(b) => Cow::Borrowed(b.as_bytes()), Cow::Owned(o) => Cow::Owned(o.into_bytes()), } } Patch::new( self.original_filename.clone().map(cow_str_to_bytes), self.modified_filename.clone().map(cow_str_to_bytes), hunks, ) } pub(crate) fn diff_interned<'a, T: Eq + 
Hash>( &self, input: &'a InternedInput, ) -> Vec> { let sink = DiffyDiffRangeBuilder::new(input); imara_diff::diff(self.algorithm, input, sink) } pub(crate) fn diff_tokens<'a>( &self, before: &'a [Token], after: &'a [Token], num_tokens: u32, ) -> Vec> { let sink = DiffyDiffRangeBuilder::from_tokens(before, after); imara_diff::diff_with_tokens(self.algorithm, before, after, num_tokens, sink) } } impl Default for DiffOptions { fn default() -> Self { Self::new() } } // TODO determine if this should be exposed in the public API #[allow(dead_code)] fn diff<'a>(original: &'a str, modified: &'a str) -> Vec> { DiffOptions::default().diff(original, modified) } /// Create a patch between two texts. /// /// ``` /// # use diffy_imara::create_patch; /// let original = "\ /// I am afraid, however, that all I have known - that my story - will be forgotten. /// I am afraid for the world that is to come. /// Afraid that my plans will fail. /// Afraid of a doom worse than the Deepness. /// "; /// /// let modified = "\ /// I am afraid, however, that all I have known - that my story - will be forgotten. /// I am afraid for the world that is to come. /// Afraid that Alendi will fail. /// Afraid of a doom brought by the Deepness. /// "; /// /// let expected = "\ /// --- original /// +++ modified /// @@ -1,4 +1,4 @@ /// I am afraid, however, that all I have known - that my story - will be forgotten. /// I am afraid for the world that is to come. /// -Afraid that my plans will fail. /// -Afraid of a doom worse than the Deepness. /// +Afraid that Alendi will fail. /// +Afraid of a doom brought by the Deepness. 
/// "; /// /// let patch = create_patch(original, modified); /// assert_eq!(patch.to_string(), expected); /// ``` pub fn create_patch<'a>(original: &'a str, modified: &'a str) -> Patch<'a, str> { DiffOptions::default().create_patch(original, modified) } /// Create a patch between two potentially non-utf8 texts pub fn create_patch_bytes<'a>(original: &'a [u8], modified: &'a [u8]) -> Patch<'a, [u8]> { DiffOptions::default().create_patch_bytes(original, modified) } fn to_hunks<'a, T: ?Sized>( lines1: &[&'a T], lines2: &[&'a T], solution: &[DiffRange<[Token]>], context_len: usize, ) -> Vec> { let edit_script = build_edit_script(solution); let mut hunks = Vec::new(); let mut idx = 0; while let Some(mut script) = edit_script.get(idx) { let start1 = script.old.start.saturating_sub(context_len); let start2 = script.new.start.saturating_sub(context_len); let (mut end1, mut end2) = calc_end( context_len, lines1.len(), lines2.len(), script.old.end, script.new.end, ); let mut lines = Vec::new(); // Pre-context for line in lines2.get(start2..script.new.start).into_iter().flatten() { lines.push(Line::Context(*line)); } loop { // Delete lines from text1 for line in lines1.get(script.old.clone()).into_iter().flatten() { lines.push(Line::Delete(*line)); } // Insert lines from text2 for line in lines2.get(script.new.clone()).into_iter().flatten() { lines.push(Line::Insert(*line)); } if let Some(s) = edit_script.get(idx + 1) { // Check to see if we can merge the hunks let start1_next = cmp::min(s.old.start, lines1.len() - 1).saturating_sub(context_len); if start1_next < end1 { // Context lines between hunks for (_i1, i2) in (script.old.end..s.old.start).zip(script.new.end..s.new.start) { if let Some(line) = lines2.get(i2) { lines.push(Line::Context(*line)); } } // Calc the new end let (e1, e2) = calc_end( context_len, lines1.len(), lines2.len(), s.old.end, s.new.end, ); end1 = e1; end2 = e2; script = s; idx += 1; continue; } } break; } // Post-context for line in 
lines2.get(script.new.end..end2).into_iter().flatten() { lines.push(Line::Context(*line)); } let len1 = end1 - start1; let old_range = HunkRange::new(if len1 > 0 { start1 + 1 } else { start1 }, len1); let len2 = end2 - start2; let new_range = HunkRange::new(if len2 > 0 { start2 + 1 } else { start2 }, len2); hunks.push(Hunk::new(old_range, new_range, None, lines)); idx += 1; } hunks } fn calc_end( context_len: usize, text1_len: usize, text2_len: usize, script1_end: usize, script2_end: usize, ) -> (usize, usize) { let post_context_len = cmp::min( context_len, cmp::min( text1_len.saturating_sub(script1_end), text2_len.saturating_sub(script2_end), ), ); let end1 = script1_end + post_context_len; let end2 = script2_end + post_context_len; (end1, end2) } #[derive(Debug)] struct EditRange { old: ops::Range, new: ops::Range, } impl EditRange { fn new(old: ops::Range, new: ops::Range) -> Self { Self { old, new } } } fn build_edit_script(solution: &[DiffRange<[T]>]) -> Vec { let mut idx_a = 0; let mut idx_b = 0; let mut edit_script: Vec = Vec::new(); let mut script = None; for diff in solution { match diff { DiffRange::Equal(range1, range2) => { idx_a += range1.len(); idx_b += range2.len(); if let Some(script) = script.take() { edit_script.push(script); } } DiffRange::Delete(range) => { match script { Some(ref mut s) => s.old.end += range.len(), None => { script = Some(EditRange::new(idx_a..idx_a + range.len(), idx_b..idx_b)); } } idx_a += range.len(); } DiffRange::Insert(range) => { match script { Some(ref mut s) => s.new.end += range.len(), None => { script = Some(EditRange::new(idx_a..idx_a, idx_b..idx_b + range.len())); } } idx_b += range.len(); } } } if let Some(script) = script.take() { edit_script.push(script); } edit_script } #[cfg(test)] mod test { use super::DiffOptions; #[test] fn set_original_and_modified_filenames() { let original = "\ I am afraid, however, that all I have known - that my story - will be forgotten. I am afraid for the world that is to come. 
Afraid that my plans will fail. Afraid of a doom worse than the Deepness. "; let modified = "\ I am afraid, however, that all I have known - that my story - will be forgotten. I am afraid for the world that is to come. Afraid that Alendi will fail. Afraid of a doom brought by the Deepness. "; let expected = "\ --- the old version +++ the better version @@ -1,4 +1,4 @@ I am afraid, however, that all I have known - that my story - will be forgotten. I am afraid for the world that is to come. -Afraid that my plans will fail. -Afraid of a doom worse than the Deepness. +Afraid that Alendi will fail. +Afraid of a doom brought by the Deepness. "; let patch = DiffOptions::new() .set_original_filename("the old version") .set_modified_filename("the better version") .create_patch(original, modified); assert_eq!(patch.to_string(), expected); } } diffy-imara-0.3.2/src/diff/myers.rs000064400000000000000000000210541046102023000152240ustar 00000000000000use crate::range::{DiffRange, Range}; use std::ops::{Index, IndexMut}; // A D-path is a path which starts at (0,0) that has exactly D non-diagonal edges. All D-paths // consist of a (D - 1)-path followed by a non-diagonal edge and then a possibly empty sequence of // diagonal edges called a snake. /// `V` contains the endpoints of the furthest reaching `D-paths`. For each recorded endpoint /// `(x,y)` in diagonal `k`, we only need to retain `x` because `y` can be computed from `x - k`. /// In other words, `V` is an array of integers where `V[k]` contains the row index of the endpoint /// of the furthest reaching path in diagonal `k`. /// /// We can't use a traditional Vec to represent `V` since we use `k` as an index and it can take on /// negative values. So instead `V` is represented as a light-weight wrapper around a Vec plus an /// `offset` which is the maximum value `k` can take on in order to map negative `k`'s back to a /// value >= 0. 
#[derive(Debug, Clone)] struct V { offset: isize, v: Vec, // Look into initializing this to -1 and storing isize } impl V { fn new(max_d: usize) -> Self { Self { offset: max_d as isize, v: vec![0; 2 * max_d], } } fn len(&self) -> usize { self.v.len() } } impl Index for V { type Output = usize; fn index(&self, index: isize) -> &Self::Output { &self.v[(index + self.offset) as usize] } } impl IndexMut for V { fn index_mut(&mut self, index: isize) -> &mut Self::Output { &mut self.v[(index + self.offset) as usize] } } /// A `Snake` is a sequence of diagonal edges in the edit graph. It is possible for a snake to have /// a length of zero, meaning the start and end points are the same. #[derive(Debug)] struct Snake { x_start: usize, y_start: usize, x_end: usize, y_end: usize, } impl ::std::fmt::Display for Snake { fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { write!( f, "({}, {}) -> ({}, {})", self.x_start, self.y_start, self.x_end, self.y_end ) } } fn max_d(len1: usize, len2: usize) -> usize { // XXX look into reducing the need to have the additional '+ 1' (len1 + len2 + 1) / 2 + 1 } // The divide part of a divide-and-conquer strategy. A D-path has D+1 snakes some of which may // be empty. The divide step requires finding the ceil(D/2) + 1 or middle snake of an optimal // D-path. The idea for doing so is to simultaneously run the basic algorithm in both the // forward and reverse directions until furthest reaching forward and reverse paths starting at // opposing corners 'overlap'. fn find_middle_snake( old: Range<'_, [T]>, new: Range<'_, [T]>, vf: &mut V, vb: &mut V, ) -> (isize, Snake) { let n = old.len(); let m = new.len(); // By Lemma 1 in the paper, the optimal edit script length is odd or even as `delta` is odd // or even. 
let delta = n as isize - m as isize; let odd = delta & 1 == 1; // The initial point at (0, -1) vf[1] = 0; // The initial point at (N, M+1) vb[1] = 0; // We only need to explore ceil(D/2) + 1 let d_max = max_d(n, m); assert!(vf.len() >= d_max); assert!(vb.len() >= d_max); for d in 0..d_max as isize { // Forward path for k in (-d..=d).rev().step_by(2) { let mut x = if k == -d || (k != d && vf[k - 1] < vf[k + 1]) { vf[k + 1] } else { vf[k - 1] + 1 }; let mut y = (x as isize - k) as usize; // The coordinate of the start of a snake let (x0, y0) = (x, y); // While these sequences are identical, keep moving through the graph with no cost if let (Some(s1), Some(s2)) = (old.get(x..), new.get(y..)) { let advance = s1.common_prefix_len(s2); x += advance; y += advance; } // This is the new best x value vf[k] = x; // Only check for connections from the forward search when N - M is odd // and when there is a reciprocal k line coming from the other direction. if odd && (k - delta).abs() <= (d - 1) { // TODO optimize this so we don't have to compare against n if vf[k] + vb[-(k - delta)] >= n { // Return the snake let snake = Snake { x_start: x0, y_start: y0, x_end: x, y_end: y, }; // Edit distance to this snake is `2 * d - 1` return (2 * d - 1, snake); } } } // Backward path for k in (-d..=d).rev().step_by(2) { let mut x = if k == -d || (k != d && vb[k - 1] < vb[k + 1]) { vb[k + 1] } else { vb[k - 1] + 1 }; let mut y = (x as isize - k) as usize; // The coordinate of the start of a snake let (x0, y0) = (x, y); if x < n && y < m { let advance = old.slice(..n - x).common_suffix_len(new.slice(..m - y)); x += advance; y += advance; } // This is the new best x value vb[k] = x; if !odd && (k - delta).abs() <= d { // TODO optimize this so we don't have to compare against n if vb[k] + vf[-(k - delta)] >= n { // Return the snake let snake = Snake { x_start: n - x, y_start: m - y, x_end: n - x0, y_end: m - y0, }; // Edit distance to this snake is `2 * d` return (2 * d, snake); } } } // TODO: 
Maybe there's an opportunity to optimize and bail early? } unreachable!("unable to find a middle snake"); } fn conquer<'a, 'b, T: PartialEq>( mut old: Range<'a, [T]>, mut new: Range<'b, [T]>, vf: &mut V, vb: &mut V, solution: &mut Vec>, ) { // Check for common prefix let common_prefix_len = old.common_prefix_len(new); if common_prefix_len > 0 { let common_prefix = DiffRange::Equal( old.slice(..common_prefix_len), new.slice(..common_prefix_len), ); solution.push(common_prefix); } old = old.slice(common_prefix_len..old.len()); new = new.slice(common_prefix_len..new.len()); // Check for common suffix let common_suffix_len = old.common_suffix_len(new); let common_suffix = DiffRange::Equal( old.slice(old.len() - common_suffix_len..), new.slice(new.len() - common_suffix_len..), ); old = old.slice(..old.len() - common_suffix_len); new = new.slice(..new.len() - common_suffix_len); if old.is_empty() && new.is_empty() { // Do nothing } else if old.is_empty() { // Inserts solution.push(DiffRange::Insert(new)); } else if new.is_empty() { // Deletes solution.push(DiffRange::Delete(old)); } else { // Divide & Conquer let (_shortest_edit_script_len, snake) = find_middle_snake(old, new, vf, vb); let (old_a, old_b) = old.split_at(snake.x_start); let (new_a, new_b) = new.split_at(snake.y_start); conquer(old_a, new_a, vf, vb, solution); conquer(old_b, new_b, vf, vb, solution); } if common_suffix_len > 0 { solution.push(common_suffix); } } pub fn diff<'a, 'b, T: PartialEq>(old: &'a [T], new: &'b [T]) -> Vec> { let old_recs = Range::new(old, ..); let new_recs = Range::new(new, ..); let mut solution = Vec::new(); // The arrays that hold the 'best possible x values' in search from: // `vf`: top left to bottom right // `vb`: bottom right to top left let max_d = max_d(old.len(), new.len()); let mut vf = V::new(max_d); let mut vb = V::new(max_d); conquer(old_recs, new_recs, &mut vf, &mut vb, &mut solution); solution } #[cfg(test)] mod tests { use super::*; #[test] fn 
test_find_middle_snake() { let a = Range::new(&b"ABCABBA"[..], ..); let b = Range::new(&b"CBABAC"[..], ..); let max_d = max_d(a.len(), b.len()); let mut vf = V::new(max_d); let mut vb = V::new(max_d); find_middle_snake(a, b, &mut vf, &mut vb); } } diffy-imara-0.3.2/src/diff/tests.rs000064400000000000000000000521551046102023000152350ustar 00000000000000use super::*; use crate::{ apply::apply, diff::{Diff, DiffRange}, patch::Patch, range::Range, PatchFormatter, }; // Helper macros are based off of the ones used in [dissimilar](https://docs.rs/dissimilar) macro_rules! diff_range_list { () => { Vec::new() }; ($($kind:ident($text:literal)),+ $(,)?) => {{ macro_rules! text1 { (Insert, $s:literal) => { "" }; (Delete, $s:literal) => { $s }; (Equal, $s:literal) => { $s }; } macro_rules! text2 { (Insert, $s:literal) => { $s }; (Delete, $s:literal) => { "" }; (Equal, $s:literal) => { $s }; } let _text1 = concat!($(text1!($kind, $text)),*); let _text2 = concat!($(text2!($kind, $text)),*); let (_i, _j) = (&mut 0, &mut 0); macro_rules! range { (Insert, $s:literal) => { DiffRange::Insert(range(_text2, _j, $s)) }; (Delete, $s:literal) => { DiffRange::Delete(range(_text1, _i, $s)) }; (Equal, $s:literal) => { DiffRange::Equal(range(_text1, _i, $s), range(_text2, _j, $s)) }; } vec![$(range!($kind, $text)),*] }}; } fn range<'a>(doc: &'a str, offset: &mut usize, text: &str) -> Range<'a, str> { let range = Range::new(doc, *offset..*offset + text.len()); *offset += text.len(); range } macro_rules! assert_diff_range { ([$($kind:ident($text:literal)),* $(,)?], $solution:ident $(,)?) => { let expected = &[$(Diff::$kind($text)),*]; assert!( same_diffs(expected, &$solution), concat!("\nexpected={:#?}\nactual={:#?}"), expected, $solution, ); }; ([$($kind:ident($text:literal)),* $(,)?], $solution:ident, $msg:expr $(,)?) 
=> { let expected = &[$(Diff::$kind($text)),*]; assert!( same_diffs(expected, &$solution), concat!($msg, "\nexpected={:#?}\nactual={:#?}"), expected, $solution, ); }; } fn same_diffs(expected: &[Diff], actual: &[DiffRange]) -> bool { expected.len() == actual.len() && expected.iter().zip(actual).all(|pair| match pair { (Diff::Insert(expected), DiffRange::Insert(actual)) => *expected == actual.as_slice(), (Diff::Delete(expected), DiffRange::Delete(actual)) => *expected == actual.as_slice(), (Diff::Equal(expected), DiffRange::Equal(actual1, actual2)) => { *expected == actual1.as_slice() && *expected == actual2.as_slice() } (_, _) => false, }) } #[test] fn test_diff_str() { let a = "ABCABBA"; let b = "CBABAC"; let solution = diff(a, b); use Diff::*; assert_eq!( &solution, &[ Delete("AB"), Equal("C"), Delete("A"), Equal("B"), Insert("A"), Equal("BA"), Insert("C"), ], ); let a = "abgdef"; let b = "gh"; let solution = diff(a, b); assert_eq!( &solution, &[Delete("ab"), Equal("g"), Delete("def"), Insert("h")], ); let a = "bat"; let b = "map"; let solution = diff(a, b); assert_eq!( &solution, &[ Delete("b"), Insert("m"), Equal("a"), Delete("t"), Insert("p"), ], ); let a = "ACZBDZ"; let b = "ACBCBDEFD"; let solution = diff(a, b); assert_eq!( &solution, &[ Equal("AC"), Delete("Z"), Equal("B"), Insert("CBDEF"), Equal("D"), Delete("Z"), ], ); let a = "1A "; let b = "1A B A 2"; let solution = diff(a, b); assert_eq!(&solution, &[Equal("1A "), Insert("B A 2")],); let a = "ACBD"; let b = "ACBCBDEFD"; let solution = diff(a, b); assert_eq!(&solution, &[Equal("ACB"), Insert("CBDEF"), Equal("D")],); let a = "abc"; let b = "def"; let solution = diff(a, b); assert_eq!(&solution, &[Delete("abc"), Insert("def")], "No Equal"); } #[test] fn test_unicode() { // Unicode snowman and unicode comet have the same first two bytes. A // byte-based diff would produce a 2-byte Equal followed by 1-byte Delete // and Insert. 
let snowman = "\u{2603}"; let comet = "\u{2604}"; assert_eq!(snowman.as_bytes()[..2], comet.as_bytes()[..2]); let d = diff(snowman, comet); assert_eq!(d, vec![Diff::Delete(snowman), Diff::Insert(comet)]); } #[test] fn test_compact() { let mut solution = diff_range_list![]; cleanup::compact(&mut solution); assert_diff_range!([], solution, "Null case"); let mut solution = diff_range_list![Equal("a"), Delete("b"), Insert("c")]; cleanup::compact(&mut solution); assert_diff_range!( [Equal("a"), Delete("b"), Insert("c")], solution, "No change case", ); // TODO implement equality compaction // let mut solution = diff_range_list![Equal("a"), Equal("b"), Equal("c")]; // cleanup::compact(&mut solution); // assert_diff_range!([Equal("abc")], solution, "Compact equalities"); let mut solution = diff_range_list![Delete("a"), Delete("b"), Delete("c")]; cleanup::compact(&mut solution); assert_diff_range!([Delete("abc")], solution, "Compact deletions"); let mut solution = diff_range_list![Insert("a"), Insert("b"), Insert("c")]; cleanup::compact(&mut solution); assert_diff_range!([Insert("abc")], solution, "Compact Insertions"); let mut solution = diff_range_list![ Delete("a"), Insert("b"), Delete("c"), Insert("d"), Equal("ef"), ]; cleanup::compact(&mut solution); assert_diff_range!( [Delete("ac"), Insert("bd"), Equal("ef")], solution, "Compact interweave", ); let mut solution = diff_range_list![ Equal("a"), Delete("b"), Equal("c"), Delete("ac"), Equal("x"), ]; cleanup::compact(&mut solution); assert_diff_range!( [Equal("a"), Delete("bca"), Equal("cx")], solution, "Slide edit left", ); let mut solution = diff_range_list![ Equal("x"), Delete("ca"), Equal("c"), Delete("b"), Equal("a"), ]; cleanup::compact(&mut solution); assert_diff_range!([Equal("xca"), Delete("cba")], solution, "Slide edit right"); let mut solution = diff_range_list![Equal(""), Insert("a"), Equal("b")]; cleanup::compact(&mut solution); assert_diff_range!([Insert("a"), Equal("b")], solution, "Empty equality"); let 
mut solution = diff_range_list![Equal("1"), Insert("A B "), Equal("A "), Insert("2")]; cleanup::compact(&mut solution); assert_diff_range!([Equal("1A "), Insert("B A 2")], solution); let mut solution = diff_range_list![Equal("AC"), Insert("BC"), Equal("BD"), Insert("EFD")]; cleanup::compact(&mut solution); assert_diff_range!([Equal("ACB"), Insert("CBDEF"), Equal("D")], solution); let mut solution = diff_range_list![ Equal("AC"), Delete("Z"), Insert("BC"), Equal("BD"), Delete("Z"), Insert("EFD"), ]; cleanup::compact(&mut solution); assert_diff_range!( [ Equal("AC"), Delete("Z"), Equal("B"), Insert("CBDEF"), Equal("D"), Delete("Z"), ], solution, "Compact Inserts" ); let mut solution = diff_range_list![ Equal("AC"), Insert("Z"), Delete("BC"), Equal("BD"), Insert("Z"), Delete("EFD"), ]; cleanup::compact(&mut solution); assert_diff_range!( [ Equal("AC"), Insert("Z"), Equal("B"), Delete("CBDEF"), Equal("D"), Insert("Z"), ], solution, "Compact Deletions" ); } macro_rules! assert_patch { ($diff_options:expr, $old:ident, $new:ident, $expected:ident $(,)?) => { let patch = $diff_options.create_patch($old, $new); let bpatch = $diff_options.create_patch_bytes($old.as_bytes(), $new.as_bytes()); let patch_str = patch.to_string(); let patch_bytes = bpatch.to_bytes(); assert_eq!(patch_str, $expected); assert_eq!(patch_bytes, patch_str.as_bytes()); assert_eq!(patch_bytes, $expected.as_bytes()); assert_eq!(Patch::from_str($expected).unwrap(), patch); assert_eq!(Patch::from_str(&patch_str).unwrap(), patch); assert_eq!(Patch::from_bytes($expected.as_bytes()).unwrap(), bpatch); assert_eq!(Patch::from_bytes(&patch_bytes).unwrap(), bpatch); assert_eq!(apply($old, &patch).unwrap(), $new); assert_eq!( crate::apply_bytes($old.as_bytes(), &bpatch).unwrap(), $new.as_bytes() ); }; ($old:ident, $new:ident, $expected:ident $(,)?) 
=> { assert_patch!(DiffOptions::default(), $old, $new, $expected); }; } #[test] fn diff_str() { let a = "A\nB\nC\nA\nB\nB\nA\n"; let b = "C\nB\nA\nB\nA\nC\n"; let expected_myers = "\ --- original +++ modified @@ -1,7 +1,6 @@ -A -B C -A B +A B A +C "; let opts = DiffOptions { algorithm: Algorithm::Myers, ..Default::default() }; assert_patch!(opts, a, b, expected_myers); let expected_histogram = "\ --- original +++ modified @@ -1,7 +1,6 @@ -A -B C -A -B B A +B +A +C "; assert_patch!(a, b, expected_histogram); } #[test] fn sample() { let mut opts = DiffOptions::default(); let lao = "\ The Way that can be told of is not the eternal Way; The name that can be named is not the eternal name. The Nameless is the origin of Heaven and Earth; The Named is the mother of all things. Therefore let there always be non-being, so we may see their subtlety, And let there always be being, so we may see their outcome. The two are the same, But after they are produced, they have different names. "; let tzu = "\ The Nameless is the origin of Heaven and Earth; The named is the mother of all things. Therefore let there always be non-being, so we may see their subtlety, And let there always be being, so we may see their outcome. The two are the same, But after they are produced, they have different names. They both may be called deep and profound. Deeper and more profound, The door of all subtleties! "; let expected = "\ --- original +++ modified @@ -1,7 +1,6 @@ -The Way that can be told of is not the eternal Way; -The name that can be named is not the eternal name. The Nameless is the origin of Heaven and Earth; -The Named is the mother of all things. +The named is the mother of all things. + Therefore let there always be non-being, so we may see their subtlety, And let there always be being, @@ -9,3 +8,6 @@ The two are the same, But after they are produced, they have different names. +They both may be called deep and profound. +Deeper and more profound, +The door of all subtleties! 
"; assert_patch!(opts, lao, tzu, expected); let expected = "\ --- original +++ modified @@ -1,2 +0,0 @@ -The Way that can be told of is not the eternal Way; -The name that can be named is not the eternal name. @@ -4 +2,2 @@ -The Named is the mother of all things. +The named is the mother of all things. + @@ -11,0 +11,3 @@ +They both may be called deep and profound. +Deeper and more profound, +The door of all subtleties! "; opts.context_len = 0; assert_patch!(opts, lao, tzu, expected); let expected = "\ --- original +++ modified @@ -1,5 +1,4 @@ -The Way that can be told of is not the eternal Way; -The name that can be named is not the eternal name. The Nameless is the origin of Heaven and Earth; -The Named is the mother of all things. +The named is the mother of all things. + Therefore let there always be non-being, @@ -11 +10,4 @@ they have different names. +They both may be called deep and profound. +Deeper and more profound, +The door of all subtleties! "; opts.context_len = 1; assert_patch!(opts, lao, tzu, expected); } #[test] fn no_newline_at_eof() { let old = "old line"; let new = "new line"; let expected = "\ --- original +++ modified @@ -1 +1 @@ -old line \\ No newline at end of file +new line \\ No newline at end of file "; assert_patch!(old, new, expected); let old = "old line\n"; let new = "new line"; let expected = "\ --- original +++ modified @@ -1 +1 @@ -old line +new line \\ No newline at end of file "; assert_patch!(old, new, expected); let old = "old line"; let new = "new line\n"; let expected = "\ --- original +++ modified @@ -1 +1 @@ -old line \\ No newline at end of file +new line "; assert_patch!(old, new, expected); let old = "old line\ncommon line"; let new = "new line\ncommon line"; let expected = "\ --- original +++ modified @@ -1,2 +1,2 @@ -old line +new line common line \\ No newline at end of file "; assert_patch!(old, new, expected); } #[test] fn without_no_newline_at_eof_message() { let old = "old line"; let new = "new line"; let 
expected = "\ --- original +++ modified @@ -1 +1 @@ -old line +new line "; let f = PatchFormatter::new().missing_newline_message(false); let patch = create_patch(old, new); let bpatch = create_patch_bytes(old.as_bytes(), new.as_bytes()); let patch_str = format!("{}", f.fmt_patch(&patch)); let mut patch_bytes = Vec::new(); f.write_patch_into(&bpatch, &mut patch_bytes).unwrap(); assert_eq!(patch_str, expected); assert_eq!(patch_bytes, patch_str.as_bytes()); assert_eq!(patch_bytes, expected.as_bytes()); assert_eq!(apply(old, &patch).unwrap(), new); assert_eq!( crate::apply_bytes(old.as_bytes(), &bpatch).unwrap(), new.as_bytes() ); } #[test] fn diffy_vs_git() { let original = "\ void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) { if (!Chunk_bounds_check(src, src_start, n)) return; if (!Chunk_bounds_check(dst, dst_start, n)) return; memcpy(dst->data + dst_start, src->data + src_start, n); } int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) { if (chunk == NULL) return 0; return start <= chunk->length && n <= chunk->length - start; } "; let a = "\ int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) { if (chunk == NULL) return 0; return start <= chunk->length && n <= chunk->length - start; } void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) { if (!Chunk_bounds_check(src, src_start, n)) return; if (!Chunk_bounds_check(dst, dst_start, n)) return; memcpy(dst->data + dst_start, src->data + src_start, n); } "; // TODO This differs from the expected output when using git's myers algorithm let expected_git = "\ --- original +++ modified @@ -1,14 +1,14 @@ -void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) +int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) { - if (!Chunk_bounds_check(src, src_start, n)) return; - if (!Chunk_bounds_check(dst, dst_start, n)) return; + if (chunk == NULL) return 0; - memcpy(dst->data + dst_start, src->data + 
src_start, n); + return start <= chunk->length && n <= chunk->length - start; } -int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) +void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) { - if (chunk == NULL) return 0; + if (!Chunk_bounds_check(src, src_start, n)) return; + if (!Chunk_bounds_check(dst, dst_start, n)) return; - return start <= chunk->length && n <= chunk->length - start; + memcpy(dst->data + dst_start, src->data + src_start, n); } "; let git_patch = Patch::from_str(expected_git).unwrap(); assert_eq!(apply(original, &git_patch).unwrap(), a); let expected_diffy_histogram = "\ --- original +++ modified @@ -1,3 +1,10 @@ +int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) +{ + if (chunk == NULL) return 0; + + return start <= chunk->length && n <= chunk->length - start; +} + void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) { if (!Chunk_bounds_check(src, src_start, n)) return; @@ -4,11 +11,4 @@ if (!Chunk_bounds_check(dst, dst_start, n)) return; memcpy(dst->data + dst_start, src->data + src_start, n); -} - -int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) -{ - if (chunk == NULL) return 0; - - return start <= chunk->length && n <= chunk->length - start; } "; assert_patch!(original, a, expected_diffy_histogram); let expected_diffy_myers = expected_git; let opts = DiffOptions { algorithm: Algorithm::Myers, ..Default::default() }; assert_patch!(opts, original, a, expected_diffy_myers); } #[test] fn suppress_blank_empty() { let original = "\ 1 2 3 4 "; let modified = "\ 1 2 3 5 "; // Note that there is a space " " on the line after 3 let expected = "\ --- original +++ modified @@ -2,4 +2,4 @@ 2 3 -4 +5 "; let f = PatchFormatter::new().suppress_blank_empty(false); let patch = create_patch(original, modified); let bpatch = create_patch_bytes(original.as_bytes(), modified.as_bytes()); let patch_str = format!("{}", f.fmt_patch(&patch)); let mut patch_bytes = 
Vec::new(); f.write_patch_into(&bpatch, &mut patch_bytes).unwrap(); assert_eq!(patch_str, expected); assert_eq!(patch_bytes, patch_str.as_bytes()); assert_eq!(patch_bytes, expected.as_bytes()); assert_eq!(apply(original, &patch).unwrap(), modified); assert_eq!( crate::apply_bytes(original.as_bytes(), &bpatch).unwrap(), modified.as_bytes() ); // Note that there is no space " " on the line after 3 let expected_suppressed = "\ --- original +++ modified @@ -2,4 +2,4 @@ 2 3 -4 +5 "; let f = PatchFormatter::new().suppress_blank_empty(true); let patch = create_patch(original, modified); let bpatch = create_patch_bytes(original.as_bytes(), modified.as_bytes()); let patch_str = format!("{}", f.fmt_patch(&patch)); let mut patch_bytes = Vec::new(); f.write_patch_into(&bpatch, &mut patch_bytes).unwrap(); assert_eq!(patch_str, expected_suppressed); assert_eq!(patch_bytes, patch_str.as_bytes()); assert_eq!(patch_bytes, expected_suppressed.as_bytes()); assert_eq!(apply(original, &patch).unwrap(), modified); assert_eq!( crate::apply_bytes(original.as_bytes(), &bpatch).unwrap(), modified.as_bytes() ); } // In the event that a patch has an invalid hunk range we want to ensure that when apply is // attempting to search for a matching position to apply a hunk that the search algorithm runs in // time bounded by the length of the original image being patched. Before clamping the search space // this test would take >200ms and now it runs in roughly ~30us on an M1 laptop. #[test] fn apply_with_incorrect_hunk_has_bounded_performance() { let patch = "\ @@ -10,6 +1000000,8 @@ First: Life before death, strength before weakness, journey before destination. Second: - I will put the law before all else. + I swear to seek justice, + to let it guide me, + until I find a more perfect Ideal. "; let original = "\ First: Life before death, strength before weakness, journey before destination. Second: I will put the law before all else. 
"; let expected = "\ First: Life before death, strength before weakness, journey before destination. Second: I swear to seek justice, to let it guide me, until I find a more perfect Ideal. "; let patch = Patch::from_str(patch).unwrap(); let now = std::time::Instant::now(); let result = apply(original, &patch).unwrap(); let elapsed = now.elapsed(); println!("{:?}", elapsed); assert!(elapsed < std::time::Duration::from_micros(200)); assert_eq!(result, expected); } #[test] fn reverse_empty_file() { let p = create_patch("", "make it so"); let reverse = p.reverse(); let hunk_lines = p.hunks().iter().map(|h| h.lines()); let reverse_hunk_lines = reverse.hunks().iter().map(|h| h.lines()); for (lines, reverse_lines) in hunk_lines.zip(reverse_hunk_lines) { for (line, reverse) in lines.iter().zip(reverse_lines.iter()) { match line { l @ Line::Context(_) => assert_eq!(l, reverse), Line::Delete(d) => assert!(matches!(reverse, Line::Insert(i) if d == i)), Line::Insert(i) => assert!(matches!(reverse, Line::Delete(d) if d == i)), } } } let re_reverse = apply(&apply("", &p).unwrap(), &reverse).unwrap(); assert_eq!(re_reverse, ""); } #[test] fn reverse_multi_line_file() { let original = r"Commander Worf What do you want this time, Picard?! Commander Worf how dare you speak to mean that way! "; let modified = r"Commander Worf Yes, Captain Picard? Commander Worf, you are a valued member of my crew Why, thank you Captain. As are you. A true warrior. Kupluh! Kupluh, Indeed "; let p = create_patch(original, modified); let reverse = p.reverse(); let re_reverse = apply(&apply(original, &p).unwrap(), &reverse).unwrap(); assert_eq!(re_reverse, original); } diffy-imara-0.3.2/src/lib.rs000064400000000000000000000177321046102023000137330ustar 00000000000000//! Tools for finding and manipulating differences between files //! //! ## Overview //! //! This library is a fork of [`diffy`](https://docs.rs/diffy) with the backend of //! [`imara-diff`](https://docs.rs/imara-diff). //! 
//! A [`Patch`] can then be output in the [Unified Format] either by using its
//! [`Display`] impl or by using a [`PatchFormatter`] to output the diff with
//! color.
# let original = "The Way of Kings\nWords of Radiance\n"; //! # let modified = "The Way of Kings\nWords of Radiance\nOathbringer\n"; //! # //! # let patch = create_patch(original, modified); //! # //! # let expected = "\ //! # --- original //! # +++ modified //! # @@ -1,2 +1,3 @@ //! # The Way of Kings //! # Words of Radiance //! # +Oathbringer //! # "; //! # //! # assert_eq!(patch.to_string(), expected); //! # //! // Without color //! print!("{}", patch); //! //! // With color //! # use diffy_imara::PatchFormatter; //! let f = PatchFormatter::new().with_color(); //! print!("{}", f.fmt_patch(&patch)); //! ``` //! //! ```console //! --- original //! +++ modified //! @@ -1,2 +1,3 @@ //! The Way of Kings //! Words of Radiance //! +Oathbringer //! ``` //! //! ## Applying a Patch //! //! Once you have a [`Patch`] you can apply it to a base image in order to //! recover the new text. Each hunk will be applied to the base image in //! sequence. Similarly to GNU `patch`, this implementation can detect when //! line numbers specified in the patch are incorrect and will attempt to find //! the correct place to apply each hunk by iterating forward and backward //! from the given position until all context lines from a hunk match the base //! image. //! //! ``` //! use diffy_imara::{apply, Patch}; //! //! let s = "\ //! --- a/skybreaker-ideals //! +++ b/skybreaker-ideals //! @@ -10,6 +10,8 @@ //! First: //! Life before death, //! strength before weakness, //! journey before destination. //! Second: //! - I will put the law before all else. //! + I swear to seek justice, //! + to let it guide me, //! + until I find a more perfect Ideal. //! "; //! //! let patch = Patch::from_str(s).unwrap(); //! //! let base_image = "\ //! First: //! Life before death, //! strength before weakness, //! journey before destination. //! Second: //! I will put the law before all else. //! "; //! //! let expected = "\ //! First: //! Life before death, //! strength before weakness, //! 
journey before destination. //! Second: //! I swear to seek justice, //! to let it guide me, //! until I find a more perfect Ideal. //! "; //! //! assert_eq!(apply(base_image, &patch).unwrap(), expected); //! ``` //! //! ## Performing a Three-way Merge //! //! Two files `A` and `B` can be merged together given a common ancestor or //! original file `O` to produce a file `C` similarly to how [diff3] //! performs a three-way merge. //! //! ```console //! --- A --- //! / \ //! / \ //! O C //! \ / //! \ / //! --- B --- //! ``` //! //! If files `A` and `B` modified different regions of the original file `O` //! (or the same region in the same way) then the files can be merged without //! conflict. //! //! ``` //! use diffy_imara::merge; //! //! let original = "the final empire\nThe Well of Ascension\nThe hero of ages\n"; //! let a = "The Final Empire\nThe Well of Ascension\nThe Hero of Ages\n"; //! let b = "The Final Empire\nThe Well of Ascension\nThe hero of ages\n"; //! let expected = "\ //! The Final Empire //! The Well of Ascension //! The Hero of Ages //! "; //! //! assert_eq!(merge(original, a, b).unwrap(), expected); //! ``` //! //! If both files `A` and `B` modified the same region of the original file //! `O` (and those modifications are different), it would result in a conflict //! as it is not clear which modifications should be used in the merged //! result. //! //! ``` //! use diffy_imara::merge; //! //! let original = "The Final Empire\nThe Well of Ascension\nThe hero of ages\n"; //! let a = "The Final Empire\nThe Well of Ascension\nThe Hero of Ages\nSecret History\n"; //! let b = "The Final Empire\nThe Well of Ascension\nThe hero of ages\nThe Alloy of Law\n"; //! let expected = "\ //! The Final Empire //! The Well of Ascension //! <<<<<<< ours //! The Hero of Ages //! Secret History //! ||||||| original //! The hero of ages //! ======= //! The hero of ages //! The Alloy of Law //! >>>>>>> theirs //! "; //! //! 
//! In addition to free-standing functions [`create_patch`] and [`merge()`], this library provides methods
//! [`DiffOptions::create_patch`] and [`MergeOptions::merge`]; the diff algorithm used by either struct is
//! selected with its `set_algorithm` method.
/// One step of the three-way alignment produced by `merge_solutions`.
///
/// Each variant records which of the three inputs (ancestor, ours, theirs)
/// contribute a range of elements at this position.
enum Diff3Range<'ancestor, 'ours, 'theirs, T: ?Sized> {
    /// Both two-way diffs report this ancestor range as unchanged.
    Equal(Range<'ancestor, T>, Range<'ours, T>, Range<'theirs, T>),
    /// Both two-way diffs report this ancestor range as deleted.
    Ancestor(Range<'ancestor, T>),
    /// "Ours" kept this ancestor range while "theirs" deleted it.
    AncestorOurs(Range<'ancestor, T>, Range<'ours, T>),
    /// "Theirs" kept this ancestor range while "ours" deleted it.
    AncestorTheirs(Range<'ancestor, T>, Range<'theirs, T>),
    /// A range inserted by "ours" (no ancestor counterpart).
    Ours(Range<'ours, T>),
    /// A range inserted by "theirs" (no ancestor counterpart).
    Theirs(Range<'theirs, T>),
}
/// A region of the merged result, as built by `diff3_range_to_merge_range`
/// and later refined by `cleanup_conflicts`.
enum MergeRange<'ancestor, 'ours, 'theirs, T: ?Sized> {
    /// The region is unchanged in all three inputs; the ancestor lines are emitted.
    Equal(Range<'ancestor, T>, Range<'ours, T>, Range<'theirs, T>),
    /// The sides disagree; rendered between conflict markers (ancestor, ours, theirs).
    Conflict(Range<'ancestor, T>, Range<'ours, T>, Range<'theirs, T>),
    /// Only "ours" contributes here; the lines from "ours" are emitted.
    Ours(Range<'ours, T>),
    /// Only "theirs" contributes here; the lines from "theirs" are emitted.
    Theirs(Range<'theirs, T>),
    /// Both sides ended up with identical content; the lines from "ours" are emitted.
    Both(Range<'ours, T>, Range<'theirs, T>),
}
/// A collection of options for modifying the way a merge is performed
///
/// All fields are private; configure them through the `set_*` methods.
#[derive(Debug)]
pub struct MergeOptions {
    /// The length of the conflict markers used when displaying a merge conflict
    conflict_marker_length: usize,
    /// The conflict style used when displaying a merge conflict
    style: ConflictStyle,
    /// The diff algorithm used for the two internal diffs
    /// ("ancestor" with "ours", "ancestor" with "theirs")
    algorithm: Algorithm,
}
pub fn set_algorithm(&mut self, algorithm: Algorithm) -> &mut Self { self.algorithm = algorithm; self } /// Merge two files, given a common ancestor, based on the configured options /// The algorithm set in [`Self::algorithm`] is used for computing the diff between `ancestor` /// and `ours`, and between `ancestor` and `theirs` pub fn merge<'a>( &self, ancestor: &'a str, ours: &'a str, theirs: &'a str, ) -> Result { let input = InternedMergeInput::new(ancestor, ours, theirs); let ancestor_lines: Vec<_> = lines_with_terminator(ancestor).collect(); let our_lines: Vec<_> = lines_with_terminator(ours).collect(); let their_lines: Vec<_> = lines_with_terminator(theirs).collect(); let mut opts = DiffOptions::new(); opts.set_algorithm(self.algorithm); let our_solution = opts.diff_tokens(&input.base, &input.left, input.interner.num_tokens()); let their_solution = opts.diff_tokens(&input.base, &input.right, input.interner.num_tokens()); let merged = merge_solutions(&our_solution, &their_solution); let mut merge = diff3_range_to_merge_range(&merged); cleanup_conflicts(&mut merge); output_result( &ancestor_lines, &our_lines, &their_lines, &merge, self.conflict_marker_length, self.style, ) } /// Perform a 3-way merge between potentially non-utf8 texts pub fn merge_bytes<'a>( &self, ancestor: &'a [u8], ours: &'a [u8], theirs: &'a [u8], ) -> Result, Vec> { let input = InternedMergeInput::new(ancestor, ours, theirs); let ancestor_lines: Vec<_> = byte_lines_with_terminator(ancestor).collect(); let our_lines: Vec<_> = byte_lines_with_terminator(ours).collect(); let their_lines: Vec<_> = byte_lines_with_terminator(theirs).collect(); let mut opts = DiffOptions::new(); opts.set_algorithm(self.algorithm); let our_solution = opts.diff_tokens(&input.base, &input.left, input.interner.num_tokens()); let their_solution = opts.diff_tokens(&input.base, &input.right, input.interner.num_tokens()); let merged = merge_solutions(&our_solution, &their_solution); let mut merge = 
diff3_range_to_merge_range(&merged); cleanup_conflicts(&mut merge); output_result_bytes( &ancestor_lines, &our_lines, &their_lines, &merge, self.conflict_marker_length, self.style, ) } } impl Default for MergeOptions { fn default() -> Self { Self::new() } } /// Merge two files given a common ancestor. /// /// Returns `Ok(String)` upon a successful merge. /// Returns `Err(String)` if there were conflicts, with the conflicting /// regions marked with conflict markers. /// /// ## Merging two files without conflicts /// ``` /// # use diffy_imara::merge; /// let original = "\ /// Devotion /// Dominion /// Odium /// Preservation /// Ruin /// Cultivation /// Honor /// Endowment /// Autonomy /// Ambition /// "; /// let a = "\ /// Odium /// Preservation /// Ruin /// Cultivation /// Endowment /// Autonomy /// "; /// let b = "\ /// Devotion /// Dominion /// Odium /// Harmony /// Cultivation /// Honor /// Endowment /// Autonomy /// Ambition /// "; /// /// let expected = "\ /// Odium /// Harmony /// Cultivation /// Endowment /// Autonomy /// "; /// /// assert_eq!(merge(original, a, b).unwrap(), expected); /// ``` pub fn merge<'a>(ancestor: &'a str, ours: &'a str, theirs: &'a str) -> Result { MergeOptions::default().merge(ancestor, ours, theirs) } /// Perform a 3-way merge between potentially non-utf8 texts pub fn merge_bytes<'a>( ancestor: &'a [u8], ours: &'a [u8], theirs: &'a [u8], ) -> Result, Vec> { MergeOptions::default().merge_bytes(ancestor, ours, theirs) } fn merge_solutions<'ancestor, 'ours, 'theirs, T: ?Sized + SliceLike>( our_solution: &[DiffRange<'ancestor, 'ours, T>], their_solution: &[DiffRange<'ancestor, 'theirs, T>], ) -> Vec> { let mut our_solution = our_solution.iter().copied(); let mut their_solution = their_solution.iter().copied(); let mut ours = our_solution.next(); let mut theirs = their_solution.next(); let mut solution = Vec::new(); while ours.is_some() || theirs.is_some() { use DiffRange as DR; let merge_range = match (ours, theirs) { // // Inserts 
can't easily be checked to see if they match each other // (Some(DR::Insert(range)), _) => { ours.take(); Diff3Range::Ours(range) } (_, Some(DR::Insert(range))) => { theirs.take(); Diff3Range::Theirs(range) } (Some(DR::Equal(ancestor1, our_range)), Some(DR::Equal(ancestor2, their_range))) => { assert_eq!(ancestor1.offset(), ancestor2.offset()); let len = cmp::min(ancestor1.len(), ancestor2.len()); shrink_front(&mut ours, len); shrink_front(&mut theirs, len); Diff3Range::Equal( ancestor1.slice(..len), our_range.slice(..len), their_range.slice(..len), ) } (Some(DR::Equal(ancestor1, our_range)), Some(DR::Delete(ancestor2))) => { assert_eq!(ancestor1.offset(), ancestor2.offset()); let len = cmp::min(ancestor1.len(), ancestor2.len()); shrink_front(&mut ours, len); shrink_front(&mut theirs, len); Diff3Range::AncestorOurs(ancestor1.slice(..len), our_range.slice(..len)) } (Some(DR::Delete(ancestor1)), Some(DR::Equal(ancestor2, their_range))) => { assert_eq!(ancestor1.offset(), ancestor2.offset()); let len = cmp::min(ancestor1.len(), ancestor2.len()); shrink_front(&mut ours, len); shrink_front(&mut theirs, len); Diff3Range::AncestorTheirs(ancestor2.slice(..len), their_range.slice(..len)) } (Some(DR::Delete(ancestor1)), Some(DR::Delete(ancestor2))) => { assert_eq!(ancestor1.offset(), ancestor2.offset()); let len = cmp::min(ancestor1.len(), ancestor2.len()); shrink_front(&mut ours, len); shrink_front(&mut theirs, len); Diff3Range::Ancestor(ancestor1.slice(..len)) } // // Unreachable cases // (Some(DR::Equal(..) | DR::Delete(..)), None) | (None, Some(DR::Equal(..) 
| DR::Delete(_))) | (None, None) => unreachable!("Equal/Delete should match up"), }; solution.push(merge_range); if ours.map_or(true, |range| range.is_empty()) { ours = our_solution.next(); } if theirs.map_or(true, |range| range.is_empty()) { theirs = their_solution.next(); } } solution } fn shrink_front(maybe_range: &mut Option>, len: usize) { if let Some(range) = maybe_range { range.shrink_front(len) } } fn diff3_range_to_merge_range<'ancestor, 'ours, 'theirs, T: ?Sized + SliceLike>( solution: &[Diff3Range<'ancestor, 'ours, 'theirs, T>], ) -> Vec> { let mut ancestor: Option> = None; let mut ours: Option> = None; let mut theirs: Option> = None; let mut merge = Vec::new(); for &diff3 in solution { match diff3 { Diff3Range::Equal(ancestor_range, our_range, their_range) => { if let Some(merge_range) = create_merge_range(ancestor.take(), ours.take(), theirs.take()) { merge.push(merge_range); } merge.push(MergeRange::Equal(ancestor_range, our_range, their_range)); } Diff3Range::Ancestor(range) => { set_or_merge_range(&mut ancestor, range); set_or_merge_range(&mut ours, Range::empty()); set_or_merge_range(&mut theirs, Range::empty()); } Diff3Range::AncestorOurs(ancestor_range, our_range) => { set_or_merge_range(&mut ancestor, ancestor_range); set_or_merge_range(&mut ours, our_range); } Diff3Range::AncestorTheirs(ancestor_range, their_range) => { set_or_merge_range(&mut ancestor, ancestor_range); set_or_merge_range(&mut theirs, their_range); } Diff3Range::Ours(range) => set_or_merge_range(&mut ours, range), Diff3Range::Theirs(range) => set_or_merge_range(&mut theirs, range), } } if let Some(merge_range) = create_merge_range(ancestor.take(), ours.take(), theirs.take()) { merge.push(merge_range); } merge } fn set_or_merge_range<'a, T: ?Sized>(range1: &mut Option>, range2: Range<'a, T>) { if let Some(range1) = range1 { if range1.is_empty() { *range1 = range2; } else if !range2.is_empty() { assert_eq!(range1.offset() + range1.len(), range2.offset()); 
range1.grow_down(range2.len()); } } else { *range1 = Some(range2); } } fn create_merge_range<'ancestor, 'ours, 'theirs, T: ?Sized + SliceLike>( ancestor: Option>, ours: Option>, theirs: Option>, ) -> Option> { match (ancestor, ours, theirs) { (_, None, None) => None, (None, Some(ours), None) => Some(MergeRange::Ours(ours)), (None, None, Some(theirs)) => Some(MergeRange::Theirs(theirs)), (ancestor, ours, theirs) => Some(MergeRange::Conflict( ancestor.unwrap_or_default(), ours.unwrap_or_default(), theirs.unwrap_or_default(), )), } } #[allow(clippy::needless_lifetimes)] fn cleanup_conflicts<'ancestor, 'ours, 'theirs, T: ?Sized + SliceLike + PartialEq>( solution: &mut [MergeRange<'ancestor, 'ours, 'theirs, T>], ) { // TODO this could probably be more sophisticated: // e.g. run the diff algorithm on the conflict area for merge in solution { if let MergeRange::Conflict(ancestor, ours, theirs) = *merge { // If the ranges in the conflict end up being the same on both sides then we can // eliminate the conflict if ours.as_slice() == theirs.as_slice() { *merge = MergeRange::Both(ours, theirs); // If either ours or theirs exactly matches ancestor then we can also eliminate the // conflict } else if ancestor.as_slice() == ours.as_slice() { *merge = MergeRange::Theirs(theirs); } else if ancestor.as_slice() == theirs.as_slice() { *merge = MergeRange::Ours(ours); } } } } fn output_result<'a, T: ?Sized>( ancestor: &[&'a str], ours: &[&'a str], theirs: &[&'a str], merge: &[MergeRange], marker_len: usize, style: ConflictStyle, ) -> Result { let mut conflicts = 0; let mut output = String::new(); for (range_idx, merge_range) in merge.iter().enumerate() { match merge_range { MergeRange::Equal(range, ..) 
=> { add_lines(&mut output, &ancestor[range.range()]); } MergeRange::Conflict(ancestor_range, ours_range, theirs_range) => { let ancestor = &ancestor[ancestor_range.range()]; let ours = &ours[ours_range.range()]; let theirs = &theirs[theirs_range.range()]; if range_idx == merge.len() - 1 { let ancestor_last_ends_with_newline = ancestor.last().map_or(false, |l| l.ends_with('\n')); let ours_last_ends_with_newline = ours.last().map_or(false, |l| l.ends_with('\n')); let theirs_last_ends_with_newline = theirs.last().map_or(false, |l| l.ends_with('\n')); let (add_after_lines, add_after_right_marker) = if ancestor_last_ends_with_newline && ours_last_ends_with_newline && theirs_last_ends_with_newline { (false, true) } else { (true, false) }; add_conflict_marker(&mut output, '<', marker_len, Some("ours")); add_lines(&mut output, ours); if add_after_lines { output.push('\n'); } if let ConflictStyle::Diff3 = style { add_conflict_marker(&mut output, '|', marker_len, Some("original")); add_lines(&mut output, ancestor); if add_after_lines { output.push('\n'); } } add_conflict_marker(&mut output, '=', marker_len, None); add_lines(&mut output, theirs); if add_after_lines { output.push('\n'); } add_conflict_marker(&mut output, '>', marker_len, Some("theirs")); if !add_after_right_marker { output .pop() .expect("a `\n` is always added by the `add_conflict_marker` above"); } } else { add_conflict_marker(&mut output, '<', marker_len, Some("ours")); add_lines(&mut output, ours); if let ConflictStyle::Diff3 = style { add_conflict_marker(&mut output, '|', marker_len, Some("original")); add_lines(&mut output, ancestor); } add_conflict_marker(&mut output, '=', marker_len, None); add_lines(&mut output, theirs); add_conflict_marker(&mut output, '>', marker_len, Some("theirs")); } conflicts += 1; } MergeRange::Ours(range) => { add_lines(&mut output, &ours[range.range()]); } MergeRange::Theirs(range) => { add_lines(&mut output, &theirs[range.range()]); } MergeRange::Both(range, _) => { 
/// Appends a single conflict-marker line to `output`.
///
/// The line consists of `marker` repeated `marker_len` times, followed by a space
/// and `filename` when one is given, and is always terminated by `\n`.
fn add_conflict_marker(
    output: &mut String,
    marker: char,
    marker_len: usize,
    filename: Option<&str>,
) {
    output.extend(std::iter::repeat(marker).take(marker_len));

    match filename {
        Some(label) => {
            output.push(' ');
            output.push_str(label);
        }
        None => {}
    }

    output.push('\n');
}
/// Byte-oriented counterpart of `add_conflict_marker`.
///
/// Appends `marker` repeated `marker_len` times to `output`, followed by a space
/// and `filename` when one is given, and always terminates the line with `\n`.
fn add_conflict_marker_bytes(
    output: &mut Vec<u8>,
    marker: u8,
    marker_len: usize,
    filename: Option<&[u8]>,
) {
    output.extend(std::iter::repeat(marker).take(marker_len));

    if let Some(filename) = filename {
        output.push(b' ');
        output.extend_from_slice(filename);
    }

    output.push(b'\n');
}
expected.map(str::as_bytes).map_err(str::as_bytes); let solution_bytes = opts.merge_bytes(original.as_bytes(), ours.as_bytes(), theirs.as_bytes()); assert!( same_merge_bytes(expected_bytes, &solution_bytes), "{msg}\nexpected={expected_bytes:#?}\nactual={solution_bytes:#?}" ); } fn same_merge(expected: Result<&str, &str>, actual: &Result) -> bool { match (expected, actual) { (Ok(expected), Ok(actual)) => expected == actual, (Err(expected), Err(actual)) => expected == actual, (_, _) => false, } } fn same_merge_bytes(expected: Result<&[u8], &[u8]>, actual: &Result, Vec>) -> bool { match (expected, actual) { (Ok(expected), Ok(actual)) => expected == &actual[..], (Err(expected), Err(actual)) => expected == &actual[..], (_, _) => false, } } #[test] fn test_merge() { let original = "\ carrots garlic onions salmon mushrooms tomatoes salt "; let a = "\ carrots salmon mushrooms tomatoes garlic onions salt "; let b = "\ carrots salmon garlic onions mushrooms tomatoes salt "; #[rustfmt::skip] assert_merge( None, original, original, original, Ok(original), "Equal case #1",); assert_merge(None, original, a, a, Ok(a), "Equal case #2"); assert_merge(None, original, b, b, Ok(b), "Equal case #3"); let expected = "\ carrots <<<<<<< ours salmon ||||||| original garlic onions salmon ======= salmon garlic onions >>>>>>> theirs mushrooms tomatoes garlic onions salt "; assert_merge(None, original, a, b, Err(expected), "Single Conflict case"); let expected = "\ carrots <<<<<<< ours salmon garlic onions ||||||| original garlic onions salmon ======= salmon >>>>>>> theirs mushrooms tomatoes garlic onions salt "; assert_merge( None, original, b, a, Err(expected), "Reverse Single Conflict case", ); } #[test] fn test_merge_multiple_conflicts() { let original = "\ carrots garlic onions salmon tomatoes salt "; let a = "\ carrots salmon tomatoes garlic onions salt "; let b = "\ carrots salmon garlic onions tomatoes salt "; let expected_myers = "\ carrots <<<<<<< ours salmon ||||||| original garlic 
onions salmon ======= salmon garlic onions >>>>>>> theirs tomatoes garlic onions salt "; let opts_myers = MergeOptions { algorithm: Algorithm::Myers, ..Default::default() }; assert_merge( Some(&opts_myers), original, a, b, Err(expected_myers), "Multiple Conflict case", ); let expected_histogram = expected_myers; assert_merge( None, original, a, b, Err(expected_histogram), "Multiple Conflict case", ); let expected_myers = "\ carrots <<<<<<< ours salmon garlic onions ||||||| original garlic onions salmon ======= salmon >>>>>>> theirs tomatoes garlic onions salt "; assert_merge( Some(&opts_myers), original, b, a, Err(expected_myers), "Reverse Multiple Conflict case", ); let expected_histogram = expected_myers; assert_merge( None, original, b, a, Err(expected_histogram), "Reverse Multiple Conflict case", ); } #[test] fn diffy_vs_git() { let original = "\ void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) { if (!Chunk_bounds_check(src, src_start, n)) return; if (!Chunk_bounds_check(dst, dst_start, n)) return; memcpy(dst->data + dst_start, src->data + src_start, n); } int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) { if (chunk == NULL) return 0; return start <= chunk->length && n <= chunk->length - start; } "; let a = "\ int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) { if (chunk == NULL) return 0; return start <= chunk->length && n <= chunk->length - start; } void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) { if (!Chunk_bounds_check(src, src_start, n)) return; if (!Chunk_bounds_check(dst, dst_start, n)) return; memcpy(dst->data + dst_start, src->data + src_start, n); } "; let b = "\ void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) { if (!Chunk_bounds_check(src, src_start, n)) return; if (!Chunk_bounds_check(dst, dst_start, n)) return; // copy the bytes memcpy(dst->data + dst_start, src->data + src_start, n); } int 
Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) { if (chunk == NULL) return 0; return start <= chunk->length && n <= chunk->length - start; } "; let expected_myers = "\ int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) { if (chunk == NULL) return 0; <<<<<<< ours return start <= chunk->length && n <= chunk->length - start; ||||||| original memcpy(dst->data + dst_start, src->data + src_start, n); ======= // copy the bytes memcpy(dst->data + dst_start, src->data + src_start, n); >>>>>>> theirs } void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) { if (!Chunk_bounds_check(src, src_start, n)) return; if (!Chunk_bounds_check(dst, dst_start, n)) return; memcpy(dst->data + dst_start, src->data + src_start, n); } "; let opts_myers = MergeOptions { algorithm: Algorithm::Myers, ..Default::default() }; assert_merge( Some(&opts_myers), original, a, b, Err(expected_myers), "Myers diffy merge", ); let expected_histogram = "\ int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) { if (chunk == NULL) return 0; return start <= chunk->length && n <= chunk->length - start; } void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) { if (!Chunk_bounds_check(src, src_start, n)) return; if (!Chunk_bounds_check(dst, dst_start, n)) return; // copy the bytes memcpy(dst->data + dst_start, src->data + src_start, n); } "; assert_merge( None, original, a, b, Ok(expected_histogram), "Histogram diffy merge", ); } #[test] fn correct_range_is_used_for_both_case() { let base = r#" class GithubCall(db.Model): `url`: URL of request Example.`https://api.github.com` "#; let theirs = r#" class GithubCall(db.Model): `repo`: String field. Github repository fields. Example: `amitu/python` "#; let ours = r#" class Call(models.Model): `body`: String field. The payload of the webhook call from the github. `repo`: String field. Github repository fields. 
Example: `amitu/python` "#; let expected = r#" class Call(models.Model): `body`: String field. The payload of the webhook call from the github. `repo`: String field. Github repository fields. Example: `amitu/python` "#; assert_merge( None, base, ours, theirs, Ok(expected), "MergeRange::Both case", ); } #[test] fn delete_and_insert_conflict() { let base = r#" { int a = 2; } "#; let ours = r#" { } "#; let theirs = r#" { int a = 2; int b = 3; } "#; let expected = r#" { <<<<<<< ours ||||||| original int a = 2; ======= int a = 2; int b = 3; >>>>>>> theirs } "#; assert_merge( None, base, ours, theirs, Err(expected), "MergeRange (Ours::delete, Theirs::insert) conflict", ); let expected = r#" { <<<<<<< ours int a = 2; int b = 3; ||||||| original int a = 2; ======= >>>>>>> theirs } "#; assert_merge( None, base, theirs, ours, Err(expected), "MergeRange (Theirs::delete, Ours::insert) conflict", ); } #[test] fn one_line() { let base = "[1, 2]"; let ours = "[1]"; let theirs = "[2]"; let expected = "\ <<<<<<< ours [1] ||||||| original [1, 2] ======= [2] >>>>>>> theirs"; assert_merge(None, base, ours, theirs, Err(expected), "One-line"); let expected = "\ <<<<<<< ours [2] ||||||| original [1, 2] ======= [1] >>>>>>> theirs"; assert_merge(None, base, theirs, ours, Err(expected), "One-line reverse"); } #[test] fn no_newline_at_the_end() { // meanings of the used shortenings: // - wo - without final newline // - w - with final newline // - expected_w_wo_w - expected from base_w, left_wo, right_w let base_wo = "\ object Foo: def bar(input: String) = input"; let base_w = "\ object Foo: def bar(input: String) = input "; let ours_wo = "\ object Foo: def bar(newname: String) = newname"; let ours_w = "\ object Foo: def bar(newname: String) = newname "; let theirs_wo = "\ object Foo: def baz(input: String) = input"; let theirs_w = "\ object Foo: def baz(input: String) = input "; let expected_wo_wo_wo = "\ object Foo: <<<<<<< ours def bar(newname: String) = newname ||||||| original def 
bar(input: String) = input ======= def baz(input: String) = input >>>>>>> theirs"; assert_merge( None, base_wo, ours_wo, theirs_wo, Err(expected_wo_wo_wo), "without/without/without", ); let expected_wo_w_wo = "\ object Foo: <<<<<<< ours def bar(newname: String) = newname ||||||| original def bar(input: String) = input ======= def baz(input: String) = input >>>>>>> theirs"; assert_merge( None, base_wo, ours_w, theirs_wo, Err(expected_wo_w_wo), "without/with/without", ); // wo_wo_w case should be symmetrical to wo_w_wo let expected_wo_w_w = "\ object Foo: <<<<<<< ours def bar(newname: String) = newname ||||||| original def bar(input: String) = input ======= def baz(input: String) = input >>>>>>> theirs"; assert_merge( None, base_wo, ours_w, theirs_w, Err(expected_wo_w_w), "without/with/with", ); let expected_w_wo_wo = "\ object Foo: <<<<<<< ours def bar(newname: String) = newname ||||||| original def bar(input: String) = input ======= def baz(input: String) = input >>>>>>> theirs"; assert_merge( None, base_w, ours_wo, theirs_wo, Err(expected_w_wo_wo), "with/without/without", ); let expected_w_w_wo = "\ object Foo: <<<<<<< ours def bar(newname: String) = newname ||||||| original def bar(input: String) = input ======= def baz(input: String) = input >>>>>>> theirs"; assert_merge( None, base_w, ours_w, theirs_wo, Err(expected_w_w_wo), "with/with/without", ); // w_wo_w should be symmetrical to w_w_wo let expected_w_w_w = "\ object Foo: <<<<<<< ours def bar(newname: String) = newname ||||||| original def bar(input: String) = input ======= def baz(input: String) = input >>>>>>> theirs "; assert_merge( None, base_w, ours_w, theirs_w, Err(expected_w_w_w), "with/with/with", ); } diffy-imara-0.3.2/src/patch/format.rs000064400000000000000000000224311046102023000155440ustar 00000000000000use super::{Hunk, Line, Patch, NO_NEWLINE_AT_EOF}; use nu_ansi_term::{Color, Style}; use std::{ fmt::{Display, Formatter, Result}, io, }; /// Struct used to adjust the formatting of a `Patch` 
#[derive(Debug)] pub struct PatchFormatter { with_color: bool, with_missing_newline_message: bool, suppress_blank_empty: bool, context: Style, delete: Style, insert: Style, hunk_header: Style, patch_header: Style, function_context: Style, } impl PatchFormatter { /// Construct a new formatter pub fn new() -> Self { Self { with_color: false, with_missing_newline_message: true, // TODO the default in git-diff and GNU diff is to have this set to false, on the next // semver breaking release we should contemplate switching this to be false by default suppress_blank_empty: true, context: Style::new(), delete: Color::Red.normal(), insert: Color::Green.normal(), hunk_header: Color::Cyan.normal(), patch_header: Style::new().bold(), function_context: Style::new(), } } /// Enable formatting a patch with color pub fn with_color(mut self) -> Self { self.with_color = true; self } /// Sets whether to format a patch with a "No newline at end of file" message. /// /// Default is `true`. /// /// Note: If this is disabled by setting to `false`, formatted patches will no longer contain /// sufficient information to determine if a file ended with a newline character (`\n`) or not /// and the patch will be formatted as if both the original and modified files ended with a /// newline character (`\n`). pub fn missing_newline_message(mut self, enable: bool) -> Self { self.with_missing_newline_message = enable; self } /// Sets whether to suppress printing of a space before empty lines. /// /// Defaults to `true`. /// /// For more information you can refer to the [Omitting trailing blanks] manual page of GNU /// diff or the [diff.suppressBlankEmpty] config for `git-diff`. 
/// /// [Omitting trailing blanks]: https://www.gnu.org/software/diffutils/manual/html_node/Trailing-Blanks.html /// [diff.suppressBlankEmpty]: https://git-scm.com/docs/git-diff#Documentation/git-diff.txt-codediffsuppressBlankEmptycode pub fn suppress_blank_empty(mut self, enable: bool) -> Self { self.suppress_blank_empty = enable; self } /// Returns a `Display` impl which can be used to print a Patch pub fn fmt_patch<'a>(&'a self, patch: &'a Patch<'a, str>) -> impl Display + 'a { PatchDisplay { f: self, patch } } pub fn write_patch_into + ?Sized, W: io::Write>( &self, patch: &Patch<'_, T>, w: W, ) -> io::Result<()> { PatchDisplay { f: self, patch }.write_into(w) } fn fmt_hunk<'a>(&'a self, hunk: &'a Hunk<'a, str>) -> impl Display + 'a { HunkDisplay { f: self, hunk } } fn write_hunk_into + ?Sized, W: io::Write>( &self, hunk: &Hunk<'_, T>, w: W, ) -> io::Result<()> { HunkDisplay { f: self, hunk }.write_into(w) } fn fmt_line<'a>(&'a self, line: &'a Line<'a, str>) -> impl Display + 'a { LineDisplay { f: self, line } } fn write_line_into + ?Sized, W: io::Write>( &self, line: &Line<'_, T>, w: W, ) -> io::Result<()> { LineDisplay { f: self, line }.write_into(w) } } impl Default for PatchFormatter { fn default() -> Self { Self::new() } } struct PatchDisplay<'a, T: ToOwned + ?Sized> { f: &'a PatchFormatter, patch: &'a Patch<'a, T>, } impl + ?Sized> PatchDisplay<'_, T> { fn write_into(&self, mut w: W) -> io::Result<()> { if self.patch.original.is_some() || self.patch.modified.is_some() { if self.f.with_color { write!(w, "{}", self.f.patch_header.prefix())?; } if let Some(original) = &self.patch.original { write!(w, "--- ")?; original.write_into(&mut w)?; writeln!(w)?; } if let Some(modified) = &self.patch.modified { write!(w, "+++ ")?; modified.write_into(&mut w)?; writeln!(w)?; } if self.f.with_color { write!(w, "{}", self.f.patch_header.suffix())?; } } for hunk in &self.patch.hunks { self.f.write_hunk_into(hunk, &mut w)?; } Ok(()) } } impl Display for PatchDisplay<'_, 
str> { fn fmt(&self, f: &mut Formatter<'_>) -> Result { if self.patch.original.is_some() || self.patch.modified.is_some() { if self.f.with_color { write!(f, "{}", self.f.patch_header.prefix())?; } if let Some(original) = &self.patch.original { writeln!(f, "--- {}", original)?; } if let Some(modified) = &self.patch.modified { writeln!(f, "+++ {}", modified)?; } if self.f.with_color { write!(f, "{}", self.f.patch_header.suffix())?; } } for hunk in &self.patch.hunks { write!(f, "{}", self.f.fmt_hunk(hunk))?; } Ok(()) } } struct HunkDisplay<'a, T: ?Sized> { f: &'a PatchFormatter, hunk: &'a Hunk<'a, T>, } impl + ?Sized> HunkDisplay<'_, T> { fn write_into(&self, mut w: W) -> io::Result<()> { if self.f.with_color { write!(w, "{}", self.f.hunk_header.prefix())?; } write!(w, "@@ -{} +{} @@", self.hunk.old_range, self.hunk.new_range)?; if self.f.with_color { write!(w, "{}", self.f.hunk_header.suffix())?; } if let Some(ctx) = self.hunk.function_context { write!(w, " ")?; if self.f.with_color { write!(w, "{}", self.f.function_context.prefix())?; } write!(w, " ")?; w.write_all(ctx.as_ref())?; if self.f.with_color { write!(w, "{}", self.f.function_context.suffix())?; } } writeln!(w)?; for line in &self.hunk.lines { self.f.write_line_into(line, &mut w)?; } Ok(()) } } impl Display for HunkDisplay<'_, str> { fn fmt(&self, f: &mut Formatter<'_>) -> Result { if self.f.with_color { write!(f, "{}", self.f.hunk_header.prefix())?; } write!(f, "@@ -{} +{} @@", self.hunk.old_range, self.hunk.new_range)?; if self.f.with_color { write!(f, "{}", self.f.hunk_header.suffix())?; } if let Some(ctx) = self.hunk.function_context { write!(f, " ")?; if self.f.with_color { write!(f, "{}", self.f.function_context.prefix())?; } write!(f, " {}", ctx)?; if self.f.with_color { write!(f, "{}", self.f.function_context.suffix())?; } } writeln!(f)?; for line in &self.hunk.lines { write!(f, "{}", self.f.fmt_line(line))?; } Ok(()) } } struct LineDisplay<'a, T: ?Sized> { f: &'a PatchFormatter, line: &'a Line<'a, 
T>, } impl + ?Sized> LineDisplay<'_, T> { fn write_into(&self, mut w: W) -> io::Result<()> { let (sign, line, style) = match self.line { Line::Context(line) => (' ', line.as_ref(), self.f.context), Line::Delete(line) => ('-', line.as_ref(), self.f.delete), Line::Insert(line) => ('+', line.as_ref(), self.f.insert), }; if self.f.with_color { write!(w, "{}", style.prefix())?; } if self.f.suppress_blank_empty && sign == ' ' && line == b"\n" { w.write_all(line)?; } else { write!(w, "{}", sign)?; w.write_all(line)?; } if self.f.with_color { write!(w, "{}", style.suffix())?; } if !line.ends_with(b"\n") { writeln!(w)?; if self.f.with_missing_newline_message { writeln!(w, "{}", NO_NEWLINE_AT_EOF)?; } } Ok(()) } } impl Display for LineDisplay<'_, str> { fn fmt(&self, f: &mut Formatter<'_>) -> Result { let (sign, line, style) = match self.line { Line::Context(line) => (' ', line, self.f.context), Line::Delete(line) => ('-', line, self.f.delete), Line::Insert(line) => ('+', line, self.f.insert), }; if self.f.with_color { write!(f, "{}", style.prefix())?; } if self.f.suppress_blank_empty && sign == ' ' && *line == "\n" { write!(f, "{}", line)?; } else { write!(f, "{}{}", sign, line)?; } if self.f.with_color { write!(f, "{}", style.suffix())?; } if !line.ends_with('\n') { writeln!(f)?; if self.f.with_missing_newline_message { writeln!(f, "{}", NO_NEWLINE_AT_EOF)?; } } Ok(()) } } diffy-imara-0.3.2/src/patch/mod.rs000064400000000000000000000244061046102023000150370ustar 00000000000000mod format; mod parse; pub use format::PatchFormatter; pub use parse::ParsePatchError; use std::{borrow::Cow, fmt, ops}; const NO_NEWLINE_AT_EOF: &str = "\\ No newline at end of file"; /// Representation of all the differences between two files #[derive(PartialEq, Eq)] pub struct Patch<'a, T: ToOwned + ?Sized> { // TODO GNU patch is able to parse patches without filename headers. 
// This should be changed to an `Option` type to reflect this instead of setting this to "" // when they're missing original: Option>, modified: Option>, hunks: Vec>, } impl<'a, T: ToOwned + ?Sized> Patch<'a, T> { pub(crate) fn new( original: Option, modified: Option, hunks: Vec>, ) -> Self where O: Into>, M: Into>, { let original = original.map(|o| Filename(o.into())); let modified = modified.map(|m| Filename(m.into())); Self { original, modified, hunks, } } /// Return the name of the old file pub fn original(&self) -> Option<&T> { self.original.as_ref().map(AsRef::as_ref) } /// Return the name of the new file pub fn modified(&self) -> Option<&T> { self.modified.as_ref().map(AsRef::as_ref) } /// Returns the hunks in the patch pub fn hunks(&self) -> &[Hunk<'_, T>] { &self.hunks } pub fn reverse(&self) -> Patch<'_, T> { let hunks = self.hunks.iter().map(Hunk::reverse).collect(); Patch { original: self.modified.clone(), modified: self.original.clone(), hunks, } } } impl + ToOwned + ?Sized> Patch<'_, T> { /// Convert a `Patch` into bytes /// /// This is the equivalent of the `to_string` function but for /// potentially non-utf8 patches. pub fn to_bytes(&self) -> Vec { let mut bytes = Vec::new(); PatchFormatter::new() .write_patch_into(self, &mut bytes) .unwrap(); bytes } } impl<'a> Patch<'a, str> { /// Parse a `Patch` from a string /// /// ``` /// use diffy_imara::Patch; /// /// let s = "\ /// --- a/ideals /// +++ b/ideals /// @@ -1,4 +1,6 @@ /// First: /// Life before death, /// strength before weakness, /// journey before destination. /// +Second: /// + I will protect those who cannot protect themselves. 
/// "; /// /// let patch = Patch::from_str(s).unwrap(); /// ``` #[allow(clippy::should_implement_trait)] pub fn from_str(s: &'a str) -> Result, ParsePatchError> { parse::parse(s) } } impl<'a> Patch<'a, [u8]> { /// Parse a `Patch` from bytes pub fn from_bytes(s: &'a [u8]) -> Result, ParsePatchError> { parse::parse_bytes(s) } } impl Clone for Patch<'_, T> { fn clone(&self) -> Self { Self { original: self.original.clone(), modified: self.modified.clone(), hunks: self.hunks.clone(), } } } impl fmt::Display for Patch<'_, str> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", PatchFormatter::new().fmt_patch(self)) } } impl fmt::Debug for Patch<'_, T> where T: ToOwned + fmt::Debug, O: std::borrow::Borrow + fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("Patch") .field("original", &self.original) .field("modified", &self.modified) .field("hunks", &self.hunks) .finish() } } #[derive(PartialEq, Eq)] struct Filename<'a, T: ToOwned + ?Sized>(Cow<'a, T>); const ESCAPED_CHARS: &[char] = &['\n', '\t', '\0', '\r', '\"', '\\']; #[allow(clippy::byte_char_slices)] const ESCAPED_CHARS_BYTES: &[u8] = &[b'\n', b'\t', b'\0', b'\r', b'\"', b'\\']; impl Filename<'_, str> { fn needs_to_be_escaped(&self) -> bool { self.0.contains(ESCAPED_CHARS) } } impl + ?Sized> Filename<'_, T> { fn needs_to_be_escaped_bytes(&self) -> bool { self.0 .as_ref() .as_ref() .iter() .any(|b| ESCAPED_CHARS_BYTES.contains(b)) } fn write_into(&self, mut w: W) -> std::io::Result<()> { if self.needs_to_be_escaped_bytes() { w.write_all(b"\"")?; for b in self.0.as_ref().as_ref() { if ESCAPED_CHARS_BYTES.contains(b) { w.write_all(b"\\")?; } w.write_all(&[*b])?; } w.write_all(b"\"")?; } else { w.write_all(self.0.as_ref().as_ref())?; } Ok(()) } } impl AsRef for Filename<'_, T> { fn as_ref(&self) -> &T { &self.0 } } impl ops::Deref for Filename<'_, T> { type Target = T; fn deref(&self) -> &Self::Target { &self.0 } } impl Clone for Filename<'_, T> { fn 
clone(&self) -> Self { Self(self.0.clone()) } } impl fmt::Display for Filename<'_, str> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use std::fmt::Write; if self.needs_to_be_escaped() { f.write_char('\"')?; for c in self.0.chars() { if ESCAPED_CHARS.contains(&c) { f.write_char('\\')?; } f.write_char(c)?; } f.write_char('\"')?; } else { f.write_str(&self.0)?; } Ok(()) } } impl fmt::Debug for Filename<'_, T> where T: ToOwned + fmt::Debug, O: std::borrow::Borrow + fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_tuple("Filename").field(&self.0).finish() } } /// Represents a group of differing lines between two files #[derive(Debug, PartialEq, Eq)] pub struct Hunk<'a, T: ?Sized> { old_range: HunkRange, new_range: HunkRange, function_context: Option<&'a T>, lines: Vec>, } fn hunk_lines_count(lines: &[Line<'_, T>]) -> (usize, usize) { lines.iter().fold((0, 0), |count, line| match line { Line::Context(_) => (count.0 + 1, count.1 + 1), Line::Delete(_) => (count.0 + 1, count.1), Line::Insert(_) => (count.0, count.1 + 1), }) } impl<'a, T: ?Sized> Hunk<'a, T> { pub(crate) fn new( old_range: HunkRange, new_range: HunkRange, function_context: Option<&'a T>, lines: Vec>, ) -> Self { let (old_count, new_count) = hunk_lines_count(&lines); assert_eq!(old_range.len, old_count); assert_eq!(new_range.len, new_count); Self { old_range, new_range, function_context, lines, } } /// Returns the corresponding range for the old file in the hunk pub fn old_range(&self) -> HunkRange { self.old_range } /// Returns the corresponding range for the new file in the hunk pub fn new_range(&self) -> HunkRange { self.new_range } /// Returns the function context (if any) for the hunk pub fn function_context(&self) -> Option<&T> { self.function_context } /// Returns the lines in the hunk pub fn lines(&self) -> &[Line<'a, T>] { &self.lines } /// Creates a reverse patch for the hunk. 
This is equivalent to what /// XDL_PATCH_REVERSE would apply in libxdiff. pub fn reverse(&self) -> Self { let lines = self.lines.iter().map(Line::reverse).collect(); Self { old_range: self.new_range, new_range: self.old_range, function_context: self.function_context, lines, } } } impl Clone for Hunk<'_, T> { fn clone(&self) -> Self { Self { old_range: self.old_range, new_range: self.new_range, function_context: self.function_context, lines: self.lines.clone(), } } } /// The range of lines in a file for a particular `Hunk`. #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub struct HunkRange { /// The starting line number of a hunk start: usize, /// The hunk size (number of lines) len: usize, } impl HunkRange { pub(crate) fn new(start: usize, len: usize) -> Self { Self { start, len } } /// Returns the range as a `ops::Range` pub fn range(&self) -> ops::Range { self.start..self.end() } /// Returns the starting line number of the range (inclusive) pub fn start(&self) -> usize { self.start } /// Returns the ending line number of the range (exclusive) pub fn end(&self) -> usize { self.start + self.len } /// Returns the number of lines in the range pub fn len(&self) -> usize { self.len } /// Returns `true` if the range is empty (has a length of `0`) pub fn is_empty(&self) -> bool { self.len == 0 } } impl fmt::Display for HunkRange { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "{}", self.start)?; if self.len != 1 { write!(f, ",{}", self.len)?; } Ok(()) } } /// A line in either the old file, new file, or both. /// /// A `Line` contains the terminating newline character `\n` unless it is the final /// line in the file and the file does not end with a newline character. 
#[derive(Debug, PartialEq, Eq)] pub enum Line<'a, T: ?Sized> { /// A line providing context in the diff which is present in both the old and new file Context(&'a T), /// A line deleted from the old file Delete(&'a T), /// A line inserted to the new file Insert(&'a T), } impl Copy for Line<'_, T> {} impl Clone for Line<'_, T> { fn clone(&self) -> Self { *self } } impl Line<'_, T> { pub fn reverse(&self) -> Self { match self { Line::Context(s) => Line::Context(s), Line::Delete(s) => Line::Insert(s), Line::Insert(s) => Line::Delete(s), } } } diffy-imara-0.3.2/src/patch/parse.rs000064400000000000000000000322041046102023000153650ustar 00000000000000//! Parse a Patch use super::{Hunk, HunkRange, Line, ESCAPED_CHARS_BYTES, NO_NEWLINE_AT_EOF}; use crate::{ patch::Patch, utils::{LineIter, Text}, }; use std::{borrow::Cow, fmt}; type Result = std::result::Result; /// An error returned when parsing a `Patch` using [`Patch::from_str`] fails /// /// [`Patch::from_str`]: struct.Patch.html#method.from_str // TODO use a custom error type instead of a Cow #[derive(Debug)] pub struct ParsePatchError(Cow<'static, str>); impl ParsePatchError { fn new>>(e: E) -> Self { Self(e.into()) } } impl fmt::Display for ParsePatchError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "error parsing patch: {}", self.0) } } impl std::error::Error for ParsePatchError {} struct Parser<'a, T: Text + ?Sized> { lines: std::iter::Peekable>, } impl<'a, T: Text + ?Sized> Parser<'a, T> { fn new(input: &'a T) -> Self { Self { lines: LineIter::new(input).peekable(), } } fn peek(&mut self) -> Option<&&'a T> { self.lines.peek() } fn next(&mut self) -> Result<&'a T> { let line = self .lines .next() .ok_or_else(|| ParsePatchError::new("unexpected EOF"))?; Ok(line) } } pub fn parse(input: &str) -> Result> { let mut parser = Parser::new(input); let header = patch_header(&mut parser)?; let hunks = hunks(&mut parser)?; Ok(Patch::new( header.0.map(convert_cow_to_str), 
header.1.map(convert_cow_to_str), hunks, )) } pub fn parse_bytes(input: &[u8]) -> Result> { let mut parser = Parser::new(input); let header = patch_header(&mut parser)?; let hunks = hunks(&mut parser)?; Ok(Patch::new(header.0, header.1, hunks)) } // This is only used when the type originated as a utf8 string fn convert_cow_to_str(cow: Cow<'_, [u8]>) -> Cow<'_, str> { match cow { Cow::Borrowed(b) => std::str::from_utf8(b).unwrap().into(), Cow::Owned(o) => String::from_utf8(o).unwrap().into(), } } #[allow(clippy::type_complexity)] fn patch_header<'a, T: Text + ToOwned + ?Sized>( parser: &mut Parser<'a, T>, ) -> Result<(Option>, Option>)> { skip_header_preamble(parser)?; let mut filename1 = None; let mut filename2 = None; while let Some(line) = parser.peek() { if line.starts_with("--- ") { if filename1.is_some() { return Err(ParsePatchError::new("multiple '---' lines")); } filename1 = Some(parse_filename("--- ", parser.next()?)?); } else if line.starts_with("+++ ") { if filename2.is_some() { return Err(ParsePatchError::new("multiple '+++' lines")); } filename2 = Some(parse_filename("+++ ", parser.next()?)?); } else { break; } } Ok((filename1, filename2)) } // Skip to the first filename header ("--- " or "+++ ") or hunk line, // skipping any preamble lines like "diff --git", etc. 
fn skip_header_preamble(parser: &mut Parser<'_, T>) -> Result<()> { while let Some(line) = parser.peek() { if line.starts_with("--- ") | line.starts_with("+++ ") | line.starts_with("@@ ") { break; } parser.next()?; } Ok(()) } fn parse_filename<'a, T: Text + ToOwned + ?Sized>( prefix: &str, line: &'a T, ) -> Result> { let line = line .strip_prefix(prefix) .ok_or_else(|| ParsePatchError::new("unable to parse filename"))?; let filename = if let Some((filename, _)) = line.split_at_exclusive("\t") { filename } else if let Some((filename, _)) = line.split_at_exclusive("\n") { filename } else { return Err(ParsePatchError::new("filename unterminated")); }; let filename = if let Some(quoted) = is_quoted(filename) { escaped_filename(quoted)? } else { unescaped_filename(filename)? }; Ok(filename) } fn is_quoted(s: &T) -> Option<&T> { s.strip_prefix("\"").and_then(|s| s.strip_suffix("\"")) } fn unescaped_filename(filename: &T) -> Result> { let bytes = filename.as_bytes(); if bytes.iter().any(|b| ESCAPED_CHARS_BYTES.contains(b)) { return Err(ParsePatchError::new("invalid char in unquoted filename")); } Ok(bytes.into()) } fn escaped_filename(escaped: &T) -> Result> { let mut filename = Vec::new(); let mut chars = escaped.as_bytes().iter().copied(); while let Some(c) = chars.next() { if c == b'\\' { let ch = match chars .next() .ok_or_else(|| ParsePatchError::new("expected escaped character"))? 
{ b'n' => b'\n', b't' => b'\t', b'0' => b'\0', b'r' => b'\r', b'\"' => b'\"', b'\\' => b'\\', _ => return Err(ParsePatchError::new("invalid escaped character")), }; filename.push(ch); } else if ESCAPED_CHARS_BYTES.contains(&c) { return Err(ParsePatchError::new("invalid unescaped character")); } else { filename.push(c); } } Ok(filename.into()) } fn verify_hunks_in_order(hunks: &[Hunk<'_, T>]) -> bool { for hunk in hunks.windows(2) { if hunk[0].old_range.end() > hunk[1].old_range.start() || hunk[0].new_range.end() > hunk[1].new_range.start() { return false; } } true } fn hunks<'a, T: Text + ?Sized>(parser: &mut Parser<'a, T>) -> Result>> { let mut hunks = Vec::new(); while parser.peek().is_some() { hunks.push(hunk(parser)?); } // check and verify that the Hunks are in sorted order and don't overlap if !verify_hunks_in_order(&hunks) { return Err(ParsePatchError::new("Hunks not in order or overlap")); } Ok(hunks) } fn hunk<'a, T: Text + ?Sized>(parser: &mut Parser<'a, T>) -> Result> { let (range1, range2, function_context) = hunk_header(parser.next()?)?; let lines = hunk_lines(parser)?; // check counts of lines to see if they match the ranges in the hunk header let (len1, len2) = super::hunk_lines_count(&lines); if len1 != range1.len || len2 != range2.len { return Err(ParsePatchError::new("Hunk header does not match hunk")); } Ok(Hunk::new(range1, range2, function_context, lines)) } fn hunk_header(input: &T) -> Result<(HunkRange, HunkRange, Option<&T>)> { let input = input .strip_prefix("@@ ") .ok_or_else(|| ParsePatchError::new("unable to parse hunk header"))?; let (ranges, function_context) = input .split_at_exclusive(" @@") .ok_or_else(|| ParsePatchError::new("hunk header unterminated"))?; let function_context = function_context.strip_prefix(" "); let (range1, range2) = ranges .split_at_exclusive(" ") .ok_or_else(|| ParsePatchError::new("unable to parse hunk header"))?; let range1 = range( range1 .strip_prefix("-") .ok_or_else(|| ParsePatchError::new("unable to 
parse hunk header"))?, )?; let range2 = range( range2 .strip_prefix("+") .ok_or_else(|| ParsePatchError::new("unable to parse hunk header"))?, )?; Ok((range1, range2, function_context)) } fn range(s: &T) -> Result { let (start, len) = if let Some((start, len)) = s.split_at_exclusive(",") { ( start .parse() .ok_or_else(|| ParsePatchError::new("can't parse range"))?, len.parse() .ok_or_else(|| ParsePatchError::new("can't parse range"))?, ) } else { ( s.parse() .ok_or_else(|| ParsePatchError::new("can't parse range"))?, 1, ) }; Ok(HunkRange::new(start, len)) } fn hunk_lines<'a, T: Text + ?Sized>(parser: &mut Parser<'a, T>) -> Result>> { let mut lines: Vec> = Vec::new(); let mut no_newline_context = false; let mut no_newline_delete = false; let mut no_newline_insert = false; while let Some(line) = parser.peek() { let line = if line.starts_with("@") { break; } else if no_newline_context { return Err(ParsePatchError::new("expected end of hunk")); } else if let Some(line) = line.strip_prefix(" ") { Line::Context(line) } else if line.starts_with("\n") { Line::Context(*line) } else if let Some(line) = line.strip_prefix("-") { if no_newline_delete { return Err(ParsePatchError::new("expected no more deleted lines")); } Line::Delete(line) } else if let Some(line) = line.strip_prefix("+") { if no_newline_insert { return Err(ParsePatchError::new("expected no more inserted lines")); } Line::Insert(line) } else if line.starts_with(NO_NEWLINE_AT_EOF) { let last_line = lines.pop().ok_or_else(|| { ParsePatchError::new("unexpected 'No newline at end of file' line") })?; match last_line { Line::Context(line) => { no_newline_context = true; Line::Context(strip_newline(line)?) } Line::Delete(line) => { no_newline_delete = true; Line::Delete(strip_newline(line)?) } Line::Insert(line) => { no_newline_insert = true; Line::Insert(strip_newline(line)?) 
} } } else { return Err(ParsePatchError::new("unexpected line in hunk body")); }; lines.push(line); parser.next()?; } Ok(lines) } fn strip_newline(s: &T) -> Result<&T> { if let Some(stripped) = s.strip_suffix("\n") { Ok(stripped) } else { Err(ParsePatchError::new("missing newline")) } } #[cfg(test)] mod tests { use super::{parse, parse_bytes}; #[test] fn test_escaped_filenames() { // No escaped characters let s = "\ --- original +++ modified @@ -1,0 +1,1 @@ +Oathbringer "; parse(s).unwrap(); parse_bytes(s.as_ref()).unwrap(); // unescaped characters fail parsing let s = "\ --- ori\"ginal +++ modified @@ -1,0 +1,1 @@ +Oathbringer "; parse(s).unwrap_err(); parse_bytes(s.as_ref()).unwrap_err(); // quoted with invalid escaped characters let s = "\ --- \"ori\\\"g\rinal\" +++ modified @@ -1,0 +1,1 @@ +Oathbringer "; parse(s).unwrap_err(); parse_bytes(s.as_ref()).unwrap_err(); // quoted with escaped characters let s = r#"\ --- "ori\"g\tinal" +++ "mo\0\t\r\n\\dified" @@ -1,0 +1,1 @@ +Oathbringer "#; let p = parse(s).unwrap(); assert_eq!(p.original(), Some("ori\"g\tinal")); assert_eq!(p.modified(), Some("mo\0\t\r\n\\dified")); let b = parse_bytes(s.as_ref()).unwrap(); assert_eq!(b.original(), Some(&b"ori\"g\tinal"[..])); assert_eq!(b.modified(), Some(&b"mo\0\t\r\n\\dified"[..])); } #[test] fn test_missing_filename_header() { // Missing Both '---' and '+++' lines let patch = r#" @@ -1,11 +1,12 @@ diesel::table! { users1 (id) { - id -> Nullable, + id -> Integer, } } diesel::table! 
{ - users2 (id) { - id -> Nullable, + users2 (myid) { + #[sql_name = "id"] + myid -> Integer, } } "#; parse(patch).unwrap(); // Missing '---' let s = "\ +++ modified @@ -1,0 +1,1 @@ +Oathbringer "; parse(s).unwrap(); // Missing '+++' let s = "\ --- original @@ -1,0 +1,1 @@ +Oathbringer "; parse(s).unwrap(); // Headers out of order let s = "\ +++ modified --- original @@ -1,0 +1,1 @@ +Oathbringer "; parse(s).unwrap(); // multiple headers should fail to parse let s = "\ --- original --- modified @@ -1,0 +1,1 @@ +Oathbringer "; parse(s).unwrap_err(); } #[test] fn adjacent_hunks_correctly_parse() { let s = "\ --- original +++ modified @@ -110,7 +110,7 @@ -- I am afraid, however, that all I have known - that my story - will be forgotten. I am afraid for the world that is to come. -Afraid that my plans will fail. Afraid of a doom worse than the Deepness. +Afraid that Alendi will fail. Afraid of a doom brought by the Deepness. Alendi was never the Hero of Ages. @@ -117,7 +117,7 @@ At best, I have amplified his virtues, creating a Hero where there was none. -At worst, I fear that all we believe may have been corrupted. +At worst, I fear that I have corrupted all we believe. -- Alendi must not reach the Well of Ascension. He must not take the power for himself. 
"; parse(s).unwrap(); } } diffy-imara-0.3.2/src/range.rs000064400000000000000000000421041046102023000142500ustar 00000000000000use std::{cmp, fmt::Debug, ops}; // Range type inspired by the Range type used in [dissimilar](https://docs.rs/dissimilar) #[derive(Debug, PartialEq, Eq)] pub struct Range<'a, T: ?Sized> { inner: &'a T, offset: usize, len: usize, } impl Copy for Range<'_, T> {} impl Clone for Range<'_, T> { fn clone(&self) -> Self { *self } } impl<'a, T: ?Sized> Range<'a, T> { pub fn is_empty(&self) -> bool { self.len == 0 } pub fn inner(&self) -> &'a T { self.inner } pub fn len(&self) -> usize { self.len } pub fn offset(&self) -> usize { self.offset } #[allow(dead_code)] pub fn range(&self) -> ops::Range { self.offset..self.offset + self.len } pub fn grow_up(&mut self, adjust: usize) { self.offset -= adjust; self.len += adjust; } pub fn grow_down(&mut self, adjust: usize) { self.len += adjust; } pub fn shrink_front(&mut self, adjust: usize) { self.offset += adjust; self.len -= adjust; } pub fn shrink_back(&mut self, adjust: usize) { self.len -= adjust; } pub fn shift_up(&mut self, adjust: usize) { self.offset -= adjust } pub fn shift_down(&mut self, adjust: usize) { self.offset += adjust; } pub fn slice(&self, bounds: impl RangeBounds) -> Self { let (offset, len) = bounds.index(self.len); Range { inner: self.inner, offset: self.offset + offset, len, } } pub fn get(&self, bounds: impl RangeBounds) -> Option { let (offset, len) = bounds.try_index(self.len)?; Some(Range { inner: self.inner, offset: self.offset + offset, len, }) } pub fn split_at(&self, mid: usize) -> (Self, Self) { (self.slice(..mid), self.slice(mid..)) } } impl<'a, T> Range<'a, T> where T: ?Sized + SliceLike, { pub fn new(inner: &'a T, bounds: impl RangeBounds) -> Self { let (offset, len) = bounds.index(inner.len()); Range { inner, offset, len } } #[allow(dead_code)] pub fn empty() -> Range<'a, T> { Range { inner: T::empty(), offset: 0, len: 0, } } pub fn as_slice(&self) -> &'a T { 
self.inner.as_slice(self.offset..self.offset + self.len) } pub fn common_prefix_len(&self, other: Range<'_, T>) -> usize { self.as_slice().common_prefix_len(other.as_slice()) } pub fn common_suffix_len(&self, other: Range<'_, T>) -> usize { self.as_slice().common_suffix_len(other.as_slice()) } #[allow(dead_code)] pub fn common_overlap_len(&self, other: Range<'_, T>) -> usize { self.as_slice().common_overlap_len(other.as_slice()) } #[allow(dead_code)] pub fn starts_with(&self, prefix: Range<'_, T>) -> bool { self.as_slice().starts_with(prefix.as_slice()) } #[allow(dead_code)] pub fn ends_with(&self, suffix: Range<'_, T>) -> bool { self.as_slice().ends_with(suffix.as_slice()) } } impl Default for Range<'_, T> { fn default() -> Self { Self::empty() } } pub trait RangeBounds: Sized + Clone + Debug { // Returns (offset, len). fn try_index(self, len: usize) -> Option<(usize, usize)>; fn index(self, len: usize) -> (usize, usize) { match self.clone().try_index(len) { Some(range) => range, None => panic!("index out of range, index={:?}, len={}", self, len), } } } impl RangeBounds for ops::Range { fn try_index(self, len: usize) -> Option<(usize, usize)> { if self.start <= self.end && self.end <= len { Some((self.start, self.end - self.start)) } else { None } } } impl RangeBounds for ops::RangeFrom { fn try_index(self, len: usize) -> Option<(usize, usize)> { if self.start <= len { Some((self.start, len - self.start)) } else { None } } } impl RangeBounds for ops::RangeTo { fn try_index(self, len: usize) -> Option<(usize, usize)> { if self.end <= len { Some((0, self.end)) } else { None } } } impl RangeBounds for ops::RangeFull { fn try_index(self, len: usize) -> Option<(usize, usize)> { Some((0, len)) } } pub trait SliceLike: ops::Index> { fn len(&self) -> usize; fn empty<'a>() -> &'a Self; fn as_slice(&self, range: ops::Range) -> &Self; fn common_prefix_len(&self, other: &Self) -> usize; fn common_suffix_len(&self, other: &Self) -> usize; fn common_overlap_len(&self, other: 
&Self) -> usize; fn starts_with(&self, prefix: &Self) -> bool; fn ends_with(&self, suffix: &Self) -> bool; } impl SliceLike for str { fn len(&self) -> usize { self.len() } fn empty<'a>() -> &'a str { "" } fn as_slice(&self, range: ops::Range) -> &str { &self[range] } fn common_prefix_len(&self, other: &str) -> usize { for ((i, ch1), ch2) in self.char_indices().zip(other.chars()) { if ch1 != ch2 { return i; } } cmp::min(self.len(), other.len()) } fn common_suffix_len(&self, other: &str) -> usize { for ((i, ch1), ch2) in self.char_indices().rev().zip(other.chars().rev()) { if ch1 != ch2 { return self.len() - i - ch1.len_utf8(); } } cmp::min(self.len(), other.len()) } // returns length of overlap of prefix of `self` with suffic of `other` fn common_overlap_len(&self, mut other: &str) -> usize { let mut this = self; // Eliminate the null case if this.is_empty() || other.is_empty() { return 0; } match this.len().cmp(&other.len()) { cmp::Ordering::Greater => { let mut end = other.len(); while !this.is_char_boundary(end) { end -= 1; } this = &this[..end]; } cmp::Ordering::Less => { let mut start = other.len() - this.len(); while !other.is_char_boundary(start) { start += 1; } other = &other[start..] } cmp::Ordering::Equal => {} } // Quick check for the worst case. if this == other { return this.len(); } // Start by looking for a single character match // and increase length until no match is found. 
// Performance analysis: https://neil.fraser.name/news/2010/11/04/ let mut best = 0; let mut length = 0; for (i, c) in other.char_indices().rev() { let pattern = &other[i..]; let found = match this.find(pattern) { Some(found) => found, None => return best, }; length += c.len_utf8(); if found == 0 { best = length; } } best } fn starts_with(&self, prefix: &str) -> bool { self.starts_with(prefix) } fn ends_with(&self, suffix: &str) -> bool { self.ends_with(suffix) } } impl SliceLike for [T] where T: PartialEq, { fn len(&self) -> usize { self.len() } fn empty<'a>() -> &'a [T] { &[] } fn as_slice(&self, range: ops::Range) -> &[T] { &self[range] } fn common_prefix_len(&self, other: &[T]) -> usize { for (i, (item1, item2)) in self.iter().zip(other.iter()).enumerate() { if item1 != item2 { return i; } } cmp::min(self.len(), other.len()) } fn common_suffix_len(&self, other: &[T]) -> usize { for (i, (item1, item2)) in self.iter().rev().zip(other.iter().rev()).enumerate() { if item1 != item2 { return i; } } cmp::min(self.len(), other.len()) } // returns length of overlap of prefix of `self` with suffic of `other` //TODO make a more efficient solution fn common_overlap_len(&self, other: &[T]) -> usize { let mut len = cmp::min(self.len(), other.len()); while len > 0 { if self[..len] == other[other.len() - len..] 
{ break; } len -= 1; } len } fn starts_with(&self, prefix: &Self) -> bool { self.starts_with(prefix) } fn ends_with(&self, suffix: &Self) -> bool { self.ends_with(suffix) } } #[derive(Debug, PartialEq, Eq)] pub enum DiffRange<'a, 'b, T: ?Sized> { Equal(Range<'a, T>, Range<'b, T>), Delete(Range<'a, T>), Insert(Range<'b, T>), } impl Copy for DiffRange<'_, '_, T> {} impl Clone for DiffRange<'_, '_, T> { fn clone(&self) -> Self { *self } } impl<'tmp, 'a: 'tmp, 'b: 'tmp, T> DiffRange<'a, 'b, T> where T: ?Sized + SliceLike, { pub fn inner(&self) -> Range<'tmp, T> { match *self { DiffRange::Equal(range, _) | DiffRange::Delete(range) | DiffRange::Insert(range) => { range } } } pub fn is_empty(&self) -> bool { self.inner().is_empty() } pub fn len(&self) -> usize { self.inner().len() } pub fn grow_up(&mut self, adjust: usize) { self.for_each(|range| range.grow_up(adjust)); } pub fn grow_down(&mut self, adjust: usize) { self.for_each(|range| range.grow_down(adjust)); } pub fn shrink_front(&mut self, adjust: usize) { self.for_each(|range| range.shrink_front(adjust)); } pub fn shrink_back(&mut self, adjust: usize) { self.for_each(|range| range.shrink_back(adjust)); } pub fn shift_up(&mut self, adjust: usize) { self.for_each(|range| range.shift_up(adjust)); } pub fn shift_down(&mut self, adjust: usize) { self.for_each(|range| range.shift_down(adjust)); } fn for_each(&mut self, f: impl Fn(&mut Range<'_, T>)) { match self { DiffRange::Equal(range1, range2) => { f(range1); f(range2); } DiffRange::Delete(range) => f(range), DiffRange::Insert(range) => f(range), } } } impl<'a, 'b> DiffRange<'a, 'b, [u8]> { pub fn to_str(self, text1: &'a str, text2: &'b str) -> DiffRange<'a, 'b, str> { fn boundary_down(text: &str, pos: usize) -> usize { let mut adjust = 0; while !text.is_char_boundary(pos - adjust) { adjust += 1; } adjust } fn boundary_up(text: &str, pos: usize) -> usize { let mut adjust = 0; while !text.is_char_boundary(pos + adjust) { adjust += 1; } adjust } match self { 
DiffRange::Equal(range1, range2) => { debug_assert_eq!(range1.inner().as_ptr(), text1.as_ptr()); debug_assert_eq!(range2.inner().as_ptr(), text2.as_ptr()); let mut offset1 = range1.offset(); let mut len1 = range1.len(); let mut offset2 = range2.offset(); let mut len2 = range2.len(); let adjust = boundary_up(text1, offset1); offset1 += adjust; len1 -= adjust; offset2 += adjust; len2 -= adjust; let adjust = boundary_down(text1, offset1 + len1); len1 -= adjust; len2 -= adjust; DiffRange::Equal( Range::new(text1, offset1..offset1 + len1), Range::new(text2, offset2..offset2 + len2), ) } DiffRange::Delete(range) => { debug_assert_eq!(range.inner().as_ptr(), text1.as_ptr()); let mut offset = range.offset(); let mut len = range.len(); let adjust = boundary_down(text1, offset); offset -= adjust; len += adjust; let adjust = boundary_up(text1, offset + len); len += adjust; DiffRange::Delete(Range::new(text1, offset..offset + len)) } DiffRange::Insert(range) => { debug_assert_eq!(range.inner().as_ptr(), text2.as_ptr()); let mut offset = range.offset(); let mut len = range.len(); let adjust = boundary_down(text2, offset); offset -= adjust; len += adjust; let adjust = boundary_up(text2, offset + len); len += adjust; DiffRange::Insert(Range::new(text2, offset..offset + len)) } } } } #[cfg(test)] mod tests { use super::*; #[test] fn test_common_prefix() { let text1 = Range::new("abc", ..); let text2 = Range::new("xyz", ..); assert_eq!(0, text1.common_prefix_len(text2), "Null case"); let text1 = Range::new(b"abc".as_ref(), ..); let text2 = Range::new(b"xyz".as_ref(), ..); assert_eq!(0, text1.common_prefix_len(text2), "Null case"); let text1 = Range::new("1234abcdef", ..); let text2 = Range::new("1234xyz", ..); assert_eq!(4, text1.common_prefix_len(text2), "Non-null case"); let text1 = Range::new(b"1234abcdef".as_ref(), ..); let text2 = Range::new(b"1234xyz".as_ref(), ..); assert_eq!(4, text1.common_prefix_len(text2), "Non-null case"); let text1 = Range::new("1234", ..); let text2 = 
Range::new("1234xyz", ..); assert_eq!(4, text1.common_prefix_len(text2), "Whole case"); let text1 = Range::new(b"1234".as_ref(), ..); let text2 = Range::new(b"1234xyz".as_ref(), ..); assert_eq!(4, text1.common_prefix_len(text2), "Whole case"); let snowman = "\u{2603}"; let comet = "\u{2604}"; let text1 = Range::new(snowman, ..); let text2 = Range::new(comet, ..); assert_eq!(0, text1.common_prefix_len(text2), "Unicode case"); let text1 = Range::new(snowman.as_bytes(), ..); let text2 = Range::new(comet.as_bytes(), ..); assert_eq!(2, text1.common_prefix_len(text2), "Unicode case"); } #[test] fn test_common_suffix() { let text1 = Range::new("abc", ..); let text2 = Range::new("xyz", ..); assert_eq!(0, text1.common_suffix_len(text2), "Null case"); let text1 = Range::new(b"abc".as_ref(), ..); let text2 = Range::new(b"xyz".as_ref(), ..); assert_eq!(0, text1.common_suffix_len(text2), "Null case"); let text1 = Range::new("abcdef1234", ..); let text2 = Range::new("xyz1234", ..); assert_eq!(4, text1.common_suffix_len(text2), "Non-null case"); let text1 = Range::new(b"abcdef1234".as_ref(), ..); let text2 = Range::new(b"xyz1234".as_ref(), ..); assert_eq!(4, text1.common_suffix_len(text2), "Non-null case"); let text1 = Range::new("1234", ..); let text2 = Range::new("xyz1234", ..); assert_eq!(4, text1.common_suffix_len(text2), "Whole case"); let text1 = Range::new(b"1234".as_ref(), ..); let text2 = Range::new(b"xyz1234".as_ref(), ..); assert_eq!(4, text1.common_suffix_len(text2), "Whole case"); } #[test] fn test_common_overlap() { let text1 = Range::empty(); let text2 = Range::new("abcd", ..); assert_eq!(0, text1.common_overlap_len(text2), "Null case"); let text1 = Range::empty(); let text2 = Range::new(b"abcd".as_ref(), ..); assert_eq!(0, text1.common_overlap_len(text2), "Null case"); let text1 = Range::new("abcd", ..); let text2 = Range::new("abc", ..); assert_eq!(3, text1.common_overlap_len(text2), "Whole case"); let text1 = Range::new(b"abcd".as_ref(), ..); let text2 = 
Range::new(b"abc".as_ref(), ..); assert_eq!(3, text1.common_overlap_len(text2), "Whole case"); let text1 = Range::new("123456", ..); let text2 = Range::new("abcd", ..); assert_eq!(0, text1.common_overlap_len(text2), "No overlap"); let text1 = Range::new(b"123456".as_ref(), ..); let text2 = Range::new(b"abcd".as_ref(), ..); assert_eq!(0, text1.common_overlap_len(text2), "No overlap"); let text1 = Range::new("xxxabcd", ..); let text2 = Range::new("123456xxx", ..); assert_eq!(3, text1.common_overlap_len(text2), "Overlap"); let text1 = Range::new(b"xxxabcd".as_ref(), ..); let text2 = Range::new(b"123456xxx".as_ref(), ..); assert_eq!(3, text1.common_overlap_len(text2), "Overlap"); // Some overly clever languages (C#) may treat ligatures as equal to their // component letters. E.g. U+FB01 == 'fi' let text1 = Range::new("fi", ..); let text2 = Range::new("\u{fb01}i", ..); assert_eq!(0, text1.common_overlap_len(text2), "Unicode"); } } diffy-imara-0.3.2/src/sink.rs000064400000000000000000000200421046102023000141150ustar 00000000000000use core::{hash::Hash, ops}; use imara_diff::{ intern::{InternedInput, Token}, Sink, }; use crate::range::{DiffRange, Range}; pub(crate) struct DiffyDiffRangeBuilder<'a> { before: &'a [Token], after: &'a [Token], prev_before_end: usize, prev_after_end: usize, dst: Vec>, } impl<'a> DiffyDiffRangeBuilder<'a> { pub fn from_tokens(before: &'a [Token], after: &'a [Token]) -> Self { Self { before, after, prev_before_end: 0, prev_after_end: 0, dst: vec![], } } pub fn new(input: &'a InternedInput) -> Self { Self { before: &input.before, after: &input.after, prev_before_end: 0, prev_after_end: 0, dst: vec![], } } } impl<'a> Sink for DiffyDiffRangeBuilder<'a> { type Out = Vec>; fn process_change(&mut self, before: ops::Range, after: ops::Range) { let before = before.start as usize..before.end as usize; let after = after.start as usize..after.end as usize; let unchanged_before_range = self.prev_before_end..before.start; let hunk_before_range = 
before.start..before.end; let unchanged_after_range = self.prev_after_end..after.start; let hunk_after_range = after.start..after.end; if !unchanged_before_range.is_empty() || !unchanged_after_range.is_empty() { self.dst.push(DiffRange::Equal( Range::new(self.before, unchanged_before_range), Range::new(self.after, unchanged_after_range), )); } if !hunk_before_range.is_empty() { self.dst.push(DiffRange::Delete(Range::new( self.before, hunk_before_range, ))); } if !hunk_after_range.is_empty() { self.dst .push(DiffRange::Insert(Range::new(self.after, hunk_after_range))); }; (self.prev_before_end, self.prev_after_end) = (before.end, after.end); } fn finish(mut self) -> Self::Out { let before_till_end = self.prev_before_end..self.before.len(); let after_till_end = self.prev_after_end..self.after.len(); if !before_till_end.is_empty() || !after_till_end.is_empty() { self.dst.push(DiffRange::Equal( Range::new(self.before, before_till_end), Range::new(self.after, after_till_end), )); } self.dst } } #[cfg(test)] mod test { use super::*; use imara_diff::{intern::InternedInput, sources::lines_with_terminator}; #[test] fn equal_insert_equal() { let before = "A\nB\nD\n"; let after = "A\nB\nC\nD\n"; let input = InternedInput::new(lines_with_terminator(before), lines_with_terminator(after)); let diff = imara_diff::diff_with_tokens( imara_diff::Algorithm::Histogram, &input.before, &input.after, input.interner.num_tokens(), DiffyDiffRangeBuilder::new(&input), ); assert_eq!( &diff, &[ DiffRange::Equal( Range::new(input.before.as_slice(), 0..2), Range::new(&input.after, 0..2), ), DiffRange::Insert(Range::new(&input.after, 2..3)), DiffRange::Equal( Range::new(&input.before, 2..3), Range::new(&input.after, 3..4) ) ] ); } #[test] fn equal_insert_equal_delete_equal() { let before = "A\nC\nD\nE\n"; let after = "A\nB\nC\nE\n"; let input = InternedInput::new(lines_with_terminator(before), lines_with_terminator(after)); let diff = imara_diff::diff_with_tokens( 
imara_diff::Algorithm::Histogram, &input.before, &input.after, input.interner.num_tokens(), DiffyDiffRangeBuilder::new(&input), ); assert_eq!( &diff, &[ DiffRange::Equal( Range::new(input.before.as_slice(), 0..1), Range::new(&input.after, 0..1), ), DiffRange::Insert(Range::new(&input.after, 1..2)), DiffRange::Equal( Range::new(&input.before, 1..2), Range::new(&input.after, 2..3) ), DiffRange::Delete(Range::new(&input.before, 2..3)), DiffRange::Equal( Range::new(&input.before, 3..4), Range::new(&input.after, 3..4) ) ] ); } #[test] fn equal_delete_insert_equal() { let before = "A\nD\nE\n"; let after = "A\nB\nE\n"; let input = InternedInput::new(lines_with_terminator(before), lines_with_terminator(after)); let diff = imara_diff::diff_with_tokens( imara_diff::Algorithm::Histogram, &input.before, &input.after, input.interner.num_tokens(), DiffyDiffRangeBuilder::new(&input), ); assert_eq!( &diff, &[ DiffRange::Equal( Range::new(input.before.as_slice(), 0..1), Range::new(&input.after, 0..1), ), DiffRange::Delete(Range::new(&input.before, 1..2)), DiffRange::Insert(Range::new(&input.after, 1..2)), DiffRange::Equal( Range::new(&input.before, 2..3), Range::new(&input.after, 2..3) ) ] ); } #[test] fn insert_equal() { let before = "B\n"; let after = "A\nB\n"; let input = InternedInput::new(lines_with_terminator(before), lines_with_terminator(after)); let diff = imara_diff::diff_with_tokens( imara_diff::Algorithm::Histogram, &input.before, &input.after, input.interner.num_tokens(), DiffyDiffRangeBuilder::new(&input), ); assert_eq!( &diff, &[ DiffRange::Insert(Range::new(input.after.as_slice(), 0..1)), DiffRange::Equal( Range::new(&input.before, 0..1), Range::new(&input.after, 1..2), ), ] ); } #[test] fn insert() { let before = ""; let after = "A\n"; let input = InternedInput::new(lines_with_terminator(before), lines_with_terminator(after)); let diff = imara_diff::diff_with_tokens( imara_diff::Algorithm::Histogram, &input.before, &input.after, input.interner.num_tokens(), 
DiffyDiffRangeBuilder::new(&input), ); assert_eq!( &diff, &[DiffRange::Insert(Range::new(input.after.as_slice(), 0..1))] ); } #[test] fn delete() { let before = "A\n"; let after = ""; let input = InternedInput::new(lines_with_terminator(before), lines_with_terminator(after)); let diff = imara_diff::diff_with_tokens( imara_diff::Algorithm::Histogram, &input.before, &input.after, input.interner.num_tokens(), DiffyDiffRangeBuilder::new(&input), ); assert_eq!( &diff, &[DiffRange::Delete(Range::new(input.before.as_slice(), 0..1))] ); } #[test] fn empty() { let before = ""; let after = ""; let input = InternedInput::new(lines_with_terminator(before), lines_with_terminator(after)); let diff = imara_diff::diff_with_tokens( imara_diff::Algorithm::Histogram, &input.before, &input.after, input.interner.num_tokens(), DiffyDiffRangeBuilder::new(&input), ); assert_eq!(&diff, &[]); } } diffy-imara-0.3.2/src/utils.rs000064400000000000000000000174761046102023000143320ustar 00000000000000//! Common utilities use std::hash::Hash; use imara_diff::intern::{Interner, Token, TokenSource}; // TODO: remove the trait bounds on new release of imara-diff // /// Similar to `InternedInput`, but takes 3 files instead of 2 #[derive(Default)] pub struct InternedMergeInput { /// The base revision, aka. "ancestor" pub base: Vec, /// The left revision, aka. "ours" pub left: Vec, /// The right revision, aka. 
"theirs" pub right: Vec, pub interner: Interner, } impl InternedMergeInput { pub fn new>(base: I, left: I, right: I) -> Self { let token_estimate_base = base.estimate_tokens() as usize; let token_estimate_left = left.estimate_tokens() as usize; let token_estimate_right = right.estimate_tokens() as usize; let mut res = Self { base: Vec::with_capacity(token_estimate_base), left: Vec::with_capacity(token_estimate_left), right: Vec::with_capacity(token_estimate_right), interner: Interner::new( token_estimate_base + token_estimate_left + token_estimate_right, ), }; res.update_base(base.tokenize()); res.update_left(left.tokenize()); res.update_right(right.tokenize()); res } /// replaces `self.base` wtih the iterned Tokens yielded by `input` /// Note that this does not erase any tokens from the interner and might therefore be considered /// a memory leak. If this function is called often over a long-running process /// consider clearing the interner with [`clear`](crate::intern::InternedMergeInput::clear). pub fn update_base(&mut self, input: impl Iterator) { self.base.clear(); self.base .extend(input.map(|token| self.interner.intern(token))); } /// replaces `self.left` wtih the iterned Tokens yielded by `input` /// Note that this does not erase any tokens from the interner and might therefore be considered /// a memory leak. If this function is called often over a long-running process /// consider clearing the interner with [`clear`](crate::intern::InternedMergeInput::clear) or /// [`erase_tokens_after`](https://docs.rs/imara-diff/latest/imara_diff/intern/struct.Interner.html#method.erase_tokens_after). pub fn update_left(&mut self, input: impl Iterator) { self.left.clear(); self.left .extend(input.map(|token| self.interner.intern(token))); } /// replaces `self.right` wtih the iterned Tokens yielded by `input` /// Note that this does not erase any tokens from the interner and might therefore be considered /// a memory leak. 
/// Iterator over the lines of a string, including the `\n` character.
///
/// Works on any [`Text`] implementor (`str` and `[u8]` here). The wrapped
/// reference is the not-yet-yielded remainder of the input.
pub struct LineIter<'a, T: ?Sized>(&'a T);

impl<'a, T: ?Sized> LineIter<'a, T> {
    /// Creates an iterator over the lines of `text`.
    pub fn new(text: &'a T) -> Self {
        Self(text)
    }
}

impl<'a, T: Text + ?Sized> Iterator for LineIter<'a, T> {
    type Item = &'a T;

    /// Yields the next line with its trailing `\n` (if any) still attached.
    ///
    /// A final line without a newline is yielded as-is; empty input yields
    /// nothing.
    fn next(&mut self) -> Option<Self::Item> {
        if self.0.is_empty() {
            return None;
        }

        // Cut just past the newline so the terminator stays with its line;
        // without a newline the rest of the input is the last line.
        let end = self.0.find("\n").map_or(self.0.len(), |idx| idx + 1);

        let (line, remaining) = self.0.split_at(end);
        self.0 = remaining;
        Some(line)
    }
}

/// A helper trait for processing text like `str` and `[u8]`
/// Useful for abstracting over those types for parsing as well as breaking input into lines
pub trait Text: Eq + Hash {
    /// Returns `true` if the text has a length of zero bytes.
    fn is_empty(&self) -> bool;
    /// Returns the length of the text in bytes.
    fn len(&self) -> usize;
    /// Returns `true` if the text begins with `prefix`.
    fn starts_with(&self, prefix: &str) -> bool;
    /// Returns `true` if the text ends with `suffix`.
    // Part of the trait's surface even where the crate does not call it.
    #[allow(unused)]
    fn ends_with(&self, suffix: &str) -> bool;
    /// Returns the text with `prefix` removed, or `None` if it does not start with it.
    fn strip_prefix(&self, prefix: &str) -> Option<&Self>;
    /// Returns the text with `suffix` removed, or `None` if it does not end with it.
    fn strip_suffix(&self, suffix: &str) -> Option<&Self>;
    /// Splits around the first occurrence of `needle`, returning the parts
    /// before and after it (the needle itself is dropped).
    fn split_at_exclusive(&self, needle: &str) -> Option<(&Self, &Self)>;
    /// Returns the byte offset of the first occurrence of `needle`.
    fn find(&self, needle: &str) -> Option<usize>;
    /// Splits the text into `[..mid]` and `[mid..]`.
    fn split_at(&self, mid: usize) -> (&Self, &Self);
    /// Returns the text as a `&str`, or `None` if it is not valid UTF-8.
    fn as_str(&self) -> Option<&str>;
    /// Returns the raw bytes of the text.
    fn as_bytes(&self) -> &[u8];
    /// Returns an iterator over the lines of the text, newlines included.
    // Part of the trait's surface even where the crate does not call it.
    #[allow(unused)]
    fn lines(&self) -> LineIter<'_, Self>;

    /// Parses the text into `T`; `None` if it is not UTF-8 or fails to parse.
    fn parse<T: std::str::FromStr>(&self) -> Option<T> {
        self.as_str().and_then(|s| s.parse().ok())
    }
}

// The impls below call the inherent `str` / `[u8]` methods through explicit
// paths so it is obvious they do not recurse into the trait methods of the
// same name.
impl Text for str {
    fn is_empty(&self) -> bool {
        str::is_empty(self)
    }

    fn len(&self) -> usize {
        str::len(self)
    }

    fn starts_with(&self, prefix: &str) -> bool {
        str::starts_with(self, prefix)
    }

    fn ends_with(&self, suffix: &str) -> bool {
        str::ends_with(self, suffix)
    }

    fn strip_prefix(&self, prefix: &str) -> Option<&Self> {
        str::strip_prefix(self, prefix)
    }

    fn strip_suffix(&self, suffix: &str) -> Option<&Self> {
        str::strip_suffix(self, suffix)
    }

    fn split_at_exclusive(&self, needle: &str) -> Option<(&Self, &Self)> {
        str::split_once(self, needle)
    }

    fn find(&self, needle: &str) -> Option<usize> {
        str::find(self, needle)
    }

    fn split_at(&self, mid: usize) -> (&Self, &Self) {
        str::split_at(self, mid)
    }

    fn as_str(&self) -> Option<&str> {
        Some(self)
    }

    fn as_bytes(&self) -> &[u8] {
        str::as_bytes(self)
    }

    fn lines(&self) -> LineIter<'_, Self> {
        LineIter::new(self)
    }
}

impl Text for [u8] {
    fn is_empty(&self) -> bool {
        <[u8]>::is_empty(self)
    }

    fn len(&self) -> usize {
        <[u8]>::len(self)
    }

    fn starts_with(&self, prefix: &str) -> bool {
        <[u8]>::starts_with(self, prefix.as_bytes())
    }

    fn ends_with(&self, suffix: &str) -> bool {
        <[u8]>::ends_with(self, suffix.as_bytes())
    }

    fn strip_prefix(&self, prefix: &str) -> Option<&Self> {
        <[u8]>::strip_prefix(self, prefix.as_bytes())
    }

    fn strip_suffix(&self, suffix: &str) -> Option<&Self> {
        <[u8]>::strip_suffix(self, suffix.as_bytes())
    }

    fn split_at_exclusive(&self, needle: &str) -> Option<(&Self, &Self)> {
        find_bytes(self, needle.as_bytes())
            .map(|idx| (&self[..idx], &self[idx + needle.len()..]))
    }

    fn find(&self, needle: &str) -> Option<usize> {
        find_bytes(self, needle.as_bytes())
    }

    fn split_at(&self, mid: usize) -> (&Self, &Self) {
        <[u8]>::split_at(self, mid)
    }

    fn as_str(&self) -> Option<&str> {
        std::str::from_utf8(self).ok()
    }

    fn as_bytes(&self) -> &[u8] {
        self
    }

    fn lines(&self) -> LineIter<'_, Self> {
        LineIter::new(self)
    }
}

/// Returns the offset of the first occurrence of `needle` in `haystack`.
///
/// An empty needle matches at offset 0. The search jumps from one occurrence
/// of the needle's first byte to the next and compares the full needle there.
fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    match needle {
        [] => Some(0),
        [byte] => find_byte(haystack, *byte),
        _ if needle.len() > haystack.len() => None,
        [first, ..] => {
            // Absolute index in `haystack` from which the scan resumes.
            let mut offset = 0;

            while let Some(position) = find_byte(&haystack[offset..], *first) {
                let start = offset + position;

                // If the needle no longer fits at `start`, it cannot fit at
                // any later candidate either, so give up right away.
                let candidate = haystack.get(start..start + needle.len())?;
                if candidate == needle {
                    return Some(start);
                }

                offset = start + 1;
            }

            None
        }
    }
}

// XXX Maybe use `memchr`?
/// Returns the offset of the first occurrence of `byte` in `haystack`.
fn find_byte(haystack: &[u8], byte: u8) -> Option<usize> {
    haystack.iter().position(|&b| b == byte)
}