├── .github └── workflows │ ├── ci.yml │ └── rustdoc.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.md └── src ├── apply.rs ├── diff ├── cleanup.rs ├── mod.rs ├── myers.rs └── tests.rs ├── lib.rs ├── merge ├── mod.rs └── tests.rs ├── patch ├── format.rs ├── mod.rs └── parse.rs ├── range.rs └── utils.rs /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | env: 8 | CARGO_INCREMENTAL: 0 9 | RUSTFLAGS: -D warnings 10 | 11 | jobs: 12 | test: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Checkout repository 17 | uses: actions/checkout@v2 18 | 19 | - name: Install Rust toolchain 20 | uses: actions-rs/toolchain@v1 21 | with: 22 | toolchain: stable 23 | override: true 24 | components: rustfmt, clippy 25 | 26 | - name: Lint 27 | run: | 28 | cargo fmt -- --check 29 | cargo clippy --all-targets 30 | 31 | - name: Build Documentation 32 | run: cargo doc --no-deps 33 | 34 | - name: Run tests 35 | run: cargo test 36 | 37 | minimum-supported-rust-version: 38 | runs-on: ubuntu-latest 39 | steps: 40 | - uses: actions/checkout@v2 41 | - uses: actions-rs/toolchain@v1 42 | with: 43 | toolchain: 1.62.1 44 | override: true 45 | - run: cargo check 46 | -------------------------------------------------------------------------------- /.github/workflows/rustdoc.yml: -------------------------------------------------------------------------------- 1 | name: rustdoc 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | 7 | env: 8 | CARGO_INCREMENTAL: 0 9 | RUSTFLAGS: -D warnings 10 | 11 | jobs: 12 | rustdoc: 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - name: Checkout repository 17 | uses: actions/checkout@v2 18 | 19 | - name: Install Rust toolchain 20 | uses: actions-rs/toolchain@v1 21 | with: 22 | toolchain: stable 23 | override: true 24 | 25 | - name: Build Documentation 26 | run: cargo doc --no-deps 27 | 28 | - name: Deploy Docs 29 | uses: peaceiris/actions-gh-pages@v3 30 | with: 31 | github_token: ${{ secrets.GITHUB_TOKEN }} 32 | publish_branch: gh-pages 33 | publish_dir: ./target/doc 34 | force_orphan: true 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [0.4.2] - 2025-01-29 4 | 5 | ### Added 6 | - [#37](https://github.com/bmwill/diffy/pull/37) Allow configuring the "No 7 | newline at end of file" message from being printed when formatting a patch. 8 | - [#38](https://github.com/bmwill/diffy/pull/38) Add support for configuring 9 | `suppress_blank_empty`. 10 | 11 | ## [0.4.1] - 2025-01-29 12 | 13 | ### Added 14 | - [#36](https://github.com/bmwill/diffy/pull/36) Add ability to configure 15 | filenames when creating a patch with `DiffOptions`. 16 | 17 | ## [0.4.0] - 2024-06-14 18 | 19 | ### Fixed 20 | - [#28](https://github.com/bmwill/diffy/issues/28) Fixed an issue where 21 | conflicts were being omitted from merges. 22 | 23 | ### Added 24 | - [#26](https://github.com/bmwill/diffy/pull/26) Add ability to reverse a 25 | patch. 26 | 27 | ### Changed 28 | - [#29](https://github.com/bmwill/diffy/pull/29) Bump minimum supported rust 29 | version (msrv) to 1.62.1. 
30 | - [#22](https://github.com/bmwill/diffy/pull/22) update nu-ansi-term dependency 31 | to 0.50. 32 | 33 | ## [0.3.0] - 2022-08-29 34 | 35 | ### Fixed 36 | - [#17](https://github.com/bmwill/diffy/issues/17) Fix an issue which resulted 37 | in a large slowdown when applying a patch with incorrect hunk headers. 38 | - [#18](https://github.com/bmwill/diffy/pull/18) Replace unmaintained ansi_term 39 | dependency with nu_ansi_term in order to address 40 | [RUSTSEC-2021-0139](https://rustsec.org/advisories/RUSTSEC-2021-0139). 41 | 42 | ### Changed 43 | - [#19](https://github.com/bmwill/diffy/pull/19) Bump minimum supported rust 44 | version (msrv) to 1.51.0. 45 | 46 | ## [0.2.2] - 2022-01-31 47 | 48 | ### Fixed 49 | - [#16](https://github.com/bmwill/diffy/issues/16) Fix an issue where patch 50 | files failed to parse when they contained hunks which were adjacent to one 51 | another. 52 | 53 | ## [0.2.1] - 2021-01-27 54 | 55 | ### Fixed 56 | - [#9](https://github.com/bmwill/diffy/issues/9) Fix an issue where the incorrect 57 | range was being used to index an array when calculating a merge resulting in a 58 | panic in some cases. 59 | 60 | ## [0.2.0] - 2020-07-07 61 | ### Added 62 | - Support for working with potentially non-utf8 data with the addition of 63 | various `*_bytes` functions. 64 | - Support for writing both utf8 and non-utf8 patches into a writer `W: io::write` 65 | - Support for a minimum supported rust version (msrv) of 1.36.0. 66 | 67 | ### Changed 68 | - The `Patch` type is now generic across the text type, either `str` for utf8 69 | text and `[u8]` for potentially non-utf8 texts. 70 | - The filenames for the original and modified files of a patch are now 71 | optional. This means that patches which don't include filename headers 72 | (only include hunks) can now properly be parsed. 73 | 74 | ### Fixed 75 | - Quoted filenames which include escaped characters are now properly parsed. 76 | 77 | ## [0.1.1] - 2020-07-01 78 | ### Added 79 | - `Patch`es can now be parsed from strings with `Patch::from_str` 80 | - A `Patch` can now be applied to a base image with `apply` 81 | 82 | ## [0.1.0] - 2020-06-30 83 | - Initial release. 
84 | 85 | [0.4.2]: https://github.com/bmwill/diffy/releases/tag/0.4.2 86 | [0.4.1]: https://github.com/bmwill/diffy/releases/tag/0.4.1 87 | [0.4.0]: https://github.com/bmwill/diffy/releases/tag/0.4.0 88 | [0.3.0]: https://github.com/bmwill/diffy/releases/tag/0.3.0 89 | [0.2.2]: https://github.com/bmwill/diffy/releases/tag/0.2.2 90 | [0.2.1]: https://github.com/bmwill/diffy/releases/tag/0.2.1 91 | [0.2.0]: https://github.com/bmwill/diffy/releases/tag/0.2.0 92 | [0.1.1]: https://github.com/bmwill/diffy/releases/tag/0.1.1 93 | [0.1.0]: https://github.com/bmwill/diffy/releases/tag/0.1.0 94 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "diffy" 3 | version = "0.4.2" 4 | authors = ["Brandon Williams "] 5 | license = "MIT OR Apache-2.0" 6 | description = "Tools for finding and manipulating differences between files" 7 | documentation = "https://docs.rs/diffy" 8 | repository = "https://github.com/bmwill/diffy" 9 | readme = "README.md" 10 | keywords = ["diff", "patch", "merge"] 11 | categories = ["text-processing"] 12 | rust-version = "1.62.1" 13 | edition = "2021" 14 | 15 | [dependencies] 16 | nu-ansi-term = "0.50" 17 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any 2 | person obtaining a copy of this software and associated 3 | documentation files (the "Software"), to deal in the 4 | Software without restriction, including without 5 | limitation the rights to use, copy, modify, merge, 6 | publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following 9 | conditions: 10 | 11 | The above copyright notice and this permission notice 12 | shall be included in all copies or substantial portions 13 | of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # diffy 2 | 3 | [![diffy on crates.io](https://img.shields.io/crates/v/diffy)](https://crates.io/crates/diffy) 4 | [![Documentation (latest release)](https://docs.rs/diffy/badge.svg)](https://docs.rs/diffy/) 5 | [![Documentation (master)](https://img.shields.io/badge/docs-master-59f)](https://bmwill.github.io/diffy/diffy/) 6 | [![License](https://img.shields.io/badge/license-Apache-green.svg)](LICENSE-APACHE) 7 | [![License](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE-MIT) 8 | 9 | Tools for finding and manipulating differences between files 10 | 11 | ## License 12 | 13 | This project is available under the terms of either the [Apache 2.0 14 | license](LICENSE-APACHE) or the [MIT license](LICENSE-MIT). 
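## Example

A minimal sketch of the typical workflow, assuming the `create_patch`, `Patch::from_str`, and `apply` functions exported by this crate (their implementations appear in `src/diff/mod.rs` and `src/apply.rs` below):

```rust
use diffy::{apply, create_patch, Patch};

fn main() {
    let original = "The Way of Kings\nWords of Radiance\n";
    let modified = "The Way of Kings\nWords of Radiance\nOathbringer\n";

    // Produce a unified-diff style patch describing original -> modified.
    let patch = create_patch(original, modified);
    let patch_text = patch.to_string();

    // The textual patch can be parsed back into a `Patch`...
    let reparsed = Patch::from_str(&patch_text).unwrap();

    // ...and applied to the original text to reconstruct the modified one.
    assert_eq!(apply(original, &reparsed).unwrap(), modified);
}
```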
15 | -------------------------------------------------------------------------------- /src/apply.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | patch::{Hunk, Line, Patch}, 3 | utils::LineIter, 4 | }; 5 | use std::{fmt, iter}; 6 | 7 | /// An error returned when [`apply`]ing a `Patch` fails 8 | /// 9 | /// [`apply`]: fn.apply.html 10 | #[derive(Debug)] 11 | pub struct ApplyError(usize); 12 | 13 | impl fmt::Display for ApplyError { 14 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 15 | write!(f, "error applying hunk #{}", self.0) 16 | } 17 | } 18 | 19 | impl std::error::Error for ApplyError {} 20 | 21 | #[derive(Debug)] 22 | enum ImageLine<'a, T: ?Sized> { 23 | Unpatched(&'a T), 24 | Patched(&'a T), 25 | } 26 | 27 | impl<'a, T: ?Sized> ImageLine<'a, T> { 28 | fn inner(&self) -> &'a T { 29 | match self { 30 | ImageLine::Unpatched(inner) | ImageLine::Patched(inner) => inner, 31 | } 32 | } 33 | 34 | fn into_inner(self) -> &'a T { 35 | self.inner() 36 | } 37 | 38 | fn is_patched(&self) -> bool { 39 | match self { 40 | ImageLine::Unpatched(_) => false, 41 | ImageLine::Patched(_) => true, 42 | } 43 | } 44 | } 45 | 46 | impl Copy for ImageLine<'_, T> {} 47 | 48 | impl Clone for ImageLine<'_, T> { 49 | fn clone(&self) -> Self { 50 | *self 51 | } 52 | } 53 | 54 | /// Apply a `Patch` to a base image 55 | /// 56 | /// ``` 57 | /// use diffy::{apply, Patch}; 58 | /// 59 | /// let s = "\ 60 | /// --- a/ideals 61 | /// +++ b/ideals 62 | /// @@ -1,4 +1,6 @@ 63 | /// First: 64 | /// Life before death, 65 | /// strength before weakness, 66 | /// journey before destination. 67 | /// +Second: 68 | /// + I will protect those who cannot protect themselves. 69 | /// "; 70 | /// 71 | /// let patch = Patch::from_str(s).unwrap(); 72 | /// 73 | /// let base_image = "\ 74 | /// First: 75 | /// Life before death, 76 | /// strength before weakness, 77 | /// journey before destination. 78 | /// "; 79 | /// 80 | /// let expected = "\ 81 | /// First: 82 | /// Life before death, 83 | /// strength before weakness, 84 | /// journey before destination. 85 | /// Second: 86 | /// I will protect those who cannot protect themselves. 
87 | /// "; 88 | 89 | /// assert_eq!(apply(base_image, &patch).unwrap(), expected); 90 | /// ``` 91 | pub fn apply(base_image: &str, patch: &Patch<'_, str>) -> Result { 92 | let mut image: Vec<_> = LineIter::new(base_image) 93 | .map(ImageLine::Unpatched) 94 | .collect(); 95 | 96 | for (i, hunk) in patch.hunks().iter().enumerate() { 97 | apply_hunk(&mut image, hunk).map_err(|_| ApplyError(i + 1))?; 98 | } 99 | 100 | Ok(image.into_iter().map(ImageLine::into_inner).collect()) 101 | } 102 | 103 | /// Apply a non-utf8 `Patch` to a base image 104 | pub fn apply_bytes(base_image: &[u8], patch: &Patch<'_, [u8]>) -> Result, ApplyError> { 105 | let mut image: Vec<_> = LineIter::new(base_image) 106 | .map(ImageLine::Unpatched) 107 | .collect(); 108 | 109 | for (i, hunk) in patch.hunks().iter().enumerate() { 110 | apply_hunk(&mut image, hunk).map_err(|_| ApplyError(i + 1))?; 111 | } 112 | 113 | Ok(image 114 | .into_iter() 115 | .flat_map(ImageLine::into_inner) 116 | .copied() 117 | .collect()) 118 | } 119 | 120 | fn apply_hunk<'a, T: PartialEq + ?Sized>( 121 | image: &mut Vec>, 122 | hunk: &Hunk<'a, T>, 123 | ) -> Result<(), ()> { 124 | // Find position 125 | let pos = find_position(image, hunk).ok_or(())?; 126 | 127 | // update image 128 | image.splice( 129 | pos..pos + pre_image_line_count(hunk.lines()), 130 | post_image(hunk.lines()).map(ImageLine::Patched), 131 | ); 132 | 133 | Ok(()) 134 | } 135 | 136 | // Search in `image` for a place to apply hunk. 137 | // This follows the general algorithm (minus fuzzy-matching context lines) described in GNU patch's 138 | // man page. 139 | // 140 | // It might be worth looking into other possible positions to apply the hunk to as described here: 141 | // https://neil.fraser.name/writing/patch/ 142 | fn find_position( 143 | image: &[ImageLine], 144 | hunk: &Hunk<'_, T>, 145 | ) -> Option { 146 | // In order to avoid searching through positions which are out of bounds of the image, 147 | // clamp the starting position based on the length of the image 148 | let pos = std::cmp::min(hunk.new_range().start().saturating_sub(1), image.len()); 149 | 150 | // Create an iterator that starts with 'pos' and then interleaves 151 | // moving pos backward/forward by one.
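// (Illustrative note, not part of the original source: with pos = 5 in a 10-line image, candidate positions are probed in the order 5, 4, 6, 3, 7, 2, 8, 1, 9, 0.)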
152 | let backward = (0..pos).rev(); 153 | let forward = pos + 1..image.len(); 154 | 155 | iter::once(pos) 156 | .chain(interleave(backward, forward)) 157 | .find(|&pos| match_fragment(image, hunk.lines(), pos)) 158 | } 159 | 160 | fn pre_image_line_count(lines: &[Line<'_, T>]) -> usize { 161 | pre_image(lines).count() 162 | } 163 | 164 | fn post_image<'a, 'b, T: ?Sized>(lines: &'b [Line<'a, T>]) -> impl Iterator + 'b { 165 | lines.iter().filter_map(|line| match line { 166 | Line::Context(l) | Line::Insert(l) => Some(*l), 167 | Line::Delete(_) => None, 168 | }) 169 | } 170 | 171 | fn pre_image<'a, 'b, T: ?Sized>(lines: &'b [Line<'a, T>]) -> impl Iterator + 'b { 172 | lines.iter().filter_map(|line| match line { 173 | Line::Context(l) | Line::Delete(l) => Some(*l), 174 | Line::Insert(_) => None, 175 | }) 176 | } 177 | 178 | fn match_fragment( 179 | image: &[ImageLine], 180 | lines: &[Line<'_, T>], 181 | pos: usize, 182 | ) -> bool { 183 | let len = pre_image_line_count(lines); 184 | 185 | let image = if let Some(image) = image.get(pos..pos + len) { 186 | image 187 | } else { 188 | return false; 189 | }; 190 | 191 | // If any of these lines have already been patched then we can't match at this position 192 | if image.iter().any(ImageLine::is_patched) { 193 | return false; 194 | } 195 | 196 | pre_image(lines).eq(image.iter().map(ImageLine::inner)) 197 | } 198 | 199 | #[derive(Debug)] 200 | struct Interleave { 201 | a: iter::Fuse, 202 | b: iter::Fuse, 203 | flag: bool, 204 | } 205 | 206 | fn interleave( 207 | i: I, 208 | j: J, 209 | ) -> Interleave<::IntoIter, ::IntoIter> 210 | where 211 | I: IntoIterator, 212 | J: IntoIterator, 213 | { 214 | Interleave { 215 | a: i.into_iter().fuse(), 216 | b: j.into_iter().fuse(), 217 | flag: false, 218 | } 219 | } 220 | 221 | impl Iterator for Interleave 222 | where 223 | I: Iterator, 224 | J: Iterator, 225 | { 226 | type Item = I::Item; 227 | 228 | fn next(&mut self) -> Option { 229 | self.flag = !self.flag; 230 | if self.flag { 231 | match self.a.next() { 232 | None => self.b.next(), 233 | item => item, 234 | } 235 | } else { 236 | match self.b.next() { 237 | None => self.a.next(), 238 | item => item, 239 | } 240 | } 241 | } 242 | } 243 | -------------------------------------------------------------------------------- /src/diff/cleanup.rs: -------------------------------------------------------------------------------- 1 | use crate::range::{DiffRange, SliceLike}; 2 | 3 | // Walks through all edits and shifts them up and then down, trying to see if they run into similar 4 | // edits which can be merged 5 | #[allow(clippy::needless_lifetimes)] 6 | pub fn compact<'a, 'b, T: ?Sized + SliceLike>(diffs: &mut Vec>) { 7 | // First attempt to compact all Deletions 8 | let mut pointer = 0; 9 | while let Some(&diff) = diffs.get(pointer) { 10 | if let DiffRange::Delete(_) = diff { 11 | pointer = shift_diff_up(diffs, pointer); 12 | pointer = shift_diff_down(diffs, pointer); 13 | } 14 | pointer += 1; 15 | } 16 | 17 | // TODO maybe able to merge these and do them in the same pass? 18 | // Then attempt to compact all Insertions 19 | let mut pointer = 0; 20 | while let Some(&diff) = diffs.get(pointer) { 21 | if let DiffRange::Insert(_) = diff { 22 | pointer = shift_diff_up(diffs, pointer); 23 | pointer = shift_diff_down(diffs, pointer); 24 | } 25 | pointer += 1; 26 | } 27 | } 28 | 29 | // Attempts to shift the Insertion or Deletion at location `pointer` as far upwards as possible. 
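// (Illustrative example, not part of the original source: in [Equal("ab"), Delete("cab"), Equal("x")] the deleted text "cab" ends with the same "ab" that makes up the preceding equal run, so shifting the deletion up yields the more compact [Delete("abc"), Equal("abx")].)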
30 | #[allow(clippy::needless_lifetimes)] 31 | fn shift_diff_up<'a, 'b, T: ?Sized + SliceLike>( 32 | diffs: &mut Vec>, 33 | mut pointer: usize, 34 | ) -> usize { 35 | while let Some(&prev_diff) = pointer.checked_sub(1).and_then(|idx| diffs.get(idx)) { 36 | match (diffs[pointer], prev_diff) { 37 | // 38 | // Shift Inserts Upwards 39 | // 40 | (DiffRange::Insert(this_diff), DiffRange::Equal(prev_diff1, _)) => { 41 | // check common suffix for the amount we can shift 42 | let suffix_len = this_diff.common_suffix_len(prev_diff1); 43 | if suffix_len != 0 { 44 | if let Some(DiffRange::Equal(..)) = diffs.get(pointer + 1) { 45 | diffs[pointer + 1].grow_up(suffix_len); 46 | } else { 47 | diffs.insert( 48 | pointer + 1, 49 | DiffRange::Equal( 50 | prev_diff1.slice(prev_diff1.len() - suffix_len..), 51 | this_diff.slice(this_diff.len() - suffix_len..), 52 | ), 53 | ); 54 | } 55 | diffs[pointer].shift_up(suffix_len); 56 | diffs[pointer - 1].shrink_back(suffix_len); 57 | 58 | if diffs[pointer - 1].is_empty() { 59 | diffs.remove(pointer - 1); 60 | pointer -= 1; 61 | } 62 | } else if diffs[pointer - 1].is_empty() { 63 | diffs.remove(pointer - 1); 64 | pointer -= 1; 65 | } else { 66 | // We can't shift upwards anymore 67 | break; 68 | } 69 | } 70 | 71 | // 72 | // Shift Deletions Upwards 73 | // 74 | (DiffRange::Delete(this_diff), DiffRange::Equal(_, prev_diff2)) => { 75 | // check common suffix for the amount we can shift 76 | let suffix_len = this_diff.common_suffix_len(prev_diff2); 77 | if suffix_len != 0 { 78 | if let Some(DiffRange::Equal(..)) = diffs.get(pointer + 1) { 79 | diffs[pointer + 1].grow_up(suffix_len); 80 | } else { 81 | diffs.insert( 82 | pointer + 1, 83 | DiffRange::Equal( 84 | this_diff.slice(this_diff.len() - suffix_len..), 85 | prev_diff2.slice(prev_diff2.len() - suffix_len..), 86 | ), 87 | ); 88 | } 89 | diffs[pointer].shift_up(suffix_len); 90 | diffs[pointer - 1].shrink_back(suffix_len); 91 | 92 | if diffs[pointer - 1].is_empty() { 93 | diffs.remove(pointer - 1); 94 | pointer -= 1; 95 | } 96 | } else if diffs[pointer - 1].is_empty() { 97 | diffs.remove(pointer - 1); 98 | pointer -= 1; 99 | } else { 100 | // We can't shift upwards anymore 101 | break; 102 | } 103 | } 104 | 105 | // 106 | // Swap the Delete and Insert 107 | // 108 | (DiffRange::Insert(_), DiffRange::Delete(_)) 109 | | (DiffRange::Delete(_), DiffRange::Insert(_)) => { 110 | diffs.swap(pointer - 1, pointer); 111 | pointer -= 1; 112 | } 113 | 114 | // 115 | // Merge the two ranges 116 | // 117 | (this_diff @ DiffRange::Insert(_), DiffRange::Insert(_)) 118 | | (this_diff @ DiffRange::Delete(_), DiffRange::Delete(_)) => { 119 | diffs[pointer - 1].grow_down(this_diff.len()); 120 | diffs.remove(pointer); 121 | pointer -= 1; 122 | } 123 | 124 | _ => panic!("range to shift must be either Insert or Delete"), 125 | } 126 | } 127 | 128 | pointer 129 | } 130 | 131 | // Attempts to shift the Insertion or Deletion at location `pointer` as far downwards as possible. 
132 | #[allow(clippy::needless_lifetimes)] 133 | fn shift_diff_down<'a, 'b, T: ?Sized + SliceLike>( 134 | diffs: &mut Vec>, 135 | mut pointer: usize, 136 | ) -> usize { 137 | while let Some(&next_diff) = pointer.checked_add(1).and_then(|idx| diffs.get(idx)) { 138 | match (diffs[pointer], next_diff) { 139 | // 140 | // Shift Insert Downward 141 | // 142 | (DiffRange::Insert(this_diff), DiffRange::Equal(next_diff1, _)) => { 143 | // check common prefix for the amoutn we can shift 144 | let prefix_len = this_diff.common_prefix_len(next_diff1); 145 | if prefix_len != 0 { 146 | if let Some(DiffRange::Equal(..)) = 147 | pointer.checked_sub(1).and_then(|idx| diffs.get(idx)) 148 | { 149 | diffs[pointer - 1].grow_down(prefix_len); 150 | } else { 151 | diffs.insert( 152 | pointer, 153 | DiffRange::Equal( 154 | next_diff1.slice(..prefix_len), 155 | this_diff.slice(..prefix_len), 156 | ), 157 | ); 158 | pointer += 1; 159 | } 160 | 161 | diffs[pointer].shift_down(prefix_len); 162 | diffs[pointer + 1].shrink_front(prefix_len); 163 | 164 | if diffs[pointer + 1].is_empty() { 165 | diffs.remove(pointer + 1); 166 | } 167 | } else if diffs[pointer + 1].is_empty() { 168 | diffs.remove(pointer + 1); 169 | } else { 170 | // We can't shift downwards anymore 171 | break; 172 | } 173 | } 174 | 175 | // 176 | // Shift Deletion Downward 177 | // 178 | (DiffRange::Delete(this_diff), DiffRange::Equal(_, next_diff2)) => { 179 | // check common prefix for the amoutn we can shift 180 | let prefix_len = this_diff.common_prefix_len(next_diff2); 181 | if prefix_len != 0 { 182 | if let Some(DiffRange::Equal(..)) = 183 | pointer.checked_sub(1).and_then(|idx| diffs.get(idx)) 184 | { 185 | diffs[pointer - 1].grow_down(prefix_len); 186 | } else { 187 | diffs.insert( 188 | pointer, 189 | DiffRange::Equal( 190 | this_diff.slice(..prefix_len), 191 | next_diff2.slice(..prefix_len), 192 | ), 193 | ); 194 | pointer += 1; 195 | } 196 | 197 | diffs[pointer].shift_down(prefix_len); 198 | diffs[pointer + 1].shrink_front(prefix_len); 199 | 200 | if diffs[pointer + 1].is_empty() { 201 | diffs.remove(pointer + 1); 202 | } 203 | } else if diffs[pointer + 1].is_empty() { 204 | diffs.remove(pointer + 1); 205 | } else { 206 | // We can't shift downwards anymore 207 | break; 208 | } 209 | } 210 | 211 | // 212 | // Swap the Delete and Insert 213 | // 214 | (DiffRange::Insert(_), DiffRange::Delete(_)) 215 | | (DiffRange::Delete(_), DiffRange::Insert(_)) => { 216 | diffs.swap(pointer, pointer + 1); 217 | pointer += 1; 218 | } 219 | 220 | // 221 | // Merge the two ranges 222 | // 223 | (DiffRange::Insert(_), next_diff @ DiffRange::Insert(_)) 224 | | (DiffRange::Delete(_), next_diff @ DiffRange::Delete(_)) => { 225 | diffs[pointer].grow_down(next_diff.len()); 226 | diffs.remove(pointer + 1); 227 | } 228 | 229 | _ => panic!("range to shift must be either Insert or Delete"), 230 | } 231 | } 232 | 233 | pointer 234 | } 235 | -------------------------------------------------------------------------------- /src/diff/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | patch::{Hunk, HunkRange, Line, Patch}, 3 | range::{DiffRange, SliceLike}, 4 | utils::Classifier, 5 | }; 6 | use std::{borrow::Cow, cmp, ops}; 7 | 8 | mod cleanup; 9 | mod myers; 10 | 11 | #[cfg(test)] 12 | mod tests; 13 | 14 | // TODO determine if this should be exposed in the public API 15 | #[allow(dead_code)] 16 | #[derive(Debug, PartialEq, Eq)] 17 | enum Diff<'a, T: ?Sized> { 18 | Equal(&'a T), 19 | Delete(&'a T), 20 | Insert(&'a 
T), 21 | } 22 | 23 | impl Copy for Diff<'_, T> {} 24 | 25 | impl Clone for Diff<'_, T> { 26 | fn clone(&self) -> Self { 27 | *self 28 | } 29 | } 30 | 31 | impl<'a, T> From> for Diff<'a, T> 32 | where 33 | T: ?Sized + SliceLike, 34 | { 35 | fn from(diff: DiffRange<'a, 'a, T>) -> Self { 36 | match diff { 37 | DiffRange::Equal(range, _) => Diff::Equal(range.as_slice()), 38 | DiffRange::Delete(range) => Diff::Delete(range.as_slice()), 39 | DiffRange::Insert(range) => Diff::Insert(range.as_slice()), 40 | } 41 | } 42 | } 43 | 44 | /// A collection of options for modifying the way a diff is performed 45 | #[derive(Debug)] 46 | pub struct DiffOptions { 47 | compact: bool, 48 | context_len: usize, 49 | original_filename: Option>, 50 | modified_filename: Option>, 51 | } 52 | 53 | impl DiffOptions { 54 | /// Construct a new `DiffOptions` with default settings 55 | /// 56 | /// ## Defaults 57 | /// * context_len = 3 58 | pub fn new() -> Self { 59 | Self { 60 | compact: true, 61 | context_len: 3, 62 | original_filename: Some("original".into()), 63 | modified_filename: Some("modified".into()), 64 | } 65 | } 66 | 67 | /// Set the number of context lines that should be used when producing a patch 68 | pub fn set_context_len(&mut self, context_len: usize) -> &mut Self { 69 | self.context_len = context_len; 70 | self 71 | } 72 | 73 | /// Enable/Disable diff compaction. Compaction is a post-processing step which attempts to 74 | /// produce a prettier diff by reducing the number of edited blocks by shifting and merging 75 | /// edit blocks. 76 | // TODO determine if this should be exposed in the public API 77 | #[allow(dead_code)] 78 | fn set_compact(&mut self, compact: bool) -> &mut Self { 79 | self.compact = compact; 80 | self 81 | } 82 | 83 | /// Set the filename to be used in the patch for the original text 84 | /// 85 | /// If not set, the default value is "original". 86 | pub fn set_original_filename(&mut self, filename: T) -> &mut Self 87 | where 88 | T: Into>, 89 | { 90 | self.original_filename = Some(filename.into()); 91 | self 92 | } 93 | 94 | /// Set the filename to be used in the patch for the modified text 95 | /// 96 | /// If not set, the default value is "modified". 
97 | pub fn set_modified_filename(&mut self, filename: T) -> &mut Self 98 | where 99 | T: Into>, 100 | { 101 | self.modified_filename = Some(filename.into()); 102 | self 103 | } 104 | 105 | // TODO determine if this should be exposed in the public API 106 | #[allow(dead_code)] 107 | fn diff<'a>(&self, original: &'a str, modified: &'a str) -> Vec> { 108 | let solution = myers::diff(original.as_bytes(), modified.as_bytes()); 109 | 110 | let mut solution = solution 111 | .into_iter() 112 | .map(|diff_range| diff_range.to_str(original, modified)) 113 | .collect(); 114 | 115 | if self.compact { 116 | cleanup::compact(&mut solution); 117 | } 118 | 119 | solution.into_iter().map(Diff::from).collect() 120 | } 121 | 122 | /// Produce a Patch between two texts based on the configured options 123 | pub fn create_patch<'a>(&self, original: &'a str, modified: &'a str) -> Patch<'a, str> { 124 | let mut classifier = Classifier::default(); 125 | let (old_lines, old_ids) = classifier.classify_lines(original); 126 | let (new_lines, new_ids) = classifier.classify_lines(modified); 127 | 128 | let solution = self.diff_slice(&old_ids, &new_ids); 129 | 130 | let hunks = to_hunks(&old_lines, &new_lines, &solution, self.context_len); 131 | Patch::new( 132 | self.original_filename.clone(), 133 | self.modified_filename.clone(), 134 | hunks, 135 | ) 136 | } 137 | 138 | /// Create a patch between two potentially non-utf8 texts 139 | pub fn create_patch_bytes<'a>( 140 | &self, 141 | original: &'a [u8], 142 | modified: &'a [u8], 143 | ) -> Patch<'a, [u8]> { 144 | let mut classifier = Classifier::default(); 145 | let (old_lines, old_ids) = classifier.classify_lines(original); 146 | let (new_lines, new_ids) = classifier.classify_lines(modified); 147 | 148 | let solution = self.diff_slice(&old_ids, &new_ids); 149 | 150 | let hunks = to_hunks(&old_lines, &new_lines, &solution, self.context_len); 151 | 152 | // helper function to convert a utf8 cow to a bytes cow 153 | fn cow_str_to_bytes(cow: Cow<'static, str>) -> Cow<'static, [u8]> { 154 | match cow { 155 | Cow::Borrowed(b) => Cow::Borrowed(b.as_bytes()), 156 | Cow::Owned(o) => Cow::Owned(o.into_bytes()), 157 | } 158 | } 159 | 160 | Patch::new( 161 | self.original_filename.clone().map(cow_str_to_bytes), 162 | self.modified_filename.clone().map(cow_str_to_bytes), 163 | hunks, 164 | ) 165 | } 166 | 167 | pub(crate) fn diff_slice<'a, T: PartialEq>( 168 | &self, 169 | old: &'a [T], 170 | new: &'a [T], 171 | ) -> Vec> { 172 | let mut solution = myers::diff(old, new); 173 | 174 | if self.compact { 175 | cleanup::compact(&mut solution); 176 | } 177 | 178 | solution 179 | } 180 | } 181 | 182 | impl Default for DiffOptions { 183 | fn default() -> Self { 184 | Self::new() 185 | } 186 | } 187 | 188 | // TODO determine if this should be exposed in the public API 189 | #[allow(dead_code)] 190 | fn diff<'a>(original: &'a str, modified: &'a str) -> Vec> { 191 | DiffOptions::default().diff(original, modified) 192 | } 193 | 194 | /// Create a patch between two texts. 195 | /// 196 | /// ``` 197 | /// # use diffy::create_patch; 198 | /// let original = "\ 199 | /// I am afraid, however, that all I have known - that my story - will be forgotten. 200 | /// I am afraid for the world that is to come. 201 | /// Afraid that my plans will fail. 202 | /// Afraid of a doom worse than the Deepness. 203 | /// "; 204 | /// 205 | /// let modified = "\ 206 | /// I am afraid, however, that all I have known - that my story - will be forgotten. 207 | /// I am afraid for the world that is to come. 
208 | /// Afraid that Alendi will fail. 209 | /// Afraid of a doom brought by the Deepness. 210 | /// "; 211 | /// 212 | /// let expected = "\ 213 | /// --- original 214 | /// +++ modified 215 | /// @@ -1,4 +1,4 @@ 216 | /// I am afraid, however, that all I have known - that my story - will be forgotten. 217 | /// I am afraid for the world that is to come. 218 | /// -Afraid that my plans will fail. 219 | /// -Afraid of a doom worse than the Deepness. 220 | /// +Afraid that Alendi will fail. 221 | /// +Afraid of a doom brought by the Deepness. 222 | /// "; 223 | /// 224 | /// let patch = create_patch(original, modified); 225 | /// assert_eq!(patch.to_string(), expected); 226 | /// ``` 227 | pub fn create_patch<'a>(original: &'a str, modified: &'a str) -> Patch<'a, str> { 228 | DiffOptions::default().create_patch(original, modified) 229 | } 230 | 231 | /// Create a patch between two potentially non-utf8 texts 232 | pub fn create_patch_bytes<'a>(original: &'a [u8], modified: &'a [u8]) -> Patch<'a, [u8]> { 233 | DiffOptions::default().create_patch_bytes(original, modified) 234 | } 235 | 236 | fn to_hunks<'a, T: ?Sized>( 237 | lines1: &[&'a T], 238 | lines2: &[&'a T], 239 | solution: &[DiffRange<[u64]>], 240 | context_len: usize, 241 | ) -> Vec> { 242 | let edit_script = build_edit_script(solution); 243 | 244 | let mut hunks = Vec::new(); 245 | 246 | let mut idx = 0; 247 | while let Some(mut script) = edit_script.get(idx) { 248 | let start1 = script.old.start.saturating_sub(context_len); 249 | let start2 = script.new.start.saturating_sub(context_len); 250 | 251 | let (mut end1, mut end2) = calc_end( 252 | context_len, 253 | lines1.len(), 254 | lines2.len(), 255 | script.old.end, 256 | script.new.end, 257 | ); 258 | 259 | let mut lines = Vec::new(); 260 | 261 | // Pre-context 262 | for line in lines2.get(start2..script.new.start).into_iter().flatten() { 263 | lines.push(Line::Context(*line)); 264 | } 265 | 266 | loop { 267 | // Delete lines from text1 268 | for line in lines1.get(script.old.clone()).into_iter().flatten() { 269 | lines.push(Line::Delete(*line)); 270 | } 271 | 272 | // Insert lines from text2 273 | for line in lines2.get(script.new.clone()).into_iter().flatten() { 274 | lines.push(Line::Insert(*line)); 275 | } 276 | 277 | if let Some(s) = edit_script.get(idx + 1) { 278 | // Check to see if we can merge the hunks 279 | let start1_next = 280 | cmp::min(s.old.start, lines1.len() - 1).saturating_sub(context_len); 281 | if start1_next < end1 { 282 | // Context lines between hunks 283 | for (_i1, i2) in (script.old.end..s.old.start).zip(script.new.end..s.new.start) 284 | { 285 | if let Some(line) = lines2.get(i2) { 286 | lines.push(Line::Context(*line)); 287 | } 288 | } 289 | 290 | // Calc the new end 291 | let (e1, e2) = calc_end( 292 | context_len, 293 | lines1.len(), 294 | lines2.len(), 295 | s.old.end, 296 | s.new.end, 297 | ); 298 | 299 | end1 = e1; 300 | end2 = e2; 301 | script = s; 302 | idx += 1; 303 | continue; 304 | } 305 | } 306 | 307 | break; 308 | } 309 | 310 | // Post-context 311 | for line in lines2.get(script.new.end..end2).into_iter().flatten() { 312 | lines.push(Line::Context(*line)); 313 | } 314 | 315 | let len1 = end1 - start1; 316 | let old_range = HunkRange::new(if len1 > 0 { start1 + 1 } else { start1 }, len1); 317 | 318 | let len2 = end2 - start2; 319 | let new_range = HunkRange::new(if len2 > 0 { start2 + 1 } else { start2 }, len2); 320 | 321 | hunks.push(Hunk::new(old_range, new_range, None, lines)); 322 | idx += 1; 323 | } 324 | 325 | hunks 326 | } 327 | 328 | 
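// (Illustrative note, not part of the original source: the helper below clamps the shared post-context so it never runs past either text. With context_len = 3, text lengths 10 and 12, and edit-script ends 9 and 11, only one trailing context line fits, so it returns (10, 12).)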
fn calc_end( 329 | context_len: usize, 330 | text1_len: usize, 331 | text2_len: usize, 332 | script1_end: usize, 333 | script2_end: usize, 334 | ) -> (usize, usize) { 335 | let post_context_len = cmp::min( 336 | context_len, 337 | cmp::min( 338 | text1_len.saturating_sub(script1_end), 339 | text2_len.saturating_sub(script2_end), 340 | ), 341 | ); 342 | 343 | let end1 = script1_end + post_context_len; 344 | let end2 = script2_end + post_context_len; 345 | 346 | (end1, end2) 347 | } 348 | 349 | #[derive(Debug)] 350 | struct EditRange { 351 | old: ops::Range, 352 | new: ops::Range, 353 | } 354 | 355 | impl EditRange { 356 | fn new(old: ops::Range, new: ops::Range) -> Self { 357 | Self { old, new } 358 | } 359 | } 360 | 361 | fn build_edit_script(solution: &[DiffRange<[T]>]) -> Vec { 362 | let mut idx_a = 0; 363 | let mut idx_b = 0; 364 | 365 | let mut edit_script: Vec = Vec::new(); 366 | let mut script = None; 367 | 368 | for diff in solution { 369 | match diff { 370 | DiffRange::Equal(range1, range2) => { 371 | idx_a += range1.len(); 372 | idx_b += range2.len(); 373 | if let Some(script) = script.take() { 374 | edit_script.push(script); 375 | } 376 | } 377 | DiffRange::Delete(range) => { 378 | match &mut script { 379 | Some(s) => s.old.end += range.len(), 380 | None => { 381 | script = Some(EditRange::new(idx_a..idx_a + range.len(), idx_b..idx_b)); 382 | } 383 | } 384 | idx_a += range.len(); 385 | } 386 | DiffRange::Insert(range) => { 387 | match &mut script { 388 | Some(s) => s.new.end += range.len(), 389 | None => { 390 | script = Some(EditRange::new(idx_a..idx_a, idx_b..idx_b + range.len())); 391 | } 392 | } 393 | idx_b += range.len(); 394 | } 395 | } 396 | } 397 | 398 | if let Some(script) = script.take() { 399 | edit_script.push(script); 400 | } 401 | 402 | edit_script 403 | } 404 | 405 | #[cfg(test)] 406 | mod test { 407 | use super::DiffOptions; 408 | 409 | #[test] 410 | fn set_original_and_modified_filenames() { 411 | let original = "\ 412 | I am afraid, however, that all I have known - that my story - will be forgotten. 413 | I am afraid for the world that is to come. 414 | Afraid that my plans will fail. 415 | Afraid of a doom worse than the Deepness. 416 | "; 417 | let modified = "\ 418 | I am afraid, however, that all I have known - that my story - will be forgotten. 419 | I am afraid for the world that is to come. 420 | Afraid that Alendi will fail. 421 | Afraid of a doom brought by the Deepness. 422 | "; 423 | let expected = "\ 424 | --- the old version 425 | +++ the better version 426 | @@ -1,4 +1,4 @@ 427 | I am afraid, however, that all I have known - that my story - will be forgotten. 428 | I am afraid for the world that is to come. 429 | -Afraid that my plans will fail. 430 | -Afraid of a doom worse than the Deepness. 431 | +Afraid that Alendi will fail. 432 | +Afraid of a doom brought by the Deepness. 433 | "; 434 | 435 | let patch = DiffOptions::new() 436 | .set_original_filename("the old version") 437 | .set_modified_filename("the better version") 438 | .create_patch(original, modified); 439 | 440 | assert_eq!(patch.to_string(), expected); 441 | } 442 | } 443 | -------------------------------------------------------------------------------- /src/diff/myers.rs: -------------------------------------------------------------------------------- 1 | use crate::range::{DiffRange, Range}; 2 | use std::ops::{Index, IndexMut}; 3 | 4 | // A D-path is a path which starts at (0,0) that has exactly D non-diagonal edges. 
All D-paths 5 | // consist of a (D - 1)-path followed by a non-diagonal edge and then a possibly empty sequence of 6 | // diagonal edges called a snake. 7 | 8 | /// `V` contains the endpoints of the furthest reaching `D-paths`. For each recorded endpoint 9 | /// `(x,y)` in diagonal `k`, we only need to retain `x` because `y` can be computed from `x - k`. 10 | /// In other words, `V` is an array of integers where `V[k]` contains the row index of the endpoint 11 | /// of the furthest reaching path in diagonal `k`. 12 | /// 13 | /// We can't use a traditional Vec to represent `V` since we use `k` as an index and it can take on 14 | /// negative values. So instead `V` is represented as a light-weight wrapper around a Vec plus an 15 | /// `offset` which is the maximum value `k` can take on in order to map negative `k`'s back to a 16 | /// value >= 0. 17 | #[derive(Debug, Clone)] 18 | struct V { 19 | offset: isize, 20 | v: Vec, // Look into initializing this to -1 and storing isize 21 | } 22 | 23 | impl V { 24 | fn new(max_d: usize) -> Self { 25 | Self { 26 | offset: max_d as isize, 27 | v: vec![0; 2 * max_d], 28 | } 29 | } 30 | 31 | fn len(&self) -> usize { 32 | self.v.len() 33 | } 34 | } 35 | 36 | impl Index for V { 37 | type Output = usize; 38 | 39 | fn index(&self, index: isize) -> &Self::Output { 40 | &self.v[(index + self.offset) as usize] 41 | } 42 | } 43 | 44 | impl IndexMut for V { 45 | fn index_mut(&mut self, index: isize) -> &mut Self::Output { 46 | &mut self.v[(index + self.offset) as usize] 47 | } 48 | } 49 | 50 | /// A `Snake` is a sequence of diagonal edges in the edit graph. It is possible for a snake to have 51 | /// a length of zero, meaning the start and end points are the same. 52 | #[derive(Debug)] 53 | struct Snake { 54 | x_start: usize, 55 | y_start: usize, 56 | x_end: usize, 57 | y_end: usize, 58 | } 59 | 60 | impl ::std::fmt::Display for Snake { 61 | fn fmt(&self, f: &mut ::std::fmt::Formatter<'_>) -> ::std::fmt::Result { 62 | write!( 63 | f, 64 | "({}, {}) -> ({}, {})", 65 | self.x_start, self.y_start, self.x_end, self.y_end 66 | ) 67 | } 68 | } 69 | 70 | fn max_d(len1: usize, len2: usize) -> usize { 71 | // XXX look into reducing the need to have the additional '+ 1' 72 | (len1 + len2 + 1) / 2 + 1 73 | } 74 | 75 | // The divide part of a divide-and-conquer strategy. A D-path has D+1 snakes some of which may 76 | // be empty. The divide step requires finding the ceil(D/2) + 1 or middle snake of an optimal 77 | // D-path. The idea for doing so is to simultaneously run the basic algorithm in both the 78 | // forward and reverse directions until furthest reaching forward and reverse paths starting at 79 | // opposing corners 'overlap'. 80 | fn find_middle_snake( 81 | old: Range<'_, [T]>, 82 | new: Range<'_, [T]>, 83 | vf: &mut V, 84 | vb: &mut V, 85 | ) -> (isize, Snake) { 86 | let n = old.len(); 87 | let m = new.len(); 88 | 89 | // By Lemma 1 in the paper, the optimal edit script length is odd or even as `delta` is odd 90 | // or even. 
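// (Added note, an interpretation of the checks below: when `delta` is odd the overlap test is performed during the forward pass, and when it is even it is performed during the backward pass.)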
91 | let delta = n as isize - m as isize; 92 | let odd = delta & 1 == 1; 93 | 94 | // The initial point at (0, -1) 95 | vf[1] = 0; 96 | // The initial point at (N, M+1) 97 | vb[1] = 0; 98 | 99 | // We only need to explore ceil(D/2) + 1 100 | let d_max = max_d(n, m); 101 | assert!(vf.len() >= d_max); 102 | assert!(vb.len() >= d_max); 103 | 104 | for d in 0..d_max as isize { 105 | // Forward path 106 | for k in (-d..=d).rev().step_by(2) { 107 | let mut x = if k == -d || (k != d && vf[k - 1] < vf[k + 1]) { 108 | vf[k + 1] 109 | } else { 110 | vf[k - 1] + 1 111 | }; 112 | let mut y = (x as isize - k) as usize; 113 | 114 | // The coordinate of the start of a snake 115 | let (x0, y0) = (x, y); 116 | // While these sequences are identical, keep moving through the graph with no cost 117 | if let (Some(s1), Some(s2)) = (old.get(x..), new.get(y..)) { 118 | let advance = s1.common_prefix_len(s2); 119 | x += advance; 120 | y += advance; 121 | } 122 | 123 | // This is the new best x value 124 | vf[k] = x; 125 | // Only check for connections from the forward search when N - M is odd 126 | // and when there is a reciprocal k line coming from the other direction. 127 | if odd && (k - delta).abs() <= (d - 1) { 128 | // TODO optimize this so we don't have to compare against n 129 | if vf[k] + vb[-(k - delta)] >= n { 130 | // Return the snake 131 | let snake = Snake { 132 | x_start: x0, 133 | y_start: y0, 134 | x_end: x, 135 | y_end: y, 136 | }; 137 | // Edit distance to this snake is `2 * d - 1` 138 | return (2 * d - 1, snake); 139 | } 140 | } 141 | } 142 | 143 | // Backward path 144 | for k in (-d..=d).rev().step_by(2) { 145 | let mut x = if k == -d || (k != d && vb[k - 1] < vb[k + 1]) { 146 | vb[k + 1] 147 | } else { 148 | vb[k - 1] + 1 149 | }; 150 | let mut y = (x as isize - k) as usize; 151 | 152 | // The coordinate of the start of a snake 153 | let (x0, y0) = (x, y); 154 | if x < n && y < m { 155 | let advance = old.slice(..n - x).common_suffix_len(new.slice(..m - y)); 156 | x += advance; 157 | y += advance; 158 | } 159 | 160 | // This is the new best x value 161 | vb[k] = x; 162 | 163 | if !odd && (k - delta).abs() <= d { 164 | // TODO optimize this so we don't have to compare against n 165 | if vb[k] + vf[-(k - delta)] >= n { 166 | // Return the snake 167 | let snake = Snake { 168 | x_start: n - x, 169 | y_start: m - y, 170 | x_end: n - x0, 171 | y_end: m - y0, 172 | }; 173 | // Edit distance to this snake is `2 * d` 174 | return (2 * d, snake); 175 | } 176 | } 177 | } 178 | 179 | // TODO: Maybe there's an opportunity to optimize and bail early? 
180 | } 181 | 182 | unreachable!("unable to find a middle snake"); 183 | } 184 | 185 | fn conquer<'a, 'b, T: PartialEq>( 186 | mut old: Range<'a, [T]>, 187 | mut new: Range<'b, [T]>, 188 | vf: &mut V, 189 | vb: &mut V, 190 | solution: &mut Vec>, 191 | ) { 192 | // Check for common prefix 193 | let common_prefix_len = old.common_prefix_len(new); 194 | if common_prefix_len > 0 { 195 | let common_prefix = DiffRange::Equal( 196 | old.slice(..common_prefix_len), 197 | new.slice(..common_prefix_len), 198 | ); 199 | solution.push(common_prefix); 200 | } 201 | 202 | old = old.slice(common_prefix_len..old.len()); 203 | new = new.slice(common_prefix_len..new.len()); 204 | 205 | // Check for common suffix 206 | let common_suffix_len = old.common_suffix_len(new); 207 | let common_suffix = DiffRange::Equal( 208 | old.slice(old.len() - common_suffix_len..), 209 | new.slice(new.len() - common_suffix_len..), 210 | ); 211 | old = old.slice(..old.len() - common_suffix_len); 212 | new = new.slice(..new.len() - common_suffix_len); 213 | 214 | if old.is_empty() && new.is_empty() { 215 | // Do nothing 216 | } else if old.is_empty() { 217 | // Inserts 218 | solution.push(DiffRange::Insert(new)); 219 | } else if new.is_empty() { 220 | // Deletes 221 | solution.push(DiffRange::Delete(old)); 222 | } else { 223 | // Divide & Conquer 224 | let (_shortest_edit_script_len, snake) = find_middle_snake(old, new, vf, vb); 225 | 226 | let (old_a, old_b) = old.split_at(snake.x_start); 227 | let (new_a, new_b) = new.split_at(snake.y_start); 228 | 229 | conquer(old_a, new_a, vf, vb, solution); 230 | conquer(old_b, new_b, vf, vb, solution); 231 | } 232 | 233 | if common_suffix_len > 0 { 234 | solution.push(common_suffix); 235 | } 236 | } 237 | 238 | pub fn diff<'a, 'b, T: PartialEq>(old: &'a [T], new: &'b [T]) -> Vec> { 239 | let old_recs = Range::new(old, ..); 240 | let new_recs = Range::new(new, ..); 241 | 242 | let mut solution = Vec::new(); 243 | 244 | // The arrays that hold the 'best possible x values' in search from: 245 | // `vf`: top left to bottom right 246 | // `vb`: bottom right to top left 247 | let max_d = max_d(old.len(), new.len()); 248 | let mut vf = V::new(max_d); 249 | let mut vb = V::new(max_d); 250 | 251 | conquer(old_recs, new_recs, &mut vf, &mut vb, &mut solution); 252 | 253 | solution 254 | } 255 | 256 | #[cfg(test)] 257 | mod tests { 258 | use super::*; 259 | 260 | #[test] 261 | fn test_find_middle_snake() { 262 | let a = Range::new(&b"ABCABBA"[..], ..); 263 | let b = Range::new(&b"CBABAC"[..], ..); 264 | let max_d = max_d(a.len(), b.len()); 265 | let mut vf = V::new(max_d); 266 | let mut vb = V::new(max_d); 267 | find_middle_snake(a, b, &mut vf, &mut vb); 268 | } 269 | } 270 | -------------------------------------------------------------------------------- /src/diff/tests.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use crate::{ 3 | apply::apply, 4 | diff::{Diff, DiffRange}, 5 | patch::Patch, 6 | range::Range, 7 | PatchFormatter, 8 | }; 9 | 10 | // Helper macros are based off of the ones used in [dissimilar](https://docs.rs/dissimilar) 11 | macro_rules! diff_range_list { 12 | () => { 13 | Vec::new() 14 | }; 15 | ($($kind:ident($text:literal)),+ $(,)?) => {{ 16 | macro_rules! text1 { 17 | (Insert, $s:literal) => { "" }; 18 | (Delete, $s:literal) => { $s }; 19 | (Equal, $s:literal) => { $s }; 20 | } 21 | macro_rules! 
text2 { 22 | (Insert, $s:literal) => { $s }; 23 | (Delete, $s:literal) => { "" }; 24 | (Equal, $s:literal) => { $s }; 25 | } 26 | let _text1 = concat!($(text1!($kind, $text)),*); 27 | let _text2 = concat!($(text2!($kind, $text)),*); 28 | let (_i, _j) = (&mut 0, &mut 0); 29 | macro_rules! range { 30 | (Insert, $s:literal) => { 31 | DiffRange::Insert(range(_text2, _j, $s)) 32 | }; 33 | (Delete, $s:literal) => { 34 | DiffRange::Delete(range(_text1, _i, $s)) 35 | }; 36 | (Equal, $s:literal) => { 37 | DiffRange::Equal(range(_text1, _i, $s), range(_text2, _j, $s)) 38 | }; 39 | } 40 | vec![$(range!($kind, $text)),*] 41 | }}; 42 | } 43 | 44 | fn range<'a>(doc: &'a str, offset: &mut usize, text: &str) -> Range<'a, str> { 45 | let range = Range::new(doc, *offset..*offset + text.len()); 46 | *offset += text.len(); 47 | range 48 | } 49 | 50 | macro_rules! assert_diff_range { 51 | ([$($kind:ident($text:literal)),* $(,)?], $solution:ident $(,)?) => { 52 | let expected = &[$(Diff::$kind($text)),*]; 53 | assert!( 54 | same_diffs(expected, &$solution), 55 | concat!("\nexpected={:#?}\nactual={:#?}"), 56 | expected, $solution, 57 | ); 58 | }; 59 | ([$($kind:ident($text:literal)),* $(,)?], $solution:ident, $msg:expr $(,)?) => { 60 | let expected = &[$(Diff::$kind($text)),*]; 61 | assert!( 62 | same_diffs(expected, &$solution), 63 | concat!($msg, "\nexpected={:#?}\nactual={:#?}"), 64 | expected, $solution, 65 | ); 66 | }; 67 | } 68 | 69 | fn same_diffs(expected: &[Diff], actual: &[DiffRange]) -> bool { 70 | expected.len() == actual.len() 71 | && expected.iter().zip(actual).all(|pair| match pair { 72 | (Diff::Insert(expected), DiffRange::Insert(actual)) => *expected == actual.as_slice(), 73 | (Diff::Delete(expected), DiffRange::Delete(actual)) => *expected == actual.as_slice(), 74 | (Diff::Equal(expected), DiffRange::Equal(actual1, actual2)) => { 75 | *expected == actual1.as_slice() && *expected == actual2.as_slice() 76 | } 77 | (_, _) => false, 78 | }) 79 | } 80 | 81 | macro_rules! assert_diff { 82 | ([$($kind:ident($text:literal)),* $(,)?], $solution:ident $(,)?) => { 83 | let expected: &[_] = &[$(Diff::$kind($text)),*]; 84 | assert_eq!( 85 | expected, 86 | &$solution[..], 87 | concat!("\nexpected={:#?}\nactual={:#?}"), 88 | expected, $solution, 89 | ); 90 | }; 91 | ([$($kind:ident($text:literal)),* $(,)?], $solution:ident, $msg:expr $(,)?) 
=> { 92 | let expected: &[_] = &[$(Diff::$kind($text)),*]; 93 | assert_eq!( 94 | expected, 95 | &$solution[..], 96 | concat!($msg, "\nexpected={:#?}\nactual={:#?}"), 97 | expected, $solution, 98 | ); 99 | }; 100 | } 101 | 102 | #[test] 103 | fn test_diff_str() { 104 | let a = "ABCABBA"; 105 | let b = "CBABAC"; 106 | let solution = diff(a, b); 107 | assert_diff!( 108 | [ 109 | Delete("AB"), 110 | Equal("C"), 111 | Delete("A"), 112 | Equal("B"), 113 | Insert("A"), 114 | Equal("BA"), 115 | Insert("C"), 116 | ], 117 | solution, 118 | ); 119 | 120 | let a = "abgdef"; 121 | let b = "gh"; 122 | let solution = diff(a, b); 123 | assert_diff!( 124 | [Delete("ab"), Equal("g"), Delete("def"), Insert("h")], 125 | solution, 126 | ); 127 | 128 | let a = "bat"; 129 | let b = "map"; 130 | let solution = diff(a, b); 131 | assert_diff!( 132 | [ 133 | Delete("b"), 134 | Insert("m"), 135 | Equal("a"), 136 | Delete("t"), 137 | Insert("p"), 138 | ], 139 | solution, 140 | ); 141 | 142 | let a = "ACZBDZ"; 143 | let b = "ACBCBDEFD"; 144 | let solution = diff(a, b); 145 | assert_diff!( 146 | [ 147 | Equal("AC"), 148 | Delete("Z"), 149 | Equal("B"), 150 | Insert("CBDEF"), 151 | Equal("D"), 152 | Delete("Z"), 153 | ], 154 | solution, 155 | ); 156 | 157 | let a = "1A "; 158 | let b = "1A B A 2"; 159 | let solution = diff(a, b); 160 | assert_diff!([Equal("1A "), Insert("B A 2")], solution); 161 | 162 | let a = "ACBD"; 163 | let b = "ACBCBDEFD"; 164 | let solution = diff(a, b); 165 | assert_diff!([Equal("ACB"), Insert("CBDEF"), Equal("D")], solution); 166 | 167 | let a = "abc"; 168 | let b = "def"; 169 | let solution = diff(a, b); 170 | assert_diff!([Delete("abc"), Insert("def")], solution, "No Equal"); 171 | } 172 | 173 | #[test] 174 | fn test_diff_slice() { 175 | let a = b"bat"; 176 | let b = b"map"; 177 | let solution = DiffOptions::default().diff_slice(a, b); 178 | let solution: Vec<_> = solution.into_iter().map(Diff::from).collect(); 179 | let expected: Vec> = vec![ 180 | Diff::Delete(b"b"), 181 | Diff::Insert(b"m"), 182 | Diff::Equal(b"a"), 183 | Diff::Delete(b"t"), 184 | Diff::Insert(b"p"), 185 | ]; 186 | assert_eq!(solution, expected); 187 | } 188 | 189 | #[test] 190 | fn test_unicode() { 191 | // Unicode snowman and unicode comet have the same first two bytes. A 192 | // byte-based diff would produce a 2-byte Equal followed by 1-byte Delete 193 | // and Insert. 
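    // For reference (the byte values below follow from UTF-8 itself, not from anything
    // diffy-specific): U+2603 SNOWMAN encodes as E2 98 83 and U+2604 COMET as E2 98 84,
    // so only the final byte differs. The expected result asserted below keeps each code
    // point intact rather than reporting a two-byte Equal prefix.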
194 | let snowman = "\u{2603}"; 195 | let comet = "\u{2604}"; 196 | assert_eq!(snowman.as_bytes()[..2], comet.as_bytes()[..2]); 197 | 198 | let d = diff(snowman, comet); 199 | assert_eq!(d, vec![Diff::Delete(snowman), Diff::Insert(comet)]); 200 | } 201 | 202 | #[test] 203 | fn test_compact() { 204 | let mut solution = diff_range_list![]; 205 | cleanup::compact(&mut solution); 206 | assert_diff_range!([], solution, "Null case"); 207 | 208 | let mut solution = diff_range_list![Equal("a"), Delete("b"), Insert("c")]; 209 | cleanup::compact(&mut solution); 210 | assert_diff_range!( 211 | [Equal("a"), Delete("b"), Insert("c")], 212 | solution, 213 | "No change case", 214 | ); 215 | 216 | // TODO implement equality compaction 217 | // let mut solution = diff_range_list![Equal("a"), Equal("b"), Equal("c")]; 218 | // cleanup::compact(&mut solution); 219 | // assert_diff_range!([Equal("abc")], solution, "Compact equalities"); 220 | 221 | let mut solution = diff_range_list![Delete("a"), Delete("b"), Delete("c")]; 222 | cleanup::compact(&mut solution); 223 | assert_diff_range!([Delete("abc")], solution, "Compact deletions"); 224 | 225 | let mut solution = diff_range_list![Insert("a"), Insert("b"), Insert("c")]; 226 | cleanup::compact(&mut solution); 227 | assert_diff_range!([Insert("abc")], solution, "Compact Insertions"); 228 | 229 | let mut solution = diff_range_list![ 230 | Delete("a"), 231 | Insert("b"), 232 | Delete("c"), 233 | Insert("d"), 234 | Equal("ef"), 235 | ]; 236 | cleanup::compact(&mut solution); 237 | assert_diff_range!( 238 | [Delete("ac"), Insert("bd"), Equal("ef")], 239 | solution, 240 | "Compact interweave", 241 | ); 242 | 243 | let mut solution = diff_range_list![ 244 | Equal("a"), 245 | Delete("b"), 246 | Equal("c"), 247 | Delete("ac"), 248 | Equal("x"), 249 | ]; 250 | cleanup::compact(&mut solution); 251 | assert_diff_range!( 252 | [Equal("a"), Delete("bca"), Equal("cx")], 253 | solution, 254 | "Slide edit left", 255 | ); 256 | 257 | let mut solution = diff_range_list![ 258 | Equal("x"), 259 | Delete("ca"), 260 | Equal("c"), 261 | Delete("b"), 262 | Equal("a"), 263 | ]; 264 | cleanup::compact(&mut solution); 265 | assert_diff_range!([Equal("xca"), Delete("cba")], solution, "Slide edit right"); 266 | 267 | let mut solution = diff_range_list![Equal(""), Insert("a"), Equal("b")]; 268 | cleanup::compact(&mut solution); 269 | assert_diff_range!([Insert("a"), Equal("b")], solution, "Empty equality"); 270 | 271 | let mut solution = diff_range_list![Equal("1"), Insert("A B "), Equal("A "), Insert("2")]; 272 | 273 | cleanup::compact(&mut solution); 274 | assert_diff_range!([Equal("1A "), Insert("B A 2")], solution); 275 | 276 | let mut solution = diff_range_list![Equal("AC"), Insert("BC"), Equal("BD"), Insert("EFD")]; 277 | cleanup::compact(&mut solution); 278 | 279 | assert_diff_range!([Equal("ACB"), Insert("CBDEF"), Equal("D")], solution); 280 | 281 | let mut solution = diff_range_list![ 282 | Equal("AC"), 283 | Delete("Z"), 284 | Insert("BC"), 285 | Equal("BD"), 286 | Delete("Z"), 287 | Insert("EFD"), 288 | ]; 289 | 290 | cleanup::compact(&mut solution); 291 | assert_diff_range!( 292 | [ 293 | Equal("AC"), 294 | Delete("Z"), 295 | Equal("B"), 296 | Insert("CBDEF"), 297 | Equal("D"), 298 | Delete("Z"), 299 | ], 300 | solution, 301 | "Compact Inserts" 302 | ); 303 | 304 | let mut solution = diff_range_list![ 305 | Equal("AC"), 306 | Insert("Z"), 307 | Delete("BC"), 308 | Equal("BD"), 309 | Insert("Z"), 310 | Delete("EFD"), 311 | ]; 312 | cleanup::compact(&mut solution); 313 | 
assert_diff_range!( 314 | [ 315 | Equal("AC"), 316 | Insert("Z"), 317 | Equal("B"), 318 | Delete("CBDEF"), 319 | Equal("D"), 320 | Insert("Z"), 321 | ], 322 | solution, 323 | "Compact Deletions" 324 | ); 325 | } 326 | 327 | macro_rules! assert_patch { 328 | ($diff_options:expr, $old:ident, $new:ident, $expected:ident $(,)?) => { 329 | let patch = $diff_options.create_patch($old, $new); 330 | let bpatch = $diff_options.create_patch_bytes($old.as_bytes(), $new.as_bytes()); 331 | let patch_str = patch.to_string(); 332 | let patch_bytes = bpatch.to_bytes(); 333 | assert_eq!(patch_str, $expected); 334 | assert_eq!(patch_bytes, patch_str.as_bytes()); 335 | assert_eq!(patch_bytes, $expected.as_bytes()); 336 | assert_eq!(Patch::from_str($expected).unwrap(), patch); 337 | assert_eq!(Patch::from_str(&patch_str).unwrap(), patch); 338 | assert_eq!(Patch::from_bytes($expected.as_bytes()).unwrap(), bpatch); 339 | assert_eq!(Patch::from_bytes(&patch_bytes).unwrap(), bpatch); 340 | assert_eq!(apply($old, &patch).unwrap(), $new); 341 | assert_eq!( 342 | crate::apply_bytes($old.as_bytes(), &bpatch).unwrap(), 343 | $new.as_bytes() 344 | ); 345 | }; 346 | ($old:ident, $new:ident, $expected:ident $(,)?) => { 347 | assert_patch!(DiffOptions::default(), $old, $new, $expected); 348 | }; 349 | } 350 | 351 | #[test] 352 | fn diff_str() { 353 | let a = "A\nB\nC\nA\nB\nB\nA\n"; 354 | let b = "C\nB\nA\nB\nA\nC\n"; 355 | let expected = "\ 356 | --- original 357 | +++ modified 358 | @@ -1,7 +1,6 @@ 359 | -A 360 | -B 361 | C 362 | -A 363 | B 364 | +A 365 | B 366 | A 367 | +C 368 | "; 369 | 370 | assert_patch!(a, b, expected); 371 | } 372 | 373 | #[test] 374 | fn sample() { 375 | let mut opts = DiffOptions::default(); 376 | let lao = "\ 377 | The Way that can be told of is not the eternal Way; 378 | The name that can be named is not the eternal name. 379 | The Nameless is the origin of Heaven and Earth; 380 | The Named is the mother of all things. 381 | Therefore let there always be non-being, 382 | so we may see their subtlety, 383 | And let there always be being, 384 | so we may see their outcome. 385 | The two are the same, 386 | But after they are produced, 387 | they have different names. 388 | "; 389 | 390 | let tzu = "\ 391 | The Nameless is the origin of Heaven and Earth; 392 | The named is the mother of all things. 393 | 394 | Therefore let there always be non-being, 395 | so we may see their subtlety, 396 | And let there always be being, 397 | so we may see their outcome. 398 | The two are the same, 399 | But after they are produced, 400 | they have different names. 401 | They both may be called deep and profound. 402 | Deeper and more profound, 403 | The door of all subtleties! 404 | "; 405 | 406 | let expected = "\ 407 | --- original 408 | +++ modified 409 | @@ -1,7 +1,6 @@ 410 | -The Way that can be told of is not the eternal Way; 411 | -The name that can be named is not the eternal name. 412 | The Nameless is the origin of Heaven and Earth; 413 | -The Named is the mother of all things. 414 | +The named is the mother of all things. 415 | + 416 | Therefore let there always be non-being, 417 | so we may see their subtlety, 418 | And let there always be being, 419 | @@ -9,3 +8,6 @@ 420 | The two are the same, 421 | But after they are produced, 422 | they have different names. 423 | +They both may be called deep and profound. 424 | +Deeper and more profound, 425 | +The door of all subtleties! 
426 | "; 427 | 428 | assert_patch!(opts, lao, tzu, expected); 429 | 430 | let expected = "\ 431 | --- original 432 | +++ modified 433 | @@ -1,2 +0,0 @@ 434 | -The Way that can be told of is not the eternal Way; 435 | -The name that can be named is not the eternal name. 436 | @@ -4 +2,2 @@ 437 | -The Named is the mother of all things. 438 | +The named is the mother of all things. 439 | + 440 | @@ -11,0 +11,3 @@ 441 | +They both may be called deep and profound. 442 | +Deeper and more profound, 443 | +The door of all subtleties! 444 | "; 445 | opts.set_context_len(0); 446 | assert_patch!(opts, lao, tzu, expected); 447 | 448 | let expected = "\ 449 | --- original 450 | +++ modified 451 | @@ -1,5 +1,4 @@ 452 | -The Way that can be told of is not the eternal Way; 453 | -The name that can be named is not the eternal name. 454 | The Nameless is the origin of Heaven and Earth; 455 | -The Named is the mother of all things. 456 | +The named is the mother of all things. 457 | + 458 | Therefore let there always be non-being, 459 | @@ -11 +10,4 @@ 460 | they have different names. 461 | +They both may be called deep and profound. 462 | +Deeper and more profound, 463 | +The door of all subtleties! 464 | "; 465 | opts.set_context_len(1); 466 | assert_patch!(opts, lao, tzu, expected); 467 | } 468 | 469 | #[test] 470 | fn no_newline_at_eof() { 471 | let old = "old line"; 472 | let new = "new line"; 473 | let expected = "\ 474 | --- original 475 | +++ modified 476 | @@ -1 +1 @@ 477 | -old line 478 | \\ No newline at end of file 479 | +new line 480 | \\ No newline at end of file 481 | "; 482 | assert_patch!(old, new, expected); 483 | 484 | let old = "old line\n"; 485 | let new = "new line"; 486 | let expected = "\ 487 | --- original 488 | +++ modified 489 | @@ -1 +1 @@ 490 | -old line 491 | +new line 492 | \\ No newline at end of file 493 | "; 494 | assert_patch!(old, new, expected); 495 | 496 | let old = "old line"; 497 | let new = "new line\n"; 498 | let expected = "\ 499 | --- original 500 | +++ modified 501 | @@ -1 +1 @@ 502 | -old line 503 | \\ No newline at end of file 504 | +new line 505 | "; 506 | assert_patch!(old, new, expected); 507 | 508 | let old = "old line\ncommon line"; 509 | let new = "new line\ncommon line"; 510 | let expected = "\ 511 | --- original 512 | +++ modified 513 | @@ -1,2 +1,2 @@ 514 | -old line 515 | +new line 516 | common line 517 | \\ No newline at end of file 518 | "; 519 | assert_patch!(old, new, expected); 520 | } 521 | 522 | #[test] 523 | fn without_no_newline_at_eof_message() { 524 | let old = "old line"; 525 | let new = "new line"; 526 | let expected = "\ 527 | --- original 528 | +++ modified 529 | @@ -1 +1 @@ 530 | -old line 531 | +new line 532 | "; 533 | 534 | let f = PatchFormatter::new().missing_newline_message(false); 535 | let patch = create_patch(old, new); 536 | let bpatch = create_patch_bytes(old.as_bytes(), new.as_bytes()); 537 | let patch_str = format!("{}", f.fmt_patch(&patch)); 538 | let mut patch_bytes = Vec::new(); 539 | f.write_patch_into(&bpatch, &mut patch_bytes).unwrap(); 540 | 541 | assert_eq!(patch_str, expected); 542 | assert_eq!(patch_bytes, patch_str.as_bytes()); 543 | assert_eq!(patch_bytes, expected.as_bytes()); 544 | assert_eq!(apply(old, &patch).unwrap(), new); 545 | assert_eq!( 546 | crate::apply_bytes(old.as_bytes(), &bpatch).unwrap(), 547 | new.as_bytes() 548 | ); 549 | } 550 | 551 | #[test] 552 | fn myers_diffy_vs_git() { 553 | let original = "\ 554 | void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) 555 | 
{ 556 | if (!Chunk_bounds_check(src, src_start, n)) return; 557 | if (!Chunk_bounds_check(dst, dst_start, n)) return; 558 | 559 | memcpy(dst->data + dst_start, src->data + src_start, n); 560 | } 561 | 562 | int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) 563 | { 564 | if (chunk == NULL) return 0; 565 | 566 | return start <= chunk->length && n <= chunk->length - start; 567 | } 568 | "; 569 | let a = "\ 570 | int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) 571 | { 572 | if (chunk == NULL) return 0; 573 | 574 | return start <= chunk->length && n <= chunk->length - start; 575 | } 576 | 577 | void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) 578 | { 579 | if (!Chunk_bounds_check(src, src_start, n)) return; 580 | if (!Chunk_bounds_check(dst, dst_start, n)) return; 581 | 582 | memcpy(dst->data + dst_start, src->data + src_start, n); 583 | } 584 | "; 585 | 586 | // TODO This differs from the expected output when using git's myers algorithm 587 | let expected_git = "\ 588 | --- original 589 | +++ modified 590 | @@ -1,14 +1,14 @@ 591 | -void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) 592 | +int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) 593 | { 594 | - if (!Chunk_bounds_check(src, src_start, n)) return; 595 | - if (!Chunk_bounds_check(dst, dst_start, n)) return; 596 | + if (chunk == NULL) return 0; 597 | 598 | - memcpy(dst->data + dst_start, src->data + src_start, n); 599 | + return start <= chunk->length && n <= chunk->length - start; 600 | } 601 | 602 | -int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) 603 | +void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) 604 | { 605 | - if (chunk == NULL) return 0; 606 | + if (!Chunk_bounds_check(src, src_start, n)) return; 607 | + if (!Chunk_bounds_check(dst, dst_start, n)) return; 608 | 609 | - return start <= chunk->length && n <= chunk->length - start; 610 | + memcpy(dst->data + dst_start, src->data + src_start, n); 611 | } 612 | "; 613 | let git_patch = Patch::from_str(expected_git).unwrap(); 614 | assert_eq!(apply(original, &git_patch).unwrap(), a); 615 | 616 | let expected_diffy = "\ 617 | --- original 618 | +++ modified 619 | @@ -1,3 +1,10 @@ 620 | +int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) 621 | +{ 622 | + if (chunk == NULL) return 0; 623 | + 624 | + return start <= chunk->length && n <= chunk->length - start; 625 | +} 626 | + 627 | void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) 628 | { 629 | if (!Chunk_bounds_check(src, src_start, n)) return; 630 | @@ -5,10 +12,3 @@ 631 | 632 | memcpy(dst->data + dst_start, src->data + src_start, n); 633 | } 634 | - 635 | -int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) 636 | -{ 637 | - if (chunk == NULL) return 0; 638 | - 639 | - return start <= chunk->length && n <= chunk->length - start; 640 | -} 641 | "; 642 | assert_patch!(original, a, expected_diffy); 643 | } 644 | 645 | #[test] 646 | fn suppress_blank_empty() { 647 | let original = "\ 648 | 1 649 | 2 650 | 3 651 | 652 | 4 653 | "; 654 | 655 | let modified = "\ 656 | 1 657 | 2 658 | 3 659 | 660 | 5 661 | "; 662 | 663 | // Note that there is a space " " on the line after 3 664 | let expected = "\ 665 | --- original 666 | +++ modified 667 | @@ -2,4 +2,4 @@ 668 | 2 669 | 3 670 | 671 | -4 672 | +5 673 | "; 674 | 675 | let f = PatchFormatter::new().suppress_blank_empty(false); 676 | let patch = create_patch(original, modified); 677 | let 
bpatch = create_patch_bytes(original.as_bytes(), modified.as_bytes()); 678 | let patch_str = format!("{}", f.fmt_patch(&patch)); 679 | let mut patch_bytes = Vec::new(); 680 | f.write_patch_into(&bpatch, &mut patch_bytes).unwrap(); 681 | 682 | assert_eq!(patch_str, expected); 683 | assert_eq!(patch_bytes, patch_str.as_bytes()); 684 | assert_eq!(patch_bytes, expected.as_bytes()); 685 | assert_eq!(apply(original, &patch).unwrap(), modified); 686 | assert_eq!( 687 | crate::apply_bytes(original.as_bytes(), &bpatch).unwrap(), 688 | modified.as_bytes() 689 | ); 690 | 691 | // Note that there is no space " " on the line after 3 692 | let expected_suppressed = "\ 693 | --- original 694 | +++ modified 695 | @@ -2,4 +2,4 @@ 696 | 2 697 | 3 698 | 699 | -4 700 | +5 701 | "; 702 | 703 | let f = PatchFormatter::new().suppress_blank_empty(true); 704 | let patch = create_patch(original, modified); 705 | let bpatch = create_patch_bytes(original.as_bytes(), modified.as_bytes()); 706 | let patch_str = format!("{}", f.fmt_patch(&patch)); 707 | let mut patch_bytes = Vec::new(); 708 | f.write_patch_into(&bpatch, &mut patch_bytes).unwrap(); 709 | 710 | assert_eq!(patch_str, expected_suppressed); 711 | assert_eq!(patch_bytes, patch_str.as_bytes()); 712 | assert_eq!(patch_bytes, expected_suppressed.as_bytes()); 713 | assert_eq!(apply(original, &patch).unwrap(), modified); 714 | assert_eq!( 715 | crate::apply_bytes(original.as_bytes(), &bpatch).unwrap(), 716 | modified.as_bytes() 717 | ); 718 | } 719 | 720 | // In the event that a patch has an invalid hunk range we want to ensure that when apply is 721 | // attempting to search for a matching position to apply a hunk that the search algorithm runs in 722 | // time bounded by the length of the original image being patched. Before clamping the search space 723 | // this test would take >200ms and now it runs in roughly ~30us on an M1 laptop. 724 | #[test] 725 | fn apply_with_incorrect_hunk_has_bounded_performance() { 726 | let patch = "\ 727 | @@ -10,6 +1000000,8 @@ 728 | First: 729 | Life before death, 730 | strength before weakness, 731 | journey before destination. 732 | Second: 733 | - I will put the law before all else. 734 | + I swear to seek justice, 735 | + to let it guide me, 736 | + until I find a more perfect Ideal. 737 | "; 738 | 739 | let original = "\ 740 | First: 741 | Life before death, 742 | strength before weakness, 743 | journey before destination. 744 | Second: 745 | I will put the law before all else. 746 | "; 747 | 748 | let expected = "\ 749 | First: 750 | Life before death, 751 | strength before weakness, 752 | journey before destination. 753 | Second: 754 | I swear to seek justice, 755 | to let it guide me, 756 | until I find a more perfect Ideal. 
757 | "; 758 | 759 | let patch = Patch::from_str(patch).unwrap(); 760 | 761 | let now = std::time::Instant::now(); 762 | 763 | let result = apply(original, &patch).unwrap(); 764 | 765 | let elapsed = now.elapsed(); 766 | 767 | println!("{:?}", elapsed); 768 | assert!(elapsed < std::time::Duration::from_micros(200)); 769 | 770 | assert_eq!(result, expected); 771 | } 772 | 773 | #[test] 774 | fn reverse_empty_file() { 775 | let p = create_patch("", "make it so"); 776 | let reverse = p.reverse(); 777 | 778 | let hunk_lines = p.hunks().iter().map(|h| h.lines()); 779 | let reverse_hunk_lines = reverse.hunks().iter().map(|h| h.lines()); 780 | 781 | for (lines, reverse_lines) in hunk_lines.zip(reverse_hunk_lines) { 782 | for (line, reverse) in lines.iter().zip(reverse_lines.iter()) { 783 | match line { 784 | l @ Line::Context(_) => assert_eq!(l, reverse), 785 | Line::Delete(d) => assert!(matches!(reverse, Line::Insert(i) if d == i)), 786 | Line::Insert(i) => assert!(matches!(reverse, Line::Delete(d) if d == i)), 787 | } 788 | } 789 | } 790 | 791 | let re_reverse = apply(&apply("", &p).unwrap(), &reverse).unwrap(); 792 | assert_eq!(re_reverse, ""); 793 | } 794 | 795 | #[test] 796 | fn reverse_multi_line_file() { 797 | let original = r"Commander Worf 798 | What do you want this time, Picard?! 799 | Commander Worf how dare you speak to mean that way! 800 | "; 801 | let modified = r"Commander Worf 802 | Yes, Captain Picard? 803 | Commander Worf, you are a valued member of my crew 804 | Why, thank you Captain. As are you. A true warrior. Kupluh! 805 | Kupluh, Indeed 806 | "; 807 | 808 | let p = create_patch(original, modified); 809 | let reverse = p.reverse(); 810 | 811 | let re_reverse = apply(&apply(original, &p).unwrap(), &reverse).unwrap(); 812 | assert_eq!(re_reverse, original); 813 | } 814 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Tools for finding and manipulating differences between files 2 | //! 3 | //! ## Overview 4 | //! 5 | //! This library is intended to be a collection of tools used to find and 6 | //! manipulate differences between files inspired by [LibXDiff] and [GNU 7 | //! Diffutils]. Version control systems like [Git] and [Mercurial] generally 8 | //! communicate differences between two versions of a file using a `diff` or 9 | //! `patch`. 10 | //! 11 | //! The current diff implementation is based on the [Myers' diff algorithm]. 12 | //! 13 | //! The documentation generally refers to "files" in many places but none of 14 | //! the apis explicitly operate on on-disk files. Instead this library 15 | //! requires that the text being operated on resides in-memory and as such if 16 | //! you want to perform operations on files, it is up to the user to load the 17 | //! contents of those files into memory before passing their contents to the 18 | //! apis provided by this library. 19 | //! 20 | //! ## UTF-8 and Non-UTF-8 21 | //! 22 | //! This library has support for working with both utf8 and non-utf8 texts. 23 | //! Most of the API's have two different variants, one for working with utf8 24 | //! `str` texts (e.g. [`create_patch`]) and one for working with bytes `[u8]` 25 | //! which may or may not be utf8 (e.g. [`create_patch_bytes`]). 26 | //! 27 | //! ## Creating a Patch 28 | //! 29 | //! A [`Patch`] between two texts can be created by doing the following: 30 | //! 31 | //! ``` 32 | //! use diffy::create_patch; 33 | //! 34 | //! 
let original = "The Way of Kings\nWords of Radiance\n"; 35 | //! let modified = "The Way of Kings\nWords of Radiance\nOathbringer\n"; 36 | //! 37 | //! let patch = create_patch(original, modified); 38 | //! # 39 | //! # let expected = "\ 40 | //! # --- original 41 | //! # +++ modified 42 | //! # @@ -1,2 +1,3 @@ 43 | //! # The Way of Kings 44 | //! # Words of Radiance 45 | //! # +Oathbringer 46 | //! # "; 47 | //! # 48 | //! # assert_eq!(patch.to_string(), expected); 49 | //! ``` 50 | //! 51 | //! A [`Patch`] can the be output in the [Unified Format] either by using its 52 | //! [`Display`] impl or by using a [`PatchFormatter`] to output the diff with 53 | //! color. 54 | //! 55 | //! ``` 56 | //! # use diffy::create_patch; 57 | //! # 58 | //! # let original = "The Way of Kings\nWords of Radiance\n"; 59 | //! # let modified = "The Way of Kings\nWords of Radiance\nOathbringer\n"; 60 | //! # 61 | //! # let patch = create_patch(original, modified); 62 | //! # 63 | //! # let expected = "\ 64 | //! # --- original 65 | //! # +++ modified 66 | //! # @@ -1,2 +1,3 @@ 67 | //! # The Way of Kings 68 | //! # Words of Radiance 69 | //! # +Oathbringer 70 | //! # "; 71 | //! # 72 | //! # assert_eq!(patch.to_string(), expected); 73 | //! # 74 | //! // Without color 75 | //! print!("{}", patch); 76 | //! 77 | //! // With color 78 | //! # use diffy::PatchFormatter; 79 | //! let f = PatchFormatter::new().with_color(); 80 | //! print!("{}", f.fmt_patch(&patch)); 81 | //! ``` 82 | //! 83 | //! ```console 84 | //! --- original 85 | //! +++ modified 86 | //! @@ -1,2 +1,3 @@ 87 | //! The Way of Kings 88 | //! Words of Radiance 89 | //! +Oathbringer 90 | //! ``` 91 | //! 92 | //! ## Applying a Patch 93 | //! 94 | //! Once you have a [`Patch`] you can apply it to a base image in order to 95 | //! recover the new text. Each hunk will be applied to the base image in 96 | //! sequence. Similarly to GNU `patch`, this implementation can detect when 97 | //! line numbers specified in the patch are incorrect and will attempt to find 98 | //! the correct place to apply each hunk by iterating forward and backward 99 | //! from the given position until all context lines from a hunk match the base 100 | //! image. 101 | //! 102 | //! ``` 103 | //! use diffy::{apply, Patch}; 104 | //! 105 | //! let s = "\ 106 | //! --- a/skybreaker-ideals 107 | //! +++ b/skybreaker-ideals 108 | //! @@ -10,6 +10,8 @@ 109 | //! First: 110 | //! Life before death, 111 | //! strength before weakness, 112 | //! journey before destination. 113 | //! Second: 114 | //! - I will put the law before all else. 115 | //! + I swear to seek justice, 116 | //! + to let it guide me, 117 | //! + until I find a more perfect Ideal. 118 | //! "; 119 | //! 120 | //! let patch = Patch::from_str(s).unwrap(); 121 | //! 122 | //! let base_image = "\ 123 | //! First: 124 | //! Life before death, 125 | //! strength before weakness, 126 | //! journey before destination. 127 | //! Second: 128 | //! I will put the law before all else. 129 | //! "; 130 | //! 131 | //! let expected = "\ 132 | //! First: 133 | //! Life before death, 134 | //! strength before weakness, 135 | //! journey before destination. 136 | //! Second: 137 | //! I swear to seek justice, 138 | //! to let it guide me, 139 | //! until I find a more perfect Ideal. 140 | //! "; 141 | //! 142 | //! assert_eq!(apply(base_image, &patch).unwrap(), expected); 143 | //! ``` 144 | //! 145 | //! ## Performing a Three-way Merge 146 | //! 147 | //! 
Two files `A` and `B` can be merged together given a common ancestor or 148 | //! original file `O` to produce a file `C` similarly to how [diff3] 149 | //! performs a three-way merge. 150 | //! 151 | //! ```console 152 | //! --- A --- 153 | //! / \ 154 | //! / \ 155 | //! O C 156 | //! \ / 157 | //! \ / 158 | //! --- B --- 159 | //! ``` 160 | //! 161 | //! If files `A` and `B` modified different regions of the original file `O` 162 | //! (or the same region in the same way) then the files can be merged without 163 | //! conflict. 164 | //! 165 | //! ``` 166 | //! use diffy::merge; 167 | //! 168 | //! let original = "the final empire\nThe Well of Ascension\nThe hero of ages\n"; 169 | //! let a = "The Final Empire\nThe Well of Ascension\nThe Hero of Ages\n"; 170 | //! let b = "The Final Empire\nThe Well of Ascension\nThe hero of ages\n"; 171 | //! let expected = "\ 172 | //! The Final Empire 173 | //! The Well of Ascension 174 | //! The Hero of Ages 175 | //! "; 176 | //! 177 | //! assert_eq!(merge(original, a, b).unwrap(), expected); 178 | //! ``` 179 | //! 180 | //! If both files `A` and `B` modified the same region of the original file 181 | //! `O` (and those modifications are different), it would result in a conflict 182 | //! as it is not clear which modifications should be used in the merged 183 | //! result. 184 | //! 185 | //! ``` 186 | //! use diffy::merge; 187 | //! 188 | //! let original = "The Final Empire\nThe Well of Ascension\nThe hero of ages\n"; 189 | //! let a = "The Final Empire\nThe Well of Ascension\nThe Hero of Ages\nSecret History\n"; 190 | //! let b = "The Final Empire\nThe Well of Ascension\nThe hero of ages\nThe Alloy of Law\n"; 191 | //! let expected = "\ 192 | //! The Final Empire 193 | //! The Well of Ascension 194 | //! <<<<<<< ours 195 | //! The Hero of Ages 196 | //! Secret History 197 | //! ||||||| original 198 | //! The hero of ages 199 | //! ======= 200 | //! The hero of ages 201 | //! The Alloy of Law 202 | //! >>>>>>> theirs 203 | //! "; 204 | //! 205 | //! assert_eq!(merge(original, a, b).unwrap_err(), expected); 206 | //! ``` 207 | //! 208 | //! [LibXDiff]: http://www.xmailserver.org/xdiff-lib.html 209 | //! [Myers' diff algorithm]: http://www.xmailserver.org/diff2.pdf 210 | //! [GNU Diffutils]: https://www.gnu.org/software/diffutils/ 211 | //! [Git]: https://git-scm.com/ 212 | //! [Mercurial]: https://www.mercurial-scm.org/ 213 | //! [Unified Format]: https://en.wikipedia.org/wiki/Diff#Unified_format 214 | //! [diff3]: https://en.wikipedia.org/wiki/Diff3 215 | //! 216 | //! [`Display`]: https://doc.rust-lang.org/stable/std/fmt/trait.Display.html 217 | //! [`Patch`]: struct.Patch.html 218 | //! [`PatchFormatter`]: struct.PatchFormatter.html 219 | //! [`create_patch`]: fn.create_patch.html 220 | //! 
[`create_patch_bytes`]: fn.create_patch_bytes.html 221 | 222 | mod apply; 223 | mod diff; 224 | mod merge; 225 | mod patch; 226 | mod range; 227 | mod utils; 228 | 229 | pub use apply::{apply, apply_bytes, ApplyError}; 230 | pub use diff::{create_patch, create_patch_bytes, DiffOptions}; 231 | pub use merge::{merge, merge_bytes, ConflictStyle, MergeOptions}; 232 | pub use patch::{Hunk, HunkRange, Line, ParsePatchError, Patch, PatchFormatter}; 233 | -------------------------------------------------------------------------------- /src/merge/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::{ 2 | diff::DiffOptions, 3 | range::{DiffRange, Range, SliceLike}, 4 | utils::Classifier, 5 | }; 6 | use std::{cmp, fmt}; 7 | 8 | #[cfg(test)] 9 | mod tests; 10 | 11 | const DEFAULT_CONFLICT_MARKER_LENGTH: usize = 7; 12 | 13 | enum Diff3Range<'ancestor, 'ours, 'theirs, T: ?Sized> { 14 | Equal(Range<'ancestor, T>, Range<'ours, T>, Range<'theirs, T>), 15 | Ancestor(Range<'ancestor, T>), 16 | AncestorOurs(Range<'ancestor, T>, Range<'ours, T>), 17 | AncestorTheirs(Range<'ancestor, T>, Range<'theirs, T>), 18 | Ours(Range<'ours, T>), 19 | Theirs(Range<'theirs, T>), 20 | } 21 | 22 | impl fmt::Debug for Diff3Range<'_, '_, '_, T> { 23 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 24 | match self { 25 | Diff3Range::Equal(range, ..) => write!(f, "Equal: {:?}", range.as_slice()), 26 | Diff3Range::Ancestor(range) => write!(f, "Ancestor: {:?}", range.as_slice()), 27 | Diff3Range::AncestorOurs(range, ..) => { 28 | write!(f, "AncestorOurs: {:?}", range.as_slice()) 29 | } 30 | Diff3Range::AncestorTheirs(range, ..) => { 31 | write!(f, "AncestorTheirs: {:?}", range.as_slice()) 32 | } 33 | Diff3Range::Ours(range) => write!(f, "Ours: {:?}", range.as_slice()), 34 | Diff3Range::Theirs(range) => write!(f, "Theirs: {:?}", range.as_slice()), 35 | } 36 | } 37 | } 38 | 39 | impl Copy for Diff3Range<'_, '_, '_, T> {} 40 | 41 | impl Clone for Diff3Range<'_, '_, '_, T> { 42 | fn clone(&self) -> Self { 43 | *self 44 | } 45 | } 46 | 47 | enum MergeRange<'ancestor, 'ours, 'theirs, T: ?Sized> { 48 | Equal(Range<'ancestor, T>, Range<'ours, T>, Range<'theirs, T>), 49 | Conflict(Range<'ancestor, T>, Range<'ours, T>, Range<'theirs, T>), 50 | Ours(Range<'ours, T>), 51 | Theirs(Range<'theirs, T>), 52 | Both(Range<'ours, T>, Range<'theirs, T>), 53 | } 54 | 55 | impl fmt::Debug for MergeRange<'_, '_, '_, T> { 56 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 57 | match self { 58 | MergeRange::Equal(range, ..) => write!(f, "Equal: {:?}", range.as_slice()), 59 | MergeRange::Conflict(ancestor, ours, theirs) => write!( 60 | f, 61 | "Conflict: ancestor: {:?} ours: {:?} theirs: {:?}", 62 | ancestor.as_slice(), 63 | ours.as_slice(), 64 | theirs.as_slice() 65 | ), 66 | MergeRange::Ours(range) => write!(f, "Ours: {:?}", range.as_slice()), 67 | MergeRange::Theirs(range) => write!(f, "Theirs: {:?}", range.as_slice()), 68 | MergeRange::Both(ours, theirs) => write!( 69 | f, 70 | "Both: ours: {:?} theirs: {:?}", 71 | ours.as_slice(), 72 | theirs.as_slice() 73 | ), 74 | } 75 | } 76 | } 77 | 78 | impl Copy for MergeRange<'_, '_, '_, T> {} 79 | 80 | impl Clone for MergeRange<'_, '_, '_, T> { 81 | fn clone(&self) -> Self { 82 | *self 83 | } 84 | } 85 | 86 | /// Style used when rendering a conflict 87 | #[derive(Copy, Clone, Debug)] 88 | pub enum ConflictStyle { 89 | /// Renders conflicting lines from both files, separated by conflict markers. 
90 | /// 91 | /// ```console 92 | /// <<<<<<< A 93 | /// lines in file A 94 | /// ======= 95 | /// lines in file B 96 | /// >>>>>>> B 97 | /// ``` 98 | Merge, 99 | 100 | /// Renders conflicting lines from both files including lines from the original files, 101 | /// separated by conflict markers. 102 | /// 103 | /// ```console 104 | /// <<<<<<< A 105 | /// lines in file A 106 | /// ||||||| Original 107 | /// lines in Original file 108 | /// ======= 109 | /// lines in file B 110 | /// >>>>>>> B 111 | /// ``` 112 | Diff3, 113 | } 114 | 115 | /// A collection of options for modifying the way a merge is performed 116 | #[derive(Debug)] 117 | pub struct MergeOptions { 118 | conflict_marker_length: usize, 119 | style: ConflictStyle, 120 | } 121 | 122 | impl MergeOptions { 123 | /// Constructs a new `MergeOptions` with default settings 124 | /// 125 | /// ## Defaults 126 | /// * conflict_marker_length = 7 127 | /// * style = ConflictStyle::Diff3 128 | pub fn new() -> Self { 129 | Self { 130 | conflict_marker_length: DEFAULT_CONFLICT_MARKER_LENGTH, 131 | style: ConflictStyle::Diff3, 132 | } 133 | } 134 | 135 | /// Set the length of the conflict markers used when displaying a merge conflict 136 | pub fn set_conflict_marker_length(&mut self, conflict_marker_length: usize) -> &mut Self { 137 | self.conflict_marker_length = conflict_marker_length; 138 | self 139 | } 140 | 141 | /// Set the conflict style used when displaying a merge conflict 142 | pub fn set_conflict_style(&mut self, style: ConflictStyle) -> &mut Self { 143 | self.style = style; 144 | self 145 | } 146 | 147 | /// Merge two files, given a common ancestor, based on the configured options 148 | pub fn merge<'a>( 149 | &self, 150 | ancestor: &'a str, 151 | ours: &'a str, 152 | theirs: &'a str, 153 | ) -> Result { 154 | let mut classifier = Classifier::default(); 155 | let (ancestor_lines, ancestor_ids) = classifier.classify_lines(ancestor); 156 | let (our_lines, our_ids) = classifier.classify_lines(ours); 157 | let (their_lines, their_ids) = classifier.classify_lines(theirs); 158 | 159 | let opts = DiffOptions::default(); 160 | let our_solution = opts.diff_slice(&ancestor_ids, &our_ids); 161 | let their_solution = opts.diff_slice(&ancestor_ids, &their_ids); 162 | 163 | let merged = merge_solutions(&our_solution, &their_solution); 164 | let mut merge = diff3_range_to_merge_range(&merged); 165 | 166 | cleanup_conflicts(&mut merge); 167 | 168 | output_result( 169 | &ancestor_lines, 170 | &our_lines, 171 | &their_lines, 172 | &merge, 173 | self.conflict_marker_length, 174 | self.style, 175 | ) 176 | } 177 | 178 | /// Perform a 3-way merge between potentially non-utf8 texts 179 | pub fn merge_bytes<'a>( 180 | &self, 181 | ancestor: &'a [u8], 182 | ours: &'a [u8], 183 | theirs: &'a [u8], 184 | ) -> Result, Vec> { 185 | let mut classifier = Classifier::default(); 186 | let (ancestor_lines, ancestor_ids) = classifier.classify_lines(ancestor); 187 | let (our_lines, our_ids) = classifier.classify_lines(ours); 188 | let (their_lines, their_ids) = classifier.classify_lines(theirs); 189 | 190 | let opts = DiffOptions::default(); 191 | let our_solution = opts.diff_slice(&ancestor_ids, &our_ids); 192 | let their_solution = opts.diff_slice(&ancestor_ids, &their_ids); 193 | 194 | let merged = merge_solutions(&our_solution, &their_solution); 195 | let mut merge = diff3_range_to_merge_range(&merged); 196 | 197 | cleanup_conflicts(&mut merge); 198 | 199 | output_result_bytes( 200 | &ancestor_lines, 201 | &our_lines, 202 | &their_lines, 203 | &merge, 204 | 
self.conflict_marker_length, 205 | self.style, 206 | ) 207 | } 208 | } 209 | 210 | impl Default for MergeOptions { 211 | fn default() -> Self { 212 | Self::new() 213 | } 214 | } 215 | 216 | /// Merge two files given a common ancestor. 217 | /// 218 | /// Returns `Ok(String)` upon a successful merge. 219 | /// Returns `Err(String)` if there were conflicts, with the conflicting 220 | /// regions marked with conflict markers. 221 | /// 222 | /// ## Merging two files without conflicts 223 | /// ``` 224 | /// # use diffy::merge; 225 | /// let original = "\ 226 | /// Devotion 227 | /// Dominion 228 | /// Odium 229 | /// Preservation 230 | /// Ruin 231 | /// Cultivation 232 | /// Honor 233 | /// Endowment 234 | /// Autonomy 235 | /// Ambition 236 | /// "; 237 | /// let a = "\ 238 | /// Odium 239 | /// Preservation 240 | /// Ruin 241 | /// Cultivation 242 | /// Endowment 243 | /// Autonomy 244 | /// "; 245 | /// let b = "\ 246 | /// Devotion 247 | /// Dominion 248 | /// Odium 249 | /// Harmony 250 | /// Cultivation 251 | /// Honor 252 | /// Endowment 253 | /// Autonomy 254 | /// Ambition 255 | /// "; 256 | /// 257 | /// let expected = "\ 258 | /// Odium 259 | /// Harmony 260 | /// Cultivation 261 | /// Endowment 262 | /// Autonomy 263 | /// "; 264 | /// 265 | /// assert_eq!(merge(original, a, b).unwrap(), expected); 266 | /// ``` 267 | pub fn merge<'a>(ancestor: &'a str, ours: &'a str, theirs: &'a str) -> Result { 268 | MergeOptions::default().merge(ancestor, ours, theirs) 269 | } 270 | 271 | /// Perform a 3-way merge between potentially non-utf8 texts 272 | pub fn merge_bytes<'a>( 273 | ancestor: &'a [u8], 274 | ours: &'a [u8], 275 | theirs: &'a [u8], 276 | ) -> Result, Vec> { 277 | MergeOptions::default().merge_bytes(ancestor, ours, theirs) 278 | } 279 | 280 | fn merge_solutions<'ancestor, 'ours, 'theirs, T: ?Sized + SliceLike>( 281 | our_solution: &[DiffRange<'ancestor, 'ours, T>], 282 | their_solution: &[DiffRange<'ancestor, 'theirs, T>], 283 | ) -> Vec> { 284 | let mut our_solution = our_solution.iter().copied(); 285 | let mut their_solution = their_solution.iter().copied(); 286 | let mut ours = our_solution.next(); 287 | let mut theirs = their_solution.next(); 288 | 289 | let mut solution = Vec::new(); 290 | 291 | while ours.is_some() || theirs.is_some() { 292 | let merge_range = match (ours, theirs) { 293 | // 294 | // Inserts can't easily be checked to see if they match each other 295 | // 296 | (Some(DiffRange::Insert(range)), _) => { 297 | ours.take(); 298 | Diff3Range::Ours(range) 299 | } 300 | (_, Some(DiffRange::Insert(range))) => { 301 | theirs.take(); 302 | Diff3Range::Theirs(range) 303 | } 304 | 305 | ( 306 | Some(DiffRange::Equal(ancestor1, our_range)), 307 | Some(DiffRange::Equal(ancestor2, their_range)), 308 | ) => { 309 | assert_eq!(ancestor1.offset(), ancestor2.offset()); 310 | let len = cmp::min(ancestor1.len(), ancestor2.len()); 311 | 312 | shrink_front(&mut ours, len); 313 | shrink_front(&mut theirs, len); 314 | 315 | Diff3Range::Equal( 316 | ancestor1.slice(..len), 317 | our_range.slice(..len), 318 | their_range.slice(..len), 319 | ) 320 | } 321 | 322 | (Some(DiffRange::Equal(ancestor1, our_range)), Some(DiffRange::Delete(ancestor2))) => { 323 | assert_eq!(ancestor1.offset(), ancestor2.offset()); 324 | let len = cmp::min(ancestor1.len(), ancestor2.len()); 325 | 326 | shrink_front(&mut ours, len); 327 | shrink_front(&mut theirs, len); 328 | 329 | Diff3Range::AncestorOurs(ancestor1.slice(..len), our_range.slice(..len)) 330 | } 331 | 332 | ( 333 | 
Some(DiffRange::Delete(ancestor1)), 334 | Some(DiffRange::Equal(ancestor2, their_range)), 335 | ) => { 336 | assert_eq!(ancestor1.offset(), ancestor2.offset()); 337 | let len = cmp::min(ancestor1.len(), ancestor2.len()); 338 | 339 | shrink_front(&mut ours, len); 340 | shrink_front(&mut theirs, len); 341 | 342 | Diff3Range::AncestorTheirs(ancestor2.slice(..len), their_range.slice(..len)) 343 | } 344 | 345 | (Some(DiffRange::Delete(ancestor1)), Some(DiffRange::Delete(ancestor2))) => { 346 | assert_eq!(ancestor1.offset(), ancestor2.offset()); 347 | let len = cmp::min(ancestor1.len(), ancestor2.len()); 348 | 349 | shrink_front(&mut ours, len); 350 | shrink_front(&mut theirs, len); 351 | 352 | Diff3Range::Ancestor(ancestor1.slice(..len)) 353 | } 354 | 355 | // 356 | // Unreachable cases 357 | // 358 | (Some(DiffRange::Equal(..)), None) 359 | | (Some(DiffRange::Delete(_)), None) 360 | | (None, Some(DiffRange::Equal(..))) 361 | | (None, Some(DiffRange::Delete(_))) 362 | | (None, None) => unreachable!("Equal/Delete should match up"), 363 | }; 364 | 365 | solution.push(merge_range); 366 | 367 | if ours.map_or(true, |range| range.is_empty()) { 368 | ours = our_solution.next(); 369 | } 370 | if theirs.map_or(true, |range| range.is_empty()) { 371 | theirs = their_solution.next(); 372 | } 373 | } 374 | 375 | solution 376 | } 377 | 378 | fn shrink_front(maybe_range: &mut Option>, len: usize) { 379 | if let Some(range) = maybe_range { 380 | range.shrink_front(len) 381 | } 382 | } 383 | 384 | fn diff3_range_to_merge_range<'ancestor, 'ours, 'theirs, T: ?Sized + SliceLike>( 385 | solution: &[Diff3Range<'ancestor, 'ours, 'theirs, T>], 386 | ) -> Vec> { 387 | let mut ancestor: Option> = None; 388 | let mut ours: Option> = None; 389 | let mut theirs: Option> = None; 390 | 391 | let mut merge = Vec::new(); 392 | 393 | for &diff3 in solution { 394 | match diff3 { 395 | Diff3Range::Equal(ancestor_range, our_range, their_range) => { 396 | if let Some(merge_range) = 397 | create_merge_range(ancestor.take(), ours.take(), theirs.take()) 398 | { 399 | merge.push(merge_range); 400 | } 401 | merge.push(MergeRange::Equal(ancestor_range, our_range, their_range)); 402 | } 403 | Diff3Range::Ancestor(range) => { 404 | set_or_merge_range(&mut ancestor, range); 405 | set_or_merge_range(&mut ours, Range::empty()); 406 | set_or_merge_range(&mut theirs, Range::empty()); 407 | } 408 | Diff3Range::AncestorOurs(ancestor_range, our_range) => { 409 | set_or_merge_range(&mut ancestor, ancestor_range); 410 | set_or_merge_range(&mut ours, our_range); 411 | } 412 | Diff3Range::AncestorTheirs(ancestor_range, their_range) => { 413 | set_or_merge_range(&mut ancestor, ancestor_range); 414 | set_or_merge_range(&mut theirs, their_range); 415 | } 416 | Diff3Range::Ours(range) => set_or_merge_range(&mut ours, range), 417 | Diff3Range::Theirs(range) => set_or_merge_range(&mut theirs, range), 418 | } 419 | } 420 | 421 | if let Some(merge_range) = create_merge_range(ancestor.take(), ours.take(), theirs.take()) { 422 | merge.push(merge_range); 423 | } 424 | 425 | merge 426 | } 427 | 428 | fn set_or_merge_range<'a, T: ?Sized>(range1: &mut Option>, range2: Range<'a, T>) { 429 | if let Some(range1) = range1 { 430 | if range1.is_empty() { 431 | *range1 = range2; 432 | } else if !range2.is_empty() { 433 | assert_eq!(range1.offset() + range1.len(), range2.offset()); 434 | range1.grow_down(range2.len()); 435 | } 436 | } else { 437 | *range1 = Some(range2); 438 | } 439 | } 440 | 441 | fn create_merge_range<'ancestor, 'ours, 'theirs, T: ?Sized + SliceLike>( 
442 |     ancestor: Option<Range<'ancestor, T>>,
443 |     ours: Option<Range<'ours, T>>,
444 |     theirs: Option<Range<'theirs, T>>,
445 | ) -> Option<MergeRange<'ancestor, 'ours, 'theirs, T>> {
446 |     match (ancestor, ours, theirs) {
447 |         (Some(ancestor), Some(ours), Some(theirs)) => {
448 |             Some(MergeRange::Conflict(ancestor, ours, theirs))
449 |         }
450 |         (None, Some(ours), Some(theirs)) => {
451 |             Some(MergeRange::Conflict(Range::empty(), ours, theirs))
452 |         }
453 |         (None, Some(ours), None) => Some(MergeRange::Ours(ours)),
454 |         (None, None, Some(theirs)) => Some(MergeRange::Theirs(theirs)),
455 | 
456 |         (Some(ancestor), None, Some(theirs)) => {
457 |             Some(MergeRange::Conflict(ancestor, Range::empty(), theirs))
458 |         }
459 |         (Some(ancestor), Some(ours), None) => {
460 |             Some(MergeRange::Conflict(ancestor, ours, Range::empty()))
461 |         }
462 | 
463 |         (Some(_), None, None) | (None, None, None) => None,
464 |     }
465 | }
466 | 
467 | #[allow(clippy::needless_lifetimes)]
468 | fn cleanup_conflicts<'ancestor, 'ours, 'theirs, T: ?Sized + SliceLike + PartialEq>(
469 |     solution: &mut [MergeRange<'ancestor, 'ours, 'theirs, T>],
470 | ) {
471 |     let mut pointer = 0;
472 | 
473 |     // TODO this could probably be more sophisticated:
474 |     // e.g. run the diff algorithm on the conflict area
475 |     while let Some(&merge) = solution.get(pointer) {
476 |         if let MergeRange::Conflict(ancestor, ours, theirs) = merge {
477 |             // If the ranges in the conflict end up being the same on both sides then we can
478 |             // eliminate the conflict
479 |             if ours.as_slice() == theirs.as_slice() {
480 |                 solution[pointer] = MergeRange::Both(ours, theirs);
481 |             // If either ours or theirs exactly matches ancestor then we can also eliminate the
482 |             // conflict
483 |             } else if ancestor.as_slice() == ours.as_slice() {
484 |                 solution[pointer] = MergeRange::Theirs(theirs);
485 |             } else if ancestor.as_slice() == theirs.as_slice() {
486 |                 solution[pointer] = MergeRange::Ours(ours);
487 |             }
488 |         }
489 |         pointer += 1;
490 |     }
491 | }
492 | 
493 | fn output_result<'a, T: ?Sized>(
494 |     ancestor: &[&'a str],
495 |     ours: &[&'a str],
496 |     theirs: &[&'a str],
497 |     merge: &[MergeRange<'_, '_, '_, T>],
498 |     marker_len: usize,
499 |     style: ConflictStyle,
500 | ) -> Result<String, String> {
501 |     let mut conflicts = 0;
502 |     let mut output = String::new();
503 | 
504 |     for merge_range in merge {
505 |         match merge_range {
506 |             MergeRange::Equal(range, ..) => {
507 |                 output.extend(ancestor[range.range()].iter().copied());
508 |             }
509 |             MergeRange::Conflict(ancestor_range, ours_range, theirs_range) => {
510 |                 add_conflict_marker(&mut output, '<', marker_len, Some("ours"));
511 |                 output.extend(ours[ours_range.range()].iter().copied());
512 | 
513 |                 if let ConflictStyle::Diff3 = style {
514 |                     add_conflict_marker(&mut output, '|', marker_len, Some("original"));
515 |                     output.extend(ancestor[ancestor_range.range()].iter().copied());
516 |                 }
517 | 
518 |                 add_conflict_marker(&mut output, '=', marker_len, None);
519 |                 output.extend(theirs[theirs_range.range()].iter().copied());
520 |                 add_conflict_marker(&mut output, '>', marker_len, Some("theirs"));
521 |                 conflicts += 1;
522 |             }
523 |             MergeRange::Ours(range) => {
524 |                 output.extend(ours[range.range()].iter().copied());
525 |             }
526 |             MergeRange::Theirs(range) => {
527 |                 output.extend(theirs[range.range()].iter().copied());
528 |             }
529 |             MergeRange::Both(range, _) => {
530 |                 output.extend(ours[range.range()].iter().copied());
531 |             }
532 |         }
533 |     }
534 | 
535 |     if conflicts != 0 {
536 |         Err(output)
537 |     } else {
538 |         Ok(output)
539 |     }
540 | }
541 | 
542 | fn add_conflict_marker(
543 |     output: &mut String,
544 |     marker: char,
545 |     marker_len: usize,
546 |     filename: Option<&str>,
547 | ) {
548 |     for _ in 0..marker_len {
549 |         output.push(marker);
550 |     }
551 | 
552 |     if let Some(filename) = filename {
553 |         output.push(' ');
554 |         output.push_str(filename);
555 |     }
556 |     output.push('\n');
557 | }
558 | 
559 | fn output_result_bytes<'a, T: ?Sized>(
560 |     ancestor: &[&'a [u8]],
561 |     ours: &[&'a [u8]],
562 |     theirs: &[&'a [u8]],
563 |     merge: &[MergeRange<'_, '_, '_, T>],
564 |     marker_len: usize,
565 |     style: ConflictStyle,
566 | ) -> Result<Vec<u8>, Vec<u8>> {
567 |     let mut conflicts = 0;
568 |     let mut output: Vec<u8> = Vec::new();
569 | 
570 |     for merge_range in merge {
571 |         match merge_range {
572 |             MergeRange::Equal(range, ..)
=> { 573 | ancestor[range.range()] 574 | .iter() 575 | .for_each(|line| output.extend_from_slice(line)); 576 | } 577 | MergeRange::Conflict(ancestor_range, ours_range, theirs_range) => { 578 | add_conflict_marker_bytes(&mut output, b'<', marker_len, Some(b"ours")); 579 | ours[ours_range.range()] 580 | .iter() 581 | .for_each(|line| output.extend_from_slice(line)); 582 | 583 | if let ConflictStyle::Diff3 = style { 584 | add_conflict_marker_bytes(&mut output, b'|', marker_len, Some(b"original")); 585 | ancestor[ancestor_range.range()] 586 | .iter() 587 | .for_each(|line| output.extend_from_slice(line)); 588 | } 589 | 590 | add_conflict_marker_bytes(&mut output, b'=', marker_len, None); 591 | theirs[theirs_range.range()] 592 | .iter() 593 | .for_each(|line| output.extend_from_slice(line)); 594 | add_conflict_marker_bytes(&mut output, b'>', marker_len, Some(b"theirs")); 595 | conflicts += 1; 596 | } 597 | MergeRange::Ours(range) => { 598 | ours[range.range()] 599 | .iter() 600 | .for_each(|line| output.extend_from_slice(line)); 601 | } 602 | MergeRange::Theirs(range) => { 603 | theirs[range.range()] 604 | .iter() 605 | .for_each(|line| output.extend_from_slice(line)); 606 | } 607 | MergeRange::Both(range, _) => { 608 | ours[range.range()] 609 | .iter() 610 | .for_each(|line| output.extend_from_slice(line)); 611 | } 612 | } 613 | } 614 | 615 | if conflicts != 0 { 616 | Err(output) 617 | } else { 618 | Ok(output) 619 | } 620 | } 621 | 622 | fn add_conflict_marker_bytes( 623 | output: &mut Vec, 624 | marker: u8, 625 | marker_len: usize, 626 | filename: Option<&[u8]>, 627 | ) { 628 | for _ in 0..marker_len { 629 | output.push(marker); 630 | } 631 | 632 | if let Some(filename) = filename { 633 | output.push(b' '); 634 | output.extend_from_slice(filename); 635 | } 636 | output.push(b'\n'); 637 | } 638 | -------------------------------------------------------------------------------- /src/merge/tests.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | 3 | macro_rules! assert_merge { 4 | ($original:ident, $ours:ident, $theirs:ident, $kind:ident($expected:expr), $msg:literal $(,)?) => { 5 | let solution = merge($original, $ours, $theirs); 6 | 7 | macro_rules! result { 8 | (Ok, $s:expr) => { 9 | Result::<&str, &str>::Ok($s) 10 | }; 11 | (Err, $s:expr) => { 12 | Result::<&str, &str>::Err($s) 13 | }; 14 | } 15 | assert!( 16 | same_merge(result!($kind, $expected), &solution), 17 | concat!($msg, "\nexpected={:#?}\nactual={:#?}"), 18 | result!($kind, $expected), 19 | solution 20 | ); 21 | 22 | let solution_bytes = 23 | merge_bytes($original.as_bytes(), $ours.as_bytes(), $theirs.as_bytes()); 24 | 25 | macro_rules! 
result_bytes { 26 | (Ok, $s:expr) => { 27 | Result::<&[u8], &[u8]>::Ok($s.as_bytes()) 28 | }; 29 | (Err, $s:expr) => { 30 | Result::<&[u8], &[u8]>::Err($s.as_bytes()) 31 | }; 32 | } 33 | assert!( 34 | same_merge_bytes(result_bytes!($kind, $expected), &solution_bytes), 35 | concat!($msg, "\nexpected={:#?}\nactual={:#?}"), 36 | result_bytes!($kind, $expected), 37 | solution_bytes 38 | ); 39 | }; 40 | } 41 | 42 | fn same_merge(expected: Result<&str, &str>, actual: &Result) -> bool { 43 | match (expected, actual) { 44 | (Ok(expected), Ok(actual)) => expected == actual, 45 | (Err(expected), Err(actual)) => expected == actual, 46 | (_, _) => false, 47 | } 48 | } 49 | 50 | fn same_merge_bytes(expected: Result<&[u8], &[u8]>, actual: &Result, Vec>) -> bool { 51 | match (expected, actual) { 52 | (Ok(expected), Ok(actual)) => expected == &actual[..], 53 | (Err(expected), Err(actual)) => expected == &actual[..], 54 | (_, _) => false, 55 | } 56 | } 57 | 58 | #[test] 59 | fn test_merge() { 60 | let original = "\ 61 | carrots 62 | garlic 63 | onions 64 | salmon 65 | mushrooms 66 | tomatoes 67 | salt 68 | "; 69 | let a = "\ 70 | carrots 71 | salmon 72 | mushrooms 73 | tomatoes 74 | garlic 75 | onions 76 | salt 77 | "; 78 | let b = "\ 79 | carrots 80 | salmon 81 | garlic 82 | onions 83 | mushrooms 84 | tomatoes 85 | salt 86 | "; 87 | 88 | assert_merge!(original, original, original, Ok(original), "Equal case #1"); 89 | assert_merge!(original, a, a, Ok(a), "Equal case #2"); 90 | assert_merge!(original, b, b, Ok(b), "Equal case #3"); 91 | 92 | let expected = "\ 93 | carrots 94 | <<<<<<< ours 95 | salmon 96 | ||||||| original 97 | garlic 98 | onions 99 | salmon 100 | ======= 101 | salmon 102 | garlic 103 | onions 104 | >>>>>>> theirs 105 | mushrooms 106 | tomatoes 107 | garlic 108 | onions 109 | salt 110 | "; 111 | 112 | assert_merge!(original, a, b, Err(expected), "Single Conflict case"); 113 | 114 | let expected = "\ 115 | carrots 116 | <<<<<<< ours 117 | salmon 118 | garlic 119 | onions 120 | ||||||| original 121 | garlic 122 | onions 123 | salmon 124 | ======= 125 | salmon 126 | >>>>>>> theirs 127 | mushrooms 128 | tomatoes 129 | garlic 130 | onions 131 | salt 132 | "; 133 | 134 | assert_merge!( 135 | original, 136 | b, 137 | a, 138 | Err(expected), 139 | "Reverse Single Conflict case" 140 | ); 141 | 142 | let original = "\ 143 | carrots 144 | garlic 145 | onions 146 | salmon 147 | tomatoes 148 | salt 149 | "; 150 | let a = "\ 151 | carrots 152 | salmon 153 | tomatoes 154 | garlic 155 | onions 156 | salt 157 | "; 158 | let b = "\ 159 | carrots 160 | salmon 161 | garlic 162 | onions 163 | tomatoes 164 | salt 165 | "; 166 | let expected = "\ 167 | carrots 168 | <<<<<<< ours 169 | salmon 170 | tomatoes 171 | ||||||| original 172 | ======= 173 | salmon 174 | >>>>>>> theirs 175 | garlic 176 | onions 177 | <<<<<<< ours 178 | ||||||| original 179 | salmon 180 | tomatoes 181 | ======= 182 | tomatoes 183 | >>>>>>> theirs 184 | salt 185 | "; 186 | 187 | assert_merge!(original, a, b, Err(expected), "Multiple Conflict case"); 188 | 189 | let expected = "\ 190 | carrots 191 | <<<<<<< ours 192 | salmon 193 | ||||||| original 194 | ======= 195 | salmon 196 | tomatoes 197 | >>>>>>> theirs 198 | garlic 199 | onions 200 | <<<<<<< ours 201 | tomatoes 202 | ||||||| original 203 | salmon 204 | tomatoes 205 | ======= 206 | >>>>>>> theirs 207 | salt 208 | "; 209 | assert_merge!( 210 | original, 211 | b, 212 | a, 213 | Err(expected), 214 | "Reverse Multiple Conflict case" 215 | ); 216 | } 217 | 218 | #[test] 219 | fn 
myers_diffy_vs_git() { 220 | let original = "\ 221 | void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) 222 | { 223 | if (!Chunk_bounds_check(src, src_start, n)) return; 224 | if (!Chunk_bounds_check(dst, dst_start, n)) return; 225 | 226 | memcpy(dst->data + dst_start, src->data + src_start, n); 227 | } 228 | 229 | int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) 230 | { 231 | if (chunk == NULL) return 0; 232 | 233 | return start <= chunk->length && n <= chunk->length - start; 234 | } 235 | "; 236 | let a = "\ 237 | int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) 238 | { 239 | if (chunk == NULL) return 0; 240 | 241 | return start <= chunk->length && n <= chunk->length - start; 242 | } 243 | 244 | void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) 245 | { 246 | if (!Chunk_bounds_check(src, src_start, n)) return; 247 | if (!Chunk_bounds_check(dst, dst_start, n)) return; 248 | 249 | memcpy(dst->data + dst_start, src->data + src_start, n); 250 | } 251 | "; 252 | let b = "\ 253 | void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) 254 | { 255 | if (!Chunk_bounds_check(src, src_start, n)) return; 256 | if (!Chunk_bounds_check(dst, dst_start, n)) return; 257 | 258 | // copy the bytes 259 | memcpy(dst->data + dst_start, src->data + src_start, n); 260 | } 261 | 262 | int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) 263 | { 264 | if (chunk == NULL) return 0; 265 | 266 | return start <= chunk->length && n <= chunk->length - start; 267 | } 268 | "; 269 | 270 | // TODO investigate why this doesn't match git's output 271 | let _expected_git = "\ 272 | int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) 273 | { 274 | if (chunk == NULL) return 0; 275 | 276 | <<<<<<< ours 277 | return start <= chunk->length && n <= chunk->length - start; 278 | ||||||| original 279 | memcpy(dst->data + dst_start, src->data + src_start, n); 280 | ======= 281 | // copy the bytes 282 | memcpy(dst->data + dst_start, src->data + src_start, n); 283 | >>>>>>> theirs 284 | } 285 | 286 | void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) 287 | { 288 | if (!Chunk_bounds_check(src, src_start, n)) return; 289 | if (!Chunk_bounds_check(dst, dst_start, n)) return; 290 | 291 | memcpy(dst->data + dst_start, src->data + src_start, n); 292 | } 293 | "; 294 | 295 | let expected_diffy = "\ 296 | int Chunk_bounds_check(Chunk *chunk, size_t start, size_t n) 297 | { 298 | if (chunk == NULL) return 0; 299 | 300 | return start <= chunk->length && n <= chunk->length - start; 301 | } 302 | 303 | void Chunk_copy(Chunk *src, size_t src_start, Chunk *dst, size_t dst_start, size_t n) 304 | { 305 | if (!Chunk_bounds_check(src, src_start, n)) return; 306 | if (!Chunk_bounds_check(dst, dst_start, n)) return; 307 | 308 | // copy the bytes 309 | memcpy(dst->data + dst_start, src->data + src_start, n); 310 | } 311 | "; 312 | 313 | assert_merge!(original, a, b, Ok(expected_diffy), "Myers diffy merge"); 314 | } 315 | 316 | #[test] 317 | fn correct_range_is_used_for_both_case() { 318 | let base = r#" 319 | class GithubCall(db.Model): 320 | 321 | `url`: URL of request Example.`https://api.github.com` 322 | "#; 323 | 324 | let theirs = r#" 325 | class GithubCall(db.Model): 326 | 327 | `repo`: String field. Github repository fields. Example: `amitu/python` 328 | "#; 329 | 330 | let ours = r#" 331 | class Call(models.Model): 332 | `body`: String field. 
The payload of the webhook call from the github. 333 | 334 | `repo`: String field. Github repository fields. Example: `amitu/python` 335 | "#; 336 | 337 | let expected = r#" 338 | class Call(models.Model): 339 | `body`: String field. The payload of the webhook call from the github. 340 | 341 | `repo`: String field. Github repository fields. Example: `amitu/python` 342 | "#; 343 | 344 | assert_merge!(base, ours, theirs, Ok(expected), "MergeRange::Both case"); 345 | } 346 | 347 | #[test] 348 | fn delete_and_insert_conflict() { 349 | let base = r#" 350 | { 351 | int a = 2; 352 | } 353 | "#; 354 | 355 | let ours = r#" 356 | { 357 | } 358 | "#; 359 | 360 | let theirs = r#" 361 | { 362 | int a = 2; 363 | int b = 3; 364 | } 365 | "#; 366 | 367 | let expected = r#" 368 | { 369 | <<<<<<< ours 370 | ||||||| original 371 | int a = 2; 372 | ======= 373 | int a = 2; 374 | int b = 3; 375 | >>>>>>> theirs 376 | } 377 | "#; 378 | 379 | assert_merge!( 380 | base, 381 | ours, 382 | theirs, 383 | Err(expected), 384 | "MergeRange (Ours::delete, Theirs::insert) conflict" 385 | ); 386 | 387 | let expected = r#" 388 | { 389 | <<<<<<< ours 390 | int a = 2; 391 | int b = 3; 392 | ||||||| original 393 | int a = 2; 394 | ======= 395 | >>>>>>> theirs 396 | } 397 | "#; 398 | 399 | assert_merge!( 400 | base, 401 | theirs, 402 | ours, 403 | Err(expected), 404 | "MergeRange (Theirs::delete, Ours::insert) conflict" 405 | ); 406 | } 407 | -------------------------------------------------------------------------------- /src/patch/format.rs: -------------------------------------------------------------------------------- 1 | use super::{Hunk, Line, Patch, NO_NEWLINE_AT_EOF}; 2 | use nu_ansi_term::{Color, Style}; 3 | use std::{ 4 | fmt::{Display, Formatter, Result}, 5 | io, 6 | }; 7 | 8 | /// Struct used to adjust the formatting of a `Patch` 9 | #[derive(Debug)] 10 | pub struct PatchFormatter { 11 | with_color: bool, 12 | with_missing_newline_message: bool, 13 | suppress_blank_empty: bool, 14 | 15 | context: Style, 16 | delete: Style, 17 | insert: Style, 18 | hunk_header: Style, 19 | patch_header: Style, 20 | function_context: Style, 21 | } 22 | 23 | impl PatchFormatter { 24 | /// Construct a new formatter 25 | pub fn new() -> Self { 26 | Self { 27 | with_color: false, 28 | with_missing_newline_message: true, 29 | 30 | // TODO the default in git-diff and GNU diff is to have this set to false, on the next 31 | // semver breaking release we should contemplate switching this to be false by default 32 | suppress_blank_empty: true, 33 | 34 | context: Style::new(), 35 | delete: Color::Red.normal(), 36 | insert: Color::Green.normal(), 37 | hunk_header: Color::Cyan.normal(), 38 | patch_header: Style::new().bold(), 39 | function_context: Style::new(), 40 | } 41 | } 42 | 43 | /// Enable formatting a patch with color 44 | pub fn with_color(mut self) -> Self { 45 | self.with_color = true; 46 | self 47 | } 48 | 49 | /// Sets whether to format a patch with a "No newline at end of file" message. 50 | /// 51 | /// Default is `true`. 52 | /// 53 | /// Note: If this is disabled by setting to `false`, formatted patches will no longer contain 54 | /// sufficient information to determine if a file ended with a newline character (`\n`) or not 55 | /// and the patch will be formatted as if both the original and modified files ended with a 56 | /// newline character (`\n`). 
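// ---- editor's sketch (not part of format.rs) -------------------------------
// A minimal, hedged usage sketch for the two formatter knobs documented in
// this impl (`missing_newline_message` above and `suppress_blank_empty` just
// below). It assumes the crate is consumed as `diffy` and that its
// `create_patch` helper is in scope; everything else uses only APIs defined
// in this file.
//
//     use diffy::{create_patch, PatchFormatter};
//
//     fn print_plain_patch(original: &str, modified: &str) {
//         let patch = create_patch(original, modified);
//         let f = PatchFormatter::new()
//             .missing_newline_message(false) // omit "\ No newline at end of file"
//             .suppress_blank_empty(false);   // keep the ' ' prefix on blank context lines
//         print!("{}", f.fmt_patch(&patch));
//     }
// -----------------------------------------------------------------------------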
57 | pub fn missing_newline_message(mut self, enable: bool) -> Self { 58 | self.with_missing_newline_message = enable; 59 | self 60 | } 61 | 62 | /// Sets whether to suppress printing of a space before empty lines. 63 | /// 64 | /// Defaults to `true`. 65 | /// 66 | /// For more information you can refer to the [Omitting trailing blanks] manual page of GNU 67 | /// diff or the [diff.suppressBlankEmpty] config for `git-diff`. 68 | /// 69 | /// [Omitting trailing blanks]: https://www.gnu.org/software/diffutils/manual/html_node/Trailing-Blanks.html 70 | /// [diff.suppressBlankEmpty]: https://git-scm.com/docs/git-diff#Documentation/git-diff.txt-codediffsuppressBlankEmptycode 71 | pub fn suppress_blank_empty(mut self, enable: bool) -> Self { 72 | self.suppress_blank_empty = enable; 73 | self 74 | } 75 | 76 | /// Returns a `Display` impl which can be used to print a Patch 77 | pub fn fmt_patch<'a>(&'a self, patch: &'a Patch<'a, str>) -> impl Display + 'a { 78 | PatchDisplay { f: self, patch } 79 | } 80 | 81 | pub fn write_patch_into<T: ToOwned + AsRef<[u8]> + ?Sized, W: io::Write>( 82 | &self, 83 | patch: &Patch<'_, T>, 84 | w: W, 85 | ) -> io::Result<()> { 86 | PatchDisplay { f: self, patch }.write_into(w) 87 | } 88 | 89 | fn fmt_hunk<'a>(&'a self, hunk: &'a Hunk<'a, str>) -> impl Display + 'a { 90 | HunkDisplay { f: self, hunk } 91 | } 92 | 93 | fn write_hunk_into<T: AsRef<[u8]> + ?Sized, W: io::Write>( 94 | &self, 95 | hunk: &Hunk<'_, T>, 96 | w: W, 97 | ) -> io::Result<()> { 98 | HunkDisplay { f: self, hunk }.write_into(w) 99 | } 100 | 101 | fn fmt_line<'a>(&'a self, line: &'a Line<'a, str>) -> impl Display + 'a { 102 | LineDisplay { f: self, line } 103 | } 104 | 105 | fn write_line_into<T: AsRef<[u8]> + ?Sized, W: io::Write>( 106 | &self, 107 | line: &Line<'_, T>, 108 | w: W, 109 | ) -> io::Result<()> { 110 | LineDisplay { f: self, line }.write_into(w) 111 | } 112 | } 113 | 114 | impl Default for PatchFormatter { 115 | fn default() -> Self { 116 | Self::new() 117 | } 118 | } 119 | 120 | struct PatchDisplay<'a, T: ToOwned + ?Sized> { 121 | f: &'a PatchFormatter, 122 | patch: &'a Patch<'a, T>, 123 | } 124 | 125 | impl<T: ToOwned + AsRef<[u8]> + ?Sized> PatchDisplay<'_, T> { 126 | fn write_into<W: io::Write>(&self, mut w: W) -> io::Result<()> { 127 | if self.patch.original.is_some() || self.patch.modified.is_some() { 128 | if self.f.with_color { 129 | write!(w, "{}", self.f.patch_header.prefix())?; 130 | } 131 | if let Some(original) = &self.patch.original { 132 | write!(w, "--- ")?; 133 | original.write_into(&mut w)?; 134 | writeln!(w)?; 135 | } 136 | if let Some(modified) = &self.patch.modified { 137 | write!(w, "+++ ")?; 138 | modified.write_into(&mut w)?; 139 | writeln!(w)?; 140 | } 141 | if self.f.with_color { 142 | write!(w, "{}", self.f.patch_header.suffix())?; 143 | } 144 | } 145 | 146 | for hunk in &self.patch.hunks { 147 | self.f.write_hunk_into(hunk, &mut w)?; 148 | } 149 | 150 | Ok(()) 151 | } 152 | } 153 | 154 | impl Display for PatchDisplay<'_, str> { 155 | fn fmt(&self, f: &mut Formatter<'_>) -> Result { 156 | if self.patch.original.is_some() || self.patch.modified.is_some() { 157 | if self.f.with_color { 158 | write!(f, "{}", self.f.patch_header.prefix())?; 159 | } 160 | if let Some(original) = &self.patch.original { 161 | writeln!(f, "--- {}", original)?; 162 | } 163 | if let Some(modified) = &self.patch.modified { 164 | writeln!(f, "+++ {}", modified)?; 165 | } 166 | if self.f.with_color { 167 | write!(f, "{}", self.f.patch_header.suffix())?; 168 | } 169 | } 170 | 171 | for hunk in &self.patch.hunks { 172 | write!(f, "{}", self.f.fmt_hunk(hunk))?; 173 | } 174 | 175 | 
Ok(()) 176 | } 177 | } 178 | 179 | struct HunkDisplay<'a, T: ?Sized> { 180 | f: &'a PatchFormatter, 181 | hunk: &'a Hunk<'a, T>, 182 | } 183 | 184 | impl + ?Sized> HunkDisplay<'_, T> { 185 | fn write_into(&self, mut w: W) -> io::Result<()> { 186 | if self.f.with_color { 187 | write!(w, "{}", self.f.hunk_header.prefix())?; 188 | } 189 | write!(w, "@@ -{} +{} @@", self.hunk.old_range, self.hunk.new_range)?; 190 | if self.f.with_color { 191 | write!(w, "{}", self.f.hunk_header.suffix())?; 192 | } 193 | 194 | if let Some(ctx) = self.hunk.function_context { 195 | write!(w, " ")?; 196 | if self.f.with_color { 197 | write!(w, "{}", self.f.function_context.prefix())?; 198 | } 199 | write!(w, " ")?; 200 | w.write_all(ctx.as_ref())?; 201 | if self.f.with_color { 202 | write!(w, "{}", self.f.function_context.suffix())?; 203 | } 204 | } 205 | writeln!(w)?; 206 | 207 | for line in &self.hunk.lines { 208 | self.f.write_line_into(line, &mut w)?; 209 | } 210 | 211 | Ok(()) 212 | } 213 | } 214 | 215 | impl Display for HunkDisplay<'_, str> { 216 | fn fmt(&self, f: &mut Formatter<'_>) -> Result { 217 | if self.f.with_color { 218 | write!(f, "{}", self.f.hunk_header.prefix())?; 219 | } 220 | write!(f, "@@ -{} +{} @@", self.hunk.old_range, self.hunk.new_range)?; 221 | if self.f.with_color { 222 | write!(f, "{}", self.f.hunk_header.suffix())?; 223 | } 224 | 225 | if let Some(ctx) = self.hunk.function_context { 226 | write!(f, " ")?; 227 | if self.f.with_color { 228 | write!(f, "{}", self.f.function_context.prefix())?; 229 | } 230 | write!(f, " {}", ctx)?; 231 | if self.f.with_color { 232 | write!(f, "{}", self.f.function_context.suffix())?; 233 | } 234 | } 235 | writeln!(f)?; 236 | 237 | for line in &self.hunk.lines { 238 | write!(f, "{}", self.f.fmt_line(line))?; 239 | } 240 | 241 | Ok(()) 242 | } 243 | } 244 | 245 | struct LineDisplay<'a, T: ?Sized> { 246 | f: &'a PatchFormatter, 247 | line: &'a Line<'a, T>, 248 | } 249 | 250 | impl + ?Sized> LineDisplay<'_, T> { 251 | fn write_into(&self, mut w: W) -> io::Result<()> { 252 | let (sign, line, style) = match self.line { 253 | Line::Context(line) => (' ', line.as_ref(), self.f.context), 254 | Line::Delete(line) => ('-', line.as_ref(), self.f.delete), 255 | Line::Insert(line) => ('+', line.as_ref(), self.f.insert), 256 | }; 257 | 258 | if self.f.with_color { 259 | write!(w, "{}", style.prefix())?; 260 | } 261 | 262 | if self.f.suppress_blank_empty && sign == ' ' && line == b"\n" { 263 | w.write_all(line)?; 264 | } else { 265 | write!(w, "{}", sign)?; 266 | w.write_all(line)?; 267 | } 268 | 269 | if self.f.with_color { 270 | write!(w, "{}", style.suffix())?; 271 | } 272 | 273 | if !line.ends_with(b"\n") { 274 | writeln!(w)?; 275 | if self.f.with_missing_newline_message { 276 | writeln!(w, "{}", NO_NEWLINE_AT_EOF)?; 277 | } 278 | } 279 | 280 | Ok(()) 281 | } 282 | } 283 | 284 | impl Display for LineDisplay<'_, str> { 285 | fn fmt(&self, f: &mut Formatter<'_>) -> Result { 286 | let (sign, line, style) = match self.line { 287 | Line::Context(line) => (' ', line, self.f.context), 288 | Line::Delete(line) => ('-', line, self.f.delete), 289 | Line::Insert(line) => ('+', line, self.f.insert), 290 | }; 291 | 292 | if self.f.with_color { 293 | write!(f, "{}", style.prefix())?; 294 | } 295 | 296 | if self.f.suppress_blank_empty && sign == ' ' && *line == "\n" { 297 | write!(f, "{}", line)?; 298 | } else { 299 | write!(f, "{}{}", sign, line)?; 300 | } 301 | 302 | if self.f.with_color { 303 | write!(f, "{}", style.suffix())?; 304 | } 305 | 306 | if !line.ends_with('\n') { 307 
| writeln!(f)?; 308 | if self.f.with_missing_newline_message { 309 | writeln!(f, "{}", NO_NEWLINE_AT_EOF)?; 310 | } 311 | } 312 | 313 | Ok(()) 314 | } 315 | } 316 | -------------------------------------------------------------------------------- /src/patch/mod.rs: -------------------------------------------------------------------------------- 1 | mod format; 2 | mod parse; 3 | 4 | pub use format::PatchFormatter; 5 | pub use parse::ParsePatchError; 6 | 7 | use std::{borrow::Cow, fmt, ops}; 8 | 9 | const NO_NEWLINE_AT_EOF: &str = "\\ No newline at end of file"; 10 | 11 | /// Representation of all the differences between two files 12 | #[derive(PartialEq, Eq)] 13 | pub struct Patch<'a, T: ToOwned + ?Sized> { 14 | // TODO GNU patch is able to parse patches without filename headers. 15 | // This should be changed to an `Option` type to reflect this instead of setting this to "" 16 | // when they're missing 17 | original: Option<Filename<'a, T>>, 18 | modified: Option<Filename<'a, T>>, 19 | hunks: Vec<Hunk<'a, T>>, 20 | } 21 | 22 | impl<'a, T: ToOwned + ?Sized> Patch<'a, T> { 23 | pub(crate) fn new<O, M>( 24 | original: Option<O>, 25 | modified: Option<M>, 26 | hunks: Vec<Hunk<'a, T>>, 27 | ) -> Self 28 | where 29 | O: Into<Cow<'a, T>>, 30 | M: Into<Cow<'a, T>>, 31 | { 32 | let original = original.map(|o| Filename(o.into())); 33 | let modified = modified.map(|m| Filename(m.into())); 34 | Self { 35 | original, 36 | modified, 37 | hunks, 38 | } 39 | } 40 | 41 | /// Return the name of the old file 42 | pub fn original(&self) -> Option<&T> { 43 | self.original.as_ref().map(AsRef::as_ref) 44 | } 45 | 46 | /// Return the name of the new file 47 | pub fn modified(&self) -> Option<&T> { 48 | self.modified.as_ref().map(AsRef::as_ref) 49 | } 50 | 51 | /// Returns the hunks in the patch 52 | pub fn hunks(&self) -> &[Hunk<'_, T>] { 53 | &self.hunks 54 | } 55 | 56 | pub fn reverse(&self) -> Patch<'_, T> { 57 | let hunks = self.hunks.iter().map(Hunk::reverse).collect(); 58 | Patch { 59 | original: self.modified.clone(), 60 | modified: self.original.clone(), 61 | hunks, 62 | } 63 | } 64 | } 65 | 66 | impl<T: AsRef<[u8]> + ToOwned + ?Sized> Patch<'_, T> { 67 | /// Convert a `Patch` into bytes 68 | /// 69 | /// This is the equivalent of the `to_string` function but for 70 | /// potentially non-utf8 patches. 71 | pub fn to_bytes(&self) -> Vec<u8> { 72 | let mut bytes = Vec::new(); 73 | PatchFormatter::new() 74 | .write_patch_into(self, &mut bytes) 75 | .unwrap(); 76 | bytes 77 | } 78 | } 79 | 80 | impl<'a> Patch<'a, str> { 81 | /// Parse a `Patch` from a string 82 | /// 83 | /// ``` 84 | /// use diffy::Patch; 85 | /// 86 | /// let s = "\ 87 | /// --- a/ideals 88 | /// +++ b/ideals 89 | /// @@ -1,4 +1,6 @@ 90 | /// First: 91 | /// Life before death, 92 | /// strength before weakness, 93 | /// journey before destination. 94 | /// +Second: 95 | /// + I will protect those who cannot protect themselves. 
96 | /// "; 97 | /// 98 | /// let patch = Patch::from_str(s).unwrap(); 99 | /// ``` 100 | #[allow(clippy::should_implement_trait)] 101 | pub fn from_str(s: &'a str) -> Result, ParsePatchError> { 102 | parse::parse(s) 103 | } 104 | } 105 | 106 | impl<'a> Patch<'a, [u8]> { 107 | /// Parse a `Patch` from bytes 108 | pub fn from_bytes(s: &'a [u8]) -> Result, ParsePatchError> { 109 | parse::parse_bytes(s) 110 | } 111 | } 112 | 113 | impl Clone for Patch<'_, T> { 114 | fn clone(&self) -> Self { 115 | Self { 116 | original: self.original.clone(), 117 | modified: self.modified.clone(), 118 | hunks: self.hunks.clone(), 119 | } 120 | } 121 | } 122 | 123 | impl fmt::Display for Patch<'_, str> { 124 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 125 | write!(f, "{}", PatchFormatter::new().fmt_patch(self)) 126 | } 127 | } 128 | 129 | impl fmt::Debug for Patch<'_, T> 130 | where 131 | T: ToOwned + fmt::Debug, 132 | O: std::borrow::Borrow + fmt::Debug, 133 | { 134 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 135 | f.debug_struct("Patch") 136 | .field("original", &self.original) 137 | .field("modified", &self.modified) 138 | .field("hunks", &self.hunks) 139 | .finish() 140 | } 141 | } 142 | 143 | #[derive(PartialEq, Eq)] 144 | struct Filename<'a, T: ToOwned + ?Sized>(Cow<'a, T>); 145 | 146 | const ESCAPED_CHARS: &[char] = &['\n', '\t', '\0', '\r', '\"', '\\']; 147 | #[allow(clippy::byte_char_slices)] 148 | const ESCAPED_CHARS_BYTES: &[u8] = &[b'\n', b'\t', b'\0', b'\r', b'\"', b'\\']; 149 | 150 | impl Filename<'_, str> { 151 | fn needs_to_be_escaped(&self) -> bool { 152 | self.0.contains(ESCAPED_CHARS) 153 | } 154 | } 155 | 156 | impl + ?Sized> Filename<'_, T> { 157 | fn needs_to_be_escaped_bytes(&self) -> bool { 158 | self.0 159 | .as_ref() 160 | .as_ref() 161 | .iter() 162 | .any(|b| ESCAPED_CHARS_BYTES.contains(b)) 163 | } 164 | 165 | fn write_into(&self, mut w: W) -> std::io::Result<()> { 166 | if self.needs_to_be_escaped_bytes() { 167 | w.write_all(b"\"")?; 168 | for b in self.0.as_ref().as_ref() { 169 | if ESCAPED_CHARS_BYTES.contains(b) { 170 | w.write_all(b"\\")?; 171 | } 172 | w.write_all(&[*b])?; 173 | } 174 | w.write_all(b"\"")?; 175 | } else { 176 | w.write_all(self.0.as_ref().as_ref())?; 177 | } 178 | 179 | Ok(()) 180 | } 181 | } 182 | 183 | impl AsRef for Filename<'_, T> { 184 | fn as_ref(&self) -> &T { 185 | &self.0 186 | } 187 | } 188 | 189 | impl ops::Deref for Filename<'_, T> { 190 | type Target = T; 191 | 192 | fn deref(&self) -> &Self::Target { 193 | &self.0 194 | } 195 | } 196 | 197 | impl Clone for Filename<'_, T> { 198 | fn clone(&self) -> Self { 199 | Self(self.0.clone()) 200 | } 201 | } 202 | 203 | impl fmt::Display for Filename<'_, str> { 204 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { 205 | use std::fmt::Write; 206 | if self.needs_to_be_escaped() { 207 | f.write_char('\"')?; 208 | for c in self.0.chars() { 209 | if ESCAPED_CHARS.contains(&c) { 210 | f.write_char('\\')?; 211 | } 212 | f.write_char(c)?; 213 | } 214 | f.write_char('\"')?; 215 | } else { 216 | f.write_str(&self.0)?; 217 | } 218 | 219 | Ok(()) 220 | } 221 | } 222 | 223 | impl fmt::Debug for Filename<'_, T> 224 | where 225 | T: ToOwned + fmt::Debug, 226 | O: std::borrow::Borrow + fmt::Debug, 227 | { 228 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 229 | f.debug_tuple("Filename").field(&self.0).finish() 230 | } 231 | } 232 | 233 | /// Represents a group of differing lines between two files 234 | #[derive(Debug, PartialEq, Eq)] 235 | pub struct 
Hunk<'a, T: ?Sized> { 236 | old_range: HunkRange, 237 | new_range: HunkRange, 238 | 239 | function_context: Option<&'a T>, 240 | 241 | lines: Vec>, 242 | } 243 | 244 | fn hunk_lines_count(lines: &[Line<'_, T>]) -> (usize, usize) { 245 | lines.iter().fold((0, 0), |count, line| match line { 246 | Line::Context(_) => (count.0 + 1, count.1 + 1), 247 | Line::Delete(_) => (count.0 + 1, count.1), 248 | Line::Insert(_) => (count.0, count.1 + 1), 249 | }) 250 | } 251 | 252 | impl<'a, T: ?Sized> Hunk<'a, T> { 253 | pub(crate) fn new( 254 | old_range: HunkRange, 255 | new_range: HunkRange, 256 | function_context: Option<&'a T>, 257 | lines: Vec>, 258 | ) -> Self { 259 | let (old_count, new_count) = hunk_lines_count(&lines); 260 | 261 | assert_eq!(old_range.len, old_count); 262 | assert_eq!(new_range.len, new_count); 263 | 264 | Self { 265 | old_range, 266 | new_range, 267 | function_context, 268 | lines, 269 | } 270 | } 271 | 272 | /// Returns the corresponding range for the old file in the hunk 273 | pub fn old_range(&self) -> HunkRange { 274 | self.old_range 275 | } 276 | 277 | /// Returns the corresponding range for the new file in the hunk 278 | pub fn new_range(&self) -> HunkRange { 279 | self.new_range 280 | } 281 | 282 | /// Returns the function context (if any) for the hunk 283 | pub fn function_context(&self) -> Option<&T> { 284 | self.function_context 285 | } 286 | 287 | /// Returns the lines in the hunk 288 | pub fn lines(&self) -> &[Line<'a, T>] { 289 | &self.lines 290 | } 291 | 292 | /// Creates a reverse patch for the hunk. This is equivalent to what 293 | /// XDL_PATCH_REVERSE would apply in libxdiff. 294 | pub fn reverse(&self) -> Self { 295 | let lines = self.lines.iter().map(Line::reverse).collect(); 296 | Self { 297 | old_range: self.new_range, 298 | new_range: self.old_range, 299 | function_context: self.function_context, 300 | lines, 301 | } 302 | } 303 | } 304 | 305 | impl Clone for Hunk<'_, T> { 306 | fn clone(&self) -> Self { 307 | Self { 308 | old_range: self.old_range, 309 | new_range: self.new_range, 310 | function_context: self.function_context, 311 | lines: self.lines.clone(), 312 | } 313 | } 314 | } 315 | 316 | /// The range of lines in a file for a particular `Hunk`. 317 | #[derive(Copy, Clone, Debug, PartialEq, Eq)] 318 | pub struct HunkRange { 319 | /// The starting line number of a hunk 320 | start: usize, 321 | /// The hunk size (number of lines) 322 | len: usize, 323 | } 324 | 325 | impl HunkRange { 326 | pub(crate) fn new(start: usize, len: usize) -> Self { 327 | Self { start, len } 328 | } 329 | 330 | /// Returns the range as a `ops::Range` 331 | pub fn range(&self) -> ops::Range { 332 | self.start..self.end() 333 | } 334 | 335 | /// Returns the starting line number of the range (inclusive) 336 | pub fn start(&self) -> usize { 337 | self.start 338 | } 339 | 340 | /// Returns the ending line number of the range (exclusive) 341 | pub fn end(&self) -> usize { 342 | self.start + self.len 343 | } 344 | 345 | /// Returns the number of lines in the range 346 | pub fn len(&self) -> usize { 347 | self.len 348 | } 349 | 350 | /// Returns `true` if the range is empty (has a length of `0`) 351 | pub fn is_empty(&self) -> bool { 352 | self.len == 0 353 | } 354 | } 355 | 356 | impl fmt::Display for HunkRange { 357 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 358 | write!(f, "{}", self.start)?; 359 | if self.len != 1 { 360 | write!(f, ",{}", self.len)?; 361 | } 362 | Ok(()) 363 | } 364 | } 365 | 366 | /// A line in either the old file, new file, or both. 
367 | /// 368 | /// A `Line` contains the terminating newline character `\n` unless it is the final 369 | /// line in the file and the file does not end with a newline character. 370 | #[derive(Debug, PartialEq, Eq)] 371 | pub enum Line<'a, T: ?Sized> { 372 | /// A line providing context in the diff which is present in both the old and new file 373 | Context(&'a T), 374 | /// A line deleted from the old file 375 | Delete(&'a T), 376 | /// A line inserted to the new file 377 | Insert(&'a T), 378 | } 379 | 380 | impl<T: ?Sized> Copy for Line<'_, T> {} 381 | 382 | impl<T: ?Sized> Clone for Line<'_, T> { 383 | fn clone(&self) -> Self { 384 | *self 385 | } 386 | } 387 | 388 | impl<T: ?Sized> Line<'_, T> { 389 | pub fn reverse(&self) -> Self { 390 | match *self { 391 | Line::Context(s) => Line::Context(s), 392 | Line::Delete(s) => Line::Insert(s), 393 | Line::Insert(s) => Line::Delete(s), 394 | } 395 | } 396 | } 397 | -------------------------------------------------------------------------------- /src/patch/parse.rs: -------------------------------------------------------------------------------- 1 | //! Parse a Patch 2 | 3 | use super::{Hunk, HunkRange, Line, ESCAPED_CHARS_BYTES, NO_NEWLINE_AT_EOF}; 4 | use crate::{ 5 | patch::Patch, 6 | utils::{LineIter, Text}, 7 | }; 8 | use std::{borrow::Cow, fmt}; 9 | 10 | type Result<T, E = ParsePatchError> = std::result::Result<T, E>; 11 | 12 | /// An error returned when parsing a `Patch` using [`Patch::from_str`] fails 13 | /// 14 | /// [`Patch::from_str`]: struct.Patch.html#method.from_str 15 | // TODO use a custom error type instead of a Cow 16 | #[derive(Debug)] 17 | pub struct ParsePatchError(Cow<'static, str>); 18 | 19 | impl ParsePatchError { 20 | fn new<E: Into<Cow<'static, str>>>(e: E) -> Self { 21 | Self(e.into()) 22 | } 23 | } 24 | 25 | impl fmt::Display for ParsePatchError { 26 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 27 | write!(f, "error parsing patch: {}", self.0) 28 | } 29 | } 30 | 31 | impl std::error::Error for ParsePatchError {} 32 | 33 | struct Parser<'a, T: Text + ?Sized> { 34 | lines: std::iter::Peekable<LineIter<'a, T>>, 35 | } 36 | 37 | impl<'a, T: Text + ?Sized> Parser<'a, T> { 38 | fn new(input: &'a T) -> Self { 39 | Self { 40 | lines: LineIter::new(input).peekable(), 41 | } 42 | } 43 | 44 | fn peek(&mut self) -> Option<&&'a T> { 45 | self.lines.peek() 46 | } 47 | 48 | fn next(&mut self) -> Result<&'a T> { 49 | let line = self 50 | .lines 51 | .next() 52 | .ok_or_else(|| ParsePatchError::new("unexpected EOF"))?; 53 | Ok(line) 54 | } 55 | } 56 | 57 | pub fn parse(input: &str) -> Result<Patch<'_, str>> { 58 | let mut parser = Parser::new(input); 59 | let header = patch_header(&mut parser)?; 60 | let hunks = hunks(&mut parser)?; 61 | 62 | Ok(Patch::new( 63 | header.0.map(convert_cow_to_str), 64 | header.1.map(convert_cow_to_str), 65 | hunks, 66 | )) 67 | } 68 | 69 | pub fn parse_bytes(input: &[u8]) -> Result<Patch<'_, [u8]>> { 70 | let mut parser = Parser::new(input); 71 | let header = patch_header(&mut parser)?; 72 | let hunks = hunks(&mut parser)?; 73 | 74 | Ok(Patch::new(header.0, header.1, hunks)) 75 | } 76 | 77 | // This is only used when the type originated as a utf8 string 78 | fn convert_cow_to_str(cow: Cow<'_, [u8]>) -> Cow<'_, str> { 79 | match cow { 80 | Cow::Borrowed(b) => std::str::from_utf8(b).unwrap().into(), 81 | Cow::Owned(o) => String::from_utf8(o).unwrap().into(), 82 | } 83 | } 84 | 85 | #[allow(clippy::type_complexity)] 86 | fn patch_header<'a, T: Text + ToOwned + ?Sized>( 87 | parser: &mut Parser<'a, T>, 88 | ) -> Result<(Option<Cow<'a, [u8]>>, Option<Cow<'a, [u8]>>)> { 89 | skip_header_preamble(parser)?; 90 | 91 | let mut filename1 = None; 92 | let mut 
filename2 = None; 93 | 94 | while let Some(line) = parser.peek() { 95 | if line.starts_with("--- ") { 96 | if filename1.is_some() { 97 | return Err(ParsePatchError::new("multiple '---' lines")); 98 | } 99 | filename1 = Some(parse_filename("--- ", parser.next()?)?); 100 | } else if line.starts_with("+++ ") { 101 | if filename2.is_some() { 102 | return Err(ParsePatchError::new("multiple '+++' lines")); 103 | } 104 | filename2 = Some(parse_filename("+++ ", parser.next()?)?); 105 | } else { 106 | break; 107 | } 108 | } 109 | 110 | Ok((filename1, filename2)) 111 | } 112 | 113 | // Skip to the first filename header ("--- " or "+++ ") or hunk line, 114 | // skipping any preamble lines like "diff --git", etc. 115 | fn skip_header_preamble(parser: &mut Parser<'_, T>) -> Result<()> { 116 | while let Some(line) = parser.peek() { 117 | if line.starts_with("--- ") | line.starts_with("+++ ") | line.starts_with("@@ ") { 118 | break; 119 | } 120 | parser.next()?; 121 | } 122 | 123 | Ok(()) 124 | } 125 | 126 | fn parse_filename<'a, T: Text + ToOwned + ?Sized>( 127 | prefix: &str, 128 | line: &'a T, 129 | ) -> Result> { 130 | let line = line 131 | .strip_prefix(prefix) 132 | .ok_or_else(|| ParsePatchError::new("unable to parse filename"))?; 133 | 134 | let filename = if let Some((filename, _)) = line.split_at_exclusive("\t") { 135 | filename 136 | } else if let Some((filename, _)) = line.split_at_exclusive("\n") { 137 | filename 138 | } else { 139 | return Err(ParsePatchError::new("filename unterminated")); 140 | }; 141 | 142 | let filename = if let Some(quoted) = is_quoted(filename) { 143 | escaped_filename(quoted)? 144 | } else { 145 | unescaped_filename(filename)? 146 | }; 147 | 148 | Ok(filename) 149 | } 150 | 151 | fn is_quoted(s: &T) -> Option<&T> { 152 | s.strip_prefix("\"").and_then(|s| s.strip_suffix("\"")) 153 | } 154 | 155 | fn unescaped_filename(filename: &T) -> Result> { 156 | let bytes = filename.as_bytes(); 157 | 158 | if bytes.iter().any(|b| ESCAPED_CHARS_BYTES.contains(b)) { 159 | return Err(ParsePatchError::new("invalid char in unquoted filename")); 160 | } 161 | 162 | Ok(bytes.into()) 163 | } 164 | 165 | fn escaped_filename(escaped: &T) -> Result> { 166 | let mut filename = Vec::new(); 167 | 168 | let mut chars = escaped.as_bytes().iter().copied(); 169 | while let Some(c) = chars.next() { 170 | if c == b'\\' { 171 | let ch = match chars 172 | .next() 173 | .ok_or_else(|| ParsePatchError::new("expected escaped character"))? 
174 | { 175 | b'n' => b'\n', 176 | b't' => b'\t', 177 | b'0' => b'\0', 178 | b'r' => b'\r', 179 | b'\"' => b'\"', 180 | b'\\' => b'\\', 181 | _ => return Err(ParsePatchError::new("invalid escaped character")), 182 | }; 183 | filename.push(ch); 184 | } else if ESCAPED_CHARS_BYTES.contains(&c) { 185 | return Err(ParsePatchError::new("invalid unescaped character")); 186 | } else { 187 | filename.push(c); 188 | } 189 | } 190 | 191 | Ok(filename.into()) 192 | } 193 | 194 | fn verify_hunks_in_order(hunks: &[Hunk<'_, T>]) -> bool { 195 | for hunk in hunks.windows(2) { 196 | if hunk[0].old_range.end() > hunk[1].old_range.start() 197 | || hunk[0].new_range.end() > hunk[1].new_range.start() 198 | { 199 | return false; 200 | } 201 | } 202 | true 203 | } 204 | 205 | fn hunks<'a, T: Text + ?Sized>(parser: &mut Parser<'a, T>) -> Result>> { 206 | let mut hunks = Vec::new(); 207 | while parser.peek().is_some() { 208 | hunks.push(hunk(parser)?); 209 | } 210 | 211 | // check and verify that the Hunks are in sorted order and don't overlap 212 | if !verify_hunks_in_order(&hunks) { 213 | return Err(ParsePatchError::new("Hunks not in order or overlap")); 214 | } 215 | 216 | Ok(hunks) 217 | } 218 | 219 | fn hunk<'a, T: Text + ?Sized>(parser: &mut Parser<'a, T>) -> Result> { 220 | let (range1, range2, function_context) = hunk_header(parser.next()?)?; 221 | let lines = hunk_lines(parser)?; 222 | 223 | // check counts of lines to see if they match the ranges in the hunk header 224 | let (len1, len2) = super::hunk_lines_count(&lines); 225 | if len1 != range1.len || len2 != range2.len { 226 | return Err(ParsePatchError::new("Hunk header does not match hunk")); 227 | } 228 | 229 | Ok(Hunk::new(range1, range2, function_context, lines)) 230 | } 231 | 232 | fn hunk_header(input: &T) -> Result<(HunkRange, HunkRange, Option<&T>)> { 233 | let input = input 234 | .strip_prefix("@@ ") 235 | .ok_or_else(|| ParsePatchError::new("unable to parse hunk header"))?; 236 | 237 | let (ranges, function_context) = input 238 | .split_at_exclusive(" @@") 239 | .ok_or_else(|| ParsePatchError::new("hunk header unterminated"))?; 240 | let function_context = function_context.strip_prefix(" "); 241 | 242 | let (range1, range2) = ranges 243 | .split_at_exclusive(" ") 244 | .ok_or_else(|| ParsePatchError::new("unable to parse hunk header"))?; 245 | let range1 = range( 246 | range1 247 | .strip_prefix("-") 248 | .ok_or_else(|| ParsePatchError::new("unable to parse hunk header"))?, 249 | )?; 250 | let range2 = range( 251 | range2 252 | .strip_prefix("+") 253 | .ok_or_else(|| ParsePatchError::new("unable to parse hunk header"))?, 254 | )?; 255 | Ok((range1, range2, function_context)) 256 | } 257 | 258 | fn range(s: &T) -> Result { 259 | let (start, len) = if let Some((start, len)) = s.split_at_exclusive(",") { 260 | ( 261 | start 262 | .parse() 263 | .ok_or_else(|| ParsePatchError::new("can't parse range"))?, 264 | len.parse() 265 | .ok_or_else(|| ParsePatchError::new("can't parse range"))?, 266 | ) 267 | } else { 268 | ( 269 | s.parse() 270 | .ok_or_else(|| ParsePatchError::new("can't parse range"))?, 271 | 1, 272 | ) 273 | }; 274 | 275 | Ok(HunkRange::new(start, len)) 276 | } 277 | 278 | fn hunk_lines<'a, T: Text + ?Sized>(parser: &mut Parser<'a, T>) -> Result>> { 279 | let mut lines: Vec> = Vec::new(); 280 | let mut no_newline_context = false; 281 | let mut no_newline_delete = false; 282 | let mut no_newline_insert = false; 283 | 284 | while let Some(line) = parser.peek() { 285 | let line = if line.starts_with("@") { 286 | break; 287 | } else if 
no_newline_context { 288 | return Err(ParsePatchError::new("expected end of hunk")); 289 | } else if let Some(line) = line.strip_prefix(" ") { 290 | Line::Context(line) 291 | } else if line.starts_with("\n") { 292 | Line::Context(*line) 293 | } else if let Some(line) = line.strip_prefix("-") { 294 | if no_newline_delete { 295 | return Err(ParsePatchError::new("expected no more deleted lines")); 296 | } 297 | Line::Delete(line) 298 | } else if let Some(line) = line.strip_prefix("+") { 299 | if no_newline_insert { 300 | return Err(ParsePatchError::new("expected no more inserted lines")); 301 | } 302 | Line::Insert(line) 303 | } else if line.starts_with(NO_NEWLINE_AT_EOF) { 304 | let last_line = lines.pop().ok_or_else(|| { 305 | ParsePatchError::new("unexpected 'No newline at end of file' line") 306 | })?; 307 | match last_line { 308 | Line::Context(line) => { 309 | no_newline_context = true; 310 | Line::Context(strip_newline(line)?) 311 | } 312 | Line::Delete(line) => { 313 | no_newline_delete = true; 314 | Line::Delete(strip_newline(line)?) 315 | } 316 | Line::Insert(line) => { 317 | no_newline_insert = true; 318 | Line::Insert(strip_newline(line)?) 319 | } 320 | } 321 | } else { 322 | return Err(ParsePatchError::new("unexpected line in hunk body")); 323 | }; 324 | 325 | lines.push(line); 326 | parser.next()?; 327 | } 328 | 329 | Ok(lines) 330 | } 331 | 332 | fn strip_newline(s: &T) -> Result<&T> { 333 | if let Some(stripped) = s.strip_suffix("\n") { 334 | Ok(stripped) 335 | } else { 336 | Err(ParsePatchError::new("missing newline")) 337 | } 338 | } 339 | 340 | #[cfg(test)] 341 | mod tests { 342 | use super::{parse, parse_bytes}; 343 | 344 | #[test] 345 | fn test_escaped_filenames() { 346 | // No escaped characters 347 | let s = "\ 348 | --- original 349 | +++ modified 350 | @@ -1,0 +1,1 @@ 351 | +Oathbringer 352 | "; 353 | parse(s).unwrap(); 354 | parse_bytes(s.as_ref()).unwrap(); 355 | 356 | // unescaped characters fail parsing 357 | let s = "\ 358 | --- ori\"ginal 359 | +++ modified 360 | @@ -1,0 +1,1 @@ 361 | +Oathbringer 362 | "; 363 | parse(s).unwrap_err(); 364 | parse_bytes(s.as_ref()).unwrap_err(); 365 | 366 | // quoted with invalid escaped characters 367 | let s = "\ 368 | --- \"ori\\\"g\rinal\" 369 | +++ modified 370 | @@ -1,0 +1,1 @@ 371 | +Oathbringer 372 | "; 373 | parse(s).unwrap_err(); 374 | parse_bytes(s.as_ref()).unwrap_err(); 375 | 376 | // quoted with escaped characters 377 | let s = r#"\ 378 | --- "ori\"g\tinal" 379 | +++ "mo\0\t\r\n\\dified" 380 | @@ -1,0 +1,1 @@ 381 | +Oathbringer 382 | "#; 383 | let p = parse(s).unwrap(); 384 | assert_eq!(p.original(), Some("ori\"g\tinal")); 385 | assert_eq!(p.modified(), Some("mo\0\t\r\n\\dified")); 386 | let b = parse_bytes(s.as_ref()).unwrap(); 387 | assert_eq!(b.original(), Some(&b"ori\"g\tinal"[..])); 388 | assert_eq!(b.modified(), Some(&b"mo\0\t\r\n\\dified"[..])); 389 | } 390 | 391 | #[test] 392 | fn test_missing_filename_header() { 393 | // Missing Both '---' and '+++' lines 394 | let patch = r#" 395 | @@ -1,11 +1,12 @@ 396 | diesel::table! { 397 | users1 (id) { 398 | - id -> Nullable, 399 | + id -> Integer, 400 | } 401 | } 402 | 403 | diesel::table! 
{ 404 | - users2 (id) { 405 | - id -> Nullable, 406 | + users2 (myid) { 407 | + #[sql_name = "id"] 408 | + myid -> Integer, 409 | } 410 | } 411 | "#; 412 | 413 | parse(patch).unwrap(); 414 | 415 | // Missing '---' 416 | let s = "\ 417 | +++ modified 418 | @@ -1,0 +1,1 @@ 419 | +Oathbringer 420 | "; 421 | parse(s).unwrap(); 422 | 423 | // Missing '+++' 424 | let s = "\ 425 | --- original 426 | @@ -1,0 +1,1 @@ 427 | +Oathbringer 428 | "; 429 | parse(s).unwrap(); 430 | 431 | // Headers out of order 432 | let s = "\ 433 | +++ modified 434 | --- original 435 | @@ -1,0 +1,1 @@ 436 | +Oathbringer 437 | "; 438 | parse(s).unwrap(); 439 | 440 | // multiple headers should fail to parse 441 | let s = "\ 442 | --- original 443 | --- modified 444 | @@ -1,0 +1,1 @@ 445 | +Oathbringer 446 | "; 447 | parse(s).unwrap_err(); 448 | } 449 | 450 | #[test] 451 | fn adjacent_hunks_correctly_parse() { 452 | let s = "\ 453 | --- original 454 | +++ modified 455 | @@ -110,7 +110,7 @@ 456 | -- 457 | 458 | I am afraid, however, that all I have known - that my story - will be forgotten. 459 | I am afraid for the world that is to come. 460 | -Afraid that my plans will fail. Afraid of a doom worse than the Deepness. 461 | +Afraid that Alendi will fail. Afraid of a doom brought by the Deepness. 462 | 463 | Alendi was never the Hero of Ages. 464 | @@ -117,7 +117,7 @@ 465 | At best, I have amplified his virtues, creating a Hero where there was none. 466 | 467 | -At worst, I fear that all we believe may have been corrupted. 468 | +At worst, I fear that I have corrupted all we believe. 469 | 470 | -- 471 | Alendi must not reach the Well of Ascension. He must not take the power for himself. 472 | 473 | "; 474 | parse(s).unwrap(); 475 | } 476 | } 477 | -------------------------------------------------------------------------------- /src/range.rs: -------------------------------------------------------------------------------- 1 | use std::{cmp, fmt::Debug, ops}; 2 | 3 | // Range type inspired by the Range type used in [dissimilar](https://docs.rs/dissimilar) 4 | #[derive(Debug)] 5 | pub struct Range<'a, T: ?Sized> { 6 | inner: &'a T, 7 | offset: usize, 8 | len: usize, 9 | } 10 | 11 | impl Copy for Range<'_, T> {} 12 | 13 | impl Clone for Range<'_, T> { 14 | fn clone(&self) -> Self { 15 | *self 16 | } 17 | } 18 | 19 | impl<'a, T: ?Sized> Range<'a, T> { 20 | pub fn is_empty(&self) -> bool { 21 | self.len == 0 22 | } 23 | 24 | pub fn inner(&self) -> &'a T { 25 | self.inner 26 | } 27 | 28 | pub fn len(&self) -> usize { 29 | self.len 30 | } 31 | 32 | pub fn offset(&self) -> usize { 33 | self.offset 34 | } 35 | 36 | #[allow(dead_code)] 37 | pub fn range(&self) -> ops::Range { 38 | self.offset..self.offset + self.len 39 | } 40 | 41 | pub fn grow_up(&mut self, adjust: usize) { 42 | self.offset -= adjust; 43 | self.len += adjust; 44 | } 45 | 46 | pub fn grow_down(&mut self, adjust: usize) { 47 | self.len += adjust; 48 | } 49 | 50 | pub fn shrink_front(&mut self, adjust: usize) { 51 | self.offset += adjust; 52 | self.len -= adjust; 53 | } 54 | 55 | pub fn shrink_back(&mut self, adjust: usize) { 56 | self.len -= adjust; 57 | } 58 | 59 | pub fn shift_up(&mut self, adjust: usize) { 60 | self.offset -= adjust 61 | } 62 | 63 | pub fn shift_down(&mut self, adjust: usize) { 64 | self.offset += adjust; 65 | } 66 | 67 | pub fn slice(&self, bounds: impl RangeBounds) -> Self { 68 | let (offset, len) = bounds.index(self.len); 69 | Range { 70 | inner: self.inner, 71 | offset: self.offset + offset, 72 | len, 73 | } 74 | } 75 | 76 | pub fn get(&self, 
bounds: impl RangeBounds) -> Option { 77 | let (offset, len) = bounds.try_index(self.len)?; 78 | Some(Range { 79 | inner: self.inner, 80 | offset: self.offset + offset, 81 | len, 82 | }) 83 | } 84 | 85 | pub fn split_at(&self, mid: usize) -> (Self, Self) { 86 | (self.slice(..mid), self.slice(mid..)) 87 | } 88 | } 89 | 90 | impl<'a, T> Range<'a, T> 91 | where 92 | T: ?Sized + SliceLike, 93 | { 94 | pub fn new(inner: &'a T, bounds: impl RangeBounds) -> Self { 95 | let (offset, len) = bounds.index(inner.len()); 96 | Range { inner, offset, len } 97 | } 98 | 99 | #[allow(dead_code)] 100 | pub fn empty() -> Range<'a, T> { 101 | Range { 102 | inner: T::empty(), 103 | offset: 0, 104 | len: 0, 105 | } 106 | } 107 | 108 | pub fn as_slice(&self) -> &'a T { 109 | self.inner.as_slice(self.offset..self.offset + self.len) 110 | } 111 | 112 | pub fn common_prefix_len(&self, other: Range<'_, T>) -> usize { 113 | self.as_slice().common_prefix_len(other.as_slice()) 114 | } 115 | 116 | pub fn common_suffix_len(&self, other: Range<'_, T>) -> usize { 117 | self.as_slice().common_suffix_len(other.as_slice()) 118 | } 119 | 120 | #[allow(dead_code)] 121 | pub fn common_overlap_len(&self, other: Range<'_, T>) -> usize { 122 | self.as_slice().common_overlap_len(other.as_slice()) 123 | } 124 | 125 | #[allow(dead_code)] 126 | pub fn starts_with(&self, prefix: Range<'_, T>) -> bool { 127 | self.as_slice().starts_with(prefix.as_slice()) 128 | } 129 | 130 | #[allow(dead_code)] 131 | pub fn ends_with(&self, suffix: Range<'_, T>) -> bool { 132 | self.as_slice().ends_with(suffix.as_slice()) 133 | } 134 | } 135 | 136 | pub trait RangeBounds: Sized + Clone + Debug { 137 | // Returns (offset, len). 138 | fn try_index(self, len: usize) -> Option<(usize, usize)>; 139 | 140 | fn index(self, len: usize) -> (usize, usize) { 141 | match self.clone().try_index(len) { 142 | Some(range) => range, 143 | None => panic!("index out of range, index={:?}, len={}", self, len), 144 | } 145 | } 146 | } 147 | 148 | impl RangeBounds for ops::Range { 149 | fn try_index(self, len: usize) -> Option<(usize, usize)> { 150 | if self.start <= self.end && self.end <= len { 151 | Some((self.start, self.end - self.start)) 152 | } else { 153 | None 154 | } 155 | } 156 | } 157 | 158 | impl RangeBounds for ops::RangeFrom { 159 | fn try_index(self, len: usize) -> Option<(usize, usize)> { 160 | if self.start <= len { 161 | Some((self.start, len - self.start)) 162 | } else { 163 | None 164 | } 165 | } 166 | } 167 | 168 | impl RangeBounds for ops::RangeTo { 169 | fn try_index(self, len: usize) -> Option<(usize, usize)> { 170 | if self.end <= len { 171 | Some((0, self.end)) 172 | } else { 173 | None 174 | } 175 | } 176 | } 177 | 178 | impl RangeBounds for ops::RangeFull { 179 | fn try_index(self, len: usize) -> Option<(usize, usize)> { 180 | Some((0, len)) 181 | } 182 | } 183 | 184 | pub trait SliceLike: ops::Index> { 185 | fn len(&self) -> usize; 186 | fn empty<'a>() -> &'a Self; 187 | fn as_slice(&self, range: ops::Range) -> &Self; 188 | fn common_prefix_len(&self, other: &Self) -> usize; 189 | fn common_suffix_len(&self, other: &Self) -> usize; 190 | fn common_overlap_len(&self, other: &Self) -> usize; 191 | fn starts_with(&self, prefix: &Self) -> bool; 192 | fn ends_with(&self, suffix: &Self) -> bool; 193 | } 194 | 195 | impl SliceLike for str { 196 | fn len(&self) -> usize { 197 | self.len() 198 | } 199 | 200 | fn empty<'a>() -> &'a str { 201 | "" 202 | } 203 | 204 | fn as_slice(&self, range: ops::Range) -> &str { 205 | &self[range] 206 | } 207 | 208 | fn 
common_prefix_len(&self, other: &str) -> usize { 209 | for ((i, ch1), ch2) in self.char_indices().zip(other.chars()) { 210 | if ch1 != ch2 { 211 | return i; 212 | } 213 | } 214 | cmp::min(self.len(), other.len()) 215 | } 216 | 217 | fn common_suffix_len(&self, other: &str) -> usize { 218 | for ((i, ch1), ch2) in self.char_indices().rev().zip(other.chars().rev()) { 219 | if ch1 != ch2 { 220 | return self.len() - i - ch1.len_utf8(); 221 | } 222 | } 223 | cmp::min(self.len(), other.len()) 224 | } 225 | 226 | // returns length of overlap of prefix of `self` with suffic of `other` 227 | fn common_overlap_len(&self, mut other: &str) -> usize { 228 | let mut this = self; 229 | // Eliminate the null case 230 | if this.is_empty() || other.is_empty() { 231 | return 0; 232 | } 233 | 234 | match this.len().cmp(&other.len()) { 235 | cmp::Ordering::Greater => { 236 | let mut end = other.len(); 237 | while !this.is_char_boundary(end) { 238 | end -= 1; 239 | } 240 | 241 | this = &this[..end]; 242 | } 243 | cmp::Ordering::Less => { 244 | let mut start = other.len() - this.len(); 245 | while !other.is_char_boundary(start) { 246 | start += 1; 247 | } 248 | 249 | other = &other[start..] 250 | } 251 | cmp::Ordering::Equal => {} 252 | } 253 | 254 | // Quick check for the worst case. 255 | if this == other { 256 | return this.len(); 257 | } 258 | 259 | // Start by looking for a single character match 260 | // and increase length until no match is found. 261 | // Performance analysis: https://neil.fraser.name/news/2010/11/04/ 262 | let mut best = 0; 263 | let mut length = 0; 264 | for (i, c) in other.char_indices().rev() { 265 | let pattern = &other[i..]; 266 | let found = match this.find(pattern) { 267 | Some(found) => found, 268 | None => return best, 269 | }; 270 | 271 | length += c.len_utf8(); 272 | if found == 0 { 273 | best = length; 274 | } 275 | } 276 | 277 | best 278 | } 279 | 280 | fn starts_with(&self, prefix: &str) -> bool { 281 | self.starts_with(prefix) 282 | } 283 | 284 | fn ends_with(&self, suffix: &str) -> bool { 285 | self.ends_with(suffix) 286 | } 287 | } 288 | 289 | impl SliceLike for [T] 290 | where 291 | T: PartialEq, 292 | { 293 | fn len(&self) -> usize { 294 | self.len() 295 | } 296 | 297 | fn empty<'a>() -> &'a [T] { 298 | &[] 299 | } 300 | 301 | fn as_slice(&self, range: ops::Range) -> &[T] { 302 | &self[range] 303 | } 304 | 305 | fn common_prefix_len(&self, other: &[T]) -> usize { 306 | for (i, (item1, item2)) in self.iter().zip(other.iter()).enumerate() { 307 | if item1 != item2 { 308 | return i; 309 | } 310 | } 311 | cmp::min(self.len(), other.len()) 312 | } 313 | 314 | fn common_suffix_len(&self, other: &[T]) -> usize { 315 | for (i, (item1, item2)) in self.iter().rev().zip(other.iter().rev()).enumerate() { 316 | if item1 != item2 { 317 | return i; 318 | } 319 | } 320 | cmp::min(self.len(), other.len()) 321 | } 322 | 323 | // returns length of overlap of prefix of `self` with suffic of `other` 324 | //TODO make a more efficient solution 325 | fn common_overlap_len(&self, other: &[T]) -> usize { 326 | let mut len = cmp::min(self.len(), other.len()); 327 | 328 | while len > 0 { 329 | if self[..len] == other[other.len() - len..] 
{ 330 | break; 331 | } 332 | len -= 1; 333 | } 334 | 335 | len 336 | } 337 | 338 | fn starts_with(&self, prefix: &Self) -> bool { 339 | self.starts_with(prefix) 340 | } 341 | 342 | fn ends_with(&self, suffix: &Self) -> bool { 343 | self.ends_with(suffix) 344 | } 345 | } 346 | 347 | #[derive(Debug)] 348 | pub enum DiffRange<'a, 'b, T: ?Sized> { 349 | Equal(Range<'a, T>, Range<'b, T>), 350 | Delete(Range<'a, T>), 351 | Insert(Range<'b, T>), 352 | } 353 | 354 | impl Copy for DiffRange<'_, '_, T> {} 355 | 356 | impl Clone for DiffRange<'_, '_, T> { 357 | fn clone(&self) -> Self { 358 | *self 359 | } 360 | } 361 | 362 | impl<'tmp, 'a: 'tmp, 'b: 'tmp, T> DiffRange<'a, 'b, T> 363 | where 364 | T: ?Sized + SliceLike, 365 | { 366 | pub fn inner(&self) -> Range<'tmp, T> { 367 | match *self { 368 | DiffRange::Equal(range, _) | DiffRange::Delete(range) | DiffRange::Insert(range) => { 369 | range 370 | } 371 | } 372 | } 373 | 374 | pub fn is_empty(&self) -> bool { 375 | self.inner().is_empty() 376 | } 377 | 378 | pub fn len(&self) -> usize { 379 | self.inner().len() 380 | } 381 | 382 | pub fn grow_up(&mut self, adjust: usize) { 383 | self.for_each(|range| range.grow_up(adjust)); 384 | } 385 | 386 | pub fn grow_down(&mut self, adjust: usize) { 387 | self.for_each(|range| range.grow_down(adjust)); 388 | } 389 | 390 | pub fn shrink_front(&mut self, adjust: usize) { 391 | self.for_each(|range| range.shrink_front(adjust)); 392 | } 393 | 394 | pub fn shrink_back(&mut self, adjust: usize) { 395 | self.for_each(|range| range.shrink_back(adjust)); 396 | } 397 | 398 | pub fn shift_up(&mut self, adjust: usize) { 399 | self.for_each(|range| range.shift_up(adjust)); 400 | } 401 | 402 | pub fn shift_down(&mut self, adjust: usize) { 403 | self.for_each(|range| range.shift_down(adjust)); 404 | } 405 | 406 | fn for_each(&mut self, f: impl Fn(&mut Range<'_, T>)) { 407 | match self { 408 | DiffRange::Equal(range1, range2) => { 409 | f(range1); 410 | f(range2); 411 | } 412 | DiffRange::Delete(range) => f(range), 413 | DiffRange::Insert(range) => f(range), 414 | } 415 | } 416 | } 417 | 418 | impl<'a, 'b> DiffRange<'a, 'b, [u8]> { 419 | pub fn to_str(self, text1: &'a str, text2: &'b str) -> DiffRange<'a, 'b, str> { 420 | fn boundary_down(text: &str, pos: usize) -> usize { 421 | let mut adjust = 0; 422 | while !text.is_char_boundary(pos - adjust) { 423 | adjust += 1; 424 | } 425 | adjust 426 | } 427 | 428 | fn boundary_up(text: &str, pos: usize) -> usize { 429 | let mut adjust = 0; 430 | while !text.is_char_boundary(pos + adjust) { 431 | adjust += 1; 432 | } 433 | adjust 434 | } 435 | 436 | match self { 437 | DiffRange::Equal(range1, range2) => { 438 | debug_assert_eq!(range1.inner().as_ptr(), text1.as_ptr()); 439 | debug_assert_eq!(range2.inner().as_ptr(), text2.as_ptr()); 440 | let mut offset1 = range1.offset(); 441 | let mut len1 = range1.len(); 442 | let mut offset2 = range2.offset(); 443 | let mut len2 = range2.len(); 444 | 445 | let adjust = boundary_up(text1, offset1); 446 | offset1 += adjust; 447 | len1 -= adjust; 448 | offset2 += adjust; 449 | len2 -= adjust; 450 | let adjust = boundary_down(text1, offset1 + len1); 451 | len1 -= adjust; 452 | len2 -= adjust; 453 | 454 | DiffRange::Equal( 455 | Range::new(text1, offset1..offset1 + len1), 456 | Range::new(text2, offset2..offset2 + len2), 457 | ) 458 | } 459 | DiffRange::Delete(range) => { 460 | debug_assert_eq!(range.inner().as_ptr(), text1.as_ptr()); 461 | let mut offset = range.offset(); 462 | let mut len = range.len(); 463 | let adjust = boundary_down(text1, 
offset); 464 | offset -= adjust; 465 | len += adjust; 466 | let adjust = boundary_up(text1, offset + len); 467 | len += adjust; 468 | DiffRange::Delete(Range::new(text1, offset..offset + len)) 469 | } 470 | DiffRange::Insert(range) => { 471 | debug_assert_eq!(range.inner().as_ptr(), text2.as_ptr()); 472 | let mut offset = range.offset(); 473 | let mut len = range.len(); 474 | let adjust = boundary_down(text2, offset); 475 | offset -= adjust; 476 | len += adjust; 477 | let adjust = boundary_up(text2, offset + len); 478 | len += adjust; 479 | DiffRange::Insert(Range::new(text2, offset..offset + len)) 480 | } 481 | } 482 | } 483 | } 484 | 485 | #[cfg(test)] 486 | mod tests { 487 | use super::*; 488 | 489 | #[test] 490 | fn test_common_prefix() { 491 | let text1 = Range::new("abc", ..); 492 | let text2 = Range::new("xyz", ..); 493 | assert_eq!(0, text1.common_prefix_len(text2), "Null case"); 494 | let text1 = Range::new(b"abc".as_ref(), ..); 495 | let text2 = Range::new(b"xyz".as_ref(), ..); 496 | assert_eq!(0, text1.common_prefix_len(text2), "Null case"); 497 | 498 | let text1 = Range::new("1234abcdef", ..); 499 | let text2 = Range::new("1234xyz", ..); 500 | assert_eq!(4, text1.common_prefix_len(text2), "Non-null case"); 501 | let text1 = Range::new(b"1234abcdef".as_ref(), ..); 502 | let text2 = Range::new(b"1234xyz".as_ref(), ..); 503 | assert_eq!(4, text1.common_prefix_len(text2), "Non-null case"); 504 | 505 | let text1 = Range::new("1234", ..); 506 | let text2 = Range::new("1234xyz", ..); 507 | assert_eq!(4, text1.common_prefix_len(text2), "Whole case"); 508 | 509 | let text1 = Range::new(b"1234".as_ref(), ..); 510 | let text2 = Range::new(b"1234xyz".as_ref(), ..); 511 | assert_eq!(4, text1.common_prefix_len(text2), "Whole case"); 512 | 513 | let snowman = "\u{2603}"; 514 | let comet = "\u{2604}"; 515 | let text1 = Range::new(snowman, ..); 516 | let text2 = Range::new(comet, ..); 517 | assert_eq!(0, text1.common_prefix_len(text2), "Unicode case"); 518 | let text1 = Range::new(snowman.as_bytes(), ..); 519 | let text2 = Range::new(comet.as_bytes(), ..); 520 | assert_eq!(2, text1.common_prefix_len(text2), "Unicode case"); 521 | } 522 | 523 | #[test] 524 | fn test_common_suffix() { 525 | let text1 = Range::new("abc", ..); 526 | let text2 = Range::new("xyz", ..); 527 | assert_eq!(0, text1.common_suffix_len(text2), "Null case"); 528 | let text1 = Range::new(b"abc".as_ref(), ..); 529 | let text2 = Range::new(b"xyz".as_ref(), ..); 530 | assert_eq!(0, text1.common_suffix_len(text2), "Null case"); 531 | 532 | let text1 = Range::new("abcdef1234", ..); 533 | let text2 = Range::new("xyz1234", ..); 534 | assert_eq!(4, text1.common_suffix_len(text2), "Non-null case"); 535 | let text1 = Range::new(b"abcdef1234".as_ref(), ..); 536 | let text2 = Range::new(b"xyz1234".as_ref(), ..); 537 | assert_eq!(4, text1.common_suffix_len(text2), "Non-null case"); 538 | 539 | let text1 = Range::new("1234", ..); 540 | let text2 = Range::new("xyz1234", ..); 541 | assert_eq!(4, text1.common_suffix_len(text2), "Whole case"); 542 | let text1 = Range::new(b"1234".as_ref(), ..); 543 | let text2 = Range::new(b"xyz1234".as_ref(), ..); 544 | assert_eq!(4, text1.common_suffix_len(text2), "Whole case"); 545 | } 546 | 547 | #[test] 548 | fn test_common_overlap() { 549 | let text1 = Range::empty(); 550 | let text2 = Range::new("abcd", ..); 551 | assert_eq!(0, text1.common_overlap_len(text2), "Null case"); 552 | let text1 = Range::empty(); 553 | let text2 = Range::new(b"abcd".as_ref(), ..); 554 | assert_eq!(0, 
text1.common_overlap_len(text2), "Null case"); 555 | 556 | let text1 = Range::new("abcd", ..); 557 | let text2 = Range::new("abc", ..); 558 | assert_eq!(3, text1.common_overlap_len(text2), "Whole case"); 559 | let text1 = Range::new(b"abcd".as_ref(), ..); 560 | let text2 = Range::new(b"abc".as_ref(), ..); 561 | assert_eq!(3, text1.common_overlap_len(text2), "Whole case"); 562 | 563 | let text1 = Range::new("123456", ..); 564 | let text2 = Range::new("abcd", ..); 565 | assert_eq!(0, text1.common_overlap_len(text2), "No overlap"); 566 | let text1 = Range::new(b"123456".as_ref(), ..); 567 | let text2 = Range::new(b"abcd".as_ref(), ..); 568 | assert_eq!(0, text1.common_overlap_len(text2), "No overlap"); 569 | 570 | let text1 = Range::new("xxxabcd", ..); 571 | let text2 = Range::new("123456xxx", ..); 572 | assert_eq!(3, text1.common_overlap_len(text2), "Overlap"); 573 | let text1 = Range::new(b"xxxabcd".as_ref(), ..); 574 | let text2 = Range::new(b"123456xxx".as_ref(), ..); 575 | assert_eq!(3, text1.common_overlap_len(text2), "Overlap"); 576 | 577 | // Some overly clever languages (C#) may treat ligatures as equal to their 578 | // component letters. E.g. U+FB01 == 'fi' 579 | let text1 = Range::new("fi", ..); 580 | let text2 = Range::new("\u{fb01}i", ..); 581 | assert_eq!(0, text1.common_overlap_len(text2), "Unicode"); 582 | } 583 | } 584 | -------------------------------------------------------------------------------- /src/utils.rs: -------------------------------------------------------------------------------- 1 | //! Common utilities 2 | 3 | use std::{ 4 | collections::{hash_map::Entry, HashMap}, 5 | hash::Hash, 6 | }; 7 | 8 | /// Classifies lines, converting lines into unique `u64`s for quicker comparison 9 | pub struct Classifier<'a, T: ?Sized> { 10 | next_id: u64, 11 | unique_ids: HashMap<&'a T, u64>, 12 | } 13 | 14 | impl<'a, T: ?Sized + Eq + Hash> Classifier<'a, T> { 15 | fn classify(&mut self, record: &'a T) -> u64 { 16 | match self.unique_ids.entry(record) { 17 | Entry::Occupied(o) => *o.get(), 18 | Entry::Vacant(v) => { 19 | let id = self.next_id; 20 | self.next_id += 1; 21 | *v.insert(id) 22 | } 23 | } 24 | } 25 | } 26 | 27 | impl<'a, T: ?Sized + Text> Classifier<'a, T> { 28 | pub fn classify_lines(&mut self, text: &'a T) -> (Vec<&'a T>, Vec) { 29 | LineIter::new(text) 30 | .map(|line| (line, self.classify(line))) 31 | .unzip() 32 | } 33 | } 34 | 35 | impl Default for Classifier<'_, T> { 36 | fn default() -> Self { 37 | Self { 38 | next_id: 0, 39 | unique_ids: HashMap::default(), 40 | } 41 | } 42 | } 43 | 44 | /// Iterator over the lines of a string, including the `\n` character. 
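// ---- editor's sketch (not part of utils.rs) --------------------------------
// A hedged illustration of the contract stated above: every item yielded by
// the `LineIter` declared just below keeps its trailing '\n', and a final
// line with no terminating newline is yielded as-is. Nothing outside this
// file is assumed.
//
//     let lines: Vec<&str> = LineIter::new("a\nb\nc").collect();
//     assert_eq!(lines, ["a\n", "b\n", "c"]);
// -----------------------------------------------------------------------------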
45 | pub struct LineIter<'a, T: ?Sized>(&'a T); 46 | 47 | impl<'a, T: ?Sized> LineIter<'a, T> { 48 | pub fn new(text: &'a T) -> Self { 49 | Self(text) 50 | } 51 | } 52 | 53 | impl<'a, T: Text + ?Sized> Iterator for LineIter<'a, T> { 54 | type Item = &'a T; 55 | 56 | fn next(&mut self) -> Option { 57 | if self.0.is_empty() { 58 | return None; 59 | } 60 | 61 | let end = if let Some(idx) = self.0.find("\n") { 62 | idx + 1 63 | } else { 64 | self.0.len() 65 | }; 66 | 67 | let (line, remaining) = self.0.split_at(end); 68 | self.0 = remaining; 69 | Some(line) 70 | } 71 | } 72 | 73 | /// A helper trait for processing text like `str` and `[u8]` 74 | /// Useful for abstracting over those types for parsing as well as breaking input into lines 75 | pub trait Text: Eq + Hash { 76 | fn is_empty(&self) -> bool; 77 | fn len(&self) -> usize; 78 | fn starts_with(&self, prefix: &str) -> bool; 79 | #[allow(unused)] 80 | fn ends_with(&self, suffix: &str) -> bool; 81 | fn strip_prefix(&self, prefix: &str) -> Option<&Self>; 82 | fn strip_suffix(&self, suffix: &str) -> Option<&Self>; 83 | fn split_at_exclusive(&self, needle: &str) -> Option<(&Self, &Self)>; 84 | fn find(&self, needle: &str) -> Option; 85 | fn split_at(&self, mid: usize) -> (&Self, &Self); 86 | fn as_str(&self) -> Option<&str>; 87 | fn as_bytes(&self) -> &[u8]; 88 | #[allow(unused)] 89 | fn lines(&self) -> LineIter; 90 | 91 | fn parse(&self) -> Option { 92 | self.as_str().and_then(|s| s.parse().ok()) 93 | } 94 | } 95 | 96 | impl Text for str { 97 | fn is_empty(&self) -> bool { 98 | self.is_empty() 99 | } 100 | 101 | fn len(&self) -> usize { 102 | self.len() 103 | } 104 | 105 | fn starts_with(&self, prefix: &str) -> bool { 106 | self.starts_with(prefix) 107 | } 108 | 109 | fn ends_with(&self, suffix: &str) -> bool { 110 | self.ends_with(suffix) 111 | } 112 | 113 | fn strip_prefix(&self, prefix: &str) -> Option<&Self> { 114 | self.strip_prefix(prefix) 115 | } 116 | 117 | fn strip_suffix(&self, suffix: &str) -> Option<&Self> { 118 | self.strip_suffix(suffix) 119 | } 120 | 121 | fn split_at_exclusive(&self, needle: &str) -> Option<(&Self, &Self)> { 122 | self.find(needle) 123 | .map(|idx| (&self[..idx], &self[idx + needle.len()..])) 124 | } 125 | 126 | fn find(&self, needle: &str) -> Option { 127 | self.find(needle) 128 | } 129 | 130 | fn split_at(&self, mid: usize) -> (&Self, &Self) { 131 | self.split_at(mid) 132 | } 133 | 134 | fn as_str(&self) -> Option<&str> { 135 | Some(self) 136 | } 137 | 138 | fn as_bytes(&self) -> &[u8] { 139 | self.as_bytes() 140 | } 141 | 142 | fn lines(&self) -> LineIter { 143 | LineIter::new(self) 144 | } 145 | } 146 | 147 | impl Text for [u8] { 148 | fn is_empty(&self) -> bool { 149 | self.is_empty() 150 | } 151 | 152 | fn len(&self) -> usize { 153 | self.len() 154 | } 155 | 156 | fn starts_with(&self, prefix: &str) -> bool { 157 | self.starts_with(prefix.as_bytes()) 158 | } 159 | 160 | fn ends_with(&self, suffix: &str) -> bool { 161 | self.ends_with(suffix.as_bytes()) 162 | } 163 | 164 | fn strip_prefix(&self, prefix: &str) -> Option<&Self> { 165 | self.strip_prefix(prefix.as_bytes()) 166 | } 167 | 168 | fn strip_suffix(&self, suffix: &str) -> Option<&Self> { 169 | self.strip_suffix(suffix.as_bytes()) 170 | } 171 | 172 | fn split_at_exclusive(&self, needle: &str) -> Option<(&Self, &Self)> { 173 | find_bytes(self, needle.as_bytes()).map(|idx| (&self[..idx], &self[idx + needle.len()..])) 174 | } 175 | 176 | fn find(&self, needle: &str) -> Option { 177 | find_bytes(self, needle.as_bytes()) 178 | } 179 | 180 | fn 
split_at(&self, mid: usize) -> (&Self, &Self) { 181 | self.split_at(mid) 182 | } 183 | 184 | fn as_str(&self) -> Option<&str> { 185 | std::str::from_utf8(self).ok() 186 | } 187 | 188 | fn as_bytes(&self) -> &[u8] { 189 | self 190 | } 191 | 192 | fn lines(&self) -> LineIter<'_, Self> { 193 | LineIter::new(self) 194 | } 195 | } 196 | 197 | fn find_bytes(haystack: &[u8], needle: &[u8]) -> Option<usize> { 198 | match needle.len() { 199 | 0 => Some(0), 200 | 1 => find_byte(haystack, needle[0]), 201 | len if len > haystack.len() => None, 202 | needle_len => { 203 | let mut offset = 0; 204 | let mut haystack = haystack; 205 | 206 | while let Some(position) = find_byte(haystack, needle[0]) { 207 | offset += position; 208 | 209 | if let Some(haystack) = haystack.get(position..position + needle_len) { 210 | if haystack == needle { 211 | return Some(offset); 212 | } 213 | } else { 214 | return None; 215 | } 216 | 217 | haystack = &haystack[position + 1..]; 218 | offset += 1; 219 | } 220 | 221 | None 222 | } 223 | } 224 | } 225 | 226 | // XXX Maybe use `memchr`? 227 | fn find_byte(haystack: &[u8], byte: u8) -> Option<usize> { 228 | haystack.iter().position(|&b| b == byte) 229 | } 230 | --------------------------------------------------------------------------------
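// ---- editor's sketch (appended; not part of utils.rs) -----------------------
// A hedged, test-style check of the `find_bytes` helper above, matching the
// semantics visible in its match arms: an empty needle matches at offset 0,
// and a needle longer than the haystack never matches. The test name
// `find_bytes_examples` is hypothetical, not part of the crate.
//
//     #[test]
//     fn find_bytes_examples() {
//         assert_eq!(find_bytes(b"hello world", b"wor"), Some(6));
//         assert_eq!(find_bytes(b"hello", b""), Some(0));
//         assert_eq!(find_bytes(b"hi", b"hello"), None);
//     }
// ------------------------------------------------------------------------------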