├── .gitattributes ├── .gitignore ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE.txt ├── README.md ├── assets ├── diffr.1.md ├── h.txt └── help.txt ├── azure-pipelines.yml ├── ci ├── azure-install-rust.yml ├── azure-runtests.yml ├── azure-rustfmt.yml └── azure_integration_test.yml ├── screenshots ├── example_cross_lines_common_tokens.png ├── example_nonconsecutive.png ├── example_simple.png └── example_simple_mac.png └── src ├── cli_args.rs ├── diffr_lib ├── best_projection.rs ├── mod.rs └── tests_lib.rs ├── main.rs ├── tests_app.rs └── tests_cli.rs /.gitattributes: -------------------------------------------------------------------------------- 1 | *.rs text 2 | *.toml text 3 | *.lock text 4 | *.md text 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/target/** 2 | **/*.rs.bk 3 | *.cmd 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.1.4 (2020/03/19) 2 | - diffr: add --line-numbers flag to display the line numbers 3 | (Github #44, Athir Saleem). 4 | 5 | - diffr: --colors: allow to display italic faces (Github #45). 6 | 7 | ## 0.1.3 (2019/12/07) 8 | - diffr-lib: optimize_partition: new function. 9 | Postprocessing of the results of the LCS algorithm to reduce the 10 | number of segments of consecutive shared tokens. 11 | 12 | ## 0.1.2 (2019/09/07) 13 | - Split in two crates: diffr-lib contains reusable parts, while diffr 14 | only contains application logic. 15 | 16 | - Fix a bug in display code that messed up the colors in diffs with 17 | lines starting with dashes. 18 | 19 | - Configuration: default to use 16 colors everywhere (Github #16). 20 | 21 | ## 0.1.1 (2019/07/15) 22 | - Add --colors flag to customize faces propertized by diffr (Github #3). 
23 | This changes the default colors used on linux and macOS. 24 | The default still works on windows. 25 | 26 | ## 0.1.0 (2019/07/01) Initial release. 27 | - Initial release. 28 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "bstr" 7 | version = "1.9.1" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" 10 | dependencies = [ 11 | "memchr", 12 | "regex-automata", 13 | ] 14 | 15 | [[package]] 16 | name = "diffr" 17 | version = "0.1.5" 18 | dependencies = [ 19 | "bstr", 20 | "termcolor", 21 | ] 22 | 23 | [[package]] 24 | name = "memchr" 25 | version = "2.7.2" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" 28 | 29 | [[package]] 30 | name = "regex-automata" 31 | version = "0.4.6" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" 34 | 35 | [[package]] 36 | name = "termcolor" 37 | version = "1.4.1" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" 40 | dependencies = [ 41 | "winapi-util", 42 | ] 43 | 44 | [[package]] 45 | name = "winapi-util" 46 | version = "0.1.8" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" 49 | dependencies = [ 50 | "windows-sys", 51 | ] 52 | 53 | [[package]] 54 | name = "windows-sys" 55 | version = "0.52.0" 56 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 57 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 58 | dependencies = [ 59 | "windows-targets", 60 | ] 61 | 62 | [[package]] 63 | name = "windows-targets" 64 | version = "0.52.5" 65 | source = "registry+https://github.com/rust-lang/crates.io-index" 66 | checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" 67 | dependencies = [ 68 | "windows_aarch64_gnullvm", 69 | "windows_aarch64_msvc", 70 | "windows_i686_gnu", 71 | "windows_i686_gnullvm", 72 | "windows_i686_msvc", 73 | "windows_x86_64_gnu", 74 | "windows_x86_64_gnullvm", 75 | "windows_x86_64_msvc", 76 | ] 77 | 78 | [[package]] 79 | name = "windows_aarch64_gnullvm" 80 | version = "0.52.5" 81 | source = "registry+https://github.com/rust-lang/crates.io-index" 82 | checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" 83 | 84 | [[package]] 85 | name = "windows_aarch64_msvc" 86 | version = "0.52.5" 87 | source = "registry+https://github.com/rust-lang/crates.io-index" 88 | checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" 89 | 90 | [[package]] 91 | name = "windows_i686_gnu" 92 | version = "0.52.5" 93 | source = "registry+https://github.com/rust-lang/crates.io-index" 94 | checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" 95 | 96 | [[package]] 97 | name = "windows_i686_gnullvm" 98 | version = "0.52.5" 99 | source = "registry+https://github.com/rust-lang/crates.io-index" 100 | checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" 101 | 102 | [[package]] 103 | name = "windows_i686_msvc" 104 | version = "0.52.5" 105 | source = "registry+https://github.com/rust-lang/crates.io-index" 106 | checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" 107 | 108 | [[package]] 109 | name = "windows_x86_64_gnu" 110 | version = "0.52.5" 111 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 112 | checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" 113 | 114 | [[package]] 115 | name = "windows_x86_64_gnullvm" 116 | version = "0.52.5" 117 | source = "registry+https://github.com/rust-lang/crates.io-index" 118 | checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" 119 | 120 | [[package]] 121 | name = "windows_x86_64_msvc" 122 | version = "0.52.5" 123 | source = "registry+https://github.com/rust-lang/crates.io-index" 124 | checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" 125 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "diffr" 3 | version = "0.1.5" 4 | authors = ["Nathan Moreau "] 5 | description = """ 6 | An LCS based diff highlighting tool to ease code review from your terminal. 7 | """ 8 | categories = ["command-line-utilities"] 9 | edition = "2018" 10 | homepage = "https://github.com/mookid/diffr" 11 | repository = "https://github.com/mookid/diffr" 12 | keywords = ["diff", "code-review", "git", "console", "cli"] 13 | license = "MIT" 14 | readme = "README.md" 15 | 16 | [profile.release] 17 | debug = true 18 | 19 | [dependencies] 20 | bstr = { version = "1.9.1", default-features = false, features = ["unicode"] } 21 | termcolor = "1.1" 22 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright 2019 Nathan Moreau 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## diffr 2 | 3 | Reviewing changes involves reading diffs. Sometimes, a line-oriented 4 | presentation of changes is not precise enough, especially when changes 5 | involve long lines or very similar consecutive lines. 6 | 7 | This program processes such diffs, and outputs them (in the console) 8 | with additional diff information on top of the unified diff format, 9 | using text attributes. 10 | 11 | It works hunk by hunk, recomputing the diff on a word-by-word basis. 12 | 13 | The current implementation uses 14 | [Myers' longest common subsequence](http://www.xmailserver.org/diff2.pdf) 15 | algorithm. 
16 | 17 | [![crates.io](https://img.shields.io/crates/v/diffr.svg)](https://crates.io/crates/diffr) 18 | [![crates.io](https://img.shields.io/crates/d/diffr.svg)](https://crates.io/crates/diffr) 19 | [![Build Status](https://dev.azure.com/nathanmoreau/diffr/_apis/build/status/mookid.diffr?branchName=master)](https://dev.azure.com/nathanmoreau/diffr/_build/latest?definitionId=4&branchName=master) 20 | 21 | ![Demo](screenshots/example_simple_mac.png) 22 | ![Demo](screenshots/example_nonconsecutive.png) 23 | ![Demo](screenshots/example_cross_lines_common_tokens.png) 24 | 25 | ### Installation 26 | 27 | #### Arch Linux 28 | 29 | Install from the [AUR](https://aur.archlinux.org/packages/diffr/): 30 | 31 | ``` 32 | git clone https://aur.archlinux.org/diffr.git 33 | cd diffr 34 | makepkg -si 35 | ``` 36 | 37 | #### Homebrew 38 | 39 | ``` 40 | brew install diffr 41 | ``` 42 | 43 | #### From source 44 | 45 | You will need the [Rust compiler installed](https://www.rust-lang.org/tools/install). 46 | 47 | To install the latest published version: 48 | 49 | ``` 50 | cargo install diffr 51 | ``` 52 | 53 | Alternatively, you can build the development version: 54 | 55 | ``` 56 | git clone https://github.com/mookid/diffr.git 57 | cd diffr 58 | cargo install --path . 59 | ``` 60 | 61 | ### How to use it? 62 | 63 | diffr tries to be a well behaved Unix program: it reads its input from stdin 64 | and writes to stdout. 65 | 66 | #### One-off usage 67 | 68 | ``` 69 | git show HEAD | diffr 70 | ``` 71 | 72 | #### Integration with git 73 | 74 | Add the following section to your `.gitconfig` file: 75 | 76 | ``` 77 | [core] 78 | pager = diffr | less -R 79 | [interactive] 80 | diffFilter = diffr 81 | ``` 82 | 83 | Alternatively, you can run from the command line: 84 | 85 | ``` 86 | git config --global core.pager 'diffr | less -R' 87 | git config --global interactive.difffilter diffr 88 | ``` 89 | 90 | #### Color customization 91 | 92 | Use the --colors flag. 
93 | 94 | You can customize the display of diffing and common segments of added 95 | and removed lines. 96 | 97 | For example, 98 | 99 | ``` 100 | diffr --colors refine-removed:background:200,0,0:foreground:white:bold 101 | ``` 102 | 103 | tweaks the red used for uniquely removed text; 104 | 105 | The configuration used in the first screenshot is 106 | 107 | ``` 108 | diffr --colors refine-added:none:background:0x33,0x99,0x33:bold --colors added:none:background:0x33,0x55,0x33 --colors refine-removed:none:background:0x99,0x33,0x33:bold --colors removed:none:background:0x55,0x33,0x33 109 | ``` 110 | 111 | #### Display line numbers 112 | 113 | The `--line-numbers` flag displays the line numbers of the hunk. 114 | 115 | ### Related projects 116 | 117 | This is an improvement on the 118 | [diff-highlight](https://github.com/git/git/tree/master/contrib/diff-highlight) 119 | script distributed with git. 120 | 121 | git itself provides both `--word-diff` and `--color-words` options to 122 | several commands. 123 | -------------------------------------------------------------------------------- /assets/diffr.1.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: DIFFR 3 | section: 1 4 | header: User Manual 5 | footer: diffr 0.1.5 6 | date: April 14, 2023 7 | --- 8 | # NAME 9 | diffr - adds word-level diff on top of unified diffs 10 | 11 | # SYNOPSIS 12 | **diffr** [**\-\-colors** *\*] [**\-\-line-numbers** \] 13 | 14 | diff -u | **diffr** [OPTIONS] 15 | 16 | git show | **diffr** [OPTIONS] 17 | 18 | # DESCRIPTION 19 | **\-\-colors** *\* 20 | Configure color settings for console output. 
21 | 22 | There are four faces to customize: 23 | +----------------+--------------+----------------+ 24 | | line prefix | + | - | 25 | +----------------+--------------+----------------+ 26 | | common segment | added | removed | 27 | | unique segment | refine-added | refine-removed | 28 | +----------------+--------------+----------------+ 29 | 30 | The customization allows 31 | - to change the foreground or background color; 32 | - to set or unset the attributes 'bold', 'intense', 'underline'; 33 | - to clear all attributes. 34 | 35 | Customization is done passing a color_spec argument. 36 | This flag may be provided multiple times. 37 | 38 | The syntax is the following: 39 | 40 | color_spec = face-name + ':' + attributes 41 | attributes = attribute 42 | | attribute + ':' + attributes 43 | attribute = ('foreground' | 'background') + ':' + color 44 | | ( | 'no') + font-flag 45 | | 'none' 46 | font-flag = 'italic' 47 | | 'bold' 48 | | 'intense' 49 | | 'underline' 50 | color = 'none' 51 | | [0-255] 52 | | [0-255] + ',' + [0-255] + ',' + [0-255] 53 | | ('black', 'blue', 'green', 'red', 54 | 'cyan', 'magenta', 'yellow', 'white') 55 | 56 | For example, the color_spec 57 | 58 | 'refine-added:background:blue:bold' 59 | 60 | sets the color of unique added segments with 61 | a blue background, written with a bold font. 62 | 63 | **\-\-line-numbers** \ 64 | Display line numbers. Style is optional. 65 | When style = 'compact', take as little width as possible. 66 | When style = 'aligned', align to tab stops (useful if tab is used for indentation). 
[default: compact] 67 | 68 | **-h**, **\-\-help** 69 | Prints help information 70 | 71 | **-V**, **\-\-version** 72 | Prints version information 73 | 74 | # AUTHOR 75 | Nathan Moreau \ 76 | 77 | # LICENSE 78 | The MIT License (MIT) 79 | -------------------------------------------------------------------------------- /assets/h.txt: -------------------------------------------------------------------------------- 1 | diffr $VERSION 2 | Nathan Moreau 3 | 4 | diffr adds word-level diff on top of unified diffs. 5 | word-level diff information is displayed using text attributes. 6 | 7 | USAGE: 8 | diffr reads from standard input and writes to standard output. 9 | 10 | Typical usage is for interactive use of diff: 11 | diff -u | diffr 12 | git show | diffr 13 | 14 | OPTIONS: 15 | --colors ... Configure color settings. 16 | --line-numbers Display line numbers. 17 | -h, --help Prints help information 18 | -V, --version Prints version information 19 | -------------------------------------------------------------------------------- /assets/help.txt: -------------------------------------------------------------------------------- 1 | diffr $VERSION 2 | Nathan Moreau 3 | 4 | diffr adds word-level diff on top of unified diffs. 5 | word-level diff information is displayed using text attributes. 6 | 7 | USAGE: 8 | diffr reads from standard input and writes to standard output. 9 | 10 | Typical usage is for interactive use of diff: 11 | diff -u | diffr 12 | git show | diffr 13 | 14 | OPTIONS: 15 | --colors ... 16 | Configure color settings for console output. 
17 | 18 | There are four faces to customize: 19 | +----------------+--------------+----------------+ 20 | | line prefix | + | - | 21 | +----------------+--------------+----------------+ 22 | | common segment | added | removed | 23 | | unique segment | refine-added | refine-removed | 24 | +----------------+--------------+----------------+ 25 | 26 | The customization allows 27 | - to change the foreground or background color; 28 | - to set or unset the attributes 'bold', 'intense', 'underline'; 29 | - to clear all attributes. 30 | 31 | Customization is done passing a color_spec argument. 32 | This flag may be provided multiple times. 33 | 34 | The syntax is the following: 35 | 36 | color_spec = face-name + ':' + attributes 37 | attributes = attribute 38 | | attribute + ':' + attributes 39 | attribute = ('foreground' | 'background') + ':' + color 40 | | ( | 'no') + font-flag 41 | | 'none' 42 | font-flag = 'italic' 43 | | 'bold' 44 | | 'intense' 45 | | 'underline' 46 | color = 'none' 47 | | [0-255] 48 | | [0-255] + ',' + [0-255] + ',' + [0-255] 49 | | ('black', 'blue', 'green', 'red', 50 | 'cyan', 'magenta', 'yellow', 'white') 51 | 52 | For example, the color_spec 53 | 54 | 'refine-added:background:blue:bold' 55 | 56 | sets the color of unique added segments with 57 | a blue background, written with a bold font. 58 | 59 | --line-numbers 60 | Display line numbers. Style is optional. 61 | When style = 'compact', take as little width as possible. 62 | When style = 'aligned', align to tab stops (useful if tab is used for indentation). 
[default: compact] 63 | 64 | -h, --help 65 | Prints help information 66 | 67 | -V, --version 68 | Prints version information 69 | -------------------------------------------------------------------------------- /azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | trigger: 2 | - master 3 | 4 | strategy: 5 | matrix: 6 | linux: 7 | vmImage: ubuntu-latest 8 | macOS: 9 | vmImage: macOS-latest 10 | windows: 11 | vmImage: windows-latest 12 | 13 | pool: 14 | vmImage: $(vmImage) 15 | 16 | steps: 17 | - template: ci/azure-install-rust.yml 18 | parameters: 19 | rust_version: stable 20 | 21 | - template: ci/azure-rustfmt.yml 22 | parameters: 23 | crate_path: . 24 | 25 | - template: ci/azure-runtests.yml 26 | parameters: 27 | crate_path: . 28 | 29 | - template: ci/azure_integration_test.yml 30 | -------------------------------------------------------------------------------- /ci/azure-install-rust.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | # Linux and macOS. 3 | - script: | 4 | set -e 5 | curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain none 6 | export PATH=$PATH:$HOME/.cargo/bin 7 | rustup toolchain install $RUSTUP_TOOLCHAIN 8 | rustup default $RUSTUP_TOOLCHAIN 9 | echo "##vso[task.setvariable variable=PATH;]$PATH:$HOME/.cargo/bin" 10 | env: 11 | RUSTUP_TOOLCHAIN: ${{parameters.rust_version}} 12 | displayName: "Install rust (*nix)" 13 | condition: not(eq(variables['Agent.OS'], 'Windows_NT')) 14 | 15 | # Windows. 
16 | - script: | 17 | curl -sSf -o rustup-init.exe https://win.rustup.rs 18 | rustup-init.exe -y --default-toolchain none 19 | set PATH=%PATH%;%USERPROFILE%\.cargo\bin 20 | rustup toolchain install %RUSTUP_TOOLCHAIN% 21 | rustup default %RUSTUP_TOOLCHAIN% 22 | echo "##vso[task.setvariable variable=PATH;]%PATH%;%USERPROFILE%\.cargo\bin" 23 | env: 24 | RUSTUP_TOOLCHAIN: ${{parameters.rust_version}} 25 | displayName: "Install rust (windows)" 26 | condition: eq(variables['Agent.OS'], 'Windows_NT') 27 | 28 | # All platforms. 29 | - script: | 30 | rustc -Vv 31 | cargo -V 32 | displayName: Query rust and cargo versions 33 | 34 | -------------------------------------------------------------------------------- /ci/azure-runtests.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - script: | 3 | set -e 4 | cd ${{parameters.crate_path}} 5 | cargo build 6 | cargo test 7 | displayName: Run tests (${{parameters.crate_path}}) 8 | -------------------------------------------------------------------------------- /ci/azure-rustfmt.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - script: | 3 | set -e 4 | cd ${{parameters.crate_path}} 5 | rustup component add rustfmt 6 | cargo fmt --version 7 | cargo fmt --all -- --check 8 | displayName: Check formatting (${{parameters.crate_path}}) 9 | condition: eq(variables['Agent.OS'], 'Linux') 10 | -------------------------------------------------------------------------------- /ci/azure_integration_test.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - script: | 3 | set -e 4 | cargo install --git https://github.com/mookid/trimcolor 5 | displayName: Install trimcolor 6 | 7 | - script: | 8 | set -e 9 | git log -p >whole_log 10 | cargo run whole_log_diffr 11 | if git diff --no-index whole_log whole_log_diffr 12 | then 13 | exit 0 14 | else 15 | exit 1 16 | fi 17 | 18 | displayName: Integration 
test 19 | condition: eq(variables['Agent.OS'], 'Linux') 20 | -------------------------------------------------------------------------------- /screenshots/example_cross_lines_common_tokens.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mookid/diffr/b0ebd4c04a5909b5af44f6ac96014148b7384858/screenshots/example_cross_lines_common_tokens.png -------------------------------------------------------------------------------- /screenshots/example_nonconsecutive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mookid/diffr/b0ebd4c04a5909b5af44f6ac96014148b7384858/screenshots/example_nonconsecutive.png -------------------------------------------------------------------------------- /screenshots/example_simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mookid/diffr/b0ebd4c04a5909b5af44f6ac96014148b7384858/screenshots/example_simple.png -------------------------------------------------------------------------------- /screenshots/example_simple_mac.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mookid/diffr/b0ebd4c04a5909b5af44f6ac96014148b7384858/screenshots/example_simple_mac.png -------------------------------------------------------------------------------- /src/cli_args.rs: -------------------------------------------------------------------------------- 1 | use super::AppConfig; 2 | use super::LineNumberStyle; 3 | 4 | use std::fmt::Display; 5 | use std::fmt::Error as FmtErr; 6 | use std::fmt::Formatter; 7 | use std::io::IsTerminal; 8 | use std::io::Write; 9 | use std::iter::Peekable; 10 | use std::process; 11 | use std::str::FromStr; 12 | 13 | use termcolor::Color; 14 | use termcolor::ColorSpec; 15 | use termcolor::ParseColorError; 16 | 17 | const FLAG_DEBUG: &str = "--debug"; 18 | 
const FLAG_COLOR: &str = "--colors"; 19 | const FLAG_LINE_NUMBERS: &str = "--line-numbers"; 20 | const FLAG_TOO_LARGE: &str = "--large-diff-threshold"; 21 | 22 | const BIN_NAME: &str = env!("CARGO_PKG_NAME"); 23 | const VERSION: &str = env!("CARGO_PKG_VERSION"); 24 | 25 | const HELP_SHORT: &str = include_str!("../assets/h.txt"); 26 | const HELP_LONG: &str = include_str!("../assets/help.txt"); 27 | 28 | fn show_version() -> ! { 29 | eprintln!("{} {}", BIN_NAME, VERSION); 30 | process::exit(0); 31 | } 32 | 33 | #[derive(Debug, Clone, Copy)] 34 | enum FaceName { 35 | Added, 36 | RefineAdded, 37 | Removed, 38 | RefineRemoved, 39 | } 40 | 41 | fn missing_arg(arg: impl std::fmt::Display) -> ! { 42 | eprintln!("option requires an argument: '{}'", arg); 43 | process::exit(2); 44 | } 45 | 46 | fn interpolate(s: &str) -> String { 47 | s.replace("$VERSION", VERSION) 48 | } 49 | 50 | fn usage(code: i32) -> ! { 51 | let txt = interpolate(HELP_SHORT); 52 | let _ = std::io::stderr().write(txt.as_bytes()); 53 | process::exit(code); 54 | } 55 | 56 | fn help(long: bool) -> ! 
{ 57 | let txt = if long { HELP_LONG } else { HELP_SHORT }; 58 | let txt = interpolate(txt); 59 | let _ = std::io::stdout().write(txt.as_bytes()); 60 | process::exit(0); 61 | } 62 | 63 | impl EnumString for FaceName { 64 | fn data() -> &'static [(&'static str, Self)] { 65 | use FaceName::*; 66 | &[ 67 | ("added", Added), 68 | ("refine-added", RefineAdded), 69 | ("removed", Removed), 70 | ("refine-removed", RefineRemoved), 71 | ] 72 | } 73 | } 74 | 75 | impl Display for FaceName { 76 | fn fmt(&self, f: &mut Formatter) -> Result<(), FmtErr> { 77 | use FaceName::*; 78 | match self { 79 | Added => write!(f, "added"), 80 | RefineAdded => write!(f, "refine-added"), 81 | Removed => write!(f, "removed"), 82 | RefineRemoved => write!(f, "refine-removed"), 83 | } 84 | } 85 | } 86 | 87 | impl FaceName { 88 | fn get_face_mut<'b>(&self, config: &'b mut super::AppConfig) -> &'b mut ColorSpec { 89 | use FaceName::*; 90 | match self { 91 | Added => &mut config.added_face, 92 | RefineAdded => &mut config.refine_added_face, 93 | Removed => &mut config.removed_face, 94 | RefineRemoved => &mut config.refine_removed_face, 95 | } 96 | } 97 | } 98 | 99 | // custom parsing of Option 100 | struct ColorOpt(Option); 101 | 102 | impl FromStr for ColorOpt { 103 | type Err = ArgParsingError; 104 | fn from_str(input: &str) -> Result { 105 | if input == "none" { 106 | Ok(ColorOpt(None)) 107 | } else { 108 | match input.parse() { 109 | Ok(color) => Ok(ColorOpt(Some(color))), 110 | Err(err) => Err(ArgParsingError::Color(err)), 111 | } 112 | } 113 | } 114 | } 115 | 116 | trait EnumString: Copy { 117 | fn data() -> &'static [(&'static str, Self)]; 118 | } 119 | 120 | fn tryparse(input: &str) -> Result 121 | where 122 | T: EnumString + 'static, 123 | { 124 | T::data() 125 | .iter() 126 | .find(|p| p.0 == input) 127 | .map(|&p| p.1) 128 | .ok_or_else(|| { 129 | format!( 130 | "got '{}', expected {}", 131 | input, 132 | T::data().iter().map(|p| p.0).collect::>().join("|") 133 | ) 134 | }) 135 | } 136 | 
137 | #[derive(Debug, Clone, Copy)] 138 | struct LineNumberStyleOpt(LineNumberStyle); 139 | 140 | impl EnumString for LineNumberStyleOpt { 141 | fn data() -> &'static [(&'static str, Self)] { 142 | use LineNumberStyle::*; 143 | &[ 144 | ("aligned", LineNumberStyleOpt(Aligned)), 145 | ("compact", LineNumberStyleOpt(Compact)), 146 | ("fixed", LineNumberStyleOpt(Fixed(3))), 147 | ] 148 | } 149 | } 150 | 151 | #[derive(Debug, Clone, Copy)] 152 | enum FaceColor { 153 | Foreground, 154 | Background, 155 | } 156 | 157 | #[derive(Debug, Clone, Copy)] 158 | enum AttributeName { 159 | Color(FaceColor), 160 | Italic(bool), 161 | Bold(bool), 162 | Intense(bool), 163 | Underline(bool), 164 | Reset, 165 | } 166 | 167 | impl EnumString for AttributeName { 168 | fn data() -> &'static [(&'static str, Self)] { 169 | use AttributeName::*; 170 | &[ 171 | ("foreground", Color(FaceColor::Foreground)), 172 | ("background", Color(FaceColor::Background)), 173 | ("italic", Italic(true)), 174 | ("noitalic", Italic(false)), 175 | ("bold", Bold(true)), 176 | ("nobold", Bold(false)), 177 | ("intense", Intense(true)), 178 | ("nointense", Intense(false)), 179 | ("underline", Underline(true)), 180 | ("nounderline", Underline(false)), 181 | ("none", Reset), 182 | ] 183 | } 184 | } 185 | 186 | #[derive(Debug)] 187 | enum ArgParsingError { 188 | FaceName(String), 189 | AttributeName(String), 190 | Color(ParseColorError), 191 | MissingValue(FaceName), 192 | LineNumberStyle(String), 193 | LargeDiffThreshold(String), 194 | } 195 | 196 | impl Display for ArgParsingError { 197 | fn fmt(&self, f: &mut Formatter) -> Result<(), FmtErr> { 198 | match self { 199 | ArgParsingError::FaceName(err) => write!(f, "unexpected face name: {}", err), 200 | ArgParsingError::AttributeName(err) => write!(f, "unexpected attribute name: {}", err), 201 | ArgParsingError::Color(err) => write!(f, "unexpected color value: {}", err), 202 | ArgParsingError::MissingValue(face_name) => write!( 203 | f, 204 | "error parsing color: 
missing color value for face '{}'", 205 | face_name 206 | ), 207 | ArgParsingError::LineNumberStyle(err) => { 208 | write!(f, "unexpected line number style: {}", err) 209 | } 210 | ArgParsingError::LargeDiffThreshold(err) => { 211 | write!(f, "invalid threshold value: {}", err) 212 | } 213 | } 214 | } 215 | } 216 | 217 | impl FromStr for FaceName { 218 | type Err = ArgParsingError; 219 | fn from_str(input: &str) -> Result { 220 | tryparse(input).map_err(ArgParsingError::FaceName) 221 | } 222 | } 223 | 224 | impl FromStr for AttributeName { 225 | type Err = ArgParsingError; 226 | fn from_str(input: &str) -> Result { 227 | tryparse(input).map_err(ArgParsingError::AttributeName) 228 | } 229 | } 230 | 231 | impl FromStr for LineNumberStyleOpt { 232 | type Err = ArgParsingError; 233 | fn from_str(input: &str) -> Result { 234 | tryparse(input).map_err(ArgParsingError::LineNumberStyle) 235 | } 236 | } 237 | 238 | fn ignore(_: T) {} 239 | 240 | fn parse_line_number_style( 241 | config: &mut AppConfig, 242 | value: Option<&str>, 243 | ) -> Result<(), ArgParsingError> { 244 | let style = if let Some(style) = value { 245 | style.parse::()?.0 246 | } else { 247 | LineNumberStyle::Compact 248 | }; 249 | config.line_numbers_style = Some(style); 250 | Ok(()) 251 | } 252 | 253 | fn parse_color_attributes<'a, Values>( 254 | config: &mut AppConfig, 255 | mut values: Values, 256 | face_name: FaceName, 257 | ) -> Result<(), ArgParsingError> 258 | where 259 | Values: Iterator, 260 | { 261 | use AttributeName::*; 262 | let face = face_name.get_face_mut(config); 263 | while let Some(value) = values.next() { 264 | let attribute_name = value.parse::()?; 265 | match attribute_name { 266 | Color(kind) => { 267 | if let Some(value) = values.next() { 268 | let ColorOpt(color) = value.parse::()?; 269 | match kind { 270 | FaceColor::Foreground => face.set_fg(color), 271 | FaceColor::Background => face.set_bg(color), 272 | }; 273 | } else { 274 | return 
Err(ArgParsingError::MissingValue(face_name));
                }
            }
            // Boolean attributes each toggle a single bit on the face.
            Italic(italic) => ignore(face.set_italic(italic)),
            Bold(bold) => ignore(face.set_bold(bold)),
            Intense(intense) => ignore(face.set_intense(intense)),
            Underline(underline) => ignore(face.set_underline(underline)),
            // `Reset` discards everything parsed so far for this face.
            Reset => *face = Default::default(),
        }
    }
    Ok(())
}

/// Parse one `--colors` argument of the form `<face>[:<attribute>...]`
/// and apply it to `config`.
///
/// An empty attribute list is valid: only the face name is required.
fn parse_color_arg(value: &str, config: &mut AppConfig) -> Result<(), ArgParsingError> {
    let mut pieces = value.split(':');
    if let Some(piece) = pieces.next() {
        // NOTE(review): the turbofish type annotation was garbled in this
        // dump; inference resolves it from `parse_color_attributes`.
        let face_name = piece.parse()?;
        parse_color_attributes(config, pieces, face_name)?;
    };
    Ok(())
}

/// Parse the numeric value of the hidden large-diff threshold flag into
/// `config.large_diff_threshold`.
fn parse_large_diff_threshold(value: &str, config: &mut AppConfig) -> Result<(), ArgParsingError> {
    match value.parse() {
        Ok(val) => {
            config.large_diff_threshold = val;
            Ok(())
        }
        // Only the message survives; the concrete parse-error type is dropped.
        Err(err) => Err(ArgParsingError::LargeDiffThreshold(err.to_string())),
    }
}

/// On `Err`, print the error and abort the whole process; otherwise return
/// `true` so option handlers can use this as their "keep parsing" result.
fn die_error(result: Result<(), ArgParsingError>) -> bool {
    if let Err(err) = result {
        eprintln!("{}", err);
        process::exit(-1);
    }
    true
}

/// Handle `--colors <spec>`: consume the flag and its mandatory value.
///
/// NOTE(review): the iterator type parameters were garbled in this dump and
/// restored as the `String` argument iterator built by `parse_config` —
/// confirm against the original source.
fn color(config: &mut AppConfig, args: &mut Peekable<impl Iterator<Item = String>>) -> bool {
    // The flag itself was peeked by the caller, so `next` cannot fail here.
    let arg = args.next().unwrap();
    if let Some(spec) = args.next() {
        die_error(parse_color_arg(&spec, config))
    } else {
        missing_arg(arg)
    }
}

/// Handle `--line-numbers [<style>]`: the style value is optional, a bare
/// flag selects the default style.
fn line_numbers(config: &mut AppConfig, args: &mut Peekable<impl Iterator<Item = String>>) -> bool {
    args.next();
    let spec = if let Some(spec) = args.next() {
        parse_line_number_style(config, Some(&*spec))
    } else {
        parse_line_number_style(config, None)
    };
    die_error(spec)
}

/// Handle the hidden large-diff flag: consume the flag and its mandatory value.
fn large_diff(config: &mut AppConfig, args: &mut Peekable<impl Iterator<Item = String>>) -> bool {
    let arg = args.next().unwrap();
    if let Some(spec) = args.next() {
        die_error(parse_large_diff_threshold(&spec, config))
    } else {
        missing_arg(arg)
    }
}

/// Handle the hidden debug flag: no value, just flips `config.debug`.
fn debug(config: &mut AppConfig, args: &mut Peekable<impl Iterator<Item = String>>) -> bool {
    config.debug = true;
    args.next();
    true
}

/// Report an unrecognized argument and exit with status 2 (never returns).
fn bad_arg(arg: &str) -> ! {
    eprintln!("bad argument: '{}'", arg);
    usage(2);
}

/// Dispatch on the next (peeked) argument; returns `false` when the argument
/// stream is exhausted, ending the `parse_config` loop.
fn parse_options(
    config: &mut AppConfig,
    args: &mut Peekable<impl Iterator<Item = String>>,
) -> bool {
    if let Some(arg) = args.peek() {
        match &arg[..] {
            // generic flags
            "-h" | "--help" => help(&arg[..] == "--help"),
            "-V" | "--version" => show_version(),

            // documented flags
            FLAG_COLOR => color(config, args),
            FLAG_LINE_NUMBERS => line_numbers(config, args),

            // hidden flags
            FLAG_TOO_LARGE => large_diff(config, args),
            FLAG_DEBUG => debug(config, args),

            arg => bad_arg(arg),
        }
    } else {
        false
    }
}

/// Build the application configuration from the command line.
///
/// `--help` anywhere wins before regular parsing starts. Running with a
/// terminal on stdin is an error: diffr expects a diff piped in.
pub fn parse_config() -> AppConfig {
    // A closure so the argument list can be traversed twice (scan + parse).
    let args = || std::env::args().skip(1);
    if args().any(|s| s == "--help") {
        help(true);
    }

    let mut config = AppConfig::default();
    let mut args = args().peekable();
    while parse_options(&mut config, &mut args) {}

    if std::io::stdin().is_terminal() {
        usage(-1);
    }
    config
}
-------------------------------------------------------------------------------- /src/diffr_lib/best_projection.rs: --------------------------------------------------------------------------------
use std::collections::hash_map::Entry::*;
use std::collections::HashMap;
use std::convert::TryFrom;

use super::TokenId;
use super::Tokenization;

/// A position in the search space of `optimize_partition`: the next index
/// to match in the LCS and the next index to consume in the full sequence.
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy, Hash)]
struct Coord {
    next_lcs: usize,
    next_seq: usize,
}

/// Index of `seq`: for each token id appearing in the LCS, the sorted list
/// of positions where it occurs in `seq`.
///
/// NOTE(review): the map's type parameters were garbled in this dump and
/// restored from usage (`TokenId` keys, `Vec<usize>` position lists).
#[derive(Debug)]
struct Context {
    seq_index: HashMap<TokenId, Vec<usize>>,
}

impl Context {
    /// Build the index: first register every token id present in `lcs`,
    /// then record the positions in `seq` of only those token ids.
    fn new<'a>(seq: &'a Tokenization<'a>, lcs: &'a Tokenization<'a>) -> Self {
        let mut seq_index = HashMap::new();
        for v in lcs.tokens() {
            match seq_index.entry(*v) {
                Occupied(_) => (),
                Vacant(e) => {
                    e.insert(vec![]);
                }
            }
        }
        for (i, v) in seq.tokens().iter().enumerate() {
            match seq_index.entry(*v) {
                Occupied(e) => {
                    e.into_mut().push(i);
                }
                // Token not in the LCS: no need to index it.
                Vacant(_) => (),
            }
        }
        Context { seq_index }
    }

    /// Positions of `tok` in `seq` that are `>= min_value` (empty slice when
    /// the token is unknown). Positions were pushed in increasing order, so
    /// `binary_search` finds the cut point; `Ok`/`Err` both give it.
    fn get_indexes(&self, tok: TokenId, min_value: usize) -> &[usize] {
        match self.seq_index.get(&tok) {
            Some(values) => {
                let min_idx = match values.binary_search(&min_value) {
                    Ok(i) | Err(i) => i,
                };
                &values[min_idx..]
            }
            None => &[],
        }
    }
}

/// The result of `optimize_partition`. This is mostly used by `shared_segments`.
///
/// NOTE(review): `path` element type garbled in this dump; restored as
/// `isize` from the `to_isize` conversions feeding `push_if_not_last`.
#[derive(Debug)]
pub struct NormalizationResult {
    pub path: Vec<isize>,
    pub starts_with_shared: bool,
}

impl NormalizationResult {
    /// The shared segments between both inputs of `optimize_partition`.
    /// The `seq` argument is the longest of the two inputs.
    pub fn shared_segments<'a>(
        &'a self,
        seq: &'a Tokenization,
    ) -> impl Iterator<Item = (usize, usize)> + 'a {
        SharedSegments::new(self, seq)
    }
}

/// Length of the run of equal tokens in `seq` and `lcs` starting at
/// `start_seq`/`start_lcs`, capped by whichever tail is shorter.
fn snake_len(seq: &Tokenization, lcs: &Tokenization, start_lcs: usize, start_seq: usize) -> usize {
    let lcs_len = lcs.nb_tokens() - start_lcs;
    let seq_len = seq.nb_tokens() - start_seq;
    let max_snake_len = lcs_len.min(seq_len);
    let mut snake_len = 0;
    let seq = &seq.tokens()[start_seq..start_seq + max_snake_len];
    let lcs = &lcs.tokens()[start_lcs..start_lcs + max_snake_len];

    while snake_len < max_snake_len && lcs[snake_len] == seq[snake_len] {
        snake_len += 1
    }
    snake_len
}

/// Minimize the number of elements when partitioning `seq` according to `lcs`.
/// `lcs` is a subsequence of `seq`.
/// Breadth-first search over `Coord`s: each BFS level adds one more segment
/// to the partition, so the first level that consumes all of `lcs` yields a
/// partition with the fewest segments. `prev` records the BFS tree so the
/// winning path can be walked backwards afterwards.
pub fn optimize_partition(seq: &Tokenization, lcs: &Tokenization) -> NormalizationResult {
    let context = Context::new(seq, lcs);
    let root = Coord {
        next_lcs: 0,
        next_seq: 0,
    };
    let target = Coord {
        next_lcs: lcs.nb_tokens(),
        next_seq: seq.nb_tokens(),
    };
    let mut frontier = vec![root];
    let mut new_frontier = vec![];
    let mut prev = HashMap::new();
    // Set as soon as some coordinate has matched all of `lcs`.
    let mut found_seq = None;
    while !frontier.is_empty() && found_seq.is_none() {
        new_frontier.clear();
        for &coord in frontier.iter() {
            if coord.next_lcs == target.next_lcs {
                found_seq = Some(coord.next_seq);
                if coord.next_seq == target.next_seq {
                    break;
                } else {
                    // TODO do something more clever here
                    continue;
                }
            }
            let start_lcs = coord.next_lcs;
            let lcs_len = lcs.nb_tokens() - start_lcs;
            // Skip candidate starts that cannot improve on an already
            // enqueued, longer snake from this coordinate.
            let mut last_enqueued_snake_len = 0;
            for start_seq in
                context.get_indexes(lcs.nth_token(to_isize(coord.next_lcs)), coord.next_seq)
            {
                // Not enough room left in `seq` for the remaining LCS tokens.
                if start_seq + lcs_len > seq.nb_tokens() {
                    break;
                }
                // The first token matches by construction of `get_indexes`,
                // hence the `1 +`.
                let snake_len = 1 + snake_len(seq, lcs, start_lcs + 1, start_seq + 1);
                let next_coord = Coord {
                    next_lcs: start_lcs + snake_len,
                    next_seq: start_seq + snake_len,
                };
                if last_enqueued_snake_len < snake_len || next_coord == target {
                    if next_coord.next_lcs == target.next_lcs
                        && (next_coord.next_seq == target.next_seq || found_seq.is_none())
                    {
                        found_seq = Some(next_coord.next_seq);
                    }
                    match prev.entry(next_coord) {
                        // Already reached at an earlier (or equal) BFS level.
                        Occupied(_) => continue,
                        Vacant(e) => e.insert(coord),
                    };
                    new_frontier.push(next_coord);
                    last_enqueued_snake_len = snake_len;
                }
            }
        }
        std::mem::swap(&mut frontier, &mut new_frontier)
    }

    // Rebuild the winning endpoint; `found_seq` may legitimately be `None`
    // when the search space was empty.
    let target = found_seq.map(|next_seq| Coord {
        next_lcs: lcs.nb_tokens(),
        next_seq,
    });
    let mut path = vec![];
    let mut starts_with_shared = false;
    let mut coord = target.as_ref();
    // Walk the `prev` chain backwards, emitting segment boundaries;
    // `path` is reversed at the end.
    let mut seq = seq.nb_tokens();
    let mut lcs = lcs.nb_tokens();
    while let Some(&coord_content) = coord {
        let next_seq = coord_content.next_seq;
        let next_lcs = coord_content.next_lcs;
        let snake_len = lcs - next_lcs;
        push_if_not_last(&mut path, to_isize(seq - snake_len));
        // If the boundary collapses with the previous one, the path starts
        // on a shared segment.
        starts_with_shared = !push_if_not_last(&mut path, to_isize(next_seq));

        coord = prev.get(&coord_content);

        seq = next_seq;
        lcs = next_lcs;
    }
    path.reverse();
    NormalizationResult {
        path,
        starts_with_shared,
    }
}

/// Push `val` unless it equals the last element; returns whether it pushed.
fn push_if_not_last(v: &mut Vec<isize>, val: isize) -> bool {
    let should_push = v.last() != Some(&val);
    if should_push {
        v.push(val);
    }
    should_push
}

/// Checked `usize -> isize` conversion; panics on overflow.
fn to_isize(input: usize) -> isize {
    isize::try_from(input).unwrap()
}

/// The shared segments between both inputs of `optimize_partition`.
/// Walks a `NormalizationResult::path` two boundaries at a time, yielding
/// the byte span of each shared segment in `seq`.
///
/// NOTE(review): the `normalization` field type was garbled in this dump;
/// restored as `&Vec<isize>` to match `NormalizationResult::path`.
struct SharedSegments<'a> {
    // Index of the next shared-segment start inside `normalization`.
    index: usize,
    normalization: &'a Vec<isize>,
    seq: &'a Tokenization<'a>,
}

impl<'a> SharedSegments<'a> {
    fn new(normalization: &'a NormalizationResult, seq: &'a Tokenization) -> Self {
        SharedSegments {
            // When the path starts with a unique segment, skip its leading
            // boundary so `index` always points at a shared run.
            index: if normalization.starts_with_shared {
                0
            } else {
                1
            },
            normalization: &normalization.path,
            seq,
        }
    }
}

impl<'a> Iterator for SharedSegments<'a> {
    type Item = (usize, usize);
    fn next(&mut self) -> Option<Self::Item> {
        if self.index + 1 < self.normalization.len() {
            let prev = self.normalization[self.index];
            let curr = self.normalization[self.index + 1];
            // Boundaries are token indices; convert them to byte offsets:
            // start of the first token, end of the last token of the run.
            let from = self.seq.nth_span(prev).0;
            let to = self.seq.nth_span(curr - 1).1;
            // Shared and unique segments alternate, hence the stride of 2.
            self.index += 2;
            Some((from, to))
        } else {
            None
        }
    }
}
-------------------------------------------------------------------------------- /src/diffr_lib/mod.rs: --------------------------------------------------------------------------------
//! Algorithms to compute diffs.
//!
//! This module implements various algorithms described in E. Myers
//! paper: [An O(ND) Difference Algorithm and Its
//! Variations](http://www.xmailserver.org/diff2.pdf).
//!
//! The main entrypoint is `diff`, which allows to compute the longest
//! common subsequence between two sequences of byte slices.

use bstr::ByteSlice;
use std::collections::hash_map::Entry::*;
use std::collections::HashMap;
use std::convert::TryFrom;
use std::fmt::Debug;
use std::fmt::{Error as FmtErr, Formatter};

mod best_projection;
pub use best_projection::optimize_partition;

/// A half-open byte range `(start, one_past_end)` into some token data.
type Span = (usize, usize);

/// Interned token identifier: equal token bytes get equal ids.
type TokenId = u64;

/// Interning table from a token's bytes to its `TokenId`.
pub struct TokenMap<'a>(HashMap<&'a [u8], TokenId>);

impl<'a> TokenMap<'a> {
    /// Build the map over several `(spans, data)` inputs at once, so ids are
    /// consistent across all of them.
    ///
    /// NOTE(review): the iterator item type was garbled in this dump;
    /// restored as `&'a Span` from the `tests_lib` call sites.
    pub fn new(input: &mut [(impl Iterator<Item = &'a Span>, &'a [u8])]) -> Self {
        let mut m = HashMap::new();
        // Ids are assigned in first-seen order.
        let mut counter = 0;
        for (spans, data) in input.iter_mut() {
            for span in spans {
                let data = &data[span.0..span.1];
                match m.entry(data) {
                    Vacant(e) => {
                        e.insert(counter);
                        counter += 1
                    }
                    Occupied(_) => {}
                }
            }
        }
        TokenMap(m)
    }

    /// Look up an interned token; panics if `slice` was never interned
    /// (internal invariant: `Tokenization::new` only queries known tokens).
    fn get(&self, slice: &'a [u8]) -> TokenId {
        *self.0.get(slice).unwrap()
    }
}

/// A byte buffer together with its token spans and their interned ids.
///
/// NOTE(review): `token_ids` element type garbled in this dump; restored as
/// `TokenId` from `TokenMap::get`.
pub struct Tokenization<'a> {
    data: &'a [u8],
    spans: &'a [Span],
    token_ids: Vec<TokenId>,
}

impl Debug for Tokenization<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), FmtErr> {
        let Self { data, spans, .. } = self;
        // Pretty-print bytes lossily so non-UTF-8 data still formats.
        let data_pp = String::from_utf8_lossy(data);
        let tokens_pp = spans
            .iter()
            .map(|sref| String::from_utf8_lossy(&data[sref.0..sref.1]))
            .collect::<Vec<_>>();
        f.debug_struct("Tokenization")
            .field("data", &data_pp)
            .field("tokens", &tokens_pp)
            .finish()
    }
}

/// A sub-range `[start_index, one_past_end_index)` of a `Tokenization`,
/// in token indices.
struct TokenizationRange<'a> {
    t: &'a Tokenization<'a>,
    start_index: isize,
    one_past_end_index: isize,
}

impl<'a> Debug for TokenizationRange<'a> {
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), FmtErr> {
        let Self {
            t: Tokenization { data, spans, .. },
            start_index,
            one_past_end_index,
        } = self;
        let data_pp = String::from_utf8_lossy(data);
        // Only the tokens inside the range are shown.
        let tokens_pp = spans[to_usize(*start_index)..to_usize(*one_past_end_index)]
            .iter()
            .map(|sref| String::from_utf8_lossy(&data[sref.0..sref.1]))
            .collect::<Vec<_>>();
        f.debug_struct("TokenizationRange")
            .field("data", &data_pp)
            .field("tokens", &tokens_pp)
            .finish()
    }
}

impl<'a> Tokenization<'a> {
    /// Resolve each span of `data` to its interned id via `token_map`.
    pub fn new(data: &'a [u8], spans: &'a [Span], token_map: &TokenMap) -> Self {
        let mut token_ids = Vec::with_capacity(spans.len());
        for span in spans {
            token_ids.push(token_map.get(&data[span.0..span.1]));
        }
        Tokenization {
            data,
            spans,
            token_ids,
        }
    }

    /// The underlying bytes.
    pub fn data(&self) -> &[u8] {
        self.data
    }

    /// Number of tokens.
    pub fn nb_tokens(&self) -> usize {
        self.spans.len()
    }

    /// Byte span of the `n`th token.
    pub fn nth_span(&self, n: isize) -> Span {
        self.spans[to_usize(n)]
    }

    /// All interned token ids, in order.
    pub fn tokens(&self) -> &[TokenId] {
        &self.token_ids
    }

    /// Interned id of the `n`th token.
    pub fn nth_token(&self, n: isize) -> TokenId {
        self.token_ids[to_usize(n)]
    }
}

impl<'a> TokenizationRange<'a> {
    /// View the whole of `t` as a range.
    fn new(t: &'a Tokenization<'a>) -> Self {
        TokenizationRange {
            t,
            start_index: 0,
            one_past_end_index: to_isize(t.spans.len()),
        }
    }

    /// Split `self` in two tokenizations:
    /// * the first one from the start to `lo`;
    /// * the second one from `hi` to the end.
    fn split_at(&self, lo: isize, hi: isize) -> (Self, Self) {
        let start = self.start_index;
        let end = self.one_past_end_index;
        // `lo`/`hi` are absolute token indices and must lie inside the range.
        assert!(start <= lo);
        assert!(lo <= hi);
        assert!(hi <= end);
        (
            TokenizationRange {
                one_past_end_index: lo,
                ..*self
            },
            TokenizationRange {
                start_index: hi,
                ..*self
            },
        )
    }

    /// Get `self`'s number of tokens.
    fn nb_tokens(&self) -> usize {
        to_usize(self.one_past_end_index - self.start_index)
    }

    /// Get `self`'s `n`th token.
    fn nth_token(&self, n: isize) -> TokenId {
        // `n` is relative to the range start.
        self.t.token_ids[to_usize(self.start_index + n)]
    }
}

/// A pair of `TokenizationRange`s to compare.
#[derive(Debug)]
pub struct DiffInput<'a> {
    added: TokenizationRange<'a>,
    removed: TokenizationRange<'a>,
    // 0 disables the cap; see `diff_sequences_bidirectional_snake`.
    large_diff_threshold: usize,
}

impl<'a> DiffInput<'a> {
    pub fn new(
        added: &'a Tokenization<'a>,
        removed: &'a Tokenization<'a>,
        large_diff_threshold: usize,
    ) -> Self {
        DiffInput {
            added: TokenizationRange::new(added),
            removed: TokenizationRange::new(removed),
            large_diff_threshold,
        }
    }

    /// Rebuild a `DiffInput` covering the full ranges of both tokenizations.
    pub fn to_owned(&'a self) -> Self {
        Self::new(self.added(), self.removed(), self.large_diff_threshold)
    }

    pub fn added(&self) -> &Tokenization<'a> {
        self.added.t
    }

    pub fn removed(&self) -> &Tokenization<'a> {
        self.removed.t
    }

    /// Split both sides around the box `(x0, y0)..(x1, y1)`
    /// (`x` indexes `removed`, `y` indexes `added`).
    fn split_at(&self, (x0, y0): (isize, isize), (x1, y1): (isize, isize)) -> (Self, Self) {
        let (removed1, removed2) = self.removed.split_at(x0, x1);
        let (added1, added2) = self.added.split_at(y0, y1);

        (
            DiffInput {
                added: added1,
                removed: removed1,
                large_diff_threshold: self.large_diff_threshold,
            },
            DiffInput {
                added: added2,
                removed: removed2,
                large_diff_threshold: self.large_diff_threshold,
            },
        )
    }

    /// Myers' `N`: number of removed-side tokens.
    fn n(&self) -> usize {
        self.removed.nb_tokens()
    }

    /// Myers' `M`: number of added-side tokens.
    fn m(&self) -> usize {
        self.added.nb_tokens()
    }

    /// Myers' sequence `a` (removed side).
    fn seq_a(&self, index: isize) -> TokenId {
        self.removed.nth_token(index)
    }

    /// Myers' sequence `b` (added side).
    fn seq_b(&self, index: isize) -> TokenId {
        self.added.nth_token(index)
    }
}

/// State of one direction of the Myers search: the `V` array of furthest
/// `x` per diagonal `k`, offset by `max` so `k` may be negative.
struct DiffTraversal<'a> {
    v: &'a mut [isize],
    max: usize,
    _end: (isize, isize),
}

impl<'a> DiffTraversal<'a> {
    /// `forward` selects the search direction; `v` must hold at least
    /// `2 * max + 1` entries (asserted).
    fn from_slice(input: &'a DiffInput<'a>, v: &'a mut [isize], forward: bool, max: usize) -> Self {
        let start = (input.removed.start_index, input.added.start_index);
        let end = (
            input.removed.one_past_end_index,
            input.added.one_past_end_index,
        );
        assert!(max * 2 < v.len());
        // Backward searches swap the roles of start and end.
        let (start, end) = if forward { (start, end) } else { (end, start) };
        let mut res = DiffTraversal { v, max, _end: end };
        if max != 0 {
            // Seed diagonal k = 1 with the range-relative start position.
            *res.v_mut(1) = start.0 - input.removed.start_index
        }
        res
    }

    #[cfg(test)]
    fn from_vector(
        input: &'a DiffInput<'a>,
        v: &'a mut Vec<isize>,
        forward: bool,
        max: usize,
    ) -> Self {
        v.resize(max * 2 + 1, 0);
        Self::from_slice(input, v, forward, max)
    }

    /// Furthest `x` recorded for diagonal `k` (may be negative).
    fn v(&self, index: isize) -> isize {
        self.v[to_usize(index + to_isize(self.max))]
    }

    fn v_mut(&mut self, index: isize) -> &mut isize {
        &mut self.v[to_usize(index + to_isize(self.max))]
    }
}

/// One forward step (`d` edits) of the basic Myers algorithm; returns the
/// edit distance when the end point is reached on this step.
#[cfg(test)]
fn diff_sequences_kernel_forward(
    input: &DiffInput,
    ctx: &mut DiffTraversal,
    d: usize,
) -> Option<usize> {
    let n = to_isize(input.n());
    let m = to_isize(input.m());
    assert!(d < ctx.max);
    let d = to_isize(d);
    for k in (-d..=d).step_by(2) {
        // Extend from whichever neighbouring diagonal reaches further.
        let mut x = if k == -d || k != d && ctx.v(k - 1) < ctx.v(k + 1) {
            ctx.v(k + 1)
        } else {
            ctx.v(k - 1) + 1
        };
        let mut y = x - k;
        // Follow the snake of equal tokens.
        while x < n && y < m && input.seq_a(x) == input.seq_b(y) {
            x += 1;
            y += 1;
        }
        *ctx.v_mut(k) = x;
        if ctx._end == (x, y) {
            return Some(to_usize(d));
        }
    }
    None
}

/// One backward step (`d` edits) of the basic Myers algorithm; mirror image
/// of `diff_sequences_kernel_forward`.
#[cfg(test)]
fn diff_sequences_kernel_backward(
    input: &DiffInput,
    ctx: &mut DiffTraversal,
    d: usize,
) -> Option<usize> {
    let n = to_isize(input.n());
    let m = to_isize(input.m());
    let delta = n - m;
    assert!(d < ctx.max);
    let d = to_isize(d);
    for k in (-d..=d).step_by(2) {
        let mut x = if k == -d || k != d && ctx.v(k + 1) < ctx.v(k - 1) {
            ctx.v(k + 1)
        } else {
            ctx.v(k - 1) + 1
        };
        // Backward diagonals are shifted by `delta`.
        let mut y = x - (k + delta);
        while 0 < x && 0 < y && input.seq_a(x - 1) == input.seq_b(y - 1) {
            x -= 1;
            y -= 1;
        }
        *ctx.v_mut(k) = x - 1;
        if ctx._end == (x, y) {
            return Some(to_usize(d));
        }
    }
    None
}

/// A wrapper around a vector of bytes that keeps track of end of lines.
#[derive(Debug, Default)]
pub struct LineSplit {
    data: Vec<u8>,
    // Byte length of each stored line; lengths sum to `data.len()`.
    line_lengths: Vec<usize>,
}

impl LineSplit {
    /// Iterate over `(start, one_past_end)` byte offsets of each line.
    pub fn iter(&self) -> impl Iterator<Item = (usize, usize)> + '_ {
        LineSplitIter {
            line_split: self,
            index: 0,
            start_of_slice: 0,
        }
    }

    pub fn data(&self) -> &[u8] {
        &self.data
    }

    /// Append `line`; if the buffer does not currently end with `\n`, the
    /// bytes are merged into the last (unterminated) line instead of
    /// starting a new one.
    pub fn append_line(&mut self, line: &[u8]) {
        if self.data.last().cloned() == Some(b'\n') {
            self.line_lengths.push(line.len());
        } else {
            match self.line_lengths.last_mut() {
                Some(len) => *len += line.len(),
                None => self.line_lengths.push(line.len()),
            }
        }
        self.data.extend_from_slice(line)
    }

    pub fn clear(&mut self) {
        self.data.clear();
        self.line_lengths.clear();
    }

    /// Total number of stored bytes (not the number of lines).
    pub fn len(&self) -> usize {
        self.data.len()
    }
}

/// Iterator state for `LineSplit::iter`: walks `line_lengths`, accumulating
/// byte offsets.
struct LineSplitIter<'a> {
    line_split: &'a LineSplit,
    start_of_slice: usize,
    index: usize,
}

impl<'a> Iterator for LineSplitIter<'a> {
    type Item = (usize, usize);
    fn next(&mut self) -> Option<Self::Item> {
        let &mut LineSplitIter {
            line_split:
                LineSplit {
                    data: _,
                    line_lengths,
                },
            index,
            start_of_slice,
        } = self;
        if index < line_lengths.len() {
            let len = line_lengths[index];
            self.start_of_slice += len;
            self.index += 1;
            Some((start_of_slice, start_of_slice + len))
        } else {
            None
        }
    }
}

/// A pair of spans with the same content in two different slices.
#[derive(Clone, Debug, Default)]
pub struct Snake {
    /// The start of the span in the removed bytes.
    pub x0: isize,

    /// The start of the span in the added bytes.
    pub y0: isize,

    /// The length of the span.
    pub len: isize,
}

impl Snake {
    /// Builder-style setter for the two start coordinates.
    fn from(mut self, x0: isize, y0: isize) -> Self {
        self.x0 = x0;
        self.y0 = y0;
        self
    }

    /// Builder-style setter for the length.
    fn len(mut self, len: isize) -> Self {
        self.len = len;
        self
    }
}

/// One step (`d`) of the bidirectional Myers search: extend the forward
/// front, then the backward front, and report the middle snake as soon as
/// the fronts overlap (per the linear-space refinement of Myers' paper).
fn diff_sequences_kernel_bidirectional(
    input: &DiffInput,
    ctx_fwd: &mut DiffTraversal,
    ctx_bwd: &mut DiffTraversal,
    d: usize,
) -> Option<(Snake, isize)> {
    let n = to_isize(input.n());
    let m = to_isize(input.m());
    let delta = n - m;
    // With an odd delta, overlap can only be detected on the forward pass.
    let odd = delta % 2 != 0;
    assert!(d < ctx_fwd.max);
    assert!(d < ctx_bwd.max);
    let d = to_isize(d);
    let mut k = -d;
    while k <= d {
        let mut x = if k == -d || k != d && ctx_fwd.v(k - 1) < ctx_fwd.v(k + 1) {
            ctx_fwd.v(k + 1)
        } else {
            ctx_fwd.v(k - 1) + 1
        };
        let mut y = x - k;
        let (x0, y0) = (x, y);
        while x < n && y < m && input.seq_a(x) == input.seq_b(y) {
            x += 1;
            y += 1;
        }
        if odd && (k - delta).abs() < d && x > ctx_bwd.v(k - delta) {
            return Some((Snake::default().from(x0, y0).len(x - x0), 2 * d - 1));
        }
        *ctx_fwd.v_mut(k) = x;

        k += 2;
    }
    let mut k = -d;
    while k <= d {
        let mut x = if k == -d || k != d && ctx_bwd.v(k + 1) < ctx_bwd.v(k - 1) {
            ctx_bwd.v(k + 1)
        } else {
            ctx_bwd.v(k - 1) + 1
        };
        let mut y = x - (k + delta);
        let x1 = x;
        while 0 < x && 0 < y && input.seq_a(x - 1) == input.seq_b(y - 1) {
            x -= 1;
            y -= 1;
        }
        if !odd && (k + delta).abs() <= d && x - 1 < ctx_fwd.v(k + delta) {
            return Some((Snake::default().from(x, y).len(x1 - x), 2 * d));
        }
        *ctx_bwd.v_mut(k) = x - 1;

        k += 2;
    }
    None
}

/// Compute the length of the edit script for `input`.
/// This is the forward version.
#[cfg(test)]
fn diff_sequences_simple_forward(input: &DiffInput, v: &mut Vec<isize>) -> usize {
    diff_sequences_simple(input, v, true)
}

/// Compute the length of the edit script for `input`.
/// This is the backward version.
#[cfg(test)]
fn diff_sequences_simple_backward(input: &DiffInput, v: &mut Vec<isize>) -> usize {
    diff_sequences_simple(input, v, false)
}

/// Run the basic (quadratic-space-free, single-direction) Myers search in
/// the direction selected by `forward`, returning the edit distance.
#[cfg(test)]
fn diff_sequences_simple(input: &DiffInput, v: &mut Vec<isize>, forward: bool) -> usize {
    // The distance can never exceed n + m (delete everything, insert everything).
    let max_result = input.n() + input.m();
    let ctx = &mut DiffTraversal::from_vector(input, v, forward, max_result);
    (0..max_result)
        .filter_map(|d| {
            if forward {
                diff_sequences_kernel_forward(input, ctx, d)
            } else {
                diff_sequences_kernel_backward(input, ctx, d)
            }
        })
        .next()
        .unwrap_or(max_result)
}

/// Compute the longest common subsequence for `input` into `dst`.
pub fn diff(input: &DiffInput, v: &mut Vec<isize>, dst: &mut Vec<Snake>) {
    dst.clear();
    // Explicit work stack instead of recursion (divide-and-conquer around
    // each middle snake); `v` is the scratch buffer shared by all searches.
    enum Task<'a> {
        Diff(DiffInput<'a>),
        PushSnake(Snake),
    }
    use Task::*;

    let mut todo = vec![Diff(input.to_owned())];
    while let Some(task) = todo.pop() {
        match task {
            Diff(input) => {
                let n = to_isize(input.n());
                // A side with no tokens left cannot contribute any snake.
                fn trivial_diff(tok: &TokenizationRange) -> bool {
                    tok.one_past_end_index <= tok.start_index
                }

                if trivial_diff(&input.removed) || trivial_diff(&input.added) {
                    continue;
                }

                let snake = diff_sequences_bidirectional_snake(&input, v);
                if let Some((snake @ Snake { x0, y0, len }, d)) = snake {
                    if 1 < d {
                        // Recurse on both sides of the middle snake; pushed
                        // in reverse so snakes land in `dst` in order.
                        let (input1, input2) = input.split_at((x0, y0), (x0 + len, y0 + len));
                        todo.push(Diff(input2));
                        if len != 0 {
                            todo.push(PushSnake(snake));
                        }
                        todo.push(Diff(input1));
                    } else {
                        // d <= 1: at most one edit, solvable directly.
                        let SplittingPoint { sp, dx, dy } = find_splitting_point(&input);
                        let x0 = input.removed.start_index;
                        let y0 = input.added.start_index;
                        if sp != 0 {
                            dst.push(Snake::default().from(x0, y0).len(sp));
                        }
                        let len = n - sp - dx;
                        if len != 0 {
                            dst.push(Snake::default().from(x0 + sp + dx, y0 + sp + dy).len(len));
                        }
                    }
                }
            }
            PushSnake(snake) => dst.push(snake),
        }
    }
}

/// Result of `find_splitting_point`: `sp` is the length of the common
/// prefix; `dx`/`dy` tell which side carries the single extra token.
struct SplittingPoint {
    sp: isize,
    dx: isize,
    dy: isize,
}

// Find the splitting point when two sequences differ by one element.
fn find_splitting_point(input: &DiffInput) -> SplittingPoint {
    use std::cmp::Ordering::*;

    let n = to_isize(input.n());
    let m = to_isize(input.m());
    // The longer side is the one holding the extra token.
    let (short, long, nb_tokens, dx, dy) = match n.cmp(&m) {
        Less => (&input.removed, &input.added, n, 0, 1),
        Greater => (&input.added, &input.removed, m, 1, 0),
        Equal => (&input.added, &input.removed, m, 0, 0),
    };
    let mut sp = nb_tokens;
    for i in 0..nb_tokens {
        if long.nth_token(i) != short.nth_token(i) {
            sp = i;
            break;
        }
    }
    SplittingPoint { sp, dx, dy }
}

/// Compute the length of the edit script for `input`.
/// This is the bidirectional version.
#[cfg(test)]
fn diff_sequences_bidirectional(input: &DiffInput, v: &mut Vec<isize>) -> usize {
    if input.n() + input.m() == 0 {
        return 0;
    }
    to_usize(diff_sequences_bidirectional_snake(input, v).unwrap().1)
}

/// Find the middle snake of `input` with the bidirectional Myers search,
/// returning it together with the edit distance, or `None` when the search
/// was cut short by `large_diff_threshold`.
fn diff_sequences_bidirectional_snake(
    input: &DiffInput,
    v: &mut Vec<isize>,
) -> Option<(Snake, isize)> {
    let mut max = (input.n() + input.m() + 1) / 2 + 1;
    // Cap the number of steps on huge diffs; 0 means "no cap".
    if input.large_diff_threshold > 0 {
        max = max.min(input.large_diff_threshold);
    }
    let iter_len = 2 * max + 1;
    // One V array for each direction, carved out of the same scratch buffer.
    v.resize(2 * iter_len, 0);

    let (v1, v2) = v.split_at_mut(iter_len);
    let ctx_fwd = &mut DiffTraversal::from_slice(input, v1, true, max);
    let ctx_bwd = &mut DiffTraversal::from_slice(input, v2, false, max);
    let result = (0..max)
        .filter_map(|d| diff_sequences_kernel_bidirectional(input, ctx_fwd, ctx_bwd, d))
        .next();
    match result {
        Some(mut result) => {
            // The kernels work in range-relative coordinates; shift the
            // snake back into absolute token indices.
            result.0.x0 += input.removed.start_index;
            result.0.y0 += input.added.start_index;
            Some(result)
        }
        None => None,
    }
}

/// Checked in debug builds, raw cast in release (hot path).
fn to_isize(input: usize) -> isize {
    if cfg!(debug_assertions) {
        isize::try_from(input).unwrap()
    } else {
        input as _
    }
}

/// Checked in debug builds, raw cast in release (hot path).
fn to_usize(input: isize) -> usize {
    if cfg!(debug_assertions) {
        usize::try_from(input).unwrap()
    } else {
        input as _
    }
}

/// Coarse character class used to cut the input into tokens.
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
enum TokenKind {
    Other,
    Word,
    Spaces,
}

/// Tokenize data from `src` from the position `ofs` into `tokens`.
pub fn tokenize(src: &[u8], ofs: usize, tokens: &mut Vec<Span>) {
    // Only emit non-empty spans.
    let mut push = |lo: usize, hi: usize| {
        if lo < hi {
            tokens.push((lo, hi))
        }
    };
    let mut kind = TokenKind::Other;
    let mut lo = ofs;
    #[allow(clippy::needless_range_loop)]
    for (grapheme_start, _, g) in src[ofs..].grapheme_indices() {
        // `grapheme_indices` offsets are relative to the subslice.
        let hi = grapheme_start + ofs;
        let oldkind = kind;
        kind = classify_grapheme(g);
        // A token ends on a class change; `Other` graphemes are never
        // merged, each one is its own token.
        if kind != oldkind || oldkind == TokenKind::Other {
            push(lo, hi);
            lo = hi
        }
    }
    // Flush the trailing token, if any.
    push(lo, src.len());
}

/// Classify a grapheme by its first char: word characters (alphanumeric or
/// `_`), horizontal whitespace, or anything else.
fn classify_grapheme(g: &str) -> TokenKind {
    let first_char = g.chars().next().unwrap_or_default();
    if first_char.is_alphanumeric() || first_char == '_' {
        TokenKind::Word
    } else if first_char == ' ' || first_char == '\t' {
        TokenKind::Spaces
    } else {
        TokenKind::Other
    }
}

#[cfg(test)]
mod tests_lib;
-------------------------------------------------------------------------------- /src/diffr_lib/tests_lib.rs: --------------------------------------------------------------------------------
use super::*;
use DiffKind::*;

// Test-only convenience: byte span of the `n`th token of a range.
impl<'a> TokenizationRange<'a> {
    fn nth_span(&self, n: isize) -> Span {
        self.t.spans[to_usize(self.start_index + n)]
    }
}

/// Which side of the diff a test segment belongs to.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum DiffKind {
    Keep,
    Added,
    Removed,
}

fn string_of_bytes(buf: &[u8]) -> String {
    String::from_utf8_lossy(buf).into()
}

/// Render each token span of `buf` as a lossy string.
///
/// NOTE(review): generic parameters were garbled in this dump; restored from
/// the call sites (`Item = Span`).
fn to_strings<It>(buf: &[u8], tokens: It) -> Vec<String>
where
    It: Iterator<Item = Span>,
{
    mk_vec(tokens.map(|range| string_of_bytes(&buf[range.0..range.1])))
}

/// Collect an iterator into a `Vec` (helper to keep test assertions short).
fn mk_vec<T, It>(it: It) -> Vec<T>
where
    It: Iterator<Item = T>,
{
    it.collect()
}

/// Bytes of the `idx`th token of `input`.
fn nth_token<'a>(input: &'a TokenizationRange, idx: isize) -> &'a [u8] {
    let span = input.nth_span(idx);
&input.t.data()[span.0..span.1] 38 | } 39 | 40 | fn compress_path(values: &Vec<(Vec, DiffKind)>) -> Vec<(Vec, DiffKind)> { 41 | let mut values = values.clone(); 42 | let mut it = values.iter_mut(); 43 | let mut result = vec![]; 44 | let mut current = it.next(); 45 | for next in it { 46 | match current { 47 | Some(ref mut c) => { 48 | if c.1 == next.1 { 49 | c.0.extend_from_slice(&next.0) 50 | } else { 51 | result.push(c.clone()); 52 | *c = next; 53 | } 54 | } 55 | None => panic!(), 56 | } 57 | } 58 | 59 | if let Some(last) = current { 60 | result.push(last.clone()); 61 | } 62 | result 63 | } 64 | 65 | fn dummy_tokenize(data: &[u8]) -> Vec { 66 | let mut toks = vec![]; 67 | for i in 0..data.len() { 68 | toks.push((i, i + 1)); 69 | } 70 | toks 71 | } 72 | 73 | fn really_tokenize(data: &[u8]) -> Vec { 74 | let mut toks = vec![]; 75 | tokenize(data, 0, &mut toks); 76 | toks 77 | } 78 | 79 | fn diff_sequences_test(expected: &[(&[u8], DiffKind)], seq_a: &[u8], seq_b: &[u8]) { 80 | diff_sequences_test_aux(expected, seq_a, seq_b, dummy_tokenize) 81 | } 82 | 83 | fn diff_sequences_test_tokenized(expected: &[(&[u8], DiffKind)], seq_a: &[u8], seq_b: &[u8]) { 84 | diff_sequences_test_aux(expected, seq_a, seq_b, really_tokenize) 85 | } 86 | 87 | fn diff_sequences_test_aux( 88 | expected: &[(&[u8], DiffKind)], 89 | seq_a: &[u8], 90 | seq_b: &[u8], 91 | tok: impl Fn(&[u8]) -> Vec, 92 | ) { 93 | let toks_a = tok(seq_a); 94 | let toks_b = tok(seq_b); 95 | let m = TokenMap::new(&mut [(toks_a.iter(), &seq_a), (toks_b.iter(), &seq_b)]); 96 | let tok_a = Tokenization::new(seq_a, &toks_a, &m); 97 | let tok_b = Tokenization::new(seq_b, &toks_b, &m); 98 | let input = DiffInput::new(&tok_b, &tok_a, 123); 99 | let input_r = DiffInput::new(&tok_a, &tok_b, 123); 100 | 101 | let mut v = vec![]; 102 | let result = diff_sequences_simple_forward(&input, &mut v); 103 | let result_bwd = diff_sequences_simple_backward(&input, &mut v); 104 | let result_bidi = diff_sequences_bidirectional(&input, &mut 
v); 105 | let result_r = diff_sequences_simple(&input_r, &mut v, true); 106 | let result_r_bwd = diff_sequences_simple(&input_r, &mut v, false); 107 | let result_r_bidi = diff_sequences_bidirectional(&input_r, &mut v); 108 | 109 | let mut result_complete = vec![]; 110 | diff(&input, &mut v, &mut result_complete); 111 | let mut result_r_complete = vec![]; 112 | diff(&input_r, &mut v, &mut result_r_complete); 113 | 114 | let d = expected 115 | .iter() 116 | .map(|(buf, kind)| match kind { 117 | Added | Removed => tok(buf).len(), 118 | Keep => 0, 119 | }) 120 | .sum::(); 121 | 122 | assert_eq!(d, result); 123 | assert_eq!(d, result_r); 124 | assert_eq!(d, result_bwd); 125 | assert_eq!(d, result_r_bwd); 126 | assert_eq!(d, result_bidi); 127 | assert_eq!(d, result_r_bidi); 128 | 129 | for complete in &[&result_complete, &result_r_complete] { 130 | let all_snakes = complete.iter().fold(0, |acc, s| acc + s.len); 131 | 132 | let d_calc = input.n() + input.m() - 2 * to_usize(all_snakes); 133 | assert_eq!(d, d_calc); 134 | } 135 | // construct edit script 136 | let mut x0 = 0; 137 | let mut y0 = 0; 138 | let mut script = vec![]; 139 | for snake in result_complete { 140 | let Snake { 141 | x0: x, y0: y, len, .. 
142 | } = snake; 143 | 144 | if x0 != x { 145 | assert!(x0 < x); 146 | let lo = input.removed.nth_span(x0).0; 147 | let hi = input.removed.nth_span(x - 1).1; 148 | script.push((input.removed.t.data[lo..hi].to_vec(), Removed)); 149 | } 150 | if y0 != y { 151 | assert!(y0 < y); 152 | let lo = input.added.nth_span(y0).0; 153 | let hi = input.added.nth_span(y - 1).1; 154 | script.push((input.added.t.data[lo..hi].to_vec(), Added)); 155 | } 156 | 157 | let mut added = vec![]; 158 | let mut removed = vec![]; 159 | for i in 0..len { 160 | let r = input.removed.nth_span(x + i); 161 | removed.extend_from_slice(&input.removed.t.data[r.0..r.1]); 162 | let r = input.added.nth_span(y + i); 163 | added.extend_from_slice(&input.added.t.data[r.0..r.1]); 164 | } 165 | 166 | assert_eq!(added, removed, "{:?}", snake); 167 | script.push((added.to_vec(), Keep)); 168 | 169 | x0 = x + len; 170 | y0 = y + len; 171 | } 172 | 173 | let x = input.removed.nb_tokens(); 174 | let x0 = to_usize(x0); 175 | if x0 != x { 176 | assert!(x0 < x); 177 | script.push((input.removed.t.data[x0..x].to_vec(), Removed)); 178 | } 179 | let y = input.added.nb_tokens(); 180 | let y0 = to_usize(y0); 181 | if y0 != y { 182 | assert!(y0 < y); 183 | script.push((input.added.t.data[y0..y].to_vec(), Added)); 184 | } 185 | 186 | assert_eq!( 187 | &*mk_vec(expected.iter().map(|p| (string_of_bytes(p.0), p.1))), 188 | &*mk_vec(script.iter().map(|p| (string_of_bytes(&p.0), p.1))), 189 | ); 190 | } 191 | 192 | #[test] 193 | fn compress_path_test() { 194 | let test = |expected: Vec<(Vec, DiffKind)>, input| { 195 | assert_eq!(expected, compress_path(&input)); 196 | }; 197 | 198 | test(vec![], vec![]); 199 | 200 | test( 201 | vec![(b"abc".to_vec(), Added)], 202 | vec![(b"abc".to_vec(), Added)], 203 | ); 204 | test( 205 | vec![(b"abcdef".to_vec(), Added)], 206 | vec![(b"abc".to_vec(), Added), (b"def".to_vec(), Added)], 207 | ); 208 | test( 209 | vec![(b"abc".to_vec(), Added), (b"def".to_vec(), Removed)], 210 | 
/// The classic example from Myers' diff paper: "abcabba" vs "cbabac".
#[test]
fn diff_sequences_test_1() {
    diff_sequences_test(
        &[
            (b"a", Removed),
            (b"c", Added),
            (b"b", Keep),
            (b"c", Removed),
            (b"ab", Keep),
            (b"b", Removed),
            (b"a", Keep),
            (b"c", Added),
        ],
        b"abcabba",
        b"cbabac",
    )
}

/// Insertions interleaved before a shared run, plus a trailing removal.
#[test]
fn diff_sequences_test_2() {
    diff_sequences_test(
        &[(b"xaxbx", Added), (b"abc", Keep), (b"y", Removed)],
        b"abcy",
        b"xaxbxabc",
    )
}

/// Completely disjoint sequences: everything removed, everything added.
#[test]
fn diff_sequences_test_3() {
    diff_sequences_test(&[(b"abc", Removed), (b"defgh", Added)], b"abc", b"defgh")
}

/// Disjoint prefixes with a shared suffix.
#[test]
fn diff_sequences_test_4() {
    diff_sequences_test(
        &[(b"abc", Removed), (b"defg", Added), (b"zzz", Keep)],
        b"abczzz",
        b"defgzzz",
    )
}

/// Shared prefix with disjoint suffixes.
#[test]
fn diff_sequences_test_5() {
    diff_sequences_test(
        &[(b"zzz", Keep), (b"abcd", Removed), (b"efgh", Added)],
        b"zzzabcd",
        b"zzzefgh",
    )
}

/// Empty "removed" side: the whole added sequence is an insertion.
#[test]
fn diff_sequences_test_6() {
    diff_sequences_test(&[(b"abcd", Added)], b"", b"abcd")
}

/// Both sides empty: the edit script is empty.
#[test]
fn diff_sequences_test_7() {
    diff_sequences_test(&[], b"", b"")
}
/// Fixed-size arrays compare element-wise via `PartialEq`.
#[test]
fn range_equality_test() {
    let reference = [1, 2, 3];
    assert!(reference == [1, 2, 3]);
    assert!(reference != [1, 2, 4]);
}
/// Brute-force oracle: all longest common subsequences of `seq_a` and `seq_b`.
///
/// Exponential in `seq_a.len()` (it enumerates every one of the 2^n
/// subsequences), so it is only suitable as a reference for tiny inputs.
fn get_lcs(seq_a: &[u8], seq_b: &[u8]) -> Vec<Vec<u8>> {
    /// Enumerate all 2^n subsequences of `seq_a`, divide-and-conquer style.
    fn subsequences(seq_a: &[u8]) -> Vec<Vec<u8>> {
        let res: Vec<Vec<u8>> = {
            if seq_a.is_empty() {
                vec![vec![]]
            } else if seq_a.len() == 1 {
                vec![vec![], seq_a.to_owned()]
            } else {
                // Every subsequence is a subsequence of the left half
                // followed by one of the right half.
                let (seq_a1, seq_a2) = seq_a.split_at(seq_a.len() / 2);
                let mut res = vec![];
                for part1 in subsequences(seq_a1) {
                    for part2 in subsequences(seq_a2) {
                        let mut combined = part1.clone();
                        combined.extend_from_slice(&part2);
                        res.push(combined);
                    }
                }
                res
            }
        };
        assert_eq!(res.len(), 1 << seq_a.len());
        res
    }

    /// Is `subseq` a (not necessarily contiguous) subsequence of `seq`?
    /// Greedy matching against a shared iterator is correct here.
    fn is_subseq(subseq: &[u8], seq: &[u8]) -> bool {
        let mut it = seq.iter();
        subseq.iter().all(|target| it.any(|b| b == target))
    }

    let mut bests: Vec<Vec<u8>> = vec![];
    let mut best_len = 0;
    for subseq in subsequences(seq_a) {
        if subseq.len() < best_len || !is_subseq(&subseq, seq_b) {
            continue;
        }
        if best_len < subseq.len() {
            // Strictly longer: previous candidates are no longer maximal.
            bests.clear();
            best_len = subseq.len();
        }
        // Here subseq.len() == best_len, so it is always a candidate.
        bests.push(subseq)
    }
    bests
}

/// Sanity-check the brute-force LCS oracle itself.
#[test]
fn test_get_lcs() {
    let expected: &[u8] = b"cd";
    assert_eq!(expected, &**get_lcs(b"abcd", b"cdef").first().unwrap())
}
/// `to_usize` must panic when handed a negative value.
#[should_panic]
#[test]
fn to_usize_checked_negative_test() {
    to_usize(-1_isize);
}

/// A `LineSplit` built directly from data and per-line lengths round-trips.
#[test]
fn split_lines_test() {
    let input: &[u8] = b"abcd\nefgh\nij";
    // line_lengths: "abcd\n" (5) + "efgh\n" (5) + "ij" (2) == input.len()
    let split = LineSplit {
        data: input.to_vec(),
        line_lengths: vec![5, 5, 2],
    };
    check_split(input, &split)
}

/// `append_line` accepts arbitrary chunks (not necessarily ending at a
/// newline) and the result must still round-trip.
#[test]
fn split_lines_append_test() {
    let input: &[u8] = b"abcd\nefgh\nij";
    let mut split = LineSplit::default();
    split.append_line(&input[..3]);
    split.append_line(&input[3..6]);
    split.append_line(&input[6..]);
    check_split(input, &split)
}

/// Concatenating the `(lo, hi)` ranges yielded by `split.iter()` must
/// reconstruct `input` exactly.
fn check_split(input: &[u8], split: &LineSplit) {
    assert_eq!(
        input,
        &*split.iter().fold(vec![], |mut acc, (lo, hi)| {
            acc.extend_from_slice(&input[lo..hi]);
            acc
        })
    );
}
/// Regression test for Github issue 15 (variant 2): a line whose content
/// itself starts with "-"/"+" must not confuse the diff highlighting.
#[test]
fn issue15_2() {
    diff_sequences_test_tokenized(
        &[
            (b"-", Removed),
            (b"+", Added),
            (b" --include \'+ */\'", Keep),
            (b" ", Added),
            (b"\r\n", Keep),
        ],
        b"- --include '+ */'\r\n",
        b"+ --include '+ */' \r\n",
    )
}

/// Regression test for Github issue 27: segment boundaries around CRLF
/// line endings and long runs of identical tokens.
#[test]
fn issue27() {
    diff_sequences_test(
        &[
            (b"note: ", Keep),
            (b"AAA", Removed),
            (b"BBB CCC", Added),
            (b"\r\n", Keep),
        ],
        b"note: AAA\r\n",
        b"note: BBB CCC\r\n",
    );
    // A run of 15 identical tokens diffed against a run of 10.
    diff_sequences_test(
        &[(b"^", Added), (b"^^^^^^^^^^", Keep), (b"^^^^", Added)],
        b"^^^^^^^^^^",
        b"^^^^^^^^^^^^^^^",
    );
    diff_sequences_test(
        &[
            (b"a", Keep),
            (b"cbc", Added),
            (b"bcz", Keep),
            (b"c", Added),
            (b"z", Keep),
            (b"abz", Added),
        ],
        b"abczz",
        b"acbcbczczabz",
    );
}
646 | let seq = Tokenization::new(seq, &toks_seq, &m); 647 | let lcs = Tokenization::new(lcs, &toks_lcs, &m); 648 | let opt_result = optimize_partition(&seq, &lcs); 649 | let seq = TokenizationRange::new(&seq); 650 | let mut it = opt_result.path.iter().cloned(); 651 | let mut prev = match it.next() { 652 | None => { 653 | assert!(alternatives.iter().any(|e| e.expected.is_empty())); 654 | return; 655 | } 656 | Some(val) => val, 657 | }; 658 | let mut partition = vec![]; 659 | for i in it { 660 | let mut part = vec![]; 661 | for j in prev..i { 662 | part.extend_from_slice(nth_token(&seq, j)); 663 | } 664 | partition.push(part); 665 | prev = i; 666 | } 667 | assert!( 668 | alternatives.iter().any(|e| { 669 | let expected = e 670 | .expected 671 | .iter() 672 | .map(|slice| slice.to_vec()) 673 | .collect::>(); 674 | expected == &*partition 675 | && e.expected_starts_with_shared == opt_result.starts_with_shared 676 | }), 677 | "alternatives:\n\t{:?}\n\nactual:\n\t{:?}", 678 | &alternatives, 679 | (&partition, opt_result.starts_with_shared), 680 | ) 681 | } 682 | 683 | fn test_optimize_partition1( 684 | expected: &[&[u8]], 685 | expected_starts_with_shared: bool, 686 | seq: &[u8], 687 | lcs: &[u8], 688 | ) { 689 | let expected = vec![TestNormalizePartitionExpected { 690 | expected, 691 | expected_starts_with_shared, 692 | }]; 693 | test_optimize_alternatives(&expected, seq, lcs) 694 | } 695 | 696 | #[test] 697 | fn test_optimize_partition() { 698 | test_optimize_partition1(&[b"abcd"], true, b"abcd", b"abcd"); 699 | test_optimize_partition1(&[b"abcd"], false, b"abcd", b""); 700 | test_optimize_partition1(&[b"a", b"xyz", b"bc"], true, b"axyzbc", b"abc"); 701 | test_optimize_partition1(&[b"zab", b"a"], false, b"zaba", b"a"); 702 | test_optimize_partition1(&[b"k", b"a", b"xyz", b"bc"], false, b"kaxyzbc", b"abc"); 703 | test_optimize_partition1( 704 | &[b"k", b"a", b"xyz", b"bc", b"x"], 705 | false, 706 | b"kaxyzbcx", 707 | b"abc", 708 | ); 709 | test_optimize_partition1( 710 
/// How the line-number margin is sized when `--line-numbers` is active.
#[derive(Debug, Clone, Copy)]
pub enum LineNumberStyle {
    Compact,
    Aligned,
    Fixed(usize),
}

impl LineNumberStyle {
    /// Minimum number of columns this style reserves for a line number.
    fn min_width(&self) -> usize {
        if let LineNumberStyle::Fixed(width) = *self {
            width
        } else {
            // Compact and Aligned let the hunk header determine the width.
            0
        }
    }
}
/// Capture the current time, but only when timings are being collected.
fn now(do_timings: bool) -> Option<SystemTime> {
    match do_timings {
        true => Some(SystemTime::now()),
        false => None,
    }
}

/// Milliseconds elapsed since `time`; 0 when timing is disabled or the
/// clock went backwards (non-monotonic clocks make `elapsed` fail, and
/// such a short interval can safely be reported as 0).
fn duration_ms_since(time: &Option<SystemTime>) -> u128 {
    time.as_ref()
        .and_then(|t| t.elapsed().ok())
        .map(|elapsed| elapsed.as_millis())
        .unwrap_or(0)
}
colorspec 115 | } 116 | 117 | #[derive(Default)] 118 | struct ExecStats { 119 | time_computing_diff_ms: u128, 120 | time_lcs_ms: u128, 121 | time_opt_lcs_ms: u128, 122 | total_time_ms: u128, 123 | program_start: Option, 124 | } 125 | 126 | impl ExecStats { 127 | fn new(debug: bool) -> Self { 128 | ExecStats { 129 | time_computing_diff_ms: 0, 130 | time_lcs_ms: 0, 131 | time_opt_lcs_ms: 0, 132 | total_time_ms: 0, 133 | program_start: now(debug), 134 | } 135 | } 136 | 137 | /// Should we call SystemTime::now at all? 138 | fn do_timings(&self) -> bool { 139 | self.program_start.is_some() 140 | } 141 | 142 | fn stop(&mut self) { 143 | if self.do_timings() { 144 | self.total_time_ms = duration_ms_since(&self.program_start); 145 | } 146 | } 147 | 148 | fn report(&self) -> std::io::Result<()> { 149 | self.report_into(&mut std::io::stderr()) 150 | } 151 | 152 | fn report_into(&self, w: &mut W) -> std::io::Result<()> 153 | where 154 | W: std::io::Write, 155 | { 156 | const WORD_PADDING: usize = 35; 157 | const FIELD_PADDING: usize = 15; 158 | if self.do_timings() { 159 | let format_header = |name| format!("{} (ms)", name); 160 | let format_ratio = |dt: u128| { 161 | format!( 162 | "({:3.3}%)", 163 | 100.0 * (dt as f64) / (self.total_time_ms as f64) 164 | ) 165 | }; 166 | let mut report = |name: &'static str, dt: u128| { 167 | writeln!( 168 | w, 169 | "{:>w$} {:>f$} {:>f$}", 170 | format_header(name), 171 | dt, 172 | format_ratio(dt), 173 | w = WORD_PADDING, 174 | f = FIELD_PADDING, 175 | ) 176 | }; 177 | report("hunk processing time", self.time_computing_diff_ms)?; 178 | report("-- compute lcs", self.time_lcs_ms)?; 179 | report("-- optimize lcs", self.time_opt_lcs_ms)?; 180 | writeln!( 181 | w, 182 | "{:>w$} {:>f$}", 183 | format_header("total processing time"), 184 | self.total_time_ms, 185 | w = WORD_PADDING, 186 | f = FIELD_PADDING, 187 | )?; 188 | } 189 | Ok(()) 190 | } 191 | } 192 | 193 | struct HunkBuffer { 194 | v: Vec, 195 | diff_buffer: Vec, 196 | added_tokens: 
Vec<(usize, usize)>, 197 | removed_tokens: Vec<(usize, usize)>, 198 | line_number_info: Option, 199 | lines: LineSplit, 200 | config: AppConfig, 201 | margin: Vec, 202 | warning_lines: Vec, 203 | stats: ExecStats, 204 | } 205 | 206 | #[derive(Default)] 207 | struct Margin<'a> { 208 | lino_minus: usize, 209 | lino_plus: usize, 210 | margin: &'a mut [u8], 211 | half_margin: usize, 212 | } 213 | 214 | const MARGIN_TAB_STOP: usize = 8; 215 | 216 | impl<'a> Margin<'a> { 217 | fn new(header: &'a HunkHeader, margin: &'a mut [u8], config: &'a AppConfig) -> Self { 218 | let full_margin = header.width(config.line_numbers_style); 219 | let half_margin = full_margin / 2; 220 | 221 | // If line number is 0, the column is empty and 222 | // shouldn't be printed 223 | let margin_size = if header.minus_range.0 == 0 || header.plus_range.0 == 0 { 224 | half_margin 225 | } else { 226 | full_margin 227 | }; 228 | assert!(margin.len() >= margin_size); 229 | Margin { 230 | lino_plus: header.plus_range.0, 231 | lino_minus: header.minus_range.0, 232 | margin: &mut margin[..margin_size], 233 | half_margin, 234 | } 235 | } 236 | 237 | fn write_margin_padding(&mut self, out: &mut impl WriteColor) -> io::Result<()> { 238 | if self.margin.len() % MARGIN_TAB_STOP != 0 { 239 | write!(out, "\t")?; 240 | } 241 | Ok(()) 242 | } 243 | 244 | fn write_margin_changed( 245 | &mut self, 246 | is_plus: bool, 247 | config: &AppConfig, 248 | out: &mut impl WriteColor, 249 | ) -> io::Result<()> { 250 | let mut margin_buf = &mut self.margin[..]; 251 | let color; 252 | if is_plus { 253 | color = &config.added_face; 254 | if self.lino_minus != 0 { 255 | write!(margin_buf, "{:w$} ", ' ', w = self.half_margin)?; 256 | } 257 | write!(margin_buf, "{:w$}", self.lino_plus, w = self.half_margin)?; 258 | self.lino_plus += 1; 259 | } else { 260 | color = &config.removed_face; 261 | write!(margin_buf, "{:w$}", self.lino_minus, w = self.half_margin)?; 262 | if self.lino_plus != 0 { 263 | write!(margin_buf, " {:w$}", ' ', 
w = self.half_margin)?; 264 | } 265 | self.lino_minus += 1; 266 | }; 267 | output(self.margin, 0, self.margin.len(), color, out)?; 268 | if config.line_numbers_aligned() { 269 | self.write_margin_padding(out)?; 270 | } 271 | Ok(()) 272 | } 273 | 274 | fn write_margin_context( 275 | &mut self, 276 | config: &AppConfig, 277 | out: &mut impl WriteColor, 278 | ) -> io::Result<()> { 279 | if self.lino_minus != self.lino_plus { 280 | write!(out, "{:w$}", self.lino_minus, w = self.half_margin)?; 281 | } else { 282 | write!(out, "{:w$}", ' ', w = self.half_margin)?; 283 | } 284 | write!(out, " {:w$}", self.lino_plus, w = self.half_margin)?; 285 | if config.line_numbers_aligned() { 286 | self.write_margin_padding(out)?; 287 | } 288 | self.lino_minus += 1; 289 | self.lino_plus += 1; 290 | Ok(()) 291 | } 292 | } 293 | 294 | fn shared_spans(added_tokens: &Tokenization, diff_buffer: &[Snake]) -> Vec<(usize, usize)> { 295 | let mut shared_spans = vec![]; 296 | for snake in diff_buffer.iter() { 297 | for i in 0..snake.len { 298 | shared_spans.push(added_tokens.nth_span(snake.y0 + i)); 299 | } 300 | } 301 | shared_spans 302 | } 303 | 304 | const MAX_MARGIN: usize = 41; 305 | 306 | impl HunkBuffer { 307 | fn new(config: AppConfig) -> Self { 308 | let debug = config.debug; 309 | HunkBuffer { 310 | v: vec![], 311 | diff_buffer: vec![], 312 | added_tokens: vec![], 313 | removed_tokens: vec![], 314 | line_number_info: None, 315 | lines: Default::default(), 316 | config, 317 | margin: vec![0; MAX_MARGIN], 318 | warning_lines: vec![], 319 | stats: ExecStats::new(debug), 320 | } 321 | } 322 | 323 | // Returns the number of completely printed snakes 324 | fn paint_line( 325 | data: &[u8], 326 | &(data_lo, data_hi): &(usize, usize), 327 | no_highlight: &ColorSpec, 328 | highlight: &ColorSpec, 329 | shared: &mut Peekable, 330 | out: &mut Stream, 331 | ) -> io::Result<()> 332 | where 333 | Stream: WriteColor, 334 | Positions: Iterator, 335 | { 336 | let mut y = data_lo + 1; 337 | // XXX: skip 
    /// Runs `process` on the buffered hunk and accumulates its wall-clock
    /// duration into the hunk-processing counter (only when timings are
    /// enabled; `now` returns `None` otherwise and the delta maps to 0).
    fn process_with_stats<Stream>(&mut self, out: &mut Stream) -> io::Result<()>
    where
        Stream: WriteColor,
    {
        let start = now(self.stats.do_timings());
        let result = self.process(out);
        self.stats.time_computing_diff_ms += duration_ms_since(&start);
        result
    }
io::Result<()> 408 | where 409 | Stream: WriteColor, 410 | { 411 | let Self { 412 | v, 413 | diff_buffer, 414 | added_tokens, 415 | removed_tokens, 416 | line_number_info, 417 | lines, 418 | config, 419 | margin, 420 | warning_lines, 421 | stats, 422 | } = self; 423 | let mut margin = match line_number_info { 424 | Some(lni) => Margin::new(lni, margin, config), 425 | None => Default::default(), 426 | }; 427 | let data = lines.data(); 428 | let m = TokenMap::new(&mut [(removed_tokens.iter(), data), (added_tokens.iter(), data)]); 429 | let removed = Tokenization::new(data, removed_tokens, &m); 430 | let added = Tokenization::new(data, added_tokens, &m); 431 | let tokens = DiffInput::new(&added, &removed, config.large_diff_threshold); 432 | let start = now(stats.do_timings()); 433 | diffr_lib::diff(&tokens, v, diff_buffer); 434 | // TODO output the lcs directly out of `diff` instead 435 | let shared_spans = shared_spans(&added, diff_buffer); 436 | let lcs = Tokenization::new(data, &shared_spans, &m); 437 | stats.time_lcs_ms += duration_ms_since(&start); 438 | let start = now(stats.do_timings()); 439 | let normalized_lcs_added = optimize_partition(&added, &lcs); 440 | let normalized_lcs_removed = optimize_partition(&removed, &lcs); 441 | stats.time_opt_lcs_ms += duration_ms_since(&start); 442 | let mut shared_added = normalized_lcs_added.shared_segments(&added).peekable(); 443 | let mut shared_removed = normalized_lcs_removed.shared_segments(&removed).peekable(); 444 | let mut warnings = warning_lines.iter().peekable(); 445 | let defaultspec = ColorSpec::default(); 446 | 447 | for (i, range) in lines.iter().enumerate() { 448 | if let Some(&&nline) = warnings.peek() { 449 | if nline == i { 450 | let w = &lines.data()[range.0..range.1]; 451 | output(w, 0, w.len(), &defaultspec, out)?; 452 | warnings.next(); 453 | continue; 454 | } 455 | } 456 | let first = data[range.0]; 457 | match first { 458 | b'-' | b'+' => { 459 | let is_plus = first == b'+'; 460 | let (nhl, hl, 
    /// Record one added ("+") line of the current hunk.
    fn push_added(&mut self, line: &[u8]) {
        self.push_aux(line, true)
    }

    /// Record one removed ("-") line of the current hunk.
    fn push_removed(&mut self, line: &[u8]) {
        self.push_aux(line, false)
    }

    /// Strip escape codes from `line`, store it in the line buffer, and
    /// tokenize its content into the added or removed token list.
    fn push_aux(&mut self, line: &[u8], added: bool) {
        // XXX: skip leading token
        let mut ofs = self.lines.len() + 1;
        add_raw_line(&mut self.lines, line);
        // get back the line sanitized from escape codes:
        let line = &self.lines.data()[ofs..];
        // skip leading spaces
        ofs += line
            .iter()
            .take_while(|ch| ch.is_ascii_whitespace())
            .count();
        diffr_lib::tokenize(
            self.lines.data(),
            ofs,
            if added {
                &mut self.added_tokens
            } else {
                &mut self.removed_tokens
            },
        );
    }
// process hunks 538 | loop { 539 | stdin.read_until(b'\n', &mut buffer)?; 540 | if buffer.is_empty() { 541 | break; 542 | } 543 | 544 | let first = first_after_escape(&buffer); 545 | if in_hunk { 546 | hunk_line_number += 1; 547 | match first { 548 | Some(b'+') => self.push_added(&buffer), 549 | Some(b'-') => self.push_removed(&buffer), 550 | Some(b' ') => add_raw_line(&mut self.lines, &buffer), 551 | Some(b'\\') => { 552 | add_raw_line(&mut self.lines, &buffer); 553 | self.warning_lines.push(hunk_line_number - 1); 554 | } 555 | _ => { 556 | self.process_with_stats(&mut stdout)?; 557 | in_hunk = false; 558 | } 559 | } 560 | } 561 | if !in_hunk { 562 | hunk_line_number = 0; 563 | in_hunk = first == Some(b'@'); 564 | if self.config.has_line_numbers() && in_hunk { 565 | self.line_number_info = parse_line_number(&buffer); 566 | } 567 | output(&buffer, 0, buffer.len(), &ColorSpec::default(), &mut stdout)?; 568 | } 569 | 570 | buffer.clear(); 571 | } 572 | 573 | // flush remaining hunk 574 | self.process_with_stats(&mut stdout)?; 575 | self.stats.stop(); 576 | self.stats.report()?; 577 | Ok(()) 578 | } 579 | } 580 | 581 | // TODO count whitespace characters as well here 582 | fn add_raw_line(dst: &mut LineSplit, line: &[u8]) { 583 | let mut i = 0; 584 | let len = line.len(); 585 | while i < len { 586 | i += skip_all_escape_code(&line[i..]); 587 | let tok_len = skip_token(&line[i..]); 588 | dst.append_line(&line[i..i + tok_len]); 589 | i += tok_len; 590 | } 591 | } 592 | 593 | fn output( 594 | buf: &[u8], 595 | from: usize, 596 | to: usize, 597 | colorspec: &ColorSpec, 598 | out: &mut Stream, 599 | ) -> io::Result<()> 600 | where 601 | Stream: WriteColor, 602 | { 603 | let to = to.min(buf.len()); 604 | if from >= to { 605 | return Ok(()); 606 | } 607 | let buf = &buf[from..to]; 608 | let ends_with_newline = buf.last().cloned() == Some(b'\n'); 609 | let buf = if ends_with_newline { 610 | &buf[..buf.len() - 1] 611 | } else { 612 | buf 613 | }; 614 | 
/// Returns the number of bytes of escape code that start the slice.
fn skip_all_escape_code(buf: &[u8]) -> usize {
    // Skip one "\x1b[...m" sequence, returning its total length; None if
    // the slice does not start with a complete sequence.
    fn skip_escape_code(buf: &[u8]) -> Option<usize> {
        if buf.starts_with(b"\x1b[") {
            // "\x1b[" + sequence body + "m" => 3 additional bytes
            Some(index_of(&buf[2..], b'm')? + 3)
        } else {
            None
        }
    }
    let mut rest = buf;
    let mut skipped = 0;
    while let Some(nbytes) = skip_escape_code(rest) {
        rest = &rest[nbytes..];
        skipped += nbytes
    }
    skipped
}

/// Returns the first byte of the slice, after skipping the escape
/// code bytes.
fn first_after_escape(buf: &[u8]) -> Option<u8> {
    // `get` + `copied` replaces the former iter().skip(n).cloned().next().
    buf.get(skip_all_escape_code(buf)).copied()
}

/// Scan the slice looking for the given byte, returning the index of
/// its first appearance.
fn index_of(buf: &[u8], target: u8) -> Option<usize> {
    // Iterator::position is the idiomatic form of the former manual loop.
    buf.iter().position(|&b| b == target)
}
/// Computes the number of bytes until either the next escape code, or
/// the end of buf.
fn skip_token(buf: &[u8]) -> usize {
    // Find the first "\x1b[" pair; if none, the whole slice is one token.
    // windows(2) is empty for len < 2, which also covers the empty and
    // single-byte cases (returning buf.len(), i.e. 0 or 1).
    buf.windows(2)
        .position(|w| w == b"\x1b[")
        .unwrap_or(buf.len())
}
/// Parser for git hunk headers such as "@@ -133,6 +133,8 @@".
///
/// The input may contain ANSI escape sequences anywhere between
/// significant bytes (e.g. when git colorized its output); they are
/// skipped transparently before every byte test.
struct LineNumberParser<'a> {
    // Bytes of the (possibly colorized) hunk header line.
    buf: &'a [u8],
    // Cursor: index of the next byte to examine in `buf`.
    i: usize,
}

impl<'a> LineNumberParser<'a> {
    /// Creates a parser positioned at the start of `buf`.
    fn new(buf: &'a [u8]) -> Self {
        LineNumberParser { buf, i: 0 }
    }

    /// Advances the cursor past any ANSI escape sequences at the
    /// current position.
    fn skip_escape_code(&mut self) {
        if self.i < self.buf.len() {
            let to_skip = skip_all_escape_code(&self.buf[self.i..]);
            self.i += to_skip;
        }
    }

    /// Returns true when the next significant byte (after escape
    /// codes) satisfies `matcher`. Does not consume that byte, but
    /// does consume the escape codes before it.
    fn looking_at<M>(&mut self, matcher: M) -> bool
    where
        M: Fn(u8) -> bool,
    {
        self.skip_escape_code();
        self.i < self.buf.len() && matcher(self.buf[self.i])
    }

    /// Consumes one ASCII digit and returns its numeric value, or
    /// None when the next significant byte is not a digit.
    fn read_digit(&mut self) -> Option<usize> {
        if self.looking_at(|x| x.is_ascii_digit()) {
            let cur = self.buf[self.i];
            self.i += 1;
            Some((cur - b'0') as usize)
        } else {
            None
        }
    }

    /// Consumes a possibly empty run of ASCII whitespace.
    fn skip_whitespaces(&mut self) {
        while self.looking_at(|x| x.is_ascii_whitespace()) {
            self.i += 1;
        }
    }

    /// Consumes one or more bytes matching `matcher`, returning how
    /// many were consumed; None when not even one byte matched.
    fn expect_multiple<M>(&mut self, matcher: M) -> Option<usize>
    where
        M: Fn(u8) -> bool,
    {
        self.skip_escape_code();
        let iorig = self.i;
        while self.looking_at(&matcher) {
            self.i += 1;
        }
        if self.i == iorig {
            None
        } else {
            Some(self.i - iorig)
        }
    }

    /// Consumes exactly the byte `target`; None when it is absent.
    fn expect(&mut self, target: u8) -> Option<()> {
        if self.looking_at(|x| x == target) {
            self.i += 1;
            Some(())
        } else {
            None
        }
    }

    /// Parses a decimal number. Returns None when no digit is present
    /// or when the value would overflow usize (checked arithmetic
    /// turns overflow into a parse failure instead of a wraparound).
    fn parse_usize(&mut self) -> Option<usize> {
        let mut res = 0usize;
        let mut any = false;
        while let Some(digit) = self.read_digit() {
            any = true;
            res = res.checked_mul(10)?;
            res = res.checked_add(digit)?;
        }
        if any {
            Some(res)
        } else {
            None
        }
    }

    /// Parses "ofs,len", or a bare "ofs" which git abbreviates for a
    /// length of 1.
    fn parse_pair(&mut self) -> Option<(usize, usize)> {
        let p0 = self.parse_usize()?;
        if self.expect(b',').is_none() {
            return Some((p0, 1));
        }
        let p1 = self.parse_usize()?;
        Some((p0, p1))
    }

    /// Parses one or more whitespace-separated "-ofs,len" ranges and
    /// returns the last one. Multi-parent headers (e.g. "@@@ ... @@@")
    /// list several minus ranges; per the tests, the last one wins.
    fn expect_multiple_minus_ranges(&mut self) -> Option<(usize, usize)> {
        let next = |that: &mut Self| {
            that.expect(b'-')?;
            that.parse_pair()
        };
        let mut res = None;
        for i in 0.. {
            if i != 0 {
                // Ranges after the first must be whitespace-separated.
                self.expect_multiple(|x| x.is_ascii_whitespace())?;
            }
            match next(self) {
                next @ Some(_) => res = next,
                None => break,
            }
        }
        res
    }

    /// Parses a full hunk header: optional leading whitespace, a run
    /// of '@'s, whitespace, minus range(s), '+', plus range,
    /// whitespace, and a closing run of '@'s.
    fn parse_line_number(&mut self) -> Option<HunkHeader> {
        self.skip_whitespaces();
        self.expect_multiple(|x| x == b'@')?;
        self.expect_multiple(|x| x.is_ascii_whitespace())?;
        let minus_range = self.expect_multiple_minus_ranges()?;
        self.expect(b'+')?;
        let plus_range = self.parse_pair()?;
        self.expect_multiple(|x| x.is_ascii_whitespace())?;
        self.expect_multiple(|x| x == b'@')?;
        Some(HunkHeader::new(minus_range, plus_range))
    }
}

/// Parses a hunk header line; None when `buf` is not one.
fn parse_line_number(buf: &[u8]) -> Option<HunkHeader> {
    LineNumberParser::new(buf).parse_line_number()
}

#[cfg(test)]
mod tests_app;

#[cfg(test)]
mod tests_cli;

// ===== src/tests_app.rs =====

use super::*;

#[test]
fn skip_all_escape_code_test() {
    assert_eq!(5, skip_all_escape_code(b"\x1b[42m@@@"));
    assert_eq!(10, skip_all_escape_code(b"\x1b[42m\x1b[33m@@@"));
    // An incomplete sequence (no terminating 'm') is not skipped.
    assert_eq!(0, skip_all_escape_code(b"\x1b[42@@@"));
}

#[test]
fn first_after_escape_test() {
    assert_eq!(Some(b'+'), first_after_escape(b"+abc"));
    assert_eq!(Some(b'+'), first_after_escape(b"\x1b[42m\x1b[33m+abc"));
    assert_eq!(None, first_after_escape(b"\x1b[42m"));
}

// TODO test index_of?
#[test]
fn skip_token_test() {
    // A lone ESC without '[' is part of the token.
    assert_eq!(4, skip_token(b"abc\x1b"));
    // The token stops right before "\x1b[".
    assert_eq!(3, skip_token(b"abc\x1b["));
    assert_eq!(3, skip_token(b"abc"));
    assert_eq!(1, skip_token(b"\x1b"));
    assert_eq!(0, skip_token(b""));
}

#[test]
fn parse_line_number_test() {
    // Helper asserting a successful parse into the expected ranges.
    let test_ok = |ofs1, len1, ofs2, len2, input| {
        eprintln!("test_ok {}...", String::from_utf8_lossy(input));
        assert_eq!(
            Some(HunkHeader {
                minus_range: (ofs1, len1),
                plus_range: (ofs2, len2),
            }),
            parse_line_number(input)
        );
    };
    // Helper asserting a parse failure.
    let test_fail = |input| {
        eprintln!("test_fail {}...", String::from_utf8_lossy(input));
        assert_eq!(None, parse_line_number(input));
    };
    test_ok(133, 6, 133, 8, b"@@ -133,6 +133,8 @@");
    // A bare "+1" means (1, 1).
    test_ok(0, 0, 1, 1, b"@@ -0,0 +1 @@");
    // Leading whitespace is tolerated.
    test_ok(0, 0, 1, 1, b" @@ -0,0 +1 @@");
    test_ok(0, 0, 1, 1, b"@@ -0,0 +1 @@");
    // last one wins
    test_ok(0, 2, 0, 3, b"@@@ -0,0 -0,2 +0,3 @@@");
    // Missing separators or markers must fail.
    test_fail(b"@@-0,0 +1 @@");
    test_fail(b"@@ -0,0+1 @@");
    test_fail(b"@@ -0,0 +1@@");
    test_fail(b"@@ -0,0 +1 ");
    test_fail(b"-0,0 +1");
    test_fail(b"@@ 0,0 +1 @@");
    test_fail(b"@@ -0,0 1 @@");
    // Two plus ranges are rejected.
    test_fail(b"@@@ -0,0 +0,2 +0,3 @@@");

    // overflow
    test_fail(b"@@ -0,0 +19999999999999999999 @@");

    // with escape code
    test_ok(0, 0, 1, 1, b"\x1b[42;43m@\x1b[42;43m@\x1b[42;43m \x1b[42;43m-\x1b[42;43m0\x1b[42;43m,\x1b[42;43m0\x1b[42;43m \x1b[42;43m+1 @@");
}

#[test]
fn test_width() {
    // Sanity-check the WIDTH table: WIDTH[i] + 1 needs i + 1 digits.
    for (i, x) in WIDTH.iter().enumerate() {
        if x < &u64::max_value() {
            assert_eq!(format!("{}", x + 1).len(), i + 1);
        }
    }
    // By construction, 0 has width 0 (see WIDTH[0]).
    assert_eq!(0, width1(0, None));
    // Checks width1 against the printed length, with and without a
    // fixed minimum width.
    fn test(x: u64) {
        assert_eq!(format!("{}", x).len(), width1(x, None));
        for i in 0..5 {
            assert_eq!(
                format!("{}", x).len().max(i),
                width1(x, Some(LineNumberStyle::Fixed(i)))
            );
        }
    }
    for i in 1..=10000 {
        test(i);
    }
    // Values around the 10/11-digit boundary and powers of two.
    test(9999999999);
    test(10000000000);
    test(14284238234);
    for i in 0..64 {
        test(1 << i);
    }
    test(u64::max_value());

    // Hunk margin widths: two equal columns plus one separator.
    assert_eq!(
        "123:456".len(),
        HunkHeader::new((123, 5), (456, 9)).width(None)
    );
    assert_eq!(
        "1122: 456".len(),
        HunkHeader::new((123, 999), (456, 9)).width(None)
    );
    assert_eq!(
        " :456".len(),
        HunkHeader::new((0, 0), (456, 9)).width(None)
    );
    assert_eq!(MAX_MARGIN, 2 * width1(u64::max_value(), None) + 1);

    // with fixed width
    assert_eq!(
        " 123: 456".len(),
        HunkHeader::new((123, 5), (456, 9)).width(Some(LineNumberStyle::Fixed(4)))
    );
    assert_eq!(
        "1122: 456".len(),
        HunkHeader::new((123, 999), (456, 9)).width(Some(LineNumberStyle::Fixed(4)))
    );
    assert_eq!(
        " : 456".len(),
        HunkHeader::new((0, 0), (456, 9)).width(Some(LineNumberStyle::Fixed(4)))
    );
    assert_eq!(
        MAX_MARGIN,
        2 * width1(u64::max_value(), Some(LineNumberStyle::Fixed(4))) + 1
    );
}

// ===== src/tests_cli.rs =====

use std::env;
use std::path::PathBuf;
use std::process::{Command, Stdio};
use StringTest::*;

/// Expectation on a captured output stream of the diffr binary.
enum StringTest {
    // Stream must be empty.
    Empty,
    // Stream must contain the given substring.
    AtLeast(&'static str),
    // Stream must equal the given string (after trimming).
    Exactly(&'static str),
}

// Wraps a non-empty message in quotes for readable assertion output.
fn quote_or_empty(msg: &str) -> String {
    if msg.is_empty() {
        "".to_owned()
    } else {
        format!("\"{}\"", msg)
    }
}

impl StringTest {
    /// Asserts that `actual` satisfies this expectation; `prefix`
    /// names the stream (stdout/stderr) in the failure message.
    fn test(&self, actual: &str, prefix: &str) {
        match self {
            Empty => assert!(
                actual.is_empty(),
                "{}: expected empty, got\n\n{}",
                quote_or_empty(prefix),
                quote_or_empty(actual)
            ),
            AtLeast(exp) => assert!(
                actual.contains(exp),
                "{}: expected at least\n\n{}\n\ngot\n\n{}",
                prefix,
                quote_or_empty(exp),
                quote_or_empty(actual)
            ),
            Exactly(exp) => assert!(
                actual.trim() == exp.trim(),
                "{}: expected\n\n{}\n\ngot\n\n{}",
                prefix,
                quote_or_empty(exp),
                quote_or_empty(actual)
            ),
        }
    }
}

/// One end-to-end invocation of the diffr binary: arguments plus the
/// expected stdout, stderr, and exit status.
struct ProcessTest {
    args: &'static [&'static str],
    out: StringTest,
    err: StringTest,
    is_success: bool,
}

// Default location of the freshly built binary under target/debug.
fn diffr_path_default() -> PathBuf {
    let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    dir.push("target");
    dir.push("debug");
    dir.push(if cfg!(windows) { "diffr.exe" } else { "diffr" });
    dir
}

// Binary under test; DIFFR_TESTS_BINARY_PATH overrides the default.
fn diffr_path() -> PathBuf {
    match env::var("DIFFR_TESTS_BINARY_PATH") {
        Err(_) => diffr_path_default(),
        Ok(path) => PathBuf::from(path),
    }
}

/// Runs the binary with the given arguments and checks status,
/// stdout, and stderr against the expectations in `descr`.
fn test_cli(descr: ProcessTest) {
    let mut cmd = Command::new(diffr_path());
    cmd.stdout(Stdio::piped());
    cmd.stderr(Stdio::piped());
    // stdin is piped and immediately dropped, so the child sees EOF.
    cmd.stdin(Stdio::piped());
    for arg in descr.args {
        cmd.arg(*arg);
    }
    let child = cmd.spawn().expect("spawn");
    let output = child.wait_with_output().expect("wait_with_output");
    fn string_of_status(code: bool) -> &'static str {
        if code {
            "success"
        } else {
            "failure"
        }
    }
    assert!(
        descr.is_success == output.status.success(),
        "unexpected status: expected {} got {}",
        string_of_status(descr.is_success),
        string_of_status(output.status.success()),
    );
    descr
        .out
        .test(&String::from_utf8_lossy(&output.stdout), "stdout");
    descr
        .err
        .test(&String::from_utf8_lossy(&output.stderr), "stderr");
}

#[test]
fn debug_flag() {
    test_cli(ProcessTest {
        args: &["--debug"],
        out: Empty,
        err: AtLeast("hunk processing time (ms)"),
        is_success: true,
    })
}

#[test]
fn color_invalid_face_name() {
    test_cli(ProcessTest {
        args: &["--colors", "notafacename"],
        out: Empty,
        err: Exactly("unexpected face name: got 'notafacename', expected added|refine-added|removed|refine-removed"),
        is_success: false,
    })
}

#[test]
fn color_only_face_name() {
    // A face name with no attributes is accepted.
    test_cli(ProcessTest {
        args: &["--colors", "added"],
        out: Empty,
        err: Exactly(""),
        is_success: true,
    })
}

#[test]
fn color_invalid_attribute_name() {
    test_cli(ProcessTest {
        args: &["--colors", "added:bar"],
        out: Empty,
        err: Exactly("unexpected attribute name: got 'bar', expected foreground|background|italic|noitalic|bold|nobold|intense|nointense|underline|nounderline|none"),
        is_success: false,
    })
}

#[test]
fn color_invalid_color_value_name() {
    test_cli(ProcessTest {
        args: &["--colors", "added:foreground:baz"],
        out: Empty,
        err: Exactly("unexpected color value: unrecognized color name 'baz'. Choose from: black, blue, green, red, cyan, magenta, yellow, white"),
        is_success: false,
    })
}

#[test]
fn color_invalid_color_value_ansi() {
    // 777 is out of the 0..=255 ansi256 range.
    test_cli(ProcessTest {
        args: &["--colors", "added:foreground:777"],
        out: Empty,
        err: AtLeast("unexpected color value: unrecognized ansi256 color number"),
        is_success: false,
    })
}

#[test]
fn color_invalid_color_value_rgb() {
    // 777 is out of the 0..=255 RGB component range.
    test_cli(ProcessTest {
        args: &["--colors", "added:foreground:0,0,777"],
        out: Empty,
        err: AtLeast("unexpected color value: unrecognized RGB color triple"),
        is_success: false,
    })
}

#[test]
fn color_invalid_color_not_done() {
    // "foreground" requires a following color value.
    test_cli(ProcessTest {
        args: &["--colors", "added:foreground"],
        out: Empty,
        err: Exactly("error parsing color: missing color value for face 'added'"),
        is_success: false,
    })
}

#[test]
fn color_ok() {
    test_cli(ProcessTest {
        args: &["--colors", "added:foreground:0"],
        out: Empty,
        err: Exactly(""),
        is_success: true,
    })
}

#[test]
fn color_ok_multiple() {
    // --colors may be given several times, once per face.
    test_cli(ProcessTest {
        args: &[
            "--colors",
            "added:foreground:0",
            "--colors",
            "removed:background:red",
        ],
        out: Empty,
        err: Exactly(""),
        is_success: true,
    })
}

#[test]
fn threshold() {
    // ok
    test_cli(ProcessTest {
        args: &["--large-diff-threshold", "123"],
        out: Empty,
        err: Empty,
        is_success: true,
    });

    // fail
    test_cli(ProcessTest {
        args: &["--large-diff-threshold"],
        out: Empty,
        err: Exactly("option requires an argument: '--large-diff-threshold'"),
        is_success: false,
    });
    test_cli(ProcessTest {
        args: &["--large-diff-threshold", "a"],
        out: Empty,
        err: Exactly("invalid threshold value: invalid digit found in string"),
        is_success: false,
    });
    // Negative values are rejected by the unsigned parse.
    test_cli(ProcessTest {
        args: &["--large-diff-threshold", "-1"],
        out: Empty,
        err: Exactly("invalid threshold value: invalid digit found in string"),
        is_success: false,
    });
}

#[test]
fn line_numbers_style() {
    // TODO check config?

    // ok
    test_cli(ProcessTest {
        args: &["--line-numbers"],
        out: Empty,
        err: Empty,
        is_success: true,
    });
    test_cli(ProcessTest {
        args: &["--line-numbers", "compact"],
        out: Empty,
        err: Empty,
        is_success: true,
    });
    test_cli(ProcessTest {
        args: &["--line-numbers", "aligned"],
        out: Empty,
        err: Empty,
        is_success: true,
    });

    // fail
    test_cli(ProcessTest {
        args: &["--line-numbers", "foo"],
        out: Empty,
        err: Exactly("unexpected line number style: got 'foo', expected aligned|compact|fixed"),
        is_success: false,
    });
}

#[test]
fn test_bad_argument() {
    test_cli(ProcessTest {
        args: &["--invalid-option"],
        out: Empty,
        err: AtLeast("bad argument: '--invalid-option'"),
        is_success: false,
    });
}