├── .gitattributes ├── .gitignore ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE.txt ├── README.md ├── assets ├── diffr.1.md ├── h.txt └── help.txt ├── azure-pipelines.yml ├── ci ├── azure-install-rust.yml ├── azure-runtests.yml ├── azure-rustfmt.yml └── azure_integration_test.yml ├── screenshots ├── example_cross_lines_common_tokens.png ├── example_nonconsecutive.png ├── example_simple.png └── example_simple_mac.png └── src ├── cli_args.rs ├── diffr_lib ├── best_projection.rs ├── mod.rs └── tests_lib.rs ├── main.rs ├── tests_app.rs └── tests_cli.rs /.gitattributes: -------------------------------------------------------------------------------- 1 | *.rs text 2 | *.toml text 3 | *.lock text 4 | *.md text 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/target/** 2 | **/*.rs.bk 3 | *.cmd 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.1.4 (2020/03/19) 2 | - diffr: add --line-numbers flag to display the line numbers 3 | (Github #44, Athir Saleem). 4 | 5 | - diffr: --colors: allow to display italic faces (Github #45). 6 | 7 | ## 0.1.3 (2019/12/07) 8 | - diffr-lib: optimize_partition: new function. 9 | Postprocessing of the results of the LCS algorithm to reduce the 10 | number of segments of consecutive shared tokens. 11 | 12 | ## 0.1.2 (2019/09/07) 13 | - Split in two crates: diffr-lib contains reusable parts, while diffr 14 | only contains application logic. 15 | 16 | - Fix a bug in display code that messed up the colors in diffs with 17 | lines starting with dashes. 18 | 19 | - Configuration: default to use 16 colors everywhere (Github #16). 20 | 21 | ## 0.1.1 (2019/07/15) 22 | - Add --colors flag to customize faces propertized by diffr (Github #3). 
23 | This changes the default colors used on linux and macOS. 24 | The default still works on windows. 25 | 26 | ## 0.1.0 (2019/07/01) Initial release. 27 | - Initial release. 28 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 3 | version = 3 4 | 5 | [[package]] 6 | name = "bstr" 7 | version = "1.9.1" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "05efc5cfd9110c8416e471df0e96702d58690178e206e61b7173706673c93706" 10 | dependencies = [ 11 | "memchr", 12 | "regex-automata", 13 | ] 14 | 15 | [[package]] 16 | name = "diffr" 17 | version = "0.1.5" 18 | dependencies = [ 19 | "bstr", 20 | "termcolor", 21 | ] 22 | 23 | [[package]] 24 | name = "memchr" 25 | version = "2.7.2" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" 28 | 29 | [[package]] 30 | name = "regex-automata" 31 | version = "0.4.6" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" 34 | 35 | [[package]] 36 | name = "termcolor" 37 | version = "1.4.1" 38 | source = "registry+https://github.com/rust-lang/crates.io-index" 39 | checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" 40 | dependencies = [ 41 | "winapi-util", 42 | ] 43 | 44 | [[package]] 45 | name = "winapi-util" 46 | version = "0.1.8" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" 49 | dependencies = [ 50 | "windows-sys", 51 | ] 52 | 53 | [[package]] 54 | name = "windows-sys" 55 | version = "0.52.0" 56 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 57 | checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" 58 | dependencies = [ 59 | "windows-targets", 60 | ] 61 | 62 | [[package]] 63 | name = "windows-targets" 64 | version = "0.52.5" 65 | source = "registry+https://github.com/rust-lang/crates.io-index" 66 | checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" 67 | dependencies = [ 68 | "windows_aarch64_gnullvm", 69 | "windows_aarch64_msvc", 70 | "windows_i686_gnu", 71 | "windows_i686_gnullvm", 72 | "windows_i686_msvc", 73 | "windows_x86_64_gnu", 74 | "windows_x86_64_gnullvm", 75 | "windows_x86_64_msvc", 76 | ] 77 | 78 | [[package]] 79 | name = "windows_aarch64_gnullvm" 80 | version = "0.52.5" 81 | source = "registry+https://github.com/rust-lang/crates.io-index" 82 | checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" 83 | 84 | [[package]] 85 | name = "windows_aarch64_msvc" 86 | version = "0.52.5" 87 | source = "registry+https://github.com/rust-lang/crates.io-index" 88 | checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" 89 | 90 | [[package]] 91 | name = "windows_i686_gnu" 92 | version = "0.52.5" 93 | source = "registry+https://github.com/rust-lang/crates.io-index" 94 | checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" 95 | 96 | [[package]] 97 | name = "windows_i686_gnullvm" 98 | version = "0.52.5" 99 | source = "registry+https://github.com/rust-lang/crates.io-index" 100 | checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" 101 | 102 | [[package]] 103 | name = "windows_i686_msvc" 104 | version = "0.52.5" 105 | source = "registry+https://github.com/rust-lang/crates.io-index" 106 | checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" 107 | 108 | [[package]] 109 | name = "windows_x86_64_gnu" 110 | version = "0.52.5" 111 | source = 
"registry+https://github.com/rust-lang/crates.io-index" 112 | checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" 113 | 114 | [[package]] 115 | name = "windows_x86_64_gnullvm" 116 | version = "0.52.5" 117 | source = "registry+https://github.com/rust-lang/crates.io-index" 118 | checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" 119 | 120 | [[package]] 121 | name = "windows_x86_64_msvc" 122 | version = "0.52.5" 123 | source = "registry+https://github.com/rust-lang/crates.io-index" 124 | checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" 125 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "diffr" 3 | version = "0.1.5" 4 | authors = ["Nathan Moreau "] 5 | description = """ 6 | An LCS based diff highlighting tool to ease code review from your terminal. 7 | """ 8 | categories = ["command-line-utilities"] 9 | edition = "2018" 10 | homepage = "https://github.com/mookid/diffr" 11 | repository = "https://github.com/mookid/diffr" 12 | keywords = ["diff", "code-review", "git", "console", "cli"] 13 | license = "MIT" 14 | readme = "README.md" 15 | 16 | [profile.release] 17 | debug = true 18 | 19 | [dependencies] 20 | bstr = { version = "1.9.1", default-features = false, features = ["unicode"] } 21 | termcolor = "1.1" 22 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright 2019 Nathan Moreau 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## diffr 2 | 3 | Reviewing changes involves reading diffs. Sometimes, a line-oriented 4 | presentation of changes is not precise enough, especially when changes 5 | involve long lines or very similar consecutive lines. 6 | 7 | This program processes such diffs, and outputs them (in the console) 8 | with additional diff information on top of the unified diff format, 9 | using text attributes. 10 | 11 | It works hunk by hunk, recomputing the diff on a word-by-word basis. 12 | 13 | The current implementation uses 14 | [Myers' longest common subsequence](http://www.xmailserver.org/diff2.pdf) 15 | algorithm. 
16 | 17 | [![crates.io](https://img.shields.io/crates/v/diffr.svg)](https://crates.io/crates/diffr) 18 | [![crates.io](https://img.shields.io/crates/d/diffr.svg)](https://crates.io/crates/diffr) 19 | [![Build Status](https://dev.azure.com/nathanmoreau/diffr/_apis/build/status/mookid.diffr?branchName=master)](https://dev.azure.com/nathanmoreau/diffr/_build/latest?definitionId=4&branchName=master) 20 | 21 | ![Demo](screenshots/example_simple_mac.png) 22 | ![Demo](screenshots/example_nonconsecutive.png) 23 | ![Demo](screenshots/example_cross_lines_common_tokens.png) 24 | 25 | ### Installation 26 | 27 | #### Arch Linux 28 | 29 | Install from the [AUR](https://aur.archlinux.org/packages/diffr/): 30 | 31 | ``` 32 | git clone https://aur.archlinux.org/diffr.git 33 | cd diffr 34 | makepkg -si 35 | ``` 36 | 37 | #### Homebrew 38 | 39 | ``` 40 | brew install diffr 41 | ``` 42 | 43 | #### From source 44 | 45 | You will need the [Rust compiler installed](https://www.rust-lang.org/tools/install). 46 | 47 | To install the latest published version: 48 | 49 | ``` 50 | cargo install diffr 51 | ``` 52 | 53 | Alternatively, you can build the development version: 54 | 55 | ``` 56 | git clone https://github.com/mookid/diffr.git 57 | cd diffr 58 | cargo install --path . 59 | ``` 60 | 61 | ### How to use it? 62 | 63 | diffr tries to be a well behaved Unix program: it reads its input from stdin 64 | and writes to stdout. 65 | 66 | #### One-off usage 67 | 68 | ``` 69 | git show HEAD | diffr 70 | ``` 71 | 72 | #### Integration with git 73 | 74 | Add the following section to your `.gitconfig` file: 75 | 76 | ``` 77 | [core] 78 | pager = diffr | less -R 79 | [interactive] 80 | diffFilter = diffr 81 | ``` 82 | 83 | Alternatively, you can run from the command line: 84 | 85 | ``` 86 | git config --global core.pager 'diffr | less -R' 87 | git config --global interactive.difffilter diffr 88 | ``` 89 | 90 | #### Color customization 91 | 92 | Use the --colors flag. 
93 | 94 | You can customize the display of diffing and common segments of added 95 | and removed lines. 96 | 97 | For example, 98 | 99 | ``` 100 | diffr --colors refine-removed:background:200,0,0:foreground:white:bold 101 | ``` 102 | 103 | tweaks the red used for uniquely removed text; 104 | 105 | The configuration used in the first screenshot is 106 | 107 | ``` 108 | diffr --colors refine-added:none:background:0x33,0x99,0x33:bold --colors added:none:background:0x33,0x55,0x33 --colors refine-removed:none:background:0x99,0x33,0x33:bold --colors removed:none:background:0x55,0x33,0x33 109 | ``` 110 | 111 | #### Display line numbers 112 | 113 | The `--line-numbers` flag displays the line numbers of the hunk. 114 | 115 | ### Related projects 116 | 117 | This is an improvement on the 118 | [diff-highlight](https://github.com/git/git/tree/master/contrib/diff-highlight) 119 | script distributed with git. 120 | 121 | git itself provides both `--word-diff` and `--color-words` options to 122 | several commands. 123 | -------------------------------------------------------------------------------- /assets/diffr.1.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: DIFFR 3 | section: 1 4 | header: User Manual 5 | footer: diffr 0.1.5 6 | date: April 14, 2023 7 | --- 8 | # NAME 9 | diffr - adds word-level diff on top of unified diffs 10 | 11 | # SYNOPSIS 12 | **diffr** [**\-\-colors** *\*] [**\-\-line-numbers** \] 13 | 14 | diff -u | **diffr** [OPTIONS] 15 | 16 | git show | **diffr** [OPTIONS] 17 | 18 | # DESCRIPTION 19 | **\-\-colors** *\* 20 | Configure color settings for console output. 
21 | 22 | There are four faces to customize: 23 | +----------------+--------------+----------------+ 24 | | line prefix | + | - | 25 | +----------------+--------------+----------------+ 26 | | common segment | added | removed | 27 | | unique segment | refine-added | refine-removed | 28 | +----------------+--------------+----------------+ 29 | 30 | The customization allows 31 | - to change the foreground or background color; 32 | - to set or unset the attributes 'bold', 'intense', 'underline'; 33 | - to clear all attributes. 34 | 35 | Customization is done passing a color_spec argument. 36 | This flag may be provided multiple times. 37 | 38 | The syntax is the following: 39 | 40 | color_spec = face-name + ':' + attributes 41 | attributes = attribute 42 | | attribute + ':' + attributes 43 | attribute = ('foreground' | 'background') + ':' + color 44 | | ( | 'no') + font-flag 45 | | 'none' 46 | font-flag = 'italic' 47 | | 'bold' 48 | | 'intense' 49 | | 'underline' 50 | color = 'none' 51 | | [0-255] 52 | | [0-255] + ',' + [0-255] + ',' + [0-255] 53 | | ('black', 'blue', 'green', 'red', 54 | 'cyan', 'magenta', 'yellow', 'white') 55 | 56 | For example, the color_spec 57 | 58 | 'refine-added:background:blue:bold' 59 | 60 | sets the color of unique added segments with 61 | a blue background, written with a bold font. 62 | 63 | **\-\-line-numbers** \ 64 | Display line numbers. Style is optional. 65 | When style = 'compact', take as little width as possible. 66 | When style = 'aligned', align to tab stops (useful if tab is used for indentation). 
[default: compact] 67 | 68 | **-h**, **\-\-help** 69 | Prints help information 70 | 71 | **-V**, **\-\-version** 72 | Prints version information 73 | 74 | # AUTHOR 75 | Nathan Moreau \ 76 | 77 | # LICENSE 78 | The MIT License (MIT) 79 | -------------------------------------------------------------------------------- /assets/h.txt: -------------------------------------------------------------------------------- 1 | diffr $VERSION 2 | Nathan Moreau 3 | 4 | diffr adds word-level diff on top of unified diffs. 5 | word-level diff information is displayed using text attributes. 6 | 7 | USAGE: 8 | diffr reads from standard input and writes to standard output. 9 | 10 | Typical usage is for interactive use of diff: 11 | diff -u | diffr 12 | git show | diffr 13 | 14 | OPTIONS: 15 | --colors ... Configure color settings. 16 | --line-numbers Display line numbers. 17 | -h, --help Prints help information 18 | -V, --version Prints version information 19 | -------------------------------------------------------------------------------- /assets/help.txt: -------------------------------------------------------------------------------- 1 | diffr $VERSION 2 | Nathan Moreau 3 | 4 | diffr adds word-level diff on top of unified diffs. 5 | word-level diff information is displayed using text attributes. 6 | 7 | USAGE: 8 | diffr reads from standard input and writes to standard output. 9 | 10 | Typical usage is for interactive use of diff: 11 | diff -u | diffr 12 | git show | diffr 13 | 14 | OPTIONS: 15 | --colors ... 16 | Configure color settings for console output. 
17 | 18 | There are four faces to customize: 19 | +----------------+--------------+----------------+ 20 | | line prefix | + | - | 21 | +----------------+--------------+----------------+ 22 | | common segment | added | removed | 23 | | unique segment | refine-added | refine-removed | 24 | +----------------+--------------+----------------+ 25 | 26 | The customization allows 27 | - to change the foreground or background color; 28 | - to set or unset the attributes 'bold', 'intense', 'underline'; 29 | - to clear all attributes. 30 | 31 | Customization is done passing a color_spec argument. 32 | This flag may be provided multiple times. 33 | 34 | The syntax is the following: 35 | 36 | color_spec = face-name + ':' + attributes 37 | attributes = attribute 38 | | attribute + ':' + attributes 39 | attribute = ('foreground' | 'background') + ':' + color 40 | | ( | 'no') + font-flag 41 | | 'none' 42 | font-flag = 'italic' 43 | | 'bold' 44 | | 'intense' 45 | | 'underline' 46 | color = 'none' 47 | | [0-255] 48 | | [0-255] + ',' + [0-255] + ',' + [0-255] 49 | | ('black', 'blue', 'green', 'red', 50 | 'cyan', 'magenta', 'yellow', 'white') 51 | 52 | For example, the color_spec 53 | 54 | 'refine-added:background:blue:bold' 55 | 56 | sets the color of unique added segments with 57 | a blue background, written with a bold font. 58 | 59 | --line-numbers 60 | Display line numbers. Style is optional. 61 | When style = 'compact', take as little width as possible. 62 | When style = 'aligned', align to tab stops (useful if tab is used for indentation). 
[default: compact] 63 | 64 | -h, --help 65 | Prints help information 66 | 67 | -V, --version 68 | Prints version information 69 | -------------------------------------------------------------------------------- /azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | trigger: 2 | - master 3 | 4 | strategy: 5 | matrix: 6 | linux: 7 | vmImage: ubuntu-latest 8 | macOS: 9 | vmImage: macOS-latest 10 | windows: 11 | vmImage: windows-latest 12 | 13 | pool: 14 | vmImage: $(vmImage) 15 | 16 | steps: 17 | - template: ci/azure-install-rust.yml 18 | parameters: 19 | rust_version: stable 20 | 21 | - template: ci/azure-rustfmt.yml 22 | parameters: 23 | crate_path: . 24 | 25 | - template: ci/azure-runtests.yml 26 | parameters: 27 | crate_path: . 28 | 29 | - template: ci/azure_integration_test.yml 30 | -------------------------------------------------------------------------------- /ci/azure-install-rust.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | # Linux and macOS. 3 | - script: | 4 | set -e 5 | curl https://sh.rustup.rs -sSf | sh -s -- -y --default-toolchain none 6 | export PATH=$PATH:$HOME/.cargo/bin 7 | rustup toolchain install $RUSTUP_TOOLCHAIN 8 | rustup default $RUSTUP_TOOLCHAIN 9 | echo "##vso[task.setvariable variable=PATH;]$PATH:$HOME/.cargo/bin" 10 | env: 11 | RUSTUP_TOOLCHAIN: ${{parameters.rust_version}} 12 | displayName: "Install rust (*nix)" 13 | condition: not(eq(variables['Agent.OS'], 'Windows_NT')) 14 | 15 | # Windows. 
16 | - script: | 17 | curl -sSf -o rustup-init.exe https://win.rustup.rs 18 | rustup-init.exe -y --default-toolchain none 19 | set PATH=%PATH%;%USERPROFILE%\.cargo\bin 20 | rustup toolchain install %RUSTUP_TOOLCHAIN% 21 | rustup default %RUSTUP_TOOLCHAIN% 22 | echo "##vso[task.setvariable variable=PATH;]%PATH%;%USERPROFILE%\.cargo\bin" 23 | env: 24 | RUSTUP_TOOLCHAIN: ${{parameters.rust_version}} 25 | displayName: "Install rust (windows)" 26 | condition: eq(variables['Agent.OS'], 'Windows_NT') 27 | 28 | # All platforms. 29 | - script: | 30 | rustc -Vv 31 | cargo -V 32 | displayName: Query rust and cargo versions 33 | 34 | -------------------------------------------------------------------------------- /ci/azure-runtests.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - script: | 3 | set -e 4 | cd ${{parameters.crate_path}} 5 | cargo build 6 | cargo test 7 | displayName: Run tests (${{parameters.crate_path}}) 8 | -------------------------------------------------------------------------------- /ci/azure-rustfmt.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - script: | 3 | set -e 4 | cd ${{parameters.crate_path}} 5 | rustup component add rustfmt 6 | cargo fmt --version 7 | cargo fmt --all -- --check 8 | displayName: Check formatting (${{parameters.crate_path}}) 9 | condition: eq(variables['Agent.OS'], 'Linux') 10 | -------------------------------------------------------------------------------- /ci/azure_integration_test.yml: -------------------------------------------------------------------------------- 1 | steps: 2 | - script: | 3 | set -e 4 | cargo install --git https://github.com/mookid/trimcolor 5 | displayName: Install trimcolor 6 | 7 | - script: | 8 | set -e 9 | git log -p >whole_log 10 | cargo run whole_log_diffr 11 | if git diff --no-index whole_log whole_log_diffr 12 | then 13 | exit 0 14 | else 15 | exit 1 16 | fi 17 | 18 | displayName: Integration 
test 19 | condition: eq(variables['Agent.OS'], 'Linux') 20 | -------------------------------------------------------------------------------- /screenshots/example_cross_lines_common_tokens.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mookid/diffr/b0ebd4c04a5909b5af44f6ac96014148b7384858/screenshots/example_cross_lines_common_tokens.png -------------------------------------------------------------------------------- /screenshots/example_nonconsecutive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mookid/diffr/b0ebd4c04a5909b5af44f6ac96014148b7384858/screenshots/example_nonconsecutive.png -------------------------------------------------------------------------------- /screenshots/example_simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mookid/diffr/b0ebd4c04a5909b5af44f6ac96014148b7384858/screenshots/example_simple.png -------------------------------------------------------------------------------- /screenshots/example_simple_mac.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mookid/diffr/b0ebd4c04a5909b5af44f6ac96014148b7384858/screenshots/example_simple_mac.png -------------------------------------------------------------------------------- /src/cli_args.rs: -------------------------------------------------------------------------------- 1 | use super::AppConfig; 2 | use super::LineNumberStyle; 3 | 4 | use std::fmt::Display; 5 | use std::fmt::Error as FmtErr; 6 | use std::fmt::Formatter; 7 | use std::io::IsTerminal; 8 | use std::io::Write; 9 | use std::iter::Peekable; 10 | use std::process; 11 | use std::str::FromStr; 12 | 13 | use termcolor::Color; 14 | use termcolor::ColorSpec; 15 | use termcolor::ParseColorError; 16 | 17 | const FLAG_DEBUG: &str = "--debug"; 18 | 
const FLAG_COLOR: &str = "--colors"; 19 | const FLAG_LINE_NUMBERS: &str = "--line-numbers"; 20 | const FLAG_TOO_LARGE: &str = "--large-diff-threshold"; 21 | 22 | const BIN_NAME: &str = env!("CARGO_PKG_NAME"); 23 | const VERSION: &str = env!("CARGO_PKG_VERSION"); 24 | 25 | const HELP_SHORT: &str = include_str!("../assets/h.txt"); 26 | const HELP_LONG: &str = include_str!("../assets/help.txt"); 27 | 28 | fn show_version() -> ! { 29 | eprintln!("{} {}", BIN_NAME, VERSION); 30 | process::exit(0); 31 | } 32 | 33 | #[derive(Debug, Clone, Copy)] 34 | enum FaceName { 35 | Added, 36 | RefineAdded, 37 | Removed, 38 | RefineRemoved, 39 | } 40 | 41 | fn missing_arg(arg: impl std::fmt::Display) -> ! { 42 | eprintln!("option requires an argument: '{}'", arg); 43 | process::exit(2); 44 | } 45 | 46 | fn interpolate(s: &str) -> String { 47 | s.replace("$VERSION", VERSION) 48 | } 49 | 50 | fn usage(code: i32) -> ! { 51 | let txt = interpolate(HELP_SHORT); 52 | let _ = std::io::stderr().write(txt.as_bytes()); 53 | process::exit(code); 54 | } 55 | 56 | fn help(long: bool) -> ! 
{ 57 | let txt = if long { HELP_LONG } else { HELP_SHORT }; 58 | let txt = interpolate(txt); 59 | let _ = std::io::stdout().write(txt.as_bytes()); 60 | process::exit(0); 61 | } 62 | 63 | impl EnumString for FaceName { 64 | fn data() -> &'static [(&'static str, Self)] { 65 | use FaceName::*; 66 | &[ 67 | ("added", Added), 68 | ("refine-added", RefineAdded), 69 | ("removed", Removed), 70 | ("refine-removed", RefineRemoved), 71 | ] 72 | } 73 | } 74 | 75 | impl Display for FaceName { 76 | fn fmt(&self, f: &mut Formatter) -> Result<(), FmtErr> { 77 | use FaceName::*; 78 | match self { 79 | Added => write!(f, "added"), 80 | RefineAdded => write!(f, "refine-added"), 81 | Removed => write!(f, "removed"), 82 | RefineRemoved => write!(f, "refine-removed"), 83 | } 84 | } 85 | } 86 | 87 | impl FaceName { 88 | fn get_face_mut<'b>(&self, config: &'b mut super::AppConfig) -> &'b mut ColorSpec { 89 | use FaceName::*; 90 | match self { 91 | Added => &mut config.added_face, 92 | RefineAdded => &mut config.refine_added_face, 93 | Removed => &mut config.removed_face, 94 | RefineRemoved => &mut config.refine_removed_face, 95 | } 96 | } 97 | } 98 | 99 | // custom parsing of Option 100 | struct ColorOpt(Option); 101 | 102 | impl FromStr for ColorOpt { 103 | type Err = ArgParsingError; 104 | fn from_str(input: &str) -> Result { 105 | if input == "none" { 106 | Ok(ColorOpt(None)) 107 | } else { 108 | match input.parse() { 109 | Ok(color) => Ok(ColorOpt(Some(color))), 110 | Err(err) => Err(ArgParsingError::Color(err)), 111 | } 112 | } 113 | } 114 | } 115 | 116 | trait EnumString: Copy { 117 | fn data() -> &'static [(&'static str, Self)]; 118 | } 119 | 120 | fn tryparse(input: &str) -> Result 121 | where 122 | T: EnumString + 'static, 123 | { 124 | T::data() 125 | .iter() 126 | .find(|p| p.0 == input) 127 | .map(|&p| p.1) 128 | .ok_or_else(|| { 129 | format!( 130 | "got '{}', expected {}", 131 | input, 132 | T::data().iter().map(|p| p.0).collect::>().join("|") 133 | ) 134 | }) 135 | } 136 | 
137 | #[derive(Debug, Clone, Copy)] 138 | struct LineNumberStyleOpt(LineNumberStyle); 139 | 140 | impl EnumString for LineNumberStyleOpt { 141 | fn data() -> &'static [(&'static str, Self)] { 142 | use LineNumberStyle::*; 143 | &[ 144 | ("aligned", LineNumberStyleOpt(Aligned)), 145 | ("compact", LineNumberStyleOpt(Compact)), 146 | ("fixed", LineNumberStyleOpt(Fixed(3))), 147 | ] 148 | } 149 | } 150 | 151 | #[derive(Debug, Clone, Copy)] 152 | enum FaceColor { 153 | Foreground, 154 | Background, 155 | } 156 | 157 | #[derive(Debug, Clone, Copy)] 158 | enum AttributeName { 159 | Color(FaceColor), 160 | Italic(bool), 161 | Bold(bool), 162 | Intense(bool), 163 | Underline(bool), 164 | Reset, 165 | } 166 | 167 | impl EnumString for AttributeName { 168 | fn data() -> &'static [(&'static str, Self)] { 169 | use AttributeName::*; 170 | &[ 171 | ("foreground", Color(FaceColor::Foreground)), 172 | ("background", Color(FaceColor::Background)), 173 | ("italic", Italic(true)), 174 | ("noitalic", Italic(false)), 175 | ("bold", Bold(true)), 176 | ("nobold", Bold(false)), 177 | ("intense", Intense(true)), 178 | ("nointense", Intense(false)), 179 | ("underline", Underline(true)), 180 | ("nounderline", Underline(false)), 181 | ("none", Reset), 182 | ] 183 | } 184 | } 185 | 186 | #[derive(Debug)] 187 | enum ArgParsingError { 188 | FaceName(String), 189 | AttributeName(String), 190 | Color(ParseColorError), 191 | MissingValue(FaceName), 192 | LineNumberStyle(String), 193 | LargeDiffThreshold(String), 194 | } 195 | 196 | impl Display for ArgParsingError { 197 | fn fmt(&self, f: &mut Formatter) -> Result<(), FmtErr> { 198 | match self { 199 | ArgParsingError::FaceName(err) => write!(f, "unexpected face name: {}", err), 200 | ArgParsingError::AttributeName(err) => write!(f, "unexpected attribute name: {}", err), 201 | ArgParsingError::Color(err) => write!(f, "unexpected color value: {}", err), 202 | ArgParsingError::MissingValue(face_name) => write!( 203 | f, 204 | "error parsing color: 
missing color value for face '{}'", 205 | face_name 206 | ), 207 | ArgParsingError::LineNumberStyle(err) => { 208 | write!(f, "unexpected line number style: {}", err) 209 | } 210 | ArgParsingError::LargeDiffThreshold(err) => { 211 | write!(f, "invalid threshold value: {}", err) 212 | } 213 | } 214 | } 215 | } 216 | 217 | impl FromStr for FaceName { 218 | type Err = ArgParsingError; 219 | fn from_str(input: &str) -> Result { 220 | tryparse(input).map_err(ArgParsingError::FaceName) 221 | } 222 | } 223 | 224 | impl FromStr for AttributeName { 225 | type Err = ArgParsingError; 226 | fn from_str(input: &str) -> Result { 227 | tryparse(input).map_err(ArgParsingError::AttributeName) 228 | } 229 | } 230 | 231 | impl FromStr for LineNumberStyleOpt { 232 | type Err = ArgParsingError; 233 | fn from_str(input: &str) -> Result { 234 | tryparse(input).map_err(ArgParsingError::LineNumberStyle) 235 | } 236 | } 237 | 238 | fn ignore(_: T) {} 239 | 240 | fn parse_line_number_style( 241 | config: &mut AppConfig, 242 | value: Option<&str>, 243 | ) -> Result<(), ArgParsingError> { 244 | let style = if let Some(style) = value { 245 | style.parse::()?.0 246 | } else { 247 | LineNumberStyle::Compact 248 | }; 249 | config.line_numbers_style = Some(style); 250 | Ok(()) 251 | } 252 | 253 | fn parse_color_attributes<'a, Values>( 254 | config: &mut AppConfig, 255 | mut values: Values, 256 | face_name: FaceName, 257 | ) -> Result<(), ArgParsingError> 258 | where 259 | Values: Iterator, 260 | { 261 | use AttributeName::*; 262 | let face = face_name.get_face_mut(config); 263 | while let Some(value) = values.next() { 264 | let attribute_name = value.parse::()?; 265 | match attribute_name { 266 | Color(kind) => { 267 | if let Some(value) = values.next() { 268 | let ColorOpt(color) = value.parse::()?; 269 | match kind { 270 | FaceColor::Foreground => face.set_fg(color), 271 | FaceColor::Background => face.set_bg(color), 272 | }; 273 | } else { 274 | return 
Err(ArgParsingError::MissingValue(face_name));
                }
            }
            // Boolean attributes each toggle a single bit on the face.
            Italic(italic) => ignore(face.set_italic(italic)),
            Bold(bold) => ignore(face.set_bold(bold)),
            Intense(intense) => ignore(face.set_intense(intense)),
            Underline(underline) => ignore(face.set_underline(underline)),
            // `Reset` discards everything parsed so far for this face.
            Reset => *face = Default::default(),
        }
    }
    Ok(())
}

/// Parse one `--colors` argument of the form `<face>[:<attribute>...]`
/// and apply it to `config`.
///
/// An empty attribute list is valid: only the face name is required.
fn parse_color_arg(value: &str, config: &mut AppConfig) -> Result<(), ArgParsingError> {
    let mut pieces = value.split(':');
    if let Some(piece) = pieces.next() {
        // NOTE(review): the turbofish type annotation was garbled in this
        // dump; inference resolves it from `parse_color_attributes`.
        let face_name = piece.parse()?;
        parse_color_attributes(config, pieces, face_name)?;
    };
    Ok(())
}

/// Parse the numeric value of the hidden large-diff threshold flag into
/// `config.large_diff_threshold`.
fn parse_large_diff_threshold(value: &str, config: &mut AppConfig) -> Result<(), ArgParsingError> {
    match value.parse() {
        Ok(val) => {
            config.large_diff_threshold = val;
            Ok(())
        }
        // Only the message survives; the concrete parse-error type is dropped.
        Err(err) => Err(ArgParsingError::LargeDiffThreshold(err.to_string())),
    }
}

/// On `Err`, print the error and abort the whole process; otherwise return
/// `true` so option handlers can use this as their "keep parsing" result.
fn die_error(result: Result<(), ArgParsingError>) -> bool {
    if let Err(err) = result {
        eprintln!("{}", err);
        process::exit(-1);
    }
    true
}

/// Handle `--colors <spec>`: consume the flag and its mandatory value.
///
/// NOTE(review): the iterator type parameters were garbled in this dump and
/// restored as the `String` argument iterator built by `parse_config` —
/// confirm against the original source.
fn color(config: &mut AppConfig, args: &mut Peekable<impl Iterator<Item = String>>) -> bool {
    // The flag itself was peeked by the caller, so `next` cannot fail here.
    let arg = args.next().unwrap();
    if let Some(spec) = args.next() {
        die_error(parse_color_arg(&spec, config))
    } else {
        missing_arg(arg)
    }
}

/// Handle `--line-numbers [<style>]`: the style value is optional, a bare
/// flag selects the default style.
fn line_numbers(config: &mut AppConfig, args: &mut Peekable<impl Iterator<Item = String>>) -> bool {
    args.next();
    let spec = if let Some(spec) = args.next() {
        parse_line_number_style(config, Some(&*spec))
    } else {
        parse_line_number_style(config, None)
    };
    die_error(spec)
}

/// Handle the hidden large-diff flag: consume the flag and its mandatory value.
fn large_diff(config: &mut AppConfig, args: &mut Peekable<impl Iterator<Item = String>>) -> bool {
    let arg = args.next().unwrap();
    if let Some(spec) = args.next() {
        die_error(parse_large_diff_threshold(&spec, config))
    } else {
        missing_arg(arg)
    }
}

/// Handle the hidden debug flag: no value, just flips `config.debug`.
fn debug(config: &mut AppConfig, args: &mut Peekable<impl Iterator<Item = String>>) -> bool {
    config.debug = true;
    args.next();
    true
}

/// Report an unrecognized argument and exit with status 2 (never returns).
fn bad_arg(arg: &str) -> ! {
    eprintln!("bad argument: '{}'", arg);
    usage(2);
}

/// Dispatch on the next (peeked) argument; returns `false` when the argument
/// stream is exhausted, ending the `parse_config` loop.
fn parse_options(
    config: &mut AppConfig,
    args: &mut Peekable<impl Iterator<Item = String>>,
) -> bool {
    if let Some(arg) = args.peek() {
        match &arg[..] {
            // generic flags
            "-h" | "--help" => help(&arg[..] == "--help"),
            "-V" | "--version" => show_version(),

            // documented flags
            FLAG_COLOR => color(config, args),
            FLAG_LINE_NUMBERS => line_numbers(config, args),

            // hidden flags
            FLAG_TOO_LARGE => large_diff(config, args),
            FLAG_DEBUG => debug(config, args),

            arg => bad_arg(arg),
        }
    } else {
        false
    }
}

/// Build the application configuration from the command line.
///
/// `--help` anywhere wins before regular parsing starts. Running with a
/// terminal on stdin is an error: diffr expects a diff piped in.
pub fn parse_config() -> AppConfig {
    // A closure so the argument list can be traversed twice (scan + parse).
    let args = || std::env::args().skip(1);
    if args().any(|s| s == "--help") {
        help(true);
    }

    let mut config = AppConfig::default();
    let mut args = args().peekable();
    while parse_options(&mut config, &mut args) {}

    if std::io::stdin().is_terminal() {
        usage(-1);
    }
    config
}
-------------------------------------------------------------------------------- /src/diffr_lib/best_projection.rs: --------------------------------------------------------------------------------
use std::collections::hash_map::Entry::*;
use std::collections::HashMap;
use std::convert::TryFrom;

use super::TokenId;
use super::Tokenization;

/// A position in the search space of `optimize_partition`: the next index
/// to match in the LCS and the next index to consume in the full sequence.
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy, Hash)]
struct Coord {
    next_lcs: usize,
    next_seq: usize,
}

/// Index of `seq`: for each token id appearing in the LCS, the sorted list
/// of positions where it occurs in `seq`.
///
/// NOTE(review): the map's type parameters were garbled in this dump and
/// restored from usage (`TokenId` keys, `Vec<usize>` position lists).
#[derive(Debug)]
struct Context {
    seq_index: HashMap<TokenId, Vec<usize>>,
}

impl Context {
    /// Build the index: first register every token id present in `lcs`,
    /// then record the positions in `seq` of only those token ids.
    fn new<'a>(seq: &'a Tokenization<'a>, lcs: &'a Tokenization<'a>) -> Self {
        let mut seq_index = HashMap::new();
        for v in lcs.tokens() {
            match seq_index.entry(*v) {
                Occupied(_) => (),
                Vacant(e) => {
                    e.insert(vec![]);
                }
            }
        }
        for (i, v) in seq.tokens().iter().enumerate() {
            match seq_index.entry(*v) {
                Occupied(e) => {
                    e.into_mut().push(i);
                }
                // Token not in the LCS: no need to index it.
                Vacant(_) => (),
            }
        }
        Context { seq_index }
    }

    /// Positions of `tok` in `seq` that are `>= min_value` (empty slice when
    /// the token is unknown). Positions were pushed in increasing order, so
    /// `binary_search` finds the cut point; `Ok`/`Err` both give it.
    fn get_indexes(&self, tok: TokenId, min_value: usize) -> &[usize] {
        match self.seq_index.get(&tok) {
            Some(values) => {
                let min_idx = match values.binary_search(&min_value) {
                    Ok(i) | Err(i) => i,
                };
                &values[min_idx..]
            }
            None => &[],
        }
    }
}

/// The result of `optimize_partition`. This is mostly used by `shared_segments`.
///
/// NOTE(review): `path` element type garbled in this dump; restored as
/// `isize` from the `to_isize` conversions feeding `push_if_not_last`.
#[derive(Debug)]
pub struct NormalizationResult {
    pub path: Vec<isize>,
    pub starts_with_shared: bool,
}

impl NormalizationResult {
    /// The shared segments between both inputs of `optimize_partition`.
    /// The `seq` argument is the longest of the two inputs.
    pub fn shared_segments<'a>(
        &'a self,
        seq: &'a Tokenization,
    ) -> impl Iterator<Item = (usize, usize)> + 'a {
        SharedSegments::new(self, seq)
    }
}

/// Length of the run of equal tokens in `seq` and `lcs` starting at
/// `start_seq`/`start_lcs`, capped by whichever tail is shorter.
fn snake_len(seq: &Tokenization, lcs: &Tokenization, start_lcs: usize, start_seq: usize) -> usize {
    let lcs_len = lcs.nb_tokens() - start_lcs;
    let seq_len = seq.nb_tokens() - start_seq;
    let max_snake_len = lcs_len.min(seq_len);
    let mut snake_len = 0;
    let seq = &seq.tokens()[start_seq..start_seq + max_snake_len];
    let lcs = &lcs.tokens()[start_lcs..start_lcs + max_snake_len];

    while snake_len < max_snake_len && lcs[snake_len] == seq[snake_len] {
        snake_len += 1
    }
    snake_len
}

/// Minimize the number of elements when partitioning `seq` according to `lcs`.
/// `lcs` is a subsequence of `seq`.
/// Breadth-first search over `Coord`s: each BFS level adds one more segment
/// to the partition, so the first level that consumes all of `lcs` yields a
/// partition with the fewest segments. `prev` records the BFS tree so the
/// winning path can be walked backwards afterwards.
pub fn optimize_partition(seq: &Tokenization, lcs: &Tokenization) -> NormalizationResult {
    let context = Context::new(seq, lcs);
    let root = Coord {
        next_lcs: 0,
        next_seq: 0,
    };
    let target = Coord {
        next_lcs: lcs.nb_tokens(),
        next_seq: seq.nb_tokens(),
    };
    let mut frontier = vec![root];
    let mut new_frontier = vec![];
    let mut prev = HashMap::new();
    // Set as soon as some coordinate has matched all of `lcs`.
    let mut found_seq = None;
    while !frontier.is_empty() && found_seq.is_none() {
        new_frontier.clear();
        for &coord in frontier.iter() {
            if coord.next_lcs == target.next_lcs {
                found_seq = Some(coord.next_seq);
                if coord.next_seq == target.next_seq {
                    break;
                } else {
                    // TODO do something more clever here
                    continue;
                }
            }
            let start_lcs = coord.next_lcs;
            let lcs_len = lcs.nb_tokens() - start_lcs;
            // Skip candidate starts that cannot improve on an already
            // enqueued, longer snake from this coordinate.
            let mut last_enqueued_snake_len = 0;
            for start_seq in
                context.get_indexes(lcs.nth_token(to_isize(coord.next_lcs)), coord.next_seq)
            {
                // Not enough room left in `seq` for the remaining LCS tokens.
                if start_seq + lcs_len > seq.nb_tokens() {
                    break;
                }
                // The first token matches by construction of `get_indexes`,
                // hence the `1 +`.
                let snake_len = 1 + snake_len(seq, lcs, start_lcs + 1, start_seq + 1);
                let next_coord = Coord {
                    next_lcs: start_lcs + snake_len,
                    next_seq: start_seq + snake_len,
                };
                if last_enqueued_snake_len < snake_len || next_coord == target {
                    if next_coord.next_lcs == target.next_lcs
                        && (next_coord.next_seq == target.next_seq || found_seq.is_none())
                    {
                        found_seq = Some(next_coord.next_seq);
                    }
                    match prev.entry(next_coord) {
                        // Already reached at an earlier (or equal) BFS level.
                        Occupied(_) => continue,
                        Vacant(e) => e.insert(coord),
                    };
                    new_frontier.push(next_coord);
                    last_enqueued_snake_len = snake_len;
                }
            }
        }
        std::mem::swap(&mut frontier, &mut new_frontier)
    }

    // Rebuild the winning endpoint; `found_seq` may legitimately be `None`
    // when the search space was empty.
    let target = found_seq.map(|next_seq| Coord {
        next_lcs: lcs.nb_tokens(),
        next_seq,
    });
    let mut path = vec![];
    let mut starts_with_shared = false;
    let mut coord = target.as_ref();
    // Walk the `prev` chain backwards, emitting segment boundaries;
    // `path` is reversed at the end.
    let mut seq = seq.nb_tokens();
    let mut lcs = lcs.nb_tokens();
    while let Some(&coord_content) = coord {
        let next_seq = coord_content.next_seq;
        let next_lcs = coord_content.next_lcs;
        let snake_len = lcs - next_lcs;
        push_if_not_last(&mut path, to_isize(seq - snake_len));
        // If the boundary collapses with the previous one, the path starts
        // on a shared segment.
        starts_with_shared = !push_if_not_last(&mut path, to_isize(next_seq));

        coord = prev.get(&coord_content);

        seq = next_seq;
        lcs = next_lcs;
    }
    path.reverse();
    NormalizationResult {
        path,
        starts_with_shared,
    }
}

/// Push `val` unless it equals the last element; returns whether it pushed.
fn push_if_not_last(v: &mut Vec<isize>, val: isize) -> bool {
    let should_push = v.last() != Some(&val);
    if should_push {
        v.push(val);
    }
    should_push
}

/// Checked `usize -> isize` conversion; panics on overflow.
fn to_isize(input: usize) -> isize {
    isize::try_from(input).unwrap()
}

/// The shared segments between both inputs of `optimize_partition`.
/// Walks a `NormalizationResult::path` two boundaries at a time, yielding
/// the byte span of each shared segment in `seq`.
///
/// NOTE(review): the `normalization` field type was garbled in this dump;
/// restored as `&Vec<isize>` to match `NormalizationResult::path`.
struct SharedSegments<'a> {
    // Index of the next shared-segment start inside `normalization`.
    index: usize,
    normalization: &'a Vec<isize>,
    seq: &'a Tokenization<'a>,
}

impl<'a> SharedSegments<'a> {
    fn new(normalization: &'a NormalizationResult, seq: &'a Tokenization) -> Self {
        SharedSegments {
            // When the path starts with a unique segment, skip its leading
            // boundary so `index` always points at a shared run.
            index: if normalization.starts_with_shared {
                0
            } else {
                1
            },
            normalization: &normalization.path,
            seq,
        }
    }
}

impl<'a> Iterator for SharedSegments<'a> {
    type Item = (usize, usize);
    fn next(&mut self) -> Option<Self::Item> {
        if self.index + 1 < self.normalization.len() {
            let prev = self.normalization[self.index];
            let curr = self.normalization[self.index + 1];
            // Boundaries are token indices; convert them to byte offsets:
            // start of the first token, end of the last token of the run.
            let from = self.seq.nth_span(prev).0;
            let to = self.seq.nth_span(curr - 1).1;
            // Shared and unique segments alternate, hence the stride of 2.
            self.index += 2;
            Some((from, to))
        } else {
            None
        }
    }
}
-------------------------------------------------------------------------------- /src/diffr_lib/mod.rs: --------------------------------------------------------------------------------
//! Algorithms to compute diffs.
//!
//! This module implements various algorithms described in E. Myers
//! paper: [An O(ND) Difference Algorithm and Its
//! Variations](http://www.xmailserver.org/diff2.pdf).
//!
//! The main entrypoint is `diff`, which allows to compute the longest
//! common subsequence between two sequences of byte slices.

use bstr::ByteSlice;
use std::collections::hash_map::Entry::*;
use std::collections::HashMap;
use std::convert::TryFrom;
use std::fmt::Debug;
use std::fmt::{Error as FmtErr, Formatter};

mod best_projection;
pub use best_projection::optimize_partition;

/// A half-open byte range `(start, one_past_end)` into some token data.
type Span = (usize, usize);

/// Interned token identifier: equal token bytes get equal ids.
type TokenId = u64;

/// Interning table from a token's bytes to its `TokenId`.
pub struct TokenMap<'a>(HashMap<&'a [u8], TokenId>);

impl<'a> TokenMap<'a> {
    /// Build the map over several `(spans, data)` inputs at once, so ids are
    /// consistent across all of them.
    ///
    /// NOTE(review): the iterator item type was garbled in this dump;
    /// restored as `&'a Span` from the `tests_lib` call sites.
    pub fn new(input: &mut [(impl Iterator<Item = &'a Span>, &'a [u8])]) -> Self {
        let mut m = HashMap::new();
        // Ids are assigned in first-seen order.
        let mut counter = 0;
        for (spans, data) in input.iter_mut() {
            for span in spans {
                let data = &data[span.0..span.1];
                match m.entry(data) {
                    Vacant(e) => {
                        e.insert(counter);
                        counter += 1
                    }
                    Occupied(_) => {}
                }
            }
        }
        TokenMap(m)
    }

    /// Look up an interned token; panics if `slice` was never interned
    /// (internal invariant: `Tokenization::new` only queries known tokens).
    fn get(&self, slice: &'a [u8]) -> TokenId {
        *self.0.get(slice).unwrap()
    }
}

/// A byte buffer together with its token spans and their interned ids.
///
/// NOTE(review): `token_ids` element type garbled in this dump; restored as
/// `TokenId` from `TokenMap::get`.
pub struct Tokenization<'a> {
    data: &'a [u8],
    spans: &'a [Span],
    token_ids: Vec<TokenId>,
}

impl Debug for Tokenization<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), FmtErr> {
        let Self { data, spans, .. } = self;
        // Pretty-print bytes lossily so non-UTF-8 data still formats.
        let data_pp = String::from_utf8_lossy(data);
        let tokens_pp = spans
            .iter()
            .map(|sref| String::from_utf8_lossy(&data[sref.0..sref.1]))
            .collect::<Vec<_>>();
        f.debug_struct("Tokenization")
            .field("data", &data_pp)
            .field("tokens", &tokens_pp)
            .finish()
    }
}

/// A sub-range `[start_index, one_past_end_index)` of a `Tokenization`,
/// in token indices.
struct TokenizationRange<'a> {
    t: &'a Tokenization<'a>,
    start_index: isize,
    one_past_end_index: isize,
}

impl<'a> Debug for TokenizationRange<'a> {
    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), FmtErr> {
        let Self {
            t: Tokenization { data, spans, .. },
            start_index,
            one_past_end_index,
        } = self;
        let data_pp = String::from_utf8_lossy(data);
        // Only the tokens inside the range are shown.
        let tokens_pp = spans[to_usize(*start_index)..to_usize(*one_past_end_index)]
            .iter()
            .map(|sref| String::from_utf8_lossy(&data[sref.0..sref.1]))
            .collect::<Vec<_>>();
        f.debug_struct("TokenizationRange")
            .field("data", &data_pp)
            .field("tokens", &tokens_pp)
            .finish()
    }
}

impl<'a> Tokenization<'a> {
    /// Resolve each span of `data` to its interned id via `token_map`.
    pub fn new(data: &'a [u8], spans: &'a [Span], token_map: &TokenMap) -> Self {
        let mut token_ids = Vec::with_capacity(spans.len());
        for span in spans {
            token_ids.push(token_map.get(&data[span.0..span.1]));
        }
        Tokenization {
            data,
            spans,
            token_ids,
        }
    }

    /// The underlying bytes.
    pub fn data(&self) -> &[u8] {
        self.data
    }

    /// Number of tokens.
    pub fn nb_tokens(&self) -> usize {
        self.spans.len()
    }

    /// Byte span of the `n`th token.
    pub fn nth_span(&self, n: isize) -> Span {
        self.spans[to_usize(n)]
    }

    /// All interned token ids, in order.
    pub fn tokens(&self) -> &[TokenId] {
        &self.token_ids
    }

    /// Interned id of the `n`th token.
    pub fn nth_token(&self, n: isize) -> TokenId {
        self.token_ids[to_usize(n)]
    }
}

impl<'a> TokenizationRange<'a> {
    /// View the whole of `t` as a range.
    fn new(t: &'a Tokenization<'a>) -> Self {
        TokenizationRange {
            t,
            start_index: 0,
            one_past_end_index: to_isize(t.spans.len()),
        }
    }

    /// Split `self` in two tokenizations:
    /// * the first one from the start to `lo`;
    /// * the second one from `hi` to the end.
    fn split_at(&self, lo: isize, hi: isize) -> (Self, Self) {
        let start = self.start_index;
        let end = self.one_past_end_index;
        // `lo`/`hi` are absolute token indices and must lie inside the range.
        assert!(start <= lo);
        assert!(lo <= hi);
        assert!(hi <= end);
        (
            TokenizationRange {
                one_past_end_index: lo,
                ..*self
            },
            TokenizationRange {
                start_index: hi,
                ..*self
            },
        )
    }

    /// Get `self`'s number of tokens.
    fn nb_tokens(&self) -> usize {
        to_usize(self.one_past_end_index - self.start_index)
    }

    /// Get `self`'s `n`th token.
    fn nth_token(&self, n: isize) -> TokenId {
        // `n` is relative to the range start.
        self.t.token_ids[to_usize(self.start_index + n)]
    }
}

/// A pair of `TokenizationRange`s to compare.
#[derive(Debug)]
pub struct DiffInput<'a> {
    added: TokenizationRange<'a>,
    removed: TokenizationRange<'a>,
    // 0 disables the cap; see `diff_sequences_bidirectional_snake`.
    large_diff_threshold: usize,
}

impl<'a> DiffInput<'a> {
    pub fn new(
        added: &'a Tokenization<'a>,
        removed: &'a Tokenization<'a>,
        large_diff_threshold: usize,
    ) -> Self {
        DiffInput {
            added: TokenizationRange::new(added),
            removed: TokenizationRange::new(removed),
            large_diff_threshold,
        }
    }

    /// Rebuild a `DiffInput` covering the full ranges of both tokenizations.
    pub fn to_owned(&'a self) -> Self {
        Self::new(self.added(), self.removed(), self.large_diff_threshold)
    }

    pub fn added(&self) -> &Tokenization<'a> {
        self.added.t
    }

    pub fn removed(&self) -> &Tokenization<'a> {
        self.removed.t
    }

    /// Split both sides around the box `(x0, y0)..(x1, y1)`
    /// (`x` indexes `removed`, `y` indexes `added`).
    fn split_at(&self, (x0, y0): (isize, isize), (x1, y1): (isize, isize)) -> (Self, Self) {
        let (removed1, removed2) = self.removed.split_at(x0, x1);
        let (added1, added2) = self.added.split_at(y0, y1);

        (
            DiffInput {
                added: added1,
                removed: removed1,
                large_diff_threshold: self.large_diff_threshold,
            },
            DiffInput {
                added: added2,
                removed: removed2,
                large_diff_threshold: self.large_diff_threshold,
            },
        )
    }

    /// Myers' `N`: number of removed-side tokens.
    fn n(&self) -> usize {
        self.removed.nb_tokens()
    }

    /// Myers' `M`: number of added-side tokens.
    fn m(&self) -> usize {
        self.added.nb_tokens()
    }

    /// Myers' sequence `a` (removed side).
    fn seq_a(&self, index: isize) -> TokenId {
        self.removed.nth_token(index)
    }

    /// Myers' sequence `b` (added side).
    fn seq_b(&self, index: isize) -> TokenId {
        self.added.nth_token(index)
    }
}

/// State of one direction of the Myers search: the `V` array of furthest
/// `x` per diagonal `k`, offset by `max` so `k` may be negative.
struct DiffTraversal<'a> {
    v: &'a mut [isize],
    max: usize,
    _end: (isize, isize),
}

impl<'a> DiffTraversal<'a> {
    /// `forward` selects the search direction; `v` must hold at least
    /// `2 * max + 1` entries (asserted).
    fn from_slice(input: &'a DiffInput<'a>, v: &'a mut [isize], forward: bool, max: usize) -> Self {
        let start = (input.removed.start_index, input.added.start_index);
        let end = (
            input.removed.one_past_end_index,
            input.added.one_past_end_index,
        );
        assert!(max * 2 < v.len());
        // Backward searches swap the roles of start and end.
        let (start, end) = if forward { (start, end) } else { (end, start) };
        let mut res = DiffTraversal { v, max, _end: end };
        if max != 0 {
            // Seed diagonal k = 1 with the range-relative start position.
            *res.v_mut(1) = start.0 - input.removed.start_index
        }
        res
    }

    #[cfg(test)]
    fn from_vector(
        input: &'a DiffInput<'a>,
        v: &'a mut Vec<isize>,
        forward: bool,
        max: usize,
    ) -> Self {
        v.resize(max * 2 + 1, 0);
        Self::from_slice(input, v, forward, max)
    }

    /// Furthest `x` recorded for diagonal `k` (may be negative).
    fn v(&self, index: isize) -> isize {
        self.v[to_usize(index + to_isize(self.max))]
    }

    fn v_mut(&mut self, index: isize) -> &mut isize {
        &mut self.v[to_usize(index + to_isize(self.max))]
    }
}

/// One forward step (`d` edits) of the basic Myers algorithm; returns the
/// edit distance when the end point is reached on this step.
#[cfg(test)]
fn diff_sequences_kernel_forward(
    input: &DiffInput,
    ctx: &mut DiffTraversal,
    d: usize,
) -> Option<usize> {
    let n = to_isize(input.n());
    let m = to_isize(input.m());
    assert!(d < ctx.max);
    let d = to_isize(d);
    for k in (-d..=d).step_by(2) {
        // Extend from whichever neighbouring diagonal reaches further.
        let mut x = if k == -d || k != d && ctx.v(k - 1) < ctx.v(k + 1) {
            ctx.v(k + 1)
        } else {
            ctx.v(k - 1) + 1
        };
        let mut y = x - k;
        // Follow the snake of equal tokens.
        while x < n && y < m && input.seq_a(x) == input.seq_b(y) {
            x += 1;
            y += 1;
        }
        *ctx.v_mut(k) = x;
        if ctx._end == (x, y) {
            return Some(to_usize(d));
        }
    }
    None
}

/// One backward step (`d` edits) of the basic Myers algorithm; mirror image
/// of `diff_sequences_kernel_forward`.
#[cfg(test)]
fn diff_sequences_kernel_backward(
    input: &DiffInput,
    ctx: &mut DiffTraversal,
    d: usize,
) -> Option<usize> {
    let n = to_isize(input.n());
    let m = to_isize(input.m());
    let delta = n - m;
    assert!(d < ctx.max);
    let d = to_isize(d);
    for k in (-d..=d).step_by(2) {
        let mut x = if k == -d || k != d && ctx.v(k + 1) < ctx.v(k - 1) {
            ctx.v(k + 1)
        } else {
            ctx.v(k - 1) + 1
        };
        // Backward diagonals are shifted by `delta`.
        let mut y = x - (k + delta);
        while 0 < x && 0 < y && input.seq_a(x - 1) == input.seq_b(y - 1) {
            x -= 1;
            y -= 1;
        }
        *ctx.v_mut(k) = x - 1;
        if ctx._end == (x, y) {
            return Some(to_usize(d));
        }
    }
    None
}

/// A wrapper around a vector of bytes that keeps track of end of lines.
#[derive(Debug, Default)]
pub struct LineSplit {
    data: Vec<u8>,
    // Byte length of each stored line; lengths sum to `data.len()`.
    line_lengths: Vec<usize>,
}

impl LineSplit {
    /// Iterate over `(start, one_past_end)` byte offsets of each line.
    pub fn iter(&self) -> impl Iterator<Item = (usize, usize)> + '_ {
        LineSplitIter {
            line_split: self,
            index: 0,
            start_of_slice: 0,
        }
    }

    pub fn data(&self) -> &[u8] {
        &self.data
    }

    /// Append `line`; if the buffer does not currently end with `\n`, the
    /// bytes are merged into the last (unterminated) line instead of
    /// starting a new one.
    pub fn append_line(&mut self, line: &[u8]) {
        if self.data.last().cloned() == Some(b'\n') {
            self.line_lengths.push(line.len());
        } else {
            match self.line_lengths.last_mut() {
                Some(len) => *len += line.len(),
                None => self.line_lengths.push(line.len()),
            }
        }
        self.data.extend_from_slice(line)
    }

    pub fn clear(&mut self) {
        self.data.clear();
        self.line_lengths.clear();
    }

    /// Total number of stored bytes (not the number of lines).
    pub fn len(&self) -> usize {
        self.data.len()
    }
}

/// Iterator state for `LineSplit::iter`: walks `line_lengths`, accumulating
/// byte offsets.
struct LineSplitIter<'a> {
    line_split: &'a LineSplit,
    start_of_slice: usize,
    index: usize,
}

impl<'a> Iterator for LineSplitIter<'a> {
    type Item = (usize, usize);
    fn next(&mut self) -> Option<Self::Item> {
        let &mut LineSplitIter {
            line_split:
                LineSplit {
                    data: _,
                    line_lengths,
                },
            index,
            start_of_slice,
        } = self;
        if index < line_lengths.len() {
            let len = line_lengths[index];
            self.start_of_slice += len;
            self.index += 1;
            Some((start_of_slice, start_of_slice + len))
        } else {
            None
        }
    }
}

/// A pair of spans with the same content in two different slices.
#[derive(Clone, Debug, Default)]
pub struct Snake {
    /// The start of the span in the removed bytes.
    pub x0: isize,

    /// The start of the span in the added bytes.
    pub y0: isize,

    /// The length of the span.
    pub len: isize,
}

impl Snake {
    /// Builder-style setter for the two start coordinates.
    fn from(mut self, x0: isize, y0: isize) -> Self {
        self.x0 = x0;
        self.y0 = y0;
        self
    }

    /// Builder-style setter for the length.
    fn len(mut self, len: isize) -> Self {
        self.len = len;
        self
    }
}

/// One step (`d`) of the bidirectional Myers search: extend the forward
/// front, then the backward front, and report the middle snake as soon as
/// the fronts overlap (per the linear-space refinement of Myers' paper).
fn diff_sequences_kernel_bidirectional(
    input: &DiffInput,
    ctx_fwd: &mut DiffTraversal,
    ctx_bwd: &mut DiffTraversal,
    d: usize,
) -> Option<(Snake, isize)> {
    let n = to_isize(input.n());
    let m = to_isize(input.m());
    let delta = n - m;
    // With an odd delta, overlap can only be detected on the forward pass.
    let odd = delta % 2 != 0;
    assert!(d < ctx_fwd.max);
    assert!(d < ctx_bwd.max);
    let d = to_isize(d);
    let mut k = -d;
    while k <= d {
        let mut x = if k == -d || k != d && ctx_fwd.v(k - 1) < ctx_fwd.v(k + 1) {
            ctx_fwd.v(k + 1)
        } else {
            ctx_fwd.v(k - 1) + 1
        };
        let mut y = x - k;
        let (x0, y0) = (x, y);
        while x < n && y < m && input.seq_a(x) == input.seq_b(y) {
            x += 1;
            y += 1;
        }
        if odd && (k - delta).abs() < d && x > ctx_bwd.v(k - delta) {
            return Some((Snake::default().from(x0, y0).len(x - x0), 2 * d - 1));
        }
        *ctx_fwd.v_mut(k) = x;

        k += 2;
    }
    let mut k = -d;
    while k <= d {
        let mut x = if k == -d || k != d && ctx_bwd.v(k + 1) < ctx_bwd.v(k - 1) {
            ctx_bwd.v(k + 1)
        } else {
            ctx_bwd.v(k - 1) + 1
        };
        let mut y = x - (k + delta);
        let x1 = x;
        while 0 < x && 0 < y && input.seq_a(x - 1) == input.seq_b(y - 1) {
            x -= 1;
            y -= 1;
        }
        if !odd && (k + delta).abs() <= d && x - 1 < ctx_fwd.v(k + delta) {
            return Some((Snake::default().from(x, y).len(x1 - x), 2 * d));
        }
        *ctx_bwd.v_mut(k) = x - 1;

        k += 2;
    }
    None
}

/// Compute the length of the edit script for `input`.
/// This is the forward version.
#[cfg(test)]
fn diff_sequences_simple_forward(input: &DiffInput, v: &mut Vec<isize>) -> usize {
    diff_sequences_simple(input, v, true)
}

/// Compute the length of the edit script for `input`.
/// This is the backward version.
#[cfg(test)]
fn diff_sequences_simple_backward(input: &DiffInput, v: &mut Vec<isize>) -> usize {
    diff_sequences_simple(input, v, false)
}

/// Run the basic (quadratic-space-free, single-direction) Myers search in
/// the direction selected by `forward`, returning the edit distance.
#[cfg(test)]
fn diff_sequences_simple(input: &DiffInput, v: &mut Vec<isize>, forward: bool) -> usize {
    // The distance can never exceed n + m (delete everything, insert everything).
    let max_result = input.n() + input.m();
    let ctx = &mut DiffTraversal::from_vector(input, v, forward, max_result);
    (0..max_result)
        .filter_map(|d| {
            if forward {
                diff_sequences_kernel_forward(input, ctx, d)
            } else {
                diff_sequences_kernel_backward(input, ctx, d)
            }
        })
        .next()
        .unwrap_or(max_result)
}

/// Compute the longest common subsequence for `input` into `dst`.
pub fn diff(input: &DiffInput, v: &mut Vec<isize>, dst: &mut Vec<Snake>) {
    dst.clear();
    // Explicit work stack instead of recursion (divide-and-conquer around
    // each middle snake); `v` is the scratch buffer shared by all searches.
    enum Task<'a> {
        Diff(DiffInput<'a>),
        PushSnake(Snake),
    }
    use Task::*;

    let mut todo = vec![Diff(input.to_owned())];
    while let Some(task) = todo.pop() {
        match task {
            Diff(input) => {
                let n = to_isize(input.n());
                // A side with no tokens left cannot contribute any snake.
                fn trivial_diff(tok: &TokenizationRange) -> bool {
                    tok.one_past_end_index <= tok.start_index
                }

                if trivial_diff(&input.removed) || trivial_diff(&input.added) {
                    continue;
                }

                let snake = diff_sequences_bidirectional_snake(&input, v);
                if let Some((snake @ Snake { x0, y0, len }, d)) = snake {
                    if 1 < d {
                        // Recurse on both sides of the middle snake; pushed
                        // in reverse so snakes land in `dst` in order.
                        let (input1, input2) = input.split_at((x0, y0), (x0 + len, y0 + len));
                        todo.push(Diff(input2));
                        if len != 0 {
                            todo.push(PushSnake(snake));
                        }
                        todo.push(Diff(input1));
                    } else {
                        // d <= 1: at most one edit, solvable directly.
                        let SplittingPoint { sp, dx, dy } = find_splitting_point(&input);
                        let x0 = input.removed.start_index;
                        let y0 = input.added.start_index;
                        if sp != 0 {
                            dst.push(Snake::default().from(x0, y0).len(sp));
                        }
                        let len = n - sp - dx;
                        if len != 0 {
                            dst.push(Snake::default().from(x0 + sp + dx, y0 + sp + dy).len(len));
                        }
                    }
                }
            }
            PushSnake(snake) => dst.push(snake),
        }
    }
}

/// Result of `find_splitting_point`: `sp` is the length of the common
/// prefix; `dx`/`dy` tell which side carries the single extra token.
struct SplittingPoint {
    sp: isize,
    dx: isize,
    dy: isize,
}

// Find the splitting point when two sequences differ by one element.
fn find_splitting_point(input: &DiffInput) -> SplittingPoint {
    use std::cmp::Ordering::*;

    let n = to_isize(input.n());
    let m = to_isize(input.m());
    // The longer side is the one holding the extra token.
    let (short, long, nb_tokens, dx, dy) = match n.cmp(&m) {
        Less => (&input.removed, &input.added, n, 0, 1),
        Greater => (&input.added, &input.removed, m, 1, 0),
        Equal => (&input.added, &input.removed, m, 0, 0),
    };
    let mut sp = nb_tokens;
    for i in 0..nb_tokens {
        if long.nth_token(i) != short.nth_token(i) {
            sp = i;
            break;
        }
    }
    SplittingPoint { sp, dx, dy }
}

/// Compute the length of the edit script for `input`.
/// This is the bidirectional version.
#[cfg(test)]
fn diff_sequences_bidirectional(input: &DiffInput, v: &mut Vec<isize>) -> usize {
    if input.n() + input.m() == 0 {
        return 0;
    }
    to_usize(diff_sequences_bidirectional_snake(input, v).unwrap().1)
}

/// Find the middle snake of `input` with the bidirectional Myers search,
/// returning it together with the edit distance, or `None` when the search
/// was cut short by `large_diff_threshold`.
fn diff_sequences_bidirectional_snake(
    input: &DiffInput,
    v: &mut Vec<isize>,
) -> Option<(Snake, isize)> {
    let mut max = (input.n() + input.m() + 1) / 2 + 1;
    // Cap the number of steps on huge diffs; 0 means "no cap".
    if input.large_diff_threshold > 0 {
        max = max.min(input.large_diff_threshold);
    }
    let iter_len = 2 * max + 1;
    // One V array for each direction, carved out of the same scratch buffer.
    v.resize(2 * iter_len, 0);

    let (v1, v2) = v.split_at_mut(iter_len);
    let ctx_fwd = &mut DiffTraversal::from_slice(input, v1, true, max);
    let ctx_bwd = &mut DiffTraversal::from_slice(input, v2, false, max);
    let result = (0..max)
        .filter_map(|d| diff_sequences_kernel_bidirectional(input, ctx_fwd, ctx_bwd, d))
        .next();
    match result {
        Some(mut result) => {
            // The kernels work in range-relative coordinates; shift the
            // snake back into absolute token indices.
            result.0.x0 += input.removed.start_index;
            result.0.y0 += input.added.start_index;
            Some(result)
        }
        None => None,
    }
}

/// Checked in debug builds, raw cast in release (hot path).
fn to_isize(input: usize) -> isize {
    if cfg!(debug_assertions) {
        isize::try_from(input).unwrap()
    } else {
        input as _
    }
}

/// Checked in debug builds, raw cast in release (hot path).
fn to_usize(input: isize) -> usize {
    if cfg!(debug_assertions) {
        usize::try_from(input).unwrap()
    } else {
        input as _
    }
}

/// Coarse character class used to cut the input into tokens.
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
enum TokenKind {
    Other,
    Word,
    Spaces,
}

/// Tokenize data from `src` from the position `ofs` into `tokens`.
pub fn tokenize(src: &[u8], ofs: usize, tokens: &mut Vec<Span>) {
    // Only emit non-empty spans.
    let mut push = |lo: usize, hi: usize| {
        if lo < hi {
            tokens.push((lo, hi))
        }
    };
    let mut kind = TokenKind::Other;
    let mut lo = ofs;
    #[allow(clippy::needless_range_loop)]
    for (grapheme_start, _, g) in src[ofs..].grapheme_indices() {
        // `grapheme_indices` offsets are relative to the subslice.
        let hi = grapheme_start + ofs;
        let oldkind = kind;
        kind = classify_grapheme(g);
        // A token ends on a class change; `Other` graphemes are never
        // merged, each one is its own token.
        if kind != oldkind || oldkind == TokenKind::Other {
            push(lo, hi);
            lo = hi
        }
    }
    // Flush the trailing token, if any.
    push(lo, src.len());
}

/// Classify a grapheme by its first char: word characters (alphanumeric or
/// `_`), horizontal whitespace, or anything else.
fn classify_grapheme(g: &str) -> TokenKind {
    let first_char = g.chars().next().unwrap_or_default();
    if first_char.is_alphanumeric() || first_char == '_' {
        TokenKind::Word
    } else if first_char == ' ' || first_char == '\t' {
        TokenKind::Spaces
    } else {
        TokenKind::Other
    }
}

#[cfg(test)]
mod tests_lib;
-------------------------------------------------------------------------------- /src/diffr_lib/tests_lib.rs: --------------------------------------------------------------------------------
use super::*;
use DiffKind::*;

// Test-only convenience: byte span of the `n`th token of a range.
impl<'a> TokenizationRange<'a> {
    fn nth_span(&self, n: isize) -> Span {
        self.t.spans[to_usize(self.start_index + n)]
    }
}

/// Which side of the diff a test segment belongs to.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum DiffKind {
    Keep,
    Added,
    Removed,
}

fn string_of_bytes(buf: &[u8]) -> String {
    String::from_utf8_lossy(buf).into()
}

/// Render each token span of `buf` as a lossy string.
///
/// NOTE(review): generic parameters were garbled in this dump; restored from
/// the call sites (`Item = Span`).
fn to_strings<It>(buf: &[u8], tokens: It) -> Vec<String>
where
    It: Iterator<Item = Span>,
{
    mk_vec(tokens.map(|range| string_of_bytes(&buf[range.0..range.1])))
}

/// Collect an iterator into a `Vec` (helper to keep test assertions short).
fn mk_vec<T, It>(it: It) -> Vec<T>
where
    It: Iterator<Item = T>,
{
    it.collect()
}

/// Bytes of the `idx`th token of `input`.
fn nth_token<'a>(input: &'a TokenizationRange, idx: isize) -> &'a [u8] {
    let span = input.nth_span(idx);
&input.t.data()[span.0..span.1] 38 | } 39 | 40 | fn compress_path(values: &Vec<(Vec, DiffKind)>) -> Vec<(Vec, DiffKind)> { 41 | let mut values = values.clone(); 42 | let mut it = values.iter_mut(); 43 | let mut result = vec![]; 44 | let mut current = it.next(); 45 | for next in it { 46 | match current { 47 | Some(ref mut c) => { 48 | if c.1 == next.1 { 49 | c.0.extend_from_slice(&next.0) 50 | } else { 51 | result.push(c.clone()); 52 | *c = next; 53 | } 54 | } 55 | None => panic!(), 56 | } 57 | } 58 | 59 | if let Some(last) = current { 60 | result.push(last.clone()); 61 | } 62 | result 63 | } 64 | 65 | fn dummy_tokenize(data: &[u8]) -> Vec { 66 | let mut toks = vec![]; 67 | for i in 0..data.len() { 68 | toks.push((i, i + 1)); 69 | } 70 | toks 71 | } 72 | 73 | fn really_tokenize(data: &[u8]) -> Vec { 74 | let mut toks = vec![]; 75 | tokenize(data, 0, &mut toks); 76 | toks 77 | } 78 | 79 | fn diff_sequences_test(expected: &[(&[u8], DiffKind)], seq_a: &[u8], seq_b: &[u8]) { 80 | diff_sequences_test_aux(expected, seq_a, seq_b, dummy_tokenize) 81 | } 82 | 83 | fn diff_sequences_test_tokenized(expected: &[(&[u8], DiffKind)], seq_a: &[u8], seq_b: &[u8]) { 84 | diff_sequences_test_aux(expected, seq_a, seq_b, really_tokenize) 85 | } 86 | 87 | fn diff_sequences_test_aux( 88 | expected: &[(&[u8], DiffKind)], 89 | seq_a: &[u8], 90 | seq_b: &[u8], 91 | tok: impl Fn(&[u8]) -> Vec, 92 | ) { 93 | let toks_a = tok(seq_a); 94 | let toks_b = tok(seq_b); 95 | let m = TokenMap::new(&mut [(toks_a.iter(), &seq_a), (toks_b.iter(), &seq_b)]); 96 | let tok_a = Tokenization::new(seq_a, &toks_a, &m); 97 | let tok_b = Tokenization::new(seq_b, &toks_b, &m); 98 | let input = DiffInput::new(&tok_b, &tok_a, 123); 99 | let input_r = DiffInput::new(&tok_a, &tok_b, 123); 100 | 101 | let mut v = vec![]; 102 | let result = diff_sequences_simple_forward(&input, &mut v); 103 | let result_bwd = diff_sequences_simple_backward(&input, &mut v); 104 | let result_bidi = diff_sequences_bidirectional(&input, &mut 
v); 105 | let result_r = diff_sequences_simple(&input_r, &mut v, true); 106 | let result_r_bwd = diff_sequences_simple(&input_r, &mut v, false); 107 | let result_r_bidi = diff_sequences_bidirectional(&input_r, &mut v); 108 | 109 | let mut result_complete = vec![]; 110 | diff(&input, &mut v, &mut result_complete); 111 | let mut result_r_complete = vec![]; 112 | diff(&input_r, &mut v, &mut result_r_complete); 113 | 114 | let d = expected 115 | .iter() 116 | .map(|(buf, kind)| match kind { 117 | Added | Removed => tok(buf).len(), 118 | Keep => 0, 119 | }) 120 | .sum::(); 121 | 122 | assert_eq!(d, result); 123 | assert_eq!(d, result_r); 124 | assert_eq!(d, result_bwd); 125 | assert_eq!(d, result_r_bwd); 126 | assert_eq!(d, result_bidi); 127 | assert_eq!(d, result_r_bidi); 128 | 129 | for complete in &[&result_complete, &result_r_complete] { 130 | let all_snakes = complete.iter().fold(0, |acc, s| acc + s.len); 131 | 132 | let d_calc = input.n() + input.m() - 2 * to_usize(all_snakes); 133 | assert_eq!(d, d_calc); 134 | } 135 | // construct edit script 136 | let mut x0 = 0; 137 | let mut y0 = 0; 138 | let mut script = vec![]; 139 | for snake in result_complete { 140 | let Snake { 141 | x0: x, y0: y, len, .. 
142 | } = snake; 143 | 144 | if x0 != x { 145 | assert!(x0 < x); 146 | let lo = input.removed.nth_span(x0).0; 147 | let hi = input.removed.nth_span(x - 1).1; 148 | script.push((input.removed.t.data[lo..hi].to_vec(), Removed)); 149 | } 150 | if y0 != y { 151 | assert!(y0 < y); 152 | let lo = input.added.nth_span(y0).0; 153 | let hi = input.added.nth_span(y - 1).1; 154 | script.push((input.added.t.data[lo..hi].to_vec(), Added)); 155 | } 156 | 157 | let mut added = vec![]; 158 | let mut removed = vec![]; 159 | for i in 0..len { 160 | let r = input.removed.nth_span(x + i); 161 | removed.extend_from_slice(&input.removed.t.data[r.0..r.1]); 162 | let r = input.added.nth_span(y + i); 163 | added.extend_from_slice(&input.added.t.data[r.0..r.1]); 164 | } 165 | 166 | assert_eq!(added, removed, "{:?}", snake); 167 | script.push((added.to_vec(), Keep)); 168 | 169 | x0 = x + len; 170 | y0 = y + len; 171 | } 172 | 173 | let x = input.removed.nb_tokens(); 174 | let x0 = to_usize(x0); 175 | if x0 != x { 176 | assert!(x0 < x); 177 | script.push((input.removed.t.data[x0..x].to_vec(), Removed)); 178 | } 179 | let y = input.added.nb_tokens(); 180 | let y0 = to_usize(y0); 181 | if y0 != y { 182 | assert!(y0 < y); 183 | script.push((input.added.t.data[y0..y].to_vec(), Added)); 184 | } 185 | 186 | assert_eq!( 187 | &*mk_vec(expected.iter().map(|p| (string_of_bytes(p.0), p.1))), 188 | &*mk_vec(script.iter().map(|p| (string_of_bytes(&p.0), p.1))), 189 | ); 190 | } 191 | 192 | #[test] 193 | fn compress_path_test() { 194 | let test = |expected: Vec<(Vec, DiffKind)>, input| { 195 | assert_eq!(expected, compress_path(&input)); 196 | }; 197 | 198 | test(vec![], vec![]); 199 | 200 | test( 201 | vec![(b"abc".to_vec(), Added)], 202 | vec![(b"abc".to_vec(), Added)], 203 | ); 204 | test( 205 | vec![(b"abcdef".to_vec(), Added)], 206 | vec![(b"abc".to_vec(), Added), (b"def".to_vec(), Added)], 207 | ); 208 | test( 209 | vec![(b"abc".to_vec(), Added), (b"def".to_vec(), Removed)], 210 | 
/// The classic example from Myers' diff paper: "abcabba" vs "cbabac".
#[test]
fn diff_sequences_test_1() {
    diff_sequences_test(
        &[
            (b"a", Removed),
            (b"c", Added),
            (b"b", Keep),
            (b"c", Removed),
            (b"ab", Keep),
            (b"b", Removed),
            (b"a", Keep),
            (b"c", Added),
        ],
        b"abcabba",
        b"cbabac",
    )
}

/// Insertions interleaved before a shared run, plus a trailing removal.
#[test]
fn diff_sequences_test_2() {
    diff_sequences_test(
        &[(b"xaxbx", Added), (b"abc", Keep), (b"y", Removed)],
        b"abcy",
        b"xaxbxabc",
    )
}

/// Completely disjoint sequences: everything removed, everything added.
#[test]
fn diff_sequences_test_3() {
    diff_sequences_test(&[(b"abc", Removed), (b"defgh", Added)], b"abc", b"defgh")
}

/// Disjoint prefixes with a shared suffix.
#[test]
fn diff_sequences_test_4() {
    diff_sequences_test(
        &[(b"abc", Removed), (b"defg", Added), (b"zzz", Keep)],
        b"abczzz",
        b"defgzzz",
    )
}

/// Shared prefix with disjoint suffixes.
#[test]
fn diff_sequences_test_5() {
    diff_sequences_test(
        &[(b"zzz", Keep), (b"abcd", Removed), (b"efgh", Added)],
        b"zzzabcd",
        b"zzzefgh",
    )
}

/// Empty "removed" side: the whole added sequence is an insertion.
#[test]
fn diff_sequences_test_6() {
    diff_sequences_test(&[(b"abcd", Added)], b"", b"abcd")
}

/// Both sides empty: the edit script is empty.
#[test]
fn diff_sequences_test_7() {
    diff_sequences_test(&[], b"", b"")
}
/// Fixed-size arrays compare element-wise via `PartialEq`.
#[test]
fn range_equality_test() {
    let reference = [1, 2, 3];
    assert!(reference == [1, 2, 3]);
    assert!(reference != [1, 2, 4]);
}
/// Brute-force oracle: all longest common subsequences of `seq_a` and `seq_b`.
///
/// Exponential in `seq_a.len()` (it enumerates every one of the 2^n
/// subsequences), so it is only suitable as a reference for tiny inputs.
fn get_lcs(seq_a: &[u8], seq_b: &[u8]) -> Vec<Vec<u8>> {
    /// Enumerate all 2^n subsequences of `seq_a`, divide-and-conquer style.
    fn subsequences(seq_a: &[u8]) -> Vec<Vec<u8>> {
        let res: Vec<Vec<u8>> = {
            if seq_a.is_empty() {
                vec![vec![]]
            } else if seq_a.len() == 1 {
                vec![vec![], seq_a.to_owned()]
            } else {
                // Every subsequence is a subsequence of the left half
                // followed by one of the right half.
                let (seq_a1, seq_a2) = seq_a.split_at(seq_a.len() / 2);
                let mut res = vec![];
                for part1 in subsequences(seq_a1) {
                    for part2 in subsequences(seq_a2) {
                        let mut combined = part1.clone();
                        combined.extend_from_slice(&part2);
                        res.push(combined);
                    }
                }
                res
            }
        };
        assert_eq!(res.len(), 1 << seq_a.len());
        res
    }

    /// Is `subseq` a (not necessarily contiguous) subsequence of `seq`?
    /// Greedy matching against a shared iterator is correct here.
    fn is_subseq(subseq: &[u8], seq: &[u8]) -> bool {
        let mut it = seq.iter();
        subseq.iter().all(|target| it.any(|b| b == target))
    }

    let mut bests: Vec<Vec<u8>> = vec![];
    let mut best_len = 0;
    for subseq in subsequences(seq_a) {
        if subseq.len() < best_len || !is_subseq(&subseq, seq_b) {
            continue;
        }
        if best_len < subseq.len() {
            // Strictly longer: previous candidates are no longer maximal.
            bests.clear();
            best_len = subseq.len();
        }
        // Here subseq.len() == best_len, so it is always a candidate.
        bests.push(subseq)
    }
    bests
}

/// Sanity-check the brute-force LCS oracle itself.
#[test]
fn test_get_lcs() {
    let expected: &[u8] = b"cd";
    assert_eq!(expected, &**get_lcs(b"abcd", b"cdef").first().unwrap())
}
/// `to_usize` must panic when handed a negative value.
#[should_panic]
#[test]
fn to_usize_checked_negative_test() {
    to_usize(-1_isize);
}

/// A `LineSplit` built directly from data and per-line lengths round-trips.
#[test]
fn split_lines_test() {
    let input: &[u8] = b"abcd\nefgh\nij";
    // line_lengths: "abcd\n" (5) + "efgh\n" (5) + "ij" (2) == input.len()
    let split = LineSplit {
        data: input.to_vec(),
        line_lengths: vec![5, 5, 2],
    };
    check_split(input, &split)
}

/// `append_line` accepts arbitrary chunks (not necessarily ending at a
/// newline) and the result must still round-trip.
#[test]
fn split_lines_append_test() {
    let input: &[u8] = b"abcd\nefgh\nij";
    let mut split = LineSplit::default();
    split.append_line(&input[..3]);
    split.append_line(&input[3..6]);
    split.append_line(&input[6..]);
    check_split(input, &split)
}

/// Concatenating the `(lo, hi)` ranges yielded by `split.iter()` must
/// reconstruct `input` exactly.
fn check_split(input: &[u8], split: &LineSplit) {
    assert_eq!(
        input,
        &*split.iter().fold(vec![], |mut acc, (lo, hi)| {
            acc.extend_from_slice(&input[lo..hi]);
            acc
        })
    );
}
/// Regression test for Github issue 15 (variant 2): a line whose content
/// itself starts with "-"/"+" must not confuse the diff highlighting.
#[test]
fn issue15_2() {
    diff_sequences_test_tokenized(
        &[
            (b"-", Removed),
            (b"+", Added),
            (b" --include \'+ */\'", Keep),
            (b" ", Added),
            (b"\r\n", Keep),
        ],
        b"- --include '+ */'\r\n",
        b"+ --include '+ */' \r\n",
    )
}

/// Regression test for Github issue 27: segment boundaries around CRLF
/// line endings and long runs of identical tokens.
#[test]
fn issue27() {
    diff_sequences_test(
        &[
            (b"note: ", Keep),
            (b"AAA", Removed),
            (b"BBB CCC", Added),
            (b"\r\n", Keep),
        ],
        b"note: AAA\r\n",
        b"note: BBB CCC\r\n",
    );
    // A run of 15 identical tokens diffed against a run of 10.
    diff_sequences_test(
        &[(b"^", Added), (b"^^^^^^^^^^", Keep), (b"^^^^", Added)],
        b"^^^^^^^^^^",
        b"^^^^^^^^^^^^^^^",
    );
    diff_sequences_test(
        &[
            (b"a", Keep),
            (b"cbc", Added),
            (b"bcz", Keep),
            (b"c", Added),
            (b"z", Keep),
            (b"abz", Added),
        ],
        b"abczz",
        b"acbcbczczabz",
    );
}
646 | let seq = Tokenization::new(seq, &toks_seq, &m); 647 | let lcs = Tokenization::new(lcs, &toks_lcs, &m); 648 | let opt_result = optimize_partition(&seq, &lcs); 649 | let seq = TokenizationRange::new(&seq); 650 | let mut it = opt_result.path.iter().cloned(); 651 | let mut prev = match it.next() { 652 | None => { 653 | assert!(alternatives.iter().any(|e| e.expected.is_empty())); 654 | return; 655 | } 656 | Some(val) => val, 657 | }; 658 | let mut partition = vec![]; 659 | for i in it { 660 | let mut part = vec![]; 661 | for j in prev..i { 662 | part.extend_from_slice(nth_token(&seq, j)); 663 | } 664 | partition.push(part); 665 | prev = i; 666 | } 667 | assert!( 668 | alternatives.iter().any(|e| { 669 | let expected = e 670 | .expected 671 | .iter() 672 | .map(|slice| slice.to_vec()) 673 | .collect::>(); 674 | expected == &*partition 675 | && e.expected_starts_with_shared == opt_result.starts_with_shared 676 | }), 677 | "alternatives:\n\t{:?}\n\nactual:\n\t{:?}", 678 | &alternatives, 679 | (&partition, opt_result.starts_with_shared), 680 | ) 681 | } 682 | 683 | fn test_optimize_partition1( 684 | expected: &[&[u8]], 685 | expected_starts_with_shared: bool, 686 | seq: &[u8], 687 | lcs: &[u8], 688 | ) { 689 | let expected = vec![TestNormalizePartitionExpected { 690 | expected, 691 | expected_starts_with_shared, 692 | }]; 693 | test_optimize_alternatives(&expected, seq, lcs) 694 | } 695 | 696 | #[test] 697 | fn test_optimize_partition() { 698 | test_optimize_partition1(&[b"abcd"], true, b"abcd", b"abcd"); 699 | test_optimize_partition1(&[b"abcd"], false, b"abcd", b""); 700 | test_optimize_partition1(&[b"a", b"xyz", b"bc"], true, b"axyzbc", b"abc"); 701 | test_optimize_partition1(&[b"zab", b"a"], false, b"zaba", b"a"); 702 | test_optimize_partition1(&[b"k", b"a", b"xyz", b"bc"], false, b"kaxyzbc", b"abc"); 703 | test_optimize_partition1( 704 | &[b"k", b"a", b"xyz", b"bc", b"x"], 705 | false, 706 | b"kaxyzbcx", 707 | b"abc", 708 | ); 709 | test_optimize_partition1( 710 
/// How the line-number margin is sized when `--line-numbers` is active.
#[derive(Debug, Clone, Copy)]
pub enum LineNumberStyle {
    Compact,
    Aligned,
    Fixed(usize),
}

impl LineNumberStyle {
    /// Minimum number of columns this style reserves for a line number.
    fn min_width(&self) -> usize {
        if let LineNumberStyle::Fixed(width) = *self {
            width
        } else {
            // Compact and Aligned let the hunk header determine the width.
            0
        }
    }
}
/// Capture the current time, but only when timings are being collected.
fn now(do_timings: bool) -> Option<SystemTime> {
    match do_timings {
        true => Some(SystemTime::now()),
        false => None,
    }
}

/// Milliseconds elapsed since `time`; 0 when timing is disabled or the
/// clock went backwards (non-monotonic clocks make `elapsed` fail, and
/// such a short interval can safely be reported as 0).
fn duration_ms_since(time: &Option<SystemTime>) -> u128 {
    time.as_ref()
        .and_then(|t| t.elapsed().ok())
        .map(|elapsed| elapsed.as_millis())
        .unwrap_or(0)
}
colorspec 115 | } 116 | 117 | #[derive(Default)] 118 | struct ExecStats { 119 | time_computing_diff_ms: u128, 120 | time_lcs_ms: u128, 121 | time_opt_lcs_ms: u128, 122 | total_time_ms: u128, 123 | program_start: Option, 124 | } 125 | 126 | impl ExecStats { 127 | fn new(debug: bool) -> Self { 128 | ExecStats { 129 | time_computing_diff_ms: 0, 130 | time_lcs_ms: 0, 131 | time_opt_lcs_ms: 0, 132 | total_time_ms: 0, 133 | program_start: now(debug), 134 | } 135 | } 136 | 137 | /// Should we call SystemTime::now at all? 138 | fn do_timings(&self) -> bool { 139 | self.program_start.is_some() 140 | } 141 | 142 | fn stop(&mut self) { 143 | if self.do_timings() { 144 | self.total_time_ms = duration_ms_since(&self.program_start); 145 | } 146 | } 147 | 148 | fn report(&self) -> std::io::Result<()> { 149 | self.report_into(&mut std::io::stderr()) 150 | } 151 | 152 | fn report_into(&self, w: &mut W) -> std::io::Result<()> 153 | where 154 | W: std::io::Write, 155 | { 156 | const WORD_PADDING: usize = 35; 157 | const FIELD_PADDING: usize = 15; 158 | if self.do_timings() { 159 | let format_header = |name| format!("{} (ms)", name); 160 | let format_ratio = |dt: u128| { 161 | format!( 162 | "({:3.3}%)", 163 | 100.0 * (dt as f64) / (self.total_time_ms as f64) 164 | ) 165 | }; 166 | let mut report = |name: &'static str, dt: u128| { 167 | writeln!( 168 | w, 169 | "{:>w$} {:>f$} {:>f$}", 170 | format_header(name), 171 | dt, 172 | format_ratio(dt), 173 | w = WORD_PADDING, 174 | f = FIELD_PADDING, 175 | ) 176 | }; 177 | report("hunk processing time", self.time_computing_diff_ms)?; 178 | report("-- compute lcs", self.time_lcs_ms)?; 179 | report("-- optimize lcs", self.time_opt_lcs_ms)?; 180 | writeln!( 181 | w, 182 | "{:>w$} {:>f$}", 183 | format_header("total processing time"), 184 | self.total_time_ms, 185 | w = WORD_PADDING, 186 | f = FIELD_PADDING, 187 | )?; 188 | } 189 | Ok(()) 190 | } 191 | } 192 | 193 | struct HunkBuffer { 194 | v: Vec, 195 | diff_buffer: Vec, 196 | added_tokens: 
Vec<(usize, usize)>, 197 | removed_tokens: Vec<(usize, usize)>, 198 | line_number_info: Option, 199 | lines: LineSplit, 200 | config: AppConfig, 201 | margin: Vec, 202 | warning_lines: Vec, 203 | stats: ExecStats, 204 | } 205 | 206 | #[derive(Default)] 207 | struct Margin<'a> { 208 | lino_minus: usize, 209 | lino_plus: usize, 210 | margin: &'a mut [u8], 211 | half_margin: usize, 212 | } 213 | 214 | const MARGIN_TAB_STOP: usize = 8; 215 | 216 | impl<'a> Margin<'a> { 217 | fn new(header: &'a HunkHeader, margin: &'a mut [u8], config: &'a AppConfig) -> Self { 218 | let full_margin = header.width(config.line_numbers_style); 219 | let half_margin = full_margin / 2; 220 | 221 | // If line number is 0, the column is empty and 222 | // shouldn't be printed 223 | let margin_size = if header.minus_range.0 == 0 || header.plus_range.0 == 0 { 224 | half_margin 225 | } else { 226 | full_margin 227 | }; 228 | assert!(margin.len() >= margin_size); 229 | Margin { 230 | lino_plus: header.plus_range.0, 231 | lino_minus: header.minus_range.0, 232 | margin: &mut margin[..margin_size], 233 | half_margin, 234 | } 235 | } 236 | 237 | fn write_margin_padding(&mut self, out: &mut impl WriteColor) -> io::Result<()> { 238 | if self.margin.len() % MARGIN_TAB_STOP != 0 { 239 | write!(out, "\t")?; 240 | } 241 | Ok(()) 242 | } 243 | 244 | fn write_margin_changed( 245 | &mut self, 246 | is_plus: bool, 247 | config: &AppConfig, 248 | out: &mut impl WriteColor, 249 | ) -> io::Result<()> { 250 | let mut margin_buf = &mut self.margin[..]; 251 | let color; 252 | if is_plus { 253 | color = &config.added_face; 254 | if self.lino_minus != 0 { 255 | write!(margin_buf, "{:w$} ", ' ', w = self.half_margin)?; 256 | } 257 | write!(margin_buf, "{:w$}", self.lino_plus, w = self.half_margin)?; 258 | self.lino_plus += 1; 259 | } else { 260 | color = &config.removed_face; 261 | write!(margin_buf, "{:w$}", self.lino_minus, w = self.half_margin)?; 262 | if self.lino_plus != 0 { 263 | write!(margin_buf, " {:w$}", ' ', 
w = self.half_margin)?; 264 | } 265 | self.lino_minus += 1; 266 | }; 267 | output(self.margin, 0, self.margin.len(), color, out)?; 268 | if config.line_numbers_aligned() { 269 | self.write_margin_padding(out)?; 270 | } 271 | Ok(()) 272 | } 273 | 274 | fn write_margin_context( 275 | &mut self, 276 | config: &AppConfig, 277 | out: &mut impl WriteColor, 278 | ) -> io::Result<()> { 279 | if self.lino_minus != self.lino_plus { 280 | write!(out, "{:w$}", self.lino_minus, w = self.half_margin)?; 281 | } else { 282 | write!(out, "{:w$}", ' ', w = self.half_margin)?; 283 | } 284 | write!(out, " {:w$}", self.lino_plus, w = self.half_margin)?; 285 | if config.line_numbers_aligned() { 286 | self.write_margin_padding(out)?; 287 | } 288 | self.lino_minus += 1; 289 | self.lino_plus += 1; 290 | Ok(()) 291 | } 292 | } 293 | 294 | fn shared_spans(added_tokens: &Tokenization, diff_buffer: &[Snake]) -> Vec<(usize, usize)> { 295 | let mut shared_spans = vec![]; 296 | for snake in diff_buffer.iter() { 297 | for i in 0..snake.len { 298 | shared_spans.push(added_tokens.nth_span(snake.y0 + i)); 299 | } 300 | } 301 | shared_spans 302 | } 303 | 304 | const MAX_MARGIN: usize = 41; 305 | 306 | impl HunkBuffer { 307 | fn new(config: AppConfig) -> Self { 308 | let debug = config.debug; 309 | HunkBuffer { 310 | v: vec![], 311 | diff_buffer: vec![], 312 | added_tokens: vec![], 313 | removed_tokens: vec![], 314 | line_number_info: None, 315 | lines: Default::default(), 316 | config, 317 | margin: vec![0; MAX_MARGIN], 318 | warning_lines: vec![], 319 | stats: ExecStats::new(debug), 320 | } 321 | } 322 | 323 | // Returns the number of completely printed snakes 324 | fn paint_line( 325 | data: &[u8], 326 | &(data_lo, data_hi): &(usize, usize), 327 | no_highlight: &ColorSpec, 328 | highlight: &ColorSpec, 329 | shared: &mut Peekable, 330 | out: &mut Stream, 331 | ) -> io::Result<()> 332 | where 333 | Stream: WriteColor, 334 | Positions: Iterator, 335 | { 336 | let mut y = data_lo + 1; 337 | // XXX: skip 
    /// Runs `process` on the buffered hunk and accumulates its wall-clock
    /// duration into the hunk-processing counter (only when timings are
    /// enabled; `now` returns `None` otherwise and the delta maps to 0).
    fn process_with_stats<Stream>(&mut self, out: &mut Stream) -> io::Result<()>
    where
        Stream: WriteColor,
    {
        let start = now(self.stats.do_timings());
        let result = self.process(out);
        self.stats.time_computing_diff_ms += duration_ms_since(&start);
        result
    }
io::Result<()> 408 | where 409 | Stream: WriteColor, 410 | { 411 | let Self { 412 | v, 413 | diff_buffer, 414 | added_tokens, 415 | removed_tokens, 416 | line_number_info, 417 | lines, 418 | config, 419 | margin, 420 | warning_lines, 421 | stats, 422 | } = self; 423 | let mut margin = match line_number_info { 424 | Some(lni) => Margin::new(lni, margin, config), 425 | None => Default::default(), 426 | }; 427 | let data = lines.data(); 428 | let m = TokenMap::new(&mut [(removed_tokens.iter(), data), (added_tokens.iter(), data)]); 429 | let removed = Tokenization::new(data, removed_tokens, &m); 430 | let added = Tokenization::new(data, added_tokens, &m); 431 | let tokens = DiffInput::new(&added, &removed, config.large_diff_threshold); 432 | let start = now(stats.do_timings()); 433 | diffr_lib::diff(&tokens, v, diff_buffer); 434 | // TODO output the lcs directly out of `diff` instead 435 | let shared_spans = shared_spans(&added, diff_buffer); 436 | let lcs = Tokenization::new(data, &shared_spans, &m); 437 | stats.time_lcs_ms += duration_ms_since(&start); 438 | let start = now(stats.do_timings()); 439 | let normalized_lcs_added = optimize_partition(&added, &lcs); 440 | let normalized_lcs_removed = optimize_partition(&removed, &lcs); 441 | stats.time_opt_lcs_ms += duration_ms_since(&start); 442 | let mut shared_added = normalized_lcs_added.shared_segments(&added).peekable(); 443 | let mut shared_removed = normalized_lcs_removed.shared_segments(&removed).peekable(); 444 | let mut warnings = warning_lines.iter().peekable(); 445 | let defaultspec = ColorSpec::default(); 446 | 447 | for (i, range) in lines.iter().enumerate() { 448 | if let Some(&&nline) = warnings.peek() { 449 | if nline == i { 450 | let w = &lines.data()[range.0..range.1]; 451 | output(w, 0, w.len(), &defaultspec, out)?; 452 | warnings.next(); 453 | continue; 454 | } 455 | } 456 | let first = data[range.0]; 457 | match first { 458 | b'-' | b'+' => { 459 | let is_plus = first == b'+'; 460 | let (nhl, hl, 
    /// Record one added ("+") line of the current hunk.
    fn push_added(&mut self, line: &[u8]) {
        self.push_aux(line, true)
    }

    /// Record one removed ("-") line of the current hunk.
    fn push_removed(&mut self, line: &[u8]) {
        self.push_aux(line, false)
    }

    /// Strip escape codes from `line`, store it in the line buffer, and
    /// tokenize its content into the added or removed token list.
    fn push_aux(&mut self, line: &[u8], added: bool) {
        // XXX: skip leading token
        let mut ofs = self.lines.len() + 1;
        add_raw_line(&mut self.lines, line);
        // get back the line sanitized from escape codes:
        let line = &self.lines.data()[ofs..];
        // skip leading spaces
        ofs += line
            .iter()
            .take_while(|ch| ch.is_ascii_whitespace())
            .count();
        diffr_lib::tokenize(
            self.lines.data(),
            ofs,
            if added {
                &mut self.added_tokens
            } else {
                &mut self.removed_tokens
            },
        );
    }
// process hunks 538 | loop { 539 | stdin.read_until(b'\n', &mut buffer)?; 540 | if buffer.is_empty() { 541 | break; 542 | } 543 | 544 | let first = first_after_escape(&buffer); 545 | if in_hunk { 546 | hunk_line_number += 1; 547 | match first { 548 | Some(b'+') => self.push_added(&buffer), 549 | Some(b'-') => self.push_removed(&buffer), 550 | Some(b' ') => add_raw_line(&mut self.lines, &buffer), 551 | Some(b'\\') => { 552 | add_raw_line(&mut self.lines, &buffer); 553 | self.warning_lines.push(hunk_line_number - 1); 554 | } 555 | _ => { 556 | self.process_with_stats(&mut stdout)?; 557 | in_hunk = false; 558 | } 559 | } 560 | } 561 | if !in_hunk { 562 | hunk_line_number = 0; 563 | in_hunk = first == Some(b'@'); 564 | if self.config.has_line_numbers() && in_hunk { 565 | self.line_number_info = parse_line_number(&buffer); 566 | } 567 | output(&buffer, 0, buffer.len(), &ColorSpec::default(), &mut stdout)?; 568 | } 569 | 570 | buffer.clear(); 571 | } 572 | 573 | // flush remaining hunk 574 | self.process_with_stats(&mut stdout)?; 575 | self.stats.stop(); 576 | self.stats.report()?; 577 | Ok(()) 578 | } 579 | } 580 | 581 | // TODO count whitespace characters as well here 582 | fn add_raw_line(dst: &mut LineSplit, line: &[u8]) { 583 | let mut i = 0; 584 | let len = line.len(); 585 | while i < len { 586 | i += skip_all_escape_code(&line[i..]); 587 | let tok_len = skip_token(&line[i..]); 588 | dst.append_line(&line[i..i + tok_len]); 589 | i += tok_len; 590 | } 591 | } 592 | 593 | fn output( 594 | buf: &[u8], 595 | from: usize, 596 | to: usize, 597 | colorspec: &ColorSpec, 598 | out: &mut Stream, 599 | ) -> io::Result<()> 600 | where 601 | Stream: WriteColor, 602 | { 603 | let to = to.min(buf.len()); 604 | if from >= to { 605 | return Ok(()); 606 | } 607 | let buf = &buf[from..to]; 608 | let ends_with_newline = buf.last().cloned() == Some(b'\n'); 609 | let buf = if ends_with_newline { 610 | &buf[..buf.len() - 1] 611 | } else { 612 | buf 613 | }; 614 | 
/// Returns the number of bytes of escape code that start the slice.
fn skip_all_escape_code(buf: &[u8]) -> usize {
    // Skip one "\x1b[...m" sequence, returning its total length; None if
    // the slice does not start with a complete sequence.
    fn skip_escape_code(buf: &[u8]) -> Option<usize> {
        if buf.starts_with(b"\x1b[") {
            // "\x1b[" + sequence body + "m" => 3 additional bytes
            Some(index_of(&buf[2..], b'm')? + 3)
        } else {
            None
        }
    }
    let mut rest = buf;
    let mut skipped = 0;
    while let Some(nbytes) = skip_escape_code(rest) {
        rest = &rest[nbytes..];
        skipped += nbytes
    }
    skipped
}

/// Returns the first byte of the slice, after skipping the escape
/// code bytes.
fn first_after_escape(buf: &[u8]) -> Option<u8> {
    // `get` + `copied` replaces the former iter().skip(n).cloned().next().
    buf.get(skip_all_escape_code(buf)).copied()
}

/// Scan the slice looking for the given byte, returning the index of
/// its first appearance.
fn index_of(buf: &[u8], target: u8) -> Option<usize> {
    // Iterator::position is the idiomatic form of the former manual loop.
    buf.iter().position(|&b| b == target)
}
/// Computes the number of bytes until either the next escape code, or
/// the end of buf.
fn skip_token(buf: &[u8]) -> usize {
    // Find the first "\x1b[" pair; if none, the whole slice is one token.
    // windows(2) is empty for len < 2, which also covers the empty and
    // single-byte cases (returning buf.len(), i.e. 0 or 1).
    buf.windows(2)
        .position(|w| w == b"\x1b[")
        .unwrap_or(buf.len())
}
/// Parser for git hunk headers such as "@@ -133,6 +133,8 @@".
///
/// The input may contain ANSI escape sequences anywhere between
/// significant bytes (e.g. when git colorized its output); they are
/// skipped transparently before every byte test.
struct LineNumberParser<'a> {
    // Bytes of the (possibly colorized) hunk header line.
    buf: &'a [u8],
    // Cursor: index of the next byte to examine in `buf`.
    i: usize,
}

impl<'a> LineNumberParser<'a> {
    /// Creates a parser positioned at the start of `buf`.
    fn new(buf: &'a [u8]) -> Self {
        LineNumberParser { buf, i: 0 }
    }

    /// Advances the cursor past any ANSI escape sequences at the
    /// current position.
    fn skip_escape_code(&mut self) {
        if self.i < self.buf.len() {
            let to_skip = skip_all_escape_code(&self.buf[self.i..]);
            self.i += to_skip;
        }
    }

    /// Returns true when the next significant byte (after escape
    /// codes) satisfies `matcher`. Does not consume that byte, but
    /// does consume the escape codes before it.
    fn looking_at<M>(&mut self, matcher: M) -> bool
    where
        M: Fn(u8) -> bool,
    {
        self.skip_escape_code();
        self.i < self.buf.len() && matcher(self.buf[self.i])
    }

    /// Consumes one ASCII digit and returns its numeric value, or
    /// None when the next significant byte is not a digit.
    fn read_digit(&mut self) -> Option<usize> {
        if self.looking_at(|x| x.is_ascii_digit()) {
            let cur = self.buf[self.i];
            self.i += 1;
            Some((cur - b'0') as usize)
        } else {
            None
        }
    }

    /// Consumes a possibly empty run of ASCII whitespace.
    fn skip_whitespaces(&mut self) {
        while self.looking_at(|x| x.is_ascii_whitespace()) {
            self.i += 1;
        }
    }

    /// Consumes one or more bytes matching `matcher`, returning how
    /// many were consumed; None when not even one byte matched.
    fn expect_multiple<M>(&mut self, matcher: M) -> Option<usize>
    where
        M: Fn(u8) -> bool,
    {
        self.skip_escape_code();
        let iorig = self.i;
        while self.looking_at(&matcher) {
            self.i += 1;
        }
        if self.i == iorig {
            None
        } else {
            Some(self.i - iorig)
        }
    }

    /// Consumes exactly the byte `target`; None when it is absent.
    fn expect(&mut self, target: u8) -> Option<()> {
        if self.looking_at(|x| x == target) {
            self.i += 1;
            Some(())
        } else {
            None
        }
    }

    /// Parses a decimal number. Returns None when no digit is present
    /// or when the value would overflow usize (checked arithmetic
    /// turns overflow into a parse failure instead of a wraparound).
    fn parse_usize(&mut self) -> Option<usize> {
        let mut res = 0usize;
        let mut any = false;
        while let Some(digit) = self.read_digit() {
            any = true;
            res = res.checked_mul(10)?;
            res = res.checked_add(digit)?;
        }
        if any {
            Some(res)
        } else {
            None
        }
    }

    /// Parses "ofs,len", or a bare "ofs" which git abbreviates for a
    /// length of 1.
    fn parse_pair(&mut self) -> Option<(usize, usize)> {
        let p0 = self.parse_usize()?;
        if self.expect(b',').is_none() {
            return Some((p0, 1));
        }
        let p1 = self.parse_usize()?;
        Some((p0, p1))
    }

    /// Parses one or more whitespace-separated "-ofs,len" ranges and
    /// returns the last one. Multi-parent headers (e.g. "@@@ ... @@@")
    /// list several minus ranges; per the tests, the last one wins.
    fn expect_multiple_minus_ranges(&mut self) -> Option<(usize, usize)> {
        let next = |that: &mut Self| {
            that.expect(b'-')?;
            that.parse_pair()
        };
        let mut res = None;
        for i in 0.. {
            if i != 0 {
                // Ranges after the first must be whitespace-separated.
                self.expect_multiple(|x| x.is_ascii_whitespace())?;
            }
            match next(self) {
                next @ Some(_) => res = next,
                None => break,
            }
        }
        res
    }

    /// Parses a full hunk header: optional leading whitespace, a run
    /// of '@'s, whitespace, minus range(s), '+', plus range,
    /// whitespace, and a closing run of '@'s.
    fn parse_line_number(&mut self) -> Option<HunkHeader> {
        self.skip_whitespaces();
        self.expect_multiple(|x| x == b'@')?;
        self.expect_multiple(|x| x.is_ascii_whitespace())?;
        let minus_range = self.expect_multiple_minus_ranges()?;
        self.expect(b'+')?;
        let plus_range = self.parse_pair()?;
        self.expect_multiple(|x| x.is_ascii_whitespace())?;
        self.expect_multiple(|x| x == b'@')?;
        Some(HunkHeader::new(minus_range, plus_range))
    }
}

/// Parses a hunk header line; None when `buf` is not one.
fn parse_line_number(buf: &[u8]) -> Option<HunkHeader> {
    LineNumberParser::new(buf).parse_line_number()
}

#[cfg(test)]
mod tests_app;

#[cfg(test)]
mod tests_cli;

// ===== src/tests_app.rs =====

use super::*;

#[test]
fn skip_all_escape_code_test() {
    assert_eq!(5, skip_all_escape_code(b"\x1b[42m@@@"));
    assert_eq!(10, skip_all_escape_code(b"\x1b[42m\x1b[33m@@@"));
    // An incomplete sequence (no terminating 'm') is not skipped.
    assert_eq!(0, skip_all_escape_code(b"\x1b[42@@@"));
}

#[test]
fn first_after_escape_test() {
    assert_eq!(Some(b'+'), first_after_escape(b"+abc"));
    assert_eq!(Some(b'+'), first_after_escape(b"\x1b[42m\x1b[33m+abc"));
    assert_eq!(None, first_after_escape(b"\x1b[42m"));
}

// TODO test index_of?
#[test]
fn skip_token_test() {
    // A lone ESC without '[' is part of the token.
    assert_eq!(4, skip_token(b"abc\x1b"));
    // The token stops right before "\x1b[".
    assert_eq!(3, skip_token(b"abc\x1b["));
    assert_eq!(3, skip_token(b"abc"));
    assert_eq!(1, skip_token(b"\x1b"));
    assert_eq!(0, skip_token(b""));
}

#[test]
fn parse_line_number_test() {
    // Helper asserting a successful parse into the expected ranges.
    let test_ok = |ofs1, len1, ofs2, len2, input| {
        eprintln!("test_ok {}...", String::from_utf8_lossy(input));
        assert_eq!(
            Some(HunkHeader {
                minus_range: (ofs1, len1),
                plus_range: (ofs2, len2),
            }),
            parse_line_number(input)
        );
    };
    // Helper asserting a parse failure.
    let test_fail = |input| {
        eprintln!("test_fail {}...", String::from_utf8_lossy(input));
        assert_eq!(None, parse_line_number(input));
    };
    test_ok(133, 6, 133, 8, b"@@ -133,6 +133,8 @@");
    // A bare "+1" means (1, 1).
    test_ok(0, 0, 1, 1, b"@@ -0,0 +1 @@");
    // Leading whitespace is tolerated.
    test_ok(0, 0, 1, 1, b" @@ -0,0 +1 @@");
    test_ok(0, 0, 1, 1, b"@@ -0,0 +1 @@");
    // last one wins
    test_ok(0, 2, 0, 3, b"@@@ -0,0 -0,2 +0,3 @@@");
    // Missing separators or markers must fail.
    test_fail(b"@@-0,0 +1 @@");
    test_fail(b"@@ -0,0+1 @@");
    test_fail(b"@@ -0,0 +1@@");
    test_fail(b"@@ -0,0 +1 ");
    test_fail(b"-0,0 +1");
    test_fail(b"@@ 0,0 +1 @@");
    test_fail(b"@@ -0,0 1 @@");
    // Two plus ranges are rejected.
    test_fail(b"@@@ -0,0 +0,2 +0,3 @@@");

    // overflow
    test_fail(b"@@ -0,0 +19999999999999999999 @@");

    // with escape code
    test_ok(0, 0, 1, 1, b"\x1b[42;43m@\x1b[42;43m@\x1b[42;43m \x1b[42;43m-\x1b[42;43m0\x1b[42;43m,\x1b[42;43m0\x1b[42;43m \x1b[42;43m+1 @@");
}

#[test]
fn test_width() {
    // Sanity-check the WIDTH table: WIDTH[i] + 1 needs i + 1 digits.
    for (i, x) in WIDTH.iter().enumerate() {
        if x < &u64::max_value() {
            assert_eq!(format!("{}", x + 1).len(), i + 1);
        }
    }
    // By construction, 0 has width 0 (see WIDTH[0]).
    assert_eq!(0, width1(0, None));
    // Checks width1 against the printed length, with and without a
    // fixed minimum width.
    fn test(x: u64) {
        assert_eq!(format!("{}", x).len(), width1(x, None));
        for i in 0..5 {
            assert_eq!(
                format!("{}", x).len().max(i),
                width1(x, Some(LineNumberStyle::Fixed(i)))
            );
        }
    }
    for i in 1..=10000 {
        test(i);
    }
    // Values around the 10/11-digit boundary and powers of two.
    test(9999999999);
    test(10000000000);
    test(14284238234);
    for i in 0..64 {
        test(1 << i);
    }
    test(u64::max_value());

    // Hunk margin widths: two equal columns plus one separator.
    assert_eq!(
        "123:456".len(),
        HunkHeader::new((123, 5), (456, 9)).width(None)
    );
    assert_eq!(
        "1122: 456".len(),
        HunkHeader::new((123, 999), (456, 9)).width(None)
    );
    assert_eq!(
        " :456".len(),
        HunkHeader::new((0, 0), (456, 9)).width(None)
    );
    assert_eq!(MAX_MARGIN, 2 * width1(u64::max_value(), None) + 1);

    // with fixed width
    assert_eq!(
        " 123: 456".len(),
        HunkHeader::new((123, 5), (456, 9)).width(Some(LineNumberStyle::Fixed(4)))
    );
    assert_eq!(
        "1122: 456".len(),
        HunkHeader::new((123, 999), (456, 9)).width(Some(LineNumberStyle::Fixed(4)))
    );
    assert_eq!(
        " : 456".len(),
        HunkHeader::new((0, 0), (456, 9)).width(Some(LineNumberStyle::Fixed(4)))
    );
    assert_eq!(
        MAX_MARGIN,
        2 * width1(u64::max_value(), Some(LineNumberStyle::Fixed(4))) + 1
    );
}

// ===== src/tests_cli.rs =====

use std::env;
use std::path::PathBuf;
use std::process::{Command, Stdio};
use StringTest::*;

/// Expectation on a captured output stream of the diffr binary.
enum StringTest {
    // Stream must be empty.
    Empty,
    // Stream must contain the given substring.
    AtLeast(&'static str),
    // Stream must equal the given string (after trimming).
    Exactly(&'static str),
}

// Wraps a non-empty message in quotes for readable assertion output.
fn quote_or_empty(msg: &str) -> String {
    if msg.is_empty() {
        "".to_owned()
    } else {
        format!("\"{}\"", msg)
    }
}

impl StringTest {
    /// Asserts that `actual` satisfies this expectation; `prefix`
    /// names the stream (stdout/stderr) in the failure message.
    fn test(&self, actual: &str, prefix: &str) {
        match self {
            Empty => assert!(
                actual.is_empty(),
                "{}: expected empty, got\n\n{}",
                quote_or_empty(prefix),
                quote_or_empty(actual)
            ),
            AtLeast(exp) => assert!(
                actual.contains(exp),
                "{}: expected at least\n\n{}\n\ngot\n\n{}",
                prefix,
                quote_or_empty(exp),
                quote_or_empty(actual)
            ),
            Exactly(exp) => assert!(
                actual.trim() == exp.trim(),
                "{}: expected\n\n{}\n\ngot\n\n{}",
                prefix,
                quote_or_empty(exp),
                quote_or_empty(actual)
            ),
        }
    }
}

/// One end-to-end invocation of the diffr binary: arguments plus the
/// expected stdout, stderr, and exit status.
struct ProcessTest {
    args: &'static [&'static str],
    out: StringTest,
    err: StringTest,
    is_success: bool,
}

// Default location of the freshly built binary under target/debug.
fn diffr_path_default() -> PathBuf {
    let mut dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
    dir.push("target");
    dir.push("debug");
    dir.push(if cfg!(windows) { "diffr.exe" } else { "diffr" });
    dir
}

// Binary under test; DIFFR_TESTS_BINARY_PATH overrides the default.
fn diffr_path() -> PathBuf {
    match env::var("DIFFR_TESTS_BINARY_PATH") {
        Err(_) => diffr_path_default(),
        Ok(path) => PathBuf::from(path),
    }
}

/// Runs the binary with the given arguments and checks status,
/// stdout, and stderr against the expectations in `descr`.
fn test_cli(descr: ProcessTest) {
    let mut cmd = Command::new(diffr_path());
    cmd.stdout(Stdio::piped());
    cmd.stderr(Stdio::piped());
    // stdin is piped and immediately dropped, so the child sees EOF.
    cmd.stdin(Stdio::piped());
    for arg in descr.args {
        cmd.arg(*arg);
    }
    let child = cmd.spawn().expect("spawn");
    let output = child.wait_with_output().expect("wait_with_output");
    fn string_of_status(code: bool) -> &'static str {
        if code {
            "success"
        } else {
            "failure"
        }
    }
    assert!(
        descr.is_success == output.status.success(),
        "unexpected status: expected {} got {}",
        string_of_status(descr.is_success),
        string_of_status(output.status.success()),
    );
    descr
        .out
        .test(&String::from_utf8_lossy(&output.stdout), "stdout");
    descr
        .err
        .test(&String::from_utf8_lossy(&output.stderr), "stderr");
}

#[test]
fn debug_flag() {
    test_cli(ProcessTest {
        args: &["--debug"],
        out: Empty,
        err: AtLeast("hunk processing time (ms)"),
        is_success: true,
    })
}

#[test]
fn color_invalid_face_name() {
    test_cli(ProcessTest {
        args: &["--colors", "notafacename"],
        out: Empty,
        err: Exactly("unexpected face name: got 'notafacename', expected added|refine-added|removed|refine-removed"),
        is_success: false,
    })
}

#[test]
fn color_only_face_name() {
    // A face name with no attributes is accepted.
    test_cli(ProcessTest {
        args: &["--colors", "added"],
        out: Empty,
        err: Exactly(""),
        is_success: true,
    })
}

#[test]
fn color_invalid_attribute_name() {
    test_cli(ProcessTest {
        args: &["--colors", "added:bar"],
        out: Empty,
        err: Exactly("unexpected attribute name: got 'bar', expected foreground|background|italic|noitalic|bold|nobold|intense|nointense|underline|nounderline|none"),
        is_success: false,
    })
}

#[test]
fn color_invalid_color_value_name() {
    test_cli(ProcessTest {
        args: &["--colors", "added:foreground:baz"],
        out: Empty,
        err: Exactly("unexpected color value: unrecognized color name 'baz'. Choose from: black, blue, green, red, cyan, magenta, yellow, white"),
        is_success: false,
    })
}

#[test]
fn color_invalid_color_value_ansi() {
    // 777 is out of the 0..=255 ansi256 range.
    test_cli(ProcessTest {
        args: &["--colors", "added:foreground:777"],
        out: Empty,
        err: AtLeast("unexpected color value: unrecognized ansi256 color number"),
        is_success: false,
    })
}

#[test]
fn color_invalid_color_value_rgb() {
    // 777 is out of the 0..=255 RGB component range.
    test_cli(ProcessTest {
        args: &["--colors", "added:foreground:0,0,777"],
        out: Empty,
        err: AtLeast("unexpected color value: unrecognized RGB color triple"),
        is_success: false,
    })
}

#[test]
fn color_invalid_color_not_done() {
    // "foreground" requires a following color value.
    test_cli(ProcessTest {
        args: &["--colors", "added:foreground"],
        out: Empty,
        err: Exactly("error parsing color: missing color value for face 'added'"),
        is_success: false,
    })
}

#[test]
fn color_ok() {
    test_cli(ProcessTest {
        args: &["--colors", "added:foreground:0"],
        out: Empty,
        err: Exactly(""),
        is_success: true,
    })
}

#[test]
fn color_ok_multiple() {
    // --colors may be given several times, once per face.
    test_cli(ProcessTest {
        args: &[
            "--colors",
            "added:foreground:0",
            "--colors",
            "removed:background:red",
        ],
        out: Empty,
        err: Exactly(""),
        is_success: true,
    })
}

#[test]
fn threshold() {
    // ok
    test_cli(ProcessTest {
        args: &["--large-diff-threshold", "123"],
        out: Empty,
        err: Empty,
        is_success: true,
    });

    // fail
    test_cli(ProcessTest {
        args: &["--large-diff-threshold"],
        out: Empty,
        err: Exactly("option requires an argument: '--large-diff-threshold'"),
        is_success: false,
    });
    test_cli(ProcessTest {
        args: &["--large-diff-threshold", "a"],
        out: Empty,
        err: Exactly("invalid threshold value: invalid digit found in string"),
        is_success: false,
    });
    // Negative values are rejected by the unsigned parse.
    test_cli(ProcessTest {
        args: &["--large-diff-threshold", "-1"],
        out: Empty,
        err: Exactly("invalid threshold value: invalid digit found in string"),
        is_success: false,
    });
}

#[test]
fn line_numbers_style() {
    // TODO check config?

    // ok
    test_cli(ProcessTest {
        args: &["--line-numbers"],
        out: Empty,
        err: Empty,
        is_success: true,
    });
    test_cli(ProcessTest {
        args: &["--line-numbers", "compact"],
        out: Empty,
        err: Empty,
        is_success: true,
    });
    test_cli(ProcessTest {
        args: &["--line-numbers", "aligned"],
        out: Empty,
        err: Empty,
        is_success: true,
    });

    // fail
    test_cli(ProcessTest {
        args: &["--line-numbers", "foo"],
        out: Empty,
        err: Exactly("unexpected line number style: got 'foo', expected aligned|compact|fixed"),
        is_success: false,
    });
}

#[test]
fn test_bad_argument() {
    test_cli(ProcessTest {
        args: &["--invalid-option"],
        out: Empty,
        err: AtLeast("bad argument: '--invalid-option'"),
        is_success: false,
    });
}