├── Cargo.toml ├── .travis.yml ├── LICENSE ├── src ├── utils.rs ├── lib.rs ├── differ.rs └── sequencematcher.rs ├── examples └── example.rs ├── README.md └── tests └── tests.rs /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "difflib" 3 | version = "0.4.0" 4 | authors = ["Dima Kudosh "] 5 | description = "Port of Python's difflib library to Rust." 6 | documentation = "https://github.com/DimaKudosh/difflib/wiki" 7 | homepage = "https://github.com/DimaKudosh/difflib" 8 | repository = "https://github.com/DimaKudosh/difflib" 9 | keywords = ["difflib", "text", "diff"] 10 | license = "MIT" 11 | include = [ 12 | "**/*.rs", 13 | "Cargo.toml", 14 | ] 15 | 16 | 17 | [[test]] 18 | name = "tests" 19 | 20 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: rust 3 | addons: 4 | apt: 5 | packages: 6 | - libcurl4-openssl-dev 7 | - libelf-dev 8 | - libdw-dev 9 | - binutils-dev # optional: only required for the --verify flag of coveralls 10 | rust: 11 | - stable 12 | - beta 13 | - nightly 14 | before_script: 15 | - pip install 'travis-cargo<0.2' --user 16 | - export PATH=$HOME/.local/bin/:$PATH 17 | script: 18 | - travis-cargo build 19 | - travis-cargo test 20 | - travis-cargo bench 21 | - travis-cargo --only stable doc 22 | after_success: 23 | - travis-cargo --only stable doc-upload 24 | - travis-cargo coveralls --no-sudo --coveralls-id=$TRAVIS_JOB_ID --verify 25 | 26 | env: 27 | global: 28 | - TRAVIS_CARGO_NIGHTLY_FEATURE=nightly 29 | 30 | matrix: 31 | allow_failures: 32 | - rust: nightly 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015-2016 Kevin B. 
Knapp 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
/// Returns the similarity ratio `2 * matches / length`.
///
/// `matches` is the number of matching elements and `length` the combined
/// length of both sequences. When `length` is zero the ratio is defined as
/// `1.0` so that two empty inputs count as identical.
pub fn calculate_ratio(matches: usize, length: usize) -> f32 {
    if length == 0 {
        return 1.0;
    }
    2.0 * matches as f32 / length as f32
}

/// Builds a string consisting of `c` repeated `length` times.
pub fn str_with_similar_chars(c: char, length: usize) -> String {
    std::iter::repeat(c).take(length).collect()
}

/// Counts how many consecutive occurrences of `c` start `line`.
///
/// The count is in characters, not bytes, so multi-byte characters are
/// handled without indexing past the end of the line.
pub fn count_leading(line: &str, c: char) -> usize {
    line.chars().take_while(|&ch| ch == c).count()
}

/// Formats the half-open range `start..end` the way unified diff hunk headers
/// expect it: `"<first line>,<length>"`, or just `"<first line>"` when the
/// range covers exactly one line.
///
/// Line numbers are 1-based, except that an empty range reports the line
/// *before* it (so `3..3` becomes `"3,0"`).
///
/// # Panics
///
/// Panics in debug builds if `end < start` (subtraction overflow).
pub fn format_range_unified(start: usize, end: usize) -> String {
    let length = end - start;
    match length {
        1 => (start + 1).to_string(),
        // An empty range is reported relative to the preceding line.
        0 => format!("{},{}", start, length),
        _ => format!("{},{}", start + 1, length),
    }
}

/// Formats the half-open range `start..end` the way context diff hunk headers
/// expect it: `"<first line>,<last line>"`, or a single line number when the
/// range holds at most one line.
///
/// Line numbers are 1-based, except that an empty range reports the line
/// *before* it (so `3..3` becomes `"3"`).
///
/// # Panics
///
/// Panics in debug builds if `end < start` (subtraction overflow).
pub fn format_range_context(start: usize, end: usize) -> String {
    let length = end - start;
    match length {
        0 => start.to_string(),
        1 => (start + 1).to_string(),
        // `end` is exclusive, so it already equals the 1-based last line.
        _ => format!("{},{}", start + 1, end),
    }
}
10:20:52", 17 | 3, 18 | ); 19 | for line in &diff { 20 | println!("{:?}", line); 21 | } 22 | 23 | //context_diff 24 | let diff = difflib::context_diff( 25 | &first_text, 26 | &second_text, 27 | "Original", 28 | "Current", 29 | "2005-01-26 23:30:50", 30 | "2010-04-02 10:20:52", 31 | 3, 32 | ); 33 | for line in &diff { 34 | println!("{:?}", line); 35 | } 36 | 37 | //get_close_matches 38 | let words = vec!["ape", "apple", "peach", "puppy"]; 39 | let result = difflib::get_close_matches("appel", words, 3, 0.6); 40 | println!("{:?}", result); 41 | 42 | //Differ examples 43 | let differ = Differ::new(); 44 | let diff = differ.compare(&first_text, &second_text); 45 | for line in &diff { 46 | println!("{:?}", line); 47 | } 48 | 49 | //SequenceMatcher examples 50 | let mut matcher = SequenceMatcher::new("one two three four", "zero one tree four"); 51 | let m = matcher.find_longest_match(0, 18, 0, 18); 52 | println!("{:?}", m); 53 | let all_matches = matcher.get_matching_blocks(); 54 | println!("{:?}", all_matches); 55 | let opcode = matcher.get_opcodes(); 56 | println!("{:?}", opcode); 57 | let grouped_opcodes = matcher.get_grouped_opcodes(2); 58 | println!("{:?}", grouped_opcodes); 59 | let ratio = matcher.ratio(); 60 | println!("{:?}", ratio); 61 | matcher.set_seqs("aaaaa", "aaaab"); 62 | println!("{:?}", matcher.ratio()); 63 | } 64 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Difflib [![Build Status](https://travis-ci.org/DimaKudosh/difflib.svg?branch=master)](https://travis-ci.org/DimaKudosh/difflib) 2 | 3 | Port of Python's difflib library to Rust. 4 | It provides all the necessary tools for comparing word sequences. 
5 | 6 | ## Installation 7 | Simply add difflib to your dependencies block in Cargo.toml 8 | 9 | ```rust 10 | [dependencies] 11 | difflib = "0.4.0" 12 | ``` 13 | 14 | ## Documentation 15 | Documentation is available at https://github.com/DimaKudosh/difflib/wiki 16 | 17 | ## Example 18 | ```rust 19 | extern crate difflib; 20 | 21 | use difflib::differ::Differ; 22 | use difflib::sequencematcher::SequenceMatcher; 23 | 24 | fn main() { 25 | // unified_diff 26 | let first_text = "one two three four".split(" ").collect::>(); 27 | let second_text = "zero one tree four".split(" ").collect::>(); 28 | let diff = difflib::unified_diff( 29 | &first_text, 30 | &second_text, 31 | "Original", 32 | "Current", 33 | "2005-01-26 23:30:50", 34 | "2010-04-02 10:20:52", 35 | 3, 36 | ); 37 | for line in &diff { 38 | println!("{:?}", line); 39 | } 40 | 41 | //context_diff 42 | let diff = difflib::context_diff( 43 | &first_text, 44 | &second_text, 45 | "Original", 46 | "Current", 47 | "2005-01-26 23:30:50", 48 | "2010-04-02 10:20:52", 49 | 3, 50 | ); 51 | for line in &diff { 52 | println!("{:?}", line); 53 | } 54 | 55 | //get_close_matches 56 | let words = vec!["ape", "apple", "peach", "puppy"]; 57 | let result = difflib::get_close_matches("appel", words, 3, 0.6); 58 | println!("{:?}", result); 59 | 60 | //Differ examples 61 | let differ = Differ::new(); 62 | let diff = differ.compare(&first_text, &second_text); 63 | for line in &diff { 64 | println!("{:?}", line); 65 | } 66 | 67 | //SequenceMatcher examples 68 | let mut matcher = SequenceMatcher::new("one two three four", "zero one tree four"); 69 | let m = matcher.find_longest_match(0, 18, 0, 18); 70 | println!("{:?}", m); 71 | let all_matches = matcher.get_matching_blocks(); 72 | println!("{:?}", all_matches); 73 | let opcode = matcher.get_opcodes(); 74 | println!("{:?}", opcode); 75 | let grouped_opcodes = matcher.get_grouped_opcodes(2); 76 | println!("{:?}", grouped_opcodes); 77 | let ratio = matcher.ratio(); 78 | println!("{:?}", 
ratio); 79 | matcher.set_seqs("aaaaa", "aaaab"); 80 | println!("{:?}", matcher.ratio()); 81 | } 82 | ``` 83 | -------------------------------------------------------------------------------- /tests/tests.rs: -------------------------------------------------------------------------------- 1 | extern crate difflib; 2 | 3 | use difflib::differ::Differ; 4 | use difflib::sequencematcher::{Match, Opcode, SequenceMatcher}; 5 | 6 | #[test] 7 | fn test_longest_match() { 8 | let matcher = SequenceMatcher::new(" abcd", "abcd abcd"); 9 | let m = matcher.find_longest_match(0, 5, 0, 9); 10 | assert_eq!(m.first_start, 0); 11 | assert_eq!(m.second_start, 4); 12 | assert_eq!(m.size, 5); 13 | } 14 | 15 | #[test] 16 | fn test_all_matches() { 17 | let mut matcher = SequenceMatcher::new("abxcd", "abcd"); 18 | let result = matcher.get_matching_blocks(); 19 | let mut expected_result = Vec::new(); 20 | expected_result.push(Match { 21 | first_start: 0, 22 | second_start: 0, 23 | size: 2, 24 | }); 25 | expected_result.push(Match { 26 | first_start: 3, 27 | second_start: 2, 28 | size: 2, 29 | }); 30 | expected_result.push(Match { 31 | first_start: 5, 32 | second_start: 4, 33 | size: 0, 34 | }); 35 | assert_eq!(result, expected_result); 36 | } 37 | 38 | #[test] 39 | fn test_get_opcodes() { 40 | let mut matcher = SequenceMatcher::new("qabxcd", "abycdf"); 41 | let result = matcher.get_opcodes(); 42 | let mut expected_result = Vec::new(); 43 | expected_result.push(Opcode { 44 | tag: "delete".to_string(), 45 | first_start: 0, 46 | first_end: 1, 47 | second_start: 0, 48 | second_end: 0, 49 | }); 50 | expected_result.push(Opcode { 51 | tag: "equal".to_string(), 52 | first_start: 1, 53 | first_end: 3, 54 | second_start: 0, 55 | second_end: 2, 56 | }); 57 | expected_result.push(Opcode { 58 | tag: "replace".to_string(), 59 | first_start: 3, 60 | first_end: 4, 61 | second_start: 2, 62 | second_end: 3, 63 | }); 64 | expected_result.push(Opcode { 65 | tag: "equal".to_string(), 66 | first_start: 4, 67 | 
first_end: 6, 68 | second_start: 3, 69 | second_end: 5, 70 | }); 71 | expected_result.push(Opcode { 72 | tag: "insert".to_string(), 73 | first_start: 6, 74 | first_end: 6, 75 | second_start: 5, 76 | second_end: 6, 77 | }); 78 | assert_eq!(result, expected_result); 79 | } 80 | 81 | #[test] 82 | fn test_ratio() { 83 | let mut matcher = SequenceMatcher::new("abcd", "bcde"); 84 | assert_eq!(matcher.ratio(), 0.75); 85 | } 86 | 87 | #[test] 88 | fn test_get_close_matches() { 89 | let words = vec!["ape", "apple", "peach", "puppy"]; 90 | let result = difflib::get_close_matches("appel", words, 3, 0.6); 91 | assert_eq!(result, vec!["apple", "ape"]); 92 | } 93 | 94 | #[test] 95 | fn test_differ_compare() { 96 | let first_text = vec!["one\n", "two\n", "three\n"]; 97 | let second_text = vec!["ore\n", "tree\n", "emu\n"]; 98 | let differ = Differ::new(); 99 | let result = differ.compare(&first_text, &second_text).join(""); 100 | assert_eq!( 101 | result, 102 | "- one\n? ^\n+ ore\n? ^\n- two\n- three\n? -\n+ tree\n+ emu\n" 103 | ); 104 | } 105 | 106 | fn is_junk_char(ch: &char) -> bool { 107 | if *ch == ' ' || *ch == '\t' { 108 | return true; 109 | } 110 | false 111 | } 112 | 113 | #[test] 114 | fn test_differ_compare_with_func() { 115 | let first_text = vec!["one\n", "two\n", "three\n"]; 116 | let second_text = vec!["ore\n", "tree\n", "emu\n"]; 117 | let mut differ = Differ::new(); 118 | differ.char_junk = Some(is_junk_char); 119 | let result = differ.compare(&first_text, &second_text).join(""); 120 | assert_eq!( 121 | result, 122 | "- one\n? ^\n+ ore\n? ^\n- two\n- three\n? 
-\n+ tree\n+ emu\n" 123 | ); 124 | } 125 | 126 | #[test] 127 | fn test_differ_restore() { 128 | let first_text = vec!["one\n", " two\n", "three\n"]; 129 | let second_text = vec!["ore\n", "tree\n", "emu\n"]; 130 | let differ = Differ::new(); 131 | let diff = differ.compare(&first_text, &second_text); 132 | assert_eq!(first_text, Differ::restore(&diff, 1)); 133 | assert_eq!(second_text, Differ::restore(&diff, 2)); 134 | } 135 | 136 | #[test] 137 | fn test_unified_diff() { 138 | let first_text = "one two three four".split(" ").collect::>(); 139 | let second_text = "zero one tree four".split(" ").collect::>(); 140 | let result = difflib::unified_diff( 141 | &first_text, 142 | &second_text, 143 | "Original", 144 | "Current", 145 | "2005-01-26 23:30:50", 146 | "2010-04-02 10:20:52", 147 | 3, 148 | ).join(""); 149 | assert_eq!( 150 | result, 151 | "--- Original\t2005-01-26 23:30:50\n+++ Current\t2010-04-02 10:20:52\n@@ -1,4 \ 152 | +1,4 @@\n+zero one-two-three+tree four" 153 | ); 154 | } 155 | 156 | #[test] 157 | fn test_context_diff() { 158 | let first_text = "one two three four".split(" ").collect::>(); 159 | let second_text = "zero one tree four".split(" ").collect::>(); 160 | let result = difflib::context_diff( 161 | &first_text, 162 | &second_text, 163 | "Original", 164 | "Current", 165 | "2005-01-26 23:30:50", 166 | "2010-04-02 10:20:52", 167 | 3, 168 | ).join(""); 169 | assert_eq!( 170 | result, 171 | "*** Original\t2005-01-26 23:30:50\n--- Current\t2010-04-02 \ 172 | 10:20:52\n***************\n*** 1,4 ****\n one! two! three four--- 1,4 ----\n+ \ 173 | zero one! 
tree four" 174 | ); 175 | } 176 | 177 | #[test] 178 | fn test_integer_slice() { 179 | let s1 = vec![1, 2, 3, 4, 5]; 180 | let s2 = vec![5, 4, 3, 2, 1]; 181 | let result = SequenceMatcher::new(&s1, &s2).get_matching_blocks(); 182 | let mut expected_result = Vec::new(); 183 | expected_result.push(Match { 184 | first_start: 0, 185 | second_start: 4, 186 | size: 1, 187 | }); 188 | expected_result.push(Match { 189 | first_start: 5, 190 | second_start: 5, 191 | size: 0, 192 | }); 193 | assert_eq!(result, expected_result); 194 | } 195 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | pub mod differ; 2 | pub mod sequencematcher; 3 | mod utils; 4 | 5 | use sequencematcher::{Sequence, SequenceMatcher}; 6 | use std::collections::HashMap; 7 | use std::fmt::Display; 8 | use utils::{format_range_context, format_range_unified}; 9 | 10 | pub fn get_close_matches<'a>( 11 | word: &str, 12 | possibilities: Vec<&'a str>, 13 | n: usize, 14 | cutoff: f32, 15 | ) -> Vec<&'a str> { 16 | if !(0.0 <= cutoff && cutoff <= 1.0) { 17 | panic!("Cutoff must be greater than 0.0 and lower than 1.0"); 18 | } 19 | let mut res: Vec<(f32, &str)> = Vec::new(); 20 | let mut matcher = SequenceMatcher::new("", word); 21 | for i in &possibilities { 22 | matcher.set_first_seq(i); 23 | let ratio = matcher.ratio(); 24 | if ratio >= cutoff { 25 | res.push((ratio, i)); 26 | } 27 | } 28 | res.sort_by(|a, b| b.0.partial_cmp(&a.0).unwrap()); 29 | res.truncate(n); 30 | res.iter().map(|x| x.1).collect() 31 | } 32 | 33 | pub fn unified_diff( 34 | first_sequence: &[T], 35 | second_sequence: &[T], 36 | from_file: &str, 37 | to_file: &str, 38 | from_file_date: &str, 39 | to_file_date: &str, 40 | n: usize, 41 | ) -> Vec { 42 | let mut res = Vec::new(); 43 | let lineterm = '\n'; 44 | let mut started = false; 45 | let mut matcher = SequenceMatcher::new(first_sequence, second_sequence); 46 | for 
group in &matcher.get_grouped_opcodes(n) { 47 | if !started { 48 | started = true; 49 | let from_date = format!("\t{}", from_file_date); 50 | let to_date = format!("\t{}", to_file_date); 51 | res.push(format!("--- {}{}{}", from_file, from_date, lineterm)); 52 | res.push(format!("+++ {}{}{}", to_file, to_date, lineterm)); 53 | } 54 | let (first, last) = (group.first().unwrap(), group.last().unwrap()); 55 | let file1_range = format_range_unified(first.first_start, last.first_end); 56 | let file2_range = format_range_unified(first.second_start, last.second_end); 57 | res.push(format!( 58 | "@@ -{} +{} @@{}", 59 | file1_range, file2_range, lineterm 60 | )); 61 | for code in group { 62 | if code.tag == "equal" { 63 | for item in first_sequence 64 | .iter() 65 | .take(code.first_end) 66 | .skip(code.first_start) 67 | { 68 | res.push(format!(" {}", item)); 69 | } 70 | continue; 71 | } 72 | if code.tag == "replace" || code.tag == "delete" { 73 | for item in first_sequence 74 | .iter() 75 | .take(code.first_end) 76 | .skip(code.first_start) 77 | { 78 | res.push(format!("-{}", item)); 79 | } 80 | } 81 | if code.tag == "replace" || code.tag == "insert" { 82 | for item in second_sequence 83 | .iter() 84 | .take(code.second_end) 85 | .skip(code.second_start) 86 | { 87 | res.push(format!("+{}", item)); 88 | } 89 | } 90 | } 91 | } 92 | res 93 | } 94 | 95 | pub fn context_diff( 96 | first_sequence: &[T], 97 | second_sequence: &[T], 98 | from_file: &str, 99 | to_file: &str, 100 | from_file_date: &str, 101 | to_file_date: &str, 102 | n: usize, 103 | ) -> Vec { 104 | let mut res = Vec::new(); 105 | let lineterm = '\n'; 106 | let mut prefix: HashMap = HashMap::new(); 107 | prefix.insert(String::from("insert"), String::from("+ ")); 108 | prefix.insert(String::from("delete"), String::from("- ")); 109 | prefix.insert(String::from("replace"), String::from("! 
")); 110 | prefix.insert(String::from("equal"), String::from(" ")); 111 | let mut started = false; 112 | let mut matcher = SequenceMatcher::new(first_sequence, second_sequence); 113 | for group in &matcher.get_grouped_opcodes(n) { 114 | if !started { 115 | started = true; 116 | let from_date = format!("\t{}", from_file_date); 117 | let to_date = format!("\t{}", to_file_date); 118 | res.push(format!("*** {}{}{}", from_file, from_date, lineterm)); 119 | res.push(format!("--- {}{}{}", to_file, to_date, lineterm)); 120 | } 121 | let (first, last) = (group.first().unwrap(), group.last().unwrap()); 122 | res.push(format!("***************{}", lineterm)); 123 | let file1_range = format_range_context(first.first_start, last.first_end); 124 | res.push(format!("*** {} ****{}", file1_range, lineterm)); 125 | let mut any = false; 126 | for opcode in group { 127 | if opcode.tag == "replace" || opcode.tag == "delete" { 128 | any = true; 129 | break; 130 | } 131 | } 132 | if any { 133 | for opcode in group { 134 | if opcode.tag != "insert" { 135 | for item in first_sequence 136 | .iter() 137 | .take(opcode.first_end) 138 | .skip(opcode.first_start) 139 | { 140 | res.push(format!("{}{}", &prefix[&opcode.tag], item)); 141 | } 142 | } 143 | } 144 | } 145 | let file2_range = format_range_context(first.second_start, last.second_end); 146 | res.push(format!("--- {} ----{}", file2_range, lineterm)); 147 | any = false; 148 | for opcode in group { 149 | if opcode.tag == "replace" || opcode.tag == "insert" { 150 | any = true; 151 | break; 152 | } 153 | } 154 | if any { 155 | for opcode in group { 156 | if opcode.tag != "delete" { 157 | for item in second_sequence 158 | .iter() 159 | .take(opcode.second_end) 160 | .skip(opcode.second_start) 161 | { 162 | res.push(format!("{}{}", prefix[&opcode.tag], item)); 163 | } 164 | } 165 | } 166 | } 167 | } 168 | res 169 | } 170 | -------------------------------------------------------------------------------- /src/differ.rs: 
-------------------------------------------------------------------------------- 1 | use sequencematcher::SequenceMatcher; 2 | use std::cmp; 3 | use utils::{count_leading, str_with_similar_chars}; 4 | 5 | #[derive(Default)] 6 | pub struct Differ { 7 | pub line_junk: Option bool>, 8 | pub char_junk: Option bool>, 9 | } 10 | 11 | impl Differ { 12 | pub fn new() -> Differ { 13 | Differ { 14 | line_junk: None, 15 | char_junk: None, 16 | } 17 | } 18 | 19 | pub fn compare(&self, first_sequence: &[&str], second_sequence: &[&str]) -> Vec { 20 | let mut matcher = SequenceMatcher::new(first_sequence, second_sequence); 21 | matcher.set_is_junk(self.line_junk); 22 | let mut res = Vec::new(); 23 | for opcode in matcher.get_opcodes() { 24 | let mut gen = Vec::new(); 25 | match opcode.tag.as_ref() { 26 | "replace" => { 27 | gen = self.fancy_replace( 28 | first_sequence, 29 | opcode.first_start, 30 | opcode.first_end, 31 | second_sequence, 32 | opcode.second_start, 33 | opcode.second_end, 34 | ) 35 | } 36 | "delete" => { 37 | gen = self.dump("-", first_sequence, opcode.first_start, opcode.first_end) 38 | } 39 | "insert" => { 40 | gen = self.dump("+", second_sequence, opcode.second_start, opcode.second_end) 41 | } 42 | "equal" => { 43 | gen = self.dump(" ", first_sequence, opcode.first_start, opcode.first_end) 44 | } 45 | _ => {} 46 | } 47 | for i in gen { 48 | res.push(i); 49 | } 50 | } 51 | res 52 | } 53 | 54 | fn dump(&self, tag: &str, sequence: &[&str], start: usize, end: usize) -> Vec { 55 | let mut res = Vec::new(); 56 | for i in start..end { 57 | if let Some(s) = sequence.get(i) { 58 | res.push(format!("{} {}", tag, s)) 59 | } 60 | } 61 | res 62 | } 63 | 64 | fn plain_replace( 65 | &self, 66 | first_sequence: &[&str], 67 | first_start: usize, 68 | first_end: usize, 69 | second_sequence: &[&str], 70 | second_start: usize, 71 | second_end: usize, 72 | ) -> Vec { 73 | if !(first_start < first_end && second_start < second_end) { 74 | return Vec::new(); 75 | } 76 | let (mut 
first, second) = if second_end - second_start < first_end - first_start { 77 | ( 78 | self.dump("+", second_sequence, second_start, second_end), 79 | self.dump("-", first_sequence, first_start, first_end), 80 | ) 81 | } else { 82 | ( 83 | self.dump("-", first_sequence, first_start, first_end), 84 | self.dump("+", second_sequence, second_start, second_end), 85 | ) 86 | }; 87 | for s in second { 88 | first.push(s); 89 | } 90 | first 91 | } 92 | 93 | fn fancy_replace( 94 | &self, 95 | first_sequence: &[&str], 96 | first_start: usize, 97 | first_end: usize, 98 | second_sequence: &[&str], 99 | second_start: usize, 100 | second_end: usize, 101 | ) -> Vec { 102 | let mut res = Vec::new(); 103 | let (mut best_ratio, cutoff) = (0.74, 0.75); 104 | let (mut best_i, mut best_j) = (0, 0); 105 | let mut eqi: Option = None; 106 | let mut eqj: Option = None; 107 | for (j, second_sequence_str) in second_sequence 108 | .iter() 109 | .enumerate() 110 | .take(second_end) 111 | .skip(second_start) 112 | { 113 | for (i, first_sequence_str) in first_sequence 114 | .iter() 115 | .enumerate() 116 | .take(second_end) 117 | .skip(second_start) 118 | { 119 | if first_sequence_str == second_sequence_str { 120 | if eqi.is_none() { 121 | eqi = Some(i); 122 | eqj = Some(j); 123 | } 124 | continue; 125 | } 126 | let (first_sequence_chars, second_sequence_chars) = ( 127 | first_sequence_str.chars().collect::>(), 128 | second_sequence_str.chars().collect::>(), 129 | ); 130 | let mut cruncher = 131 | SequenceMatcher::new(&first_sequence_chars, &second_sequence_chars); 132 | cruncher.set_is_junk(self.char_junk); 133 | if cruncher.ratio() > best_ratio { 134 | best_ratio = cruncher.ratio(); 135 | best_i = i; 136 | best_j = j; 137 | } 138 | } 139 | } 140 | if best_ratio < cutoff { 141 | if eqi.is_none() { 142 | res.extend( 143 | self.plain_replace( 144 | first_sequence, 145 | first_start, 146 | first_end, 147 | second_sequence, 148 | second_start, 149 | second_end, 150 | ).iter() 151 | .cloned(), 152 | 
); 153 | return res; 154 | } 155 | best_i = eqi.unwrap(); 156 | best_j = eqj.unwrap(); 157 | } else { 158 | eqi = None; 159 | } 160 | res.extend( 161 | self.fancy_helper( 162 | first_sequence, 163 | first_start, 164 | best_i, 165 | second_sequence, 166 | second_start, 167 | best_j, 168 | ).iter() 169 | .cloned(), 170 | ); 171 | let first_element = &first_sequence[best_i]; 172 | let second_element = &second_sequence[best_j]; 173 | if eqi.is_none() { 174 | let (mut first_tag, mut second_tag) = (String::new(), String::new()); 175 | let first_element_chars: Vec = first_element.chars().collect(); 176 | let second_element_chars: Vec = second_element.chars().collect(); 177 | let mut cruncher = SequenceMatcher::new(&first_element_chars, &second_element_chars); 178 | cruncher.set_is_junk(self.char_junk); 179 | for opcode in &cruncher.get_opcodes() { 180 | let (first_length, second_length) = ( 181 | opcode.first_end - opcode.first_start, 182 | opcode.second_end - opcode.second_start, 183 | ); 184 | match opcode.tag.as_ref() { 185 | "replace" => { 186 | first_tag.push_str(&str_with_similar_chars('^', first_length)); 187 | second_tag.push_str(&str_with_similar_chars('^', second_length)); 188 | } 189 | "delete" => { 190 | first_tag.push_str(&str_with_similar_chars('-', first_length)); 191 | } 192 | "insert" => { 193 | second_tag.push_str(&str_with_similar_chars('+', second_length)); 194 | } 195 | "equal" => { 196 | first_tag.push_str(&str_with_similar_chars(' ', first_length)); 197 | second_tag.push_str(&str_with_similar_chars(' ', second_length)); 198 | } 199 | _ => {} 200 | } 201 | } 202 | res.extend( 203 | self.qformat(&first_element, &second_element, &first_tag, &second_tag) 204 | .iter() 205 | .cloned(), 206 | ); 207 | } else { 208 | let mut s = String::from(" "); 209 | s.push_str(&first_element); 210 | res.push(s); 211 | } 212 | res.extend( 213 | self.fancy_helper( 214 | first_sequence, 215 | best_i + 1, 216 | first_end, 217 | second_sequence, 218 | best_j + 1, 219 | 
second_end, 220 | ).iter() 221 | .cloned(), 222 | ); 223 | res 224 | } 225 | 226 | fn fancy_helper( 227 | &self, 228 | first_sequence: &[&str], 229 | first_start: usize, 230 | first_end: usize, 231 | second_sequence: &[&str], 232 | second_start: usize, 233 | second_end: usize, 234 | ) -> Vec { 235 | let mut res = Vec::new(); 236 | if first_start < first_end { 237 | if second_start < second_end { 238 | res = self.fancy_replace( 239 | first_sequence, 240 | first_start, 241 | first_end, 242 | second_sequence, 243 | second_start, 244 | second_end, 245 | ); 246 | } else { 247 | res = self.dump("-", first_sequence, first_start, first_end); 248 | } 249 | } else if second_start < second_end { 250 | res = self.dump("+", second_sequence, second_start, second_end); 251 | } 252 | res 253 | } 254 | 255 | fn qformat( 256 | &self, 257 | first_line: &str, 258 | second_line: &str, 259 | first_tags: &str, 260 | second_tags: &str, 261 | ) -> Vec { 262 | let mut res = Vec::new(); 263 | let mut first_tags = first_tags; 264 | let mut second_tags = second_tags; 265 | let mut common = cmp::min( 266 | count_leading(first_line, '\t'), 267 | count_leading(second_line, '\t'), 268 | ); 269 | common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' ')); 270 | common = cmp::min(common, count_leading(first_tags.split_at(common).0, ' ')); 271 | first_tags = first_tags.split_at(common).1.trim_right(); 272 | second_tags = second_tags.split_at(common).1.trim_right(); 273 | let mut s = format!("- {}", first_line); 274 | res.push(s); 275 | if first_tags != "" { 276 | s = format!("? {}{}\n", str_with_similar_chars('\t', common), first_tags); 277 | res.push(s); 278 | } 279 | s = format!("+ {}", second_line); 280 | res.push(s); 281 | if second_tags != "" { 282 | s = format!( 283 | "? 
{}{}\n", 284 | str_with_similar_chars('\t', common), 285 | second_tags 286 | ); 287 | res.push(s); 288 | } 289 | res 290 | } 291 | 292 | pub fn restore(delta: &[String], which: usize) -> Vec { 293 | if !(which == 1 || which == 2) { 294 | panic!("Second parameter must be 1 or 2"); 295 | } 296 | let mut res = Vec::new(); 297 | let tag = if which == 1 { "- " } else { "+ " }.to_string(); 298 | let prefixes = vec![tag, " ".to_string()]; 299 | for line in delta { 300 | for prefix in &prefixes { 301 | if line.starts_with(prefix) { 302 | res.push(line.split_at(2).1.to_string()); 303 | } 304 | } 305 | } 306 | res 307 | } 308 | } 309 | 310 | #[test] 311 | fn test_fancy_replace() { 312 | let differ = Differ::new(); 313 | let result = differ 314 | .fancy_replace(&vec!["abcDefghiJkl\n"], 0, 1, &vec!["abcdefGhijkl\n"], 0, 1) 315 | .join(""); 316 | assert_eq!( 317 | result, 318 | "- abcDefghiJkl\n? ^ ^ ^\n+ abcdefGhijkl\n? ^ ^ ^\n" 319 | ); 320 | } 321 | 322 | #[test] 323 | fn test_qformat() { 324 | let differ = Differ::new(); 325 | let result = differ.qformat( 326 | "\tabcDefghiJkl\n", 327 | "\tabcdefGhijkl\n", 328 | " ^ ^ ^ ", 329 | " ^ ^ ^ ", 330 | ); 331 | assert_eq!( 332 | result, 333 | vec![ 334 | "- \tabcDefghiJkl\n", 335 | "? \t ^ ^ ^\n", 336 | "+ \tabcdefGhijkl\n", 337 | "? 
\t ^ ^ ^\n", 338 | ] 339 | ); 340 | } 341 | -------------------------------------------------------------------------------- /src/sequencematcher.rs: -------------------------------------------------------------------------------- 1 | use std::cmp::{max, min}; 2 | use std::collections::HashMap; 3 | use std::hash::Hash; 4 | use utils::calculate_ratio; 5 | 6 | #[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord)] 7 | pub struct Match { 8 | pub first_start: usize, 9 | pub second_start: usize, 10 | pub size: usize, 11 | } 12 | 13 | impl Match { 14 | fn new(first_start: usize, second_start: usize, size: usize) -> Match { 15 | Match { 16 | first_start, 17 | second_start, 18 | size, 19 | } 20 | } 21 | } 22 | 23 | #[derive(Debug, Clone, PartialEq)] 24 | pub struct Opcode { 25 | pub tag: String, 26 | pub first_start: usize, 27 | pub first_end: usize, 28 | pub second_start: usize, 29 | pub second_end: usize, 30 | } 31 | 32 | impl Opcode { 33 | fn new( 34 | tag: String, 35 | first_start: usize, 36 | first_end: usize, 37 | second_start: usize, 38 | second_end: usize, 39 | ) -> Opcode { 40 | Opcode { 41 | tag, 42 | first_start, 43 | first_end, 44 | second_start, 45 | second_end, 46 | } 47 | } 48 | } 49 | 50 | pub trait Sequence: Eq + Hash {} 51 | impl Sequence for T {} 52 | 53 | pub struct SequenceMatcher<'a, T: 'a + Sequence> { 54 | first_sequence: &'a [T], 55 | second_sequence: &'a [T], 56 | matching_blocks: Option>, 57 | opcodes: Option>, 58 | is_junk: Option bool>, 59 | second_sequence_elements: HashMap<&'a T, Vec>, 60 | } 61 | 62 | impl<'a, T: Sequence> SequenceMatcher<'a, T> { 63 | pub fn new(first_sequence: &'a S, second_sequence: &'a S) -> SequenceMatcher<'a, T> 64 | where 65 | S: AsRef<[T]> + ?Sized, 66 | { 67 | let mut matcher = SequenceMatcher { 68 | first_sequence: first_sequence.as_ref(), 69 | second_sequence: second_sequence.as_ref(), 70 | matching_blocks: None, 71 | opcodes: None, 72 | is_junk: None, 73 | second_sequence_elements: HashMap::new(), 74 | }; 
75 | matcher.set_seqs(first_sequence, second_sequence); 76 | matcher 77 | } 78 | 79 | pub fn set_is_junk(&mut self, is_junk: Option bool>) { 80 | self.is_junk = is_junk; 81 | self.matching_blocks = None; 82 | self.opcodes = None; 83 | self.chain_second_seq(); 84 | } 85 | 86 | pub fn set_seqs(&mut self, first_sequence: &'a S, second_sequence: &'a S) 87 | where 88 | S: AsRef<[T]> + ?Sized, 89 | { 90 | self.set_first_seq(first_sequence); 91 | self.set_second_seq(second_sequence); 92 | } 93 | 94 | pub fn set_first_seq(&mut self, sequence: &'a S) 95 | where 96 | S: AsRef<[T]> + ?Sized, 97 | { 98 | self.first_sequence = sequence.as_ref(); 99 | self.matching_blocks = None; 100 | self.opcodes = None; 101 | } 102 | 103 | pub fn set_second_seq(&mut self, sequence: &'a S) 104 | where 105 | S: AsRef<[T]> + ?Sized, 106 | { 107 | self.second_sequence = sequence.as_ref(); 108 | self.matching_blocks = None; 109 | self.opcodes = None; 110 | self.chain_second_seq(); 111 | } 112 | 113 | fn chain_second_seq(&mut self) { 114 | let second_sequence = self.second_sequence; 115 | let mut second_sequence_elements = HashMap::new(); 116 | for (i, item) in second_sequence.iter().enumerate() { 117 | let mut counter = second_sequence_elements 118 | .entry(item) 119 | .or_insert_with(Vec::new); 120 | counter.push(i); 121 | } 122 | if let Some(junk_func) = self.is_junk { 123 | second_sequence_elements = second_sequence_elements 124 | .into_iter() 125 | .filter(|&(element, _)| !junk_func(element)) 126 | .collect(); 127 | } 128 | // Filter out popular elements 129 | let len = second_sequence.len(); 130 | if len >= 200 { 131 | let test_len = (len as f32 / 100.0).floor() as usize + 1; 132 | second_sequence_elements = second_sequence_elements 133 | .into_iter() 134 | .filter(|&(_, ref indexes)| indexes.len() > test_len) 135 | .collect(); 136 | } 137 | self.second_sequence_elements = second_sequence_elements; 138 | } 139 | 140 | pub fn find_longest_match( 141 | &self, 142 | first_start: usize, 143 | 
first_end: usize, 144 | second_start: usize, 145 | second_end: usize, 146 | ) -> Match { 147 | let first_sequence = &self.first_sequence; 148 | let second_sequence = &self.second_sequence; 149 | let second_sequence_elements = &self.second_sequence_elements; 150 | let (mut best_i, mut best_j, mut best_size) = (first_start, second_start, 0); 151 | let mut j2len: HashMap = HashMap::new(); 152 | for (i, item) in first_sequence 153 | .iter() 154 | .enumerate() 155 | .take(first_end) 156 | .skip(first_start) 157 | { 158 | let mut new_j2len: HashMap = HashMap::new(); 159 | if let Some(indexes) = second_sequence_elements.get(item) { 160 | for j in indexes { 161 | let j = *j; 162 | if j < second_start { 163 | continue; 164 | }; 165 | if j >= second_end { 166 | break; 167 | }; 168 | let mut size = 0; 169 | if j > 0 { 170 | if let Some(k) = j2len.get(&(j - 1)) { 171 | size = *k; 172 | } 173 | } 174 | size += 1; 175 | new_j2len.insert(j, size); 176 | if size > best_size { 177 | best_i = i + 1 - size; 178 | best_j = j + 1 - size; 179 | best_size = size; 180 | } 181 | } 182 | } 183 | j2len = new_j2len; 184 | } 185 | for _ in 0..2 { 186 | while best_i > first_start 187 | && best_j > second_start 188 | && first_sequence.get(best_i - 1) == second_sequence.get(best_j - 1) 189 | { 190 | best_i -= 1; 191 | best_j -= 1; 192 | best_size += 1; 193 | } 194 | while best_i + best_size < first_end 195 | && best_j + best_size < second_end 196 | && first_sequence.get(best_i + best_size) == second_sequence.get(best_j + best_size) 197 | { 198 | best_size += 1; 199 | } 200 | } 201 | Match::new(best_i, best_j, best_size) 202 | } 203 | 204 | pub fn get_matching_blocks(&mut self) -> Vec { 205 | if self.matching_blocks.as_ref().is_some() { 206 | return self.matching_blocks.as_ref().unwrap().clone(); 207 | } 208 | let (first_length, second_length) = (self.first_sequence.len(), self.second_sequence.len()); 209 | let mut matches = Vec::new(); 210 | let mut queue = vec![(0, first_length, 0, 
second_length)]; 211 | while !queue.is_empty() { 212 | let (first_start, first_end, second_start, second_end) = queue.pop().unwrap(); 213 | let m = self.find_longest_match(first_start, first_end, second_start, second_end); 214 | match m.size { 215 | 0 => {} 216 | _ => { 217 | if first_start < m.first_start && second_start < m.second_start { 218 | queue.push((first_start, m.first_start, second_start, m.second_start)); 219 | } 220 | if m.first_start + m.size < first_end && m.second_start + m.size < second_end { 221 | queue.push(( 222 | m.first_start + m.size, 223 | first_end, 224 | m.second_start + m.size, 225 | second_end, 226 | )); 227 | } 228 | matches.push(m); 229 | } 230 | } 231 | } 232 | matches.sort_by(|a, b| a.cmp(b)); 233 | let (mut first_start, mut second_start, mut size) = (0, 0, 0); 234 | let mut non_adjacent = Vec::new(); 235 | for m in &matches { 236 | if first_start + size == m.first_start && second_start + size == m.second_start { 237 | size += m.size 238 | } else { 239 | if size != 0 { 240 | non_adjacent.push(Match::new(first_start, second_start, size)); 241 | } 242 | first_start = m.first_start; 243 | second_start = m.second_start; 244 | size = m.size; 245 | } 246 | } 247 | if size != 0 { 248 | non_adjacent.push(Match::new(first_start, second_start, size)); 249 | } 250 | non_adjacent.push(Match::new(first_length, second_length, 0)); 251 | self.matching_blocks = Some(non_adjacent); 252 | self.matching_blocks.as_ref().unwrap().clone() 253 | } 254 | 255 | pub fn get_opcodes(&mut self) -> Vec { 256 | if self.opcodes.as_ref().is_some() { 257 | return self.opcodes.as_ref().unwrap().clone(); 258 | } 259 | let mut opcodes = Vec::new(); 260 | let (mut i, mut j) = (0, 0); 261 | for m in self.get_matching_blocks() { 262 | let mut tag = String::new(); 263 | if i < m.first_start && j < m.second_start { 264 | tag = String::from("replace"); 265 | } else if i < m.first_start { 266 | tag = String::from("delete"); 267 | } else if j < m.second_start { 268 | tag = 
String::from("insert"); 269 | } 270 | if !tag.is_empty() { 271 | opcodes.push(Opcode::new(tag, i, m.first_start, j, m.second_start)); 272 | } 273 | i = m.first_start + m.size; 274 | j = m.second_start + m.size; 275 | if m.size != 0 { 276 | opcodes.push(Opcode::new( 277 | String::from("equal"), 278 | m.first_start, 279 | i, 280 | m.second_start, 281 | j, 282 | )); 283 | } 284 | } 285 | self.opcodes = Some(opcodes); 286 | self.opcodes.as_ref().unwrap().clone() 287 | } 288 | 289 | pub fn get_grouped_opcodes(&mut self, n: usize) -> Vec> { 290 | let mut res = Vec::new(); 291 | let mut codes = self.get_opcodes(); 292 | if codes.is_empty() { 293 | codes.push(Opcode::new("equal".to_string(), 0, 1, 0, 1)); 294 | } 295 | 296 | if codes.first().unwrap().tag == "equal" { 297 | let opcode = codes.first_mut().unwrap(); 298 | opcode.first_start = max(opcode.first_start, opcode.first_end.saturating_sub(n)); 299 | opcode.second_start = max(opcode.second_start, opcode.second_end.saturating_sub(n)); 300 | } 301 | if codes.last().unwrap().tag == "equal" { 302 | let opcode = codes.last_mut().unwrap(); 303 | opcode.first_end = min(opcode.first_start + n, opcode.first_end); 304 | opcode.second_end = min(opcode.second_start + n, opcode.second_end); 305 | } 306 | let nn = n + n; 307 | let mut group = Vec::new(); 308 | for code in &codes { 309 | let (mut first_start, mut second_start) = (code.first_start, code.second_start); 310 | if code.tag == "equal" && code.first_end - code.first_start > nn { 311 | group.push(Opcode::new( 312 | code.tag.clone(), 313 | code.first_start, 314 | min(code.first_end, code.first_start + n), 315 | code.second_start, 316 | min(code.second_end, code.second_start + n), 317 | )); 318 | res.push(group.clone()); 319 | group.clear(); 320 | first_start = max(first_start, code.first_end.saturating_sub(n)); 321 | second_start = max(second_start, code.second_end.saturating_sub(n)); 322 | } 323 | group.push(Opcode::new( 324 | code.tag.clone(), 325 | first_start, 326 | 
code.first_end, 327 | second_start, 328 | code.second_end, 329 | )); 330 | } 331 | if !(group.len() == 1 && group.first().unwrap().tag == "equal") || group.is_empty() { 332 | res.push(group.clone()); 333 | } 334 | res 335 | } 336 | 337 | pub fn ratio(&mut self) -> f32 { 338 | let matches = self.get_matching_blocks() 339 | .iter() 340 | .fold(0, |res, &m| res + m.size); 341 | calculate_ratio( 342 | matches, 343 | self.first_sequence.len() + self.second_sequence.len(), 344 | ) 345 | } 346 | } 347 | --------------------------------------------------------------------------------