├── .gitignore ├── .travis.yml ├── Cargo.toml ├── README.md ├── deploy.sh ├── examples ├── file_watcher.rs ├── filev1.txt ├── filev2.txt └── predefined.rs └── src ├── hashing.rs ├── lib.rs ├── string_diff.rs └── window.rs /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | rust: 3 | - stable 4 | - beta 5 | - nightly 6 | install: 7 | - cargo build 8 | script: 9 | - cargo test 10 | - cargo doc --no-deps 11 | after_success: 12 | - bash deploy.sh 13 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "rdiff" 3 | version = "0.1.2" 4 | authors = ["Daniel Yule "] 5 | description = "A library for tracking changes to a file over time" 6 | documentation = "https://dyule.github.io/rdiff/rdiff/" 7 | homepage = "https://github.com/dyule/rdiff" 8 | repository = "https://github.com/dyule/rdiff" 9 | readme = "README.md" 10 | keywords = ["rsync", "diff"] 11 | license = "CC0-1.0" 12 | 13 | [dependencies] 14 | rust-crypto = "^0.2" 15 | log = "0.3" 16 | byteorder = "0.5" 17 | 18 | [dev-dependencies] 19 | notify = "2.6.1" 20 | env_logger = "0.3" 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | rdiff 2 | ===== 3 | [![CC0](http://i.creativecommons.org/p/zero/1.0/88x31.png)](http://creativecommons.org/publicdomain/zero/1.0/) 4 | [![Build Status](https://travis-ci.org/dyule/rdiff.svg?branch=master)](https://travis-ci.org/dyule/rdiff) 5 | [![Crates.io](https://img.shields.io/crates/v/rdiff.svg?maxAge=2592000)](https://crates.io/crates/rdiff) 6 | 
7 | rdiff is a package for comparing versions of a file over time. It is written in Rust, and expects version > 1.17. 8 | 9 | To the extent possible under law, rdiff contributors have waived all copyright and related or neighboring rights to rdiff. 10 | 11 | [Documentation](https://dyule.github.io/rdiff/rdiff/) 12 | 13 | # Usage 14 | 15 | In `Cargo.toml`: 16 | 17 | ``` toml 18 | [dependencies] 19 | rdiff = "0.1" 20 | ``` 21 | 22 | In your rust file (taken from [examples/predefined.rs](examples/predefined.rs)): 23 | 24 | ``` rust 25 | extern crate rdiff; 26 | 27 | use rdiff::BlockHashes; 28 | use std::fs::File; 29 | 30 | pub fn example() { 31 | let file = File::open("examples/filev1.txt").unwrap(); 32 | let mut hashes = BlockHashes::new(file, 8).unwrap(); 33 | let file = File::open("examples/filev2.txt").unwrap(); 34 | let difference = hashes.diff_and_update(file).unwrap(); 35 | println!("Inserts: {:?}", difference.inserts().collect::<Vec<_>>()); 36 | println!("Deletes: {:?}", difference.deletes().collect::<Vec<_>>()); 37 | } 38 | ``` 39 | 40 | This will output 41 | ``` 42 | Inserts: [Insert(8, 'widely understood '), Insert(90, ' absolutely'), Insert(381, 'hters, or sons if the family was progressive.\n'), Insert(572, 'not, even though he had been following the news quite closely.\n\n'), Insert(734, '\nMr. Ben')] 43 | Deletes: [Delete(34, 24), Delete(428, 8), Delete(638, 8), Delete(742, 8)] 44 | ``` 45 | -------------------------------------------------------------------------------- /deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -o errexit -o nounset 4 | 5 | if [ "$TRAVIS_BRANCH" != "master" ] 6 | then 7 | echo "This commit was made against the $TRAVIS_BRANCH and not the master! No deploy!" 
8 | exit 0 9 | fi 10 | 11 | rev=$(git rev-parse --short HEAD) 12 | 13 | cd target/docs 14 | 15 | git init 16 | git config user.name "Daniel Yule" 17 | git config user.email "daniel.yule@gmail.com" 18 | 19 | git remote add upstream "https://$GH_TOKEN@github.com/dyule/rdiff.git" 20 | git fetch upstream 21 | git reset upstream/gh-pages 22 | 23 | touch . 24 | 25 | git add -A . 26 | git commit -m "rebuild pages at ${rev}" 27 | git push -q upstream HEAD:gh-pages 28 | -------------------------------------------------------------------------------- /examples/file_watcher.rs: -------------------------------------------------------------------------------- 1 | extern crate notify; 2 | extern crate rdiff; 3 | 4 | use notify::{RecommendedWatcher, Watcher, op}; 5 | use std::sync::mpsc::channel; 6 | use std::fs; 7 | use std::io; 8 | use rdiff::BlockHashes; 9 | 10 | macro_rules! try_io { 11 | ($e: expr) => ({ 12 | match $e { 13 | Ok(v) => v, 14 | Err(e) => return Err(notify::Error::Io(e)) 15 | } 16 | }); 17 | } 18 | 19 | fn create_hashes(file: &str) -> io::Result { 20 | let file = try!(fs::File::open(file)); 21 | BlockHashes::new(file, 8) 22 | } 23 | 24 | fn update_hashes(hashes: &mut BlockHashes, file: &str) -> io::Result<()> { 25 | let file = try!(fs::File::open(file)); 26 | let diffs = try!(hashes.diff_and_update(file)); 27 | if diffs.inserts().len() != 0 || diffs.deletes().len() != 0 { 28 | println!("{:?}", diffs); 29 | } 30 | Ok(()) 31 | } 32 | 33 | fn watch(file_name: &str) -> notify::Result<()> { 34 | 35 | let mut hashes = try_io!(create_hashes(file_name)); 36 | // Create a channel to receive the events. 37 | let (tx, rx) = channel(); 38 | 39 | // Automatically select the best implementation for your platform. 40 | // You can also access each implementation directly e.g. INotifyWatcher. 41 | let mut watcher: RecommendedWatcher = try!(Watcher::new(tx)); 42 | 43 | // Add a path to be watched. 
All files and directories at that path and 44 | // below will be monitored for changes. 45 | try!(watcher.watch(file_name)); 46 | 47 | // This is a simple loop, but you may want to use more complex logic here, 48 | // for example to handle I/O. 49 | loop { 50 | match rx.recv() { 51 | Ok(notify::Event{ path: Some(_),op:Ok(operation) }) => { 52 | if operation == op::WRITE { 53 | try_io!(update_hashes(&mut hashes, file_name)); 54 | } 55 | }, 56 | Err(e) => println!("watch error {}", e), 57 | _ => () 58 | } 59 | } 60 | } 61 | 62 | fn main() { 63 | let args:Vec<_> = std::env::args().collect(); 64 | if args.len() != 2 { 65 | println!("Usage: file_watcher "); 66 | return; 67 | } 68 | 69 | if let Err(err) = watch(&args[1]) { 70 | println!("Error! {:?}", err) 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /examples/filev1.txt: -------------------------------------------------------------------------------- 1 | It is a truth universally acknowledged, that a single man in possession of 2 | a good fortune, must be in want of a wife. 3 | 4 | However little known the feelings or views of such a man may be on his 5 | first entering a neighbourhood, this truth is so well fixed in the minds 6 | of the surrounding families, that he is considered the rightful property 7 | of some one or other of their daughters. 8 | 9 | "My dear Mr. Bennet," said his lady to him one day, "have you heard that 10 | Netherfield Park is let at last?" 11 | 12 | Mr. Bennet replied that he had not. 13 | 14 | "But it is," returned she; "for Mrs. Long has just been here, and she 15 | told me all about it." 16 | 17 | Mr. Bennet made no answer. 18 | 19 | "Do you not want to know who has taken it?" cried his wife impatiently. 20 | 21 | "_You_ want to tell me, and I have no objection to hearing it." 22 | 23 | This was invitation enough. 
24 | -------------------------------------------------------------------------------- /examples/filev2.txt: -------------------------------------------------------------------------------- 1 | It is a widely understood truth unthat a single man in possession of 2 | a good fortune, must absolutely be in want of a wife. 3 | 4 | However little known the feelings or views of such a man may be on his 5 | first entering a neighbourhood, this truth is so well fixed in the minds 6 | of the surrounding families, that he is considered the rightful property 7 | of some one or other of their daughters, or sons if the family was progressive. 8 | 9 | "My dear Mr. Bennet," said his lady to him one day, "have you heard that 10 | Netherfield Park is let at last?" 11 | 12 | Mr. Bennet replied that he had not, even though he had been following the news quite closely. 13 | 14 | "But it is," returned she; "for Mrs. Long has just been here, and she 15 | told me all about it." 16 | 17 | Mr. Bennet made no answer. 18 | 19 | "Do you not want to know who has taken it?" cried his wife impatiently. 20 | 21 | "_You_ want to tell me, and I have no objection to hearing it." 22 | 23 | This was invitation enough. 
24 | -------------------------------------------------------------------------------- /examples/predefined.rs: -------------------------------------------------------------------------------- 1 | extern crate rdiff; 2 | 3 | use rdiff::BlockHashes; 4 | use std::fs::File; 5 | 6 | pub fn main() { 7 | let file = File::open("examples/filev1.txt").unwrap(); 8 | let mut hashes = BlockHashes::new(file, 8).unwrap(); 9 | let file = File::open("examples/filev2.txt").unwrap(); 10 | let difference = hashes.diff_and_update(file).unwrap(); 11 | println!("Inserts: {:?}", difference.inserts().collect::>()); 12 | println!("Deletes: {:?}", difference.deletes().collect::>()); 13 | } 14 | -------------------------------------------------------------------------------- /src/hashing.rs: -------------------------------------------------------------------------------- 1 | use super::{BlockHashes, Diff, Window}; 2 | use std::io::{Read, Write, Result}; 3 | use std::collections::HashMap; 4 | use crypto::md5::Md5; 5 | use crypto::digest::Digest; 6 | use byteorder::{NetworkEndian, ByteOrder}; 7 | 8 | /// Implements a weak, but easy to calculate hash for a block of bytes 9 | /// 10 | /// The hash is comprised of two bytes. The first is the sum of the bytes 11 | // in the block, the second is the sum of the sum of the bytes in the block 12 | struct RollingHash { 13 | a: u16, 14 | b: u16, 15 | block_size: u16 16 | } 17 | 18 | impl RollingHash { 19 | 20 | /// Creates a new rolling hash over the bytes in `initial_data`. 21 | /// It will be assumed that the size of blocks will be the size of the initial data. 
22 | pub fn new<'a, I: Iterator>(initial_data: I) -> RollingHash { 23 | 24 | let mut a:u16 = 0; 25 | let mut b:u16 = 0; 26 | let mut block_size: u16 = 0; 27 | for byte in initial_data { 28 | a = a.wrapping_add(*byte as u16); 29 | b = b.wrapping_add(a); 30 | block_size += 1; 31 | } 32 | RollingHash { 33 | a: a, 34 | b: b, 35 | block_size: block_size 36 | } 37 | } 38 | 39 | /// Gets the hash as it currently stands 40 | pub fn get_hash(&self) -> u32 { 41 | return (self.b as u32) << 16 | self.a as u32; 42 | } 43 | 44 | /// Roll the has forward one byte. This function will remove `old_byte` from its calculation 45 | /// and add `new_byte` if it exists. 46 | /// To get the hash afterwards, use `get_hash()`. 47 | pub fn roll_hash(&mut self, new_byte: Option, old_byte: u8) { 48 | self.a = self.a.wrapping_sub(old_byte as u16); 49 | self.b = self.b.wrapping_sub(((old_byte as u16).wrapping_mul(self.block_size as u16)) as u16); 50 | if let Some(new_byte) = new_byte { 51 | self.a = self.a.wrapping_add(new_byte as u16); 52 | self.b = self.b.wrapping_add(self.a); 53 | } else { 54 | self.block_size -= 1 55 | } 56 | } 57 | 58 | /// Calculate the hash of a collection of bytes. 59 | pub fn hash_buffer(buffer: &[u8]) -> u32 { 60 | let mut a:u16 = 0; 61 | let mut b:u16 = 0; 62 | for byte in buffer { 63 | a = a.wrapping_add(*byte as u16); 64 | b = b.wrapping_add(a); 65 | 66 | } 67 | (b as u32) << 16 | a as u32 68 | } 69 | } 70 | 71 | 72 | impl BlockHashes { 73 | 74 | /// Create a new BlockHash based on the data in data_source. This method 75 | /// will create a hash for every `block_size` set of bytes in `data_source`. 76 | /// 77 | /// To see the difference after `data_source` has been updated, use `diff_and_update()` 78 | /// 79 | /// This method returns an error when there is a problem reading from `data_source`. 
80 | pub fn new(mut data_source: R, block_size: usize) -> Result { 81 | let mut block = vec![0;block_size]; 82 | let mut hashes = HashMap::new(); 83 | let mut block_index = 0; 84 | let mut strong_hasher = Md5::new(); 85 | let mut total_size = 0; 86 | 87 | let mut read_size = try!(data_source.read(&mut block)); 88 | while read_size > 0 { 89 | let weak_hash = RollingHash::hash_buffer(&block[..read_size]); 90 | 91 | let mut strong_hash:[u8;16] = [0;16]; 92 | strong_hasher.reset(); 93 | strong_hasher.input(&block[..read_size]); 94 | strong_hasher.result(&mut strong_hash); 95 | 96 | hashes.entry(weak_hash).or_insert(Vec::new()).push((block_index, strong_hash)); 97 | 98 | block_index += 1; 99 | total_size += read_size; 100 | read_size = try!(data_source.read(&mut block)); 101 | } 102 | Ok(BlockHashes { 103 | hashes, 104 | block_size, 105 | file_size: total_size 106 | }) 107 | } 108 | 109 | /// Construct a new block hash for a file that was just created 110 | pub fn empty(block_size: usize) -> BlockHashes { 111 | BlockHashes { 112 | hashes: HashMap::new(), 113 | block_size: block_size, 114 | file_size: 0 115 | } 116 | } 117 | 118 | /// Compare the data in `new_data` with the hashes computed from either 119 | /// the most recent call to `diff_and_update()` or when this `BlockHashes` was updated 120 | /// 121 | /// # Example 122 | /// 123 | /// ``` 124 | /// use rdiff::BlockHashes; 125 | /// use std::io::Cursor; 126 | /// let mut hashes = BlockHashes::new(Cursor::new("It was the best of times"), 6).unwrap(); 127 | /// let diff = hashes.diff_and_update(Cursor::new("It was not the best of things")).unwrap(); 128 | /// // prints (6, ' not') and (22, ' things')) 129 | /// for insert in diff.inserts() { 130 | /// println!("{:?}", insert); 131 | /// } 132 | /// // prints (29, 6) 133 | /// for delete in diff.deletes() { 134 | /// println!("{:?}", delete); 135 | /// } 136 | /// assert_eq!("It was not the best of things", 137 | /// diff.apply_to_string("It was the best of 
times").unwrap()); 138 | /// ``` 139 | pub fn diff_and_update(&mut self, new_data: R) -> Result { 140 | use std::mem; 141 | let mut diffs = Diff::new(); 142 | let mut window = try!(Window::new(new_data, self.block_size)); 143 | let mut weak_hasher = RollingHash::new(window.frame().0.iter()); 144 | let mut strong_hasher = Md5::new(); 145 | let mut last_matching_block_index = -1; 146 | let mut insert_buffer = Vec::new(); 147 | let mut new_hashes = HashMap::new(); 148 | let mut current_block_index = 0; 149 | while window.frame_size() > 0 { 150 | 151 | if let Some(other_block_index) = self.check_match(&weak_hasher, &mut strong_hasher, &mut window, &mut last_matching_block_index) { 152 | //create an insert if the insert buffer has anything in it 153 | if insert_buffer.len() > 0 { 154 | // XXX with some work here, we could probably track the insert buffer as a piece of the window, which is then 155 | // moved into the diff list. 156 | diffs.add_insert(window.get_bytes_read() - insert_buffer.len(), mem::replace(&mut insert_buffer, Vec::new())); 157 | } 158 | //create a delete if the index is more than it should be 159 | if other_block_index as i32 > last_matching_block_index + 1 { 160 | diffs.add_delete(window.get_bytes_read(), self.block_size * (other_block_index as i32 - last_matching_block_index - 1) as usize) 161 | } 162 | last_matching_block_index = other_block_index as i32; 163 | //advance forward an entire block's worth 164 | for i in 0..self.block_size { 165 | if window.on_boundry() { 166 | // This might iterate past the end of the data. 
If so, bail out 167 | if window.frame_size() == 0 { 168 | break; 169 | } 170 | let mut strong_hash:[u8;16] = [0;16]; 171 | // If the boundry happened where we saw a match, we can skip the 172 | // strong hashing, because it was already done during the 173 | // match checking 174 | if i != 0 { 175 | let (front, back) = window.frame(); 176 | strong_hasher.reset(); 177 | strong_hasher.input(front); 178 | strong_hasher.input(back); 179 | } 180 | strong_hasher.result(&mut strong_hash); 181 | 182 | new_hashes.entry(weak_hasher.get_hash()).or_insert(Vec::new()).push((current_block_index, strong_hash)); 183 | current_block_index += 1; 184 | } 185 | let (tail, head) = try!(window.advance()); 186 | if let Some(tail) = tail { 187 | weak_hasher.roll_hash(head, tail); 188 | } else { 189 | break; 190 | } 191 | } 192 | } else { 193 | //advance forward one byte 194 | if window.on_boundry() { 195 | // XXX There is a slight optimization possible here, where 196 | // when the weak checksum matches, but the strong one doesn't 197 | // we are re-computing the strong checksum here. 
198 | let mut strong_hash:[u8;16] = [0;16]; 199 | let (front, back) = window.frame(); 200 | strong_hasher.reset(); 201 | strong_hasher.input(front); 202 | strong_hasher.input(back); 203 | strong_hasher.result(&mut strong_hash); 204 | 205 | new_hashes.entry(weak_hasher.get_hash()).or_insert(Vec::new()).push((current_block_index, strong_hash)); 206 | current_block_index += 1; 207 | } 208 | let (tail, head) = try!(window.advance()); 209 | weak_hasher.roll_hash(head, tail.unwrap()); 210 | insert_buffer.push(tail.unwrap()); 211 | } 212 | } 213 | if insert_buffer.len() > 0 { 214 | diffs.add_insert(window.get_bytes_read() - insert_buffer.len(), insert_buffer); 215 | } 216 | let old_block_count = (self.file_size + self.block_size - 1) as i32 / self.block_size as i32; 217 | if last_matching_block_index + 1 < old_block_count { 218 | diffs.add_delete(window.get_bytes_read(), (self.file_size as i32 - (last_matching_block_index + 1) * self.block_size as i32) as usize); 219 | } 220 | self.hashes = new_hashes; 221 | self.file_size = window.get_bytes_read(); 222 | Ok(diffs) 223 | } 224 | 225 | /// Checks if `data_source` has changed since the last time the hashes were updated. 
226 | /// 227 | /// Returns true if `data_source` is identical to what it was when the hashes were generated, false otherwise 228 | pub fn verify_unchanged(&self, data_source: &mut R) -> Result { 229 | let mut block = vec![0;self.block_size]; 230 | let mut block_index = 0; 231 | let mut strong_hasher = Md5::new(); 232 | let mut total_size = 0; 233 | 234 | let mut read_size = try!(data_source.read(&mut block)); 235 | while read_size > 0 { 236 | let weak_hash = RollingHash::hash_buffer(&block[..read_size]); 237 | if let Some(entry) = self.hashes.get(&weak_hash) { 238 | let mut strong_hash:[u8;16] = [0;16]; 239 | strong_hasher.reset(); 240 | strong_hasher.input(&block[..read_size]); 241 | strong_hasher.result(&mut strong_hash); 242 | if !entry.contains(&(block_index, strong_hash)) { 243 | return Ok(false); 244 | } 245 | } 246 | 247 | 248 | block_index += 1; 249 | total_size += read_size; 250 | read_size = try!(data_source.read(&mut block)); 251 | } 252 | Ok(total_size == self.file_size) 253 | } 254 | 255 | /// Compress these Hashes and write to `writer`. 
The output can then be expanded 256 | /// back into an equivilent Hash collection using `expand_from()` 257 | pub fn compress_to(&self, writer: &mut W) -> Result<()> { 258 | 259 | let mut int_buf = [0;4]; 260 | NetworkEndian::write_u32(&mut int_buf, self.file_size as u32); 261 | try!(writer.write(&int_buf)); 262 | NetworkEndian::write_u32(&mut int_buf, self.block_size as u32); 263 | try!(writer.write(&int_buf)); 264 | let block_count = (self.file_size + self.block_size - 1) / self.block_size; 265 | let dummy_hash = [0u8;16]; 266 | let mut sequential_hashes = Vec::with_capacity(block_count); 267 | sequential_hashes.resize(block_count, (0, &dummy_hash)); 268 | for (weak_hash, entry) in self.hashes.iter() { 269 | for &(index, ref strong_hash) in entry.iter() { 270 | sequential_hashes[index] = (*weak_hash, strong_hash); 271 | } 272 | } 273 | for (weak, strong) in sequential_hashes { 274 | NetworkEndian::write_u32(&mut int_buf, weak); 275 | try!(writer.write(&int_buf)); 276 | try!(writer.write(strong)); 277 | } 278 | Ok(()) 279 | } 280 | 281 | /// Expand these hashes from previously compressed data in `reader`. 
The data in reader 282 | /// should have been written using `compress_to()` 283 | pub fn expand_from(reader: &mut R) -> Result { 284 | let mut int_buf = [0;4]; 285 | let mut strong_hash = [0u8;16]; 286 | try!(reader.read(&mut int_buf)); 287 | let file_size = NetworkEndian::read_u32(&mut int_buf) as usize; 288 | try!(reader.read(&mut int_buf)); 289 | let block_size = NetworkEndian::read_u32(&mut int_buf) as usize; 290 | let block_count = (file_size + block_size - 1) / block_size; 291 | // Might be an overestimate, but not by more than a few 292 | let mut hashes = HashMap::with_capacity(block_count); 293 | 294 | for block_index in 0..block_count { 295 | try!(reader.read(&mut int_buf)); 296 | let weak_hash = NetworkEndian::read_u32(&mut int_buf); 297 | try!(reader.read(&mut strong_hash)); 298 | hashes.entry(weak_hash).or_insert(Vec::new()).push((block_index, strong_hash)); 299 | } 300 | Ok(BlockHashes { 301 | file_size: file_size, 302 | block_size: block_size, 303 | hashes: hashes 304 | }) 305 | } 306 | 307 | /// Checks if the current window frame matches any existing block with an index greater than the previously matched block. 308 | /// 309 | /// Returns the index of the matching block if it does 310 | fn check_match(&self, weak_hasher: &RollingHash, mut strong_hasher: &mut Md5, mut window: &Window, last_matching_block_index: &mut i32) -> Option { 311 | if let Some(other_block_index) = self.hash_match(&weak_hasher, &mut strong_hasher, &mut window) { 312 | if other_block_index as i32 > *last_matching_block_index { 313 | return Some(other_block_index); 314 | } 315 | } 316 | None 317 | } 318 | 319 | /// Checks to see if the hash of the current window frame matches an existing hash. 
320 | /// 321 | /// If so, returns the index of the matching block 322 | fn hash_match(&self, weak_hasher: &RollingHash, strong_hasher: &mut Md5, window: &Window) -> Option { 323 | let mut new_result = [0;16]; 324 | if let Some(matches) = self.hashes.get(&weak_hasher.get_hash()) { 325 | for &(index, strong_hash) in matches.iter() { 326 | strong_hasher.reset(); 327 | let (front, back) = window.frame(); 328 | strong_hasher.input(front); 329 | strong_hasher.input(back); 330 | strong_hasher.result(&mut new_result); 331 | if new_result == strong_hash { 332 | return Some(index) 333 | } 334 | } 335 | } 336 | return None 337 | } 338 | } 339 | 340 | #[cfg(test)] 341 | mod test { 342 | use super::super::{BlockHashes, Diff, Insert, Delete}; 343 | use super::{RollingHash}; 344 | use std::io::{Cursor}; 345 | use std::collections::HashMap; 346 | 347 | macro_rules! check_diff { 348 | ($start: tt | $block_size: tt | $new: tt | $(($insert_pos : tt, $insert_value: tt)),* | $(($delete_pos: tt, $delete_len: tt)),*) => { 349 | { 350 | check_diff_workaround!($start; $block_size; $new; $(($insert_pos, $insert_value)),*; $(($delete_pos, $delete_len)),*) 351 | } 352 | }; 353 | } 354 | 355 | // Caused by a bug in the implementation of the tt macro type. It currently has to be passed as an expr into another macro 356 | // or it throws a fit for no reason. See https://github.com/rust-lang/rust/issues/5846 357 | macro_rules! 
check_diff_workaround { 358 | ($start: expr ; $block_size: expr ; $new: expr ; $(($insert_pos : tt, $insert_value: tt)),* ; $(($delete_pos: tt, $delete_len: tt)),*) => { 359 | { 360 | let mut hashes = BlockHashes::new(Cursor::new($start), $block_size).unwrap(); 361 | let diff = hashes.diff_and_update(Cursor::new($new)).unwrap(); 362 | assert_eq!(Diff { 363 | inserts: vec![$(Insert{position: $insert_pos, data: $insert_value.bytes().collect()}),*], 364 | deletes: vec![$(Delete{position: $delete_pos, len: $delete_len}),*] 365 | }, diff); 366 | check_hashes(&hashes, $new); 367 | } 368 | }; 369 | } 370 | 371 | fn check_hashes(hashes: &BlockHashes, starting_data: &'static str) { 372 | let expected_hashes = BlockHashes::new(Cursor::new(starting_data), hashes.block_size).unwrap(); 373 | assert_eq!(hashes, &expected_hashes); 374 | } 375 | 376 | #[test] 377 | fn rolling_hash_small() { 378 | let mut hash = RollingHash::new(vec![7, 2, 9, 1, 7, 8].iter()); 379 | assert_eq!(hash.get_hash(), 0x710022); // a: 34 b: 113 380 | hash.roll_hash(Some(12), 7); // [2, 9, 1, 7, 8, 12] 381 | assert_eq!(hash.get_hash(), 0x6E0027); // a: 39 b:110 382 | hash.roll_hash(Some(1), 2); // [9, 1, 7, 8, 12, 1] 383 | assert_eq!(hash.get_hash(), 0x880026); // a: 38 b:136 384 | hash.roll_hash(None, 9); // [1, 7, 8, 12, 1] 385 | assert_eq!(hash.get_hash(), 0x52001D); // a: 29 b:82 386 | hash.roll_hash(None, 1); // [7, 8, 12, 1] 387 | assert_eq!(hash.get_hash(), 0x4D001C); // a: 28 b: 77 388 | hash.roll_hash(None, 7); // [8, 12, 1] 389 | assert_eq!(hash.get_hash(), 0x310015); // a: 21 b: 49 390 | hash.roll_hash(None, 8); // [12, 1] 391 | assert_eq!(hash.get_hash(), 0x19000D); // a: 13 b: 25 392 | hash.roll_hash(None, 12); // [1] 393 | assert_eq!(hash.get_hash(), 0x10001); // a: 1 b: 1 394 | hash.roll_hash(None, 1); // [] 395 | assert_eq!(hash.get_hash(), 0x0); // a: 0 b: 0 396 | } 397 | #[test] 398 | fn rolling_hash_big() { 399 | let mut numbers = Vec::new(); 400 | for i in 0..4000 { 401 | 
numbers.push((200 + i * i) as u8); 402 | } 403 | let mut hash = RollingHash::new(numbers.iter()); 404 | assert_eq!(hash.get_hash(), 0x1880A9F0); // a: A9f0 b: 1880 405 | hash.roll_hash(Some(237), 200); 406 | assert_eq!(hash.get_hash(), 0x8D95AA15); // a: AA15 b: 8D95 407 | hash.roll_hash(None, 201); 408 | assert_eq!(hash.get_hash(), 0x48F5A94C) // a: A94C b: 48F5 409 | 410 | } 411 | 412 | #[test] 413 | fn hash_blocks_init() { 414 | let test_string = "It was the best of times, it was the worst of times"; 415 | // Blocks: 416 | // It was t : 202900156 - ad721d63c3dabb32cc9096824071a919 417 | // he best : 211944123 - 2712A22DDA5585758AEBC4D298142F8B 418 | // of times : 225313559 - 3160523454fa59e4c14badf9435d6212 419 | // , it was : 169083540 - 5fa8fa659adc38997bb365f17648ea8a 420 | // the wor : 197788377 - d7aad88e1f5098bdae1da2e564749322 421 | // st of ti : 217580249 - 1c64811671e43ea5f82da6ffc4a5bbee 422 | // mes : 42205509 - d2db8a610f8c7c0785d2d92a6e8c450e 423 | let hashes = BlockHashes::new(Cursor::new(test_string), 8).unwrap(); 424 | 425 | let mut expected_hashes:HashMap> = HashMap::new(); 426 | expected_hashes.insert(202900156, vec![(0, [0xad, 0x72, 0x1d, 0x63, 0xc3, 0xda, 0xbb, 0x32, 0xcc, 0x90, 0x96, 0x82, 0x40, 0x71, 0xa9, 0x19])]); 427 | expected_hashes.insert(211944123, vec![(1, [0x27, 0x12, 0xA2, 0x2D, 0xDA, 0x55, 0x85, 0x75, 0x8A, 0xEB, 0xC4, 0xD2, 0x98, 0x14, 0x2F, 0x8B])]); 428 | expected_hashes.insert(225313559, vec![(2, [0x31, 0x60, 0x52, 0x34, 0x54, 0xfa, 0x59, 0xe4, 0xc1, 0x4b, 0xad, 0xf9, 0x43, 0x5d, 0x62, 0x12])]); 429 | expected_hashes.insert(169083540, vec![(3, [0x5f, 0xa8, 0xfa, 0x65, 0x9a, 0xdc, 0x38, 0x99, 0x7b, 0xb3, 0x65, 0xf1, 0x76, 0x48, 0xea, 0x8a])]); 430 | expected_hashes.insert(197788377, vec![(4, [0x6B, 0xF2, 0x9B, 0x2C, 0xD5, 0x03, 0x3E, 0xFC, 0x07, 0x9C, 0x2E, 0xA1, 0x27, 0xFD, 0x7B, 0x13])]); 431 | expected_hashes.insert(217580249, vec![(5, [0x1c, 0x64, 0x81, 0x16, 0x71, 0xe4, 0x3e, 0xa5, 0xf8, 0x2d, 0xa6, 0xff, 0xc4, 0xa5, 
0xbb, 0xee])]); 432 | expected_hashes.insert(42205509, vec![(6, [0xd2, 0xdb, 0x8a, 0x61, 0x0f, 0x8c, 0x7c, 0x07, 0x85, 0xd2, 0xd9, 0x2a, 0x6e, 0x8c, 0x45, 0x0e])]); 433 | 434 | assert_eq!(hashes, BlockHashes { 435 | hashes: expected_hashes, 436 | block_size: 8, 437 | file_size: 51 438 | }); 439 | } 440 | 441 | 442 | #[test] 443 | fn empty_hashes() { 444 | check_diff!("" | 445 | 16 | 446 | "The New Data" | 447 | (0, "The New Data") | 448 | 449 | ); 450 | } 451 | 452 | #[test] 453 | fn no_change() { 454 | check_diff!("Same Data" | 455 | 8 | 456 | "Same Data" | 457 | | 458 | 459 | ); 460 | } 461 | 462 | #[test] 463 | fn multiple_overwrites() { 464 | check_diff!("" | 465 | 8 | 466 | "New Data" | 467 | (0, "New Data")| 468 | 469 | ); 470 | check_diff!("New Data" | 471 | 8 | 472 | "Other Stuff" | 473 | (0, "Other Stuff")| 474 | (11, 8) 475 | ); 476 | check_diff!("Other Stuff" | 477 | 8 | 478 | "More Things" | 479 | (0, "More Things")| 480 | (11, 11) 481 | ); 482 | } 483 | 484 | #[test] 485 | fn insertions() { 486 | check_diff!("Starting data is a long sentence" | 487 | 8 | 488 | "Starting data is now a long sentence" | 489 | (16, " now") | 490 | 491 | ); 492 | check_diff!("Starting data is a long sentence" | 493 | 8 | 494 | "This Starting data is a long sentence" | 495 | (0, "This ") | 496 | 497 | ); 498 | check_diff!("Starting data is a long sentence" | 499 | 8 | 500 | "Starting data is a long sentence. With more" | 501 | (32, ". With more") | 502 | 503 | ); 504 | check_diff!("Starting data is a long sentence" | 505 | 8 | 506 | "This Starting data is now a long sentence. With more" | 507 | (0, "This "), 508 | (21, " now"), 509 | (41, ". 
With more") | 510 | 511 | ); 512 | } 513 | 514 | #[test] 515 | fn delete_on_boundry() { 516 | check_diff!("13 chars long, no longer" | 517 | 13 | 518 | "13 chars long" | 519 | | 520 | (13, 11) 521 | ); 522 | } 523 | 524 | #[test] 525 | fn deletions() { 526 | check_diff!("Starting data is a long sentence" | 527 | 8 | 528 | "Starting a long sentence" | 529 | | 530 | (8, 8) 531 | ); 532 | check_diff!("Starting data is a long sentence" | 533 | 8 | 534 | "Starting data is a long " | 535 | | 536 | (24, 8) 537 | ); 538 | check_diff!("Starting data is a long sentence" | 539 | 8 | 540 | " data is a long sentence" | 541 | | 542 | (0, 8) 543 | ); 544 | check_diff!("Starting data is a long sentence" | 545 | 8 | 546 | " a long " | 547 | | 548 | (0, 16), (8, 8) 549 | ); 550 | 551 | } 552 | 553 | #[test] 554 | fn insertions_and_deletions() { 555 | check_diff!("Starting data is a long sentence" | 556 | 8 | 557 | "Starting data a long sentence" | 558 | (8, " data") | 559 | (13, 8) 560 | ); 561 | check_diff!("Starting data is a long sentence" | 562 | 8 | 563 | "Starting data is a long sentenc" | 564 | (24, "sentenc")| 565 | (31, 8) 566 | ); 567 | check_diff!("Starting data is a long sentence" | 568 | 8 | 569 | "This Starting data a very long sentence" | 570 | (0, "This "), (13, " data a very long ") | 571 | (31, 16) 572 | ); 573 | 574 | } 575 | } 576 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | //! Finds the difference between sequential versions of files. 2 | //! 3 | //! Based on the rsync algorithm. 4 | //! The `BlockHashes` struct will find the differences between versions of the same file. 5 | //! It does this through the [`diff_and_update()`](struct.BlockHashes.html#method.diff_and_update) method. 6 | //! 7 | //! # Example 8 | //! 9 | //! ``` 10 | //! use std::io::Cursor; 11 | //! use rdiff::BlockHashes; 12 | //! 13 | //! 
let mut hash = BlockHashes::new(Cursor::new("The initial version"), 8).unwrap(); 14 | //! let diffs = hash.diff_and_update(Cursor::new("The next version")).unwrap(); 15 | //! println!("Diffs: {:?}", diffs); 16 | //! // Outputs "Diffs: Diff{inserts: [Insert(0, The next vers)], deletes:[Delete(13, 16)]}" 17 | //! ``` 18 | //! 19 | //! This crate also contains methods relating to finding the differences between two strings, in the [string_diff](string_diff/index.html) module. 20 | //! These methods can be used to refine the coarse differences found through the rsync method. 21 | 22 | #![deny(missing_docs)] 23 | extern crate crypto; 24 | extern crate byteorder; 25 | #[macro_use] 26 | extern crate log; 27 | 28 | mod window; 29 | mod hashing; 30 | pub mod string_diff; 31 | 32 | use std::collections::HashMap; 33 | use std::fs::File; 34 | use std::io::{self, Read, Write, Seek, SeekFrom}; 35 | use std::slice::Iter; 36 | use std::fmt; 37 | use std::mem; 38 | use std::string::FromUtf8Error; 39 | 40 | use byteorder::{NetworkEndian, ByteOrder}; 41 | 42 | /// Used for calculating and re-calculating the differences between two versions of the same file 43 | /// 44 | /// See the [module level documentation](index.html) for examples on how to use this 45 | #[derive(Debug, PartialEq)] 46 | pub struct BlockHashes { 47 | hashes: HashMap>, 48 | block_size: usize, 49 | file_size: usize 50 | } 51 | 52 | /// Represents an operation to insert bytes at a particular position into a file 53 | #[derive(PartialEq)] 54 | pub struct Insert { 55 | position: usize, 56 | data: Vec 57 | } 58 | 59 | /// Represents an operation to delete a certain number of bytes at a particular position in a file 60 | #[derive(PartialEq)] 61 | pub struct Delete { 62 | position: usize, 63 | len: usize 64 | } 65 | 66 | /// Represents a series of operations that were performed on a file to transform it into a new 67 | /// version.
68 | /// 69 | /// The operations are stored in file order, which means that every operation that affects 70 | /// an earlier part of the file must be stored before an operation that affects a later part. 71 | /// The diff also assumes that insert operations are performed prior to delete operations. 72 | #[derive(Debug, PartialEq)] 73 | pub struct Diff { 74 | inserts: Vec, 75 | deletes: Vec 76 | } 77 | 78 | /// A sliding window over a reader. This maintains an internal buffer read from the file, 79 | /// which can be read from at any time. 80 | struct Window { 81 | front: Vec, 82 | back: Vec, 83 | block_size: usize, 84 | offset: usize, 85 | bytes_read: usize, 86 | reader: R 87 | } 88 | 89 | impl Diff { 90 | /// Creates a new `Diff` 91 | #[inline] 92 | pub fn new() -> Diff { 93 | Diff { 94 | inserts: Vec::new(), 95 | deletes: Vec::new() 96 | } 97 | } 98 | 99 | /// Adds an insert operation into this diff. The operation must occur after 100 | /// all previously added insert operations in file order. If the operation 101 | /// can be merged with the previous operation, then it is. 102 | /// 103 | /// Consumes the data that is passed in 104 | fn add_insert(&mut self, position: usize, mut data: Vec) { 105 | if let Some(tail) = self.inserts.last_mut() { 106 | if tail.position + tail.data.len() == position { 107 | tail.data.append(&mut data); 108 | return; 109 | } 110 | } 111 | self.inserts.push(Insert::new(data, position)); 112 | } 113 | 114 | /// Adds a delete operation into this diff. The operation must occur after 115 | /// all previously added insert and delete operations in file order. If the operation 116 | /// can be merged with the previous operation, then it is.
117 | fn add_delete(&mut self, position: usize, len: usize) { 118 | if let Some(tail) = self.deletes.last_mut() { 119 | if tail.position == position { 120 | tail.len += len; 121 | return; 122 | } 123 | } 124 | self.deletes.push(Delete::new(position, len)); 125 | } 126 | 127 | /// Gets an iterator over all insert operations 128 | pub fn inserts(&self) -> Iter { 129 | self.inserts.iter() 130 | } 131 | 132 | /// Gets an iterator over all delete operations 133 | pub fn deletes(&self) -> Iter { 134 | self.deletes.iter() 135 | } 136 | 137 | /// Checks if this set of diffs has any actual content 138 | pub fn is_empty(&self) -> bool { 139 | self.deletes.is_empty() && self.inserts.is_empty() 140 | } 141 | 142 | /// Applies all of the operations in the diff to the given string. 143 | /// Gives an error if the resulting string can't be represented by utf8. 144 | /// 145 | /// # Panics 146 | /// When the operations refer to positions that are not represented by the string. 147 | pub fn apply_to_string(&self, string: &str) -> Result { 148 | let mut old_bytes = string.bytes(); 149 | let mut new_bytes = Vec::new(); 150 | let mut index = 0; 151 | for insert in self.inserts() { 152 | while index < insert.position { 153 | new_bytes.push(old_bytes.next().unwrap().clone()); 154 | index += 1; 155 | } 156 | new_bytes.append(&mut insert.data.clone()); 157 | index += insert.data.len(); 158 | } 159 | while let Some(byte) = old_bytes.next() { 160 | new_bytes.push(byte); 161 | } 162 | let old_bytes = mem::replace(&mut new_bytes, Vec::new()); 163 | let mut old_bytes = old_bytes.into_iter(); 164 | index = 0; 165 | for delete in self.deletes() { 166 | while index < delete.position { 167 | new_bytes.push(old_bytes.next().unwrap()); 168 | index += 1; 169 | } 170 | for _ in 0..delete.len { 171 | old_bytes.next(); 172 | } 173 | } 174 | while let Some(byte) = old_bytes.next() { 175 | new_bytes.push(byte); 176 | } 177 | String::from_utf8(new_bytes) 178 | } 179 | 180 | /// Apply the operations in this 
sequence to a file. Inserts are applied first and deletes second; the file is then resized to the 181 | /// resulting length and rewritten from the start. Any I/O error (including a failure to duplicate the file handle) is returned; panics if an operation refers to a position past the end of the data. 182 | /// The file must have been opened in both read and write mode (see [OpenOptions](https://doc.rust-lang.org/nightly/std/fs/struct.OpenOptions.html)). 183 | pub fn apply(&self, file: &mut File) -> io::Result<()> { 184 | let mut new_bytes = Vec::new(); 185 | try!(file.seek(SeekFrom::Start(0))); 186 | let mut old_bytes = try!(file.try_clone()).bytes(); 187 | let mut index = 0; 188 | for insert in self.inserts.iter() { 189 | while index < insert.position { 190 | new_bytes.push(try!(old_bytes.next().unwrap())); 191 | index += 1; 192 | } 193 | new_bytes.extend_from_slice(&insert.data[..]); 194 | index += insert.data.len(); 195 | } 196 | while let Some(byte) = old_bytes.next() { 197 | new_bytes.push(try!(byte)); 198 | } 199 | let old_bytes = mem::replace(&mut new_bytes, Vec::new()); 200 | let mut old_bytes = old_bytes.into_iter(); 201 | index = 0; 202 | for delete in self.deletes.iter() { 203 | while index < delete.position { 204 | new_bytes.push(old_bytes.next().unwrap()); 205 | index += 1; 206 | } 207 | for _ in 0..delete.len { 208 | old_bytes.next(); 209 | } 210 | } 211 | while let Some(byte) = old_bytes.next() { 212 | new_bytes.push(byte); 213 | } 214 | 215 | try!(file.seek(SeekFrom::Start(0))); 216 | try!(file.set_len(new_bytes.len() as u64)); 217 | file.write_all(new_bytes.as_slice()) 218 | } 219 | 220 | /// Compress this diff and write to `writer`.
The output can then be expanded 221 | /// back into an equivalent Diff using `expand_from()` 222 | pub fn compress_to<W: Write>(&self, writer: &mut W) -> io::Result<()> { 223 | // Layout: u32 insert count, each insert, u32 delete count, each delete. `write_all` is used because a bare `write` may write only part of the buffer. 224 | let mut int_buf = [0;4]; 225 | NetworkEndian::write_u32(&mut int_buf, self.inserts.len() as u32); 226 | try!(writer.write_all(&int_buf)); 227 | for insert in self.inserts.iter() { 228 | try!(insert.compress_to(writer)); 229 | } 230 | NetworkEndian::write_u32(&mut int_buf, self.deletes.len() as u32); 231 | try!(writer.write_all(&int_buf)); 232 | for delete in self.deletes.iter() { 233 | try!(delete.compress_to(writer)); 234 | } 235 | Ok(()) 236 | } 237 | 238 | /// Expand this diff from previously compressed data in `reader`. The data in reader 239 | /// should have been written using `compress_to()`. Truncated or unreadable input is reported as an `Err` rather than a panic. 240 | pub fn expand_from<R: Read>(reader: &mut R) -> io::Result<Diff> { 241 | let mut int_buf = [0;4]; 242 | 243 | trace!("Reading insert length"); 244 | try!(reader.read_exact(&mut int_buf)); 245 | let insert_len = NetworkEndian::read_u32(&int_buf); 246 | trace!("Insert length was: {}", insert_len); 247 | let inserts = try!((0..insert_len).map(|_| Insert::expand_from(reader)).collect::<io::Result<Vec<_>>>()); 248 | trace!("Read inserts"); 249 | trace!("Reading delete length"); 250 | try!(reader.read_exact(&mut int_buf)); 251 | let delete_len = NetworkEndian::read_u32(&int_buf); 252 | trace!("Delete length was: {}", delete_len); 253 | let deletes = try!((0..delete_len).map(|_| Delete::expand_from(reader)).collect::<io::Result<Vec<_>>>()); 254 | trace!("Read deletes"); 255 | Ok(Diff { 256 | inserts: inserts, 257 | deletes: deletes 258 | }) 259 | } 260 | } 261 | 262 | impl fmt::Debug for Insert { 263 | fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { 264 | write!(fmt, "Insert({}, '{}')", self.position, String::from_utf8_lossy(&self.data).replace('\r', "").replace('\n', "\\n")) 265 | } 266 | } 267 | 268 | impl fmt::Debug for Delete { 269 | fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { 270 | write!(fmt, "Delete({}, {})", self.position, 
self.len) 271 | } 272 | } 273 | 274 | impl Insert { 275 | /// Builds a new `Insert` from the data and position 276 | #[inline] 277 | pub fn new(data: Vec<u8>, position: usize) -> Insert { 278 | Insert { 279 | data, 280 | position, 281 | } 282 | } 283 | 284 | /// Gets the byte position of this insert operation in its file 285 | #[inline] 286 | pub fn get_position(&self) -> usize { 287 | self.position 288 | } 289 | 290 | /// Gets the data this insert operation will insert 291 | #[inline] 292 | pub fn get_data(&self) -> &Vec<u8> { 293 | &self.data 294 | } 295 | 296 | /// Compress this operation and write to `writer`. The output can then be expanded 297 | /// back into an equivalent operation using `expand_from()` 298 | pub fn compress_to<W: Write>(&self, writer: &mut W) -> io::Result<()> { 299 | // Layout: u32 position, u32 data length, then the raw data. `write_all` is used so a short write cannot silently drop bytes. NOTE(review): the `as u32` casts truncate values above u32::MAX. 300 | let mut int_buf = [0;4]; 301 | NetworkEndian::write_u32(&mut int_buf, self.position as u32); 302 | try!(writer.write_all(&int_buf)); 303 | NetworkEndian::write_u32(&mut int_buf, self.data.len() as u32); 304 | try!(writer.write_all(&int_buf)); 305 | try!(writer.write_all(&self.data)); 306 | Ok(()) 307 | } 308 | 309 | /// Expand this operation from previously compressed data in `reader`. 
The data in reader 310 | /// should have been written using `compress_to()` 311 | pub fn expand_from<R: Read>(reader: &mut R) -> io::Result<Insert> { 312 | let mut int_buf = [0;4]; 313 | try!(reader.read_exact(&mut int_buf)); 314 | let position = NetworkEndian::read_u32(&int_buf); 315 | try!(reader.read_exact(&mut int_buf)); 316 | let data_len = NetworkEndian::read_u32(&int_buf) as usize; 317 | // Zero-filled buffer of exactly `data_len` bytes for `read_exact` to fill 318 | let mut data = vec![0; data_len]; 319 | try!(reader.read_exact(&mut data)); 320 | Ok(Insert{ 321 | position: position as usize, 322 | data: data 323 | }) 324 | } 325 | 326 | } 327 | 328 | impl Delete { 329 | /// Builds a new `Delete` from a position and length 330 | #[inline] 331 | pub fn new(position: usize, len: usize) -> Delete { 332 | Delete { 333 | position, 334 | len, 335 | } 336 | } 337 | 338 | /// Gets the byte position of this delete operation in its file 339 | #[inline] 340 | pub fn get_position(&self) -> usize { 341 | self.position 342 | } 343 | 344 | /// Gets the length in bytes of this delete operation 345 | #[inline] 346 | pub fn get_length(&self) -> usize { 347 | self.len 348 | } 349 | 350 | /// Compress this operation and write to `writer`. The output can then be expanded 351 | /// back into an equivalent operation using `expand_from()` 352 | pub fn compress_to<W: Write>(&self, writer: &mut W) -> io::Result<()> { 353 | // Layout: u32 position followed by u32 length. `write_all` is used so a short write cannot silently drop bytes. 354 | let mut int_buf = [0;4]; 355 | NetworkEndian::write_u32(&mut int_buf, self.position as u32); 356 | try!(writer.write_all(&int_buf)); 357 | NetworkEndian::write_u32(&mut int_buf, self.len as u32); 358 | try!(writer.write_all(&int_buf)); 359 | Ok(()) 360 | } 361 | 362 | /// Expand this operation from previously compressed data in `reader`. 
The data in reader 363 | /// should have been written using `compress_to()` 364 | pub fn expand_from(reader: &mut R) -> io::Result { 365 | let mut int_buf = [0;4]; 366 | try!(reader.read_exact(&mut int_buf)); 367 | let position = NetworkEndian::read_u32(&int_buf); 368 | try!(reader.read_exact(&mut int_buf)); 369 | let len = NetworkEndian::read_u32(&int_buf); 370 | Ok(Delete{ 371 | position: position as usize, 372 | len: len as usize, 373 | }) 374 | } 375 | 376 | } 377 | 378 | #[cfg(test)] 379 | mod test { 380 | use super::Diff; 381 | 382 | 383 | 384 | 385 | #[test] 386 | fn applying_diff_to_string() { 387 | let string = "Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much. They were the last people you'd expect to be involved in anything strange or mysterious, because they just didn't hold with such nonsense."; 388 | let mut diff = Diff::new(); 389 | diff.add_insert(2, vec![115]); // 's' 390 | diff.add_insert(37, vec![116, 121]); //'ty' 391 | diff.add_insert(98, vec![97, 98]); // ab 392 | diff.add_insert(253, vec![109]); // m 393 | diff.add_delete(35, 1); // 'u' 394 | diff.add_delete(181, 34); 395 | diff.add_delete(219, 1); 396 | let result = diff.apply_to_string(string).unwrap(); 397 | assert_eq!(result, "Mrs. and Mrs. Dursley, of number forty, Privet Drive, were proud to say that they were perfectly abnormal, thank you very much. They were the last people you'd expect to be involved, because they just didn't hold with much nonsense.".to_string()); 398 | } 399 | } 400 | -------------------------------------------------------------------------------- /src/string_diff.rs: -------------------------------------------------------------------------------- 1 | //! Used for finding the minimal set of operations to transform one string into another. 2 | //! 3 | //! The primary function of this module is [find diff](fn.find_diff.html). 
4 | use std::mem; 5 | use std::cmp::max; 6 | use super::{Diff}; 7 | 8 | 9 | /// Finds the difference on a character by character level between two strings 10 | /// 11 | /// Uses the Hirschberg algorithm (doi: [10.1145/360825.360861](http://dx.doi.org/10.1145/360825.360861)) 12 | /// which operates in `O(x * y)` time and `O(y)` space. The algorithm finds the minimal set of operations 13 | /// that will transform 'old' into 'new'. The 'weight' of each operation is determined by the `scorer.` 14 | /// For more details about weighting, see the [OperationScore](trait.OperationScore.html) documentation. 15 | /// 16 | /// The operations in the returned `Diff `are presented in file order, with offsets assuming the 17 | /// previous operations have already been performed. Furthermore, the inserts are assumed to 18 | /// be performed prior to the deletes. 19 | /// 20 | /// # Example 21 | /// 22 | /// ``` 23 | /// use rdiff::string_diff::{find_diff, EditDistance}; 24 | /// // Find the difference between meadow and yellowing using the edit distance as the weighting. 
25 | /// let diff = find_diff("meadow", "yellowing", &EditDistance{}); 26 | /// // prints (0, 'y'), (3, 'll') and (9, 'ing') 27 | /// for insert in diff.inserts() { 28 | /// println!("{:?}", insert); 29 | /// } 30 | /// // prints (1, 1) and (4, 2) 31 | /// for delete in diff.deletes() { 32 | /// println!("{:?}", delete); 33 | /// } 34 | /// assert_eq!("yellowing", diff.apply_to_string("meadow").unwrap()); 35 | /// ``` 36 | pub fn find_diff(old: &str, new: &str, scorer: &S) -> Diff { 37 | let mut diff = Diff::new(); 38 | let mut insert_index = 0; 39 | let mut delete_index = 0; 40 | let old_rev = old.chars().rev().collect::(); 41 | let new_rev = new.chars().rev().collect::(); 42 | hirschberg(old, new, &old_rev, &new_rev, scorer, &mut diff, &mut insert_index, &mut delete_index); 43 | diff 44 | } 45 | 46 | /// Handles updating the diff and relevant indexes when inserting a string 47 | /// Needed because the string must be converted to bytes before it can be used in the diff 48 | macro_rules! do_insert { 49 | ($s: expr, $index: expr, $diff: expr) => ( 50 | { 51 | let bytes = $s.bytes().collect:: >(); 52 | let byte_len = bytes.len(); 53 | $diff.add_insert(*$index, bytes); 54 | *$index += byte_len; 55 | } 56 | ); 57 | } 58 | 59 | /// Handles updating the diff and relevant indexes when deleting a substring 60 | /// The delete is recorded at `insert_index - delete_index`, and both indexes are then advanced by the deleted length 61 | macro_rules! do_delete { 62 | ($length: expr, $delete_index: expr, $insert_index: expr, $diff: expr) => ( 63 | { 64 | $diff.add_delete(*$insert_index - *$delete_index, $length); 65 | *$delete_index += $length; 66 | *$insert_index += $length; 67 | } 68 | ); 69 | } 70 | 71 | /// Uses the Hirschberg algorithm to calculate the optimal set of operations to transform 'old' into 'new'. 72 | /// The only parameters that are input are 'old', 'new' and `scorer`. 
`old_rev` and `new_rev` are just 73 | /// cached so that 'old' and 'new' don't need to be reversed for every recursion of the algorithm. 74 | /// `diff` is the output of the algorithm and `insert_index` and `delete_index` are simply intermediate state 75 | /// being passed around. 76 | fn hirschberg(old: &str, new: &str, old_rev: &str, new_rev: &str, scorer: &S, diff: &mut Diff, insert_index: &mut usize, delete_index: &mut usize) { 77 | trace!("'{}' ({}) '{}' ({})", old, old_rev, new, new_rev); 78 | // We're going to use these lengths over and over again, we might as well cache them. NOTE(review): `len()` counts bytes while `chars().position(..)` below counts chars, so the slicing only lines up for ASCII input - confirm how non-ASCII strings should be handled. 79 | let old_len = old.len(); 80 | let new_len = new.len(); 81 | 82 | // If one of the two strings is empty, then it's trivial to transform one into the other 83 | if old_len == 0 { 84 | do_insert!(new, insert_index, diff); 85 | } else if new_len == 0 { 86 | do_delete!(old_len, delete_index, insert_index, diff); 87 | } 88 | // If old is length 1, then there are two cases: 89 | else if old_len == 1 { 90 | let old_char = old.chars().next().unwrap(); 91 | match new.chars().position(|c| c == old_char) { 92 | // Either new contains old, in which case 93 | Some(position) => { 94 | // We insert whatever is on the left of old in new 95 | if position > 0 { 96 | do_insert!(new[..position], insert_index, diff); 97 | } 98 | *insert_index += 1; 99 | // and we insert whatever is on the right of old in new 100 | if new_len - position > 1 { 101 | do_insert!(new[position + 1..], insert_index, diff); 102 | } 103 | } None => { 104 | //or new does not contain old, in which case 105 | // we simply delete old and insert new 106 | do_insert!(new, insert_index, diff); 107 | do_delete!(1, delete_index, insert_index, diff); 108 | } 109 | } 110 | } 111 | // If new is length 1, then there are two cases: 112 | else if new_len == 1 { 113 | let new_char = new.chars().next().unwrap(); 114 | match old.chars().position(|c| c == new_char) { 115 | // either old contains new, in which case 116 | Some(position) => { 117 | // We 
delete everything in old to the left of new 118 | if position > 0 { 119 | do_delete!(position, delete_index, insert_index, diff); 120 | } 121 | *insert_index += 1; 122 | // and we delete everything in old to the right of new 123 | if old_len - position > 1 { 124 | let delete_len = old_len - position - 1; 125 | do_delete!(delete_len, delete_index, insert_index, diff); 126 | } 127 | } None => { 128 | // or old does not contain new, in which case we simply insert new and delete 129 | // everything that was previously in old 130 | do_insert!(new, insert_index, diff); 131 | do_delete!(old_len, delete_index, insert_index, diff); 132 | } 133 | } 134 | } else { 135 | // If it's not trivial, then we recurse until it is. 136 | // We begin by dividing old in half. 137 | let old_mid = old_len / 2; 138 | // We then find the index in new where splitting the string will give us the 139 | // highest possible score. This index is the point where the trace of the edit 140 | // operations performed is guaranteed to cross. 141 | let score_l = nw_score(&old[..old_mid], new, scorer); 142 | let score_r = nw_score(&old_rev[..old_len - old_mid], new_rev, scorer); 143 | let new_mid = score_l.iter() 144 | .zip(score_r.iter().rev()) 145 | .map(|(l, r)| l + r) 146 | .zip(0..new_len + 1).max().unwrap().1; 147 | // We then recurse on the left side of old and new 148 | hirschberg(&old[..old_mid], &new[..new_mid], &old_rev[old_len - old_mid..], &new_rev[new_len - new_mid..], scorer, diff, insert_index, delete_index); 149 | // and the right side of old and new 150 | hirschberg(&old[old_mid..], &new[new_mid..], &old_rev[..old_len - old_mid], &new_rev[..new_len - new_mid], scorer, diff, insert_index, delete_index); 151 | 152 | 153 | } 154 | 155 | } 156 | 157 | /// Used to calculate the score for each operation that 158 | /// will be performed. The score can be static, or it can 159 | /// vary based on which character is being deleted, inserted or substituted. 
160 | /// It is highly recommended to inline the implementation of these methods 161 | pub trait OperationScore { 162 | /// The score for inserting character `c` into the string 163 | fn insert_score(&self, c: char) -> i32; 164 | /// The score for deleting character `c` from the string 165 | fn delete_score(&self, c: char) -> i32; 166 | /// The score for replacing character `old` with character `new` 167 | fn substitution_score(&self, old: char, new: char) -> i32; 168 | /// The score for when a character in one string matches the character in the other string 169 | fn match_score(&self, c: char) -> i32; 170 | } 171 | 172 | /// Used as the classical definition of edit distance. 173 | /// 174 | /// That is: 175 | /// 176 | /// * Insert is cost -1 177 | /// * Delete is cost -1 178 | /// * Substitution is cost -2 (an insert + a delete) 179 | /// * Matching is cost 0 180 | pub struct EditDistance; 181 | 182 | impl OperationScore for EditDistance { 183 | #[inline] 184 | fn insert_score(&self, _: char) -> i32 { 185 | -1 186 | } 187 | 188 | #[inline] 189 | fn delete_score(&self, _: char) -> i32 { 190 | -1 191 | } 192 | 193 | #[inline] 194 | fn substitution_score(&self, _: char, _: char) -> i32 { 195 | -2 196 | } 197 | 198 | #[inline] 199 | fn match_score(&self, _: char) -> i32 { 200 | 0 201 | } 202 | } 203 | 204 | /// Calculate the score based on the Needleman-Wunsch algorithm. This algorithm 205 | /// calculates the cost of transforming string 'old' into string 'new' using operation scoring 206 | /// given by `scorer`. 207 | /// 208 | /// It operates by iteratively generating the score for progressively longer 209 | /// substrings of 'old' and 'new'. The result is a vector of the transformation score 210 | /// from 'old' to a substring of length `i` of 'new' where `i` is the index of an element in 211 | /// the resulting vector. 
212 | fn nw_score(old: &str, new: &str, scorer: &S) -> Vec { 213 | 214 | trace!("nw_score for '{}' - '{}'", old, new); 215 | let row_len = new.len() + 1; 216 | let mut last_row = Vec::with_capacity(row_len); 217 | let mut this_row = Vec::with_capacity(row_len); 218 | let mut total_insert = 0; 219 | last_row.push(0); 220 | for new_char in new.chars() { 221 | total_insert += scorer.insert_score(new_char); 222 | last_row.push(total_insert); 223 | } 224 | trace!("{:?}", last_row); 225 | for old_char in old.chars() { 226 | this_row.push(last_row[0] + scorer.delete_score(old_char)); 227 | for (new_index, new_char) in new.chars().enumerate() { 228 | let score_sub = last_row[new_index] + if old_char == new_char { 229 | scorer.match_score(old_char) 230 | } else { 231 | scorer.substitution_score(old_char, new_char) 232 | }; 233 | let score_del = last_row[new_index + 1] + scorer.delete_score(old_char); 234 | let score_ins = this_row[new_index] + scorer.insert_score(new_char); 235 | this_row.push(max(max(score_sub, score_del), score_ins)) 236 | } 237 | trace!("{:?}", this_row); 238 | last_row = mem::replace(&mut this_row, Vec::with_capacity(row_len)); 239 | } 240 | last_row 241 | 242 | } 243 | 244 | #[cfg(test)] 245 | mod test { 246 | extern crate env_logger; 247 | use super::{nw_score, find_diff, EditDistance, OperationScore}; 248 | use super::super::{Insert, Delete, Diff}; 249 | 250 | struct ExampleScores; 251 | 252 | macro_rules! check_diff { 253 | ($start: tt | $new: tt | $scorer: tt | $(($insert_pos : tt, $insert_value: tt)),* | $(($delete_pos: tt, $delete_len: tt)),*) => { 254 | { 255 | check_diff_workaround!($start; $new; $scorer; $(($insert_pos, $insert_value)),*; $(($delete_pos, $delete_len)),*) 256 | } 257 | }; 258 | } 259 | 260 | // Caused by a bug in the implementation of the tt macro type. It currently has to be passed as an expr into another macro 261 | // or it throws a fit for no reason. See https://github.com/rust-lang/rust/issues/5846 262 | macro_rules! 
check_diff_workaround { 263 | ($start: expr ; $new: expr ; $scorer: expr; $(($insert_pos : tt, $insert_value: tt)),* ; $(($delete_pos: tt, $delete_len: tt)),*) => { 264 | { 265 | let diff = find_diff($start, $new, &$scorer); 266 | assert_eq!(Diff { 267 | inserts: vec![$(Insert{position: $insert_pos, data: $insert_value.bytes().collect()}),*], 268 | deletes: vec![$(Delete{position: $delete_pos, len: $delete_len}),*] 269 | }, diff); 270 | assert_eq!(diff.apply_to_string($start).unwrap(), $new.to_string()); 271 | } 272 | }; 273 | } 274 | 275 | // From the wikipedia example at https://en.wikipedia.org/wiki/Hirschberg%27s_algorithm 276 | impl OperationScore for ExampleScores { 277 | #[inline] 278 | fn insert_score(&self, _: char) -> i32 { 279 | -2 280 | } 281 | 282 | #[inline] 283 | fn delete_score(&self, _: char) -> i32 { 284 | -2 285 | } 286 | 287 | #[inline] 288 | fn substitution_score(&self, _: char, _: char) -> i32 { 289 | -1 290 | } 291 | 292 | #[inline] 293 | fn match_score(&self, _: char) -> i32 { 294 | 2 295 | } 296 | } 297 | 298 | #[test] 299 | fn score() { 300 | assert_eq!(nw_score("ACGC", "CGTAT", &EditDistance{}), vec![-4, -3, -2, -3, -4, -5]); 301 | assert_eq!(nw_score("AGTA", "TATGC", &EditDistance{}), vec![-4, -3, -2, -3, -4, -5]); 302 | 303 | assert_eq!(nw_score("ACGC", "CGTAT", &ExampleScores{}), vec![-8, -4, 0, 1, -1, -3]); 304 | assert_eq!(nw_score("AGTA", "TATGC", &ExampleScores{}), vec![-8, -4, 0, -2, -1, -3]); 305 | } 306 | 307 | #[test] 308 | fn do_find_diff() { 309 | //env_logger::init().unwrap(); 310 | check_diff!( 311 | "kitten" | 312 | "kettle" | 313 | EditDistance | 314 | (1, "e"), (5, "l") | 315 | (2, 1), (6, 1) 316 | ); 317 | check_diff!( 318 | "meadow" | 319 | "yellowing" | 320 | EditDistance | 321 | (0, "y"), (3, "ll"), (9, "ing") | 322 | (1, 1), (4, 2) 323 | ); 324 | 325 | check_diff!(" I've" | 326 | " I" | 327 | EditDistance | 328 | | 329 | (2, 3) 330 | ); 331 | 332 | check_diff!(" I've got a new place" | 333 | " I found a new place" | 
334 | EditDistance | 335 | (6, "f"), (9, "und") | 336 | (2, 3), (4, 1), (8, 1) 337 | ); 338 | check_diff!( 339 | "Since my baby left me I've got a new place to dwell\nI walk down a lonely street to Heartbreak Hotel." | 340 | "Since my baby left me I found a new place to dwell\nDown at the end of 'Lonely Street' to 'Heartbreak Hotel.'" | 341 | EditDistance | 342 | (27, "f"), (30, "und"), (56, "Down"), (64, "t the"), (72, "en"), (75, " "), (77, "f"), (81, "'L"), (92, "S"), (99, "'"), (104, "'"), (122, "'") | 343 | (23, 3), (25, 1), (29, 1),(55, 1), (56, 1), (62, 2), (69, 2), (72, 3), (79, 1) 344 | ); 345 | } 346 | } 347 | -------------------------------------------------------------------------------- /src/window.rs: -------------------------------------------------------------------------------- 1 | use std::io::{Read, Result}; 2 | use std::mem; 3 | use std::cmp::min; 4 | use ::Window; 5 | 6 | // NOTE(review): each half of the window is filled with a single `read` call, so a reader that returns short reads before end of input yields a short block - confirm callers only pass readers that fill the buffer. 7 | impl<R: Read> Window<R> { 8 | pub fn new(mut reader:R, block_size: usize) -> Result<Window<R>> { 9 | let mut front = vec!(0;block_size); 10 | let mut back = vec!(0;block_size); 11 | let size = try!(reader.read(front.as_mut_slice())); 12 | // Keep only the bytes actually read; `truncate` shrinks the buffer without needing `unsafe` 13 | front.truncate(size); 14 | 15 | let size = try!(reader.read(back.as_mut_slice())); 16 | // Same for the back half 17 | back.truncate(size); 18 | 19 | Ok(Window { 20 | front, 21 | back, 22 | block_size, 23 | offset: 0, 24 | reader, 25 | bytes_read: 0 26 | }) 27 | } 28 | /// Consumes one byte: returns the byte at the current offset (tail) together with the byte one block ahead of it (head), if there is one. Returns `(None, None)` once the input is exhausted. 29 | pub fn advance(&mut self) -> Result<(Option<u8>, Option<u8>)> { 30 | if self.front.len() == 0 { 31 | return Ok((None, None)); 32 | } 33 | 34 | if self.offset >= self.front.len() { 35 | if self.back.len() == 0 { 36 | return Ok((None, None)); 37 | } 38 | try!(self.load_next_block()); 39 | } 40 | let tail = self.front[self.offset]; 41 | let head = self.get_head(); 42 | self.offset += 1; 43 | self.bytes_read += 1; 44 | Ok((Some(tail), head)) 45 | } 46 | /// Looks up the head byte in `back`; `None` when `back` does not reach that far. 47 | fn get_head(&self) -> Option<u8> { 48 | let head_index = self.offset + self.block_size - self.front.len(); 49 | if head_index >= self.back.len() { 50 | 
return None; 51 | } 52 | return Some(self.back[head_index]); 53 | } 54 | /// Promotes `back` to `front`, refills `back` from the reader and resets `offset` to 0. 55 | fn load_next_block(&mut self) -> Result<()> { 56 | // We've gone past the end of the front half 57 | self.front = mem::replace(&mut self.back, vec!(0;self.block_size)); 58 | let size = try!(self.reader.read(self.back.as_mut_slice())); 59 | // Shrink to the bytes actually read (0 at end of input); safe replacement for `set_len` 60 | self.back.truncate(size); 61 | 62 | self.offset = 0; 63 | Ok(()) 64 | } 65 | /// Returns the current window as two slices: the remainder of `front` from `offset`, then the first `offset` bytes of `back`. 66 | pub fn frame<'a>(&'a self) -> (&'a [u8], &'a [u8]) { 67 | let front_offset = min(self.offset, self.front.len()); 68 | let back_offset = min(self.offset, self.back.len()); 69 | (&self.front[front_offset..], &self.back[..back_offset]) 70 | } 71 | /// Number of buffered bytes from the current offset to the end of `back`. 72 | pub fn frame_size(&self) -> usize { 73 | self.front.len() + self.back.len() - self.offset 74 | } 75 | /// True when `offset` is 0 or equal to the length of `front`. 76 | pub fn on_boundry(&self) -> bool { 77 | self.offset == 0 || self.offset == self.front.len() 78 | } 79 | /// Number of bytes returned so far by `advance`. 80 | pub fn get_bytes_read(&self) -> usize { 81 | self.bytes_read 82 | } 83 | } 84 | 85 | #[cfg(test)] 86 | mod test { 87 | use super::super::Window; 88 | use std::io::Cursor; 89 | #[test] 90 | fn frame_iterator() { 91 | let mut window_basic = Window::new(Cursor::new(vec![1, 2, 3, 4, 5, 6 ,7, 8, 9, 10]), 5).unwrap(); 92 | //assert_eq!(window_basic.frame().map(|a| *a).collect::>(), vec![1, 2, 3, 4, 5]); 93 | assert_eq!(window_basic.frame(), (&[1, 2, 3, 4, 5][..], &[][..])); 94 | 95 | window_basic.advance().unwrap(); 96 | // assert_eq!(window_basic.frame().map(|a| *a).collect::>(), vec![2, 3, 4, 5, 6]); 97 | assert_eq!(window_basic.frame(), (&[2, 3, 4, 5][..], &[6][..])); 98 | 99 | window_basic.advance().unwrap(); 100 | window_basic.advance().unwrap(); 101 | window_basic.advance().unwrap(); 102 | window_basic.advance().unwrap(); 103 | assert_eq!(window_basic.frame(), (&[][..], &[6, 7, 8, 9, 10][..])); 104 | 105 | 106 | window_basic.advance().unwrap(); 107 | assert_eq!(window_basic.frame(), (&[7, 8, 9, 10][..], &[][..])); 108 | 109 | window_basic.advance().unwrap(); 110 | window_basic.advance().unwrap(); 111 | 
window_basic.advance().unwrap(); 112 | assert_eq!(window_basic.frame(), (&[10][..], &[][..])); 113 | 114 | window_basic.advance().unwrap(); 115 | assert_eq!(window_basic.frame(), (&[][..], &[][..])); 116 | 117 | 118 | let window_too_small = Window::new(Cursor::new(vec![1, 2, 3, 4]), 5).unwrap(); 119 | assert_eq!(window_too_small.frame(), (&[1, 2, 3, 4][..], &[][..])); 120 | 121 | let window_empty = Window::new(Cursor::new(vec![]), 5).unwrap(); 122 | assert_eq!(window_empty.frame(), (&[][..], &[][..])); 123 | 124 | let mut window_bigger = Window::new(Cursor::new(vec![1, 2, 3, 4, 5, 6 ,7, 8, 9, 10, 11, 12]), 5).unwrap(); 125 | assert_eq!(window_bigger.frame(), (&[1, 2, 3, 4, 5][..], &[][..])); 126 | window_bigger.advance().unwrap(); 127 | window_bigger.advance().unwrap(); 128 | window_bigger.advance().unwrap(); 129 | window_bigger.advance().unwrap(); 130 | window_bigger.advance().unwrap(); 131 | window_bigger.advance().unwrap(); 132 | assert_eq!(window_bigger.frame(), (&[7, 8, 9, 10][..], &[11][..])); 133 | 134 | window_bigger.advance().unwrap(); 135 | assert_eq!(window_bigger.frame(), (&[8, 9, 10][..], &[11, 12][..])); 136 | window_bigger.advance().unwrap(); 137 | assert_eq!(window_bigger.frame(), (&[9, 10][..], &[11, 12][..])); 138 | window_bigger.advance().unwrap(); 139 | assert_eq!(window_bigger.frame(), (&[10][..], &[11, 12][..])); 140 | window_bigger.advance().unwrap(); 141 | assert_eq!(window_bigger.frame(), (&[][..], &[11, 12][..])); 142 | window_bigger.advance().unwrap(); 143 | assert_eq!(window_bigger.frame(), (&[12][..], &[][..])); 144 | 145 | } 146 | #[test] 147 | fn advance() { 148 | let mut window_basic = Window::new(Cursor::new(vec![1, 2, 3, 4, 5, 6 ,7, 8, 9, 10]), 5).unwrap(); 149 | assert_eq!(window_basic.advance().unwrap(), (Some(1), Some(6))); 150 | assert_eq!(window_basic.advance().unwrap(), (Some(2), Some(7))); 151 | assert_eq!(window_basic.advance().unwrap(), (Some(3), Some(8))); 152 | assert_eq!(window_basic.advance().unwrap(), (Some(4), 
Some(9))); 153 | assert_eq!(window_basic.advance().unwrap(), (Some(5), Some(10))); 154 | assert_eq!(window_basic.advance().unwrap(), (Some(6), None)); 155 | assert_eq!(window_basic.advance().unwrap(), (Some(7), None)); 156 | assert_eq!(window_basic.advance().unwrap(), (Some(8), None)); 157 | assert_eq!(window_basic.advance().unwrap(), (Some(9), None)); 158 | assert_eq!(window_basic.advance().unwrap(), (Some(10), None)); 159 | assert_eq!(window_basic.advance().unwrap(), (None, None)); 160 | 161 | let mut window_huge = Window::new(Cursor::new(vec![1, 2, 3, 4, 5, 6 ,7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 5).unwrap(); 162 | assert_eq!(window_huge.advance().unwrap(), (Some(1), Some(6))); 163 | assert_eq!(window_huge.advance().unwrap(), (Some(2), Some(7))); 164 | assert_eq!(window_huge.advance().unwrap(), (Some(3), Some(8))); 165 | assert_eq!(window_huge.advance().unwrap(), (Some(4), Some(9))); 166 | assert_eq!(window_huge.advance().unwrap(), (Some(5), Some(10))); 167 | assert_eq!(window_huge.advance().unwrap(), (Some(6), Some(11))); 168 | assert_eq!(window_huge.advance().unwrap(), (Some(7), Some(12))); 169 | assert_eq!(window_huge.advance().unwrap(), (Some(8), Some(13))); 170 | assert_eq!(window_huge.advance().unwrap(), (Some(9), Some(14))); 171 | assert_eq!(window_huge.advance().unwrap(), (Some(10), Some(15))); 172 | assert_eq!(window_huge.advance().unwrap(), (Some(11), Some(16))); 173 | assert_eq!(window_huge.advance().unwrap(), (Some(12), Some(17))); 174 | assert_eq!(window_huge.advance().unwrap(), (Some(13), Some(18))); 175 | assert_eq!(window_huge.advance().unwrap(), (Some(14), None)); 176 | assert_eq!(window_huge.advance().unwrap(), (Some(15), None)); 177 | assert_eq!(window_huge.advance().unwrap(), (Some(16), None)); 178 | assert_eq!(window_huge.advance().unwrap(), (Some(17), None)); 179 | assert_eq!(window_huge.advance().unwrap(), (Some(18), None)); 180 | assert_eq!(window_huge.advance().unwrap(), (None, None)); 181 | 182 | let mut window_empty = 
Window::new(Cursor::new(vec![]), 5).unwrap(); 183 | assert_eq!(window_empty.advance().unwrap(), (None, None)); 184 | 185 | let mut window_too_small = Window::new(Cursor::new(vec![1, 2, 3, 4]), 5).unwrap(); 186 | assert_eq!(window_too_small.advance().unwrap(), (Some(1), None)); 187 | assert_eq!(window_too_small.advance().unwrap(), (Some(2), None)); 188 | assert_eq!(window_too_small.advance().unwrap(), (Some(3), None)); 189 | assert_eq!(window_too_small.advance().unwrap(), (Some(4), None)); 190 | assert_eq!(window_too_small.advance().unwrap(), (None, None)); 191 | assert_eq!(window_too_small.advance().unwrap(), (None, None)); 192 | assert_eq!(window_too_small.advance().unwrap(), (None, None)); 193 | } 194 | } 195 | --------------------------------------------------------------------------------