├── .gitignore
├── .travis.yml
├── Cargo.toml
├── README.md
├── deploy.sh
├── examples
    ├── file_watcher.rs
    ├── filev1.txt
    ├── filev2.txt
    └── predefined.rs
└── src
    ├── hashing.rs
    ├── lib.rs
    ├── string_diff.rs
    └── window.rs


/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | Cargo.lock
3 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: rust
 2 | rust:
 3 |   - stable
 4 |   - beta
 5 |   - nightly
 6 | install:
 7 |   - cargo build
 8 | script:
 9 |   - cargo test
10 |   - cargo doc --no-deps
11 | after_success:
12 |   - bash deploy.sh
13 | 


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
 1 | [package]
 2 | name = "rdiff"
 3 | version = "0.1.2"
 4 | authors = ["Daniel Yule <daniel.yule@gmail.com>"]
 5 | description = "A library for tracking changes to a file over time"
 6 | documentation = "https://dyule.github.io/rdiff/rdiff/"
 7 | homepage = "https://github.com/dyule/rdiff"
 8 | repository = "https://github.com/dyule/rdiff"
 9 | readme = "README.md"
10 | keywords = ["rsync", "diff"]
11 | license = "CC0-1.0"
12 | 
13 | [dependencies]
14 | rust-crypto = "^0.2"
15 | log = "0.3"
16 | byteorder = "0.5"
17 | 
18 | [dev-dependencies]
19 | notify = "2.6.1"
20 | env_logger = "0.3"
21 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | rdiff
 2 | =====
 3 | [![CC0](http://i.creativecommons.org/p/zero/1.0/88x31.png)](http://creativecommons.org/publicdomain/zero/1.0/)
 4 | [![Build Status](https://travis-ci.org/dyule/rdiff.svg?branch=master)](https://travis-ci.org/dyule/rdiff)
 5 | [![Crates.io](https://img.shields.io/crates/v/rdiff.svg?maxAge=2592000)](https://crates.io/crates/rdiff)
 6 | 
 7 | rdiff is a package for comparing versions of a file over time.  It is written in Rust, and expects version > 1.17.
 8 | 
 9 | To the extent possible under law, rdiff contributors have waived all copyright and related or neighboring rights to rdiff.
10 | 
11 | [Documentation](https://dyule.github.io/rdiff/rdiff/)
12 | 
13 | # Usage
14 | 
15 | in `Cargo.toml`:
16 | 
17 | ``` toml
18 | [dependencies]
19 | rdiff = "0.1"
20 | ```
21 | 
22 | In your rust file (taken from [examples/predefined.rs](examples/predefined.rs)):
23 | 
24 | ``` rust
25 | extern crate rdiff;
26 | 
27 | use rdiff::BlockHashes;
28 | use std::fs::File;
29 | 
30 | pub fn example() {
31 |     let file = File::open("examples/filev1.txt").unwrap();
32 |     let mut hashes = BlockHashes::new(file, 8).unwrap();
33 |     let file = File::open("examples/filev2.txt").unwrap();
34 |     let difference = hashes.diff_and_update(file).unwrap();
35 |     println!("Inserts: {:?}", difference.inserts().collect::<Vec<_>>());
36 |     println!("Deletes: {:?}", difference.deletes().collect::<Vec<_>>());
37 | }
38 | ```
39 | 
40 | This will output
41 | ```
42 | Inserts: [Insert(8, 'widely understood '), Insert(90, ' absolutely'), Insert(381, 'hters, or sons if the family was progressive.\n'), Insert(572, 'not, even though he had been following the news quite closely.\n\n'), Insert(734, '\nMr. Ben')]
43 | Deletes: [Delete(34, 24), Delete(428, 8), Delete(638, 8), Delete(742, 8)]
44 | ```
45 | 


--------------------------------------------------------------------------------
/deploy.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Publishes the generated API documentation to the gh-pages branch.
 3 | set -o errexit -o nounset
 4 | 
 5 | if [ "$TRAVIS_BRANCH" != "master" ]
 6 | then
 7 |   echo "This commit was made against the $TRAVIS_BRANCH and not the master! No deploy!"
 8 |   exit 0
 9 | fi
10 | 
11 | rev=$(git rev-parse --short HEAD)
12 | # `cargo doc` writes its output to target/doc (not target/docs).
13 | cd target/doc
14 | 
15 | git init
16 | git config user.name "Daniel Yule"
17 | git config user.email "daniel.yule@gmail.com"
18 | # Start from the published gh-pages history so the push below is a fast-forward.
19 | git remote add upstream "https://$GH_TOKEN@github.com/dyule/rdiff.git"
20 | git fetch upstream
21 | git reset upstream/gh-pages
22 | 
23 | touch .
24 | 
25 | git add -A .
26 | git commit -m "rebuild pages at ${rev}"
27 | git push -q upstream HEAD:gh-pages
28 | 


--------------------------------------------------------------------------------
/examples/file_watcher.rs:
--------------------------------------------------------------------------------
 1 | extern crate notify;
 2 | extern crate rdiff;
 3 | 
 4 | use notify::{RecommendedWatcher, Watcher, op};
 5 | use std::sync::mpsc::channel;
 6 | use std::fs;
 7 | use std::io;
 8 | use rdiff::BlockHashes;
 9 | 
10 | macro_rules! try_io { // like `try!`, but wraps the error in `notify::Error::Io` before returning it
11 |     ($e: expr) => ({
12 |         match $e {
13 |             Ok(v) => v, // success: the macro evaluates to the unwrapped value
14 |             Err(e) => return Err(notify::Error::Io(e)) // failure: return early from the enclosing function
15 |         }
16 |     });
17 | }
18 | 
19 | fn create_hashes(file: &str) -> io::Result<rdiff::BlockHashes> {
20 |     // Open the file and hash its current contents in 8-byte blocks; either step may fail.
21 |     fs::File::open(file).and_then(|handle| BlockHashes::new(handle, 8))
22 | }
23 | 
24 | fn update_hashes(hashes: &mut BlockHashes, file: &str) -> io::Result<()> {
25 |     let diffs = try!(fs::File::open(file).and_then(|handle| hashes.diff_and_update(handle)));
26 |     let unchanged = diffs.inserts().len() == 0 && diffs.deletes().len() == 0;
27 |     if !unchanged {
28 |         println!("{:?}", diffs); // only report diffs that contain at least one operation
29 |     }
30 |     Ok(())
31 | }
32 | 
33 | fn watch(file_name: &str) -> notify::Result<()> {
34 |     // Hash the file as it is now; later writes are diffed against (and update) these hashes.
35 |     let mut hashes = try_io!(create_hashes(file_name));
36 |   // Create a channel to receive the events.
37 |   let (tx, rx) = channel();
38 | 
39 |   // Automatically select the best implementation for your platform.
40 |   // You can also access each implementation directly e.g. INotifyWatcher.
41 |   let mut watcher: RecommendedWatcher = try!(Watcher::new(tx));
42 | 
43 |   // Add a path to be watched. All files and directories at that path and
44 |   // below will be monitored for changes.
45 |   try!(watcher.watch(file_name));
46 | 
47 |   // This is a simple loop, but you may want to use more complex logic here,
48 |   // for example to handle I/O.  It has no `break`: it only ends when `try_io!` returns an error.
49 |   loop {
50 |       match rx.recv() {
51 |         Ok(notify::Event{ path: Some(_),op:Ok(operation) }) => {
52 |             if operation == op::WRITE { // only events that are exactly WRITE trigger a re-diff
53 |                 try_io!(update_hashes(&mut hashes, file_name));
54 |             }
55 |         },
56 |         Err(e) => println!("watch error {}", e), // NOTE(review): the loop continues after a receive error - confirm this cannot spin
57 |         _ => ()
58 |       }
59 |   }
60 | }
61 | 
62 | fn main() {
63 |     let args: Vec<String> = std::env::args().collect();
64 |     match args.len() {
65 |         2 => {
66 |             if let Err(err) = watch(&args[1]) {
67 |                 println!("Error! {:?}", err)
68 |             }
69 |         }
70 |         _ => println!("Usage: file_watcher <file_name>"),
71 |     }
72 | }
73 | 


--------------------------------------------------------------------------------
/examples/filev1.txt:
--------------------------------------------------------------------------------
 1 | It is a truth universally acknowledged, that a single man in possession of
 2 | a good fortune, must be in want of a wife.
 3 | 
 4 | However little known the feelings or views of such a man may be on his
 5 | first entering a neighbourhood, this truth is so well fixed in the minds
 6 | of the surrounding families, that he is considered the rightful property
 7 | of some one or other of their daughters.
 8 | 
 9 | "My dear Mr. Bennet," said his lady to him one day, "have you heard that
10 | Netherfield Park is let at last?"
11 | 
12 | Mr. Bennet replied that he had not.
13 | 
14 | "But it is," returned she; "for Mrs. Long has just been here, and she
15 | told me all about it."
16 | 
17 | Mr. Bennet made no answer.
18 | 
19 | "Do you not want to know who has taken it?" cried his wife impatiently.
20 | 
21 | "_You_ want to tell me, and I have no objection to hearing it."
22 | 
23 | This was invitation enough.
24 | 


--------------------------------------------------------------------------------
/examples/filev2.txt:
--------------------------------------------------------------------------------
 1 | It is a widely understood truth unthat a single man in possession of
 2 | a good fortune, must absolutely be in want of a wife.
 3 | 
 4 | However little known the feelings or views of such a man may be on his
 5 | first entering a neighbourhood, this truth is so well fixed in the minds
 6 | of the surrounding families, that he is considered the rightful property
 7 | of some one or other of their daughters, or sons if the family was progressive.
 8 | 
 9 | "My dear Mr. Bennet," said his lady to him one day, "have you heard that
10 | Netherfield Park is let at last?"
11 | 
12 | Mr. Bennet replied that he had not, even though he had been following the news quite closely.
13 | 
14 | "But it is," returned she; "for Mrs. Long has just been here, and she
15 | told me all about it."
16 | 
17 | Mr. Bennet made no answer.
18 | 
19 | "Do you not want to know who has taken it?" cried his wife impatiently.
20 | 
21 | "_You_ want to tell me, and I have no objection to hearing it."
22 | 
23 | This was invitation enough.
24 | 


--------------------------------------------------------------------------------
/examples/predefined.rs:
--------------------------------------------------------------------------------
 1 | extern crate rdiff;
 2 | 
 3 | use rdiff::BlockHashes;
 4 | use std::fs::File;
 5 | 
 6 | pub fn main() {
 7 |     // Hash the original text in 8-byte blocks.
 8 |     let mut hashes = BlockHashes::new(File::open("examples/filev1.txt").unwrap(), 8).unwrap();
 9 |     // Diff the edited text against those hashes (this also replaces them with the new file's hashes).
10 |     let difference = hashes.diff_and_update(File::open("examples/filev2.txt").unwrap()).unwrap();
11 |     println!("Inserts: {:?}", difference.inserts().collect::<Vec<_>>());
12 |     println!("Deletes: {:?}", difference.deletes().collect::<Vec<_>>());
13 | }
14 | 


--------------------------------------------------------------------------------
/src/hashing.rs:
--------------------------------------------------------------------------------
  1 | use super::{BlockHashes, Diff, Window};
  2 | use std::io::{Read, Write, Result};
  3 | use std::collections::HashMap;
  4 | use crypto::md5::Md5;
  5 | use crypto::digest::Digest;
  6 | use byteorder::{NetworkEndian, ByteOrder};
  7 | 
  8 | /// Implements a weak, but easy to calculate hash for a block of bytes
  9 | ///
 10 | /// The hash is comprised of two 16-bit sums.  The first is the sum of the bytes
 11 | /// in the block, the second is the sum of the running sums of the bytes in the block
 12 | struct RollingHash {
 13 |     a: u16, // sum of the bytes in the window, modulo 2^16
 14 |     b: u16, // sum of the successive values of `a`, modulo 2^16
 15 |     block_size: u16 // number of bytes in the window, modulo 2^16
 16 | }
 17 | 
 18 | impl RollingHash {
 19 | 
 20 |     /// Creates a new rolling hash over the bytes in `initial_data`.
 21 |     /// It will be assumed that the size of blocks will be the size of the initial data.
 22 |     pub fn new<'a, I: Iterator<Item=&'a u8>>(initial_data: I) -> RollingHash {
 23 |         // Both sums are kept modulo 2^16, so the length only needs to be known modulo 2^16 too.
 24 |         let mut a:u16 = 0;
 25 |         let mut b:u16 = 0;
 26 |         let mut block_size: u16 = 0;
 27 |         for byte in initial_data {
 28 |             a = a.wrapping_add(*byte as u16);
 29 |             b = b.wrapping_add(a);
 30 |             block_size = block_size.wrapping_add(1); // wrap instead of panicking on blocks of 65536+ bytes
 31 |         }
 32 |         RollingHash {
 33 |             a: a,
 34 |             b: b,
 35 |             block_size: block_size
 36 |         }
 37 |     }
 38 | 
 39 |     /// Gets the hash as it currently stands: `b` in the high 16 bits, `a` in the low 16 bits
 40 |     pub fn get_hash(&self) -> u32 {
 41 |         ((self.b as u32) << 16) | self.a as u32
 42 |     }
 43 | 
 44 |     /// Roll the hash forward one byte.  This function will remove `old_byte` from its calculation
 45 |     /// and add `new_byte` if it exists.  Without a `new_byte` the window shrinks by one byte.
 46 |     /// To get the hash afterwards, use `get_hash()`.
 47 |     pub fn roll_hash(&mut self, new_byte: Option<u8>, old_byte: u8) {
 48 |         self.a = self.a.wrapping_sub(old_byte as u16);
 49 |         self.b = self.b.wrapping_sub((old_byte as u16).wrapping_mul(self.block_size)); // `old_byte` was counted once per window byte
 50 |         if let Some(new_byte) = new_byte {
 51 |             self.a = self.a.wrapping_add(new_byte as u16);
 52 |             self.b = self.b.wrapping_add(self.a);
 53 |         } else {
 54 |             self.block_size = self.block_size.wrapping_sub(1); // stays correct modulo 2^16, never panics
 55 |         }
 56 |     }
 57 | 
 58 |     /// Calculate the hash of a collection of bytes.
 59 |     pub fn hash_buffer(buffer: &[u8]) -> u32 {
 60 |         let mut a:u16 = 0;
 61 |         let mut b:u16 = 0;
 62 |         for byte in buffer {
 63 |             a = a.wrapping_add(*byte as u16);
 64 |             b = b.wrapping_add(a);
 65 |             // same recurrence as `new()`, so both produce the same hash for the same bytes
 66 |         }
 67 |         (b as u32) << 16 | a as u32
 68 |     }
 69 | }
 70 | 
 71 | 
 72 | impl BlockHashes {
 73 | 
 74 |     /// Create a new BlockHash based on the data in data_source.  This method
 75 |     /// will create a hash for every `block_size` set of bytes in `data_source`;
 76 |     /// only the final block may be shorter than `block_size`.
 77 |     /// To see the difference after `data_source` has been updated, use `diff_and_update()`
 78 |     ///
 79 |     /// This method returns an error when there is a problem reading from `data_source`.
 80 |     pub fn new<R: Read>(mut data_source: R, block_size: usize) -> Result<BlockHashes> {
 81 |         let mut block = vec![0;block_size];
 82 |         let mut hashes = HashMap::new();
 83 |         let mut block_index = 0;
 84 |         let mut strong_hasher = Md5::new();
 85 |         let mut total_size = 0;
 86 |         loop {
 87 |             // A single `read` may return fewer bytes than requested even before the end of
 88 |             // the data, so keep reading until the block is full or the source is exhausted.
 89 |             let mut read_size = 0;
 90 |             while read_size < block_size {
 91 |                 let count = try!(data_source.read(&mut block[read_size..]));
 92 |                 if count == 0 { break; }
 93 |                 read_size += count;
 94 |             }
 95 |             if read_size == 0 { break; }
 96 |             let weak_hash = RollingHash::hash_buffer(&block[..read_size]);
 97 |             let mut strong_hash:[u8;16] = [0;16];
 98 |             strong_hasher.reset();
 99 |             strong_hasher.input(&block[..read_size]);
100 |             strong_hasher.result(&mut strong_hash);
101 |             // Blocks that share a weak hash are kept together and told apart by their strong hash.
102 |             hashes.entry(weak_hash).or_insert(Vec::new()).push((block_index, strong_hash));
103 |             block_index += 1;
104 |             total_size += read_size;
105 |         }
106 |         Ok(BlockHashes { hashes: hashes, block_size: block_size, file_size: total_size })
107 |     }
108 | 
109 |     /// Construct the block hashes of a file that was just created:
110 |     /// no recorded blocks and a file size of zero.
111 |     pub fn empty(block_size: usize) -> BlockHashes {
112 |         // Nothing has been read yet, so there is nothing to hash.
113 |         let hashes = HashMap::new();
114 |         let file_size = 0;
115 |         BlockHashes { hashes: hashes, block_size: block_size, file_size: file_size }
116 |     }
117 | 
118 |     /// Compare the data in `new_data` with the hashes computed from either the most recent call to
119 |     /// `diff_and_update()` or when this `BlockHashes` was created, then replace the stored hashes with those of `new_data`.
120 |     ///
121 |     /// # Example
122 |     ///
123 |     /// ```
124 |     /// use rdiff::BlockHashes;
125 |     /// use std::io::Cursor;
126 |     /// let mut hashes = BlockHashes::new(Cursor::new("It was the best of times"), 6).unwrap();
127 |     /// let diff = hashes.diff_and_update(Cursor::new("It was not the best of things")).unwrap();
128 |     /// // prints (6, ' not') and (22, ' things'))
129 |     /// for insert in diff.inserts() {
130 |     ///     println!("{:?}", insert);
131 |     /// }
132 |     /// // prints (29, 6)
133 |     /// for delete in diff.deletes() {
134 |     ///     println!("{:?}", delete);
135 |     /// }
136 |     /// assert_eq!("It was not the best of things",
137 |     ///             diff.apply_to_string("It was the best of times").unwrap());
138 |     /// ```
139 |     pub fn diff_and_update<R: Read>(&mut self, new_data: R) -> Result<Diff> {
140 |         use std::mem;
141 |         let mut diffs = Diff::new();
142 |         let mut window = try!(Window::new(new_data, self.block_size));
143 |         let mut weak_hasher = RollingHash::new(window.frame().0.iter());
144 |         let mut strong_hasher = Md5::new();
145 |         let mut last_matching_block_index = -1; // -1: no old block has been matched yet
146 |         let mut insert_buffer = Vec::new(); // bytes passed over since the last match
147 |         let mut new_hashes = HashMap::new(); // hashes of `new_data`, stored into `self` at the end
148 |         let mut current_block_index = 0;
149 |         while window.frame_size() > 0 {
150 | 
151 |             if let Some(other_block_index) = self.check_match(&weak_hasher, &mut strong_hasher, &mut window, &mut last_matching_block_index) {
152 |                 // create an insert if the insert buffer has anything in it
153 |                 if insert_buffer.len() > 0 {
154 |                     // XXX with some work here, we could probably track the insert buffer as a piece of the window, which is then
155 |                     // moved into the diff list.
156 |                     diffs.add_insert(window.get_bytes_read() - insert_buffer.len(), mem::replace(&mut insert_buffer, Vec::new()));
157 |                 }
158 |                 // create a delete if the index is more than it should be (old blocks were skipped)
159 |                 if other_block_index as i32 > last_matching_block_index + 1 {
160 |                     diffs.add_delete(window.get_bytes_read(), self.block_size * (other_block_index as i32 - last_matching_block_index - 1) as usize)
161 |                 }
162 |                 last_matching_block_index = other_block_index as i32;
163 |                 // advance forward an entire block's worth
164 |                 for i in 0..self.block_size {
165 |                     if window.on_boundry() {
166 |                         // This might iterate past the end of the data.  If so, bail out
167 |                         if window.frame_size() == 0 {
168 |                             break;
169 |                         }
170 |                         let mut strong_hash:[u8;16] = [0;16];
171 |                         // If the boundary happened where we saw a match, we can skip the
172 |                         // strong hashing, because it was already done during the
173 |                         // match checking
174 |                         if i != 0 {
175 |                             let (front, back) = window.frame();
176 |                             strong_hasher.reset();
177 |                             strong_hasher.input(front);
178 |                             strong_hasher.input(back);
179 |                         }
180 |                         strong_hasher.result(&mut strong_hash);
181 | 
182 |                         new_hashes.entry(weak_hasher.get_hash()).or_insert(Vec::new()).push((current_block_index, strong_hash));
183 |                         current_block_index += 1;
184 |                     }
185 |                     let (tail, head) = try!(window.advance());
186 |                     if let Some(tail) = tail {
187 |                         weak_hasher.roll_hash(head, tail);
188 |                     } else {
189 |                         break;
190 |                     }
191 |                 }
192 |             } else {
193 |                 // advance forward one byte
194 |                 if window.on_boundry() {
195 |                     // XXX There is a slight optimization possible here, where
196 |                     // when the weak checksum matches, but the strong one doesn't
197 |                     // we are re-computing the strong checksum here.
198 |                     let mut strong_hash:[u8;16] = [0;16];
199 |                     let (front, back) = window.frame();
200 |                     strong_hasher.reset();
201 |                     strong_hasher.input(front);
202 |                     strong_hasher.input(back);
203 |                     strong_hasher.result(&mut strong_hash);
204 | 
205 |                     new_hashes.entry(weak_hasher.get_hash()).or_insert(Vec::new()).push((current_block_index, strong_hash));
206 |                     current_block_index += 1;
207 |                 }
208 |                 let (tail, head) = try!(window.advance());
209 |                 weak_hasher.roll_hash(head, tail.unwrap());
210 |                 insert_buffer.push(tail.unwrap()); // the byte that left the window matched nothing, so it is new data
211 |             }
212 |         }
213 |         if insert_buffer.len() > 0 {
214 |             diffs.add_insert(window.get_bytes_read() - insert_buffer.len(), insert_buffer);
215 |         }
216 |         let old_block_count = (self.file_size + self.block_size - 1) as i32 / self.block_size as i32; // rounds up
217 |         if last_matching_block_index + 1 < old_block_count {
218 |             diffs.add_delete(window.get_bytes_read(), (self.file_size as i32 - (last_matching_block_index + 1) * self.block_size as i32) as usize);
219 |         }
220 |         self.hashes = new_hashes; // from here on `self` describes `new_data`
221 |         self.file_size = window.get_bytes_read();
222 |         Ok(diffs)
223 |     }
224 | 
225 |     /// Checks if `data_source` has changed since the last time the hashes were updated.
226 |     /// Every block read must be recorded at the same index with the same weak and strong hash.
227 |     /// Returns true if `data_source` is identical to what it was when the hashes were generated, false otherwise
228 |     pub fn verify_unchanged<R: Read>(&self, data_source: &mut R) -> Result<bool> {
229 |         let mut block = vec![0;self.block_size];
230 |         let mut block_index = 0;
231 |         let mut strong_hasher = Md5::new();
232 |         let mut total_size = 0;
233 |         // NOTE(review): one `read` per block; a source that returns short reads would be reported as changed.
234 |         let mut read_size = try!(data_source.read(&mut block));
235 |         while read_size > 0 {
236 |             let weak_hash = RollingHash::hash_buffer(&block[..read_size]);
237 |             let entry = match self.hashes.get(&weak_hash) {
238 |                 Some(entry) => entry,
239 |                 None => return Ok(false) // no stored block has this weak hash, so the data changed
240 |             };
241 |             let mut strong_hash:[u8;16] = [0;16];
242 |             strong_hasher.reset();
243 |             strong_hasher.input(&block[..read_size]);
244 |             strong_hasher.result(&mut strong_hash);
245 |             if !entry.contains(&(block_index, strong_hash)) {
246 |                 return Ok(false);
247 |             }
248 |             block_index += 1;
249 |             total_size += read_size;
250 |             read_size = try!(data_source.read(&mut block));
251 |         }
252 |         Ok(total_size == self.file_size)
253 |     }
254 | 
255 |     /// Compress these Hashes and write to `writer`.  The output can then be expanded
256 |     /// back into an equivalent Hash collection using `expand_from()`
257 |     pub fn compress_to<W: Write>(&self, writer: &mut W) -> Result<()> {
258 |         // Layout: file size (u32) and block size (u32) in network byte order, then a weak hash (u32) and strong hash (16 bytes) per block.
259 |         let mut int_buf = [0;4];
260 |         NetworkEndian::write_u32(&mut int_buf, self.file_size as u32);
261 |         try!(writer.write_all(&int_buf)); // `write_all`: a plain `write` may only write part of the buffer
262 |         NetworkEndian::write_u32(&mut int_buf, self.block_size as u32);
263 |         try!(writer.write_all(&int_buf));
264 |         let block_count = (self.file_size + self.block_size - 1) / self.block_size;
265 |         let dummy_hash = [0u8;16];
266 |         let mut sequential_hashes = Vec::with_capacity(block_count);
267 |         sequential_hashes.resize(block_count, (0, &dummy_hash));
268 |         for (weak_hash, entry) in self.hashes.iter() {
269 |             for &(index, ref strong_hash) in entry.iter() {
270 |                 sequential_hashes[index] = (*weak_hash, strong_hash); // put the hashes back in block order
271 |             }
272 |         }
273 |         for (weak, strong) in sequential_hashes {
274 |             NetworkEndian::write_u32(&mut int_buf, weak);
275 |             try!(writer.write_all(&int_buf));
276 |             try!(writer.write_all(strong));
277 |         }
278 |         Ok(())
279 |     }
280 | 
281 |     /// Expand these hashes from previously compressed data in `reader`.  The data in reader
282 |     /// should have been written using `compress_to()`
283 |     ///
284 |     /// Returns an error if `reader` fails, ends early, or declares a block size of zero.
285 |     pub fn expand_from<R: Read>(reader: &mut R) -> Result<BlockHashes> {
286 |         let mut int_buf = [0;4];
287 |         let mut strong_hash = [0u8;16];
288 |         try!(reader.read_exact(&mut int_buf)); // `read_exact`: a plain `read` may only fill part of the buffer
289 |         let file_size = NetworkEndian::read_u32(&int_buf) as usize;
290 |         try!(reader.read_exact(&mut int_buf));
291 |         let block_size = NetworkEndian::read_u32(&int_buf) as usize;
292 |         if block_size == 0 {
293 |             return Err(::std::io::Error::new(::std::io::ErrorKind::InvalidData, "block size must be non-zero"));
294 |         }
295 |         let block_count = (file_size + block_size - 1) / block_size;
296 |         // Might be an overestimate, but not by more than a few
297 |         let mut hashes = HashMap::with_capacity(block_count);
298 |         for block_index in 0..block_count {
299 |             try!(reader.read_exact(&mut int_buf));
300 |             let weak_hash = NetworkEndian::read_u32(&int_buf);
301 |             try!(reader.read_exact(&mut strong_hash));
302 |             hashes.entry(weak_hash).or_insert(Vec::new()).push((block_index, strong_hash));
303 |         }
304 |         Ok(BlockHashes { file_size: file_size, block_size: block_size, hashes: hashes })
305 |     }
306 | 
307 |     /// Looks for a stored block identical to the current window frame, accepting it
308 |     /// only when its index is greater than `last_matching_block_index`.
309 |     /// Returns the index of that block, or `None` when there is no acceptable match.
310 |     fn check_match<R: Read>(&self, weak_hasher: &RollingHash, strong_hasher: &mut Md5, window: &Window<R>, last_matching_block_index: &mut i32) -> Option<usize> {
311 |         let previous = *last_matching_block_index;
312 |         match self.hash_match(weak_hasher, strong_hasher, window) {
313 |             Some(candidate) if candidate as i32 > previous => Some(candidate),
314 |             // Either nothing matched, or the match is not past the last accepted block.
315 |             _ => None
316 |         }
317 |     }
318 | 
319 |     /// Compares the current window frame against every stored block sharing its weak hash.
320 |     ///
321 |     /// Returns the index of the first stored block whose strong hash equals the frame's, if any
322 |     fn hash_match<R: Read>(&self, weak_hasher: &RollingHash,  strong_hasher: &mut Md5, window: &Window<R>) -> Option<usize> {
323 |         let mut digest = [0u8;16];
324 |         let candidates = match self.hashes.get(&weak_hasher.get_hash()) {
325 |             Some(candidates) => candidates,
326 |             // No stored block shares the weak hash, so the strong hash is never computed.
327 |             None => return None
328 |         };
329 |         candidates.iter().find(|candidate| {
330 |             strong_hasher.reset();
331 |             let (front, back) = window.frame();
332 |             strong_hasher.input(front);
333 |             strong_hasher.input(back);
334 |             strong_hasher.result(&mut digest);
335 |             digest == candidate.1
336 |         }).map(|candidate| candidate.0)
337 |     }
338 | }
339 | 
340 | #[cfg(test)]
341 | mod test {
342 |     use super::super::{BlockHashes, Diff, Insert, Delete};
343 |     use super::{RollingHash};
344 |     use std::io::{Cursor};
345 |     use std::collections::HashMap;
346 | 
347 |     macro_rules! check_diff {
348 |         ($start: tt | $block_size: tt | $new: tt | $(($insert_pos : tt, $insert_value: tt)),* | $(($delete_pos: tt, $delete_len: tt)),*) => {
349 |             {
350 |                 check_diff_workaround!($start; $block_size; $new; $(($insert_pos, $insert_value)),*; $(($delete_pos, $delete_len)),*)
351 |             }
352 |         };
353 |     }
354 | 
355 |     // Caused by a bug in the implementation of the tt macro type.  It currently has to be passed as an expr into another macro
356 |     // or it throws a fit for no reason.  See https://github.com/rust-lang/rust/issues/5846
357 |     macro_rules! check_diff_workaround {
358 |         ($start: expr ; $block_size: expr ; $new: expr ; $(($insert_pos : tt, $insert_value: tt)),* ; $(($delete_pos: tt, $delete_len: tt)),*) => {
359 |             {
360 |                 let mut hashes = BlockHashes::new(Cursor::new($start), $block_size).unwrap();
361 |                 let diff = hashes.diff_and_update(Cursor::new($new)).unwrap();
362 |                 assert_eq!(Diff {
363 |                     inserts: vec![$(Insert{position: $insert_pos, data: $insert_value.bytes().collect()}),*],
364 |                     deletes: vec![$(Delete{position: $delete_pos, len: $delete_len}),*]
365 |                 }, diff);
366 |                 check_hashes(&hashes, $new);
367 |             }
368 |         };
369 |     }
370 | 
371 |     fn check_hashes(hashes: &BlockHashes, starting_data: &'static str) {
372 |         let expected_hashes = BlockHashes::new(Cursor::new(starting_data), hashes.block_size).unwrap();
373 |         assert_eq!(hashes, &expected_hashes);
374 |     }
375 | 
376 |     #[test]
377 |     fn rolling_hash_small() {
378 |         let mut hash = RollingHash::new(vec![7, 2, 9, 1, 7, 8].iter());
379 |         assert_eq!(hash.get_hash(), 0x710022); // a: 34 b: 113
380 |         hash.roll_hash(Some(12), 7); // [2, 9, 1, 7, 8, 12]
381 |         assert_eq!(hash.get_hash(), 0x6E0027); // a: 39 b:110
382 |         hash.roll_hash(Some(1), 2); // [9, 1, 7, 8, 12, 1]
383 |         assert_eq!(hash.get_hash(), 0x880026); // a: 38 b:136
384 |         hash.roll_hash(None, 9); // [1, 7, 8, 12, 1]
385 |         assert_eq!(hash.get_hash(), 0x52001D); // a: 29 b:82
386 |         hash.roll_hash(None, 1); // [7, 8, 12, 1]
387 |         assert_eq!(hash.get_hash(), 0x4D001C); // a: 28 b: 77
388 |         hash.roll_hash(None, 7); // [8, 12, 1]
389 |         assert_eq!(hash.get_hash(), 0x310015); // a: 21 b: 49
390 |         hash.roll_hash(None, 8); // [12, 1]
391 |         assert_eq!(hash.get_hash(), 0x19000D); // a: 13 b: 25
392 |         hash.roll_hash(None, 12); // [1]
393 |         assert_eq!(hash.get_hash(), 0x10001); // a: 1 b: 1
394 |         hash.roll_hash(None, 1); // []
395 |         assert_eq!(hash.get_hash(), 0x0); // a: 0 b: 0
396 |     }
397 |     #[test]
398 |     fn rolling_hash_big() {
399 |         let mut numbers = Vec::new();
400 |         for i in 0..4000 {
401 |             numbers.push((200 + i * i) as u8);
402 |         }
403 |         let mut hash = RollingHash::new(numbers.iter());
404 |         assert_eq!(hash.get_hash(), 0x1880A9F0); // a: A9f0 b: 1880
405 |         hash.roll_hash(Some(237), 200);
406 |         assert_eq!(hash.get_hash(), 0x8D95AA15); // a: AA15 b: 8D95
407 |         hash.roll_hash(None, 201);
408 |         assert_eq!(hash.get_hash(), 0x48F5A94C) // a: A94C b: 48F5
409 | 
410 |     }
411 | 
412 |     #[test]
413 |     fn hash_blocks_init() {
414 |         let test_string = "It was the best of times, it was the worst of times";
415 |         // Blocks of 8 bytes (block text : u32 map key - 16-byte digest in hex, as asserted below):
416 |         // It was t : 202900156 - ad721d63c3dabb32cc9096824071a919
417 |         // he best  : 211944123 - 2712A22DDA5585758AEBC4D298142F8B
418 |         // of times : 225313559 - 3160523454fa59e4c14badf9435d6212
419 |         // , it was : 169083540 - 5fa8fa659adc38997bb365f17648ea8a
420 |         //  the wor : 197788377 - 6BF29B2CD5033EFC079C2EA127FD7B13
421 |         // st of ti : 217580249 - 1c64811671e43ea5f82da6ffc4a5bbee
422 |         // mes      : 42205509  - d2db8a610f8c7c0785d2d92a6e8c450e
423 |         let hashes = BlockHashes::new(Cursor::new(test_string), 8).unwrap();
424 | 
425 |         let mut expected_hashes:HashMap<u32, Vec<(usize, [u8;16])>> = HashMap::new();
426 |         expected_hashes.insert(202900156, vec![(0, [0xad, 0x72, 0x1d, 0x63, 0xc3, 0xda, 0xbb, 0x32, 0xcc, 0x90, 0x96, 0x82, 0x40, 0x71, 0xa9, 0x19])]);
427 |         expected_hashes.insert(211944123, vec![(1, [0x27, 0x12, 0xA2, 0x2D, 0xDA, 0x55, 0x85, 0x75, 0x8A, 0xEB, 0xC4, 0xD2, 0x98, 0x14, 0x2F, 0x8B])]);
428 |         expected_hashes.insert(225313559, vec![(2, [0x31, 0x60, 0x52, 0x34, 0x54, 0xfa, 0x59, 0xe4, 0xc1, 0x4b, 0xad, 0xf9, 0x43, 0x5d, 0x62, 0x12])]);
429 |         expected_hashes.insert(169083540, vec![(3, [0x5f, 0xa8, 0xfa, 0x65, 0x9a, 0xdc, 0x38, 0x99, 0x7b, 0xb3, 0x65, 0xf1, 0x76, 0x48, 0xea, 0x8a])]);
430 |         expected_hashes.insert(197788377, vec![(4, [0x6B, 0xF2, 0x9B, 0x2C, 0xD5, 0x03, 0x3E, 0xFC, 0x07, 0x9C, 0x2E, 0xA1, 0x27, 0xFD, 0x7B, 0x13])]);
431 |         expected_hashes.insert(217580249, vec![(5, [0x1c, 0x64, 0x81, 0x16, 0x71, 0xe4, 0x3e, 0xa5, 0xf8, 0x2d, 0xa6, 0xff, 0xc4, 0xa5, 0xbb, 0xee])]);
432 |         expected_hashes.insert(42205509,  vec![(6, [0xd2, 0xdb, 0x8a, 0x61, 0x0f, 0x8c, 0x7c, 0x07, 0x85, 0xd2, 0xd9, 0x2a, 0x6e, 0x8c, 0x45, 0x0e])]);
433 | 
434 |         assert_eq!(hashes, BlockHashes {
435 |             hashes: expected_hashes,
436 |             block_size: 8,
437 |             file_size: 51
438 |         });
439 |     }
440 | 
441 | 
442 |     #[test]
443 |     fn empty_hashes() {
444 |         check_diff!("" |
445 |                     16 |
446 |                     "The New Data" |
447 |                     (0, "The New Data") |
448 |                     // final section left empty: presumably "no deletes expected" — confirm against check_diff!
449 |                 );
450 |     }
451 | 
452 |     #[test]
453 |     fn no_change() {
454 |         check_diff!("Same Data" |
455 |                     8 |
456 |                     "Same Data" |
457 |                     |
458 |                     // both trailing sections are empty: presumably "no inserts, no deletes" — confirm against check_diff!
459 |                 );
460 |     }
461 | 
462 |     #[test]
463 |     fn multiple_overwrites() {
464 |         check_diff!("" |
465 |                     8 |
466 |                     "New Data" |
467 |                     (0, "New Data")|
468 | 
469 |                 );
470 |         check_diff!("New Data" |
471 |                     8 |
472 |                     "Other Stuff" |
473 |                     (0, "Other Stuff")|
474 |                     (11, 8) // the 8 old bytes sit right after the 11 inserted ones
475 |                 );
476 |         check_diff!("Other Stuff" |
477 |                     8 |
478 |                     "More Things" |
479 |                     (0, "More Things")|
480 |                     (11, 11) // likewise: the 11 old bytes follow the 11 inserted ones
481 |                 );
482 |     }
483 | 
484 |     #[test]
485 |     fn insertions() {
486 |         check_diff!("Starting data is a long sentence" |
487 |                     8 |
488 |                     "Starting data is now a long sentence" |
489 |                     (16, " now") |
490 | 
491 |                 );
492 |         check_diff!("Starting data is a long sentence" |
493 |                     8 |
494 |                     "This Starting data is a long sentence" |
495 |                     (0, "This ") |
496 | 
497 |                 );
498 |         check_diff!("Starting data is a long sentence" |
499 |                     8 |
500 |                     "Starting data is a long sentence. With more" |
501 |                     (32, ". With more") |
502 | 
503 |                 );
504 |         check_diff!("Starting data is a long sentence" |
505 |                     8 |
506 |                     "This Starting data is now a long sentence. With more" |
507 |                     (0, "This "),
508 |                     (21, " now"),
509 |                     (41, ". With more") |
510 |                     // the positions above are offsets in the new text, i.e. they already account for the earlier inserts
511 |                 );
512 |     }
513 | 
514 |     #[test]
515 |     fn delete_on_boundry() {
516 |         check_diff!("13 chars long, no longer" |
517 |                     13 | // presumably the block size; it equals the 13-byte length of the retained "13 chars long"
518 |                     "13 chars long" |
519 |                     |
520 |                     (13, 11) // the 11 removed bytes are ", no longer"
521 |                 );
522 |     }
523 | 
524 |     #[test]
525 |     fn deletions() {
526 |         check_diff!("Starting data is a long sentence" |
527 |                     8 |
528 |                     "Starting a long sentence" |
529 |                     |
530 |                     (8, 8)
531 |                 );
532 |         check_diff!("Starting data is a long sentence" |
533 |                     8 |
534 |                     "Starting data is a long " |
535 |                     |
536 |                     (24, 8)
537 |                 );
538 |         check_diff!("Starting data is a long sentence" |
539 |                     8 |
540 |                     " data is a long sentence" |
541 |                     |
542 |                     (0, 8)
543 |                 );
544 |         check_diff!("Starting data is a long sentence" |
545 |                     8 |
546 |                     " a long " |
547 |                     |
548 |                     (0, 16), (8, 8) // (8, 8) is relative to the text left after (0, 16): it removes the final "sentence"
549 |                 );
550 | 
551 |     }
552 | 
553 |     #[test]
554 |     fn insertions_and_deletions() {
555 |         check_diff!("Starting data is a long sentence" |
556 |                     8 |
557 |                     "Starting data a long sentence" |
558 |                     (8, " data") |
559 |                     (13, 8) // counted in the text after the insert: removes the original " data is"
560 |                 );
561 |         check_diff!("Starting data is a long sentence" |
562 |                     8 |
563 |                     "Starting data is a long sentenc" |
564 |                     (24, "sentenc")|
565 |                     (31, 8) // counted in the text after the insert: removes the original "sentence"
566 |                 );
567 |         check_diff!("Starting data is a long sentence" |
568 |                     8 |
569 |                     "This Starting data a very long sentence" |
570 |                     (0, "This "), (13, " data a very long ") |
571 |                     (31, 16)
572 |                 );
573 | 
574 |     }
575 | }
576 | 


--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
  1 | //! Finds the difference between sequential versions of files.
  2 | //!
  3 | //! Based on the rsync algorithm.
  4 | //! The `BlockHashes` struct will find the differences between versions of the same file.
  5 | //! It does this through the [`diff_and_update()`](struct.BlockHashes.html#method.diff_and_update) method.
  6 | //!
  7 | //! # Example
  8 | //!
  9 | //! ```
 10 | //! use std::io::Cursor;
 11 | //! use rdiff::BlockHashes;
 12 | //!
 13 | //! let mut hash = BlockHashes::new(Cursor::new("The initial version"), 8).unwrap();
 14 | //! let diffs = hash.diff_and_update(Cursor::new("The next version")).unwrap();
 15 | //! println!("Diffs: {:?}", diffs);
 16 | //! // Outputs "Diffs: Diff{inserts: [Insert(0, The next vers)], deletes:[Delete(13, 16)]}"
 17 | //! ```
 18 | //!
 19 | //! This crate also contains methods relating to finding the differences between two strings, in the [string_diff](string_diff/index.html) module.
 20 | //! These methods can be used to refine the coarse differences found through the rsync method.
 21 | 
 22 | #![deny(missing_docs)]
 23 | extern crate crypto;
 24 | extern crate byteorder;
 25 | #[macro_use]
 26 | extern crate log;
 27 | 
 28 | mod window;
 29 | mod hashing;
 30 | pub mod string_diff;
 31 | 
 32 | use std::collections::HashMap;
 33 | use std::fs::File;
 34 | use std::io::{self, Read, Write, Seek, SeekFrom};
 35 | use std::slice::Iter;
 36 | use std::fmt;
 37 | use std::mem;
 38 | use std::string::FromUtf8Error;
 39 | 
 40 | use byteorder::{NetworkEndian, ByteOrder};
 41 | 
 42 | /// Used for calculating and re-calculating the differences between two versions of the same file
 43 | ///
 44 | /// See the [module level documentation](index.html) for examples on how to use this
 45 | #[derive(Debug, PartialEq)]
 46 | pub struct BlockHashes {
 47 |     hashes: HashMap<u32, Vec<(usize, [u8; 16])>>, // u32 key -> (block index, 16-byte digest) entries; presumably weak hash -> strong digest, confirm in hashing.rs
 48 |     block_size: usize, // block length in bytes, as passed to `BlockHashes::new`
 49 |     file_size: usize // presumably the total number of bytes hashed — confirm in hashing.rs
 50 | }
 51 | 
 52 | /// Represents an operation to insert bytes at a particular position into a file
 53 | #[derive(PartialEq)]
 54 | pub struct Insert {
 55 |     position: usize, // byte offset at which `data` is inserted
 56 |     data: Vec<u8> // the bytes to insert
 57 | }
 58 | 
 59 | /// Represents an operation to delete a certain number of bytes at a particular position in a file
 60 | #[derive(PartialEq)]
 61 | pub struct Delete {
 62 |     position: usize, // byte offset of the first byte to remove
 63 |     len: usize // number of bytes to remove
 64 | }
 65 | 
 66 | /// Represents a series of operations that were performed on a file to transform it into a new
 67 | /// version.
 68 | ///
 69 | /// The operations are stored in file order, which means that every operation that affects
 70 | /// an earlier part of the file must be stored before an operation that affects a later part.
 71 | /// The diff also assumes that insert operations are performed prior to delete operations.
 72 | #[derive(Debug, PartialEq)]
 73 | pub struct Diff {
 74 |     inserts: Vec<Insert>, // applied first, in stored order
 75 |     deletes: Vec<Delete> // applied to the text produced by the inserts, in stored order
 76 | }
 77 | 
 78 | /// A sliding window over a reader.  This maintains an internal buffer read from the file,
 79 | /// which can be read from at any time.
 80 | struct Window<R: Read> {
 81 |     front: Vec<u8>,
 82 |     back: Vec<u8>,
 83 |     block_size: usize,
 84 |     offset: usize,
 85 |     bytes_read: usize,
 86 |     reader: R
 87 | }
 88 | 
 89 | impl Diff {
 90 |     /// Creates an empty `Diff`, holding no insert or delete operations
 91 |     #[inline]
 92 |     pub fn new() -> Diff {
 93 |         // Both lists start out empty.
 94 |         let inserts = vec![];
 95 |         let deletes = vec![];
 96 |         Diff { inserts, deletes }
 97 |     }
 98 | 
 99 |     /// Records an insert of `data` at `position`.  Inserts must be added in file
100 |     /// order, i.e. after every insert already present.  When the new insert begins
101 |     /// exactly where the previous one ends, its bytes are appended to that insert
102 |     /// instead of creating a new entry.
103 |     /// Consumes the data that is passed in
104 |     fn add_insert(&mut self, position: usize, mut data: Vec<u8>) {
105 |         let merged = self.inserts.last_mut().map_or(false, |prev| {
106 |             let adjacent = prev.position + prev.data.len() == position;
107 |             if adjacent { prev.data.append(&mut data); }
108 |             adjacent
109 |         });
110 |         // Nothing to extend, so this becomes a new entry.
111 |         if !merged { self.inserts.push(Insert::new(data, position)); }
112 |     }
113 | 
114 |     /// Records a delete of `len` bytes at `position`.  The operation must occur after
115 |     /// all previously added insert and delete operations in file order.  A delete at the
116 |     /// same position as the previous delete is folded into it by extending its length.
117 |     fn add_delete(&mut self, position: usize, len: usize) {
118 |         let merged = self.deletes.last_mut().map_or(false, |prev| {
119 |             let same_spot = prev.position == position;
120 |             if same_spot { prev.len += len; }
121 |             same_spot
122 |         });
123 |         // Nothing to extend, so this becomes a new entry.
124 |         if !merged { self.deletes.push(Delete::new(position, len)); }
125 |     }
126 | 
127 |     /// Returns an iterator that visits every insert operation, in stored order
128 |     pub fn inserts(&self) -> Iter<Insert> {
129 |         self.inserts[..].iter()
130 |     }
131 | 
132 |     /// Returns an iterator that visits every delete operation, in stored order
133 |     pub fn deletes(&self) -> Iter<Delete> {
134 |         self.deletes[..].iter()
135 |     }
136 | 
137 |     /// Returns `true` when this diff holds neither inserts nor deletes
138 |     pub fn is_empty(&self) -> bool {
139 |         self.inserts.is_empty() && self.deletes.is_empty()
140 |     }
141 | 
142 |     /// Applies every operation in this diff to `string`: first all of the inserts,
143 |     /// then all of the deletes.  Gives an error if the resulting bytes are not valid utf8.
144 |     ///
145 |     /// # Panics
146 |     /// When the operations refer to positions that are not represented by the string.
147 |     pub fn apply_to_string(&self, string: &str) -> Result<String, FromUtf8Error> {
148 |         let source = string.as_bytes();
149 |         // Pass 1: copy `source` into `new_bytes`, splicing in the inserted data.  Insert
150 |         // positions are offsets in the text produced so far, i.e. `new_bytes.len()`.
151 |         let mut new_bytes = Vec::new();
152 |         let mut consumed = 0; // how many bytes of `source` have been copied
153 |         for insert in self.inserts() {
154 |             if new_bytes.len() < insert.position {
155 |                 let gap = insert.position - new_bytes.len();
156 |                 // Slicing panics when the insert lies beyond the end of the string.
157 |                 new_bytes.extend_from_slice(&source[consumed..consumed + gap]);
158 |                 consumed += gap;
159 |             }
160 |             new_bytes.extend_from_slice(&insert.data);
161 |         }
162 |         new_bytes.extend_from_slice(&source[consumed..]);
163 |         // Pass 2: copy the staged text back into `new_bytes`, skipping the deleted ranges.
164 |         // Delete positions are offsets in the text that remains after the earlier deletes.
165 |         let staged = mem::replace(&mut new_bytes, Vec::new());
166 |         let mut cursor = 0; // how many bytes of `staged` have been copied or skipped
167 |         for delete in self.deletes() {
168 |             if new_bytes.len() < delete.position {
169 |                 let gap = delete.position - new_bytes.len();
170 |                 new_bytes.extend_from_slice(&staged[cursor..cursor + gap]);
171 |                 cursor += gap;
172 |             }
173 |             // A delete that runs past the end simply stops at the end.
174 |             cursor = ::std::cmp::min(cursor.saturating_add(delete.len), staged.len());
175 |         }
176 |         new_bytes.extend_from_slice(&staged[cursor..]);
177 |         String::from_utf8(new_bytes)
178 |     }
179 | 
180 |     /// Applies the operations in this diff to a file in place: all inserts first, then all deletes.
181 |     /// Returns an `InvalidInput` error, before anything is written, if an operation refers to a position past the end of the data.
182 |     /// The file must have been opened on both read and write mode (see [OpenOptions](https://doc.rust-lang.org/nightly/std/fs/struct.OpenOptions.html)).
183 |     pub fn apply(&self, file: &mut File) -> io::Result<()> {
184 |         let out_of_range = || io::Error::new(io::ErrorKind::InvalidInput, "diff refers to a position past the end of the data");
185 |         // Read the file in one call instead of one unbuffered byte at a time.
186 |         let mut source = Vec::new();
187 |         try!(file.seek(SeekFrom::Start(0)));
188 |         try!(file.read_to_end(&mut source));
189 |         // Pass 1: interleave the original bytes with the inserted data.
190 |         let mut new_bytes = Vec::with_capacity(source.len());
191 |         let mut consumed = 0;
192 |         for insert in self.inserts.iter() {
193 |             if new_bytes.len() < insert.position {
194 |                 let end = consumed.saturating_add(insert.position - new_bytes.len());
195 |                 if end > source.len() { return Err(out_of_range()); }
196 |                 new_bytes.extend_from_slice(&source[consumed..end]);
197 |                 consumed = end;
198 |             }
199 |             new_bytes.extend_from_slice(&insert.data);
200 |         }
201 |         new_bytes.extend_from_slice(&source[consumed..]);
202 |         // Pass 2: drop the deleted ranges; delete positions are offsets in the output so far.
203 |         let staged = mem::replace(&mut new_bytes, Vec::new());
204 |         let mut cursor = 0;
205 |         for delete in self.deletes.iter() {
206 |             if new_bytes.len() < delete.position {
207 |                 let end = cursor.saturating_add(delete.position - new_bytes.len());
208 |                 if end > staged.len() { return Err(out_of_range()); }
209 |                 new_bytes.extend_from_slice(&staged[cursor..end]);
210 |                 cursor = end;
211 |             }
212 |             cursor = ::std::cmp::min(cursor.saturating_add(delete.len), staged.len());
213 |         }
214 |         new_bytes.extend_from_slice(&staged[cursor..]);
215 |         try!(file.seek(SeekFrom::Start(0)));
216 |         try!(file.set_len(new_bytes.len() as u64));
217 |         file.write_all(new_bytes.as_slice())
218 |     }
219 | 
220 |     /// Compress this diff and write to `writer`: the insert count, every insert, the delete count, then every delete.
221 |     /// The output can then be expanded back into an equivalent Diff using `expand_from()`
222 |     pub fn compress_to<W: Write>(&self, writer: &mut W) -> io::Result<()> {
223 |         // `write_all` is required here: a bare `write` may accept only part of the buffer.
224 |         let mut int_buf = [0;4];
225 |         NetworkEndian::write_u32(&mut int_buf, self.inserts.len() as u32);
226 |         try!(writer.write_all(&int_buf));
227 |         for insert in self.inserts.iter() {
228 |             try!(insert.compress_to(writer));
229 |         }
230 |         NetworkEndian::write_u32(&mut int_buf, self.deletes.len() as u32);
231 |         try!(writer.write_all(&int_buf));
232 |         for delete in self.deletes.iter() {
233 |             try!(delete.compress_to(writer));
234 |         }
235 |         Ok(())
236 |     }
237 | 
238 |     /// Expand this diff from previously compressed data in `reader`.  The data in reader
239 |     /// should have been written using `compress_to()`.  Any read failure is returned as an error.
240 |     pub fn expand_from<R: Read>(reader: &mut R) -> io::Result<Diff> {
241 |         let mut int_buf = [0;4];
242 |         // Collecting into `io::Result` stops at the first failed read and hands that error back.
243 |         trace!("Reading insert length");
244 |         try!(reader.read_exact(&mut int_buf));
245 |         let insert_len = NetworkEndian::read_u32(&int_buf);
246 |         trace!("Insert length was: {}", insert_len);
247 |         let inserts = try!((0..insert_len).map(|_| Insert::expand_from(reader)).collect::<io::Result<Vec<Insert>>>());
248 |         trace!("Read inserts");
249 |         trace!("Reading delete length");
250 |         try!(reader.read_exact(&mut int_buf));
251 |         let delete_len = NetworkEndian::read_u32(&int_buf);
252 |         trace!("Delete length was: {}", delete_len);
253 |         let deletes = try!((0..delete_len).map(|_| Delete::expand_from(reader)).collect::<io::Result<Vec<Delete>>>());
254 |         trace!("Read deletes");
255 |         Ok(Diff {
256 |             inserts: inserts,
257 |             deletes: deletes
258 |         })
259 |     }
260 | }
261 | 
262 | impl fmt::Debug for Insert { // renders as Insert(position, 'data'); '\r' is dropped and '\n' is shown escaped
263 |     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
264 |         fmt.write_fmt(format_args!("Insert({}, '{}')", self.position, String::from_utf8_lossy(&self.data).replace('\r', "").replace('\n', "\\n")))
265 |     }
266 | }
267 | 
268 | impl fmt::Debug for Delete { // renders as Delete(position, len)
269 |     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
270 |         fmt.write_fmt(format_args!("Delete({}, {})", self.position, self.len))
271 |     }
272 | }
273 | 
274 | impl Insert {
275 |     /// Builds a new `Insert` from the data and position
276 |     #[inline]
277 |     pub fn new(data: Vec<u8>, position: usize) -> Insert {
278 |         Insert {
279 |             data,
280 |             position,
281 |         }
282 |     }
283 | 
284 |     /// Gets the byte position of this insert operation in its file
285 |     #[inline]
286 |     pub fn get_position(&self) -> usize {
287 |         self.position
288 |     }
289 | 
290 |     /// Gets the data this insert operation will insert
291 |     #[inline]
292 |     pub fn get_data(&self) -> &Vec<u8> {
293 |         &self.data
294 |     }
295 | 
296 |     /// Compress this operation and write to `writer`.  The output can then be expanded
297 |     /// back into an equivalent operation using `expand_from()`
298 |     pub fn compress_to<W: Write>(&self, writer: &mut W) -> io::Result<()> {
299 |         // Layout: position (u32), data length (u32), raw data.  `write_all` guards against short writes.
300 |         let mut int_buf = [0;4];
301 |         NetworkEndian::write_u32(&mut int_buf, self.position as u32);
302 |         try!(writer.write_all(&int_buf));
303 |         NetworkEndian::write_u32(&mut int_buf, self.data.len() as u32);
304 |         try!(writer.write_all(&int_buf));
305 |         try!(writer.write_all(&self.data));
306 |         Ok(())
307 |     }
308 | 
309 |     /// Expand this operation from previously compressed data in `reader`.  The data in reader
310 |     /// should have been written using `compress_to()`
311 |     pub fn expand_from<R: Read>(reader: &mut R) -> io::Result<Insert> {
312 |         let mut int_buf = [0;4];
313 |         try!(reader.read_exact(&mut int_buf));
314 |         let position = NetworkEndian::read_u32(&int_buf);
315 |         try!(reader.read_exact(&mut int_buf));
316 |         let data_len = NetworkEndian::read_u32(&int_buf) as usize;
317 |         // Zero-filled buffer of exactly `data_len` bytes for `read_exact` to fill.
318 |         let mut data = vec![0; data_len];
319 |         try!(reader.read_exact(&mut data));
320 |         Ok(Insert{
321 |             position: position as usize,
322 |             data: data
323 |         })
324 |     }
325 | 
326 | }
327 | 
328 | impl Delete {
329 |     /// Builds a new `Delete` from a position and length
330 |     #[inline]
331 |     pub fn new(position: usize, len: usize) -> Delete {
332 |         Delete {
333 |             position,
334 |             len,
335 |         }
336 |     }
337 | 
338 |     /// Gets the byte position of this delete operation in its file
339 |     #[inline]
340 |     pub fn get_position(&self) -> usize {
341 |         self.position
342 |     }
343 | 
344 |     /// Gets the length in bytes of this delete operation
345 |     #[inline]
346 |     pub fn get_length(&self) -> usize {
347 |         self.len
348 |     }
349 | 
350 |     /// Compress this operation and write to `writer`.  The output can then be expanded
351 |     /// back into an equivalent operation using `expand_from()`
352 |     pub fn compress_to<W: Write>(&self, writer: &mut W) -> io::Result<()> {
353 |         // Layout: position (u32), then length (u32).  `write_all` guards against short writes.
354 |         let mut int_buf = [0;4];
355 |         NetworkEndian::write_u32(&mut int_buf, self.position as u32);
356 |         try!(writer.write_all(&int_buf));
357 |         NetworkEndian::write_u32(&mut int_buf, self.len as u32);
358 |         try!(writer.write_all(&int_buf));
359 |         Ok(())
360 |     }
361 | 
362 |     /// Expand this operation from previously compressed data in `reader`.  The data in reader
363 |     /// should have been written using `compress_to()`
364 |     pub fn expand_from<R: Read>(reader: &mut R) -> io::Result<Delete> {
365 |         let mut int_buf = [0;4];
366 |         try!(reader.read_exact(&mut int_buf));
367 |         let position = NetworkEndian::read_u32(&int_buf);
368 |         try!(reader.read_exact(&mut int_buf));
369 |         let len = NetworkEndian::read_u32(&int_buf);
370 |         Ok(Delete{
371 |             position: position as usize,
372 |             len: len as usize,
373 |         })
374 |     }
375 | 
376 | }
377 | 
378 | #[cfg(test)]
379 | mod test {
380 |     use super::Diff;
381 | 
382 | 
383 |     // `apply_to_string` applies the inserts first; delete positions then refer to the
384 |     // text as it stands after the inserts and after any earlier deletes.
385 |     #[test]
386 |     fn applying_diff_to_string() {
387 |         let string = "Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much. They were the last people you'd expect to be involved in anything strange or mysterious, because they just didn't hold with such nonsense.";
388 |         let mut diff = Diff::new();
389 |         diff.add_insert(2, vec![115]); // 's'
390 |         diff.add_insert(37, vec![116, 121]); //'ty'
391 |         diff.add_insert(98, vec![97, 98]); // ab
392 |         diff.add_insert(253, vec![109]); // m
393 |         diff.add_delete(35, 1); // 'u'
394 |         diff.add_delete(181, 34);
395 |         diff.add_delete(219, 1);
396 |         let result = diff.apply_to_string(string).unwrap();
397 |         assert_eq!(result, "Mrs. and Mrs. Dursley, of number forty, Privet Drive, were proud to say that they were perfectly abnormal, thank you very much. They were the last people you'd expect to be involved, because they just didn't hold with much nonsense.".to_string());
398 |     }
399 | }
400 | 


--------------------------------------------------------------------------------
/src/string_diff.rs:
--------------------------------------------------------------------------------
  1 | //! Used for finding the minimal set of operations to transform one string into another.
  2 | //!
  3 | //! The primary function of this module is [find diff](fn.find_diff.html).
  4 | use std::mem;
  5 | use std::cmp::max;
  6 | use super::{Diff};
  7 | 
  8 | 
  9 | /// Finds the difference on a character by character level between two strings
 10 | ///
 11 | /// Uses the Hirschberg algorithm (doi: [10.1145/360825.360861](http://dx.doi.org/10.1145/360825.360861))
 12 | /// which operates in `O(x * y)` time and `O(y)` space.  The algorithm finds the minimal set of operations
 13 | /// that will transform 'old' into 'new'.  The 'weight' of each operation is determined by the `scorer.`
 14 | /// For more details about weighting, see the [OperationScore](trait.OperationScore.html) documentation.
 15 | ///
 16 | /// The operations in the returned `Diff` are presented in file order, with offsets assuming the
 17 | /// previous operations have already been performed.  Furthermore, the inserts are assumed to
 18 | /// be performed prior to the deletes.
 19 | ///
 20 | /// # Example
 21 | ///
 22 | /// ```
 23 | /// use rdiff::string_diff::{find_diff, EditDistance};
 24 | /// // Find the difference between meadow and yellowing using the edit distance as the weighting.
 25 | /// let diff = find_diff("meadow", "yellowing", &EditDistance{});
 26 | /// // prints (0, 'y'), (3, 'll') and (9, 'ing')
 27 | /// for insert in diff.inserts() {
 28 | ///     println!("{:?}", insert);
 29 | /// }
 30 | /// // prints (1, 1) and (4, 2)
 31 | /// for delete in diff.deletes() {
 32 | ///     println!("{:?}", delete);
 33 | /// }
 34 | /// assert_eq!("yellowing", diff.apply_to_string("meadow").unwrap());
 35 | /// ```
 36 | pub fn find_diff<S: OperationScore>(old: &str, new: &str, scorer: &S) -> Diff {
 37 |     // `hirschberg` also takes each input reversed; build those once up front.
 38 |     let reversed = |text: &str| text.chars().rev().collect::<String>();
 39 |     let (old_rev, new_rev) = (reversed(old), reversed(new));
 40 |     // Running offsets threaded through the recursion; both start at zero.
 41 |     let (mut insert_index, mut delete_index) = (0, 0);
 42 |     let mut diff = Diff::new();
 43 |     hirschberg(old, new, &old_rev, &new_rev, scorer, &mut diff, &mut insert_index, &mut delete_index);
 44 |     diff }
 45 | 
 46 | /// Adds `$s` to `$diff` as an insert at `*$index`, then advances `*$index` by the number of bytes inserted.
 47 | /// Needed because the string must be converted to bytes before it can be used in the diff
 48 | macro_rules! do_insert {
 49 |     ($s: expr, $index: expr, $diff: expr) => (
 50 |         {
 51 |             let bytes = $s.bytes().collect::<Vec<_> >();
 52 |             let byte_len = bytes.len();
 53 |             $diff.add_insert(*$index, bytes);
 54 |             *$index += byte_len;
 55 |         }
 56 |     );
 57 | }
 58 | 
 59 | /// Handles updating the diff and relevant indexes when deleting a substring: records a delete of `$length`
 60 | /// at `*$insert_index - *$delete_index` (the insert offset less what was already deleted), then advances both indexes.
 61 | macro_rules! do_delete {
 62 |     ($length: expr, $delete_index: expr, $insert_index: expr, $diff: expr) => (
 63 |         {
 64 |             $diff.add_delete(*$insert_index - *$delete_index, $length);
 65 |             *$delete_index += $length;
 66 |             *$insert_index += $length;
 67 |         }
 68 |     );
 69 | }
 70 | 
 71 | /// Uses the Hirschberg algorithm to calculate the optimal set of operations to transform 'old' into 'new'.
 72 | /// The only real inputs are 'old', 'new' and `scorer`.  `old_rev` and `new_rev` are the reversed strings,
 73 | /// cached so that 'old' and 'new' don't need to be reversed for every recursion of the algorithm.
 74 | /// `diff` is the output of the algorithm; `insert_index` and `delete_index` are intermediate state being passed around.
 75 | /// NOTE(review): byte lengths (`len()`) and char indexes (`chars().position()`) are both used for slicing — looks ASCII-only; confirm.
 76 | fn hirschberg<S: OperationScore>(old: &str, new: &str, old_rev: &str, new_rev: &str, scorer: &S, diff: &mut Diff, insert_index: &mut usize, delete_index: &mut usize) {
 77 |     trace!("'{}' ({}) '{}' ({})", old, old_rev, new, new_rev);
 78 |     // We're going to use these lengths over and over again, we might as well cache them.
 79 |     let old_len = old.len();
 80 |     let new_len = new.len();
 81 | 
 82 |     // If one of the two strings has length 0, then it's trivial to transform one into the other
 83 |     if old_len == 0 {
 84 |         do_insert!(new, insert_index, diff);
 85 |     } else if new_len == 0 {
 86 |         do_delete!(old_len, delete_index, insert_index, diff);
 87 |     }
 88 |     // If old is length 1, then there are two cases:
 89 |     else if old_len == 1 {
 90 |         let old_char = old.chars().next().unwrap();
 91 |         match new.chars().position(|c| c == old_char) {
 92 |             // Either new contains old, in which case
 93 |             Some(position) => {
 94 |                 // We insert whatever is on the left of old in new
 95 |                 if position > 0 {
 96 |                     do_insert!(new[..position], insert_index, diff);
 97 |                 }
 98 |                 *insert_index += 1;
 99 |                 // and we insert whatever is on the right of old in new
100 |                 if new_len - position > 1 {
101 |                     do_insert!(new[position + 1..], insert_index, diff);
102 |                 }
103 |             } None => {
104 |                 //or new does not contain old, in which case
105 |                 // we simply delete old and insert new
106 |                 do_insert!(new, insert_index, diff);
107 |                 do_delete!(1, delete_index, insert_index, diff);
108 |             }
109 |         }
110 |     }
111 |     // If new is length 1, then there are two cases:
112 |     else if new_len == 1 {
113 |         let new_char = new.chars().next().unwrap();
114 |         match old.chars().position(|c| c == new_char) {
115 |             // either old contains new, in which case
116 |             Some(position) => {
117 |                 // We delete everything in old to the left of new
118 |                 if position > 0 {
119 |                     do_delete!(position, delete_index, insert_index, diff);
120 |                 }
121 |                 *insert_index += 1;
122 |                 // and we delete everything in old to the right of new
123 |                 if old_len - position > 1 {
124 |                     let delete_len = old_len - position - 1;
125 |                     do_delete!(delete_len, delete_index, insert_index, diff);
126 |                 }
127 |             } None => {
128 |                 // or old does not contain new, in which case we simply insert new and delete
129 |                 // everything that was previously in old
130 |                 do_insert!(new, insert_index, diff);
131 |                 do_delete!(old_len, delete_index, insert_index, diff);
132 |             }
133 |         }
134 |     } else {
135 |         // If it's not trivial, then we recurse until it is.
136 |         // We begin by dividing old in half.
137 |         let old_mid = old_len / 2;
138 |         // We then find the index in new where splitting the string will give us the
139 |         // highest possible score.  This index is the point where the trace of the edit
140 |         // operations performed is guaranteed to cross.
141 |         let score_l = nw_score(&old[..old_mid], new, scorer);
142 |         let score_r = nw_score(&old_rev[..old_len - old_mid], new_rev, scorer);
143 |         let new_mid = score_l.iter()
144 |                             .zip(score_r.iter().rev())
145 |                             .map(|(l, r)| l + r)
146 |                             .zip(0..new_len + 1).max().unwrap().1;
147 |         // We then recurse on the left side of old and new
148 |         hirschberg(&old[..old_mid], &new[..new_mid], &old_rev[old_len - old_mid..], &new_rev[new_len - new_mid..], scorer, diff, insert_index, delete_index);
149 |         // and the right side of old and new
150 |         hirschberg(&old[old_mid..], &new[new_mid..], &old_rev[..old_len - old_mid], &new_rev[..new_len - new_mid], scorer, diff, insert_index, delete_index);
151 | 
152 | 
153 |     }
154 | 
155 | }
156 | 
157 | /// Used to calculate the score for each operation that
158 | /// will be performed.  The score can be static, or it can
159 | /// vary based on which character is being deleted, inserted or substituted.
160 | /// It is highly recommended to inline the implementation of these methods.  Higher totals win: `nw_score` keeps the maximum.
161 | pub trait OperationScore {
162 |     /// The score for inserting character `c` into the string
163 |     fn insert_score(&self, c: char) -> i32;
164 |     /// The score for deleting character `c` from the string
165 |     fn delete_score(&self, c: char) -> i32;
166 |     /// The score for replacing character `old` with character `new`
167 |     fn substitution_score(&self, old: char, new: char) -> i32;
168 |     /// The score for when a character in one string matches the character in the other string
169 |     fn match_score(&self, c: char) -> i32;
170 | }
171 | 
172 | /// Used as the classical definition of edit distance.
173 | ///
174 | /// That is:
175 | ///
176 | /// * Insert is cost -1
177 | /// * Delete is cost -1
178 | /// * Substitution is cost -2 (an insert + a delete)
179 | /// * Matching is cost 0
180 | pub struct EditDistance;
181 | 
182 | impl OperationScore for EditDistance {
183 |     #[inline]
184 |     fn insert_score(&self, _: char) -> i32 {
185 |         -1 // flat cost, whatever the character
186 |     }
187 | 
188 |     #[inline]
189 |     fn delete_score(&self, _: char) -> i32 {
190 |         -1 // flat cost, whatever the character
191 |     }
192 | 
193 |     #[inline]
194 |     fn substitution_score(&self, _: char, _: char) -> i32 {
195 |         -2 // equal to one insert plus one delete
196 |     }
197 | 
198 |     #[inline]
199 |     fn match_score(&self, _: char) -> i32 {
200 |         0 // matching characters cost nothing
201 |     }
202 | }
203 | 
204 | /// Computes Needleman-Wunsch scores for transforming 'old' into each prefix of 'new',
205 | /// with the individual operations weighted by `scorer`.
206 | ///
207 | /// The table is built one row per character of 'old', keeping only the previous row
208 | /// and the row being filled in.  The returned vector is the final row: the element at
209 | /// index `i` is the best score for transforming all of 'old' into the first `i`
210 | /// characters of 'new', so it holds one more element than 'new' has characters.
211 | /// Every cell keeps the maximum of its three candidate scores.
212 | fn nw_score<S: OperationScore>(old: &str, new: &str, scorer: &S) -> Vec<i32> {
213 |     trace!("nw_score for '{}' - '{}'", old, new);
214 |     // Byte length of 'new' plus one; used purely as a capacity hint for the rows.
215 |     let row_len = new.len() + 1;
216 |     // First row: the empty prefix of 'old' can only become a prefix of 'new' through
217 |     // insertions, so each entry is the running total of the insert scores.
218 |     let mut previous = Vec::with_capacity(row_len);
219 |     let mut current = Vec::with_capacity(row_len);
220 |     previous.push(0);
221 |     let mut running = 0;
222 |     for new_char in new.chars() {
223 |         running += scorer.insert_score(new_char);
224 |         previous.push(running);
225 |     }
226 |     trace!("{:?}", previous);
227 |     for old_char in old.chars() {
228 |         // Column 0: every character of 'old' seen so far has been deleted.
229 |         current.push(previous[0] + scorer.delete_score(old_char));
230 |         for (column, new_char) in new.chars().enumerate() {
231 |             let diagonal = if old_char == new_char { scorer.match_score(old_char) } else { scorer.substitution_score(old_char, new_char) };
232 |             let via_diagonal = previous[column] + diagonal;
233 |             let via_delete = previous[column + 1] + scorer.delete_score(old_char);
234 |             let via_insert = current[column] + scorer.insert_score(new_char);
235 |             current.push(max(via_diagonal, max(via_delete, via_insert)));
236 |         }
237 |         trace!("{:?}", current);
238 |         // The finished row becomes the reference row; start the next one empty.
239 |         previous = mem::replace(&mut current, Vec::with_capacity(row_len));
240 |     }
241 |     previous
242 | }
243 | 
// Unit tests for the scoring row (`nw_score`) and for the diffs produced by `find_diff`.
#[cfg(test)]
mod test {
    extern crate env_logger;
    use super::{nw_score, find_diff, EditDistance, OperationScore};
    use super::super::{Insert, Delete, Diff};

    // Alternative scorer used alongside `EditDistance`; its values are defined in
    // the `OperationScore` impl further down.
    struct ExampleScores;

    // Table-style assertion for `find_diff`.  The arguments are separated by `|`:
    //
    //     start | new | scorer | (insert position, inserted text), ... | (delete position, delete length), ...
    //
    // Everything is forwarded to `check_diff_workaround!`, which does the checking.
    macro_rules! check_diff {
        ($start: tt |  $new: tt | $scorer: tt | $(($insert_pos : tt, $insert_value: tt)),* | $(($delete_pos: tt, $delete_len: tt)),*) => {
            {
                check_diff_workaround!($start; $new; $scorer; $(($insert_pos, $insert_value)),*; $(($delete_pos, $delete_len)),*)
            }
        };
    }

    // Works around a bug in the implementation of the `tt` macro fragment type: the
    // tokens currently have to be passed on as `expr` to a second macro, or the
    // compiler rejects them.  See https://github.com/rust-lang/rust/issues/5846
    //
    // Runs `find_diff($start, $new, &$scorer)` and asserts two things:
    //   1. the result equals a `Diff` built from the listed inserts and deletes, and
    //   2. applying that diff to `$start` yields `$new`.
    macro_rules! check_diff_workaround {
        ($start: expr ; $new: expr ; $scorer: expr; $(($insert_pos : tt, $insert_value: tt)),* ; $(($delete_pos: tt, $delete_len: tt)),*) => {
            {
                let diff = find_diff($start, $new, &$scorer);
                assert_eq!(Diff {
                    inserts: vec![$(Insert{position: $insert_pos, data: $insert_value.bytes().collect()}),*],
                    deletes: vec![$(Delete{position: $delete_pos, len: $delete_len}),*]
                }, diff);
                assert_eq!(diff.apply_to_string($start).unwrap(), $new.to_string());
            }
        };
    }

    // From the wikipedia example at https://en.wikipedia.org/wiki/Hirschberg%27s_algorithm
    // Insert and delete score -2, substitution -1, and a match +2.
    impl OperationScore for ExampleScores {
        #[inline]
        fn insert_score(&self, _: char) -> i32 {
            -2
        }

        #[inline]
        fn delete_score(&self, _: char) -> i32 {
            -2
        }

        #[inline]
        fn substitution_score(&self, _: char, _: char) -> i32 {
            -1
        }

        #[inline]
        fn match_score(&self, _: char) -> i32 {
            2
        }
    }

    // Checks the final score row returned by `nw_score` under both scorers.
    #[test]
    fn score() {
        assert_eq!(nw_score("ACGC", "CGTAT", &EditDistance{}), vec![-4, -3, -2, -3, -4, -5]);
        assert_eq!(nw_score("AGTA", "TATGC", &EditDistance{}), vec![-4, -3, -2, -3, -4, -5]);

        assert_eq!(nw_score("ACGC", "CGTAT", &ExampleScores{}), vec![-8, -4, 0, 1, -1, -3]);
        assert_eq!(nw_score("AGTA", "TATGC", &ExampleScores{}), vec![-8, -4, 0, -2, -1, -3]);
    }

    // Checks the exact inserts and deletes `find_diff` reports, from single words
    // up to a two-line text.
    #[test]
    fn do_find_diff() {
        // Presumably enables the `trace!` output while debugging; kept disabled.
        //env_logger::init().unwrap();
        check_diff!(
            "kitten" |
            "kettle" |
            EditDistance |
            (1, "e"), (5, "l") |
            (2, 1), (6, 1)
        );
        check_diff!(
            "meadow" |
            "yellowing" |
            EditDistance |
            (0, "y"), (3, "ll"), (9, "ing") |
            (1, 1), (4, 2)
        );

        // A pure deletion: the insert list is empty.
        check_diff!(" I've" |
                    " I" |
                    EditDistance |
                    |
                    (2, 3)
                );

        check_diff!(" I've got a new place" |
                    " I found a new place" |
                    EditDistance |
                    (6, "f"), (9, "und") |
                    (2, 3), (4, 1), (8, 1)
                );
        check_diff!(
            "Since my baby left me I've got a new place to dwell\nI walk down a lonely street to Heartbreak Hotel." |
            "Since my baby left me I found a new place to dwell\nDown at the end of 'Lonely Street' to 'Heartbreak Hotel.'" |
            EditDistance |
            (27, "f"), (30, "und"), (56, "Down"), (64, "t the"), (72, "en"), (75, " "), (77, "f"), (81, "'L"), (92, "S"), (99, "'"),  (104, "'"), (122, "'") |
            (23, 3), (25, 1), (29, 1),(55, 1), (56, 1), (62, 2), (69, 2), (72, 3), (79, 1)
        );
    }
}
347 | 


--------------------------------------------------------------------------------
/src/window.rs:
--------------------------------------------------------------------------------
  1 | use std::io::{Read, Result};
  2 | use std::mem;
  3 | use std::cmp::min;
  4 | use ::Window;
  5 | 
  6 | 
  7 | impl<R:Read> Window<R> {
  8 |     pub fn new(mut reader:R, block_size: usize) -> Result<Window<R>> {
  9 |         let mut front = vec!(0;block_size);
 10 |         let mut back = vec!(0;block_size);
 11 |         let size = try!(reader.read(front.as_mut_slice()));
 12 |         unsafe {
 13 |             front.set_len(size);
 14 |         }
 15 |         let size = try!(reader.read(back.as_mut_slice()));
 16 |         unsafe {
 17 |             back.set_len(size);
 18 |         }
 19 |         Ok(Window {
 20 |             front,
 21 |             back,
 22 |             block_size,
 23 |             offset: 0,
 24 |             reader,
 25 |             bytes_read: 0
 26 |         })
 27 |     }
 28 | 
 29 |     pub fn advance(&mut self) -> Result<(Option<u8>, Option<u8>)> {
 30 |         if self.front.len() == 0 {
 31 |             return Ok((None, None));
 32 |         }
 33 | 
 34 |         if self.offset >= self.front.len() {
 35 |             if self.back.len() == 0 {
 36 |                 return Ok((None, None));
 37 |             }
 38 |             try!(self.load_next_block());
 39 |         }
 40 |         let tail = self.front[self.offset];
 41 |         let head = self.get_head();
 42 |         self.offset += 1;
 43 |         self.bytes_read += 1;
 44 |         Ok((Some(tail), head))
 45 |     }
 46 | 
 47 |     fn get_head(&self) -> Option<u8> {
 48 |         let head_index = self.offset + self.block_size - self.front.len();
 49 |         if head_index >= self.back.len() {
 50 |             return None;
 51 |         }
 52 |         return Some(self.back[head_index]);
 53 |     }
 54 | 
 55 |     fn load_next_block(&mut self) -> Result<()> {
 56 |         // We've gone past the end of the front half
 57 |         self.front = mem::replace(&mut self.back, vec!(0;self.block_size));
 58 |         let size = try!(self.reader.read(self.back.as_mut_slice()));
 59 |         unsafe{
 60 |             self.back.set_len(size);
 61 |         }
 62 |         self.offset = 0;
 63 |         Ok(())
 64 |     }
 65 | 
 66 |     pub fn frame<'a>(&'a self) -> (&'a [u8], &'a [u8]) {
 67 |         let front_offset = min(self.offset, self.front.len());
 68 |         let back_offset = min(self.offset, self.back.len());
 69 |         (&self.front[front_offset..], &self.back[..back_offset])
 70 |     }
 71 | 
 72 |     pub fn frame_size(&self) -> usize {
 73 |         self.front.len() + self.back.len() - self.offset
 74 |     }
 75 | 
 76 |     pub fn on_boundry(&self) -> bool {
 77 |         self.offset == 0 || self.offset == self.front.len()
 78 |     }
 79 | 
 80 |     pub fn get_bytes_read(&self) -> usize {
 81 |         self.bytes_read
 82 |     }
 83 | }
 84 | 
 85 | #[cfg(test)]
 86 | mod test {
 87 |     use super::super::Window;
 88 |     use std::io::Cursor;
 89 |     #[test]
 90 |     fn frame_iterator() {
 91 |         let mut window_basic = Window::new(Cursor::new(vec![1, 2, 3, 4, 5, 6 ,7, 8, 9, 10]), 5).unwrap();
 92 |         //assert_eq!(window_basic.frame().map(|a| *a).collect::<Vec<u8>>(), vec![1, 2, 3, 4, 5]);
 93 |         assert_eq!(window_basic.frame(), (&[1, 2, 3, 4, 5][..], &[][..]));
 94 | 
 95 |         window_basic.advance().unwrap();
 96 |         // assert_eq!(window_basic.frame().map(|a| *a).collect::<Vec<u8>>(), vec![2, 3, 4, 5, 6]);
 97 |         assert_eq!(window_basic.frame(), (&[2, 3, 4, 5][..], &[6][..]));
 98 | 
 99 |         window_basic.advance().unwrap();
100 |         window_basic.advance().unwrap();
101 |         window_basic.advance().unwrap();
102 |         window_basic.advance().unwrap();
103 |         assert_eq!(window_basic.frame(), (&[][..], &[6, 7, 8, 9, 10][..]));
104 | 
105 | 
106 |         window_basic.advance().unwrap();
107 |         assert_eq!(window_basic.frame(), (&[7, 8, 9, 10][..], &[][..]));
108 | 
109 |         window_basic.advance().unwrap();
110 |         window_basic.advance().unwrap();
111 |         window_basic.advance().unwrap();
112 |         assert_eq!(window_basic.frame(), (&[10][..], &[][..]));
113 | 
114 |          window_basic.advance().unwrap();
115 |         assert_eq!(window_basic.frame(), (&[][..], &[][..]));
116 | 
117 | 
118 |         let window_too_small = Window::new(Cursor::new(vec![1, 2, 3, 4]), 5).unwrap();
119 |         assert_eq!(window_too_small.frame(), (&[1, 2, 3, 4][..], &[][..]));
120 | 
121 |         let window_empty = Window::new(Cursor::new(vec![]), 5).unwrap();
122 |         assert_eq!(window_empty.frame(), (&[][..], &[][..]));
123 | 
124 |         let mut window_bigger = Window::new(Cursor::new(vec![1, 2, 3, 4, 5, 6 ,7, 8, 9, 10, 11, 12]), 5).unwrap();
125 |         assert_eq!(window_bigger.frame(), (&[1, 2, 3, 4, 5][..], &[][..]));
126 |         window_bigger.advance().unwrap();
127 |         window_bigger.advance().unwrap();
128 |         window_bigger.advance().unwrap();
129 |         window_bigger.advance().unwrap();
130 |         window_bigger.advance().unwrap();
131 |         window_bigger.advance().unwrap();
132 |         assert_eq!(window_bigger.frame(), (&[7, 8, 9, 10][..], &[11][..]));
133 | 
134 |         window_bigger.advance().unwrap();
135 |         assert_eq!(window_bigger.frame(), (&[8, 9, 10][..], &[11, 12][..]));
136 |         window_bigger.advance().unwrap();
137 |         assert_eq!(window_bigger.frame(), (&[9, 10][..], &[11, 12][..]));
138 |         window_bigger.advance().unwrap();
139 |         assert_eq!(window_bigger.frame(), (&[10][..], &[11, 12][..]));
140 |         window_bigger.advance().unwrap();
141 |         assert_eq!(window_bigger.frame(), (&[][..], &[11, 12][..]));
142 |         window_bigger.advance().unwrap();
143 |         assert_eq!(window_bigger.frame(), (&[12][..], &[][..]));
144 | 
145 |     }
146 |     #[test]
147 |     fn advance() {
148 |         let mut window_basic = Window::new(Cursor::new(vec![1, 2, 3, 4, 5, 6 ,7, 8, 9, 10]), 5).unwrap();
149 |         assert_eq!(window_basic.advance().unwrap(), (Some(1), Some(6)));
150 |         assert_eq!(window_basic.advance().unwrap(), (Some(2), Some(7)));
151 |         assert_eq!(window_basic.advance().unwrap(), (Some(3), Some(8)));
152 |         assert_eq!(window_basic.advance().unwrap(), (Some(4), Some(9)));
153 |         assert_eq!(window_basic.advance().unwrap(), (Some(5), Some(10)));
154 |         assert_eq!(window_basic.advance().unwrap(), (Some(6), None));
155 |         assert_eq!(window_basic.advance().unwrap(), (Some(7), None));
156 |         assert_eq!(window_basic.advance().unwrap(), (Some(8), None));
157 |         assert_eq!(window_basic.advance().unwrap(), (Some(9), None));
158 |         assert_eq!(window_basic.advance().unwrap(), (Some(10), None));
159 |         assert_eq!(window_basic.advance().unwrap(), (None, None));
160 | 
161 |         let mut window_huge = Window::new(Cursor::new(vec![1, 2, 3, 4, 5, 6 ,7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), 5).unwrap();
162 |         assert_eq!(window_huge.advance().unwrap(), (Some(1), Some(6)));
163 |         assert_eq!(window_huge.advance().unwrap(), (Some(2), Some(7)));
164 |         assert_eq!(window_huge.advance().unwrap(), (Some(3), Some(8)));
165 |         assert_eq!(window_huge.advance().unwrap(), (Some(4), Some(9)));
166 |         assert_eq!(window_huge.advance().unwrap(), (Some(5), Some(10)));
167 |         assert_eq!(window_huge.advance().unwrap(), (Some(6), Some(11)));
168 |         assert_eq!(window_huge.advance().unwrap(), (Some(7), Some(12)));
169 |         assert_eq!(window_huge.advance().unwrap(), (Some(8), Some(13)));
170 |         assert_eq!(window_huge.advance().unwrap(), (Some(9), Some(14)));
171 |         assert_eq!(window_huge.advance().unwrap(), (Some(10), Some(15)));
172 |         assert_eq!(window_huge.advance().unwrap(), (Some(11), Some(16)));
173 |         assert_eq!(window_huge.advance().unwrap(), (Some(12), Some(17)));
174 |         assert_eq!(window_huge.advance().unwrap(), (Some(13), Some(18)));
175 |         assert_eq!(window_huge.advance().unwrap(), (Some(14), None));
176 |         assert_eq!(window_huge.advance().unwrap(), (Some(15), None));
177 |         assert_eq!(window_huge.advance().unwrap(), (Some(16), None));
178 |         assert_eq!(window_huge.advance().unwrap(), (Some(17), None));
179 |         assert_eq!(window_huge.advance().unwrap(), (Some(18), None));
180 |         assert_eq!(window_huge.advance().unwrap(), (None, None));
181 | 
182 |         let mut window_empty = Window::new(Cursor::new(vec![]), 5).unwrap();
183 |         assert_eq!(window_empty.advance().unwrap(), (None, None));
184 | 
185 |         let mut window_too_small = Window::new(Cursor::new(vec![1, 2, 3, 4]), 5).unwrap();
186 |         assert_eq!(window_too_small.advance().unwrap(), (Some(1), None));
187 |         assert_eq!(window_too_small.advance().unwrap(), (Some(2), None));
188 |         assert_eq!(window_too_small.advance().unwrap(), (Some(3), None));
189 |         assert_eq!(window_too_small.advance().unwrap(), (Some(4), None));
190 |         assert_eq!(window_too_small.advance().unwrap(), (None, None));
191 |         assert_eq!(window_too_small.advance().unwrap(), (None, None));
192 |         assert_eq!(window_too_small.advance().unwrap(), (None, None));
193 |     }
194 | }
195 | 


--------------------------------------------------------------------------------