├── .travis.yml
├── Cargo.toml
├── .gitignore
├── LICENSE
├── README.md
└── src
    ├── ringbuffer.rs
    └── main.rs


/.travis.yml:
--------------------------------------------------------------------------------
1 | language: rust
2 | rust: beta
3 | sudo: false
4 | 
5 | script:
6 |  - cargo test


--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "pipebuffer"
3 | version = "0.0.1"
4 | authors = [ "Tim Fennell <tfenne@tfenne.com>" ]
5 | 
6 | [dependencies]
7 | clap = "2"
8 | regex = "0.1"
9 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Compiled files
 2 | *.o
 3 | *.so
 4 | *.rlib
 5 | *.dll
 6 | 
 7 | # Executables
 8 | *.exe
 9 | 
10 | # Generated by Cargo
11 | /target/
12 | 
13 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
14 | # More information here http://doc.crates.io/guide.html#cargotoml-vs-cargolock
15 | Cargo.lock
16 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 Tim Fennell
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # pipebuffer
 2 | [![Build Status](https://travis-ci.org/tfenne/pipebuffer.svg?branch=master)](https://travis-ci.org/tfenne/pipebuffer)
 3 | [![Language](http://img.shields.io/badge/language-rust-blue.svg)](http://www.rust-lang.org/)
 4 | [![License](https://img.shields.io/badge/license-MIT-A31F34.svg)](LICENSE)
 5 | 
 6 | A simple command line program for buffering `stdin`/`stdout` between piped processes when the operating system's pipe buffer is insufficient.  Modern Linux limits the buffer of each pipe in a pipeline to 64 KiB by default, which can cause bottlenecks when working with processes with "lumpy" IO profiles.
 7 | 
 8 | `pipebuffer` is particularly useful when dealing with large volumes of data and a mix of processes that work in "chunks" of data and more stream-oriented processes.
 9 | 
10 | ## Usage
11 | 
12 | To use, you just replace:
13 | ```bash
14 | foo | bar
15 | ```
16 | with
17 | ```bash
18 | foo | pipebuffer | bar
19 | ```
20 | or 
21 | ```bash
22 | foo | pipebuffer --size=512m | bar
23 | ```
24 | 
25 | And, of course, you can use many `pipebuffer`s together:
26 | ```bash
27 | foo | pipebuffer --size=128m | bar | pipebuffer --size=64m | splat | pipebuffer --size=1g | whee
28 | ```
29 | 
30 | ## License
31 | 
32 | `pipebuffer` is open source software released under the [MIT License](LICENSE).
33 | 
34 | ## Building
35 | 
36 | `pipebuffer` is written in [Rust](https://www.rust-lang.org/) and currently requires Rust 1.7 (beta) or greater.  You'll need Rust installed.  You can [download here](https://www.rust-lang.org/downloads.html), or run the first command below:
37 | 
38 | ```bash
39 | # Optionally install Rust: if 1.7 is stable, which it should be after ~3rd March 2016, omit '--channel=beta'
40 | curl -sSf https://static.rust-lang.org/rustup.sh | sh -s -- --channel=beta
41 | 
42 | # Clone the repo
43 | git clone https://github.com/tfenne/pipebuffer.git
44 | 
45 | # Build and run the tests
46 | pushd pipebuffer && (cargo test; cargo build --release); popd
47 | 
48 | # Produces executable at ./pipebuffer/target/release/pipebuffer
49 | ```
50 | 


--------------------------------------------------------------------------------
/src/ringbuffer.rs:
--------------------------------------------------------------------------------
  1 | // The MIT License (MIT)
  2 | //
  3 | // Copyright (c) 2016 Tim Fennell
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | //! A module that provides a generic ring buffer.
 24 | 
 25 | use std::cmp;
 26 | use std::clone::Clone;
 27 | 
 28 | /// Implementation of a non-blocking, fixed size ring-buffer.
 29 | /// Allocates enough space on the heap to store `size` items.  Provides
 30 | /// non-blocking methods to `put` into the buffer from a slice and `get`
 31 | /// from the buffer into a slice.
 32 | ///
 33 | /// When full (as reported by `is_full()`) the buffer will accept calls to
 34 | /// `put`, but will copy nothing into the target slice and will report that
 35 | /// zero items were put.  Similarly, when `empty`, calls to `get` will return
 36 | /// immediately and report that zero items were retrieved.
 37 | ///
 38 | /// After calls to `close()`, further attempts to put into the buffer will
 39 | /// cause panics, but `gets()` continue to be allowed in order to let the 
 40 | /// buffer be drained.
 41 | pub struct RingBuffer<T: Clone> {
 42 |     capacity          : usize,
 43 |     buffer            : Vec<T>,
 44 |     write_pos         : usize,
 45 |     available_to_write: usize,
 46 |     read_pos          : usize,
 47 |     available_to_read : usize,
 48 |     closed            : bool
 49 | }
 50 | 
 51 | impl<T: Clone> RingBuffer<T> {
 52 |     /// Constructs a new RingBuffer with capacity `size`.
 53 |     pub fn new (size: usize) -> RingBuffer<T> {
 54 |         let mut buf = RingBuffer {
 55 |             capacity           : size,
 56 |             buffer             : Vec::with_capacity(size),
 57 |             write_pos          : 0,
 58 |             available_to_write : size,
 59 |             read_pos           : 0,
 60 |             available_to_read  : 0,
 61 |             closed             : false
 62 |         };
 63 |         
 64 |         // Push 'size' up to capacity so we can use slices without having to fill first.
 65 |         unsafe { buf.buffer.set_len(size); }
 66 |         buf
 67 |     }
 68 |     
 69 |     /// Attempts to `put` items from the slice into the buffer. The only guarantees
 70 |     /// made by this method are:
 71 |     ///
 72 |     /// 1. That if the buffer is not full, one or more items will be put
 73 |     /// 2. That the number of items put will be reported correctly by the return
 74 |     /// 
 75 |     /// Specifically, `put` does not guarantee that all items from the `input` slice
 76 |     /// will be put, even in the case where there _is_ capacity for all the `input` items.
 77 |     /// For this reason, `put` should generally be called in a loop until all items have
 78 |     /// been put.
 79 |     ///
 80 |     /// # Return
 81 |     /// The number of items, `>= 0`, that were put into the buffer.
 82 | 
 83 |     /// # Panics
 84 |     /// Will panic if invoked on a closed buffer.
 85 |     pub fn put(&mut self, input: &[T]) -> usize {
 86 |         if self.closed { panic!("Cannot write to closed buffer."); }
 87 |         if self.available_to_write == 0 { return 0; }
 88 |         
 89 |         let distance_to_end = self.capacity - self.write_pos;
 90 |         let available       = cmp::min(distance_to_end, self.available_to_write);
 91 |         let length          = cmp::min(available, input.len());
 92 |         let target_slice = &mut self.buffer[self.write_pos..self.write_pos+length];
 93 |         let source_slice = &input[0..length];
 94 |         target_slice.clone_from_slice(source_slice);
 95 |         self.available_to_write -= length;
 96 |         self.available_to_read  += length;
 97 |         self.write_pos           = (self.write_pos + length) % self.capacity;        
 98 |         length
 99 |     }
100 |     
101 |     /// Attempts to `get` items from the buffer and put them into the slice.
102 |     /// The only guarantees made by this method are:
103 |     ///
104 |     /// 1. That if the buffer is not empty, one or more items will be fetched
105 |     /// 2. That the number of items fetched will be reported correctly by the return
106 |     /// 
107 |     /// Specifically, `get` does not guarantee that the `output` slice will be filled,
108 |     /// even in the case where there _is_ enough in the buffer to do so.
109 |     /// 
110 |     /// # Return
111 |     /// The number of items, `>= 0`, that were fetched from the buffer.
112 |     pub fn get(&mut self, output: &mut [T]) -> usize {
113 |         let distance_to_end = self.capacity - self.read_pos;
114 |         let available       = cmp::min(distance_to_end, self.available_to_read);
115 |         let length          = cmp::min(available, output.len());
116 |         let source_slice = & self.buffer[self.read_pos..self.read_pos+length];
117 |         let target_slice = &mut output[0..length];
118 |         target_slice.clone_from_slice(source_slice);
119 |         self.available_to_read  -= length;
120 |         self.available_to_write += length;
121 |         self.read_pos = (self.read_pos + length) % self.capacity;
122 |         length
123 |     }
124 |     
125 |     /// Returns true if the buffer is currently empty, implying calls to `get()` will 
126 |     /// yield zero items.
127 |     pub fn is_empty(&self) -> bool { self.available_to_read == 0 }
128 | 
129 |     /// Returns true if the buffer is currently full, implying calls to `put()` will 
130 |     /// consume zero items.
131 |     pub fn is_full(&self) -> bool { self.available_to_write == 0 }
132 |     
133 |     /// Closes the buffer such that future calls to `put()` will panic.
134 |     pub fn close(&mut self) -> () { self.closed = true; }
135 |     
136 |     /// Returns true if the buffer is closed, and false otherwise.
137 |     pub fn is_closed(&self) -> bool { self.closed }
138 | }
139 | 
// Round-trips the same ten bytes through a 100-slot buffer one hundred times, which
// takes the read/write positions around the ring repeatedly.
#[test]
fn test_basic_read_write() {
    let mut buffer : RingBuffer<u8> = RingBuffer::new(100);
    let xs: [u8; 10] = [0,1,2,3,4,5,6,7,8,9];
    for _ in 0..100 {
        // Start every round from a zeroed output so data left over from the previous
        // round cannot hide a `get` that copied nothing.
        let mut ys: [u8; 10] = [0; 10];
        assert_eq!(buffer.put(&xs), xs.len());
        assert_eq!(buffer.get(&mut ys), ys.len());
        assert_eq!(xs, ys);
        assert!(buffer.is_empty());
    }
}
153 | 
// A `put` against a buffer that is already full must report zero items stored.
#[test]
fn test_write_on_full_buffer() {
    let payload: [u8; 10] = [0,1,2,3,4,5,6,7,8,9];
    let mut ring = RingBuffer::<u8>::new(10);

    // The first write uses up the buffer's ten slots ...
    ring.put(&payload);

    // ... so the second one has nowhere to go.
    let accepted = ring.put(&payload);
    assert!(accepted == 0);
}
162 | 
// A `get` against an empty buffer must report zero items and leave the output untouched.
#[test]
fn test_read_on_empty_buffer() {
    let mut ring = RingBuffer::<u8>::new(10);
    let mut sink: [u8; 10] = [7; 10];

    let fetched = ring.get(&mut sink);
    assert!(fetched == 0);

    // Every sentinel byte must still be in place.
    for byte in sink.iter() {
        assert!(*byte == 7);
    }
}


--------------------------------------------------------------------------------
/src/main.rs:
--------------------------------------------------------------------------------
  1 | // The MIT License (MIT)
  2 | //
  3 | // Copyright (c) 2016 Tim Fennell
  4 | //
  5 | // Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | // of this software and associated documentation files (the "Software"), to deal
  7 | // in the Software without restriction, including without limitation the rights
  8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | // copies of the Software, and to permit persons to whom the Software is
 10 | // furnished to do so, subject to the following conditions:
 11 | //
 12 | // The above copyright notice and this permission notice shall be included in all
 13 | // copies or substantial portions of the Software.
 14 | //
 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | // SOFTWARE.
 22 | 
 23 | //! 
 24 | //! Command line program that can be sandwiched between pipes to effectively increase
 25 | //! the size of the pipe buffer.  Since linux pipes are generally limited to `64k` it
 26 | //! is sometimes useful to provide significantly more buffering between programs in a
 27 | //! pipe in order to smooth out any "lumpiness" in the flow of data.
 28 | //! 
 29 | 
 30 | mod ringbuffer;
 31 | 
 32 | #[macro_use] extern crate clap;
 33 | extern crate regex;
 34 | 
 35 | use std::io;
 36 | use std::io::{Read,Write};
 37 | use std::sync::{Arc, Mutex, Condvar};
 38 | use std::thread;
 39 | use clap::{Arg, App};
 40 | use ringbuffer::RingBuffer;
 41 | use regex::Regex;
 42 | 
/// Size, in bytes, of the local scratch array that the reader and the writer thread
/// each use to move data into and out of the shared ring buffer (64 * 1024).
const THREAD_BUFFER_SIZE: usize = 1024 * 64;
 45 | 
 46 | /// Main function that coordinates argument parsing and then delegates to the
 47 | /// `run()` function to do the actual work.
 48 | pub fn main() {
 49 |     let matches =
 50 |         App::new("pipebuffer")
 51 |             .version(crate_version!())
 52 |             .about("A tool to rapidly buffer and re-emit data in unix pipelines.")
 53 |             .arg(Arg::with_name("size")
 54 |                      .short("s").long("size")
 55 |                      .help("The size, in bytes or with k[b]/m[b]/g[b] suffix.")
 56 |                      .default_value("256m"))
 57 |             .get_matches();
 58 | 
 59 |     let buffer_size = match parse_memory(matches.value_of("size").unwrap()) {
 60 |         Some(size) => size,
 61 |         None       => {
 62 |             println!("{}", matches.usage());
 63 |             println!("Error: Argument {} is not a valid size.", matches.value_of("size").unwrap());
 64 |             std::process::exit(1)
 65 |         }
 66 |     };
 67 | 
 68 |     run(buffer_size);
 69 | }
 70 | 
 71 | /// Parses memory unit values from strings. Specifically accepts any value
 72 | /// that is an integer number followed optionally by `k/kb/m/mb/g/gb/p/pb` in
 73 | /// either upper or lower case. If the value can be parsed returns a 
 74 | /// `Some(bytes)`, otherwise returns a None.
 75 | fn parse_memory(s: &str) -> Option<usize> {
 76 |     match Regex::new("^([0-9]+)([kmgp])?b?$").unwrap().captures(&s.to_lowercase()) {
 77 |         None => None,
 78 |         Some(groups) => {
 79 |             let num : Option<usize> = groups.at(1).unwrap().parse().ok();
 80 |             let exp = match groups.at(2) {
 81 |                 Some("k") => 1,
 82 |                 Some("m") => 2,
 83 |                 Some("g") => 3,
 84 |                 Some("p") => 4,
 85 |                 _         => 0
 86 |             };
 87 |             num.map(|n| n * (1024 as usize).pow(exp))
 88 |         }
 89 |     }
 90 | }
 91 | 
 92 | /// Funtion that uses a pair of threads to move data from Stdin to Stdout
 93 | /// with a RungBuffer in the middle.
 94 | fn run(buffer_size: usize) {
 95 |     // The shared ring buffer and the thread handles
 96 |     let ring = Arc::new(Mutex::new(RingBuffer::new(buffer_size)));
 97 |     let cond = Arc::new(Condvar::new());
 98 | 
 99 |     // Setup the writer thread
100 |     let writer_handle = {
101 |         let ring = ring.clone();
102 |         let cond = cond.clone();
103 |         thread::spawn(move || {
104 |             let mut bytes: [u8; THREAD_BUFFER_SIZE] = [0; THREAD_BUFFER_SIZE];
105 |             let mut output = io::stdout();
106 |             'main_loop : loop {
107 |                 let n = {
108 |                     // Lock the buffer, but wait on it if it's empty
109 |                     let mut buffer = ring.lock().unwrap();
110 |                     while buffer.is_empty() {
111 |                         if buffer.is_closed() { break 'main_loop; }
112 |                         else { buffer = cond.wait(buffer).unwrap(); }
113 |                     }
114 | 
115 |                     // Fetch from the buffer, and notify writers if we went from full to not full
116 |                     let was_full = buffer.is_full();
117 |                     let n = buffer.get(&mut bytes);
118 |                     if was_full && n > 0 { cond.notify_one(); }
119 |                     n
120 |                 }; // lock released here
121 | 
122 |                 // Write the data, if any, to stdout
123 |                 let mut start = 0;
124 |                 while start < n { start += output.write(&bytes[start..n]).unwrap(); }
125 |                 output.flush().unwrap();
126 |             }
127 |         })
128 |     };
129 | 
130 |     // Setup this thread as the reader thread
131 |     let mut bytes: [u8; THREAD_BUFFER_SIZE] = [0; THREAD_BUFFER_SIZE];
132 |     let mut input = io::stdin();
133 |     loop {
134 |         let n = input.read(&mut bytes).unwrap();
135 |         let mut buffer = ring.lock().unwrap();
136 |         
137 |         if n == 0 { // input stream is closed
138 |             buffer.close();
139 |             cond.notify_one();
140 |             break; 
141 |         }
142 |         else {
143 |             let mut start = 0;
144 |             while start < n {
145 |                 while buffer.is_full() {
146 |                     buffer = cond.wait(buffer).unwrap();
147 |                 }
148 |                 let was_empty = buffer.is_empty();
149 |                 start += buffer.put(&bytes[start..n]);
150 |                 if was_empty { cond.notify_one(); }
151 |              }
152 |         }
153 |     }
154 |     
155 |     writeln!(&mut io::stderr(), "Attempting to join on the writer.").unwrap();
156 |     writer_handle.join().unwrap();
157 | }
158 | 
159 | ////////////////////////////////////////////////////////////////////////////////
160 | // Tests only beyond this point
161 | ////////////////////////////////////////////////////////////////////////////////
162 | 
// Plain byte counts without a suffix, including one too large to be represented.
#[test]
fn test_parse_mem_bytes() -> () {
    let cases: [(&str, Option<usize>); 4] = [
        ("1",                             Some(1)),
        ("1024",                          Some(1024)),
        ("1000000000",                    Some(1000000000)),
        ("10000000000000000000000000000", None),
    ];

    for &(text, expected) in cases.iter() {
        assert!(parse_memory(text) == expected);
    }
}
170 | 
// Values carrying a k/m/g/p suffix, with and without the trailing `b`, in both cases.
#[test]
fn test_parse_mem_suffixed() -> () {
    const K: usize = 1024;
    const M: usize = 1024 * 1024;
    const G: usize = 1024 * 1024 * 1024;
    const P: usize = 1024 * 1024 * 1024 * 1024;

    let cases: [(&str, usize); 12] = [
        ("1k",    K),
        ("99k",   99 * K),
        ("99kb",  99 * K),
        ("99K",   99 * K),
        ("99KB",  99 * K),
        ("1m",    M),
        ("10m",   10 * M),
        ("101m",  101 * M),
        ("1024m", 1024 * M),
        ("6g",    6 * G),
        ("60g",   60 * G),
        ("1p",    P),
    ];

    for &(text, bytes) in cases.iter() {
        assert!(parse_memory(text) == Some(bytes));
    }
}
189 | 
// Inputs that are not a valid size must all be rejected.
#[test]
fn test_parse_mem_fails() -> () {
    let rejected = [
        "",
        "k",
        "kb",
        "foo",
        "not1024m",
        "-12g",
        "12x",
        "7y",
        "1024x1024",
        "1024mi",
    ];

    for text in rejected.iter() {
        assert!(parse_memory(text) == None);
    }
}
203 | 


--------------------------------------------------------------------------------