├── .travis.yml ├── Cargo.toml ├── .gitignore ├── LICENSE ├── README.md └── src ├── ringbuffer.rs └── main.rs /.travis.yml: -------------------------------------------------------------------------------- 1 | language: rust 2 | rust: beta 3 | sudo: false 4 | 5 | script: 6 | - cargo test -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "pipebuffer" 3 | version = "0.0.1" 4 | authors = [ "Tim Fennell " ] 5 | 6 | [dependencies] 7 | clap = "2" 8 | regex = "0.1" 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled files 2 | *.o 3 | *.so 4 | *.rlib 5 | *.dll 6 | 7 | # Executables 8 | *.exe 9 | 10 | # Generated by Cargo 11 | /target/ 12 | 13 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 14 | # More information here http://doc.crates.io/guide.html#cargotoml-vs-cargolock 15 | Cargo.lock 16 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Tim Fennell 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pipebuffer 2 | [![Build Status](https://travis-ci.org/tfenne/pipebuffer.svg?branch=master)](https://travis-ci.org/tfenne/pipebuffer) 3 | [![Language](http://img.shields.io/badge/language-rust-blue.svg)](http://www.rust-lang.org/) 4 | [![License](https://img.shields.io/badge/license-MIT-A31F34.svg)](LICENSE) 5 | 6 | A simple command line program for buffering `stdin`/`stdout` between piped processes when the operating system's pipe buffer is insufficient. Modern linux limits the buffer between pipes in a pipeline to `64kb`, which can cause bottlenecks when working with processes with "lumpy" IO profiles. 7 | 8 | `pipebuffer` is particularly useful when dealing with large volumes of data and a mix of processes that work in "chunks" of data and more stream-oriented processes. 9 | 10 | ## Usage 11 | 12 | To use, you just replace: 13 | ```bash 14 | foo | bar 15 | ``` 16 | with 17 | ```bash 18 | foo | pipebuffer | bar 19 | ``` 20 | or 21 | ```bash 22 | foo | pipebuffer --size=512m | bar 23 | ``` 24 | 25 | And, of course, you can use many `pipebuffer`s together: 26 | ```bash 27 | foo | pipebuffer --size=128m | bar | pipebuffer --size=64m | splat | pipebuffer --size=1g | whee 28 | ``` 29 | 30 | ## License 31 | 32 | `pipebuffer` is open source software released under the [MIT License](LICENSE). 
33 | 34 | ## Building 35 | 36 | `pipebuffer` is written in [Rust](https://www.rust-lang.org/) and currently requires Rust 1.7 (beta) or greater. You'll need Rust installed. You can [download here](https://www.rust-lang.org/downloads.html), or run the first command below: 37 | 38 | ```bash 39 | # Optionally install Rust: if 1.7 is stable, which it should be after ~3rd March 2016, omit '--channel=beta' 40 | curl -sSf https://static.rust-lang.org/rustup.sh | sh -s -- --channel=beta 41 | 42 | # Clone the repo 43 | git clone https://github.com/tfenne/pipebuffer.git 44 | 45 | # Build and run the tests 46 | pushd pipebuffer && (cargo test; cargo build --release); popd 47 | 48 | # Produces executable at ./pipebuffer/target/release/pipebuffer 49 | ``` 50 | -------------------------------------------------------------------------------- /src/ringbuffer.rs: -------------------------------------------------------------------------------- 1 | // The MIT License (MIT) 2 | // 3 | // Copyright (c) 2016 Tim Fennell 4 | // 5 | // Permission is hereby granted, free of charge, to any person obtaining a copy 6 | // of this software and associated documentation files (the "Software"), to deal 7 | // in the Software without restriction, including without limitation the rights 8 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | // copies of the Software, and to permit persons to whom the Software is 10 | // furnished to do so, subject to the following conditions: 11 | // 12 | // The above copyright notice and this permission notice shall be included in all 13 | // copies or substantial portions of the Software. 14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
/// Implementation of a non-blocking, fixed size ring-buffer.
/// Reserves enough space on the heap to store `size` items. Provides
/// non-blocking methods to `put` into the buffer from a slice and `get`
/// from the buffer into a slice.
///
/// When full (as reported by `is_full()`) the buffer will accept calls to
/// `put`, but will copy nothing out of the input slice and will report that
/// zero items were put. Similarly, when empty, calls to `get` will return
/// immediately and report that zero items were retrieved.
///
/// After a call to `close()`, further attempts to `put` into the buffer will
/// panic, but `get` continues to be allowed in order to let the buffer be
/// drained.
pub struct RingBuffer<T: Clone> {
    /// Maximum number of items the buffer can hold.
    capacity: usize,
    /// Backing storage. Its length only grows (up to `capacity`) as items are
    /// written for the first time, so every slot inside `0..buffer.len()`
    /// always holds an initialised value.
    buffer: Vec<T>,
    /// Index at which the next `put` starts writing.
    write_pos: usize,
    /// Number of free slots.
    available_to_write: usize,
    /// Index at which the next `get` starts reading.
    read_pos: usize,
    /// Number of stored items that have not been read yet.
    available_to_read: usize,
    /// Set by `close()`; once set, `put` panics.
    closed: bool,
}

impl<T: Clone> RingBuffer<T> {
    /// Constructs a new RingBuffer with capacity `size`.
    ///
    /// The heap space for `size` items is reserved up front, but no slot is
    /// treated as initialised until it has been written by `put`.
    pub fn new(size: usize) -> RingBuffer<T> {
        RingBuffer {
            capacity: size,
            buffer: Vec::with_capacity(size),
            write_pos: 0,
            available_to_write: size,
            read_pos: 0,
            available_to_read: 0,
            closed: false,
        }
    }

    /// Attempts to `put` items from the slice into the buffer. The only guarantees
    /// made by this method are:
    ///
    /// 1. That if the buffer is not full and `input` is not empty, one or more
    ///    items will be put
    /// 2. That the number of items put will be reported correctly by the return
    ///
    /// Specifically, `put` does not guarantee that all items from the `input` slice
    /// will be put, even in the case where there _is_ capacity for all the `input` items
    /// (a single call never wraps past the end of the backing storage).
    /// For this reason, `put` should generally be called in a loop until all items have
    /// been put.
    ///
    /// # Return
    /// The number of items, `>= 0`, that were put into the buffer.
    ///
    /// # Panics
    /// Will panic if invoked on a closed buffer.
    pub fn put(&mut self, input: &[T]) -> usize {
        if self.closed { panic!("Cannot write to closed buffer."); }
        if self.available_to_write == 0 { return 0; }

        // Never write past the physical end of the storage in one call.
        let distance_to_end = self.capacity - self.write_pos;
        let available = cmp::min(distance_to_end, self.available_to_write);
        let length = cmp::min(available, input.len());
        let source_slice = &input[..length];

        if self.buffer.len() < self.capacity {
            // First pass over the storage: writes are strictly sequential, so the
            // write position is exactly the end of the initialised prefix and the
            // new items can simply be appended (no reallocation: length never
            // exceeds the reserved capacity).
            debug_assert_eq!(self.buffer.len(), self.write_pos);
            self.buffer.extend_from_slice(source_slice);
        } else {
            // Storage fully initialised: overwrite the old values in place.
            let end = self.write_pos + length;
            self.buffer[self.write_pos..end].clone_from_slice(source_slice);
        }

        self.available_to_write -= length;
        self.available_to_read += length;
        self.write_pos = (self.write_pos + length) % self.capacity;
        length
    }

    /// Attempts to `get` items from the buffer and put them into the slice.
    /// The only guarantees made by this method are:
    ///
    /// 1. That if the buffer is not empty and `output` is not empty, one or more
    ///    items will be fetched
    /// 2. That the number of items fetched will be reported correctly by the return
    ///
    /// Specifically, `get` does not guarantee that the `output` slice will be filled,
    /// even in the case where there _is_ enough in the buffer to do so.
    ///
    /// # Return
    /// The number of items, `>= 0`, that were fetched from the buffer.
    pub fn get(&mut self, output: &mut [T]) -> usize {
        // Nothing to read. Returning here also keeps a zero-capacity buffer away
        // from the `% self.capacity` below, which would divide by zero.
        if self.available_to_read == 0 { return 0; }

        let distance_to_end = self.capacity - self.read_pos;
        let available = cmp::min(distance_to_end, self.available_to_read);
        let length = cmp::min(available, output.len());
        let end = self.read_pos + length;
        output[..length].clone_from_slice(&self.buffer[self.read_pos..end]);

        self.available_to_read -= length;
        self.available_to_write += length;
        self.read_pos = (self.read_pos + length) % self.capacity;
        length
    }

    /// Returns true if the buffer is currently empty, implying calls to `get()` will
    /// yield zero items.
    pub fn is_empty(&self) -> bool { self.available_to_read == 0 }

    /// Returns true if the buffer is currently full, implying calls to `put()` will
    /// consume zero items.
    pub fn is_full(&self) -> bool { self.available_to_write == 0 }

    /// Closes the buffer such that future calls to `put()` will panic.
    pub fn close(&mut self) { self.closed = true; }

    /// Returns true if the buffer is closed, and false otherwise.
    pub fn is_closed(&self) -> bool { self.closed }
}

#[test]
fn test_basic_read_write() {
    let mut buffer: RingBuffer<u8> = RingBuffer::new(100);
    let xs: [u8; 10] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
    let mut ys: [u8; 10] = [0; 10];
    for _ in 0..100 {
        buffer.put(&xs);
        buffer.get(&mut ys);
        for i in 0..10 {
            assert!(xs[i] == ys[i]);
        }
    }
}

#[test]
fn test_write_on_full_buffer() {
    let mut buffer: RingBuffer<u8> = RingBuffer::new(10);
    let xs: [u8; 10] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
    buffer.put(&xs);
    let n = buffer.put(&xs);
    assert!(n == 0);
}

#[test]
fn test_read_on_empty_buffer() {
    let mut buffer: RingBuffer<u8> = RingBuffer::new(10);
    let mut xs: [u8; 10] = [7; 10];
    let n = buffer.get(&mut xs);
    assert!(n == 0);
    for i in 0..10 { assert!(xs[i] == 7); }
}
14 | // 15 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | // SOFTWARE. 22 | 23 | //! 24 | //! Command line program that can be sandwiched between pipes to effectively increase 25 | //! the size of the pipe buffer. Since linux pipes are generally limited to `64k` it 26 | //! is sometimes useful to provide significantly more buffering between programs in a 27 | //! pipe in order to smooth out any "lumpiness" in the flow of data. 28 | //! 29 | 30 | mod ringbuffer; 31 | 32 | #[macro_use] extern crate clap; 33 | extern crate regex; 34 | 35 | use std::io; 36 | use std::io::{Read,Write}; 37 | use std::sync::{Arc, Mutex, Condvar}; 38 | use std::thread; 39 | use clap::{Arg, App}; 40 | use ringbuffer::RingBuffer; 41 | use regex::Regex; 42 | 43 | // How big should the thread-local buffers for the reader and writer threads be 44 | const THREAD_BUFFER_SIZE: usize = 1024 * 64; 45 | 46 | /// Main function that coordinates argument parsing and then delegates to the 47 | /// `run()` function to do the actual work. 
48 | pub fn main() { 49 | let matches = 50 | App::new("pipebuffer") 51 | .version(crate_version!()) 52 | .about("A tool to rapidly buffer and re-emit data in unix pipelines.") 53 | .arg(Arg::with_name("size") 54 | .short("s").long("size") 55 | .help("The size, in bytes or with k[b]/m[b]/g[b] suffix.") 56 | .default_value("256m")) 57 | .get_matches(); 58 | 59 | let buffer_size = match parse_memory(matches.value_of("size").unwrap()) { 60 | Some(size) => size, 61 | None => { 62 | println!("{}", matches.usage()); 63 | println!("Error: Argument {} is not a valid size.", matches.value_of("size").unwrap()); 64 | std::process::exit(1) 65 | } 66 | }; 67 | 68 | run(buffer_size); 69 | } 70 | 71 | /// Parses memory unit values from strings. Specifically accepts any value 72 | /// that is an integer number followed optionally by `k/kb/m/mb/g/gb/p/pb` in 73 | /// either upper or lower case. If the value can be parsed returns a 74 | /// `Some(bytes)`, otherwise returns a None. 75 | fn parse_memory(s: &str) -> Option { 76 | match Regex::new("^([0-9]+)([kmgp])?b?$").unwrap().captures(&s.to_lowercase()) { 77 | None => None, 78 | Some(groups) => { 79 | let num : Option = groups.at(1).unwrap().parse().ok(); 80 | let exp = match groups.at(2) { 81 | Some("k") => 1, 82 | Some("m") => 2, 83 | Some("g") => 3, 84 | Some("p") => 4, 85 | _ => 0 86 | }; 87 | num.map(|n| n * (1024 as usize).pow(exp)) 88 | } 89 | } 90 | } 91 | 92 | /// Funtion that uses a pair of threads to move data from Stdin to Stdout 93 | /// with a RungBuffer in the middle. 
94 | fn run(buffer_size: usize) { 95 | // The shared ring buffer and the thread handles 96 | let ring = Arc::new(Mutex::new(RingBuffer::new(buffer_size))); 97 | let cond = Arc::new(Condvar::new()); 98 | 99 | // Setup the writer thread 100 | let writer_handle = { 101 | let ring = ring.clone(); 102 | let cond = cond.clone(); 103 | thread::spawn(move || { 104 | let mut bytes: [u8; THREAD_BUFFER_SIZE] = [0; THREAD_BUFFER_SIZE]; 105 | let mut output = io::stdout(); 106 | 'main_loop : loop { 107 | let n = { 108 | // Lock the buffer, but wait on it if it's empty 109 | let mut buffer = ring.lock().unwrap(); 110 | while buffer.is_empty() { 111 | if buffer.is_closed() { break 'main_loop; } 112 | else { buffer = cond.wait(buffer).unwrap(); } 113 | } 114 | 115 | // Fetch from the buffer, and notify writers if we went from full to not full 116 | let was_full = buffer.is_full(); 117 | let n = buffer.get(&mut bytes); 118 | if was_full && n > 0 { cond.notify_one(); } 119 | n 120 | }; // lock released here 121 | 122 | // Write the data, if any, to stdout 123 | let mut start = 0; 124 | while start < n { start += output.write(&bytes[start..n]).unwrap(); } 125 | output.flush().unwrap(); 126 | } 127 | }) 128 | }; 129 | 130 | // Setup this thread as the reader thread 131 | let mut bytes: [u8; THREAD_BUFFER_SIZE] = [0; THREAD_BUFFER_SIZE]; 132 | let mut input = io::stdin(); 133 | loop { 134 | let n = input.read(&mut bytes).unwrap(); 135 | let mut buffer = ring.lock().unwrap(); 136 | 137 | if n == 0 { // input stream is closed 138 | buffer.close(); 139 | cond.notify_one(); 140 | break; 141 | } 142 | else { 143 | let mut start = 0; 144 | while start < n { 145 | while buffer.is_full() { 146 | buffer = cond.wait(buffer).unwrap(); 147 | } 148 | let was_empty = buffer.is_empty(); 149 | start += buffer.put(&bytes[start..n]); 150 | if was_empty { cond.notify_one(); } 151 | } 152 | } 153 | } 154 | 155 | writeln!(&mut io::stderr(), "Attempting to join on the writer.").unwrap(); 156 | 
writer_handle.join().unwrap(); 157 | } 158 | 159 | //////////////////////////////////////////////////////////////////////////////// 160 | // Tests only beyond this point 161 | //////////////////////////////////////////////////////////////////////////////// 162 | 163 | #[test] 164 | fn test_parse_mem_bytes() -> () { 165 | assert!(parse_memory("1") == Some::(1)); 166 | assert!(parse_memory("1024") == Some::(1024)); 167 | assert!(parse_memory("1000000000") == Some::(1000000000)); 168 | assert!(parse_memory("10000000000000000000000000000") == None); 169 | } 170 | 171 | #[test] 172 | fn test_parse_mem_suffixed() -> () { 173 | assert!(parse_memory("1k") == Some::(1024)); 174 | assert!(parse_memory("99k") == Some::(99 * 1024)); 175 | assert!(parse_memory("99kb") == Some::(99 * 1024)); 176 | assert!(parse_memory("99K") == Some::(99 * 1024)); 177 | assert!(parse_memory("99KB") == Some::(99 * 1024)); 178 | 179 | assert!(parse_memory("1m") == Some::(1024*1024)); 180 | assert!(parse_memory("10m") == Some::(10*1024*1024)); 181 | assert!(parse_memory("101m") == Some::(101*1024*1024)); 182 | assert!(parse_memory("1024m") == Some::(1024*1024*1024)); 183 | 184 | assert!(parse_memory("6g") == Some::(6*1024*1024*1024)); 185 | assert!(parse_memory("60g") == Some::(60*1024*1024*1024)); 186 | 187 | assert!(parse_memory("1p") == Some::(1024*1024*1024*1024)); 188 | } 189 | 190 | #[test] 191 | fn test_parse_mem_fails() -> () { 192 | assert!(parse_memory("") == None); 193 | assert!(parse_memory("k") == None); 194 | assert!(parse_memory("kb") == None); 195 | assert!(parse_memory("foo") == None); 196 | assert!(parse_memory("not1024m") == None); 197 | assert!(parse_memory("-12g") == None); 198 | assert!(parse_memory("12x") == None); 199 | assert!(parse_memory("7y") == None); 200 | assert!(parse_memory("1024x1024") == None); 201 | assert!(parse_memory("1024mi") == None); 202 | } 203 | --------------------------------------------------------------------------------