├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── examples ├── async_std_http.rs ├── sync_http.rs └── tokio_http.rs └── src ├── async_bufreader.rs ├── bufreader.rs └── lib.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "nom-bufreader" 3 | version = "0.2.0" 4 | edition = "2018" 5 | authors = [ "contact@geoffroycouprie.com" ] 6 | description = "BufReader adapter for nom parsers" 7 | license = "MIT" 8 | keywords = ["parser", "parser-combinators", "parsing", "streaming", "async"] 9 | categories = ["parsing"] 10 | repository = "https://github.com/rust-bakery/nom-bufreader" 11 | readme = "README.md" 12 | documentation = "https://docs.rs/nom-bufreader" 13 | 14 | [dependencies] 15 | nom = "7.0.0" 16 | async-trait = { version = "0.1.51", optional = true } 17 | futures = { version = "0.3.16", optional = true } 18 | pin-project-lite = { version = "0.2.7", optional = true } 19 | 20 | [dev-dependencies] 21 | async-std = { version = "1.9.0", features = ["attributes"] } 22 | tokio = { version = "1.9.0", features = ["full"] } 23 | tokio-util = { version = "0.6.7", features = ["compat"] } 24 | 25 | [features] 26 | default = ["async"] 27 | async = ["futures", "async-trait", "pin-project-lite"] 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2021 Geoffroy Couprie 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | 
distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # nom-bufreader, adapters for BufReader around nom 2 | 3 | **/!\Work in progress, if you put it in production, you fix it/!\** 4 | 5 | With this crate, you can assemble a nom parser with a `BufReader` alternative, synchronous or asynchronous. 6 | Due to incompatible buffering strategies, [std::io::BufReader](https://doc.rust-lang.org/stable/std/io/struct.BufReader.html) 7 | and [futures::io::BufReader](https://docs.rs/futures/0.3.16/futures/io/struct.BufReader.html) 8 | cannot be used directly. This crate provides compatible forks instead, in the 9 | `bufreader` and `async_bufreader` modules. 10 | 11 | It hides the [Incomplete](https://docs.rs/nom/7.0.0/nom/enum.Err.html#variant.Incomplete) handling of nom's streaming parsers for you, retrying and refilling buffers automatically.
12 | 13 | ## Examples 14 | 15 | ### sync 16 | 17 | ```rust 18 | use nom_bufreader::bufreader::BufReader; 19 | use nom_bufreader::{Error, Parse}; 20 | use std::{net::TcpListener, str::from_utf8}; 21 | 22 | fn main() -> Result<(), Error<()>> { 23 | let listener = TcpListener::bind("127.0.0.1:8080")?; 24 | let mut i = BufReader::new(listener.incoming().next().unwrap()?); 25 | 26 | // method, space and path are nom parsers 27 | let m = i.parse(method)?; 28 | let _ = i.parse(space)?; 29 | let p = i.parse(path)?; 30 | println!("got method {}, path {}", m, p); 31 | Ok(()) 32 | } 33 | ``` 34 | 35 | ### async 36 | 37 | #### tokio 38 | 39 | ```rust 40 | use nom_bufreader::async_bufreader::BufReader; 41 | use nom_bufreader::{AsyncParse, Error}; 42 | use std::str::from_utf8; 43 | use tokio_util::compat::TokioAsyncReadCompatExt; 44 | use tokio::net::TcpListener; 45 | 46 | #[tokio::main] 47 | async fn main() -> Result<(), Error<()>> { 48 | let listener = TcpListener::bind("127.0.0.1:8080").await?; 49 | let mut i = BufReader::new(listener.accept().await?.0.compat()); 50 | 51 | let m = i.parse(method).await?; 52 | let _ = i.parse(space).await?; 53 | let p = i.parse(path).await?; 54 | println!("got method {}, path {}", m, p); 55 | Ok(()) 56 | } 57 | ``` 58 | 59 | #### async-std 60 | 61 | ```rust 62 | use nom_bufreader::async_bufreader::BufReader; 63 | use nom_bufreader::{AsyncParse, Error}; 64 | use std::str::from_utf8; 65 | use async_std::net::TcpListener; 66 | 67 | #[async_std::main] 68 | async fn main() -> Result<(), Error<()>> { 69 | let listener = TcpListener::bind("127.0.0.1:8080").await?; 70 | let mut i = BufReader::new(listener.accept().await?.0); 71 | 72 | let m = i.parse(method).await?; 73 | let _ = i.parse(space).await?; 74 | let p = i.parse(path).await?; 75 | println!("got method {}, path {}", m, p); 76 | Ok(()) 77 | } 78 | ``` 79 | -------------------------------------------------------------------------------- /examples/async_std_http.rs: 
-------------------------------------------------------------------------------- 1 | use nom::{ 2 | branch::alt, 3 | bytes::streaming::{tag, take_until}, 4 | character::streaming::space0, 5 | combinator::map_res, 6 | IResult, 7 | }; 8 | use nom_bufreader::async_bufreader::BufReader; 9 | use nom_bufreader::{AsyncParse, Error}; 10 | use std::str::from_utf8; 11 | 12 | fn method(i: &[u8]) -> IResult<&[u8], String, ()> { 13 | map_res(alt((tag("GET"), tag("POST"), tag("HEAD"))), |s| { 14 | from_utf8(s).map(|s| s.to_string()) 15 | })(i) 16 | } 17 | 18 | fn path(i: &[u8]) -> IResult<&[u8], String, ()> { 19 | map_res(take_until(" "), |s| from_utf8(s).map(|s| s.to_string()))(i) 20 | } 21 | 22 | fn space(i: &[u8]) -> IResult<&[u8], (), ()> { 23 | let (i, _) = space0(i)?; 24 | Ok((i, ())) 25 | } 26 | 27 | #[async_std::main] 28 | async fn main() -> Result<(), Error<()>> { 29 | let listener = async_std::net::TcpListener::bind("127.0.0.1:8080").await?; 30 | let mut i = BufReader::new(listener.accept().await?.0); 31 | 32 | let m = i.parse(method).await?; 33 | let _ = i.parse(space).await?; 34 | let p = i.parse(path).await?; 35 | println!("got method {}, path {}", m, p); 36 | Ok(()) 37 | } 38 | -------------------------------------------------------------------------------- /examples/sync_http.rs: -------------------------------------------------------------------------------- 1 | use nom::{ 2 | branch::alt, 3 | bytes::streaming::{tag, take_until}, 4 | character::streaming::space0, 5 | combinator::map_res, 6 | IResult, 7 | }; 8 | use nom_bufreader::bufreader::BufReader; 9 | use nom_bufreader::{Error, Parse}; 10 | use std::{net::TcpListener, str::from_utf8}; 11 | 12 | fn method(i: &[u8]) -> IResult<&[u8], String, ()> { 13 | map_res(alt((tag("GET"), tag("POST"), tag("HEAD"))), |s| { 14 | from_utf8(s).map(|s| s.to_string()) 15 | })(i) 16 | } 17 | 18 | fn path(i: &[u8]) -> IResult<&[u8], String, ()> { 19 | map_res(take_until(" "), |s| from_utf8(s).map(|s| s.to_string()))(i) 20 | } 21 | 
22 | fn space(i: &[u8]) -> IResult<&[u8], (), ()> { 23 | let (i, _) = space0(i)?; 24 | Ok((i, ())) 25 | } 26 | 27 | fn main() -> Result<(), Error<()>> { 28 | let listener = TcpListener::bind("127.0.0.1:8080")?; 29 | let mut i = BufReader::new(listener.incoming().next().unwrap()?); 30 | 31 | let m = i.parse(method)?; 32 | let _ = i.parse(space)?; 33 | let p = i.parse(path)?; 34 | println!("got method {}, path {}", m, p); 35 | Ok(()) 36 | } 37 | -------------------------------------------------------------------------------- /examples/tokio_http.rs: -------------------------------------------------------------------------------- 1 | use nom::{ 2 | branch::alt, 3 | bytes::streaming::{tag, take_until}, 4 | character::streaming::space0, 5 | combinator::map_res, 6 | IResult, 7 | }; 8 | use nom_bufreader::async_bufreader::BufReader; 9 | use nom_bufreader::{AsyncParse, Error}; 10 | use std::str::from_utf8; 11 | use tokio_util::compat::TokioAsyncReadCompatExt; 12 | 13 | fn method(i: &[u8]) -> IResult<&[u8], String, ()> { 14 | map_res(alt((tag("GET"), tag("POST"), tag("HEAD"))), |s| { 15 | from_utf8(s).map(|s| s.to_string()) 16 | })(i) 17 | } 18 | 19 | fn path(i: &[u8]) -> IResult<&[u8], String, ()> { 20 | map_res(take_until(" "), |s| from_utf8(s).map(|s| s.to_string()))(i) 21 | } 22 | 23 | fn space(i: &[u8]) -> IResult<&[u8], (), ()> { 24 | let (i, _) = space0(i)?; 25 | Ok((i, ())) 26 | } 27 | 28 | #[tokio::main] 29 | async fn main() -> Result<(), Error<()>> { 30 | let listener = tokio::net::TcpListener::bind("127.0.0.1:8080").await?; 31 | let mut i = BufReader::new(listener.accept().await?.0.compat()); 32 | 33 | let m = i.parse(method).await?; 34 | let _ = i.parse(space).await?; 35 | let p = i.parse(path).await?; 36 | println!("got method {}, path {}", m, p); 37 | Ok(()) 38 | } 39 | -------------------------------------------------------------------------------- /src/async_bufreader.rs: -------------------------------------------------------------------------------- 1 | 
use super::bufreader::DEFAULT_BUF_SIZE; 2 | use futures::io::{AsyncBufRead, AsyncRead, AsyncSeek, AsyncWrite, IoSliceMut, SeekFrom}; 3 | use futures::ready; 4 | use futures::task::{Context, Poll}; 5 | use pin_project_lite::pin_project; 6 | use std::io::{self, Read}; 7 | use std::pin::Pin; 8 | use std::{cmp, fmt}; 9 | 10 | pin_project! { 11 | /// The `BufReader` struct adds buffering to any reader. 12 | /// 13 | /// It can be excessively inefficient to work directly with an [`AsyncRead`] 14 | /// instance. A `BufReader` performs large, infrequent reads on the underlying 15 | /// [`AsyncRead`] and maintains an in-memory buffer of the results. 16 | /// 17 | /// `BufReader` can improve the speed of programs that make *small* and 18 | /// *repeated* read calls to the same file or network socket. It does not 19 | /// help when reading very large amounts at once, or reading just one or a few 20 | /// times. It also provides no advantage when reading from a source that is 21 | /// already in memory, like a `Vec<u8>`. 22 | /// 23 | /// When the `BufReader` is dropped, the contents of its buffer will be 24 | /// discarded. Creating multiple instances of a `BufReader` on the same 25 | /// stream can cause data loss. 26 | /// 27 | /// **Note: this is a fork of `futures::io::BufReader` whose `poll_fill_buf` 28 | /// reads more data even if there is already some data in the buffer** 29 | /// 30 | /// [`AsyncRead`]: futures::io::AsyncRead 31 | /// 32 | // TODO: Examples 33 | pub struct BufReader { 34 | #[pin] 35 | inner: R, 36 | buffer: Vec, 37 | pos: usize, 38 | cap: usize, 39 | } 40 | } 41 | 42 | impl BufReader { 43 | /// Creates a new `BufReader` with a default buffer capacity. The default is currently 8 KiB, 44 | /// but may change in the future. 45 | pub fn new(inner: R) -> Self { 46 | Self::with_capacity(DEFAULT_BUF_SIZE, inner) 47 | } 48 | 49 | /// Creates a new `BufReader` with the specified buffer capacity.
50 | pub fn with_capacity(capacity: usize, inner: R) -> Self { 51 | let buffer = vec![0; capacity]; 52 | Self { 53 | inner, 54 | buffer, 55 | pos: 0, 56 | cap: 0, 57 | } 58 | } 59 | 60 | /// Acquires a reference to the underlying reader that this `BufReader` is 61 | /// pulling from. 62 | pub fn get_ref(&self) -> &R { 63 | &self.inner 64 | } 65 | 66 | /// Acquires a mutable reference to the underlying reader that this 67 | /// `BufReader` is pulling from. 68 | /// 69 | /// It is inadvisable to directly read from the underlying reader: bytes 70 | /// already held in the internal buffer would be skipped over. 71 | pub fn get_mut(&mut self) -> &mut R { 72 | &mut self.inner 73 | } 74 | 75 | /// Acquires a pinned mutable reference to the underlying reader that this 76 | /// `BufReader` is pulling from. 77 | /// 78 | /// It is inadvisable to directly read from the underlying reader: bytes 79 | /// already held in the internal buffer would be skipped over. 80 | pub fn get_pin_mut(self: core::pin::Pin<&mut Self>) -> core::pin::Pin<&mut R> { 81 | self.project().inner 82 | } 83 | 84 | /// Consumes this `BufReader`, returning the underlying reader. 85 | /// 86 | /// Note that any leftover data in the internal buffer is dropped with the 87 | /// `BufReader`, so a following read from the underlying reader will not see it. 88 | pub fn into_inner(self) -> R { 89 | self.inner 90 | } 91 | 92 | /// Returns a reference to the internally buffered data. 93 | /// 94 | /// Unlike `poll_fill_buf`, this never polls the underlying reader. 95 | pub fn buffer(&self) -> &[u8] { 96 | &self.buffer[self.pos..self.cap] 97 | } 98 | 99 | /// Invalidates all data in the internal buffer.
#[inline]
    fn discard_buffer(self: Pin<&mut Self>) {
        // Resetting both cursors marks the buffer as empty; the stale bytes are
        // simply overwritten by the next fill.
        let this = self.project();
        *this.pos = 0;
        *this.cap = 0;
    }
}

impl<R: AsyncRead> AsyncRead for BufReader<R> {
    /// Reads into `buf`, preferring bytes that are already buffered.
    ///
    /// * If unconsumed bytes are buffered they are copied out immediately and
    ///   the inner reader is not polled. `poll_fill_buf` in this fork always
    ///   polls the inner reader, so going through it here would return
    ///   `Pending` (or its "buffer completely filled" error when the buffer is
    ///   full and untouched) even though data is available.
    /// * If the buffer is empty and `buf` is at least as large as the internal
    ///   buffer, the read goes straight to the inner reader.
    /// * Otherwise the internal buffer is filled and the read is served from it.
    fn poll_read(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        buf: &mut [u8],
    ) -> Poll<io::Result<usize>> {
        if self.pos < self.cap {
            let mut buffered = &self.buffer[self.pos..self.cap];
            let nread = buffered.read(buf)?;
            self.consume(nread);
            return Poll::Ready(Ok(nread));
        }

        // Nothing buffered: a massive read (larger than our internal buffer)
        // bypasses the internal buffer entirely.
        if buf.len() >= self.buffer.len() {
            let res = ready!(self.as_mut().project().inner.poll_read(cx, buf));
            self.discard_buffer();
            return Poll::Ready(res);
        }

        let mut rem = ready!(self.as_mut().poll_fill_buf(cx))?;
        let nread = rem.read(buf)?;
        self.consume(nread);
        Poll::Ready(Ok(nread))
    }

    /// Vectored counterpart of [`poll_read`](Self::poll_read), with the same
    /// three cases; the bypass threshold is the combined length of `bufs`.
    fn poll_read_vectored(
        mut self: Pin<&mut Self>,
        cx: &mut Context<'_>,
        bufs: &mut [IoSliceMut<'_>],
    ) -> Poll<io::Result<usize>> {
        if self.pos < self.cap {
            let mut buffered = &self.buffer[self.pos..self.cap];
            let nread = buffered.read_vectored(bufs)?;
            self.consume(nread);
            return Poll::Ready(Ok(nread));
        }

        let total_len = bufs.iter().map(|b| b.len()).sum::<usize>();
        if total_len >= self.buffer.len() {
            let res = ready!(self.as_mut().project().inner.poll_read_vectored(cx, bufs));
            self.discard_buffer();
            return Poll::Ready(res);
        }

        let mut rem = ready!(self.as_mut().poll_fill_buf(cx))?;
        let nread = rem.read_vectored(bufs)?;
        self.consume(nread);
        Poll::Ready(Ok(nread))
    }

    // we can't skip unconditionally because of the large buffer case in read.
// NOTE(review): Cargo.toml declares no `read-initializer` feature and this file
    // does not import `Initializer`, so the method below is never compiled.
    #[cfg(feature = "read-initializer")]
    unsafe fn initializer(&self) -> Initializer {
        self.inner.initializer()
    }
}

impl<R: AsyncRead> AsyncBufRead for BufReader<R> {
    /// Polls the inner reader for more data and returns every unconsumed byte.
    ///
    /// Unlike a conventional buffered reader this reads from the inner reader
    /// even when the buffer already holds data; the new bytes are appended
    /// after the ones already buffered. When the write cursor has reached the
    /// end of the buffer, the unconsumed bytes are first moved to the front to
    /// make room.
    ///
    /// # Errors
    ///
    /// Returns an [`io::ErrorKind::Interrupted`] error ("buffer completely
    /// filled") when the buffer is full and nothing has been consumed, i.e.
    /// there is no room left to read into. Errors from the inner reader are
    /// passed through.
    fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<io::Result<&[u8]>> {
        let this = self.project();

        if *this.cap == this.buffer.len() {
            if *this.pos == 0 {
                return Poll::Ready(Err(io::Error::new(
                    io::ErrorKind::Interrupted,
                    "buffer completely filled",
                )));
            }
            // Slide the unconsumed tail down to index 0 so the freed space at
            // the end can be read into.
            this.buffer.copy_within(*this.pos..*this.cap, 0);
            *this.cap -= *this.pos;
            *this.pos = 0;
        }

        // Read into the free space *after* the buffered bytes. Reading into the
        // whole buffer would overwrite them from index 0 and could push `cap`
        // past the end of the buffer.
        let read = ready!(this.inner.poll_read(cx, &mut this.buffer[*this.cap..]))?;
        *this.cap += read;

        Poll::Ready(Ok(&this.buffer[*this.pos..*this.cap]))
    }

    /// Marks `amt` buffered bytes as consumed, clamped to the buffered amount.
    fn consume(self: Pin<&mut Self>, amt: usize) {
        *self.project().pos = cmp::min(self.pos + amt, self.cap);
    }
}

// Writes are forwarded untouched to the inner value; only reads are buffered.
impl<R: AsyncWrite> AsyncWrite for BufReader<R> {
    fn poll_write(
        self: core::pin::Pin<&mut Self>,
        cx: &mut core::task::Context<'_>,
        buf: &[u8],
    ) -> core::task::Poll<io::Result<usize>> {
        self.project().inner.poll_write(cx, buf)
    }
    fn poll_write_vectored(
        self: core::pin::Pin<&mut Self>,
        cx: &mut core::task::Context<'_>,
        bufs: &[std::io::IoSlice<'_>],
    ) -> core::task::Poll<io::Result<usize>> {
        self.project().inner.poll_write_vectored(cx, bufs)
    }
    fn poll_flush(
        self: core::pin::Pin<&mut Self>,
        cx: &mut core::task::Context<'_>,
    ) -> core::task::Poll<io::Result<()>> {
        self.project().inner.poll_flush(cx)
    }
    fn poll_close(
        self: core::pin::Pin<&mut Self>,
        cx: &mut core::task::Context<'_>,
    ) -> core::task::Poll<io::Result<()>> {
        self.project().inner.poll_close(cx)
    }
}

impl<R: fmt::Debug> fmt::Debug for BufReader<R> {
    fn fmt(&self,
f: &mut fmt::Formatter<'_>) -> fmt::Result { 216 | f.debug_struct("BufReader") 217 | .field("reader", &self.inner) 218 | .field( 219 | "buffer", 220 | &format_args!("{}/{}", self.cap - self.pos, self.buffer.len()), 221 | ) 222 | .finish() 223 | } 224 | } 225 | 226 | impl AsyncSeek for BufReader { 227 | /// Seek to an offset, in bytes, in the underlying reader. 228 | /// 229 | /// The position used for seeking with `SeekFrom::Current(_)` is the 230 | /// position the underlying reader would be at if the `BufReader` had no 231 | /// internal buffer. 232 | /// 233 | /// Seeking always discards the internal buffer, even if the seek position 234 | /// would otherwise fall within it. This guarantees that calling 235 | /// `.into_inner()` immediately after a seek yields the underlying reader 236 | /// at the same position. 237 | /// 238 | /// See [`AsyncSeek`](futures::io::AsyncSeek) for more details. 239 | /// 240 | /// Note: In the edge case where you're seeking with `SeekFrom::Current(n)` 241 | /// where `n` minus the internal buffer length overflows an `i64`, two 242 | /// seeks will be performed instead of one. If the second seek returns 243 | /// `Err`, the underlying reader will be left at the same position it would 244 | /// have if you called `seek` with `SeekFrom::Current(0)`. 245 | fn poll_seek( 246 | mut self: Pin<&mut Self>, 247 | cx: &mut Context<'_>, 248 | pos: SeekFrom, 249 | ) -> Poll> { 250 | let result: u64; 251 | if let SeekFrom::Current(n) = pos { 252 | let remainder = (self.cap - self.pos) as i64; 253 | // it should be safe to assume that remainder fits within an i64 as the alternative 254 | // means we managed to allocate 8 exbibytes and that's absurd. 255 | // But it's not out of the realm of possibility for some weird underlying reader to 256 | // support seeking by i64::MIN so we need to handle underflow when subtracting 257 | // remainder.
258 | if let Some(offset) = n.checked_sub(remainder) { 259 | result = ready!(self 260 | .as_mut() 261 | .project() 262 | .inner 263 | .poll_seek(cx, SeekFrom::Current(offset)))?; 264 | } else { 265 | // seek backwards by our remainder, and then by the offset 266 | ready!(self 267 | .as_mut() 268 | .project() 269 | .inner 270 | .poll_seek(cx, SeekFrom::Current(-remainder)))?; 271 | self.as_mut().discard_buffer(); 272 | result = ready!(self 273 | .as_mut() 274 | .project() 275 | .inner 276 | .poll_seek(cx, SeekFrom::Current(n)))?; 277 | } 278 | } else { 279 | // Seeking with Start/End doesn't care about our buffer length. 280 | result = ready!(self.as_mut().project().inner.poll_seek(cx, pos))?; 281 | } 282 | self.discard_buffer(); 283 | Poll::Ready(Ok(result)) 284 | } 285 | } 286 | -------------------------------------------------------------------------------- /src/bufreader.rs: -------------------------------------------------------------------------------- 1 | // originally extracted from Rust's std::io::BufReader 2 | // 3 | // this version allows refilling even if the buffer still has some data 4 | 5 | use std::cmp; 6 | use std::fmt; 7 | use std::io::{self, BufRead, Error, ErrorKind, IoSliceMut, Read, Result, Seek, SeekFrom}; 8 | /// Default capacity, in bytes, of a `BufReader`'s internal buffer (8 KiB). 9 | pub(crate) const DEFAULT_BUF_SIZE: usize = 8 * 1024; 10 | /// Reads from `this` until `buf` is full, retrying reads that fail with `ErrorKind::Interrupted`; returns an `UnexpectedEof` error if a read returns 0 bytes before `buf` is full. 11 | fn default_read_exact(this: &mut R, mut buf: &mut [u8]) -> Result<()> { 12 | while !buf.is_empty() { 13 | match this.read(buf) { 14 | Ok(0) => break, 15 | Ok(n) => { 16 | let tmp = buf; 17 | buf = &mut tmp[n..]; 18 | } 19 | Err(ref e) if e.kind() == ErrorKind::Interrupted => {} 20 | Err(e) => return Err(e), 21 | } 22 | } 23 | if !buf.is_empty() { 24 | Err(Error::new( 25 | ErrorKind::UnexpectedEof, 26 | "failed to fill whole buffer", 27 | )) 28 | } else { 29 | Ok(()) 30 | } 31 | } 32 | 33 | /// The `BufReader` struct adds buffering to any reader. 34 | /// 35 | /// It can be excessively inefficient to work directly with a [`Read`] instance.
36 | /// For example, every call to [`read`][`TcpStream::read`] on [`TcpStream`] 37 | /// results in a system call. A `BufReader` performs large, infrequent reads on 38 | /// the underlying [`Read`] and maintains an in-memory buffer of the results. 39 | /// 40 | /// `BufReader` can improve the speed of programs that make *small* and 41 | /// *repeated* read calls to the same file or network socket. It does not 42 | /// help when reading very large amounts at once, or reading just one or a few 43 | /// times. It also provides no advantage when reading from a source that is 44 | /// already in memory, like a [`Vec`]`<u8>`. 45 | /// 46 | /// When the `BufReader` is dropped, the contents of its buffer will be 47 | /// discarded. Creating multiple instances of a `BufReader` on the same 48 | /// stream can cause data loss. Reading from the underlying reader after 49 | /// unwrapping the `BufReader` with [`BufReader::into_inner`] can also cause 50 | /// data loss. 51 | /// 52 | /// **Note: this is a fork from `std::io::BufReader` that reads more data in 53 | /// `fill_buf` even if there is already some data in the buffer** 54 | /// 55 | // Link targets below point into `std`; this crate has no `net` module. 56 | /// [`TcpStream::read`]: std::io::Read::read 57 | /// [`TcpStream`]: std::net::TcpStream 58 | /// 59 | /// # Examples 60 | /// 61 | /// ```no_run 62 | /// use std::io::prelude::*; 63 | /// use nom_bufreader::bufreader::BufReader; 64 | /// use std::fs::File; 65 | /// 66 | /// fn main() -> std::io::Result<()> { 67 | /// let f = File::open("log.txt")?; 68 | /// let mut reader = BufReader::new(f); 69 | /// 70 | /// let mut line = String::new(); 71 | /// let len = reader.read_line(&mut line)?; 72 | /// println!("First line is {} bytes long", len); 73 | /// Ok(()) 74 | /// } 75 | /// ``` 76 | pub struct BufReader { 77 | inner: R, 78 | buf: Vec, 79 | pos: usize, 80 | cap: usize, 81 | } 82 | 83 | impl BufReader { 84 | /// Creates a new `BufReader` with a default buffer capacity.
The default is currently 8 KiB, 85 | /// but may change in the future. 86 | /// 87 | /// # Examples 88 | /// 89 | /// ```no_run 90 | /// use nom_bufreader::bufreader::BufReader; 91 | /// use std::fs::File; 92 | /// 93 | /// fn main() -> std::io::Result<()> { 94 | /// let f = File::open("log.txt")?; 95 | /// let reader = BufReader::new(f); 96 | /// Ok(()) 97 | /// } 98 | /// ``` 99 | pub fn new(inner: R) -> BufReader { 100 | BufReader::with_capacity(DEFAULT_BUF_SIZE, inner) 101 | } 102 | 103 | /// Creates a new `BufReader` with the specified buffer capacity. 104 | /// 105 | /// # Examples 106 | /// 107 | /// Creating a buffer with ten bytes of capacity: 108 | /// 109 | /// ```no_run 110 | /// use nom_bufreader::bufreader::BufReader; 111 | /// use std::fs::File; 112 | /// 113 | /// fn main() -> std::io::Result<()> { 114 | /// let f = File::open("log.txt")?; 115 | /// let reader = BufReader::with_capacity(10, f); 116 | /// Ok(()) 117 | /// } 118 | /// ``` 119 | pub fn with_capacity(capacity: usize, inner: R) -> BufReader { 120 | let buf = vec![0; capacity]; 121 | BufReader { 122 | inner, 123 | buf, 124 | pos: 0, 125 | cap: 0, 126 | } 127 | } 128 | } 129 | 130 | impl BufReader { 131 | /// Gets a reference to the underlying reader. 132 | /// 133 | /// It is inadvisable to directly read from the underlying reader. 134 | /// 135 | /// # Examples 136 | /// 137 | /// ```no_run 138 | /// use nom_bufreader::bufreader::BufReader; 139 | /// use std::fs::File; 140 | /// 141 | /// fn main() -> std::io::Result<()> { 142 | /// let f1 = File::open("log.txt")?; 143 | /// let reader = BufReader::new(f1); 144 | /// 145 | /// let f2 = reader.get_ref(); 146 | /// Ok(()) 147 | /// } 148 | /// ``` 149 | pub fn get_ref(&self) -> &R { 150 | &self.inner 151 | } 152 | 153 | /// Gets a mutable reference to the underlying reader. 154 | /// 155 | /// It is inadvisable to directly read from the underlying reader.
156 | /// 157 | /// # Examples 158 | /// 159 | /// ```no_run 160 | /// use nom_bufreader::bufreader::BufReader; 161 | /// use std::fs::File; 162 | /// 163 | /// fn main() -> std::io::Result<()> { 164 | /// let f1 = File::open("log.txt")?; 165 | /// let mut reader = BufReader::new(f1); 166 | /// 167 | /// let f2 = reader.get_mut(); 168 | /// Ok(()) 169 | /// } 170 | /// ``` 171 | pub fn get_mut(&mut self) -> &mut R { 172 | &mut self.inner 173 | } 174 | 175 | /// Returns a reference to the internally buffered data. 176 | /// 177 | /// Unlike [`fill_buf`], this never reads from the underlying reader. 178 | /// 179 | /// [`fill_buf`]: BufRead::fill_buf 180 | /// 181 | /// # Examples 182 | /// 183 | /// ```no_run 184 | /// use nom_bufreader::bufreader::BufReader; 185 | /// use std::{fs::File, io::BufRead}; 186 | /// 187 | /// fn main() -> std::io::Result<()> { 188 | /// let f = File::open("log.txt")?; 189 | /// let mut reader = BufReader::new(f); 190 | /// assert!(reader.buffer().is_empty()); 191 | /// 192 | /// if reader.fill_buf()?.len() > 0 { 193 | /// assert!(!reader.buffer().is_empty()); 194 | /// } 195 | /// Ok(()) 196 | /// } 197 | /// ``` 198 | pub fn buffer(&self) -> &[u8] { 199 | &self.buf[self.pos..self.cap] 200 | } 201 | 202 | /// Returns the number of bytes the internal buffer can hold at once. 203 | /// 204 | /// # Examples 205 | /// 206 | /// ```no_run 207 | /// use nom_bufreader::bufreader::BufReader; 208 | /// use std::{fs::File, io::BufRead}; 209 | /// 210 | /// fn main() -> std::io::Result<()> { 211 | /// let f = File::open("log.txt")?; 212 | /// let mut reader = BufReader::new(f); 213 | /// 214 | /// let capacity = reader.capacity(); 215 | /// let buffer = reader.fill_buf()?; 216 | /// assert!(buffer.len() <= capacity); 217 | /// Ok(()) 218 | /// } 219 | /// ``` 220 | pub fn capacity(&self) -> usize { 221 | self.buf.len() 222 | } 223 | 224 | /// Unwraps this `BufReader`, returning the underlying reader. 225 | /// 226 | /// Note that any leftover data in the internal buffer is lost.
Therefore, 227 | /// a following read from the underlying reader may lead to data loss. 228 | /// 229 | /// # Examples 230 | /// 231 | /// ```no_run 232 | /// use std::io::BufReader; 233 | /// use std::fs::File; 234 | /// 235 | /// fn main() -> std::io::Result<()> { 236 | /// let f1 = File::open("log.txt")?; 237 | /// let reader = BufReader::new(f1); 238 | /// 239 | /// let f2 = reader.into_inner(); 240 | /// Ok(()) 241 | /// } 242 | /// ``` 243 | pub fn into_inner(self) -> R { 244 | self.inner 245 | } 246 | 247 | /// Invalidates all data in the internal buffer. 248 | #[inline] 249 | fn discard_buffer(&mut self) { 250 | self.pos = 0; 251 | self.cap = 0; 252 | } 253 | 254 | fn reset_buffer_position(&mut self) { 255 | //println!("resetting buffer at pos: {} capacity: {}", self.pos, self.cap); 256 | if self.cap - self.pos > 0 { 257 | for i in 0..(self.cap - self.pos) { 258 | //println!("buf[{}] = buf[{}]", i, self.pos + i); 259 | self.buf[i] = self.buf[self.pos + i]; 260 | } 261 | } 262 | self.cap = self.cap - self.pos; 263 | self.pos = 0; 264 | } 265 | } 266 | 267 | impl BufReader { 268 | /// Seeks relative to the current position. If the new position lies within the buffer, 269 | /// the buffer will not be flushed, allowing for more efficient seeks. 270 | /// This method does not return the location of the underlying reader, so the caller 271 | /// must track this information themselves if it is required. 
pub fn seek_relative(&mut self, offset: i64) -> io::Result<()> {
        let pos = self.pos as u64;
        if offset < 0 {
            // `wrapping_neg() as u64` is the magnitude of `offset` for every
            // negative value, including `i64::MIN`, where plain `-offset`
            // overflows.
            if let Some(new_pos) = pos.checked_sub(offset.wrapping_neg() as u64) {
                self.pos = new_pos as usize;
                return Ok(());
            }
        } else if let Some(new_pos) = pos.checked_add(offset as u64) {
            if new_pos <= self.cap as u64 {
                self.pos = new_pos as usize;
                return Ok(());
            }
        }
        // The target lies outside the buffered window: fall back to a real
        // seek, which discards the buffer.
        self.seek(SeekFrom::Current(offset)).map(drop)
    }
}

impl<R: Read> Read for BufReader<R> {
    /// Reads into `buf`, preferring bytes that are already buffered.
    ///
    /// * If unconsumed bytes are buffered they are copied out immediately and
    ///   the inner reader is not touched. `fill_buf` in this fork always reads
    ///   from the inner reader, so going through it here could block although
    ///   data is available. With a full, untouched buffer it would fail with
    ///   its `Interrupted` "buffer completely filled" error, which `read_exact`
    ///   retries forever.
    /// * If the buffer is empty and `buf` is at least as large as the internal
    ///   buffer, the read goes straight to the inner reader.
    /// * Otherwise the internal buffer is filled and the read is served from it.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if self.pos < self.cap {
            let mut buffered = &self.buf[self.pos..self.cap];
            let nread = buffered.read(buf)?;
            self.consume(nread);
            return Ok(nread);
        }

        // Nothing buffered: a massive read (larger than our internal buffer)
        // bypasses the internal buffer entirely.
        if buf.len() >= self.buf.len() {
            self.discard_buffer();
            return self.inner.read(buf);
        }

        let nread = {
            let mut rem = self.fill_buf()?;
            rem.read(buf)?
        };
        self.consume(nread);
        Ok(nread)
    }

    // Small read_exacts from a BufReader are extremely common when used with a deserializer.
    // The default implementation calls read in a loop, which results in surprisingly poor code
    // generation for the common path where the buffer has enough bytes to fill the passed-in
    // buffer.
    fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
        if self.buffer().len() >= buf.len() {
            buf.copy_from_slice(&self.buffer()[..buf.len()]);
            self.consume(buf.len());
            return Ok(());
        }

        default_read_exact(self, buf)
    }

    /// Vectored counterpart of [`read`](Read::read), with the same three cases;
    /// the bypass threshold is the combined length of `bufs`.
    fn read_vectored(&mut self, bufs: &mut [IoSliceMut<'_>]) -> io::Result<usize> {
        if self.pos < self.cap {
            let mut buffered = &self.buf[self.pos..self.cap];
            let nread = buffered.read_vectored(bufs)?;
            self.consume(nread);
            return Ok(nread);
        }

        let total_len = bufs.iter().map(|b| b.len()).sum::<usize>();
        if total_len >= self.buf.len() {
            self.discard_buffer();
            return self.inner.read_vectored(bufs);
        }

        let nread = {
            let mut rem = self.fill_buf()?;
            rem.read_vectored(bufs)?
        };
        self.consume(nread);
        Ok(nread)
    }
}

impl<R: Read> BufRead for BufReader<R> {
    /// Reads more data from the inner reader and returns every unconsumed byte.
    ///
    /// Unlike `std::io::BufReader`, this reads from the inner reader even when
    /// the buffer already holds data; the new bytes are appended after the
    /// buffered ones. When the write cursor has reached the end of the buffer,
    /// the unconsumed bytes are first moved to the front to make room.
    ///
    /// # Errors
    ///
    /// Returns an [`io::ErrorKind::Interrupted`] error ("buffer completely
    /// filled") when the buffer is full and nothing has been consumed, i.e.
    /// there is no room left to read into. Errors from the inner reader are
    /// passed through.
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        if self.cap == self.buf.len() {
            if self.pos == 0 {
                return Err(io::Error::new(
                    io::ErrorKind::Interrupted,
                    "buffer completely filled",
                ));
            } else {
                self.reset_buffer_position();
            }
        }

        let read = self.inner.read(&mut self.buf[self.cap..])?;
        self.cap += read;
        Ok(&self.buf[self.pos..self.cap])
    }

    /// Marks `amt` buffered bytes as consumed, clamped to the buffered amount.
    fn consume(&mut self, amt: usize) {
        self.pos = cmp::min(self.pos + amt, self.cap);
    }
}

impl<R> fmt::Debug for BufReader<R>
where
    R: fmt::Debug,
{
    /// Shows the inner reader and the buffer fill level as `unconsumed/capacity`.
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt.debug_struct("BufReader")
            .field("reader", &self.inner)
            .field(
                "buffer",
                &format_args!("{}/{}", self.cap - self.pos, self.buf.len()),
            )
            .finish()
    }
}

impl<R: Seek> Seek for BufReader<R> {
    /// Seek to an offset, in bytes, in the underlying reader.
    ///
    /// The position used for seeking with [`SeekFrom::Current`]`(_)` is the
    /// position the underlying reader would be at if the `BufReader` had no
    /// internal buffer.
381 | /// 382 | /// Seeking always discards the internal buffer, even if the seek position 383 | /// would otherwise fall within it. This guarantees that calling 384 | /// [`BufReader::into_inner()`] immediately after a seek yields the underlying reader 385 | /// at the same position. 386 | /// 387 | /// To seek without discarding the internal buffer, use [`BufReader::seek_relative`]. 388 | /// 389 | /// See [`std::io::Seek`] for more details. 390 | /// 391 | /// Note: In the edge case where you're seeking with [`SeekFrom::Current`]`(n)` 392 | /// where `n` minus the internal buffer length overflows an `i64`, two 393 | /// seeks will be performed instead of one. If the second seek returns 394 | /// [`Err`], the underlying reader will be left at the same position it would 395 | /// have if you called `seek` with [`SeekFrom::Current`]`(0)`. 396 | /// 397 | /// [`std::io::Seek`]: Seek 398 | fn seek(&mut self, pos: SeekFrom) -> io::Result { 399 | let result: u64; 400 | if let SeekFrom::Current(n) = pos { 401 | let remainder = (self.cap - self.pos) as i64; 402 | // it should be safe to assume that remainder fits within an i64 as the alternative 403 | // means we managed to allocate 8 exbibytes and that's absurd. 404 | // But it's not out of the realm of possibility for some weird underlying reader to 405 | // support seeking by i64::MIN so we need to handle underflow when subtracting 406 | // remainder. 407 | if let Some(offset) = n.checked_sub(remainder) { 408 | result = self.inner.seek(SeekFrom::Current(offset))?; 409 | } else { 410 | // seek backwards by our remainder, and then by the offset 411 | self.inner.seek(SeekFrom::Current(-remainder))?; 412 | self.discard_buffer(); 413 | result = self.inner.seek(SeekFrom::Current(n))?; 414 | } 415 | } else { 416 | // Seeking with Start/End doesn't care about our buffer length. 
417 | result = self.inner.seek(pos)?; 418 | } 419 | self.discard_buffer(); 420 | Ok(result) 421 | } 422 | 423 | /// Returns the current seek position from the start of the stream. 424 | /// 425 | /// The value returned is equivalent to `self.seek(SeekFrom::Current(0))` 426 | /// but does not flush the internal buffer. Due to this optimization the 427 | /// function does not guarantee that calling `.into_inner()` immediately 428 | /// afterwards will yield the underlying reader at the same position. Use 429 | /// [`BufReader::seek`] instead if you require that guarantee. 430 | /// 431 | /// # Panics 432 | /// 433 | /// This function will panic if the position of the inner reader is smaller 434 | /// than the amount of buffered data. That can happen if the inner reader 435 | /// has an incorrect implementation of [`Seek::stream_position`], or if the 436 | /// position has gone out of sync due to calling [`Seek::seek`] directly on 437 | /// the underlying reader. 438 | /// 439 | /// # Example 440 | /// 441 | /// ```no_run 442 | /// use std::{ 443 | /// io::{self, BufRead, BufReader, Seek}, 444 | /// fs::File, 445 | /// }; 446 | /// 447 | /// fn main() -> io::Result<()> { 448 | /// let mut f = BufReader::new(File::open("foo.txt")?); 449 | /// 450 | /// let before = f.stream_position()?; 451 | /// f.read_line(&mut String::new())?; 452 | /// let after = f.stream_position()?; 453 | /// 454 | /// println!("The first line was {} bytes long", after - before); 455 | /// Ok(()) 456 | /// } 457 | /// ``` 458 | fn stream_position(&mut self) -> io::Result { 459 | let remainder = (self.cap - self.pos) as u64; 460 | self.inner.stream_position().map(|pos| { 461 | pos.checked_sub(remainder).expect( 462 | "overflow when subtracting remaining buffer size from inner stream position", 463 | ) 464 | }) 465 | } 466 | } 467 | -------------------------------------------------------------------------------- /src/lib.rs: 
-------------------------------------------------------------------------------- 1 | //! This crate provides a `BufReader` alternative that can be used with 2 | //! [nom parsers](http://docs.rs/nom) 3 | //! 4 | //! It will hide for you the [Incomplete](https://docs.rs/nom/7.0.0/nom/enum.Err.html#variant.Incomplete) handling in nom for streaming parsers, retrying and refilling buffers automatically. 5 | //! 6 | //! For synchronous io, use `bufreader::BufReader`, while for asynchronous 7 | //! IO, you should use `async_bufreader::BufReader` 8 | //! 9 | //! # Examples 10 | //! 11 | //! ## sync 12 | //! 13 | //! ```rust,ignore 14 | //! use nom_bufreader::bufreader::BufReader; 15 | //! use nom_bufreader::{Error, Parse}; 16 | //! use std::{net::TcpListener, str::from_utf8}; 17 | //! 18 | //! fn main() -> Result<(), Error<()>> { 19 | //! let listener = TcpListener::bind("127.0.0.1:8080")?; 20 | //! let mut i = BufReader::new(listener.incoming().next().unwrap()?); 21 | //! 22 | //! // method, space and path are nom parsers 23 | //! let m = i.parse(method)?; 24 | //! let _ = i.parse(space)?; 25 | //! let p = i.parse(path)?; 26 | //! println!("got method {}, path {}", m, p); 27 | //! Ok(()) 28 | //! } 29 | //! ``` 30 | //! 31 | //! ### async 32 | //! 33 | //! #### tokio 34 | //! 35 | //! ```rust,ignore 36 | //! use nom_bufreader::async_bufreader::BufReader; 37 | //! use nom_bufreader::{AsyncParse, Error}; 38 | //! use std::str::from_utf8; 39 | //! use tokio_util::compat::TokioAsyncReadCompatExt; 40 | //! use tokio::net::TcpListener; 41 | //! 42 | //! #[tokio::main] 43 | //! async fn main() -> Result<(), Error<()>> { 44 | //! let listener = TcpListener::bind("127.0.0.1:8080").await?; 45 | //! let mut i = BufReader::new(listener.accept().await?.0.compat()); 46 | //! 47 | //! let m = i.parse(method).await?; 48 | //! let _ = i.parse(space).await?; 49 | //! let p = i.parse(path).await?; 50 | //! println!("got method {}, path {}", m, p); 51 | //! Ok(()) 52 | //! } 53 | //! 
``` 54 | //! 55 | //! #### async-std 56 | //! 57 | //! ```rust,ignore 58 | //! use nom_bufreader::async_bufreader::BufReader; 59 | //! use nom_bufreader::{AsyncParse, Error}; 60 | //! use std::str::from_utf8; 61 | //! use async_std::net::TcpListener; 62 | //! 63 | //! #[async_std::main] 64 | //! async fn main() -> Result<(), Error<()>> { 65 | //! let listener = TcpListener::bind("127.0.0.1:8080").await?; 66 | //! let mut i = BufReader::new(listener.accept().await?.0); 67 | //! 68 | //! let m = i.parse(method).await?; 69 | //! let _ = i.parse(space).await?; 70 | //! let p = i.parse(path).await?; 71 | //! println!("got method {}, path {}", m, p); 72 | //! Ok(()) 73 | //! } 74 | //! ``` 75 | use nom::{Err, Offset, Parser}; 76 | use std::io::{self, BufRead, Read}; 77 | 78 | #[cfg(feature = "async")] 79 | use async_trait::async_trait; 80 | #[cfg(feature = "async")] 81 | use futures::{ 82 | io::{AsyncBufReadExt, BufReader}, 83 | AsyncRead, 84 | }; 85 | 86 | #[cfg(feature = "async")] 87 | pub mod async_bufreader; 88 | pub mod bufreader; 89 | 90 | #[derive(Debug)] 91 | pub enum Error { 92 | Error(E), 93 | Failure(E), 94 | Io(io::Error), 95 | Eof, 96 | } 97 | 98 | impl From for Error { 99 | fn from(e: io::Error) -> Self { 100 | Error::Io(e) 101 | } 102 | } 103 | 104 | pub trait Parse { 105 | fn parse(&mut self, p: P) -> Result> 106 | where 107 | for<'a> P: Parser<&'a [u8], O, E>; 108 | } 109 | 110 | impl Parse for std::io::BufReader { 111 | fn parse(&mut self, mut p: P) -> Result> 112 | where 113 | for<'a> P: Parser<&'a [u8], O, E>, 114 | { 115 | loop { 116 | let opt = 117 | //match p(input.buffer()) { 118 | match p.parse(self.buffer()) { 119 | Err(Err::Error(e)) => return Err(Error::Error(e)), 120 | Err(Err::Failure(e)) => return Err(Error::Failure(e)), 121 | Err(Err::Incomplete(_)) => { 122 | None 123 | }, 124 | Ok((i, o)) => { 125 | let offset = self.buffer().offset(i); 126 | Some((offset, o)) 127 | }, 128 | }; 129 | 130 | match opt { 131 | Some((sz, o)) => { 132 | 
self.consume(sz); 133 | return Ok(o); 134 | } 135 | None => { 136 | self.fill_buf()?; 137 | } 138 | } 139 | } 140 | } 141 | } 142 | 143 | impl Parse for bufreader::BufReader { 144 | fn parse(&mut self, mut p: P) -> Result> 145 | where 146 | for<'a> P: Parser<&'a [u8], O, E>, 147 | { 148 | let mut eof = false; 149 | let mut error = None; 150 | loop { 151 | let opt = 152 | //match p(input.buffer()) { 153 | match p.parse(self.buffer()) { 154 | Err(Err::Error(e)) => return Err(Error::Error(e)), 155 | Err(Err::Failure(e)) => return Err(Error::Failure(e)), 156 | Err(Err::Incomplete(_)) => { 157 | None 158 | }, 159 | Ok((i, o)) => { 160 | let offset = self.buffer().offset(i); 161 | Some((offset, o)) 162 | }, 163 | }; 164 | 165 | match opt { 166 | Some((sz, o)) => { 167 | self.consume(sz); 168 | return Ok(o); 169 | } 170 | None => { 171 | if eof { 172 | return Err(Error::Eof); 173 | } 174 | 175 | if let Some(e) = error.take() { 176 | return Err(Error::Io(e)); 177 | } 178 | 179 | match self.fill_buf() { 180 | Err(e) => error = Some(e), 181 | Ok(s) => { 182 | if s.is_empty() { 183 | eof = true; 184 | } 185 | } 186 | } 187 | } 188 | } 189 | } 190 | } 191 | } 192 | 193 | #[cfg(feature = "async")] 194 | #[async_trait] 195 | pub trait AsyncParse { 196 | async fn parse(&mut self, p: P) -> Result> 197 | where 198 | for<'a> P: Parser<&'a [u8], O, E> + Send + 'async_trait; 199 | } 200 | 201 | #[cfg(feature = "async")] 202 | #[async_trait] 203 | impl AsyncParse for BufReader { 204 | async fn parse(&mut self, mut p: P) -> Result> 205 | where 206 | for<'a> P: Parser<&'a [u8], O, E> + Send + 'async_trait, 207 | { 208 | loop { 209 | let opt = 210 | //match p(input.buffer()) { 211 | match p.parse(self.buffer()) { 212 | Err(Err::Error(e)) => return Err(Error::Error(e)), 213 | Err(Err::Failure(e)) => return Err(Error::Failure(e)), 214 | Err(Err::Incomplete(_)) => { 215 | None 216 | }, 217 | Ok((i, o)) => { 218 | let offset = self.buffer().offset(i); 219 | Some((offset, o)) 220 | }, 221 | }; 
222 | 223 | match opt { 224 | Some((sz, o)) => { 225 | self.consume_unpin(sz); 226 | return Ok(o); 227 | } 228 | None => { 229 | self.fill_buf().await?; 230 | } 231 | } 232 | } 233 | } 234 | } 235 | 236 | #[cfg(feature = "async")] 237 | #[async_trait] 238 | impl AsyncParse 239 | for async_bufreader::BufReader 240 | { 241 | async fn parse(&mut self, mut p: P) -> Result> 242 | where 243 | for<'a> P: Parser<&'a [u8], O, E> + Send + 'async_trait, 244 | { 245 | loop { 246 | let opt = 247 | //match p(input.buffer()) { 248 | match p.parse(self.buffer()) { 249 | Err(Err::Error(e)) => return Err(Error::Error(e)), 250 | Err(Err::Failure(e)) => return Err(Error::Failure(e)), 251 | Err(Err::Incomplete(_)) => { 252 | None 253 | }, 254 | Ok((i, o)) => { 255 | let offset = self.buffer().offset(i); 256 | Some((offset, o)) 257 | }, 258 | }; 259 | 260 | match opt { 261 | Some((sz, o)) => { 262 | self.consume_unpin(sz); 263 | return Ok(o); 264 | } 265 | None => { 266 | self.fill_buf().await?; 267 | } 268 | } 269 | } 270 | } 271 | } 272 | --------------------------------------------------------------------------------