├── .github └── workflows │ ├── codecov.yml │ ├── linux.yml │ ├── macos.yml │ └── windows.yml ├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── codecov.yml ├── src ├── async_readers │ ├── ades_futures.rs │ ├── ades_tokio.rs │ ├── ardr_futures.rs │ ├── ardr_tokio.rs │ └── mod.rs ├── async_writers │ ├── aser_futures.rs │ ├── aser_tokio.rs │ ├── awtr_futures.rs │ ├── awtr_tokio.rs │ ├── mod.rs │ └── mwtr_serde.rs ├── byte_record.rs ├── debug.rs ├── deserializer.rs ├── error.rs ├── lib.rs ├── serializer.rs └── string_record.rs └── tests ├── data ├── cities_incomplete_row.csv ├── cities_non_int.csv ├── cities_ok.csv ├── cities_pl_win1250.csv └── invalid_date_time.csv ├── helpers ├── helpers_async_std.rs ├── helpers_tokio.rs └── mod.rs ├── read_records.rs └── read_serde.rs /.github/workflows/codecov.yml: -------------------------------------------------------------------------------- 1 | name: Coverage 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | codecov: 14 | runs-on: ubuntu-latest 15 | 16 | steps: 17 | - uses: actions/checkout@v2 18 | - uses: actions-rs/install@v0.1 19 | with: 20 | crate: cargo-tarpaulin 21 | version: latest 22 | use-tool-cache: true 23 | - name: Generate coverage file 24 | run: | 25 | cargo tarpaulin --version 26 | cargo tarpaulin --out Xml --exclude-files *tokio*.rs 27 | - name: Upload to Codecov 28 | uses: codecov/codecov-action@v1 29 | with: 30 | file: cobertura.xml 31 | -------------------------------------------------------------------------------- /.github/workflows/linux.yml: -------------------------------------------------------------------------------- 1 | name: Linux 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build_and_test: 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | version: 18 | - 1.61.0 19 | - stable 20 | - beta 21 | - nightly 22 | 23 | runs-on: ubuntu-latest 24 | 25 | steps: 26 | - uses: actions/checkout@v2 27 | - name: Build 28 | run: cargo build --verbose 29 | - name: Run tests 30 | run: cargo test --verbose 31 | - name: Run tests with Tokio 32 | run: cargo test --verbose --features tokio 33 | -------------------------------------------------------------------------------- /.github/workflows/macos.yml: -------------------------------------------------------------------------------- 1 | name: MacOS 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build_and_test: 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | version: 18 | - stable 19 | - nightly 20 | 21 | runs-on: macos-latest 22 | 23 | steps: 24 | - uses: actions/checkout@v2 25 | - name: Build 26 | run: cargo build --verbose 27 | - name: Run tests 28 | run: cargo test --verbose 29 | - name: Run tests with Tokio 30 | run: cargo test --verbose --features tokio 31 | -------------------------------------------------------------------------------- /.github/workflows/windows.yml: -------------------------------------------------------------------------------- 1 | name: Windows 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build_and_test: 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | version: 18 | - stable 19 | - nightly 20 | target: 21 | - x86_64-pc-windows-msvc 22 | - x86_64-pc-windows-gnu 23 | 24 | runs-on: windows-latest 25 | 26 | steps: 27 | - uses: actions/checkout@v2 28 | - name: Build 29 | run: cargo build --verbose 30 | - name: Run tests 31 | run: cargo test --verbose 32 | - name: Run tests with Tokio 33 | run: cargo test --verbose --features tokio 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 6 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 7 | Cargo.lock 8 | 9 | # These are backup files generated by rustfmt 10 | **/*.rs.bk 11 | 12 | # IDE 13 | .vscode/** 14 | 15 | # Tests generated files 16 | /examples/* 17 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "csv-async" 3 | version = "1.3.1" 4 | authors = ["gwierzchowski "] 5 | description = "CSV parsing for async." 6 | edition = "2021" 7 | rust-version = "1.61" 8 | documentation = "https://docs.rs/csv-async" 9 | keywords = ["csv", "comma", "parser", "async"] 10 | categories = ["asynchronous", "encoding", "parser-implementations"] 11 | repository = "https://github.com/gwierzchowski/csv-async" 12 | license = "MIT" 13 | readme = "README.md" 14 | exclude = [".gitignore", ".github/**", "codecov.yml"] 15 | 16 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 17 | 18 | [features] 19 | # For tests 20 | # default = ["with_serde", "tokio"] 21 | # default = ["tokio"] 22 | 23 | # This is default set 24 | default = ["with_serde"] 25 | with_serde = ["serde", "bstr/serde", "itoa", "ryu"] 26 | tokio = ["tokio1", "tokio-stream"] 27 | 28 | [dependencies] 29 | cfg-if = "1" 30 | csv-core = "0.1.11" 31 | futures = "0.3" 32 | itoa = { version = "1", optional = true } 33 | ryu = { version = "1", optional = true } 34 | serde = { version = "1", optional = true } 35 | tokio1 = { package = "tokio", version = "1.25", features = ["io-util"], optional = true } 36 | tokio-stream = { version = "0.1", optional = true } 37 | 38 | [dev-dependencies] 39 | async-std = { version = "1", features = ["attributes"]} 40 | bstr = "1" 41 | chrono = { version = "0.4", features = ["serde"] } 42 | indoc = "2" 43 | serde = { version = "1", features = ["derive"] } 44 | tokio1 = { package = "tokio", version = "1.25", features = ["fs", "rt", "rt-multi-thread", "macros"] } 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Grzegorz Wierzchowski 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # csv-async 2 | [![crates.io](https://img.shields.io/crates/v/csv-async)](https://crates.io/crates/csv-async) 3 | [![](https://img.shields.io/crates/d/csv-async.svg)](https://crates.io/crates/csv-async) 4 | [![](https://img.shields.io/crates/dv/csv-async.svg)](https://crates.io/crates/csv-async) 5 | [![Documentation](https://docs.rs/csv-async/badge.svg)](https://docs.rs/csv-async) 6 | [![Version](https://img.shields.io/badge/rustc-1.61+-ab6000.svg)](https://blog.rust-lang.org/2022/05/19/Rust-1.61.0.html) 7 | 8 | [![build status](https://github.com/gwierzchowski/csv-async/workflows/Linux/badge.svg?branch=master&event=push)](https://github.com/gwierzchowski/csv-async/actions?query=workflow%3ALinux) 9 | [![build status](https://github.com/gwierzchowski/csv-async/workflows/Windows/badge.svg?branch=master&event=push)](https://github.com/gwierzchowski/csv-async/actions?query=workflow%3AWindows) 10 | [![build status](https://github.com/gwierzchowski/csv-async/workflows/MacOS/badge.svg?branch=master&event=push)](https://github.com/gwierzchowski/csv-async/actions?query=workflow%3AMacOS) 11 | [![codecov](https://codecov.io/gh/gwierzchowski/csv-async/branch/master/graph/badge.svg)](https://codecov.io/gh/gwierzchowski/csv-async) 12 | 13 | This is CSV library to use in asynchronous environment. 14 | Implemented API is similar to existing [csv](https://github.com/BurntSushi/rust-csv) crate with few exceptions like builder's `create_` functions instead of `from_` as in `csv`. 15 | 16 | Some code is borrowed from `csv` crate (synchronized with version 1.3.3 - Oct 2023). 17 | This package shares CSV parsing routines with `csv` by means of using `csv-core` crate. 18 | Major version of this crate will be kept in sync with major version of `csv` with which it is API compatible. 19 | 20 | CSV files are being read or written by objects of types `AsyncReader` / `AsyncWriter` to / from generic 21 | text-based structures or by `AsyncDeserializer` / `AsyncSerializer` to / from data specific structures with generated `serde` interfaces. 22 | 23 | Library does not contain synchronous reader/writer. If you need it - please use `csv` crate. 24 | 25 | ## Cargo Features 26 | Features which can be enabled / disabled during library build. 27 | 28 | | Feature | Default | Description | 29 | |--------------|---------|-------------| 30 | | `with_serde` | on | Enables crate to use [serde](https://serde.rs) derive macros | 31 | | `tokio` | off | Enables crate to be used with [tokio](https://tokio.rs) runtime and libraries | 32 | 33 | Enabling `tokio` feature allows user to use `tokio::fs::File` and makes `AsyncReader` (`AsyncWriter`) 34 | to be based on `tokio::io::AsyncRead` (`tokio::io::AsyncWrite`). Currently this crate depends on tokio version 1.25. 35 | 36 | Without `tokio` feature, this crate depends only on `futures` crate and reader (writer) are based on traits `futures::io::AsyncRead` (`futures::io::AsyncWrite`), what allows user to use `async_std::fs::File`. 37 | 38 | ## Example usage: 39 | Sample input file: 40 | ```csv 41 | city,region,country,population 42 | Southborough,MA,United States,9686 43 | Northbridge,MA,United States,14061 44 | Marlborough,MA,United States,38334 45 | Springfield,MA,United States,152227 46 | Springfield,MO,United States,150443 47 | Springfield,NJ,United States,14976 48 | Concord,NH,United States,42605 49 | ``` 50 | 51 | ```rust 52 | use std::error::Error; 53 | use std::process; 54 | use futures::stream::StreamExt; 55 | use async_std::fs::File; 56 | 57 | async fn filter_by_region(region:&str, file_in:&str, file_out:&str) -> Result<(), Box> { 58 | // Function reads CSV file that has column named "region" at second position (index = 1). 59 | // It writes to new file only rows with region equal to passed argument 60 | // and removes region column. 61 | let mut rdr = csv_async::AsyncReader::from_reader( 62 | File::open(file_in).await? 63 | ); 64 | let mut wri = csv_async::AsyncWriter::from_writer( 65 | File::create(file_out).await? 66 | ); 67 | wri.write_record(rdr 68 | .headers() 69 | .await?.into_iter() 70 | .filter(|h| *h != "region") 71 | ).await?; 72 | let mut records = rdr.records(); 73 | while let Some(record) = records.next().await { 74 | let record = record?; 75 | match record.get(1) { 76 | Some(reg) if reg == region => 77 | wri.write_record(record 78 | .iter() 79 | .enumerate() 80 | .filter(|(i, _)| *i != 1) 81 | .map(|(_, s)| s) 82 | ).await?, 83 | _ => {}, 84 | } 85 | } 86 | Ok(()) 87 | } 88 | 89 | fn main() { 90 | async_std::task::block_on(async { 91 | if let Err(err) = filter_by_region( 92 | "MA", 93 | "/tmp/all_regions.csv", 94 | "/tmp/MA_only.csv" 95 | ).await { 96 | eprintln!("error running filter_by_region: {}", err); 97 | process::exit(1); 98 | } 99 | }); 100 | } 101 | ``` 102 | 103 | For serde example please see documentation [root](https://docs.rs/csv-async) page. 104 | 105 | ## Plans 106 | Some ideas for future development: 107 | 108 | - Create benchmarks, maybe some performance improvements. 109 | - Things marked as TODO in the code. 110 | - Support for `smol` asynchronous runtime. 111 | - Create more examples and tutorial. 112 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | default: 5 | threshold: 95% # make CI green anyway 6 | patch: 7 | default: 8 | threshold: 95% # make CI green anyway 9 | 10 | ignore: # ignore codecoverage on following paths 11 | - ".github" 12 | -------------------------------------------------------------------------------- /src/async_writers/aser_futures.rs: -------------------------------------------------------------------------------- 1 | use std::result; 2 | 3 | use futures::io::{self, AsyncWrite, AsyncWriteExt}; 4 | use serde::Serialize; 5 | 6 | use crate::AsyncWriterBuilder; 7 | use crate::error::{IntoInnerError, Result}; 8 | use super::mwtr_serde::MemWriter; 9 | 10 | impl AsyncWriterBuilder { 11 | /// Build a CSV `serde` serializer from this configuration that writes data to `ser`. 12 | /// 13 | /// Note that the CSV serializer is buffered automatically, so you should not 14 | /// wrap `ser` in a buffered writer. 15 | /// 16 | /// # Example 17 | /// 18 | /// ``` 19 | /// use std::error::Error; 20 | /// use csv_async::AsyncWriterBuilder; 21 | /// use serde::Serialize; 22 | /// 23 | /// #[derive(Serialize)] 24 | /// struct Row<'a> { 25 | /// name: &'a str, 26 | /// x: u64, 27 | /// y: u64, 28 | /// } 29 | /// 30 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 31 | /// async fn example() -> Result<(), Box> { 32 | /// let mut ser = AsyncWriterBuilder::new().has_headers(false).create_serializer(vec![]); 33 | /// ser.serialize(Row {name: "p1", x: 1, y: 2}).await?; 34 | /// ser.serialize(Row {name: "p2", x: 3, y: 4}).await?; 35 | /// 36 | /// let data = String::from_utf8(ser.into_inner().await?)?; 37 | /// assert_eq!(data, "p1,1,2\np2,3,4\n"); 38 | /// Ok(()) 39 | /// } 40 | /// ``` 41 | pub fn create_serializer(&self, wtr: W) -> AsyncSerializer { 42 | AsyncSerializer::new(self, wtr) 43 | } 44 | } 45 | 46 | /// An already configured CSV `serde` serializer. 47 | /// 48 | /// A CSV serializer takes as input Rust structures that implement `serde::Serialize` trait 49 | /// and writes those data in a valid CSV output. 50 | /// 51 | /// While CSV writing is considerably easier than parsing CSV, a proper writer 52 | /// will do a number of things for you: 53 | /// 54 | /// 1. Quote fields when necessary. 55 | /// 2. Check that all records have the same number of fields. 56 | /// 3. Write records with a single empty field correctly. 57 | /// 4. Automatically serialize normal Rust types to CSV records. When that 58 | /// type is a struct, a header row is automatically written corresponding 59 | /// to the fields of that struct. 60 | /// 5. Use buffering intelligently and otherwise avoid allocation. (This means 61 | /// that callers should not do their own buffering.) 62 | /// 63 | /// All of the above can be configured using a 64 | /// [`AsyncWriterBuilder`](struct.AsyncWriterBuilder.html). 65 | /// However, a `AsyncSerializer` has convenient constructor (`from_writer`) 66 | /// that use the default configuration. 67 | /// 68 | /// Note that the default configuration of a `AsyncSerializer` uses `\n` for record 69 | /// terminators instead of `\r\n` as specified by RFC 4180. Use the 70 | /// `terminator` method on `AsyncWriterBuilder` to set the terminator to `\r\n` if 71 | /// it's desired. 72 | #[derive(Debug)] 73 | pub struct AsyncSerializer { 74 | ser_wtr: MemWriter, 75 | asy_wtr: Option, 76 | } 77 | 78 | impl Drop for AsyncSerializer { 79 | fn drop(&mut self) { 80 | // We ignore result of flush() call while dropping 81 | // Well known problem. 82 | // If you care about flush result call it explicitly 83 | // before AsyncSerializer goes out of scope, 84 | // second flush() call should be no op. 85 | let _ = futures::executor::block_on(self.flush()); 86 | } 87 | } 88 | 89 | impl AsyncSerializer { 90 | fn new(builder: &AsyncWriterBuilder, wtr: W) -> Self { 91 | AsyncSerializer { 92 | ser_wtr: MemWriter::new(builder), 93 | asy_wtr: Some(wtr), 94 | } 95 | } 96 | 97 | /// Build a CSV serializer with a default configuration that writes data to 98 | /// `ser`. 99 | /// 100 | /// Note that the CSV serializer is buffered automatically, so you should not 101 | /// wrap `ser` in a buffered writer. 102 | /// 103 | /// # Example 104 | /// 105 | /// ``` 106 | /// use std::error::Error; 107 | /// use csv_async::AsyncSerializer; 108 | /// use serde::Serialize; 109 | /// 110 | /// #[derive(Serialize)] 111 | /// struct Row<'a> { 112 | /// name: &'a str, 113 | /// x: u64, 114 | /// y: u64, 115 | /// } 116 | /// 117 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 118 | /// async fn example() -> Result<(), Box> { 119 | /// let mut ser = AsyncSerializer::from_writer(vec![]); 120 | /// ser.serialize(Row {name: "p1", x: 1, y: 2}).await?; 121 | /// ser.serialize(Row {name: "p2", x: 3, y: 4}).await?; 122 | /// 123 | /// let data = String::from_utf8(ser.into_inner().await?)?; 124 | /// assert_eq!(data, "name,x,y\np1,1,2\np2,3,4\n"); 125 | /// Ok(()) 126 | /// } 127 | /// ``` 128 | pub fn from_writer(wtr: W) -> AsyncSerializer { 129 | AsyncWriterBuilder::new().create_serializer(wtr) 130 | } 131 | 132 | /// Serialize a single record using Serde. 133 | /// 134 | /// # Example 135 | /// 136 | /// This shows how to serialize normal Rust structs as CSV records. The 137 | /// fields of the struct are used to write a header row automatically. 138 | /// (Writing the header row automatically can be disabled by building the 139 | /// CSV writer with a [`WriterBuilder`](struct.WriterBuilder.html) and 140 | /// calling the `has_headers` method.) 141 | /// 142 | /// ``` 143 | /// use std::error::Error; 144 | /// use csv_async::AsyncSerializer; 145 | /// use serde::Serialize; 146 | /// 147 | /// #[derive(Serialize)] 148 | /// struct Row<'a> { 149 | /// city: &'a str, 150 | /// country: &'a str, 151 | /// // Serde allows us to name our headers exactly, 152 | /// // even if they don't match our struct field names. 153 | /// #[serde(rename = "popcount")] 154 | /// population: u64, 155 | /// } 156 | /// 157 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 158 | /// async fn example() -> Result<(), Box> { 159 | /// let mut ser = AsyncSerializer::from_writer(vec![]); 160 | /// ser.serialize(Row { 161 | /// city: "Boston", 162 | /// country: "United States", 163 | /// population: 4628910, 164 | /// }).await?; 165 | /// ser.serialize(Row { 166 | /// city: "Concord", 167 | /// country: "United States", 168 | /// population: 42695, 169 | /// }).await?; 170 | /// 171 | /// let data = String::from_utf8(ser.into_inner().await?)?; 172 | /// assert_eq!(data, indoc::indoc! {" 173 | /// city,country,popcount 174 | /// Boston,United States,4628910 175 | /// Concord,United States,42695 176 | /// "}); 177 | /// Ok(()) 178 | /// } 179 | /// ``` 180 | /// 181 | /// # Rules 182 | /// 183 | /// The behavior of `serialize` is fairly simple: 184 | /// 185 | /// 1. Nested containers (tuples, `Vec`s, structs, etc.) are always 186 | /// flattened (depth-first order). 187 | /// 188 | /// 2. If `has_headers` is `true` and the type contains field names, then 189 | /// a header row is automatically generated. 190 | /// 191 | /// However, some container types cannot be serialized, and if 192 | /// `has_headers` is `true`, there are some additional restrictions on the 193 | /// types that can be serialized. See below for details. 194 | /// 195 | /// For the purpose of this section, Rust types can be divided into three 196 | /// categories: scalars, non-struct containers, and structs. 197 | /// 198 | /// ## Scalars 199 | /// 200 | /// Single values with no field names are written like the following. Note 201 | /// that some of the outputs may be quoted, according to the selected 202 | /// quoting style. 203 | /// 204 | /// | Name | Example Type | Example Value | Output | 205 | /// | ---- | ---- | ---- | ---- | 206 | /// | boolean | `bool` | `true` | `true` | 207 | /// | integers | `i8`, `i16`, `i32`, `i64`, `i128`, `u8`, `u16`, `u32`, `u64`, `u128` | `5` | `5` | 208 | /// | floats | `f32`, `f64` | `3.14` | `3.14` | 209 | /// | character | `char` | `'☃'` | `☃` | 210 | /// | string | `&str` | `"hi"` | `hi` | 211 | /// | bytes | `&[u8]` | `b"hi"[..]` | `hi` | 212 | /// | option | `Option` | `None` | *empty* | 213 | /// | option | | `Some(5)` | `5` | 214 | /// | unit | `()` | `()` | *empty* | 215 | /// | unit struct | `struct Foo;` | `Foo` | `Foo` | 216 | /// | unit enum variant | `enum E { A, B }` | `E::A` | `A` | 217 | /// | newtype struct | `struct Foo(u8);` | `Foo(5)` | `5` | 218 | /// | newtype enum variant | `enum E { A(u8) }` | `E::A(5)` | `5` | 219 | /// 220 | /// Note that this table includes simple structs and enums. For example, to 221 | /// serialize a field from either an integer or a float type, one can do 222 | /// this: 223 | /// 224 | /// ``` 225 | /// use std::error::Error; 226 | /// 227 | /// use csv_async::AsyncSerializer; 228 | /// use serde::Serialize; 229 | /// 230 | /// #[derive(Serialize)] 231 | /// struct Row { 232 | /// label: String, 233 | /// value: Value, 234 | /// } 235 | /// 236 | /// #[derive(Serialize)] 237 | /// enum Value { 238 | /// Integer(i64), 239 | /// Float(f64), 240 | /// } 241 | /// 242 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 243 | /// async fn example() -> Result<(), Box> { 244 | /// let mut ser = AsyncSerializer::from_writer(vec![]); 245 | /// ser.serialize(Row { 246 | /// label: "foo".to_string(), 247 | /// value: Value::Integer(3), 248 | /// }).await?; 249 | /// ser.serialize(Row { 250 | /// label: "bar".to_string(), 251 | /// value: Value::Float(3.14), 252 | /// }).await?; 253 | /// 254 | /// let data = String::from_utf8(ser.into_inner().await?)?; 255 | /// assert_eq!(data, indoc::indoc! {" 256 | /// label,value 257 | /// foo,3 258 | /// bar,3.14 259 | /// "}); 260 | /// Ok(()) 261 | /// } 262 | /// ``` 263 | /// 264 | /// ## Non-Struct Containers 265 | /// 266 | /// Nested containers are flattened to their scalar components, with the 267 | /// exception of a few types that are not allowed: 268 | /// 269 | /// | Name | Example Type | Example Value | Output | 270 | /// | ---- | ---- | ---- | ---- | 271 | /// | sequence | `Vec` | `vec![1, 2, 3]` | `1,2,3` | 272 | /// | tuple | `(u8, bool)` | `(5, true)` | `5,true` | 273 | /// | tuple struct | `Foo(u8, bool)` | `Foo(5, true)` | `5,true` | 274 | /// | tuple enum variant | `enum E { A(u8, bool) }` | `E::A(5, true)` | *error* | 275 | /// | struct enum variant | `enum E { V { a: u8, b: bool } }` | `E::V { a: 5, b: true }` | *error* | 276 | /// | map | `BTreeMap` | `BTreeMap::new()` | *error* | 277 | /// 278 | /// ## Structs 279 | /// 280 | /// Like the other containers, structs are flattened to their scalar 281 | /// components: 282 | /// 283 | /// | Name | Example Type | Example Value | Output | 284 | /// | ---- | ---- | ---- | ---- | 285 | /// | struct | `struct Foo { a: u8, b: bool }` | `Foo { a: 5, b: true }` | `5,true` | 286 | /// 287 | /// If `has_headers` is `false`, then there are no additional restrictions; 288 | /// types can be nested arbitrarily. For example: 289 | /// 290 | /// ``` 291 | /// use std::error::Error; 292 | /// use csv_async::AsyncWriterBuilder; 293 | /// use serde::Serialize; 294 | /// 295 | /// #[derive(Serialize)] 296 | /// struct Row { 297 | /// label: String, 298 | /// values: Vec, 299 | /// } 300 | /// 301 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 302 | /// async fn example() -> Result<(), Box> { 303 | /// let mut ser = AsyncWriterBuilder::new() 304 | /// .has_headers(false) 305 | /// .create_serializer(vec![]); 306 | /// ser.serialize(Row { 307 | /// label: "foo".to_string(), 308 | /// values: vec![1.1234, 2.5678, 3.14], 309 | /// }).await?; 310 | /// 311 | /// let data = String::from_utf8(ser.into_inner().await?)?; 312 | /// assert_eq!(data, indoc::indoc! {" 313 | /// foo,1.1234,2.5678,3.14 314 | /// "}); 315 | /// Ok(()) 316 | /// } 317 | /// ``` 318 | /// 319 | /// However, if `has_headers` were enabled in the above example, then 320 | /// serialization would return an error. Specifically, when `has_headers` is 321 | /// `true`, there are two restrictions: 322 | /// 323 | /// 1. Named field values in structs must be scalars. 324 | /// 325 | /// 2. All scalars must be named field values in structs. 326 | /// 327 | /// Other than these two restrictions, types can be nested arbitrarily. 328 | /// Here are a few examples: 329 | /// 330 | /// | Value | Header | Record | 331 | /// | ---- | ---- | ---- | 332 | /// | `(Foo { x: 5, y: 6 }, Bar { z: true })` | `x,y,z` | `5,6,true` | 333 | /// | `vec![Foo { x: 5, y: 6 }, Foo { x: 7, y: 8 }]` | `x,y,x,y` | `5,6,7,8` | 334 | /// | `(Foo { x: 5, y: 6 }, vec![Bar { z: Baz(true) }])` | `x,y,z` | `5,6,true` | 335 | /// | `Foo { x: 5, y: (6, 7) }` | *error: restriction 1* | `5,6,7` | 336 | /// | `(5, Foo { x: 6, y: 7 }` | *error: restriction 2* | `5,6,7` | 337 | /// | `(Foo { x: 5, y: 6 }, true)` | *error: restriction 2* | `5,6,true` | 338 | pub async fn serialize(&mut self, record: S) -> Result<()> { 339 | self.ser_wtr.serialize(record)?; 340 | self.ser_wtr.flush()?; 341 | self.asy_wtr.as_mut().unwrap().write_all(self.ser_wtr.data()).await?; 342 | self.ser_wtr.clear(); 343 | Ok(()) 344 | } 345 | 346 | /// Flushes the underlying asynchronous writer. 347 | pub async fn flush(&mut self) -> io::Result<()> { 348 | if let Some(ref mut asy_wtr) = self.asy_wtr { 349 | asy_wtr.flush().await?; 350 | } 351 | Ok(()) 352 | } 353 | 354 | /// Flush the contents of the internal buffer and return the underlying 355 | /// writer. 356 | pub async fn into_inner( 357 | mut self, 358 | ) -> result::Result>> { 359 | match self.flush().await { 360 | Ok(()) => Ok(self.asy_wtr.take().unwrap()), 361 | Err(err) => Err(IntoInnerError::new(self, err)), 362 | } 363 | } 364 | } 365 | -------------------------------------------------------------------------------- /src/async_writers/aser_tokio.rs: -------------------------------------------------------------------------------- 1 | use std::result; 2 | 3 | use tokio::io::{self, AsyncWrite, AsyncWriteExt}; 4 | use serde::Serialize; 5 | 6 | use crate::AsyncWriterBuilder; 7 | use crate::error::{IntoInnerError, Result}; 8 | use super::mwtr_serde::MemWriter; 9 | 10 | impl AsyncWriterBuilder { 11 | /// Build a CSV `serde` serializer from this configuration that writes data to `ser`. 12 | /// 13 | /// Note that the CSV serializer is buffered automatically, so you should not 14 | /// wrap `ser` in a buffered writer. 15 | /// 16 | /// # Example 17 | /// 18 | /// ``` 19 | /// use std::error::Error; 20 | /// use csv_async::AsyncWriterBuilder; 21 | /// use serde::Serialize; 22 | /// 23 | /// #[derive(Serialize)] 24 | /// struct Row<'a> { 25 | /// name: &'a str, 26 | /// x: u64, 27 | /// y: u64, 28 | /// } 29 | /// 30 | /// # fn main() { tokio1::runtime::Runtime::new().unwrap().block_on(async {example().await.unwrap()}); } 31 | /// async fn example() -> Result<(), Box> { 32 | /// let mut ser = AsyncWriterBuilder::new().has_headers(false).create_serializer(vec![]); 33 | /// ser.serialize(Row {name: "p1", x: 1, y: 2}).await?; 34 | /// ser.serialize(Row {name: "p2", x: 3, y: 4}).await?; 35 | /// 36 | /// let data = String::from_utf8(ser.into_inner().await?)?; 37 | /// assert_eq!(data, "p1,1,2\np2,3,4\n"); 38 | /// Ok(()) 39 | /// } 40 | /// ``` 41 | pub fn create_serializer(&self, wtr: W) -> AsyncSerializer { 42 | AsyncSerializer::new(self, wtr) 43 | } 44 | } 45 | 46 | /// An already configured CSV `serde` serializer for `tokio` runtime. 47 | /// 48 | /// A CSV serializer takes as input Rust structures that implement `serde::Serialize` trait 49 | /// and writes those data in a valid CSV output. 50 | /// 51 | /// While CSV writing is considerably easier than parsing CSV, a proper writer 52 | /// will do a number of things for you: 53 | /// 54 | /// 1. Quote fields when necessary. 55 | /// 2. Check that all records have the same number of fields. 56 | /// 3. Write records with a single empty field correctly. 57 | /// 4. Automatically serialize normal Rust types to CSV records. When that 58 | /// type is a struct, a header row is automatically written corresponding 59 | /// to the fields of that struct. 60 | /// 5. Use buffering intelligently and otherwise avoid allocation. (This means 61 | /// that callers should not do their own buffering.) 62 | /// 63 | /// All of the above can be configured using a 64 | /// [`AsyncWriterBuilder`](struct.AsyncWriterBuilder.html). 65 | /// However, a `AsyncSerializer` has convenient constructor (`from_writer`) 66 | /// that use the default configuration. 67 | /// 68 | /// Note that the default configuration of a `AsyncSerializer` uses `\n` for record 69 | /// terminators instead of `\r\n` as specified by RFC 4180. Use the 70 | /// `terminator` method on `AsyncWriterBuilder` to set the terminator to `\r\n` if 71 | /// it's desired. 72 | #[derive(Debug)] 73 | pub struct AsyncSerializer { 74 | ser_wtr: MemWriter, 75 | asy_wtr: Option, 76 | } 77 | 78 | impl Drop for AsyncSerializer { 79 | fn drop(&mut self) { 80 | // We ignore result of flush() call while dropping 81 | // Well known problem. 82 | // If you care about flush result call it explicitly 83 | // before AsyncSerializer goes out of scope, 84 | // second flush() call should be no op. 85 | let _ = futures::executor::block_on(self.flush()); 86 | } 87 | } 88 | 89 | impl AsyncSerializer { 90 | fn new(builder: &AsyncWriterBuilder, wtr: W) -> Self { 91 | AsyncSerializer { 92 | ser_wtr: MemWriter::new(builder), 93 | asy_wtr: Some(wtr), 94 | } 95 | } 96 | 97 | /// Build a CSV serializer with a default configuration that writes data to 98 | /// `ser`. 99 | /// 100 | /// Note that the CSV serializer is buffered automatically, so you should not 101 | /// wrap `ser` in a buffered writer. 102 | /// 103 | /// # Example 104 | /// 105 | /// ``` 106 | /// use std::error::Error; 107 | /// use csv_async::AsyncSerializer; 108 | /// use serde::Serialize; 109 | /// 110 | /// #[derive(Serialize)] 111 | /// struct Row<'a> { 112 | /// name: &'a str, 113 | /// x: u64, 114 | /// y: u64, 115 | /// } 116 | /// 117 | /// # fn main() { tokio1::runtime::Runtime::new().unwrap().block_on(async {example().await.unwrap()}); } 118 | /// async fn example() -> Result<(), Box> { 119 | /// let mut ser = AsyncSerializer::from_writer(vec![]); 120 | /// ser.serialize(Row {name: "p1", x: 1, y: 2}).await?; 121 | /// ser.serialize(Row {name: "p2", x: 3, y: 4}).await?; 122 | /// 123 | /// let data = String::from_utf8(ser.into_inner().await?)?; 124 | /// assert_eq!(data, "name,x,y\np1,1,2\np2,3,4\n"); 125 | /// Ok(()) 126 | /// } 127 | /// ``` 128 | pub fn from_writer(wtr: W) -> AsyncSerializer { 129 | AsyncWriterBuilder::new().create_serializer(wtr) 130 | } 131 | 132 | /// Serialize a single record using Serde. 133 | /// 134 | /// # Example 135 | /// 136 | /// This shows how to serialize normal Rust structs as CSV records. The 137 | /// fields of the struct are used to write a header row automatically. 138 | /// (Writing the header row automatically can be disabled by building the 139 | /// CSV writer with a [`WriterBuilder`](struct.WriterBuilder.html) and 140 | /// calling the `has_headers` method.) 141 | /// 142 | /// ``` 143 | /// use std::error::Error; 144 | /// use csv_async::AsyncSerializer; 145 | /// use serde::Serialize; 146 | /// 147 | /// #[derive(Serialize)] 148 | /// struct Row<'a> { 149 | /// city: &'a str, 150 | /// country: &'a str, 151 | /// // Serde allows us to name our headers exactly, 152 | /// // even if they don't match our struct field names. 153 | /// #[serde(rename = "popcount")] 154 | /// population: u64, 155 | /// } 156 | /// 157 | /// # fn main() { tokio1::runtime::Runtime::new().unwrap().block_on(async {example().await.unwrap()}); } 158 | /// async fn example() -> Result<(), Box> { 159 | /// let mut ser = AsyncSerializer::from_writer(vec![]); 160 | /// ser.serialize(Row { 161 | /// city: "Boston", 162 | /// country: "United States", 163 | /// population: 4628910, 164 | /// }).await?; 165 | /// ser.serialize(Row { 166 | /// city: "Concord", 167 | /// country: "United States", 168 | /// population: 42695, 169 | /// }).await?; 170 | /// 171 | /// let data = String::from_utf8(ser.into_inner().await?)?; 172 | /// assert_eq!(data, indoc::indoc! {" 173 | /// city,country,popcount 174 | /// Boston,United States,4628910 175 | /// Concord,United States,42695 176 | /// "}); 177 | /// Ok(()) 178 | /// } 179 | /// ``` 180 | /// 181 | /// # Rules 182 | /// 183 | /// The behavior of `serialize` is fairly simple: 184 | /// 185 | /// 1. Nested containers (tuples, `Vec`s, structs, etc.) are always 186 | /// flattened (depth-first order). 187 | /// 188 | /// 2. If `has_headers` is `true` and the type contains field names, then 189 | /// a header row is automatically generated. 190 | /// 191 | /// However, some container types cannot be serialized, and if 192 | /// `has_headers` is `true`, there are some additional restrictions on the 193 | /// types that can be serialized. See below for details. 194 | /// 195 | /// For the purpose of this section, Rust types can be divided into three 196 | /// categories: scalars, non-struct containers, and structs. 197 | /// 198 | /// ## Scalars 199 | /// 200 | /// Single values with no field names are written like the following. Note 201 | /// that some of the outputs may be quoted, according to the selected 202 | /// quoting style. 203 | /// 204 | /// | Name | Example Type | Example Value | Output | 205 | /// | ---- | ---- | ---- | ---- | 206 | /// | boolean | `bool` | `true` | `true` | 207 | /// | integers | `i8`, `i16`, `i32`, `i64`, `i128`, `u8`, `u16`, `u32`, `u64`, `u128` | `5` | `5` | 208 | /// | floats | `f32`, `f64` | `3.14` | `3.14` | 209 | /// | character | `char` | `'☃'` | `☃` | 210 | /// | string | `&str` | `"hi"` | `hi` | 211 | /// | bytes | `&[u8]` | `b"hi"[..]` | `hi` | 212 | /// | option | `Option` | `None` | *empty* | 213 | /// | option | | `Some(5)` | `5` | 214 | /// | unit | `()` | `()` | *empty* | 215 | /// | unit struct | `struct Foo;` | `Foo` | `Foo` | 216 | /// | unit enum variant | `enum E { A, B }` | `E::A` | `A` | 217 | /// | newtype struct | `struct Foo(u8);` | `Foo(5)` | `5` | 218 | /// | newtype enum variant | `enum E { A(u8) }` | `E::A(5)` | `5` | 219 | /// 220 | /// Note that this table includes simple structs and enums. For example, to 221 | /// serialize a field from either an integer or a float type, one can do 222 | /// this: 223 | /// 224 | /// ``` 225 | /// use std::error::Error; 226 | /// 227 | /// use csv_async::AsyncSerializer; 228 | /// use serde::Serialize; 229 | /// 230 | /// #[derive(Serialize)] 231 | /// struct Row { 232 | /// label: String, 233 | /// value: Value, 234 | /// } 235 | /// 236 | /// #[derive(Serialize)] 237 | /// enum Value { 238 | /// Integer(i64), 239 | /// Float(f64), 240 | /// } 241 | /// 242 | /// # fn main() { tokio1::runtime::Runtime::new().unwrap().block_on(async {example().await.unwrap()}); } 243 | /// async fn example() -> Result<(), Box> { 244 | /// let mut ser = AsyncSerializer::from_writer(vec![]); 245 | /// ser.serialize(Row { 246 | /// label: "foo".to_string(), 247 | /// value: Value::Integer(3), 248 | /// }).await?; 249 | /// ser.serialize(Row { 250 | /// label: "bar".to_string(), 251 | /// value: Value::Float(3.14), 252 | /// }).await?; 253 | /// 254 | /// let data = String::from_utf8(ser.into_inner().await?)?; 255 | /// assert_eq!(data, indoc::indoc! {" 256 | /// label,value 257 | /// foo,3 258 | /// bar,3.14 259 | /// "}); 260 | /// Ok(()) 261 | /// } 262 | /// ``` 263 | /// 264 | /// ## Non-Struct Containers 265 | /// 266 | /// Nested containers are flattened to their scalar components, with the 267 | /// exception of a few types that are not allowed: 268 | /// 269 | /// | Name | Example Type | Example Value | Output | 270 | /// | ---- | ---- | ---- | ---- | 271 | /// | sequence | `Vec` | `vec![1, 2, 3]` | `1,2,3` | 272 | /// | tuple | `(u8, bool)` | `(5, true)` | `5,true` | 273 | /// | tuple struct | `Foo(u8, bool)` | `Foo(5, true)` | `5,true` | 274 | /// | tuple enum variant | `enum E { A(u8, bool) }` | `E::A(5, true)` | *error* | 275 | /// | struct enum variant | `enum E { V { a: u8, b: bool } }` | `E::V { a: 5, b: true }` | *error* | 276 | /// | map | `BTreeMap` | `BTreeMap::new()` | *error* | 277 | /// 278 | /// ## Structs 279 | /// 280 | /// Like the other containers, structs are flattened to their scalar 281 | /// components: 282 | /// 283 | /// | Name | Example Type | Example Value | Output | 284 | /// | ---- | ---- | ---- | ---- | 285 | /// | struct | `struct Foo { a: u8, b: bool }` | `Foo { a: 5, b: true }` | `5,true` | 286 | /// 287 | /// If `has_headers` is `false`, then there are no additional restrictions; 288 | /// types can be nested arbitrarily. For example: 289 | /// 290 | /// ``` 291 | /// use std::error::Error; 292 | /// use csv_async::AsyncWriterBuilder; 293 | /// use serde::Serialize; 294 | /// 295 | /// #[derive(Serialize)] 296 | /// struct Row { 297 | /// label: String, 298 | /// values: Vec, 299 | /// } 300 | /// 301 | /// # fn main() { tokio1::runtime::Runtime::new().unwrap().block_on(async {example().await.unwrap()}); } 302 | /// async fn example() -> Result<(), Box> { 303 | /// let mut ser = AsyncWriterBuilder::new() 304 | /// .has_headers(false) 305 | /// .create_serializer(vec![]); 306 | /// ser.serialize(Row { 307 | /// label: "foo".to_string(), 308 | /// values: vec![1.1234, 2.5678, 3.14], 309 | /// }).await?; 310 | /// 311 | /// let data = String::from_utf8(ser.into_inner().await?)?; 312 | /// assert_eq!(data, indoc::indoc! {" 313 | /// foo,1.1234,2.5678,3.14 314 | /// "}); 315 | /// Ok(()) 316 | /// } 317 | /// ``` 318 | /// 319 | /// However, if `has_headers` were enabled in the above example, then 320 | /// serialization would return an error. Specifically, when `has_headers` is 321 | /// `true`, there are two restrictions: 322 | /// 323 | /// 1. Named field values in structs must be scalars. 324 | /// 325 | /// 2. All scalars must be named field values in structs. 326 | /// 327 | /// Other than these two restrictions, types can be nested arbitrarily. 328 | /// Here are a few examples: 329 | /// 330 | /// | Value | Header | Record | 331 | /// | ---- | ---- | ---- | 332 | /// | `(Foo { x: 5, y: 6 }, Bar { z: true })` | `x,y,z` | `5,6,true` | 333 | /// | `vec![Foo { x: 5, y: 6 }, Foo { x: 7, y: 8 }]` | `x,y,x,y` | `5,6,7,8` | 334 | /// | `(Foo { x: 5, y: 6 }, vec![Bar { z: Baz(true) }])` | `x,y,z` | `5,6,true` | 335 | /// | `Foo { x: 5, y: (6, 7) }` | *error: restriction 1* | `5,6,7` | 336 | /// | `(5, Foo { x: 6, y: 7 }` | *error: restriction 2* | `5,6,7` | 337 | /// | `(Foo { x: 5, y: 6 }, true)` | *error: restriction 2* | `5,6,true` | 338 | pub async fn serialize(&mut self, record: S) -> Result<()> { 339 | self.ser_wtr.serialize(record)?; 340 | self.ser_wtr.flush()?; 341 | self.asy_wtr.as_mut().unwrap().write_all(self.ser_wtr.data()).await?; 342 | self.ser_wtr.clear(); 343 | Ok(()) 344 | } 345 | 346 | /// Flushes the underlying asynchronous writer. 347 | pub async fn flush(&mut self) -> io::Result<()> { 348 | if let Some(ref mut asy_wtr) = self.asy_wtr { 349 | asy_wtr.flush().await?; 350 | } 351 | Ok(()) 352 | } 353 | 354 | /// Flush the contents of the internal buffer and return the underlying 355 | /// writer. 356 | pub async fn into_inner( 357 | mut self, 358 | ) -> result::Result>> { 359 | match self.flush().await { 360 | Ok(()) => Ok(self.asy_wtr.take().unwrap()), 361 | Err(err) => Err(IntoInnerError::new(self, err)), 362 | } 363 | } 364 | } 365 | -------------------------------------------------------------------------------- /src/async_writers/awtr_futures.rs: -------------------------------------------------------------------------------- 1 | use std::result; 2 | 3 | use futures::io::{self, AsyncWrite}; 4 | 5 | use crate::AsyncWriterBuilder; 6 | use crate::byte_record::ByteRecord; 7 | use crate::error::Result; 8 | use super::AsyncWriterImpl; 9 | 10 | impl AsyncWriterBuilder { 11 | /// Build a CSV writer from this configuration that writes data to `wtr`. 12 | /// 13 | /// Note that the CSV writer is buffered automatically, so you should not 14 | /// wrap `wtr` in a buffered writer like. 15 | /// 16 | /// # Example 17 | /// 18 | /// ``` 19 | /// use std::error::Error; 20 | /// use csv_async::AsyncWriterBuilder; 21 | /// 22 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 23 | /// async fn example() -> Result<(), Box> { 24 | /// let mut wtr = AsyncWriterBuilder::new().create_writer(vec![]); 25 | /// wtr.write_record(&["a", "b", "c"]).await?; 26 | /// wtr.write_record(&["x", "y", "z"]).await?; 27 | /// 28 | /// let data = String::from_utf8(wtr.into_inner().await?)?; 29 | /// assert_eq!(data, "a,b,c\nx,y,z\n"); 30 | /// Ok(()) 31 | /// } 32 | /// ``` 33 | pub fn create_writer(&self, wtr: W) -> AsyncWriter { 34 | AsyncWriter::new(self, wtr) 35 | } 36 | } 37 | 38 | /// An already configured CSV writer. 39 | /// 40 | /// A CSV writer takes as input Rust values and writes those values in a valid 41 | /// CSV format as output. 42 | /// 43 | /// While CSV writing is considerably easier than parsing CSV, a proper writer 44 | /// will do a number of things for you: 45 | /// 46 | /// 1. Quote fields when necessary. 47 | /// 2. Check that all records have the same number of fields. 48 | /// 3. Write records with a single empty field correctly. 49 | /// 4. Use buffering intelligently and otherwise avoid allocation. (This means 50 | /// that callers should not do their own buffering.) 51 | /// 52 | /// All of the above can be configured using a 53 | /// [`AsyncWriterBuilder`](struct.AsyncWriterBuilder.html). 54 | /// However, a `AsyncWriter` has convenient constructor (from_writer`) 55 | /// that use the default configuration. 56 | /// 57 | /// Note that the default configuration of a `AsyncWriter` uses `\n` for record 58 | /// terminators instead of `\r\n` as specified by RFC 4180. Use the 59 | /// `terminator` method on `AsyncWriterBuilder` to set the terminator to `\r\n` if 60 | /// it's desired. 61 | #[derive(Debug)] 62 | pub struct AsyncWriter(AsyncWriterImpl); 63 | 64 | impl AsyncWriter { 65 | fn new(builder: &AsyncWriterBuilder, wtr: W) -> AsyncWriter { 66 | AsyncWriter(AsyncWriterImpl::new(builder, wtr)) 67 | } 68 | 69 | /// Build a CSV writer with a default configuration that writes data to 70 | /// `wtr`. 71 | /// 72 | /// Note that the CSV writer is buffered automatically, so you should not 73 | /// wrap `wtr` in a buffered writer. 74 | /// 75 | /// # Example 76 | /// 77 | /// ``` 78 | /// use std::error::Error; 79 | /// use csv_async::AsyncWriter; 80 | /// 81 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 82 | /// async fn example() -> Result<(), Box> { 83 | /// let mut wtr = AsyncWriter::from_writer(vec![]); 84 | /// wtr.write_record(&["a", "b", "c"]).await?; 85 | /// wtr.write_record(&["x", "y", "z"]).await?; 86 | /// 87 | /// let data = String::from_utf8(wtr.into_inner().await?)?; 88 | /// assert_eq!(data, "a,b,c\nx,y,z\n"); 89 | /// Ok(()) 90 | /// } 91 | /// ``` 92 | pub fn from_writer(wtr: W) -> AsyncWriter { 93 | AsyncWriterBuilder::new().create_writer(wtr) 94 | } 95 | 96 | /// Write a single record. 97 | /// 98 | /// This method accepts something that can be turned into an iterator that 99 | /// yields elements that can be represented by a `&[u8]`. 100 | /// 101 | /// This may be called with an empty iterator, which will cause a record 102 | /// terminator to be written. If no fields had been written, then a single 103 | /// empty field is written before the terminator. 104 | /// 105 | /// # Example 106 | /// 107 | /// ``` 108 | /// use std::error::Error; 109 | /// use csv_async::AsyncWriter; 110 | /// 111 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 112 | /// async fn example() -> Result<(), Box> { 113 | /// let mut wtr = AsyncWriter::from_writer(vec![]); 114 | /// wtr.write_record(&["a", "b", "c"]).await?; 115 | /// wtr.write_record(&["x", "y", "z"]).await?; 116 | /// 117 | /// let data = String::from_utf8(wtr.into_inner().await?)?; 118 | /// assert_eq!(data, "a,b,c\nx,y,z\n"); 119 | /// Ok(()) 120 | /// } 121 | /// ``` 122 | #[inline] 123 | pub async fn write_record(&mut self, record: I) -> Result<()> 124 | where 125 | I: IntoIterator, 126 | T: AsRef<[u8]>, 127 | { 128 | self.0.write_record(record).await 129 | } 130 | 131 | /// Write a single `ByteRecord`. 132 | /// 133 | /// This method accepts a borrowed `ByteRecord` and writes its contents 134 | /// to the underlying writer. 135 | /// 136 | /// This is similar to `write_record` except that it specifically requires 137 | /// a `ByteRecord`. This permits the writer to possibly write the record 138 | /// more quickly than the more generic `write_record`. 139 | /// 140 | /// This may be called with an empty record, which will cause a record 141 | /// terminator to be written. If no fields had been written, then a single 142 | /// empty field is written before the terminator. 143 | /// 144 | /// # Example 145 | /// 146 | /// ``` 147 | /// use std::error::Error; 148 | /// use csv_async::{ByteRecord, AsyncWriter}; 149 | /// 150 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 151 | /// async fn example() -> Result<(), Box> { 152 | /// let mut wtr = AsyncWriter::from_writer(vec![]); 153 | /// wtr.write_byte_record(&ByteRecord::from(&["a", "b", "c"][..])).await?; 154 | /// wtr.write_byte_record(&ByteRecord::from(&["x", "y", "z"][..])).await?; 155 | /// 156 | /// let data = String::from_utf8(wtr.into_inner().await?)?; 157 | /// assert_eq!(data, "a,b,c\nx,y,z\n"); 158 | /// Ok(()) 159 | /// } 160 | /// ``` 161 | #[inline] 162 | pub async fn write_byte_record(&mut self, record: &ByteRecord) -> Result<()> { 163 | self.0.write_byte_record(record).await 164 | } 165 | 166 | /// Write a single field. 167 | /// 168 | /// One should prefer using `write_record` over this method. It is provided 169 | /// for cases where writing a field at a time is more convenient than 170 | /// writing a record at a time. 171 | /// 172 | /// Note that if this API is used, `write_record` should be called with an 173 | /// empty iterator to write a record terminator. 174 | /// 175 | /// # Example 176 | /// 177 | /// ``` 178 | /// use std::error::Error; 179 | /// use csv_async::AsyncWriter; 180 | /// 181 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 182 | /// async fn example() -> Result<(), Box> { 183 | /// let mut wtr = AsyncWriter::from_writer(vec![]); 184 | /// wtr.write_field("a").await?; 185 | /// wtr.write_field("b").await?; 186 | /// wtr.write_field("c").await?; 187 | /// wtr.write_record(None::<&[u8]>).await?; 188 | /// wtr.write_field("x").await?; 189 | /// wtr.write_field("y").await?; 190 | /// wtr.write_field("z").await?; 191 | /// wtr.write_record(None::<&[u8]>).await?; 192 | /// 193 | /// let data = String::from_utf8(wtr.into_inner().await?)?; 194 | /// assert_eq!(data, "a,b,c\nx,y,z\n"); 195 | /// Ok(()) 196 | /// } 197 | /// ``` 198 | #[inline] 199 | pub async fn write_field>(&mut self, field: T) -> Result<()> { 200 | self.0.write_field(field).await 201 | } 202 | 203 | /// Flush the contents of the internal buffer to the underlying writer. 204 | /// 205 | /// If there was a problem writing to the underlying writer, then an error 206 | /// is returned. 207 | /// 208 | /// This finction is also called by writer destructor. 209 | #[inline] 210 | pub async fn flush(&mut self) -> io::Result<()> { 211 | self.0.flush().await 212 | } 213 | 214 | /// Flush the contents of the internal buffer and return the underlying writer. 215 | /// 216 | pub async fn into_inner( 217 | self, 218 | ) -> result::Result { 219 | match self.0.into_inner().await { 220 | Ok(w) => Ok(w), 221 | Err(err) => Err(err.into_error()), 222 | } 223 | } 224 | } 225 | 226 | #[cfg(test)] 227 | mod tests { 228 | use std::pin::Pin; 229 | use std::task::{Context, Poll}; 230 | 231 | use futures::io; 232 | 233 | use crate::byte_record::ByteRecord; 234 | use crate::error::ErrorKind; 235 | use crate::string_record::StringRecord; 236 | 237 | use super::{AsyncWriter, AsyncWriterBuilder}; 238 | 239 | async fn wtr_as_string<'w>(wtr: AsyncWriter>) -> String { 240 | String::from_utf8(wtr.into_inner().await.unwrap()).unwrap() 241 | } 242 | 243 | #[async_std::test] 244 | async fn one_record() { 245 | let mut wtr = AsyncWriter::from_writer(vec![]); 246 | wtr.write_record(&["a", "b", "c"]).await.unwrap(); 247 | 248 | assert_eq!(wtr_as_string(wtr).await, "a,b,c\n"); 249 | } 250 | 251 | #[async_std::test] 252 | async fn one_string_record() { 253 | let mut wtr = AsyncWriter::from_writer(vec![]); 254 | wtr.write_record(&StringRecord::from(vec!["a", "b", "c"])).await.unwrap(); 255 | 256 | assert_eq!(wtr_as_string(wtr).await, "a,b,c\n"); 257 | } 258 | 259 | #[async_std::test] 260 | async fn one_byte_record() { 261 | let mut wtr = AsyncWriter::from_writer(vec![]); 262 | wtr.write_record(&ByteRecord::from(vec!["a", "b", "c"])).await.unwrap(); 263 | 264 | assert_eq!(wtr_as_string(wtr).await, "a,b,c\n"); 265 | } 266 | 267 | #[async_std::test] 268 | async fn raw_one_byte_record() { 269 | let mut wtr = AsyncWriter::from_writer(vec![]); 270 | wtr.write_byte_record(&ByteRecord::from(vec!["a", "b", "c"])).await.unwrap(); 271 | 272 | assert_eq!(wtr_as_string(wtr).await, "a,b,c\n"); 273 | } 274 | 275 | #[async_std::test] 276 | async fn one_empty_record() { 277 | let mut wtr = AsyncWriter::from_writer(vec![]); 278 | wtr.write_record(&[""]).await.unwrap(); 279 | 280 | assert_eq!(wtr_as_string(wtr).await, "\"\"\n"); 281 | } 282 | 283 | #[async_std::test] 284 | async fn raw_one_empty_record() { 285 | let mut wtr = AsyncWriter::from_writer(vec![]); 286 | wtr.write_byte_record(&ByteRecord::from(vec![""])).await.unwrap(); 287 | 288 | assert_eq!(wtr_as_string(wtr).await, "\"\"\n"); 289 | } 290 | 291 | #[async_std::test] 292 | async fn two_empty_records() { 293 | let mut wtr = AsyncWriter::from_writer(vec![]); 294 | wtr.write_record(&[""]).await.unwrap(); 295 | wtr.write_record(&[""]).await.unwrap(); 296 | 297 | assert_eq!(wtr_as_string(wtr).await, "\"\"\n\"\"\n"); 298 | } 299 | 300 | #[async_std::test] 301 | async fn raw_two_empty_records() { 302 | let mut wtr = AsyncWriter::from_writer(vec![]); 303 | wtr.write_byte_record(&ByteRecord::from(vec![""])).await.unwrap(); 304 | wtr.write_byte_record(&ByteRecord::from(vec![""])).await.unwrap(); 305 | 306 | assert_eq!(wtr_as_string(wtr).await, "\"\"\n\"\"\n"); 307 | } 308 | 309 | #[async_std::test] 310 | async fn unequal_records_bad() { 311 | let mut wtr = AsyncWriter::from_writer(vec![]); 312 | wtr.write_record(&ByteRecord::from(vec!["a", "b", "c"])).await.unwrap(); 313 | let err = wtr.write_record(&ByteRecord::from(vec!["a"])).await.unwrap_err(); 314 | match *err.kind() { 315 | ErrorKind::UnequalLengths { ref pos, expected_len, len } => { 316 | assert!(pos.is_none()); 317 | assert_eq!(expected_len, 3); 318 | assert_eq!(len, 1); 319 | } 320 | ref x => { 321 | panic!("expected UnequalLengths error, but got '{:?}'", x); 322 | } 323 | } 324 | } 325 | 326 | #[async_std::test] 327 | async fn raw_unequal_records_bad() { 328 | let mut wtr = AsyncWriter::from_writer(vec![]); 329 | wtr.write_byte_record(&ByteRecord::from(vec!["a", "b", "c"])).await.unwrap(); 330 | let err = 331 | wtr.write_byte_record(&ByteRecord::from(vec!["a"])).await.unwrap_err(); 332 | match *err.kind() { 333 | ErrorKind::UnequalLengths { ref pos, expected_len, len } => { 334 | assert!(pos.is_none()); 335 | assert_eq!(expected_len, 3); 336 | assert_eq!(len, 1); 337 | } 338 | ref x => { 339 | panic!("expected UnequalLengths error, but got '{:?}'", x); 340 | } 341 | } 342 | } 343 | 344 | #[async_std::test] 345 | async fn unequal_records_ok() { 346 | let mut wtr = AsyncWriterBuilder::new().flexible(true).create_writer(vec![]); 347 | wtr.write_record(&ByteRecord::from(vec!["a", "b", "c"])).await.unwrap(); 348 | wtr.write_record(&ByteRecord::from(vec!["a"])).await.unwrap(); 349 | assert_eq!(wtr_as_string(wtr).await, "a,b,c\na\n"); 350 | } 351 | 352 | #[async_std::test] 353 | async fn raw_unequal_records_ok() { 354 | let mut wtr = AsyncWriterBuilder::new().flexible(true).create_writer(vec![]); 355 | wtr.write_byte_record(&ByteRecord::from(vec!["a", "b", "c"])).await.unwrap(); 356 | wtr.write_byte_record(&ByteRecord::from(vec!["a"])).await.unwrap(); 357 | assert_eq!(wtr_as_string(wtr).await, "a,b,c\na\n"); 358 | } 359 | 360 | #[async_std::test] 361 | async fn full_buffer_should_not_flush_underlying() { 362 | #[derive(Debug)] 363 | struct MarkWriteAndFlush(Vec); 364 | 365 | impl MarkWriteAndFlush { 366 | fn to_str(self) -> String { 367 | String::from_utf8(self.0).unwrap() 368 | } 369 | } 370 | 371 | impl io::AsyncWrite for MarkWriteAndFlush { 372 | fn poll_write( 373 | mut self: Pin<&mut Self>, 374 | _: &mut Context, 375 | buf: &[u8] 376 | ) -> Poll> { 377 | use std::io::Write; 378 | self.0.write(b">").unwrap(); 379 | let written = self.0.write(buf).unwrap(); 380 | assert_eq!(written, buf.len()); 381 | self.0.write(b"<").unwrap(); 382 | // AsyncWriteExt::write_all panics if write returns more than buf.len() 383 | // Poll::Ready(Ok(written + 2)) 384 | Poll::Ready(Ok(written)) 385 | } 386 | 387 | fn poll_flush(mut self: Pin<&mut Self>, _: &mut Context) -> Poll> { 388 | use std::io::Write; 389 | self.0.write(b"!").unwrap(); 390 | Poll::Ready(Ok(())) 391 | } 392 | 393 | fn poll_close(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 394 | self.poll_flush(cx) 395 | } 396 | } 397 | 398 | let underlying = MarkWriteAndFlush(vec![]); 399 | let mut wtr = 400 | AsyncWriterBuilder::new().buffer_capacity(4).create_writer(underlying); 401 | 402 | wtr.write_byte_record(&ByteRecord::from(vec!["a", "b"])).await.unwrap(); 403 | wtr.write_byte_record(&ByteRecord::from(vec!["c", "d"])).await.unwrap(); 404 | wtr.flush().await.unwrap(); 405 | wtr.write_byte_record(&ByteRecord::from(vec!["e", "f"])).await.unwrap(); 406 | 407 | let got = wtr.into_inner().await.unwrap().to_str(); 408 | 409 | // As the buffer size is 4 we should write each record separately, and 410 | // flush when explicitly called and implictly in into_inner. 411 | assert_eq!(got, ">a,b\n<>c,d\ne,f\n Result<(), Box> { 24 | /// let mut wtr = AsyncWriterBuilder::new().create_writer(vec![]); 25 | /// wtr.write_record(&["a", "b", "c"]).await?; 26 | /// wtr.write_record(&["x", "y", "z"]).await?; 27 | /// 28 | /// let data = String::from_utf8(wtr.into_inner().await?)?; 29 | /// assert_eq!(data, "a,b,c\nx,y,z\n"); 30 | /// Ok(()) 31 | /// } 32 | /// ``` 33 | pub fn create_writer(&self, wtr: W) -> AsyncWriter { 34 | AsyncWriter::new(self, wtr) 35 | } 36 | } 37 | 38 | /// An already configured CSV writer for `tokio` runtime. 39 | /// 40 | /// A CSV writer takes as input Rust values and writes those values in a valid 41 | /// CSV format as output. 42 | /// 43 | /// While CSV writing is considerably easier than parsing CSV, a proper writer 44 | /// will do a number of things for you: 45 | /// 46 | /// 1. Quote fields when necessary. 47 | /// 2. Check that all records have the same number of fields. 48 | /// 3. Write records with a single empty field correctly. 49 | /// 4. Use buffering intelligently and otherwise avoid allocation. (This means 50 | /// that callers should not do their own buffering.) 51 | /// 52 | /// All of the above can be configured using a 53 | /// [`AsyncWriterBuilder`](struct.AsyncWriterBuilder.html). 54 | /// However, a `AsyncWriter` has convenient constructor (from_writer`) 55 | /// that use the default configuration. 56 | /// 57 | /// Note that the default configuration of a `AsyncWriter` uses `\n` for record 58 | /// terminators instead of `\r\n` as specified by RFC 4180. Use the 59 | /// `terminator` method on `AsyncWriterBuilder` to set the terminator to `\r\n` if 60 | /// it's desired. 61 | #[derive(Debug)] 62 | pub struct AsyncWriter(AsyncWriterImpl); 63 | 64 | impl AsyncWriter { 65 | fn new(builder: &AsyncWriterBuilder, wtr: W) -> AsyncWriter { 66 | AsyncWriter(AsyncWriterImpl::new(builder, wtr)) 67 | } 68 | 69 | /// Build a CSV writer with a default configuration that writes data to 70 | /// `wtr`. 71 | /// 72 | /// Note that the CSV writer is buffered automatically, so you should not 73 | /// wrap `wtr` in a buffered writer. 74 | /// 75 | /// # Example 76 | /// 77 | /// ``` 78 | /// use std::error::Error; 79 | /// use csv_async::AsyncWriter; 80 | /// 81 | /// # fn main() { tokio1::runtime::Runtime::new().unwrap().block_on(async {example().await.unwrap()}); } 82 | /// async fn example() -> Result<(), Box> { 83 | /// let mut wtr = AsyncWriter::from_writer(vec![]); 84 | /// wtr.write_record(&["a", "b", "c"]).await?; 85 | /// wtr.write_record(&["x", "y", "z"]).await?; 86 | /// 87 | /// let data = String::from_utf8(wtr.into_inner().await?)?; 88 | /// assert_eq!(data, "a,b,c\nx,y,z\n"); 89 | /// Ok(()) 90 | /// } 91 | /// ``` 92 | pub fn from_writer(wtr: W) -> AsyncWriter { 93 | AsyncWriterBuilder::new().create_writer(wtr) 94 | } 95 | 96 | /// Write a single record. 97 | /// 98 | /// This method accepts something that can be turned into an iterator that 99 | /// yields elements that can be represented by a `&[u8]`. 100 | /// 101 | /// This may be called with an empty iterator, which will cause a record 102 | /// terminator to be written. If no fields had been written, then a single 103 | /// empty field is written before the terminator. 104 | /// 105 | /// # Example 106 | /// 107 | /// ``` 108 | /// use std::error::Error; 109 | /// use csv_async::AsyncWriter; 110 | /// 111 | /// # fn main() { tokio1::runtime::Runtime::new().unwrap().block_on(async {example().await.unwrap()}); } 112 | /// async fn example() -> Result<(), Box> { 113 | /// let mut wtr = AsyncWriter::from_writer(vec![]); 114 | /// wtr.write_record(&["a", "b", "c"]).await?; 115 | /// wtr.write_record(&["x", "y", "z"]).await?; 116 | /// 117 | /// let data = String::from_utf8(wtr.into_inner().await?)?; 118 | /// assert_eq!(data, "a,b,c\nx,y,z\n"); 119 | /// Ok(()) 120 | /// } 121 | /// ``` 122 | #[inline] 123 | pub async fn write_record(&mut self, record: I) -> Result<()> 124 | where 125 | I: IntoIterator, 126 | T: AsRef<[u8]>, 127 | { 128 | self.0.write_record(record).await 129 | } 130 | 131 | /// Write a single `ByteRecord`. 132 | /// 133 | /// This method accepts a borrowed `ByteRecord` and writes its contents 134 | /// to the underlying writer. 135 | /// 136 | /// This is similar to `write_record` except that it specifically requires 137 | /// a `ByteRecord`. This permits the writer to possibly write the record 138 | /// more quickly than the more generic `write_record`. 139 | /// 140 | /// This may be called with an empty record, which will cause a record 141 | /// terminator to be written. If no fields had been written, then a single 142 | /// empty field is written before the terminator. 143 | /// 144 | /// # Example 145 | /// 146 | /// ``` 147 | /// use std::error::Error; 148 | /// use csv_async::{ByteRecord, AsyncWriter}; 149 | /// 150 | /// # fn main() { tokio1::runtime::Runtime::new().unwrap().block_on(async {example().await.unwrap()}); } 151 | /// async fn example() -> Result<(), Box> { 152 | /// let mut wtr = AsyncWriter::from_writer(vec![]); 153 | /// wtr.write_byte_record(&ByteRecord::from(&["a", "b", "c"][..])).await?; 154 | /// wtr.write_byte_record(&ByteRecord::from(&["x", "y", "z"][..])).await?; 155 | /// 156 | /// let data = String::from_utf8(wtr.into_inner().await?)?; 157 | /// assert_eq!(data, "a,b,c\nx,y,z\n"); 158 | /// Ok(()) 159 | /// } 160 | /// ``` 161 | #[inline] 162 | pub async fn write_byte_record(&mut self, record: &ByteRecord) -> Result<()> { 163 | self.0.write_byte_record(record).await 164 | } 165 | 166 | /// Write a single field. 167 | /// 168 | /// One should prefer using `write_record` over this method. It is provided 169 | /// for cases where writing a field at a time is more convenient than 170 | /// writing a record at a time. 171 | /// 172 | /// Note that if this API is used, `write_record` should be called with an 173 | /// empty iterator to write a record terminator. 174 | /// 175 | /// # Example 176 | /// 177 | /// ``` 178 | /// use std::error::Error; 179 | /// use csv_async::AsyncWriter; 180 | /// 181 | /// # fn main() { tokio1::runtime::Runtime::new().unwrap().block_on(async {example().await.unwrap()}); } 182 | /// async fn example() -> Result<(), Box> { 183 | /// let mut wtr = AsyncWriter::from_writer(vec![]); 184 | /// wtr.write_field("a").await?; 185 | /// wtr.write_field("b").await?; 186 | /// wtr.write_field("c").await?; 187 | /// wtr.write_record(None::<&[u8]>).await?; 188 | /// wtr.write_field("x").await?; 189 | /// wtr.write_field("y").await?; 190 | /// wtr.write_field("z").await?; 191 | /// wtr.write_record(None::<&[u8]>).await?; 192 | /// 193 | /// let data = String::from_utf8(wtr.into_inner().await?)?; 194 | /// assert_eq!(data, "a,b,c\nx,y,z\n"); 195 | /// Ok(()) 196 | /// } 197 | /// ``` 198 | #[inline] 199 | pub async fn write_field>(&mut self, field: T) -> Result<()> { 200 | self.0.write_field(field).await 201 | } 202 | 203 | /// Flush the contents of the internal buffer to the underlying writer. 204 | /// 205 | /// If there was a problem writing to the underlying writer, then an error 206 | /// is returned. 207 | /// 208 | /// This finction is also called by writer destructor. 209 | #[inline] 210 | pub async fn flush(&mut self) -> io::Result<()> { 211 | self.0.flush().await 212 | } 213 | 214 | /// Flush the contents of the internal buffer and return the underlying writer. 215 | /// 216 | pub async fn into_inner( 217 | self, 218 | ) -> result::Result { 219 | match self.0.into_inner().await { 220 | Ok(w) => Ok(w), 221 | Err(err) => Err(err.into_error()), 222 | } 223 | } 224 | } 225 | 226 | #[cfg(test)] 227 | mod tests { 228 | use std::pin::Pin; 229 | use std::task::{Context, Poll}; 230 | 231 | use tokio::io; 232 | 233 | use crate::byte_record::ByteRecord; 234 | use crate::error::ErrorKind; 235 | use crate::string_record::StringRecord; 236 | 237 | use super::{AsyncWriter, AsyncWriterBuilder}; 238 | 239 | async fn wtr_as_string<'w>(wtr: AsyncWriter>) -> String { 240 | String::from_utf8(wtr.into_inner().await.unwrap()).unwrap() 241 | } 242 | 243 | #[tokio::test] 244 | async fn one_record() { 245 | let mut wtr = AsyncWriter::from_writer(vec![]); 246 | wtr.write_record(&["a", "b", "c"]).await.unwrap(); 247 | 248 | assert_eq!(wtr_as_string(wtr).await, "a,b,c\n"); 249 | } 250 | 251 | #[tokio::test] 252 | async fn one_string_record() { 253 | let mut wtr = AsyncWriter::from_writer(vec![]); 254 | wtr.write_record(&StringRecord::from(vec!["a", "b", "c"])).await.unwrap(); 255 | 256 | assert_eq!(wtr_as_string(wtr).await, "a,b,c\n"); 257 | } 258 | 259 | #[tokio::test] 260 | async fn one_byte_record() { 261 | let mut wtr = AsyncWriter::from_writer(vec![]); 262 | wtr.write_record(&ByteRecord::from(vec!["a", "b", "c"])).await.unwrap(); 263 | 264 | assert_eq!(wtr_as_string(wtr).await, "a,b,c\n"); 265 | } 266 | 267 | #[tokio::test] 268 | async fn raw_one_byte_record() { 269 | let mut wtr = AsyncWriter::from_writer(vec![]); 270 | wtr.write_byte_record(&ByteRecord::from(vec!["a", "b", "c"])).await.unwrap(); 271 | 272 | assert_eq!(wtr_as_string(wtr).await, "a,b,c\n"); 273 | } 274 | 275 | #[tokio::test] 276 | async fn one_empty_record() { 277 | let mut wtr = AsyncWriter::from_writer(vec![]); 278 | wtr.write_record(&[""]).await.unwrap(); 279 | 280 | assert_eq!(wtr_as_string(wtr).await, "\"\"\n"); 281 | } 282 | 283 | #[tokio::test] 284 | async fn raw_one_empty_record() { 285 | let mut wtr = AsyncWriter::from_writer(vec![]); 286 | wtr.write_byte_record(&ByteRecord::from(vec![""])).await.unwrap(); 287 | 288 | assert_eq!(wtr_as_string(wtr).await, "\"\"\n"); 289 | } 290 | 291 | #[tokio::test] 292 | async fn two_empty_records() { 293 | let mut wtr = AsyncWriter::from_writer(vec![]); 294 | wtr.write_record(&[""]).await.unwrap(); 295 | wtr.write_record(&[""]).await.unwrap(); 296 | 297 | assert_eq!(wtr_as_string(wtr).await, "\"\"\n\"\"\n"); 298 | } 299 | 300 | #[tokio::test] 301 | async fn raw_two_empty_records() { 302 | let mut wtr = AsyncWriter::from_writer(vec![]); 303 | wtr.write_byte_record(&ByteRecord::from(vec![""])).await.unwrap(); 304 | wtr.write_byte_record(&ByteRecord::from(vec![""])).await.unwrap(); 305 | 306 | assert_eq!(wtr_as_string(wtr).await, "\"\"\n\"\"\n"); 307 | } 308 | 309 | #[tokio::test] 310 | async fn unequal_records_bad() { 311 | let mut wtr = AsyncWriter::from_writer(vec![]); 312 | wtr.write_record(&ByteRecord::from(vec!["a", "b", "c"])).await.unwrap(); 313 | let err = wtr.write_record(&ByteRecord::from(vec!["a"])).await.unwrap_err(); 314 | match *err.kind() { 315 | ErrorKind::UnequalLengths { ref pos, expected_len, len } => { 316 | assert!(pos.is_none()); 317 | assert_eq!(expected_len, 3); 318 | assert_eq!(len, 1); 319 | } 320 | ref x => { 321 | panic!("expected UnequalLengths error, but got '{:?}'", x); 322 | } 323 | } 324 | } 325 | 326 | #[tokio::test] 327 | async fn raw_unequal_records_bad() { 328 | let mut wtr = AsyncWriter::from_writer(vec![]); 329 | wtr.write_byte_record(&ByteRecord::from(vec!["a", "b", "c"])).await.unwrap(); 330 | let err = 331 | wtr.write_byte_record(&ByteRecord::from(vec!["a"])).await.unwrap_err(); 332 | match *err.kind() { 333 | ErrorKind::UnequalLengths { ref pos, expected_len, len } => { 334 | assert!(pos.is_none()); 335 | assert_eq!(expected_len, 3); 336 | assert_eq!(len, 1); 337 | } 338 | ref x => { 339 | panic!("expected UnequalLengths error, but got '{:?}'", x); 340 | } 341 | } 342 | } 343 | 344 | #[tokio::test] 345 | async fn unequal_records_ok() { 346 | let mut wtr = AsyncWriterBuilder::new().flexible(true).create_writer(vec![]); 347 | wtr.write_record(&ByteRecord::from(vec!["a", "b", "c"])).await.unwrap(); 348 | wtr.write_record(&ByteRecord::from(vec!["a"])).await.unwrap(); 349 | assert_eq!(wtr_as_string(wtr).await, "a,b,c\na\n"); 350 | } 351 | 352 | #[tokio::test] 353 | async fn raw_unequal_records_ok() { 354 | let mut wtr = AsyncWriterBuilder::new().flexible(true).create_writer(vec![]); 355 | wtr.write_byte_record(&ByteRecord::from(vec!["a", "b", "c"])).await.unwrap(); 356 | wtr.write_byte_record(&ByteRecord::from(vec!["a"])).await.unwrap(); 357 | assert_eq!(wtr_as_string(wtr).await, "a,b,c\na\n"); 358 | } 359 | 360 | #[tokio::test] 361 | async fn full_buffer_should_not_flush_underlying() { 362 | #[derive(Debug)] 363 | struct MarkWriteAndFlush(Vec); 364 | 365 | impl MarkWriteAndFlush { 366 | fn to_str(self) -> String { 367 | String::from_utf8(self.0).unwrap() 368 | } 369 | } 370 | 371 | impl io::AsyncWrite for MarkWriteAndFlush { 372 | fn poll_write( 373 | mut self: Pin<&mut Self>, 374 | _: &mut Context, 375 | buf: &[u8] 376 | ) -> Poll> { 377 | use std::io::Write; 378 | self.0.write(b">").unwrap(); 379 | let written = self.0.write(buf).unwrap(); 380 | assert_eq!(written, buf.len()); 381 | self.0.write(b"<").unwrap(); 382 | // AsyncWriteExt::write_all panics if write returns more than buf.len() 383 | // Poll::Ready(Ok(written + 2)) 384 | Poll::Ready(Ok(written)) 385 | } 386 | 387 | fn poll_flush(mut self: Pin<&mut Self>, _: &mut Context) -> Poll> { 388 | use std::io::Write; 389 | self.0.write(b"!").unwrap(); 390 | Poll::Ready(Ok(())) 391 | } 392 | 393 | fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { 394 | self.poll_flush(cx) 395 | } 396 | } 397 | 398 | let underlying = MarkWriteAndFlush(vec![]); 399 | let mut wtr = 400 | AsyncWriterBuilder::new().buffer_capacity(4).create_writer(underlying); 401 | 402 | wtr.write_byte_record(&ByteRecord::from(vec!["a", "b"])).await.unwrap(); 403 | wtr.write_byte_record(&ByteRecord::from(vec!["c", "d"])).await.unwrap(); 404 | wtr.flush().await.unwrap(); 405 | wtr.write_byte_record(&ByteRecord::from(vec!["e", "f"])).await.unwrap(); 406 | 407 | let got = wtr.into_inner().await.unwrap().to_str(); 408 | 409 | // As the buffer size is 4 we should write each record separately, and 410 | // flush when explicitly called and implictly in into_inner. 411 | assert_eq!(got, ">a,b\n<>c,d\ne,f\n AsyncWriterBuilder { 52 | AsyncWriterBuilder { 53 | builder: CoreWriterBuilder::default(), 54 | capacity: 8 * (1 << 10), 55 | flexible: false, 56 | has_headers: true, 57 | } 58 | } 59 | } 60 | 61 | impl AsyncWriterBuilder { 62 | /// Create a new builder for configuring CSV writing. 63 | /// 64 | /// To convert a builder into a writer, call one of the methods starting 65 | /// with `from_`. 66 | /// 67 | /// # Example 68 | /// 69 | /// ``` 70 | /// use std::error::Error; 71 | /// use csv_async::AsyncWriterBuilder; 72 | /// 73 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 74 | /// async fn example() -> Result<(), Box> { 75 | /// let mut wtr = AsyncWriterBuilder::new().create_writer(vec![]); 76 | /// wtr.write_record(&["a", "b", "c"]).await?; 77 | /// wtr.write_record(&["x", "y", "z"]).await?; 78 | /// 79 | /// let data = String::from_utf8(wtr.into_inner().await?)?; 80 | /// assert_eq!(data, "a,b,c\nx,y,z\n"); 81 | /// Ok(()) 82 | /// } 83 | /// ``` 84 | pub fn new() -> AsyncWriterBuilder { 85 | AsyncWriterBuilder::default() 86 | } 87 | 88 | /// The field delimiter to use when writing CSV. 89 | /// 90 | /// The default is `b','`. 91 | /// 92 | /// # Example 93 | /// 94 | /// ``` 95 | /// use std::error::Error; 96 | /// use csv_async::AsyncWriterBuilder; 97 | /// 98 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 99 | /// async fn example() -> Result<(), Box> { 100 | /// let mut wtr = AsyncWriterBuilder::new() 101 | /// .delimiter(b';') 102 | /// .create_writer(vec![]); 103 | /// wtr.write_record(&["a", "b", "c"]).await?; 104 | /// wtr.write_record(&["x", "y", "z"]).await?; 105 | /// 106 | /// let data = String::from_utf8(wtr.into_inner().await?)?; 107 | /// assert_eq!(data, "a;b;c\nx;y;z\n"); 108 | /// Ok(()) 109 | /// } 110 | /// ``` 111 | pub fn delimiter(&mut self, delimiter: u8) -> &mut AsyncWriterBuilder { 112 | self.builder.delimiter(delimiter); 113 | self 114 | } 115 | /// Whether to write a header row before writing any other row. 116 | /// 117 | /// When this is enabled and the `serialize` method is used to write data 118 | /// with something that contains field names (i.e., a struct), then a 119 | /// header row is written containing the field names before any other row 120 | /// is written. 121 | /// 122 | /// This option has no effect when using other methods to write rows. That 123 | /// is, if you don't use `serialize`, then you must write your header row 124 | /// explicitly if you want a header row. 125 | /// 126 | /// This is enabled by default. 127 | /// 128 | // / # Example: with headers 129 | // / 130 | // / This shows how the header will be automatically written from the field 131 | // / names of a struct. 132 | // / 133 | // / ``` 134 | // / use std::error::Error; 135 | // / 136 | // / use csv::WriterBuilder; 137 | // / use serde::Serialize; 138 | // / 139 | // / #[derive(Serialize)] 140 | // / struct Row<'a> { 141 | // / city: &'a str, 142 | // / country: &'a str, 143 | // / // Serde allows us to name our headers exactly, 144 | // / // even if they don't match our struct field names. 145 | // / #[serde(rename = "popcount")] 146 | // / population: u64, 147 | // / } 148 | // / 149 | // / # fn main() { example().unwrap(); } 150 | // / fn example() -> Result<(), Box> { 151 | // / let mut wtr = WriterBuilder::new().from_writer(vec![]); 152 | // / wtr.serialize(Row { 153 | // / city: "Boston", 154 | // / country: "United States", 155 | // / population: 4628910, 156 | // / })?; 157 | // / wtr.serialize(Row { 158 | // / city: "Concord", 159 | // / country: "United States", 160 | // / population: 42695, 161 | // / })?; 162 | // / 163 | // / let data = String::from_utf8(wtr.into_inner()?)?; 164 | // / assert_eq!(data, "\ 165 | // / city,country,popcount 166 | // / Boston,United States,4628910 167 | // / Concord,United States,42695 168 | // / "); 169 | // / Ok(()) 170 | // / } 171 | // / ``` 172 | // / 173 | // / # Example: without headers 174 | // / 175 | // / This shows that serializing things that aren't structs (in this case, 176 | // / a tuple struct) won't result in a header row being written. This means 177 | // / you usually don't need to set `has_headers(false)` unless you 178 | // / explicitly want to both write custom headers and serialize structs. 179 | // / 180 | // / ``` 181 | // / use std::error::Error; 182 | // / use csv::WriterBuilder; 183 | // / 184 | // / # fn main() { example().unwrap(); } 185 | // / fn example() -> Result<(), Box> { 186 | // / let mut wtr = WriterBuilder::new().from_writer(vec![]); 187 | // / wtr.serialize(("Boston", "United States", 4628910))?; 188 | // / wtr.serialize(("Concord", "United States", 42695))?; 189 | // / 190 | // / let data = String::from_utf8(wtr.into_inner()?)?; 191 | // / assert_eq!(data, "\ 192 | // / Boston,United States,4628910 193 | // / Concord,United States,42695 194 | // / "); 195 | // / Ok(()) 196 | // / } 197 | // / ``` 198 | pub fn has_headers(&mut self, yes: bool) -> &mut AsyncWriterBuilder { 199 | self.has_headers = yes; 200 | self 201 | } 202 | 203 | /// Whether the number of fields in records is allowed to change or not. 204 | /// 205 | /// When disabled (which is the default), writing CSV data will return an 206 | /// error if a record is written with a number of fields different from the 207 | /// number of fields written in a previous record. 208 | /// 209 | /// When enabled, this error checking is turned off. 210 | /// 211 | /// # Example: writing flexible records 212 | /// 213 | /// ``` 214 | /// use std::error::Error; 215 | /// use csv_async::AsyncWriterBuilder; 216 | /// 217 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 218 | /// async fn example() -> Result<(), Box> { 219 | /// let mut wtr = AsyncWriterBuilder::new() 220 | /// .flexible(true) 221 | /// .create_writer(vec![]); 222 | /// wtr.write_record(&["a", "b"]).await?; 223 | /// wtr.write_record(&["x", "y", "z"]).await?; 224 | /// 225 | /// let data = String::from_utf8(wtr.into_inner().await?)?; 226 | /// assert_eq!(data, "a,b\nx,y,z\n"); 227 | /// Ok(()) 228 | /// } 229 | /// ``` 230 | /// 231 | /// # Example: error when `flexible` is disabled 232 | /// 233 | /// ``` 234 | /// use std::error::Error; 235 | /// use csv_async::AsyncWriterBuilder; 236 | /// 237 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 238 | /// async fn example() -> Result<(), Box> { 239 | /// let mut wtr = AsyncWriterBuilder::new() 240 | /// .flexible(false) 241 | /// .create_writer(vec![]); 242 | /// wtr.write_record(&["a", "b"]).await?; 243 | /// let err = wtr.write_record(&["x", "y", "z"]).await.unwrap_err(); 244 | /// match *err.kind() { 245 | /// csv_async::ErrorKind::UnequalLengths { expected_len, len, .. } => { 246 | /// assert_eq!(expected_len, 2); 247 | /// assert_eq!(len, 3); 248 | /// } 249 | /// ref wrong => { 250 | /// panic!("expected UnequalLengths but got {:?}", wrong); 251 | /// } 252 | /// } 253 | /// Ok(()) 254 | /// } 255 | /// ``` 256 | pub fn flexible(&mut self, yes: bool) -> &mut AsyncWriterBuilder { 257 | self.flexible = yes; 258 | self 259 | } 260 | 261 | /// The record terminator to use when writing CSV. 262 | /// 263 | /// A record terminator can be any single byte. The default is `\n`. 264 | /// 265 | /// Note that RFC 4180 specifies that record terminators should be `\r\n`. 266 | /// To use `\r\n`, use the special `Terminator::CRLF` value. 267 | /// 268 | /// # Example: CRLF 269 | /// 270 | /// This shows how to use RFC 4180 compliant record terminators. 271 | /// 272 | /// ``` 273 | /// use std::error::Error; 274 | /// use csv_async::{Terminator, AsyncWriterBuilder}; 275 | /// 276 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 277 | /// async fn example() -> Result<(), Box> { 278 | /// let mut wtr = AsyncWriterBuilder::new() 279 | /// .terminator(Terminator::CRLF) 280 | /// .create_writer(vec![]); 281 | /// wtr.write_record(&["a", "b", "c"]).await?; 282 | /// wtr.write_record(&["x", "y", "z"]).await?; 283 | /// 284 | /// let data = String::from_utf8(wtr.into_inner().await?)?; 285 | /// assert_eq!(data, "a,b,c\r\nx,y,z\r\n"); 286 | /// Ok(()) 287 | /// } 288 | /// ``` 289 | pub fn terminator(&mut self, term: Terminator) -> &mut AsyncWriterBuilder { 290 | self.builder.terminator(term.to_core()); 291 | self 292 | } 293 | 294 | /// The quoting style to use when writing CSV. 295 | /// 296 | /// By default, this is set to `QuoteStyle::Necessary`, which will only 297 | /// use quotes when they are necessary to preserve the integrity of data. 298 | /// 299 | /// Note that unless the quote style is set to `Never`, an empty field is 300 | /// quoted if it is the only field in a record. 301 | /// 302 | /// # Example: non-numeric quoting 303 | /// 304 | /// This shows how to quote non-numeric fields only. 305 | /// 306 | /// ``` 307 | /// use std::error::Error; 308 | /// use csv_async::{QuoteStyle, AsyncWriterBuilder}; 309 | /// 310 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 311 | /// async fn example() -> Result<(), Box> { 312 | /// let mut wtr = AsyncWriterBuilder::new() 313 | /// .quote_style(QuoteStyle::NonNumeric) 314 | /// .create_writer(vec![]); 315 | /// wtr.write_record(&["a", "5", "c"]).await?; 316 | /// wtr.write_record(&["3.14", "y", "z"]).await?; 317 | /// 318 | /// let data = String::from_utf8(wtr.into_inner().await?)?; 319 | /// assert_eq!(data, "\"a\",5,\"c\"\n3.14,\"y\",\"z\"\n"); 320 | /// Ok(()) 321 | /// } 322 | /// ``` 323 | /// 324 | /// # Example: never quote 325 | /// 326 | /// This shows how the CSV writer can be made to never write quotes, even 327 | /// if it sacrifices the integrity of the data. 328 | /// 329 | /// ``` 330 | /// use std::error::Error; 331 | /// use csv_async::{QuoteStyle, AsyncWriterBuilder}; 332 | /// 333 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 334 | /// async fn example() -> Result<(), Box> { 335 | /// let mut wtr = AsyncWriterBuilder::new() 336 | /// .quote_style(QuoteStyle::Never) 337 | /// .create_writer(vec![]); 338 | /// wtr.write_record(&["a", "foo\nbar", "c"]).await?; 339 | /// wtr.write_record(&["g\"h\"i", "y", "z"]).await?; 340 | /// 341 | /// let data = String::from_utf8(wtr.into_inner().await?)?; 342 | /// assert_eq!(data, "a,foo\nbar,c\ng\"h\"i,y,z\n"); 343 | /// Ok(()) 344 | /// } 345 | /// ``` 346 | pub fn quote_style(&mut self, style: QuoteStyle) -> &mut AsyncWriterBuilder { 347 | self.builder.quote_style(style.to_core()); 348 | self 349 | } 350 | 351 | /// The quote character to use when writing CSV. 352 | /// 353 | /// The default is `b'"'`. 354 | /// 355 | /// # Example 356 | /// 357 | /// ``` 358 | /// use std::error::Error; 359 | /// use csv_async::AsyncWriterBuilder; 360 | /// 361 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 362 | /// async fn example() -> Result<(), Box> { 363 | /// let mut wtr = AsyncWriterBuilder::new() 364 | /// .quote(b'\'') 365 | /// .create_writer(vec![]); 366 | /// wtr.write_record(&["a", "foo\nbar", "c"]).await?; 367 | /// wtr.write_record(&["g'h'i", "y\"y\"y", "z"]).await?; 368 | /// 369 | /// let data = String::from_utf8(wtr.into_inner().await?)?; 370 | /// assert_eq!(data, "a,'foo\nbar',c\n'g''h''i',y\"y\"y,z\n"); 371 | /// Ok(()) 372 | /// } 373 | /// ``` 374 | pub fn quote(&mut self, quote: u8) -> &mut AsyncWriterBuilder { 375 | self.builder.quote(quote); 376 | self 377 | } 378 | 379 | /// Enable double quote escapes. 380 | /// 381 | /// This is enabled by default, but it may be disabled. When disabled, 382 | /// quotes in field data are escaped instead of doubled. 383 | /// 384 | /// # Example 385 | /// 386 | /// ``` 387 | /// use std::error::Error; 388 | /// use csv_async::AsyncWriterBuilder; 389 | /// 390 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 391 | /// async fn example() -> Result<(), Box> { 392 | /// let mut wtr = AsyncWriterBuilder::new() 393 | /// .double_quote(false) 394 | /// .create_writer(vec![]); 395 | /// wtr.write_record(&["a", "foo\"bar", "c"]).await?; 396 | /// wtr.write_record(&["x", "y", "z"]).await?; 397 | /// 398 | /// let data = String::from_utf8(wtr.into_inner().await?)?; 399 | /// assert_eq!(data, "a,\"foo\\\"bar\",c\nx,y,z\n"); 400 | /// Ok(()) 401 | /// } 402 | /// ``` 403 | pub fn double_quote(&mut self, yes: bool) -> &mut AsyncWriterBuilder { 404 | self.builder.double_quote(yes); 405 | self 406 | } 407 | 408 | /// The escape character to use when writing CSV. 409 | /// 410 | /// In some variants of CSV, quotes are escaped using a special escape 411 | /// character like `\` (instead of escaping quotes by doubling them). 412 | /// 413 | /// By default, writing these idiosyncratic escapes is disabled, and is 414 | /// only used when `double_quote` is disabled. 415 | /// 416 | /// # Example 417 | /// 418 | /// ``` 419 | /// use std::error::Error; 420 | /// use csv_async::AsyncWriterBuilder; 421 | /// 422 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 423 | /// async fn example() -> Result<(), Box> { 424 | /// let mut wtr = AsyncWriterBuilder::new() 425 | /// .double_quote(false) 426 | /// .escape(b'$') 427 | /// .create_writer(vec![]); 428 | /// wtr.write_record(&["a", "foo\"bar", "c"]).await?; 429 | /// wtr.write_record(&["x", "y", "z"]).await?; 430 | /// 431 | /// let data = String::from_utf8(wtr.into_inner().await?)?; 432 | /// assert_eq!(data, "a,\"foo$\"bar\",c\nx,y,z\n"); 433 | /// Ok(()) 434 | /// } 435 | /// ``` 436 | pub fn escape(&mut self, escape: u8) -> &mut AsyncWriterBuilder { 437 | self.builder.escape(escape); 438 | self 439 | } 440 | 441 | /// Use this when you are going to set comment for reader used to read saved file. 442 | /// 443 | /// If `quote_style` is set to `QuoteStyle::Necessary`, a field will 444 | /// be quoted if the comment character is detected anywhere in the field. 445 | /// 446 | /// The default value is None. 447 | /// 448 | /// # Example 449 | /// 450 | /// ``` 451 | /// use std::error::Error; 452 | /// use csv_async::AsyncWriterBuilder; 453 | /// 454 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 455 | /// async fn example() -> Result<(), Box> { 456 | /// let mut wtr = 457 | /// AsyncWriterBuilder::new().comment(Some(b'#')).create_writer(Vec::new()); 458 | /// wtr.write_record(&["# comment", "another"]).await?; 459 | /// let buf = wtr.into_inner().await?; 460 | /// assert_eq!(String::from_utf8(buf).unwrap(), "\"# comment\",another\n"); 461 | /// Ok(()) 462 | /// } 463 | /// ``` 464 | pub fn comment(&mut self, comment: Option) -> &mut AsyncWriterBuilder { 465 | self.builder.comment(comment); 466 | self 467 | } 468 | 469 | /// Set the capacity (in bytes) of the internal buffer used in the CSV 470 | /// writer. This defaults to a reasonable setting. 471 | pub fn buffer_capacity(&mut self, capacity: usize) -> &mut AsyncWriterBuilder { 472 | self.capacity = capacity; 473 | self 474 | } 475 | } 476 | 477 | //-////////////////////////////////////////////////////////////////////////////////////////////// 478 | //-// Writer 479 | //-////////////////////////////////////////////////////////////////////////////////////////////// 480 | 481 | #[derive(Debug)] 482 | struct WriterState { 483 | /// Whether inconsistent record lengths are allowed. 484 | flexible: bool, 485 | /// The number of fields writtein in the first record. This is compared 486 | /// with `fields_written` on all subsequent records to check for 487 | /// inconsistent record lengths. 488 | first_field_count: Option, 489 | /// The number of fields written in this record. This is used to report 490 | /// errors for inconsistent record lengths if `flexible` is disabled. 491 | fields_written: u64, 492 | /// This is set immediately before flushing the buffer and then unset 493 | /// immediately after flushing the buffer. This avoids flushing the buffer 494 | /// twice if the inner writer panics. 495 | panicked: bool, 496 | } 497 | 498 | /// A simple internal buffer for buffering writes. 499 | /// 500 | /// We need this because the `csv_core` APIs want to write into a `&mut [u8]`, 501 | /// which is not available with the `std::io::BufWriter` API. 502 | #[derive(Debug)] 503 | struct Buffer { 504 | /// The contents of the buffer. 505 | buf: Vec, 506 | /// The number of bytes written to the buffer. 507 | len: usize, 508 | } 509 | 510 | impl Buffer { 511 | /// Returns a slice of the buffer's current contents. 512 | /// 513 | /// The slice returned may be empty. 514 | #[inline] 515 | fn readable(&self) -> &[u8] { 516 | &self.buf[..self.len] 517 | } 518 | 519 | /// Returns a mutable slice of the remaining space in this buffer. 520 | /// 521 | /// The slice returned may be empty. 522 | #[inline] 523 | fn writable(&mut self) -> &mut [u8] { 524 | &mut self.buf[self.len..] 525 | } 526 | 527 | /// Indicates that `n` bytes have been written to this buffer. 528 | #[inline] 529 | fn written(&mut self, n: usize) { 530 | self.len += n; 531 | } 532 | 533 | /// Clear the buffer. 534 | #[inline] 535 | fn clear(&mut self) { 536 | self.len = 0; 537 | } 538 | } 539 | 540 | /// CSV async writer internal implementation used by both record writer and serializer. 541 | /// 542 | #[derive(Debug)] 543 | pub struct AsyncWriterImpl { 544 | core: CoreWriter, 545 | wtr: Option, 546 | buf: Buffer, 547 | state: WriterState, 548 | } 549 | 550 | impl Drop for AsyncWriterImpl { 551 | fn drop(&mut self) { 552 | if self.wtr.is_some() && !self.state.panicked { 553 | // We ignore result of flush() call while dropping 554 | // Well known problem. 555 | // If you care about flush result call it explicitly 556 | // before AsyncWriter goes out of scope, 557 | // second flush() call should be no op. 558 | let _ = futures::executor::block_on(self.flush()); 559 | } 560 | } 561 | } 562 | 563 | impl AsyncWriterImpl { 564 | fn new(builder: &AsyncWriterBuilder, wtr: W) -> AsyncWriterImpl { 565 | AsyncWriterImpl { 566 | core: builder.builder.build(), 567 | wtr: Some(wtr), 568 | buf: Buffer { buf: vec![0; builder.capacity], len: 0 }, 569 | state: WriterState { 570 | flexible: builder.flexible, 571 | first_field_count: None, 572 | fields_written: 0, 573 | panicked: false, 574 | }, 575 | } 576 | } 577 | 578 | /// Write a single record. 579 | /// 580 | pub async fn write_record(&mut self, record: I) -> Result<()> 581 | where 582 | I: IntoIterator, 583 | T: AsRef<[u8]>, 584 | { 585 | for field in record.into_iter() { 586 | self.write_field_impl(field).await?; 587 | } 588 | self.write_terminator().await 589 | } 590 | 591 | /// Write a single `ByteRecord`. 592 | /// 593 | #[inline(never)] 594 | pub async fn write_byte_record(&mut self, record: &ByteRecord) -> Result<()> { 595 | if record.as_slice().is_empty() { 596 | return self.write_record(record).await; 597 | } 598 | // The idea here is to find a fast path for shuffling our record into 599 | // our buffer as quickly as possible. We do this because the underlying 600 | // "core" CSV writer does a lot of book-keeping to maintain its state 601 | // oriented API. 602 | // 603 | // The fast path occurs when we know our record will fit in whatever 604 | // space we have left in our buffer. We can actually quickly compute 605 | // the upper bound on the space required: 606 | let upper_bound = 607 | // The data itself plus the worst case: every byte is a quote. 608 | (2 * record.as_slice().len()) 609 | // The number of field delimiters. 610 | + (record.len().saturating_sub(1)) 611 | // The maximum number of quotes inserted around each field. 612 | + (2 * record.len()) 613 | // The maximum number of bytes for the terminator. 614 | + 2; 615 | if self.buf.writable().len() < upper_bound { 616 | return self.write_record(record).await; 617 | } 618 | let mut first = true; 619 | for field in record.iter() { 620 | if !first { 621 | self.buf.writable()[0] = self.core.get_delimiter(); 622 | self.buf.written(1); 623 | } 624 | first = false; 625 | 626 | if !self.core.should_quote(field) { 627 | self.buf.writable()[..field.len()].copy_from_slice(field); 628 | self.buf.written(field.len()); 629 | } else { 630 | self.buf.writable()[0] = self.core.get_quote(); 631 | self.buf.written(1); 632 | let (res, nin, nout) = csv_core::quote( 633 | field, 634 | self.buf.writable(), 635 | self.core.get_quote(), 636 | self.core.get_escape(), 637 | self.core.get_double_quote(), 638 | ); 639 | debug_assert!(res == WriteResult::InputEmpty); 640 | debug_assert!(nin == field.len()); 641 | self.buf.written(nout); 642 | self.buf.writable()[0] = self.core.get_quote(); 643 | self.buf.written(1); 644 | } 645 | } 646 | self.state.fields_written = record.len() as u64; 647 | self.write_terminator_into_buffer() 648 | } 649 | 650 | /// Write a single field. 651 | /// 652 | pub async fn write_field>(&mut self, field: T) -> Result<()> { 653 | self.write_field_impl(field).await 654 | } 655 | 656 | /// Implementation of write_field. 657 | /// 658 | /// This is a separate method so we can force the compiler to inline it 659 | /// into write_record. 660 | #[inline(always)] 661 | async fn write_field_impl>(&mut self, field: T) -> Result<()> { 662 | if self.state.fields_written > 0 { 663 | self.write_delimiter().await?; 664 | } 665 | let mut field = field.as_ref(); 666 | loop { 667 | let (res, nin, nout) = self.core.field(field, self.buf.writable()); 668 | field = &field[nin..]; 669 | self.buf.written(nout); 670 | match res { 671 | WriteResult::InputEmpty => { 672 | self.state.fields_written += 1; 673 | return Ok(()); 674 | } 675 | WriteResult::OutputFull => self.flush_buf().await?, 676 | } 677 | } 678 | } 679 | 680 | /// Flush the contents of the internal buffer to the underlying writer. 681 | /// 682 | /// If there was a problem writing to the underlying writer, then an error 683 | /// is returned. 684 | /// 685 | /// Note that this also flushes the underlying writer. 686 | pub async fn flush(&mut self) -> io::Result<()> { 687 | self.flush_buf().await?; 688 | self.wtr.as_mut().unwrap().flush().await?; 689 | Ok(()) 690 | } 691 | 692 | /// Flush the contents of the internal buffer to the underlying writer, 693 | /// without flushing the underlying writer. 694 | async fn flush_buf(&mut self) -> io::Result<()> { 695 | self.state.panicked = true; 696 | let result = self.wtr.as_mut().unwrap().write_all(self.buf.readable()).await; 697 | self.state.panicked = false; 698 | result?; 699 | self.buf.clear(); 700 | Ok(()) 701 | } 702 | 703 | /// Flush the contents of the internal buffer and return the underlying 704 | /// writer. 705 | pub async fn into_inner( 706 | mut self, 707 | ) -> result::Result>> { 708 | match self.flush().await { 709 | Ok(()) => Ok(self.wtr.take().unwrap()), 710 | Err(err) => Err(IntoInnerError::new(self, err)), 711 | } 712 | } 713 | 714 | /// Write a CSV delimiter. 715 | async fn write_delimiter(&mut self) -> Result<()> { 716 | loop { 717 | let (res, nout) = self.core.delimiter(self.buf.writable()); 718 | self.buf.written(nout); 719 | match res { 720 | WriteResult::InputEmpty => return Ok(()), 721 | WriteResult::OutputFull => self.flush_buf().await?, 722 | } 723 | } 724 | } 725 | 726 | /// Write a CSV terminator. 727 | async fn write_terminator(&mut self) -> Result<()> { 728 | self.check_field_count()?; 729 | loop { 730 | let (res, nout) = self.core.terminator(self.buf.writable()); 731 | self.buf.written(nout); 732 | match res { 733 | WriteResult::InputEmpty => { 734 | self.state.fields_written = 0; 735 | return Ok(()); 736 | } 737 | WriteResult::OutputFull => self.flush_buf().await?, 738 | } 739 | } 740 | } 741 | 742 | /// Write a CSV terminator that is guaranteed to fit into the current buffer. 743 | /// 744 | #[inline(never)] 745 | fn write_terminator_into_buffer(&mut self) -> Result<()> { 746 | self.check_field_count()?; 747 | match self.core.get_terminator() { 748 | csv_core::Terminator::CRLF => { 749 | self.buf.writable()[0] = b'\r'; 750 | self.buf.writable()[1] = b'\n'; 751 | self.buf.written(2); 752 | } 753 | csv_core::Terminator::Any(b) => { 754 | self.buf.writable()[0] = b; 755 | self.buf.written(1); 756 | } 757 | _ => unreachable!(), 758 | } 759 | self.state.fields_written = 0; 760 | Ok(()) 761 | } 762 | 763 | fn check_field_count(&mut self) -> Result<()> { 764 | if !self.state.flexible { 765 | match self.state.first_field_count { 766 | None => { 767 | self.state.first_field_count = 768 | Some(self.state.fields_written); 769 | } 770 | Some(expected) if expected != self.state.fields_written => { 771 | return Err(Error::new(ErrorKind::UnequalLengths { 772 | pos: None, 773 | expected_len: expected, 774 | len: self.state.fields_written, 775 | })) 776 | } 777 | Some(_) => {} 778 | } 779 | } 780 | Ok(()) 781 | } 782 | } 783 | -------------------------------------------------------------------------------- /src/async_writers/mwtr_serde.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use std::io::Write; 3 | 4 | use csv_core::{ 5 | self, WriteResult, Writer as CoreWriter 6 | }; 7 | use serde::Serialize; 8 | 9 | use crate::error::{Error, ErrorKind, Result}; 10 | use crate::serializer::{serialize, serialize_header}; 11 | use crate::AsyncWriterBuilder; 12 | 13 | /// A helper struct to synchronously perform serialization of structures to bytes stored in memory 14 | /// according to interface provided by serde::Serialize. 15 | /// Those bytes are being then asynchronously sent to writer. 16 | /// 17 | // TODO: The `buf` here is present to ease using csv_core interface, 18 | // but is redundant, degrade performance and should be eliminated. 19 | #[derive(Debug)] 20 | pub struct MemWriter { 21 | core: CoreWriter, 22 | wtr: io::Cursor>, 23 | buf: Buffer, 24 | state: WriterState, 25 | } 26 | 27 | #[derive(Debug)] 28 | struct WriterState { 29 | /// Whether the Serde serializer should attempt to write a header row. 30 | header: HeaderState, 31 | /// Whether inconsistent record lengths are allowed. 32 | flexible: bool, 33 | /// The number of fields writtein in the first record. This is compared 34 | /// with `fields_written` on all subsequent records to check for 35 | /// inconsistent record lengths. 36 | first_field_count: Option, 37 | /// The number of fields written in this record. This is used to report 38 | /// errors for inconsistent record lengths if `flexible` is disabled. 39 | fields_written: u64, 40 | /// This is set immediately before flushing the buffer and then unset 41 | /// immediately after flushing the buffer. This avoids flushing the buffer 42 | /// twice if the inner writer panics. 43 | panicked: bool, 44 | } 45 | 46 | /// HeaderState encodes a small state machine for handling header writes. 47 | #[derive(Debug)] 48 | enum HeaderState { 49 | /// Indicates that we should attempt to write a header. 50 | Write, 51 | /// Indicates that writing a header was attempt, and a header was written. 52 | DidWrite, 53 | /// Indicates that writing a header was attempted, but no headers were 54 | /// written or the attempt failed. 55 | DidNotWrite, 56 | /// This state is used when headers are disabled. It cannot transition 57 | /// to any other state. 58 | None, 59 | } 60 | 61 | /// A simple internal buffer for buffering writes. 62 | /// 63 | /// We need this because the `csv_core` APIs want to write into a `&mut [u8]`, 64 | /// which is not available with the `std::io::BufWriter` API. 65 | #[derive(Debug)] 66 | struct Buffer { 67 | /// The contents of the buffer. 68 | buf: Vec, 69 | /// The number of bytes written to the buffer. 70 | len: usize, 71 | } 72 | 73 | impl Drop for MemWriter { 74 | fn drop(&mut self) { 75 | if !self.state.panicked { 76 | let _ = self.flush(); 77 | } 78 | } 79 | } 80 | 81 | impl MemWriter { 82 | /// Create MemWriter using configuration stored in AsyncWriterBuilder. 83 | /// 84 | pub fn new(builder: &AsyncWriterBuilder) -> Self { 85 | let header_state = if builder.has_headers { 86 | HeaderState::Write 87 | } else { 88 | HeaderState::None 89 | }; 90 | MemWriter { 91 | core: builder.builder.build(), 92 | wtr: io::Cursor::new(Vec::new()), 93 | buf: Buffer { buf: vec![0; builder.capacity], len: 0 }, 94 | state: WriterState { 95 | header: header_state, 96 | flexible: builder.flexible, 97 | first_field_count: None, 98 | fields_written: 0, 99 | panicked: false, 100 | }, 101 | } 102 | } 103 | 104 | /// Serialize a single record using Serde. 105 | /// 106 | pub fn serialize(&mut self, record: S) -> Result<()> { 107 | if let HeaderState::Write = self.state.header { 108 | let wrote_header = serialize_header(self, &record)?; 109 | if wrote_header { 110 | self.write_terminator()?; 111 | self.state.header = HeaderState::DidWrite; 112 | } else { 113 | self.state.header = HeaderState::DidNotWrite; 114 | }; 115 | } 116 | serialize(self, &record)?; 117 | self.write_terminator()?; 118 | Ok(()) 119 | } 120 | 121 | /// Write a single field. 122 | pub fn write_field>(&mut self, field: T) -> Result<()> { 123 | self.write_field_impl(field) 124 | } 125 | 126 | /// Implementation of write_field. 127 | /// 128 | /// This is a separate method so we can force the compiler to inline it 129 | /// into write_record. 130 | #[inline(always)] 131 | fn write_field_impl>(&mut self, field: T) -> Result<()> { 132 | if self.state.fields_written > 0 { 133 | self.write_delimiter()?; 134 | } 135 | let mut field = field.as_ref(); 136 | loop { 137 | let (res, nin, nout) = self.core.field(field, self.buf.writable()); 138 | field = &field[nin..]; 139 | self.buf.written(nout); 140 | match res { 141 | WriteResult::InputEmpty => { 142 | self.state.fields_written += 1; 143 | return Ok(()); 144 | } 145 | WriteResult::OutputFull => self.flush_buf()?, 146 | } 147 | } 148 | } 149 | 150 | /// Flush the contents of the internal buffer to the underlying writer. 151 | /// 152 | /// If there was a problem writing to the underlying writer, then an error 153 | /// is returned. 154 | /// 155 | /// Note that this also flushes the underlying writer. 156 | pub fn flush(&mut self) -> io::Result<()> { 157 | self.flush_buf()?; 158 | self.wtr.flush()?; 159 | Ok(()) 160 | } 161 | 162 | /// Flush the contents of the internal buffer to the underlying writer, 163 | /// without flushing the underlying writer. 164 | fn flush_buf(&mut self) -> io::Result<()> { 165 | self.state.panicked = true; 166 | let result = self.wtr.write_all(self.buf.readable()); 167 | self.state.panicked = false; 168 | result?; 169 | self.buf.clear(); 170 | Ok(()) 171 | } 172 | 173 | /// Returns slice with the accumulated data. 174 | /// Caller is responsible for calling `flush()` before this call, 175 | /// otherwise returned vector may not contain all the data. 176 | pub fn data(&mut self) -> &[u8] { 177 | self.wtr.get_mut().as_slice() 178 | } 179 | 180 | /// Clears Writer internal vector, but not buffer. 181 | // TODO: See note about removing double buffering 182 | pub fn clear(&mut self) { 183 | self.wtr.get_mut().clear(); 184 | self.wtr.set_position(0); 185 | } 186 | 187 | /// Write a CSV delimiter. 188 | fn write_delimiter(&mut self) -> Result<()> { 189 | loop { 190 | let (res, nout) = self.core.delimiter(self.buf.writable()); 191 | self.buf.written(nout); 192 | match res { 193 | WriteResult::InputEmpty => return Ok(()), 194 | WriteResult::OutputFull => self.flush_buf()?, 195 | } 196 | } 197 | } 198 | 199 | /// Write a CSV terminator. 200 | fn write_terminator(&mut self) -> Result<()> { 201 | self.check_field_count()?; 202 | loop { 203 | let (res, nout) = self.core.terminator(self.buf.writable()); 204 | self.buf.written(nout); 205 | match res { 206 | WriteResult::InputEmpty => { 207 | self.state.fields_written = 0; 208 | return Ok(()); 209 | } 210 | WriteResult::OutputFull => self.flush_buf()?, 211 | } 212 | } 213 | } 214 | 215 | fn check_field_count(&mut self) -> Result<()> { 216 | if !self.state.flexible { 217 | match self.state.first_field_count { 218 | None => { 219 | self.state.first_field_count = 220 | Some(self.state.fields_written); 221 | } 222 | Some(expected) if expected != self.state.fields_written => { 223 | return Err(Error::new(ErrorKind::UnequalLengths { 224 | pos: None, 225 | expected_len: expected, 226 | len: self.state.fields_written, 227 | })) 228 | } 229 | Some(_) => {} 230 | } 231 | } 232 | Ok(()) 233 | } 234 | } 235 | 236 | impl Buffer { 237 | /// Returns a slice of the buffer's current contents. 238 | /// 239 | /// The slice returned may be empty. 240 | #[inline] 241 | fn readable(&self) -> &[u8] { 242 | &self.buf[..self.len] 243 | } 244 | 245 | /// Returns a mutable slice of the remaining space in this buffer. 246 | /// 247 | /// The slice returned may be empty. 248 | #[inline] 249 | fn writable(&mut self) -> &mut [u8] { 250 | &mut self.buf[self.len..] 251 | } 252 | 253 | /// Indicates that `n` bytes have been written to this buffer. 254 | #[inline] 255 | fn written(&mut self, n: usize) { 256 | self.len += n; 257 | } 258 | 259 | /// Clear the buffer. 260 | #[inline] 261 | fn clear(&mut self) { 262 | self.len = 0; 263 | } 264 | } 265 | 266 | #[cfg(test)] 267 | mod tests { 268 | use std::error::Error; 269 | 270 | use serde::{serde_if_integer128, Serialize}; 271 | 272 | use crate::byte_record::ByteRecord; 273 | use crate::error::{ErrorKind, IntoInnerError}; 274 | use crate::string_record::StringRecord; 275 | 276 | use super::{MemWriter, AsyncWriterBuilder}; 277 | 278 | fn wtr_as_string(wtr: MemWriter) -> String { 279 | String::from_utf8(wtr.into_inner().unwrap()).unwrap() 280 | } 281 | 282 | impl MemWriter { 283 | pub fn default() -> Self { 284 | Self::new(&AsyncWriterBuilder::new()) 285 | } 286 | 287 | pub fn into_inner( 288 | mut self, 289 | ) -> Result, IntoInnerError> { 290 | match self.flush() { 291 | // This is not official API, so not worth to use Option trick. 292 | Ok(()) => Ok(self.wtr.clone().into_inner()), 293 | Err(err) => Err(IntoInnerError::new(self, err)), 294 | } 295 | } 296 | 297 | pub fn write_record(&mut self, record: I) -> crate::error::Result<()> 298 | where 299 | I: IntoIterator, 300 | T: AsRef<[u8]>, 301 | { 302 | for field in record.into_iter() { 303 | self.write_field_impl(field)?; 304 | } 305 | self.write_terminator() 306 | } 307 | } 308 | 309 | #[test] 310 | fn one_record() { 311 | let mut wtr = MemWriter::default(); 312 | wtr.write_record(&["a", "b", "c"]).unwrap(); 313 | 314 | assert_eq!(wtr_as_string(wtr), "a,b,c\n"); 315 | } 316 | 317 | #[test] 318 | fn one_string_record() { 319 | let mut wtr = MemWriter::default(); 320 | wtr.write_record(&StringRecord::from(vec!["a", "b", "c"])).unwrap(); 321 | 322 | assert_eq!(wtr_as_string(wtr), "a,b,c\n"); 323 | } 324 | 325 | #[test] 326 | fn one_byte_record() { 327 | let mut wtr = MemWriter::default(); 328 | wtr.write_record(&ByteRecord::from(vec!["a", "b", "c"])).unwrap(); 329 | 330 | assert_eq!(wtr_as_string(wtr), "a,b,c\n"); 331 | } 332 | 333 | #[test] 334 | fn one_empty_record() { 335 | let mut wtr = MemWriter::default(); 336 | wtr.write_record(&[""]).unwrap(); 337 | 338 | assert_eq!(wtr_as_string(wtr), "\"\"\n"); 339 | } 340 | 341 | #[test] 342 | fn two_empty_records() { 343 | let mut wtr = MemWriter::default(); 344 | wtr.write_record(&[""]).unwrap(); 345 | wtr.write_record(&[""]).unwrap(); 346 | 347 | assert_eq!(wtr_as_string(wtr), "\"\"\n\"\"\n"); 348 | } 349 | 350 | #[test] 351 | fn unequal_records_bad() { 352 | let mut wtr = MemWriter::default(); 353 | wtr.write_record(&ByteRecord::from(vec!["a", "b", "c"])).unwrap(); 354 | let err = wtr.write_record(&ByteRecord::from(vec!["a"])).unwrap_err(); 355 | match *err.kind() { 356 | ErrorKind::UnequalLengths { ref pos, expected_len, len } => { 357 | assert!(pos.is_none()); 358 | assert_eq!(expected_len, 3); 359 | assert_eq!(len, 1); 360 | } 361 | ref x => { 362 | panic!("expected UnequalLengths error, but got '{:?}'", x); 363 | } 364 | } 365 | } 366 | 367 | #[test] 368 | fn unequal_records_ok() { 369 | let mut wtr = MemWriter::new(&AsyncWriterBuilder::new().flexible(true)); 370 | wtr.write_record(&ByteRecord::from(vec!["a", "b", "c"])).unwrap(); 371 | wtr.write_record(&ByteRecord::from(vec!["a"])).unwrap(); 372 | assert_eq!(wtr_as_string(wtr), "a,b,c\na\n"); 373 | } 374 | 375 | #[test] 376 | fn write_field() -> Result<(), Box> { 377 | let mut wtr = MemWriter::default(); 378 | wtr.write_field("a")?; 379 | wtr.write_field("b")?; 380 | wtr.write_field("c")?; 381 | wtr.write_terminator()?; 382 | wtr.write_field("x")?; 383 | wtr.write_field("y")?; 384 | wtr.write_field("z")?; 385 | wtr.write_terminator()?; 386 | 387 | let data = String::from_utf8(wtr.into_inner()?)?; 388 | assert_eq!(data, "a,b,c\nx,y,z\n"); 389 | Ok(()) 390 | } 391 | 392 | #[test] 393 | fn serialize_with_headers() { 394 | #[derive(Serialize)] 395 | struct Row { 396 | foo: i32, 397 | bar: f64, 398 | baz: bool, 399 | } 400 | 401 | let mut wtr = MemWriter::default(); 402 | wtr.serialize(Row { foo: 42, bar: 42.5, baz: true }).unwrap(); 403 | assert_eq!(wtr_as_string(wtr), "foo,bar,baz\n42,42.5,true\n"); 404 | } 405 | 406 | #[test] 407 | fn serialize_no_headers() { 408 | #[derive(Serialize)] 409 | struct Row { 410 | foo: i32, 411 | bar: f64, 412 | baz: bool, 413 | } 414 | 415 | let mut wtr = MemWriter::new(&AsyncWriterBuilder::new().has_headers(false)); 416 | wtr.serialize(Row { foo: 42, bar: 42.5, baz: true }).unwrap(); 417 | assert_eq!(wtr_as_string(wtr), "42,42.5,true\n"); 418 | } 419 | 420 | serde_if_integer128! { 421 | #[test] 422 | fn serialize_no_headers_128() { 423 | #[derive(Serialize)] 424 | struct Row { 425 | foo: i128, 426 | bar: f64, 427 | baz: bool, 428 | } 429 | 430 | let mut wtr = MemWriter::new(&AsyncWriterBuilder::new().has_headers(false)); 431 | wtr.serialize(Row { 432 | foo: 9_223_372_036_854_775_808, 433 | bar: 42.5, 434 | baz: true, 435 | }).unwrap(); 436 | assert_eq!(wtr_as_string(wtr), "9223372036854775808,42.5,true\n"); 437 | } 438 | } 439 | 440 | #[test] 441 | fn serialize_tuple() { 442 | let mut wtr = MemWriter::default(); 443 | wtr.serialize((true, 1.3, "hi")).unwrap(); 444 | assert_eq!(wtr_as_string(wtr), "true,1.3,hi\n"); 445 | } 446 | 447 | #[test] 448 | fn serialize_struct() -> Result<(), Box> { 449 | #[derive(Serialize)] 450 | struct Row<'a> { 451 | city: &'a str, 452 | country: &'a str, 453 | // Serde allows us to name our headers exactly, 454 | // even if they don't match our struct field names. 455 | #[serde(rename = "popcount")] 456 | population: u64, 457 | } 458 | 459 | let mut wtr = MemWriter::default(); 460 | wtr.serialize(Row { 461 | city: "Boston", 462 | country: "United States", 463 | population: 4628910, 464 | })?; 465 | wtr.serialize(Row { 466 | city: "Concord", 467 | country: "United States", 468 | population: 42695, 469 | })?; 470 | 471 | let data = String::from_utf8(wtr.into_inner()?)?; 472 | assert_eq!(data, "\ 473 | city,country,popcount 474 | Boston,United States,4628910 475 | Concord,United States,42695 476 | "); 477 | Ok(()) 478 | } 479 | 480 | #[test] 481 | fn serialize_enum() -> Result<(), Box> { 482 | #[derive(Serialize)] 483 | struct Row { 484 | label: String, 485 | value: Value, 486 | } 487 | #[derive(Serialize)] 488 | enum Value { 489 | Integer(i64), 490 | Float(f64), 491 | } 492 | 493 | let mut wtr = MemWriter::default(); 494 | wtr.serialize(Row { 495 | label: "foo".to_string(), 496 | value: Value::Integer(3), 497 | })?; 498 | wtr.serialize(Row { 499 | label: "bar".to_string(), 500 | value: Value::Float(3.14), 501 | })?; 502 | 503 | let data = String::from_utf8(wtr.into_inner()?)?; 504 | assert_eq!(data, "\ 505 | label,value 506 | foo,3 507 | bar,3.14 508 | "); 509 | Ok(()) 510 | } 511 | 512 | #[test] 513 | fn serialize_vec() -> Result<(), Box> { 514 | #[derive(Serialize)] 515 | struct Row { 516 | label: String, 517 | values: Vec, 518 | } 519 | 520 | let mut wtr = MemWriter::new( 521 | &AsyncWriterBuilder::new() 522 | .has_headers(false) 523 | ); 524 | wtr.serialize(Row { 525 | label: "foo".to_string(), 526 | values: vec![1.1234, 2.5678, 3.14], 527 | })?; 528 | 529 | let data = String::from_utf8(wtr.into_inner()?)?; 530 | assert_eq!(data, "foo,1.1234,2.5678,3.14\n"); 531 | Ok(()) 532 | } 533 | } 534 | -------------------------------------------------------------------------------- /src/debug.rs: -------------------------------------------------------------------------------- 1 | /// A type that provides a human readable debug impl for arbitrary bytes. 2 | /// 3 | /// This generally works best when the bytes are presumed to be mostly UTF-8, 4 | /// but will work for anything. 5 | /// 6 | /// N.B. This is copied nearly verbatim from regex-automata. Sigh. 7 | pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]); 8 | 9 | impl<'a> core::fmt::Debug for Bytes<'a> { 10 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { 11 | write!(f, "\"")?; 12 | // This is a sad re-implementation of a similar impl found in bstr. 13 | let mut bytes = self.0; 14 | while let Some(result) = utf8_decode(bytes) { 15 | let ch = match result { 16 | Ok(ch) => ch, 17 | Err(byte) => { 18 | write!(f, r"\x{:02x}", byte)?; 19 | bytes = &bytes[1..]; 20 | continue; 21 | } 22 | }; 23 | bytes = &bytes[ch.len_utf8()..]; 24 | match ch { 25 | '\0' => write!(f, "\\0")?, 26 | // ASCII control characters except \0, \n, \r, \t 27 | '\x01'..='\x08' 28 | | '\x0b' 29 | | '\x0c' 30 | | '\x0e'..='\x19' 31 | | '\x7f' => { 32 | write!(f, "\\x{:02x}", u32::from(ch))?; 33 | } 34 | _ => { 35 | write!(f, "{}", ch.escape_debug())?; 36 | } 37 | } 38 | } 39 | write!(f, "\"")?; 40 | Ok(()) 41 | } 42 | } 43 | 44 | /// Decodes the next UTF-8 encoded codepoint from the given byte slice. 45 | /// 46 | /// If no valid encoding of a codepoint exists at the beginning of the given 47 | /// byte slice, then the first byte is returned instead. 48 | /// 49 | /// This returns `None` if and only if `bytes` is empty. 50 | pub(crate) fn utf8_decode(bytes: &[u8]) -> Option> { 51 | fn len(byte: u8) -> Option { 52 | if byte <= 0x7F { 53 | Some(1) 54 | } else if byte & 0b1100_0000 == 0b1000_0000 { 55 | None 56 | } else if byte <= 0b1101_1111 { 57 | Some(2) 58 | } else if byte <= 0b1110_1111 { 59 | Some(3) 60 | } else if byte <= 0b1111_0111 { 61 | Some(4) 62 | } else { 63 | None 64 | } 65 | } 66 | 67 | if bytes.is_empty() { 68 | return None; 69 | } 70 | let len = match len(bytes[0]) { 71 | None => return Some(Err(bytes[0])), 72 | Some(len) if len > bytes.len() => return Some(Err(bytes[0])), 73 | Some(1) => return Some(Ok(char::from(bytes[0]))), 74 | Some(len) => len, 75 | }; 76 | match core::str::from_utf8(&bytes[..len]) { 77 | Ok(s) => Some(Ok(s.chars().next().unwrap())), 78 | Err(_) => Some(Err(bytes[0])), 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | use std::error::Error as StdError; 2 | use std::fmt; 3 | use std::io; 4 | use std::result; 5 | 6 | use crate::byte_record::{ByteRecord, Position}; 7 | #[cfg(feature = "with_serde")] 8 | use crate::deserializer::DeserializeError; 9 | 10 | /// A type alias for `Result`. 11 | pub type Result = result::Result; 12 | 13 | /// An error that can occur when processing CSV data. 14 | /// 15 | /// This error can happen when writing or reading CSV data. 16 | /// 17 | /// There are some important scenarios where an error is impossible to occur. 18 | /// For example, if a CSV reader is used on an in-memory buffer with the 19 | /// `flexible` option enabled and one is reading records as raw byte strings, 20 | /// then no error can occur. 21 | #[derive(Debug)] 22 | pub struct Error(Box); 23 | 24 | impl Error { 25 | /// A crate private constructor for `Error`. 26 | pub(crate) fn new(kind: ErrorKind) -> Error { 27 | Error(Box::new(kind)) 28 | } 29 | 30 | /// Return the specific type of this error. 31 | pub fn kind(&self) -> &ErrorKind { 32 | &self.0 33 | } 34 | 35 | /// Unwrap this error into its underlying type. 36 | pub fn into_kind(self) -> ErrorKind { 37 | *self.0 38 | } 39 | 40 | /// Returns true if this is an I/O error. 41 | /// 42 | /// If this is true, the underlying `ErrorKind` is guaranteed to be 43 | /// `ErrorKind::Io`. 44 | pub fn is_io_error(&self) -> bool { 45 | matches!(*self.0, ErrorKind::Io(_)) 46 | } 47 | 48 | /// Return the position for this error, if one exists. 49 | /// 50 | /// This is a convenience function that permits callers to easily access 51 | /// the position on an error without doing case analysis on `ErrorKind`. 52 | pub fn position(&self) -> Option<&Position> { 53 | self.0.position() 54 | } 55 | } 56 | 57 | /// The specific type of an error. 58 | #[derive(Debug)] 59 | #[non_exhaustive] 60 | pub enum ErrorKind { 61 | /// An I/O error that occurred while reading CSV data. 62 | Io(io::Error), 63 | /// A UTF-8 decoding error that occured while reading CSV data into Rust 64 | /// `String`s. 65 | Utf8 { 66 | /// The position of the record in which this error occurred, if 67 | /// available. 68 | pos: Option, 69 | /// The corresponding UTF-8 error. 70 | err: Utf8Error, 71 | }, 72 | /// This error occurs when two records with an unequal number of fields 73 | /// are found. This error only occurs when the `flexible` option in a 74 | /// CSV reader/writer is disabled. 75 | UnequalLengths { 76 | /// The position of the first record with an unequal number of fields 77 | /// to the previous record, if available. 78 | pos: Option, 79 | /// The expected number of fields in a record. This is the number of 80 | /// fields in the record read prior to the record indicated by 81 | /// `pos`. 82 | expected_len: u64, 83 | /// The number of fields in the bad record. 84 | len: u64, 85 | }, 86 | /// This error occurs when either the `byte_headers` or `headers` methods 87 | /// are called on a CSV reader that was asked to `seek` before it parsed 88 | /// the first record. 89 | Seek, 90 | /// An error of this kind occurs only when using the Serde serializer. 91 | #[cfg(feature = "with_serde")] 92 | Serialize(String), 93 | /// An error of this kind occurs only when performing automatic 94 | /// deserialization with serde. 95 | #[cfg(feature = "with_serde")] 96 | Deserialize { 97 | /// The position of this error, if available. 98 | pos: Option, 99 | /// The deserialization error. 100 | err: DeserializeError, 101 | } 102 | } 103 | 104 | impl ErrorKind { 105 | /// Return the position for this error, if one exists. 106 | /// 107 | /// This is a convenience function that permits callers to easily access 108 | /// the position on an error without doing case analysis on `ErrorKind`. 109 | pub fn position(&self) -> Option<&Position> { 110 | match *self { 111 | ErrorKind::Utf8 { ref pos, .. } => pos.as_ref(), 112 | ErrorKind::UnequalLengths { ref pos, .. } => pos.as_ref(), 113 | #[cfg(feature = "with_serde")] 114 | ErrorKind::Deserialize{ ref pos, .. } => pos.as_ref(), 115 | _ => None, 116 | } 117 | } 118 | } 119 | 120 | impl From for Error { 121 | fn from(err: io::Error) -> Error { 122 | Error::new(ErrorKind::Io(err)) 123 | } 124 | } 125 | 126 | impl From for io::Error { 127 | fn from(err: Error) -> io::Error { 128 | io::Error::new(io::ErrorKind::Other, err) 129 | } 130 | } 131 | 132 | impl StdError for Error {} 133 | 134 | impl fmt::Display for Error { 135 | #[allow(unreachable_patterns)] 136 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 137 | match *self.0 { 138 | ErrorKind::Io(ref err) => err.fmt(f), 139 | ErrorKind::Utf8 { pos: None, ref err } => { 140 | write!(f, "CSV parse error: field {}: {err}", err.field() + 1) 141 | } 142 | ErrorKind::Utf8 { pos: Some(ref pos), ref err } => write!( 143 | f, 144 | "CSV parse error: record {} \ 145 | (line {}, field: {}, byte: {}): {err}", 146 | pos.record(), 147 | pos.line(), 148 | err.field() + 1, 149 | pos.byte(), 150 | ), 151 | ErrorKind::UnequalLengths { pos: None, expected_len, len } => { 152 | write!( 153 | f, 154 | "CSV error: \ 155 | found record with {len} fields, but the previous record \ 156 | has {expected_len} fields" 157 | ) 158 | } 159 | ErrorKind::UnequalLengths { 160 | pos: Some(ref pos), 161 | expected_len, 162 | len, 163 | } => write!( 164 | f, 165 | "CSV error: record {} (line: {}, byte: {}): \ 166 | found record with {len} fields, but the previous record \ 167 | has {expected_len} fields", 168 | pos.record(), 169 | pos.line(), 170 | pos.byte(), 171 | ), 172 | ErrorKind::Seek => write!( 173 | f, 174 | "CSV error: cannot access headers of CSV data \ 175 | when the parser was seeked before the first record \ 176 | could be read" 177 | ), 178 | #[cfg(feature = "with_serde")] 179 | ErrorKind::Serialize(ref msg) => { 180 | write!(f, "CSV serialize error: {msg}") 181 | } 182 | #[cfg(feature = "with_serde")] 183 | ErrorKind::Deserialize { pos: None, ref err } => { 184 | write!(f, "CSV deserialize error: {err}") 185 | } 186 | #[cfg(feature = "with_serde")] 187 | ErrorKind::Deserialize { 188 | pos: Some(ref pos), 189 | ref err, 190 | } => write!( 191 | f, 192 | "CSV deserialize error: record {} \ 193 | (line {}, byte: {}): {err}", 194 | pos.record(), 195 | pos.line(), 196 | pos.byte(), 197 | ), 198 | _ => write!(f,"CSV other error") 199 | } 200 | } 201 | } 202 | 203 | /// A UTF-8 validation error during record conversion. 204 | /// 205 | /// This occurs when attempting to convert a `ByteRecord` into a 206 | /// `StringRecord`. 207 | #[derive(Clone, Debug, Eq, PartialEq)] 208 | pub struct FromUtf8Error { 209 | record: ByteRecord, 210 | err: Utf8Error, 211 | } 212 | 213 | impl FromUtf8Error { 214 | /// Create a new FromUtf8Error. 215 | pub(crate) fn new(record: ByteRecord, err: Utf8Error) -> FromUtf8Error { 216 | FromUtf8Error { record, err } 217 | } 218 | 219 | /// Access the underlying `ByteRecord` that failed UTF-8 validation. 220 | pub fn into_byte_record(self) -> ByteRecord { 221 | self.record 222 | } 223 | 224 | /// Access the underlying UTF-8 validation error. 225 | pub fn utf8_error(&self) -> &Utf8Error { 226 | &self.err 227 | } 228 | } 229 | 230 | impl fmt::Display for FromUtf8Error { 231 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 232 | self.err.fmt(f) 233 | } 234 | } 235 | 236 | impl StdError for FromUtf8Error { 237 | fn source(&self) -> Option<&(dyn StdError + 'static)> { 238 | Some(&self.err) 239 | } 240 | } 241 | 242 | /// A UTF-8 validation error. 243 | /// 244 | /// This occurs when attempting to convert a `ByteRecord` into a 245 | /// `StringRecord`. 246 | /// 247 | /// The error includes the index of the field that failed validation, and the 248 | /// last byte at which valid UTF-8 was verified. 249 | #[derive(Clone, Debug, Eq, PartialEq)] 250 | pub struct Utf8Error { 251 | /// The field index of a byte record in which UTF-8 validation failed. 252 | field: usize, 253 | /// The index into the given field up to which valid UTF-8 was verified. 254 | valid_up_to: usize, 255 | } 256 | 257 | /// Create a new UTF-8 error. 258 | pub fn new_utf8_error(field: usize, valid_up_to: usize) -> Utf8Error { 259 | Utf8Error { field, valid_up_to } 260 | } 261 | 262 | impl Utf8Error { 263 | /// The field index (zero based) of a byte record in which UTF-8 validation failed. 264 | pub fn field(&self) -> usize { 265 | self.field 266 | } 267 | /// The index (zero based) into the given field up to which valid UTF-8 was verified. 268 | pub fn valid_up_to(&self) -> usize { 269 | self.valid_up_to 270 | } 271 | } 272 | 273 | impl StdError for Utf8Error {} 274 | 275 | impl fmt::Display for Utf8Error { 276 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 277 | write!( 278 | f, 279 | "invalid utf-8: invalid UTF-8 in field {} near byte index {}", 280 | self.field + 1, self.valid_up_to 281 | ) 282 | } 283 | } 284 | 285 | /// `IntoInnerError` occurs when consuming a `Writer` fails. 286 | /// 287 | /// Consuming the `Writer` causes a flush to happen. If the flush fails, then 288 | /// this error is returned, which contains both the original `Writer` and 289 | /// the error that occurred. 290 | /// 291 | /// The type parameter `W` is the unconsumed writer. 292 | pub struct IntoInnerError { 293 | wtr: W, 294 | err: io::Error, 295 | } 296 | 297 | impl IntoInnerError { 298 | /// Creates a new `IntoInnerError`. 299 | /// 300 | /// (This is a visibility hack. It's public in this module, but not in the 301 | /// crate.) 302 | #[allow(dead_code)] 303 | pub(crate) fn new(wtr: W, err: io::Error) -> IntoInnerError { 304 | IntoInnerError { wtr, err } 305 | } 306 | 307 | /// Returns reference to the i/o error which caused the failure. 308 | /// 309 | /// This error was returned when attempting to flush the writer internal buffer. 310 | pub fn error(&self) -> &io::Error { 311 | &self.err 312 | } 313 | 314 | /// Returns the i/o error which caused the failure. 315 | /// 316 | /// This error was returned when attempting to flush the writer internal buffer. 317 | pub fn into_error(self) -> io::Error { 318 | self.err 319 | } 320 | 321 | /// Returns the underlying writer which generated the error. 322 | /// 323 | /// The returned value can be used for error recovery, such as 324 | /// re-inspecting the buffer. 325 | pub fn into_writer(self) -> W { 326 | self.wtr 327 | } 328 | } 329 | 330 | impl StdError for IntoInnerError {} 331 | 332 | impl fmt::Display for IntoInnerError { 333 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 334 | self.err.fmt(f) 335 | } 336 | } 337 | 338 | impl fmt::Debug for IntoInnerError { 339 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 340 | self.err.fmt(f) 341 | } 342 | } 343 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![deny(missing_docs)] 2 | 3 | // Few unsafe lines are in src/string_record.rs 4 | // #![deny(unsafe_code)] 5 | 6 | /*! 7 | The `csv-async` crate provides a fast and flexible CSV reader and writer, 8 | which is intended to be run in asynchronous environment - i.e. 9 | inside functions with `async` attribute called by tasks run by executor. 10 | This library does not imply using any particular executor. 11 | Unit tests and documentation snippets use either `async-std` or `tokio` crates. 12 | Synchronous interface for reading and writing CSV files is not contained in this crate, 13 | please use `csv` crate for this. This crate attempts to mimic `csv` crate API, but there are some exceptions. 14 | E.g. configuration builders have `create_...` factory functions instead of `from_...` as in `csv` crate. 15 | 16 | # Brief overview 17 | 18 | The primary types in this crate are 19 | [`AsyncReader`](struct.AsyncReader.html) 20 | and 21 | [`AsyncWriter`](struct.AsyncWriter.html) 22 | for reading and writing CSV data respectively. 23 | Or [`AsyncDeserializer`](struct.AsyncDeserializer.html) 24 | and 25 | [`AsyncSerializer`](struct.AsyncSerializer.html) 26 | for reading and writing CSV data using interfaces generated by `serde_derive` macros. 27 | 28 | Correspondingly, to support CSV data with custom field or record delimiters 29 | (among many other things), you should use either a 30 | [`AsyncReaderBuilder`](struct.AsyncReaderBuilder.html) 31 | or a 32 | [`AsyncWriterBuilder`](struct.AsyncWriterBuilder.html), 33 | depending on whether you're reading or writing CSV data. 34 | 35 | The standard CSV record types are 36 | [`StringRecord`](struct.StringRecord.html) 37 | and 38 | [`ByteRecord`](struct.ByteRecord.html). 39 | `StringRecord` should be used when you know your data to be valid UTF-8. 40 | For data that may be invalid UTF-8, `ByteRecord` is suitable. 41 | 42 | Finally, the set of errors is described by the 43 | [`Error`](struct.Error.html) 44 | type. 45 | 46 | The rest of the types in this crate mostly correspond to more detailed errors, 47 | position information, configuration knobs or iterator types. 48 | 49 | # Setup 50 | 51 | In root folder for your project run `cargo add csv-async` or `cargo add --features tokio csv-async` to add this crate to your projext. 52 | 53 | # Examples 54 | 55 | This example shows how to read and write CSV file in asynchronous context and get into some record details. 56 | 57 | Sample input file: 58 | ```csv 59 | city,region,country,population 60 | Southborough,MA,United States,9686 61 | Northbridge,MA,United States,14061 62 | Marlborough,MA,United States,38334 63 | Springfield,MA,United States,152227 64 | Springfield,MO,United States,150443 65 | Springfield,NJ,United States,14976 66 | Concord,NH,United States,42605 67 | ``` 68 | 69 | ```no_run 70 | use std::error::Error; 71 | use std::process; 72 | #[cfg(not(feature = "tokio"))] 73 | use futures::stream::StreamExt; 74 | #[cfg(not(feature = "tokio"))] 75 | use async_std::fs::File; 76 | #[cfg(feature = "tokio")] 77 | use tokio1 as tokio; 78 | #[cfg(feature = "tokio")] 79 | use tokio_stream::StreamExt; 80 | #[cfg(feature = "tokio")] 81 | use tokio::fs::File; 82 | 83 | async fn filter_by_region(region:&str, file_in:&str, file_out:&str) -> Result<(), Box> { 84 | // Function reads CSV file that has column named "region" at second position (index = 1). 85 | // It writes to new file only rows with region equal to passed argument 86 | // and removes region column. 87 | let mut rdr = csv_async::AsyncReader::from_reader( 88 | File::open(file_in).await? 89 | ); 90 | let mut wri = csv_async::AsyncWriter::from_writer( 91 | File::create(file_out).await? 92 | ); 93 | wri.write_record(rdr 94 | .headers() 95 | .await?.into_iter() 96 | .filter(|h| *h != "region") 97 | ).await?; 98 | let mut records = rdr.records(); 99 | while let Some(record) = records.next().await { 100 | let record = record?; 101 | match record.get(1) { 102 | Some(reg) if reg == region => 103 | wri.write_record(record 104 | .iter() 105 | .enumerate() 106 | .filter(|(i, _)| *i != 1) 107 | .map(|(_, s)| s) 108 | ).await?, 109 | _ => {}, 110 | } 111 | } 112 | Ok(()) 113 | } 114 | 115 | #[cfg(not(feature = "tokio"))] 116 | fn main() { 117 | async_std::task::block_on(async { 118 | if let Err(err) = filter_by_region( 119 | "MA", 120 | "/tmp/all_regions.csv", 121 | "/tmp/MA_only.csv" 122 | ).await { 123 | eprintln!("error running filter_by_region: {}", err); 124 | process::exit(1); 125 | } 126 | }); 127 | } 128 | 129 | #[cfg(feature = "tokio")] 130 | fn main() { 131 | tokio::runtime::Runtime::new().unwrap().block_on(async { 132 | if let Err(err) = filter_by_region( 133 | "MA", 134 | "/tmp/all_regions.csv", 135 | "/tmp/MA_only.csv" 136 | ).await { 137 | eprintln!("error running filter_by_region: {}", err); 138 | process::exit(1); 139 | } 140 | }); 141 | } 142 | ``` 143 | 144 | ```no_run 145 | use std::error::Error; 146 | use std::process; 147 | #[cfg(feature = "with_serde")] 148 | use serde::{Deserialize, Serialize}; 149 | #[cfg(not(feature = "tokio"))] 150 | use futures::stream::StreamExt; 151 | #[cfg(not(feature = "tokio"))] 152 | use async_std::fs::File; 153 | #[cfg(feature = "tokio")] 154 | use tokio1 as tokio; 155 | #[cfg(feature = "tokio")] 156 | use tokio_stream::StreamExt; 157 | #[cfg(feature = "tokio")] 158 | use tokio::fs::File; 159 | 160 | #[cfg(feature = "with_serde")] 161 | #[derive(Deserialize, Serialize)] 162 | struct Row { 163 | city: String, 164 | region: String, 165 | country: String, 166 | population: u64, 167 | } 168 | 169 | #[cfg(feature = "with_serde")] 170 | async fn filter_by_region_serde(region:&str, file_in:&str, file_out:&str) -> Result<(), Box> { 171 | // Function reads CSV file that has column named "region" at second position (index = 1). 172 | // It writes to new file only rows with region equal to passed argument. 173 | let mut rdr = csv_async::AsyncDeserializer::from_reader( 174 | File::open(file_in).await? 175 | ); 176 | let mut wri = csv_async::AsyncSerializer::from_writer( 177 | File::create(file_out).await? 178 | ); 179 | let mut records = rdr.deserialize::(); 180 | while let Some(record) = records.next().await { 181 | let record = record?; 182 | if record.region == region { 183 | wri.serialize(&record).await?; 184 | } 185 | } 186 | Ok(()) 187 | } 188 | 189 | #[cfg(feature = "with_serde")] 190 | #[cfg(not(feature = "tokio"))] 191 | fn main() { 192 | async_std::task::block_on(async { 193 | if let Err(err) = filter_by_region_serde( 194 | "MA", 195 | "/tmp/all_regions.csv", 196 | "/tmp/MA_only.csv" 197 | ).await { 198 | eprintln!("error running filter_by_region_serde: {}", err); 199 | process::exit(1); 200 | } 201 | }); 202 | } 203 | 204 | #[cfg(feature = "with_serde")] 205 | #[cfg(feature = "tokio")] 206 | fn main() { 207 | tokio::runtime::Runtime::new().unwrap().block_on(async { 208 | if let Err(err) = filter_by_region_serde( 209 | "MA", 210 | "/tmp/all_regions.csv", 211 | "/tmp/MA_only.csv" 212 | ).await { 213 | eprintln!("error running filter_by_region_serde: {}", err); 214 | process::exit(1); 215 | } 216 | }); 217 | } 218 | 219 | #[cfg(not(feature = "with_serde"))] 220 | fn main() {} 221 | ``` 222 | */ 223 | 224 | #[cfg(feature = "tokio")] 225 | extern crate tokio1 as tokio; 226 | 227 | #[cfg(test)] 228 | mod tests { 229 | use std::error::Error; 230 | 231 | cfg_if::cfg_if! { 232 | if #[cfg(feature = "tokio")] { 233 | use tokio_stream::StreamExt; 234 | use tokio::fs::File; 235 | } else { 236 | use futures::stream::StreamExt; 237 | use async_std::fs::File; 238 | } 239 | } 240 | 241 | async fn create_async(file:&str) -> Result<(), Box> { 242 | // Build the CSV reader and iterate over each record. 243 | let mut wri = crate::AsyncWriter::from_writer( 244 | File::create(file).await? 245 | ); 246 | wri.write_record(&["city","region","country","population","avg_age"]).await?; 247 | wri.write_record(&["Northbridge","MA","United States","14061","42.5"]).await?; 248 | wri.write_record(&["Westborough","MA","United States","29313", "45.1"]).await?; 249 | wri.write_record(&["Springfield","NJ","United States","14976", "35.0"]).await?; 250 | wri.flush().await?; 251 | Ok(()) 252 | } 253 | 254 | async fn copy_async(file_in:&str, file_out:&str) -> Result<(), Box> { 255 | let mut rdr = crate::AsyncReader::from_reader( 256 | File::open(file_in).await? 257 | ); 258 | let mut wri = crate::AsyncWriter::from_writer( 259 | File::create(file_out).await? 260 | ); 261 | wri.write_record(rdr.headers().await?.into_iter()).await?; 262 | let mut records = rdr.records(); 263 | while let Some(record) = records.next().await { 264 | wri.write_record(&record?).await?; 265 | } 266 | Ok(()) 267 | } 268 | 269 | #[test] 270 | fn test_on_files() { 271 | use std::io::Read; 272 | use std::hash::Hasher; 273 | std::fs::create_dir_all("examples/data").unwrap(); 274 | let file_in = "examples/data/smallpop.csv"; 275 | let file_out = "examples/data/smallpop_out.csv"; 276 | 277 | #[cfg(not(feature = "tokio"))] 278 | async_std::task::block_on(async { 279 | if let Err(err) = create_async(file_in).await { 280 | assert!(false, "error running create_async: {}", err); 281 | } 282 | if let Err(err) = copy_async(file_in, file_out).await { 283 | assert!(false, "error running copy_async: {}", err); 284 | } 285 | }); 286 | #[cfg(feature = "tokio")] 287 | tokio::runtime::Runtime::new().unwrap().block_on(async { 288 | if let Err(err) = create_async(file_in).await { 289 | assert!(false, "error running create_async: {}", err); 290 | } 291 | if let Err(err) = copy_async(file_in, file_out).await { 292 | assert!(false, "error running copy_async: {}", err); 293 | } 294 | }); 295 | 296 | let mut bytes_in = vec![]; 297 | std::fs::File::open(file_in).unwrap().read_to_end(&mut bytes_in).unwrap(); 298 | let mut hasher_in = std::collections::hash_map::DefaultHasher::new(); 299 | hasher_in.write(&bytes_in); 300 | 301 | let mut bytes_out = vec![]; 302 | std::fs::File::open(file_out).unwrap().read_to_end(&mut bytes_out).unwrap(); 303 | let mut hasher_out = std::collections::hash_map::DefaultHasher::new(); 304 | hasher_out.write(&bytes_out); 305 | 306 | assert_eq!(hasher_in.finish(), hasher_out.finish(), "Copied file {} is different than source {}", file_out, file_in); 307 | 308 | std::fs::remove_file(file_in).unwrap(); 309 | std::fs::remove_file(file_out).unwrap(); 310 | } 311 | 312 | cfg_if::cfg_if! { 313 | if #[cfg(feature = "with_serde")] { 314 | use serde::{Deserialize, Serialize}; 315 | 316 | #[derive(Deserialize, Serialize)] 317 | struct Row { 318 | city: String, 319 | region: String, 320 | country: String, 321 | population: u64, 322 | avg_age: f32, 323 | } 324 | 325 | async fn copy_async_serde(file_in:&str, file_out:&str) -> Result<(), Box> { 326 | let mut rdr = crate::AsyncDeserializer::from_reader( 327 | File::open(file_in).await? 328 | ); 329 | let mut wri = crate::AsyncSerializer::from_writer( 330 | File::create(file_out).await? 331 | ); 332 | // Caution: 333 | // let mut records = rdr.deserialize(); 334 | // does compile, but produce empty output (deserialize to "()" type) 335 | let mut records = rdr.deserialize::(); 336 | while let Some(record) = records.next().await { 337 | wri.serialize(&record?).await?; 338 | } 339 | Ok(()) 340 | } 341 | 342 | #[test] 343 | fn test_on_files_serde() { 344 | use std::io::Read; 345 | use std::hash::Hasher; 346 | std::fs::create_dir_all("examples/data").unwrap(); 347 | let file_in = "examples/data/smallpop_serde.csv"; 348 | let file_out = "examples/data/smallpop_serde_out.csv"; 349 | 350 | #[cfg(not(feature = "tokio"))] 351 | async_std::task::block_on(async { 352 | if let Err(err) = create_async(file_in).await { 353 | assert!(false, "error running create_async: {}", err); 354 | } 355 | if let Err(err) = copy_async_serde(file_in, file_out).await { 356 | assert!(false, "error running copy_async_serde: {}", err); 357 | } 358 | }); 359 | #[cfg(feature = "tokio")] 360 | tokio::runtime::Runtime::new().unwrap().block_on(async { 361 | if let Err(err) = create_async(file_in).await { 362 | assert!(false, "error running create_async: {}", err); 363 | } 364 | if let Err(err) = copy_async_serde(file_in, file_out).await { 365 | assert!(false, "error running copy_async_serde: {}", err); 366 | } 367 | }); 368 | 369 | let mut bytes_in = vec![]; 370 | std::fs::File::open(file_in).unwrap().read_to_end(&mut bytes_in).unwrap(); 371 | let mut hasher_in = std::collections::hash_map::DefaultHasher::new(); 372 | hasher_in.write(&bytes_in); 373 | 374 | let mut bytes_out = vec![]; 375 | std::fs::File::open(file_out).unwrap().read_to_end(&mut bytes_out).unwrap(); 376 | let mut hasher_out = std::collections::hash_map::DefaultHasher::new(); 377 | hasher_out.write(&bytes_out); 378 | 379 | assert_eq!(hasher_in.finish(), hasher_out.finish(), "Copied file {} is different than source {}", file_out, file_in); 380 | 381 | std::fs::remove_file(file_in).unwrap(); 382 | std::fs::remove_file(file_out).unwrap(); 383 | } 384 | 385 | #[test] 386 | #[cfg(not(tarpaulin))] 387 | fn test_on_files_serde_send() { 388 | use std::io::Read; 389 | use std::hash::Hasher; 390 | std::fs::create_dir_all("examples/data").unwrap(); 391 | let file_in = "examples/data/smallpop_serde_send.csv"; 392 | let file_out = "examples/data/smallpop_serde_send_out.csv"; 393 | 394 | // Below code requires / check that deserializers are Send. 395 | #[cfg(not(feature = "tokio"))] 396 | { 397 | let jh = async_std::task::spawn(async move { 398 | if let Err(err) = create_async(file_in).await { 399 | assert!(false, "error running create_async: {}", err); 400 | } 401 | if let Err(err) = copy_async_serde(file_in, file_out).await { 402 | assert!(false, "error running copy_async_serde: {}", err); 403 | } 404 | }); 405 | async_std::task::block_on(jh); 406 | } 407 | #[cfg(feature = "tokio")] 408 | { 409 | let rt = tokio::runtime::Runtime::new().unwrap(); 410 | let jh = rt.spawn(async move { 411 | if let Err(err) = create_async(file_in).await { 412 | assert!(false, "error running create_async: {}", err); 413 | } 414 | if let Err(err) = copy_async_serde(file_in, file_out).await { 415 | assert!(false, "error running copy_async_serde: {}", err); 416 | } 417 | }); 418 | rt.block_on(jh).unwrap(); 419 | } 420 | 421 | let mut bytes_in = vec![]; 422 | std::fs::File::open(file_in).unwrap().read_to_end(&mut bytes_in).unwrap(); 423 | let mut hasher_in = std::collections::hash_map::DefaultHasher::new(); 424 | hasher_in.write(&bytes_in); 425 | 426 | let mut bytes_out = vec![]; 427 | std::fs::File::open(file_out).unwrap().read_to_end(&mut bytes_out).unwrap(); 428 | let mut hasher_out = std::collections::hash_map::DefaultHasher::new(); 429 | hasher_out.write(&bytes_out); 430 | 431 | assert_eq!(hasher_in.finish(), hasher_out.finish(), "Copied file {} is different than source {}", file_out, file_in); 432 | 433 | std::fs::remove_file(file_in).unwrap(); 434 | std::fs::remove_file(file_out).unwrap(); 435 | } 436 | } 437 | } 438 | } 439 | 440 | mod byte_record; 441 | mod debug; 442 | mod error; 443 | mod string_record; 444 | 445 | cfg_if::cfg_if! { 446 | if #[cfg(feature = "with_serde")] { 447 | mod deserializer; 448 | mod serializer; 449 | pub use deserializer::{DeserializeError, DeserializeErrorKind}; 450 | }} 451 | 452 | mod async_readers; 453 | mod async_writers; 454 | 455 | // pub mod cookbook; 456 | // pub mod tutorial; 457 | 458 | 459 | pub use crate::byte_record::{ByteRecord, ByteRecordIter, Position}; 460 | pub use crate::error::{ 461 | Error, ErrorKind, FromUtf8Error, IntoInnerError, Result, Utf8Error, 462 | }; 463 | pub use crate::string_record::{StringRecord, StringRecordIter}; 464 | 465 | pub use crate::async_readers::AsyncReaderBuilder; 466 | pub use crate::async_writers::AsyncWriterBuilder; 467 | 468 | cfg_if::cfg_if! { 469 | if #[cfg(feature = "tokio")] { 470 | pub use crate::async_readers::{ 471 | ardr_tokio::AsyncReader, 472 | ByteRecordsIntoStream, ByteRecordsStream, 473 | StringRecordsIntoStream, StringRecordsStream, 474 | }; 475 | pub use crate::async_writers::awtr_tokio::AsyncWriter; 476 | } else { 477 | pub use crate::async_readers::{ 478 | ardr_futures::AsyncReader, 479 | ByteRecordsIntoStream, ByteRecordsStream, 480 | StringRecordsIntoStream, StringRecordsStream, 481 | }; 482 | pub use crate::async_writers::awtr_futures::AsyncWriter; 483 | }} 484 | 485 | #[cfg(all(feature = "with_serde", not(feature = "tokio")))] 486 | pub use crate::async_readers::{ 487 | ades_futures::AsyncDeserializer, 488 | DeserializeRecordsStream, DeserializeRecordsIntoStream, 489 | DeserializeRecordsStreamPos, DeserializeRecordsIntoStreamPos, 490 | }; 491 | #[cfg(all(feature = "with_serde", not(feature = "tokio")))] 492 | pub use crate::async_writers::aser_futures::AsyncSerializer; 493 | #[cfg(all(feature = "with_serde", feature = "tokio"))] 494 | pub use crate::async_readers::{ 495 | ades_tokio::AsyncDeserializer, 496 | DeserializeRecordsStream, DeserializeRecordsIntoStream, 497 | DeserializeRecordsStreamPos, DeserializeRecordsIntoStreamPos, 498 | }; 499 | #[cfg(all(feature = "with_serde", feature = "tokio"))] 500 | pub use crate::async_writers::aser_tokio::AsyncSerializer; 501 | 502 | 503 | /// The quoting style to use when writing CSV data. 504 | #[derive(Clone, Copy, Debug)] 505 | #[non_exhaustive] 506 | pub enum QuoteStyle { 507 | /// This puts quotes around every field. Always. 508 | Always, 509 | /// This puts quotes around fields only when necessary. 510 | /// 511 | /// They are necessary when fields contain a quote, delimiter or record 512 | /// terminator. Quotes are also necessary when writing an empty record 513 | /// (which is indistinguishable from a record with one empty field). 514 | /// 515 | /// This is the default. 516 | Necessary, 517 | /// This puts quotes around all fields that are non-numeric. Namely, when 518 | /// writing a field that does not parse as a valid float or integer, then 519 | /// quotes will be used even if they aren't strictly necessary. 520 | NonNumeric, 521 | /// This *never* writes quotes, even if it would produce invalid CSV data. 522 | Never, 523 | } 524 | 525 | impl QuoteStyle { 526 | #[allow(unreachable_patterns)] 527 | fn to_core(self) -> csv_core::QuoteStyle { 528 | match self { 529 | QuoteStyle::Always => csv_core::QuoteStyle::Always, 530 | QuoteStyle::Necessary => csv_core::QuoteStyle::Necessary, 531 | QuoteStyle::NonNumeric => csv_core::QuoteStyle::NonNumeric, 532 | QuoteStyle::Never => csv_core::QuoteStyle::Never 533 | } 534 | } 535 | } 536 | 537 | impl Default for QuoteStyle { 538 | fn default() -> QuoteStyle { 539 | QuoteStyle::Necessary 540 | } 541 | } 542 | 543 | /// A record terminator. 544 | /// 545 | /// Use this to specify the record terminator while parsing CSV. The default is 546 | /// CRLF, which treats `\r`, `\n` or `\r\n` as a single record terminator. 547 | #[derive(Clone, Copy, Debug)] 548 | #[non_exhaustive] 549 | pub enum Terminator { 550 | /// Parses `\r`, `\n` or `\r\n` as a single record terminator. 551 | CRLF, 552 | /// Parses the byte given as a record terminator. 553 | Any(u8), 554 | } 555 | 556 | impl Terminator { 557 | /// Convert this to the csv_core type of the same name. 558 | #[allow(unreachable_patterns)] 559 | fn to_core(self) -> csv_core::Terminator { 560 | match self { 561 | Terminator::CRLF => csv_core::Terminator::CRLF, 562 | Terminator::Any(b) => csv_core::Terminator::Any(b) 563 | } 564 | } 565 | } 566 | 567 | impl Default for Terminator { 568 | fn default() -> Terminator { 569 | Terminator::CRLF 570 | } 571 | } 572 | 573 | /// The whitespace preservation behavior when reading CSV data. 574 | #[derive(Clone, Copy, Debug, PartialEq)] 575 | #[non_exhaustive] 576 | pub enum Trim { 577 | /// Preserves fields and headers. This is the default. 578 | None, 579 | /// Trim whitespace from headers. 580 | Headers, 581 | /// Trim whitespace from fields, but not headers. 582 | Fields, 583 | /// Trim whitespace from fields and headers. 584 | All, 585 | } 586 | 587 | impl Trim { 588 | fn should_trim_fields(&self) -> bool { 589 | self == &Trim::Fields || self == &Trim::All 590 | } 591 | 592 | fn should_trim_headers(&self) -> bool { 593 | self == &Trim::Headers || self == &Trim::All 594 | } 595 | } 596 | 597 | impl Default for Trim { 598 | fn default() -> Trim { 599 | Trim::None 600 | } 601 | } 602 | 603 | -------------------------------------------------------------------------------- /src/string_record.rs: -------------------------------------------------------------------------------- 1 | use std::fmt; 2 | use std::iter::FromIterator; 3 | use std::ops::{self, Range}; 4 | use std::result; 5 | use std::str; 6 | 7 | #[cfg(not(feature = "tokio"))] 8 | use futures::io; 9 | 10 | #[cfg(feature = "tokio")] 11 | use tokio::io; 12 | 13 | #[cfg(feature = "with_serde")] 14 | use serde::de::Deserialize; 15 | 16 | use crate::async_readers::AsyncReaderImpl; 17 | use crate::byte_record::{ByteRecord, ByteRecordIter, Position}; 18 | #[cfg(feature = "with_serde")] 19 | use crate::deserializer::deserialize_string_record; 20 | use crate::error::{Error, ErrorKind, FromUtf8Error, Result}; 21 | 22 | /// A single CSV record stored as valid UTF-8 bytes. 23 | /// 24 | /// A string record permits reading or writing CSV rows that are valid UTF-8. 25 | /// If string records are used to read CSV data that is not valid UTF-8, then 26 | /// the CSV reader will return an invalid UTF-8 error. If you do need to read 27 | /// possibly invalid UTF-8 data, then you should prefer using a 28 | /// [`ByteRecord`](struct.ByteRecord.html), 29 | /// since it makes no assumptions about UTF-8. 30 | /// 31 | /// If you are using the Serde (de)serialization APIs, then you probably never 32 | /// need to interact with a `ByteRecord` or a `StringRecord`. However, there 33 | /// are some circumstances in which you might need to use a raw record type 34 | /// while still using Serde. For example, if you need to deserialize possibly 35 | /// invalid UTF-8 fields, then you'll need to first read your record into a 36 | /// `ByteRecord`, and then use `ByteRecord::deserialize` to run Serde. Another 37 | /// reason for using the raw record deserialization APIs is if you're using 38 | /// Serde to read into borrowed data such as a `&'a str` or a `&'a [u8]`. 39 | /// 40 | /// Two `StringRecord`s are compared on the basis of their field data. Any 41 | /// position information associated with the records is ignored. 42 | #[derive(Clone, Eq)] 43 | pub struct StringRecord(ByteRecord); 44 | 45 | impl PartialEq for StringRecord { 46 | fn eq(&self, other: &StringRecord) -> bool { 47 | self.0.iter_eq(&other.0) 48 | } 49 | } 50 | 51 | impl> PartialEq> for StringRecord { 52 | fn eq(&self, other: &Vec) -> bool { 53 | self.0.iter_eq(other) 54 | } 55 | } 56 | 57 | impl<'a, T: AsRef<[u8]>> PartialEq> for &'a StringRecord { 58 | fn eq(&self, other: &Vec) -> bool { 59 | self.0.iter_eq(other) 60 | } 61 | } 62 | 63 | impl> PartialEq<[T]> for StringRecord { 64 | fn eq(&self, other: &[T]) -> bool { 65 | self.0.iter_eq(other) 66 | } 67 | } 68 | 69 | impl<'a, T: AsRef<[u8]>> PartialEq<[T]> for &'a StringRecord { 70 | fn eq(&self, other: &[T]) -> bool { 71 | self.0.iter_eq(other) 72 | } 73 | } 74 | 75 | impl fmt::Debug for StringRecord { 76 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 77 | let fields: Vec<&str> = self.iter().collect(); 78 | write!(f, "StringRecord({:?})", fields) 79 | } 80 | } 81 | 82 | impl Default for StringRecord { 83 | #[inline] 84 | fn default() -> StringRecord { 85 | StringRecord::new() 86 | } 87 | } 88 | 89 | impl StringRecord { 90 | /// Create a new empty `StringRecord`. 91 | /// 92 | /// Note that you may find the `StringRecord::from` constructor more 93 | /// convenient, which is provided by an impl on the `From` trait. 94 | /// 95 | /// # Example: create an empty record 96 | /// 97 | /// ``` 98 | /// use csv_async::StringRecord; 99 | /// 100 | /// let record = StringRecord::new(); 101 | /// assert_eq!(record.len(), 0); 102 | /// ``` 103 | /// 104 | /// # Example: initialize a record from a `Vec` 105 | /// 106 | /// ``` 107 | /// use csv_async::StringRecord; 108 | /// 109 | /// let record = StringRecord::from(vec!["a", "b", "c"]); 110 | /// assert_eq!(record.len(), 3); 111 | /// ``` 112 | #[inline] 113 | pub fn new() -> StringRecord { 114 | StringRecord(ByteRecord::new()) 115 | } 116 | 117 | /// Create a new empty `StringRecord` with the given capacity. 118 | /// 119 | /// `buffer` refers to the capacity of the buffer used to store the 120 | /// actual row contents. `fields` refers to the number of fields one 121 | /// might expect to store. 122 | #[inline] 123 | pub fn with_capacity(buffer: usize, fields: usize) -> StringRecord { 124 | StringRecord(ByteRecord::with_capacity(buffer, fields)) 125 | } 126 | 127 | /// Create a new `StringRecord` from a `ByteRecord`. 128 | /// 129 | /// Note that this does UTF-8 validation. If the given `ByteRecord` does 130 | /// not contain valid UTF-8, then this returns an error. The error includes 131 | /// the UTF-8 error and the original `ByteRecord`. 132 | /// 133 | /// # Example: valid UTF-8 134 | /// 135 | /// ``` 136 | /// use std::error::Error; 137 | /// use csv_async::{ByteRecord, StringRecord}; 138 | /// 139 | /// # fn main() { example().unwrap(); } 140 | /// fn example() -> Result<(), Box> { 141 | /// let byte_record = ByteRecord::from(vec!["a", "b", "c"]); 142 | /// let str_record = StringRecord::from_byte_record(byte_record)?; 143 | /// assert_eq!(str_record.len(), 3); 144 | /// Ok(()) 145 | /// } 146 | /// ``` 147 | /// 148 | /// # Example: invalid UTF-8 149 | /// 150 | /// ``` 151 | /// use csv_async::{ByteRecord, StringRecord}; 152 | /// 153 | /// let byte_record = ByteRecord::from(vec![ 154 | /// &b"quux"[..], &b"foo\xFFbar"[..], &b"c"[..], 155 | /// ]); 156 | /// let err = StringRecord::from_byte_record(byte_record).unwrap_err(); 157 | /// assert_eq!(err.utf8_error().field(), 1); 158 | /// assert_eq!(err.utf8_error().valid_up_to(), 3); 159 | /// ``` 160 | #[inline] 161 | pub fn from_byte_record( 162 | record: ByteRecord, 163 | ) -> result::Result { 164 | match record.validate() { 165 | Ok(()) => Ok(StringRecord(record)), 166 | Err(err) => Err(FromUtf8Error::new(record, err)), 167 | } 168 | } 169 | 170 | /// Lossily create a new `StringRecord` from a `ByteRecord`. 171 | /// 172 | /// This is like `StringRecord::from_byte_record`, except all invalid UTF-8 173 | /// sequences are replaced with the `U+FFFD REPLACEMENT CHARACTER`, which 174 | /// looks like this: �. 175 | /// 176 | /// # Example: valid UTF-8 177 | /// 178 | /// ``` 179 | /// use csv_async::{ByteRecord, StringRecord}; 180 | /// 181 | /// let byte_record = ByteRecord::from(vec!["a", "b", "c"]); 182 | /// let str_record = StringRecord::from_byte_record_lossy(byte_record); 183 | /// assert_eq!(str_record.len(), 3); 184 | /// ``` 185 | /// 186 | /// # Example: invalid UTF-8 187 | /// 188 | /// ``` 189 | /// use csv_async::{ByteRecord, StringRecord}; 190 | /// 191 | /// let byte_record = ByteRecord::from(vec![ 192 | /// &b"quux"[..], &b"foo\xFFbar"[..], &b"c"[..], 193 | /// ]); 194 | /// let str_record = StringRecord::from_byte_record_lossy(byte_record); 195 | /// assert_eq!(&str_record[0], "quux"); 196 | /// assert_eq!(&str_record[1], "foo�bar"); 197 | /// assert_eq!(&str_record[2], "c"); 198 | /// ``` 199 | #[inline] 200 | pub fn from_byte_record_lossy(record: ByteRecord) -> StringRecord { 201 | // If the record is valid UTF-8, then take the easy path. 202 | if let Ok(()) = record.validate() { 203 | return StringRecord(record); 204 | } 205 | // TODO: We can be faster here. Not sure if it's worth it. 206 | let mut str_record = 207 | StringRecord::with_capacity(record.as_slice().len(), record.len()); 208 | for field in &record { 209 | str_record.push_field(&String::from_utf8_lossy(field)); 210 | } 211 | str_record 212 | } 213 | 214 | /// Returns an iterator over all fields in this record. 215 | /// 216 | /// # Example 217 | /// 218 | /// This example shows how to iterate over each field in a `StringRecord`. 219 | /// 220 | /// ``` 221 | /// use csv_async::StringRecord; 222 | /// 223 | /// let record = StringRecord::from(vec!["a", "b", "c"]); 224 | /// for field in record.iter() { 225 | /// assert!(field == "a" || field == "b" || field == "c"); 226 | /// } 227 | /// ``` 228 | #[inline] 229 | pub fn iter(&self) -> StringRecordIter { 230 | self.into_iter() 231 | } 232 | 233 | /// Return the field at zero-based index `i`. 234 | /// 235 | /// If no field at index `i` exists, then this returns `None`. 236 | /// 237 | /// # Example 238 | /// 239 | /// ``` 240 | /// use csv_async::StringRecord; 241 | /// 242 | /// let record = StringRecord::from(vec!["a", "b", "c"]); 243 | /// assert_eq!(record.get(1), Some("b")); 244 | /// assert_eq!(record.get(3), None); 245 | /// ``` 246 | #[inline] 247 | pub fn get(&self, i: usize) -> Option<&str> { 248 | self.0.get(i).map(|bytes| { 249 | debug_assert!(str::from_utf8(bytes).is_ok()); 250 | // This is safe because we guarantee that all string records 251 | // have a valid UTF-8 buffer. It's also safe because we 252 | // individually check each field for valid UTF-8. 253 | unsafe { str::from_utf8_unchecked(bytes) } 254 | }) 255 | } 256 | 257 | /// Returns true if and only if this record is empty. 258 | /// 259 | /// # Example 260 | /// 261 | /// ``` 262 | /// use csv_async::StringRecord; 263 | /// 264 | /// assert!(StringRecord::new().is_empty()); 265 | /// ``` 266 | #[inline] 267 | pub fn is_empty(&self) -> bool { 268 | self.len() == 0 269 | } 270 | 271 | /// Returns the number of fields in this record. 272 | /// 273 | /// # Example 274 | /// 275 | /// ``` 276 | /// use csv_async::StringRecord; 277 | /// 278 | /// let record = StringRecord::from(vec!["a", "b", "c"]); 279 | /// assert_eq!(record.len(), 3); 280 | /// ``` 281 | #[inline] 282 | pub fn len(&self) -> usize { 283 | self.0.len() 284 | } 285 | 286 | /// Truncate this record to `n` fields. 287 | /// 288 | /// If `n` is greater than the number of fields in this record, then this 289 | /// has no effect. 290 | /// 291 | /// # Example 292 | /// 293 | /// ``` 294 | /// use csv_async::StringRecord; 295 | /// 296 | /// let mut record = StringRecord::from(vec!["a", "b", "c"]); 297 | /// assert_eq!(record.len(), 3); 298 | /// record.truncate(1); 299 | /// assert_eq!(record.len(), 1); 300 | /// assert_eq!(record, vec!["a"]); 301 | /// ``` 302 | #[inline] 303 | pub fn truncate(&mut self, n: usize) { 304 | self.0.truncate(n); 305 | } 306 | 307 | /// Clear this record so that it has zero fields. 308 | /// 309 | /// Note that it is not necessary to clear the record to reuse it with 310 | /// the CSV reader. 311 | /// 312 | /// # Example 313 | /// 314 | /// ``` 315 | /// use csv_async::StringRecord; 316 | /// 317 | /// let mut record = StringRecord::from(vec!["a", "b", "c"]); 318 | /// assert_eq!(record.len(), 3); 319 | /// record.clear(); 320 | /// assert_eq!(record.len(), 0); 321 | /// ``` 322 | #[inline] 323 | pub fn clear(&mut self) { 324 | self.0.clear(); 325 | } 326 | 327 | /// Trim the fields of this record so that leading and trailing whitespace 328 | /// is removed. 329 | /// 330 | /// This method uses the Unicode definition of whitespace. 331 | /// 332 | /// # Example 333 | /// 334 | /// ``` 335 | /// use csv_async::StringRecord; 336 | /// 337 | /// let mut record = StringRecord::from(vec![ 338 | /// " ", "\u{3000}\tfoo ", "bar ", "b a z", 339 | /// ]); 340 | /// record.trim(); 341 | /// assert_eq!(record, vec!["", "foo", "bar", "b a z"]); 342 | /// ``` 343 | pub fn trim(&mut self) { 344 | let length = self.len(); 345 | if length == 0 { 346 | return; 347 | } 348 | // TODO: We could likely do this in place, but for now, we allocate. 349 | let mut trimmed = 350 | StringRecord::with_capacity(self.as_slice().len(), self.len()); 351 | trimmed.set_position(self.position().cloned()); 352 | for field in &*self { 353 | trimmed.push_field(field.trim()); 354 | } 355 | *self = trimmed; 356 | } 357 | 358 | /// Add a new field to this record. 359 | /// 360 | /// # Example 361 | /// 362 | /// ``` 363 | /// use csv_async::StringRecord; 364 | /// 365 | /// let mut record = StringRecord::new(); 366 | /// record.push_field("foo"); 367 | /// assert_eq!(&record[0], "foo"); 368 | /// ``` 369 | #[inline] 370 | pub fn push_field(&mut self, field: &str) { 371 | self.0.push_field(field.as_bytes()); 372 | } 373 | 374 | /// Return the position of this record, if available. 375 | /// 376 | /// # Example 377 | /// 378 | /// ``` 379 | /// use std::error::Error; 380 | /// use futures::stream::{self, StreamExt}; 381 | /// use csv_async::{StringRecord, AsyncReaderBuilder}; 382 | /// 383 | /// # fn main() { async_std::task::block_on(async {example().await.unwrap()}); } 384 | /// async fn example() -> Result<(), Box> { 385 | /// let mut record = StringRecord::new(); 386 | /// let mut rdr = AsyncReaderBuilder::new() 387 | /// .has_headers(false) 388 | /// .create_reader("a,b,c\nx,y,z".as_bytes() 389 | /// ); 390 | /// assert!(rdr.read_record(&mut record).await?); 391 | /// { 392 | /// let pos = record.position().expect("a record position"); 393 | /// assert_eq!(pos.byte(), 0); 394 | /// assert_eq!(pos.line(), 1); 395 | /// assert_eq!(pos.record(), 0); 396 | /// } 397 | /// 398 | /// assert!(rdr.read_record(&mut record).await?); 399 | /// { 400 | /// let pos = record.position().expect("a record position"); 401 | /// assert_eq!(pos.byte(), 6); 402 | /// assert_eq!(pos.line(), 2); 403 | /// assert_eq!(pos.record(), 1); 404 | /// } 405 | /// 406 | /// // Finish the CSV reader for good measure. 407 | /// assert!(!rdr.read_record(&mut record).await?); 408 | /// Ok(()) 409 | /// } 410 | /// ``` 411 | #[inline] 412 | pub fn position(&self) -> Option<&Position> { 413 | self.0.position() 414 | } 415 | 416 | /// Set the position of this record. 417 | /// 418 | /// # Example 419 | /// 420 | /// ``` 421 | /// use csv_async::{StringRecord, Position}; 422 | /// 423 | /// let mut record = StringRecord::from(vec!["a", "b", "c"]); 424 | /// let mut pos = Position::new(); 425 | /// pos.set_byte(100); 426 | /// pos.set_line(4); 427 | /// pos.set_record(2); 428 | /// 429 | /// record.set_position(Some(pos.clone())); 430 | /// assert_eq!(record.position(), Some(&pos)); 431 | /// ``` 432 | #[inline] 433 | pub fn set_position(&mut self, pos: Option) { 434 | self.0.set_position(pos); 435 | } 436 | 437 | /// Return the start and end position of a field in this record. 438 | /// 439 | /// If no such field exists at the given index, then return `None`. 440 | /// 441 | /// The range returned can be used with the slice returned by `as_slice`. 442 | /// Namely, the range returned is guaranteed to start and end at valid 443 | /// UTF-8 sequence boundaries. 444 | /// 445 | /// # Example 446 | /// 447 | /// ``` 448 | /// use csv_async::StringRecord; 449 | /// 450 | /// let record = StringRecord::from(vec!["foo", "quux", "z"]); 451 | /// let range = record.range(1).expect("a record range"); 452 | /// assert_eq!(&record.as_slice()[range], "quux"); 453 | /// ``` 454 | #[inline] 455 | pub fn range(&self, i: usize) -> Option> { 456 | self.0.range(i) 457 | } 458 | 459 | /// Return the entire row as a single string slice. The slice returned 460 | /// stores all fields contiguously. The boundaries of each field can be 461 | /// determined via the `range` method. 462 | /// 463 | /// # Example 464 | /// 465 | /// ``` 466 | /// use csv_async::StringRecord; 467 | /// 468 | /// let record = StringRecord::from(vec!["foo", "quux", "z"]); 469 | /// assert_eq!(record.as_slice(), "fooquuxz"); 470 | /// ``` 471 | #[inline] 472 | pub fn as_slice(&self) -> &str { 473 | debug_assert!(str::from_utf8(self.0.as_slice()).is_ok()); 474 | // This is safe because we guarantee that each field is valid UTF-8. 475 | // If each field is valid UTF-8, then the entire buffer (up to the end 476 | // of the last field) must also be valid UTF-8. 477 | unsafe { str::from_utf8_unchecked(self.0.as_slice()) } 478 | } 479 | 480 | /// Return a reference to this record's raw 481 | /// [`ByteRecord`](struct.ByteRecord.html). 482 | /// 483 | /// # Example 484 | /// 485 | /// ``` 486 | /// use csv_async::StringRecord; 487 | /// 488 | /// let str_record = StringRecord::from(vec!["a", "b", "c"]); 489 | /// let byte_record = str_record.as_byte_record(); 490 | /// assert_eq!(&byte_record[2], b"c"); 491 | /// ``` 492 | #[inline] 493 | pub fn as_byte_record(&self) -> &ByteRecord { 494 | &self.0 495 | } 496 | 497 | /// Convert this `StringRecord` into a 498 | /// [`ByteRecord`](struct.ByteRecord.html). 499 | /// 500 | /// # Example 501 | /// 502 | /// ``` 503 | /// use csv_async::StringRecord; 504 | /// 505 | /// let str_record = StringRecord::from(vec!["a", "b", "c"]); 506 | /// let byte_record = str_record.into_byte_record(); 507 | /// assert_eq!(&byte_record[2], b"c"); 508 | /// ``` 509 | /// 510 | /// Note that this can also be achieved using the `From` impl: 511 | /// 512 | /// ``` 513 | /// use csv_async::{ByteRecord, StringRecord}; 514 | /// 515 | /// // Using ByteRecord::from... 516 | /// let str_record = StringRecord::from(vec!["a", "b", "c"]); 517 | /// assert_eq!(ByteRecord::from(str_record).len(), 3); 518 | /// 519 | /// // Using StringRecord::into... 520 | /// let str_record = StringRecord::from(vec!["a", "b", "c"]); 521 | /// let byte_record: ByteRecord = str_record.into(); 522 | /// assert_eq!(byte_record.len(), 3); 523 | /// ``` 524 | #[inline] 525 | pub fn into_byte_record(self) -> ByteRecord { 526 | self.0 527 | } 528 | 529 | /// Deserialize this record. 530 | /// 531 | /// The `D` type parameter refers to the type that this record should be 532 | /// deserialized into. The `'de` lifetime refers to the lifetime of the 533 | /// `StringRecord`. The `'de` lifetime permits deserializing into structs 534 | /// that borrow field data from this record. 535 | /// 536 | /// An optional `headers` parameter permits deserializing into a struct 537 | /// based on its field names (corresponding to header values) rather than 538 | /// the order in which the fields are defined. 539 | /// 540 | /// # Example: without headers 541 | /// 542 | /// This shows how to deserialize a single row into a struct based on the 543 | /// order in which fields occur. This example also shows how to borrow 544 | /// fields from the `StringRecord`, which results in zero allocation 545 | /// deserialization. 546 | /// 547 | /// ``` 548 | /// use std::error::Error; 549 | /// 550 | /// use csv_async::StringRecord; 551 | /// use serde::Deserialize; 552 | /// 553 | /// #[derive(Deserialize)] 554 | /// struct Row<'a> { 555 | /// city: &'a str, 556 | /// country: &'a str, 557 | /// population: u64, 558 | /// } 559 | /// 560 | /// # fn main() { example().unwrap() } 561 | /// fn example() -> Result<(), Box> { 562 | /// let record = StringRecord::from(vec![ 563 | /// "Boston", "United States", "4628910", 564 | /// ]); 565 | /// 566 | /// let row: Row = record.deserialize(None)?; 567 | /// assert_eq!(row.city, "Boston"); 568 | /// assert_eq!(row.country, "United States"); 569 | /// assert_eq!(row.population, 4628910); 570 | /// Ok(()) 571 | /// } 572 | /// ``` 573 | /// 574 | /// # Example: with headers 575 | /// 576 | /// This example is like the previous one, but shows how to deserialize 577 | /// into a struct based on the struct's field names. For this to work, 578 | /// you must provide a header row. 579 | /// 580 | /// This example also shows that you can deserialize into owned data 581 | /// types (e.g., `String`) instead of borrowed data types (e.g., `&str`). 582 | /// 583 | /// ``` 584 | /// use std::error::Error; 585 | /// 586 | /// use csv_async::StringRecord; 587 | /// use serde::Deserialize; 588 | /// 589 | /// #[derive(Deserialize)] 590 | /// struct Row { 591 | /// city: String, 592 | /// country: String, 593 | /// population: u64, 594 | /// } 595 | /// 596 | /// # fn main() { example().unwrap() } 597 | /// fn example() -> Result<(), Box> { 598 | /// // Notice that the fields are not in the same order 599 | /// // as the fields in the struct! 600 | /// let header = StringRecord::from(vec![ 601 | /// "country", "city", "population", 602 | /// ]); 603 | /// let record = StringRecord::from(vec![ 604 | /// "United States", "Boston", "4628910", 605 | /// ]); 606 | /// 607 | /// let row: Row = record.deserialize(Some(&header))?; 608 | /// assert_eq!(row.city, "Boston"); 609 | /// assert_eq!(row.country, "United States"); 610 | /// assert_eq!(row.population, 4628910); 611 | /// Ok(()) 612 | /// } 613 | /// ``` 614 | #[cfg(feature = "with_serde")] 615 | pub fn deserialize<'de, D: Deserialize<'de>>( 616 | &'de self, 617 | headers: Option<&'de StringRecord>, 618 | ) -> Result { 619 | deserialize_string_record(self, headers) 620 | } 621 | 622 | /// A safe function for reading CSV data into a `StringRecord`. 623 | /// 624 | /// This relies on the internal representation of `StringRecord`. 625 | #[inline(always)] 626 | pub(crate) async fn read( 627 | &mut self, 628 | rdr: &mut AsyncReaderImpl, 629 | ) -> Result { 630 | // SAFETY: This code is critical to upholding the safety of other code 631 | // blocks in this module. Namely, after calling `read_byte_record`, 632 | // it is possible for `record` to contain invalid UTF-8. We check for 633 | // this in the `validate` method, and if it does have invalid UTF-8, we 634 | // clear the record. (It is bad for `record` to contain invalid UTF-8 635 | // because other accessor methods, like `get`, assume that every field 636 | // is valid UTF-8.) 637 | let pos = rdr.position().clone(); 638 | let read_res = rdr.read_byte_record(&mut self.0).await; 639 | let utf8_res = match self.0.validate() { 640 | Ok(()) => Ok(()), 641 | Err(err) => { 642 | // If this record isn't valid UTF-8, then completely wipe it. 643 | self.0.clear(); 644 | Err(err) 645 | } 646 | }; 647 | match (read_res, utf8_res) { 648 | (Err(err), _) => Err(err), 649 | (Ok(_), Err(err)) => { 650 | Err(Error::new(ErrorKind::Utf8 { pos: Some(pos), err })) 651 | } 652 | (Ok(eof), Ok(())) => Ok(eof), 653 | } 654 | } 655 | } 656 | 657 | impl ops::Index for StringRecord { 658 | type Output = str; 659 | #[inline] 660 | fn index(&self, i: usize) -> &str { 661 | self.get(i).unwrap() 662 | } 663 | } 664 | 665 | impl> From> for StringRecord { 666 | #[inline] 667 | fn from(xs: Vec) -> StringRecord { 668 | StringRecord::from_iter(xs) 669 | } 670 | } 671 | 672 | impl<'a, T: AsRef> From<&'a [T]> for StringRecord { 673 | #[inline] 674 | fn from(xs: &'a [T]) -> StringRecord { 675 | StringRecord::from_iter(xs) 676 | } 677 | } 678 | 679 | impl> FromIterator for StringRecord { 680 | #[inline] 681 | fn from_iter>(iter: I) -> StringRecord { 682 | let mut record = StringRecord::new(); 683 | record.extend(iter); 684 | record 685 | } 686 | } 687 | 688 | impl> Extend for StringRecord { 689 | #[inline] 690 | fn extend>(&mut self, iter: I) { 691 | for x in iter { 692 | self.push_field(x.as_ref()); 693 | } 694 | } 695 | } 696 | 697 | impl<'a> IntoIterator for &'a StringRecord { 698 | type IntoIter = StringRecordIter<'a>; 699 | type Item = &'a str; 700 | 701 | #[inline] 702 | fn into_iter(self) -> StringRecordIter<'a> { 703 | StringRecordIter(self.0.iter()) 704 | } 705 | } 706 | 707 | /// An iterator over the fields in a string record. 708 | /// 709 | /// The `'r` lifetime variable refers to the lifetime of the `StringRecord` 710 | /// that is being iterated over. 711 | #[derive(Clone)] 712 | pub struct StringRecordIter<'r>(ByteRecordIter<'r>); 713 | 714 | impl<'r> Iterator for StringRecordIter<'r> { 715 | type Item = &'r str; 716 | 717 | #[inline] 718 | fn next(&mut self) -> Option<&'r str> { 719 | self.0.next().map(|bytes| { 720 | debug_assert!(str::from_utf8(bytes).is_ok()); 721 | // See StringRecord::get for safety argument. 722 | unsafe { str::from_utf8_unchecked(bytes) } 723 | }) 724 | } 725 | 726 | #[inline] 727 | fn size_hint(&self) -> (usize, Option) { 728 | self.0.size_hint() 729 | } 730 | 731 | #[inline] 732 | fn count(self) -> usize { 733 | self.0.len() 734 | } 735 | } 736 | 737 | impl<'r> DoubleEndedIterator for StringRecordIter<'r> { 738 | #[inline] 739 | fn next_back(&mut self) -> Option<&'r str> { 740 | self.0.next_back().map(|bytes| { 741 | debug_assert!(str::from_utf8(bytes).is_ok()); 742 | // See StringRecord::get for safety argument. 743 | unsafe { str::from_utf8_unchecked(bytes) } 744 | }) 745 | } 746 | } 747 | 748 | #[cfg(test)] 749 | mod tests { 750 | use crate::string_record::StringRecord; 751 | 752 | #[test] 753 | fn trim_front() { 754 | let mut rec = StringRecord::from(vec![" abc"]); 755 | rec.trim(); 756 | assert_eq!(rec.get(0), Some("abc")); 757 | 758 | let mut rec = StringRecord::from(vec![" abc", " xyz"]); 759 | rec.trim(); 760 | assert_eq!(rec.get(0), Some("abc")); 761 | assert_eq!(rec.get(1), Some("xyz")); 762 | } 763 | 764 | #[test] 765 | fn trim_back() { 766 | let mut rec = StringRecord::from(vec!["abc "]); 767 | rec.trim(); 768 | assert_eq!(rec.get(0), Some("abc")); 769 | 770 | let mut rec = StringRecord::from(vec!["abc ", "xyz "]); 771 | rec.trim(); 772 | assert_eq!(rec.get(0), Some("abc")); 773 | assert_eq!(rec.get(1), Some("xyz")); 774 | } 775 | 776 | #[test] 777 | fn trim_both() { 778 | let mut rec = StringRecord::from(vec![" abc "]); 779 | rec.trim(); 780 | assert_eq!(rec.get(0), Some("abc")); 781 | 782 | let mut rec = StringRecord::from(vec![" abc ", " xyz "]); 783 | rec.trim(); 784 | assert_eq!(rec.get(0), Some("abc")); 785 | assert_eq!(rec.get(1), Some("xyz")); 786 | } 787 | 788 | #[test] 789 | fn trim_does_not_panic_on_empty_records_1() { 790 | let mut rec = StringRecord::from(vec![""]); 791 | rec.trim(); 792 | assert_eq!(rec.get(0), Some("")); 793 | } 794 | 795 | #[test] 796 | fn trim_does_not_panic_on_empty_records_2() { 797 | let mut rec = StringRecord::from(vec!["", ""]); 798 | rec.trim(); 799 | assert_eq!(rec.get(0), Some("")); 800 | assert_eq!(rec.get(1), Some("")); 801 | } 802 | 803 | #[test] 804 | fn trim_does_not_panic_on_empty_records_3() { 805 | let mut rec = StringRecord::new(); 806 | rec.trim(); 807 | assert_eq!(rec.as_slice().len(), 0); 808 | } 809 | 810 | #[test] 811 | fn trim_whitespace_only() { 812 | let mut rec = StringRecord::from(vec![ 813 | "\u{0009}\u{000A}\u{000B}\u{000C}\u{000D}\u{0020}\u{0085}\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}", 814 | ]); 815 | rec.trim(); 816 | assert_eq!(rec.get(0), Some("")); 817 | } 818 | 819 | // Check that record equality respects field boundaries. 820 | // 821 | // Regression test for #138. 822 | #[test] 823 | fn eq_field_boundaries() { 824 | let test1 = StringRecord::from(vec!["12", "34"]); 825 | let test2 = StringRecord::from(vec!["123", "4"]); 826 | 827 | assert_ne!(test1, test2); 828 | } 829 | 830 | // Check that record equality respects number of fields. 831 | // 832 | // Regression test for #138. 833 | #[test] 834 | fn eq_record_len() { 835 | let test1 = StringRecord::from(vec!["12", "34", "56"]); 836 | let test2 = StringRecord::from(vec!["12", "34"]); 837 | assert_ne!(test1, test2); 838 | } 839 | } 840 | -------------------------------------------------------------------------------- /tests/data/cities_incomplete_row.csv: -------------------------------------------------------------------------------- 1 | city,region,country,population 2 | Southborough,MA,United States,9686 3 | Northbridge,MA 4 | Marlborough,MA,United States,38334 5 | Boston,MA,United States,152227 6 | Springfield,MO,United States,150443 7 | Trenton,NJ,United States,14976 8 | Plymouth,NH,United States,42605 9 | -------------------------------------------------------------------------------- /tests/data/cities_non_int.csv: -------------------------------------------------------------------------------- 1 | city,region,country,population 2 | Southborough,MA,United States,9686 3 | Northbridge,MA,United States,14061 4 | Marlborough,MA,United States,xxxxx 5 | Boston,MA,United States,152227 6 | Springfield,MO,United States,150443 7 | Trenton,NJ,United States,14976 8 | Plymouth,NH,United States,42605 9 | -------------------------------------------------------------------------------- /tests/data/cities_ok.csv: -------------------------------------------------------------------------------- 1 | city,region,country,population 2 | Southborough,MA,United States,9686 3 | Northbridge,MA,United States,14061 4 | Marlborough,MA,United States,38334 5 | Boston,MA,United States,152227 6 | Springfield,MO,United States,150443 7 | Trenton,NJ,United States,14976 8 | Plymouth,NH,United States,42605 9 | -------------------------------------------------------------------------------- /tests/data/cities_pl_win1250.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gwierzchowski/csv-async/1e4ae466ab1b90693f3284ce8ebf5305b69294fc/tests/data/cities_pl_win1250.csv -------------------------------------------------------------------------------- /tests/data/invalid_date_time.csv: -------------------------------------------------------------------------------- 1 | # Thanks to https://github.com/jsimpson-gro 2 | field_1,field_2 3 | 2016-07-08T09:10:11,2 4 | abc,2 5 | 2016-07-38T09:10:11,2 6 | -------------------------------------------------------------------------------- /tests/helpers/helpers_async_std.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | pub use async_std::test; 3 | pub use async_std::fs::File; 4 | pub use futures::stream::StreamExt; 5 | 6 | pub type Reader = csv_async::AsyncReader; 7 | #[cfg(feature = "with_serde")] 8 | pub type Deserializer = csv_async::AsyncDeserializer; 9 | 10 | pub async fn get_reader(path: &str) -> async_std::io::Result { 11 | Ok(csv_async::AsyncReader::from_reader(File::open(path).await?)) 12 | } 13 | 14 | #[cfg(feature = "with_serde")] 15 | pub async fn get_deserializer(path: &str) -> async_std::io::Result { 16 | Ok( 17 | csv_async::AsyncReaderBuilder::new() 18 | .create_deserializer(File::open(path).await?) 19 | ) 20 | } 21 | -------------------------------------------------------------------------------- /tests/helpers/helpers_tokio.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | pub use tokio1 as tokio; 3 | pub use tokio::test; 4 | pub use tokio::fs::File; 5 | pub use tokio_stream::StreamExt; 6 | 7 | pub type Reader = csv_async::AsyncReader; 8 | #[cfg(feature = "with_serde")] 9 | pub type Deserializer = csv_async::AsyncDeserializer; 10 | 11 | pub async fn get_reader(path: &str) -> async_std::io::Result { 12 | Ok( 13 | csv_async::AsyncReader::from_reader( 14 | File::open(path).await? 15 | )) 16 | } 17 | 18 | #[cfg(feature = "with_serde")] 19 | pub async fn get_deserializer(path: &str) -> async_std::io::Result { 20 | Ok( 21 | csv_async::AsyncReaderBuilder::new() 22 | .create_deserializer(File::open(path).await?) 23 | ) 24 | } 25 | -------------------------------------------------------------------------------- /tests/helpers/mod.rs: -------------------------------------------------------------------------------- 1 | #[cfg(not(feature = "tokio"))] 2 | mod helpers_async_std; 3 | #[cfg(feature = "tokio")] 4 | mod helpers_tokio; 5 | 6 | #[cfg(not(feature = "tokio"))] 7 | pub use helpers_async_std::*; 8 | #[cfg(feature = "tokio")] 9 | pub use helpers_tokio::*; 10 | 11 | #[allow(dead_code)] 12 | pub fn custom_error_message(err: &csv_async::Error) -> String { 13 | match err.kind() { 14 | csv_async::ErrorKind::Io(e) => { 15 | format!("IO Error: {e}") 16 | }, 17 | csv_async::ErrorKind::Seek => { 18 | String::from("Seek error") 19 | }, 20 | csv_async::ErrorKind::UnequalLengths { pos, expected_len, len } => { 21 | format!("Unequal lengths: position = {pos:?}, expected_len = {expected_len}, len = {len}") 22 | }, 23 | csv_async::ErrorKind::Utf8 { pos, err } => { 24 | format!("Invalid UTF8: position = {pos:?}, err = {err}") 25 | }, 26 | #[cfg(feature = "with_serde")] 27 | csv_async::ErrorKind::Serialize(msg) => { 28 | format!("Serialize error: {msg}") 29 | }, 30 | #[cfg(feature = "with_serde")] 31 | csv_async::ErrorKind::Deserialize { pos, err } => { 32 | let field = err.field(); 33 | let msg = match err.kind() { 34 | csv_async::DeserializeErrorKind::InvalidUtf8(e) => { 35 | format!("Invalid UTF8: {e}") 36 | }, 37 | csv_async::DeserializeErrorKind::Message(msg) => msg.clone(), 38 | csv_async::DeserializeErrorKind::ParseBool(e) => { 39 | format!("Error parsing boolean: {e}") 40 | } 41 | csv_async::DeserializeErrorKind::ParseFloat(e) => { 42 | format!("Error parsing float: {e}") 43 | } 44 | csv_async::DeserializeErrorKind::ParseInt(e) => { 45 | format!("Error parsing integer: {e}") 46 | } 47 | csv_async::DeserializeErrorKind::UnexpectedEndOfRow => { 48 | String::from("Row has too few fields") 49 | } 50 | csv_async::DeserializeErrorKind::Unsupported(e) => { 51 | format!("Unsupported type: {e}") 52 | } 53 | }; 54 | format!("Deserialize error: position = {pos:?}, field = {field:?}: {msg}") 55 | }, 56 | _ => String::from("Other error") 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /tests/read_records.rs: -------------------------------------------------------------------------------- 1 | mod helpers; 2 | use helpers::*; 3 | 4 | #[helpers::test] 5 | async fn read_records_ok() { 6 | let mut rdr = get_reader("tests/data/cities_ok.csv").await.expect("Data file found"); 7 | let mut max_population = 0; 8 | let mut max_record = None; 9 | let mut records = rdr.records(); 10 | while let Some(record) = records.next().await { 11 | let record = record.expect("Record read correctly"); 12 | if let Some(population) = record.get(3) { 13 | let population = population.parse::() 14 | .expect("Column 4 parsed as integer"); 15 | if population > max_population { 16 | max_population = population; 17 | max_record = Some(record.clone()); 18 | } 19 | } 20 | } 21 | assert_eq!(max_record.expect("Max found").get(0), Some("Boston")); 22 | } 23 | 24 | #[helpers::test] 25 | async fn read_records_incomplete_row() { 26 | let mut rdr = get_reader("tests/data/cities_incomplete_row.csv").await.expect("Data file found"); 27 | let mut read_correctly = 0; 28 | let mut read_errors = Vec::new(); 29 | let mut records = rdr.records(); 30 | while let Some(record) = records.next().await { 31 | match record { 32 | Ok(_) => read_correctly += 1, 33 | Err(e) => read_errors.push(e) 34 | } 35 | } 36 | assert_eq!(read_correctly, 6); 37 | assert_eq!(read_errors.len(), 1); 38 | 39 | // For file with unix newlines. 40 | let (line, byte) = if cfg!(windows) { 41 | (2, 67) 42 | } else { 43 | (3, 66) // correct value 44 | }; 45 | assert_eq!( 46 | read_errors[0].to_string(), 47 | format!("CSV error: record 2 (line: {line}, byte: {byte}): found record with 2 fields, but the previous record has 4 fields") 48 | ); 49 | assert_eq!( 50 | custom_error_message(&read_errors[0]), 51 | format!("Unequal lengths: position = Some(Position {{ byte: {byte}, line: {line}, record: 2 }}), expected_len = 4, len = 2") 52 | ); 53 | } 54 | 55 | #[helpers::test] 56 | async fn read_records_non_utf8() { 57 | let mut rdr = get_reader("tests/data/cities_pl_win1250.csv").await.expect("Data file found"); 58 | let mut read_correctly = 0; 59 | let mut read_errors = Vec::new(); 60 | let mut records = rdr.records(); 61 | while let Some(record) = records.next().await { 62 | match record { 63 | Ok(_) => read_correctly += 1, 64 | Err(e) => read_errors.push(e) 65 | } 66 | } 67 | assert_eq!(read_correctly, 2); 68 | assert_eq!(read_errors.len(), 5); 69 | assert_eq!( 70 | read_errors[0].to_string().as_str(), 71 | "CSV parse error: record 0 (line 1, field: 1, byte: 0): invalid utf-8: invalid UTF-8 in field 1 near byte index 3" 72 | ); 73 | assert_eq!( 74 | custom_error_message(&read_errors[0]).as_str(), 75 | "Invalid UTF8: position = Some(Position { byte: 0, line: 1, record: 0 }), err = invalid utf-8: invalid UTF-8 in field 1 near byte index 3" 76 | ); 77 | } -------------------------------------------------------------------------------- /tests/read_serde.rs: -------------------------------------------------------------------------------- 1 | #![cfg(feature = "with_serde")] 2 | 3 | use serde::Deserialize; 4 | 5 | mod helpers; 6 | use helpers::*; 7 | 8 | #[allow(dead_code)] 9 | #[derive(Deserialize, Clone)] 10 | struct City { 11 | city: String, 12 | region: String, 13 | country: String, 14 | population: usize, 15 | } 16 | 17 | #[helpers::test] 18 | async fn read_serde_ok() { 19 | let des = get_deserializer("tests/data/cities_ok.csv").await.expect("Data file found"); 20 | let mut max_population = 0; 21 | let mut max_record = None; 22 | let mut records = des.into_deserialize::(); 23 | while let Some(record) = records.next().await { 24 | let record = record.expect("Record read correctly"); 25 | if record.population > max_population { 26 | max_population = record.population; 27 | max_record = Some(record.clone()); 28 | } 29 | } 30 | assert_eq!(&max_record.expect("Max found").city, "Boston"); 31 | } 32 | 33 | #[helpers::test] 34 | async fn read_serde_incomplete_row() { 35 | let des = get_deserializer("tests/data/cities_incomplete_row.csv").await.expect("Data file found"); 36 | let mut read_correctly = 0; 37 | let mut read_errors = Vec::new(); 38 | let mut records = des.into_deserialize::(); 39 | while let Some(record) = records.next().await { 40 | match record { 41 | Ok(_) => read_correctly += 1, 42 | Err(e) => read_errors.push(e) 43 | } 44 | } 45 | assert_eq!(read_correctly, 6); 46 | assert_eq!(read_errors.len(), 1); 47 | 48 | // For file with unix newlines. 49 | let (line, byte) = if cfg!(windows) { 50 | (2, 67) 51 | } else { 52 | (3, 66) // correct value 53 | }; 54 | assert_eq!( 55 | read_errors[0].to_string(), 56 | format!("CSV error: record 2 (line: {line}, byte: {byte}): found record with 2 fields, but the previous record has 4 fields") 57 | ); 58 | assert_eq!( 59 | custom_error_message(&read_errors[0]), 60 | format!("Unequal lengths: position = Some(Position {{ byte: {byte}, line: {line}, record: 2 }}), expected_len = 4, len = 2") 61 | ); 62 | } 63 | 64 | #[helpers::test] 65 | async fn read_serde_non_utf8() { 66 | let des = get_deserializer("tests/data/cities_pl_win1250.csv").await.expect("Data file found"); 67 | let mut read_correctly = 0; 68 | let mut read_errors = Vec::new(); 69 | let mut records = des.into_deserialize::(); 70 | while let Some(record) = records.next().await { 71 | match record { 72 | Ok(_) => read_correctly += 1, 73 | Err(e) => read_errors.push(e) 74 | } 75 | } 76 | assert_eq!(read_correctly, 0); 77 | assert_eq!(read_errors.len(), 7); 78 | 79 | // For file with unix newlines. 80 | let line = if cfg!(windows) { 1 } else { 2 }; 81 | assert_eq!( 82 | read_errors[0].to_string().as_str(), 83 | format!("CSV parse error: record 1 (line {line}, field: 1, byte: 29): invalid utf-8: invalid UTF-8 in field 1 near byte index 3") 84 | ); 85 | assert_eq!( 86 | custom_error_message(&read_errors[0]).as_str(), 87 | format!("Invalid UTF8: position = Some(Position {{ byte: 29, line: {line}, record: 1 }}), err = invalid utf-8: invalid UTF-8 in field 1 near byte index 3") 88 | ); 89 | } 90 | 91 | #[helpers::test] 92 | async fn read_serde_non_int() { 93 | let des = get_deserializer("tests/data/cities_non_int.csv").await.expect("Data file found"); 94 | let mut read_correctly = 0; 95 | let mut read_errors = Vec::new(); 96 | let mut records = des.into_deserialize::(); 97 | while let Some(record) = records.next().await { 98 | match record { 99 | Ok(_) => read_correctly += 1, 100 | Err(e) => read_errors.push(e) 101 | } 102 | } 103 | assert_eq!(read_correctly, 6); 104 | assert_eq!(read_errors.len(), 1); 105 | 106 | // For file with unix newlines. 107 | let (line, byte) = if cfg!(windows) { 108 | (3, 103) 109 | } else { 110 | (4, 101) // correct value 111 | }; 112 | assert_eq!( 113 | read_errors[0].to_string().as_str(), 114 | format!("CSV deserialize error: record 3 (line {line}, byte: {byte}): field 4: invalid digit found in string") 115 | ); 116 | assert_eq!(custom_error_message( 117 | &read_errors[0]).as_str(), 118 | format!("Deserialize error: position = Some(Position {{ byte: {byte}, line: {line}, record: 3 }}), field = Some(3): Error parsing integer: invalid digit found in string") 119 | ); 120 | } 121 | 122 | #[derive(Deserialize)] 123 | struct DateRow { 124 | field_1: chrono::NaiveDateTime, 125 | field_2: i32, 126 | } 127 | 128 | #[helpers::test] 129 | async fn read_serde_invalid_date_time() { 130 | let des = csv_async::AsyncReaderBuilder::new() 131 | .comment(Some(b'#')) 132 | .create_deserializer( 133 | File::open("tests/data/invalid_date_time.csv").await 134 | .expect("Data file found") 135 | ); 136 | let mut read_correctly = 0; 137 | let mut read_errors = Vec::new(); 138 | let mut records = des.into_deserialize::(); 139 | while let Some(record) = records.next().await { 140 | match record { 141 | Ok(rec) => { 142 | let expected_dt = chrono::NaiveDate::from_ymd_opt(2016, 7, 8).unwrap().and_hms_opt(9, 10, 11).unwrap(); 143 | assert_eq!(rec.field_1, expected_dt); 144 | assert_eq!(rec.field_2, 2); 145 | read_correctly += 1; 146 | } 147 | Err(e) => read_errors.push(e) 148 | } 149 | } 150 | assert_eq!(read_correctly, 1); 151 | assert_eq!(read_errors.len(), 2); 152 | 153 | // For file with unix newlines. 154 | let (line, byte) = if cfg!(windows) { 155 | (3, 84) 156 | } else { 157 | (4, 82) // correct value 158 | }; 159 | assert_eq!( 160 | read_errors[0].to_string().as_str(), 161 | format!("CSV deserialize error: record 2 (line {line}, byte: {byte}): field 1: input contains invalid characters") 162 | ); 163 | assert_eq!(custom_error_message( 164 | &read_errors[0]).as_str(), 165 | format!("Deserialize error: position = Some(Position {{ byte: {byte}, line: {line}, record: 2 }}), field = Some(0): input contains invalid characters") 166 | ); 167 | 168 | // For file with unix newlines. 169 | let (line, byte) = if cfg!(windows) { 170 | (4, 91) 171 | } else { 172 | (5, 88) // correct value 173 | }; 174 | assert_eq!( 175 | read_errors[1].to_string().as_str(), 176 | format!("CSV deserialize error: record 3 (line {line}, byte: {byte}): field 1: input is out of range") 177 | ); 178 | assert_eq!(custom_error_message( 179 | &read_errors[1]).as_str(), 180 | format!("Deserialize error: position = Some(Position {{ byte: {byte}, line: {line}, record: 3 }}), field = Some(0): input is out of range") 181 | ); 182 | } 183 | --------------------------------------------------------------------------------