├── .github ├── FUNDING.yml └── workflows │ └── ci.yml ├── rustfmt.toml ├── examples ├── data │ ├── uspop-latin1.csv │ ├── strange.csv │ ├── smallpop-no-headers.csv │ ├── smallpop.csv │ ├── smallpop-colon.csv │ ├── uspop.csv │ └── uspop-null.csv ├── tutorial-error-01.rs ├── tutorial-read-headers-01.rs ├── cookbook-read-colon.rs ├── cookbook-read-no-headers.rs ├── tutorial-error-04.rs ├── tutorial-error-02.rs ├── tutorial-read-serde-03.rs ├── cookbook-read-basic.rs ├── tutorial-read-headers-02.rs ├── tutorial-perf-alloc-01.rs ├── tutorial-read-delimiter-01.rs ├── tutorial-perf-alloc-02.rs ├── tutorial-setup-01.rs ├── tutorial-perf-alloc-03.rs ├── cookbook-write-basic.rs ├── tutorial-error-03.rs ├── tutorial-read-serde-invalid-01.rs ├── tutorial-read-serde-invalid-02.rs ├── tutorial-read-serde-02.rs ├── tutorial-read-01.rs ├── tutorial-write-delimiter-01.rs ├── cookbook-read-serde.rs ├── tutorial-perf-serde-01.rs ├── tutorial-pipeline-search-02.rs ├── cookbook-write-serde.rs ├── tutorial-write-01.rs ├── tutorial-read-serde-04.rs ├── tutorial-perf-serde-02.rs ├── tutorial-read-serde-01.rs ├── tutorial-perf-serde-03.rs ├── tutorial-write-02.rs ├── tutorial-write-serde-01.rs ├── tutorial-pipeline-search-01.rs ├── tutorial-write-serde-02.rs ├── tutorial-pipeline-pop-01.rs └── tutorial-perf-core-01.rs ├── .gitignore ├── COPYING ├── csv-core ├── COPYING ├── Cargo.toml ├── LICENSE-MIT ├── UNLICENSE ├── benches │ └── bench.rs ├── README.md └── src │ └── lib.rs ├── csv-index ├── COPYING ├── Cargo.toml ├── LICENSE-MIT ├── UNLICENSE ├── README.md └── src │ ├── lib.rs │ └── simple.rs ├── ci ├── script.sh └── check-copy ├── Cargo.toml ├── LICENSE-MIT ├── scripts └── copy-examples ├── UNLICENSE ├── ISSUE_TEMPLATE.md ├── src ├── debug.rs ├── cookbook.rs ├── lib.rs ├── error.rs └── string_record.rs ├── README.md ├── benches └── bench.rs └── tests └── tests.rs /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: 
[BurntSushi] 2 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 79 2 | use_small_heuristics = "max" 3 | -------------------------------------------------------------------------------- /examples/data/uspop-latin1.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurntSushi/rust-csv/HEAD/examples/data/uspop-latin1.csv -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .*.swp 2 | doc 3 | tags 4 | examples/ss10pusa.csv 5 | build 6 | target 7 | Cargo.lock 8 | scratch* 9 | bench_large/huge 10 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | This project is dual-licensed under the Unlicense and MIT licenses. 2 | 3 | You may use this code under the terms of either license. 4 | -------------------------------------------------------------------------------- /csv-core/COPYING: -------------------------------------------------------------------------------- 1 | This project is dual-licensed under the Unlicense and MIT licenses. 2 | 3 | You may use this code under the terms of either license. 4 | -------------------------------------------------------------------------------- /csv-index/COPYING: -------------------------------------------------------------------------------- 1 | This project is dual-licensed under the Unlicense and MIT licenses. 2 | 3 | You may use this code under the terms of either license. 
4 | -------------------------------------------------------------------------------- /examples/data/strange.csv: -------------------------------------------------------------------------------- 1 | "\"Hacksaw\" Jim Duggan";1987 2 | "Bret \"Hit Man\" Hart";1984 3 | # We're not sure when Rafael started, so omit the year. 4 | Rafael Halperin 5 | "\"Big Cat\" Ernie Ladd";1964 6 | "\"Macho Man\" Randy Savage";1985 7 | "Jake \"The Snake\" Roberts";1986 8 | -------------------------------------------------------------------------------- /examples/tutorial-error-01.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | 3 | fn main() { 4 | let mut rdr = csv::Reader::from_reader(io::stdin()); 5 | for result in rdr.records() { 6 | let record = result.expect("a CSV record"); 7 | println!("{:?}", record); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /examples/data/smallpop-no-headers.csv: -------------------------------------------------------------------------------- 1 | Southborough,MA,United States,9686 2 | Northbridge,MA,United States,14061 3 | Westborough,MA,United States,29313 4 | Marlborough,MA,United States,38334 5 | Springfield,MA,United States,152227 6 | Springfield,MO,United States,150443 7 | Springfield,NJ,United States,14976 8 | Springfield,OH,United States,64325 9 | Springfield,OR,United States,56032 10 | Concord,NH,United States,42605 11 | -------------------------------------------------------------------------------- /examples/data/smallpop.csv: -------------------------------------------------------------------------------- 1 | city,region,country,population 2 | Southborough,MA,United States,9686 3 | Northbridge,MA,United States,14061 4 | Westborough,MA,United States,29313 5 | Marlborough,MA,United States,38334 6 | Springfield,MA,United States,152227 7 | Springfield,MO,United States,150443 8 | Springfield,NJ,United States,14976 9 | Springfield,OH,United 
States,64325 10 | Springfield,OR,United States,56032 11 | Concord,NH,United States,42605 12 | -------------------------------------------------------------------------------- /examples/data/smallpop-colon.csv: -------------------------------------------------------------------------------- 1 | city:region:country:population 2 | Southborough:MA:United States:9686 3 | Northbridge:MA:United States:14061 4 | Westborough:MA:United States:29313 5 | Marlborough:MA:United States:38334 6 | Springfield:MA:United States:152227 7 | Springfield:MO:United States:150443 8 | Springfield:NJ:United States:14976 9 | Springfield:OH:United States:64325 10 | Springfield:OR:United States:56032 11 | Concord:NH:United States:42605 12 | -------------------------------------------------------------------------------- /examples/tutorial-read-headers-01.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | let mut rdr = 5 | csv::ReaderBuilder::new().has_headers(false).from_reader(io::stdin()); 6 | for result in rdr.records() { 7 | let record = result?; 8 | println!("{:?}", record); 9 | } 10 | Ok(()) 11 | } 12 | 13 | fn main() { 14 | if let Err(err) = run() { 15 | println!("{}", err); 16 | process::exit(1); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /examples/cookbook-read-colon.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn example() -> Result<(), Box> { 4 | let mut rdr = 5 | csv::ReaderBuilder::new().delimiter(b':').from_reader(io::stdin()); 6 | for result in rdr.records() { 7 | let record = result?; 8 | println!("{:?}", record); 9 | } 10 | Ok(()) 11 | } 12 | 13 | fn main() { 14 | if let Err(err) = example() { 15 | println!("error running example: {}", err); 16 | process::exit(1); 17 | } 18 | } 19 | 
-------------------------------------------------------------------------------- /examples/cookbook-read-no-headers.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn example() -> Result<(), Box> { 4 | let mut rdr = 5 | csv::ReaderBuilder::new().has_headers(false).from_reader(io::stdin()); 6 | for result in rdr.records() { 7 | let record = result?; 8 | println!("{:?}", record); 9 | } 10 | Ok(()) 11 | } 12 | 13 | fn main() { 14 | if let Err(err) = example() { 15 | println!("error running example: {}", err); 16 | process::exit(1); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /examples/tutorial-error-04.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn main() { 4 | if let Err(err) = run() { 5 | println!("{}", err); 6 | process::exit(1); 7 | } 8 | } 9 | 10 | fn run() -> Result<(), Box> { 11 | let mut rdr = csv::Reader::from_reader(io::stdin()); 12 | for result in rdr.records() { 13 | // This is effectively the same code as our `match` in the 14 | // previous example. In other words, `?` is syntactic sugar. 15 | let record = result?; 16 | println!("{:?}", record); 17 | } 18 | Ok(()) 19 | } 20 | -------------------------------------------------------------------------------- /examples/tutorial-error-02.rs: -------------------------------------------------------------------------------- 1 | use std::{io, process}; 2 | 3 | fn main() { 4 | let mut rdr = csv::Reader::from_reader(io::stdin()); 5 | for result in rdr.records() { 6 | // Examine our Result. 7 | // If there was no problem, print the record. 8 | // Otherwise, print the error message and quit the program. 
9 | match result { 10 | Ok(record) => println!("{:?}", record), 11 | Err(err) => { 12 | println!("error reading CSV from : {}", err); 13 | process::exit(1); 14 | } 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /examples/tutorial-read-serde-03.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::{error::Error, io, process}; 3 | 4 | // This introduces a type alias so that we can conveniently reference our 5 | // record type. 6 | type Record = HashMap; 7 | 8 | fn run() -> Result<(), Box> { 9 | let mut rdr = csv::Reader::from_reader(io::stdin()); 10 | for result in rdr.deserialize() { 11 | let record: Record = result?; 12 | println!("{:?}", record); 13 | } 14 | Ok(()) 15 | } 16 | 17 | fn main() { 18 | if let Err(err) = run() { 19 | println!("{}", err); 20 | process::exit(1); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /examples/cookbook-read-basic.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn example() -> Result<(), Box> { 4 | // Build the CSV reader and iterate over each record. 5 | let mut rdr = csv::Reader::from_reader(io::stdin()); 6 | for result in rdr.records() { 7 | // The iterator yields Result, so we check the 8 | // error here.. 
9 | let record = result?; 10 | println!("{:?}", record); 11 | } 12 | Ok(()) 13 | } 14 | 15 | fn main() { 16 | if let Err(err) = example() { 17 | println!("error running example: {}", err); 18 | process::exit(1); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/tutorial-read-headers-02.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | let mut rdr = csv::Reader::from_reader(io::stdin()); 5 | let headers = rdr.headers()?; 6 | println!("{:?}", headers); 7 | for result in rdr.records() { 8 | let record = result?; 9 | println!("{:?}", record); 10 | } 11 | // We can ask for the headers at any time. 12 | let headers = rdr.headers()?; 13 | println!("{:?}", headers); 14 | Ok(()) 15 | } 16 | 17 | fn main() { 18 | if let Err(err) = run() { 19 | println!("{}", err); 20 | process::exit(1); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /examples/tutorial-perf-alloc-01.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result> { 4 | let mut rdr = csv::Reader::from_reader(io::stdin()); 5 | 6 | let mut count = 0; 7 | for result in rdr.records() { 8 | let record = result?; 9 | if &record[0] == "us" && &record[3] == "MA" { 10 | count += 1; 11 | } 12 | } 13 | Ok(count) 14 | } 15 | 16 | fn main() { 17 | match run() { 18 | Ok(count) => { 19 | println!("{}", count); 20 | } 21 | Err(err) => { 22 | println!("{}", err); 23 | process::exit(1); 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /examples/tutorial-read-delimiter-01.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | let mut rdr = 
csv::ReaderBuilder::new() 5 | .has_headers(false) 6 | .delimiter(b';') 7 | .double_quote(false) 8 | .escape(Some(b'\\')) 9 | .flexible(true) 10 | .comment(Some(b'#')) 11 | .from_reader(io::stdin()); 12 | for result in rdr.records() { 13 | let record = result?; 14 | println!("{:?}", record); 15 | } 16 | Ok(()) 17 | } 18 | 19 | fn main() { 20 | if let Err(err) = run() { 21 | println!("{}", err); 22 | process::exit(1); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /examples/tutorial-perf-alloc-02.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result> { 4 | let mut rdr = csv::Reader::from_reader(io::stdin()); 5 | 6 | let mut count = 0; 7 | for result in rdr.byte_records() { 8 | let record = result?; 9 | if &record[0] == b"us" && &record[3] == b"MA" { 10 | count += 1; 11 | } 12 | } 13 | Ok(count) 14 | } 15 | 16 | fn main() { 17 | match run() { 18 | Ok(count) => { 19 | println!("{}", count); 20 | } 21 | Err(err) => { 22 | println!("{}", err); 23 | process::exit(1); 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /examples/tutorial-setup-01.rs: -------------------------------------------------------------------------------- 1 | // Import the standard library's I/O module so we can read from stdin. 2 | use std::io; 3 | 4 | // The `main` function is where your program starts executing. 5 | fn main() { 6 | // Create a CSV parser that reads data from stdin. 7 | let mut rdr = csv::Reader::from_reader(io::stdin()); 8 | // Loop over each record. 9 | for result in rdr.records() { 10 | // An error may occur, so abort the program in an unfriendly way. 11 | // We will make this more friendly later! 12 | let record = result.expect("a CSV record"); 13 | // Print a debug version of the record. 
14 | println!("{:?}", record); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /examples/tutorial-perf-alloc-03.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result> { 4 | let mut rdr = csv::Reader::from_reader(io::stdin()); 5 | let mut record = csv::ByteRecord::new(); 6 | 7 | let mut count = 0; 8 | while rdr.read_byte_record(&mut record)? { 9 | if &record[0] == b"us" && &record[3] == b"MA" { 10 | count += 1; 11 | } 12 | } 13 | Ok(count) 14 | } 15 | 16 | fn main() { 17 | match run() { 18 | Ok(count) => { 19 | println!("{}", count); 20 | } 21 | Err(err) => { 22 | println!("{}", err); 23 | process::exit(1); 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /ci/script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -ex 4 | 5 | cargo build --verbose 6 | cargo doc --verbose 7 | 8 | # Our dev dependencies want newer versions of Rust. Instead of bumping our 9 | # MSRV, we just don't test on our MSRV. 
10 | if [ "$TRAVIS_RUST_VERSION" = "1.33.0" ]; then 11 | exit 0 12 | fi 13 | 14 | cargo test --verbose 15 | cargo test --verbose --manifest-path csv-core/Cargo.toml 16 | cargo test --verbose --manifest-path csv-index/Cargo.toml 17 | if [ "$TRAVIS_RUST_VERSION" = "stable" ]; then 18 | rustup component add rustfmt 19 | cargo fmt -- --check 20 | 21 | ci/check-copy cookbook 22 | ci/check-copy tutorial 23 | fi 24 | if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then 25 | cargo bench --verbose --no-run 26 | fi 27 | -------------------------------------------------------------------------------- /examples/cookbook-write-basic.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn example() -> Result<(), Box> { 4 | let mut wtr = csv::Writer::from_writer(io::stdout()); 5 | 6 | // When writing records without Serde, the header record is written just 7 | // like any other record. 8 | wtr.write_record(["city", "region", "country", "population"])?; 9 | wtr.write_record(["Southborough", "MA", "United States", "9686"])?; 10 | wtr.write_record(["Northbridge", "MA", "United States", "14061"])?; 11 | wtr.flush()?; 12 | Ok(()) 13 | } 14 | 15 | fn main() { 16 | if let Err(err) = example() { 17 | println!("error running example: {}", err); 18 | process::exit(1); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/tutorial-error-03.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn main() { 4 | if let Err(err) = run() { 5 | println!("{}", err); 6 | process::exit(1); 7 | } 8 | } 9 | 10 | fn run() -> Result<(), Box> { 11 | let mut rdr = csv::Reader::from_reader(io::stdin()); 12 | for result in rdr.records() { 13 | // Examine our Result. 14 | // If there was no problem, print the record. 15 | // Otherwise, convert our error to a Box and return it. 
16 | match result { 17 | Err(err) => return Err(From::from(err)), 18 | Ok(record) => { 19 | println!("{:?}", record); 20 | } 21 | } 22 | } 23 | Ok(()) 24 | } 25 | -------------------------------------------------------------------------------- /examples/tutorial-read-serde-invalid-01.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | use std::{error::Error, io, process}; 3 | 4 | use serde::Deserialize; 5 | 6 | #[derive(Debug, Deserialize)] 7 | #[serde(rename_all = "PascalCase")] 8 | struct Record { 9 | latitude: f64, 10 | longitude: f64, 11 | population: Option, 12 | city: String, 13 | state: String, 14 | } 15 | 16 | fn run() -> Result<(), Box> { 17 | let mut rdr = csv::Reader::from_reader(io::stdin()); 18 | for result in rdr.deserialize() { 19 | let record: Record = result?; 20 | println!("{:?}", record); 21 | } 22 | Ok(()) 23 | } 24 | 25 | fn main() { 26 | if let Err(err) = run() { 27 | println!("{}", err); 28 | process::exit(1); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /csv-index/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "csv-index" 3 | version = "0.1.6" #:version 4 | authors = ["Andrew Gallant "] 5 | description = "On disk CSV indexing data structures." 6 | documentation = "https://docs.rs/csv-index" 7 | homepage = "https://github.com/BurntSushi/rust-csv" 8 | repository = "https://github.com/BurntSushi/rust-csv" 9 | readme = "README.md" 10 | keywords = ["csv", "comma", "parser", "delimited", "index"] 11 | license = "Unlicense/MIT" 12 | categories = ["encoding", "parser-implementations"] 13 | workspace = ".." 
14 | edition = "2018" 15 | 16 | [badges] 17 | travis-ci = { repository = "BurntSushi/rust-csv" } 18 | appveyor = { repository = "BurntSushi/rust-csv" } 19 | 20 | [lib] 21 | bench = false 22 | 23 | [dependencies] 24 | byteorder = "1" 25 | csv = { path = "..", version = "1.1.0" } 26 | -------------------------------------------------------------------------------- /examples/tutorial-read-serde-invalid-02.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | use std::{error::Error, io, process}; 3 | 4 | use serde::Deserialize; 5 | #[derive(Debug, Deserialize)] 6 | #[serde(rename_all = "PascalCase")] 7 | struct Record { 8 | latitude: f64, 9 | longitude: f64, 10 | #[serde(deserialize_with = "csv::invalid_option")] 11 | population: Option, 12 | city: String, 13 | state: String, 14 | } 15 | 16 | fn run() -> Result<(), Box> { 17 | let mut rdr = csv::Reader::from_reader(io::stdin()); 18 | for result in rdr.deserialize() { 19 | let record: Record = result?; 20 | println!("{:?}", record); 21 | } 22 | Ok(()) 23 | } 24 | 25 | fn main() { 26 | if let Err(err) = run() { 27 | println!("{}", err); 28 | process::exit(1); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /examples/tutorial-read-serde-02.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | // This introduces a type alias so that we can conveniently reference our 4 | // record type. 5 | type Record = (String, String, Option, f64, f64); 6 | 7 | fn run() -> Result<(), Box> { 8 | let mut rdr = csv::Reader::from_reader(io::stdin()); 9 | // Instead of creating an iterator with the `records` method, we create 10 | // an iterator with the `deserialize` method. 11 | for result in rdr.deserialize() { 12 | // We must tell Serde what type we want to deserialize into. 
13 | let record: Record = result?; 14 | println!("{:?}", record); 15 | } 16 | Ok(()) 17 | } 18 | 19 | fn main() { 20 | if let Err(err) = run() { 21 | println!("{}", err); 22 | process::exit(1); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /examples/tutorial-read-01.rs: -------------------------------------------------------------------------------- 1 | use std::{env, error::Error, ffi::OsString, fs::File, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | let file_path = get_first_arg()?; 5 | let file = File::open(file_path)?; 6 | let mut rdr = csv::Reader::from_reader(file); 7 | for result in rdr.records() { 8 | let record = result?; 9 | println!("{:?}", record); 10 | } 11 | Ok(()) 12 | } 13 | 14 | /// Returns the first positional argument sent to this process. If there are no 15 | /// positional arguments, then this returns an error. 16 | fn get_first_arg() -> Result> { 17 | match env::args_os().nth(1) { 18 | None => Err(From::from("expected 1 argument, but got none")), 19 | Some(file_path) => Ok(file_path), 20 | } 21 | } 22 | 23 | fn main() { 24 | if let Err(err) = run() { 25 | println!("{}", err); 26 | process::exit(1); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /examples/tutorial-write-delimiter-01.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | let mut wtr = csv::WriterBuilder::new() 5 | .delimiter(b'\t') 6 | .quote_style(csv::QuoteStyle::NonNumeric) 7 | .from_writer(io::stdout()); 8 | 9 | wtr.write_record([ 10 | "City", 11 | "State", 12 | "Population", 13 | "Latitude", 14 | "Longitude", 15 | ])?; 16 | wtr.write_record([ 17 | "Davidsons Landing", 18 | "AK", 19 | "", 20 | "65.2419444", 21 | "-165.2716667", 22 | ])?; 23 | wtr.write_record(["Kenai", "AK", "7610", "60.5544444", "-151.2583333"])?; 24 | wtr.write_record(["Oakman", 
"AL", "", "33.7133333", "-87.3886111"])?; 25 | 26 | wtr.flush()?; 27 | Ok(()) 28 | } 29 | 30 | fn main() { 31 | if let Err(err) = run() { 32 | println!("{}", err); 33 | process::exit(1); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /examples/cookbook-read-serde.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | use std::{error::Error, io, process}; 3 | 4 | use serde::Deserialize; 5 | 6 | // By default, struct field names are deserialized based on the position of 7 | // a corresponding field in the CSV data's header record. 8 | #[derive(Debug, Deserialize)] 9 | struct Record { 10 | city: String, 11 | region: String, 12 | country: String, 13 | population: Option, 14 | } 15 | 16 | fn example() -> Result<(), Box> { 17 | let mut rdr = csv::Reader::from_reader(io::stdin()); 18 | for result in rdr.deserialize() { 19 | // Notice that we need to provide a type hint for automatic 20 | // deserialization. 21 | let record: Record = result?; 22 | println!("{:?}", record); 23 | } 24 | Ok(()) 25 | } 26 | 27 | fn main() { 28 | if let Err(err) = example() { 29 | println!("error running example: {}", err); 30 | process::exit(1); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /csv-core/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "csv-core" 3 | version = "0.1.13" #:version 4 | authors = ["Andrew Gallant "] 5 | description = "Bare bones CSV parsing with no_std support." 6 | documentation = "https://docs.rs/csv-core" 7 | homepage = "https://github.com/BurntSushi/rust-csv" 8 | repository = "https://github.com/BurntSushi/rust-csv" 9 | readme = "README.md" 10 | keywords = ["csv", "comma", "parser", "delimited", "no_std"] 11 | license = "Unlicense/MIT" 12 | categories = ["encoding", "no-std", "parser-implementations"] 13 | workspace = ".." 
14 | edition = "2018" 15 | 16 | [badges] 17 | travis-ci = { repository = "BurntSushi/rust-csv" } 18 | appveyor = { repository = "BurntSushi/rust-csv" } 19 | 20 | [lib] 21 | bench = false 22 | 23 | [features] 24 | default = [] 25 | libc = ["memchr/libc"] 26 | 27 | [dependencies] 28 | memchr = { version = "2", default-features = false } 29 | 30 | [dev-dependencies] 31 | arrayvec = { version = "0.5", default-features = false } 32 | -------------------------------------------------------------------------------- /examples/tutorial-perf-serde-01.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | use std::{error::Error, io, process}; 3 | 4 | use serde::Deserialize; 5 | 6 | #[derive(Debug, Deserialize)] 7 | #[serde(rename_all = "PascalCase")] 8 | struct Record { 9 | country: String, 10 | city: String, 11 | accent_city: String, 12 | region: String, 13 | population: Option, 14 | latitude: f64, 15 | longitude: f64, 16 | } 17 | 18 | fn run() -> Result> { 19 | let mut rdr = csv::Reader::from_reader(io::stdin()); 20 | 21 | let mut count = 0; 22 | for result in rdr.deserialize() { 23 | let record: Record = result?; 24 | if record.country == "us" && record.region == "MA" { 25 | count += 1; 26 | } 27 | } 28 | Ok(count) 29 | } 30 | 31 | fn main() { 32 | match run() { 33 | Ok(count) => { 34 | println!("{}", count); 35 | } 36 | Err(err) => { 37 | println!("{}", err); 38 | process::exit(1); 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /examples/tutorial-pipeline-search-02.rs: -------------------------------------------------------------------------------- 1 | use std::{env, error::Error, io, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | let query = match env::args().nth(1) { 5 | None => return Err(From::from("expected 1 argument, but got none")), 6 | Some(query) => query, 7 | }; 8 | 9 | let mut rdr = csv::Reader::from_reader(io::stdin()); 10 | let mut wtr 
= csv::Writer::from_writer(io::stdout()); 11 | 12 | wtr.write_record(rdr.byte_headers()?)?; 13 | 14 | for result in rdr.byte_records() { 15 | let record = result?; 16 | // `query` is a `String` while `field` is now a `&[u8]`, so we'll 17 | // need to convert `query` to `&[u8]` before doing a comparison. 18 | if record.iter().any(|field| field == query.as_bytes()) { 19 | wtr.write_record(&record)?; 20 | } 21 | } 22 | 23 | wtr.flush()?; 24 | Ok(()) 25 | } 26 | 27 | fn main() { 28 | if let Err(err) = run() { 29 | println!("{}", err); 30 | process::exit(1); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /ci/check-copy: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | D="$(cd "$(dirname "$0")" && pwd -P)" 6 | REPO="$D/.." 7 | SCRIPTS="$REPO/scripts" 8 | TMPDIR="$(mktemp -d)" 9 | 10 | if [ $# != 1 ]; then 11 | echo "Usage: $(basename $0) (cookbook | tutorial)" >&2 12 | exit 1 13 | fi 14 | SOURCE="$1" 15 | 16 | errored() { 17 | rm -rf "$TMPDIR" 18 | echo "HINT: please run scripts/copy-examples" >&2 19 | exit 1 20 | } 21 | 22 | # Make sure the right rustfmt config is available. 23 | cp "$REPO/rustfmt.toml" "$TMPDIR/" 24 | "$SCRIPTS/copy-examples" \ 25 | --rust-file "$REPO/src/$SOURCE.rs" \ 26 | --example-dir "$TMPDIR" 27 | for new in "$TMPDIR"/*.rs; do 28 | name="$(basename "$new")" 29 | old="$REPO"/examples/"$name" 30 | if ! [ -f "$old" ]; then 31 | echo "ERROR: missing examples/$name" >&2 32 | errored 33 | fi 34 | old="$(readlink -e "$REPO"/examples/"$name")" 35 | if ! 
diff "$old" "$new"; then 36 | echo "ERROR: examples/$name differs from ${name%%.rs} in src/$SOURCE.rs" >&2 37 | errored 38 | fi 39 | done 40 | rm -rf "$TMPDIR" 41 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "csv" 3 | version = "1.4.0" #:version 4 | authors = ["Andrew Gallant "] 5 | description = "Fast CSV parsing with support for serde." 6 | documentation = "https://docs.rs/csv" 7 | homepage = "https://github.com/BurntSushi/rust-csv" 8 | repository = "https://github.com/BurntSushi/rust-csv" 9 | readme = "README.md" 10 | keywords = ["csv", "comma", "parser", "delimited", "serde"] 11 | license = "Unlicense/MIT" 12 | categories = ["encoding", "parser-implementations"] 13 | exclude = ["/.github", "/ci/*", "/scripts/*"] 14 | edition = "2021" 15 | rust-version = "1.73" 16 | 17 | [workspace] 18 | members = ["csv-core", "csv-index"] 19 | 20 | [lib] 21 | bench = false 22 | 23 | [dependencies] 24 | csv-core = { path = "csv-core", version = "0.1.11" } 25 | itoa = "1" 26 | ryu = "1" 27 | serde_core = "1.0.221" 28 | 29 | [dev-dependencies] 30 | bstr = { version = "1.7.0", default-features = false, features = ["alloc", "serde"] } 31 | serde = { version = "1.0.221", features = ["derive"] } 32 | 33 | [profile.release] 34 | debug = true 35 | 36 | [profile.bench] 37 | debug = true 38 | -------------------------------------------------------------------------------- /examples/cookbook-write-serde.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | use serde::Serialize; 4 | 5 | #[derive(Debug, Serialize)] 6 | struct Record { 7 | city: String, 8 | region: String, 9 | country: String, 10 | population: Option, 11 | } 12 | 13 | fn example() -> Result<(), Box> { 14 | let mut wtr = csv::Writer::from_writer(io::stdout()); 15 | 16 | // When writing records 
with Serde using structs, the header row is written 17 | // automatically. 18 | wtr.serialize(Record { 19 | city: "Southborough".to_string(), 20 | region: "MA".to_string(), 21 | country: "United States".to_string(), 22 | population: Some(9686), 23 | })?; 24 | wtr.serialize(Record { 25 | city: "Northbridge".to_string(), 26 | region: "MA".to_string(), 27 | country: "United States".to_string(), 28 | population: Some(14061), 29 | })?; 30 | wtr.flush()?; 31 | Ok(()) 32 | } 33 | 34 | fn main() { 35 | if let Err(err) = example() { 36 | println!("error running example: {}", err); 37 | process::exit(1); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /examples/tutorial-write-01.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | let mut wtr = csv::Writer::from_writer(io::stdout()); 5 | // Since we're writing records manually, we must explicitly write our 6 | // header record. A header record is written the same way that other 7 | // records are written. 8 | wtr.write_record([ 9 | "City", 10 | "State", 11 | "Population", 12 | "Latitude", 13 | "Longitude", 14 | ])?; 15 | wtr.write_record([ 16 | "Davidsons Landing", 17 | "AK", 18 | "", 19 | "65.2419444", 20 | "-165.2716667", 21 | ])?; 22 | wtr.write_record(["Kenai", "AK", "7610", "60.5544444", "-151.2583333"])?; 23 | wtr.write_record(["Oakman", "AL", "", "33.7133333", "-87.3886111"])?; 24 | 25 | // A CSV writer maintains an internal buffer, so it's important 26 | // to flush the buffer when you're done. 
27 | wtr.flush()?; 28 | Ok(()) 29 | } 30 | 31 | fn main() { 32 | if let Err(err) = run() { 33 | println!("{}", err); 34 | process::exit(1); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Andrew Gallant 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /csv-core/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Andrew Gallant 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /csv-index/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Andrew Gallant 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /examples/tutorial-read-serde-04.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | use std::{error::Error, io, process}; 3 | 4 | // This lets us write `#[derive(Deserialize)]`. 5 | use serde::Deserialize; 6 | 7 | // We don't need to derive `Debug` (which doesn't require Serde), but it's a 8 | // good habit to do it for all your types. 9 | // 10 | // Notice that the field names in this struct are NOT in the same order as 11 | // the fields in the CSV data! 
12 | #[derive(Debug, Deserialize)] 13 | #[serde(rename_all = "PascalCase")] 14 | struct Record { 15 | latitude: f64, 16 | longitude: f64, 17 | population: Option, 18 | city: String, 19 | state: String, 20 | } 21 | 22 | fn run() -> Result<(), Box> { 23 | let mut rdr = csv::Reader::from_reader(io::stdin()); 24 | for result in rdr.deserialize() { 25 | let record: Record = result?; 26 | println!("{:?}", record); 27 | // Try this if you don't like each record smushed on one line: 28 | // println!("{:#?}", record); 29 | } 30 | Ok(()) 31 | } 32 | 33 | fn main() { 34 | if let Err(err) = run() { 35 | println!("{}", err); 36 | process::exit(1); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /examples/tutorial-perf-serde-02.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | use serde::Deserialize; 3 | use std::{error::Error, io, process}; 4 | 5 | #[derive(Debug, Deserialize)] 6 | #[serde(rename_all = "PascalCase")] 7 | struct Record<'a> { 8 | country: &'a str, 9 | city: &'a str, 10 | accent_city: &'a str, 11 | region: &'a str, 12 | population: Option, 13 | latitude: f64, 14 | longitude: f64, 15 | } 16 | 17 | fn run() -> Result> { 18 | let mut rdr = csv::Reader::from_reader(io::stdin()); 19 | let mut raw_record = csv::StringRecord::new(); 20 | let headers = rdr.headers()?.clone(); 21 | 22 | let mut count = 0; 23 | while rdr.read_record(&mut raw_record)? 
{ 24 | let record: Record = raw_record.deserialize(Some(&headers))?; 25 | if record.country == "us" && record.region == "MA" { 26 | count += 1; 27 | } 28 | } 29 | Ok(count) 30 | } 31 | 32 | fn main() { 33 | match run() { 34 | Ok(count) => { 35 | println!("{}", count); 36 | } 37 | Err(err) => { 38 | println!("{}", err); 39 | process::exit(1); 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /examples/tutorial-read-serde-01.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | let mut rdr = csv::Reader::from_reader(io::stdin()); 5 | for result in rdr.records() { 6 | let record = result?; 7 | 8 | let city = &record[0]; 9 | let state = &record[1]; 10 | // Some records are missing population counts, so if we can't 11 | // parse a number, treat the population count as missing instead 12 | // of returning an error. 13 | let pop: Option = record[2].parse().ok(); 14 | // Lucky us! Latitudes and longitudes are available for every record. 15 | // Therefore, if one couldn't be parsed, return an error. 
16 | let latitude: f64 = record[3].parse()?; 17 | let longitude: f64 = record[4].parse()?; 18 | 19 | println!( 20 | "city: {:?}, state: {:?}, \ 21 | pop: {:?}, latitude: {:?}, longitude: {:?}", 22 | city, state, pop, latitude, longitude 23 | ); 24 | } 25 | Ok(()) 26 | } 27 | 28 | fn main() { 29 | if let Err(err) = run() { 30 | println!("{}", err); 31 | process::exit(1); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /examples/tutorial-perf-serde-03.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | use std::{error::Error, io, process}; 3 | 4 | use serde::Deserialize; 5 | 6 | #[derive(Debug, Deserialize)] 7 | #[serde(rename_all = "PascalCase")] 8 | struct Record<'a> { 9 | country: &'a [u8], 10 | city: &'a [u8], 11 | accent_city: &'a [u8], 12 | region: &'a [u8], 13 | population: Option, 14 | latitude: f64, 15 | longitude: f64, 16 | } 17 | 18 | fn run() -> Result> { 19 | let mut rdr = csv::Reader::from_reader(io::stdin()); 20 | let mut raw_record = csv::ByteRecord::new(); 21 | let headers = rdr.byte_headers()?.clone(); 22 | 23 | let mut count = 0; 24 | while rdr.read_byte_record(&mut raw_record)? 
{ 25 | let record: Record = raw_record.deserialize(Some(&headers))?; 26 | if record.country == b"us" && record.region == b"MA" { 27 | count += 1; 28 | } 29 | } 30 | Ok(count) 31 | } 32 | 33 | fn main() { 34 | match run() { 35 | Ok(count) => { 36 | println!("{}", count); 37 | } 38 | Err(err) => { 39 | println!("{}", err); 40 | process::exit(1); 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /scripts/copy-examples: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import absolute_import, division, print_function 4 | import argparse 5 | import codecs 6 | import os.path 7 | import re 8 | import subprocess 9 | 10 | RE_EACH_CODE_BLOCK = re.compile(r'(?s)```[^\n]*\n(.*?)```') 11 | RE_MARKER = re.compile(r'^(?:# )?//(.+)$') 12 | RE_STRIP_COMMENT = re.compile(r'^#($| +)') 13 | 14 | if __name__ == '__main__': 15 | p = argparse.ArgumentParser() 16 | p.add_argument('--rust-file', default='src/cookbook.rs') 17 | p.add_argument('--example-dir', default='examples') 18 | args = p.parse_args() 19 | 20 | with codecs.open(args.rust_file, encoding='utf-8') as f: 21 | rustcode = f.read() 22 | for m in RE_EACH_CODE_BLOCK.finditer(rustcode): 23 | lines = m.group(1).splitlines() 24 | marker, codelines = lines[0], lines[1:] 25 | m = RE_MARKER.search(marker) 26 | if m is None: 27 | continue 28 | 29 | code = '\n'.join(RE_STRIP_COMMENT.sub('', line) for line in codelines) 30 | fpath = os.path.join(args.example_dir, m.group(1)) 31 | with codecs.open(fpath, mode='w+', encoding='utf-8') as f: 32 | print(code, file=f) 33 | subprocess.check_output(['rustfmt', fpath]) 34 | -------------------------------------------------------------------------------- /examples/tutorial-write-02.rs: -------------------------------------------------------------------------------- 1 | use std::{env, error::Error, ffi::OsString, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 
| let file_path = get_first_arg()?; 5 | let mut wtr = csv::Writer::from_path(file_path)?; 6 | 7 | wtr.write_record([ 8 | "City", 9 | "State", 10 | "Population", 11 | "Latitude", 12 | "Longitude", 13 | ])?; 14 | wtr.write_record([ 15 | "Davidsons Landing", 16 | "AK", 17 | "", 18 | "65.2419444", 19 | "-165.2716667", 20 | ])?; 21 | wtr.write_record(["Kenai", "AK", "7610", "60.5544444", "-151.2583333"])?; 22 | wtr.write_record(["Oakman", "AL", "", "33.7133333", "-87.3886111"])?; 23 | 24 | wtr.flush()?; 25 | Ok(()) 26 | } 27 | 28 | /// Returns the first positional argument sent to this process. If there are no 29 | /// positional arguments, then this returns an error. 30 | fn get_first_arg() -> Result> { 31 | match env::args_os().nth(1) { 32 | None => Err(From::from("expected 1 argument, but got none")), 33 | Some(file_path) => Ok(file_path), 34 | } 35 | } 36 | 37 | fn main() { 38 | if let Err(err) = run() { 39 | println!("{}", err); 40 | process::exit(1); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /examples/tutorial-write-serde-01.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | let mut wtr = csv::Writer::from_writer(io::stdout()); 5 | 6 | // We still need to write headers manually. 7 | wtr.write_record([ 8 | "City", 9 | "State", 10 | "Population", 11 | "Latitude", 12 | "Longitude", 13 | ])?; 14 | 15 | // But now we can write records by providing a normal Rust value. 16 | // 17 | // Note that the odd `None::` syntax is required because `None` on 18 | // its own doesn't have a concrete type, but Serde needs a concrete type 19 | // in order to serialize it. That is, `None` has type `Option` but 20 | // `None::` has type `Option`. 
21 | wtr.serialize(( 22 | "Davidsons Landing", 23 | "AK", 24 | None::, 25 | 65.2419444, 26 | -165.2716667, 27 | ))?; 28 | wtr.serialize(("Kenai", "AK", Some(7610), 60.5544444, -151.2583333))?; 29 | wtr.serialize(("Oakman", "AL", None::, 33.7133333, -87.3886111))?; 30 | 31 | wtr.flush()?; 32 | Ok(()) 33 | } 34 | 35 | fn main() { 36 | if let Err(err) = run() { 37 | println!("{}", err); 38 | process::exit(1); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /UNLICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /csv-core/UNLICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /csv-index/UNLICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 
2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /examples/tutorial-pipeline-search-01.rs: -------------------------------------------------------------------------------- 1 | use std::{env, error::Error, io, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | // Get the query from the positional arguments. 5 | // If one doesn't exist, return an error. 6 | let query = match env::args().nth(1) { 7 | None => return Err(From::from("expected 1 argument, but got none")), 8 | Some(query) => query, 9 | }; 10 | 11 | // Build CSV readers and writers to stdin and stdout, respectively. 
12 | let mut rdr = csv::Reader::from_reader(io::stdin()); 13 | let mut wtr = csv::Writer::from_writer(io::stdout()); 14 | 15 | // Before reading our data records, we should write the header record. 16 | wtr.write_record(rdr.headers()?)?; 17 | 18 | // Iterate over all the records in `rdr`, and write only records containing 19 | // `query` to `wtr`. 20 | for result in rdr.records() { 21 | let record = result?; 22 | if record.iter().any(|field| field == query) { 23 | wtr.write_record(&record)?; 24 | } 25 | } 26 | 27 | // CSV writers use an internal buffer, so we should always flush when done. 28 | wtr.flush()?; 29 | Ok(()) 30 | } 31 | 32 | fn main() { 33 | if let Err(err) = run() { 34 | println!("{}", err); 35 | process::exit(1); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /examples/tutorial-write-serde-02.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | use serde::Serialize; 4 | 5 | // Note that structs can derive both Serialize and Deserialize! 
6 | #[derive(Debug, Serialize)] 7 | #[serde(rename_all = "PascalCase")] 8 | struct Record<'a> { 9 | city: &'a str, 10 | state: &'a str, 11 | population: Option, 12 | latitude: f64, 13 | longitude: f64, 14 | } 15 | 16 | fn run() -> Result<(), Box> { 17 | let mut wtr = csv::Writer::from_writer(io::stdout()); 18 | 19 | wtr.serialize(Record { 20 | city: "Davidsons Landing", 21 | state: "AK", 22 | population: None, 23 | latitude: 65.2419444, 24 | longitude: -165.2716667, 25 | })?; 26 | wtr.serialize(Record { 27 | city: "Kenai", 28 | state: "AK", 29 | population: Some(7610), 30 | latitude: 60.5544444, 31 | longitude: -151.2583333, 32 | })?; 33 | wtr.serialize(Record { 34 | city: "Oakman", 35 | state: "AL", 36 | population: None, 37 | latitude: 33.7133333, 38 | longitude: -87.3886111, 39 | })?; 40 | 41 | wtr.flush()?; 42 | Ok(()) 43 | } 44 | 45 | fn main() { 46 | if let Err(err) = run() { 47 | println!("{}", err); 48 | process::exit(1); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Thank you for taking the time to file a bug report. The following describes 2 | some guidelines to creating a minimally useful ticket. 3 | 4 | Above all else: do not describe your problem, **SHOW** your problem. 5 | 6 | #### What version of the `csv` crate are you using? 7 | 8 | Replace this text with the version. (The version can be found in your 9 | Cargo.lock.) 10 | 11 | #### Briefly describe the question, bug or feature request. 12 | 13 | Replace this text with a description. 14 | 15 | #### Include a complete program demonstrating a problem. 16 | 17 | Whether you're asking for a feature, filing a bug or just asking a question, 18 | this section should almost always include some kind of code that you have 19 | written. The code provided should be able to be compiled by others and should 20 | be as feasibly small as possible. 
21 | 22 | If you're reporting a bug, then the code should exhibit some undesirable 23 | characteristic. 24 | 25 | If you're asking a question, then the code should represent what you've tried 26 | so far. 27 | 28 | If you're requesting a feature, then provide code that does the closest 29 | possible thing to what you're requesting, if possible. 30 | 31 | #### What is the observed behavior of the code above? 32 | 33 | Replace this text with the output of the program. 34 | 35 | #### What is the expected or desired behavior of the code above? 36 | 37 | Replace this text with the expected or desired output of the program. 38 | -------------------------------------------------------------------------------- /examples/tutorial-pipeline-pop-01.rs: -------------------------------------------------------------------------------- 1 | use std::{env, error::Error, io, process}; 2 | 3 | use serde::{Deserialize, Serialize}; 4 | 5 | // Unlike previous examples, we derive both Deserialize and Serialize. This 6 | // means we'll be able to automatically deserialize and serialize this type. 7 | #[derive(Debug, Deserialize, Serialize)] 8 | #[serde(rename_all = "PascalCase")] 9 | struct Record { 10 | city: String, 11 | state: String, 12 | population: Option, 13 | latitude: f64, 14 | longitude: f64, 15 | } 16 | 17 | fn run() -> Result<(), Box> { 18 | // Get the query from the positional arguments. 19 | // If one doesn't exist or isn't an integer, return an error. 20 | let minimum_pop: u64 = match env::args().nth(1) { 21 | None => return Err(From::from("expected 1 argument, but got none")), 22 | Some(arg) => arg.parse()?, 23 | }; 24 | 25 | // Build CSV readers and writers to stdin and stdout, respectively. 26 | // Note that we don't need to write headers explicitly. Since we're 27 | // serializing a custom struct, that's done for us automatically. 
28 | let mut rdr = csv::Reader::from_reader(io::stdin()); 29 | let mut wtr = csv::Writer::from_writer(io::stdout()); 30 | 31 | // Iterate over all the records in `rdr`, and write only records containing 32 | // a population that is greater than or equal to `minimum_pop`. 33 | for result in rdr.deserialize() { 34 | // Remember that when deserializing, we must use a type hint to 35 | // indicate which type we want to deserialize our record into. 36 | let record: Record = result?; 37 | 38 | // `is_some_and` is a combinator on `Option`. It takes a closure that 39 | // returns `bool` when the `Option` is `Some`. When the `Option` is 40 | // `None`, `false` is always returned. In this case, we test it against 41 | // our minimum population count that we got from the command line. 42 | if record.population.is_some_and(|pop| pop >= minimum_pop) { 43 | wtr.serialize(record)?; 44 | } 45 | } 46 | 47 | // CSV writers use an internal buffer, so we should always flush when done. 48 | wtr.flush()?; 49 | Ok(()) 50 | } 51 | 52 | fn main() { 53 | if let Err(err) = run() { 54 | println!("{}", err); 55 | process::exit(1); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /csv-index/README.md: -------------------------------------------------------------------------------- 1 | csv-index 2 | ========= 3 | A collection of data structures for indexing CSV data, with a focus on data 4 | structures that can be easily serialized to and deserialized from disk. 5 | 6 | [![Linux build status](https://api.travis-ci.org/BurntSushi/rust-csv.png)](https://travis-ci.org/BurntSushi/rust-csv) 7 | [![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/rust-csv?svg=true)](https://ci.appveyor.com/project/BurntSushi/rust-csv) 8 | [![](http://meritbadge.herokuapp.com/csv-index)](https://crates.io/crates/csv-index) 9 | 10 | Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). 
11 | 12 | ### Documentation 13 | 14 | https://docs.rs/csv-index 15 | 16 | ### Usage 17 | 18 | Add this to your `Cargo.toml`: 19 | 20 | ```toml 21 | [dependencies] 22 | csv-index = "0.1.6" 23 | ``` 24 | 25 | ### Example: build a simple random access index 26 | 27 | The `RandomAccessSimple` index is a simple data structure that maps record 28 | indices to the byte offset corresponding to the start of that record in CSV 29 | data. This example shows how to save this index to disk for a particular CSV 30 | file. 31 | 32 | ```rust 33 | use std::error::Error; 34 | use std::fs::File; 35 | use std::io::{self, Write}; 36 | 37 | use csv_index::RandomAccessSimple; 38 | 39 | fn main() { 40 | example().unwrap(); 41 | } 42 | 43 | fn example() -> Result<(), Box> { 44 | // Open a normal CSV reader. 45 | let mut rdr = csv::Reader::from_path("data.csv")?; 46 | 47 | // Create an index for the CSV data in `data.csv` and write it 48 | // to `data.csv.idx`. 49 | let mut wtr = io::BufWriter::new(File::create("data.csv.idx")?); 50 | RandomAccessSimple::create(&mut rdr, &mut wtr)?; 51 | wtr.flush()?; 52 | 53 | // Open the index we just created, get the position of the last 54 | // record and seek the CSV reader to the last record. 55 | let mut idx = RandomAccessSimple::open(File::open("data.csv.idx")?)?; 56 | if idx.is_empty() { 57 | return Err(From::from("expected a non-empty CSV index")); 58 | } 59 | let last = idx.len() - 1; 60 | let pos = idx.get(last)?; 61 | rdr.seek(pos)?; 62 | 63 | // Read the next record. 64 | if let Some(result) = rdr.records().next() { 65 | let record = result?; 66 | println!("{:?}", record); 67 | Ok(()) 68 | } else { 69 | Err(From::from("expected at least one record but got none")) 70 | } 71 | } 72 | ``` 73 | -------------------------------------------------------------------------------- /csv-index/src/lib.rs: -------------------------------------------------------------------------------- 1 | /*! 
2 | The `csv-index` crate provides data structures for indexing CSV data. 3 | 4 | # Usage 5 | 6 | This crate is 7 | [on crates.io](https://crates.io/crates/csv-index) 8 | and can be used by adding `csv-index` to your dependencies in your project's 9 | `Cargo.toml` 10 | 11 | ```toml 12 | [dependencies] 13 | csv-index = "0.2" 14 | ``` 15 | 16 | # Example: build a simple random access index 17 | 18 | The `RandomAccessSimple` index is a simple data structure that maps record 19 | indices to the byte offset corresponding to the start of that record in CSV 20 | data. This example shows how to save this index to disk for a particular CSV 21 | file. 22 | 23 | Note that this indexing data structure cannot be updated. That means that if 24 | your CSV data has changed since the index was created, then the index will need 25 | to be regenerated. 26 | 27 | ```no_run 28 | use std::error::Error; 29 | use std::fs::File; 30 | use std::io::{self, Write}; 31 | use csv_index::RandomAccessSimple; 32 | 33 | # fn main() { example().unwrap(); } 34 | fn example() -> Result<(), Box> { 35 | // Open a normal CSV reader. 36 | let mut rdr = csv::Reader::from_path("data.csv")?; 37 | 38 | // Create an index for the CSV data in `data.csv` and write it 39 | // to `data.csv.idx`. 40 | let mut wtr = io::BufWriter::new(File::create("data.csv.idx")?); 41 | RandomAccessSimple::create(&mut rdr, &mut wtr)?; 42 | wtr.flush()?; 43 | 44 | // Open the index we just created, get the position of the last 45 | // record and seek the CSV reader to the last record. 46 | let mut idx = RandomAccessSimple::open(File::open("data.csv.idx")?)?; 47 | if idx.is_empty() { 48 | return Err(From::from("expected a non-empty CSV index")); 49 | } 50 | let last = idx.len() - 1; 51 | let pos = idx.get(last)?; 52 | rdr.seek(pos)?; 53 | 54 | // Read the next record. 
55 | if let Some(result) = rdr.records().next() { 56 | let record = result?; 57 | println!("{:?}", record); 58 | Ok(()) 59 | } else { 60 | Err(From::from("expected at least one record but got none")) 61 | } 62 | } 63 | ``` 64 | 65 | # Future work 66 | 67 | The full scope of this crate hasn't been determined yet. For example, it's not 68 | clear whether this crate should support data structures more amenable to 69 | in-memory indexing. (Where the current set of indexing data structures are all 70 | amenable to serializing to disk.) 71 | */ 72 | 73 | #![deny(missing_docs)] 74 | 75 | pub use crate::simple::RandomAccessSimple; 76 | 77 | mod simple; 78 | -------------------------------------------------------------------------------- /src/debug.rs: -------------------------------------------------------------------------------- 1 | /// A type that provides a human readable debug impl for arbitrary bytes. 2 | /// 3 | /// This generally works best when the bytes are presumed to be mostly UTF-8, 4 | /// but will work for anything. 5 | /// 6 | /// N.B. This is copied nearly verbatim from regex-automata. Sigh. 7 | pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]); 8 | 9 | impl<'a> core::fmt::Debug for Bytes<'a> { 10 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { 11 | write!(f, "\"")?; 12 | // This is a sad re-implementation of a similar impl found in bstr. 
13 | let mut bytes = self.0; 14 | while let Some(result) = utf8_decode(bytes) { 15 | let ch = match result { 16 | Ok(ch) => ch, 17 | Err(byte) => { 18 | write!(f, r"\x{:02x}", byte)?; 19 | bytes = &bytes[1..]; 20 | continue; 21 | } 22 | }; 23 | bytes = &bytes[ch.len_utf8()..]; 24 | match ch { 25 | '\0' => write!(f, "\\0")?, 26 | // ASCII control characters except \0, \n, \r, \t 27 | '\x01'..='\x08' 28 | | '\x0b' 29 | | '\x0c' 30 | | '\x0e'..='\x19' 31 | | '\x7f' => { 32 | write!(f, "\\x{:02x}", u32::from(ch))?; 33 | } 34 | '\n' | '\r' | '\t' | _ => { 35 | write!(f, "{}", ch.escape_debug())?; 36 | } 37 | } 38 | } 39 | write!(f, "\"")?; 40 | Ok(()) 41 | } 42 | } 43 | 44 | /// Decodes the next UTF-8 encoded codepoint from the given byte slice. 45 | /// 46 | /// If no valid encoding of a codepoint exists at the beginning of the given 47 | /// byte slice, then the first byte is returned instead. 48 | /// 49 | /// This returns `None` if and only if `bytes` is empty. 50 | pub(crate) fn utf8_decode(bytes: &[u8]) -> Option> { 51 | fn len(byte: u8) -> Option { 52 | if byte <= 0x7F { 53 | Some(1) 54 | } else if byte & 0b1100_0000 == 0b1000_0000 { 55 | None 56 | } else if byte <= 0b1101_1111 { 57 | Some(2) 58 | } else if byte <= 0b1110_1111 { 59 | Some(3) 60 | } else if byte <= 0b1111_0111 { 61 | Some(4) 62 | } else { 63 | None 64 | } 65 | } 66 | 67 | if bytes.is_empty() { 68 | return None; 69 | } 70 | let len = match len(bytes[0]) { 71 | None => return Some(Err(bytes[0])), 72 | Some(len) if len > bytes.len() => return Some(Err(bytes[0])), 73 | Some(1) => return Some(Ok(char::from(bytes[0]))), 74 | Some(len) => len, 75 | }; 76 | match core::str::from_utf8(&bytes[..len]) { 77 | Ok(s) => Some(Ok(s.chars().next().unwrap())), 78 | Err(_) => Some(Err(bytes[0])), 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /examples/tutorial-perf-core-01.rs: -------------------------------------------------------------------------------- 1 | 
use std::io::{self, Read}; 2 | use std::process; 3 | 4 | use csv_core::{ReadFieldResult, Reader}; 5 | 6 | fn run(mut data: &[u8]) -> Option { 7 | let mut rdr = Reader::new(); 8 | 9 | // Count the number of records in Massachusetts. 10 | let mut count = 0; 11 | // Indicates the current field index. Reset to 0 at start of each record. 12 | let mut fieldidx = 0; 13 | // True when the current record is in the United States. 14 | let mut inus = false; 15 | // Buffer for field data. Must be big enough to hold the largest field. 16 | let mut field = [0; 1024]; 17 | loop { 18 | // Attempt to incrementally read the next CSV field. 19 | let (result, nread, nwrite) = rdr.read_field(data, &mut field); 20 | // nread is the number of bytes read from our input. We should never 21 | // pass those bytes to read_field again. 22 | data = &data[nread..]; 23 | // nwrite is the number of bytes written to the output buffer `field`. 24 | // The contents of the buffer after this point is unspecified. 25 | let field = &field[..nwrite]; 26 | 27 | match result { 28 | // We don't need to handle this case because we read all of the 29 | // data up front. If we were reading data incrementally, then this 30 | // would be a signal to read more. 31 | ReadFieldResult::InputEmpty => {} 32 | // If we get this case, then we found a field that contains more 33 | // than 1024 bytes. We keep this example simple and just fail. 34 | ReadFieldResult::OutputFull => { 35 | return None; 36 | } 37 | // This case happens when we've successfully read a field. If the 38 | // field is the last field in a record, then `record_end` is true. 
39 | ReadFieldResult::Field { record_end } => { 40 | if fieldidx == 0 && field == b"us" { 41 | inus = true; 42 | } else if inus && fieldidx == 3 && field == b"MA" { 43 | count += 1; 44 | } 45 | if record_end { 46 | fieldidx = 0; 47 | inus = false; 48 | } else { 49 | fieldidx += 1; 50 | } 51 | } 52 | // This case happens when the CSV reader has successfully exhausted 53 | // all input. 54 | ReadFieldResult::End => { 55 | break; 56 | } 57 | } 58 | } 59 | Some(count) 60 | } 61 | 62 | fn main() { 63 | // Read the entire contents of stdin up front. 64 | let mut data = vec![]; 65 | if let Err(err) = io::stdin().read_to_end(&mut data) { 66 | println!("{}", err); 67 | process::exit(1); 68 | } 69 | match run(&data) { 70 | None => { 71 | println!("error: could not count records, buffer too small"); 72 | process::exit(1); 73 | } 74 | Some(count) => { 75 | println!("{}", count); 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | on: 3 | pull_request: 4 | push: 5 | branches: 6 | - master 7 | schedule: 8 | - cron: '00 01 * * *' 9 | jobs: 10 | test: 11 | name: test 12 | runs-on: ${{ matrix.os }} 13 | strategy: 14 | matrix: 15 | # The docs seem to suggest that we can have a matrix with just an 16 | # include directive, but it results in a "matrix must define at least 17 | # one vector" error in the CI system. 
18 | build: [pinned, stable, beta, nightly, macos, win-msvc, win-gnu] 19 | include: 20 | - build: pinned 21 | os: ubuntu-latest 22 | rust: 1.73.0 23 | - build: stable 24 | os: ubuntu-latest 25 | rust: stable 26 | - build: beta 27 | os: ubuntu-latest 28 | rust: beta 29 | - build: nightly 30 | os: ubuntu-latest 31 | rust: nightly 32 | - build: macos 33 | os: macos-latest 34 | rust: stable 35 | - build: win-msvc 36 | os: windows-latest 37 | rust: stable 38 | - build: win-gnu 39 | os: windows-latest 40 | rust: stable-x86_64-gnu 41 | steps: 42 | - name: Checkout repository 43 | uses: actions/checkout@v4 44 | - name: Install Rust 45 | uses: dtolnay/rust-toolchain@master 46 | with: 47 | toolchain: ${{ matrix.rust }} 48 | - run: cargo build --verbose 49 | - run: cargo doc --verbose 50 | - run: cargo test --verbose 51 | - run: cargo test --verbose --manifest-path csv-core/Cargo.toml 52 | - run: cargo test --verbose --manifest-path csv-index/Cargo.toml 53 | - if: matrix.build == 'nightly' 54 | run: cargo bench --verbose --no-run 55 | 56 | check-doc-sync: 57 | name: check tutorial and cookbook examples 58 | runs-on: ubuntu-latest 59 | steps: 60 | - name: Checkout repository 61 | uses: actions/checkout@v4 62 | - name: Install Rust 63 | uses: dtolnay/rust-toolchain@master 64 | with: 65 | toolchain: stable 66 | components: rustfmt 67 | - name: check that cookbook examples are up to date and in sync 68 | run: ci/check-copy cookbook 69 | - name: check that tutorial examples are up to date and in sync 70 | run: ci/check-copy tutorial 71 | 72 | rustfmt: 73 | runs-on: ubuntu-latest 74 | steps: 75 | - name: Checkout repository 76 | uses: actions/checkout@v4 77 | - name: Install Rust 78 | uses: dtolnay/rust-toolchain@master 79 | with: 80 | toolchain: stable 81 | components: rustfmt 82 | - name: Check formatting 83 | run: cargo fmt --all --check 84 | 85 | miri: 86 | name: miri 87 | runs-on: ubuntu-latest 88 | steps: 89 | - name: Checkout repository 90 | uses: actions/checkout@v4 91 | - 
name: Install Rust 92 | uses: dtolnay/rust-toolchain@miri 93 | - run: cargo miri test --lib --verbose 94 | env: 95 | MIRIFLAGS: -Zmiri-strict-provenance 96 | - run: cargo miri test --doc --verbose 97 | env: 98 | MIRIFLAGS: -Zmiri-strict-provenance 99 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | csv 2 | === 3 | A fast and flexible CSV reader and writer for Rust, with support for Serde. 4 | 5 | [![Build status](https://github.com/BurntSushi/rust-csv/workflows/ci/badge.svg)](https://github.com/BurntSushi/rust-csv/actions) 6 | [![crates.io](https://img.shields.io/crates/v/csv.svg)](https://crates.io/crates/csv) 7 | 8 | Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). 9 | 10 | 11 | ### Documentation 12 | 13 | https://docs.rs/csv 14 | 15 | If you're new to Rust, the 16 | [tutorial](https://docs.rs/csv/1.*/csv/tutorial/index.html) 17 | is a good place to start. 18 | 19 | 20 | ### Usage 21 | 22 | To bring this crate into your repository, either add `csv` to your 23 | `Cargo.toml`, or run `cargo add csv`. 24 | 25 | 26 | ### Example 27 | 28 | This example shows how to read CSV data from stdin and print each record to 29 | stdout. 30 | 31 | There are more examples in the 32 | [cookbook](https://docs.rs/csv/1.*/csv/cookbook/index.html). 33 | 34 | ```rust 35 | use std::{error::Error, io, process}; 36 | 37 | fn example() -> Result<(), Box> { 38 | // Build the CSV reader and iterate over each record. 39 | let mut rdr = csv::Reader::from_reader(io::stdin()); 40 | for result in rdr.records() { 41 | // The iterator yields Result, so we check the 42 | // error here. 
        let record = result?;
        println!("{:?}", record);
    }
    Ok(())
}

fn main() {
    if let Err(err) = example() {
        println!("error running example: {}", err);
        process::exit(1);
    }
}
```

The above example can be run like so:

```text
$ git clone git://github.com/BurntSushi/rust-csv
$ cd rust-csv
$ cargo run --example cookbook-read-basic < examples/data/smallpop.csv
```

### Example with Serde

This example shows how to read CSV data from stdin into your own custom struct.
By default, the member names of the struct are matched with the values in the
header record of your CSV data.

```rust
use std::{error::Error, io, process};

#[derive(Debug, serde::Deserialize)]
struct Record {
    city: String,
    region: String,
    country: String,
    population: Option<u64>,
}

fn example() -> Result<(), Box<dyn Error>> {
    let mut rdr = csv::Reader::from_reader(io::stdin());
    for result in rdr.deserialize() {
        // Notice that we need to provide a type hint for automatic
        // deserialization.
87 | let record: Record = result?; 88 | println!("{:?}", record); 89 | } 90 | Ok(()) 91 | } 92 | 93 | fn main() { 94 | if let Err(err) = example() { 95 | println!("error running example: {}", err); 96 | process::exit(1); 97 | } 98 | } 99 | ``` 100 | 101 | The above example can be run like so: 102 | 103 | ``` 104 | $ git clone git://github.com/BurntSushi/rust-csv 105 | $ cd rust-csv 106 | $ cargo run --example cookbook-read-serde < examples/data/smallpop.csv 107 | ``` 108 | -------------------------------------------------------------------------------- /csv-core/benches/bench.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | 5 | use test::Bencher; 6 | 7 | use csv_core::{Reader, ReaderBuilder}; 8 | 9 | static NFL: &'static str = include_str!("../../examples/data/bench/nfl.csv"); 10 | static GAME: &'static str = include_str!("../../examples/data/bench/game.csv"); 11 | static POP: &'static str = 12 | include_str!("../../examples/data/bench/worldcitiespop.csv"); 13 | static MBTA: &'static str = 14 | include_str!("../../examples/data/bench/gtfs-mbta-stop-times.csv"); 15 | 16 | macro_rules! 
bench { 17 | ($name:ident, $data:ident, $counter:ident, $result:expr) => { 18 | bench!($name, $data, $counter, $result, false); 19 | }; 20 | ($name:ident, $data:ident, $counter:ident, $result:expr, NFA) => { 21 | bench!($name, $data, $counter, $result, true); 22 | }; 23 | ($name:ident, $data:ident, $counter:ident, $result:expr, $nfa:expr) => { 24 | #[bench] 25 | fn $name(b: &mut Bencher) { 26 | let data = $data.as_bytes(); 27 | b.bytes = data.len() as u64; 28 | let mut rdr = ReaderBuilder::new().nfa($nfa).build(); 29 | b.iter(|| { 30 | rdr.reset(); 31 | assert_eq!($counter(&mut rdr, data), $result); 32 | }) 33 | } 34 | }; 35 | } 36 | 37 | bench!(count_nfl_field_copy_dfa, NFL, count_fields, 130000); 38 | bench!(count_nfl_field_copy_nfa, NFL, count_fields, 130000, NFA); 39 | bench!(count_nfl_record_copy_dfa, NFL, count_records, 10000); 40 | bench!(count_nfl_record_copy_nfa, NFL, count_records, 10000, NFA); 41 | 42 | bench!(count_game_field_copy_dfa, GAME, count_fields, 600000); 43 | bench!(count_game_field_copy_nfa, GAME, count_fields, 600000, NFA); 44 | bench!(count_game_record_copy_dfa, GAME, count_records, 100000); 45 | bench!(count_game_record_copy_nfa, GAME, count_records, 100000, NFA); 46 | 47 | bench!(count_pop_field_copy_dfa, POP, count_fields, 140007); 48 | bench!(count_pop_field_copy_nfa, POP, count_fields, 140007, NFA); 49 | bench!(count_pop_record_copy_dfa, POP, count_records, 20001); 50 | bench!(count_pop_record_copy_nfa, POP, count_records, 20001, NFA); 51 | 52 | bench!(count_mbta_field_copy_dfa, MBTA, count_fields, 90000); 53 | bench!(count_mbta_field_copy_nfa, MBTA, count_fields, 90000, NFA); 54 | bench!(count_mbta_record_copy_dfa, MBTA, count_records, 10000); 55 | bench!(count_mbta_record_copy_nfa, MBTA, count_records, 10000, NFA); 56 | 57 | fn count_fields(rdr: &mut Reader, mut data: &[u8]) -> u64 { 58 | use csv_core::ReadFieldResult::*; 59 | 60 | let mut count = 0; 61 | let mut field = [0u8; 1024]; 62 | loop { 63 | let (res, nin, _) = 
rdr.read_field(data, &mut field); 64 | data = &data[nin..]; 65 | match res { 66 | InputEmpty => {} 67 | OutputFull => panic!("field too large"), 68 | Field { .. } => { 69 | count += 1; 70 | } 71 | End => break, 72 | } 73 | } 74 | count 75 | } 76 | 77 | fn count_records(rdr: &mut Reader, mut data: &[u8]) -> u64 { 78 | use csv_core::ReadRecordResult::*; 79 | 80 | let mut count = 0; 81 | let mut record = [0; 8192]; 82 | let mut ends = [0; 32]; 83 | loop { 84 | let (res, nin, _, _) = rdr.read_record(data, &mut record, &mut ends); 85 | data = &data[nin..]; 86 | match res { 87 | InputEmpty => {} 88 | OutputFull | OutputEndsFull => panic!("field too large"), 89 | Record => count += 1, 90 | End => break, 91 | } 92 | } 93 | count 94 | } 95 | -------------------------------------------------------------------------------- /csv-core/README.md: -------------------------------------------------------------------------------- 1 | csv-core 2 | ======== 3 | A fast CSV reader and write for use in a `no_std` context. This crate will 4 | never use the Rust standard library. 5 | 6 | [![Linux build status](https://api.travis-ci.org/BurntSushi/rust-csv.png)](https://travis-ci.org/BurntSushi/rust-csv) 7 | [![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/rust-csv?svg=true)](https://ci.appveyor.com/project/BurntSushi/rust-csv) 8 | [![](http://meritbadge.herokuapp.com/csv-core)](https://crates.io/crates/csv-core) 9 | 10 | Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). 11 | 12 | ### Documentation 13 | 14 | https://docs.rs/csv-core 15 | 16 | ### Usage 17 | 18 | Add this to your `Cargo.toml`: 19 | 20 | ```toml 21 | [dependencies] 22 | csv-core = "0.1.6" 23 | ``` 24 | 25 | ### Build features 26 | 27 | This crate by default links with `libc`, which is done via the `libc` feature. 28 | Disabling this feature will drop `csv-core`'s dependency on `libc`. 
29 | 30 | 31 | ### Example: reading CSV 32 | 33 | This example shows how to count the number of fields and records in CSV data. 34 | 35 | ```rust 36 | use csv_core::{Reader, ReadFieldResult}; 37 | 38 | let data = " 39 | foo,bar,baz 40 | a,b,c 41 | xxx,yyy,zzz 42 | "; 43 | 44 | let mut rdr = Reader::new(); 45 | let mut bytes = data.as_bytes(); 46 | let mut count_fields = 0; 47 | let mut count_records = 0; 48 | loop { 49 | // We skip handling the output since we don't need it for counting. 50 | let (result, nin, _) = rdr.read_field(bytes, &mut [0; 1024]); 51 | bytes = &bytes[nin..]; 52 | match result { 53 | ReadFieldResult::InputEmpty => {}, 54 | ReadFieldResult::OutputFull => panic!("field too large"), 55 | ReadFieldResult::Field { record_end } => { 56 | count_fields += 1; 57 | if record_end { 58 | count_records += 1; 59 | } 60 | } 61 | ReadFieldResult::End => break, 62 | } 63 | } 64 | assert_eq!(3, count_records); 65 | assert_eq!(9, count_fields); 66 | ``` 67 | 68 | 69 | ### Example: writing CSV 70 | 71 | This example shows how to use the `Writer` API to write valid CSV data. Proper 72 | quoting is handled automatically. 73 | 74 | ```rust 75 | use csv_core::Writer; 76 | 77 | // This is where we'll write out CSV data. 78 | let mut out = &mut [0; 1024]; 79 | // The number of bytes we've written to `out`. 80 | let mut nout = 0; 81 | // Create a CSV writer with a default configuration. 82 | let mut wtr = Writer::new(); 83 | 84 | // Write a single field. Note that we ignore the `WriteResult` and the number 85 | // of input bytes consumed since we're doing this by hand. 86 | let (_, _, n) = wtr.field(&b"foo"[..], &mut out[nout..]); 87 | nout += n; 88 | 89 | // Write a delimiter and then another field that requires quotes. 
90 | let (_, n) = wtr.delimiter(&mut out[nout..]); 91 | nout += n; 92 | let (_, _, n) = wtr.field(&b"bar,baz"[..], &mut out[nout..]); 93 | nout += n; 94 | let (_, n) = wtr.terminator(&mut out[nout..]); 95 | nout += n; 96 | 97 | // Now write another record. 98 | let (_, _, n) = wtr.field(&b"a \"b\" c"[..], &mut out[nout..]); 99 | nout += n; 100 | let (_, n) = wtr.delimiter(&mut out[nout..]); 101 | nout += n; 102 | let (_, _, n) = wtr.field(&b"quux"[..], &mut out[nout..]); 103 | nout += n; 104 | 105 | // We must always call finish once done writing. 106 | // This ensures that any closing quotes are written. 107 | let (_, n) = wtr.finish(&mut out[nout..]); 108 | nout += n; 109 | 110 | assert_eq!(&out[..nout], &b"\ 111 | foo,\"bar,baz\" 112 | \"a \"\"b\"\" c\",quux"[..]); 113 | ``` 114 | -------------------------------------------------------------------------------- /examples/data/uspop.csv: -------------------------------------------------------------------------------- 1 | City,State,Population,Latitude,Longitude 2 | Davidsons Landing,AK,,65.2419444,-165.2716667 3 | Kenai,AK,7610,60.5544444,-151.2583333 4 | Oakman,AL,,33.7133333,-87.3886111 5 | Richards Crossroads,AL,,31.7369444,-85.2644444 6 | Sandfort,AL,,32.3380556,-85.2233333 7 | Selma,AL,18980,32.4072222,-87.0211111 8 | Shadow Oaks Addition,AR,,34.9555556,-91.9475000 9 | Summerville,AR,,33.5202778,-92.3555556 10 | El Mirage,AZ,32308,33.6130556,-112.3238889 11 | Willow Springs,AZ,,36.1894444,-111.3930556 12 | Colton,CA,52335,34.0738889,-117.3127778 13 | Fontana,CA,169160,34.0922222,-117.4341667 14 | Fountain Valley,CA,56133,33.7091667,-117.9527778 15 | Kings Beach,CA,,39.2377778,-120.0255556 16 | Milpitas,CA,62636,37.4283333,-121.9055556 17 | Mokelumne City,CA,,38.2530556,-121.4380556 18 | Mount Eden,CA,,37.6361111,-122.0988889 19 | San Clemente,CA,62272,33.4269444,-117.6111111 20 | Seal Beach,CA,24404,33.7413889,-118.1038889 21 | West Hollywood,CA,37031,34.0900000,-118.3608333 22 | 
Bridgeport,CT,139090,41.1669444,-73.2052778 23 | Orange,CT,13860,41.2783333,-73.0261111 24 | Azalea Park,FL,12347,28.5408333,-81.3008333 25 | Bratt,FL,,30.9655556,-87.4275000 26 | Cutler Ridge,FL,26831,25.5802778,-80.3469444 27 | Dunn Creek,FL,,30.4861111,-81.5908333 28 | South Daytona,FL,14451,29.1655556,-81.0047222 29 | Brickhouse,GA,,33.7750000,-82.8108333 30 | Lakeview Heights,GA,,33.6188889,-84.4505556 31 | Perry,GA,11234,32.4580556,-83.7316667 32 | Roswell,GA,77218,34.0230556,-84.3616667 33 | Warfield,GA,,33.2994444,-83.3838889 34 | Kirkman,IA,,41.7286111,-95.2650000 35 | Travers,ID,,42.6091667,-113.7361111 36 | Calhoun,IL,,38.6502778,-88.0436111 37 | Cleone,IL,,39.4230556,-87.9075000 38 | Deerfield,IL,19618,42.1711111,-87.8444444 39 | Highbank Town,IN,,38.5144444,-87.1502778 40 | Indianapolis,IN,773283,39.7683333,-86.1580556 41 | Leona,KS,,39.7872222,-95.3213889 42 | New Salem,KS,,37.3105556,-96.8950000 43 | Flint Springs,KY,,37.3433333,-86.7136111 44 | Harvey,LA,22383,29.9033333,-90.0772222 45 | Jennings,LA,10547,30.2222222,-92.6569444 46 | King,LA,,32.2405556,-91.1213889 47 | Opelousas,LA,22835,30.5333333,-92.0813889 48 | Reading,MA,23441,42.5255556,-71.0958333 49 | Mount Airy,MD,8714,39.3761111,-77.1550000 50 | Auburn,ME,23488,44.0977778,-70.2316667 51 | Ellsworth,ME,7055,44.5433333,-68.4200000 52 | Sturgis,MI,11081,41.7991667,-85.4191667 53 | Brooklyn Center,MN,27718,45.0761111,-93.3325000 54 | Coon Rapids,MN,62528,45.1200000,-93.2875000 55 | Moark,MO,,36.3825000,-89.9888889 56 | Owens,MO,,37.2188889,-92.4027778 57 | Natchez,MS,17118,31.5602778,-91.4030556 58 | Rogers,NE,,41.4652778,-96.9147222 59 | Hollis,NH,7711,42.7430556,-71.5922222 60 | Bayonne,NJ,59878,40.6686111,-74.1147222 61 | Belleville,NJ,36878,40.7936111,-74.1505556 62 | Frenchtown,NJ,,40.5261111,-75.0619444 63 | Sharp,NJ,,40.0922222,-74.7427778 64 | Los Ranchos de Albuquerque,NM,5184,35.1619444,-106.6422222 65 | Deerhead,NY,,44.3522222,-73.5436111 66 | Howland,NY,,43.0791667,-76.6827778 67 | 
Lake Grove,NY,10715,40.8527778,-73.1155556 68 | Penfield Center,NY,,43.1672222,-77.4313889 69 | Comet,OH,,39.1158333,-82.5511111 70 | Little Mountain,OH,,41.6402778,-81.2819444 71 | Mason,OH,30988,39.3600000,-84.3100000 72 | Siverly,OH,,39.3488889,-82.5000000 73 | Gladstone,OR,12249,45.3808333,-122.5936111 74 | Gresham,OR,98851,45.4983333,-122.4302778 75 | Ephrata,PA,13182,40.1797222,-76.1791667 76 | Mount Airy,PA,,41.0941667,-79.5222222 77 | Uhlerstown,PA,,40.5252778,-75.0736111 78 | Weis Library,PA,,42.0483333,-80.1700000 79 | Woodcock,PA,,41.7547222,-80.0858333 80 | Rock Hill,SC,59766,34.9247222,-81.0252778 81 | Summerville,SC,34958,33.0183333,-80.1758333 82 | Wolfton,SC,,33.5883333,-80.9819444 83 | Avenger Village,TX,,32.4594444,-100.4552778 84 | Brashear,TX,,33.1186111,-95.7333333 85 | Dumas Junction,TX,,35.2127778,-101.8019444 86 | Edinburg,TX,60509,26.3013889,-98.1630556 87 | Eichelberger Crossing,TX,,31.6166667,-97.3077778 88 | Euless,TX,53221,32.8369444,-97.0816667 89 | Greenock,TX,,31.7661111,-97.3452778 90 | Greenville,TX,25382,33.1383333,-96.1105556 91 | Highland Village,TX,15365,33.0916667,-97.0463889 92 | Maxey Town,TX,,31.4433333,-94.1225000 93 | Pharr,TX,60687,26.1944444,-98.1833333 94 | Snyder,TX,10600,32.7177778,-100.9172222 95 | Webster,TX,9038,29.5375000,-95.1180556 96 | Wild Peach Village,TX,,29.0833333,-95.6336111 97 | Misty Hills Numbers 1-7,UT,,40.6416667,-111.9955556 98 | Pleasant Grove,UT,24449,40.3641667,-111.7377778 99 | Rio Vista,VA,,37.5688889,-77.5230556 100 | Tabernacle,VA,,37.4230556,-76.2966667 101 | Cody,WY,9161,44.5263889,-109.0558333 102 | -------------------------------------------------------------------------------- /examples/data/uspop-null.csv: -------------------------------------------------------------------------------- 1 | City,State,Population,Latitude,Longitude 2 | Davidsons Landing,AK,,65.2419444,-165.2716667 3 | Kenai,AK,7610,60.5544444,-151.2583333 4 | Oakman,AL,,33.7133333,-87.3886111 5 | Richards 
Crossroads,AL,,31.7369444,-85.2644444 6 | Sandfort,AL,,32.3380556,-85.2233333 7 | Selma,AL,18980,32.4072222,-87.0211111 8 | Shadow Oaks Addition,AR,,34.9555556,-91.9475000 9 | Summerville,AR,,33.5202778,-92.3555556 10 | El Mirage,AZ,32308,33.6130556,-112.3238889 11 | Willow Springs,AZ,,36.1894444,-111.3930556 12 | Colton,CA,52335,34.0738889,-117.3127778 13 | Fontana,CA,169160,34.0922222,-117.4341667 14 | Fountain Valley,CA,56133,33.7091667,-117.9527778 15 | Kings Beach,CA,,39.2377778,-120.0255556 16 | Milpitas,CA,62636,37.4283333,-121.9055556 17 | Mokelumne City,CA,,38.2530556,-121.4380556 18 | Mount Eden,CA,,37.6361111,-122.0988889 19 | San Clemente,CA,62272,33.4269444,-117.6111111 20 | Seal Beach,CA,24404,33.7413889,-118.1038889 21 | West Hollywood,CA,37031,34.0900000,-118.3608333 22 | Bridgeport,CT,139090,41.1669444,-73.2052778 23 | Orange,CT,13860,41.2783333,-73.0261111 24 | Azalea Park,FL,12347,28.5408333,-81.3008333 25 | Bratt,FL,,30.9655556,-87.4275000 26 | Cutler Ridge,FL,26831,25.5802778,-80.3469444 27 | Dunn Creek,FL,,30.4861111,-81.5908333 28 | South Daytona,FL,14451,29.1655556,-81.0047222 29 | Brickhouse,GA,,33.7750000,-82.8108333 30 | Lakeview Heights,GA,,33.6188889,-84.4505556 31 | Perry,GA,11234,32.4580556,-83.7316667 32 | Roswell,GA,77218,34.0230556,-84.3616667 33 | Warfield,GA,,33.2994444,-83.3838889 34 | Kirkman,IA,,41.7286111,-95.2650000 35 | Travers,ID,,42.6091667,-113.7361111 36 | Calhoun,IL,,38.6502778,-88.0436111 37 | Cleone,IL,,39.4230556,-87.9075000 38 | Deerfield,IL,19618,42.1711111,-87.8444444 39 | Highbank Town,IN,,38.5144444,-87.1502778 40 | Indianapolis,IN,773283,39.7683333,-86.1580556 41 | Leona,KS,,39.7872222,-95.3213889 42 | New Salem,KS,,37.3105556,-96.8950000 43 | Flint Springs,KY,NULL,37.3433333,-86.7136111 44 | Harvey,LA,22383,29.9033333,-90.0772222 45 | Jennings,LA,10547,30.2222222,-92.6569444 46 | King,LA,,32.2405556,-91.1213889 47 | Opelousas,LA,22835,30.5333333,-92.0813889 48 | Reading,MA,23441,42.5255556,-71.0958333 49 | 
Mount Airy,MD,8714,39.3761111,-77.1550000 50 | Auburn,ME,23488,44.0977778,-70.2316667 51 | Ellsworth,ME,7055,44.5433333,-68.4200000 52 | Sturgis,MI,11081,41.7991667,-85.4191667 53 | Brooklyn Center,MN,27718,45.0761111,-93.3325000 54 | Coon Rapids,MN,62528,45.1200000,-93.2875000 55 | Moark,MO,NULL,36.3825000,-89.9888889 56 | Owens,MO,,37.2188889,-92.4027778 57 | Natchez,MS,17118,31.5602778,-91.4030556 58 | Rogers,NE,,41.4652778,-96.9147222 59 | Hollis,NH,7711,42.7430556,-71.5922222 60 | Bayonne,NJ,59878,40.6686111,-74.1147222 61 | Belleville,NJ,36878,40.7936111,-74.1505556 62 | Frenchtown,NJ,NULL,40.5261111,-75.0619444 63 | Sharp,NJ,,40.0922222,-74.7427778 64 | Los Ranchos de Albuquerque,NM,5184,35.1619444,-106.6422222 65 | Deerhead,NY,,44.3522222,-73.5436111 66 | Howland,NY,,43.0791667,-76.6827778 67 | Lake Grove,NY,10715,40.8527778,-73.1155556 68 | Penfield Center,NY,,43.1672222,-77.4313889 69 | Comet,OH,,39.1158333,-82.5511111 70 | Little Mountain,OH,,41.6402778,-81.2819444 71 | Mason,OH,30988,39.3600000,-84.3100000 72 | Siverly,OH,,39.3488889,-82.5000000 73 | Gladstone,OR,12249,45.3808333,-122.5936111 74 | Gresham,OR,98851,45.4983333,-122.4302778 75 | Ephrata,PA,13182,40.1797222,-76.1791667 76 | Mount Airy,PA,,41.0941667,-79.5222222 77 | Uhlerstown,PA,,40.5252778,-75.0736111 78 | Weis Library,PA,,42.0483333,-80.1700000 79 | Woodcock,PA,,41.7547222,-80.0858333 80 | Rock Hill,SC,59766,34.9247222,-81.0252778 81 | Summerville,SC,34958,33.0183333,-80.1758333 82 | Wolfton,SC,,33.5883333,-80.9819444 83 | Avenger Village,TX,,32.4594444,-100.4552778 84 | Brashear,TX,,33.1186111,-95.7333333 85 | Dumas Junction,TX,,35.2127778,-101.8019444 86 | Edinburg,TX,60509,26.3013889,-98.1630556 87 | Eichelberger Crossing,TX,,31.6166667,-97.3077778 88 | Euless,TX,53221,32.8369444,-97.0816667 89 | Greenock,TX,,31.7661111,-97.3452778 90 | Greenville,TX,25382,33.1383333,-96.1105556 91 | Highland Village,TX,15365,33.0916667,-97.0463889 92 | Maxey Town,TX,,31.4433333,-94.1225000 93 | 
Pharr,TX,60687,26.1944444,-98.1833333 94 | Snyder,TX,10600,32.7177778,-100.9172222 95 | Webster,TX,9038,29.5375000,-95.1180556 96 | Wild Peach Village,TX,,29.0833333,-95.6336111 97 | Misty Hills Numbers 1-7,UT,,40.6416667,-111.9955556 98 | Pleasant Grove,UT,24449,40.3641667,-111.7377778 99 | Rio Vista,VA,,37.5688889,-77.5230556 100 | Tabernacle,VA,,37.4230556,-76.2966667 101 | Cody,WY,9161,44.5263889,-109.0558333 102 | -------------------------------------------------------------------------------- /csv-core/src/lib.rs: -------------------------------------------------------------------------------- 1 | /*! 2 | `csv-core` provides a fast CSV reader and writer for use in a `no_std` context. 3 | 4 | This crate will never use the standard library. `no_std` support is therefore 5 | enabled by default. 6 | 7 | If you're looking for more ergonomic CSV parsing routines, please use the 8 | [`csv`](https://docs.rs/csv) crate. 9 | 10 | # Overview 11 | 12 | This crate has two primary APIs. The `Reader` API provides a CSV parser, and 13 | the `Writer` API provides a CSV writer. 14 | 15 | # Example: reading CSV 16 | 17 | This example shows how to count the number of fields and records in CSV data. 18 | 19 | ``` 20 | use csv_core::{Reader, ReadFieldResult}; 21 | 22 | let data = " 23 | foo,bar,baz 24 | a,b,c 25 | xxx,yyy,zzz 26 | "; 27 | 28 | let mut rdr = Reader::new(); 29 | let mut bytes = data.as_bytes(); 30 | let mut count_fields = 0; 31 | let mut count_records = 0; 32 | loop { 33 | // We skip handling the output since we don't need it for counting. 
34 | let (result, nin, _) = rdr.read_field(bytes, &mut [0; 1024]); 35 | bytes = &bytes[nin..]; 36 | match result { 37 | ReadFieldResult::InputEmpty => {}, 38 | ReadFieldResult::OutputFull => panic!("field too large"), 39 | ReadFieldResult::Field { record_end } => { 40 | count_fields += 1; 41 | if record_end { 42 | count_records += 1; 43 | } 44 | } 45 | ReadFieldResult::End => break, 46 | } 47 | } 48 | assert_eq!(3, count_records); 49 | assert_eq!(9, count_fields); 50 | ``` 51 | 52 | # Example: writing CSV 53 | 54 | This example shows how to use the `Writer` API to write valid CSV data. Proper 55 | quoting is handled automatically. 56 | 57 | ``` 58 | use csv_core::Writer; 59 | 60 | // This is where we'll write out CSV data. 61 | let mut out = &mut [0; 1024]; 62 | // The number of bytes we've written to `out`. 63 | let mut nout = 0; 64 | // Create a CSV writer with a default configuration. 65 | let mut wtr = Writer::new(); 66 | 67 | // Write a single field. Note that we ignore the `WriteResult` and the number 68 | // of input bytes consumed since we're doing this by hand. 69 | let (_, _, n) = wtr.field(&b"foo"[..], &mut out[nout..]); 70 | nout += n; 71 | 72 | // Write a delimiter and then another field that requires quotes. 73 | let (_, n) = wtr.delimiter(&mut out[nout..]); 74 | nout += n; 75 | let (_, _, n) = wtr.field(&b"bar,baz"[..], &mut out[nout..]); 76 | nout += n; 77 | let (_, n) = wtr.terminator(&mut out[nout..]); 78 | nout += n; 79 | 80 | // Now write another record. 81 | let (_, _, n) = wtr.field(&b"a \"b\" c"[..], &mut out[nout..]); 82 | nout += n; 83 | let (_, n) = wtr.delimiter(&mut out[nout..]); 84 | nout += n; 85 | let (_, _, n) = wtr.field(&b"quux"[..], &mut out[nout..]); 86 | nout += n; 87 | 88 | // We must always call finish once done writing. 89 | // This ensures that any closing quotes are written. 
90 | let (_, n) = wtr.finish(&mut out[nout..]); 91 | nout += n; 92 | 93 | assert_eq!(&out[..nout], &b"\ 94 | foo,\"bar,baz\" 95 | \"a \"\"b\"\" c\",quux"[..]); 96 | ``` 97 | */ 98 | 99 | #![deny(missing_docs)] 100 | #![no_std] 101 | 102 | pub use crate::reader::{ 103 | ReadFieldNoCopyResult, ReadFieldResult, ReadRecordNoCopyResult, 104 | ReadRecordResult, Reader, ReaderBuilder, 105 | }; 106 | pub use crate::writer::{ 107 | is_non_numeric, quote, WriteResult, Writer, WriterBuilder, 108 | }; 109 | 110 | mod reader; 111 | mod writer; 112 | 113 | /// A record terminator. 114 | /// 115 | /// Use this to specify the record terminator while parsing CSV. The default is 116 | /// CRLF, which treats `\r`, `\n` or `\r\n` as a single record terminator. 117 | #[derive(Clone, Copy, Debug, Default)] 118 | #[non_exhaustive] 119 | pub enum Terminator { 120 | /// Parses `\r`, `\n` or `\r\n` as a single record terminator. 121 | #[default] 122 | CRLF, 123 | /// Parses the byte given as a record terminator. 124 | Any(u8), 125 | } 126 | 127 | impl Terminator { 128 | /// Checks whether the terminator is set to CRLF. 129 | fn is_crlf(&self) -> bool { 130 | match *self { 131 | Terminator::CRLF => true, 132 | Terminator::Any(_) => false, 133 | } 134 | } 135 | 136 | fn equals(&self, other: u8) -> bool { 137 | match *self { 138 | Terminator::CRLF => other == b'\r' || other == b'\n', 139 | Terminator::Any(b) => other == b, 140 | } 141 | } 142 | } 143 | 144 | /// The quoting style to use when writing CSV data. 145 | #[derive(Clone, Copy, Debug, Default)] 146 | #[non_exhaustive] 147 | pub enum QuoteStyle { 148 | /// This puts quotes around every field. Always. 149 | Always, 150 | /// This puts quotes around fields only when necessary. 151 | /// 152 | /// They are necessary when fields contain a quote, delimiter or record 153 | /// terminator. Quotes are also necessary when writing an empty record 154 | /// (which is indistinguishable from a record with one empty field). 
155 | /// 156 | /// This is the default. 157 | #[default] 158 | Necessary, 159 | /// This puts quotes around all fields that are non-numeric. Namely, when 160 | /// writing a field that does not parse as a valid float or integer, then 161 | /// quotes will be used even if they aren't strictly necessary. 162 | NonNumeric, 163 | /// This *never* writes quotes, even if it would produce invalid CSV data. 164 | Never, 165 | } 166 | -------------------------------------------------------------------------------- /src/cookbook.rs: -------------------------------------------------------------------------------- 1 | /*! 2 | A cookbook of examples for CSV reading and writing. 3 | 4 | # List of examples 5 | 6 | This is a list of examples that follow. Each of them can be found in the 7 | `examples` directory of the 8 | [`rust-csv`](https://github.com/BurntSushi/rust-csv) 9 | repository. 10 | 11 | For **reading** CSV: 12 | 13 | 1. [Basic](#reading-basic) 14 | 2. [With Serde](#reading-with-serde) 15 | 3. [Setting a different delimiter](#reading-setting-a-different-delimiter) 16 | 4. [Without headers](#reading-without-headers) 17 | 18 | For **writing** CSV: 19 | 20 | 5. [Basic](#writing-basic) 21 | 6. [With Serde](#writing-with-serde) 22 | 23 | Please 24 | [submit a pull request](https://github.com/BurntSushi/rust-csv/pulls) 25 | if you're interested in adding an example to this list! 26 | 27 | # Reading: basic 28 | 29 | This example shows how to read CSV data from stdin and print each record to 30 | stdout. 31 | 32 | ```no_run 33 | # //cookbook-read-basic.rs 34 | use std::{error::Error, io, process}; 35 | 36 | fn example() -> Result<(), Box> { 37 | // Build the CSV reader and iterate over each record. 38 | let mut rdr = csv::Reader::from_reader(io::stdin()); 39 | for result in rdr.records() { 40 | // The iterator yields Result, so we check the 41 | // error here.. 
42 | let record = result?; 43 | println!("{:?}", record); 44 | } 45 | Ok(()) 46 | } 47 | 48 | fn main() { 49 | if let Err(err) = example() { 50 | println!("error running example: {}", err); 51 | process::exit(1); 52 | } 53 | } 54 | ``` 55 | 56 | The above example can be run like so: 57 | 58 | ```ignore 59 | $ git clone git://github.com/BurntSushi/rust-csv 60 | $ cd rust-csv 61 | $ cargo run --example cookbook-read-basic < examples/data/smallpop.csv 62 | ``` 63 | 64 | # Reading: with Serde 65 | 66 | This is like the previous example, except it shows how to deserialize each 67 | record into a struct type that you define. 68 | 69 | For more examples and details on how Serde deserialization works, see the 70 | [`Reader::deserialize`](../struct.Reader.html#method.deserialize) 71 | method. 72 | 73 | ```no_run 74 | # //cookbook-read-serde.rs 75 | # #![allow(dead_code)] 76 | use std::{error::Error, io, process}; 77 | 78 | use serde::Deserialize; 79 | 80 | // By default, struct field names are deserialized based on the position of 81 | // a corresponding field in the CSV data's header record. 82 | #[derive(Debug, Deserialize)] 83 | struct Record { 84 | city: String, 85 | region: String, 86 | country: String, 87 | population: Option, 88 | } 89 | 90 | fn example() -> Result<(), Box> { 91 | let mut rdr = csv::Reader::from_reader(io::stdin()); 92 | for result in rdr.deserialize() { 93 | // Notice that we need to provide a type hint for automatic 94 | // deserialization. 
95 | let record: Record = result?; 96 | println!("{:?}", record); 97 | } 98 | Ok(()) 99 | } 100 | 101 | fn main() { 102 | if let Err(err) = example() { 103 | println!("error running example: {}", err); 104 | process::exit(1); 105 | } 106 | } 107 | ``` 108 | 109 | The above example can be run like so: 110 | 111 | ```ignore 112 | $ git clone git://github.com/BurntSushi/rust-csv 113 | $ cd rust-csv 114 | $ cargo run --example cookbook-read-serde < examples/data/smallpop.csv 115 | ``` 116 | 117 | # Reading: setting a different delimiter 118 | 119 | This example shows how to read CSV data from stdin where fields are separated 120 | by `:` instead of `,`. 121 | 122 | ```no_run 123 | # //cookbook-read-colon.rs 124 | use std::{error::Error, io, process}; 125 | 126 | fn example() -> Result<(), Box> { 127 | let mut rdr = csv::ReaderBuilder::new() 128 | .delimiter(b':') 129 | .from_reader(io::stdin()); 130 | for result in rdr.records() { 131 | let record = result?; 132 | println!("{:?}", record); 133 | } 134 | Ok(()) 135 | } 136 | 137 | fn main() { 138 | if let Err(err) = example() { 139 | println!("error running example: {}", err); 140 | process::exit(1); 141 | } 142 | } 143 | ``` 144 | 145 | The above example can be run like so: 146 | 147 | ```ignore 148 | $ git clone git://github.com/BurntSushi/rust-csv 149 | $ cd rust-csv 150 | $ cargo run --example cookbook-read-colon < examples/data/smallpop-colon.csv 151 | ``` 152 | 153 | # Reading: without headers 154 | 155 | The CSV reader in this crate assumes that CSV data has a header record by 156 | default, but the setting can be toggled. When enabled, the first record in 157 | CSV data in interpreted as the header record and is skipped. When disabled, the 158 | first record is not skipped. This example shows how to disable that setting. 
159 | 160 | ```no_run 161 | # //cookbook-read-no-headers.rs 162 | use std::{error::Error, io, process}; 163 | 164 | fn example() -> Result<(), Box> { 165 | let mut rdr = csv::ReaderBuilder::new() 166 | .has_headers(false) 167 | .from_reader(io::stdin()); 168 | for result in rdr.records() { 169 | let record = result?; 170 | println!("{:?}", record); 171 | } 172 | Ok(()) 173 | } 174 | 175 | fn main() { 176 | if let Err(err) = example() { 177 | println!("error running example: {}", err); 178 | process::exit(1); 179 | } 180 | } 181 | ``` 182 | 183 | The above example can be run like so: 184 | 185 | ```ignore 186 | $ git clone git://github.com/BurntSushi/rust-csv 187 | $ cd rust-csv 188 | $ cargo run --example cookbook-read-no-headers < examples/data/smallpop-no-headers.csv 189 | ``` 190 | 191 | # Writing: basic 192 | 193 | This example shows how to write CSV data to stdout. 194 | 195 | ```no_run 196 | # //cookbook-write-basic.rs 197 | use std::{error::Error, io, process}; 198 | 199 | fn example() -> Result<(), Box> { 200 | let mut wtr = csv::Writer::from_writer(io::stdout()); 201 | 202 | // When writing records without Serde, the header record is written just 203 | // like any other record. 204 | wtr.write_record(["city", "region", "country", "population"])?; 205 | wtr.write_record(["Southborough", "MA", "United States", "9686"])?; 206 | wtr.write_record(["Northbridge", "MA", "United States", "14061"])?; 207 | wtr.flush()?; 208 | Ok(()) 209 | } 210 | 211 | fn main() { 212 | if let Err(err) = example() { 213 | println!("error running example: {}", err); 214 | process::exit(1); 215 | } 216 | } 217 | ``` 218 | 219 | The above example can be run like so: 220 | 221 | ```ignore 222 | $ git clone git://github.com/BurntSushi/rust-csv 223 | $ cd rust-csv 224 | $ cargo run --example cookbook-write-basic > /tmp/simplepop.csv 225 | ``` 226 | 227 | # Writing: with Serde 228 | 229 | This example shows how to write CSV data to stdout with Serde. 
Namely, we 230 | represent each record using a custom struct that we define. In this example, 231 | headers are written automatically. 232 | 233 | ```no_run 234 | # //cookbook-write-serde.rs 235 | use std::{error::Error, io, process}; 236 | 237 | use serde::Serialize; 238 | 239 | #[derive(Debug, Serialize)] 240 | struct Record { 241 | city: String, 242 | region: String, 243 | country: String, 244 | population: Option, 245 | } 246 | 247 | fn example() -> Result<(), Box> { 248 | let mut wtr = csv::Writer::from_writer(io::stdout()); 249 | 250 | // When writing records with Serde using structs, the header row is written 251 | // automatically. 252 | wtr.serialize(Record { 253 | city: "Southborough".to_string(), 254 | region: "MA".to_string(), 255 | country: "United States".to_string(), 256 | population: Some(9686), 257 | })?; 258 | wtr.serialize(Record { 259 | city: "Northbridge".to_string(), 260 | region: "MA".to_string(), 261 | country: "United States".to_string(), 262 | population: Some(14061), 263 | })?; 264 | wtr.flush()?; 265 | Ok(()) 266 | } 267 | 268 | fn main() { 269 | if let Err(err) = example() { 270 | println!("error running example: {}", err); 271 | process::exit(1); 272 | } 273 | } 274 | ``` 275 | 276 | The above example can be run like so: 277 | 278 | ```ignore 279 | $ git clone git://github.com/BurntSushi/rust-csv 280 | $ cd rust-csv 281 | $ cargo run --example cookbook-write-serde > /tmp/simplepop.csv 282 | ``` 283 | */ 284 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | /*! 2 | The `csv` crate provides a fast and flexible CSV reader and writer, with 3 | support for Serde. 4 | 5 | The [tutorial](tutorial/index.html) is a good place to start if you're new to 6 | Rust. 7 | 8 | The [cookbook](cookbook/index.html) will give you a variety of complete Rust 9 | programs that do CSV reading and writing. 
10 | 11 | # Brief overview 12 | 13 | **If you're new to Rust**, you might find the 14 | [tutorial](tutorial/index.html) 15 | to be a good place to start. 16 | 17 | The primary types in this crate are 18 | [`Reader`](struct.Reader.html) 19 | and 20 | [`Writer`](struct.Writer.html), 21 | for reading and writing CSV data respectively. 22 | Correspondingly, to support CSV data with custom field or record delimiters 23 | (among many other things), you should use either a 24 | [`ReaderBuilder`](struct.ReaderBuilder.html) 25 | or a 26 | [`WriterBuilder`](struct.WriterBuilder.html), 27 | depending on whether you're reading or writing CSV data. 28 | 29 | Unless you're using Serde, the standard CSV record types are 30 | [`StringRecord`](struct.StringRecord.html) 31 | and 32 | [`ByteRecord`](struct.ByteRecord.html). 33 | `StringRecord` should be used when you know your data to be valid UTF-8. 34 | For data that may be invalid UTF-8, `ByteRecord` is suitable. 35 | 36 | Finally, the set of errors is described by the 37 | [`Error`](struct.Error.html) 38 | type. 39 | 40 | The rest of the types in this crate mostly correspond to more detailed errors, 41 | position information, configuration knobs or iterator types. 42 | 43 | # Setup 44 | 45 | Run `cargo add csv` to add the latest version of the `csv` crate to your 46 | Cargo.toml. 47 | 48 | If you want to use Serde's custom derive functionality on your custom structs, 49 | then run `cargo add serde --features derive` to add the `serde` crate with its 50 | `derive` feature enabled to your `Cargo.toml`. 51 | 52 | # Example 53 | 54 | This example shows how to read CSV data from stdin and print each record to 55 | stdout. 56 | 57 | There are more examples in the [cookbook](cookbook/index.html). 58 | 59 | ```no_run 60 | use std::{error::Error, io, process}; 61 | 62 | fn example() -> Result<(), Box> { 63 | // Build the CSV reader and iterate over each record. 
64 | let mut rdr = csv::Reader::from_reader(io::stdin()); 65 | for result in rdr.records() { 66 | // The iterator yields Result, so we check the 67 | // error here. 68 | let record = result?; 69 | println!("{:?}", record); 70 | } 71 | Ok(()) 72 | } 73 | 74 | fn main() { 75 | if let Err(err) = example() { 76 | println!("error running example: {}", err); 77 | process::exit(1); 78 | } 79 | } 80 | ``` 81 | 82 | The above example can be run like so: 83 | 84 | ```ignore 85 | $ git clone git://github.com/BurntSushi/rust-csv 86 | $ cd rust-csv 87 | $ cargo run --example cookbook-read-basic < examples/data/smallpop.csv 88 | ``` 89 | 90 | # Example with Serde 91 | 92 | This example shows how to read CSV data from stdin into your own custom struct. 93 | By default, the member names of the struct are matched with the values in the 94 | header record of your CSV data. 95 | 96 | ```no_run 97 | use std::{error::Error, io, process}; 98 | 99 | #[derive(Debug, serde::Deserialize)] 100 | struct Record { 101 | city: String, 102 | region: String, 103 | country: String, 104 | population: Option, 105 | } 106 | 107 | fn example() -> Result<(), Box> { 108 | let mut rdr = csv::Reader::from_reader(io::stdin()); 109 | for result in rdr.deserialize() { 110 | // Notice that we need to provide a type hint for automatic 111 | // deserialization. 
112 | let record: Record = result?; 113 | println!("{:?}", record); 114 | } 115 | Ok(()) 116 | } 117 | 118 | fn main() { 119 | if let Err(err) = example() { 120 | println!("error running example: {}", err); 121 | process::exit(1); 122 | } 123 | } 124 | ``` 125 | 126 | The above example can be run like so: 127 | 128 | ```ignore 129 | $ git clone git://github.com/BurntSushi/rust-csv 130 | $ cd rust-csv 131 | $ cargo run --example cookbook-read-serde < examples/data/smallpop.csv 132 | ``` 133 | 134 | */ 135 | 136 | #![deny(missing_docs)] 137 | 138 | use std::result; 139 | 140 | use serde_core::{Deserialize, Deserializer}; 141 | 142 | pub use crate::{ 143 | byte_record::{ByteRecord, ByteRecordIter, Position}, 144 | deserializer::{DeserializeError, DeserializeErrorKind}, 145 | error::{ 146 | Error, ErrorKind, FromUtf8Error, IntoInnerError, Result, Utf8Error, 147 | }, 148 | reader::{ 149 | ByteRecordsIntoIter, ByteRecordsIter, DeserializeRecordsIntoIter, 150 | DeserializeRecordsIter, Reader, ReaderBuilder, StringRecordsIntoIter, 151 | StringRecordsIter, 152 | }, 153 | string_record::{StringRecord, StringRecordIter}, 154 | writer::{Writer, WriterBuilder}, 155 | }; 156 | 157 | mod byte_record; 158 | pub mod cookbook; 159 | mod debug; 160 | mod deserializer; 161 | mod error; 162 | mod reader; 163 | mod serializer; 164 | mod string_record; 165 | pub mod tutorial; 166 | mod writer; 167 | 168 | /// The quoting style to use when writing CSV data. 169 | #[derive(Clone, Copy, Debug, Default)] 170 | #[non_exhaustive] 171 | pub enum QuoteStyle { 172 | /// This puts quotes around every field. Always. 173 | Always, 174 | /// This puts quotes around fields only when necessary. 175 | /// 176 | /// They are necessary when fields contain a quote, delimiter or record 177 | /// terminator. Quotes are also necessary when writing an empty record 178 | /// (which is indistinguishable from a record with one empty field). 179 | /// 180 | /// This is the default. 
181 | #[default] 182 | Necessary, 183 | /// This puts quotes around all fields that are non-numeric. Namely, when 184 | /// writing a field that does not parse as a valid float or integer, then 185 | /// quotes will be used even if they aren't strictly necessary. 186 | NonNumeric, 187 | /// This *never* writes quotes, even if it would produce invalid CSV data. 188 | Never, 189 | } 190 | 191 | impl QuoteStyle { 192 | fn to_core(self) -> csv_core::QuoteStyle { 193 | match self { 194 | QuoteStyle::Always => csv_core::QuoteStyle::Always, 195 | QuoteStyle::Necessary => csv_core::QuoteStyle::Necessary, 196 | QuoteStyle::NonNumeric => csv_core::QuoteStyle::NonNumeric, 197 | QuoteStyle::Never => csv_core::QuoteStyle::Never, 198 | } 199 | } 200 | } 201 | 202 | /// A record terminator. 203 | /// 204 | /// Use this to specify the record terminator while parsing CSV. The default is 205 | /// CRLF, which treats `\r`, `\n` or `\r\n` as a single record terminator. 206 | #[derive(Clone, Copy, Debug, Default)] 207 | #[non_exhaustive] 208 | pub enum Terminator { 209 | /// Parses `\r`, `\n` or `\r\n` as a single record terminator. 210 | #[default] 211 | CRLF, 212 | /// Parses the byte given as a record terminator. 213 | Any(u8), 214 | } 215 | 216 | impl Terminator { 217 | /// Convert this to the csv_core type of the same name. 218 | fn to_core(self) -> csv_core::Terminator { 219 | match self { 220 | Terminator::CRLF => csv_core::Terminator::CRLF, 221 | Terminator::Any(b) => csv_core::Terminator::Any(b), 222 | } 223 | } 224 | } 225 | 226 | /// The whitespace preservation behaviour when reading CSV data. 227 | #[derive(Clone, Copy, Debug, Default, PartialEq)] 228 | #[non_exhaustive] 229 | pub enum Trim { 230 | /// Preserves fields and headers. This is the default. 231 | #[default] 232 | None, 233 | /// Trim whitespace from headers. 234 | Headers, 235 | /// Trim whitespace from fields, but not headers. 236 | Fields, 237 | /// Trim whitespace from fields and headers. 
238 | All, 239 | } 240 | 241 | impl Trim { 242 | fn should_trim_fields(&self) -> bool { 243 | self == &Trim::Fields || self == &Trim::All 244 | } 245 | 246 | fn should_trim_headers(&self) -> bool { 247 | self == &Trim::Headers || self == &Trim::All 248 | } 249 | } 250 | 251 | /// A custom Serde deserializer for possibly invalid `Option` fields. 252 | /// 253 | /// When deserializing CSV data, it is sometimes desirable to simply ignore 254 | /// fields with invalid data. For example, there might be a field that is 255 | /// usually a number, but will occasionally contain garbage data that causes 256 | /// number parsing to fail. 257 | /// 258 | /// You might be inclined to use, say, `Option` for fields such at this. 259 | /// By default, however, `Option` will either capture *empty* fields with 260 | /// `None` or valid numeric fields with `Some(the_number)`. If the field is 261 | /// non-empty and not a valid number, then deserialization will return an error 262 | /// instead of using `None`. 263 | /// 264 | /// This function allows you to override this default behavior. Namely, if 265 | /// `Option` is deserialized with non-empty but invalid data, then the value 266 | /// will be `None` and the error will be ignored. 267 | /// 268 | /// # Example 269 | /// 270 | /// This example shows how to parse CSV records with numerical data, even if 271 | /// some numerical data is absent or invalid. Without the 272 | /// `serde(deserialize_with = "...")` annotations, this example would return 273 | /// an error. 
274 | /// 275 | /// ``` 276 | /// use std::error::Error; 277 | /// 278 | /// #[derive(Debug, serde::Deserialize, Eq, PartialEq)] 279 | /// struct Row { 280 | /// #[serde(deserialize_with = "csv::invalid_option")] 281 | /// a: Option, 282 | /// #[serde(deserialize_with = "csv::invalid_option")] 283 | /// b: Option, 284 | /// #[serde(deserialize_with = "csv::invalid_option")] 285 | /// c: Option, 286 | /// } 287 | /// 288 | /// # fn main() { example().unwrap(); } 289 | /// fn example() -> Result<(), Box> { 290 | /// let data = "\ 291 | /// a,b,c 292 | /// 5,\"\",xyz 293 | /// "; 294 | /// let mut rdr = csv::Reader::from_reader(data.as_bytes()); 295 | /// if let Some(result) = rdr.deserialize().next() { 296 | /// let record: Row = result?; 297 | /// assert_eq!(record, Row { a: Some(5), b: None, c: None }); 298 | /// Ok(()) 299 | /// } else { 300 | /// Err(From::from("expected at least one record but got none")) 301 | /// } 302 | /// } 303 | /// ``` 304 | pub fn invalid_option<'de, D, T>(de: D) -> result::Result, D::Error> 305 | where 306 | D: Deserializer<'de>, 307 | Option: Deserialize<'de>, 308 | { 309 | Option::::deserialize(de).or_else(|_| Ok(None)) 310 | } 311 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error as StdError, fmt, io, result}; 2 | 3 | use crate::{ 4 | byte_record::{ByteRecord, Position}, 5 | deserializer::DeserializeError, 6 | }; 7 | 8 | /// A type alias for `Result`. 9 | pub type Result = result::Result; 10 | 11 | /// An error that can occur when processing CSV data. 12 | /// 13 | /// This error can happen when writing or reading CSV data. 14 | /// 15 | /// There are some important scenarios where an error is impossible to occur. 
16 | /// For example, if a CSV reader is used on an in-memory buffer with the 17 | /// `flexible` option enabled and one is reading records as raw byte strings, 18 | /// then no error can occur. 19 | #[derive(Debug)] 20 | pub struct Error(Box); 21 | 22 | impl Error { 23 | /// A crate private constructor for `Error`. 24 | pub(crate) fn new(kind: ErrorKind) -> Error { 25 | Error(Box::new(kind)) 26 | } 27 | 28 | /// Return the specific type of this error. 29 | pub fn kind(&self) -> &ErrorKind { 30 | &self.0 31 | } 32 | 33 | /// Unwrap this error into its underlying type. 34 | pub fn into_kind(self) -> ErrorKind { 35 | *self.0 36 | } 37 | 38 | /// Returns true if this is an I/O error. 39 | /// 40 | /// If this is true, the underlying `ErrorKind` is guaranteed to be 41 | /// `ErrorKind::Io`. 42 | pub fn is_io_error(&self) -> bool { 43 | matches!(*self.0, ErrorKind::Io(_)) 44 | } 45 | 46 | /// Return the position for this error, if one exists. 47 | /// 48 | /// This is a convenience function that permits callers to easily access 49 | /// the position on an error without doing case analysis on `ErrorKind`. 50 | pub fn position(&self) -> Option<&Position> { 51 | self.0.position() 52 | } 53 | } 54 | 55 | /// The specific type of an error. 56 | #[derive(Debug)] 57 | #[non_exhaustive] 58 | pub enum ErrorKind { 59 | /// An I/O error that occurred while reading CSV data. 60 | Io(io::Error), 61 | /// A UTF-8 decoding error that occured while reading CSV data into Rust 62 | /// `String`s. 63 | Utf8 { 64 | /// The position of the record in which this error occurred, if 65 | /// available. 66 | pos: Option, 67 | /// The corresponding UTF-8 error. 68 | err: Utf8Error, 69 | }, 70 | /// This error occurs when two records with an unequal number of fields 71 | /// are found. This error only occurs when the `flexible` option in a 72 | /// CSV reader/writer is disabled. 
73 | UnequalLengths { 74 | /// The position of the first record with an unequal number of fields 75 | /// to the previous record, if available. 76 | pos: Option, 77 | /// The expected number of fields in a record. This is the number of 78 | /// fields in the record read prior to the record indicated by 79 | /// `pos`. 80 | expected_len: u64, 81 | /// The number of fields in the bad record. 82 | len: u64, 83 | }, 84 | /// This error occurs when either the `byte_headers` or `headers` methods 85 | /// are called on a CSV reader that was asked to `seek` before it parsed 86 | /// the first record. 87 | Seek, 88 | /// An error of this kind occurs only when using the Serde serializer. 89 | Serialize(String), 90 | /// An error of this kind occurs only when performing automatic 91 | /// deserialization with serde. 92 | Deserialize { 93 | /// The position of this error, if available. 94 | pos: Option, 95 | /// The deserialization error. 96 | err: DeserializeError, 97 | }, 98 | } 99 | 100 | impl ErrorKind { 101 | /// Return the position for this error, if one exists. 102 | /// 103 | /// This is a convenience function that permits callers to easily access 104 | /// the position on an error without doing case analysis on `ErrorKind`. 105 | pub fn position(&self) -> Option<&Position> { 106 | match *self { 107 | ErrorKind::Utf8 { ref pos, .. } => pos.as_ref(), 108 | ErrorKind::UnequalLengths { ref pos, .. } => pos.as_ref(), 109 | ErrorKind::Deserialize { ref pos, .. 
} => pos.as_ref(), 110 | _ => None, 111 | } 112 | } 113 | } 114 | 115 | impl From for Error { 116 | fn from(err: io::Error) -> Error { 117 | Error::new(ErrorKind::Io(err)) 118 | } 119 | } 120 | 121 | impl From for io::Error { 122 | fn from(err: Error) -> io::Error { 123 | io::Error::new(io::ErrorKind::Other, err) 124 | } 125 | } 126 | 127 | impl StdError for Error {} 128 | 129 | impl fmt::Display for Error { 130 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 131 | match *self.0 { 132 | ErrorKind::Io(ref err) => err.fmt(f), 133 | ErrorKind::Utf8 { pos: None, ref err } => { 134 | write!(f, "CSV parse error: field {}: {}", err.field(), err) 135 | } 136 | ErrorKind::Utf8 { pos: Some(ref pos), ref err } => write!( 137 | f, 138 | "CSV parse error: record {} \ 139 | (line {}, field: {}, byte: {}): {}", 140 | pos.record(), 141 | pos.line(), 142 | err.field(), 143 | pos.byte(), 144 | err 145 | ), 146 | ErrorKind::UnequalLengths { pos: None, expected_len, len } => { 147 | write!( 148 | f, 149 | "CSV error: \ 150 | found record with {} fields, but the previous record \ 151 | has {} fields", 152 | len, expected_len 153 | ) 154 | } 155 | ErrorKind::UnequalLengths { 156 | pos: Some(ref pos), 157 | expected_len, 158 | len, 159 | } => write!( 160 | f, 161 | "CSV error: record {} (line: {}, byte: {}): \ 162 | found record with {} fields, but the previous record \ 163 | has {} fields", 164 | pos.record(), 165 | pos.line(), 166 | pos.byte(), 167 | len, 168 | expected_len 169 | ), 170 | ErrorKind::Seek => write!( 171 | f, 172 | "CSV error: cannot access headers of CSV data \ 173 | when the parser was seeked before the first record \ 174 | could be read" 175 | ), 176 | ErrorKind::Serialize(ref err) => { 177 | write!(f, "CSV write error: {}", err) 178 | } 179 | ErrorKind::Deserialize { pos: None, ref err } => { 180 | write!(f, "CSV deserialize error: {}", err) 181 | } 182 | ErrorKind::Deserialize { pos: Some(ref pos), ref err } => write!( 183 | f, 184 | "CSV deserialize error: 
record {} \ 185 | (line: {}, byte: {}): {}", 186 | pos.record(), 187 | pos.line(), 188 | pos.byte(), 189 | err 190 | ), 191 | } 192 | } 193 | } 194 | 195 | /// A UTF-8 validation error during record conversion. 196 | /// 197 | /// This occurs when attempting to convert a `ByteRecord` into a 198 | /// `StringRecord`. 199 | #[derive(Clone, Debug, Eq, PartialEq)] 200 | pub struct FromUtf8Error { 201 | record: ByteRecord, 202 | err: Utf8Error, 203 | } 204 | 205 | impl FromUtf8Error { 206 | /// Create a new FromUtf8Error. 207 | pub(crate) fn new(record: ByteRecord, err: Utf8Error) -> FromUtf8Error { 208 | FromUtf8Error { record, err } 209 | } 210 | 211 | /// Access the underlying `ByteRecord` that failed UTF-8 validation. 212 | pub fn into_byte_record(self) -> ByteRecord { 213 | self.record 214 | } 215 | 216 | /// Access the underlying UTF-8 validation error. 217 | pub fn utf8_error(&self) -> &Utf8Error { 218 | &self.err 219 | } 220 | } 221 | 222 | impl fmt::Display for FromUtf8Error { 223 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 224 | self.err.fmt(f) 225 | } 226 | } 227 | 228 | impl StdError for FromUtf8Error { 229 | fn source(&self) -> Option<&(dyn StdError + 'static)> { 230 | Some(&self.err) 231 | } 232 | } 233 | 234 | /// A UTF-8 validation error. 235 | /// 236 | /// This occurs when attempting to convert a `ByteRecord` into a 237 | /// `StringRecord`. 238 | /// 239 | /// The error includes the index of the field that failed validation, and the 240 | /// last byte at which valid UTF-8 was verified. 241 | #[derive(Clone, Debug, Eq, PartialEq)] 242 | pub struct Utf8Error { 243 | /// The field index of a byte record in which UTF-8 validation failed. 244 | field: usize, 245 | /// The index into the given field up to which valid UTF-8 was verified. 246 | valid_up_to: usize, 247 | } 248 | 249 | /// Create a new UTF-8 error. 
250 | pub fn new_utf8_error(field: usize, valid_up_to: usize) -> Utf8Error { 251 | Utf8Error { field, valid_up_to } 252 | } 253 | 254 | impl Utf8Error { 255 | /// The field index of a byte record in which UTF-8 validation failed. 256 | pub fn field(&self) -> usize { 257 | self.field 258 | } 259 | /// The index into the given field up to which valid UTF-8 was verified. 260 | pub fn valid_up_to(&self) -> usize { 261 | self.valid_up_to 262 | } 263 | } 264 | 265 | impl StdError for Utf8Error {} 266 | 267 | impl fmt::Display for Utf8Error { 268 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 269 | write!( 270 | f, 271 | "invalid utf-8: invalid UTF-8 in field {} near byte index {}", 272 | self.field, self.valid_up_to 273 | ) 274 | } 275 | } 276 | 277 | /// `IntoInnerError` occurs when consuming a `Writer` fails. 278 | /// 279 | /// Consuming the `Writer` causes a flush to happen. If the flush fails, then 280 | /// this error is returned, which contains both the original `Writer` and 281 | /// the error that occurred. 282 | /// 283 | /// The type parameter `W` is the unconsumed writer. 284 | pub struct IntoInnerError { 285 | wtr: W, 286 | err: io::Error, 287 | } 288 | 289 | impl IntoInnerError { 290 | /// Creates a new `IntoInnerError`. 291 | /// 292 | /// (This is a visibility hack. It's public in this module, but not in the 293 | /// crate.) 294 | pub(crate) fn new(wtr: W, err: io::Error) -> IntoInnerError { 295 | IntoInnerError { wtr, err } 296 | } 297 | 298 | /// Returns the error which caused the call to `into_inner` to fail. 299 | /// 300 | /// This error was returned when attempting to flush the internal buffer. 301 | pub fn error(&self) -> &io::Error { 302 | &self.err 303 | } 304 | 305 | /// Consumes the [`IntoInnerError`] and returns the error which caused the 306 | /// call to [`Writer::into_inner`](crate::Writer::into_inner) to fail. 
307 | /// 308 | /// Unlike [`IntoInnerError::error`], this can be used to obtain ownership 309 | /// of the underlying error. 310 | pub fn into_error(self) -> io::Error { 311 | self.err 312 | } 313 | 314 | /// Returns the underlying writer which generated the error. 315 | /// 316 | /// The returned value can be used for error recovery, such as 317 | /// re-inspecting the buffer. 318 | pub fn into_inner(self) -> W { 319 | self.wtr 320 | } 321 | } 322 | 323 | impl StdError for IntoInnerError {} 324 | 325 | impl fmt::Display for IntoInnerError { 326 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 327 | self.err.fmt(f) 328 | } 329 | } 330 | 331 | impl fmt::Debug for IntoInnerError { 332 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 333 | self.err.fmt(f) 334 | } 335 | } 336 | -------------------------------------------------------------------------------- /csv-index/src/simple.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | 3 | use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; 4 | use csv; 5 | 6 | /// A simple index for random access to CSV records. 7 | /// 8 | /// This index permits seeking to the start of any CSV record with a constant 9 | /// number of operations. 10 | /// 11 | /// The format of the index is simplistic and amenable to serializing to disk. 12 | /// It consists of exactly `N+1` 64 bit big-endian integers, where `N` is the 13 | /// number of records in the CSV data that is indexed. Each `i`th integer 14 | /// corresponds to the approximate byte offset where the `i`th record in the 15 | /// CSV data begins. One additional integer is written to the end of the index 16 | /// which indicates the total number of records in the CSV data. 
17 | /// 18 | /// This indexing format does not store the line numbers of CSV records, so 19 | /// using the positions returned by this index to seek a CSV reader will likely 20 | /// cause any future line numbers reported by that reader to be incorrect. 21 | /// 22 | /// This format will never change. 23 | /// 24 | /// N.B. The format of this indexing scheme matches the format of the old the 25 | /// `csv::Indexed` type in pre-1.0 versions of the `csv` crate. 26 | pub struct RandomAccessSimple { 27 | rdr: R, 28 | len: u64, 29 | } 30 | 31 | impl RandomAccessSimple { 32 | /// Write a simple index to the given writer for the given CSV reader. 33 | /// 34 | /// If there was a problem reading CSV records or writing to the given 35 | /// writer, then an error is returned. 36 | /// 37 | /// That the given CSV reader is read as given until EOF. The index 38 | /// produced includes all records, including the first record even if the 39 | /// CSV reader is configured to interpret the first record as a header 40 | /// record. 41 | /// 42 | /// # Example: in memory index 43 | /// 44 | /// This example shows how to create a simple random access index, open it 45 | /// and query the number of records in the index. 
46 | /// 47 | /// ``` 48 | /// use std::io; 49 | /// use csv_index::RandomAccessSimple; 50 | /// 51 | /// # fn main() { example().unwrap(); } 52 | /// fn example() -> csv::Result<()> { 53 | /// let data = "\ 54 | /// city,country,pop 55 | /// Boston,United States,4628910 56 | /// Concord,United States,42695 57 | /// "; 58 | /// let mut rdr = csv::Reader::from_reader(data.as_bytes()); 59 | /// let mut wtr = io::Cursor::new(vec![]); 60 | /// RandomAccessSimple::create(&mut rdr, &mut wtr)?; 61 | /// 62 | /// let idx = RandomAccessSimple::open(wtr)?; 63 | /// assert_eq!(idx.len(), 3); 64 | /// Ok(()) 65 | /// } 66 | /// ``` 67 | /// 68 | /// # Example: file backed index 69 | /// 70 | /// This is like the previous example, but instead of creating the index 71 | /// in memory with `std::io::Cursor`, we write the index to a file. 72 | /// 73 | /// ```no_run 74 | /// use std::fs::File; 75 | /// use std::io; 76 | /// use csv_index::RandomAccessSimple; 77 | /// 78 | /// # fn main() { example().unwrap(); } 79 | /// fn example() -> csv::Result<()> { 80 | /// let data = "\ 81 | /// city,country,pop 82 | /// Boston,United States,4628910 83 | /// Concord,United States,42695 84 | /// "; 85 | /// let mut rdr = csv::Reader::from_reader(data.as_bytes()); 86 | /// let mut wtr = File::create("data.csv.idx")?; 87 | /// RandomAccessSimple::create(&mut rdr, &mut wtr)?; 88 | /// 89 | /// let fileidx = File::open("data.csv.idx")?; 90 | /// let idx = RandomAccessSimple::open(fileidx)?; 91 | /// assert_eq!(idx.len(), 3); 92 | /// Ok(()) 93 | /// } 94 | /// ``` 95 | pub fn create( 96 | rdr: &mut csv::Reader, 97 | mut wtr: W, 98 | ) -> csv::Result<()> { 99 | // If the reader is configured to read a header, then read that 100 | // first. (The CSV reader otherwise won't yield the header record 101 | // when calling `read_byte_record`.) 
102 | let mut len = 0; 103 | if rdr.has_headers() { 104 | let header = rdr.byte_headers()?; 105 | if !header.is_empty() { 106 | let pos = header.position().expect("position on header row"); 107 | wtr.write_u64::(pos.byte())?; 108 | len += 1; 109 | } 110 | } 111 | let mut record = csv::ByteRecord::new(); 112 | while rdr.read_byte_record(&mut record)? { 113 | let pos = record.position().expect("position on row"); 114 | wtr.write_u64::(pos.byte())?; 115 | len += 1; 116 | } 117 | wtr.write_u64::(len)?; 118 | Ok(()) 119 | } 120 | } 121 | 122 | impl RandomAccessSimple { 123 | /// Open an existing simple CSV index. 124 | /// 125 | /// The reader given must be seekable and should contain an index written 126 | /// by `RandomAccessSimple::create`. 127 | /// 128 | /// # Example 129 | /// 130 | /// This example shows how to create a simple random access index, open it 131 | /// and query the number of records in the index. 132 | /// 133 | /// ``` 134 | /// use std::io; 135 | /// use csv_index::RandomAccessSimple; 136 | /// 137 | /// # fn main() { example().unwrap(); } 138 | /// fn example() -> csv::Result<()> { 139 | /// let data = "\ 140 | /// city,country,pop 141 | /// Boston,United States,4628910 142 | /// Concord,United States,42695 143 | /// "; 144 | /// let mut rdr = csv::Reader::from_reader(data.as_bytes()); 145 | /// let mut wtr = io::Cursor::new(vec![]); 146 | /// RandomAccessSimple::create(&mut rdr, &mut wtr)?; 147 | /// 148 | /// let idx = RandomAccessSimple::open(wtr)?; 149 | /// assert_eq!(idx.len(), 3); 150 | /// Ok(()) 151 | /// } 152 | /// ``` 153 | pub fn open(mut rdr: R) -> csv::Result> { 154 | rdr.seek(io::SeekFrom::End(-8))?; 155 | let len = rdr.read_u64::()?; 156 | Ok(RandomAccessSimple { rdr: rdr, len: len }) 157 | } 158 | 159 | /// Get the position of the record at index `i`. 160 | /// 161 | /// The first record has index `0`. 
162 | /// 163 | /// If the position returned is used to seek the CSV reader that was used 164 | /// to create this index, then the next record read by the CSV reader will 165 | /// be the `i`th record. 166 | /// 167 | /// Note that since this index does not store the line number of each 168 | /// record, the position returned will always have a line number equivalent 169 | /// to `1`. This in turn will cause the CSV reader to report all subsequent 170 | /// line numbers incorrectly. 171 | /// 172 | /// # Example 173 | /// 174 | /// This example shows how to create a simple random access index, open it 175 | /// and use it to seek a CSV reader to read an arbitrary record. 176 | /// 177 | /// ``` 178 | /// use std::error::Error; 179 | /// use std::io; 180 | /// use csv_index::RandomAccessSimple; 181 | /// 182 | /// # fn main() { example().unwrap(); } 183 | /// fn example() -> Result<(), Box> { 184 | /// let data = "\ 185 | /// city,country,pop 186 | /// Boston,United States,4628910 187 | /// Concord,United States,42695 188 | /// "; 189 | /// // Note that we wrap our CSV data in an io::Cursor, which makes it 190 | /// // seekable. If you're opening CSV data from a file, then this is 191 | /// // not needed since a `File` is already seekable. 192 | /// let mut rdr = csv::Reader::from_reader(io::Cursor::new(data)); 193 | /// let mut wtr = io::Cursor::new(vec![]); 194 | /// RandomAccessSimple::create(&mut rdr, &mut wtr)?; 195 | /// 196 | /// // Open the index we just created, get the position of the last 197 | /// // record and seek the CSV reader. 198 | /// let mut idx = RandomAccessSimple::open(wtr)?; 199 | /// let pos = idx.get(2)?; 200 | /// rdr.seek(pos)?; 201 | /// 202 | /// // Read the next record. 
203 | /// if let Some(result) = rdr.records().next() { 204 | /// let record = result?; 205 | /// assert_eq!(record, vec!["Concord", "United States", "42695"]); 206 | /// Ok(()) 207 | /// } else { 208 | /// Err(From::from("expected at least one record but got none")) 209 | /// } 210 | /// } 211 | /// ``` 212 | pub fn get(&mut self, i: u64) -> csv::Result { 213 | if i >= self.len { 214 | let msg = format!( 215 | "invalid record index {} (there are {} records)", 216 | i, self.len 217 | ); 218 | let err = io::Error::new(io::ErrorKind::Other, msg); 219 | return Err(csv::Error::from(err)); 220 | } 221 | self.rdr.seek(io::SeekFrom::Start(i * 8))?; 222 | let offset = self.rdr.read_u64::()?; 223 | let mut pos = csv::Position::new(); 224 | pos.set_byte(offset).set_record(i); 225 | Ok(pos) 226 | } 227 | 228 | /// Return the number of records (including the header record) in this 229 | /// index. 230 | pub fn len(&self) -> u64 { 231 | self.len 232 | } 233 | 234 | /// Return true if and only if this index has zero records. 
235 | pub fn is_empty(&self) -> bool { 236 | self.len() == 0 237 | } 238 | } 239 | 240 | #[cfg(test)] 241 | mod tests { 242 | use std::io; 243 | 244 | use csv; 245 | 246 | use super::RandomAccessSimple; 247 | 248 | struct Indexed<'a> { 249 | csv: csv::Reader>, 250 | idx: RandomAccessSimple>>, 251 | } 252 | 253 | impl<'a> Indexed<'a> { 254 | fn new(headers: bool, csv_data: &'a str) -> Indexed<'a> { 255 | let mut rdr = csv::ReaderBuilder::new() 256 | .has_headers(headers) 257 | .from_reader(io::Cursor::new(csv_data)); 258 | let mut idxbuf = io::Cursor::new(vec![]); 259 | RandomAccessSimple::create(&mut rdr, &mut idxbuf).unwrap(); 260 | Indexed { 261 | csv: rdr, 262 | idx: RandomAccessSimple::open(idxbuf).unwrap(), 263 | } 264 | } 265 | 266 | fn read_at(&mut self, record: u64) -> csv::StringRecord { 267 | let pos = self.idx.get(record).unwrap(); 268 | self.csv.seek(pos).unwrap(); 269 | self.csv.records().next().unwrap().unwrap() 270 | } 271 | } 272 | 273 | #[test] 274 | fn headers_empty() { 275 | let idx = Indexed::new(true, ""); 276 | assert_eq!(idx.idx.len(), 0); 277 | } 278 | 279 | #[test] 280 | fn headers_one_field() { 281 | let mut idx = Indexed::new(true, "h1\na\nb\nc\n"); 282 | assert_eq!(idx.idx.len(), 4); 283 | assert_eq!(idx.read_at(0), vec!["h1"]); 284 | assert_eq!(idx.read_at(1), vec!["a"]); 285 | assert_eq!(idx.read_at(2), vec!["b"]); 286 | assert_eq!(idx.read_at(3), vec!["c"]); 287 | } 288 | 289 | #[test] 290 | fn headers_many_fields() { 291 | let mut idx = Indexed::new( 292 | true, 293 | "\ 294 | h1,h2,h3 295 | a,b,c 296 | d,e,f 297 | g,h,i 298 | ", 299 | ); 300 | assert_eq!(idx.idx.len(), 4); 301 | assert_eq!(idx.read_at(0), vec!["h1", "h2", "h3"]); 302 | assert_eq!(idx.read_at(1), vec!["a", "b", "c"]); 303 | assert_eq!(idx.read_at(2), vec!["d", "e", "f"]); 304 | assert_eq!(idx.read_at(3), vec!["g", "h", "i"]); 305 | } 306 | 307 | #[test] 308 | fn no_headers_one_field() { 309 | let mut idx = Indexed::new(false, "h1\na\nb\nc\n"); 310 | 
assert_eq!(idx.idx.len(), 4); 311 | assert_eq!(idx.read_at(0), vec!["h1"]); 312 | assert_eq!(idx.read_at(1), vec!["a"]); 313 | assert_eq!(idx.read_at(2), vec!["b"]); 314 | assert_eq!(idx.read_at(3), vec!["c"]); 315 | } 316 | 317 | #[test] 318 | fn no_headers_many_fields() { 319 | let mut idx = Indexed::new( 320 | false, 321 | "\ 322 | h1,h2,h3 323 | a,b,c 324 | d,e,f 325 | g,h,i 326 | ", 327 | ); 328 | assert_eq!(idx.idx.len(), 4); 329 | assert_eq!(idx.read_at(0), vec!["h1", "h2", "h3"]); 330 | assert_eq!(idx.read_at(1), vec!["a", "b", "c"]); 331 | assert_eq!(idx.read_at(2), vec!["d", "e", "f"]); 332 | assert_eq!(idx.read_at(3), vec!["g", "h", "i"]); 333 | } 334 | 335 | #[test] 336 | fn headers_one_field_newlines() { 337 | let mut idx = Indexed::new( 338 | true, 339 | " 340 | 341 | 342 | 343 | 344 | h1 345 | 346 | a 347 | 348 | 349 | b 350 | 351 | 352 | 353 | 354 | 355 | 356 | c 357 | 358 | 359 | 360 | 361 | 362 | 363 | ", 364 | ); 365 | assert_eq!(idx.idx.len(), 4); 366 | assert_eq!(idx.read_at(0), vec!["h1"]); 367 | assert_eq!(idx.read_at(1), vec!["a"]); 368 | assert_eq!(idx.read_at(2), vec!["b"]); 369 | assert_eq!(idx.read_at(3), vec!["c"]); 370 | } 371 | } 372 | -------------------------------------------------------------------------------- /benches/bench.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | 5 | use std::io; 6 | 7 | use serde::{de::DeserializeOwned, Deserialize, Serialize}; 8 | use test::Bencher; 9 | 10 | use csv::{ 11 | ByteRecord, Reader, ReaderBuilder, StringRecord, Trim, Writer, 12 | WriterBuilder, 13 | }; 14 | 15 | static NFL: &str = include_str!("../examples/data/bench/nfl.csv"); 16 | static GAME: &str = include_str!("../examples/data/bench/game.csv"); 17 | static POP: &str = include_str!("../examples/data/bench/worldcitiespop.csv"); 18 | static MBTA: &str = 19 | include_str!("../examples/data/bench/gtfs-mbta-stop-times.csv"); 20 | 21 | #[derive(Debug, 
Serialize, Deserialize, PartialEq)] 22 | struct NFLRowOwned { 23 | gameid: String, 24 | qtr: i32, 25 | min: Option, 26 | sec: Option, 27 | off: String, 28 | def: String, 29 | down: Option, 30 | togo: Option, 31 | ydline: Option, 32 | description: String, 33 | offscore: i32, 34 | defscore: i32, 35 | season: i32, 36 | } 37 | 38 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 39 | struct NFLRowBorrowed<'a> { 40 | gameid: &'a str, 41 | qtr: i32, 42 | min: Option, 43 | sec: Option, 44 | off: &'a str, 45 | def: &'a str, 46 | down: Option, 47 | togo: Option, 48 | ydline: Option, 49 | description: &'a str, 50 | offscore: i32, 51 | defscore: i32, 52 | season: i32, 53 | } 54 | 55 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 56 | struct GAMERowOwned(String, String, String, String, i32, String); 57 | 58 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 59 | struct GAMERowBorrowed<'a>(&'a str, &'a str, &'a str, &'a str, i32, &'a str); 60 | 61 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 62 | #[serde(rename_all = "PascalCase")] 63 | struct POPRowOwned { 64 | country: String, 65 | city: String, 66 | accent_city: String, 67 | region: String, 68 | population: Option, 69 | latitude: f64, 70 | longitude: f64, 71 | } 72 | 73 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 74 | #[serde(rename_all = "PascalCase")] 75 | struct POPRowBorrowed<'a> { 76 | country: &'a str, 77 | city: &'a str, 78 | accent_city: &'a str, 79 | region: &'a str, 80 | population: Option, 81 | latitude: f64, 82 | longitude: f64, 83 | } 84 | 85 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 86 | struct MBTARowOwned { 87 | trip_id: String, 88 | arrival_time: String, 89 | departure_time: String, 90 | stop_id: String, 91 | stop_sequence: i32, 92 | stop_headsign: String, 93 | pickup_type: i32, 94 | drop_off_type: i32, 95 | timepoint: i32, 96 | } 97 | 98 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 99 | struct MBTARowBorrowed<'a> { 100 | trip_id: &'a str, 101 | 
arrival_time: &'a str, 102 | departure_time: &'a str, 103 | stop_id: &'a str, 104 | stop_sequence: i32, 105 | stop_headsign: &'a str, 106 | pickup_type: i32, 107 | drop_off_type: i32, 108 | timepoint: i32, 109 | } 110 | 111 | #[derive(Default)] 112 | struct ByteCounter { 113 | count: usize, 114 | } 115 | impl io::Write for ByteCounter { 116 | fn write(&mut self, data: &[u8]) -> io::Result { 117 | self.count += data.len(); 118 | Ok(data.len()) 119 | } 120 | fn flush(&mut self) -> io::Result<()> { 121 | Ok(()) 122 | } 123 | } 124 | 125 | macro_rules! bench { 126 | ($name:ident, $data:ident, $counter:ident, $result:expr) => { 127 | #[bench] 128 | fn $name(b: &mut Bencher) { 129 | let data = $data.as_bytes(); 130 | b.bytes = data.len() as u64; 131 | b.iter(|| { 132 | let mut rdr = 133 | ReaderBuilder::new().has_headers(false).from_reader(data); 134 | assert_eq!($counter(&mut rdr), $result); 135 | }) 136 | } 137 | }; 138 | } 139 | 140 | macro_rules! bench_trimmed { 141 | ($name:ident, $data:ident, $counter:ident, $result:expr) => { 142 | #[bench] 143 | fn $name(b: &mut Bencher) { 144 | let data = $data.as_bytes(); 145 | b.bytes = data.len() as u64; 146 | b.iter(|| { 147 | let mut rdr = ReaderBuilder::new() 148 | .has_headers(false) 149 | .trim(Trim::All) 150 | .from_reader(data); 151 | assert_eq!($counter(&mut rdr), $result); 152 | }) 153 | } 154 | }; 155 | } 156 | 157 | macro_rules! 
bench_serde {
    // `no_headers` variant: the data's first row is a record, not a header.
    (no_headers,
     $name_de:ident, $name_ser:ident, $data:ident, $counter:ident, $type:ty, $result:expr) => {
        #[bench]
        fn $name_de(b: &mut Bencher) {
            let data = $data.as_bytes();
            b.bytes = data.len() as u64;
            b.iter(|| {
                let mut rdr =
                    ReaderBuilder::new().has_headers(false).from_reader(data);
                assert_eq!($counter::<_, $type>(&mut rdr), $result);
            })
        }
        #[bench]
        fn $name_ser(b: &mut Bencher) {
            let data = $data.as_bytes();
            let values = ReaderBuilder::new()
                .has_headers(false)
                .from_reader(data)
                .deserialize()
                .collect::<Result<Vec<$type>, _>>()
                .unwrap();

            let do_it = || {
                let mut counter = ByteCounter::default();
                {
                    let mut wtr = WriterBuilder::new()
                        .has_headers(false)
                        .from_writer(&mut counter);
                    for val in &values {
                        wtr.serialize(val).unwrap();
                    }
                }
                counter.count
            };
            // Throughput is measured in serialized output bytes.
            b.bytes = do_it() as u64;
            b.iter(do_it)
        }
    };
    ($name_de:ident, $name_ser:ident, $data:ident, $counter:ident, $type:ty, $result:expr) => {
        #[bench]
        fn $name_de(b: &mut Bencher) {
            let data = $data.as_bytes();
            b.bytes = data.len() as u64;
            b.iter(|| {
                let mut rdr =
                    ReaderBuilder::new().has_headers(true).from_reader(data);
                assert_eq!($counter::<_, $type>(&mut rdr), $result);
            })
        }
        #[bench]
        fn $name_ser(b: &mut Bencher) {
            let data = $data.as_bytes();
            let values = ReaderBuilder::new()
                .has_headers(true)
                .from_reader(data)
                .deserialize()
                .collect::<Result<Vec<$type>, _>>()
                .unwrap();

            let do_it = || {
                let mut counter = ByteCounter::default();
                {
                    let mut wtr = WriterBuilder::new()
                        .has_headers(true)
                        .from_writer(&mut counter);
                    for val in &values {
                        wtr.serialize(val).unwrap();
                    }
                }
                counter.count
            };
            b.bytes = do_it() as u64;
            b.iter(do_it)
        }
    };
}

// Deserialize into a borrowed type directly from a reused `ByteRecord`.
macro_rules! bench_serde_borrowed_bytes {
    ($name:ident, $data:ident, $type:ty, $headers:expr, $result:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
            let data = $data.as_bytes();
            b.bytes = data.len() as u64;
            b.iter(|| {
                let mut rdr = ReaderBuilder::new()
                    .has_headers($headers)
                    .from_reader(data);
                let mut count = 0;
                let mut rec = ByteRecord::new();
                while rdr.read_byte_record(&mut rec).unwrap() {
                    let _: $type = rec.deserialize(None).unwrap();
                    count += 1;
                }
                count
            })
        }
    };
}

// As above, but through a UTF-8 validated `StringRecord`.
macro_rules! bench_serde_borrowed_str {
    ($name:ident, $data:ident, $type:ty, $headers:expr, $result:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
            let data = $data.as_bytes();
            b.bytes = data.len() as u64;
            b.iter(|| {
                let mut rdr = ReaderBuilder::new()
                    .has_headers($headers)
                    .from_reader(data);
                let mut count = 0;
                let mut rec = StringRecord::new();
                while rdr.read_record(&mut rec).unwrap() {
                    let _: $type = rec.deserialize(None).unwrap();
                    count += 1;
                }
                count
            })
        }
    };
}

bench_serde!(
    count_nfl_deserialize_owned_bytes,
    count_nfl_serialize_owned_bytes,
    NFL,
    count_deserialize_owned_bytes,
    NFLRowOwned,
    9999
);
bench_serde!(
    count_nfl_deserialize_owned_str,
    count_nfl_serialize_owned_str,
    NFL,
    count_deserialize_owned_str,
    NFLRowOwned,
    9999
);
bench_serde_borrowed_bytes!(
    count_nfl_deserialize_borrowed_bytes,
    NFL,
    NFLRowBorrowed,
    true,
    9999
);
bench_serde_borrowed_str!(
    count_nfl_deserialize_borrowed_str,
    NFL,
    NFLRowBorrowed,
    true,
    9999
);
bench!(count_nfl_iter_bytes, NFL, count_iter_bytes, 130000);
bench_trimmed!(count_nfl_iter_bytes_trimmed, NFL, count_iter_bytes, 130000); 311 | bench!(count_nfl_iter_str, NFL, count_iter_str, 130000); 312 | bench_trimmed!(count_nfl_iter_str_trimmed, NFL, count_iter_str, 130000); 313 | bench!(count_nfl_read_bytes, NFL, count_read_bytes, 130000); 314 | bench!(count_nfl_read_str, NFL, count_read_str, 130000); 315 | bench_serde!( 316 | no_headers, 317 | count_game_deserialize_owned_bytes, 318 | count_game_serialize_owned_bytes, 319 | GAME, 320 | count_deserialize_owned_bytes, 321 | GAMERowOwned, 322 | 100000 323 | ); 324 | bench_serde!( 325 | no_headers, 326 | count_game_deserialize_owned_str, 327 | count_game_serialize_owned_str, 328 | GAME, 329 | count_deserialize_owned_str, 330 | GAMERowOwned, 331 | 100000 332 | ); 333 | bench_serde_borrowed_bytes!( 334 | count_game_deserialize_borrowed_bytes, 335 | GAME, 336 | GAMERowBorrowed, 337 | true, 338 | 100000 339 | ); 340 | bench_serde_borrowed_str!( 341 | count_game_deserialize_borrowed_str, 342 | GAME, 343 | GAMERowBorrowed, 344 | true, 345 | 100000 346 | ); 347 | bench!(count_game_iter_bytes, GAME, count_iter_bytes, 600000); 348 | bench!(count_game_iter_str, GAME, count_iter_str, 600000); 349 | bench!(count_game_read_bytes, GAME, count_read_bytes, 600000); 350 | bench!(count_game_read_str, GAME, count_read_str, 600000); 351 | bench_serde!( 352 | count_pop_deserialize_owned_bytes, 353 | count_pop_serialize_owned_bytes, 354 | POP, 355 | count_deserialize_owned_bytes, 356 | POPRowOwned, 357 | 20000 358 | ); 359 | bench_serde!( 360 | count_pop_deserialize_owned_str, 361 | count_pop_serialize_owned_str, 362 | POP, 363 | count_deserialize_owned_str, 364 | POPRowOwned, 365 | 20000 366 | ); 367 | bench_serde_borrowed_bytes!( 368 | count_pop_deserialize_borrowed_bytes, 369 | POP, 370 | POPRowBorrowed, 371 | true, 372 | 20000 373 | ); 374 | bench_serde_borrowed_str!( 375 | count_pop_deserialize_borrowed_str, 376 | POP, 377 | POPRowBorrowed, 378 | true, 379 | 20000 380 | ); 381 | 
bench!(count_pop_iter_bytes, POP, count_iter_bytes, 140007); 382 | bench!(count_pop_iter_str, POP, count_iter_str, 140007); 383 | bench!(count_pop_read_bytes, POP, count_read_bytes, 140007); 384 | bench!(count_pop_read_str, POP, count_read_str, 140007); 385 | bench_serde!( 386 | count_mbta_deserialize_owned_bytes, 387 | count_mbta_serialize_owned_bytes, 388 | MBTA, 389 | count_deserialize_owned_bytes, 390 | MBTARowOwned, 391 | 9999 392 | ); 393 | bench_serde!( 394 | count_mbta_deserialize_owned_str, 395 | count_mbta_serialize_owned_str, 396 | MBTA, 397 | count_deserialize_owned_str, 398 | MBTARowOwned, 399 | 9999 400 | ); 401 | bench_serde_borrowed_bytes!( 402 | count_mbta_deserialize_borrowed_bytes, 403 | MBTA, 404 | MBTARowBorrowed, 405 | true, 406 | 9999 407 | ); 408 | bench_serde_borrowed_str!( 409 | count_mbta_deserialize_borrowed_str, 410 | MBTA, 411 | MBTARowBorrowed, 412 | true, 413 | 9999 414 | ); 415 | bench!(count_mbta_iter_bytes, MBTA, count_iter_bytes, 90000); 416 | bench!(count_mbta_iter_str, MBTA, count_iter_str, 90000); 417 | bench!(count_mbta_read_bytes, MBTA, count_read_bytes, 90000); 418 | bench!(count_mbta_read_str, MBTA, count_read_str, 90000); 419 | 420 | macro_rules! bench_write { 421 | ($name:ident, $data:ident) => { 422 | #[bench] 423 | fn $name(b: &mut Bencher) { 424 | let data = $data.as_bytes(); 425 | b.bytes = data.len() as u64; 426 | let records = collect_records(data); 427 | 428 | b.iter(|| { 429 | let mut wtr = Writer::from_writer(vec![]); 430 | for r in &records { 431 | wtr.write_record(r).unwrap(); 432 | } 433 | assert!(wtr.flush().is_ok()); 434 | }) 435 | } 436 | }; 437 | } 438 | 439 | macro_rules! 
bench_write_bytes { 440 | ($name:ident, $data:ident) => { 441 | #[bench] 442 | fn $name(b: &mut Bencher) { 443 | let data = $data.as_bytes(); 444 | b.bytes = data.len() as u64; 445 | let records = collect_records(data); 446 | 447 | b.iter(|| { 448 | let mut wtr = Writer::from_writer(vec![]); 449 | for r in &records { 450 | wtr.write_byte_record(r).unwrap(); 451 | } 452 | assert!(wtr.flush().is_ok()); 453 | }) 454 | } 455 | }; 456 | } 457 | 458 | bench_write!(write_nfl_record, NFL); 459 | bench_write_bytes!(write_nfl_bytes, NFL); 460 | 461 | fn count_deserialize_owned_bytes(rdr: &mut Reader) -> u64 462 | where 463 | R: io::Read, 464 | D: DeserializeOwned, 465 | { 466 | let mut count = 0; 467 | let mut rec = ByteRecord::new(); 468 | while rdr.read_byte_record(&mut rec).unwrap() { 469 | let _: D = rec.deserialize(None).unwrap(); 470 | count += 1; 471 | } 472 | count 473 | } 474 | 475 | fn count_deserialize_owned_str(rdr: &mut Reader) -> u64 476 | where 477 | R: io::Read, 478 | D: DeserializeOwned, 479 | { 480 | let mut count = 0; 481 | for rec in rdr.deserialize::() { 482 | let _ = rec.unwrap(); 483 | count += 1; 484 | } 485 | count 486 | } 487 | 488 | fn count_iter_bytes(rdr: &mut Reader) -> u64 { 489 | let mut count = 0; 490 | for rec in rdr.byte_records() { 491 | count += rec.unwrap().len() as u64; 492 | } 493 | count 494 | } 495 | 496 | fn count_iter_str(rdr: &mut Reader) -> u64 { 497 | let mut count = 0; 498 | for rec in rdr.records() { 499 | count += rec.unwrap().len() as u64; 500 | } 501 | count 502 | } 503 | 504 | fn count_read_bytes(rdr: &mut Reader) -> u64 { 505 | let mut count = 0; 506 | let mut rec = ByteRecord::new(); 507 | while rdr.read_byte_record(&mut rec).unwrap() { 508 | count += rec.len() as u64; 509 | } 510 | count 511 | } 512 | 513 | fn count_read_str(rdr: &mut Reader) -> u64 { 514 | let mut count = 0; 515 | let mut rec = StringRecord::new(); 516 | while rdr.read_record(&mut rec).unwrap() { 517 | count += rec.len() as u64; 518 | } 519 | count 520 
| } 521 | 522 | fn collect_records(data: &[u8]) -> Vec { 523 | let mut rdr = ReaderBuilder::new().has_headers(false).from_reader(data); 524 | rdr.byte_records().collect::, _>>().unwrap() 525 | } 526 | -------------------------------------------------------------------------------- /tests/tests.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | 3 | use csv::Reader; 4 | 5 | use std::env; 6 | use std::io::{self, Read, Write}; 7 | use std::path::PathBuf; 8 | use std::process::{self, Command}; 9 | 10 | static STRANGE: &str = include_str!("../examples/data/strange.csv"); 11 | static USPOP: &str = include_str!("../examples/data/uspop.csv"); 12 | static USPOP_NULL: &str = include_str!("../examples/data/uspop-null.csv"); 13 | static USPOP_LATIN1: &[u8] = 14 | include_bytes!("../examples/data/uspop-latin1.csv"); 15 | static WORLDPOP: &str = 16 | include_str!("../examples/data/bench/worldcitiespop.csv"); 17 | static SMALLPOP: &str = include_str!("../examples/data/smallpop.csv"); 18 | static SMALLPOP_COLON: &str = 19 | include_str!("../examples/data/smallpop-colon.csv"); 20 | static SMALLPOP_NO_HEADERS: &str = 21 | include_str!("../examples/data/smallpop-no-headers.csv"); 22 | 23 | #[test] 24 | fn cookbook_read_basic() { 25 | let mut cmd = cmd_for_example("cookbook-read-basic"); 26 | let out = cmd_output_with(&mut cmd, SMALLPOP.as_bytes()); 27 | assert_eq!(out.stdout().lines().count(), 10); 28 | } 29 | 30 | #[test] 31 | fn cookbook_read_serde() { 32 | let mut cmd = cmd_for_example("cookbook-read-serde"); 33 | let out = cmd_output_with(&mut cmd, SMALLPOP.as_bytes()); 34 | assert_eq!(out.stdout().lines().count(), 10); 35 | } 36 | 37 | #[test] 38 | fn cookbook_read_colon() { 39 | let mut cmd = cmd_for_example("cookbook-read-colon"); 40 | let out = cmd_output_with(&mut cmd, SMALLPOP_COLON.as_bytes()); 41 | assert_eq!(out.stdout().lines().count(), 10); 42 | } 43 | 44 | #[test] 45 | fn cookbook_read_no_headers() { 46 | 
let mut cmd = cmd_for_example("cookbook-read-no-headers"); 47 | let out = cmd_output_with(&mut cmd, SMALLPOP_NO_HEADERS.as_bytes()); 48 | assert_eq!(out.stdout().lines().count(), 10); 49 | } 50 | 51 | #[test] 52 | fn cookbook_write_basic() { 53 | let mut cmd = cmd_for_example("cookbook-write-basic"); 54 | let out = cmd_output(&mut cmd); 55 | assert_eq!(out.stdout().lines().count(), 3); 56 | } 57 | 58 | #[test] 59 | fn cookbook_write_serde() { 60 | let mut cmd = cmd_for_example("cookbook-write-serde"); 61 | let out = cmd_output(&mut cmd); 62 | assert_eq!(out.stdout().lines().count(), 3); 63 | } 64 | 65 | #[test] 66 | fn tutorial_setup_01() { 67 | let mut cmd = cmd_for_example("tutorial-setup-01"); 68 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 69 | assert_eq!(out.stdout().lines().count(), 100); 70 | } 71 | 72 | #[test] 73 | fn tutorial_error_01() { 74 | let mut cmd = cmd_for_example("tutorial-error-01"); 75 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 76 | assert_eq!(out.stdout().lines().count(), 100); 77 | } 78 | 79 | #[test] 80 | fn tutorial_error_01_errored() { 81 | let data = "\ 82 | header1,header2 83 | foo,bar 84 | quux,baz,foobar 85 | "; 86 | let mut cmd = cmd_for_example("tutorial-error-01"); 87 | let out = cmd_output_with(&mut cmd, data.as_bytes()); 88 | assert!(out.stderr().contains("thread 'main' ")); 89 | assert!(out.stderr().contains(" panicked")); 90 | } 91 | 92 | #[test] 93 | fn tutorial_error_02() { 94 | let mut cmd = cmd_for_example("tutorial-error-02"); 95 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 96 | assert_eq!(out.stdout().lines().count(), 100); 97 | } 98 | 99 | #[test] 100 | fn tutorial_error_02_errored() { 101 | let data = "\ 102 | header1,header2 103 | foo,bar 104 | quux,baz,foobar 105 | "; 106 | let mut cmd = cmd_for_example("tutorial-error-02"); 107 | let out = cmd_output_with(&mut cmd, data.as_bytes()); 108 | assert!(out.stdout_failed().contains("error reading CSV from ")); 109 | } 110 | 111 | 
#[test] 112 | fn tutorial_error_03() { 113 | let mut cmd = cmd_for_example("tutorial-error-03"); 114 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 115 | assert_eq!(out.stdout().lines().count(), 100); 116 | } 117 | 118 | #[test] 119 | fn tutorial_error_03_errored() { 120 | let data = "\ 121 | header1,header2 122 | foo,bar 123 | quux,baz,foobar 124 | "; 125 | let mut cmd = cmd_for_example("tutorial-error-03"); 126 | let out = cmd_output_with(&mut cmd, data.as_bytes()); 127 | assert!(out.stdout_failed().contains("CSV error:")); 128 | } 129 | 130 | #[test] 131 | fn tutorial_error_04() { 132 | let mut cmd = cmd_for_example("tutorial-error-04"); 133 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 134 | assert_eq!(out.stdout().lines().count(), 100); 135 | } 136 | 137 | #[test] 138 | fn tutorial_error_04_errored() { 139 | let data = "\ 140 | header1,header2 141 | foo,bar 142 | quux,baz,foobar 143 | "; 144 | let mut cmd = cmd_for_example("tutorial-error-04"); 145 | let out = cmd_output_with(&mut cmd, data.as_bytes()); 146 | assert!(out.stdout_failed().contains("CSV error:")); 147 | } 148 | 149 | #[test] 150 | fn tutorial_read_01() { 151 | let mut cmd = cmd_for_example("tutorial-read-01"); 152 | cmd.arg(data_dir().join("uspop.csv")); 153 | let out = cmd_output(&mut cmd); 154 | assert_eq!(out.stdout().lines().count(), 100); 155 | } 156 | 157 | #[test] 158 | fn tutorial_read_headers_01() { 159 | let mut cmd = cmd_for_example("tutorial-read-headers-01"); 160 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 161 | assert_eq!(out.stdout().lines().count(), 101); 162 | } 163 | 164 | #[test] 165 | fn tutorial_read_headers_02() { 166 | let mut cmd = cmd_for_example("tutorial-read-headers-02"); 167 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 168 | assert_eq!(out.stdout().lines().count(), 102); 169 | } 170 | 171 | #[test] 172 | fn tutorial_read_delimiter_01() { 173 | let mut cmd = cmd_for_example("tutorial-read-delimiter-01"); 174 | let out = 
cmd_output_with(&mut cmd, STRANGE.as_bytes()); 175 | assert_eq!(out.stdout().lines().count(), 6); 176 | } 177 | 178 | #[test] 179 | fn tutorial_read_serde_01() { 180 | let mut cmd = cmd_for_example("tutorial-read-serde-01"); 181 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 182 | assert_eq!(out.stdout().lines().count(), 100); 183 | assert!(out.stdout().lines().all(|x| x.contains("pop:"))); 184 | } 185 | 186 | #[test] 187 | fn tutorial_read_serde_02() { 188 | let mut cmd = cmd_for_example("tutorial-read-serde-02"); 189 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 190 | assert_eq!(out.stdout().lines().count(), 100); 191 | assert!(out.stdout().lines().all(|x| x.starts_with("("))); 192 | } 193 | 194 | #[test] 195 | fn tutorial_read_serde_03() { 196 | let mut cmd = cmd_for_example("tutorial-read-serde-03"); 197 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 198 | assert_eq!(out.stdout().lines().count(), 100); 199 | assert!(out.stdout().lines().all(|x| x.contains("\"City\":"))); 200 | } 201 | 202 | #[test] 203 | fn tutorial_read_serde_04() { 204 | let mut cmd = cmd_for_example("tutorial-read-serde-04"); 205 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 206 | assert_eq!(out.stdout().lines().count(), 100); 207 | assert!(out.stdout().lines().all(|x| x.starts_with("Record { latitude:"))); 208 | } 209 | 210 | #[test] 211 | fn tutorial_read_serde_05_invalid() { 212 | let mut cmd = cmd_for_example("tutorial-read-serde-invalid-01"); 213 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 214 | assert_eq!(out.stdout().lines().count(), 100); 215 | assert!(out.stdout().lines().all(|x| x.starts_with("Record { latitude:"))); 216 | } 217 | 218 | #[test] 219 | fn tutorial_read_serde_05_invalid_errored() { 220 | let mut cmd = cmd_for_example("tutorial-read-serde-invalid-01"); 221 | let out = cmd_output_with(&mut cmd, USPOP_NULL.as_bytes()); 222 | assert!(out.stdout_failed().contains("CSV deserialize error:")); 223 | } 224 | 225 | #[test] 
226 | fn tutorial_read_serde_invalid_06() { 227 | let mut cmd = cmd_for_example("tutorial-read-serde-invalid-02"); 228 | let out = cmd_output_with(&mut cmd, USPOP_NULL.as_bytes()); 229 | assert_eq!(out.stdout().lines().count(), 100); 230 | assert!(out.stdout().lines().all(|x| x.starts_with("Record { latitude:"))); 231 | } 232 | 233 | #[test] 234 | fn tutorial_write_01() { 235 | let mut cmd = cmd_for_example("tutorial-write-01"); 236 | let out = cmd_output(&mut cmd); 237 | assert_eq!(out.stdout().lines().count(), 4); 238 | } 239 | 240 | #[test] 241 | fn tutorial_write_delimiter_01() { 242 | let mut cmd = cmd_for_example("tutorial-write-delimiter-01"); 243 | let out = cmd_output(&mut cmd); 244 | assert_eq!(out.stdout().lines().count(), 4); 245 | assert!(out.stdout().lines().all(|x| x.contains('\t'))); 246 | } 247 | 248 | #[test] 249 | fn tutorial_write_serde_01() { 250 | let mut cmd = cmd_for_example("tutorial-write-serde-01"); 251 | let out = cmd_output(&mut cmd); 252 | assert_eq!(out.stdout().lines().count(), 4); 253 | } 254 | 255 | #[test] 256 | fn tutorial_write_serde_02() { 257 | let mut cmd = cmd_for_example("tutorial-write-serde-02"); 258 | let out = cmd_output(&mut cmd); 259 | assert_eq!(out.stdout().lines().count(), 4); 260 | } 261 | 262 | #[test] 263 | fn tutorial_pipeline_search_01() { 264 | let mut cmd = cmd_for_example("tutorial-pipeline-search-01"); 265 | cmd.arg("MA"); 266 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 267 | assert_eq!(out.stdout().lines().count(), 2); 268 | } 269 | 270 | #[test] 271 | fn tutorial_pipeline_search_01_errored() { 272 | let mut cmd = cmd_for_example("tutorial-pipeline-search-01"); 273 | cmd.arg("MA"); 274 | let out = cmd_output_with(&mut cmd, USPOP_LATIN1); 275 | assert!(out.stdout_failed().contains("invalid utf-8")); 276 | } 277 | 278 | #[test] 279 | fn tutorial_pipeline_search_02() { 280 | let mut cmd = cmd_for_example("tutorial-pipeline-search-02"); 281 | cmd.arg("MA"); 282 | let out = cmd_output_with(&mut 
cmd, USPOP_LATIN1); 283 | assert_eq!(out.stdout().lines().count(), 2); 284 | } 285 | 286 | #[test] 287 | fn tutorial_pipeline_pop_01() { 288 | let mut cmd = cmd_for_example("tutorial-pipeline-pop-01"); 289 | cmd.arg("100000"); 290 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 291 | assert_eq!(out.stdout().lines().count(), 4); 292 | } 293 | 294 | #[test] 295 | fn tutorial_perf_alloc_01() { 296 | let mut cmd = cmd_for_example("tutorial-perf-alloc-01"); 297 | let out = cmd_output_with(&mut cmd, WORLDPOP.as_bytes()); 298 | assert_eq!(out.stdout(), "11\n"); 299 | } 300 | 301 | #[test] 302 | fn tutorial_perf_alloc_02() { 303 | let mut cmd = cmd_for_example("tutorial-perf-alloc-02"); 304 | let out = cmd_output_with(&mut cmd, WORLDPOP.as_bytes()); 305 | assert_eq!(out.stdout(), "11\n"); 306 | } 307 | 308 | #[test] 309 | fn tutorial_perf_alloc_03() { 310 | let mut cmd = cmd_for_example("tutorial-perf-alloc-03"); 311 | let out = cmd_output_with(&mut cmd, WORLDPOP.as_bytes()); 312 | assert_eq!(out.stdout(), "11\n"); 313 | } 314 | 315 | #[test] 316 | fn tutorial_perf_serde_01() { 317 | let mut cmd = cmd_for_example("tutorial-perf-serde-01"); 318 | let out = cmd_output_with(&mut cmd, WORLDPOP.as_bytes()); 319 | assert_eq!(out.stdout(), "11\n"); 320 | } 321 | 322 | #[test] 323 | fn tutorial_perf_serde_02() { 324 | let mut cmd = cmd_for_example("tutorial-perf-serde-02"); 325 | let out = cmd_output_with(&mut cmd, WORLDPOP.as_bytes()); 326 | assert_eq!(out.stdout(), "11\n"); 327 | } 328 | 329 | #[test] 330 | fn tutorial_perf_serde_03() { 331 | let mut cmd = cmd_for_example("tutorial-perf-serde-03"); 332 | let out = cmd_output_with(&mut cmd, WORLDPOP.as_bytes()); 333 | assert_eq!(out.stdout(), "11\n"); 334 | } 335 | 336 | #[test] 337 | fn tutorial_perf_core_01() { 338 | let mut cmd = cmd_for_example("tutorial-perf-core-01"); 339 | let out = cmd_output_with(&mut cmd, WORLDPOP.as_bytes()); 340 | assert_eq!(out.stdout(), "11\n"); 341 | } 342 | 343 | #[test] 344 | fn 
no_infinite_loop_on_io_errors() { 345 | struct FailingRead; 346 | impl Read for FailingRead { 347 | fn read(&mut self, _buf: &mut [u8]) -> io::Result { 348 | Err(io::Error::new(io::ErrorKind::Other, "Broken reader")) 349 | } 350 | } 351 | 352 | let mut record_results = Reader::from_reader(FailingRead).into_records(); 353 | let first_result = record_results.next(); 354 | assert!( 355 | matches!(&first_result, Some(Err(e)) if matches!(e.kind(), csv::ErrorKind::Io(_))) 356 | ); 357 | assert!(record_results.next().is_none()); 358 | } 359 | 360 | // Helper functions follow. 361 | 362 | /// Return the target/debug directory path. 363 | fn debug_dir() -> PathBuf { 364 | env::current_exe() 365 | .expect("test binary path") 366 | .parent() 367 | .expect("test binary directory") 368 | .parent() 369 | .expect("example binary directory") 370 | .to_path_buf() 371 | } 372 | 373 | /// Return the directory containing the example test binaries. 374 | fn example_bin_dir() -> PathBuf { 375 | debug_dir().join("examples") 376 | } 377 | 378 | /// Return the repo root directory path. 379 | fn repo_dir() -> PathBuf { 380 | PathBuf::from(env!("CARGO_MANIFEST_DIR")) 381 | } 382 | 383 | /// Return the directory containing the example data. 384 | fn data_dir() -> PathBuf { 385 | repo_dir().join("examples").join("data") 386 | } 387 | 388 | /// Return a command ready to execute the given example test binary. 389 | /// 390 | /// The command's current directory is set to the repo root. 391 | fn cmd_for_example(name: &str) -> Command { 392 | let mut cmd = Command::new(example_bin_dir().join(name)); 393 | cmd.current_dir(repo_dir()); 394 | cmd 395 | } 396 | 397 | /// Return the (stdout, stderr) of running the command as a string. 398 | /// 399 | /// If the command has a non-zero exit code, then this function panics. 
400 | fn cmd_output(cmd: &mut Command) -> Output { 401 | cmd.stdout(process::Stdio::piped()); 402 | cmd.stderr(process::Stdio::piped()); 403 | let child = cmd.spawn().expect("command spawns successfully"); 404 | Output::new(cmd, child) 405 | } 406 | 407 | /// Like cmd_output, but sends the given data as stdin to the given child. 408 | fn cmd_output_with(cmd: &mut Command, data: &[u8]) -> Output { 409 | cmd.stdin(process::Stdio::piped()); 410 | cmd.stdout(process::Stdio::piped()); 411 | cmd.stderr(process::Stdio::piped()); 412 | let mut child = cmd.spawn().expect("command spawns successfully"); 413 | { 414 | let stdin = child.stdin.as_mut().expect("failed to get stdin"); 415 | stdin.write_all(data).expect("failed to write to stdin"); 416 | } 417 | Output::new(cmd, child) 418 | } 419 | 420 | struct Output { 421 | stdout: String, 422 | stderr: String, 423 | command: String, 424 | status: process::ExitStatus, 425 | } 426 | 427 | impl Output { 428 | /// Return the (stdout, stderr) of running the given child as a string. 429 | /// 430 | /// If the command has a non-zero exit code, then this function panics. 
431 | fn new(cmd: &mut Command, child: process::Child) -> Output { 432 | let out = child.wait_with_output().expect("command runs successfully"); 433 | let stdout = 434 | String::from_utf8(out.stdout).expect("valid utf-8 (stdout)"); 435 | let stderr = 436 | String::from_utf8(out.stderr).expect("valid utf-8 (stderr)"); 437 | Output { 438 | stdout, 439 | stderr, 440 | command: format!("{:?}", cmd), 441 | status: out.status, 442 | } 443 | } 444 | 445 | fn stdout(&self) -> &str { 446 | if !self.status.success() { 447 | panic!( 448 | "\n\n==== {:?} ====\n\ 449 | command failed but expected success!\ 450 | \n\ncwd: {}\ 451 | \n\nstatus: {}\ 452 | \n\nstdout: {}\ 453 | \n\nstderr: {}\ 454 | \n\n=====\n", 455 | self.command, 456 | repo_dir().display(), 457 | self.status, 458 | self.stdout, 459 | self.stderr 460 | ); 461 | } 462 | &self.stdout 463 | } 464 | 465 | fn stdout_failed(&self) -> &str { 466 | if self.status.success() { 467 | panic!( 468 | "\n\n==== {:?} ====\n\ 469 | command succeeded but expected failure!\ 470 | \n\ncwd: {}\ 471 | \n\nstatus: {}\ 472 | \n\nstdout: {}\ 473 | \n\nstderr: {}\ 474 | \n\n=====\n", 475 | self.command, 476 | repo_dir().display(), 477 | self.status, 478 | self.stdout, 479 | self.stderr 480 | ); 481 | } 482 | &self.stdout 483 | } 484 | 485 | fn stderr(&self) -> &str { 486 | if self.status.success() { 487 | panic!( 488 | "\n\n==== {:?} ====\n\ 489 | command succeeded but expected failure!\ 490 | \n\ncwd: {}\ 491 | \n\nstatus: {}\ 492 | \n\nstdout: {}\ 493 | \n\nstderr: {}\ 494 | \n\n=====\n", 495 | self.command, 496 | repo_dir().display(), 497 | self.status, 498 | self.stdout, 499 | self.stderr 500 | ); 501 | } 502 | &self.stderr 503 | } 504 | } 505 | 506 | /// Consume the reader given into a string. 
507 | fn read_to_string(mut rdr: R) -> String { 508 | let mut s = String::new(); 509 | rdr.read_to_string(&mut s).unwrap(); 510 | s 511 | } 512 | -------------------------------------------------------------------------------- /src/string_record.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | fmt, io, 3 | iter::FromIterator, 4 | ops::{self, Range}, 5 | result, str, 6 | }; 7 | 8 | use serde_core::de::Deserialize; 9 | 10 | use crate::{ 11 | byte_record::{ByteRecord, ByteRecordIter, Position}, 12 | deserializer::deserialize_string_record, 13 | error::{Error, ErrorKind, FromUtf8Error, Result}, 14 | reader::Reader, 15 | }; 16 | 17 | /// A single CSV record stored as valid UTF-8 bytes. 18 | /// 19 | /// A string record permits reading or writing CSV rows that are valid UTF-8. 20 | /// If string records are used to read CSV data that is not valid UTF-8, then 21 | /// the CSV reader will return an invalid UTF-8 error. If you do need to read 22 | /// possibly invalid UTF-8 data, then you should prefer using a 23 | /// [`ByteRecord`](struct.ByteRecord.html), 24 | /// since it makes no assumptions about UTF-8. 25 | /// 26 | /// If you are using the Serde (de)serialization APIs, then you probably never 27 | /// need to interact with a `ByteRecord` or a `StringRecord`. However, there 28 | /// are some circumstances in which you might need to use a raw record type 29 | /// while still using Serde. For example, if you need to deserialize possibly 30 | /// invalid UTF-8 fields, then you'll need to first read your record into a 31 | /// `ByteRecord`, and then use `ByteRecord::deserialize` to run Serde. Another 32 | /// reason for using the raw record deserialization APIs is if you're using 33 | /// Serde to read into borrowed data such as a `&'a str` or a `&'a [u8]`. 34 | /// 35 | /// Two `StringRecord`s are compared on the basis of their field data. Any 36 | /// position information associated with the records is ignored. 
#[derive(Clone, Eq)]
pub struct StringRecord(ByteRecord);

impl PartialEq for StringRecord {
    fn eq(&self, other: &StringRecord) -> bool {
        // Compare field-by-field; positions are deliberately ignored.
        self.0.iter_eq(&other.0)
    }
}

impl<T: AsRef<[u8]>> PartialEq<Vec<T>> for StringRecord {
    fn eq(&self, other: &Vec<T>) -> bool {
        self.0.iter_eq(other)
    }
}

impl<T: AsRef<[u8]>> PartialEq<Vec<T>> for &StringRecord {
    fn eq(&self, other: &Vec<T>) -> bool {
        self.0.iter_eq(other)
    }
}

impl<T: AsRef<[u8]>> PartialEq<[T]> for StringRecord {
    fn eq(&self, other: &[T]) -> bool {
        self.0.iter_eq(other)
    }
}

impl<T: AsRef<[u8]>> PartialEq<[T]> for &StringRecord {
    fn eq(&self, other: &[T]) -> bool {
        self.0.iter_eq(other)
    }
}

impl fmt::Debug for StringRecord {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let fields: Vec<&str> = self.iter().collect();
        write!(f, "StringRecord({:?})", fields)
    }
}

impl Default for StringRecord {
    #[inline]
    fn default() -> StringRecord {
        StringRecord::new()
    }
}

impl StringRecord {
    /// Create a new empty `StringRecord`.
    ///
    /// Note that you may find the `StringRecord::from` constructor more
    /// convenient, which is provided by an impl on the `From` trait.
    ///
    /// # Example: create an empty record
    ///
    /// ```
    /// use csv::StringRecord;
    ///
    /// let record = StringRecord::new();
    /// assert_eq!(record.len(), 0);
    /// ```
    ///
    /// # Example: initialize a record from a `Vec`
    ///
    /// ```
    /// use csv::StringRecord;
    ///
    /// let record = StringRecord::from(vec!["a", "b", "c"]);
    /// assert_eq!(record.len(), 3);
    /// ```
    #[inline]
    pub fn new() -> StringRecord {
        StringRecord(ByteRecord::new())
    }

    /// Create a new empty `StringRecord` with the given capacity.
    ///
    /// `buffer` refers to the capacity of the buffer used to store the
    /// actual row contents. `fields` refers to the number of fields one
    /// might expect to store.
    #[inline]
    pub fn with_capacity(buffer: usize, fields: usize) -> StringRecord {
        StringRecord(ByteRecord::with_capacity(buffer, fields))
    }

    /// Create a new `StringRecord` from a `ByteRecord`.
    ///
    /// Note that this does UTF-8 validation. If the given `ByteRecord` does
    /// not contain valid UTF-8, then this returns an error. The error
    /// includes the UTF-8 error and the original `ByteRecord`.
    ///
    /// # Example: valid UTF-8
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ByteRecord, StringRecord};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let byte_record = ByteRecord::from(vec!["a", "b", "c"]);
    ///     let str_record = StringRecord::from_byte_record(byte_record)?;
    ///     assert_eq!(str_record.len(), 3);
    ///     Ok(())
    /// }
    /// ```
    ///
    /// # Example: invalid UTF-8
    ///
    /// ```
    /// use csv::{ByteRecord, StringRecord};
    ///
    /// let byte_record = ByteRecord::from(vec![
    ///     &b"quux"[..], &b"foo\xFFbar"[..], &b"c"[..],
    /// ]);
    /// let err = StringRecord::from_byte_record(byte_record).unwrap_err();
    /// assert_eq!(err.utf8_error().field(), 1);
    /// assert_eq!(err.utf8_error().valid_up_to(), 3);
    /// ```
    #[inline]
    pub fn from_byte_record(
        record: ByteRecord,
    ) -> result::Result<StringRecord, FromUtf8Error> {
        match record.validate() {
            Ok(()) => Ok(StringRecord(record)),
            // Hand the record back to the caller along with the UTF-8 error.
            Err(err) => Err(FromUtf8Error::new(record, err)),
        }
    }
166 | /// 167 | /// This is like `StringRecord::from_byte_record`, except all invalid UTF-8 168 | /// sequences are replaced with the `U+FFFD REPLACEMENT CHARACTER`, which 169 | /// looks like this: �. 170 | /// 171 | /// # Example: valid UTF-8 172 | /// 173 | /// ``` 174 | /// use csv::{ByteRecord, StringRecord}; 175 | /// 176 | /// let byte_record = ByteRecord::from(vec!["a", "b", "c"]); 177 | /// let str_record = StringRecord::from_byte_record_lossy(byte_record); 178 | /// assert_eq!(str_record.len(), 3); 179 | /// ``` 180 | /// 181 | /// # Example: invalid UTF-8 182 | /// 183 | /// ``` 184 | /// use csv::{ByteRecord, StringRecord}; 185 | /// 186 | /// let byte_record = ByteRecord::from(vec![ 187 | /// &b"quux"[..], &b"foo\xFFbar"[..], &b"c"[..], 188 | /// ]); 189 | /// let str_record = StringRecord::from_byte_record_lossy(byte_record); 190 | /// assert_eq!(&str_record[0], "quux"); 191 | /// assert_eq!(&str_record[1], "foo�bar"); 192 | /// assert_eq!(&str_record[2], "c"); 193 | /// ``` 194 | #[inline] 195 | pub fn from_byte_record_lossy(record: ByteRecord) -> StringRecord { 196 | // If the record is valid UTF-8, then take the easy path. 197 | if let Ok(()) = record.validate() { 198 | return StringRecord(record); 199 | } 200 | // TODO: We can be faster here. Not sure if it's worth it. 201 | let mut str_record = 202 | StringRecord::with_capacity(record.as_slice().len(), record.len()); 203 | for field in &record { 204 | str_record.push_field(&String::from_utf8_lossy(field)); 205 | } 206 | str_record 207 | } 208 | 209 | /// Deserialize this record. 210 | /// 211 | /// The `D` type parameter refers to the type that this record should be 212 | /// deserialized into. The `'de` lifetime refers to the lifetime of the 213 | /// `StringRecord`. The `'de` lifetime permits deserializing into structs 214 | /// that borrow field data from this record. 
215 | /// 216 | /// An optional `headers` parameter permits deserializing into a struct 217 | /// based on its field names (corresponding to header values) rather than 218 | /// the order in which the fields are defined. 219 | /// 220 | /// # Example: without headers 221 | /// 222 | /// This shows how to deserialize a single row into a struct based on the 223 | /// order in which fields occur. This example also shows how to borrow 224 | /// fields from the `StringRecord`, which results in zero allocation 225 | /// deserialization. 226 | /// 227 | /// ``` 228 | /// use std::error::Error; 229 | /// 230 | /// use csv::StringRecord; 231 | /// 232 | /// #[derive(serde::Deserialize)] 233 | /// struct Row<'a> { 234 | /// city: &'a str, 235 | /// country: &'a str, 236 | /// population: u64, 237 | /// } 238 | /// 239 | /// # fn main() { example().unwrap() } 240 | /// fn example() -> Result<(), Box> { 241 | /// let record = StringRecord::from(vec![ 242 | /// "Boston", "United States", "4628910", 243 | /// ]); 244 | /// 245 | /// let row: Row = record.deserialize(None)?; 246 | /// assert_eq!(row.city, "Boston"); 247 | /// assert_eq!(row.country, "United States"); 248 | /// assert_eq!(row.population, 4628910); 249 | /// Ok(()) 250 | /// } 251 | /// ``` 252 | /// 253 | /// # Example: with headers 254 | /// 255 | /// This example is like the previous one, but shows how to deserialize 256 | /// into a struct based on the struct's field names. For this to work, 257 | /// you must provide a header row. 258 | /// 259 | /// This example also shows that you can deserialize into owned data 260 | /// types (e.g., `String`) instead of borrowed data types (e.g., `&str`). 
261 | /// 262 | /// ``` 263 | /// use std::error::Error; 264 | /// 265 | /// use csv::StringRecord; 266 | /// 267 | /// #[derive(serde::Deserialize)] 268 | /// struct Row { 269 | /// city: String, 270 | /// country: String, 271 | /// population: u64, 272 | /// } 273 | /// 274 | /// # fn main() { example().unwrap() } 275 | /// fn example() -> Result<(), Box> { 276 | /// // Notice that the fields are not in the same order 277 | /// // as the fields in the struct! 278 | /// let header = StringRecord::from(vec![ 279 | /// "country", "city", "population", 280 | /// ]); 281 | /// let record = StringRecord::from(vec![ 282 | /// "United States", "Boston", "4628910", 283 | /// ]); 284 | /// 285 | /// let row: Row = record.deserialize(Some(&header))?; 286 | /// assert_eq!(row.city, "Boston"); 287 | /// assert_eq!(row.country, "United States"); 288 | /// assert_eq!(row.population, 4628910); 289 | /// Ok(()) 290 | /// } 291 | /// ``` 292 | pub fn deserialize<'de, D: Deserialize<'de>>( 293 | &'de self, 294 | headers: Option<&'de StringRecord>, 295 | ) -> Result { 296 | deserialize_string_record(self, headers) 297 | } 298 | 299 | /// Returns an iterator over all fields in this record. 300 | /// 301 | /// # Example 302 | /// 303 | /// This example shows how to iterate over each field in a `StringRecord`. 304 | /// 305 | /// ``` 306 | /// use csv::StringRecord; 307 | /// 308 | /// let record = StringRecord::from(vec!["a", "b", "c"]); 309 | /// for field in record.iter() { 310 | /// assert!(field == "a" || field == "b" || field == "c"); 311 | /// } 312 | /// ``` 313 | #[inline] 314 | pub fn iter(&self) -> StringRecordIter<'_> { 315 | self.into_iter() 316 | } 317 | 318 | /// Return the field at index `i`. 319 | /// 320 | /// If no field at index `i` exists, then this returns `None`. 
321 | /// 322 | /// # Example 323 | /// 324 | /// ``` 325 | /// use csv::StringRecord; 326 | /// 327 | /// let record = StringRecord::from(vec!["a", "b", "c"]); 328 | /// assert_eq!(record.get(1), Some("b")); 329 | /// assert_eq!(record.get(3), None); 330 | /// ``` 331 | #[inline] 332 | pub fn get(&self, i: usize) -> Option<&str> { 333 | self.0.get(i).map(|bytes| { 334 | debug_assert!(str::from_utf8(bytes).is_ok()); 335 | // This is safe because we guarantee that all string records 336 | // have a valid UTF-8 buffer. It's also safe because we 337 | // individually check each field for valid UTF-8. 338 | unsafe { str::from_utf8_unchecked(bytes) } 339 | }) 340 | } 341 | 342 | /// Returns true if and only if this record is empty. 343 | /// 344 | /// # Example 345 | /// 346 | /// ``` 347 | /// use csv::StringRecord; 348 | /// 349 | /// assert!(StringRecord::new().is_empty()); 350 | /// ``` 351 | #[inline] 352 | pub fn is_empty(&self) -> bool { 353 | self.len() == 0 354 | } 355 | 356 | /// Returns the number of fields in this record. 357 | /// 358 | /// # Example 359 | /// 360 | /// ``` 361 | /// use csv::StringRecord; 362 | /// 363 | /// let record = StringRecord::from(vec!["a", "b", "c"]); 364 | /// assert_eq!(record.len(), 3); 365 | /// ``` 366 | #[inline] 367 | pub fn len(&self) -> usize { 368 | self.0.len() 369 | } 370 | 371 | /// Truncate this record to `n` fields. 372 | /// 373 | /// If `n` is greater than the number of fields in this record, then this 374 | /// has no effect. 375 | /// 376 | /// # Example 377 | /// 378 | /// ``` 379 | /// use csv::StringRecord; 380 | /// 381 | /// let mut record = StringRecord::from(vec!["a", "b", "c"]); 382 | /// assert_eq!(record.len(), 3); 383 | /// record.truncate(1); 384 | /// assert_eq!(record.len(), 1); 385 | /// assert_eq!(record, vec!["a"]); 386 | /// ``` 387 | #[inline] 388 | pub fn truncate(&mut self, n: usize) { 389 | self.0.truncate(n); 390 | } 391 | 392 | /// Clear this record so that it has zero fields. 
393 | /// 394 | /// Note that it is not necessary to clear the record to reuse it with 395 | /// the CSV reader. 396 | /// 397 | /// # Example 398 | /// 399 | /// ``` 400 | /// use csv::StringRecord; 401 | /// 402 | /// let mut record = StringRecord::from(vec!["a", "b", "c"]); 403 | /// assert_eq!(record.len(), 3); 404 | /// record.clear(); 405 | /// assert_eq!(record.len(), 0); 406 | /// ``` 407 | #[inline] 408 | pub fn clear(&mut self) { 409 | self.0.clear(); 410 | } 411 | 412 | /// Trim the fields of this record so that leading and trailing whitespace 413 | /// is removed. 414 | /// 415 | /// This method uses the Unicode definition of whitespace. 416 | /// 417 | /// # Example 418 | /// 419 | /// ``` 420 | /// use csv::StringRecord; 421 | /// 422 | /// let mut record = StringRecord::from(vec![ 423 | /// " ", "\u{3000}\tfoo ", "bar ", "b a z", 424 | /// ]); 425 | /// record.trim(); 426 | /// assert_eq!(record, vec!["", "foo", "bar", "b a z"]); 427 | /// ``` 428 | pub fn trim(&mut self) { 429 | let length = self.len(); 430 | if length == 0 { 431 | return; 432 | } 433 | // TODO: We could likely do this in place, but for now, we allocate. 434 | let mut trimmed = 435 | StringRecord::with_capacity(self.as_slice().len(), self.len()); 436 | trimmed.set_position(self.position().cloned()); 437 | for field in &*self { 438 | trimmed.push_field(field.trim()); 439 | } 440 | *self = trimmed; 441 | } 442 | 443 | /// Add a new field to this record. 444 | /// 445 | /// # Example 446 | /// 447 | /// ``` 448 | /// use csv::StringRecord; 449 | /// 450 | /// let mut record = StringRecord::new(); 451 | /// record.push_field("foo"); 452 | /// assert_eq!(&record[0], "foo"); 453 | /// ``` 454 | #[inline] 455 | pub fn push_field(&mut self, field: &str) { 456 | self.0.push_field(field.as_bytes()); 457 | } 458 | 459 | /// Return the position of this record, if available. 
460 | /// 461 | /// # Example 462 | /// 463 | /// ``` 464 | /// use std::error::Error; 465 | /// use csv::{StringRecord, ReaderBuilder}; 466 | /// 467 | /// # fn main() { example().unwrap(); } 468 | /// fn example() -> Result<(), Box> { 469 | /// let mut record = StringRecord::new(); 470 | /// let mut rdr = ReaderBuilder::new() 471 | /// .has_headers(false) 472 | /// .from_reader("a,b,c\nx,y,z".as_bytes()); 473 | /// 474 | /// assert!(rdr.read_record(&mut record)?); 475 | /// { 476 | /// let pos = record.position().expect("a record position"); 477 | /// assert_eq!(pos.byte(), 0); 478 | /// assert_eq!(pos.line(), 1); 479 | /// assert_eq!(pos.record(), 0); 480 | /// } 481 | /// 482 | /// assert!(rdr.read_record(&mut record)?); 483 | /// { 484 | /// let pos = record.position().expect("a record position"); 485 | /// assert_eq!(pos.byte(), 6); 486 | /// assert_eq!(pos.line(), 2); 487 | /// assert_eq!(pos.record(), 1); 488 | /// } 489 | /// 490 | /// // Finish the CSV reader for good measure. 491 | /// assert!(!rdr.read_record(&mut record)?); 492 | /// Ok(()) 493 | /// } 494 | /// ``` 495 | #[inline] 496 | pub fn position(&self) -> Option<&Position> { 497 | self.0.position() 498 | } 499 | 500 | /// Set the position of this record. 501 | /// 502 | /// # Example 503 | /// 504 | /// ``` 505 | /// use csv::{StringRecord, Position}; 506 | /// 507 | /// let mut record = StringRecord::from(vec!["a", "b", "c"]); 508 | /// let mut pos = Position::new(); 509 | /// pos.set_byte(100); 510 | /// pos.set_line(4); 511 | /// pos.set_record(2); 512 | /// 513 | /// record.set_position(Some(pos.clone())); 514 | /// assert_eq!(record.position(), Some(&pos)); 515 | /// ``` 516 | #[inline] 517 | pub fn set_position(&mut self, pos: Option) { 518 | self.0.set_position(pos); 519 | } 520 | 521 | /// Return the start and end position of a field in this record. 522 | /// 523 | /// If no such field exists at the given index, then return `None`. 
524 | /// 525 | /// The range returned can be used with the slice returned by `as_slice`. 526 | /// Namely, the range returned is guaranteed to start and end at valid 527 | /// UTF-8 sequence boundaries. 528 | /// 529 | /// # Example 530 | /// 531 | /// ``` 532 | /// use csv::StringRecord; 533 | /// 534 | /// let record = StringRecord::from(vec!["foo", "quux", "z"]); 535 | /// let range = record.range(1).expect("a record range"); 536 | /// assert_eq!(&record.as_slice()[range], "quux"); 537 | /// ``` 538 | #[inline] 539 | pub fn range(&self, i: usize) -> Option> { 540 | self.0.range(i) 541 | } 542 | 543 | /// Return the entire row as a single string slice. The slice returned 544 | /// stores all fields contiguously. The boundaries of each field can be 545 | /// determined via the `range` method. 546 | /// 547 | /// # Example 548 | /// 549 | /// ``` 550 | /// use csv::StringRecord; 551 | /// 552 | /// let record = StringRecord::from(vec!["foo", "quux", "z"]); 553 | /// assert_eq!(record.as_slice(), "fooquuxz"); 554 | /// ``` 555 | #[inline] 556 | pub fn as_slice(&self) -> &str { 557 | debug_assert!(str::from_utf8(self.0.as_slice()).is_ok()); 558 | // This is safe because we guarantee that each field is valid UTF-8. 559 | // If each field is valid UTF-8, then the entire buffer (up to the end 560 | // of the last field) must also be valid UTF-8. 561 | unsafe { str::from_utf8_unchecked(self.0.as_slice()) } 562 | } 563 | 564 | /// Return a reference to this record's raw 565 | /// [`ByteRecord`](struct.ByteRecord.html). 
566 | /// 567 | /// # Example 568 | /// 569 | /// ``` 570 | /// use csv::StringRecord; 571 | /// 572 | /// let str_record = StringRecord::from(vec!["a", "b", "c"]); 573 | /// let byte_record = str_record.as_byte_record(); 574 | /// assert_eq!(&byte_record[2], b"c"); 575 | /// ``` 576 | #[inline] 577 | pub fn as_byte_record(&self) -> &ByteRecord { 578 | &self.0 579 | } 580 | 581 | /// Convert this `StringRecord` into a 582 | /// [`ByteRecord`](struct.ByteRecord.html). 583 | /// 584 | /// # Example 585 | /// 586 | /// ``` 587 | /// use csv::StringRecord; 588 | /// 589 | /// let str_record = StringRecord::from(vec!["a", "b", "c"]); 590 | /// let byte_record = str_record.into_byte_record(); 591 | /// assert_eq!(&byte_record[2], b"c"); 592 | /// ``` 593 | /// 594 | /// Note that this can also be achieved using the `From` impl: 595 | /// 596 | /// ``` 597 | /// use csv::{ByteRecord, StringRecord}; 598 | /// 599 | /// // Using ByteRecord::from... 600 | /// let str_record = StringRecord::from(vec!["a", "b", "c"]); 601 | /// assert_eq!(ByteRecord::from(str_record).len(), 3); 602 | /// 603 | /// // Using StringRecord::into... 604 | /// let str_record = StringRecord::from(vec!["a", "b", "c"]); 605 | /// let byte_record: ByteRecord = str_record.into(); 606 | /// assert_eq!(byte_record.len(), 3); 607 | /// ``` 608 | #[inline] 609 | pub fn into_byte_record(self) -> ByteRecord { 610 | self.0 611 | } 612 | 613 | /// Clone this record, but only copy `fields` up to the end of bounds. This 614 | /// is useful when one wants to copy a record, but not necessarily any 615 | /// excess capacity in that record. 616 | #[inline] 617 | pub(crate) fn clone_truncated(&self) -> StringRecord { 618 | StringRecord(self.0.clone_truncated()) 619 | } 620 | 621 | /// A safe function for reading CSV data into a `StringRecord`. 622 | /// 623 | /// This relies on the internal representation of `StringRecord`. 
624 | #[inline(always)] 625 | pub(crate) fn read( 626 | &mut self, 627 | rdr: &mut Reader, 628 | ) -> Result { 629 | // SAFETY: This code is critical to upholding the safety of other code 630 | // blocks in this module. Namely, after calling `read_byte_record`, 631 | // it is possible for `record` to contain invalid UTF-8. We check for 632 | // this in the `validate` method, and if it does have invalid UTF-8, we 633 | // clear the record. (It is bad for `record` to contain invalid UTF-8 634 | // because other accessor methods, like `get`, assume that every field 635 | // is valid UTF-8.) 636 | let pos = rdr.position().clone(); 637 | let read_res = rdr.read_byte_record(&mut self.0); 638 | let utf8_res = match self.0.validate() { 639 | Ok(()) => Ok(()), 640 | Err(err) => { 641 | // If this record isn't valid UTF-8, then completely wipe it. 642 | self.0.clear(); 643 | Err(err) 644 | } 645 | }; 646 | match (read_res, utf8_res) { 647 | (Err(err), _) => Err(err), 648 | (Ok(_), Err(err)) => { 649 | Err(Error::new(ErrorKind::Utf8 { pos: Some(pos), err })) 650 | } 651 | (Ok(eof), Ok(())) => Ok(eof), 652 | } 653 | } 654 | } 655 | 656 | impl ops::Index for StringRecord { 657 | type Output = str; 658 | #[inline] 659 | fn index(&self, i: usize) -> &str { 660 | self.get(i).unwrap() 661 | } 662 | } 663 | 664 | impl> From> for StringRecord { 665 | #[inline] 666 | fn from(xs: Vec) -> StringRecord { 667 | StringRecord::from_iter(xs) 668 | } 669 | } 670 | 671 | impl<'a, T: AsRef> From<&'a [T]> for StringRecord { 672 | #[inline] 673 | fn from(xs: &'a [T]) -> StringRecord { 674 | StringRecord::from_iter(xs) 675 | } 676 | } 677 | 678 | impl> FromIterator for StringRecord { 679 | #[inline] 680 | fn from_iter>(iter: I) -> StringRecord { 681 | let mut record = StringRecord::new(); 682 | record.extend(iter); 683 | record 684 | } 685 | } 686 | 687 | impl> Extend for StringRecord { 688 | #[inline] 689 | fn extend>(&mut self, iter: I) { 690 | for x in iter { 691 | self.push_field(x.as_ref()); 
692 | } 693 | } 694 | } 695 | 696 | impl<'a> IntoIterator for &'a StringRecord { 697 | type IntoIter = StringRecordIter<'a>; 698 | type Item = &'a str; 699 | 700 | #[inline] 701 | fn into_iter(self) -> StringRecordIter<'a> { 702 | StringRecordIter(self.0.iter()) 703 | } 704 | } 705 | 706 | /// An iterator over the fields in a string record. 707 | /// 708 | /// The `'r` lifetime variable refers to the lifetime of the `StringRecord` 709 | /// that is being iterated over. 710 | #[derive(Clone)] 711 | pub struct StringRecordIter<'r>(ByteRecordIter<'r>); 712 | 713 | impl<'r> Iterator for StringRecordIter<'r> { 714 | type Item = &'r str; 715 | 716 | #[inline] 717 | fn next(&mut self) -> Option<&'r str> { 718 | self.0.next().map(|bytes| { 719 | debug_assert!(str::from_utf8(bytes).is_ok()); 720 | // See StringRecord::get for safety argument. 721 | unsafe { str::from_utf8_unchecked(bytes) } 722 | }) 723 | } 724 | 725 | #[inline] 726 | fn size_hint(&self) -> (usize, Option) { 727 | self.0.size_hint() 728 | } 729 | 730 | #[inline] 731 | fn count(self) -> usize { 732 | self.0.len() 733 | } 734 | } 735 | 736 | impl<'r> DoubleEndedIterator for StringRecordIter<'r> { 737 | #[inline] 738 | fn next_back(&mut self) -> Option<&'r str> { 739 | self.0.next_back().map(|bytes| { 740 | debug_assert!(str::from_utf8(bytes).is_ok()); 741 | // See StringRecord::get for safety argument. 
742 | unsafe { str::from_utf8_unchecked(bytes) } 743 | }) 744 | } 745 | } 746 | 747 | #[cfg(test)] 748 | mod tests { 749 | use crate::string_record::StringRecord; 750 | 751 | #[test] 752 | fn trim_front() { 753 | let mut rec = StringRecord::from(vec![" abc"]); 754 | rec.trim(); 755 | assert_eq!(rec.get(0), Some("abc")); 756 | 757 | let mut rec = StringRecord::from(vec![" abc", " xyz"]); 758 | rec.trim(); 759 | assert_eq!(rec.get(0), Some("abc")); 760 | assert_eq!(rec.get(1), Some("xyz")); 761 | } 762 | 763 | #[test] 764 | fn trim_back() { 765 | let mut rec = StringRecord::from(vec!["abc "]); 766 | rec.trim(); 767 | assert_eq!(rec.get(0), Some("abc")); 768 | 769 | let mut rec = StringRecord::from(vec!["abc ", "xyz "]); 770 | rec.trim(); 771 | assert_eq!(rec.get(0), Some("abc")); 772 | assert_eq!(rec.get(1), Some("xyz")); 773 | } 774 | 775 | #[test] 776 | fn trim_both() { 777 | let mut rec = StringRecord::from(vec![" abc "]); 778 | rec.trim(); 779 | assert_eq!(rec.get(0), Some("abc")); 780 | 781 | let mut rec = StringRecord::from(vec![" abc ", " xyz "]); 782 | rec.trim(); 783 | assert_eq!(rec.get(0), Some("abc")); 784 | assert_eq!(rec.get(1), Some("xyz")); 785 | } 786 | 787 | #[test] 788 | fn trim_does_not_panic_on_empty_records_1() { 789 | let mut rec = StringRecord::from(vec![""]); 790 | rec.trim(); 791 | assert_eq!(rec.get(0), Some("")); 792 | } 793 | 794 | #[test] 795 | fn trim_does_not_panic_on_empty_records_2() { 796 | let mut rec = StringRecord::from(vec!["", ""]); 797 | rec.trim(); 798 | assert_eq!(rec.get(0), Some("")); 799 | assert_eq!(rec.get(1), Some("")); 800 | } 801 | 802 | #[test] 803 | fn trim_does_not_panic_on_empty_records_3() { 804 | let mut rec = StringRecord::new(); 805 | rec.trim(); 806 | assert_eq!(rec.as_slice().len(), 0); 807 | } 808 | 809 | #[test] 810 | fn trim_whitespace_only() { 811 | let mut rec = StringRecord::from(vec![ 812 | 
"\u{0009}\u{000A}\u{000B}\u{000C}\u{000D}\u{0020}\u{0085}\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}", 813 | ]); 814 | rec.trim(); 815 | assert_eq!(rec.get(0), Some("")); 816 | } 817 | 818 | // Check that record equality respects field boundaries. 819 | // 820 | // Regression test for #138. 821 | #[test] 822 | fn eq_field_boundaries() { 823 | let test1 = StringRecord::from(vec!["12", "34"]); 824 | let test2 = StringRecord::from(vec!["123", "4"]); 825 | 826 | assert_ne!(test1, test2); 827 | } 828 | 829 | // Check that record equality respects number of fields. 830 | // 831 | // Regression test for #138. 832 | #[test] 833 | fn eq_record_len() { 834 | let test1 = StringRecord::from(vec!["12", "34", "56"]); 835 | let test2 = StringRecord::from(vec!["12", "34"]); 836 | assert_ne!(test1, test2); 837 | } 838 | } 839 | --------------------------------------------------------------------------------