├── .github ├── FUNDING.yml └── workflows │ └── ci.yml ├── rustfmt.toml ├── examples ├── data │ ├── uspop-latin1.csv │ ├── strange.csv │ ├── smallpop-no-headers.csv │ ├── smallpop.csv │ ├── smallpop-colon.csv │ ├── uspop.csv │ └── uspop-null.csv ├── tutorial-error-01.rs ├── tutorial-read-headers-01.rs ├── cookbook-read-colon.rs ├── cookbook-read-no-headers.rs ├── tutorial-error-04.rs ├── tutorial-error-02.rs ├── tutorial-read-serde-03.rs ├── cookbook-read-basic.rs ├── tutorial-read-headers-02.rs ├── tutorial-perf-alloc-01.rs ├── tutorial-read-delimiter-01.rs ├── tutorial-perf-alloc-02.rs ├── tutorial-setup-01.rs ├── tutorial-perf-alloc-03.rs ├── cookbook-write-basic.rs ├── tutorial-error-03.rs ├── tutorial-read-serde-invalid-01.rs ├── tutorial-read-serde-invalid-02.rs ├── tutorial-read-serde-02.rs ├── tutorial-read-01.rs ├── tutorial-write-delimiter-01.rs ├── cookbook-read-serde.rs ├── tutorial-perf-serde-01.rs ├── tutorial-pipeline-search-02.rs ├── cookbook-write-serde.rs ├── tutorial-write-01.rs ├── tutorial-read-serde-04.rs ├── tutorial-perf-serde-02.rs ├── tutorial-read-serde-01.rs ├── tutorial-perf-serde-03.rs ├── tutorial-write-02.rs ├── tutorial-write-serde-01.rs ├── tutorial-pipeline-search-01.rs ├── tutorial-write-serde-02.rs ├── tutorial-pipeline-pop-01.rs └── tutorial-perf-core-01.rs ├── .gitignore ├── COPYING ├── csv-core ├── COPYING ├── Cargo.toml ├── LICENSE-MIT ├── UNLICENSE ├── benches │ └── bench.rs ├── README.md └── src │ └── lib.rs ├── csv-index ├── COPYING ├── Cargo.toml ├── LICENSE-MIT ├── UNLICENSE ├── README.md └── src │ ├── lib.rs │ └── simple.rs ├── ci ├── script.sh └── check-copy ├── Cargo.toml ├── LICENSE-MIT ├── scripts └── copy-examples ├── UNLICENSE ├── ISSUE_TEMPLATE.md ├── src ├── debug.rs ├── cookbook.rs ├── lib.rs ├── error.rs └── string_record.rs ├── README.md ├── benches └── bench.rs └── tests └── tests.rs /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: 
[BurntSushi] 2 | -------------------------------------------------------------------------------- /rustfmt.toml: -------------------------------------------------------------------------------- 1 | max_width = 79 2 | use_small_heuristics = "max" 3 | -------------------------------------------------------------------------------- /examples/data/uspop-latin1.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BurntSushi/rust-csv/HEAD/examples/data/uspop-latin1.csv -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .*.swp 2 | doc 3 | tags 4 | examples/ss10pusa.csv 5 | build 6 | target 7 | Cargo.lock 8 | scratch* 9 | bench_large/huge 10 | -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | This project is dual-licensed under the Unlicense and MIT licenses. 2 | 3 | You may use this code under the terms of either license. 4 | -------------------------------------------------------------------------------- /csv-core/COPYING: -------------------------------------------------------------------------------- 1 | This project is dual-licensed under the Unlicense and MIT licenses. 2 | 3 | You may use this code under the terms of either license. 4 | -------------------------------------------------------------------------------- /csv-index/COPYING: -------------------------------------------------------------------------------- 1 | This project is dual-licensed under the Unlicense and MIT licenses. 2 | 3 | You may use this code under the terms of either license. 
4 | -------------------------------------------------------------------------------- /examples/data/strange.csv: -------------------------------------------------------------------------------- 1 | "\"Hacksaw\" Jim Duggan";1987 2 | "Bret \"Hit Man\" Hart";1984 3 | # We're not sure when Rafael started, so omit the year. 4 | Rafael Halperin 5 | "\"Big Cat\" Ernie Ladd";1964 6 | "\"Macho Man\" Randy Savage";1985 7 | "Jake \"The Snake\" Roberts";1986 8 | -------------------------------------------------------------------------------- /examples/tutorial-error-01.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | 3 | fn main() { 4 | let mut rdr = csv::Reader::from_reader(io::stdin()); 5 | for result in rdr.records() { 6 | let record = result.expect("a CSV record"); 7 | println!("{:?}", record); 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /examples/data/smallpop-no-headers.csv: -------------------------------------------------------------------------------- 1 | Southborough,MA,United States,9686 2 | Northbridge,MA,United States,14061 3 | Westborough,MA,United States,29313 4 | Marlborough,MA,United States,38334 5 | Springfield,MA,United States,152227 6 | Springfield,MO,United States,150443 7 | Springfield,NJ,United States,14976 8 | Springfield,OH,United States,64325 9 | Springfield,OR,United States,56032 10 | Concord,NH,United States,42605 11 | -------------------------------------------------------------------------------- /examples/data/smallpop.csv: -------------------------------------------------------------------------------- 1 | city,region,country,population 2 | Southborough,MA,United States,9686 3 | Northbridge,MA,United States,14061 4 | Westborough,MA,United States,29313 5 | Marlborough,MA,United States,38334 6 | Springfield,MA,United States,152227 7 | Springfield,MO,United States,150443 8 | Springfield,NJ,United States,14976 9 | Springfield,OH,United 
States,64325 10 | Springfield,OR,United States,56032 11 | Concord,NH,United States,42605 12 | -------------------------------------------------------------------------------- /examples/data/smallpop-colon.csv: -------------------------------------------------------------------------------- 1 | city:region:country:population 2 | Southborough:MA:United States:9686 3 | Northbridge:MA:United States:14061 4 | Westborough:MA:United States:29313 5 | Marlborough:MA:United States:38334 6 | Springfield:MA:United States:152227 7 | Springfield:MO:United States:150443 8 | Springfield:NJ:United States:14976 9 | Springfield:OH:United States:64325 10 | Springfield:OR:United States:56032 11 | Concord:NH:United States:42605 12 | -------------------------------------------------------------------------------- /examples/tutorial-read-headers-01.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | let mut rdr = 5 | csv::ReaderBuilder::new().has_headers(false).from_reader(io::stdin()); 6 | for result in rdr.records() { 7 | let record = result?; 8 | println!("{:?}", record); 9 | } 10 | Ok(()) 11 | } 12 | 13 | fn main() { 14 | if let Err(err) = run() { 15 | println!("{}", err); 16 | process::exit(1); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /examples/cookbook-read-colon.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn example() -> Result<(), Box> { 4 | let mut rdr = 5 | csv::ReaderBuilder::new().delimiter(b':').from_reader(io::stdin()); 6 | for result in rdr.records() { 7 | let record = result?; 8 | println!("{:?}", record); 9 | } 10 | Ok(()) 11 | } 12 | 13 | fn main() { 14 | if let Err(err) = example() { 15 | println!("error running example: {}", err); 16 | process::exit(1); 17 | } 18 | } 19 | 
-------------------------------------------------------------------------------- /examples/cookbook-read-no-headers.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn example() -> Result<(), Box> { 4 | let mut rdr = 5 | csv::ReaderBuilder::new().has_headers(false).from_reader(io::stdin()); 6 | for result in rdr.records() { 7 | let record = result?; 8 | println!("{:?}", record); 9 | } 10 | Ok(()) 11 | } 12 | 13 | fn main() { 14 | if let Err(err) = example() { 15 | println!("error running example: {}", err); 16 | process::exit(1); 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /examples/tutorial-error-04.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn main() { 4 | if let Err(err) = run() { 5 | println!("{}", err); 6 | process::exit(1); 7 | } 8 | } 9 | 10 | fn run() -> Result<(), Box> { 11 | let mut rdr = csv::Reader::from_reader(io::stdin()); 12 | for result in rdr.records() { 13 | // This is effectively the same code as our `match` in the 14 | // previous example. In other words, `?` is syntactic sugar. 15 | let record = result?; 16 | println!("{:?}", record); 17 | } 18 | Ok(()) 19 | } 20 | -------------------------------------------------------------------------------- /examples/tutorial-error-02.rs: -------------------------------------------------------------------------------- 1 | use std::{io, process}; 2 | 3 | fn main() { 4 | let mut rdr = csv::Reader::from_reader(io::stdin()); 5 | for result in rdr.records() { 6 | // Examine our Result. 7 | // If there was no problem, print the record. 8 | // Otherwise, print the error message and quit the program. 
9 | match result { 10 | Ok(record) => println!("{:?}", record), 11 | Err(err) => { 12 | println!("error reading CSV from : {}", err); 13 | process::exit(1); 14 | } 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /examples/tutorial-read-serde-03.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::{error::Error, io, process}; 3 | 4 | // This introduces a type alias so that we can conveniently reference our 5 | // record type. 6 | type Record = HashMap; 7 | 8 | fn run() -> Result<(), Box> { 9 | let mut rdr = csv::Reader::from_reader(io::stdin()); 10 | for result in rdr.deserialize() { 11 | let record: Record = result?; 12 | println!("{:?}", record); 13 | } 14 | Ok(()) 15 | } 16 | 17 | fn main() { 18 | if let Err(err) = run() { 19 | println!("{}", err); 20 | process::exit(1); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /examples/cookbook-read-basic.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn example() -> Result<(), Box> { 4 | // Build the CSV reader and iterate over each record. 5 | let mut rdr = csv::Reader::from_reader(io::stdin()); 6 | for result in rdr.records() { 7 | // The iterator yields Result, so we check the 8 | // error here.. 
9 | let record = result?; 10 | println!("{:?}", record); 11 | } 12 | Ok(()) 13 | } 14 | 15 | fn main() { 16 | if let Err(err) = example() { 17 | println!("error running example: {}", err); 18 | process::exit(1); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/tutorial-read-headers-02.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | let mut rdr = csv::Reader::from_reader(io::stdin()); 5 | let headers = rdr.headers()?; 6 | println!("{:?}", headers); 7 | for result in rdr.records() { 8 | let record = result?; 9 | println!("{:?}", record); 10 | } 11 | // We can ask for the headers at any time. 12 | let headers = rdr.headers()?; 13 | println!("{:?}", headers); 14 | Ok(()) 15 | } 16 | 17 | fn main() { 18 | if let Err(err) = run() { 19 | println!("{}", err); 20 | process::exit(1); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /examples/tutorial-perf-alloc-01.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result> { 4 | let mut rdr = csv::Reader::from_reader(io::stdin()); 5 | 6 | let mut count = 0; 7 | for result in rdr.records() { 8 | let record = result?; 9 | if &record[0] == "us" && &record[3] == "MA" { 10 | count += 1; 11 | } 12 | } 13 | Ok(count) 14 | } 15 | 16 | fn main() { 17 | match run() { 18 | Ok(count) => { 19 | println!("{}", count); 20 | } 21 | Err(err) => { 22 | println!("{}", err); 23 | process::exit(1); 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /examples/tutorial-read-delimiter-01.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | let mut rdr = 
csv::ReaderBuilder::new() 5 | .has_headers(false) 6 | .delimiter(b';') 7 | .double_quote(false) 8 | .escape(Some(b'\\')) 9 | .flexible(true) 10 | .comment(Some(b'#')) 11 | .from_reader(io::stdin()); 12 | for result in rdr.records() { 13 | let record = result?; 14 | println!("{:?}", record); 15 | } 16 | Ok(()) 17 | } 18 | 19 | fn main() { 20 | if let Err(err) = run() { 21 | println!("{}", err); 22 | process::exit(1); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /examples/tutorial-perf-alloc-02.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result> { 4 | let mut rdr = csv::Reader::from_reader(io::stdin()); 5 | 6 | let mut count = 0; 7 | for result in rdr.byte_records() { 8 | let record = result?; 9 | if &record[0] == b"us" && &record[3] == b"MA" { 10 | count += 1; 11 | } 12 | } 13 | Ok(count) 14 | } 15 | 16 | fn main() { 17 | match run() { 18 | Ok(count) => { 19 | println!("{}", count); 20 | } 21 | Err(err) => { 22 | println!("{}", err); 23 | process::exit(1); 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /examples/tutorial-setup-01.rs: -------------------------------------------------------------------------------- 1 | // Import the standard library's I/O module so we can read from stdin. 2 | use std::io; 3 | 4 | // The `main` function is where your program starts executing. 5 | fn main() { 6 | // Create a CSV parser that reads data from stdin. 7 | let mut rdr = csv::Reader::from_reader(io::stdin()); 8 | // Loop over each record. 9 | for result in rdr.records() { 10 | // An error may occur, so abort the program in an unfriendly way. 11 | // We will make this more friendly later! 12 | let record = result.expect("a CSV record"); 13 | // Print a debug version of the record. 
14 | println!("{:?}", record); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /examples/tutorial-perf-alloc-03.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result> { 4 | let mut rdr = csv::Reader::from_reader(io::stdin()); 5 | let mut record = csv::ByteRecord::new(); 6 | 7 | let mut count = 0; 8 | while rdr.read_byte_record(&mut record)? { 9 | if &record[0] == b"us" && &record[3] == b"MA" { 10 | count += 1; 11 | } 12 | } 13 | Ok(count) 14 | } 15 | 16 | fn main() { 17 | match run() { 18 | Ok(count) => { 19 | println!("{}", count); 20 | } 21 | Err(err) => { 22 | println!("{}", err); 23 | process::exit(1); 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /ci/script.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | set -ex 4 | 5 | cargo build --verbose 6 | cargo doc --verbose 7 | 8 | # Our dev dependencies want newer versions of Rust. Instead of bumping our 9 | # MSRV, we just don't test on our MSRV. 
10 | if [ "$TRAVIS_RUST_VERSION" = "1.33.0" ]; then 11 | exit 0 12 | fi 13 | 14 | cargo test --verbose 15 | cargo test --verbose --manifest-path csv-core/Cargo.toml 16 | cargo test --verbose --manifest-path csv-index/Cargo.toml 17 | if [ "$TRAVIS_RUST_VERSION" = "stable" ]; then 18 | rustup component add rustfmt 19 | cargo fmt -- --check 20 | 21 | ci/check-copy cookbook 22 | ci/check-copy tutorial 23 | fi 24 | if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then 25 | cargo bench --verbose --no-run 26 | fi 27 | -------------------------------------------------------------------------------- /examples/cookbook-write-basic.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn example() -> Result<(), Box> { 4 | let mut wtr = csv::Writer::from_writer(io::stdout()); 5 | 6 | // When writing records without Serde, the header record is written just 7 | // like any other record. 8 | wtr.write_record(["city", "region", "country", "population"])?; 9 | wtr.write_record(["Southborough", "MA", "United States", "9686"])?; 10 | wtr.write_record(["Northbridge", "MA", "United States", "14061"])?; 11 | wtr.flush()?; 12 | Ok(()) 13 | } 14 | 15 | fn main() { 16 | if let Err(err) = example() { 17 | println!("error running example: {}", err); 18 | process::exit(1); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /examples/tutorial-error-03.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn main() { 4 | if let Err(err) = run() { 5 | println!("{}", err); 6 | process::exit(1); 7 | } 8 | } 9 | 10 | fn run() -> Result<(), Box> { 11 | let mut rdr = csv::Reader::from_reader(io::stdin()); 12 | for result in rdr.records() { 13 | // Examine our Result. 14 | // If there was no problem, print the record. 15 | // Otherwise, convert our error to a Box and return it. 
16 | match result { 17 | Err(err) => return Err(From::from(err)), 18 | Ok(record) => { 19 | println!("{:?}", record); 20 | } 21 | } 22 | } 23 | Ok(()) 24 | } 25 | -------------------------------------------------------------------------------- /examples/tutorial-read-serde-invalid-01.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | use std::{error::Error, io, process}; 3 | 4 | use serde::Deserialize; 5 | 6 | #[derive(Debug, Deserialize)] 7 | #[serde(rename_all = "PascalCase")] 8 | struct Record { 9 | latitude: f64, 10 | longitude: f64, 11 | population: Option, 12 | city: String, 13 | state: String, 14 | } 15 | 16 | fn run() -> Result<(), Box> { 17 | let mut rdr = csv::Reader::from_reader(io::stdin()); 18 | for result in rdr.deserialize() { 19 | let record: Record = result?; 20 | println!("{:?}", record); 21 | } 22 | Ok(()) 23 | } 24 | 25 | fn main() { 26 | if let Err(err) = run() { 27 | println!("{}", err); 28 | process::exit(1); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /csv-index/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "csv-index" 3 | version = "0.1.6" #:version 4 | authors = ["Andrew Gallant "] 5 | description = "On disk CSV indexing data structures." 6 | documentation = "https://docs.rs/csv-index" 7 | homepage = "https://github.com/BurntSushi/rust-csv" 8 | repository = "https://github.com/BurntSushi/rust-csv" 9 | readme = "README.md" 10 | keywords = ["csv", "comma", "parser", "delimited", "index"] 11 | license = "Unlicense/MIT" 12 | categories = ["encoding", "parser-implementations"] 13 | workspace = ".." 
14 | edition = "2018" 15 | 16 | [badges] 17 | travis-ci = { repository = "BurntSushi/rust-csv" } 18 | appveyor = { repository = "BurntSushi/rust-csv" } 19 | 20 | [lib] 21 | bench = false 22 | 23 | [dependencies] 24 | byteorder = "1" 25 | csv = { path = "..", version = "1.1.0" } 26 | -------------------------------------------------------------------------------- /examples/tutorial-read-serde-invalid-02.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | use std::{error::Error, io, process}; 3 | 4 | use serde::Deserialize; 5 | #[derive(Debug, Deserialize)] 6 | #[serde(rename_all = "PascalCase")] 7 | struct Record { 8 | latitude: f64, 9 | longitude: f64, 10 | #[serde(deserialize_with = "csv::invalid_option")] 11 | population: Option, 12 | city: String, 13 | state: String, 14 | } 15 | 16 | fn run() -> Result<(), Box> { 17 | let mut rdr = csv::Reader::from_reader(io::stdin()); 18 | for result in rdr.deserialize() { 19 | let record: Record = result?; 20 | println!("{:?}", record); 21 | } 22 | Ok(()) 23 | } 24 | 25 | fn main() { 26 | if let Err(err) = run() { 27 | println!("{}", err); 28 | process::exit(1); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /examples/tutorial-read-serde-02.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | // This introduces a type alias so that we can conveniently reference our 4 | // record type. 5 | type Record = (String, String, Option, f64, f64); 6 | 7 | fn run() -> Result<(), Box> { 8 | let mut rdr = csv::Reader::from_reader(io::stdin()); 9 | // Instead of creating an iterator with the `records` method, we create 10 | // an iterator with the `deserialize` method. 11 | for result in rdr.deserialize() { 12 | // We must tell Serde what type we want to deserialize into. 
13 | let record: Record = result?; 14 | println!("{:?}", record); 15 | } 16 | Ok(()) 17 | } 18 | 19 | fn main() { 20 | if let Err(err) = run() { 21 | println!("{}", err); 22 | process::exit(1); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /examples/tutorial-read-01.rs: -------------------------------------------------------------------------------- 1 | use std::{env, error::Error, ffi::OsString, fs::File, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | let file_path = get_first_arg()?; 5 | let file = File::open(file_path)?; 6 | let mut rdr = csv::Reader::from_reader(file); 7 | for result in rdr.records() { 8 | let record = result?; 9 | println!("{:?}", record); 10 | } 11 | Ok(()) 12 | } 13 | 14 | /// Returns the first positional argument sent to this process. If there are no 15 | /// positional arguments, then this returns an error. 16 | fn get_first_arg() -> Result> { 17 | match env::args_os().nth(1) { 18 | None => Err(From::from("expected 1 argument, but got none")), 19 | Some(file_path) => Ok(file_path), 20 | } 21 | } 22 | 23 | fn main() { 24 | if let Err(err) = run() { 25 | println!("{}", err); 26 | process::exit(1); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /examples/tutorial-write-delimiter-01.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | let mut wtr = csv::WriterBuilder::new() 5 | .delimiter(b'\t') 6 | .quote_style(csv::QuoteStyle::NonNumeric) 7 | .from_writer(io::stdout()); 8 | 9 | wtr.write_record([ 10 | "City", 11 | "State", 12 | "Population", 13 | "Latitude", 14 | "Longitude", 15 | ])?; 16 | wtr.write_record([ 17 | "Davidsons Landing", 18 | "AK", 19 | "", 20 | "65.2419444", 21 | "-165.2716667", 22 | ])?; 23 | wtr.write_record(["Kenai", "AK", "7610", "60.5544444", "-151.2583333"])?; 24 | wtr.write_record(["Oakman", 
"AL", "", "33.7133333", "-87.3886111"])?; 25 | 26 | wtr.flush()?; 27 | Ok(()) 28 | } 29 | 30 | fn main() { 31 | if let Err(err) = run() { 32 | println!("{}", err); 33 | process::exit(1); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /examples/cookbook-read-serde.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | use std::{error::Error, io, process}; 3 | 4 | use serde::Deserialize; 5 | 6 | // By default, struct field names are deserialized based on the position of 7 | // a corresponding field in the CSV data's header record. 8 | #[derive(Debug, Deserialize)] 9 | struct Record { 10 | city: String, 11 | region: String, 12 | country: String, 13 | population: Option, 14 | } 15 | 16 | fn example() -> Result<(), Box> { 17 | let mut rdr = csv::Reader::from_reader(io::stdin()); 18 | for result in rdr.deserialize() { 19 | // Notice that we need to provide a type hint for automatic 20 | // deserialization. 21 | let record: Record = result?; 22 | println!("{:?}", record); 23 | } 24 | Ok(()) 25 | } 26 | 27 | fn main() { 28 | if let Err(err) = example() { 29 | println!("error running example: {}", err); 30 | process::exit(1); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /csv-core/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "csv-core" 3 | version = "0.1.13" #:version 4 | authors = ["Andrew Gallant "] 5 | description = "Bare bones CSV parsing with no_std support." 6 | documentation = "https://docs.rs/csv-core" 7 | homepage = "https://github.com/BurntSushi/rust-csv" 8 | repository = "https://github.com/BurntSushi/rust-csv" 9 | readme = "README.md" 10 | keywords = ["csv", "comma", "parser", "delimited", "no_std"] 11 | license = "Unlicense/MIT" 12 | categories = ["encoding", "no-std", "parser-implementations"] 13 | workspace = ".." 
14 | edition = "2018" 15 | 16 | [badges] 17 | travis-ci = { repository = "BurntSushi/rust-csv" } 18 | appveyor = { repository = "BurntSushi/rust-csv" } 19 | 20 | [lib] 21 | bench = false 22 | 23 | [features] 24 | default = [] 25 | libc = ["memchr/libc"] 26 | 27 | [dependencies] 28 | memchr = { version = "2", default-features = false } 29 | 30 | [dev-dependencies] 31 | arrayvec = { version = "0.5", default-features = false } 32 | -------------------------------------------------------------------------------- /examples/tutorial-perf-serde-01.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | use std::{error::Error, io, process}; 3 | 4 | use serde::Deserialize; 5 | 6 | #[derive(Debug, Deserialize)] 7 | #[serde(rename_all = "PascalCase")] 8 | struct Record { 9 | country: String, 10 | city: String, 11 | accent_city: String, 12 | region: String, 13 | population: Option, 14 | latitude: f64, 15 | longitude: f64, 16 | } 17 | 18 | fn run() -> Result> { 19 | let mut rdr = csv::Reader::from_reader(io::stdin()); 20 | 21 | let mut count = 0; 22 | for result in rdr.deserialize() { 23 | let record: Record = result?; 24 | if record.country == "us" && record.region == "MA" { 25 | count += 1; 26 | } 27 | } 28 | Ok(count) 29 | } 30 | 31 | fn main() { 32 | match run() { 33 | Ok(count) => { 34 | println!("{}", count); 35 | } 36 | Err(err) => { 37 | println!("{}", err); 38 | process::exit(1); 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /examples/tutorial-pipeline-search-02.rs: -------------------------------------------------------------------------------- 1 | use std::{env, error::Error, io, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | let query = match env::args().nth(1) { 5 | None => return Err(From::from("expected 1 argument, but got none")), 6 | Some(query) => query, 7 | }; 8 | 9 | let mut rdr = csv::Reader::from_reader(io::stdin()); 10 | let mut wtr 
= csv::Writer::from_writer(io::stdout()); 11 | 12 | wtr.write_record(rdr.byte_headers()?)?; 13 | 14 | for result in rdr.byte_records() { 15 | let record = result?; 16 | // `query` is a `String` while `field` is now a `&[u8]`, so we'll 17 | // need to convert `query` to `&[u8]` before doing a comparison. 18 | if record.iter().any(|field| field == query.as_bytes()) { 19 | wtr.write_record(&record)?; 20 | } 21 | } 22 | 23 | wtr.flush()?; 24 | Ok(()) 25 | } 26 | 27 | fn main() { 28 | if let Err(err) = run() { 29 | println!("{}", err); 30 | process::exit(1); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /ci/check-copy: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | D="$(cd "$(dirname "$0")" && pwd -P)" 6 | REPO="$D/.." 7 | SCRIPTS="$REPO/scripts" 8 | TMPDIR="$(mktemp -d)" 9 | 10 | if [ $# != 1 ]; then 11 | echo "Usage: $(basename $0) (cookbook | tutorial)" >&2 12 | exit 1 13 | fi 14 | SOURCE="$1" 15 | 16 | errored() { 17 | rm -rf "$TMPDIR" 18 | echo "HINT: please run scripts/copy-examples" >&2 19 | exit 1 20 | } 21 | 22 | # Make sure the right rustfmt config is available. 23 | cp "$REPO/rustfmt.toml" "$TMPDIR/" 24 | "$SCRIPTS/copy-examples" \ 25 | --rust-file "$REPO/src/$SOURCE.rs" \ 26 | --example-dir "$TMPDIR" 27 | for new in "$TMPDIR"/*.rs; do 28 | name="$(basename "$new")" 29 | old="$REPO"/examples/"$name" 30 | if ! [ -f "$old" ]; then 31 | echo "ERROR: missing examples/$name" >&2 32 | errored 33 | fi 34 | old="$(readlink -e "$REPO"/examples/"$name")" 35 | if ! 
diff "$old" "$new"; then 36 | echo "ERROR: examples/$name differs from ${name%%.rs} in src/$SOURCE.rs" >&2 37 | errored 38 | fi 39 | done 40 | rm -rf "$TMPDIR" 41 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "csv" 3 | version = "1.4.0" #:version 4 | authors = ["Andrew Gallant "] 5 | description = "Fast CSV parsing with support for serde." 6 | documentation = "https://docs.rs/csv" 7 | homepage = "https://github.com/BurntSushi/rust-csv" 8 | repository = "https://github.com/BurntSushi/rust-csv" 9 | readme = "README.md" 10 | keywords = ["csv", "comma", "parser", "delimited", "serde"] 11 | license = "Unlicense/MIT" 12 | categories = ["encoding", "parser-implementations"] 13 | exclude = ["/.github", "/ci/*", "/scripts/*"] 14 | edition = "2021" 15 | rust-version = "1.73" 16 | 17 | [workspace] 18 | members = ["csv-core", "csv-index"] 19 | 20 | [lib] 21 | bench = false 22 | 23 | [dependencies] 24 | csv-core = { path = "csv-core", version = "0.1.11" } 25 | itoa = "1" 26 | ryu = "1" 27 | serde_core = "1.0.221" 28 | 29 | [dev-dependencies] 30 | bstr = { version = "1.7.0", default-features = false, features = ["alloc", "serde"] } 31 | serde = { version = "1.0.221", features = ["derive"] } 32 | 33 | [profile.release] 34 | debug = true 35 | 36 | [profile.bench] 37 | debug = true 38 | -------------------------------------------------------------------------------- /examples/cookbook-write-serde.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | use serde::Serialize; 4 | 5 | #[derive(Debug, Serialize)] 6 | struct Record { 7 | city: String, 8 | region: String, 9 | country: String, 10 | population: Option, 11 | } 12 | 13 | fn example() -> Result<(), Box> { 14 | let mut wtr = csv::Writer::from_writer(io::stdout()); 15 | 16 | // When writing records 
with Serde using structs, the header row is written 17 | // automatically. 18 | wtr.serialize(Record { 19 | city: "Southborough".to_string(), 20 | region: "MA".to_string(), 21 | country: "United States".to_string(), 22 | population: Some(9686), 23 | })?; 24 | wtr.serialize(Record { 25 | city: "Northbridge".to_string(), 26 | region: "MA".to_string(), 27 | country: "United States".to_string(), 28 | population: Some(14061), 29 | })?; 30 | wtr.flush()?; 31 | Ok(()) 32 | } 33 | 34 | fn main() { 35 | if let Err(err) = example() { 36 | println!("error running example: {}", err); 37 | process::exit(1); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /examples/tutorial-write-01.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | let mut wtr = csv::Writer::from_writer(io::stdout()); 5 | // Since we're writing records manually, we must explicitly write our 6 | // header record. A header record is written the same way that other 7 | // records are written. 8 | wtr.write_record([ 9 | "City", 10 | "State", 11 | "Population", 12 | "Latitude", 13 | "Longitude", 14 | ])?; 15 | wtr.write_record([ 16 | "Davidsons Landing", 17 | "AK", 18 | "", 19 | "65.2419444", 20 | "-165.2716667", 21 | ])?; 22 | wtr.write_record(["Kenai", "AK", "7610", "60.5544444", "-151.2583333"])?; 23 | wtr.write_record(["Oakman", "AL", "", "33.7133333", "-87.3886111"])?; 24 | 25 | // A CSV writer maintains an internal buffer, so it's important 26 | // to flush the buffer when you're done. 
27 | wtr.flush()?; 28 | Ok(()) 29 | } 30 | 31 | fn main() { 32 | if let Err(err) = run() { 33 | println!("{}", err); 34 | process::exit(1); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Andrew Gallant 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /csv-core/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Andrew Gallant 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 
22 | -------------------------------------------------------------------------------- /csv-index/LICENSE-MIT: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Andrew Gallant 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /examples/tutorial-read-serde-04.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | use std::{error::Error, io, process}; 3 | 4 | // This lets us write `#[derive(Deserialize)]`. 5 | use serde::Deserialize; 6 | 7 | // We don't need to derive `Debug` (which doesn't require Serde), but it's a 8 | // good habit to do it for all your types. 9 | // 10 | // Notice that the field names in this struct are NOT in the same order as 11 | // the fields in the CSV data! 
12 | #[derive(Debug, Deserialize)] 13 | #[serde(rename_all = "PascalCase")] 14 | struct Record { 15 | latitude: f64, 16 | longitude: f64, 17 | population: Option, 18 | city: String, 19 | state: String, 20 | } 21 | 22 | fn run() -> Result<(), Box> { 23 | let mut rdr = csv::Reader::from_reader(io::stdin()); 24 | for result in rdr.deserialize() { 25 | let record: Record = result?; 26 | println!("{:?}", record); 27 | // Try this if you don't like each record smushed on one line: 28 | // println!("{:#?}", record); 29 | } 30 | Ok(()) 31 | } 32 | 33 | fn main() { 34 | if let Err(err) = run() { 35 | println!("{}", err); 36 | process::exit(1); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /examples/tutorial-perf-serde-02.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | use serde::Deserialize; 3 | use std::{error::Error, io, process}; 4 | 5 | #[derive(Debug, Deserialize)] 6 | #[serde(rename_all = "PascalCase")] 7 | struct Record<'a> { 8 | country: &'a str, 9 | city: &'a str, 10 | accent_city: &'a str, 11 | region: &'a str, 12 | population: Option, 13 | latitude: f64, 14 | longitude: f64, 15 | } 16 | 17 | fn run() -> Result> { 18 | let mut rdr = csv::Reader::from_reader(io::stdin()); 19 | let mut raw_record = csv::StringRecord::new(); 20 | let headers = rdr.headers()?.clone(); 21 | 22 | let mut count = 0; 23 | while rdr.read_record(&mut raw_record)? 
{ 24 | let record: Record = raw_record.deserialize(Some(&headers))?; 25 | if record.country == "us" && record.region == "MA" { 26 | count += 1; 27 | } 28 | } 29 | Ok(count) 30 | } 31 | 32 | fn main() { 33 | match run() { 34 | Ok(count) => { 35 | println!("{}", count); 36 | } 37 | Err(err) => { 38 | println!("{}", err); 39 | process::exit(1); 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /examples/tutorial-read-serde-01.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | let mut rdr = csv::Reader::from_reader(io::stdin()); 5 | for result in rdr.records() { 6 | let record = result?; 7 | 8 | let city = &record[0]; 9 | let state = &record[1]; 10 | // Some records are missing population counts, so if we can't 11 | // parse a number, treat the population count as missing instead 12 | // of returning an error. 13 | let pop: Option = record[2].parse().ok(); 14 | // Lucky us! Latitudes and longitudes are available for every record. 15 | // Therefore, if one couldn't be parsed, return an error. 
16 | let latitude: f64 = record[3].parse()?; 17 | let longitude: f64 = record[4].parse()?; 18 | 19 | println!( 20 | "city: {:?}, state: {:?}, \ 21 | pop: {:?}, latitude: {:?}, longitude: {:?}", 22 | city, state, pop, latitude, longitude 23 | ); 24 | } 25 | Ok(()) 26 | } 27 | 28 | fn main() { 29 | if let Err(err) = run() { 30 | println!("{}", err); 31 | process::exit(1); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /examples/tutorial-perf-serde-03.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | use std::{error::Error, io, process}; 3 | 4 | use serde::Deserialize; 5 | 6 | #[derive(Debug, Deserialize)] 7 | #[serde(rename_all = "PascalCase")] 8 | struct Record<'a> { 9 | country: &'a [u8], 10 | city: &'a [u8], 11 | accent_city: &'a [u8], 12 | region: &'a [u8], 13 | population: Option, 14 | latitude: f64, 15 | longitude: f64, 16 | } 17 | 18 | fn run() -> Result> { 19 | let mut rdr = csv::Reader::from_reader(io::stdin()); 20 | let mut raw_record = csv::ByteRecord::new(); 21 | let headers = rdr.byte_headers()?.clone(); 22 | 23 | let mut count = 0; 24 | while rdr.read_byte_record(&mut raw_record)? 
{ 25 | let record: Record = raw_record.deserialize(Some(&headers))?; 26 | if record.country == b"us" && record.region == b"MA" { 27 | count += 1; 28 | } 29 | } 30 | Ok(count) 31 | } 32 | 33 | fn main() { 34 | match run() { 35 | Ok(count) => { 36 | println!("{}", count); 37 | } 38 | Err(err) => { 39 | println!("{}", err); 40 | process::exit(1); 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /scripts/copy-examples: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import absolute_import, division, print_function 4 | import argparse 5 | import codecs 6 | import os.path 7 | import re 8 | import subprocess 9 | 10 | RE_EACH_CODE_BLOCK = re.compile(r'(?s)```[^\n]*\n(.*?)```') 11 | RE_MARKER = re.compile(r'^(?:# )?//(.+)$') 12 | RE_STRIP_COMMENT = re.compile(r'^#($| +)') 13 | 14 | if __name__ == '__main__': 15 | p = argparse.ArgumentParser() 16 | p.add_argument('--rust-file', default='src/cookbook.rs') 17 | p.add_argument('--example-dir', default='examples') 18 | args = p.parse_args() 19 | 20 | with codecs.open(args.rust_file, encoding='utf-8') as f: 21 | rustcode = f.read() 22 | for m in RE_EACH_CODE_BLOCK.finditer(rustcode): 23 | lines = m.group(1).splitlines() 24 | marker, codelines = lines[0], lines[1:] 25 | m = RE_MARKER.search(marker) 26 | if m is None: 27 | continue 28 | 29 | code = '\n'.join(RE_STRIP_COMMENT.sub('', line) for line in codelines) 30 | fpath = os.path.join(args.example_dir, m.group(1)) 31 | with codecs.open(fpath, mode='w+', encoding='utf-8') as f: 32 | print(code, file=f) 33 | subprocess.check_output(['rustfmt', fpath]) 34 | -------------------------------------------------------------------------------- /examples/tutorial-write-02.rs: -------------------------------------------------------------------------------- 1 | use std::{env, error::Error, ffi::OsString, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 
| let file_path = get_first_arg()?; 5 | let mut wtr = csv::Writer::from_path(file_path)?; 6 | 7 | wtr.write_record([ 8 | "City", 9 | "State", 10 | "Population", 11 | "Latitude", 12 | "Longitude", 13 | ])?; 14 | wtr.write_record([ 15 | "Davidsons Landing", 16 | "AK", 17 | "", 18 | "65.2419444", 19 | "-165.2716667", 20 | ])?; 21 | wtr.write_record(["Kenai", "AK", "7610", "60.5544444", "-151.2583333"])?; 22 | wtr.write_record(["Oakman", "AL", "", "33.7133333", "-87.3886111"])?; 23 | 24 | wtr.flush()?; 25 | Ok(()) 26 | } 27 | 28 | /// Returns the first positional argument sent to this process. If there are no 29 | /// positional arguments, then this returns an error. 30 | fn get_first_arg() -> Result> { 31 | match env::args_os().nth(1) { 32 | None => Err(From::from("expected 1 argument, but got none")), 33 | Some(file_path) => Ok(file_path), 34 | } 35 | } 36 | 37 | fn main() { 38 | if let Err(err) = run() { 39 | println!("{}", err); 40 | process::exit(1); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /examples/tutorial-write-serde-01.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | let mut wtr = csv::Writer::from_writer(io::stdout()); 5 | 6 | // We still need to write headers manually. 7 | wtr.write_record([ 8 | "City", 9 | "State", 10 | "Population", 11 | "Latitude", 12 | "Longitude", 13 | ])?; 14 | 15 | // But now we can write records by providing a normal Rust value. 16 | // 17 | // Note that the odd `None::` syntax is required because `None` on 18 | // its own doesn't have a concrete type, but Serde needs a concrete type 19 | // in order to serialize it. That is, `None` has type `Option` but 20 | // `None::` has type `Option`. 
21 | wtr.serialize(( 22 | "Davidsons Landing", 23 | "AK", 24 | None::, 25 | 65.2419444, 26 | -165.2716667, 27 | ))?; 28 | wtr.serialize(("Kenai", "AK", Some(7610), 60.5544444, -151.2583333))?; 29 | wtr.serialize(("Oakman", "AL", None::, 33.7133333, -87.3886111))?; 30 | 31 | wtr.flush()?; 32 | Ok(()) 33 | } 34 | 35 | fn main() { 36 | if let Err(err) = run() { 37 | println!("{}", err); 38 | process::exit(1); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /UNLICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 
23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /csv-core/UNLICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /csv-index/UNLICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 
2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /examples/tutorial-pipeline-search-01.rs: -------------------------------------------------------------------------------- 1 | use std::{env, error::Error, io, process}; 2 | 3 | fn run() -> Result<(), Box> { 4 | // Get the query from the positional arguments. 5 | // If one doesn't exist, return an error. 6 | let query = match env::args().nth(1) { 7 | None => return Err(From::from("expected 1 argument, but got none")), 8 | Some(query) => query, 9 | }; 10 | 11 | // Build CSV readers and writers to stdin and stdout, respectively. 
12 | let mut rdr = csv::Reader::from_reader(io::stdin()); 13 | let mut wtr = csv::Writer::from_writer(io::stdout()); 14 | 15 | // Before reading our data records, we should write the header record. 16 | wtr.write_record(rdr.headers()?)?; 17 | 18 | // Iterate over all the records in `rdr`, and write only records containing 19 | // `query` to `wtr`. 20 | for result in rdr.records() { 21 | let record = result?; 22 | if record.iter().any(|field| field == query) { 23 | wtr.write_record(&record)?; 24 | } 25 | } 26 | 27 | // CSV writers use an internal buffer, so we should always flush when done. 28 | wtr.flush()?; 29 | Ok(()) 30 | } 31 | 32 | fn main() { 33 | if let Err(err) = run() { 34 | println!("{}", err); 35 | process::exit(1); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /examples/tutorial-write-serde-02.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error, io, process}; 2 | 3 | use serde::Serialize; 4 | 5 | // Note that structs can derive both Serialize and Deserialize! 
6 | #[derive(Debug, Serialize)] 7 | #[serde(rename_all = "PascalCase")] 8 | struct Record<'a> { 9 | city: &'a str, 10 | state: &'a str, 11 | population: Option, 12 | latitude: f64, 13 | longitude: f64, 14 | } 15 | 16 | fn run() -> Result<(), Box> { 17 | let mut wtr = csv::Writer::from_writer(io::stdout()); 18 | 19 | wtr.serialize(Record { 20 | city: "Davidsons Landing", 21 | state: "AK", 22 | population: None, 23 | latitude: 65.2419444, 24 | longitude: -165.2716667, 25 | })?; 26 | wtr.serialize(Record { 27 | city: "Kenai", 28 | state: "AK", 29 | population: Some(7610), 30 | latitude: 60.5544444, 31 | longitude: -151.2583333, 32 | })?; 33 | wtr.serialize(Record { 34 | city: "Oakman", 35 | state: "AL", 36 | population: None, 37 | latitude: 33.7133333, 38 | longitude: -87.3886111, 39 | })?; 40 | 41 | wtr.flush()?; 42 | Ok(()) 43 | } 44 | 45 | fn main() { 46 | if let Err(err) = run() { 47 | println!("{}", err); 48 | process::exit(1); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Thank you for taking the time to file a bug report. The following describes 2 | some guidelines to creating a minimally useful ticket. 3 | 4 | Above all else: do not describe your problem, **SHOW** your problem. 5 | 6 | #### What version of the `csv` crate are you using? 7 | 8 | Replace this text with the version. (The version can be found in your 9 | Cargo.lock.) 10 | 11 | #### Briefly describe the question, bug or feature request. 12 | 13 | Replace this text with a description. 14 | 15 | #### Include a complete program demonstrating a problem. 16 | 17 | Whether you're asking for a feature, filing a bug or just asking a question, 18 | this section should almost always include some kind of code that you have 19 | written. The code provided should be able to be compiled by others and should 20 | be as feasibly small as possible. 
21 | 22 | If you're reporting a bug, then the code should exhibit some undesirable 23 | characteristic. 24 | 25 | If you're asking a question, then the code should represent what you've tried 26 | so far. 27 | 28 | If you're requesting a feature, then provide code that does the closest 29 | possible thing to what you're requesting, if possible. 30 | 31 | #### What is the observed behavior of the code above? 32 | 33 | Replace this text with the output of the program. 34 | 35 | #### What is the expected or desired behavior of the code above? 36 | 37 | Replace this text with the expected or desired output of the program. 38 | -------------------------------------------------------------------------------- /examples/tutorial-pipeline-pop-01.rs: -------------------------------------------------------------------------------- 1 | use std::{env, error::Error, io, process}; 2 | 3 | use serde::{Deserialize, Serialize}; 4 | 5 | // Unlike previous examples, we derive both Deserialize and Serialize. This 6 | // means we'll be able to automatically deserialize and serialize this type. 7 | #[derive(Debug, Deserialize, Serialize)] 8 | #[serde(rename_all = "PascalCase")] 9 | struct Record { 10 | city: String, 11 | state: String, 12 | population: Option, 13 | latitude: f64, 14 | longitude: f64, 15 | } 16 | 17 | fn run() -> Result<(), Box> { 18 | // Get the query from the positional arguments. 19 | // If one doesn't exist or isn't an integer, return an error. 20 | let minimum_pop: u64 = match env::args().nth(1) { 21 | None => return Err(From::from("expected 1 argument, but got none")), 22 | Some(arg) => arg.parse()?, 23 | }; 24 | 25 | // Build CSV readers and writers to stdin and stdout, respectively. 26 | // Note that we don't need to write headers explicitly. Since we're 27 | // serializing a custom struct, that's done for us automatically. 
28 | let mut rdr = csv::Reader::from_reader(io::stdin()); 29 | let mut wtr = csv::Writer::from_writer(io::stdout()); 30 | 31 | // Iterate over all the records in `rdr`, and write only records containing 32 | // a population that is greater than or equal to `minimum_pop`. 33 | for result in rdr.deserialize() { 34 | // Remember that when deserializing, we must use a type hint to 35 | // indicate which type we want to deserialize our record into. 36 | let record: Record = result?; 37 | 38 | // `is_some_and` is a combinator on `Option`. It takes a closure that 39 | // returns `bool` when the `Option` is `Some`. When the `Option` is 40 | // `None`, `false` is always returned. In this case, we test it against 41 | // our minimum population count that we got from the command line. 42 | if record.population.is_some_and(|pop| pop >= minimum_pop) { 43 | wtr.serialize(record)?; 44 | } 45 | } 46 | 47 | // CSV writers use an internal buffer, so we should always flush when done. 48 | wtr.flush()?; 49 | Ok(()) 50 | } 51 | 52 | fn main() { 53 | if let Err(err) = run() { 54 | println!("{}", err); 55 | process::exit(1); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /csv-index/README.md: -------------------------------------------------------------------------------- 1 | csv-index 2 | ========= 3 | A collection of data structures for indexing CSV data, with a focus on data 4 | structures that can be easily serialized to and deserialized from disk. 5 | 6 | [![Linux build status](https://api.travis-ci.org/BurntSushi/rust-csv.png)](https://travis-ci.org/BurntSushi/rust-csv) 7 | [![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/rust-csv?svg=true)](https://ci.appveyor.com/project/BurntSushi/rust-csv) 8 | [![](http://meritbadge.herokuapp.com/csv-index)](https://crates.io/crates/csv-index) 9 | 10 | Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). 
11 | 12 | ### Documentation 13 | 14 | https://docs.rs/csv-index 15 | 16 | ### Usage 17 | 18 | Add this to your `Cargo.toml`: 19 | 20 | ```toml 21 | [dependencies] 22 | csv-index = "0.1.6" 23 | ``` 24 | 25 | ### Example: build a simple random access index 26 | 27 | The `RandomAccessSimple` index is a simple data structure that maps record 28 | indices to the byte offset corresponding to the start of that record in CSV 29 | data. This example shows how to save this index to disk for a particular CSV 30 | file. 31 | 32 | ```rust 33 | use std::error::Error; 34 | use std::fs::File; 35 | use std::io::{self, Write}; 36 | 37 | use csv_index::RandomAccessSimple; 38 | 39 | fn main() { 40 | example().unwrap(); 41 | } 42 | 43 | fn example() -> Result<(), Box> { 44 | // Open a normal CSV reader. 45 | let mut rdr = csv::Reader::from_path("data.csv")?; 46 | 47 | // Create an index for the CSV data in `data.csv` and write it 48 | // to `data.csv.idx`. 49 | let mut wtr = io::BufWriter::new(File::create("data.csv.idx")?); 50 | RandomAccessSimple::create(&mut rdr, &mut wtr)?; 51 | wtr.flush()?; 52 | 53 | // Open the index we just created, get the position of the last 54 | // record and seek the CSV reader to the last record. 55 | let mut idx = RandomAccessSimple::open(File::open("data.csv.idx")?)?; 56 | if idx.is_empty() { 57 | return Err(From::from("expected a non-empty CSV index")); 58 | } 59 | let last = idx.len() - 1; 60 | let pos = idx.get(last)?; 61 | rdr.seek(pos)?; 62 | 63 | // Read the next record. 64 | if let Some(result) = rdr.records().next() { 65 | let record = result?; 66 | println!("{:?}", record); 67 | Ok(()) 68 | } else { 69 | Err(From::from("expected at least one record but got none")) 70 | } 71 | } 72 | ``` 73 | -------------------------------------------------------------------------------- /csv-index/src/lib.rs: -------------------------------------------------------------------------------- 1 | /*! 
2 | The `csv-index` crate provides data structures for indexing CSV data. 3 | 4 | # Usage 5 | 6 | This crate is 7 | [on crates.io](https://crates.io/crates/csv-index) 8 | and can be used by adding `csv-index` to your dependencies in your project's 9 | `Cargo.toml` 10 | 11 | ```toml 12 | [dependencies] 13 | csv-index = "0.2" 14 | ``` 15 | 16 | # Example: build a simple random access index 17 | 18 | The `RandomAccessSimple` index is a simple data structure that maps record 19 | indices to the byte offset corresponding to the start of that record in CSV 20 | data. This example shows how to save this index to disk for a particular CSV 21 | file. 22 | 23 | Note that this indexing data structure cannot be updated. That means that if 24 | your CSV data has changed since the index was created, then the index will need 25 | to be regenerated. 26 | 27 | ```no_run 28 | use std::error::Error; 29 | use std::fs::File; 30 | use std::io::{self, Write}; 31 | use csv_index::RandomAccessSimple; 32 | 33 | # fn main() { example().unwrap(); } 34 | fn example() -> Result<(), Box> { 35 | // Open a normal CSV reader. 36 | let mut rdr = csv::Reader::from_path("data.csv")?; 37 | 38 | // Create an index for the CSV data in `data.csv` and write it 39 | // to `data.csv.idx`. 40 | let mut wtr = io::BufWriter::new(File::create("data.csv.idx")?); 41 | RandomAccessSimple::create(&mut rdr, &mut wtr)?; 42 | wtr.flush()?; 43 | 44 | // Open the index we just created, get the position of the last 45 | // record and seek the CSV reader to the last record. 46 | let mut idx = RandomAccessSimple::open(File::open("data.csv.idx")?)?; 47 | if idx.is_empty() { 48 | return Err(From::from("expected a non-empty CSV index")); 49 | } 50 | let last = idx.len() - 1; 51 | let pos = idx.get(last)?; 52 | rdr.seek(pos)?; 53 | 54 | // Read the next record. 
55 | if let Some(result) = rdr.records().next() { 56 | let record = result?; 57 | println!("{:?}", record); 58 | Ok(()) 59 | } else { 60 | Err(From::from("expected at least one record but got none")) 61 | } 62 | } 63 | ``` 64 | 65 | # Future work 66 | 67 | The full scope of this crate hasn't been determined yet. For example, it's not 68 | clear whether this crate should support data structures more amenable to 69 | in-memory indexing. (Where the current set of indexing data structures are all 70 | amenable to serializing to disk.) 71 | */ 72 | 73 | #![deny(missing_docs)] 74 | 75 | pub use crate::simple::RandomAccessSimple; 76 | 77 | mod simple; 78 | -------------------------------------------------------------------------------- /src/debug.rs: -------------------------------------------------------------------------------- 1 | /// A type that provides a human readable debug impl for arbitrary bytes. 2 | /// 3 | /// This generally works best when the bytes are presumed to be mostly UTF-8, 4 | /// but will work for anything. 5 | /// 6 | /// N.B. This is copied nearly verbatim from regex-automata. Sigh. 7 | pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]); 8 | 9 | impl<'a> core::fmt::Debug for Bytes<'a> { 10 | fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { 11 | write!(f, "\"")?; 12 | // This is a sad re-implementation of a similar impl found in bstr. 
13 | let mut bytes = self.0; 14 | while let Some(result) = utf8_decode(bytes) { 15 | let ch = match result { 16 | Ok(ch) => ch, 17 | Err(byte) => { 18 | write!(f, r"\x{:02x}", byte)?; 19 | bytes = &bytes[1..]; 20 | continue; 21 | } 22 | }; 23 | bytes = &bytes[ch.len_utf8()..]; 24 | match ch { 25 | '\0' => write!(f, "\\0")?, 26 | // ASCII control characters except \0, \n, \r, \t 27 | '\x01'..='\x08' 28 | | '\x0b' 29 | | '\x0c' 30 | | '\x0e'..='\x19' 31 | | '\x7f' => { 32 | write!(f, "\\x{:02x}", u32::from(ch))?; 33 | } 34 | '\n' | '\r' | '\t' | _ => { 35 | write!(f, "{}", ch.escape_debug())?; 36 | } 37 | } 38 | } 39 | write!(f, "\"")?; 40 | Ok(()) 41 | } 42 | } 43 | 44 | /// Decodes the next UTF-8 encoded codepoint from the given byte slice. 45 | /// 46 | /// If no valid encoding of a codepoint exists at the beginning of the given 47 | /// byte slice, then the first byte is returned instead. 48 | /// 49 | /// This returns `None` if and only if `bytes` is empty. 50 | pub(crate) fn utf8_decode(bytes: &[u8]) -> Option> { 51 | fn len(byte: u8) -> Option { 52 | if byte <= 0x7F { 53 | Some(1) 54 | } else if byte & 0b1100_0000 == 0b1000_0000 { 55 | None 56 | } else if byte <= 0b1101_1111 { 57 | Some(2) 58 | } else if byte <= 0b1110_1111 { 59 | Some(3) 60 | } else if byte <= 0b1111_0111 { 61 | Some(4) 62 | } else { 63 | None 64 | } 65 | } 66 | 67 | if bytes.is_empty() { 68 | return None; 69 | } 70 | let len = match len(bytes[0]) { 71 | None => return Some(Err(bytes[0])), 72 | Some(len) if len > bytes.len() => return Some(Err(bytes[0])), 73 | Some(1) => return Some(Ok(char::from(bytes[0]))), 74 | Some(len) => len, 75 | }; 76 | match core::str::from_utf8(&bytes[..len]) { 77 | Ok(s) => Some(Ok(s.chars().next().unwrap())), 78 | Err(_) => Some(Err(bytes[0])), 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /examples/tutorial-perf-core-01.rs: -------------------------------------------------------------------------------- 1 | 
use std::io::{self, Read}; 2 | use std::process; 3 | 4 | use csv_core::{ReadFieldResult, Reader}; 5 | 6 | fn run(mut data: &[u8]) -> Option { 7 | let mut rdr = Reader::new(); 8 | 9 | // Count the number of records in Massachusetts. 10 | let mut count = 0; 11 | // Indicates the current field index. Reset to 0 at start of each record. 12 | let mut fieldidx = 0; 13 | // True when the current record is in the United States. 14 | let mut inus = false; 15 | // Buffer for field data. Must be big enough to hold the largest field. 16 | let mut field = [0; 1024]; 17 | loop { 18 | // Attempt to incrementally read the next CSV field. 19 | let (result, nread, nwrite) = rdr.read_field(data, &mut field); 20 | // nread is the number of bytes read from our input. We should never 21 | // pass those bytes to read_field again. 22 | data = &data[nread..]; 23 | // nwrite is the number of bytes written to the output buffer `field`. 24 | // The contents of the buffer after this point is unspecified. 25 | let field = &field[..nwrite]; 26 | 27 | match result { 28 | // We don't need to handle this case because we read all of the 29 | // data up front. If we were reading data incrementally, then this 30 | // would be a signal to read more. 31 | ReadFieldResult::InputEmpty => {} 32 | // If we get this case, then we found a field that contains more 33 | // than 1024 bytes. We keep this example simple and just fail. 34 | ReadFieldResult::OutputFull => { 35 | return None; 36 | } 37 | // This case happens when we've successfully read a field. If the 38 | // field is the last field in a record, then `record_end` is true. 
39 | ReadFieldResult::Field { record_end } => { 40 | if fieldidx == 0 && field == b"us" { 41 | inus = true; 42 | } else if inus && fieldidx == 3 && field == b"MA" { 43 | count += 1; 44 | } 45 | if record_end { 46 | fieldidx = 0; 47 | inus = false; 48 | } else { 49 | fieldidx += 1; 50 | } 51 | } 52 | // This case happens when the CSV reader has successfully exhausted 53 | // all input. 54 | ReadFieldResult::End => { 55 | break; 56 | } 57 | } 58 | } 59 | Some(count) 60 | } 61 | 62 | fn main() { 63 | // Read the entire contents of stdin up front. 64 | let mut data = vec![]; 65 | if let Err(err) = io::stdin().read_to_end(&mut data) { 66 | println!("{}", err); 67 | process::exit(1); 68 | } 69 | match run(&data) { 70 | None => { 71 | println!("error: could not count records, buffer too small"); 72 | process::exit(1); 73 | } 74 | Some(count) => { 75 | println!("{}", count); 76 | } 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | on: 3 | pull_request: 4 | push: 5 | branches: 6 | - master 7 | schedule: 8 | - cron: '00 01 * * *' 9 | jobs: 10 | test: 11 | name: test 12 | runs-on: ${{ matrix.os }} 13 | strategy: 14 | matrix: 15 | # The docs seem to suggest that we can have a matrix with just an 16 | # include directive, but it results in a "matrix must define at least 17 | # one vector" error in the CI system. 
18 | build: [pinned, stable, beta, nightly, macos, win-msvc, win-gnu] 19 | include: 20 | - build: pinned 21 | os: ubuntu-latest 22 | rust: 1.73.0 23 | - build: stable 24 | os: ubuntu-latest 25 | rust: stable 26 | - build: beta 27 | os: ubuntu-latest 28 | rust: beta 29 | - build: nightly 30 | os: ubuntu-latest 31 | rust: nightly 32 | - build: macos 33 | os: macos-latest 34 | rust: stable 35 | - build: win-msvc 36 | os: windows-latest 37 | rust: stable 38 | - build: win-gnu 39 | os: windows-latest 40 | rust: stable-x86_64-gnu 41 | steps: 42 | - name: Checkout repository 43 | uses: actions/checkout@v4 44 | - name: Install Rust 45 | uses: dtolnay/rust-toolchain@master 46 | with: 47 | toolchain: ${{ matrix.rust }} 48 | - run: cargo build --verbose 49 | - run: cargo doc --verbose 50 | - run: cargo test --verbose 51 | - run: cargo test --verbose --manifest-path csv-core/Cargo.toml 52 | - run: cargo test --verbose --manifest-path csv-index/Cargo.toml 53 | - if: matrix.build == 'nightly' 54 | run: cargo bench --verbose --no-run 55 | 56 | check-doc-sync: 57 | name: check tutorial and cookbook examples 58 | runs-on: ubuntu-latest 59 | steps: 60 | - name: Checkout repository 61 | uses: actions/checkout@v4 62 | - name: Install Rust 63 | uses: dtolnay/rust-toolchain@master 64 | with: 65 | toolchain: stable 66 | components: rustfmt 67 | - name: check that cookbook examples are up to date and in sync 68 | run: ci/check-copy cookbook 69 | - name: check that tutorial examples are up to date and in sync 70 | run: ci/check-copy tutorial 71 | 72 | rustfmt: 73 | runs-on: ubuntu-latest 74 | steps: 75 | - name: Checkout repository 76 | uses: actions/checkout@v4 77 | - name: Install Rust 78 | uses: dtolnay/rust-toolchain@master 79 | with: 80 | toolchain: stable 81 | components: rustfmt 82 | - name: Check formatting 83 | run: cargo fmt --all --check 84 | 85 | miri: 86 | name: miri 87 | runs-on: ubuntu-latest 88 | steps: 89 | - name: Checkout repository 90 | uses: actions/checkout@v4 91 | - 
name: Install Rust 92 | uses: dtolnay/rust-toolchain@miri 93 | - run: cargo miri test --lib --verbose 94 | env: 95 | MIRIFLAGS: -Zmiri-strict-provenance 96 | - run: cargo miri test --doc --verbose 97 | env: 98 | MIRIFLAGS: -Zmiri-strict-provenance 99 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | csv 2 | === 3 | A fast and flexible CSV reader and writer for Rust, with support for Serde. 4 | 5 | [![Build status](https://github.com/BurntSushi/rust-csv/workflows/ci/badge.svg)](https://github.com/BurntSushi/rust-csv/actions) 6 | [![crates.io](https://img.shields.io/crates/v/csv.svg)](https://crates.io/crates/csv) 7 | 8 | Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). 9 | 10 | 11 | ### Documentation 12 | 13 | https://docs.rs/csv 14 | 15 | If you're new to Rust, the 16 | [tutorial](https://docs.rs/csv/1.*/csv/tutorial/index.html) 17 | is a good place to start. 18 | 19 | 20 | ### Usage 21 | 22 | To bring this crate into your repository, either add `csv` to your 23 | `Cargo.toml`, or run `cargo add csv`. 24 | 25 | 26 | ### Example 27 | 28 | This example shows how to read CSV data from stdin and print each record to 29 | stdout. 30 | 31 | There are more examples in the 32 | [cookbook](https://docs.rs/csv/1.*/csv/cookbook/index.html). 33 | 34 | ```rust 35 | use std::{error::Error, io, process}; 36 | 37 | fn example() -> Result<(), Box> { 38 | // Build the CSV reader and iterate over each record. 39 | let mut rdr = csv::Reader::from_reader(io::stdin()); 40 | for result in rdr.records() { 41 | // The iterator yields Result, so we check the 42 | // error here. 
        let record = result?;
        println!("{:?}", record);
    }
    Ok(())
}

fn main() {
    if let Err(err) = example() {
        println!("error running example: {}", err);
        process::exit(1);
    }
}
```

The above example can be run like so:

```text
$ git clone git://github.com/BurntSushi/rust-csv
$ cd rust-csv
$ cargo run --example cookbook-read-basic < examples/data/smallpop.csv
```

### Example with Serde

This example shows how to read CSV data from stdin into your own custom struct.
By default, the member names of the struct are matched with the values in the
header record of your CSV data.

```rust
use std::{error::Error, io, process};

#[derive(Debug, serde::Deserialize)]
struct Record {
    city: String,
    region: String,
    country: String,
    population: Option<u64>,
}

fn example() -> Result<(), Box<dyn Error>> {
    let mut rdr = csv::Reader::from_reader(io::stdin());
    for result in rdr.deserialize() {
        // Notice that we need to provide a type hint for automatic
        // deserialization.
87 | let record: Record = result?; 88 | println!("{:?}", record); 89 | } 90 | Ok(()) 91 | } 92 | 93 | fn main() { 94 | if let Err(err) = example() { 95 | println!("error running example: {}", err); 96 | process::exit(1); 97 | } 98 | } 99 | ``` 100 | 101 | The above example can be run like so: 102 | 103 | ``` 104 | $ git clone git://github.com/BurntSushi/rust-csv 105 | $ cd rust-csv 106 | $ cargo run --example cookbook-read-serde < examples/data/smallpop.csv 107 | ``` 108 | -------------------------------------------------------------------------------- /csv-core/benches/bench.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | 5 | use test::Bencher; 6 | 7 | use csv_core::{Reader, ReaderBuilder}; 8 | 9 | static NFL: &'static str = include_str!("../../examples/data/bench/nfl.csv"); 10 | static GAME: &'static str = include_str!("../../examples/data/bench/game.csv"); 11 | static POP: &'static str = 12 | include_str!("../../examples/data/bench/worldcitiespop.csv"); 13 | static MBTA: &'static str = 14 | include_str!("../../examples/data/bench/gtfs-mbta-stop-times.csv"); 15 | 16 | macro_rules! 
bench { 17 | ($name:ident, $data:ident, $counter:ident, $result:expr) => { 18 | bench!($name, $data, $counter, $result, false); 19 | }; 20 | ($name:ident, $data:ident, $counter:ident, $result:expr, NFA) => { 21 | bench!($name, $data, $counter, $result, true); 22 | }; 23 | ($name:ident, $data:ident, $counter:ident, $result:expr, $nfa:expr) => { 24 | #[bench] 25 | fn $name(b: &mut Bencher) { 26 | let data = $data.as_bytes(); 27 | b.bytes = data.len() as u64; 28 | let mut rdr = ReaderBuilder::new().nfa($nfa).build(); 29 | b.iter(|| { 30 | rdr.reset(); 31 | assert_eq!($counter(&mut rdr, data), $result); 32 | }) 33 | } 34 | }; 35 | } 36 | 37 | bench!(count_nfl_field_copy_dfa, NFL, count_fields, 130000); 38 | bench!(count_nfl_field_copy_nfa, NFL, count_fields, 130000, NFA); 39 | bench!(count_nfl_record_copy_dfa, NFL, count_records, 10000); 40 | bench!(count_nfl_record_copy_nfa, NFL, count_records, 10000, NFA); 41 | 42 | bench!(count_game_field_copy_dfa, GAME, count_fields, 600000); 43 | bench!(count_game_field_copy_nfa, GAME, count_fields, 600000, NFA); 44 | bench!(count_game_record_copy_dfa, GAME, count_records, 100000); 45 | bench!(count_game_record_copy_nfa, GAME, count_records, 100000, NFA); 46 | 47 | bench!(count_pop_field_copy_dfa, POP, count_fields, 140007); 48 | bench!(count_pop_field_copy_nfa, POP, count_fields, 140007, NFA); 49 | bench!(count_pop_record_copy_dfa, POP, count_records, 20001); 50 | bench!(count_pop_record_copy_nfa, POP, count_records, 20001, NFA); 51 | 52 | bench!(count_mbta_field_copy_dfa, MBTA, count_fields, 90000); 53 | bench!(count_mbta_field_copy_nfa, MBTA, count_fields, 90000, NFA); 54 | bench!(count_mbta_record_copy_dfa, MBTA, count_records, 10000); 55 | bench!(count_mbta_record_copy_nfa, MBTA, count_records, 10000, NFA); 56 | 57 | fn count_fields(rdr: &mut Reader, mut data: &[u8]) -> u64 { 58 | use csv_core::ReadFieldResult::*; 59 | 60 | let mut count = 0; 61 | let mut field = [0u8; 1024]; 62 | loop { 63 | let (res, nin, _) = 
rdr.read_field(data, &mut field); 64 | data = &data[nin..]; 65 | match res { 66 | InputEmpty => {} 67 | OutputFull => panic!("field too large"), 68 | Field { .. } => { 69 | count += 1; 70 | } 71 | End => break, 72 | } 73 | } 74 | count 75 | } 76 | 77 | fn count_records(rdr: &mut Reader, mut data: &[u8]) -> u64 { 78 | use csv_core::ReadRecordResult::*; 79 | 80 | let mut count = 0; 81 | let mut record = [0; 8192]; 82 | let mut ends = [0; 32]; 83 | loop { 84 | let (res, nin, _, _) = rdr.read_record(data, &mut record, &mut ends); 85 | data = &data[nin..]; 86 | match res { 87 | InputEmpty => {} 88 | OutputFull | OutputEndsFull => panic!("field too large"), 89 | Record => count += 1, 90 | End => break, 91 | } 92 | } 93 | count 94 | } 95 | -------------------------------------------------------------------------------- /csv-core/README.md: -------------------------------------------------------------------------------- 1 | csv-core 2 | ======== 3 | A fast CSV reader and write for use in a `no_std` context. This crate will 4 | never use the Rust standard library. 5 | 6 | [![Linux build status](https://api.travis-ci.org/BurntSushi/rust-csv.png)](https://travis-ci.org/BurntSushi/rust-csv) 7 | [![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/rust-csv?svg=true)](https://ci.appveyor.com/project/BurntSushi/rust-csv) 8 | [![](http://meritbadge.herokuapp.com/csv-core)](https://crates.io/crates/csv-core) 9 | 10 | Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). 11 | 12 | ### Documentation 13 | 14 | https://docs.rs/csv-core 15 | 16 | ### Usage 17 | 18 | Add this to your `Cargo.toml`: 19 | 20 | ```toml 21 | [dependencies] 22 | csv-core = "0.1.6" 23 | ``` 24 | 25 | ### Build features 26 | 27 | This crate by default links with `libc`, which is done via the `libc` feature. 28 | Disabling this feature will drop `csv-core`'s dependency on `libc`. 
29 | 30 | 31 | ### Example: reading CSV 32 | 33 | This example shows how to count the number of fields and records in CSV data. 34 | 35 | ```rust 36 | use csv_core::{Reader, ReadFieldResult}; 37 | 38 | let data = " 39 | foo,bar,baz 40 | a,b,c 41 | xxx,yyy,zzz 42 | "; 43 | 44 | let mut rdr = Reader::new(); 45 | let mut bytes = data.as_bytes(); 46 | let mut count_fields = 0; 47 | let mut count_records = 0; 48 | loop { 49 | // We skip handling the output since we don't need it for counting. 50 | let (result, nin, _) = rdr.read_field(bytes, &mut [0; 1024]); 51 | bytes = &bytes[nin..]; 52 | match result { 53 | ReadFieldResult::InputEmpty => {}, 54 | ReadFieldResult::OutputFull => panic!("field too large"), 55 | ReadFieldResult::Field { record_end } => { 56 | count_fields += 1; 57 | if record_end { 58 | count_records += 1; 59 | } 60 | } 61 | ReadFieldResult::End => break, 62 | } 63 | } 64 | assert_eq!(3, count_records); 65 | assert_eq!(9, count_fields); 66 | ``` 67 | 68 | 69 | ### Example: writing CSV 70 | 71 | This example shows how to use the `Writer` API to write valid CSV data. Proper 72 | quoting is handled automatically. 73 | 74 | ```rust 75 | use csv_core::Writer; 76 | 77 | // This is where we'll write out CSV data. 78 | let mut out = &mut [0; 1024]; 79 | // The number of bytes we've written to `out`. 80 | let mut nout = 0; 81 | // Create a CSV writer with a default configuration. 82 | let mut wtr = Writer::new(); 83 | 84 | // Write a single field. Note that we ignore the `WriteResult` and the number 85 | // of input bytes consumed since we're doing this by hand. 86 | let (_, _, n) = wtr.field(&b"foo"[..], &mut out[nout..]); 87 | nout += n; 88 | 89 | // Write a delimiter and then another field that requires quotes. 
90 | let (_, n) = wtr.delimiter(&mut out[nout..]); 91 | nout += n; 92 | let (_, _, n) = wtr.field(&b"bar,baz"[..], &mut out[nout..]); 93 | nout += n; 94 | let (_, n) = wtr.terminator(&mut out[nout..]); 95 | nout += n; 96 | 97 | // Now write another record. 98 | let (_, _, n) = wtr.field(&b"a \"b\" c"[..], &mut out[nout..]); 99 | nout += n; 100 | let (_, n) = wtr.delimiter(&mut out[nout..]); 101 | nout += n; 102 | let (_, _, n) = wtr.field(&b"quux"[..], &mut out[nout..]); 103 | nout += n; 104 | 105 | // We must always call finish once done writing. 106 | // This ensures that any closing quotes are written. 107 | let (_, n) = wtr.finish(&mut out[nout..]); 108 | nout += n; 109 | 110 | assert_eq!(&out[..nout], &b"\ 111 | foo,\"bar,baz\" 112 | \"a \"\"b\"\" c\",quux"[..]); 113 | ``` 114 | -------------------------------------------------------------------------------- /examples/data/uspop.csv: -------------------------------------------------------------------------------- 1 | City,State,Population,Latitude,Longitude 2 | Davidsons Landing,AK,,65.2419444,-165.2716667 3 | Kenai,AK,7610,60.5544444,-151.2583333 4 | Oakman,AL,,33.7133333,-87.3886111 5 | Richards Crossroads,AL,,31.7369444,-85.2644444 6 | Sandfort,AL,,32.3380556,-85.2233333 7 | Selma,AL,18980,32.4072222,-87.0211111 8 | Shadow Oaks Addition,AR,,34.9555556,-91.9475000 9 | Summerville,AR,,33.5202778,-92.3555556 10 | El Mirage,AZ,32308,33.6130556,-112.3238889 11 | Willow Springs,AZ,,36.1894444,-111.3930556 12 | Colton,CA,52335,34.0738889,-117.3127778 13 | Fontana,CA,169160,34.0922222,-117.4341667 14 | Fountain Valley,CA,56133,33.7091667,-117.9527778 15 | Kings Beach,CA,,39.2377778,-120.0255556 16 | Milpitas,CA,62636,37.4283333,-121.9055556 17 | Mokelumne City,CA,,38.2530556,-121.4380556 18 | Mount Eden,CA,,37.6361111,-122.0988889 19 | San Clemente,CA,62272,33.4269444,-117.6111111 20 | Seal Beach,CA,24404,33.7413889,-118.1038889 21 | West Hollywood,CA,37031,34.0900000,-118.3608333 22 | 
Bridgeport,CT,139090,41.1669444,-73.2052778 23 | Orange,CT,13860,41.2783333,-73.0261111 24 | Azalea Park,FL,12347,28.5408333,-81.3008333 25 | Bratt,FL,,30.9655556,-87.4275000 26 | Cutler Ridge,FL,26831,25.5802778,-80.3469444 27 | Dunn Creek,FL,,30.4861111,-81.5908333 28 | South Daytona,FL,14451,29.1655556,-81.0047222 29 | Brickhouse,GA,,33.7750000,-82.8108333 30 | Lakeview Heights,GA,,33.6188889,-84.4505556 31 | Perry,GA,11234,32.4580556,-83.7316667 32 | Roswell,GA,77218,34.0230556,-84.3616667 33 | Warfield,GA,,33.2994444,-83.3838889 34 | Kirkman,IA,,41.7286111,-95.2650000 35 | Travers,ID,,42.6091667,-113.7361111 36 | Calhoun,IL,,38.6502778,-88.0436111 37 | Cleone,IL,,39.4230556,-87.9075000 38 | Deerfield,IL,19618,42.1711111,-87.8444444 39 | Highbank Town,IN,,38.5144444,-87.1502778 40 | Indianapolis,IN,773283,39.7683333,-86.1580556 41 | Leona,KS,,39.7872222,-95.3213889 42 | New Salem,KS,,37.3105556,-96.8950000 43 | Flint Springs,KY,,37.3433333,-86.7136111 44 | Harvey,LA,22383,29.9033333,-90.0772222 45 | Jennings,LA,10547,30.2222222,-92.6569444 46 | King,LA,,32.2405556,-91.1213889 47 | Opelousas,LA,22835,30.5333333,-92.0813889 48 | Reading,MA,23441,42.5255556,-71.0958333 49 | Mount Airy,MD,8714,39.3761111,-77.1550000 50 | Auburn,ME,23488,44.0977778,-70.2316667 51 | Ellsworth,ME,7055,44.5433333,-68.4200000 52 | Sturgis,MI,11081,41.7991667,-85.4191667 53 | Brooklyn Center,MN,27718,45.0761111,-93.3325000 54 | Coon Rapids,MN,62528,45.1200000,-93.2875000 55 | Moark,MO,,36.3825000,-89.9888889 56 | Owens,MO,,37.2188889,-92.4027778 57 | Natchez,MS,17118,31.5602778,-91.4030556 58 | Rogers,NE,,41.4652778,-96.9147222 59 | Hollis,NH,7711,42.7430556,-71.5922222 60 | Bayonne,NJ,59878,40.6686111,-74.1147222 61 | Belleville,NJ,36878,40.7936111,-74.1505556 62 | Frenchtown,NJ,,40.5261111,-75.0619444 63 | Sharp,NJ,,40.0922222,-74.7427778 64 | Los Ranchos de Albuquerque,NM,5184,35.1619444,-106.6422222 65 | Deerhead,NY,,44.3522222,-73.5436111 66 | Howland,NY,,43.0791667,-76.6827778 67 | 
Lake Grove,NY,10715,40.8527778,-73.1155556 68 | Penfield Center,NY,,43.1672222,-77.4313889 69 | Comet,OH,,39.1158333,-82.5511111 70 | Little Mountain,OH,,41.6402778,-81.2819444 71 | Mason,OH,30988,39.3600000,-84.3100000 72 | Siverly,OH,,39.3488889,-82.5000000 73 | Gladstone,OR,12249,45.3808333,-122.5936111 74 | Gresham,OR,98851,45.4983333,-122.4302778 75 | Ephrata,PA,13182,40.1797222,-76.1791667 76 | Mount Airy,PA,,41.0941667,-79.5222222 77 | Uhlerstown,PA,,40.5252778,-75.0736111 78 | Weis Library,PA,,42.0483333,-80.1700000 79 | Woodcock,PA,,41.7547222,-80.0858333 80 | Rock Hill,SC,59766,34.9247222,-81.0252778 81 | Summerville,SC,34958,33.0183333,-80.1758333 82 | Wolfton,SC,,33.5883333,-80.9819444 83 | Avenger Village,TX,,32.4594444,-100.4552778 84 | Brashear,TX,,33.1186111,-95.7333333 85 | Dumas Junction,TX,,35.2127778,-101.8019444 86 | Edinburg,TX,60509,26.3013889,-98.1630556 87 | Eichelberger Crossing,TX,,31.6166667,-97.3077778 88 | Euless,TX,53221,32.8369444,-97.0816667 89 | Greenock,TX,,31.7661111,-97.3452778 90 | Greenville,TX,25382,33.1383333,-96.1105556 91 | Highland Village,TX,15365,33.0916667,-97.0463889 92 | Maxey Town,TX,,31.4433333,-94.1225000 93 | Pharr,TX,60687,26.1944444,-98.1833333 94 | Snyder,TX,10600,32.7177778,-100.9172222 95 | Webster,TX,9038,29.5375000,-95.1180556 96 | Wild Peach Village,TX,,29.0833333,-95.6336111 97 | Misty Hills Numbers 1-7,UT,,40.6416667,-111.9955556 98 | Pleasant Grove,UT,24449,40.3641667,-111.7377778 99 | Rio Vista,VA,,37.5688889,-77.5230556 100 | Tabernacle,VA,,37.4230556,-76.2966667 101 | Cody,WY,9161,44.5263889,-109.0558333 102 | -------------------------------------------------------------------------------- /examples/data/uspop-null.csv: -------------------------------------------------------------------------------- 1 | City,State,Population,Latitude,Longitude 2 | Davidsons Landing,AK,,65.2419444,-165.2716667 3 | Kenai,AK,7610,60.5544444,-151.2583333 4 | Oakman,AL,,33.7133333,-87.3886111 5 | Richards 
Crossroads,AL,,31.7369444,-85.2644444 6 | Sandfort,AL,,32.3380556,-85.2233333 7 | Selma,AL,18980,32.4072222,-87.0211111 8 | Shadow Oaks Addition,AR,,34.9555556,-91.9475000 9 | Summerville,AR,,33.5202778,-92.3555556 10 | El Mirage,AZ,32308,33.6130556,-112.3238889 11 | Willow Springs,AZ,,36.1894444,-111.3930556 12 | Colton,CA,52335,34.0738889,-117.3127778 13 | Fontana,CA,169160,34.0922222,-117.4341667 14 | Fountain Valley,CA,56133,33.7091667,-117.9527778 15 | Kings Beach,CA,,39.2377778,-120.0255556 16 | Milpitas,CA,62636,37.4283333,-121.9055556 17 | Mokelumne City,CA,,38.2530556,-121.4380556 18 | Mount Eden,CA,,37.6361111,-122.0988889 19 | San Clemente,CA,62272,33.4269444,-117.6111111 20 | Seal Beach,CA,24404,33.7413889,-118.1038889 21 | West Hollywood,CA,37031,34.0900000,-118.3608333 22 | Bridgeport,CT,139090,41.1669444,-73.2052778 23 | Orange,CT,13860,41.2783333,-73.0261111 24 | Azalea Park,FL,12347,28.5408333,-81.3008333 25 | Bratt,FL,,30.9655556,-87.4275000 26 | Cutler Ridge,FL,26831,25.5802778,-80.3469444 27 | Dunn Creek,FL,,30.4861111,-81.5908333 28 | South Daytona,FL,14451,29.1655556,-81.0047222 29 | Brickhouse,GA,,33.7750000,-82.8108333 30 | Lakeview Heights,GA,,33.6188889,-84.4505556 31 | Perry,GA,11234,32.4580556,-83.7316667 32 | Roswell,GA,77218,34.0230556,-84.3616667 33 | Warfield,GA,,33.2994444,-83.3838889 34 | Kirkman,IA,,41.7286111,-95.2650000 35 | Travers,ID,,42.6091667,-113.7361111 36 | Calhoun,IL,,38.6502778,-88.0436111 37 | Cleone,IL,,39.4230556,-87.9075000 38 | Deerfield,IL,19618,42.1711111,-87.8444444 39 | Highbank Town,IN,,38.5144444,-87.1502778 40 | Indianapolis,IN,773283,39.7683333,-86.1580556 41 | Leona,KS,,39.7872222,-95.3213889 42 | New Salem,KS,,37.3105556,-96.8950000 43 | Flint Springs,KY,NULL,37.3433333,-86.7136111 44 | Harvey,LA,22383,29.9033333,-90.0772222 45 | Jennings,LA,10547,30.2222222,-92.6569444 46 | King,LA,,32.2405556,-91.1213889 47 | Opelousas,LA,22835,30.5333333,-92.0813889 48 | Reading,MA,23441,42.5255556,-71.0958333 49 | 
Mount Airy,MD,8714,39.3761111,-77.1550000 50 | Auburn,ME,23488,44.0977778,-70.2316667 51 | Ellsworth,ME,7055,44.5433333,-68.4200000 52 | Sturgis,MI,11081,41.7991667,-85.4191667 53 | Brooklyn Center,MN,27718,45.0761111,-93.3325000 54 | Coon Rapids,MN,62528,45.1200000,-93.2875000 55 | Moark,MO,NULL,36.3825000,-89.9888889 56 | Owens,MO,,37.2188889,-92.4027778 57 | Natchez,MS,17118,31.5602778,-91.4030556 58 | Rogers,NE,,41.4652778,-96.9147222 59 | Hollis,NH,7711,42.7430556,-71.5922222 60 | Bayonne,NJ,59878,40.6686111,-74.1147222 61 | Belleville,NJ,36878,40.7936111,-74.1505556 62 | Frenchtown,NJ,NULL,40.5261111,-75.0619444 63 | Sharp,NJ,,40.0922222,-74.7427778 64 | Los Ranchos de Albuquerque,NM,5184,35.1619444,-106.6422222 65 | Deerhead,NY,,44.3522222,-73.5436111 66 | Howland,NY,,43.0791667,-76.6827778 67 | Lake Grove,NY,10715,40.8527778,-73.1155556 68 | Penfield Center,NY,,43.1672222,-77.4313889 69 | Comet,OH,,39.1158333,-82.5511111 70 | Little Mountain,OH,,41.6402778,-81.2819444 71 | Mason,OH,30988,39.3600000,-84.3100000 72 | Siverly,OH,,39.3488889,-82.5000000 73 | Gladstone,OR,12249,45.3808333,-122.5936111 74 | Gresham,OR,98851,45.4983333,-122.4302778 75 | Ephrata,PA,13182,40.1797222,-76.1791667 76 | Mount Airy,PA,,41.0941667,-79.5222222 77 | Uhlerstown,PA,,40.5252778,-75.0736111 78 | Weis Library,PA,,42.0483333,-80.1700000 79 | Woodcock,PA,,41.7547222,-80.0858333 80 | Rock Hill,SC,59766,34.9247222,-81.0252778 81 | Summerville,SC,34958,33.0183333,-80.1758333 82 | Wolfton,SC,,33.5883333,-80.9819444 83 | Avenger Village,TX,,32.4594444,-100.4552778 84 | Brashear,TX,,33.1186111,-95.7333333 85 | Dumas Junction,TX,,35.2127778,-101.8019444 86 | Edinburg,TX,60509,26.3013889,-98.1630556 87 | Eichelberger Crossing,TX,,31.6166667,-97.3077778 88 | Euless,TX,53221,32.8369444,-97.0816667 89 | Greenock,TX,,31.7661111,-97.3452778 90 | Greenville,TX,25382,33.1383333,-96.1105556 91 | Highland Village,TX,15365,33.0916667,-97.0463889 92 | Maxey Town,TX,,31.4433333,-94.1225000 93 | 
Pharr,TX,60687,26.1944444,-98.1833333 94 | Snyder,TX,10600,32.7177778,-100.9172222 95 | Webster,TX,9038,29.5375000,-95.1180556 96 | Wild Peach Village,TX,,29.0833333,-95.6336111 97 | Misty Hills Numbers 1-7,UT,,40.6416667,-111.9955556 98 | Pleasant Grove,UT,24449,40.3641667,-111.7377778 99 | Rio Vista,VA,,37.5688889,-77.5230556 100 | Tabernacle,VA,,37.4230556,-76.2966667 101 | Cody,WY,9161,44.5263889,-109.0558333 102 | -------------------------------------------------------------------------------- /csv-core/src/lib.rs: -------------------------------------------------------------------------------- 1 | /*! 2 | `csv-core` provides a fast CSV reader and writer for use in a `no_std` context. 3 | 4 | This crate will never use the standard library. `no_std` support is therefore 5 | enabled by default. 6 | 7 | If you're looking for more ergonomic CSV parsing routines, please use the 8 | [`csv`](https://docs.rs/csv) crate. 9 | 10 | # Overview 11 | 12 | This crate has two primary APIs. The `Reader` API provides a CSV parser, and 13 | the `Writer` API provides a CSV writer. 14 | 15 | # Example: reading CSV 16 | 17 | This example shows how to count the number of fields and records in CSV data. 18 | 19 | ``` 20 | use csv_core::{Reader, ReadFieldResult}; 21 | 22 | let data = " 23 | foo,bar,baz 24 | a,b,c 25 | xxx,yyy,zzz 26 | "; 27 | 28 | let mut rdr = Reader::new(); 29 | let mut bytes = data.as_bytes(); 30 | let mut count_fields = 0; 31 | let mut count_records = 0; 32 | loop { 33 | // We skip handling the output since we don't need it for counting. 
34 | let (result, nin, _) = rdr.read_field(bytes, &mut [0; 1024]); 35 | bytes = &bytes[nin..]; 36 | match result { 37 | ReadFieldResult::InputEmpty => {}, 38 | ReadFieldResult::OutputFull => panic!("field too large"), 39 | ReadFieldResult::Field { record_end } => { 40 | count_fields += 1; 41 | if record_end { 42 | count_records += 1; 43 | } 44 | } 45 | ReadFieldResult::End => break, 46 | } 47 | } 48 | assert_eq!(3, count_records); 49 | assert_eq!(9, count_fields); 50 | ``` 51 | 52 | # Example: writing CSV 53 | 54 | This example shows how to use the `Writer` API to write valid CSV data. Proper 55 | quoting is handled automatically. 56 | 57 | ``` 58 | use csv_core::Writer; 59 | 60 | // This is where we'll write out CSV data. 61 | let mut out = &mut [0; 1024]; 62 | // The number of bytes we've written to `out`. 63 | let mut nout = 0; 64 | // Create a CSV writer with a default configuration. 65 | let mut wtr = Writer::new(); 66 | 67 | // Write a single field. Note that we ignore the `WriteResult` and the number 68 | // of input bytes consumed since we're doing this by hand. 69 | let (_, _, n) = wtr.field(&b"foo"[..], &mut out[nout..]); 70 | nout += n; 71 | 72 | // Write a delimiter and then another field that requires quotes. 73 | let (_, n) = wtr.delimiter(&mut out[nout..]); 74 | nout += n; 75 | let (_, _, n) = wtr.field(&b"bar,baz"[..], &mut out[nout..]); 76 | nout += n; 77 | let (_, n) = wtr.terminator(&mut out[nout..]); 78 | nout += n; 79 | 80 | // Now write another record. 81 | let (_, _, n) = wtr.field(&b"a \"b\" c"[..], &mut out[nout..]); 82 | nout += n; 83 | let (_, n) = wtr.delimiter(&mut out[nout..]); 84 | nout += n; 85 | let (_, _, n) = wtr.field(&b"quux"[..], &mut out[nout..]); 86 | nout += n; 87 | 88 | // We must always call finish once done writing. 89 | // This ensures that any closing quotes are written. 
90 | let (_, n) = wtr.finish(&mut out[nout..]); 91 | nout += n; 92 | 93 | assert_eq!(&out[..nout], &b"\ 94 | foo,\"bar,baz\" 95 | \"a \"\"b\"\" c\",quux"[..]); 96 | ``` 97 | */ 98 | 99 | #![deny(missing_docs)] 100 | #![no_std] 101 | 102 | pub use crate::reader::{ 103 | ReadFieldNoCopyResult, ReadFieldResult, ReadRecordNoCopyResult, 104 | ReadRecordResult, Reader, ReaderBuilder, 105 | }; 106 | pub use crate::writer::{ 107 | is_non_numeric, quote, WriteResult, Writer, WriterBuilder, 108 | }; 109 | 110 | mod reader; 111 | mod writer; 112 | 113 | /// A record terminator. 114 | /// 115 | /// Use this to specify the record terminator while parsing CSV. The default is 116 | /// CRLF, which treats `\r`, `\n` or `\r\n` as a single record terminator. 117 | #[derive(Clone, Copy, Debug, Default)] 118 | #[non_exhaustive] 119 | pub enum Terminator { 120 | /// Parses `\r`, `\n` or `\r\n` as a single record terminator. 121 | #[default] 122 | CRLF, 123 | /// Parses the byte given as a record terminator. 124 | Any(u8), 125 | } 126 | 127 | impl Terminator { 128 | /// Checks whether the terminator is set to CRLF. 129 | fn is_crlf(&self) -> bool { 130 | match *self { 131 | Terminator::CRLF => true, 132 | Terminator::Any(_) => false, 133 | } 134 | } 135 | 136 | fn equals(&self, other: u8) -> bool { 137 | match *self { 138 | Terminator::CRLF => other == b'\r' || other == b'\n', 139 | Terminator::Any(b) => other == b, 140 | } 141 | } 142 | } 143 | 144 | /// The quoting style to use when writing CSV data. 145 | #[derive(Clone, Copy, Debug, Default)] 146 | #[non_exhaustive] 147 | pub enum QuoteStyle { 148 | /// This puts quotes around every field. Always. 149 | Always, 150 | /// This puts quotes around fields only when necessary. 151 | /// 152 | /// They are necessary when fields contain a quote, delimiter or record 153 | /// terminator. Quotes are also necessary when writing an empty record 154 | /// (which is indistinguishable from a record with one empty field). 
155 | /// 156 | /// This is the default. 157 | #[default] 158 | Necessary, 159 | /// This puts quotes around all fields that are non-numeric. Namely, when 160 | /// writing a field that does not parse as a valid float or integer, then 161 | /// quotes will be used even if they aren't strictly necessary. 162 | NonNumeric, 163 | /// This *never* writes quotes, even if it would produce invalid CSV data. 164 | Never, 165 | } 166 | -------------------------------------------------------------------------------- /src/cookbook.rs: -------------------------------------------------------------------------------- 1 | /*! 2 | A cookbook of examples for CSV reading and writing. 3 | 4 | # List of examples 5 | 6 | This is a list of examples that follow. Each of them can be found in the 7 | `examples` directory of the 8 | [`rust-csv`](https://github.com/BurntSushi/rust-csv) 9 | repository. 10 | 11 | For **reading** CSV: 12 | 13 | 1. [Basic](#reading-basic) 14 | 2. [With Serde](#reading-with-serde) 15 | 3. [Setting a different delimiter](#reading-setting-a-different-delimiter) 16 | 4. [Without headers](#reading-without-headers) 17 | 18 | For **writing** CSV: 19 | 20 | 5. [Basic](#writing-basic) 21 | 6. [With Serde](#writing-with-serde) 22 | 23 | Please 24 | [submit a pull request](https://github.com/BurntSushi/rust-csv/pulls) 25 | if you're interested in adding an example to this list! 26 | 27 | # Reading: basic 28 | 29 | This example shows how to read CSV data from stdin and print each record to 30 | stdout. 31 | 32 | ```no_run 33 | # //cookbook-read-basic.rs 34 | use std::{error::Error, io, process}; 35 | 36 | fn example() -> Result<(), Box> { 37 | // Build the CSV reader and iterate over each record. 38 | let mut rdr = csv::Reader::from_reader(io::stdin()); 39 | for result in rdr.records() { 40 | // The iterator yields Result, so we check the 41 | // error here.. 
42 | let record = result?; 43 | println!("{:?}", record); 44 | } 45 | Ok(()) 46 | } 47 | 48 | fn main() { 49 | if let Err(err) = example() { 50 | println!("error running example: {}", err); 51 | process::exit(1); 52 | } 53 | } 54 | ``` 55 | 56 | The above example can be run like so: 57 | 58 | ```ignore 59 | $ git clone git://github.com/BurntSushi/rust-csv 60 | $ cd rust-csv 61 | $ cargo run --example cookbook-read-basic < examples/data/smallpop.csv 62 | ``` 63 | 64 | # Reading: with Serde 65 | 66 | This is like the previous example, except it shows how to deserialize each 67 | record into a struct type that you define. 68 | 69 | For more examples and details on how Serde deserialization works, see the 70 | [`Reader::deserialize`](../struct.Reader.html#method.deserialize) 71 | method. 72 | 73 | ```no_run 74 | # //cookbook-read-serde.rs 75 | # #![allow(dead_code)] 76 | use std::{error::Error, io, process}; 77 | 78 | use serde::Deserialize; 79 | 80 | // By default, struct field names are deserialized based on the position of 81 | // a corresponding field in the CSV data's header record. 82 | #[derive(Debug, Deserialize)] 83 | struct Record { 84 | city: String, 85 | region: String, 86 | country: String, 87 | population: Option, 88 | } 89 | 90 | fn example() -> Result<(), Box> { 91 | let mut rdr = csv::Reader::from_reader(io::stdin()); 92 | for result in rdr.deserialize() { 93 | // Notice that we need to provide a type hint for automatic 94 | // deserialization. 
95 | let record: Record = result?; 96 | println!("{:?}", record); 97 | } 98 | Ok(()) 99 | } 100 | 101 | fn main() { 102 | if let Err(err) = example() { 103 | println!("error running example: {}", err); 104 | process::exit(1); 105 | } 106 | } 107 | ``` 108 | 109 | The above example can be run like so: 110 | 111 | ```ignore 112 | $ git clone git://github.com/BurntSushi/rust-csv 113 | $ cd rust-csv 114 | $ cargo run --example cookbook-read-serde < examples/data/smallpop.csv 115 | ``` 116 | 117 | # Reading: setting a different delimiter 118 | 119 | This example shows how to read CSV data from stdin where fields are separated 120 | by `:` instead of `,`. 121 | 122 | ```no_run 123 | # //cookbook-read-colon.rs 124 | use std::{error::Error, io, process}; 125 | 126 | fn example() -> Result<(), Box> { 127 | let mut rdr = csv::ReaderBuilder::new() 128 | .delimiter(b':') 129 | .from_reader(io::stdin()); 130 | for result in rdr.records() { 131 | let record = result?; 132 | println!("{:?}", record); 133 | } 134 | Ok(()) 135 | } 136 | 137 | fn main() { 138 | if let Err(err) = example() { 139 | println!("error running example: {}", err); 140 | process::exit(1); 141 | } 142 | } 143 | ``` 144 | 145 | The above example can be run like so: 146 | 147 | ```ignore 148 | $ git clone git://github.com/BurntSushi/rust-csv 149 | $ cd rust-csv 150 | $ cargo run --example cookbook-read-colon < examples/data/smallpop-colon.csv 151 | ``` 152 | 153 | # Reading: without headers 154 | 155 | The CSV reader in this crate assumes that CSV data has a header record by 156 | default, but the setting can be toggled. When enabled, the first record in 157 | CSV data in interpreted as the header record and is skipped. When disabled, the 158 | first record is not skipped. This example shows how to disable that setting. 
159 | 160 | ```no_run 161 | # //cookbook-read-no-headers.rs 162 | use std::{error::Error, io, process}; 163 | 164 | fn example() -> Result<(), Box> { 165 | let mut rdr = csv::ReaderBuilder::new() 166 | .has_headers(false) 167 | .from_reader(io::stdin()); 168 | for result in rdr.records() { 169 | let record = result?; 170 | println!("{:?}", record); 171 | } 172 | Ok(()) 173 | } 174 | 175 | fn main() { 176 | if let Err(err) = example() { 177 | println!("error running example: {}", err); 178 | process::exit(1); 179 | } 180 | } 181 | ``` 182 | 183 | The above example can be run like so: 184 | 185 | ```ignore 186 | $ git clone git://github.com/BurntSushi/rust-csv 187 | $ cd rust-csv 188 | $ cargo run --example cookbook-read-no-headers < examples/data/smallpop-no-headers.csv 189 | ``` 190 | 191 | # Writing: basic 192 | 193 | This example shows how to write CSV data to stdout. 194 | 195 | ```no_run 196 | # //cookbook-write-basic.rs 197 | use std::{error::Error, io, process}; 198 | 199 | fn example() -> Result<(), Box> { 200 | let mut wtr = csv::Writer::from_writer(io::stdout()); 201 | 202 | // When writing records without Serde, the header record is written just 203 | // like any other record. 204 | wtr.write_record(["city", "region", "country", "population"])?; 205 | wtr.write_record(["Southborough", "MA", "United States", "9686"])?; 206 | wtr.write_record(["Northbridge", "MA", "United States", "14061"])?; 207 | wtr.flush()?; 208 | Ok(()) 209 | } 210 | 211 | fn main() { 212 | if let Err(err) = example() { 213 | println!("error running example: {}", err); 214 | process::exit(1); 215 | } 216 | } 217 | ``` 218 | 219 | The above example can be run like so: 220 | 221 | ```ignore 222 | $ git clone git://github.com/BurntSushi/rust-csv 223 | $ cd rust-csv 224 | $ cargo run --example cookbook-write-basic > /tmp/simplepop.csv 225 | ``` 226 | 227 | # Writing: with Serde 228 | 229 | This example shows how to write CSV data to stdout with Serde. 
Namely, we 230 | represent each record using a custom struct that we define. In this example, 231 | headers are written automatically. 232 | 233 | ```no_run 234 | # //cookbook-write-serde.rs 235 | use std::{error::Error, io, process}; 236 | 237 | use serde::Serialize; 238 | 239 | #[derive(Debug, Serialize)] 240 | struct Record { 241 | city: String, 242 | region: String, 243 | country: String, 244 | population: Option, 245 | } 246 | 247 | fn example() -> Result<(), Box> { 248 | let mut wtr = csv::Writer::from_writer(io::stdout()); 249 | 250 | // When writing records with Serde using structs, the header row is written 251 | // automatically. 252 | wtr.serialize(Record { 253 | city: "Southborough".to_string(), 254 | region: "MA".to_string(), 255 | country: "United States".to_string(), 256 | population: Some(9686), 257 | })?; 258 | wtr.serialize(Record { 259 | city: "Northbridge".to_string(), 260 | region: "MA".to_string(), 261 | country: "United States".to_string(), 262 | population: Some(14061), 263 | })?; 264 | wtr.flush()?; 265 | Ok(()) 266 | } 267 | 268 | fn main() { 269 | if let Err(err) = example() { 270 | println!("error running example: {}", err); 271 | process::exit(1); 272 | } 273 | } 274 | ``` 275 | 276 | The above example can be run like so: 277 | 278 | ```ignore 279 | $ git clone git://github.com/BurntSushi/rust-csv 280 | $ cd rust-csv 281 | $ cargo run --example cookbook-write-serde > /tmp/simplepop.csv 282 | ``` 283 | */ 284 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | /*! 2 | The `csv` crate provides a fast and flexible CSV reader and writer, with 3 | support for Serde. 4 | 5 | The [tutorial](tutorial/index.html) is a good place to start if you're new to 6 | Rust. 7 | 8 | The [cookbook](cookbook/index.html) will give you a variety of complete Rust 9 | programs that do CSV reading and writing. 
10 | 11 | # Brief overview 12 | 13 | **If you're new to Rust**, you might find the 14 | [tutorial](tutorial/index.html) 15 | to be a good place to start. 16 | 17 | The primary types in this crate are 18 | [`Reader`](struct.Reader.html) 19 | and 20 | [`Writer`](struct.Writer.html), 21 | for reading and writing CSV data respectively. 22 | Correspondingly, to support CSV data with custom field or record delimiters 23 | (among many other things), you should use either a 24 | [`ReaderBuilder`](struct.ReaderBuilder.html) 25 | or a 26 | [`WriterBuilder`](struct.WriterBuilder.html), 27 | depending on whether you're reading or writing CSV data. 28 | 29 | Unless you're using Serde, the standard CSV record types are 30 | [`StringRecord`](struct.StringRecord.html) 31 | and 32 | [`ByteRecord`](struct.ByteRecord.html). 33 | `StringRecord` should be used when you know your data to be valid UTF-8. 34 | For data that may be invalid UTF-8, `ByteRecord` is suitable. 35 | 36 | Finally, the set of errors is described by the 37 | [`Error`](struct.Error.html) 38 | type. 39 | 40 | The rest of the types in this crate mostly correspond to more detailed errors, 41 | position information, configuration knobs or iterator types. 42 | 43 | # Setup 44 | 45 | Run `cargo add csv` to add the latest version of the `csv` crate to your 46 | Cargo.toml. 47 | 48 | If you want to use Serde's custom derive functionality on your custom structs, 49 | then run `cargo add serde --features derive` to add the `serde` crate with its 50 | `derive` feature enabled to your `Cargo.toml`. 51 | 52 | # Example 53 | 54 | This example shows how to read CSV data from stdin and print each record to 55 | stdout. 56 | 57 | There are more examples in the [cookbook](cookbook/index.html). 58 | 59 | ```no_run 60 | use std::{error::Error, io, process}; 61 | 62 | fn example() -> Result<(), Box> { 63 | // Build the CSV reader and iterate over each record. 
64 | let mut rdr = csv::Reader::from_reader(io::stdin()); 65 | for result in rdr.records() { 66 | // The iterator yields Result, so we check the 67 | // error here. 68 | let record = result?; 69 | println!("{:?}", record); 70 | } 71 | Ok(()) 72 | } 73 | 74 | fn main() { 75 | if let Err(err) = example() { 76 | println!("error running example: {}", err); 77 | process::exit(1); 78 | } 79 | } 80 | ``` 81 | 82 | The above example can be run like so: 83 | 84 | ```ignore 85 | $ git clone git://github.com/BurntSushi/rust-csv 86 | $ cd rust-csv 87 | $ cargo run --example cookbook-read-basic < examples/data/smallpop.csv 88 | ``` 89 | 90 | # Example with Serde 91 | 92 | This example shows how to read CSV data from stdin into your own custom struct. 93 | By default, the member names of the struct are matched with the values in the 94 | header record of your CSV data. 95 | 96 | ```no_run 97 | use std::{error::Error, io, process}; 98 | 99 | #[derive(Debug, serde::Deserialize)] 100 | struct Record { 101 | city: String, 102 | region: String, 103 | country: String, 104 | population: Option, 105 | } 106 | 107 | fn example() -> Result<(), Box> { 108 | let mut rdr = csv::Reader::from_reader(io::stdin()); 109 | for result in rdr.deserialize() { 110 | // Notice that we need to provide a type hint for automatic 111 | // deserialization. 
112 | let record: Record = result?; 113 | println!("{:?}", record); 114 | } 115 | Ok(()) 116 | } 117 | 118 | fn main() { 119 | if let Err(err) = example() { 120 | println!("error running example: {}", err); 121 | process::exit(1); 122 | } 123 | } 124 | ``` 125 | 126 | The above example can be run like so: 127 | 128 | ```ignore 129 | $ git clone git://github.com/BurntSushi/rust-csv 130 | $ cd rust-csv 131 | $ cargo run --example cookbook-read-serde < examples/data/smallpop.csv 132 | ``` 133 | 134 | */ 135 | 136 | #![deny(missing_docs)] 137 | 138 | use std::result; 139 | 140 | use serde_core::{Deserialize, Deserializer}; 141 | 142 | pub use crate::{ 143 | byte_record::{ByteRecord, ByteRecordIter, Position}, 144 | deserializer::{DeserializeError, DeserializeErrorKind}, 145 | error::{ 146 | Error, ErrorKind, FromUtf8Error, IntoInnerError, Result, Utf8Error, 147 | }, 148 | reader::{ 149 | ByteRecordsIntoIter, ByteRecordsIter, DeserializeRecordsIntoIter, 150 | DeserializeRecordsIter, Reader, ReaderBuilder, StringRecordsIntoIter, 151 | StringRecordsIter, 152 | }, 153 | string_record::{StringRecord, StringRecordIter}, 154 | writer::{Writer, WriterBuilder}, 155 | }; 156 | 157 | mod byte_record; 158 | pub mod cookbook; 159 | mod debug; 160 | mod deserializer; 161 | mod error; 162 | mod reader; 163 | mod serializer; 164 | mod string_record; 165 | pub mod tutorial; 166 | mod writer; 167 | 168 | /// The quoting style to use when writing CSV data. 169 | #[derive(Clone, Copy, Debug, Default)] 170 | #[non_exhaustive] 171 | pub enum QuoteStyle { 172 | /// This puts quotes around every field. Always. 173 | Always, 174 | /// This puts quotes around fields only when necessary. 175 | /// 176 | /// They are necessary when fields contain a quote, delimiter or record 177 | /// terminator. Quotes are also necessary when writing an empty record 178 | /// (which is indistinguishable from a record with one empty field). 179 | /// 180 | /// This is the default. 
181 | #[default] 182 | Necessary, 183 | /// This puts quotes around all fields that are non-numeric. Namely, when 184 | /// writing a field that does not parse as a valid float or integer, then 185 | /// quotes will be used even if they aren't strictly necessary. 186 | NonNumeric, 187 | /// This *never* writes quotes, even if it would produce invalid CSV data. 188 | Never, 189 | } 190 | 191 | impl QuoteStyle { 192 | fn to_core(self) -> csv_core::QuoteStyle { 193 | match self { 194 | QuoteStyle::Always => csv_core::QuoteStyle::Always, 195 | QuoteStyle::Necessary => csv_core::QuoteStyle::Necessary, 196 | QuoteStyle::NonNumeric => csv_core::QuoteStyle::NonNumeric, 197 | QuoteStyle::Never => csv_core::QuoteStyle::Never, 198 | } 199 | } 200 | } 201 | 202 | /// A record terminator. 203 | /// 204 | /// Use this to specify the record terminator while parsing CSV. The default is 205 | /// CRLF, which treats `\r`, `\n` or `\r\n` as a single record terminator. 206 | #[derive(Clone, Copy, Debug, Default)] 207 | #[non_exhaustive] 208 | pub enum Terminator { 209 | /// Parses `\r`, `\n` or `\r\n` as a single record terminator. 210 | #[default] 211 | CRLF, 212 | /// Parses the byte given as a record terminator. 213 | Any(u8), 214 | } 215 | 216 | impl Terminator { 217 | /// Convert this to the csv_core type of the same name. 218 | fn to_core(self) -> csv_core::Terminator { 219 | match self { 220 | Terminator::CRLF => csv_core::Terminator::CRLF, 221 | Terminator::Any(b) => csv_core::Terminator::Any(b), 222 | } 223 | } 224 | } 225 | 226 | /// The whitespace preservation behaviour when reading CSV data. 227 | #[derive(Clone, Copy, Debug, Default, PartialEq)] 228 | #[non_exhaustive] 229 | pub enum Trim { 230 | /// Preserves fields and headers. This is the default. 231 | #[default] 232 | None, 233 | /// Trim whitespace from headers. 234 | Headers, 235 | /// Trim whitespace from fields, but not headers. 236 | Fields, 237 | /// Trim whitespace from fields and headers. 
238 | All, 239 | } 240 | 241 | impl Trim { 242 | fn should_trim_fields(&self) -> bool { 243 | self == &Trim::Fields || self == &Trim::All 244 | } 245 | 246 | fn should_trim_headers(&self) -> bool { 247 | self == &Trim::Headers || self == &Trim::All 248 | } 249 | } 250 | 251 | /// A custom Serde deserializer for possibly invalid `Option` fields. 252 | /// 253 | /// When deserializing CSV data, it is sometimes desirable to simply ignore 254 | /// fields with invalid data. For example, there might be a field that is 255 | /// usually a number, but will occasionally contain garbage data that causes 256 | /// number parsing to fail. 257 | /// 258 | /// You might be inclined to use, say, `Option` for fields such at this. 259 | /// By default, however, `Option` will either capture *empty* fields with 260 | /// `None` or valid numeric fields with `Some(the_number)`. If the field is 261 | /// non-empty and not a valid number, then deserialization will return an error 262 | /// instead of using `None`. 263 | /// 264 | /// This function allows you to override this default behavior. Namely, if 265 | /// `Option` is deserialized with non-empty but invalid data, then the value 266 | /// will be `None` and the error will be ignored. 267 | /// 268 | /// # Example 269 | /// 270 | /// This example shows how to parse CSV records with numerical data, even if 271 | /// some numerical data is absent or invalid. Without the 272 | /// `serde(deserialize_with = "...")` annotations, this example would return 273 | /// an error. 
274 | /// 275 | /// ``` 276 | /// use std::error::Error; 277 | /// 278 | /// #[derive(Debug, serde::Deserialize, Eq, PartialEq)] 279 | /// struct Row { 280 | /// #[serde(deserialize_with = "csv::invalid_option")] 281 | /// a: Option, 282 | /// #[serde(deserialize_with = "csv::invalid_option")] 283 | /// b: Option, 284 | /// #[serde(deserialize_with = "csv::invalid_option")] 285 | /// c: Option, 286 | /// } 287 | /// 288 | /// # fn main() { example().unwrap(); } 289 | /// fn example() -> Result<(), Box> { 290 | /// let data = "\ 291 | /// a,b,c 292 | /// 5,\"\",xyz 293 | /// "; 294 | /// let mut rdr = csv::Reader::from_reader(data.as_bytes()); 295 | /// if let Some(result) = rdr.deserialize().next() { 296 | /// let record: Row = result?; 297 | /// assert_eq!(record, Row { a: Some(5), b: None, c: None }); 298 | /// Ok(()) 299 | /// } else { 300 | /// Err(From::from("expected at least one record but got none")) 301 | /// } 302 | /// } 303 | /// ``` 304 | pub fn invalid_option<'de, D, T>(de: D) -> result::Result, D::Error> 305 | where 306 | D: Deserializer<'de>, 307 | Option: Deserialize<'de>, 308 | { 309 | Option::::deserialize(de).or_else(|_| Ok(None)) 310 | } 311 | -------------------------------------------------------------------------------- /src/error.rs: -------------------------------------------------------------------------------- 1 | use std::{error::Error as StdError, fmt, io, result}; 2 | 3 | use crate::{ 4 | byte_record::{ByteRecord, Position}, 5 | deserializer::DeserializeError, 6 | }; 7 | 8 | /// A type alias for `Result`. 9 | pub type Result = result::Result; 10 | 11 | /// An error that can occur when processing CSV data. 12 | /// 13 | /// This error can happen when writing or reading CSV data. 14 | /// 15 | /// There are some important scenarios where an error is impossible to occur. 
16 | /// For example, if a CSV reader is used on an in-memory buffer with the 17 | /// `flexible` option enabled and one is reading records as raw byte strings, 18 | /// then no error can occur. 19 | #[derive(Debug)] 20 | pub struct Error(Box); 21 | 22 | impl Error { 23 | /// A crate private constructor for `Error`. 24 | pub(crate) fn new(kind: ErrorKind) -> Error { 25 | Error(Box::new(kind)) 26 | } 27 | 28 | /// Return the specific type of this error. 29 | pub fn kind(&self) -> &ErrorKind { 30 | &self.0 31 | } 32 | 33 | /// Unwrap this error into its underlying type. 34 | pub fn into_kind(self) -> ErrorKind { 35 | *self.0 36 | } 37 | 38 | /// Returns true if this is an I/O error. 39 | /// 40 | /// If this is true, the underlying `ErrorKind` is guaranteed to be 41 | /// `ErrorKind::Io`. 42 | pub fn is_io_error(&self) -> bool { 43 | matches!(*self.0, ErrorKind::Io(_)) 44 | } 45 | 46 | /// Return the position for this error, if one exists. 47 | /// 48 | /// This is a convenience function that permits callers to easily access 49 | /// the position on an error without doing case analysis on `ErrorKind`. 50 | pub fn position(&self) -> Option<&Position> { 51 | self.0.position() 52 | } 53 | } 54 | 55 | /// The specific type of an error. 56 | #[derive(Debug)] 57 | #[non_exhaustive] 58 | pub enum ErrorKind { 59 | /// An I/O error that occurred while reading CSV data. 60 | Io(io::Error), 61 | /// A UTF-8 decoding error that occured while reading CSV data into Rust 62 | /// `String`s. 63 | Utf8 { 64 | /// The position of the record in which this error occurred, if 65 | /// available. 66 | pos: Option, 67 | /// The corresponding UTF-8 error. 68 | err: Utf8Error, 69 | }, 70 | /// This error occurs when two records with an unequal number of fields 71 | /// are found. This error only occurs when the `flexible` option in a 72 | /// CSV reader/writer is disabled. 
73 | UnequalLengths { 74 | /// The position of the first record with an unequal number of fields 75 | /// to the previous record, if available. 76 | pos: Option, 77 | /// The expected number of fields in a record. This is the number of 78 | /// fields in the record read prior to the record indicated by 79 | /// `pos`. 80 | expected_len: u64, 81 | /// The number of fields in the bad record. 82 | len: u64, 83 | }, 84 | /// This error occurs when either the `byte_headers` or `headers` methods 85 | /// are called on a CSV reader that was asked to `seek` before it parsed 86 | /// the first record. 87 | Seek, 88 | /// An error of this kind occurs only when using the Serde serializer. 89 | Serialize(String), 90 | /// An error of this kind occurs only when performing automatic 91 | /// deserialization with serde. 92 | Deserialize { 93 | /// The position of this error, if available. 94 | pos: Option, 95 | /// The deserialization error. 96 | err: DeserializeError, 97 | }, 98 | } 99 | 100 | impl ErrorKind { 101 | /// Return the position for this error, if one exists. 102 | /// 103 | /// This is a convenience function that permits callers to easily access 104 | /// the position on an error without doing case analysis on `ErrorKind`. 105 | pub fn position(&self) -> Option<&Position> { 106 | match *self { 107 | ErrorKind::Utf8 { ref pos, .. } => pos.as_ref(), 108 | ErrorKind::UnequalLengths { ref pos, .. } => pos.as_ref(), 109 | ErrorKind::Deserialize { ref pos, .. 
} => pos.as_ref(), 110 | _ => None, 111 | } 112 | } 113 | } 114 | 115 | impl From for Error { 116 | fn from(err: io::Error) -> Error { 117 | Error::new(ErrorKind::Io(err)) 118 | } 119 | } 120 | 121 | impl From for io::Error { 122 | fn from(err: Error) -> io::Error { 123 | io::Error::new(io::ErrorKind::Other, err) 124 | } 125 | } 126 | 127 | impl StdError for Error {} 128 | 129 | impl fmt::Display for Error { 130 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 131 | match *self.0 { 132 | ErrorKind::Io(ref err) => err.fmt(f), 133 | ErrorKind::Utf8 { pos: None, ref err } => { 134 | write!(f, "CSV parse error: field {}: {}", err.field(), err) 135 | } 136 | ErrorKind::Utf8 { pos: Some(ref pos), ref err } => write!( 137 | f, 138 | "CSV parse error: record {} \ 139 | (line {}, field: {}, byte: {}): {}", 140 | pos.record(), 141 | pos.line(), 142 | err.field(), 143 | pos.byte(), 144 | err 145 | ), 146 | ErrorKind::UnequalLengths { pos: None, expected_len, len } => { 147 | write!( 148 | f, 149 | "CSV error: \ 150 | found record with {} fields, but the previous record \ 151 | has {} fields", 152 | len, expected_len 153 | ) 154 | } 155 | ErrorKind::UnequalLengths { 156 | pos: Some(ref pos), 157 | expected_len, 158 | len, 159 | } => write!( 160 | f, 161 | "CSV error: record {} (line: {}, byte: {}): \ 162 | found record with {} fields, but the previous record \ 163 | has {} fields", 164 | pos.record(), 165 | pos.line(), 166 | pos.byte(), 167 | len, 168 | expected_len 169 | ), 170 | ErrorKind::Seek => write!( 171 | f, 172 | "CSV error: cannot access headers of CSV data \ 173 | when the parser was seeked before the first record \ 174 | could be read" 175 | ), 176 | ErrorKind::Serialize(ref err) => { 177 | write!(f, "CSV write error: {}", err) 178 | } 179 | ErrorKind::Deserialize { pos: None, ref err } => { 180 | write!(f, "CSV deserialize error: {}", err) 181 | } 182 | ErrorKind::Deserialize { pos: Some(ref pos), ref err } => write!( 183 | f, 184 | "CSV deserialize error: 
record {} \ 185 | (line: {}, byte: {}): {}", 186 | pos.record(), 187 | pos.line(), 188 | pos.byte(), 189 | err 190 | ), 191 | } 192 | } 193 | } 194 | 195 | /// A UTF-8 validation error during record conversion. 196 | /// 197 | /// This occurs when attempting to convert a `ByteRecord` into a 198 | /// `StringRecord`. 199 | #[derive(Clone, Debug, Eq, PartialEq)] 200 | pub struct FromUtf8Error { 201 | record: ByteRecord, 202 | err: Utf8Error, 203 | } 204 | 205 | impl FromUtf8Error { 206 | /// Create a new FromUtf8Error. 207 | pub(crate) fn new(record: ByteRecord, err: Utf8Error) -> FromUtf8Error { 208 | FromUtf8Error { record, err } 209 | } 210 | 211 | /// Access the underlying `ByteRecord` that failed UTF-8 validation. 212 | pub fn into_byte_record(self) -> ByteRecord { 213 | self.record 214 | } 215 | 216 | /// Access the underlying UTF-8 validation error. 217 | pub fn utf8_error(&self) -> &Utf8Error { 218 | &self.err 219 | } 220 | } 221 | 222 | impl fmt::Display for FromUtf8Error { 223 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 224 | self.err.fmt(f) 225 | } 226 | } 227 | 228 | impl StdError for FromUtf8Error { 229 | fn source(&self) -> Option<&(dyn StdError + 'static)> { 230 | Some(&self.err) 231 | } 232 | } 233 | 234 | /// A UTF-8 validation error. 235 | /// 236 | /// This occurs when attempting to convert a `ByteRecord` into a 237 | /// `StringRecord`. 238 | /// 239 | /// The error includes the index of the field that failed validation, and the 240 | /// last byte at which valid UTF-8 was verified. 241 | #[derive(Clone, Debug, Eq, PartialEq)] 242 | pub struct Utf8Error { 243 | /// The field index of a byte record in which UTF-8 validation failed. 244 | field: usize, 245 | /// The index into the given field up to which valid UTF-8 was verified. 246 | valid_up_to: usize, 247 | } 248 | 249 | /// Create a new UTF-8 error. 
250 | pub fn new_utf8_error(field: usize, valid_up_to: usize) -> Utf8Error { 251 | Utf8Error { field, valid_up_to } 252 | } 253 | 254 | impl Utf8Error { 255 | /// The field index of a byte record in which UTF-8 validation failed. 256 | pub fn field(&self) -> usize { 257 | self.field 258 | } 259 | /// The index into the given field up to which valid UTF-8 was verified. 260 | pub fn valid_up_to(&self) -> usize { 261 | self.valid_up_to 262 | } 263 | } 264 | 265 | impl StdError for Utf8Error {} 266 | 267 | impl fmt::Display for Utf8Error { 268 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 269 | write!( 270 | f, 271 | "invalid utf-8: invalid UTF-8 in field {} near byte index {}", 272 | self.field, self.valid_up_to 273 | ) 274 | } 275 | } 276 | 277 | /// `IntoInnerError` occurs when consuming a `Writer` fails. 278 | /// 279 | /// Consuming the `Writer` causes a flush to happen. If the flush fails, then 280 | /// this error is returned, which contains both the original `Writer` and 281 | /// the error that occurred. 282 | /// 283 | /// The type parameter `W` is the unconsumed writer. 284 | pub struct IntoInnerError { 285 | wtr: W, 286 | err: io::Error, 287 | } 288 | 289 | impl IntoInnerError { 290 | /// Creates a new `IntoInnerError`. 291 | /// 292 | /// (This is a visibility hack. It's public in this module, but not in the 293 | /// crate.) 294 | pub(crate) fn new(wtr: W, err: io::Error) -> IntoInnerError { 295 | IntoInnerError { wtr, err } 296 | } 297 | 298 | /// Returns the error which caused the call to `into_inner` to fail. 299 | /// 300 | /// This error was returned when attempting to flush the internal buffer. 301 | pub fn error(&self) -> &io::Error { 302 | &self.err 303 | } 304 | 305 | /// Consumes the [`IntoInnerError`] and returns the error which caused the 306 | /// call to [`Writer::into_inner`](crate::Writer::into_inner) to fail. 
307 | /// 308 | /// Unlike [`IntoInnerError::error`], this can be used to obtain ownership 309 | /// of the underlying error. 310 | pub fn into_error(self) -> io::Error { 311 | self.err 312 | } 313 | 314 | /// Returns the underlying writer which generated the error. 315 | /// 316 | /// The returned value can be used for error recovery, such as 317 | /// re-inspecting the buffer. 318 | pub fn into_inner(self) -> W { 319 | self.wtr 320 | } 321 | } 322 | 323 | impl StdError for IntoInnerError {} 324 | 325 | impl fmt::Display for IntoInnerError { 326 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 327 | self.err.fmt(f) 328 | } 329 | } 330 | 331 | impl fmt::Debug for IntoInnerError { 332 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 333 | self.err.fmt(f) 334 | } 335 | } 336 | -------------------------------------------------------------------------------- /csv-index/src/simple.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | 3 | use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; 4 | use csv; 5 | 6 | /// A simple index for random access to CSV records. 7 | /// 8 | /// This index permits seeking to the start of any CSV record with a constant 9 | /// number of operations. 10 | /// 11 | /// The format of the index is simplistic and amenable to serializing to disk. 12 | /// It consists of exactly `N+1` 64 bit big-endian integers, where `N` is the 13 | /// number of records in the CSV data that is indexed. Each `i`th integer 14 | /// corresponds to the approximate byte offset where the `i`th record in the 15 | /// CSV data begins. One additional integer is written to the end of the index 16 | /// which indicates the total number of records in the CSV data. 
17 | /// 18 | /// This indexing format does not store the line numbers of CSV records, so 19 | /// using the positions returned by this index to seek a CSV reader will likely 20 | /// cause any future line numbers reported by that reader to be incorrect. 21 | /// 22 | /// This format will never change. 23 | /// 24 | /// N.B. The format of this indexing scheme matches the format of the old the 25 | /// `csv::Indexed` type in pre-1.0 versions of the `csv` crate. 26 | pub struct RandomAccessSimple { 27 | rdr: R, 28 | len: u64, 29 | } 30 | 31 | impl RandomAccessSimple { 32 | /// Write a simple index to the given writer for the given CSV reader. 33 | /// 34 | /// If there was a problem reading CSV records or writing to the given 35 | /// writer, then an error is returned. 36 | /// 37 | /// That the given CSV reader is read as given until EOF. The index 38 | /// produced includes all records, including the first record even if the 39 | /// CSV reader is configured to interpret the first record as a header 40 | /// record. 41 | /// 42 | /// # Example: in memory index 43 | /// 44 | /// This example shows how to create a simple random access index, open it 45 | /// and query the number of records in the index. 
46 | /// 47 | /// ``` 48 | /// use std::io; 49 | /// use csv_index::RandomAccessSimple; 50 | /// 51 | /// # fn main() { example().unwrap(); } 52 | /// fn example() -> csv::Result<()> { 53 | /// let data = "\ 54 | /// city,country,pop 55 | /// Boston,United States,4628910 56 | /// Concord,United States,42695 57 | /// "; 58 | /// let mut rdr = csv::Reader::from_reader(data.as_bytes()); 59 | /// let mut wtr = io::Cursor::new(vec![]); 60 | /// RandomAccessSimple::create(&mut rdr, &mut wtr)?; 61 | /// 62 | /// let idx = RandomAccessSimple::open(wtr)?; 63 | /// assert_eq!(idx.len(), 3); 64 | /// Ok(()) 65 | /// } 66 | /// ``` 67 | /// 68 | /// # Example: file backed index 69 | /// 70 | /// This is like the previous example, but instead of creating the index 71 | /// in memory with `std::io::Cursor`, we write the index to a file. 72 | /// 73 | /// ```no_run 74 | /// use std::fs::File; 75 | /// use std::io; 76 | /// use csv_index::RandomAccessSimple; 77 | /// 78 | /// # fn main() { example().unwrap(); } 79 | /// fn example() -> csv::Result<()> { 80 | /// let data = "\ 81 | /// city,country,pop 82 | /// Boston,United States,4628910 83 | /// Concord,United States,42695 84 | /// "; 85 | /// let mut rdr = csv::Reader::from_reader(data.as_bytes()); 86 | /// let mut wtr = File::create("data.csv.idx")?; 87 | /// RandomAccessSimple::create(&mut rdr, &mut wtr)?; 88 | /// 89 | /// let fileidx = File::open("data.csv.idx")?; 90 | /// let idx = RandomAccessSimple::open(fileidx)?; 91 | /// assert_eq!(idx.len(), 3); 92 | /// Ok(()) 93 | /// } 94 | /// ``` 95 | pub fn create( 96 | rdr: &mut csv::Reader, 97 | mut wtr: W, 98 | ) -> csv::Result<()> { 99 | // If the reader is configured to read a header, then read that 100 | // first. (The CSV reader otherwise won't yield the header record 101 | // when calling `read_byte_record`.) 
102 | let mut len = 0; 103 | if rdr.has_headers() { 104 | let header = rdr.byte_headers()?; 105 | if !header.is_empty() { 106 | let pos = header.position().expect("position on header row"); 107 | wtr.write_u64::(pos.byte())?; 108 | len += 1; 109 | } 110 | } 111 | let mut record = csv::ByteRecord::new(); 112 | while rdr.read_byte_record(&mut record)? { 113 | let pos = record.position().expect("position on row"); 114 | wtr.write_u64::(pos.byte())?; 115 | len += 1; 116 | } 117 | wtr.write_u64::(len)?; 118 | Ok(()) 119 | } 120 | } 121 | 122 | impl RandomAccessSimple { 123 | /// Open an existing simple CSV index. 124 | /// 125 | /// The reader given must be seekable and should contain an index written 126 | /// by `RandomAccessSimple::create`. 127 | /// 128 | /// # Example 129 | /// 130 | /// This example shows how to create a simple random access index, open it 131 | /// and query the number of records in the index. 132 | /// 133 | /// ``` 134 | /// use std::io; 135 | /// use csv_index::RandomAccessSimple; 136 | /// 137 | /// # fn main() { example().unwrap(); } 138 | /// fn example() -> csv::Result<()> { 139 | /// let data = "\ 140 | /// city,country,pop 141 | /// Boston,United States,4628910 142 | /// Concord,United States,42695 143 | /// "; 144 | /// let mut rdr = csv::Reader::from_reader(data.as_bytes()); 145 | /// let mut wtr = io::Cursor::new(vec![]); 146 | /// RandomAccessSimple::create(&mut rdr, &mut wtr)?; 147 | /// 148 | /// let idx = RandomAccessSimple::open(wtr)?; 149 | /// assert_eq!(idx.len(), 3); 150 | /// Ok(()) 151 | /// } 152 | /// ``` 153 | pub fn open(mut rdr: R) -> csv::Result> { 154 | rdr.seek(io::SeekFrom::End(-8))?; 155 | let len = rdr.read_u64::()?; 156 | Ok(RandomAccessSimple { rdr: rdr, len: len }) 157 | } 158 | 159 | /// Get the position of the record at index `i`. 160 | /// 161 | /// The first record has index `0`. 
162 | /// 163 | /// If the position returned is used to seek the CSV reader that was used 164 | /// to create this index, then the next record read by the CSV reader will 165 | /// be the `i`th record. 166 | /// 167 | /// Note that since this index does not store the line number of each 168 | /// record, the position returned will always have a line number equivalent 169 | /// to `1`. This in turn will cause the CSV reader to report all subsequent 170 | /// line numbers incorrectly. 171 | /// 172 | /// # Example 173 | /// 174 | /// This example shows how to create a simple random access index, open it 175 | /// and use it to seek a CSV reader to read an arbitrary record. 176 | /// 177 | /// ``` 178 | /// use std::error::Error; 179 | /// use std::io; 180 | /// use csv_index::RandomAccessSimple; 181 | /// 182 | /// # fn main() { example().unwrap(); } 183 | /// fn example() -> Result<(), Box> { 184 | /// let data = "\ 185 | /// city,country,pop 186 | /// Boston,United States,4628910 187 | /// Concord,United States,42695 188 | /// "; 189 | /// // Note that we wrap our CSV data in an io::Cursor, which makes it 190 | /// // seekable. If you're opening CSV data from a file, then this is 191 | /// // not needed since a `File` is already seekable. 192 | /// let mut rdr = csv::Reader::from_reader(io::Cursor::new(data)); 193 | /// let mut wtr = io::Cursor::new(vec![]); 194 | /// RandomAccessSimple::create(&mut rdr, &mut wtr)?; 195 | /// 196 | /// // Open the index we just created, get the position of the last 197 | /// // record and seek the CSV reader. 198 | /// let mut idx = RandomAccessSimple::open(wtr)?; 199 | /// let pos = idx.get(2)?; 200 | /// rdr.seek(pos)?; 201 | /// 202 | /// // Read the next record. 
203 | /// if let Some(result) = rdr.records().next() { 204 | /// let record = result?; 205 | /// assert_eq!(record, vec!["Concord", "United States", "42695"]); 206 | /// Ok(()) 207 | /// } else { 208 | /// Err(From::from("expected at least one record but got none")) 209 | /// } 210 | /// } 211 | /// ``` 212 | pub fn get(&mut self, i: u64) -> csv::Result { 213 | if i >= self.len { 214 | let msg = format!( 215 | "invalid record index {} (there are {} records)", 216 | i, self.len 217 | ); 218 | let err = io::Error::new(io::ErrorKind::Other, msg); 219 | return Err(csv::Error::from(err)); 220 | } 221 | self.rdr.seek(io::SeekFrom::Start(i * 8))?; 222 | let offset = self.rdr.read_u64::()?; 223 | let mut pos = csv::Position::new(); 224 | pos.set_byte(offset).set_record(i); 225 | Ok(pos) 226 | } 227 | 228 | /// Return the number of records (including the header record) in this 229 | /// index. 230 | pub fn len(&self) -> u64 { 231 | self.len 232 | } 233 | 234 | /// Return true if and only if this index has zero records. 
235 | pub fn is_empty(&self) -> bool { 236 | self.len() == 0 237 | } 238 | } 239 | 240 | #[cfg(test)] 241 | mod tests { 242 | use std::io; 243 | 244 | use csv; 245 | 246 | use super::RandomAccessSimple; 247 | 248 | struct Indexed<'a> { 249 | csv: csv::Reader>, 250 | idx: RandomAccessSimple>>, 251 | } 252 | 253 | impl<'a> Indexed<'a> { 254 | fn new(headers: bool, csv_data: &'a str) -> Indexed<'a> { 255 | let mut rdr = csv::ReaderBuilder::new() 256 | .has_headers(headers) 257 | .from_reader(io::Cursor::new(csv_data)); 258 | let mut idxbuf = io::Cursor::new(vec![]); 259 | RandomAccessSimple::create(&mut rdr, &mut idxbuf).unwrap(); 260 | Indexed { 261 | csv: rdr, 262 | idx: RandomAccessSimple::open(idxbuf).unwrap(), 263 | } 264 | } 265 | 266 | fn read_at(&mut self, record: u64) -> csv::StringRecord { 267 | let pos = self.idx.get(record).unwrap(); 268 | self.csv.seek(pos).unwrap(); 269 | self.csv.records().next().unwrap().unwrap() 270 | } 271 | } 272 | 273 | #[test] 274 | fn headers_empty() { 275 | let idx = Indexed::new(true, ""); 276 | assert_eq!(idx.idx.len(), 0); 277 | } 278 | 279 | #[test] 280 | fn headers_one_field() { 281 | let mut idx = Indexed::new(true, "h1\na\nb\nc\n"); 282 | assert_eq!(idx.idx.len(), 4); 283 | assert_eq!(idx.read_at(0), vec!["h1"]); 284 | assert_eq!(idx.read_at(1), vec!["a"]); 285 | assert_eq!(idx.read_at(2), vec!["b"]); 286 | assert_eq!(idx.read_at(3), vec!["c"]); 287 | } 288 | 289 | #[test] 290 | fn headers_many_fields() { 291 | let mut idx = Indexed::new( 292 | true, 293 | "\ 294 | h1,h2,h3 295 | a,b,c 296 | d,e,f 297 | g,h,i 298 | ", 299 | ); 300 | assert_eq!(idx.idx.len(), 4); 301 | assert_eq!(idx.read_at(0), vec!["h1", "h2", "h3"]); 302 | assert_eq!(idx.read_at(1), vec!["a", "b", "c"]); 303 | assert_eq!(idx.read_at(2), vec!["d", "e", "f"]); 304 | assert_eq!(idx.read_at(3), vec!["g", "h", "i"]); 305 | } 306 | 307 | #[test] 308 | fn no_headers_one_field() { 309 | let mut idx = Indexed::new(false, "h1\na\nb\nc\n"); 310 | 
assert_eq!(idx.idx.len(), 4); 311 | assert_eq!(idx.read_at(0), vec!["h1"]); 312 | assert_eq!(idx.read_at(1), vec!["a"]); 313 | assert_eq!(idx.read_at(2), vec!["b"]); 314 | assert_eq!(idx.read_at(3), vec!["c"]); 315 | } 316 | 317 | #[test] 318 | fn no_headers_many_fields() { 319 | let mut idx = Indexed::new( 320 | false, 321 | "\ 322 | h1,h2,h3 323 | a,b,c 324 | d,e,f 325 | g,h,i 326 | ", 327 | ); 328 | assert_eq!(idx.idx.len(), 4); 329 | assert_eq!(idx.read_at(0), vec!["h1", "h2", "h3"]); 330 | assert_eq!(idx.read_at(1), vec!["a", "b", "c"]); 331 | assert_eq!(idx.read_at(2), vec!["d", "e", "f"]); 332 | assert_eq!(idx.read_at(3), vec!["g", "h", "i"]); 333 | } 334 | 335 | #[test] 336 | fn headers_one_field_newlines() { 337 | let mut idx = Indexed::new( 338 | true, 339 | " 340 | 341 | 342 | 343 | 344 | h1 345 | 346 | a 347 | 348 | 349 | b 350 | 351 | 352 | 353 | 354 | 355 | 356 | c 357 | 358 | 359 | 360 | 361 | 362 | 363 | ", 364 | ); 365 | assert_eq!(idx.idx.len(), 4); 366 | assert_eq!(idx.read_at(0), vec!["h1"]); 367 | assert_eq!(idx.read_at(1), vec!["a"]); 368 | assert_eq!(idx.read_at(2), vec!["b"]); 369 | assert_eq!(idx.read_at(3), vec!["c"]); 370 | } 371 | } 372 | -------------------------------------------------------------------------------- /benches/bench.rs: -------------------------------------------------------------------------------- 1 | #![feature(test)] 2 | 3 | extern crate test; 4 | 5 | use std::io; 6 | 7 | use serde::{de::DeserializeOwned, Deserialize, Serialize}; 8 | use test::Bencher; 9 | 10 | use csv::{ 11 | ByteRecord, Reader, ReaderBuilder, StringRecord, Trim, Writer, 12 | WriterBuilder, 13 | }; 14 | 15 | static NFL: &str = include_str!("../examples/data/bench/nfl.csv"); 16 | static GAME: &str = include_str!("../examples/data/bench/game.csv"); 17 | static POP: &str = include_str!("../examples/data/bench/worldcitiespop.csv"); 18 | static MBTA: &str = 19 | include_str!("../examples/data/bench/gtfs-mbta-stop-times.csv"); 20 | 21 | #[derive(Debug, 
Serialize, Deserialize, PartialEq)] 22 | struct NFLRowOwned { 23 | gameid: String, 24 | qtr: i32, 25 | min: Option, 26 | sec: Option, 27 | off: String, 28 | def: String, 29 | down: Option, 30 | togo: Option, 31 | ydline: Option, 32 | description: String, 33 | offscore: i32, 34 | defscore: i32, 35 | season: i32, 36 | } 37 | 38 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 39 | struct NFLRowBorrowed<'a> { 40 | gameid: &'a str, 41 | qtr: i32, 42 | min: Option, 43 | sec: Option, 44 | off: &'a str, 45 | def: &'a str, 46 | down: Option, 47 | togo: Option, 48 | ydline: Option, 49 | description: &'a str, 50 | offscore: i32, 51 | defscore: i32, 52 | season: i32, 53 | } 54 | 55 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 56 | struct GAMERowOwned(String, String, String, String, i32, String); 57 | 58 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 59 | struct GAMERowBorrowed<'a>(&'a str, &'a str, &'a str, &'a str, i32, &'a str); 60 | 61 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 62 | #[serde(rename_all = "PascalCase")] 63 | struct POPRowOwned { 64 | country: String, 65 | city: String, 66 | accent_city: String, 67 | region: String, 68 | population: Option, 69 | latitude: f64, 70 | longitude: f64, 71 | } 72 | 73 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 74 | #[serde(rename_all = "PascalCase")] 75 | struct POPRowBorrowed<'a> { 76 | country: &'a str, 77 | city: &'a str, 78 | accent_city: &'a str, 79 | region: &'a str, 80 | population: Option, 81 | latitude: f64, 82 | longitude: f64, 83 | } 84 | 85 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 86 | struct MBTARowOwned { 87 | trip_id: String, 88 | arrival_time: String, 89 | departure_time: String, 90 | stop_id: String, 91 | stop_sequence: i32, 92 | stop_headsign: String, 93 | pickup_type: i32, 94 | drop_off_type: i32, 95 | timepoint: i32, 96 | } 97 | 98 | #[derive(Debug, Serialize, Deserialize, PartialEq)] 99 | struct MBTARowBorrowed<'a> { 100 | trip_id: &'a str, 101 | 
arrival_time: &'a str, 102 | departure_time: &'a str, 103 | stop_id: &'a str, 104 | stop_sequence: i32, 105 | stop_headsign: &'a str, 106 | pickup_type: i32, 107 | drop_off_type: i32, 108 | timepoint: i32, 109 | } 110 | 111 | #[derive(Default)] 112 | struct ByteCounter { 113 | count: usize, 114 | } 115 | impl io::Write for ByteCounter { 116 | fn write(&mut self, data: &[u8]) -> io::Result { 117 | self.count += data.len(); 118 | Ok(data.len()) 119 | } 120 | fn flush(&mut self) -> io::Result<()> { 121 | Ok(()) 122 | } 123 | } 124 | 125 | macro_rules! bench { 126 | ($name:ident, $data:ident, $counter:ident, $result:expr) => { 127 | #[bench] 128 | fn $name(b: &mut Bencher) { 129 | let data = $data.as_bytes(); 130 | b.bytes = data.len() as u64; 131 | b.iter(|| { 132 | let mut rdr = 133 | ReaderBuilder::new().has_headers(false).from_reader(data); 134 | assert_eq!($counter(&mut rdr), $result); 135 | }) 136 | } 137 | }; 138 | } 139 | 140 | macro_rules! bench_trimmed { 141 | ($name:ident, $data:ident, $counter:ident, $result:expr) => { 142 | #[bench] 143 | fn $name(b: &mut Bencher) { 144 | let data = $data.as_bytes(); 145 | b.bytes = data.len() as u64; 146 | b.iter(|| { 147 | let mut rdr = ReaderBuilder::new() 148 | .has_headers(false) 149 | .trim(Trim::All) 150 | .from_reader(data); 151 | assert_eq!($counter(&mut rdr), $result); 152 | }) 153 | } 154 | }; 155 | } 156 | 157 | macro_rules! 
bench_serde {
    // `no_headers` variant: the data's first row is a record, not a header.
    (no_headers,
     $name_de:ident, $name_ser:ident, $data:ident, $counter:ident, $type:ty, $result:expr) => {
        #[bench]
        fn $name_de(b: &mut Bencher) {
            let data = $data.as_bytes();
            b.bytes = data.len() as u64;
            b.iter(|| {
                let mut rdr =
                    ReaderBuilder::new().has_headers(false).from_reader(data);
                assert_eq!($counter::<_, $type>(&mut rdr), $result);
            })
        }
        #[bench]
        fn $name_ser(b: &mut Bencher) {
            let data = $data.as_bytes();
            let values = ReaderBuilder::new()
                .has_headers(false)
                .from_reader(data)
                .deserialize()
                .collect::<Result<Vec<$type>, _>>()
                .unwrap();

            let do_it = || {
                let mut counter = ByteCounter::default();
                {
                    let mut wtr = WriterBuilder::new()
                        .has_headers(false)
                        .from_writer(&mut counter);
                    for val in &values {
                        wtr.serialize(val).unwrap();
                    }
                }
                counter.count
            };
            // Throughput is measured in serialized output bytes.
            b.bytes = do_it() as u64;
            b.iter(do_it)
        }
    };
    ($name_de:ident, $name_ser:ident, $data:ident, $counter:ident, $type:ty, $result:expr) => {
        #[bench]
        fn $name_de(b: &mut Bencher) {
            let data = $data.as_bytes();
            b.bytes = data.len() as u64;
            b.iter(|| {
                let mut rdr =
                    ReaderBuilder::new().has_headers(true).from_reader(data);
                assert_eq!($counter::<_, $type>(&mut rdr), $result);
            })
        }
        #[bench]
        fn $name_ser(b: &mut Bencher) {
            let data = $data.as_bytes();
            let values = ReaderBuilder::new()
                .has_headers(true)
                .from_reader(data)
                .deserialize()
                .collect::<Result<Vec<$type>, _>>()
                .unwrap();

            let do_it = || {
                let mut counter = ByteCounter::default();
                {
                    let mut wtr = WriterBuilder::new()
                        .has_headers(true)
                        .from_writer(&mut counter);
                    for val in &values {
                        wtr.serialize(val).unwrap();
                    }
                }
                counter.count
            };
            b.bytes = do_it() as u64;
            b.iter(do_it)
        }
    };
}

// Deserialize into a borrowed type directly from a reused `ByteRecord`.
macro_rules! bench_serde_borrowed_bytes {
    ($name:ident, $data:ident, $type:ty, $headers:expr, $result:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
            let data = $data.as_bytes();
            b.bytes = data.len() as u64;
            b.iter(|| {
                let mut rdr = ReaderBuilder::new()
                    .has_headers($headers)
                    .from_reader(data);
                let mut count = 0;
                let mut rec = ByteRecord::new();
                while rdr.read_byte_record(&mut rec).unwrap() {
                    let _: $type = rec.deserialize(None).unwrap();
                    count += 1;
                }
                count
            })
        }
    };
}

// As above, but through a UTF-8 validated `StringRecord`.
macro_rules! bench_serde_borrowed_str {
    ($name:ident, $data:ident, $type:ty, $headers:expr, $result:expr) => {
        #[bench]
        fn $name(b: &mut Bencher) {
            let data = $data.as_bytes();
            b.bytes = data.len() as u64;
            b.iter(|| {
                let mut rdr = ReaderBuilder::new()
                    .has_headers($headers)
                    .from_reader(data);
                let mut count = 0;
                let mut rec = StringRecord::new();
                while rdr.read_record(&mut rec).unwrap() {
                    let _: $type = rec.deserialize(None).unwrap();
                    count += 1;
                }
                count
            })
        }
    };
}

bench_serde!(
    count_nfl_deserialize_owned_bytes,
    count_nfl_serialize_owned_bytes,
    NFL,
    count_deserialize_owned_bytes,
    NFLRowOwned,
    9999
);
bench_serde!(
    count_nfl_deserialize_owned_str,
    count_nfl_serialize_owned_str,
    NFL,
    count_deserialize_owned_str,
    NFLRowOwned,
    9999
);
bench_serde_borrowed_bytes!(
    count_nfl_deserialize_borrowed_bytes,
    NFL,
    NFLRowBorrowed,
    true,
    9999
);
bench_serde_borrowed_str!(
    count_nfl_deserialize_borrowed_str,
    NFL,
    NFLRowBorrowed,
    true,
    9999
);
bench!(count_nfl_iter_bytes, NFL, count_iter_bytes, 130000);
bench_trimmed!(count_nfl_iter_bytes_trimmed, NFL, count_iter_bytes, 130000); 311 | bench!(count_nfl_iter_str, NFL, count_iter_str, 130000); 312 | bench_trimmed!(count_nfl_iter_str_trimmed, NFL, count_iter_str, 130000); 313 | bench!(count_nfl_read_bytes, NFL, count_read_bytes, 130000); 314 | bench!(count_nfl_read_str, NFL, count_read_str, 130000); 315 | bench_serde!( 316 | no_headers, 317 | count_game_deserialize_owned_bytes, 318 | count_game_serialize_owned_bytes, 319 | GAME, 320 | count_deserialize_owned_bytes, 321 | GAMERowOwned, 322 | 100000 323 | ); 324 | bench_serde!( 325 | no_headers, 326 | count_game_deserialize_owned_str, 327 | count_game_serialize_owned_str, 328 | GAME, 329 | count_deserialize_owned_str, 330 | GAMERowOwned, 331 | 100000 332 | ); 333 | bench_serde_borrowed_bytes!( 334 | count_game_deserialize_borrowed_bytes, 335 | GAME, 336 | GAMERowBorrowed, 337 | true, 338 | 100000 339 | ); 340 | bench_serde_borrowed_str!( 341 | count_game_deserialize_borrowed_str, 342 | GAME, 343 | GAMERowBorrowed, 344 | true, 345 | 100000 346 | ); 347 | bench!(count_game_iter_bytes, GAME, count_iter_bytes, 600000); 348 | bench!(count_game_iter_str, GAME, count_iter_str, 600000); 349 | bench!(count_game_read_bytes, GAME, count_read_bytes, 600000); 350 | bench!(count_game_read_str, GAME, count_read_str, 600000); 351 | bench_serde!( 352 | count_pop_deserialize_owned_bytes, 353 | count_pop_serialize_owned_bytes, 354 | POP, 355 | count_deserialize_owned_bytes, 356 | POPRowOwned, 357 | 20000 358 | ); 359 | bench_serde!( 360 | count_pop_deserialize_owned_str, 361 | count_pop_serialize_owned_str, 362 | POP, 363 | count_deserialize_owned_str, 364 | POPRowOwned, 365 | 20000 366 | ); 367 | bench_serde_borrowed_bytes!( 368 | count_pop_deserialize_borrowed_bytes, 369 | POP, 370 | POPRowBorrowed, 371 | true, 372 | 20000 373 | ); 374 | bench_serde_borrowed_str!( 375 | count_pop_deserialize_borrowed_str, 376 | POP, 377 | POPRowBorrowed, 378 | true, 379 | 20000 380 | ); 381 | 
bench!(count_pop_iter_bytes, POP, count_iter_bytes, 140007); 382 | bench!(count_pop_iter_str, POP, count_iter_str, 140007); 383 | bench!(count_pop_read_bytes, POP, count_read_bytes, 140007); 384 | bench!(count_pop_read_str, POP, count_read_str, 140007); 385 | bench_serde!( 386 | count_mbta_deserialize_owned_bytes, 387 | count_mbta_serialize_owned_bytes, 388 | MBTA, 389 | count_deserialize_owned_bytes, 390 | MBTARowOwned, 391 | 9999 392 | ); 393 | bench_serde!( 394 | count_mbta_deserialize_owned_str, 395 | count_mbta_serialize_owned_str, 396 | MBTA, 397 | count_deserialize_owned_str, 398 | MBTARowOwned, 399 | 9999 400 | ); 401 | bench_serde_borrowed_bytes!( 402 | count_mbta_deserialize_borrowed_bytes, 403 | MBTA, 404 | MBTARowBorrowed, 405 | true, 406 | 9999 407 | ); 408 | bench_serde_borrowed_str!( 409 | count_mbta_deserialize_borrowed_str, 410 | MBTA, 411 | MBTARowBorrowed, 412 | true, 413 | 9999 414 | ); 415 | bench!(count_mbta_iter_bytes, MBTA, count_iter_bytes, 90000); 416 | bench!(count_mbta_iter_str, MBTA, count_iter_str, 90000); 417 | bench!(count_mbta_read_bytes, MBTA, count_read_bytes, 90000); 418 | bench!(count_mbta_read_str, MBTA, count_read_str, 90000); 419 | 420 | macro_rules! bench_write { 421 | ($name:ident, $data:ident) => { 422 | #[bench] 423 | fn $name(b: &mut Bencher) { 424 | let data = $data.as_bytes(); 425 | b.bytes = data.len() as u64; 426 | let records = collect_records(data); 427 | 428 | b.iter(|| { 429 | let mut wtr = Writer::from_writer(vec![]); 430 | for r in &records { 431 | wtr.write_record(r).unwrap(); 432 | } 433 | assert!(wtr.flush().is_ok()); 434 | }) 435 | } 436 | }; 437 | } 438 | 439 | macro_rules! 
bench_write_bytes { 440 | ($name:ident, $data:ident) => { 441 | #[bench] 442 | fn $name(b: &mut Bencher) { 443 | let data = $data.as_bytes(); 444 | b.bytes = data.len() as u64; 445 | let records = collect_records(data); 446 | 447 | b.iter(|| { 448 | let mut wtr = Writer::from_writer(vec![]); 449 | for r in &records { 450 | wtr.write_byte_record(r).unwrap(); 451 | } 452 | assert!(wtr.flush().is_ok()); 453 | }) 454 | } 455 | }; 456 | } 457 | 458 | bench_write!(write_nfl_record, NFL); 459 | bench_write_bytes!(write_nfl_bytes, NFL); 460 | 461 | fn count_deserialize_owned_bytes(rdr: &mut Reader) -> u64 462 | where 463 | R: io::Read, 464 | D: DeserializeOwned, 465 | { 466 | let mut count = 0; 467 | let mut rec = ByteRecord::new(); 468 | while rdr.read_byte_record(&mut rec).unwrap() { 469 | let _: D = rec.deserialize(None).unwrap(); 470 | count += 1; 471 | } 472 | count 473 | } 474 | 475 | fn count_deserialize_owned_str(rdr: &mut Reader) -> u64 476 | where 477 | R: io::Read, 478 | D: DeserializeOwned, 479 | { 480 | let mut count = 0; 481 | for rec in rdr.deserialize::() { 482 | let _ = rec.unwrap(); 483 | count += 1; 484 | } 485 | count 486 | } 487 | 488 | fn count_iter_bytes(rdr: &mut Reader) -> u64 { 489 | let mut count = 0; 490 | for rec in rdr.byte_records() { 491 | count += rec.unwrap().len() as u64; 492 | } 493 | count 494 | } 495 | 496 | fn count_iter_str(rdr: &mut Reader) -> u64 { 497 | let mut count = 0; 498 | for rec in rdr.records() { 499 | count += rec.unwrap().len() as u64; 500 | } 501 | count 502 | } 503 | 504 | fn count_read_bytes(rdr: &mut Reader) -> u64 { 505 | let mut count = 0; 506 | let mut rec = ByteRecord::new(); 507 | while rdr.read_byte_record(&mut rec).unwrap() { 508 | count += rec.len() as u64; 509 | } 510 | count 511 | } 512 | 513 | fn count_read_str(rdr: &mut Reader) -> u64 { 514 | let mut count = 0; 515 | let mut rec = StringRecord::new(); 516 | while rdr.read_record(&mut rec).unwrap() { 517 | count += rec.len() as u64; 518 | } 519 | count 520 
| } 521 | 522 | fn collect_records(data: &[u8]) -> Vec { 523 | let mut rdr = ReaderBuilder::new().has_headers(false).from_reader(data); 524 | rdr.byte_records().collect::, _>>().unwrap() 525 | } 526 | -------------------------------------------------------------------------------- /tests/tests.rs: -------------------------------------------------------------------------------- 1 | #![allow(dead_code)] 2 | 3 | use csv::Reader; 4 | 5 | use std::env; 6 | use std::io::{self, Read, Write}; 7 | use std::path::PathBuf; 8 | use std::process::{self, Command}; 9 | 10 | static STRANGE: &str = include_str!("../examples/data/strange.csv"); 11 | static USPOP: &str = include_str!("../examples/data/uspop.csv"); 12 | static USPOP_NULL: &str = include_str!("../examples/data/uspop-null.csv"); 13 | static USPOP_LATIN1: &[u8] = 14 | include_bytes!("../examples/data/uspop-latin1.csv"); 15 | static WORLDPOP: &str = 16 | include_str!("../examples/data/bench/worldcitiespop.csv"); 17 | static SMALLPOP: &str = include_str!("../examples/data/smallpop.csv"); 18 | static SMALLPOP_COLON: &str = 19 | include_str!("../examples/data/smallpop-colon.csv"); 20 | static SMALLPOP_NO_HEADERS: &str = 21 | include_str!("../examples/data/smallpop-no-headers.csv"); 22 | 23 | #[test] 24 | fn cookbook_read_basic() { 25 | let mut cmd = cmd_for_example("cookbook-read-basic"); 26 | let out = cmd_output_with(&mut cmd, SMALLPOP.as_bytes()); 27 | assert_eq!(out.stdout().lines().count(), 10); 28 | } 29 | 30 | #[test] 31 | fn cookbook_read_serde() { 32 | let mut cmd = cmd_for_example("cookbook-read-serde"); 33 | let out = cmd_output_with(&mut cmd, SMALLPOP.as_bytes()); 34 | assert_eq!(out.stdout().lines().count(), 10); 35 | } 36 | 37 | #[test] 38 | fn cookbook_read_colon() { 39 | let mut cmd = cmd_for_example("cookbook-read-colon"); 40 | let out = cmd_output_with(&mut cmd, SMALLPOP_COLON.as_bytes()); 41 | assert_eq!(out.stdout().lines().count(), 10); 42 | } 43 | 44 | #[test] 45 | fn cookbook_read_no_headers() { 46 | 
let mut cmd = cmd_for_example("cookbook-read-no-headers"); 47 | let out = cmd_output_with(&mut cmd, SMALLPOP_NO_HEADERS.as_bytes()); 48 | assert_eq!(out.stdout().lines().count(), 10); 49 | } 50 | 51 | #[test] 52 | fn cookbook_write_basic() { 53 | let mut cmd = cmd_for_example("cookbook-write-basic"); 54 | let out = cmd_output(&mut cmd); 55 | assert_eq!(out.stdout().lines().count(), 3); 56 | } 57 | 58 | #[test] 59 | fn cookbook_write_serde() { 60 | let mut cmd = cmd_for_example("cookbook-write-serde"); 61 | let out = cmd_output(&mut cmd); 62 | assert_eq!(out.stdout().lines().count(), 3); 63 | } 64 | 65 | #[test] 66 | fn tutorial_setup_01() { 67 | let mut cmd = cmd_for_example("tutorial-setup-01"); 68 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 69 | assert_eq!(out.stdout().lines().count(), 100); 70 | } 71 | 72 | #[test] 73 | fn tutorial_error_01() { 74 | let mut cmd = cmd_for_example("tutorial-error-01"); 75 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 76 | assert_eq!(out.stdout().lines().count(), 100); 77 | } 78 | 79 | #[test] 80 | fn tutorial_error_01_errored() { 81 | let data = "\ 82 | header1,header2 83 | foo,bar 84 | quux,baz,foobar 85 | "; 86 | let mut cmd = cmd_for_example("tutorial-error-01"); 87 | let out = cmd_output_with(&mut cmd, data.as_bytes()); 88 | assert!(out.stderr().contains("thread 'main' ")); 89 | assert!(out.stderr().contains(" panicked")); 90 | } 91 | 92 | #[test] 93 | fn tutorial_error_02() { 94 | let mut cmd = cmd_for_example("tutorial-error-02"); 95 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 96 | assert_eq!(out.stdout().lines().count(), 100); 97 | } 98 | 99 | #[test] 100 | fn tutorial_error_02_errored() { 101 | let data = "\ 102 | header1,header2 103 | foo,bar 104 | quux,baz,foobar 105 | "; 106 | let mut cmd = cmd_for_example("tutorial-error-02"); 107 | let out = cmd_output_with(&mut cmd, data.as_bytes()); 108 | assert!(out.stdout_failed().contains("error reading CSV from ")); 109 | } 110 | 111 | 
#[test] 112 | fn tutorial_error_03() { 113 | let mut cmd = cmd_for_example("tutorial-error-03"); 114 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 115 | assert_eq!(out.stdout().lines().count(), 100); 116 | } 117 | 118 | #[test] 119 | fn tutorial_error_03_errored() { 120 | let data = "\ 121 | header1,header2 122 | foo,bar 123 | quux,baz,foobar 124 | "; 125 | let mut cmd = cmd_for_example("tutorial-error-03"); 126 | let out = cmd_output_with(&mut cmd, data.as_bytes()); 127 | assert!(out.stdout_failed().contains("CSV error:")); 128 | } 129 | 130 | #[test] 131 | fn tutorial_error_04() { 132 | let mut cmd = cmd_for_example("tutorial-error-04"); 133 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 134 | assert_eq!(out.stdout().lines().count(), 100); 135 | } 136 | 137 | #[test] 138 | fn tutorial_error_04_errored() { 139 | let data = "\ 140 | header1,header2 141 | foo,bar 142 | quux,baz,foobar 143 | "; 144 | let mut cmd = cmd_for_example("tutorial-error-04"); 145 | let out = cmd_output_with(&mut cmd, data.as_bytes()); 146 | assert!(out.stdout_failed().contains("CSV error:")); 147 | } 148 | 149 | #[test] 150 | fn tutorial_read_01() { 151 | let mut cmd = cmd_for_example("tutorial-read-01"); 152 | cmd.arg(data_dir().join("uspop.csv")); 153 | let out = cmd_output(&mut cmd); 154 | assert_eq!(out.stdout().lines().count(), 100); 155 | } 156 | 157 | #[test] 158 | fn tutorial_read_headers_01() { 159 | let mut cmd = cmd_for_example("tutorial-read-headers-01"); 160 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 161 | assert_eq!(out.stdout().lines().count(), 101); 162 | } 163 | 164 | #[test] 165 | fn tutorial_read_headers_02() { 166 | let mut cmd = cmd_for_example("tutorial-read-headers-02"); 167 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 168 | assert_eq!(out.stdout().lines().count(), 102); 169 | } 170 | 171 | #[test] 172 | fn tutorial_read_delimiter_01() { 173 | let mut cmd = cmd_for_example("tutorial-read-delimiter-01"); 174 | let out = 
cmd_output_with(&mut cmd, STRANGE.as_bytes()); 175 | assert_eq!(out.stdout().lines().count(), 6); 176 | } 177 | 178 | #[test] 179 | fn tutorial_read_serde_01() { 180 | let mut cmd = cmd_for_example("tutorial-read-serde-01"); 181 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 182 | assert_eq!(out.stdout().lines().count(), 100); 183 | assert!(out.stdout().lines().all(|x| x.contains("pop:"))); 184 | } 185 | 186 | #[test] 187 | fn tutorial_read_serde_02() { 188 | let mut cmd = cmd_for_example("tutorial-read-serde-02"); 189 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 190 | assert_eq!(out.stdout().lines().count(), 100); 191 | assert!(out.stdout().lines().all(|x| x.starts_with("("))); 192 | } 193 | 194 | #[test] 195 | fn tutorial_read_serde_03() { 196 | let mut cmd = cmd_for_example("tutorial-read-serde-03"); 197 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 198 | assert_eq!(out.stdout().lines().count(), 100); 199 | assert!(out.stdout().lines().all(|x| x.contains("\"City\":"))); 200 | } 201 | 202 | #[test] 203 | fn tutorial_read_serde_04() { 204 | let mut cmd = cmd_for_example("tutorial-read-serde-04"); 205 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 206 | assert_eq!(out.stdout().lines().count(), 100); 207 | assert!(out.stdout().lines().all(|x| x.starts_with("Record { latitude:"))); 208 | } 209 | 210 | #[test] 211 | fn tutorial_read_serde_05_invalid() { 212 | let mut cmd = cmd_for_example("tutorial-read-serde-invalid-01"); 213 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 214 | assert_eq!(out.stdout().lines().count(), 100); 215 | assert!(out.stdout().lines().all(|x| x.starts_with("Record { latitude:"))); 216 | } 217 | 218 | #[test] 219 | fn tutorial_read_serde_05_invalid_errored() { 220 | let mut cmd = cmd_for_example("tutorial-read-serde-invalid-01"); 221 | let out = cmd_output_with(&mut cmd, USPOP_NULL.as_bytes()); 222 | assert!(out.stdout_failed().contains("CSV deserialize error:")); 223 | } 224 | 225 | #[test] 
226 | fn tutorial_read_serde_invalid_06() { 227 | let mut cmd = cmd_for_example("tutorial-read-serde-invalid-02"); 228 | let out = cmd_output_with(&mut cmd, USPOP_NULL.as_bytes()); 229 | assert_eq!(out.stdout().lines().count(), 100); 230 | assert!(out.stdout().lines().all(|x| x.starts_with("Record { latitude:"))); 231 | } 232 | 233 | #[test] 234 | fn tutorial_write_01() { 235 | let mut cmd = cmd_for_example("tutorial-write-01"); 236 | let out = cmd_output(&mut cmd); 237 | assert_eq!(out.stdout().lines().count(), 4); 238 | } 239 | 240 | #[test] 241 | fn tutorial_write_delimiter_01() { 242 | let mut cmd = cmd_for_example("tutorial-write-delimiter-01"); 243 | let out = cmd_output(&mut cmd); 244 | assert_eq!(out.stdout().lines().count(), 4); 245 | assert!(out.stdout().lines().all(|x| x.contains('\t'))); 246 | } 247 | 248 | #[test] 249 | fn tutorial_write_serde_01() { 250 | let mut cmd = cmd_for_example("tutorial-write-serde-01"); 251 | let out = cmd_output(&mut cmd); 252 | assert_eq!(out.stdout().lines().count(), 4); 253 | } 254 | 255 | #[test] 256 | fn tutorial_write_serde_02() { 257 | let mut cmd = cmd_for_example("tutorial-write-serde-02"); 258 | let out = cmd_output(&mut cmd); 259 | assert_eq!(out.stdout().lines().count(), 4); 260 | } 261 | 262 | #[test] 263 | fn tutorial_pipeline_search_01() { 264 | let mut cmd = cmd_for_example("tutorial-pipeline-search-01"); 265 | cmd.arg("MA"); 266 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 267 | assert_eq!(out.stdout().lines().count(), 2); 268 | } 269 | 270 | #[test] 271 | fn tutorial_pipeline_search_01_errored() { 272 | let mut cmd = cmd_for_example("tutorial-pipeline-search-01"); 273 | cmd.arg("MA"); 274 | let out = cmd_output_with(&mut cmd, USPOP_LATIN1); 275 | assert!(out.stdout_failed().contains("invalid utf-8")); 276 | } 277 | 278 | #[test] 279 | fn tutorial_pipeline_search_02() { 280 | let mut cmd = cmd_for_example("tutorial-pipeline-search-02"); 281 | cmd.arg("MA"); 282 | let out = cmd_output_with(&mut 
cmd, USPOP_LATIN1); 283 | assert_eq!(out.stdout().lines().count(), 2); 284 | } 285 | 286 | #[test] 287 | fn tutorial_pipeline_pop_01() { 288 | let mut cmd = cmd_for_example("tutorial-pipeline-pop-01"); 289 | cmd.arg("100000"); 290 | let out = cmd_output_with(&mut cmd, USPOP.as_bytes()); 291 | assert_eq!(out.stdout().lines().count(), 4); 292 | } 293 | 294 | #[test] 295 | fn tutorial_perf_alloc_01() { 296 | let mut cmd = cmd_for_example("tutorial-perf-alloc-01"); 297 | let out = cmd_output_with(&mut cmd, WORLDPOP.as_bytes()); 298 | assert_eq!(out.stdout(), "11\n"); 299 | } 300 | 301 | #[test] 302 | fn tutorial_perf_alloc_02() { 303 | let mut cmd = cmd_for_example("tutorial-perf-alloc-02"); 304 | let out = cmd_output_with(&mut cmd, WORLDPOP.as_bytes()); 305 | assert_eq!(out.stdout(), "11\n"); 306 | } 307 | 308 | #[test] 309 | fn tutorial_perf_alloc_03() { 310 | let mut cmd = cmd_for_example("tutorial-perf-alloc-03"); 311 | let out = cmd_output_with(&mut cmd, WORLDPOP.as_bytes()); 312 | assert_eq!(out.stdout(), "11\n"); 313 | } 314 | 315 | #[test] 316 | fn tutorial_perf_serde_01() { 317 | let mut cmd = cmd_for_example("tutorial-perf-serde-01"); 318 | let out = cmd_output_with(&mut cmd, WORLDPOP.as_bytes()); 319 | assert_eq!(out.stdout(), "11\n"); 320 | } 321 | 322 | #[test] 323 | fn tutorial_perf_serde_02() { 324 | let mut cmd = cmd_for_example("tutorial-perf-serde-02"); 325 | let out = cmd_output_with(&mut cmd, WORLDPOP.as_bytes()); 326 | assert_eq!(out.stdout(), "11\n"); 327 | } 328 | 329 | #[test] 330 | fn tutorial_perf_serde_03() { 331 | let mut cmd = cmd_for_example("tutorial-perf-serde-03"); 332 | let out = cmd_output_with(&mut cmd, WORLDPOP.as_bytes()); 333 | assert_eq!(out.stdout(), "11\n"); 334 | } 335 | 336 | #[test] 337 | fn tutorial_perf_core_01() { 338 | let mut cmd = cmd_for_example("tutorial-perf-core-01"); 339 | let out = cmd_output_with(&mut cmd, WORLDPOP.as_bytes()); 340 | assert_eq!(out.stdout(), "11\n"); 341 | } 342 | 343 | #[test] 344 | fn 
no_infinite_loop_on_io_errors() { 345 | struct FailingRead; 346 | impl Read for FailingRead { 347 | fn read(&mut self, _buf: &mut [u8]) -> io::Result { 348 | Err(io::Error::new(io::ErrorKind::Other, "Broken reader")) 349 | } 350 | } 351 | 352 | let mut record_results = Reader::from_reader(FailingRead).into_records(); 353 | let first_result = record_results.next(); 354 | assert!( 355 | matches!(&first_result, Some(Err(e)) if matches!(e.kind(), csv::ErrorKind::Io(_))) 356 | ); 357 | assert!(record_results.next().is_none()); 358 | } 359 | 360 | // Helper functions follow. 361 | 362 | /// Return the target/debug directory path. 363 | fn debug_dir() -> PathBuf { 364 | env::current_exe() 365 | .expect("test binary path") 366 | .parent() 367 | .expect("test binary directory") 368 | .parent() 369 | .expect("example binary directory") 370 | .to_path_buf() 371 | } 372 | 373 | /// Return the directory containing the example test binaries. 374 | fn example_bin_dir() -> PathBuf { 375 | debug_dir().join("examples") 376 | } 377 | 378 | /// Return the repo root directory path. 379 | fn repo_dir() -> PathBuf { 380 | PathBuf::from(env!("CARGO_MANIFEST_DIR")) 381 | } 382 | 383 | /// Return the directory containing the example data. 384 | fn data_dir() -> PathBuf { 385 | repo_dir().join("examples").join("data") 386 | } 387 | 388 | /// Return a command ready to execute the given example test binary. 389 | /// 390 | /// The command's current directory is set to the repo root. 391 | fn cmd_for_example(name: &str) -> Command { 392 | let mut cmd = Command::new(example_bin_dir().join(name)); 393 | cmd.current_dir(repo_dir()); 394 | cmd 395 | } 396 | 397 | /// Return the (stdout, stderr) of running the command as a string. 398 | /// 399 | /// If the command has a non-zero exit code, then this function panics. 
400 | fn cmd_output(cmd: &mut Command) -> Output { 401 | cmd.stdout(process::Stdio::piped()); 402 | cmd.stderr(process::Stdio::piped()); 403 | let child = cmd.spawn().expect("command spawns successfully"); 404 | Output::new(cmd, child) 405 | } 406 | 407 | /// Like cmd_output, but sends the given data as stdin to the given child. 408 | fn cmd_output_with(cmd: &mut Command, data: &[u8]) -> Output { 409 | cmd.stdin(process::Stdio::piped()); 410 | cmd.stdout(process::Stdio::piped()); 411 | cmd.stderr(process::Stdio::piped()); 412 | let mut child = cmd.spawn().expect("command spawns successfully"); 413 | { 414 | let stdin = child.stdin.as_mut().expect("failed to get stdin"); 415 | stdin.write_all(data).expect("failed to write to stdin"); 416 | } 417 | Output::new(cmd, child) 418 | } 419 | 420 | struct Output { 421 | stdout: String, 422 | stderr: String, 423 | command: String, 424 | status: process::ExitStatus, 425 | } 426 | 427 | impl Output { 428 | /// Return the (stdout, stderr) of running the given child as a string. 429 | /// 430 | /// If the command has a non-zero exit code, then this function panics. 
431 | fn new(cmd: &mut Command, child: process::Child) -> Output { 432 | let out = child.wait_with_output().expect("command runs successfully"); 433 | let stdout = 434 | String::from_utf8(out.stdout).expect("valid utf-8 (stdout)"); 435 | let stderr = 436 | String::from_utf8(out.stderr).expect("valid utf-8 (stderr)"); 437 | Output { 438 | stdout, 439 | stderr, 440 | command: format!("{:?}", cmd), 441 | status: out.status, 442 | } 443 | } 444 | 445 | fn stdout(&self) -> &str { 446 | if !self.status.success() { 447 | panic!( 448 | "\n\n==== {:?} ====\n\ 449 | command failed but expected success!\ 450 | \n\ncwd: {}\ 451 | \n\nstatus: {}\ 452 | \n\nstdout: {}\ 453 | \n\nstderr: {}\ 454 | \n\n=====\n", 455 | self.command, 456 | repo_dir().display(), 457 | self.status, 458 | self.stdout, 459 | self.stderr 460 | ); 461 | } 462 | &self.stdout 463 | } 464 | 465 | fn stdout_failed(&self) -> &str { 466 | if self.status.success() { 467 | panic!( 468 | "\n\n==== {:?} ====\n\ 469 | command succeeded but expected failure!\ 470 | \n\ncwd: {}\ 471 | \n\nstatus: {}\ 472 | \n\nstdout: {}\ 473 | \n\nstderr: {}\ 474 | \n\n=====\n", 475 | self.command, 476 | repo_dir().display(), 477 | self.status, 478 | self.stdout, 479 | self.stderr 480 | ); 481 | } 482 | &self.stdout 483 | } 484 | 485 | fn stderr(&self) -> &str { 486 | if self.status.success() { 487 | panic!( 488 | "\n\n==== {:?} ====\n\ 489 | command succeeded but expected failure!\ 490 | \n\ncwd: {}\ 491 | \n\nstatus: {}\ 492 | \n\nstdout: {}\ 493 | \n\nstderr: {}\ 494 | \n\n=====\n", 495 | self.command, 496 | repo_dir().display(), 497 | self.status, 498 | self.stdout, 499 | self.stderr 500 | ); 501 | } 502 | &self.stderr 503 | } 504 | } 505 | 506 | /// Consume the reader given into a string. 
507 | fn read_to_string(mut rdr: R) -> String { 508 | let mut s = String::new(); 509 | rdr.read_to_string(&mut s).unwrap(); 510 | s 511 | } 512 | -------------------------------------------------------------------------------- /src/string_record.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | fmt, io, 3 | iter::FromIterator, 4 | ops::{self, Range}, 5 | result, str, 6 | }; 7 | 8 | use serde_core::de::Deserialize; 9 | 10 | use crate::{ 11 | byte_record::{ByteRecord, ByteRecordIter, Position}, 12 | deserializer::deserialize_string_record, 13 | error::{Error, ErrorKind, FromUtf8Error, Result}, 14 | reader::Reader, 15 | }; 16 | 17 | /// A single CSV record stored as valid UTF-8 bytes. 18 | /// 19 | /// A string record permits reading or writing CSV rows that are valid UTF-8. 20 | /// If string records are used to read CSV data that is not valid UTF-8, then 21 | /// the CSV reader will return an invalid UTF-8 error. If you do need to read 22 | /// possibly invalid UTF-8 data, then you should prefer using a 23 | /// [`ByteRecord`](struct.ByteRecord.html), 24 | /// since it makes no assumptions about UTF-8. 25 | /// 26 | /// If you are using the Serde (de)serialization APIs, then you probably never 27 | /// need to interact with a `ByteRecord` or a `StringRecord`. However, there 28 | /// are some circumstances in which you might need to use a raw record type 29 | /// while still using Serde. For example, if you need to deserialize possibly 30 | /// invalid UTF-8 fields, then you'll need to first read your record into a 31 | /// `ByteRecord`, and then use `ByteRecord::deserialize` to run Serde. Another 32 | /// reason for using the raw record deserialization APIs is if you're using 33 | /// Serde to read into borrowed data such as a `&'a str` or a `&'a [u8]`. 34 | /// 35 | /// Two `StringRecord`s are compared on the basis of their field data. Any 36 | /// position information associated with the records is ignored. 
#[derive(Clone, Eq)]
pub struct StringRecord(ByteRecord);

impl PartialEq for StringRecord {
    fn eq(&self, other: &StringRecord) -> bool {
        // Compare field-by-field; positions are deliberately ignored.
        self.0.iter_eq(&other.0)
    }
}

impl<T: AsRef<[u8]>> PartialEq<Vec<T>> for StringRecord {
    fn eq(&self, other: &Vec<T>) -> bool {
        self.0.iter_eq(other)
    }
}

impl<T: AsRef<[u8]>> PartialEq<Vec<T>> for &StringRecord {
    fn eq(&self, other: &Vec<T>) -> bool {
        self.0.iter_eq(other)
    }
}

impl<T: AsRef<[u8]>> PartialEq<[T]> for StringRecord {
    fn eq(&self, other: &[T]) -> bool {
        self.0.iter_eq(other)
    }
}

impl<T: AsRef<[u8]>> PartialEq<[T]> for &StringRecord {
    fn eq(&self, other: &[T]) -> bool {
        self.0.iter_eq(other)
    }
}

impl fmt::Debug for StringRecord {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let fields: Vec<&str> = self.iter().collect();
        write!(f, "StringRecord({:?})", fields)
    }
}

impl Default for StringRecord {
    #[inline]
    fn default() -> StringRecord {
        StringRecord::new()
    }
}

impl StringRecord {
    /// Create a new empty `StringRecord`.
    ///
    /// Note that you may find the `StringRecord::from` constructor more
    /// convenient, which is provided by an impl on the `From` trait.
    ///
    /// # Example: create an empty record
    ///
    /// ```
    /// use csv::StringRecord;
    ///
    /// let record = StringRecord::new();
    /// assert_eq!(record.len(), 0);
    /// ```
    ///
    /// # Example: initialize a record from a `Vec`
    ///
    /// ```
    /// use csv::StringRecord;
    ///
    /// let record = StringRecord::from(vec!["a", "b", "c"]);
    /// assert_eq!(record.len(), 3);
    /// ```
    #[inline]
    pub fn new() -> StringRecord {
        StringRecord(ByteRecord::new())
    }

    /// Create a new empty `StringRecord` with the given capacity.
    ///
    /// `buffer` refers to the capacity of the buffer used to store the
    /// actual row contents. `fields` refers to the number of fields one
    /// might expect to store.
    #[inline]
    pub fn with_capacity(buffer: usize, fields: usize) -> StringRecord {
        StringRecord(ByteRecord::with_capacity(buffer, fields))
    }

    /// Create a new `StringRecord` from a `ByteRecord`.
    ///
    /// Note that this does UTF-8 validation. If the given `ByteRecord` does
    /// not contain valid UTF-8, then this returns an error. The error
    /// includes the UTF-8 error and the original `ByteRecord`.
    ///
    /// # Example: valid UTF-8
    ///
    /// ```
    /// use std::error::Error;
    /// use csv::{ByteRecord, StringRecord};
    ///
    /// # fn main() { example().unwrap(); }
    /// fn example() -> Result<(), Box<dyn Error>> {
    ///     let byte_record = ByteRecord::from(vec!["a", "b", "c"]);
    ///     let str_record = StringRecord::from_byte_record(byte_record)?;
    ///     assert_eq!(str_record.len(), 3);
    ///     Ok(())
    /// }
    /// ```
    ///
    /// # Example: invalid UTF-8
    ///
    /// ```
    /// use csv::{ByteRecord, StringRecord};
    ///
    /// let byte_record = ByteRecord::from(vec![
    ///     &b"quux"[..], &b"foo\xFFbar"[..], &b"c"[..],
    /// ]);
    /// let err = StringRecord::from_byte_record(byte_record).unwrap_err();
    /// assert_eq!(err.utf8_error().field(), 1);
    /// assert_eq!(err.utf8_error().valid_up_to(), 3);
    /// ```
    #[inline]
    pub fn from_byte_record(
        record: ByteRecord,
    ) -> result::Result<StringRecord, FromUtf8Error> {
        match record.validate() {
            Ok(()) => Ok(StringRecord(record)),
            // Hand the record back to the caller along with the UTF-8 error.
            Err(err) => Err(FromUtf8Error::new(record, err)),
        }
    }
166 | /// 167 | /// This is like `StringRecord::from_byte_record`, except all invalid UTF-8 168 | /// sequences are replaced with the `U+FFFD REPLACEMENT CHARACTER`, which 169 | /// looks like this: �. 170 | /// 171 | /// # Example: valid UTF-8 172 | /// 173 | /// ``` 174 | /// use csv::{ByteRecord, StringRecord}; 175 | /// 176 | /// let byte_record = ByteRecord::from(vec!["a", "b", "c"]); 177 | /// let str_record = StringRecord::from_byte_record_lossy(byte_record); 178 | /// assert_eq!(str_record.len(), 3); 179 | /// ``` 180 | /// 181 | /// # Example: invalid UTF-8 182 | /// 183 | /// ``` 184 | /// use csv::{ByteRecord, StringRecord}; 185 | /// 186 | /// let byte_record = ByteRecord::from(vec![ 187 | /// &b"quux"[..], &b"foo\xFFbar"[..], &b"c"[..], 188 | /// ]); 189 | /// let str_record = StringRecord::from_byte_record_lossy(byte_record); 190 | /// assert_eq!(&str_record[0], "quux"); 191 | /// assert_eq!(&str_record[1], "foo�bar"); 192 | /// assert_eq!(&str_record[2], "c"); 193 | /// ``` 194 | #[inline] 195 | pub fn from_byte_record_lossy(record: ByteRecord) -> StringRecord { 196 | // If the record is valid UTF-8, then take the easy path. 197 | if let Ok(()) = record.validate() { 198 | return StringRecord(record); 199 | } 200 | // TODO: We can be faster here. Not sure if it's worth it. 201 | let mut str_record = 202 | StringRecord::with_capacity(record.as_slice().len(), record.len()); 203 | for field in &record { 204 | str_record.push_field(&String::from_utf8_lossy(field)); 205 | } 206 | str_record 207 | } 208 | 209 | /// Deserialize this record. 210 | /// 211 | /// The `D` type parameter refers to the type that this record should be 212 | /// deserialized into. The `'de` lifetime refers to the lifetime of the 213 | /// `StringRecord`. The `'de` lifetime permits deserializing into structs 214 | /// that borrow field data from this record. 
215 | /// 216 | /// An optional `headers` parameter permits deserializing into a struct 217 | /// based on its field names (corresponding to header values) rather than 218 | /// the order in which the fields are defined. 219 | /// 220 | /// # Example: without headers 221 | /// 222 | /// This shows how to deserialize a single row into a struct based on the 223 | /// order in which fields occur. This example also shows how to borrow 224 | /// fields from the `StringRecord`, which results in zero allocation 225 | /// deserialization. 226 | /// 227 | /// ``` 228 | /// use std::error::Error; 229 | /// 230 | /// use csv::StringRecord; 231 | /// 232 | /// #[derive(serde::Deserialize)] 233 | /// struct Row<'a> { 234 | /// city: &'a str, 235 | /// country: &'a str, 236 | /// population: u64, 237 | /// } 238 | /// 239 | /// # fn main() { example().unwrap() } 240 | /// fn example() -> Result<(), Box> { 241 | /// let record = StringRecord::from(vec![ 242 | /// "Boston", "United States", "4628910", 243 | /// ]); 244 | /// 245 | /// let row: Row = record.deserialize(None)?; 246 | /// assert_eq!(row.city, "Boston"); 247 | /// assert_eq!(row.country, "United States"); 248 | /// assert_eq!(row.population, 4628910); 249 | /// Ok(()) 250 | /// } 251 | /// ``` 252 | /// 253 | /// # Example: with headers 254 | /// 255 | /// This example is like the previous one, but shows how to deserialize 256 | /// into a struct based on the struct's field names. For this to work, 257 | /// you must provide a header row. 258 | /// 259 | /// This example also shows that you can deserialize into owned data 260 | /// types (e.g., `String`) instead of borrowed data types (e.g., `&str`). 
261 | /// 262 | /// ``` 263 | /// use std::error::Error; 264 | /// 265 | /// use csv::StringRecord; 266 | /// 267 | /// #[derive(serde::Deserialize)] 268 | /// struct Row { 269 | /// city: String, 270 | /// country: String, 271 | /// population: u64, 272 | /// } 273 | /// 274 | /// # fn main() { example().unwrap() } 275 | /// fn example() -> Result<(), Box> { 276 | /// // Notice that the fields are not in the same order 277 | /// // as the fields in the struct! 278 | /// let header = StringRecord::from(vec![ 279 | /// "country", "city", "population", 280 | /// ]); 281 | /// let record = StringRecord::from(vec![ 282 | /// "United States", "Boston", "4628910", 283 | /// ]); 284 | /// 285 | /// let row: Row = record.deserialize(Some(&header))?; 286 | /// assert_eq!(row.city, "Boston"); 287 | /// assert_eq!(row.country, "United States"); 288 | /// assert_eq!(row.population, 4628910); 289 | /// Ok(()) 290 | /// } 291 | /// ``` 292 | pub fn deserialize<'de, D: Deserialize<'de>>( 293 | &'de self, 294 | headers: Option<&'de StringRecord>, 295 | ) -> Result { 296 | deserialize_string_record(self, headers) 297 | } 298 | 299 | /// Returns an iterator over all fields in this record. 300 | /// 301 | /// # Example 302 | /// 303 | /// This example shows how to iterate over each field in a `StringRecord`. 304 | /// 305 | /// ``` 306 | /// use csv::StringRecord; 307 | /// 308 | /// let record = StringRecord::from(vec!["a", "b", "c"]); 309 | /// for field in record.iter() { 310 | /// assert!(field == "a" || field == "b" || field == "c"); 311 | /// } 312 | /// ``` 313 | #[inline] 314 | pub fn iter(&self) -> StringRecordIter<'_> { 315 | self.into_iter() 316 | } 317 | 318 | /// Return the field at index `i`. 319 | /// 320 | /// If no field at index `i` exists, then this returns `None`. 
321 | /// 322 | /// # Example 323 | /// 324 | /// ``` 325 | /// use csv::StringRecord; 326 | /// 327 | /// let record = StringRecord::from(vec!["a", "b", "c"]); 328 | /// assert_eq!(record.get(1), Some("b")); 329 | /// assert_eq!(record.get(3), None); 330 | /// ``` 331 | #[inline] 332 | pub fn get(&self, i: usize) -> Option<&str> { 333 | self.0.get(i).map(|bytes| { 334 | debug_assert!(str::from_utf8(bytes).is_ok()); 335 | // This is safe because we guarantee that all string records 336 | // have a valid UTF-8 buffer. It's also safe because we 337 | // individually check each field for valid UTF-8. 338 | unsafe { str::from_utf8_unchecked(bytes) } 339 | }) 340 | } 341 | 342 | /// Returns true if and only if this record is empty. 343 | /// 344 | /// # Example 345 | /// 346 | /// ``` 347 | /// use csv::StringRecord; 348 | /// 349 | /// assert!(StringRecord::new().is_empty()); 350 | /// ``` 351 | #[inline] 352 | pub fn is_empty(&self) -> bool { 353 | self.len() == 0 354 | } 355 | 356 | /// Returns the number of fields in this record. 357 | /// 358 | /// # Example 359 | /// 360 | /// ``` 361 | /// use csv::StringRecord; 362 | /// 363 | /// let record = StringRecord::from(vec!["a", "b", "c"]); 364 | /// assert_eq!(record.len(), 3); 365 | /// ``` 366 | #[inline] 367 | pub fn len(&self) -> usize { 368 | self.0.len() 369 | } 370 | 371 | /// Truncate this record to `n` fields. 372 | /// 373 | /// If `n` is greater than the number of fields in this record, then this 374 | /// has no effect. 375 | /// 376 | /// # Example 377 | /// 378 | /// ``` 379 | /// use csv::StringRecord; 380 | /// 381 | /// let mut record = StringRecord::from(vec!["a", "b", "c"]); 382 | /// assert_eq!(record.len(), 3); 383 | /// record.truncate(1); 384 | /// assert_eq!(record.len(), 1); 385 | /// assert_eq!(record, vec!["a"]); 386 | /// ``` 387 | #[inline] 388 | pub fn truncate(&mut self, n: usize) { 389 | self.0.truncate(n); 390 | } 391 | 392 | /// Clear this record so that it has zero fields. 
393 | /// 394 | /// Note that it is not necessary to clear the record to reuse it with 395 | /// the CSV reader. 396 | /// 397 | /// # Example 398 | /// 399 | /// ``` 400 | /// use csv::StringRecord; 401 | /// 402 | /// let mut record = StringRecord::from(vec!["a", "b", "c"]); 403 | /// assert_eq!(record.len(), 3); 404 | /// record.clear(); 405 | /// assert_eq!(record.len(), 0); 406 | /// ``` 407 | #[inline] 408 | pub fn clear(&mut self) { 409 | self.0.clear(); 410 | } 411 | 412 | /// Trim the fields of this record so that leading and trailing whitespace 413 | /// is removed. 414 | /// 415 | /// This method uses the Unicode definition of whitespace. 416 | /// 417 | /// # Example 418 | /// 419 | /// ``` 420 | /// use csv::StringRecord; 421 | /// 422 | /// let mut record = StringRecord::from(vec![ 423 | /// " ", "\u{3000}\tfoo ", "bar ", "b a z", 424 | /// ]); 425 | /// record.trim(); 426 | /// assert_eq!(record, vec!["", "foo", "bar", "b a z"]); 427 | /// ``` 428 | pub fn trim(&mut self) { 429 | let length = self.len(); 430 | if length == 0 { 431 | return; 432 | } 433 | // TODO: We could likely do this in place, but for now, we allocate. 434 | let mut trimmed = 435 | StringRecord::with_capacity(self.as_slice().len(), self.len()); 436 | trimmed.set_position(self.position().cloned()); 437 | for field in &*self { 438 | trimmed.push_field(field.trim()); 439 | } 440 | *self = trimmed; 441 | } 442 | 443 | /// Add a new field to this record. 444 | /// 445 | /// # Example 446 | /// 447 | /// ``` 448 | /// use csv::StringRecord; 449 | /// 450 | /// let mut record = StringRecord::new(); 451 | /// record.push_field("foo"); 452 | /// assert_eq!(&record[0], "foo"); 453 | /// ``` 454 | #[inline] 455 | pub fn push_field(&mut self, field: &str) { 456 | self.0.push_field(field.as_bytes()); 457 | } 458 | 459 | /// Return the position of this record, if available. 
460 | /// 461 | /// # Example 462 | /// 463 | /// ``` 464 | /// use std::error::Error; 465 | /// use csv::{StringRecord, ReaderBuilder}; 466 | /// 467 | /// # fn main() { example().unwrap(); } 468 | /// fn example() -> Result<(), Box> { 469 | /// let mut record = StringRecord::new(); 470 | /// let mut rdr = ReaderBuilder::new() 471 | /// .has_headers(false) 472 | /// .from_reader("a,b,c\nx,y,z".as_bytes()); 473 | /// 474 | /// assert!(rdr.read_record(&mut record)?); 475 | /// { 476 | /// let pos = record.position().expect("a record position"); 477 | /// assert_eq!(pos.byte(), 0); 478 | /// assert_eq!(pos.line(), 1); 479 | /// assert_eq!(pos.record(), 0); 480 | /// } 481 | /// 482 | /// assert!(rdr.read_record(&mut record)?); 483 | /// { 484 | /// let pos = record.position().expect("a record position"); 485 | /// assert_eq!(pos.byte(), 6); 486 | /// assert_eq!(pos.line(), 2); 487 | /// assert_eq!(pos.record(), 1); 488 | /// } 489 | /// 490 | /// // Finish the CSV reader for good measure. 491 | /// assert!(!rdr.read_record(&mut record)?); 492 | /// Ok(()) 493 | /// } 494 | /// ``` 495 | #[inline] 496 | pub fn position(&self) -> Option<&Position> { 497 | self.0.position() 498 | } 499 | 500 | /// Set the position of this record. 501 | /// 502 | /// # Example 503 | /// 504 | /// ``` 505 | /// use csv::{StringRecord, Position}; 506 | /// 507 | /// let mut record = StringRecord::from(vec!["a", "b", "c"]); 508 | /// let mut pos = Position::new(); 509 | /// pos.set_byte(100); 510 | /// pos.set_line(4); 511 | /// pos.set_record(2); 512 | /// 513 | /// record.set_position(Some(pos.clone())); 514 | /// assert_eq!(record.position(), Some(&pos)); 515 | /// ``` 516 | #[inline] 517 | pub fn set_position(&mut self, pos: Option) { 518 | self.0.set_position(pos); 519 | } 520 | 521 | /// Return the start and end position of a field in this record. 522 | /// 523 | /// If no such field exists at the given index, then return `None`. 
524 | /// 525 | /// The range returned can be used with the slice returned by `as_slice`. 526 | /// Namely, the range returned is guaranteed to start and end at valid 527 | /// UTF-8 sequence boundaries. 528 | /// 529 | /// # Example 530 | /// 531 | /// ``` 532 | /// use csv::StringRecord; 533 | /// 534 | /// let record = StringRecord::from(vec!["foo", "quux", "z"]); 535 | /// let range = record.range(1).expect("a record range"); 536 | /// assert_eq!(&record.as_slice()[range], "quux"); 537 | /// ``` 538 | #[inline] 539 | pub fn range(&self, i: usize) -> Option> { 540 | self.0.range(i) 541 | } 542 | 543 | /// Return the entire row as a single string slice. The slice returned 544 | /// stores all fields contiguously. The boundaries of each field can be 545 | /// determined via the `range` method. 546 | /// 547 | /// # Example 548 | /// 549 | /// ``` 550 | /// use csv::StringRecord; 551 | /// 552 | /// let record = StringRecord::from(vec!["foo", "quux", "z"]); 553 | /// assert_eq!(record.as_slice(), "fooquuxz"); 554 | /// ``` 555 | #[inline] 556 | pub fn as_slice(&self) -> &str { 557 | debug_assert!(str::from_utf8(self.0.as_slice()).is_ok()); 558 | // This is safe because we guarantee that each field is valid UTF-8. 559 | // If each field is valid UTF-8, then the entire buffer (up to the end 560 | // of the last field) must also be valid UTF-8. 561 | unsafe { str::from_utf8_unchecked(self.0.as_slice()) } 562 | } 563 | 564 | /// Return a reference to this record's raw 565 | /// [`ByteRecord`](struct.ByteRecord.html). 
566 | /// 567 | /// # Example 568 | /// 569 | /// ``` 570 | /// use csv::StringRecord; 571 | /// 572 | /// let str_record = StringRecord::from(vec!["a", "b", "c"]); 573 | /// let byte_record = str_record.as_byte_record(); 574 | /// assert_eq!(&byte_record[2], b"c"); 575 | /// ``` 576 | #[inline] 577 | pub fn as_byte_record(&self) -> &ByteRecord { 578 | &self.0 579 | } 580 | 581 | /// Convert this `StringRecord` into a 582 | /// [`ByteRecord`](struct.ByteRecord.html). 583 | /// 584 | /// # Example 585 | /// 586 | /// ``` 587 | /// use csv::StringRecord; 588 | /// 589 | /// let str_record = StringRecord::from(vec!["a", "b", "c"]); 590 | /// let byte_record = str_record.into_byte_record(); 591 | /// assert_eq!(&byte_record[2], b"c"); 592 | /// ``` 593 | /// 594 | /// Note that this can also be achieved using the `From` impl: 595 | /// 596 | /// ``` 597 | /// use csv::{ByteRecord, StringRecord}; 598 | /// 599 | /// // Using ByteRecord::from... 600 | /// let str_record = StringRecord::from(vec!["a", "b", "c"]); 601 | /// assert_eq!(ByteRecord::from(str_record).len(), 3); 602 | /// 603 | /// // Using StringRecord::into... 604 | /// let str_record = StringRecord::from(vec!["a", "b", "c"]); 605 | /// let byte_record: ByteRecord = str_record.into(); 606 | /// assert_eq!(byte_record.len(), 3); 607 | /// ``` 608 | #[inline] 609 | pub fn into_byte_record(self) -> ByteRecord { 610 | self.0 611 | } 612 | 613 | /// Clone this record, but only copy `fields` up to the end of bounds. This 614 | /// is useful when one wants to copy a record, but not necessarily any 615 | /// excess capacity in that record. 616 | #[inline] 617 | pub(crate) fn clone_truncated(&self) -> StringRecord { 618 | StringRecord(self.0.clone_truncated()) 619 | } 620 | 621 | /// A safe function for reading CSV data into a `StringRecord`. 622 | /// 623 | /// This relies on the internal representation of `StringRecord`. 
624 | #[inline(always)] 625 | pub(crate) fn read( 626 | &mut self, 627 | rdr: &mut Reader, 628 | ) -> Result { 629 | // SAFETY: This code is critical to upholding the safety of other code 630 | // blocks in this module. Namely, after calling `read_byte_record`, 631 | // it is possible for `record` to contain invalid UTF-8. We check for 632 | // this in the `validate` method, and if it does have invalid UTF-8, we 633 | // clear the record. (It is bad for `record` to contain invalid UTF-8 634 | // because other accessor methods, like `get`, assume that every field 635 | // is valid UTF-8.) 636 | let pos = rdr.position().clone(); 637 | let read_res = rdr.read_byte_record(&mut self.0); 638 | let utf8_res = match self.0.validate() { 639 | Ok(()) => Ok(()), 640 | Err(err) => { 641 | // If this record isn't valid UTF-8, then completely wipe it. 642 | self.0.clear(); 643 | Err(err) 644 | } 645 | }; 646 | match (read_res, utf8_res) { 647 | (Err(err), _) => Err(err), 648 | (Ok(_), Err(err)) => { 649 | Err(Error::new(ErrorKind::Utf8 { pos: Some(pos), err })) 650 | } 651 | (Ok(eof), Ok(())) => Ok(eof), 652 | } 653 | } 654 | } 655 | 656 | impl ops::Index for StringRecord { 657 | type Output = str; 658 | #[inline] 659 | fn index(&self, i: usize) -> &str { 660 | self.get(i).unwrap() 661 | } 662 | } 663 | 664 | impl> From> for StringRecord { 665 | #[inline] 666 | fn from(xs: Vec) -> StringRecord { 667 | StringRecord::from_iter(xs) 668 | } 669 | } 670 | 671 | impl<'a, T: AsRef> From<&'a [T]> for StringRecord { 672 | #[inline] 673 | fn from(xs: &'a [T]) -> StringRecord { 674 | StringRecord::from_iter(xs) 675 | } 676 | } 677 | 678 | impl> FromIterator for StringRecord { 679 | #[inline] 680 | fn from_iter>(iter: I) -> StringRecord { 681 | let mut record = StringRecord::new(); 682 | record.extend(iter); 683 | record 684 | } 685 | } 686 | 687 | impl> Extend for StringRecord { 688 | #[inline] 689 | fn extend>(&mut self, iter: I) { 690 | for x in iter { 691 | self.push_field(x.as_ref()); 
692 | } 693 | } 694 | } 695 | 696 | impl<'a> IntoIterator for &'a StringRecord { 697 | type IntoIter = StringRecordIter<'a>; 698 | type Item = &'a str; 699 | 700 | #[inline] 701 | fn into_iter(self) -> StringRecordIter<'a> { 702 | StringRecordIter(self.0.iter()) 703 | } 704 | } 705 | 706 | /// An iterator over the fields in a string record. 707 | /// 708 | /// The `'r` lifetime variable refers to the lifetime of the `StringRecord` 709 | /// that is being iterated over. 710 | #[derive(Clone)] 711 | pub struct StringRecordIter<'r>(ByteRecordIter<'r>); 712 | 713 | impl<'r> Iterator for StringRecordIter<'r> { 714 | type Item = &'r str; 715 | 716 | #[inline] 717 | fn next(&mut self) -> Option<&'r str> { 718 | self.0.next().map(|bytes| { 719 | debug_assert!(str::from_utf8(bytes).is_ok()); 720 | // See StringRecord::get for safety argument. 721 | unsafe { str::from_utf8_unchecked(bytes) } 722 | }) 723 | } 724 | 725 | #[inline] 726 | fn size_hint(&self) -> (usize, Option) { 727 | self.0.size_hint() 728 | } 729 | 730 | #[inline] 731 | fn count(self) -> usize { 732 | self.0.len() 733 | } 734 | } 735 | 736 | impl<'r> DoubleEndedIterator for StringRecordIter<'r> { 737 | #[inline] 738 | fn next_back(&mut self) -> Option<&'r str> { 739 | self.0.next_back().map(|bytes| { 740 | debug_assert!(str::from_utf8(bytes).is_ok()); 741 | // See StringRecord::get for safety argument. 
742 | unsafe { str::from_utf8_unchecked(bytes) } 743 | }) 744 | } 745 | } 746 | 747 | #[cfg(test)] 748 | mod tests { 749 | use crate::string_record::StringRecord; 750 | 751 | #[test] 752 | fn trim_front() { 753 | let mut rec = StringRecord::from(vec![" abc"]); 754 | rec.trim(); 755 | assert_eq!(rec.get(0), Some("abc")); 756 | 757 | let mut rec = StringRecord::from(vec![" abc", " xyz"]); 758 | rec.trim(); 759 | assert_eq!(rec.get(0), Some("abc")); 760 | assert_eq!(rec.get(1), Some("xyz")); 761 | } 762 | 763 | #[test] 764 | fn trim_back() { 765 | let mut rec = StringRecord::from(vec!["abc "]); 766 | rec.trim(); 767 | assert_eq!(rec.get(0), Some("abc")); 768 | 769 | let mut rec = StringRecord::from(vec!["abc ", "xyz "]); 770 | rec.trim(); 771 | assert_eq!(rec.get(0), Some("abc")); 772 | assert_eq!(rec.get(1), Some("xyz")); 773 | } 774 | 775 | #[test] 776 | fn trim_both() { 777 | let mut rec = StringRecord::from(vec![" abc "]); 778 | rec.trim(); 779 | assert_eq!(rec.get(0), Some("abc")); 780 | 781 | let mut rec = StringRecord::from(vec![" abc ", " xyz "]); 782 | rec.trim(); 783 | assert_eq!(rec.get(0), Some("abc")); 784 | assert_eq!(rec.get(1), Some("xyz")); 785 | } 786 | 787 | #[test] 788 | fn trim_does_not_panic_on_empty_records_1() { 789 | let mut rec = StringRecord::from(vec![""]); 790 | rec.trim(); 791 | assert_eq!(rec.get(0), Some("")); 792 | } 793 | 794 | #[test] 795 | fn trim_does_not_panic_on_empty_records_2() { 796 | let mut rec = StringRecord::from(vec!["", ""]); 797 | rec.trim(); 798 | assert_eq!(rec.get(0), Some("")); 799 | assert_eq!(rec.get(1), Some("")); 800 | } 801 | 802 | #[test] 803 | fn trim_does_not_panic_on_empty_records_3() { 804 | let mut rec = StringRecord::new(); 805 | rec.trim(); 806 | assert_eq!(rec.as_slice().len(), 0); 807 | } 808 | 809 | #[test] 810 | fn trim_whitespace_only() { 811 | let mut rec = StringRecord::from(vec![ 812 | 
"\u{0009}\u{000A}\u{000B}\u{000C}\u{000D}\u{0020}\u{0085}\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}", 813 | ]); 814 | rec.trim(); 815 | assert_eq!(rec.get(0), Some("")); 816 | } 817 | 818 | // Check that record equality respects field boundaries. 819 | // 820 | // Regression test for #138. 821 | #[test] 822 | fn eq_field_boundaries() { 823 | let test1 = StringRecord::from(vec!["12", "34"]); 824 | let test2 = StringRecord::from(vec!["123", "4"]); 825 | 826 | assert_ne!(test1, test2); 827 | } 828 | 829 | // Check that record equality respects number of fields. 830 | // 831 | // Regression test for #138. 832 | #[test] 833 | fn eq_record_len() { 834 | let test1 = StringRecord::from(vec!["12", "34", "56"]); 835 | let test2 = StringRecord::from(vec!["12", "34"]); 836 | assert_ne!(test1, test2); 837 | } 838 | } 839 | --------------------------------------------------------------------------------