├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── src └── main.rs └── test.csv /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 6 | # More information here http://doc.crates.io/guide.html#cargotoml-vs-cargolock 7 | Cargo.lock 8 | 9 | # These are backup files generated by rustfmt 10 | **/*.rs.bk 11 | 12 | *~ 13 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "pivot" 3 | version = "0.1.0" 4 | authors = ["John Graham-Cumming "] 5 | 6 | [dependencies] 7 | csv = "1.0.0-beta.5" 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2017, John Graham-Cumming 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pivot 2 | 3 | Small program that creates very simplistic CSV-based 'pivot tables' on the command-line 4 | 5 | # Example 6 | 7 | Take for example the following CSV file: 8 | 9 | a,1,2 10 | a,42,11 11 | b,2,3 12 | b,13,12 13 | c,3,4 14 | 15 | It can be useful to summarize that data by grouping information based 16 | on the first (0) column. For example, suppose that we want to group by 17 | column 0 and output the sum of the other two columns. This can be achieved as follows: 18 | 19 | $ pivot 0 sum:1 sum:2 < test.csv 20 | a,43,13, 21 | b,15,15, 22 | c,3,4, 23 | 24 | A single column of data can be referenced more than once, so to find the max and min in 25 | column 1 do the following: 26 | 27 | $ pivot 0 max:1 min:1 < test.csv 28 | a,42,1, 29 | b,13,2, 30 | c,3,3, 31 | 32 | # Rust 33 | 34 | This is the first program I've ever written in Rust. I chose this particular task because 35 | it was relatively small and something that I wanted to have. I would be grateful for 36 | constructive criticism of my (lack of) Rust knowledge. 
37 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | // pivot -- summarize and sort data from delimited text files 2 | // 3 | // Usage: 4 | // pivot 0 sum:1 avg:3 5 | // ^ ^ ^ 6 | // ^ ^ ^ 7 | // ^ ^ Average values in column 3 of CSV 8 | // ^ ^ 9 | // ^ Sum values in column one of CSV 10 | // ^ 11 | // Column to pivot on (i.e. summarize) 12 | // 13 | // The operators are sum, avg, max, min 14 | // 15 | // Copyright (c) 2017 John Graham-Cumming 16 | 17 | extern crate csv; 18 | 19 | use csv::ReaderBuilder; 20 | use std::env; 21 | use std::io; 22 | use std::collections::HashMap; 23 | use std::process; 24 | use std::str; 25 | 26 | // Val is used to accumulate values from a single column of the CSV 27 | struct Val { 28 | sum: i64, 29 | count: i64, 30 | max: i64, 31 | min: i64 32 | } 33 | 34 | // A Row is all the values accumulated as specified by the command-line 35 | type Row = Vec; 36 | 37 | // Pivot is the pivot table mapping some summarized value to a Row 38 | type Pivot = HashMap; 39 | 40 | // Op are the possible operations on a single column of the CSV 41 | enum Op { 42 | Sum, 43 | Max, 44 | Min, 45 | Avg, 46 | } 47 | 48 | // ColOp keeps track of which operation to apply to which CSV column 49 | struct ColOp { 50 | col: usize, 51 | op: Op 52 | } 53 | 54 | // Columns is the list of all CSV columns on which to perform operations and their 55 | // corresponding operations 56 | type Columns = Vec; 57 | 58 | macro_rules! error { 59 | ($fmt:expr) => ({eprint!(concat!($fmt, "\n")); process::exit(1)}); 60 | ($fmt:expr, $($arg:tt)*) => ({eprint!(concat!($fmt, "\n"), $($arg)*); process::exit(1)}); 61 | } 62 | 63 | // run builds the pivot table by looking at rows in the CSV and summarizing based 64 | // on the value indexed by row_index. The columns upon which operations are performed 65 | // is in cols. 
The o parameter is used to kept track of the order in which items 66 | // are added to the pivot table. 67 | fn run(p: &mut Pivot, o: &mut Vec, row_index: usize, cols: &Columns) { 68 | let mut r = ReaderBuilder::new() 69 | .has_headers(false) 70 | .from_reader(io::stdin()); 71 | 72 | let mut j = -1; 73 | for res in r.records() { 74 | j += 1; 75 | if res.is_ok() { 76 | let rec = res.unwrap(); 77 | if row_index > rec.len() { 78 | error!("Insufficient columns in CSV at row {}", j); 79 | } 80 | 81 | let row = p.entry(rec.get(row_index).unwrap().into()).or_insert(Vec::new()); 82 | if row.len() == 0 { 83 | o.push(rec.get(row_index).unwrap().into()); 84 | for _i in 0..cols.len() { 85 | row.push(Val{sum: 0, count: 0, max: 0, min: 0}); 86 | } 87 | } 88 | 89 | for i in 0..cols.len() { 90 | if cols[i].col > rec.len() { 91 | error!("Insufficient columns in CSV row at row {}", j); 92 | } 93 | 94 | let num_string = rec.get(cols[i].col).unwrap(); 95 | let num_res = num_string.parse::(); 96 | if !num_res.is_ok() { 97 | error!("Failed to parse number {} at CSV row {}", num_string, j); 98 | } 99 | let num = num_res.unwrap(); 100 | if let Some(val) = row.get_mut(i) { 101 | val.sum += num; 102 | val.count += 1; 103 | if val.count == 1 { 104 | val.max = num; 105 | val.min = num; 106 | } else { 107 | if num < val.min { 108 | val.min = num; 109 | } 110 | if num > val.max { 111 | val.max = num; 112 | } 113 | } 114 | } 115 | } 116 | } 117 | } 118 | } 119 | 120 | // parse the command line arguments and return the index of the CSV column 121 | // which is the key for summarization. 
Also fills in the cols to specify 122 | // which CSV columns are to be operated on and output 123 | fn parse(cols: &mut Columns) -> usize { 124 | let args: Vec = env::args().collect(); 125 | 126 | if args.len() <= 2 { 127 | error!("Need at least two arguments; has {}", args.len()); 128 | } 129 | 130 | let row_res = args[1].parse::(); 131 | if !row_res.is_ok() { 132 | error!("First argument must be pivot row number, indexed from 1, not {}", args[1]); 133 | } 134 | let row_index = row_res.unwrap(); 135 | 136 | for i in 2..args.len() { 137 | let parts: Vec<&str> = args[i].split(":").collect(); 138 | if parts.len() != 2 { 139 | error!("Column parameters must be in the form op:index, don't understand {}", args[i]); 140 | } 141 | 142 | let index_res = parts[1].parse::(); 143 | if !index_res.is_ok() { 144 | error!("Column parameters must be in the form op:index, don't understand {}", args[i]); 145 | } 146 | let index = index_res.unwrap(); 147 | 148 | match parts[0] { 149 | "sum" => cols.push(ColOp{col: index, op: Op::Sum}), 150 | "max" => cols.push(ColOp{col: index, op: Op::Max}), 151 | "min" => cols.push(ColOp{col: index, op: Op::Min}), 152 | "avg" => cols.push(ColOp{col: index, op: Op::Avg}), 153 | 154 | _ => error!("The valid operators are: sum, max, min, avg; don't understand {}", parts[0]), 155 | } 156 | } 157 | 158 | return row_index 159 | } 160 | 161 | fn main() { 162 | let mut cols: Columns = Vec::new(); 163 | let row_index = parse(&mut cols); 164 | 165 | let mut table: Pivot = HashMap::new(); 166 | let mut order: Vec = Vec::new(); 167 | run(&mut table, &mut order, row_index, &cols); 168 | 169 | for row in order { 170 | let vals = &table[&row]; 171 | print!("{},", row); 172 | for i in 0..cols.len() { 173 | match cols[i].op { 174 | Op::Sum => print!("{}", vals[i].sum), 175 | Op::Max => print!("{}", vals[i].max), 176 | Op::Min => print!("{}", vals[i].min), 177 | Op::Avg => print!("{}", vals[i].sum / vals[i].count), 178 | } 179 | print!(","); 180 | } 181 | 
println!(); 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /test.csv: -------------------------------------------------------------------------------- 1 | a,1,2 2 | b,2,3 3 | c,3,4 4 | a,42,11 5 | b,13,12 6 | --------------------------------------------------------------------------------