├── LICENSE ├── README.md ├── build-all.sh ├── build-list.txt ├── catcsv ├── README.md ├── catcsv.go ├── test1.csv └── test2.csv ├── comparecsv ├── README.md ├── both.csv ├── comparecsv.go ├── f1only.csv ├── f2only.csv ├── test1.csv ├── test2.csv └── test3.csv ├── cryptcsv ├── README.md ├── cryptcsv.go ├── test1-decrypted-both.csv ├── test1-decrypted.csv ├── test1-encrypted.csv └── test1.csv ├── dedupcsv ├── README.md ├── dedupcsv.go └── test1.csv ├── diffcsv ├── README.md ├── diffcsv.go ├── input1.csv ├── input2.csv ├── input3.csv ├── input4.csv ├── input5.csv ├── input6.csv ├── input7.csv ├── input8.csv ├── test1.csv ├── test2.csv ├── test3.csv ├── test6.csv └── test7.csv ├── editcsv ├── README.md ├── editcsv.go └── test1.csv ├── obfuscatecsv ├── README.md ├── obfuscatecsv.go └── test1.csv ├── pivotcsv ├── README.md ├── pivotcsv.go ├── test1.csv └── test2.csv ├── rangespec.go ├── rangespec_test.go ├── recursecsv ├── README.md ├── recursecsv.go ├── test1.csv └── test2.csv ├── recursedata └── recursedata.go ├── reordercsv ├── README.md ├── reordercsv.go └── test1.csv ├── searchcsv ├── README.md ├── searchcsv.go └── test1.csv ├── sortcsv ├── README.md ├── sortcsv.go └── test1.csv ├── splitcsv ├── README.md ├── go.mod ├── rangespec │ └── rangespec.go ├── splitcsv.go ├── test1.csv └── test2.csv └── transformcsv ├── README.md ├── template1.txt ├── test1.csv ├── trans1.sql └── transformcsv.go /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CSV Utilities 2 | 3 | This repo has a collection of CSV utilities to manipulate 4 | CSV files. Here is a brief description of each. Each utility 5 | is in its own folder and has its own README. 6 | - catcsv: concatenate two CSV files 7 | - comparecsv: compare two CSV files 8 | - dedupcsv: remove duplicates in a CSV file 9 | - diffcsv: shows differences between two CSV files 10 | - editcsv: alter contents of a CSV; regexp replace supported 11 | - obfuscatecsv: obscures content in a regular fashion 12 | - pivotcsv: do a pivot table operation 13 | - recursecsv: recursively process hierarchical data; supports 14 | the Oracle list of hierarchical functions 15 | - reordercsv: alters order of columns of a CSV file 16 | - searchcsv: outputs matching rows of a CSV file; regexp 17 | supported 18 | - sortcsv: sorts a CSV file 19 | - splitcsv: splits a CSV by columns and/or rows 20 | - transformcsv: using a "text/template", will transform a CSV 21 | by applying the template for each row 22 | 23 | Each utility has its own README with examples. 24 | 25 | To install `go get github.com/mandolyte/csv-utils`. 26 | 27 | Afterwards you can use `go install` to compile the ones of 28 | interest or just use `go run`. 29 | 30 | To install all of them: `sh build_all.sh`. 31 | 32 | To Do: 33 | - document recursedata.go -------------------------------------------------------------------------------- /build-all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | echo build all 3 | for i in `cat build-list.txt` 4 | do 5 | echo Working on $i in `pwd` 6 | cd $i 7 | go install $i.go 8 | cd .. 9 | done 10 | -------------------------------------------------------------------------------- /build-list.txt: -------------------------------------------------------------------------------- 1 | catcsv 2 | comparecsv 3 | dedupcsv 4 | diffcsv 5 | editcsv 6 | obfuscatecsv 7 | pivotcsv 8 | recursecsv 9 | recursedata 10 | reordercsv 11 | searchcsv 12 | sortcsv 13 | splitcsv 14 | transformcsv -------------------------------------------------------------------------------- /catcsv/README.md: -------------------------------------------------------------------------------- 1 | # Catcsv 2 | This utility will concatenate CSV files. 3 | 4 | Use -help to show: 5 | ``` 6 | $ catcsv -help 7 | Help Message 8 | 9 | Usage: catcsv [options] input1.csv input2.csv ... 10 | -f Force concatenation of different width CSV files 11 | -headers 12 | CSV has headers (default true) 13 | -help 14 | Show usage message 15 | -keep 16 | Keep CSV headers on output (default true) 17 | -o string 18 | Output CSV filename; default STDOUT 19 | ``` 20 | 21 | ## Examples 22 | This first example shows an error due to different number of columns 23 | in the input files. 24 | ``` 25 | $ go run catcsv.go test1.csv test2.csv 26 | 2017/12/01 09:18:16 Individual file row counts include header row 27 | 2017/12/01 09:18:16 Total row count does not include header rows 28 | 2017/12/01 09:18:16 File test1.csv had 4 rows 29 | 2017/12/01 09:18:16 csv.Read: 30 | line 1, column 0: wrong number of fields in line 31 | exit status 1 32 | $ 33 | ``` 34 | This example shows use of the force option to concatenate anyway. 35 | ``` 36 | $ go run catcsv.go -f test1.csv test2.csv 37 | 2017/12/01 09:18:28 Individual file row counts include header row 38 | 2017/12/01 09:18:28 Total row count does not include header rows 39 | 2017/12/01 09:18:28 File test1.csv had 4 rows 40 | 2017/12/01 09:18:28 File test2.csv had 4 rows 41 | A,B 42 | 1,2 43 | 3,4 44 | 5,6 45 | 1,2,3 46 | 4,5,6 47 | 7,8,9 48 | 2017/12/01 09:18:28 Total rows in output has 6 rows 49 | $ 50 | ``` -------------------------------------------------------------------------------- /catcsv/catcsv.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "flag" 6 | "fmt" 7 | "io" 8 | "log" 9 | "os" 10 | ) 11 | 12 | func main() { 13 | output := flag.String("o", "", "Output CSV filename; default STDOUT") 14 | headers := flag.Bool("headers", true, "CSV has headers") 15 | keep := flag.Bool("keep", true, "Keep CSV headers on output") 16 | help := flag.Bool("help", false, "Show usage message") 17 | force := flag.Bool("f", false, "Force concatenation of different width CSV files") 18 | flag.Parse() 19 | 20 | if *help { 21 | usage("Help Message") 22 | os.Exit(0) 23 | } 24 | 25 | if len(flag.Args()) < 1 { 26 | usage("No files specified to concatenate!") 27 | os.Exit(0) 28 | } 29 | 30 | if !*headers { 31 | *keep = false 32 | log.Println("If no headers, keep option is auto-set to false") 33 | } 34 | 35 | // open output file 36 | var w *csv.Writer 37 | if *output == "" { 38 | w = csv.NewWriter(os.Stdout) 39 | } else { 40 | fo, foerr := os.Create(*output) 41 | if foerr != nil { 42 | log.Fatal("os.Create() Error:" + foerr.Error()) 43 | } 44 | defer fo.Close() 45 | w = csv.NewWriter(fo) 46 | } 47 | log.Println("Individual file row counts include header row") 48 | log.Println("Total row count does not include header rows") 49 | 50 | var total uint64 51 | var firstfilecolumncount int 52 | for n, f := range flag.Args() { 53 | // open input file 54 | var r *csv.Reader 55 | fi, fierr := os.Open(f) 56 | if fierr != nil { 57 | log.Fatal("os.Open() Error:" + fierr.Error()) 58 | } 59 | defer fi.Close() 60 | r = csv.NewReader(fi) 61 | if n == 0 { 62 | r.FieldsPerRecord = 0 63 | } else { 64 | if *force { 65 | r.FieldsPerRecord = -1 66 | } else { 67 | r.FieldsPerRecord = firstfilecolumncount 68 | } 69 | } 70 | 71 | // read loop for CSV files 72 | var row uint64 73 | row = 0 74 | for { 75 | // read the csv file 76 | cells, rerr := r.Read() 77 | if rerr == io.EOF { 78 | break 79 | } 80 | if rerr != nil { 81 | log.Fatalf("csv.Read:\n%v\n", rerr) 82 | } 83 | if n == 0 && row == 0 { 84 | firstfilecolumncount = len(cells) 85 | if *headers { 86 | if *keep { 87 | err := w.Write(cells) 88 | if err != nil { 89 | log.Fatalf("csv.Write:\n%v\n", err) 90 | } 91 | } 92 | } else { 93 | err := w.Write(cells) 94 | if err != nil { 95 | log.Fatalf("csv.Write:\n%v\n", err) 96 | } 97 | } 98 | row++ 99 | continue 100 | } 101 | if n > 0 && row == 0 { 102 | if *headers { 103 | row++ 104 | continue // omit headers on all but first file 105 | } 106 | } 107 | row++ 108 | err := w.Write(cells) 109 | if err != nil { 110 | log.Fatalf("csv.Write:\n%v\n", err) 111 | } 112 | } 113 | log.Printf("File %v had %v rows", f, row) 114 | total += row 115 | } 116 | w.Flush() 117 | if *headers { 118 | total -= uint64(len(flag.Args())) // don't include header rows in counts 119 | } 120 | log.Printf("Total rows in output %v has %v rows", *output, total) 121 | } 122 | 123 | func usage(msg string) { 124 | fmt.Println(msg + "\n") 125 | fmt.Print("Usage: catcsv [options] input1.csv input2.csv ...\n") 126 | flag.PrintDefaults() 127 | } 128 | -------------------------------------------------------------------------------- /catcsv/test1.csv: -------------------------------------------------------------------------------- 1 | A,B 2 | 1,2 3 | 3,4 4 | 5,6 5 | -------------------------------------------------------------------------------- /catcsv/test2.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | 1,2,3 3 | 4,5,6 4 | 7,8,9 5 | -------------------------------------------------------------------------------- /comparecsv/README.md: -------------------------------------------------------------------------------- 1 | # Comparecsv 2 | This utility compares two CSV files using Merkle Tree conceps, 3 | namely, hashes of the rows are the basis of the compare logic. 4 | 5 | It is written to enable large CSV file comparisons. The only 6 | memory consumed are maps of the row hashes. 7 | 8 | Use -help to show: 9 | ``` 10 | $ go run comparecsv.go -help 11 | -f1 string 12 | First CSV file name to compare 13 | -f2 string 14 | Second CSV file name to compare 15 | -help 16 | Show help message 17 | NOTE 1: Headers on the CSV files are expected. 18 | NOTE 2: Duplicates are omitted in all outputs. 19 | $ 20 | ``` 21 | 22 | It produces three output files, which are currently fixed: 23 | - f1only.csv contains the rows unique to file 1 24 | - f2only.csv contains the rows unique to file 2 25 | - both.csv contains the rows common to both input files 26 | 27 | ## Examples 28 | A simple test to validate basic operations: 29 | ``` 30 | $ go run comparecsv.go -f1 test2.csv -f2 test3.csv 31 | 2017/12/04 11:15:29 Start at 2017-12-04 11:15:29.853501341 -0500 EST m=+0.000326007 32 | 2017/12/04 11:15:29 Number of rows in file 1:3 33 | 2017/12/04 11:15:29 Number of rows in file 2:3 34 | 2017/12/04 11:15:29 Number of rows in both files:2 35 | 2017/12/04 11:15:29 Number of rows ONLY in file 2:1 36 | 2017/12/04 11:15:29 Number of rows ONLY in file 1:1 37 | 2017/12/04 11:15:29 End at 2017-12-04 11:15:29.85432992 -0500 EST m=+0.001154546 38 | 2017/12/04 11:15:29 Elapsed time 828.715µs 39 | $ 40 | ``` 41 | 42 | A performance test using wine review public data set at 43 | https://www.kaggle.com/zynicide/wine-reviews/data. Minor 44 | changes are made to the original to make test1.csv. 45 | ``` 46 | $ comparecsv -f1 winemag-data-130k-v2.csv -f2 test1.csv 47 | 2017/12/04 11:18:40 Start at 2017-12-04 11:18:40.631915938 -0500 EST m=+0.000781184 48 | 2017/12/04 11:18:43 Number of rows in file 1:129971 49 | 2017/12/04 11:18:49 Number of rows in file 2:129969 50 | 2017/12/04 11:18:49 Number of rows in both files:129968 51 | 2017/12/04 11:18:49 Number of rows ONLY in file 2:1 52 | 2017/12/04 11:18:51 Number of rows ONLY in file 1:3 53 | 2017/12/04 11:18:51 End at 2017-12-04 11:18:51.356633528 -0500 EST m=+10.725498483 54 | 2017/12/04 11:18:51 Elapsed time 10.72471747s 55 | $ 56 | ``` -------------------------------------------------------------------------------- /comparecsv/both.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | 1,2,3 3 | 7,8,9 4 | -------------------------------------------------------------------------------- /comparecsv/comparecsv.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "crypto/sha1" 5 | "encoding/csv" 6 | "flag" 7 | "fmt" 8 | "io" 9 | "log" 10 | "os" 11 | "time" 12 | ) 13 | 14 | var f1name = flag.String("f1", "", "First CSV file name to compare") 15 | var f2name = flag.String("f2", "", "Second CSV file name to compare") 16 | var help = flag.Bool("help", false, "Show help message") 17 | 18 | /* 19 | Design Overview: 20 | There will be two input files to compare and there will be 21 | three output files created: 22 | - f1only.csv having rows unique to f1 23 | - f2only.csv having rows unique to f2 24 | - both.csv having rows common to both f1 and f2 25 | a) The first file will be read and hash computed per row 26 | b) The hash will be a key in a map with value struct{} 27 | c) The second file is read and then per row: 28 | - the hash value is computed 29 | - if hash exists in first file's map, then the row is 30 | written to the "both" output file 31 | - Otherwise, it is written to the f2 only file 32 | - the hash is then stored similar to f1 in a map 33 | d) can the f1 map be reclaimed by the GC?? 34 | e) Now f1 is read a second time and per row: 35 | - the hash value is computed 36 | - if hash exists in second file's map, then continue, since 37 | it is already written to the both csv file 38 | - otherwise, write to the f1 only file 39 | */ 40 | 41 | func main() { 42 | flag.Parse() 43 | 44 | if *help { 45 | usage() 46 | } 47 | 48 | if len(flag.Args()) > 0 { 49 | usage() 50 | } 51 | 52 | now := time.Now() 53 | log.Printf("Start at %v", now) 54 | 55 | // open first input file 56 | var r1 *csv.Reader 57 | f1, f1err := os.Open(*f1name) 58 | if f1err != nil { 59 | log.Fatal("os.Open() Error:" + f1err.Error()) 60 | } 61 | r1 = csv.NewReader(f1) 62 | 63 | // open second input file 64 | var r2 *csv.Reader 65 | f2, f2err := os.Open(*f2name) 66 | if f2err != nil { 67 | log.Fatal("os.Open() Error:" + f2err.Error()) 68 | } 69 | r2 = csv.NewReader(f2) 70 | 71 | /*********************************************************/ 72 | // do a quick check on columns first 73 | // if not the same, then log error and exit 74 | 75 | // second file 76 | hdrs2, rerr := r2.Read() 77 | if rerr == io.EOF { 78 | log.Fatal("File 2 is empty", rerr) 79 | } 80 | if rerr != nil { 81 | log.Fatalf("csv.Read:\n%v\n", rerr) 82 | } 83 | numcols2 := len(hdrs2) 84 | 85 | // first file 86 | hdrs1, rerr := r1.Read() 87 | if rerr == io.EOF { 88 | log.Fatal("File 1 is empty", rerr) 89 | } 90 | if rerr != nil { 91 | log.Fatalf("csv.Read:\n%v\n", rerr) 92 | } 93 | numcols1 := len(hdrs1) 94 | 95 | if numcols1 != numcols2 { 96 | log.Fatalf("Different number of columns:%v vs. %v", 97 | numcols1, numcols2) 98 | } 99 | 100 | // set expectations of fields per row 101 | r1.FieldsPerRecord = numcols1 102 | r2.FieldsPerRecord = numcols1 103 | 104 | // open f1only file 105 | var wf1 *csv.Writer 106 | wf1o, wf1oerr := os.Create("f1only.csv") 107 | if wf1oerr != nil { 108 | log.Fatal("os.Create() Error:" + wf1oerr.Error()) 109 | } 110 | defer wf1o.Close() 111 | wf1 = csv.NewWriter(wf1o) 112 | err := wf1.Write(hdrs1) 113 | if err != nil { 114 | log.Fatalf("Headers 1 Error:\n%v\n", err) 115 | } 116 | 117 | // open f2only file 118 | var wf2 *csv.Writer 119 | wf2o, wf2oerr := os.Create("f2only.csv") 120 | if wf2oerr != nil { 121 | log.Fatal("os.Create() Error:" + wf2oerr.Error()) 122 | } 123 | defer wf2o.Close() 124 | wf2 = csv.NewWriter(wf2o) 125 | err = wf2.Write(hdrs2) 126 | if err != nil { 127 | log.Fatalf("Headers 2 Error:\n%v\n", err) 128 | } 129 | 130 | // open both file 131 | var both *csv.Writer 132 | botho, bothoerr := os.Create("both.csv") 133 | if bothoerr != nil { 134 | log.Fatal("os.Create() Error:" + bothoerr.Error()) 135 | } 136 | defer botho.Close() 137 | both = csv.NewWriter(botho) 138 | err = both.Write(hdrs1) 139 | if err != nil { 140 | log.Fatalf("Both Headers Error:\n%v\n", err) 141 | } 142 | 143 | f1map := make(map[string]struct{}) 144 | // read first file 145 | // read loop for CSV 146 | rows := 0 147 | for { 148 | // read the csv file 149 | cells, rerr := r1.Read() 150 | if rerr == io.EOF { 151 | break 152 | } 153 | if rerr != nil { 154 | log.Fatalf("csv.Read:\n%v\n", rerr) 155 | } 156 | key := computeSliceSha1(cells) 157 | f1map[key] = struct{}{} 158 | rows++ 159 | } 160 | log.Printf("Number of rows in file 1:%v\n", rows) 161 | f1.Close() 162 | 163 | f2map := make(map[string]struct{}) 164 | // read second file 165 | // read loop for CSV 166 | rows = 0 167 | bothCount := 0 168 | f2Count := 0 169 | for { 170 | // read the csv file 171 | cells, rerr := r2.Read() 172 | if rerr == io.EOF { 173 | break 174 | } 175 | if rerr != nil { 176 | log.Fatalf("csv.Read:\n%v\n", rerr) 177 | } 178 | key := computeSliceSha1(cells) 179 | f2map[key] = struct{}{} 180 | 181 | // does this row exist in file 1? 182 | _, f1Exists := f1map[key] 183 | if f1Exists { 184 | err := both.Write(cells) 185 | if err != nil { 186 | log.Fatalf("both Write Error:\n%v\n", err) 187 | } 188 | bothCount++ 189 | } else { 190 | err := wf2.Write(cells) 191 | if err != nil { 192 | log.Fatalf("both Write Error:\n%v\n", err) 193 | } 194 | f2Count++ 195 | } 196 | rows++ 197 | } 198 | // flush the CSV writers 199 | both.Flush() 200 | wf2.Flush() 201 | f2.Close() 202 | botho.Close() 203 | log.Printf("Number of rows in file 2:%v\n", rows) 204 | log.Printf("Number of rows in both files:%v\n", bothCount) 205 | log.Printf("Number of rows ONLY in file 2:%v\n", f2Count) 206 | 207 | // finally re-read file 1 and match up 208 | // open first input file 209 | f1, f1err = os.Open(*f1name) 210 | if f1err != nil { 211 | log.Fatal("os.Open() Error:" + f1err.Error()) 212 | } 213 | defer f1.Close() 214 | r1 = csv.NewReader(f1) 215 | f1Count := 0 216 | isHeader := true 217 | for { 218 | // read the csv file 219 | cells, rerr := r1.Read() 220 | if rerr == io.EOF { 221 | break 222 | } 223 | if rerr != nil { 224 | log.Fatalf("csv.Read:\n%v\n", rerr) 225 | } 226 | if isHeader { 227 | isHeader = false 228 | continue 229 | } 230 | key := computeSliceSha1(cells) 231 | // does this row exist in file 2? 232 | _, f2Exists := f2map[key] 233 | if f2Exists { 234 | continue 235 | } else { 236 | err := wf1.Write(cells) 237 | if err != nil { 238 | log.Fatalf("both Write Error:\n%v\n", err) 239 | } 240 | f1Count++ 241 | } 242 | } 243 | log.Printf("Number of rows ONLY in file 1:%v\n", f1Count) 244 | wf1.Flush() 245 | f1.Close() 246 | stop := time.Now() 247 | elapsed := time.Since(now) 248 | 249 | log.Printf("End at %v", stop) 250 | log.Printf("Elapsed time %v", elapsed) 251 | 252 | } 253 | 254 | func usage() { 255 | flag.PrintDefaults() 256 | fmt.Println("NOTE 1: Headers on the CSV files are expected.") 257 | fmt.Println("NOTE 2: Duplicates are omitted in all outputs.") 258 | os.Exit(0) 259 | } 260 | 261 | func computeSliceSha1(c []string) string { 262 | h := sha1.New() 263 | for _, v := range c { 264 | if v == "" { 265 | v = "#empty" 266 | } 267 | io.WriteString(h, v) 268 | } 269 | return string(h.Sum(nil)) 270 | } 271 | 272 | /* snippets 273 | 274 | package main 275 | 276 | import ( 277 | "crypto/sha1" 278 | "fmt" 279 | "io" 280 | ) 281 | 282 | func main() { 283 | h := sha1.New() 284 | io.WriteString(h, "His money is twice tainted:") 285 | io.WriteString(h, " 'taint yours and 'taint mine.") 286 | fmt.Printf("% x", h.Sum(nil)) 287 | } 288 | */ 289 | -------------------------------------------------------------------------------- /comparecsv/f1only.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | 4,5,6 3 | -------------------------------------------------------------------------------- /comparecsv/f2only.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | d,e,f 3 | -------------------------------------------------------------------------------- /comparecsv/test1.csv: -------------------------------------------------------------------------------- 1 | A,B 2 | 1,2 3 | 3,4 4 | 5,6 5 | -------------------------------------------------------------------------------- /comparecsv/test2.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | 1,2,3 3 | 4,5,6 4 | 7,8,9 5 | -------------------------------------------------------------------------------- /comparecsv/test3.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | 1,2,3 3 | 7,8,9 4 | d,e,f 5 | -------------------------------------------------------------------------------- /cryptcsv/README.md: -------------------------------------------------------------------------------- 1 | # Cryptcsv 2 | 3 | This routine will encrypt/decrypt the selected column(s) using the supplied key. 4 | 5 | Use -help to show: 6 | ``` 7 | $ cryptcsv -help 8 | Help Message 9 | 10 | Usage: cryptcsv [options] 11 | -c string 12 | Range spec for columns to obfuscate 13 | -d string 14 | Decrpytion key; required if decrypting 15 | -e string 16 | Encrpytion key; required if encrypting 17 | -headers 18 | CSV has headers (default true) 19 | -help 20 | Show help message 21 | -i string 22 | Input CSV filename; default STDIN 23 | -keep 24 | Keep CSV headers on output (default true) 25 | -o string 26 | Output CSV filename; default STDOUT 27 | $ 28 | ``` 29 | 30 | # Examples 31 | Encrypt the first and last columns: 32 | ``` 33 | $ cat test1.csv 34 | A,B,C 35 | abc,def,Army 36 | def,abc,Navy 37 | ijk,abc,Navy 38 | zyz,def,Army 39 | abc,abc,AF 40 | $ cryptcsv -i test1.csv -c 1,3 -e abcdef -o test1-encrypted.csv 41 | $ cat test1-encrypted.csv 42 | $ cat test1-encrypted.csv 43 | A,B,C 44 | LjZwW4XHoiXeg/5S9PItOmw7LQ==,def,cIJhrzIIYEXgAbcTPKEYpsKIJcw= 45 | FaKIeKSORfKhZO+Sm3Rg3vEQKQ==,abc,/1WXOfya+LjAHWB2xr4zqo8Qmks= 46 | EmNIdOIqir9TiT4mAf6o1vFYrQ==,abc,zao7y8CJgzW+G1ZSjRWelhIzNhw= 47 | uowhS1km7U7B7k+aa8bWz0lUgw==,def,ShIYZBMV+PFG8JTud/FFRVGjtVQ= 48 | V8WKIWunjW12OKC+MCcqlZqH2w==,abc,4ydx/qW9LierW6pQFeILRRtV 49 | $ 50 | ``` 51 | 52 | Now decrypt just the last column: 53 | ``` 54 | $ cryptcsv -i test1-encrypted.csv -o test1-decrypted.csv -c 3 -d abcdef 55 | $ cat test1-decrypted.csv 56 | ,B,C 57 | LjZwW4XHoiXeg/5S9PItOmw7LQ==,def,Army 58 | FaKIeKSORfKhZO+Sm3Rg3vEQKQ==,abc,Navy 59 | EmNIdOIqir9TiT4mAf6o1vFYrQ==,abc,Navy 60 | uowhS1km7U7B7k+aa8bWz0lUgw==,def,Army 61 | V8WKIWunjW12OKC+MCcqlZqH2w==,abc,AF 62 | $ 63 | $ cksum test1.csv test1-decrypted.csv 64 | ``` 65 | 66 | Now decrypt both: 67 | ``` 68 | $ cryptcsv -i test1-encrypted.csv -o test1-decrypted-both.csv -d abcdef -c 1,3 69 | $ cat test1-decrypted-both.csv 70 | A,B,C 71 | abc,def,Army 72 | def,abc,Navy 73 | ijk,abc,Navy 74 | zyz,def,Army 75 | abc,abc,AF 76 | $ cksum test1.csv test1-decrypted-both.csv 77 | 2235581246 69 test1.csv 78 | 2235581246 69 test1-decrypted-both.csv 79 | $ 80 | ``` 81 | -------------------------------------------------------------------------------- /cryptcsv/cryptcsv.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "crypto/aes" 5 | "crypto/cipher" 6 | "crypto/rand" 7 | "encoding/base64" 8 | "encoding/csv" 9 | "flag" 10 | "fmt" 11 | "io" 12 | "log" 13 | "os" 14 | 15 | "github.com/mandolyte/csv-utils" 16 | ) 17 | 18 | var cs *rangespec.RangeSpec 19 | 20 | func main() { 21 | e := flag.String("e", "", "Encrpytion key; required if encrypting") 22 | d := flag.String("d", "", "Decrpytion key; required if decrypting") 23 | cols := flag.String("c", "", "Range spec for columns to obfuscate") 24 | input := flag.String("i", "", "Input CSV filename; default STDIN") 25 | output := flag.String("o", "", "Output CSV filename; default STDOUT") 26 | headers := flag.Bool("headers", true, "CSV has headers") 27 | keep := flag.Bool("keep", true, "Keep CSV headers on output") 28 | help := flag.Bool("help", false, "Show help message") 29 | flag.Parse() 30 | 31 | if *help { 32 | usage("Help Message") 33 | } 34 | 35 | /* check parameters */ 36 | if len(*e)+len(*d) == 0 { 37 | usage("Specify either -e or -d with key to encrypt or decrypt") 38 | } 39 | 40 | if *cols != "" { 41 | var cserr error 42 | cs, cserr = rangespec.New(*cols) 43 | if cserr != nil { 44 | log.Fatalf("Invalid column range spec:%v, Error:\n%v\n", *cols, cserr) 45 | } 46 | } 47 | 48 | if *keep { 49 | if !*headers { 50 | log.Fatal("Cannot keep headers you don't have!") 51 | } 52 | } 53 | 54 | // open output file 55 | var w *csv.Writer 56 | if *output == "" { 57 | w = csv.NewWriter(os.Stdout) 58 | } else { 59 | fo, foerr := os.Create(*output) 60 | if foerr != nil { 61 | log.Fatal("os.Create() Error:" + foerr.Error()) 62 | } 63 | defer fo.Close() 64 | w = csv.NewWriter(fo) 65 | } 66 | 67 | // open input file 68 | var r *csv.Reader 69 | if *input == "" { 70 | r = csv.NewReader(os.Stdin) 71 | } else { 72 | fi, fierr := os.Open(*input) 73 | if fierr != nil { 74 | log.Fatal("os.Open() Error:" + fierr.Error()) 75 | } 76 | defer fi.Close() 77 | r = csv.NewReader(fi) 78 | } 79 | 80 | // ignore expectations of fields per row 81 | r.FieldsPerRecord = -1 82 | 83 | var key string 84 | if *e != "" { 85 | key = *e 86 | } else { 87 | key = *d 88 | } 89 | 90 | keydata := make([]byte, 32) 91 | copy(keydata, key[:]) 92 | 93 | // read loop for CSV 94 | var row uint64 95 | for { 96 | // read the csv file 97 | cells, rerr := r.Read() 98 | if rerr == io.EOF { 99 | break 100 | } 101 | if rerr != nil { 102 | log.Fatalf("csv.Read:\n%v\n", rerr) 103 | } 104 | if (row == 0) && *headers && *keep { 105 | row = 1 106 | err := w.Write(cells) 107 | if err != nil { 108 | log.Fatalf("csv.Write:\n%v\n", err) 109 | } 110 | continue 111 | } 112 | row++ 113 | // test columns for a match to encrypt/decrypt 114 | for n, v := range cells { 115 | if cs.InRange(uint64(n + 1)) { 116 | // encrpyt? 117 | if *d == "" { 118 | // decrypt key not provided so we encrypt 119 | cells[n] = encrypt(v, keydata) 120 | } else { 121 | // decrypt key is provided so we decrypt 122 | cells[n] = decrypt(v, keydata) 123 | } 124 | } 125 | } 126 | err := w.Write(cells) 127 | if err != nil { 128 | log.Fatalf("csv.Write:\n%v\n", err) 129 | } 130 | } 131 | w.Flush() 132 | } 133 | 134 | func usage(msg string) { 135 | fmt.Println(msg + "\n") 136 | fmt.Print("Usage: cryptcsv [options]\n") 137 | flag.PrintDefaults() 138 | os.Exit(0) 139 | } 140 | 141 | func decrypt(b64 string, key []byte) string { 142 | // convert base64 back to byte 143 | data, err := base64.StdEncoding.DecodeString(b64) 144 | if err != nil { 145 | log.Fatalf("base64 decode error:", err) 146 | } 147 | 148 | // Byte array of the string 149 | //ciphertext := []byte(cipherstring) 150 | 151 | // Create the AES cipher 152 | block, err := aes.NewCipher(key) 153 | if err != nil { 154 | log.Fatalf("aes.NewCipher() error:", err) 155 | } 156 | 157 | // if the text is too small, then it is incorrect 158 | if len(data) < aes.BlockSize { 159 | log.Fatal("Text too short error\n") 160 | } 161 | 162 | // Get the 16 byte iv 163 | iv := data[:aes.BlockSize] 164 | 165 | // Remove it 166 | data = data[aes.BlockSize:] 167 | 168 | // Return a decrypted stream 169 | stream := cipher.NewCFBDecrypter(block, iv) 170 | 171 | // Decrypt bytes from ciphertext 172 | stream.XORKeyStream(data, data) 173 | 174 | return string(data) 175 | } 176 | 177 | func encrypt(text string, key []byte) string { 178 | // Byte array of the string 179 | bytes := []byte(text) 180 | 181 | // Create the AES cipher 182 | block, err := aes.NewCipher(key) 183 | if err != nil { 184 | log.Fatalf("aes.NewCipher() error: %v\n", err) 185 | } 186 | 187 | // Create slice of (16 + bytes) length 188 | ciphertext := make([]byte, aes.BlockSize+len(bytes)) 189 | 190 | // Include the IV at the beginning 191 | // Slice of first 16 bytes 192 | iv := ciphertext[:aes.BlockSize] 193 | 194 | // Write 16 rand bytes to fill iv 195 | if _, err := io.ReadFull(rand.Reader, iv); err != nil { 196 | log.Fatalf("io.ReadFull() error: %v\n", err) 197 | } 198 | 199 | // Return an encrypted stream 200 | stream := cipher.NewCFBEncrypter(block, iv) 201 | 202 | // Encrypt bytes to ciphertext after the iv 203 | stream.XORKeyStream(ciphertext[aes.BlockSize:], bytes) 204 | 205 | // now encode to base64 206 | b64 := base64.StdEncoding.EncodeToString(ciphertext) 207 | return b64 208 | } 209 | -------------------------------------------------------------------------------- /cryptcsv/test1-decrypted-both.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | abc,def,Army 3 | def,abc,Navy 4 | ijk,abc,Navy 5 | zyz,def,Army 6 | abc,abc,AF 7 | -------------------------------------------------------------------------------- /cryptcsv/test1-decrypted.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | LjZwW4XHoiXeg/5S9PItOmw7LQ==,def,Army 3 | FaKIeKSORfKhZO+Sm3Rg3vEQKQ==,abc,Navy 4 | EmNIdOIqir9TiT4mAf6o1vFYrQ==,abc,Navy 5 | uowhS1km7U7B7k+aa8bWz0lUgw==,def,Army 6 | V8WKIWunjW12OKC+MCcqlZqH2w==,abc,AF 7 | -------------------------------------------------------------------------------- /cryptcsv/test1-encrypted.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | LjZwW4XHoiXeg/5S9PItOmw7LQ==,def,cIJhrzIIYEXgAbcTPKEYpsKIJcw= 3 | FaKIeKSORfKhZO+Sm3Rg3vEQKQ==,abc,/1WXOfya+LjAHWB2xr4zqo8Qmks= 4 | EmNIdOIqir9TiT4mAf6o1vFYrQ==,abc,zao7y8CJgzW+G1ZSjRWelhIzNhw= 5 | uowhS1km7U7B7k+aa8bWz0lUgw==,def,ShIYZBMV+PFG8JTud/FFRVGjtVQ= 6 | V8WKIWunjW12OKC+MCcqlZqH2w==,abc,4ydx/qW9LierW6pQFeILRRtV 7 | -------------------------------------------------------------------------------- /cryptcsv/test1.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | abc,def,Army 3 | def,abc,Navy 4 | ijk,abc,Navy 5 | zyz,def,Army 6 | abc,abc,AF 7 | -------------------------------------------------------------------------------- /dedupcsv/README.md: -------------------------------------------------------------------------------- 1 | ## Dedupcsv 2 | This utility removed duplicate rows. The input must be sorted! 3 | 4 | Use -help to show: 5 | ``` 6 | $ dedupcsv -help 7 | Help Message 8 | 9 | Usage: dedupcsv [options] 10 | NOTE: must be sorted; only compares row against prior row. -headers 11 | CSV has headers (default true) 12 | -help 13 | Show help message 14 | -i string 15 | Input CSV filename; default STDIN 16 | -keep 17 | Keep CSV headers on output (default true) 18 | -o string 19 | Output CSV filename; default STDOUT 20 | ``` 21 | 22 | For example: 23 | ``` 24 | $ cat test1.csv 25 | A,B,C 26 | 1,2,3 27 | 1,2,3 28 | 4,5,6 29 | 4,5,6 30 | d,e,f 31 | d,e,f 32 | d,e,f 33 | $ go run dedupcsv.go < test1.csv 34 | A,B,C 35 | 1,2,3 36 | 4,5,6 37 | d,e,f 38 | $ 39 | ``` 40 | -------------------------------------------------------------------------------- /dedupcsv/dedupcsv.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "flag" 6 | "fmt" 7 | "io" 8 | "log" 9 | "os" 10 | 11 | ) 12 | 13 | func main() { 14 | input := flag.String("i", "", "Input CSV filename; default STDIN") 15 | output := flag.String("o", "", "Output CSV filename; default STDOUT") 16 | headers := flag.Bool("headers", true, "CSV has headers") 17 | keep := flag.Bool("keep", true, "Keep CSV headers on output") 18 | help := flag.Bool("help", false, "Show help message") 19 | flag.Parse() 20 | 21 | if *help { 22 | usage("Help Message") 23 | os.Exit(0) 24 | } 25 | 26 | if !*headers { 27 | if *keep { 28 | log.Fatal("Cannot keep headers you don't have!") 29 | } 30 | } 31 | // open output file 32 | var w *csv.Writer 33 | if *output == "" { 34 | w = csv.NewWriter(os.Stdout) 35 | } else { 36 | fo, foerr := os.Create(*output) 37 | if foerr != nil { 38 | log.Fatal("os.Create() Error:" + foerr.Error()) 39 | } 40 | defer fo.Close() 41 | w = csv.NewWriter(fo) 42 | } 43 | 44 | // open input file 45 | var r *csv.Reader 46 | if *input == "" { 47 | r = csv.NewReader(os.Stdin) 48 | } else { 49 | fi, fierr := os.Open(*input) 50 | if fierr != nil { 51 | log.Fatal("os.Open() Error:" + fierr.Error()) 52 | } 53 | defer fi.Close() 54 | r = csv.NewReader(fi) 55 | } 56 | 57 | // ignore expectations of fields per row 58 | r.FieldsPerRecord = -1 59 | 60 | // read loop for CSV 61 | var row uint64 62 | var priorRow []string 63 | for { 64 | // read the csv file 65 | cells, rerr := r.Read() 66 | if rerr == io.EOF { 67 | break 68 | } 69 | if rerr != nil { 70 | log.Fatalf("csv.Read:\n%v\n", rerr) 71 | } 72 | if row == 0 { 73 | priorRow = make([]string,len(cells)) 74 | _ = copy(priorRow,cells) 75 | } 76 | if (row == 0) && *headers && *keep { 77 | row = 1 78 | err := w.Write(cells) 79 | if err != nil { 80 | log.Fatalf("csv.Write:\n%v\n", err) 81 | } 82 | continue 83 | } 84 | 85 | areEqual := testEq(priorRow,cells) 86 | if areEqual { 87 | continue 88 | } 89 | 90 | err := w.Write(cells) 91 | if err != nil { 92 | log.Fatalf("csv.Write:\n%v\n", err) 93 | } 94 | 95 | priorRow = make([]string,len(cells)) 96 | _ = copy(priorRow,cells) 97 | 98 | row++ 99 | } 100 | w.Flush() 101 | } 102 | 103 | func testEq(a, b []string) bool { 104 | 105 | if a == nil && b == nil { 106 | return true; 107 | } 108 | 109 | if a == nil || b == nil { 110 | return false; 111 | } 112 | 113 | if len(a) != len(b) { 114 | return false 115 | } 116 | 117 | for i := range a { 118 | if a[i] != b[i] { 119 | return false 120 | } 121 | } 122 | 123 | return true 124 | } 125 | 126 | 127 | func usage(msg string) { 128 | fmt.Println(msg + "\n") 129 | fmt.Print("Usage: uniqcsv [options]\n") 130 | fmt.Print("NOTE: must be sorted; only compares row against prior row.") 131 | flag.PrintDefaults() 132 | } 133 | -------------------------------------------------------------------------------- /dedupcsv/test1.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | 1,2,3 3 | 1,2,3 4 | 4,5,6 5 | 4,5,6 6 | d,e,f 7 | d,e,f 8 | d,e,f 9 | -------------------------------------------------------------------------------- /diffcsv/README.md: -------------------------------------------------------------------------------- 1 | # Diffcsv 2 | 3 | Todo: 4 | - Considering adding a "map" option where you can supply a JSON formatted map of value transformations. Possibly using regualar expressions as matching values. 5 | 6 | Changes (2018-11-06): 7 | - Renamed the alias parameters as "alias1" and "alias2" with defaults of "f1" and "f2" 8 | - Added `trimSpace` and `ignoreCase` options. The ignore case option applies to the *key* values. 9 | 10 | Changes (2018-10-31): 11 | - Added aliasing option of input files; default is DF1 and DF2 as before 12 | - Added option to add numbers to column headers to make it easier to 13 | reference columns with differences 14 | 15 | Use the -help argument to show: 16 | ``` 17 | $ diffcsv -help 18 | 19 | Usage: diffcsv [options] 20 | -alias1 string 21 | Alias for first input file; default F1 (default "F1") 22 | -alias2 string 23 | Alias for second input file; default F2 (default "F2") 24 | -colnums 25 | Add difference column numbers to headers 26 | -f1 string 27 | First CSV file name to compare 28 | -f2 string 29 | Second CSV file name to compare 30 | -help 31 | Show help message 32 | -ignoreCase 33 | Ignore case when comparing; default true (default true) 34 | -key int 35 | Key column in input CSVs (first is 1); must be unique 36 | -noeq 37 | Suppress matches, showing only differences 38 | -o string 39 | Output CSV file for differences 40 | -ondupFirst 41 | On duplicate key, keep first one 42 | -ondupLast 43 | On duplicate key, keep last one 44 | -trimSpace 45 | Ignore leading and trailing spaces when comparing; default true (default true) 46 | 47 | Detailed Help: 48 | Inputs: 49 | - a key column 50 | - two input filenames 51 | - an output filename 52 | There will be two input files to compare and there will be 53 | one output file created: 54 | a) The first file will be read and stored into a map 55 | b) The second file will be read and stored into a map 56 | c) It is an error if a file has the same key value on two rows. 57 | Keys must be unique within each file. 58 | Note that key column number is one based, not zero based! 59 | NOTE! if duplicate keys exist, then there are options to keep 60 | the first or to keep the last one. Default is to error out. 61 | d) Then all keys from both inputs are combined/deduped/sorted 62 | e) Then we range over the combined keyset and output a new CSV 63 | that has a new status column as the first column and the other columns 64 | from the inputs as the remaining columns. 65 | f) the new status column has the following values: 66 | - EQ meaning that the values for the key are same in both input files 67 | - IN=1 meaning that the key and values are only in input file #1 68 | - IN=2 similar for input file #2 69 | - DFn=x,y,..,z where n is either 1 or 2; followed by a comma delimited 70 | list of column numbers where the values for the key do not match. 71 | Note that the DF statuses always come in pairs, one for each input file. 72 | g) Limitations: 73 | - both input files must have the same number of columns 74 | - both must have a header row and the headers must be the same 75 | ``` 76 | 77 | ## Normal Cases 78 | 79 | Compare two identical files (using same file for both inputs): 80 | ``` 81 | $ cat input1.csv 82 | A,B,C 83 | X,1,1 84 | Y,2,2 85 | Z,3,3 86 | $ go run diffcsv.go -key 1 -f1 input1.csv -f2 input1.csv -o test1.csv 87 | 2018/10/08 06:46:55 Start: Oct 8 06:46:55.040 88 | 2018/10/08 06:46:55 Processing input #1:input1.csv 89 | 2018/10/08 06:46:55 Number of rows in file input1.csv:3 90 | 2018/10/08 06:46:55 Processing input #2:input1.csv 91 | 2018/10/08 06:46:55 Number of rows in file input1.csv:3 92 | 2018/10/08 06:46:55 Number of combined unique keys:3 93 | 2018/10/08 06:46:55 End: Oct 8 06:46:55.041 94 | 2018/10/08 06:46:55 Elapsed time 842.333µs 95 | 2018/10/08 06:46:55 ------- Summary ------- 96 | 2018/10/08 06:46:55 Equal Count: 3 97 | 2018/10/08 06:46:55 Key Diff Count: 0 98 | 2018/10/08 06:46:55 Unique to input #1: 0 99 | 2018/10/08 06:46:55 Unique to input #2: 0 100 | $ cat test1.csv 101 | STATUS,A,B,C 102 | EQ,X,1,1 103 | EQ,Y,2,2 104 | EQ,Z,3,3 105 | ``` 106 | 107 | Compare two files where keys are ok, but values are different: 108 | ``` 109 | $ cat input3.csv 110 | A,B,C 111 | X,1,1 112 | Y,2,2 113 | Z,9,9 114 | $ go run diffcsv.go -key 1 -f1 input1.csv -f2 input3.csv -o test3.csv 115 | ... elided ... 116 | $ cat test2.csv 117 | STATUS,A,B,C 118 | EQ,X,1,1 119 | EQ,Y,2,2 120 | "DF1=2,3",Z,3,3 121 | "DF2=2,3",Z,9,9 122 | ``` 123 | 124 | Same as above, but show only differences; use aliases and column numbers: 125 | ``` 126 | $ go run diffcsv.go -key 1 -f1 input1.csv -f2 input3.csv \ 127 | -o test3.csv -noeq \ 128 | -df1 i1 -df2 i2 -colnums 129 | 2018/10/31 07:03:26 Start: Oct 31 07:03:26.298 130 | 2018/10/31 07:03:26 Processing input #1:input1.csv 131 | 2018/10/31 07:03:26 Number of rows in file input1.csv:3 132 | 2018/10/31 07:03:26 Processing input #2:input3.csv 133 | 2018/10/31 07:03:26 Number of rows in file input3.csv:3 134 | 2018/10/31 07:03:26 Number of combined unique keys:3 135 | 2018/10/31 07:03:26 End: Oct 31 07:03:26.300 136 | 2018/10/31 07:03:26 Elapsed time 1.9993ms 137 | 2018/10/31 07:03:26 ------- Summary ------- 138 | 2018/10/31 07:03:26 Equal Count: 2 139 | 2018/10/31 07:03:26 Key Diff Count: 1 140 | 2018/10/31 07:03:26 Unique to input #1: 0 141 | 2018/10/31 07:03:26 Unique to input #2: 0 142 | $ cat test3.csv 143 | STATUS,1-A,2-B,3-C 144 | "i1=2,3",Z,3,3 145 | "i2=2,3",Z,9,9 146 | $ 147 | ``` 148 | 149 | Compare two files where keys are not the same: 150 | ``` 151 | $ cat input2.csv 152 | A,B,C 153 | X,1,1 154 | Y,2,2 155 | W,3,3 156 | $ go run diffcsv.go -key 1 -f1 input1.csv -f2 input2.csv -o test3.csv 157 | ... elided ... 158 | $ cat test3.csv 159 | STATUS,A,B,C 160 | IN=2,W,3,3 161 | EQ,X,1,1 162 | EQ,Y,2,2 163 | IN=1,Z,3,3 164 | ``` 165 | 166 | Compare two files where trim space and ignore case are needed: 167 | ``` 168 | $ cat input7.csv 169 | A,B,C 170 | X,1,1 171 | Y,2,3 172 | W,3,3 173 | $ cat input8.csv 174 | A,B,C 175 | x,1,1 176 | Y ,2,3 177 | w ,3,3 178 | $ go run diffcsv.go -key 1 -f1 input7.csv -f2 input8.csv -alias1 f1 -alias2 f2 -trimSpace=true -ignoreCase=false -o test7.csv 179 | 2018/11/06 13:37:09 Start: Nov 6 13:37:09.884 180 | 2018/11/06 13:37:09 Processing input #1:input7.csv 181 | 2018/11/06 13:37:09 Number of rows in file input7.csv:3 182 | 2018/11/06 13:37:09 Processing input #2:input8.csv 183 | 2018/11/06 13:37:09 Number of rows in file input8.csv:3 184 | 2018/11/06 13:37:09 Number of combined unique keys:5 185 | 2018/11/06 13:37:09 End: Nov 6 13:37:09.886 186 | 2018/11/06 13:37:09 Elapsed time 1.9977ms 187 | 2018/11/06 13:37:09 ------- Summary ------- 188 | 2018/11/06 13:37:09 Equal Count: 1 189 | 2018/11/06 13:37:09 Key Diff Count: 0 190 | 2018/11/06 13:37:09 Unique to input #1: 2 191 | 2018/11/06 13:37:09 Unique to input #2: 2 192 | $ cat test7.csv 193 | STATUS,A,B,C 194 | IN=f1,W,3,3 195 | IN=f1,X,1,1 196 | EQ,Y,2,3 197 | IN=f2,w,3,3 198 | IN=f2,x,1,1 199 | ``` 200 | 201 | ## Error Conditions 202 | 203 | Compare two files with headers that don't match: 204 | ``` 205 | $ cat input4.csv 206 | A,B,D 207 | X,1,1 208 | Y,2,2 209 | Z,9,9 210 | $ go run diffcsv.go -key 1 -f1 input1.csv -f2 input4.csv -o test4.csv 211 | 2018/10/04 21:25:36 Start: Oct 4 21:25:36.905 212 | 2018/10/04 21:25:36 Headers are not the same on input files 213 | exit status 1 214 | $ 215 | ``` 216 | 217 | Compare two files that don't the same number of columns: 218 | ``` 219 | $ cat input5.csv 220 | A,B,C,D 221 | X,1,1,1 222 | Y,2,2,2 223 | Z,9,9,9 224 | $ go run diffcsv.go -key 1 -f1 input1.csv -f2 input5.csv -o test5.csv 225 | 2018/10/04 21:27:24 Start: Oct 4 21:27:24.851 226 | 2018/10/04 21:27:24 Different number of columns:3 vs. 4 227 | exit status 1 228 | $ 229 | ``` 230 | 231 | Compare two files where one has a non-unique key: 232 | ``` 233 | $ cat input6.csv 234 | A,B,C,D 235 | X,1,1,1 236 | Y,2,2,2 237 | Z,9,9,9 238 | X,1,2,3 239 | $ go run diffcsv.go -key 1 -f1 input1.csv -f2 input6.csv -o test6.csv 240 | 2018/10/05 07:15:00 Start: Oct 5 07:15:00.105 241 | 2018/10/05 07:15:00 Processing input #1:input1.csv 242 | 2018/10/05 07:15:00 Number of rows in file input1.csv:3 243 | 2018/10/05 07:15:00 Processing input #2:input6.csv 244 | 2018/10/05 07:15:00 Key value not unique: X on row 4 245 | exit status 1 246 | $ 247 | ``` 248 | -------------------------------------------------------------------------------- /diffcsv/diffcsv.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "flag" 6 | "fmt" 7 | "io" 8 | "log" 9 | "os" 10 | "sort" 11 | "strings" 12 | "time" 13 | ) 14 | 15 | var f1name = flag.String("f1", "", "First CSV file name to compare") 16 | var f2name = flag.String("f2", "", "Second CSV file name to compare") 17 | var output = flag.String("o", "", "Output CSV file for differences") 18 | var key = flag.Int("key", 0, "Key column in input CSVs (first is 1); must be unique") 19 | var help = flag.Bool("help", false, "Show help message") 20 | var ondupfirst = flag.Bool("ondupFirst", false, "On duplicate key, keep first one") 21 | var onduplast = flag.Bool("ondupLast", false, "On duplicate key, keep last one") 22 | var noeq = flag.Bool("noeq", false, "Suppress matches, showing only differences") 23 | var alias1 = flag.String("alias1", "F1", "Alias for first input file; default F1") 24 | var alias2 = flag.String("alias2", "F2", "Alias for second input file; default F2") 25 | var colnums = flag.Bool("colnums", false, "Add difference column numbers to headers") 26 | var ignoreCase = flag.Bool("ignoreCase", true, "Ignore case when comparing; default true") 27 | var trimSpace = flag.Bool("trimSpace", true, "Ignore leading and trailing spaces when comparing; default true") 28 | 29 | var detailedHelp = ` 30 | Detailed Help: 31 | Inputs: 32 | - a key column 33 | - two input filenames 34 | - an output filename 35 | There will be two input files to compare and there will be 36 | one output file created: 37 | a) The first file will be read and stored into a map 38 | b) The second file will be read and stored into a map 39 | c) It is an error if a file has the same key value on two rows. 40 | Keys must be unique within each file. 41 | Note that key column number is one based, not zero based! 42 | NOTE! if duplicate keys exist, then there are options to keep 43 | the first or to keep the last one. Default is to error out. 44 | d) Then all keys from both inputs are combined/deduped/sorted 45 | e) Then we range over the combined keyset and output a new CSV 46 | that has a new status column as the first column and the other columns 47 | from the inputs as the remaining columns. 48 | f) the new status column has the following values: 49 | - EQ meaning that the values for the key are same in both input files 50 | - IN=1 meaning that the key and values are only in input file #1 51 | - IN=2 similar for input file #2 52 | - DFn=x,y,..,z where n is either 1 or 2; followed by a comma delimited 53 | list of column numbers where the values for the key do not match. 54 | Note that the DF statuses always come in pairs, one for each input file. 55 | g) Limitations: 56 | - both input files must have the same number of columns 57 | - both must have a header row and the headers must be the same 58 | ` 59 | 60 | func main() { 61 | flag.Parse() 62 | 63 | if *help { 64 | usage("") 65 | } 66 | 67 | if *key == 0 { 68 | usage("Key column number missing.") 69 | } 70 | 71 | if *f1name == "" { 72 | usage("First filename is missing.") 73 | } 74 | 75 | if *f2name == "" { 76 | fmt.Println() 77 | usage("Second filename is missing.") 78 | } 79 | 80 | if *output == "" { 81 | fmt.Println() 82 | usage("Output filename is missing.") 83 | } 84 | 85 | if *ondupfirst && *onduplast { 86 | fmt.Println() 87 | usage("Cannot use both on-dup options") 88 | } 89 | 90 | now := time.Now() 91 | log.Printf("Start: %v", now.Format(time.StampMilli)) 92 | 93 | // open first input file stop.Format(Time.StampMilli) 94 | var r1 *csv.Reader 95 | f1, f1err := os.Open(*f1name) 96 | if f1err != nil { 97 | log.Fatal("os.Open() Error:" + f1err.Error()) 98 | } 99 | r1 = csv.NewReader(f1) 100 | 101 | // open second input file 102 | var r2 *csv.Reader 103 | f2, f2err := os.Open(*f2name) 104 | if f2err != nil { 105 | log.Fatal("os.Open() Error:" + f2err.Error()) 106 | } 107 | r2 = csv.NewReader(f2) 108 | 109 | /*********************************************************/ 110 | // do a quick check on columns first 111 | // if not the same, then log error and exit 112 | 113 | // second file 114 | hdrs2, rerr := r2.Read() 115 | if rerr == io.EOF { 116 | log.Fatal("File 2 is empty", rerr) 117 | } 118 | if rerr != nil { 119 | log.Fatalf("csv.Read:\n%v\n", rerr) 120 | } 121 | numcols2 := len(hdrs2) 122 | 123 | // first file 124 | hdrs1, rerr := r1.Read() 125 | if rerr == io.EOF { 126 | log.Fatal("File 1 is empty", rerr) 127 | } 128 | if rerr != nil { 129 | log.Fatalf("csv.Read:\n%v\n", rerr) 130 | } 131 | numcols1 := len(hdrs1) 132 | 133 | if numcols1 != numcols2 { 134 | log.Fatalf("Different number of columns:%v vs. %v", 135 | numcols1, numcols2) 136 | } 137 | 138 | // check that headers are the same 139 | for i := range hdrs1 { 140 | if hdrs1[i] == hdrs2[i] { 141 | continue 142 | } 143 | log.Fatal("Headers are not the same on input files") 144 | } 145 | 146 | // check on whether to add column numbers to headers 147 | if *colnums { 148 | for i := range hdrs1 { 149 | hdrs1[i] = fmt.Sprintf("%v-%v", i+1, hdrs1[i]) 150 | } 151 | } 152 | 153 | // set expectations of fields per row 154 | r1.FieldsPerRecord = numcols1 155 | r2.FieldsPerRecord = numcols1 156 | 157 | // open output file 158 | var wf1 *csv.Writer 159 | wf1o, wf1oerr := os.Create(*output) 160 | if wf1oerr != nil { 161 | log.Fatal("os.Create() Error:" + wf1oerr.Error()) 162 | } 163 | defer wf1o.Close() 164 | wf1 = csv.NewWriter(wf1o) 165 | hdrOutput := make([]string, 0) 166 | hdrOutput = append(hdrOutput, "STATUS") 167 | hdrOutput = append(hdrOutput, hdrs1...) 168 | err := wf1.Write(hdrOutput) 169 | if err != nil { 170 | log.Fatalf("Output Error:\n%v\n", err) 171 | } 172 | 173 | log.Printf("Processing input #1:%v\n", *f1name) 174 | f1map := make(map[string][]string) 175 | // read first file 176 | rows := 0 177 | for { 178 | // read the csv file 179 | cells, rerr := r1.Read() 180 | if rerr == io.EOF { 181 | break 182 | } 183 | if rerr != nil { 184 | log.Fatalf("csv.Read:\n%v\n", rerr) 185 | } 186 | rows++ 187 | if *trimSpace { 188 | for n := range cells { 189 | cells[n] = strings.TrimSpace(cells[n]) 190 | } 191 | } 192 | keyv := cells[*key-1] 193 | if *ignoreCase { 194 | keyv = strings.ToLower(keyv) 195 | } 196 | if _, ok := f1map[keyv]; ok { 197 | if *onduplast { 198 | log.Printf("Replacing non-unique key: %v on row %v\n", keyv, rows+1) 199 | } else if *ondupfirst { 200 | log.Printf("Skipping non-unique key: %v on row %v\n", keyv, rows+1) 201 | continue 202 | } else { 203 | log.Fatalf("Key value not unique: %v on row %v\n", keyv, rows+1) 204 | } 205 | } 206 | f1map[keyv] = cells 207 | } 208 | log.Printf("Number of rows in file %v:%v\n", *f1name, rows) 209 | f1.Close() 210 | 211 | log.Printf("Processing input #2:%v\n", *f2name) 212 | f2map := make(map[string][]string) 213 | // read second file 214 | rows = 0 215 | for { 216 | // read the csv file 217 | cells, rerr := r2.Read() 218 | if rerr == io.EOF { 219 | break 220 | } 221 | if rerr != nil { 222 | log.Fatalf("csv.Read:\n%v\n", rerr) 223 | } 224 | rows++ 225 | if *trimSpace { 226 | for n := range cells { 227 | cells[n] = strings.TrimSpace(cells[n]) 228 | } 229 | } 230 | keyv := cells[*key-1] 231 | if *ignoreCase { 232 | keyv = strings.ToLower(keyv) 233 | } 234 | if _, ok := f2map[keyv]; ok { 235 | if *onduplast { 236 | log.Printf("Replacing non-unique key: %v on row %v\n", keyv, rows+1) 237 | } else if *ondupfirst { 238 | log.Printf("Skipping non-unique key: %v on row %v\n", keyv, rows+1) 239 | continue 240 | } else { 241 | log.Fatalf("Key value not unique: %v on row %v\n", keyv, rows+1) 242 | } 243 | } 244 | f2map[keyv] = cells 245 | } 246 | log.Printf("Number of rows in file %v:%v\n", *f2name, rows) 247 | f2.Close() 248 | 249 | // 250 | // Get a combined set of keys 251 | // 252 | uniqkeyset := make(map[string]struct{}) 253 | for k := range f1map { 254 | uniqkeyset[k] = struct{}{} 255 | } 256 | for k := range f2map { 257 | uniqkeyset[k] = struct{}{} 258 | } 259 | keySliceSize := len(uniqkeyset) 260 | keys := make([]string, keySliceSize) 261 | slot := 0 262 | for k := range uniqkeyset { 263 | keys[slot] = k 264 | slot++ 265 | } 266 | log.Printf("Number of combined unique keys:%v\n", keySliceSize) 267 | 268 | // sort them 269 | sort.Slice(keys, func(i, j int) bool { 270 | return keys[i] < keys[j] 271 | }) 272 | 273 | // counts 274 | eqCount := 0 275 | diffCount := 0 276 | f1UniqCount := 0 277 | f2UniqCount := 0 278 | 279 | // Now range of combined unique keys 280 | for n := range keys { 281 | val := keys[n] 282 | row1, ok1 := f1map[val] 283 | row2, ok2 := f2map[val] 284 | if ok1 && ok2 { 285 | // are all the row values the same? 286 | diffList := make([]int, 0) 287 | for i := range row1 { 288 | if row1[i] == row2[i] { 289 | continue 290 | } 291 | if *ignoreCase { 292 | if strings.EqualFold(row1[i], row2[i]) { 293 | continue 294 | } 295 | } 296 | f := i - 1 297 | diffList = append(diffList, f) 298 | } 299 | if len(diffList) == 0 { 300 | eqCount++ 301 | if *noeq { 302 | continue 303 | } 304 | outrow1 := make([]string, 0) 305 | outrow1 = append(outrow1, "EQ") 306 | outrow1 = append(outrow1, row1...) 307 | err := wf1.Write(outrow1) 308 | if err != nil { 309 | log.Fatalf("Output Write() Error: %v\n", err) 310 | } 311 | } else { 312 | diffCount++ 313 | diffs := "" 314 | for i := range diffList { 315 | diffs += fmt.Sprintf("%v,", diffList[i]+2) 316 | } 317 | diffs = strings.TrimRight(diffs, ",") 318 | outrow1 := make([]string, 0) 319 | outrow1 = append(outrow1, fmt.Sprintf("%v=%v", *alias1, diffs)) 320 | outrow1 = append(outrow1, row1...) 321 | err := wf1.Write(outrow1) 322 | if err != nil { 323 | log.Fatalf("Output Write() Error: %v\n", err) 324 | } 325 | outrow2 := make([]string, 0) 326 | outrow2 = append(outrow2, fmt.Sprintf("%v=%v", *alias2, diffs)) 327 | outrow2 = append(outrow2, row2...) 328 | err = wf1.Write(outrow2) 329 | if err != nil { 330 | log.Fatalf("Output Write() Error: %v\n", err) 331 | } 332 | } 333 | } else { 334 | if !ok1 { 335 | f2UniqCount++ 336 | outrow := make([]string, 0) 337 | outrow = append(outrow, fmt.Sprintf("IN=%v", *alias2)) 338 | outrow = append(outrow, row2...) 339 | err := wf1.Write(outrow) 340 | if err != nil { 341 | log.Fatalf("Output Write() Error: %v\n", err) 342 | } 343 | } else { 344 | f1UniqCount++ 345 | outrow := make([]string, 0) 346 | outrow = append(outrow, fmt.Sprintf("IN=%v", *alias1)) 347 | outrow = append(outrow, row1...) 348 | err := wf1.Write(outrow) 349 | if err != nil { 350 | log.Fatalf("Output Write() Error: %v\n", err) 351 | } 352 | } 353 | } 354 | 355 | } 356 | wf1.Flush() 357 | 358 | // wrapup 359 | stop := time.Now() 360 | elapsed := time.Since(now) 361 | log.Printf("End: %v", stop.Format(time.StampMilli)) 362 | log.Printf("Elapsed time %v", elapsed) 363 | 364 | log.Printf("------- Summary -------\n") 365 | log.Printf("Equal Count: %v\n", eqCount) 366 | log.Printf("Key Diff Count: %v\n", diffCount) 367 | log.Printf("Unique to input #1 %v: %v\n", *alias1,f1UniqCount) 368 | log.Printf("Unique to input #2 %v: %v\n", *alias2,f2UniqCount) 369 | 370 | } 371 | 372 | func usage(msg string) { 373 | fmt.Println(msg) 374 | fmt.Print("Usage: diffcsv [options]\n") 375 | flag.PrintDefaults() 376 | if msg == "" { 377 | fmt.Println(detailedHelp) 378 | } 379 | os.Exit(0) 380 | } 381 | -------------------------------------------------------------------------------- /diffcsv/input1.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | X,1,1 3 | Y,2,2 4 | Z,3,3 -------------------------------------------------------------------------------- /diffcsv/input2.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | X,1,1 3 | Y,2,2 4 | W,3,3 -------------------------------------------------------------------------------- /diffcsv/input3.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | X,1,1 3 | Y,2,2 4 | Z,9,9 -------------------------------------------------------------------------------- /diffcsv/input4.csv: -------------------------------------------------------------------------------- 1 | A,B,D 2 | X,1,1 3 | Y,2,2 4 | Z,9,9 -------------------------------------------------------------------------------- /diffcsv/input5.csv: -------------------------------------------------------------------------------- 1 | A,B,C,D 2 | X,1,1,1 3 | Y,2,2,2 4 | Z,9,9,9 -------------------------------------------------------------------------------- /diffcsv/input6.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | X,1,1 3 | Y,2,2 4 | Z,9,9 5 | X,1,2 6 | X,3,4 7 | -------------------------------------------------------------------------------- /diffcsv/input7.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | X,1,1 3 | Y,2,3 4 | W,3,3 -------------------------------------------------------------------------------- /diffcsv/input8.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | x,1,1 3 | Y ,2,3 4 | w ,3,3 -------------------------------------------------------------------------------- /diffcsv/test1.csv: -------------------------------------------------------------------------------- 1 | STATUS,A,B,C 2 | EQ,X,1,1 3 | EQ,Y,2,2 4 | EQ,Z,3,3 5 | -------------------------------------------------------------------------------- /diffcsv/test2.csv: -------------------------------------------------------------------------------- 1 | STATUS,A,B,C 2 | IN=2,W,3,3 3 | EQ,X,1,1 4 | DF1=3,Y,2,2 5 | DF2=3,Y,2,3 6 | IN=1,Z,3,3 7 | -------------------------------------------------------------------------------- /diffcsv/test3.csv: -------------------------------------------------------------------------------- 1 | STATUS,1-A,2-B,3-C 2 | "i1=2,3",Z,3,3 3 | "i2=2,3",Z,9,9 4 | -------------------------------------------------------------------------------- /diffcsv/test6.csv: -------------------------------------------------------------------------------- 1 | STATUS,A,B,C 2 | "DF1=2,3",X,1,1 3 | "DF2=2,3",X,3,4 4 | EQ,Y,2,2 5 | "DF1=2,3",Z,3,3 6 | "DF2=2,3",Z,9,9 7 | -------------------------------------------------------------------------------- /diffcsv/test7.csv: -------------------------------------------------------------------------------- 1 | STATUS,A,B,C 2 | IN=f1,W,3,3 3 | IN=f1,X,1,1 4 | EQ,Y,2,3 5 | IN=f2,w,3,3 6 | IN=f2,x,1,1 7 | -------------------------------------------------------------------------------- /editcsv/README.md: -------------------------------------------------------------------------------- 1 | # Editcsv 2 | This utility will edit a CSV and either update inline or add update 3 | as a new column. 4 | 5 | Use -help to show: 6 | ``` 7 | $ editcsv -help 8 | Help Message 9 | 10 | Usage: editcsv [options] input.csv output.csv 11 | -add 12 | Add replace string as a new column; default, replace in-place 13 | -addHeader string 14 | Header to use for added column (default "ADDED") 15 | -c string 16 | Range spec for columns 17 | -headers 18 | CSV has headers (default true) 19 | -help 20 | Show help message 21 | -i string 22 | Input CSV filename; default STDIN 23 | -keep 24 | Keep CSV headers on output (default true) 25 | -o string 26 | Output CSV filename; default STDOUT 27 | -pattern string 28 | Search pattern 29 | -replace string 30 | Regexp replace expression 31 | ``` 32 | 33 | ## Examples 34 | Put an "x-" in front of any cell value beginning with the letter "a". 35 | ``` 36 | $ cat test1.csv 37 | A,B,C 38 | abc,def,Army 39 | one,two,Navy 40 | go,abacus,Marine 41 | Android,Ubuntu,Linux 42 | $ go run editcsv.go -pattern "^(a)" -replace "x-$1" < test1.csv 43 | A,B,C 44 | x-bc,def,Army 45 | one,two,Navy 46 | go,x-bacus,Marine 47 | Android,Ubuntu,Linux 48 | ``` 49 | Replace matches with a constant value, in this case "--elided--". 50 | ``` 51 | $ go run editcsv.go -pattern "^.*y$" -replace "--elided--" < test1.csv 52 | A,B,C 53 | abc,def,--elided-- 54 | one,two,--elided-- 55 | go,abacus,Marine 56 | Android,Ubuntu,Linux 57 | ``` 58 | Replace matches (cell values in column 2 only) that end in letter "o", 59 | adding a new column named "final" for the updated column 2. 60 | ``` 61 | $ editcsv -pattern "^.*o$" -replace "--elided--" -c 2 -add=true -addHeader "final" < test1.csv 62 | A,B,C,final 63 | abc,def,Army,def 64 | one,two,Navy,--elided-- 65 | go,abacus,Marine,abacus 66 | Android,Ubuntu,Linux,Ubuntu 67 | ``` 68 | -------------------------------------------------------------------------------- /editcsv/editcsv.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "flag" 6 | "fmt" 7 | "io" 8 | "log" 9 | "os" 10 | "regexp" 11 | 12 | "github.com/mandolyte/csv-utils" 13 | ) 14 | 15 | var cs *rangespec.RangeSpec 16 | var re *regexp.Regexp 17 | 18 | func main() { 19 | pattern := flag.String("pattern", "", "Search pattern") 20 | replace := flag.String("replace", "", "Regexp replace expression") 21 | addHdr := flag.String("addHeader", "ADDED", "Header to use for added column") 22 | cols := flag.String("c", "", "Range spec for columns") 23 | input := flag.String("i", "", "Input CSV filename; default STDIN") 24 | output := flag.String("o", "", "Output CSV filename; default STDOUT") 25 | headers := flag.Bool("headers", true, "CSV has headers") 26 | keep := flag.Bool("keep", true, "Keep CSV headers on output") 27 | help := flag.Bool("help", false, "Show help message") 28 | add := flag.Bool("add", false, "Add replace string as a new column; default, replace in-place") 29 | flag.Parse() 30 | 31 | if *help { 32 | usage("Help Message") 33 | os.Exit(0) 34 | } 35 | 36 | /* check parameters */ 37 | if *replace == "" { 38 | usage("Required: Missing replace expression") 39 | os.Exit(0) 40 | } 41 | 42 | if *pattern == "" { 43 | usage("Required: Missing search expression") 44 | os.Exit(0) 45 | } 46 | re = regexp.MustCompile(*pattern) 47 | 48 | if *cols != "" { 49 | var cserr error 50 | cs, cserr = rangespec.New(*cols) 51 | if cserr != nil { 52 | log.Fatalf("Invalid column range spec:%v, Error:\n%v\n", *cols, cserr) 53 | } 54 | } 55 | 56 | if *keep { 57 | if !*headers { 58 | log.Fatal("Cannot keep headers you don't have!") 59 | } 60 | } 61 | // open output file 62 | var w *csv.Writer 63 | if *output == "" { 64 | w = csv.NewWriter(os.Stdout) 65 | } else { 66 | fo, foerr := os.Create(*output) 67 | if foerr != nil { 68 | log.Fatal("os.Create() Error:" + foerr.Error()) 69 | } 70 | defer fo.Close() 71 | w = csv.NewWriter(fo) 72 | } 73 | 74 | // open input file 75 | var r *csv.Reader 76 | if *input == "" { 77 | r = csv.NewReader(os.Stdin) 78 | } else { 79 | fi, fierr := os.Open(*input) 80 | if fierr != nil { 81 | log.Fatal("os.Open() Error:" + fierr.Error()) 82 | } 83 | defer fi.Close() 84 | r = csv.NewReader(fi) 85 | } 86 | 87 | // ignore expectations of fields per row 88 | r.FieldsPerRecord = -1 89 | 90 | // read loop for CSV 91 | var row uint64 92 | for { 93 | // read the csv file 94 | cells, rerr := r.Read() 95 | if rerr == io.EOF { 96 | break 97 | } 98 | if rerr != nil { 99 | log.Fatalf("csv.Read:\n%v\n", rerr) 100 | } 101 | if (row == 0) && *headers && *keep { 102 | row = 1 103 | if *add { 104 | cells = append(cells, *addHdr) 105 | } 106 | err := w.Write(cells) 107 | if err != nil { 108 | log.Fatalf("csv.Write:\n%v\n", err) 109 | } 110 | continue 111 | } 112 | row++ 113 | // test row/columns for a match 114 | err := w.Write(patternMatches(cells, re, *replace, *add)) 115 | if err != nil { 116 | log.Fatalf("csv.Write:\n%v\n", err) 117 | } 118 | } 119 | w.Flush() 120 | } 121 | 122 | func patternMatches(c []string, re *regexp.Regexp, replace string, add bool) []string { 123 | for n := range c { 124 | if cs == nil { 125 | newstring := re.ReplaceAllString(c[n], replace) 126 | if add { 127 | c = append(c, newstring) 128 | } else { 129 | c[n] = newstring 130 | } 131 | } else { 132 | if cs.InRange(uint64(n + 1)) { 133 | newstring := re.ReplaceAllString(c[n], replace) 134 | if add { 135 | c = append(c, newstring) 136 | } else { 137 | c[n] = newstring 138 | } 139 | } 140 | } 141 | } 142 | return c 143 | } 144 | 145 | func usage(msg string) { 146 | fmt.Println(msg + "\n") 147 | fmt.Print("Usage: editcsv [options] input.csv output.csv\n") 148 | flag.PrintDefaults() 149 | } 150 | -------------------------------------------------------------------------------- /editcsv/test1.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | abc,def,Army 3 | one,two,Navy 4 | go,abacus,Marine 5 | Android,Ubuntu,Linux 6 | -------------------------------------------------------------------------------- /obfuscatecsv/README.md: -------------------------------------------------------------------------------- 1 | # Obfuscatecsv 2 | *Notes* 3 | 1. If mulitple columns have the same data, then they will be obfuscated to the same value to preserve identity of same value in two columns 4 | 2. The "prefix" is required and recommended to be something that is related to the data. For example, if names are being obfuscated, then use "name" as the prefix. 5 | 3. The sequences are simply the row and column of the first occurence of the value. That gives you a way to work backward if you need to. 6 | 4. The default delimiter between the row and column sequence number is a dash. If no delimiter is desired just use "" as shown below. 7 | 8 | Use -help to show: 9 | ``` 10 | $ obfuscatecsv -help 11 | Help Message 12 | 13 | Usage: obfuscatecsv [options] 14 | -c string 15 | Range spec for columns to obfuscate 16 | -d string 17 | Delimiter for sequences (default "-") 18 | -headers 19 | CSV has headers (default true) 20 | -help 21 | Show help message 22 | -i string 23 | Input CSV filename; default STDIN 24 | -keep 25 | Keep CSV headers on output (default true) 26 | -o string 27 | Output CSV filename; default STDOUT 28 | -prefix string 29 | Prefix for obfuscator value 30 | $ 31 | ``` 32 | 33 | # Examples 34 | Obfuscate first two columns: 35 | ``` 36 | $ cat test1.csv 37 | A,B,C 38 | abc,def,Army 39 | def,abc,Navy 40 | ijk,abc,Navy 41 | zyz,def,Army 42 | abc,abc,AF 43 | $ obfuscatecsv -i test1.csv -prefix XT -c 1,2 44 | A,B,C 45 | XT2-0,XT2-1,Army 46 | XT2-1,XT2-0,Navy 47 | XT4-0,XT2-0,Navy 48 | XT5-0,XT2-1,Army 49 | XT2-0,XT2-0,AF 50 | ``` 51 | Chained/piped example that obfuscates all the columns, but with 52 | different prefixes. 53 | ``` 54 | $ obfuscatecsv -i test1.csv -prefix XT -c 1,2 -d "" | obfuscatecsv -prefix DOD -c 3 55 | A,B,C 56 | XT20,XT21,DOD2-2 57 | XT21,XT20,DOD3-2 58 | XT40,XT20,DOD3-2 59 | XT50,XT21,DOD2-2 60 | XT20,XT20,DOD6-2 61 | $ 62 | ``` 63 | -------------------------------------------------------------------------------- /obfuscatecsv/obfuscatecsv.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "flag" 6 | "fmt" 7 | "io" 8 | "log" 9 | "os" 10 | 11 | "github.com/mandolyte/csv-utils" 12 | ) 13 | 14 | var cs *rangespec.RangeSpec 15 | 16 | func main() { 17 | prefix := flag.String("prefix", "", "Prefix for obfuscator value") 18 | cols := flag.String("c", "", "Range spec for columns to obfuscate") 19 | input := flag.String("i", "", "Input CSV filename; default STDIN") 20 | output := flag.String("o", "", "Output CSV filename; default STDOUT") 21 | headers := flag.Bool("headers", true, "CSV has headers") 22 | delimiter := flag.String("d", "-", "Delimiter for sequences") 23 | keep := flag.Bool("keep", true, "Keep CSV headers on output") 24 | help := flag.Bool("help", false, "Show help message") 25 | flag.Parse() 26 | 27 | if *help { 28 | usage("Help Message") 29 | os.Exit(0) 30 | } 31 | 32 | /* check parameters */ 33 | if *prefix == "" { 34 | usage("Required: Missing prefix for obfuscation value") 35 | os.Exit(0) 36 | } 37 | 38 | if *cols != "" { 39 | var cserr error 40 | cs, cserr = rangespec.New(*cols) 41 | if cserr != nil { 42 | log.Fatalf("Invalid column range spec:%v, Error:\n%v\n", *cols, cserr) 43 | } 44 | } 45 | 46 | if *keep { 47 | if !*headers { 48 | log.Fatal("Cannot keep headers you don't have!") 49 | } 50 | } 51 | // open output file 52 | var w *csv.Writer 53 | if *output == "" { 54 | w = csv.NewWriter(os.Stdout) 55 | } else { 56 | fo, foerr := os.Create(*output) 57 | if foerr != nil { 58 | log.Fatal("os.Create() Error:" + foerr.Error()) 59 | } 60 | defer fo.Close() 61 | w = csv.NewWriter(fo) 62 | } 63 | 64 | // open input file 65 | var r *csv.Reader 66 | if *input == "" { 67 | r = csv.NewReader(os.Stdin) 68 | } else { 69 | fi, fierr := os.Open(*input) 70 | if fierr != nil { 71 | log.Fatal("os.Open() Error:" + fierr.Error()) 72 | } 73 | defer fi.Close() 74 | r = csv.NewReader(fi) 75 | } 76 | 77 | // ignore expectations of fields per row 78 | r.FieldsPerRecord = -1 79 | 80 | // Create value map to store mapping between 81 | // original values and obfuscated values 82 | valmap := make(map[string]string) 83 | 84 | // read loop for CSV 85 | var row uint64 86 | for { 87 | // read the csv file 88 | cells, rerr := r.Read() 89 | if rerr == io.EOF { 90 | break 91 | } 92 | if rerr != nil { 93 | log.Fatalf("csv.Read:\n%v\n", rerr) 94 | } 95 | if (row == 0) && *headers && *keep { 96 | row = 1 97 | err := w.Write(cells) 98 | if err != nil { 99 | log.Fatalf("csv.Write:\n%v\n", err) 100 | } 101 | continue 102 | } 103 | row++ 104 | // test row/columns for a match 105 | //process(cells, *prefix, valmap, row, *width) 106 | for n, v := range cells { 107 | if cs.InRange(uint64(n + 1)) { 108 | obsv, ok := valmap[v] 109 | if ok { 110 | cells[n] = obsv 111 | } else { 112 | valmap[v] = fmt.Sprintf("%s%d%s%d", *prefix, row, *delimiter, n) 113 | cells[n] = valmap[v] 114 | } 115 | } 116 | } 117 | err := w.Write(cells) 118 | if err != nil { 119 | log.Fatalf("csv.Write:\n%v\n", err) 120 | } 121 | } 122 | w.Flush() 123 | } 124 | 125 | /* 126 | func process(c []string, pf string, vm map[string]string, r uint64, w int) { 127 | 128 | } 129 | */ 130 | func usage(msg string) { 131 | fmt.Println(msg + "\n") 132 | fmt.Print("Usage: obfuscatecsv [options]\n") 133 | flag.PrintDefaults() 134 | } 135 | -------------------------------------------------------------------------------- /obfuscatecsv/test1.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | abc,def,Army 3 | def,abc,Navy 4 | ijk,abc,Navy 5 | zyz,def,Army 6 | abc,abc,AF 7 | -------------------------------------------------------------------------------- /pivotcsv/README.md: -------------------------------------------------------------------------------- 1 | # Pivotcsv 2 | Use -help to show: 3 | ``` 4 | $ pivotcsv -help 5 | -c int 6 | Column to pivot (REQUIRED) 7 | -headers 8 | CSV must have headers; cannot be false (default true) 9 | -help 10 | Show help message 11 | -i string 12 | CSV file name to pivot; default STDIN 13 | -nf string 14 | Format to use for numbers (default "%v") 15 | -nv string 16 | String to signal novalue; default is empty string 17 | -o string 18 | CSV output file name; default STDOUT 19 | -on 20 | Only consider numeric data and sum them (default true) 21 | -os 22 | Consider data as strings and concatenate 23 | -s int 24 | Column to sum/concat (REQUIRED) 25 | -sd string 26 | Concatenation delimiter; default is comma (default ",") 27 | ``` 28 | ## Examples 29 | ``` 30 | $ cat test1.csv 31 | A,B,C,D,E,F 32 | a1,b1,c1,d1,X,1 33 | a2,b2,c2,d2,Y,3 34 | a1,b1,c1,d1,X,3 35 | a2,b2,c2,d2,Y,3 36 | $ go run pivotcsv.go -i test1.csv -c 5 -s 6 37 | A,B,C,D,X,Y 38 | a1,b1,c1,d1,4, 39 | a2,b2,c2,d2,,6 40 | 41 | $ go run pivotcsv.go -i test1.csv -c 1 -s 2 -os 42 | a1,a2,C,D,E,F 43 | b1,,X,1 44 | b1,,X,3 45 | ,"b2,b2",Y,3 46 | $ cat test1.csv 47 | A,B,C,D,E,F 48 | a1,b1,c1,d1,X,1 49 | a2,b2,c2,d2,Y,3 50 | a1,b1,c1,d1,X,3 51 | a2,b2,c2,d2,Y,3 52 | 53 | $ cat test2.csv 54 | A,B,C,D,E,F 55 | a1,b1,c1,d1,X,1 56 | a2,b2,c2,d2,X,3 57 | a1,b1,c1,d1,X,3 58 | a2,b2,c2,d2,X,3 59 | a1,b1,c1,d1,Y,2 60 | a2,b2,c2,d2,Y,4 61 | a1,b1,c1,d1,Y,4 62 | a2,b2,c2,d2,Y,4 63 | a1,b1,c1,d1,Z,3 64 | a2,b2,c2,d2,Z,5 65 | a1,b1,c1,d1,Z,5 66 | a2,b2,c2,d2,Z,5 67 | $ go run pivotcsv.go -c 5 -s 6 < test2.csv 68 | A,B,C,D,X,Y,Z 69 | a1,b1,c1,d1,4,6,8 70 | a2,b2,c2,d2,6,8,10 71 | $ 72 | 73 | ``` 74 | -------------------------------------------------------------------------------- /pivotcsv/pivotcsv.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/csv" 6 | "flag" 7 | "fmt" 8 | "io" 9 | "log" 10 | "os" 11 | "sort" 12 | "strconv" 13 | "strings" 14 | ) 15 | 16 | type table struct { 17 | records [][]string 18 | pivotcol, pivotcolcount int 19 | } 20 | 21 | func (t *table) Len() int { 22 | return len(t.records) 23 | } 24 | 25 | func (t *table) Swap(i, j int) { 26 | t.records[i], t.records[j] = t.records[j], t.records[i] 27 | } 28 | 29 | func (t *table) Less(i, j int) bool { 30 | for n := range t.records[i] { 31 | if n >= t.pivotcol && n < (t.pivotcol+t.pivotcolcount) { 32 | continue 33 | } 34 | if t.records[i][n] < t.records[j][n] { 35 | return true 36 | } 37 | } 38 | return false 39 | } 40 | 41 | func main() { 42 | pivotcol := flag.Int("c", 0, "Column to pivot (REQUIRED)") 43 | pivotsum := flag.Int("s", 0, "Column to sum/concat (REQUIRED)") 44 | pivotinf := flag.String("i", "", "CSV file name to pivot; default STDIN") 45 | pivotout := flag.String("o", "", "CSV output file name; default STDOUT") 46 | headers := flag.Bool("headers", true, "CSV must have headers; cannot be false") 47 | help := flag.Bool("help", false, "Show help message") 48 | novalue := flag.String("nv", "", "String to signal novalue; default is empty string") 49 | numformat := flag.String("nf", "%v", "Format to use for numbers") 50 | onlynum := flag.Bool("on", true, "Only consider numeric data and sum them") 51 | onlystr := flag.Bool("os", false, "Consider data as strings and concatenate") 52 | strdlm := flag.String("sd", ",", "Concatenation delimiter; default is comma") 53 | flag.Parse() 54 | 55 | if *help { 56 | usage("") 57 | os.Exit(0) 58 | } 59 | 60 | if len(flag.Args()) > 0 { 61 | usage("Arguments provided when none expected") 62 | os.Exit(1) 63 | } 64 | 65 | if *pivotcol == 0 { 66 | usage("Pivot column number must greater than zero") 67 | os.Exit(0) 68 | } 69 | 70 | if *pivotsum == 0 { 71 | usage("Pivot sum column number must greater than zero") 72 | os.Exit(0) 73 | } 74 | 75 | if !*headers { 76 | usage("Headers are required; add them before using") 77 | os.Exit(0) 78 | } 79 | 80 | if *onlystr { 81 | *onlynum = false 82 | } 83 | 84 | // open output file 85 | var w *csv.Writer 86 | if *pivotout == "" { 87 | w = csv.NewWriter(os.Stdout) 88 | } else { 89 | fo, foerr := os.Create(*pivotout) 90 | if foerr != nil { 91 | log.Fatal("os.Create() Error:" + foerr.Error()) 92 | } 93 | defer fo.Close() 94 | w = csv.NewWriter(fo) 95 | defer w.Flush() 96 | } 97 | 98 | // open input file 99 | var r *csv.Reader 100 | if *pivotinf == "" { 101 | r = csv.NewReader(os.Stdin) 102 | } else { 103 | fi, fierr := os.Open(*pivotinf) 104 | if fierr != nil { 105 | log.Fatal("os.Open() Error:" + fierr.Error()) 106 | } 107 | defer fi.Close() 108 | r = csv.NewReader(fi) 109 | } 110 | 111 | // ignore expectations of fields per row 112 | r.FieldsPerRecord = -1 113 | 114 | // read into memory 115 | csvall, raerr := r.ReadAll() 116 | if raerr != nil { 117 | log.Fatal("r.ReadAll() Error:" + raerr.Error()) 118 | } 119 | 120 | // analyze the pivot column 121 | // a. get list of distinct values 122 | // b. use this to calculate width of new CSV table 123 | var row int 124 | pivotHdrs := make(map[string]int) 125 | for n := range csvall { 126 | if row == 0 { 127 | row++ 128 | continue 129 | } 130 | pivotHdrs[csvall[n][*pivotcol-1]]++ 131 | } 132 | //log.Printf("Number of pivot headers:%v", len(pivotHdrs)) 133 | 134 | // sort the new pivot headers 135 | var phkeys []string 136 | for phk := range pivotHdrs { 137 | phkeys = append(phkeys, phk) 138 | } 139 | sort.Strings(phkeys) 140 | 141 | // I have enough to make the new header row now! 142 | // make the output slice table 143 | var orecs [][]string 144 | 145 | // let's create the header row by: 146 | // a. appending to a slice the non pivot and sum columns 147 | // b. append the phkeys from above 148 | var hdrrow []string 149 | for n, v := range csvall[0] { 150 | if n+1 == *pivotcol { 151 | // insert here the new pivot headers 152 | for _, w := range phkeys { 153 | hdrrow = append(hdrrow, w) 154 | } 155 | continue 156 | } 157 | if n+1 == *pivotsum { 158 | continue 159 | } 160 | hdrrow = append(hdrrow, v) 161 | } 162 | // now make the headers the first append to the table 163 | orecs = append(orecs, hdrrow) 164 | 165 | // idea: 166 | // create a key based on all columns EXCEPT pivot and sum 167 | // by letting csv package write (reduced) row to a string buffer 168 | // use this as a map key 169 | 170 | // the value for the map would be a slice of type struct: 171 | // type sumconcat struct { 172 | // float64 -- to sum up numbers 173 | // []string -- to collect non-numbers 174 | // } 175 | // the slice would be one per pivot column header value 176 | // or maybe a map also with header value as key and struct as value 177 | type sumconcat struct { 178 | sumnum float64 179 | sumstr []string 180 | ncount uint64 181 | } 182 | pivot := make(map[string](map[string]*sumconcat)) 183 | for _, v := range csvall[1:] { 184 | // 185 | // step 1. create the []string for the key 186 | // 187 | var tmp []string 188 | for x, y := range v { 189 | // skip the pivot and sum columns 190 | if x+1 == *pivotcol { 191 | continue 192 | } 193 | if x+1 == *pivotsum { 194 | continue 195 | } 196 | tmp = append(tmp, y) 197 | } 198 | // 199 | // step 2. let CSV package create the key 200 | // 201 | var b bytes.Buffer 202 | w2 := csv.NewWriter(&b) 203 | err := w2.Write(tmp) 204 | w2.Flush() 205 | if err != nil { 206 | log.Fatal("w2.Write() Error:" + err.Error()) 207 | } 208 | skey := b.String() 209 | 210 | // step 3. update key value 211 | mapsc, ok := pivot[skey] 212 | //fmt.Printf("Summing:%v\n", v[*pivotsum-1]) 213 | //fmt.Printf("Pivot col value is:%v\n", v[*pivotcol-1]) 214 | if ok { 215 | // if key exists already in the pivot map 216 | // update the values and continue 217 | // try to convert pivotsum column value to a float64 218 | /* 219 | fmt.Print("Working on map:\n") 220 | 221 | for debugk, debugv := range mapsc { 222 | fmt.Printf("Key: %v -- Val: %v\n", debugk, debugv) 223 | } 224 | */ 225 | if *onlynum { 226 | if f, err := strconv.ParseFloat(v[*pivotsum-1], 64); err == nil { 227 | tmpsc, ok := mapsc[v[*pivotcol-1]] 228 | if ok { 229 | mapsc[v[*pivotcol-1]].sumnum += f 230 | mapsc[v[*pivotcol-1]].ncount++ 231 | } else { 232 | tmpsc = new(sumconcat) 233 | tmpsc.sumnum = f 234 | tmpsc.ncount++ 235 | mapsc[v[*pivotcol-1]] = tmpsc 236 | } 237 | } 238 | } else { 239 | tmpsc, ok := mapsc[v[*pivotcol-1]] 240 | if ok { 241 | mapsc[v[*pivotcol-1]].sumstr = 242 | append(mapsc[v[*pivotcol-1]].sumstr, v[*pivotsum-1]) 243 | } else { 244 | tmpsc = new(sumconcat) 245 | tmpsc.sumstr = append(tmpsc.sumstr, v[*pivotsum-1]) 246 | mapsc[v[*pivotcol-1]] = tmpsc 247 | } 248 | } 249 | } else { 250 | // 251 | // step 3b. fill out the struct val for map 252 | // 253 | sc := new(sumconcat) 254 | if *onlynum { 255 | if f, err := strconv.ParseFloat(v[*pivotsum-1], 64); err == nil { 256 | sc.sumnum = f 257 | } 258 | } else { 259 | sc.sumstr = append(sc.sumstr, v[*pivotsum-1]) 260 | } 261 | tmpmap := make(map[string]*sumconcat) 262 | tmpmap[v[*pivotcol-1]] = sc 263 | pivot[skey] = tmpmap 264 | } 265 | 266 | } 267 | csvall = nil 268 | // now create the output table 269 | for k, v := range pivot { 270 | // untangle the CSV formatted key using CSV package 271 | //fmt.Printf("Pivot Key is /%v/\n", k) 272 | b := bytes.NewBufferString(k) 273 | r := csv.NewReader(b) 274 | row, rerr := r.Read() 275 | if rerr != nil { 276 | if rerr != io.EOF { 277 | log.Fatal("r.Read Error:" + rerr.Error()) 278 | } 279 | } 280 | // append to a new row, inserting pivot columns in correct spot 281 | var newrow []string 282 | for i := 0; i < *pivotcol-1; i++ { 283 | if i == (*pivotsum - 1) { 284 | continue 285 | } 286 | newrow = append(newrow, row[i]) 287 | } 288 | // now for the pivot columns 289 | // use the sorted slice to pick them in sorted order 290 | for _, vsc := range phkeys { 291 | //fmt.Printf("phkey is /%v/\n", vsc) 292 | sc, ok := v[vsc] 293 | if ok { 294 | //fmt.Printf("Found v[vsc] /%v/\n", sc) 295 | if *onlynum { 296 | if sc.ncount == 0 { 297 | newrow = append(newrow, *novalue) 298 | } else { 299 | newrow = append(newrow, fmt.Sprintf(*numformat, sc.sumnum)) 300 | } 301 | } else { 302 | newrow = append(newrow, strings.Join(sc.sumstr, *strdlm)) 303 | } 304 | } else { 305 | //fmt.Printf("NOT Found v[vsc] /%v/\n", sc) 306 | // nothing for this header key - put out empty strings 307 | newrow = append(newrow, *novalue) 308 | } 309 | } 310 | // now append the rest of the columns after *pivotcol 311 | for i := *pivotcol; i < len(row); i++ { 312 | if i == (*pivotsum - 1) { 313 | continue 314 | } 315 | newrow = append(newrow, row[i]) 316 | } 317 | 318 | // append row to orecs table 319 | orecs = append(orecs, newrow) 320 | } 321 | 322 | // write out the header row 323 | werr := w.Write(orecs[0]) 324 | if werr != nil { 325 | log.Fatal("w.Write() Error:" + werr.Error()) 326 | } 327 | 328 | // now, let's sort the table 329 | tbl := &table{records: orecs[1:], pivotcol: *pivotcol - 1, pivotcolcount: len(phkeys)} 330 | 331 | sort.Sort(tbl) 332 | 333 | werr = w.WriteAll(tbl.records) 334 | if werr != nil { 335 | log.Fatal("w.WriteAll() Error:" + werr.Error()) 336 | } 337 | } 338 | 339 | func usage(msg string) { 340 | if msg != "" { 341 | fmt.Println(msg) 342 | } 343 | flag.PrintDefaults() 344 | } 345 | -------------------------------------------------------------------------------- /pivotcsv/test1.csv: -------------------------------------------------------------------------------- 1 | A,B,C,D,E,F 2 | a1,b1,c1,d1,X,1 3 | a2,b2,c2,d2,Y,3 4 | a1,b1,c1,d1,X,3 5 | a2,b2,c2,d2,Y,3 6 | -------------------------------------------------------------------------------- /pivotcsv/test2.csv: -------------------------------------------------------------------------------- 1 | A,B,C,D,E,F 2 | a1,b1,c1,d1,X,1 3 | a2,b2,c2,d2,X,3 4 | a1,b1,c1,d1,X,3 5 | a2,b2,c2,d2,X,3 6 | a1,b1,c1,d1,Y,2 7 | a2,b2,c2,d2,Y,4 8 | a1,b1,c1,d1,Y,4 9 | a2,b2,c2,d2,Y,4 10 | a1,b1,c1,d1,Z,3 11 | a2,b2,c2,d2,Z,5 12 | a1,b1,c1,d1,Z,5 13 | a2,b2,c2,d2,Z,5 14 | -------------------------------------------------------------------------------- /rangespec.go: -------------------------------------------------------------------------------- 1 | package rangespec 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "strconv" 7 | "strings" 8 | ) 9 | 10 | // RangeSpec parses a range specification, such as: 11 | // 1,3,5-8,12- 12 | // It will return a slice of RangeSpec, being two ints, 13 | // a start and a stop 14 | // Ranges start at 1, not zero. 15 | type RangeSpec struct { 16 | pairs []pair 17 | spec string 18 | Max uint64 19 | } 20 | type pair struct { 21 | start, stop uint64 22 | } 23 | 24 | // New takes a range specification string and 25 | // returns a slice of RangeSpec structs 26 | func New(r string) (*RangeSpec, error) { 27 | // remove any whitespace as a convenience 28 | r = strings.Replace(r, " ", "", -1) 29 | ret := new(RangeSpec) 30 | ret.pairs = make([]pair, 0) 31 | ret.spec = r 32 | tokens := strings.Split(r, ",") 33 | for n, val := range tokens { 34 | //fmt.Printf("Working on %v at %v\n", val, n) 35 | // does val have a dash? 36 | if strings.Contains(val, "-") { 37 | // split on dash 38 | ends := strings.Split(val, "-") 39 | if len(ends) > 2 { 40 | return nil, fmt.Errorf("RangeSpec: malformed specification:%v", val) 41 | } 42 | if ends[1] != "" { 43 | //fmt.Print("ends[] greater than 1\n") 44 | end1, err := strconv.ParseUint(ends[0], 10, 64) 45 | if err != nil { 46 | return nil, fmt.Errorf("RangeSpec: not a number:%v\n%v", ends[0], err) 47 | } 48 | end2, err := strconv.ParseUint(ends[1], 10, 64) 49 | if err != nil { 50 | return nil, fmt.Errorf("RangeSpec: not a number:%v\n%v", ends[1], err) 51 | } 52 | var rs pair 53 | rs.start = end1 54 | rs.stop = end2 55 | ret.pairs = append(ret.pairs, rs) 56 | } else { 57 | //fmt.Print("ends[] == 1\n") 58 | if n+1 != len(tokens) { 59 | return nil, fmt.Errorf("RangeSpec: open range must be last:%v", val) 60 | } 61 | end1, err := strconv.ParseUint(ends[0], 10, 64) 62 | if err != nil { 63 | return nil, fmt.Errorf("RangeSpec: not a number:%v\n%v", ends[0], err) 64 | } 65 | var rs pair 66 | rs.start = end1 67 | rs.stop = math.MaxUint64 68 | ret.pairs = append(ret.pairs, rs) 69 | } 70 | continue 71 | } else { 72 | end1, err := strconv.ParseUint(val, 10, 64) 73 | if err != nil { 74 | return nil, fmt.Errorf("RangeSpec: not a number:%v\n%v", val, err) 75 | } 76 | var rs pair 77 | rs.start = end1 78 | rs.stop = end1 79 | ret.pairs = append(ret.pairs, rs) 80 | } 81 | } 82 | // ensure ascending specification 83 | for i := 0; i < len(ret.pairs); i++ { 84 | if i == 0 { 85 | if ret.pairs[i].start == 0 { 86 | return nil, fmt.Errorf("RangeSpec: range must be larger zero: %v", ret.pairs[i].start) 87 | } 88 | } 89 | if ret.pairs[i].start > ret.pairs[i].stop { 90 | return nil, fmt.Errorf("RangeSpec: start (%v) must be equal or less than stop (%v)", ret.pairs[i].start, ret.pairs[i].stop) 91 | } 92 | if i > 0 { 93 | if ret.pairs[i].start <= ret.pairs[i-1].stop { 94 | return nil, fmt.Errorf("RangeSpec: start (%v) must be greater than previous stop (%v)", ret.pairs[i].start, ret.pairs[i-1].stop) 95 | } 96 | } 97 | } 98 | // set the maximum row number 99 | ret.Max = ret.pairs[len(ret.pairs)-1].stop 100 | return ret, nil 101 | } 102 | 103 | // InRange will test whehter a number is in the range specification 104 | func (rs *RangeSpec) InRange(num uint64) bool { 105 | for _, val := range rs.pairs { 106 | if num >= val.start && num <= val.stop { 107 | return true 108 | } 109 | } 110 | return false 111 | } 112 | -------------------------------------------------------------------------------- /rangespec_test.go: -------------------------------------------------------------------------------- 1 | package rangespec 2 | 3 | import ( 4 | "testing" 5 | "math" 6 | "fmt" 7 | ) 8 | 9 | func TestRangeSpec(t *testing.T) { 10 | input := " 1, 3, 5 - 8 , 12 - " 11 | expected := []pair{{1,1},{3,3},{5,8},{12,math.MaxUint64}} 12 | result,err := New(input) 13 | if err != nil { 14 | t.Fatalf("[fail] expected no error for: %s, got %#v\n", input, err) 15 | } 16 | 17 | for n := range result.pairs { 18 | if result.pairs[n].start != expected[n].start { 19 | t.Fatalf("[fail] Start[%v] should be: %v, got %v\n", n, result.pairs[n].start, expected[n].start) 20 | } 21 | if result.pairs[n].stop != expected[n].stop { 22 | t.Fatalf("[fail] Stop[%v] should be: %v, got %v\n", n, result.pairs[n].stop, expected[n].stop) 23 | } 24 | } 25 | 26 | input = "1,3,5-,12-" 27 | expected = nil 28 | result,err = New(input) 29 | if err == nil { 30 | t.Fatalf("[fail] expected error for invalid input: %s\n", input) 31 | } 32 | if result != nil { 33 | t.Fatalf("[fail] expected result to be nil for invalid input: %s\n", input) 34 | } 35 | fmt.Printf("Invalid error message for %v was:\n %v\n\n",input,err) 36 | 37 | input = "1,A,5-,12-" 38 | expected = nil 39 | result,err = New(input) 40 | if err == nil { 41 | t.Fatalf("[fail] expected error for invalid input: %s\n", input) 42 | } 43 | if result != nil { 44 | t.Fatalf("[fail] expected result to be nil for invalid input: %s\n", input) 45 | } 46 | fmt.Printf("Invalid error message for %v was:\n %v\n\n",input,err) 47 | 48 | input = "1,5,3-,12-" 49 | expected = nil 50 | result,err = New(input) 51 | if err == nil { 52 | t.Fatalf("[fail] expected error for invalid input: %s\n", input) 53 | } 54 | if result != nil { 55 | t.Fatalf("[fail] expected result to be nil for invalid input: %s\n", input) 56 | } 57 | fmt.Printf("Invalid error message for %v was:\n %v\n\n",input,err) 58 | 59 | input = "1,3,5-4,12-" 60 | expected = nil 61 | result,err = New(input) 62 | if err == nil { 63 | t.Fatalf("[fail] expected error for invalid input: %s\n", input) 64 | } 65 | if result != nil { 66 | t.Fatalf("[fail] expected result to be nil for invalid input: %s\n", input) 67 | } 68 | fmt.Printf("Invalid error message for %v was:\n %v\n\n",input,err) 69 | 70 | input = "1,5,5-6,12-" 71 | expected = nil 72 | result,err = New(input) 73 | if err == nil { 74 | t.Fatalf("[fail] expected error for invalid input: %s\n", input) 75 | } 76 | if result != nil { 77 | t.Fatalf("[fail] expected result to be nil for invalid input: %s\n", input) 78 | } 79 | fmt.Printf("Invalid error message for %v was:\n %v\n\n",input,err) 80 | 81 | input = " 1,3,5-8,12-" 82 | result,err = New(input) 83 | if err != nil { 84 | t.Fatalf("[fail] expected no error for: %s, got %#v\n", input, err) 85 | } 86 | 87 | tests := []uint64{1,2,3,5,6,8,12,13,99} 88 | exptd := []bool{true,false,true,true,true,true,true,true,true} 89 | for n,x := range tests { 90 | if result.InRange(x) != exptd[n] { 91 | t.Fatalf("[fail] InRange error, range %v, for %v got %v\n", input, x, result.InRange(x)) 92 | } 93 | } 94 | 95 | } 96 | -------------------------------------------------------------------------------- /recursecsv/README.md: -------------------------------------------------------------------------------- 1 | # Recursecsv 2 | *Notes* 3 | 1. It will always output the normal hierarchical columns in this order: level, root, parent, child, path, and cycle (a Yes/No) 4 | 2. Note defaults shown in the help message below 5 | 3. At present it can only take two columns of data, the parent and child columns. If these have other associated values, they will have to be added back in to this output. 6 | 4. The input must have column headers, since they are re-used in the output CSV. 7 | 8 | 9 | Use -help to show: 10 | ``` 11 | $ recursecsv -help 12 | Help Message 13 | 14 | -child int 15 | Child column; default 2 (default 2) 16 | -delimiter string 17 | String for path delimiter (default ">") 18 | -help 19 | Show usage message 20 | -i string 21 | Input CSV filename; default STDIN 22 | -o string 23 | Output CSV filename; default STDOUT 24 | -parent int 25 | Parent column; default 1 (default 1) 26 | -start string 27 | Start value of hierarchy 28 | ``` 29 | 30 | ## Examples 31 | Example with a cyclic condition. 32 | ``` 33 | $ cat test1.csv 34 | parent,child 35 | A,X 36 | A,B 37 | B,C 38 | D,E 39 | C,D 40 | X,Y 41 | Y,B 42 | E,C 43 | $ recursecsv -i test1.csv -start A 44 | 2017/12/01 09:56:39 Start at 2017-12-01 14:56:39.064464694 +0000 UTC 45 | 2017/12/01 09:56:39 Data loaded and ready to start recursing 46 | 2017/12/01 09:56:39 Working on A 47 | 2017/12/01 09:56:39 . elasped 66.33µs 48 | 2017/12/01 09:56:39 End at 2017-12-01 14:56:39.087153217 +0000 UTC 49 | 2017/12/01 09:56:39 Elapsed time 22.688732ms 50 | Level,Root,parent,child,Path,Leaf,Cycle 51 | 1,A,A,B,>A>B>,No,No 52 | 2,A,B,C,>A>B>C>,No,No 53 | 3,A,C,D,>A>B>C>D>,No,No 54 | 4,A,D,E,>A>B>C>D>E>,No,No 55 | 5,A,E,C,>A>B>C>D>E>C>,No,Yes 56 | 1,A,A,X,>A>X>,No,No 57 | 2,A,X,Y,>A>X>Y>,No,No 58 | 3,A,Y,B,>A>X>Y>B>,No,No 59 | 4,A,B,C,>A>X>Y>B>C>,No,No 60 | 5,A,C,D,>A>X>Y>B>C>D>,No,No 61 | 6,A,D,E,>A>X>Y>B>C>D>E>,No,No 62 | 7,A,E,C,>A>X>Y>B>C>D>E>C>,No,Yes 63 | $ 64 | ``` 65 | Simple no cycle test. 66 | ``` 67 | $ recursecsv -i test2.csv -start A 68 | 2017/12/01 09:58:39 Start at 2017-12-01 14:58:39.319162864 +0000 UTC 69 | 2017/12/01 09:58:39 Data loaded and ready to start recursing 70 | 2017/12/01 09:58:39 Working on A 71 | 2017/12/01 09:58:39 . elasped 87.756µs 72 | 2017/12/01 09:58:39 End at 2017-12-01 14:58:39.319813 +0000 UTC 73 | 2017/12/01 09:58:39 Elapsed time 650.482µs 74 | Level,Root,parent,child,Path,Leaf,Cycle 75 | 1,A,A,B,>A>B>,No,No 76 | 2,A,B,C,>A>B>C>,No,No 77 | 3,A,C,D,>A>B>C>D>,No,No 78 | 4,A,D,E,>A>B>C>D>E>,Yes,No 79 | ``` 80 | -------------------------------------------------------------------------------- /recursecsv/recursecsv.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "encoding/csv" 6 | "flag" 7 | "fmt" 8 | "io" 9 | "log" 10 | "os" 11 | "sort" 12 | "strings" 13 | "time" 14 | ) 15 | 16 | var w *csv.Writer 17 | 18 | var parent = flag.Int("parent", 1, "Parent column; default 1") 19 | var child = flag.Int("child", 2, "Child column; default 2") 20 | var start = flag.String("start", "", "Start value of hierarchy;\nif first letter is ampersand, use as a file with a list of values to process") 21 | var delimiter = flag.String("delimiter", ">", "String for path delimiter") 22 | var input = flag.String("i", "", "Input CSV filename; default STDIN") 23 | var output = flag.String("o", "", "Output CSV filename; default STDOUT") 24 | var headers = flag.Bool("headers", true, "Input CSV has headers") 25 | var help = flag.Bool("help", false, "Show usage message") 26 | var info = flag.Bool("info", true, "Show info messages during processing") 27 | 28 | func main() { 29 | flag.Parse() 30 | 31 | if *help { 32 | usage("Help Message") 33 | } 34 | 35 | if *start == "" { 36 | usage("Start value is missing") 37 | } 38 | now := time.Now().UTC() 39 | display(fmt.Sprintf("Start at %v", now)) 40 | 41 | var startvals []string 42 | if strings.HasPrefix(*start, "@") { 43 | f, ferr := os.Open((*start)[1:]) 44 | if ferr != nil { 45 | log.Fatalf("os.Open() error on %v\n:%v", (*start)[1:], ferr) 46 | } 47 | defer f.Close() 48 | scanner := bufio.NewScanner(f) 49 | for scanner.Scan() { 50 | startvals = append(startvals, scanner.Text()) 51 | } 52 | } else { 53 | startvals = append(startvals, *start) 54 | } 55 | 56 | // open output file 57 | if *output == "" { 58 | w = csv.NewWriter(os.Stdout) 59 | } else { 60 | fo, foerr := os.Create(*output) 61 | if foerr != nil { 62 | log.Fatal("os.Create() Error:" + foerr.Error()) 63 | } 64 | defer fo.Close() 65 | w = csv.NewWriter(fo) 66 | } 67 | 68 | // open input file 69 | var r *csv.Reader 70 | if *input == "" { 71 | r = csv.NewReader(os.Stdin) 72 | } else { 73 | fi, fierr := os.Open(*input) 74 | if fierr != nil { 75 | log.Fatal("os.Open() Error:" + fierr.Error()) 76 | } 77 | defer fi.Close() 78 | r = csv.NewReader(fi) 79 | } 80 | 81 | // ignore expectations of fields per row 82 | r.FieldsPerRecord = 2 83 | 84 | // read loop for CSV to load into memory 85 | var row uint64 86 | pcol := *parent - 1 87 | ccol := *child - 1 88 | parents := make(map[string][]string) 89 | for { 90 | // read the csv file 91 | cells, rerr := r.Read() 92 | if rerr == io.EOF { 93 | break 94 | } 95 | if rerr != nil { 96 | log.Fatalf("csv.Read [row %v]:\n%v\n", row, rerr) 97 | } 98 | if row == 0 { 99 | if *headers == false { 100 | recurseHeaders[2] = "Parent" 101 | recurseHeaders[3] = "Child" 102 | } else { 103 | recurseHeaders[2] = cells[pcol] 104 | recurseHeaders[3] = cells[ccol] 105 | } 106 | writeRow(recurseHeaders[0], recurseHeaders[1], 107 | recurseHeaders[2], recurseHeaders[3], 108 | recurseHeaders[4], recurseHeaders[5], recurseHeaders[6], 109 | true) 110 | row++ 111 | continue 112 | } 113 | _, ok := parents[cells[pcol]] 114 | if ok { 115 | parents[cells[pcol]] = append(parents[cells[pcol]], cells[ccol]) 116 | } else { 117 | parents[cells[pcol]] = make([]string, 0) 118 | parents[cells[pcol]] = append(parents[cells[pcol]], cells[ccol]) 119 | } 120 | row++ 121 | } 122 | 123 | display("Data loaded and ready to start recursing") 124 | for _, v := range startvals { 125 | begin := time.Now().UTC() 126 | display(fmt.Sprintf("Working on %v", v)) 127 | recurse(0, v, v, *delimiter+v, parents) 128 | display(fmt.Sprintf(". elasped %v", time.Since(begin))) 129 | } 130 | stop := time.Now().UTC() 131 | elapsed := time.Since(now) 132 | display(fmt.Sprintf("End at %v", stop)) 133 | display(fmt.Sprintf("Elapsed time %v", elapsed)) 134 | w.Flush() 135 | } 136 | 137 | func recurse(level int, root, start, path string, parents map[string][]string) { 138 | // get value from map for start node 139 | //v, ok := parents[start] 140 | //if !ok { 141 | // return // at a leaf node 142 | //} 143 | 144 | // sort the children 145 | v := parents[start] 146 | sort.Strings(v) 147 | 148 | level++ // increment depth 149 | for _, child := range v { 150 | looptest := *delimiter + child + *delimiter 151 | cycle := "No" 152 | if strings.Contains(path, looptest) { 153 | cycle = "Yes" 154 | } 155 | sLevel := fmt.Sprintf("%v", level) 156 | sPath := path + *delimiter + child 157 | leaf := "Yes" 158 | _, ok := parents[child] 159 | if ok { 160 | leaf = "No" 161 | } 162 | writeRow(sLevel, root, start, child, sPath, leaf, cycle, false) 163 | if cycle == "No" && ok { 164 | recurse(level, root, child, sPath, parents) 165 | } 166 | } 167 | 168 | } 169 | 170 | func writeRow(level, root, parent, child, path, leaf, cycle string, headerrow bool) { 171 | var cells []string 172 | cells = append(cells, level) 173 | cells = append(cells, root) 174 | cells = append(cells, parent) 175 | cells = append(cells, child) 176 | if headerrow { 177 | cells = append(cells, path) 178 | } else { 179 | cells = append(cells, path+*delimiter) 180 | } 181 | cells = append(cells, leaf) 182 | cells = append(cells, cycle) 183 | 184 | err := w.Write(cells) 185 | if err != nil { 186 | log.Fatalf("csv.Write:\n%v\n", err) 187 | } 188 | 189 | } 190 | 191 | func usage(msg string) { 192 | fmt.Println(msg + "\n") 193 | flag.PrintDefaults() 194 | os.Exit(0) 195 | } 196 | 197 | func display(msg string) { 198 | if *info { 199 | log.Print(msg + "\n") 200 | } 201 | } 202 | 203 | var recurseHeaders []string 204 | 205 | func init() { 206 | recurseHeaders = append(recurseHeaders, 207 | "Level", "Root", "", "", "Path", "Leaf", "Cycle") 208 | } 209 | -------------------------------------------------------------------------------- /recursecsv/test1.csv: -------------------------------------------------------------------------------- 1 | parent,child 2 | A,X 3 | A,B 4 | B,C 5 | D,E 6 | C,D 7 | X,Y 8 | Y,B 9 | E,C 10 | -------------------------------------------------------------------------------- /recursecsv/test2.csv: -------------------------------------------------------------------------------- 1 | parent,child 2 | A,B 3 | B,C 4 | D,E 5 | C,D 6 | -------------------------------------------------------------------------------- /recursedata/recursedata.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "encoding/csv" 6 | "encoding/json" 7 | "flag" 8 | "fmt" 9 | "io" 10 | "log" 11 | "os" 12 | "sort" 13 | "strconv" 14 | "strings" 15 | "time" 16 | ) 17 | 18 | var w *csv.Writer 19 | var wpath *csv.Writer 20 | 21 | var parent = flag.Int("parent", 1, "Parent column; default 1") 22 | var child = flag.Int("child", 2, "Child column; default 2") 23 | var start = flag.String("start", "", "Start value of hierarchy;\nif first letter is ampersand, use as a file with a list of values to process") 24 | var delimiter = flag.String("delimiter", ">", "String for path delimiter") 25 | var input = flag.String("i", "", "Input CSV filename; default STDIN") 26 | var output = flag.String("o", "", "Output CSV filename; default STDOUT") 27 | var headers = flag.Bool("headers", true, "Input CSV has headers") 28 | var help = flag.Bool("help", false, "Show usage message") 29 | var info = flag.Bool("info", true, "Show info messages during processing") 30 | var data = flag.String("data", "", "Comma list of child data columns to include") 31 | var pathfile = flag.String("path", "", "Output CSV file for path data") 32 | 33 | func main() { 34 | flag.Parse() 35 | 36 | if *help { 37 | usage("Help Message") 38 | } 39 | 40 | if *start == "" { 41 | usage("Start value is missing") 42 | } 43 | var dataVals []string 44 | var dataVal []int 45 | if *data != "" { 46 | // split into the ints and store away for use later 47 | dataVals = strings.Split(*data, ",") 48 | dataVal = make([]int, len(dataVals)) 49 | for i := range dataVals { 50 | n, err := strconv.Atoi(dataVals[i]) 51 | if err != nil { 52 | log.Fatalf("strconv.Atoi() error on %v\n:%v", dataVals[i], err) 53 | } 54 | dataVal[i] = n 55 | } 56 | if *pathfile == "" { 57 | log.Fatal("Cannot specify data columns without a path CSV filename") 58 | } 59 | } 60 | 61 | var startvals []string 62 | if strings.HasPrefix(*start, "@") { 63 | f, ferr := os.Open((*start)[1:]) 64 | if ferr != nil { 65 | log.Fatalf("os.Open() error on %v\n:%v", (*start)[1:], ferr) 66 | } 67 | defer f.Close() 68 | scanner := bufio.NewScanner(f) 69 | for scanner.Scan() { 70 | startvals = append(startvals, scanner.Text()) 71 | } 72 | } else { 73 | startvals = append(startvals, *start) 74 | } 75 | 76 | // open output file 77 | if *output == "" { 78 | w = csv.NewWriter(os.Stdout) 79 | } else { 80 | fo, foerr := os.Create(*output) 81 | if foerr != nil { 82 | log.Fatal("os.Create() Error:" + foerr.Error()) 83 | } 84 | defer fo.Close() 85 | w = csv.NewWriter(fo) 86 | } 87 | 88 | // open output path file 89 | if *pathfile != "" { 90 | if *data == "" { 91 | log.Fatal("Cannot specify path CSV filename without data columns") 92 | } 93 | pfo, pfoerr := os.Create(*pathfile) 94 | if pfoerr != nil { 95 | log.Fatal("os.Create() Error:" + pfoerr.Error()) 96 | } 97 | defer pfo.Close() 98 | wpath = csv.NewWriter(pfo) 99 | // write the headers 100 | err := wpath.Write(pathHeaders) 101 | if err != nil { 102 | log.Fatal("wpath.Write(pathHeaders) Error:" + err.Error()) 103 | } 104 | } 105 | 106 | // open input file 107 | var r *csv.Reader 108 | if *input == "" { 109 | r = csv.NewReader(os.Stdin) 110 | } else { 111 | fi, fierr := os.Open(*input) 112 | if fierr != nil { 113 | log.Fatal("os.Open() Error:" + fierr.Error()) 114 | } 115 | defer fi.Close() 116 | r = csv.NewReader(fi) 117 | } 118 | 119 | // ignore expectations of fields per row 120 | r.FieldsPerRecord = -1 121 | 122 | now := time.Now().UTC() 123 | display(fmt.Sprintf("Start at %v", now)) 124 | 125 | // read loop for CSV to load into memory 126 | var row uint64 127 | pcol := *parent - 1 128 | ccol := *child - 1 129 | parents := make(map[string]map[string][][]string) 130 | for { 131 | // read the csv file 132 | cells, rerr := r.Read() 133 | if rerr == io.EOF { 134 | break 135 | } 136 | if rerr != nil { 137 | log.Fatalf("csv.Read [row %v]:\n%v\n", row, rerr) 138 | } 139 | if row == 0 { 140 | if *headers == false { 141 | recurseHeaders[2] = "Parent" 142 | recurseHeaders[3] = "Child" 143 | } else { 144 | recurseHeaders[2] = cells[pcol] 145 | recurseHeaders[3] = cells[ccol] 146 | } 147 | writeRow(recurseHeaders[0], recurseHeaders[1], 148 | recurseHeaders[2], recurseHeaders[3], 149 | nil, recurseHeaders[5], recurseHeaders[6], 150 | true) 151 | row++ 152 | continue 153 | } 154 | childmap, ok := parents[cells[pcol]] 155 | 156 | if ok { 157 | // does the child exist in the map? 158 | _, childOk := childmap[cells[ccol]] 159 | if childOk { 160 | // is a child table needed? 161 | if *data == "" { 162 | // no table needed 163 | // child is in the map already 164 | // nothing to do! 165 | } else { 166 | childTable := childmap[cells[ccol]] 167 | // child data table exists, add a new row 168 | newrow := make([]string, 0) 169 | for i := range dataVal { 170 | newrow = append(newrow, cells[dataVal[i]-1]) 171 | } 172 | childTable = append(childTable, newrow) 173 | // put it back 174 | childmap[cells[ccol]] = childTable 175 | } 176 | } else { 177 | // Child is not in the map; add it 178 | // is a child table needed? 179 | if *data == "" { 180 | // no table needed 181 | childmap[cells[ccol]] = nil 182 | } else { 183 | // child data table not exists, create it first 184 | childTable := make([][]string, 0) 185 | // now make the first row for this new table 186 | newrow := make([]string, 0) 187 | for i := range dataVal { 188 | newrow = append(newrow, cells[dataVal[i]-1]) 189 | } 190 | childTable = append(childTable, newrow) 191 | // put it back 192 | childmap[cells[ccol]] = childTable 193 | } 194 | } 195 | // put it back into the parent map 196 | parents[cells[pcol]] = childmap // do I need this?? 197 | } else { 198 | // child map does not exist 199 | newChildMap := make(map[string][][]string) 200 | if *data == "" { 201 | // no table needed 202 | newChildMap[cells[ccol]] = nil 203 | } else { 204 | // child data table needed, create it first 205 | childTable := make([][]string, 0) 206 | // now make the first row for this new table 207 | newrow := make([]string, 0) 208 | for i := range dataVal { 209 | newrow = append(newrow, cells[dataVal[i]-1]) 210 | } 211 | childTable = append(childTable, newrow) 212 | // put it back 213 | newChildMap[cells[ccol]] = childTable 214 | } 215 | // add to parent map 216 | parents[cells[pcol]] = newChildMap 217 | } 218 | row++ 219 | } 220 | 221 | display("Data loaded and ready to start recursing") 222 | for _, v := range startvals { 223 | begin := time.Now().UTC() 224 | display(fmt.Sprintf("Working on %v", v)) 225 | if *data == "" { 226 | recurse(0, v, v, nil, nil, parents) 227 | } else { 228 | initpath := make([]string, 0) 229 | initpath = append(initpath, v) 230 | initChildData := make([]childData, 0) 231 | recurse(0, v, v, initpath, initChildData, parents) 232 | } 233 | display(fmt.Sprintf(". elasped %v", time.Since(begin))) 234 | } 235 | stop := time.Now().UTC() 236 | elapsed := time.Since(now) 237 | w.Flush() 238 | if wpath != nil { 239 | wpath.Flush() 240 | } 241 | display(fmt.Sprintf("End at %v", stop)) 242 | display(fmt.Sprintf("Elapsed time %v", elapsed)) 243 | } 244 | 245 | type childData struct { 246 | child string 247 | data [][]string 248 | } 249 | 250 | func recurse(level int, root, start string, path []string, 251 | pathData []childData, 252 | parents map[string]map[string][][]string) { 253 | 254 | // sort the children 255 | childmap := parents[start] 256 | var keys []string 257 | for k := range childmap { 258 | keys = append(keys, k) 259 | } 260 | sort.Strings(keys) 261 | 262 | level++ // increment depth 263 | for _, child := range keys { 264 | cycle := contains(path, child) 265 | sLevel := fmt.Sprintf("%v", level) 266 | sPath := make([]string, len(path)) 267 | copy(sPath, path) 268 | sPath = append(sPath, child) 269 | leaf := "Yes" 270 | _, ok := parents[child] 271 | if ok { 272 | leaf = "No" 273 | } 274 | writeRow(sLevel, root, start, child, sPath, leaf, cycle, false) 275 | var newPathData []childData 276 | if pathData != nil { 277 | newPathData = make([]childData, len(pathData)) 278 | copy(newPathData, pathData) 279 | newChildData := childData{} 280 | newChildData.child = child 281 | newChildData.data = make([][]string, len(childmap[child])) 282 | copy(newChildData.data, childmap[child]) 283 | newPathData = append(newPathData, newChildData) 284 | writePath(root, newPathData) 285 | } 286 | if cycle == "No" && ok { 287 | recurse(level, root, child, sPath, newPathData, parents) 288 | } 289 | } 290 | 291 | } 292 | 293 | func writeRow(level, root, parent, child string, 294 | path []string, leaf, cycle string, headerrow bool) { 295 | 296 | var cells []string 297 | cells = append(cells, level) 298 | cells = append(cells, root) 299 | cells = append(cells, parent) 300 | cells = append(cells, child) 301 | if headerrow { 302 | cells = append(cells, recurseHeaders[4]) 303 | } else { 304 | pathString := strings.Join(path, *delimiter) 305 | // put a delimiter at beginning and end 306 | cells = append(cells, *delimiter+pathString+*delimiter) 307 | } 308 | cells = append(cells, leaf) 309 | cells = append(cells, cycle) 310 | 311 | err := w.Write(cells) 312 | if err != nil { 313 | log.Fatalf("csv.Write:\n%v\n", err) 314 | } 315 | } 316 | 317 | func writePath(root string, pathData []childData) { 318 | cells := make([]string, 0) 319 | cells = append(cells, root) 320 | cells = append(cells, pathData[len(pathData)-1].child) 321 | for _, cdata := range pathData { 322 | jsonVal, jsonErr := json.Marshal(cdata.data) 323 | if jsonErr != nil { 324 | log.Fatalf("json.Marshal:\n%v\n", jsonErr) 325 | } 326 | cells = append(cells, string(jsonVal)) 327 | cells = append(cells, cdata.child) 328 | } 329 | err := wpath.Write(cells) 330 | if err != nil { 331 | log.Fatalf("csv.Write:\n%v\n", err) 332 | } 333 | } 334 | 335 | func usage(msg string) { 336 | fmt.Println(msg + "\n") 337 | flag.PrintDefaults() 338 | os.Exit(0) 339 | } 340 | 341 | func display(msg string) { 342 | if *info { 343 | log.Print(msg + "\n") 344 | } 345 | } 346 | 347 | var recurseHeaders []string 348 | var pathHeaders []string 349 | 350 | func init() { 351 | recurseHeaders = append(recurseHeaders, 352 | "Level", "Root", "", "", "Path", "Leaf", "Cycle") 353 | pathHeaders = append(pathHeaders, "root", "child", 354 | "data1", "child1", 355 | "data2", "child2", 356 | "data3", "child3", 357 | "data4", "child4", 358 | "data5", "child5", 359 | "data6", "child6", 360 | "data7", "child7", 361 | "data8", "child8", 362 | "data9", "child9", 363 | "data10", "child10", 364 | "data11", "child11", 365 | "data12", "child12", 366 | "data13", "child13", 367 | "data14", "child14", 368 | "data15", "child15", 369 | ) 370 | } 371 | 372 | func contains(path []string, value string) string { 373 | for _, v := range path { 374 | if v == value { 375 | return "Yes" 376 | } 377 | } 378 | return "No" 379 | } 380 | 381 | /* Code Graveyard 382 | func writePathRow(c []string, d []string, child string) { 383 | numcols := len(c) + len(d) + 1 384 | row := make([]string, numcols) 385 | i := 0 386 | for _, v := range c { 387 | row[i] = v 388 | i++ 389 | } 390 | for _, v := range d { 391 | row[i] = v 392 | i++ 393 | } 394 | row[i] = child 395 | err := wpath.Write(row) 396 | if err != nil { 397 | log.Fatalf("csv.Write:\n%v\n", err) 398 | } 399 | 400 | } 401 | 402 | func writePath(root string, pathData []childData) { 403 | cells := make([]string, 0) 404 | cells = append(cells, root) 405 | for _, cdata := range pathData { 406 | for _, val := range cdata.data { 407 | cells = append(cells, val...) 408 | } 409 | cells = append(cells, cdata.child) 410 | } 411 | err := wpath.Write(cells) 412 | if err != nil { 413 | log.Fatalf("csv.Write:\n%v\n", err) 414 | } 415 | } 416 | 417 | func writePath(root string, pathData []childData) { 418 | cells := make([]string, 0) 419 | cells = append(cells, root) 420 | for _, cdata := range pathData { 421 | jsonVal, jsonErr := json.Marshal(cdata.data) 422 | if jsonErr != nil { 423 | log.Fatalf("json.Marshal:\n%v\n", jsonErr) 424 | } 425 | cells = append(cells, string(jsonVal)) 426 | cells = append(cells, cdata.child) 427 | } 428 | err := wpath.Write(cells) 429 | if err != nil { 430 | log.Fatalf("csv.Write:\n%v\n", err) 431 | } 432 | } 433 | 434 | */ 435 | -------------------------------------------------------------------------------- /reordercsv/README.md: -------------------------------------------------------------------------------- 1 | # Reordercsv 2 | Use -help to show: 3 | ``` 4 | $ reordercsv -help 5 | Help Message 6 | 7 | -c string 8 | Order of columns from input 9 | -headers 10 | CSV has headers (default true) 11 | -help 12 | Show usage message 13 | -i string 14 | Input CSV filename; default STDIN 15 | -keep 16 | Keep CSV headers on output (default true) 17 | -o string 18 | Output CSV filename; default STDOUT 19 | $ 20 | ``` 21 | Example: 22 | ``` 23 | $ cat test1.csv 24 | A,B,C,D,E,F,G,H,I 25 | 1,1,1,1,1,1,1,1,1 26 | 2,2,2,2,2,2,2,2,2 27 | 3,3,3,3,3,3,3,3,3 28 | 4,4,4,4,4,4,4,4,4 29 | 5,5,5,5,5,5,5,5,5 30 | 6,6,6,6,6,6,6,6,6 31 | 7,7,7,7,7,7,7,7,7 32 | 8,8,8,8,8,8,8,8,8 33 | 9,9,9,9,9,9,9,9,9 34 | $ reordercsv -i test1.csv -c 3,2,1,1 35 | C,B,A,A 36 | 1,1,1,1 37 | 2,2,2,2 38 | 3,3,3,3 39 | 4,4,4,4 40 | 5,5,5,5 41 | 6,6,6,6 42 | 7,7,7,7 43 | 8,8,8,8 44 | 9,9,9,9 45 | $ 46 | ``` 47 | 48 | -------------------------------------------------------------------------------- /reordercsv/reordercsv.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "flag" 6 | "fmt" 7 | "io" 8 | "log" 9 | "os" 10 | "strconv" 11 | "strings" 12 | ) 13 | 14 | func main() { 15 | cols := flag.String("c", "", "Order of columns from input") 16 | input := flag.String("i", "", "Input CSV filename; default STDIN") 17 | output := flag.String("o", "", "Output CSV filename; default STDOUT") 18 | headers := flag.Bool("headers", true, "CSV has headers") 19 | keep := flag.Bool("keep", true, "Keep CSV headers on output") 20 | help := flag.Bool("help", false, "Show usage message") 21 | flag.Parse() 22 | 23 | if *help { 24 | usage("Help Message") 25 | } 26 | 27 | if *cols == "" { 28 | usage("Missing new order of columns") 29 | } 30 | 31 | tokens := strings.Split(*cols, ",") 32 | outn := make([]int, len(tokens)) 33 | 34 | for n := range tokens { 35 | i, err := strconv.Atoi(tokens[n]) 36 | if err != nil { 37 | log.Fatalf("Value not a number:%v\n", tokens[n]) 38 | } 39 | if i < 1 { 40 | log.Fatalf("Columns start at one:%v\n", tokens[n]) 41 | } 42 | outn[n] = i 43 | } 44 | 45 | if *keep { 46 | if !*headers { 47 | log.Fatal("Cannot keep headers you don't have!") 48 | } 49 | } 50 | // open output file 51 | var w *csv.Writer 52 | if *output == "" { 53 | w = csv.NewWriter(os.Stdout) 54 | } else { 55 | fo, foerr := os.Create(*output) 56 | if foerr != nil { 57 | log.Fatal("os.Create() Error:" + foerr.Error()) 58 | } 59 | defer fo.Close() 60 | w = csv.NewWriter(fo) 61 | } 62 | 63 | // open input file 64 | var r *csv.Reader 65 | if *input == "" { 66 | r = csv.NewReader(os.Stdin) 67 | } else { 68 | fi, fierr := os.Open(*input) 69 | if fierr != nil { 70 | log.Fatal("os.Open() Error:" + fierr.Error()) 71 | } 72 | defer fi.Close() 73 | r = csv.NewReader(fi) 74 | } 75 | 76 | // ignore expectations of fields per row 77 | r.FieldsPerRecord = -1 78 | 79 | // read loop for CSV 80 | outs := make([]string, len(tokens)) 81 | 82 | var row uint64 83 | for { 84 | // read the csv file 85 | cells, rerr := r.Read() 86 | if rerr == io.EOF { 87 | break 88 | } 89 | if rerr != nil { 90 | log.Fatalf("csv.Read:\n%v\n", rerr) 91 | } 92 | if row == 0 { 93 | if *headers && *keep { 94 | } else { 95 | row++ 96 | continue 97 | } 98 | } 99 | for n, m := range outn { 100 | outs[n] = cells[m-1] 101 | } 102 | err := w.Write(outs) 103 | if err != nil { 104 | log.Fatalf("csv.Write:\n%v\n", err) 105 | } 106 | row++ 107 | } 108 | w.Flush() 109 | } 110 | 111 | func usage(msg string) { 112 | fmt.Println(msg + "\n") 113 | flag.PrintDefaults() 114 | os.Exit(0) 115 | } 116 | -------------------------------------------------------------------------------- /reordercsv/test1.csv: -------------------------------------------------------------------------------- 1 | A,B,C,D,E,F,G,H,I 2 | 1,1,1,1,1,1,1,1,1 3 | 2,2,2,2,2,2,2,2,2 4 | 3,3,3,3,3,3,3,3,3 5 | 4,4,4,4,4,4,4,4,4 6 | 5,5,5,5,5,5,5,5,5 7 | 6,6,6,6,6,6,6,6,6 8 | 7,7,7,7,7,7,7,7,7 9 | 8,8,8,8,8,8,8,8,8 10 | 9,9,9,9,9,9,9,9,9 11 | -------------------------------------------------------------------------------- /searchcsv/README.md: -------------------------------------------------------------------------------- 1 | # Searchcsv 2 | Use the -help argument to show: 3 | 4 | ``` 5 | $ searchcsv -help 6 | Help Message 7 | 8 | Usage: searchcsv [options] 9 | -c string 10 | Range spec for columns 11 | -headers 12 | CSV has headers (default true) 13 | -help 14 | Show help message 15 | -i string 16 | Input CSV filename; default STDIN 17 | -keep 18 | Keep CSV headers on output (default true) 19 | -o string 20 | Output CSV filename; default STDOUT 21 | -pattern string 22 | Search pattern 23 | -re 24 | Search pattern is a regular expression 25 | -v Omit rather than include matched rows 26 | ``` 27 | Examples: 28 | ``` 29 | $ cat test1.csv 30 | A,B,C 31 | abc,def,Army 32 | one,two,Navy 33 | go,abacus,Marine 34 | Android,Ubuntu,Linux 35 | $ searchcsv -c 1 -pattern "y$" < test1.csv 36 | A,B,C 37 | $ searchcsv -c 3 -pattern "y$" < test1.csv 38 | A,B,C 39 | $ searchcsv -c 3 -pattern "y$" -re=true < test1.csv 40 | A,B,C 41 | abc,def,Army 42 | one,two,Navy 43 | $ searchcsv -c 3 -pattern "[mu][xy]$" -re=true < test1.csv 44 | A,B,C 45 | abc,def,Army 46 | Android,Ubuntu,Linux 47 | $ searchcsv -v -c 3 -pattern "[mu][xy]$" -re=true < test1.csv 48 | A,B,C 49 | one,two,Navy 50 | go,abacus,Marine 51 | ``` 52 | 53 | 54 | -------------------------------------------------------------------------------- /searchcsv/searchcsv.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "flag" 6 | "fmt" 7 | "io" 8 | "log" 9 | "os" 10 | "regexp" 11 | "strings" 12 | 13 | "github.com/mandolyte/csv-utils" 14 | ) 15 | 16 | var cs *rangespec.RangeSpec 17 | var re *regexp.Regexp 18 | 19 | func main() { 20 | pattern := flag.String("pattern", "", "Search pattern") 21 | suppress := flag.Bool("v", false, "Omit rather than include matched rows") 22 | cols := flag.String("c", "", "Range spec for columns") 23 | input := flag.String("i", "", "Input CSV filename; default STDIN") 24 | output := flag.String("o", "", "Output CSV filename; default STDOUT") 25 | headers := flag.Bool("headers", true, "CSV has headers") 26 | keep := flag.Bool("keep", true, "Keep CSV headers on output") 27 | regex := flag.Bool("re", false, "Search pattern is a regular expression") 28 | help := flag.Bool("help", false, "Show help message") 29 | flag.Parse() 30 | 31 | if *help { 32 | usage("Help Message") 33 | os.Exit(0) 34 | } 35 | 36 | /* check parameters */ 37 | if *pattern == "" { 38 | usage("Required: Missing pattern for search") 39 | os.Exit(0) 40 | } 41 | 42 | if *regex { 43 | re = regexp.MustCompile(*pattern) 44 | } 45 | 46 | if *cols != "" { 47 | var cserr error 48 | cs, cserr = rangespec.New(*cols) 49 | if cserr != nil { 50 | log.Fatalf("Invalid column range spec:%v, Error:\n%v\n", *cols, cserr) 51 | } 52 | } 53 | 54 | if *keep { 55 | if !*headers { 56 | log.Fatal("Cannot keep headers you don't have!") 57 | } 58 | } 59 | // open output file 60 | var w *csv.Writer 61 | if *output == "" { 62 | w = csv.NewWriter(os.Stdout) 63 | } else { 64 | fo, foerr := os.Create(*output) 65 | if foerr != nil { 66 | log.Fatal("os.Create() Error:" + foerr.Error()) 67 | } 68 | defer fo.Close() 69 | w = csv.NewWriter(fo) 70 | } 71 | 72 | // open input file 73 | var r *csv.Reader 74 | if *input == "" { 75 | r = csv.NewReader(os.Stdin) 76 | } else { 77 | fi, fierr := os.Open(*input) 78 | if fierr != nil { 79 | log.Fatal("os.Open() Error:" + fierr.Error()) 80 | } 81 | defer fi.Close() 82 | r = csv.NewReader(fi) 83 | } 84 | 85 | // ignore expectations of fields per row 86 | r.FieldsPerRecord = -1 87 | 88 | // read loop for CSV 89 | var row uint64 90 | for { 91 | // read the csv file 92 | cells, rerr := r.Read() 93 | if rerr == io.EOF { 94 | break 95 | } 96 | if rerr != nil { 97 | log.Fatalf("csv.Read:\n%v\n", rerr) 98 | } 99 | if (row == 0) && *headers && *keep { 100 | row = 1 101 | err := w.Write(cells) 102 | if err != nil { 103 | log.Fatalf("csv.Write:\n%v\n", err) 104 | } 105 | continue 106 | } 107 | row++ 108 | // test row/columns for a match 109 | if patternMatches(cells, *pattern, *suppress) { 110 | err := w.Write(cells) 111 | if err != nil { 112 | log.Fatalf("csv.Write:\n%v\n", err) 113 | } 114 | } 115 | } 116 | w.Flush() 117 | } 118 | 119 | func patternMatches(c []string, pattern string, suppress bool) bool { 120 | found := false 121 | for n, v := range c { 122 | if cs == nil { 123 | if re == nil { 124 | found = strings.Contains(v, pattern) 125 | } else { 126 | found = re.MatchString(v) 127 | } 128 | } else { 129 | if cs.InRange(uint64(n + 1)) { 130 | if re == nil { 131 | found = strings.Contains(v, pattern) 132 | } else { 133 | found = re.MatchString(v) 134 | } 135 | } 136 | } 137 | if found { 138 | if suppress { 139 | return false 140 | } 141 | return true 142 | } 143 | } 144 | if suppress { 145 | return true 146 | } 147 | return false 148 | } 149 | 150 | func usage(msg string) { 151 | fmt.Println(msg + "\n") 152 | fmt.Print("Usage: searchcsv [options]\n") 153 | flag.PrintDefaults() 154 | } 155 | -------------------------------------------------------------------------------- /searchcsv/test1.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | abc,def,Army 3 | one,two,Navy 4 | go,abacus,Marine 5 | Android,Ubuntu,Linux 6 | -------------------------------------------------------------------------------- /sortcsv/README.md: -------------------------------------------------------------------------------- 1 | # Sortcsv 2 | This utility will sort a CSV file. However, it is done in-memory 3 | and has limits. 4 | 5 | ## Information 6 | Use the -help argument to show: 7 | 8 | ``` 9 | $ go run sortcsv.go -help 10 | -c string 11 | Comma delimited list of columns to sort (default "1") 12 | -headers 13 | CSV has headers (default true) 14 | -help 15 | Show help message 16 | -i string 17 | CSV file name to sort; default STDIN 18 | -o string 19 | CSV output file name; default STDOUT 20 | -s string 21 | Comma delimited list of letters 'a' or 'd', for ascending or descending (default is ascending) 22 | ``` 23 | 24 | Example: 25 | ``` 26 | $ cat test1.csv 27 | A,B,C 28 | 1,2,3 29 | 4,1,0 30 | 2,1,2 31 | 3,3,1 32 | 3,3,2 33 | $ go run sortcsv.go -c 1,3 -s a,d -i test1.csv 34 | A,B,C 35 | 1,2,3 36 | 2,1,2 37 | 3,3,2 38 | 3,3,1 39 | 4,1,0 40 | $ go run sortcsv.go -c 1,3 -s a,a -i test1.csv 41 | A,B,C 42 | 1,2,3 43 | 2,1,2 44 | 3,3,1 45 | 3,3,2 46 | 4,1,0 47 | $ $ 48 | ``` 49 | 50 | 51 | -------------------------------------------------------------------------------- /sortcsv/sortcsv.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "flag" 6 | "log" 7 | "os" 8 | "sort" 9 | "strconv" 10 | "strings" 11 | ) 12 | 13 | type table struct { 14 | records [][]string 15 | seq []bool 16 | col []int 17 | } 18 | 19 | func (t *table) Len() int { 20 | return len(t.records) 21 | } 22 | 23 | func (t *table) Swap(i, j int) { 24 | t.records[i], t.records[j] = t.records[j], t.records[i] 25 | } 26 | 27 | func (t *table) Less(i, j int) bool { 28 | isless := false 29 | for n := range t.col { 30 | ith := t.records[i][t.col[n]-1] 31 | jth := t.records[j][t.col[n]-1] 32 | if ith == jth { 33 | continue 34 | } 35 | //log.Printf("Compare %v vs %v\n", ith, jth) 36 | if ith < jth { 37 | if t.seq[n] { 38 | isless = true 39 | } else { 40 | isless = false 41 | } 42 | break 43 | } else { 44 | if t.seq[n] { 45 | isless = false 46 | } else { 47 | isless = true 48 | } 49 | break 50 | } 51 | } 52 | //log.Printf("Returning %v\n", isless) 53 | return isless 54 | } 55 | 56 | func main() { 57 | sortseq := flag.String("s", "", "Comma delimited list of letters 'a' or 'd', for ascending or descending (default is ascending)") 58 | sortcol := flag.String("c", "1", "Comma delimited list of columns to sort") 59 | sortinf := flag.String("i", "", "CSV file name to sort; default STDIN") 60 | sortout := flag.String("o", "", "CSV output file name; default STDOUT") 61 | headers := flag.Bool("headers", true, "CSV has headers") 62 | help := flag.Bool("help", false, "Show help message") 63 | flag.Parse() 64 | 65 | if *help { 66 | usage() 67 | } 68 | 69 | // open output file 70 | var w *csv.Writer 71 | if *sortout == "" { 72 | w = csv.NewWriter(os.Stdout) 73 | } else { 74 | fo, foerr := os.Create(*sortout) 75 | if foerr != nil { 76 | log.Fatal("os.Create() Error:" + foerr.Error()) 77 | } 78 | defer fo.Close() 79 | w = csv.NewWriter(fo) 80 | } 81 | 82 | // open input file 83 | var r *csv.Reader 84 | if *sortinf == "" { 85 | r = csv.NewReader(os.Stdin) 86 | } else { 87 | fi, fierr := os.Open(*sortinf) 88 | if fierr != nil { 89 | log.Fatal("os.Open() Error:" + fierr.Error()) 90 | } 91 | defer fi.Close() 92 | r = csv.NewReader(fi) 93 | } 94 | 95 | // ignore expectations of fields per row 96 | r.FieldsPerRecord = -1 97 | 98 | // read into memory 99 | csvall, raerr := r.ReadAll() 100 | if raerr != nil { 101 | log.Fatal("r.ReadAll() Error:" + raerr.Error()) 102 | } 103 | 104 | if *headers { 105 | werr := w.Write(csvall[0]) 106 | if werr != nil { 107 | log.Fatal("w.Write() Error:" + werr.Error()) 108 | } 109 | csvall = csvall[1:] 110 | } 111 | 112 | // parse columns input 113 | collist := strings.Split(*sortcol, ",") 114 | seqlist := strings.Split(*sortseq, ",") 115 | clist := make([]int, len(collist)) 116 | slist := make([]bool, len(collist)) 117 | for i := range collist { 118 | x, err := strconv.Atoi(collist[i]) 119 | if err != nil { 120 | log.Fatalf("Element of column sort list is not an integer:%v\n", collist[i]) 121 | } 122 | if x == 0 { 123 | log.Fatal("Column numbers begin at 1 not zero\n") 124 | } 125 | clist[i] = x 126 | if clist[i] > len(csvall[0]) { 127 | log.Fatalf("Column is larger than number of cells in row:%v\n", clist[i]) 128 | } 129 | // now set the sort sequence for the column 130 | if i < len(seqlist) { 131 | if seqlist[i] == "a" || seqlist[i] == "" { 132 | slist[i] = true 133 | } else if seqlist[i] == "d" { 134 | slist[i] = false 135 | } else { 136 | log.Fatal("Sort sequence must 'a' for ascending or 'd' for descending\n") 137 | } 138 | } else { 139 | slist[i] = true 140 | } 141 | } 142 | 143 | /* debugging */ 144 | /* 145 | log.Printf("Sort columns:%v\n", clist) 146 | log.Printf("Sequence columns: %v\n", slist) 147 | */ 148 | t := &table{records: csvall, seq: slist, col: clist} 149 | 150 | //sort.Sort(t) 151 | sort.Stable(t) 152 | werr := w.WriteAll(t.records) 153 | if werr != nil { 154 | log.Fatal("w.WriteAll() Error:" + werr.Error()) 155 | } 156 | w.Flush() 157 | 158 | } 159 | 160 | func usage() { 161 | flag.PrintDefaults() 162 | os.Exit(0) 163 | } 164 | -------------------------------------------------------------------------------- /sortcsv/test1.csv: -------------------------------------------------------------------------------- 1 | A,B,C 2 | 1,2,3 3 | 4,1,0 4 | 2,1,2 5 | 3,3,1 6 | 3,3,2 7 | -------------------------------------------------------------------------------- /splitcsv/README.md: -------------------------------------------------------------------------------- 1 | # Splitcsv 2 | Use the -help argument to show: 3 | ``` 4 | $ go run splitcsv.go -help 5 | Help Message 6 | 7 | Usage: splitcsv [options] input.csv output.csv 8 | -c string 9 | Range spec for columns 10 | -headers 11 | CSV has headers (default true) 12 | -help 13 | Show usage message 14 | -i string 15 | Input CSV filename; default STDIN 16 | -keep 17 | Keep CSV headers on output (default true) 18 | -o string 19 | Output CSV filename; default STDOUT 20 | -r string 21 | Range spec for rows 22 | $ cat test1.csv 23 | A,B,C,D,E,F,G,H,I 24 | 1,1,1,1,1,1,1,1,1 25 | 2,2,2,2,2,2,2,2,2 26 | 3,3,3,3,3,3,3,3,3 27 | 4,4,4,4,4,4,4,4,4 28 | 5,5,5,5,5,5,5,5,5 29 | 6,6,6,6,6,6,6,6,6 30 | 7,7,7,7,7,7,7,7,7 31 | 8,8,8,8,8,8,8,8,8 32 | 9,9,9,9,9,9,9,9,9 33 | $ go run splitcsv.go -c 4-6 -r 4-6 < test1.csv 34 | D,E,F 35 | 4,4,4 36 | 5,5,5 37 | 6,6,6 38 | $ 39 | ``` 40 | 41 | To upgrade to the new mod system: 42 | 43 | 1. created a subfolder named rangespec 44 | 2. copied the rangespec.go from project into it. 45 | 3. ran the "go mod" command: 46 | ``` 47 | $ go mod init github.com/mandolyte/csv-utils/splitcsv 48 | ``` 49 | 4. then changed my import to be: 50 | ```go 51 | import ( 52 | "encoding/csv" 53 | "flag" 54 | "fmt" 55 | "io" 56 | "log" 57 | "os" 58 | "github.com/mandolyte/csv-utils/splitcsv/rangespec" 59 | ) 60 | ``` 61 | 62 | -------------------------------------------------------------------------------- /splitcsv/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/mandolyte/csv-utils/splitcsv 2 | 3 | go 1.19 4 | -------------------------------------------------------------------------------- /splitcsv/rangespec/rangespec.go: -------------------------------------------------------------------------------- 1 | package rangespec 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "strconv" 7 | "strings" 8 | ) 9 | 10 | // RangeSpec parses a range specification, such as: 11 | // 1,3,5-8,12- 12 | // It will return a slice of RangeSpec, being two ints, 13 | // a start and a stop 14 | // Ranges start at 1, not zero. 15 | type RangeSpec struct { 16 | pairs []pair 17 | spec string 18 | Max uint64 19 | } 20 | type pair struct { 21 | start, stop uint64 22 | } 23 | 24 | // New takes a range specification string and 25 | // returns a slice of RangeSpec structs 26 | func New(r string) (*RangeSpec, error) { 27 | // remove any whitespace as a convenience 28 | r = strings.Replace(r, " ", "", -1) 29 | ret := new(RangeSpec) 30 | ret.pairs = make([]pair, 0) 31 | ret.spec = r 32 | tokens := strings.Split(r, ",") 33 | for n, val := range tokens { 34 | //fmt.Printf("Working on %v at %v\n", val, n) 35 | // does val have a dash? 36 | if strings.Contains(val, "-") { 37 | // split on dash 38 | ends := strings.Split(val, "-") 39 | if len(ends) > 2 { 40 | return nil, fmt.Errorf("RangeSpec: malformed specification:%v", val) 41 | } 42 | if ends[1] != "" { 43 | //fmt.Print("ends[] greater than 1\n") 44 | end1, err := strconv.ParseUint(ends[0], 10, 64) 45 | if err != nil { 46 | return nil, fmt.Errorf("RangeSpec: not a number:%v\n%v", ends[0], err) 47 | } 48 | end2, err := strconv.ParseUint(ends[1], 10, 64) 49 | if err != nil { 50 | return nil, fmt.Errorf("RangeSpec: not a number:%v\n%v", ends[1], err) 51 | } 52 | var rs pair 53 | rs.start = end1 54 | rs.stop = end2 55 | ret.pairs = append(ret.pairs, rs) 56 | } else { 57 | //fmt.Print("ends[] == 1\n") 58 | if n+1 != len(tokens) { 59 | return nil, fmt.Errorf("RangeSpec: open range must be last:%v", val) 60 | } 61 | end1, err := strconv.ParseUint(ends[0], 10, 64) 62 | if err != nil { 63 | return nil, fmt.Errorf("RangeSpec: not a number:%v\n%v", ends[0], err) 64 | } 65 | var rs pair 66 | rs.start = end1 67 | rs.stop = math.MaxUint64 68 | ret.pairs = append(ret.pairs, rs) 69 | } 70 | continue 71 | } else { 72 | end1, err := strconv.ParseUint(val, 10, 64) 73 | if err != nil { 74 | return nil, fmt.Errorf("RangeSpec: not a number:%v\n%v", val, err) 75 | } 76 | var rs pair 77 | rs.start = end1 78 | rs.stop = end1 79 | ret.pairs = append(ret.pairs, rs) 80 | } 81 | } 82 | // ensure ascending specification 83 | for i := 0; i < len(ret.pairs); i++ { 84 | if i == 0 { 85 | if ret.pairs[i].start == 0 { 86 | return nil, fmt.Errorf("RangeSpec: range must be larger zero: %v", ret.pairs[i].start) 87 | } 88 | } 89 | if ret.pairs[i].start > ret.pairs[i].stop { 90 | return nil, fmt.Errorf("RangeSpec: start (%v) must be equal or less than stop (%v)", ret.pairs[i].start, ret.pairs[i].stop) 91 | } 92 | if i > 0 { 93 | if ret.pairs[i].start <= ret.pairs[i-1].stop { 94 | return nil, fmt.Errorf("RangeSpec: start (%v) must be greater than previous stop (%v)", ret.pairs[i].start, ret.pairs[i-1].stop) 95 | } 96 | } 97 | } 98 | // set the maximum row number 99 | ret.Max = ret.pairs[len(ret.pairs)-1].stop 100 | return ret, nil 101 | } 102 | 103 | // InRange will test whehter a number is in the range specification 104 | func (rs *RangeSpec) InRange(num uint64) bool { 105 | for _, val := range rs.pairs { 106 | if num >= val.start && num <= val.stop { 107 | return true 108 | } 109 | } 110 | return false 111 | } 112 | -------------------------------------------------------------------------------- /splitcsv/splitcsv.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "flag" 6 | "fmt" 7 | "io" 8 | "log" 9 | "os" 10 | "github.com/mandolyte/csv-utils/splitcsv/rangespec" 11 | ) 12 | 13 | var rs *rangespec.RangeSpec 14 | var cs *rangespec.RangeSpec 15 | 16 | func main() { 17 | rows := flag.String("r", "1-", "Range spec for rows") 18 | cols := flag.String("c", "1-", "Range spec for columns") 19 | input := flag.String("i", "", "Input CSV filename; default STDIN") 20 | output := flag.String("o", "", "Output CSV filename; default STDOUT") 21 | headers := flag.Bool("headers", true, "CSV has headers") 22 | keep := flag.Bool("keep", true, "Keep CSV headers on output") 23 | help := flag.Bool("help", false, "Show usage message") 24 | flag.Parse() 25 | 26 | if *help { 27 | usage("Help Message") 28 | os.Exit(0) 29 | } 30 | 31 | /* check parameters */ 32 | if *rows == "" { 33 | usage("Required: Missing range specification for rows") 34 | os.Exit(0) 35 | } 36 | 37 | rs, rserr := rangespec.New(*rows) 38 | if rserr != nil { 39 | log.Fatalf("Invalid row range spec:%v, Error:\n%v\n", *rows, rserr) 40 | } 41 | 42 | if *cols != "" { 43 | var cserr error 44 | cs, cserr = rangespec.New(*cols) 45 | if cserr != nil { 46 | log.Fatalf("Invalid column range spec:%v, Error:\n%v\n", *cols, cserr) 47 | } 48 | } 49 | 50 | if *keep { 51 | if !*headers { 52 | log.Fatal("Cannot keep headers you don't have!") 53 | } 54 | } 55 | // open output file 56 | var w *csv.Writer 57 | if *output == "" { 58 | w = csv.NewWriter(os.Stdout) 59 | } else { 60 | fo, foerr := os.Create(*output) 61 | if foerr != nil { 62 | log.Fatal("os.Create() Error:" + foerr.Error()) 63 | } 64 | defer fo.Close() 65 | w = csv.NewWriter(fo) 66 | } 67 | 68 | // open input file 69 | var r *csv.Reader 70 | if *input == "" { 71 | r = csv.NewReader(os.Stdin) 72 | } else { 73 | fi, fierr := os.Open(*input) 74 | if fierr != nil { 75 | log.Fatal("os.Open() Error:" + fierr.Error()) 76 | } 77 | defer fi.Close() 78 | r = csv.NewReader(fi) 79 | } 80 | 81 | // ignore expectations of fields per row 82 | r.FieldsPerRecord = -1 83 | r.LazyQuotes = true 84 | 85 | // read loop for CSV 86 | var row uint64 87 | for { 88 | // read the csv file 89 | cells, rerr := r.Read() 90 | if rerr == io.EOF { 91 | break 92 | } 93 | if rerr != nil { 94 | log.Fatalf("csv.Read:\n%v\n", rerr) 95 | } 96 | if (row == 0) && *headers && *keep { 97 | row = 1 98 | err := writeRow(w, cells, cs) 99 | if err != nil { 100 | log.Fatalf("csv.Write:\n%v\n", err) 101 | } 102 | continue 103 | } 104 | row++ 105 | if rs.InRange(row - 1) { 106 | err := writeRow(w, cells, cs) 107 | if err != nil { 108 | log.Fatalf("csv.Write:\n%v\n", err) 109 | } 110 | } 111 | if row > rs.Max { 112 | break 113 | } 114 | } 115 | w.Flush() 116 | } 117 | 118 | func writeRow(w *csv.Writer, cells []string, cs *rangespec.RangeSpec) error { 119 | if cs == nil { 120 | err := w.Write(cells) 121 | if err != nil { 122 | return err 123 | } 124 | return nil 125 | } 126 | var outcells []string 127 | for m, c := range cells { 128 | if cs.InRange(uint64(m + 1)) { 129 | outcells = append(outcells, c) 130 | } 131 | } 132 | if len(outcells) == 0 { 133 | return fmt.Errorf("Column range outside actual columns:%v\n\n", cs) 134 | } 135 | err := w.Write(outcells) 136 | if err != nil { 137 | return err 138 | } 139 | return nil 140 | } 141 | 142 | func usage(msg string) { 143 | fmt.Println(msg + "\n") 144 | fmt.Print("Usage: splitcsv [options] input.csv output.csv\n") 145 | flag.PrintDefaults() 146 | } 147 | -------------------------------------------------------------------------------- /splitcsv/test1.csv: -------------------------------------------------------------------------------- 1 | A,B,C,D,E,F,G,H,I 2 | 1,1,1,1,1,1,1,1,1 3 | 2,2,2,2,2,2,2,2,2 4 | 3,3,3,3,3,3,3,3,3 5 | 4,4,4,4,4,4,4,4,4 6 | 5,5,5,5,5,5,5,5,5 7 | 6,6,6,6,6,6,6,6,6 8 | 7,7,7,7,7,7,7,7,7 9 | 8,8,8,8,8,8,8,8,8 10 | 9,9,9,9,9,9,9,9,9 11 | -------------------------------------------------------------------------------- /splitcsv/test2.csv: -------------------------------------------------------------------------------- 1 | A,B,C,D,E,F,G,H,I 2 | 1,1,1,1,1,1,1,1,1 3 | 2,2,2,2,2,2,2,2,2 4 | ="0003",0003,'0003,3,3,3,3,3,3 5 | 4,4,4,4,4,4,4,4,4 6 | 5,5,5,5,5,5,5,5,5 7 | 6,6,6,6,6,6,6,6,6 8 | 7,7,7,7,7,7,7,7,7 9 | 8,8,8,8,8,8,8,8,8 10 | 9,9,9,9,9,9,9,9,9 11 | -------------------------------------------------------------------------------- /transformcsv/README.md: -------------------------------------------------------------------------------- 1 | # Transformcsv 2 | This utility will take an input CSV and transform it using a text template. 3 | The template is applied to every row in the CSV. The column headers are 4 | required. The column header names are used as map keys to the values 5 | used by the template. 6 | 7 | Use the -help argument to show: 8 | ``` 9 | $ go run transformcsv.go -help 10 | Help Message 11 | 12 | -help 13 | Show usage message 14 | -i string 15 | Input CSV filename; default STDIN 16 | -m string 17 | Name of map in template; default is m (default "m") 18 | -o string 19 | Output filename; default STDOUT 20 | -t string 21 | Template to use for transformation 22 | $ 23 | ``` 24 | 25 | Given template: 26 | ``` 27 | $ cat template1.txt 28 | INSERT INTO atable (column1, column2, column3) 29 | VALUES ('{{index .mp "column1"}}', '{{index .mp "column2"}}', '{{index .mp "column3"}}') 30 | ; 31 | ``` 32 | 33 | Given input CSV: 34 | ``` 35 | $ cat test1.csv 36 | column1,column2,column3 37 | v1.1,v1.2,v1.3 38 | v2.1,v2.1,v2.3 39 | $ 40 | ``` 41 | 42 | Then this command will generate SQL INSERT statements for each row 43 | in the CSV file. 44 | ``` 45 | $ go run transformcsv.go -i test1.csv -t template1.txt -m mp -o trans1.sql 46 | $ cat trans1.sql 47 | INSERT INTO atable (column1, column2, column3) 48 | VALUES ('v1.1', 'v1.2', 'v1.3') 49 | ; 50 | INSERT INTO atable (column1, column2, column3) 51 | VALUES ('v2.1', 'v2.1', 'v2.3') 52 | ; 53 | $ 54 | ``` -------------------------------------------------------------------------------- /transformcsv/template1.txt: -------------------------------------------------------------------------------- 1 | INSERT INTO atable (column1, column2, column3) 2 | VALUES ('{{index .mp "column1"}}', '{{index .mp "column2"}}', '{{index .mp "column3"}}') 3 | ; 4 | -------------------------------------------------------------------------------- /transformcsv/test1.csv: -------------------------------------------------------------------------------- 1 | column1,column2,column3 2 | v1.1,v1.2,v1.3 3 | v2.1,v2.1,v2.3 4 | -------------------------------------------------------------------------------- /transformcsv/trans1.sql: -------------------------------------------------------------------------------- 1 | INSERT INTO atable (column1, column2, column3) 2 | VALUES ('v1.1', 'v1.2', 'v1.3') 3 | ; 4 | INSERT INTO atable (column1, column2, column3) 5 | VALUES ('v2.1', 'v2.1', 'v2.3') 6 | ; 7 | -------------------------------------------------------------------------------- /transformcsv/transformcsv.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "encoding/csv" 6 | "flag" 7 | "fmt" 8 | "io" 9 | "io/ioutil" 10 | "log" 11 | "text/template" 12 | "os" 13 | ) 14 | 15 | 16 | func main() { 17 | input := flag.String("i", "", "Input CSV filename; default STDIN") 18 | tmplfile := flag.String("t","", "Template to use for transformation") 19 | output := flag.String("o", "", "Output filename; default STDOUT") 20 | mapname := flag.String("m", "m", "Name of map in template; default is m") 21 | help := flag.Bool("help", false, "Show usage message") 22 | flag.Parse() 23 | 24 | if *help { 25 | usage("Help Message") 26 | } 27 | 28 | if *tmplfile == "" { 29 | usage("Template file name missing") 30 | } 31 | templatebytes, terr := ioutil.ReadFile(*tmplfile) 32 | if terr != nil { 33 | log.Fatal("Template file read error:"+terr.Error()) 34 | } 35 | template := string(templatebytes) 36 | 37 | 38 | // open output file 39 | var w *bufio.Writer 40 | if *output == "" { 41 | w = bufio.NewWriter(os.Stdout) 42 | } else { 43 | fo, foerr := os.Create(*output) 44 | if foerr != nil { 45 | log.Fatal("os.Create() Error:" + foerr.Error()) 46 | } 47 | defer fo.Close() 48 | w = bufio.NewWriter(fo) 49 | } 50 | 51 | // open input file 52 | var r *csv.Reader 53 | if *input == "" { 54 | r = csv.NewReader(os.Stdin) 55 | } else { 56 | fi, fierr := os.Open(*input) 57 | if fierr != nil { 58 | log.Fatal("os.Open() Error:" + fierr.Error()) 59 | } 60 | defer fi.Close() 61 | r = csv.NewReader(fi) 62 | } 63 | 64 | // ignore expectations of fields per row 65 | r.FieldsPerRecord = -1 66 | r.LazyQuotes = true 67 | 68 | // read loop for CSV 69 | var hdrs []string 70 | var row uint64 71 | for { 72 | // read the csv file 73 | cells, rerr := r.Read() 74 | if rerr == io.EOF { 75 | break 76 | } 77 | if rerr != nil { 78 | log.Fatalf("csv.Read:\n%v\n", rerr) 79 | } 80 | if (row == 0) { 81 | row = 1 82 | hdrs = append(hdrs, cells...) 83 | continue 84 | } 85 | row++ 86 | err := writeTemplate(w, template, *mapname, hdrs, cells) 87 | if err != nil { 88 | log.Fatal("Write error to output:"+err.Error()) 89 | } 90 | } 91 | w.Flush() 92 | } 93 | 94 | func writeTemplate(w io.Writer, tmpltext,amap string, hdrs, cells []string) error { 95 | // logic flow 96 | // 1. create a map using the hdrs as keys and cells as values 97 | // 2. apply the map to the template 98 | // 3. write it out 99 | 100 | m := make(map[string]string) 101 | for i := range hdrs { 102 | m[hdrs[i]] = cells [i] 103 | } 104 | 105 | t := template.Must(template.New("").Parse(tmpltext)) 106 | err := t.Execute(w, map[string]interface{}{amap: m}) 107 | if err != nil { 108 | log.Fatal("Template Execute() error:"+err.Error()) 109 | } 110 | return nil 111 | } 112 | 113 | func usage(msg string) { 114 | fmt.Println(msg + "\n") 115 | flag.PrintDefaults() 116 | os.Exit(0) 117 | } 118 | --------------------------------------------------------------------------------