├── LICENSE
├── README.md
├── build-all.sh
├── build-list.txt
├── catcsv
    ├── README.md
    ├── catcsv.go
    ├── test1.csv
    └── test2.csv
├── comparecsv
    ├── README.md
    ├── both.csv
    ├── comparecsv.go
    ├── f1only.csv
    ├── f2only.csv
    ├── test1.csv
    ├── test2.csv
    └── test3.csv
├── cryptcsv
    ├── README.md
    ├── cryptcsv.go
    ├── test1-decrypted-both.csv
    ├── test1-decrypted.csv
    ├── test1-encrypted.csv
    └── test1.csv
├── dedupcsv
    ├── README.md
    ├── dedupcsv.go
    └── test1.csv
├── diffcsv
    ├── README.md
    ├── diffcsv.go
    ├── input1.csv
    ├── input2.csv
    ├── input3.csv
    ├── input4.csv
    ├── input5.csv
    ├── input6.csv
    ├── input7.csv
    ├── input8.csv
    ├── test1.csv
    ├── test2.csv
    ├── test3.csv
    ├── test6.csv
    └── test7.csv
├── editcsv
    ├── README.md
    ├── editcsv.go
    └── test1.csv
├── obfuscatecsv
    ├── README.md
    ├── obfuscatecsv.go
    └── test1.csv
├── pivotcsv
    ├── README.md
    ├── pivotcsv.go
    ├── test1.csv
    └── test2.csv
├── rangespec.go
├── rangespec_test.go
├── recursecsv
    ├── README.md
    ├── recursecsv.go
    ├── test1.csv
    └── test2.csv
├── recursedata
    └── recursedata.go
├── reordercsv
    ├── README.md
    ├── reordercsv.go
    └── test1.csv
├── searchcsv
    ├── README.md
    ├── searchcsv.go
    └── test1.csv
├── sortcsv
    ├── README.md
    ├── sortcsv.go
    └── test1.csv
├── splitcsv
    ├── README.md
    ├── go.mod
    ├── rangespec
    │   └── rangespec.go
    ├── splitcsv.go
    ├── test1.csv
    └── test2.csv
└── transformcsv
    ├── README.md
    ├── template1.txt
    ├── test1.csv
    ├── trans1.sql
    └── transformcsv.go


/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # CSV Utilities
 2 | 
 3 | This repo has a collection of CSV utilities to manipulate
 4 | CSV files. Here is a brief description of each. Each utility 
 5 | is in its own folder and has its own README.
 6 | - catcsv: concatenate two CSV files
 7 | - comparecsv: compare two CSV files
 8 | - dedupcsv: remove duplicates in a CSV file
 9 | - diffcsv: shows differences between two CSV files
10 | - editcsv: alter contents of a CSV; regexp replace supported
11 | - obfuscatecsv: obscures content in a regular fashion
12 | - pivotcsv: do a pivot table operation
13 | - recursecsv: recursively process hierarchical data; supports 
14 | the Oracle list of hierarchical functions
15 | - reordercsv: alters order of columns of a CSV file
16 | - searchcsv: outputs matching rows of a CSV file; regexp 
17 | supported
18 | - sortcsv: sorts a CSV file
19 | - splitcsv: splits a CSV by columns and/or rows
20 | - transformcsv: using a "text/template", will transform a CSV 
21 | by applying the template for each row
22 | 
23 | Each utility has its own README with examples.
24 | 
25 | To install `go get github.com/mandolyte/csv-utils`. 
26 | 
27 | Afterwards you can use `go install` to compile the ones of
28 | interest or just use `go run`.
29 | 
30 | To install all of them: `sh build_all.sh`.
31 | 
32 | To Do:
33 | - document recursedata.go


--------------------------------------------------------------------------------
/build-all.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh 
 2 | echo build all
 3 | for i in `cat build-list.txt`
 4 | do
 5 | echo Working on $i in `pwd`
 6 | cd $i 
 7 | go install $i.go 
 8 | cd ..
 9 | done
10 | 


--------------------------------------------------------------------------------
/build-list.txt:
--------------------------------------------------------------------------------
 1 | catcsv
 2 | comparecsv
 3 | dedupcsv
 4 | diffcsv
 5 | editcsv
 6 | obfuscatecsv
 7 | pivotcsv
 8 | recursecsv
 9 | recursedata
10 | reordercsv
11 | searchcsv
12 | sortcsv
13 | splitcsv
14 | transformcsv


--------------------------------------------------------------------------------
/catcsv/README.md:
--------------------------------------------------------------------------------
 1 | # Catcsv
 2 | This utility will concatenate CSV files.
 3 | 
 4 | Use -help to show:
 5 | ```
 6 | $ catcsv -help
 7 | Help Message
 8 | 
 9 | Usage: catcsv [options] input1.csv input2.csv ...
10 |   -f    Force concatenation of different width CSV files
11 |   -headers
12 |         CSV has headers (default true)
13 |   -help
14 |         Show usage message
15 |   -keep
16 |         Keep CSV headers on output (default true)
17 |   -o string
18 |         Output CSV filename; default STDOUT
19 | ```
20 | 
21 | ## Examples
22 | This first example shows an error due to different number of columns
23 | in the input files.
24 | ```
25 | $ go run catcsv.go test1.csv test2.csv 
26 | 2017/12/01 09:18:16 Individual file row counts include header row
27 | 2017/12/01 09:18:16 Total row count does not include header rows
28 | 2017/12/01 09:18:16 File test1.csv had 4 rows
29 | 2017/12/01 09:18:16 csv.Read:
30 | line 1, column 0: wrong number of fields in line
31 | exit status 1
32 | $
33 | ```
34 | This example shows use of the force option to concatenate anyway.
35 | ```
36 | $ go run catcsv.go -f test1.csv test2.csv 
37 | 2017/12/01 09:18:28 Individual file row counts include header row
38 | 2017/12/01 09:18:28 Total row count does not include header rows
39 | 2017/12/01 09:18:28 File test1.csv had 4 rows
40 | 2017/12/01 09:18:28 File test2.csv had 4 rows
41 | A,B
42 | 1,2
43 | 3,4
44 | 5,6
45 | 1,2,3
46 | 4,5,6
47 | 7,8,9
48 | 2017/12/01 09:18:28 Total rows in output  has 6 rows
49 | $ 
50 | ```


--------------------------------------------------------------------------------
/catcsv/catcsv.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"encoding/csv"
  5 | 	"flag"
  6 | 	"fmt"
  7 | 	"io"
  8 | 	"log"
  9 | 	"os"
 10 | )
 11 | 
 12 | func main() {
 13 | 	output := flag.String("o", "", "Output CSV filename; default STDOUT")
 14 | 	headers := flag.Bool("headers", true, "CSV has headers")
 15 | 	keep := flag.Bool("keep", true, "Keep CSV headers on output")
 16 | 	help := flag.Bool("help", false, "Show usage message")
 17 | 	force := flag.Bool("f", false, "Force concatenation of different width CSV files")
 18 | 	flag.Parse()
 19 | 
 20 | 	if *help {
 21 | 		usage("Help Message")
 22 | 		os.Exit(0)
 23 | 	}
 24 | 
 25 | 	if len(flag.Args()) < 1 {
 26 | 		usage("No files specified to concatenate!")
 27 | 		os.Exit(0)
 28 | 	}
 29 | 
 30 | 	if !*headers {
 31 | 		*keep = false
 32 | 		log.Println("If no headers, keep option is auto-set to false")
 33 | 	}
 34 | 
 35 | 	// open output file
 36 | 	var w *csv.Writer
 37 | 	if *output == "" {
 38 | 		w = csv.NewWriter(os.Stdout)
 39 | 	} else {
 40 | 		fo, foerr := os.Create(*output)
 41 | 		if foerr != nil {
 42 | 			log.Fatal("os.Create() Error:" + foerr.Error())
 43 | 		}
 44 | 		defer fo.Close()
 45 | 		w = csv.NewWriter(fo)
 46 | 	}
 47 | 	log.Println("Individual file row counts include header row")
 48 | 	log.Println("Total row count does not include header rows")
 49 | 
 50 | 	var total uint64
 51 | 	var firstfilecolumncount int
 52 | 	for n, f := range flag.Args() {
 53 | 		// open input file
 54 | 		var r *csv.Reader
 55 | 		fi, fierr := os.Open(f)
 56 | 		if fierr != nil {
 57 | 			log.Fatal("os.Open() Error:" + fierr.Error())
 58 | 		}
 59 | 		defer fi.Close()
 60 | 		r = csv.NewReader(fi)
 61 | 		if n == 0 {
 62 | 			r.FieldsPerRecord = 0
 63 | 		} else {
 64 | 			if *force {
 65 | 				r.FieldsPerRecord = -1
 66 | 			} else {
 67 | 				r.FieldsPerRecord = firstfilecolumncount
 68 | 			}
 69 | 		}
 70 | 
 71 | 		// read loop for CSV files
 72 | 		var row uint64
 73 | 		row = 0
 74 | 		for {
 75 | 			// read the csv file
 76 | 			cells, rerr := r.Read()
 77 | 			if rerr == io.EOF {
 78 | 				break
 79 | 			}
 80 | 			if rerr != nil {
 81 | 				log.Fatalf("csv.Read:\n%v\n", rerr)
 82 | 			}
 83 | 			if n == 0 && row == 0 {
 84 | 				firstfilecolumncount = len(cells)
 85 | 				if *headers {
 86 | 					if *keep {
 87 | 						err := w.Write(cells)
 88 | 						if err != nil {
 89 | 							log.Fatalf("csv.Write:\n%v\n", err)
 90 | 						}
 91 | 					}
 92 | 				} else {
 93 | 					err := w.Write(cells)
 94 | 					if err != nil {
 95 | 						log.Fatalf("csv.Write:\n%v\n", err)
 96 | 					}
 97 | 				}
 98 | 				row++
 99 | 				continue
100 | 			}
101 | 			if n > 0 && row == 0 {
102 | 				if *headers {
103 | 					row++
104 | 					continue // omit headers on all but first file
105 | 				}
106 | 			}
107 | 			row++
108 | 			err := w.Write(cells)
109 | 			if err != nil {
110 | 				log.Fatalf("csv.Write:\n%v\n", err)
111 | 			}
112 | 		}
113 | 		log.Printf("File %v had %v rows", f, row)
114 | 		total += row
115 | 	}
116 | 	w.Flush()
117 | 	if *headers {
118 | 		total -= uint64(len(flag.Args())) // don't include header rows in counts
119 | 	}
120 | 	log.Printf("Total rows in output %v has %v rows", *output, total)
121 | }
122 | 
123 | func usage(msg string) {
124 | 	fmt.Println(msg + "\n")
125 | 	fmt.Print("Usage: catcsv [options] input1.csv input2.csv ...\n")
126 | 	flag.PrintDefaults()
127 | }
128 | 


--------------------------------------------------------------------------------
/catcsv/test1.csv:
--------------------------------------------------------------------------------
1 | A,B
2 | 1,2
3 | 3,4
4 | 5,6
5 | 


--------------------------------------------------------------------------------
/catcsv/test2.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | 1,2,3
3 | 4,5,6
4 | 7,8,9
5 | 


--------------------------------------------------------------------------------
/comparecsv/README.md:
--------------------------------------------------------------------------------
 1 | # Comparecsv
 2 | This utility compares two CSV files using Merkle Tree conceps, 
 3 | namely, hashes of the rows are the basis of the compare logic.
 4 | 
 5 | It is written to enable large CSV file comparisons. The only
 6 | memory consumed are maps of the row hashes.
 7 | 
 8 | Use -help to show:
 9 | ```
10 | $ go run comparecsv.go -help
11 |   -f1 string
12 |     	First CSV file name to compare
13 |   -f2 string
14 |     	Second CSV file name to compare
15 |   -help
16 |     	Show help message
17 | NOTE 1: Headers on the CSV files are expected.
18 | NOTE 2: Duplicates are omitted in all outputs.
19 | $
20 | ```
21 | 
22 | It produces three output files, which are currently fixed:
23 | - f1only.csv contains the rows unique to file 1
24 | - f2only.csv contains the rows unique to file 2
25 | - both.csv contains the rows common to both input files
26 | 
27 | ## Examples
28 | A simple test to validate basic operations:
29 | ```
30 | $ go run comparecsv.go -f1 test2.csv -f2 test3.csv 
31 | 2017/12/04 11:15:29 Start at 2017-12-04 11:15:29.853501341 -0500 EST m=+0.000326007
32 | 2017/12/04 11:15:29 Number of rows in file 1:3
33 | 2017/12/04 11:15:29 Number of rows in file 2:3
34 | 2017/12/04 11:15:29 Number of rows in both files:2
35 | 2017/12/04 11:15:29 Number of rows ONLY in file 2:1
36 | 2017/12/04 11:15:29 Number of rows ONLY in file 1:1
37 | 2017/12/04 11:15:29 End at 2017-12-04 11:15:29.85432992 -0500 EST m=+0.001154546
38 | 2017/12/04 11:15:29 Elapsed time 828.715µs
39 | $
40 | ```
41 | 
42 | A performance test using wine review public data set at
43 | https://www.kaggle.com/zynicide/wine-reviews/data. Minor
44 | changes are made to the original to make test1.csv.
45 | ```
46 | $ comparecsv -f1 winemag-data-130k-v2.csv -f2 test1.csv 
47 | 2017/12/04 11:18:40 Start at 2017-12-04 11:18:40.631915938 -0500 EST m=+0.000781184
48 | 2017/12/04 11:18:43 Number of rows in file 1:129971
49 | 2017/12/04 11:18:49 Number of rows in file 2:129969
50 | 2017/12/04 11:18:49 Number of rows in both files:129968
51 | 2017/12/04 11:18:49 Number of rows ONLY in file 2:1
52 | 2017/12/04 11:18:51 Number of rows ONLY in file 1:3
53 | 2017/12/04 11:18:51 End at 2017-12-04 11:18:51.356633528 -0500 EST m=+10.725498483
54 | 2017/12/04 11:18:51 Elapsed time 10.72471747s
55 | $ 
56 | ```


--------------------------------------------------------------------------------
/comparecsv/both.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | 1,2,3
3 | 7,8,9
4 | 


--------------------------------------------------------------------------------
/comparecsv/comparecsv.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"crypto/sha1"
  5 | 	"encoding/csv"
  6 | 	"flag"
  7 | 	"fmt"
  8 | 	"io"
  9 | 	"log"
 10 | 	"os"
 11 | 	"time"
 12 | )
 13 | 
 14 | var f1name = flag.String("f1", "", "First CSV file name to compare")
 15 | var f2name = flag.String("f2", "", "Second CSV file name to compare")
 16 | var help = flag.Bool("help", false, "Show help message")
 17 | 
 18 | /*
 19 | 	Design Overview:
 20 | 	There will be two input files to compare and there will be
 21 | 	three output files created:
 22 | 		- f1only.csv having rows unique to f1
 23 | 		- f2only.csv having rows unique to f2
 24 | 		- both.csv having rows common to both f1 and f2
 25 | 	a) The first file will be read and hash computed per row
 26 | 	b) The hash will be a key in a map with value struct{}
 27 | 	c) The second file is read and then per row:
 28 | 		- the hash value is computed
 29 | 		- if hash exists in first file's map, then the row is
 30 | 		written to the "both" output file
 31 | 		- Otherwise, it is written to the f2 only file
 32 | 		- the hash is then stored similar to f1 in a map
 33 | 	d) can the f1 map be reclaimed by the GC??
 34 | 	e) Now f1 is read a second time and per row:
 35 | 		- the hash value is computed
 36 | 		- if hash exists in second file's map, then continue, since
 37 | 		it is already written to the both csv file
 38 | 		- otherwise, write to the f1 only file
 39 | */
 40 | 
 41 | func main() {
 42 | 	flag.Parse()
 43 | 
 44 | 	if *help {
 45 | 		usage()
 46 | 	}
 47 | 
 48 | 	if len(flag.Args()) > 0 {
 49 | 		usage()
 50 | 	}
 51 | 
 52 | 	now := time.Now()
 53 | 	log.Printf("Start at %v", now)
 54 | 
 55 | 	// open first input file
 56 | 	var r1 *csv.Reader
 57 | 	f1, f1err := os.Open(*f1name)
 58 | 	if f1err != nil {
 59 | 		log.Fatal("os.Open() Error:" + f1err.Error())
 60 | 	}
 61 | 	r1 = csv.NewReader(f1)
 62 | 
 63 | 	// open second input file
 64 | 	var r2 *csv.Reader
 65 | 	f2, f2err := os.Open(*f2name)
 66 | 	if f2err != nil {
 67 | 		log.Fatal("os.Open() Error:" + f2err.Error())
 68 | 	}
 69 | 	r2 = csv.NewReader(f2)
 70 | 
 71 | 	/*********************************************************/
 72 | 	// do a quick check on columns first
 73 | 	// if not the same, then log error and exit
 74 | 
 75 | 	// second file
 76 | 	hdrs2, rerr := r2.Read()
 77 | 	if rerr == io.EOF {
 78 | 		log.Fatal("File 2 is empty", rerr)
 79 | 	}
 80 | 	if rerr != nil {
 81 | 		log.Fatalf("csv.Read:\n%v\n", rerr)
 82 | 	}
 83 | 	numcols2 := len(hdrs2)
 84 | 
 85 | 	// first file
 86 | 	hdrs1, rerr := r1.Read()
 87 | 	if rerr == io.EOF {
 88 | 		log.Fatal("File 1 is empty", rerr)
 89 | 	}
 90 | 	if rerr != nil {
 91 | 		log.Fatalf("csv.Read:\n%v\n", rerr)
 92 | 	}
 93 | 	numcols1 := len(hdrs1)
 94 | 
 95 | 	if numcols1 != numcols2 {
 96 | 		log.Fatalf("Different number of columns:%v vs. %v",
 97 | 			numcols1, numcols2)
 98 | 	}
 99 | 
100 | 	// set expectations of fields per row
101 | 	r1.FieldsPerRecord = numcols1
102 | 	r2.FieldsPerRecord = numcols1
103 | 
104 | 	// open f1only file
105 | 	var wf1 *csv.Writer
106 | 	wf1o, wf1oerr := os.Create("f1only.csv")
107 | 	if wf1oerr != nil {
108 | 		log.Fatal("os.Create() Error:" + wf1oerr.Error())
109 | 	}
110 | 	defer wf1o.Close()
111 | 	wf1 = csv.NewWriter(wf1o)
112 | 	err := wf1.Write(hdrs1)
113 | 	if err != nil {
114 | 		log.Fatalf("Headers 1 Error:\n%v\n", err)
115 | 	}
116 | 
117 | 	// open f2only file
118 | 	var wf2 *csv.Writer
119 | 	wf2o, wf2oerr := os.Create("f2only.csv")
120 | 	if wf2oerr != nil {
121 | 		log.Fatal("os.Create() Error:" + wf2oerr.Error())
122 | 	}
123 | 	defer wf2o.Close()
124 | 	wf2 = csv.NewWriter(wf2o)
125 | 	err = wf2.Write(hdrs2)
126 | 	if err != nil {
127 | 		log.Fatalf("Headers 2 Error:\n%v\n", err)
128 | 	}
129 | 
130 | 	// open both file
131 | 	var both *csv.Writer
132 | 	botho, bothoerr := os.Create("both.csv")
133 | 	if bothoerr != nil {
134 | 		log.Fatal("os.Create() Error:" + bothoerr.Error())
135 | 	}
136 | 	defer botho.Close()
137 | 	both = csv.NewWriter(botho)
138 | 	err = both.Write(hdrs1)
139 | 	if err != nil {
140 | 		log.Fatalf("Both Headers Error:\n%v\n", err)
141 | 	}
142 | 
143 | 	f1map := make(map[string]struct{})
144 | 	// read first file
145 | 	// read loop for CSV
146 | 	rows := 0
147 | 	for {
148 | 		// read the csv file
149 | 		cells, rerr := r1.Read()
150 | 		if rerr == io.EOF {
151 | 			break
152 | 		}
153 | 		if rerr != nil {
154 | 			log.Fatalf("csv.Read:\n%v\n", rerr)
155 | 		}
156 | 		key := computeSliceSha1(cells)
157 | 		f1map[key] = struct{}{}
158 | 		rows++
159 | 	}
160 | 	log.Printf("Number of rows in file 1:%v\n", rows)
161 | 	f1.Close()
162 | 
163 | 	f2map := make(map[string]struct{})
164 | 	// read second file
165 | 	// read loop for CSV
166 | 	rows = 0
167 | 	bothCount := 0
168 | 	f2Count := 0
169 | 	for {
170 | 		// read the csv file
171 | 		cells, rerr := r2.Read()
172 | 		if rerr == io.EOF {
173 | 			break
174 | 		}
175 | 		if rerr != nil {
176 | 			log.Fatalf("csv.Read:\n%v\n", rerr)
177 | 		}
178 | 		key := computeSliceSha1(cells)
179 | 		f2map[key] = struct{}{}
180 | 
181 | 		// does this row exist in file 1?
182 | 		_, f1Exists := f1map[key]
183 | 		if f1Exists {
184 | 			err := both.Write(cells)
185 | 			if err != nil {
186 | 				log.Fatalf("both Write Error:\n%v\n", err)
187 | 			}
188 | 			bothCount++
189 | 		} else {
190 | 			err := wf2.Write(cells)
191 | 			if err != nil {
192 | 				log.Fatalf("both Write Error:\n%v\n", err)
193 | 			}
194 | 			f2Count++
195 | 		}
196 | 		rows++
197 | 	}
198 | 	// flush the CSV writers
199 | 	both.Flush()
200 | 	wf2.Flush()
201 | 	f2.Close()
202 | 	botho.Close()
203 | 	log.Printf("Number of rows in file 2:%v\n", rows)
204 | 	log.Printf("Number of rows in both files:%v\n", bothCount)
205 | 	log.Printf("Number of rows ONLY in file 2:%v\n", f2Count)
206 | 
207 | 	// finally re-read file 1 and match up
208 | 	// open first input file
209 | 	f1, f1err = os.Open(*f1name)
210 | 	if f1err != nil {
211 | 		log.Fatal("os.Open() Error:" + f1err.Error())
212 | 	}
213 | 	defer f1.Close()
214 | 	r1 = csv.NewReader(f1)
215 | 	f1Count := 0
216 | 	isHeader := true
217 | 	for {
218 | 		// read the csv file
219 | 		cells, rerr := r1.Read()
220 | 		if rerr == io.EOF {
221 | 			break
222 | 		}
223 | 		if rerr != nil {
224 | 			log.Fatalf("csv.Read:\n%v\n", rerr)
225 | 		}
226 | 		if isHeader {
227 | 			isHeader = false
228 | 			continue
229 | 		}
230 | 		key := computeSliceSha1(cells)
231 | 		// does this row exist in file 2?
232 | 		_, f2Exists := f2map[key]
233 | 		if f2Exists {
234 | 			continue
235 | 		} else {
236 | 			err := wf1.Write(cells)
237 | 			if err != nil {
238 | 				log.Fatalf("both Write Error:\n%v\n", err)
239 | 			}
240 | 			f1Count++
241 | 		}
242 | 	}
243 | 	log.Printf("Number of rows ONLY in file 1:%v\n", f1Count)
244 | 	wf1.Flush()
245 | 	f1.Close()
246 | 	stop := time.Now()
247 | 	elapsed := time.Since(now)
248 | 
249 | 	log.Printf("End at %v", stop)
250 | 	log.Printf("Elapsed time %v", elapsed)
251 | 
252 | }
253 | 
254 | func usage() {
255 | 	flag.PrintDefaults()
256 | 	fmt.Println("NOTE 1: Headers on the CSV files are expected.")
257 | 	fmt.Println("NOTE 2: Duplicates are omitted in all outputs.")
258 | 	os.Exit(0)
259 | }
260 | 
261 | func computeSliceSha1(c []string) string {
262 | 	h := sha1.New()
263 | 	for _, v := range c {
264 | 		if v == "" {
265 | 			v = "#empty"
266 | 		}
267 | 		io.WriteString(h, v)
268 | 	}
269 | 	return string(h.Sum(nil))
270 | }
271 | 
272 | /* snippets
273 | 
274 | package main
275 | 
276 | import (
277 | 	"crypto/sha1"
278 | 	"fmt"
279 | 	"io"
280 | )
281 | 
282 | func main() {
283 | 	h := sha1.New()
284 | 	io.WriteString(h, "His money is twice tainted:")
285 | 	io.WriteString(h, " 'taint yours and 'taint mine.")
286 | 	fmt.Printf("% x", h.Sum(nil))
287 | }
288 | */
289 | 


--------------------------------------------------------------------------------
/comparecsv/f1only.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | 4,5,6
3 | 


--------------------------------------------------------------------------------
/comparecsv/f2only.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | d,e,f
3 | 


--------------------------------------------------------------------------------
/comparecsv/test1.csv:
--------------------------------------------------------------------------------
1 | A,B
2 | 1,2
3 | 3,4
4 | 5,6
5 | 


--------------------------------------------------------------------------------
/comparecsv/test2.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | 1,2,3
3 | 4,5,6
4 | 7,8,9
5 | 


--------------------------------------------------------------------------------
/comparecsv/test3.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | 1,2,3
3 | 7,8,9
4 | d,e,f
5 | 


--------------------------------------------------------------------------------
/cryptcsv/README.md:
--------------------------------------------------------------------------------
 1 | # Cryptcsv
 2 | 
 3 | This routine will encrypt/decrypt the selected column(s) using the supplied key.
 4 | 
 5 | Use -help to show:
 6 | ```
 7 | $ cryptcsv -help
 8 | Help Message
 9 | 
10 | Usage: cryptcsv [options]
11 |   -c string
12 |         Range spec for columns to obfuscate
13 |   -d string
14 |         Decrpytion key; required if decrypting
15 |   -e string
16 |         Encrpytion key; required if encrypting
17 |   -headers
18 |         CSV has headers (default true)
19 |   -help
20 |         Show help message
21 |   -i string
22 |         Input CSV filename; default STDIN
23 |   -keep
24 |         Keep CSV headers on output (default true)
25 |   -o string
26 |         Output CSV filename; default STDOUT
27 | $ 
28 | ```
29 | 
30 | # Examples
31 | Encrypt the first and last columns:
32 | ```
33 | $ cat test1.csv
34 | A,B,C
35 | abc,def,Army
36 | def,abc,Navy
37 | ijk,abc,Navy
38 | zyz,def,Army
39 | abc,abc,AF
40 | $ cryptcsv -i test1.csv -c 1,3 -e abcdef -o test1-encrypted.csv
41 | $ cat test1-encrypted.csv
42 | $ cat test1-encrypted.csv 
43 | A,B,C
44 | LjZwW4XHoiXeg/5S9PItOmw7LQ==,def,cIJhrzIIYEXgAbcTPKEYpsKIJcw=
45 | FaKIeKSORfKhZO+Sm3Rg3vEQKQ==,abc,/1WXOfya+LjAHWB2xr4zqo8Qmks=
46 | EmNIdOIqir9TiT4mAf6o1vFYrQ==,abc,zao7y8CJgzW+G1ZSjRWelhIzNhw=
47 | uowhS1km7U7B7k+aa8bWz0lUgw==,def,ShIYZBMV+PFG8JTud/FFRVGjtVQ=
48 | V8WKIWunjW12OKC+MCcqlZqH2w==,abc,4ydx/qW9LierW6pQFeILRRtV
49 | $
50 | ```
51 | 
52 | Now decrypt just the last column:
53 | ```
54 | $ cryptcsv -i test1-encrypted.csv -o test1-decrypted.csv -c 3 -d abcdef
55 | $ cat test1-decrypted.csv
56 | ,B,C
57 | LjZwW4XHoiXeg/5S9PItOmw7LQ==,def,Army
58 | FaKIeKSORfKhZO+Sm3Rg3vEQKQ==,abc,Navy
59 | EmNIdOIqir9TiT4mAf6o1vFYrQ==,abc,Navy
60 | uowhS1km7U7B7k+aa8bWz0lUgw==,def,Army
61 | V8WKIWunjW12OKC+MCcqlZqH2w==,abc,AF
62 | $
63 | $ cksum test1.csv test1-decrypted.csv
64 | ```
65 | 
66 | Now decrypt both:
67 | ```
68 | $ cryptcsv -i test1-encrypted.csv -o test1-decrypted-both.csv -d abcdef -c 1,3 
69 | $ cat test1-decrypted-both.csv 
70 | A,B,C
71 | abc,def,Army
72 | def,abc,Navy
73 | ijk,abc,Navy
74 | zyz,def,Army
75 | abc,abc,AF
76 | $ cksum test1.csv test1-decrypted-both.csv 
77 | 2235581246 69 test1.csv
78 | 2235581246 69 test1-decrypted-both.csv
79 | $
80 | ``` 
81 | 


--------------------------------------------------------------------------------
/cryptcsv/cryptcsv.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"crypto/aes"
  5 | 	"crypto/cipher"
  6 | 	"crypto/rand"
  7 | 	"encoding/base64"
  8 | 	"encoding/csv"
  9 | 	"flag"
 10 | 	"fmt"
 11 | 	"io"
 12 | 	"log"
 13 | 	"os"
 14 | 
 15 | 	"github.com/mandolyte/csv-utils"
 16 | )
 17 | 
 18 | var cs *rangespec.RangeSpec
 19 | 
 20 | func main() {
 21 | 	e := flag.String("e", "", "Encrpytion key; required if encrypting")
 22 | 	d := flag.String("d", "", "Decrpytion key; required if decrypting")
 23 | 	cols := flag.String("c", "", "Range spec for columns to obfuscate")
 24 | 	input := flag.String("i", "", "Input CSV filename; default STDIN")
 25 | 	output := flag.String("o", "", "Output CSV filename; default STDOUT")
 26 | 	headers := flag.Bool("headers", true, "CSV has headers")
 27 | 	keep := flag.Bool("keep", true, "Keep CSV headers on output")
 28 | 	help := flag.Bool("help", false, "Show help message")
 29 | 	flag.Parse()
 30 | 
 31 | 	if *help {
 32 | 		usage("Help Message")
 33 | 	}
 34 | 
 35 | 	/* check parameters */
 36 | 	if len(*e)+len(*d) == 0 {
 37 | 		usage("Specify either -e or -d with key to encrypt or decrypt")
 38 | 	}
 39 | 
 40 | 	if *cols != "" {
 41 | 		var cserr error
 42 | 		cs, cserr = rangespec.New(*cols)
 43 | 		if cserr != nil {
 44 | 			log.Fatalf("Invalid column range spec:%v, Error:\n%v\n", *cols, cserr)
 45 | 		}
 46 | 	}
 47 | 
 48 | 	if *keep {
 49 | 		if !*headers {
 50 | 			log.Fatal("Cannot keep headers you don't have!")
 51 | 		}
 52 | 	}
 53 | 
 54 | 	// open output file
 55 | 	var w *csv.Writer
 56 | 	if *output == "" {
 57 | 		w = csv.NewWriter(os.Stdout)
 58 | 	} else {
 59 | 		fo, foerr := os.Create(*output)
 60 | 		if foerr != nil {
 61 | 			log.Fatal("os.Create() Error:" + foerr.Error())
 62 | 		}
 63 | 		defer fo.Close()
 64 | 		w = csv.NewWriter(fo)
 65 | 	}
 66 | 
 67 | 	// open input file
 68 | 	var r *csv.Reader
 69 | 	if *input == "" {
 70 | 		r = csv.NewReader(os.Stdin)
 71 | 	} else {
 72 | 		fi, fierr := os.Open(*input)
 73 | 		if fierr != nil {
 74 | 			log.Fatal("os.Open() Error:" + fierr.Error())
 75 | 		}
 76 | 		defer fi.Close()
 77 | 		r = csv.NewReader(fi)
 78 | 	}
 79 | 
 80 | 	// ignore expectations of fields per row
 81 | 	r.FieldsPerRecord = -1
 82 | 
 83 | 	var key string
 84 | 	if *e != "" {
 85 | 		key = *e
 86 | 	} else {
 87 | 		key = *d
 88 | 	}
 89 | 
 90 | 	keydata := make([]byte, 32)
 91 | 	copy(keydata, key[:])
 92 | 
 93 | 	// read loop for CSV
 94 | 	var row uint64
 95 | 	for {
 96 | 		// read the csv file
 97 | 		cells, rerr := r.Read()
 98 | 		if rerr == io.EOF {
 99 | 			break
100 | 		}
101 | 		if rerr != nil {
102 | 			log.Fatalf("csv.Read:\n%v\n", rerr)
103 | 		}
104 | 		if (row == 0) && *headers && *keep {
105 | 			row = 1
106 | 			err := w.Write(cells)
107 | 			if err != nil {
108 | 				log.Fatalf("csv.Write:\n%v\n", err)
109 | 			}
110 | 			continue
111 | 		}
112 | 		row++
113 | 		// test columns for a match to encrypt/decrypt
114 | 		for n, v := range cells {
115 | 			if cs.InRange(uint64(n + 1)) {
116 | 				// encrpyt?
117 | 				if *d == "" {
118 | 					// decrypt key not provided so we encrypt
119 | 					cells[n] = encrypt(v, keydata)
120 | 				} else {
121 | 					// decrypt key is provided so we decrypt
122 | 					cells[n] = decrypt(v, keydata)
123 | 				}
124 | 			}
125 | 		}
126 | 		err := w.Write(cells)
127 | 		if err != nil {
128 | 			log.Fatalf("csv.Write:\n%v\n", err)
129 | 		}
130 | 	}
131 | 	w.Flush()
132 | }
133 | 
134 | func usage(msg string) {
135 | 	fmt.Println(msg + "\n")
136 | 	fmt.Print("Usage: cryptcsv [options]\n")
137 | 	flag.PrintDefaults()
138 | 	os.Exit(0)
139 | }
140 | 
141 | func decrypt(b64 string, key []byte) string {
142 | 	// convert base64 back to byte
143 | 	data, err := base64.StdEncoding.DecodeString(b64)
144 | 	if err != nil {
145 | 		log.Fatalf("base64 decode error:", err)
146 | 	}
147 | 
148 | 	// Byte array of the string
149 | 	//ciphertext := []byte(cipherstring)
150 | 
151 | 	// Create the AES cipher
152 | 	block, err := aes.NewCipher(key)
153 | 	if err != nil {
154 | 		log.Fatalf("aes.NewCipher() error:", err)
155 | 	}
156 | 
157 | 	// if the text is too small, then it is incorrect
158 | 	if len(data) < aes.BlockSize {
159 | 		log.Fatal("Text too short error\n")
160 | 	}
161 | 
162 | 	// Get the 16 byte iv
163 | 	iv := data[:aes.BlockSize]
164 | 
165 | 	// Remove it
166 | 	data = data[aes.BlockSize:]
167 | 
168 | 	// Return a decrypted stream
169 | 	stream := cipher.NewCFBDecrypter(block, iv)
170 | 
171 | 	// Decrypt bytes from ciphertext
172 | 	stream.XORKeyStream(data, data)
173 | 
174 | 	return string(data)
175 | }
176 | 
177 | func encrypt(text string, key []byte) string {
178 | 	// Byte array of the string
179 | 	bytes := []byte(text)
180 | 
181 | 	// Create the AES cipher
182 | 	block, err := aes.NewCipher(key)
183 | 	if err != nil {
184 | 		log.Fatalf("aes.NewCipher() error: %v\n", err)
185 | 	}
186 | 
187 | 	// Create slice of (16 + bytes) length
188 | 	ciphertext := make([]byte, aes.BlockSize+len(bytes))
189 | 
190 | 	// Include the IV at the beginning
191 | 	// Slice of first 16 bytes
192 | 	iv := ciphertext[:aes.BlockSize]
193 | 
194 | 	// Write 16 rand bytes to fill iv
195 | 	if _, err := io.ReadFull(rand.Reader, iv); err != nil {
196 | 		log.Fatalf("io.ReadFull() error: %v\n", err)
197 | 	}
198 | 
199 | 	// Return an encrypted stream
200 | 	stream := cipher.NewCFBEncrypter(block, iv)
201 | 
202 | 	// Encrypt bytes to ciphertext after the iv
203 | 	stream.XORKeyStream(ciphertext[aes.BlockSize:], bytes)
204 | 
205 | 	// now encode to base64
206 | 	b64 := base64.StdEncoding.EncodeToString(ciphertext)
207 | 	return b64
208 | }
209 | 


--------------------------------------------------------------------------------
/cryptcsv/test1-decrypted-both.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | abc,def,Army
3 | def,abc,Navy
4 | ijk,abc,Navy
5 | zyz,def,Army
6 | abc,abc,AF
7 | 


--------------------------------------------------------------------------------
/cryptcsv/test1-decrypted.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | LjZwW4XHoiXeg/5S9PItOmw7LQ==,def,Army
3 | FaKIeKSORfKhZO+Sm3Rg3vEQKQ==,abc,Navy
4 | EmNIdOIqir9TiT4mAf6o1vFYrQ==,abc,Navy
5 | uowhS1km7U7B7k+aa8bWz0lUgw==,def,Army
6 | V8WKIWunjW12OKC+MCcqlZqH2w==,abc,AF
7 | 


--------------------------------------------------------------------------------
/cryptcsv/test1-encrypted.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | LjZwW4XHoiXeg/5S9PItOmw7LQ==,def,cIJhrzIIYEXgAbcTPKEYpsKIJcw=
3 | FaKIeKSORfKhZO+Sm3Rg3vEQKQ==,abc,/1WXOfya+LjAHWB2xr4zqo8Qmks=
4 | EmNIdOIqir9TiT4mAf6o1vFYrQ==,abc,zao7y8CJgzW+G1ZSjRWelhIzNhw=
5 | uowhS1km7U7B7k+aa8bWz0lUgw==,def,ShIYZBMV+PFG8JTud/FFRVGjtVQ=
6 | V8WKIWunjW12OKC+MCcqlZqH2w==,abc,4ydx/qW9LierW6pQFeILRRtV
7 | 


--------------------------------------------------------------------------------
/cryptcsv/test1.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | abc,def,Army
3 | def,abc,Navy
4 | ijk,abc,Navy
5 | zyz,def,Army
6 | abc,abc,AF
7 | 


--------------------------------------------------------------------------------
/dedupcsv/README.md:
--------------------------------------------------------------------------------
 1 | ## Dedupcsv
 2 | This utility removed duplicate rows. The input must be sorted!
 3 | 
 4 | Use -help to show:
 5 | ```
 6 | $ dedupcsv -help
 7 | Help Message
 8 | 
 9 | Usage: dedupcsv [options]
10 | NOTE: must be sorted; only compares row against prior row.  -headers
11 |         CSV has headers (default true)
12 |   -help
13 |         Show help message
14 |   -i string
15 |         Input CSV filename; default STDIN
16 |   -keep
17 |         Keep CSV headers on output (default true)
18 |   -o string
19 |         Output CSV filename; default STDOUT
20 | ```
21 | 
22 | For example:
23 | ```
24 | $ cat test1.csv 
25 | A,B,C
26 | 1,2,3
27 | 1,2,3
28 | 4,5,6
29 | 4,5,6
30 | d,e,f
31 | d,e,f
32 | d,e,f
33 | $ go run dedupcsv.go < test1.csv 
34 | A,B,C
35 | 1,2,3
36 | 4,5,6
37 | d,e,f
38 | $
39 | ```
40 | 


--------------------------------------------------------------------------------
/dedupcsv/dedupcsv.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"encoding/csv"
  5 | 	"flag"
  6 | 	"fmt"
  7 | 	"io"
  8 | 	"log"
  9 | 	"os"
 10 | 
 11 | )
 12 | 
 13 | func main() {
 14 | 	input := flag.String("i", "", "Input CSV filename; default STDIN")
 15 | 	output := flag.String("o", "", "Output CSV filename; default STDOUT")
 16 | 	headers := flag.Bool("headers", true, "CSV has headers")
 17 | 	keep := flag.Bool("keep", true, "Keep CSV headers on output")
 18 | 	help := flag.Bool("help", false, "Show help message")
 19 | 	flag.Parse()
 20 | 
 21 | 	if *help {
 22 | 		usage("Help Message")
 23 | 		os.Exit(0)
 24 | 	}
 25 | 
 26 | 	if !*headers {
 27 | 		if *keep {
 28 | 			log.Fatal("Cannot keep headers you don't have!")
 29 | 		}
 30 | 	}
 31 | 	// open output file
 32 | 	var w *csv.Writer
 33 | 	if *output == "" {
 34 | 		w = csv.NewWriter(os.Stdout)
 35 | 	} else {
 36 | 		fo, foerr := os.Create(*output)
 37 | 		if foerr != nil {
 38 | 			log.Fatal("os.Create() Error:" + foerr.Error())
 39 | 		}
 40 | 		defer fo.Close()
 41 | 		w = csv.NewWriter(fo)
 42 | 	}
 43 | 
 44 | 	// open input file
 45 | 	var r *csv.Reader
 46 | 	if *input == "" {
 47 | 		r = csv.NewReader(os.Stdin)
 48 | 	} else {
 49 | 		fi, fierr := os.Open(*input)
 50 | 		if fierr != nil {
 51 | 			log.Fatal("os.Open() Error:" + fierr.Error())
 52 | 		}
 53 | 		defer fi.Close()
 54 | 		r = csv.NewReader(fi)
 55 | 	}
 56 | 
 57 | 	// ignore expectations of fields per row
 58 | 	r.FieldsPerRecord = -1
 59 | 
 60 | 	// read loop for CSV
 61 | 	var row uint64
 62 | 	var priorRow []string
 63 | 	for {
 64 | 		// read the csv file
 65 | 		cells, rerr := r.Read()
 66 | 		if rerr == io.EOF {
 67 | 			break
 68 | 		}
 69 | 		if rerr != nil {
 70 | 			log.Fatalf("csv.Read:\n%v\n", rerr)
 71 | 		}
 72 | 		if row == 0 {
 73 | 			priorRow = make([]string,len(cells))
 74 | 			_ = copy(priorRow,cells)
 75 | 		}
 76 | 		if (row == 0) && *headers && *keep {
 77 | 			row = 1
 78 | 			err := w.Write(cells)
 79 | 			if err != nil {
 80 | 				log.Fatalf("csv.Write:\n%v\n", err)
 81 | 			}
 82 | 			continue
 83 | 		}
 84 | 		
 85 | 		areEqual := testEq(priorRow,cells)
 86 | 		if areEqual {
 87 | 			continue
 88 | 		}
 89 | 		
 90 | 		err := w.Write(cells)
 91 | 		if err != nil {
 92 | 			log.Fatalf("csv.Write:\n%v\n", err)
 93 | 		}
 94 | 		
 95 | 		priorRow = make([]string,len(cells))
 96 | 		_ = copy(priorRow,cells)
 97 | 
 98 | 		row++
 99 | 	}
100 | 	w.Flush()
101 | }
102 | 
103 | func testEq(a, b []string) bool {
104 | 
105 |     if a == nil && b == nil { 
106 |         return true; 
107 |     }
108 | 
109 |     if a == nil || b == nil { 
110 |         return false; 
111 |     }
112 | 
113 |     if len(a) != len(b) {
114 |         return false
115 |     }
116 | 
117 |     for i := range a {
118 |         if a[i] != b[i] {
119 |             return false
120 |         }
121 |     }
122 | 
123 |     return true
124 | }
125 | 
126 | 
127 | func usage(msg string) {
128 | 	fmt.Println(msg + "\n")
129 | 	fmt.Print("Usage: uniqcsv [options]\n")
130 | 	fmt.Print("NOTE: must be sorted; only compares row against prior row.")
131 | 	flag.PrintDefaults()
132 | }
133 | 


--------------------------------------------------------------------------------
/dedupcsv/test1.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | 1,2,3
3 | 1,2,3
4 | 4,5,6
5 | 4,5,6
6 | d,e,f
7 | d,e,f
8 | d,e,f
9 | 


--------------------------------------------------------------------------------
/diffcsv/README.md:
--------------------------------------------------------------------------------
  1 | # Diffcsv
  2 | 
  3 | Todo:
  4 | - Considering adding a "map" option where you can supply a JSON formatted map of value transformations. Possibly using regualar expressions as matching values.
  5 | 
  6 | Changes (2018-11-06):
  7 | - Renamed the alias parameters as "alias1" and "alias2" with defaults of "f1" and "f2"
  8 | - Added `trimSpace` and `ignoreCase` options. The ignore case option applies to the *key* values.
  9 | 
 10 | Changes (2018-10-31):
 11 | - Added aliasing option of input files; default is DF1 and DF2 as before
 12 | - Added option to add numbers to column headers to make it easier to 
 13 | reference columns with differences
 14 | 
 15 | Use the -help argument to show:
 16 | ```
 17 | $ diffcsv -help
 18 | 
 19 | Usage: diffcsv [options]
 20 |   -alias1 string
 21 |         Alias for first input file; default F1 (default "F1")
 22 |   -alias2 string
 23 |         Alias for second input file; default F2 (default "F2")
 24 |   -colnums
 25 |         Add difference column numbers to headers
 26 |   -f1 string
 27 |         First CSV file name to compare
 28 |   -f2 string
 29 |         Second CSV file name to compare
 30 |   -help
 31 |         Show help message
 32 |   -ignoreCase
 33 |         Ignore case when comparing; default true (default true)
 34 |   -key int
 35 |         Key column in input CSVs (first is 1); must be unique
 36 |   -noeq
 37 |         Suppress matches, showing only differences
 38 |   -o string
 39 |         Output CSV file for differences
 40 |   -ondupFirst
 41 |         On duplicate key, keep first one
 42 |   -ondupLast
 43 |         On duplicate key, keep last  one
 44 |   -trimSpace
 45 |         Ignore leading and trailing spaces when comparing; default true (default true)
 46 | 
 47 | 	Detailed Help:
 48 | 	Inputs:
 49 | 		- a key column
 50 | 		- two input filenames
 51 | 		- an output filename
 52 | 	There will be two input files to compare and there will be
 53 | 	one output file created:
 54 | 	a) The first file will be read and stored into a map
 55 | 	b) The second file will be read and stored into a map
 56 | 	c) It is an error if a file has the same key value on two rows.
 57 | 	Keys must be unique within each file. 
 58 | 	Note that key column number is one based, not zero based!
 59 | 	NOTE! if duplicate keys exist, then there are options to keep
 60 | 	the first or to keep the last one. Default is to error out.
 61 | 	d) Then all keys from both inputs are combined/deduped/sorted
 62 | 	e) Then we range over the combined keyset and output a new CSV
 63 | 	that has a new status column as the first column and the other columns
 64 | 	from the inputs as the remaining columns.
 65 | 	f) the new status column has the following values:
 66 | 	- EQ meaning that the values for the key are same in both input files
 67 | 	- IN=1 meaning that the key and values are only in input file #1
 68 | 	- IN=2 similar for input file #2
 69 | 	- DFn=x,y,..,z where n is either 1 or 2; followed by a comma delimited 
 70 | 	list of column numbers where the values for the key do not match.
 71 | 	Note that the DF statuses always come in pairs, one for each input file.
 72 | 	g) Limitations:
 73 | 	- both input files must have the same number of columns
 74 | 	- both must have a header row and the headers must be the same
 75 | ```
 76 | 
 77 | ## Normal Cases
 78 | 
 79 | Compare two identical files (using same file for both inputs):
 80 | ```
 81 | $ cat input1.csv
 82 | A,B,C
 83 | X,1,1
 84 | Y,2,2
 85 | Z,3,3
 86 | $ go run diffcsv.go -key 1 -f1 input1.csv -f2 input1.csv -o test1.csv
 87 | 2018/10/08 06:46:55 Start: Oct  8 06:46:55.040
 88 | 2018/10/08 06:46:55 Processing input #1:input1.csv
 89 | 2018/10/08 06:46:55 Number of rows in file input1.csv:3
 90 | 2018/10/08 06:46:55 Processing input #2:input1.csv
 91 | 2018/10/08 06:46:55 Number of rows in file input1.csv:3
 92 | 2018/10/08 06:46:55 Number of combined unique keys:3
 93 | 2018/10/08 06:46:55 End: Oct  8 06:46:55.041
 94 | 2018/10/08 06:46:55 Elapsed time 842.333µs
 95 | 2018/10/08 06:46:55 ------- Summary -------
 96 | 2018/10/08 06:46:55 Equal Count: 3
 97 | 2018/10/08 06:46:55 Key Diff Count: 0
 98 | 2018/10/08 06:46:55 Unique to input #1: 0
 99 | 2018/10/08 06:46:55 Unique to input #2: 0
100 | $ cat test1.csv
101 | STATUS,A,B,C
102 | EQ,X,1,1
103 | EQ,Y,2,2
104 | EQ,Z,3,3
105 | ```
106 | 
107 | Compare two files where keys are ok, but values are different:
108 | ```
109 | $ cat input3.csv
110 | A,B,C
111 | X,1,1
112 | Y,2,2
113 | Z,9,9
114 | $ go run diffcsv.go -key 1 -f1 input1.csv -f2 input3.csv -o test3.csv
115 | ... elided ...
116 | $ cat test2.csv
117 | STATUS,A,B,C
118 | EQ,X,1,1
119 | EQ,Y,2,2
120 | "DF1=2,3",Z,3,3
121 | "DF2=2,3",Z,9,9
122 | ```
123 | 
124 | Same as above, but show only differences; use aliases and column numbers:
125 | ```
126 | $ go run diffcsv.go -key 1 -f1 input1.csv -f2 input3.csv \
127 | 	-o test3.csv -noeq \
128 | 	-df1 i1 -df2 i2 -colnums
129 | 2018/10/31 07:03:26 Start: Oct 31 07:03:26.298
130 | 2018/10/31 07:03:26 Processing input #1:input1.csv
131 | 2018/10/31 07:03:26 Number of rows in file input1.csv:3
132 | 2018/10/31 07:03:26 Processing input #2:input3.csv
133 | 2018/10/31 07:03:26 Number of rows in file input3.csv:3
134 | 2018/10/31 07:03:26 Number of combined unique keys:3
135 | 2018/10/31 07:03:26 End: Oct 31 07:03:26.300
136 | 2018/10/31 07:03:26 Elapsed time 1.9993ms
137 | 2018/10/31 07:03:26 ------- Summary -------
138 | 2018/10/31 07:03:26 Equal Count: 2
139 | 2018/10/31 07:03:26 Key Diff Count: 1
140 | 2018/10/31 07:03:26 Unique to input #1: 0
141 | 2018/10/31 07:03:26 Unique to input #2: 0
142 | $ cat test3.csv
143 | STATUS,1-A,2-B,3-C
144 | "i1=2,3",Z,3,3
145 | "i2=2,3",Z,9,9
146 | $ 
147 | ```
148 | 
149 | Compare two files where keys are not the same:
150 | ```
151 | $ cat input2.csv
152 | A,B,C
153 | X,1,1
154 | Y,2,2
155 | W,3,3
156 | $ go run diffcsv.go -key 1 -f1 input1.csv -f2 input2.csv -o test3.csv
157 | ... elided ...
158 | $ cat test3.csv
159 | STATUS,A,B,C
160 | IN=2,W,3,3
161 | EQ,X,1,1
162 | EQ,Y,2,2
163 | IN=1,Z,3,3
164 | ```
165 | 
166 | Compare two files where trim space and ignore case are needed:
167 | ```
168 | $ cat input7.csv
169 | A,B,C
170 | X,1,1
171 | Y,2,3
172 | W,3,3
173 | $ cat input8.csv
174 | A,B,C
175 | x,1,1
176 |  Y ,2,3
177 |  w ,3,3
178 | $ go run diffcsv.go -key 1 -f1 input7.csv -f2 input8.csv -alias1 f1 -alias2 f2 -trimSpace=true -ignoreCase=false -o test7.csv
179 | 2018/11/06 13:37:09 Start: Nov  6 13:37:09.884
180 | 2018/11/06 13:37:09 Processing input #1:input7.csv
181 | 2018/11/06 13:37:09 Number of rows in file input7.csv:3
182 | 2018/11/06 13:37:09 Processing input #2:input8.csv
183 | 2018/11/06 13:37:09 Number of rows in file input8.csv:3
184 | 2018/11/06 13:37:09 Number of combined unique keys:5
185 | 2018/11/06 13:37:09 End: Nov  6 13:37:09.886
186 | 2018/11/06 13:37:09 Elapsed time 1.9977ms
187 | 2018/11/06 13:37:09 ------- Summary -------
188 | 2018/11/06 13:37:09 Equal Count: 1
189 | 2018/11/06 13:37:09 Key Diff Count: 0
190 | 2018/11/06 13:37:09 Unique to input #1: 2
191 | 2018/11/06 13:37:09 Unique to input #2: 2
192 | $ cat test7.csv
193 | STATUS,A,B,C
194 | IN=f1,W,3,3
195 | IN=f1,X,1,1
196 | EQ,Y,2,3
197 | IN=f2,w,3,3
198 | IN=f2,x,1,1
199 | ```
200 | 
201 | ## Error Conditions
202 | 
203 | Compare two files with headers that don't match:
204 | ```
205 | $ cat input4.csv
206 | A,B,D
207 | X,1,1
208 | Y,2,2
209 | Z,9,9
210 | $ go run diffcsv.go -key 1 -f1 input1.csv -f2 input4.csv -o test4.csv
211 | 2018/10/04 21:25:36 Start: Oct  4 21:25:36.905
212 | 2018/10/04 21:25:36 Headers are not the same on input files
213 | exit status 1
214 | $
215 | ```
216 | 
217 | Compare two files that don't the same number of columns:
218 | ```
219 | $ cat input5.csv
220 | A,B,C,D
221 | X,1,1,1
222 | Y,2,2,2
223 | Z,9,9,9
224 | $ go run diffcsv.go -key 1 -f1 input1.csv -f2 input5.csv -o test5.csv
225 | 2018/10/04 21:27:24 Start: Oct  4 21:27:24.851
226 | 2018/10/04 21:27:24 Different number of columns:3 vs. 4
227 | exit status 1
228 | $
229 | ```
230 | 
231 | Compare two files where one has a non-unique key:
232 | ```
233 | $ cat input6.csv
234 | A,B,C,D
235 | X,1,1,1
236 | Y,2,2,2
237 | Z,9,9,9
238 | X,1,2,3
239 | $ go run diffcsv.go -key 1 -f1 input1.csv -f2 input6.csv -o test6.csv
240 | 2018/10/05 07:15:00 Start: Oct  5 07:15:00.105
241 | 2018/10/05 07:15:00 Processing input #1:input1.csv
242 | 2018/10/05 07:15:00 Number of rows in file input1.csv:3
243 | 2018/10/05 07:15:00 Processing input #2:input6.csv
244 | 2018/10/05 07:15:00 Key value not unique: X on row 4
245 | exit status 1
246 | $
247 | ```
248 | 


--------------------------------------------------------------------------------
/diffcsv/diffcsv.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"encoding/csv"
  5 | 	"flag"
  6 | 	"fmt"
  7 | 	"io"
  8 | 	"log"
  9 | 	"os"
 10 | 	"sort"
 11 | 	"strings"
 12 | 	"time"
 13 | )
 14 | 
 15 | var f1name = flag.String("f1", "", "First CSV file name to compare")
 16 | var f2name = flag.String("f2", "", "Second CSV file name to compare")
 17 | var output = flag.String("o", "", "Output CSV file for differences")
 18 | var key = flag.Int("key", 0, "Key column in input CSVs (first is 1); must be unique")
 19 | var help = flag.Bool("help", false, "Show help message")
 20 | var ondupfirst = flag.Bool("ondupFirst", false, "On duplicate key, keep first one")
 21 | var onduplast = flag.Bool("ondupLast", false, "On duplicate key, keep last  one")
 22 | var noeq = flag.Bool("noeq", false, "Suppress matches, showing only differences")
 23 | var alias1 = flag.String("alias1", "F1", "Alias for first input file; default F1")
 24 | var alias2 = flag.String("alias2", "F2", "Alias for second input file; default F2")
 25 | var colnums = flag.Bool("colnums", false, "Add difference column numbers to headers")
 26 | var ignoreCase = flag.Bool("ignoreCase", true, "Ignore case when comparing; default true")
 27 | var trimSpace = flag.Bool("trimSpace", true, "Ignore leading and trailing spaces when comparing; default true")
 28 | 
 29 | var detailedHelp = `
 30 | 	Detailed Help:
 31 | 	Inputs:
 32 | 		- a key column
 33 | 		- two input filenames
 34 | 		- an output filename
 35 | 	There will be two input files to compare and there will be
 36 | 	one output file created:
 37 | 	a) The first file will be read and stored into a map
 38 | 	b) The second file will be read and stored into a map
 39 | 	c) It is an error if a file has the same key value on two rows.
 40 | 	Keys must be unique within each file. 
 41 | 	Note that key column number is one based, not zero based!
 42 | 	NOTE! if duplicate keys exist, then there are options to keep
 43 | 	the first or to keep the last one. Default is to error out.
 44 | 	d) Then all keys from both inputs are combined/deduped/sorted
 45 | 	e) Then we range over the combined keyset and output a new CSV
 46 | 	that has a new status column as the first column and the other columns
 47 | 	from the inputs as the remaining columns.
 48 | 	f) the new status column has the following values:
 49 | 	- EQ meaning that the values for the key are same in both input files
 50 | 	- IN=1 meaning that the key and values are only in input file #1
 51 | 	- IN=2 similar for input file #2
 52 | 	- DFn=x,y,..,z where n is either 1 or 2; followed by a comma delimited 
 53 | 	list of column numbers where the values for the key do not match.
 54 | 	Note that the DF statuses always come in pairs, one for each input file.
 55 | 	g) Limitations:
 56 | 	- both input files must have the same number of columns
 57 | 	- both must have a header row and the headers must be the same
 58 | `
 59 | 
 60 | func main() {
 61 | 	flag.Parse()
 62 | 
 63 | 	if *help {
 64 | 		usage("")
 65 | 	}
 66 | 
 67 | 	if *key == 0 {
 68 | 		usage("Key column number missing.")
 69 | 	}
 70 | 
 71 | 	if *f1name == "" {
 72 | 		usage("First filename is missing.")
 73 | 	}
 74 | 
 75 | 	if *f2name == "" {
 76 | 		fmt.Println()
 77 | 		usage("Second filename is missing.")
 78 | 	}
 79 | 
 80 | 	if *output == "" {
 81 | 		fmt.Println()
 82 | 		usage("Output filename is missing.")
 83 | 	}
 84 | 
 85 | 	if *ondupfirst && *onduplast {
 86 | 		fmt.Println()
 87 | 		usage("Cannot use both on-dup options")
 88 | 	}
 89 | 
 90 | 	now := time.Now()
 91 | 	log.Printf("Start: %v", now.Format(time.StampMilli))
 92 | 
 93 | 	// open first input file stop.Format(Time.StampMilli)
 94 | 	var r1 *csv.Reader
 95 | 	f1, f1err := os.Open(*f1name)
 96 | 	if f1err != nil {
 97 | 		log.Fatal("os.Open() Error:" + f1err.Error())
 98 | 	}
 99 | 	r1 = csv.NewReader(f1)
100 | 
101 | 	// open second input file
102 | 	var r2 *csv.Reader
103 | 	f2, f2err := os.Open(*f2name)
104 | 	if f2err != nil {
105 | 		log.Fatal("os.Open() Error:" + f2err.Error())
106 | 	}
107 | 	r2 = csv.NewReader(f2)
108 | 
109 | 	/*********************************************************/
110 | 	// do a quick check on columns first
111 | 	// if not the same, then log error and exit
112 | 
113 | 	// second file
114 | 	hdrs2, rerr := r2.Read()
115 | 	if rerr == io.EOF {
116 | 		log.Fatal("File 2 is empty", rerr)
117 | 	}
118 | 	if rerr != nil {
119 | 		log.Fatalf("csv.Read:\n%v\n", rerr)
120 | 	}
121 | 	numcols2 := len(hdrs2)
122 | 
123 | 	// first file
124 | 	hdrs1, rerr := r1.Read()
125 | 	if rerr == io.EOF {
126 | 		log.Fatal("File 1 is empty", rerr)
127 | 	}
128 | 	if rerr != nil {
129 | 		log.Fatalf("csv.Read:\n%v\n", rerr)
130 | 	}
131 | 	numcols1 := len(hdrs1)
132 | 
133 | 	if numcols1 != numcols2 {
134 | 		log.Fatalf("Different number of columns:%v vs. %v",
135 | 			numcols1, numcols2)
136 | 	}
137 | 
138 | 	// check that headers are the same
139 | 	for i := range hdrs1 {
140 | 		if hdrs1[i] == hdrs2[i] {
141 | 			continue
142 | 		}
143 | 		log.Fatal("Headers are not the same on input files")
144 | 	}
145 | 
146 | 	// check on whether to add column numbers to headers
147 | 	if *colnums {
148 | 		for i := range hdrs1 {
149 | 			hdrs1[i] = fmt.Sprintf("%v-%v", i+1, hdrs1[i])
150 | 		}
151 | 	}
152 | 
153 | 	// set expectations of fields per row
154 | 	r1.FieldsPerRecord = numcols1
155 | 	r2.FieldsPerRecord = numcols1
156 | 
157 | 	// open output file
158 | 	var wf1 *csv.Writer
159 | 	wf1o, wf1oerr := os.Create(*output)
160 | 	if wf1oerr != nil {
161 | 		log.Fatal("os.Create() Error:" + wf1oerr.Error())
162 | 	}
163 | 	defer wf1o.Close()
164 | 	wf1 = csv.NewWriter(wf1o)
165 | 	hdrOutput := make([]string, 0)
166 | 	hdrOutput = append(hdrOutput, "STATUS")
167 | 	hdrOutput = append(hdrOutput, hdrs1...)
168 | 	err := wf1.Write(hdrOutput)
169 | 	if err != nil {
170 | 		log.Fatalf("Output Error:\n%v\n", err)
171 | 	}
172 | 
173 | 	log.Printf("Processing input #1:%v\n", *f1name)
174 | 	f1map := make(map[string][]string)
175 | 	// read first file
176 | 	rows := 0
177 | 	for {
178 | 		// read the csv file
179 | 		cells, rerr := r1.Read()
180 | 		if rerr == io.EOF {
181 | 			break
182 | 		}
183 | 		if rerr != nil {
184 | 			log.Fatalf("csv.Read:\n%v\n", rerr)
185 | 		}
186 | 		rows++
187 | 		if *trimSpace {
188 | 			for n := range cells {
189 | 				cells[n] = strings.TrimSpace(cells[n])
190 | 			}
191 | 		}
192 | 		keyv := cells[*key-1]
193 | 		if *ignoreCase {
194 | 			keyv = strings.ToLower(keyv)
195 | 		}
196 | 		if _, ok := f1map[keyv]; ok {
197 | 			if *onduplast {
198 | 				log.Printf("Replacing non-unique key: %v on row %v\n", keyv, rows+1)
199 | 			} else if *ondupfirst {
200 | 				log.Printf("Skipping non-unique key: %v on row %v\n", keyv, rows+1)
201 | 				continue
202 | 			} else {
203 | 				log.Fatalf("Key value not unique: %v on row %v\n", keyv, rows+1)
204 | 			}
205 | 		}
206 | 		f1map[keyv] = cells
207 | 	}
208 | 	log.Printf("Number of rows in file %v:%v\n", *f1name, rows)
209 | 	f1.Close()
210 | 
211 | 	log.Printf("Processing input #2:%v\n", *f2name)
212 | 	f2map := make(map[string][]string)
213 | 	// read second file
214 | 	rows = 0
215 | 	for {
216 | 		// read the csv file
217 | 		cells, rerr := r2.Read()
218 | 		if rerr == io.EOF {
219 | 			break
220 | 		}
221 | 		if rerr != nil {
222 | 			log.Fatalf("csv.Read:\n%v\n", rerr)
223 | 		}
224 | 		rows++
225 | 		if *trimSpace {
226 | 			for n := range cells {
227 | 				cells[n] = strings.TrimSpace(cells[n])
228 | 			}
229 | 		}
230 | 		keyv := cells[*key-1]
231 | 		if *ignoreCase {
232 | 			keyv = strings.ToLower(keyv)
233 | 		}
234 | 		if _, ok := f2map[keyv]; ok {
235 | 			if *onduplast {
236 | 				log.Printf("Replacing non-unique key: %v on row %v\n", keyv, rows+1)
237 | 			} else if *ondupfirst {
238 | 				log.Printf("Skipping non-unique key: %v on row %v\n", keyv, rows+1)
239 | 				continue
240 | 			} else {
241 | 				log.Fatalf("Key value not unique: %v on row %v\n", keyv, rows+1)
242 | 			}
243 | 		}
244 | 		f2map[keyv] = cells
245 | 	}
246 | 	log.Printf("Number of rows in file %v:%v\n", *f2name, rows)
247 | 	f2.Close()
248 | 
249 | 	//
250 | 	// Get a combined set of keys
251 | 	//
252 | 	uniqkeyset := make(map[string]struct{})
253 | 	for k := range f1map {
254 | 		uniqkeyset[k] = struct{}{}
255 | 	}
256 | 	for k := range f2map {
257 | 		uniqkeyset[k] = struct{}{}
258 | 	}
259 | 	keySliceSize := len(uniqkeyset)
260 | 	keys := make([]string, keySliceSize)
261 | 	slot := 0
262 | 	for k := range uniqkeyset {
263 | 		keys[slot] = k
264 | 		slot++
265 | 	}
266 | 	log.Printf("Number of combined unique keys:%v\n", keySliceSize)
267 | 
268 | 	// sort them
269 | 	sort.Slice(keys, func(i, j int) bool {
270 | 		return keys[i] < keys[j]
271 | 	})
272 | 
273 | 	// counts
274 | 	eqCount := 0
275 | 	diffCount := 0
276 | 	f1UniqCount := 0
277 | 	f2UniqCount := 0
278 | 
279 | 	// Now range of combined unique keys
280 | 	for n := range keys {
281 | 		val := keys[n]
282 | 		row1, ok1 := f1map[val]
283 | 		row2, ok2 := f2map[val]
284 | 		if ok1 && ok2 {
285 | 			// are all the row values the same?
286 | 			diffList := make([]int, 0)
287 | 			for i := range row1 {
288 | 				if row1[i] == row2[i] {
289 | 					continue
290 | 				}
291 | 				if *ignoreCase {
292 | 					if strings.EqualFold(row1[i], row2[i]) {
293 | 						continue
294 | 					}
295 | 				}
296 | 				f := i - 1
297 | 				diffList = append(diffList, f)
298 | 			}
299 | 			if len(diffList) == 0 {
300 | 				eqCount++
301 | 				if *noeq {
302 | 					continue
303 | 				}
304 | 				outrow1 := make([]string, 0)
305 | 				outrow1 = append(outrow1, "EQ")
306 | 				outrow1 = append(outrow1, row1...)
307 | 				err := wf1.Write(outrow1)
308 | 				if err != nil {
309 | 					log.Fatalf("Output Write() Error: %v\n", err)
310 | 				}
311 | 			} else {
312 | 				diffCount++
313 | 				diffs := ""
314 | 				for i := range diffList {
315 | 					diffs += fmt.Sprintf("%v,", diffList[i]+2)
316 | 				}
317 | 				diffs = strings.TrimRight(diffs, ",")
318 | 				outrow1 := make([]string, 0)
319 | 				outrow1 = append(outrow1, fmt.Sprintf("%v=%v", *alias1, diffs))
320 | 				outrow1 = append(outrow1, row1...)
321 | 				err := wf1.Write(outrow1)
322 | 				if err != nil {
323 | 					log.Fatalf("Output Write() Error: %v\n", err)
324 | 				}
325 | 				outrow2 := make([]string, 0)
326 | 				outrow2 = append(outrow2, fmt.Sprintf("%v=%v", *alias2, diffs))
327 | 				outrow2 = append(outrow2, row2...)
328 | 				err = wf1.Write(outrow2)
329 | 				if err != nil {
330 | 					log.Fatalf("Output Write() Error: %v\n", err)
331 | 				}
332 | 			}
333 | 		} else {
334 | 			if !ok1 {
335 | 				f2UniqCount++
336 | 				outrow := make([]string, 0)
337 | 				outrow = append(outrow, fmt.Sprintf("IN=%v", *alias2))
338 | 				outrow = append(outrow, row2...)
339 | 				err := wf1.Write(outrow)
340 | 				if err != nil {
341 | 					log.Fatalf("Output Write() Error: %v\n", err)
342 | 				}
343 | 			} else {
344 | 				f1UniqCount++
345 | 				outrow := make([]string, 0)
346 | 				outrow = append(outrow, fmt.Sprintf("IN=%v", *alias1))
347 | 				outrow = append(outrow, row1...)
348 | 				err := wf1.Write(outrow)
349 | 				if err != nil {
350 | 					log.Fatalf("Output Write() Error: %v\n", err)
351 | 				}
352 | 			}
353 | 		}
354 | 
355 | 	}
356 | 	wf1.Flush()
357 | 
358 | 	// wrapup
359 | 	stop := time.Now()
360 | 	elapsed := time.Since(now)
361 | 	log.Printf("End: %v", stop.Format(time.StampMilli))
362 | 	log.Printf("Elapsed time %v", elapsed)
363 | 
364 | 	log.Printf("------- Summary -------\n")
365 | 	log.Printf("Equal Count: %v\n", eqCount)
366 | 	log.Printf("Key Diff Count: %v\n", diffCount)
367 | 	log.Printf("Unique to input #1 %v: %v\n", *alias1,f1UniqCount)
368 | 	log.Printf("Unique to input #2 %v: %v\n", *alias2,f2UniqCount)
369 | 
370 | }
371 | 
372 | func usage(msg string) {
373 | 	fmt.Println(msg)
374 | 	fmt.Print("Usage: diffcsv [options]\n")
375 | 	flag.PrintDefaults()
376 | 	if msg == "" {
377 | 		fmt.Println(detailedHelp)
378 | 	}
379 | 	os.Exit(0)
380 | }
381 | 


--------------------------------------------------------------------------------
/diffcsv/input1.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | X,1,1
3 | Y,2,2
4 | Z,3,3


--------------------------------------------------------------------------------
/diffcsv/input2.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | X,1,1
3 | Y,2,2
4 | W,3,3


--------------------------------------------------------------------------------
/diffcsv/input3.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | X,1,1
3 | Y,2,2
4 | Z,9,9


--------------------------------------------------------------------------------
/diffcsv/input4.csv:
--------------------------------------------------------------------------------
1 | A,B,D
2 | X,1,1
3 | Y,2,2
4 | Z,9,9


--------------------------------------------------------------------------------
/diffcsv/input5.csv:
--------------------------------------------------------------------------------
1 | A,B,C,D
2 | X,1,1,1
3 | Y,2,2,2
4 | Z,9,9,9


--------------------------------------------------------------------------------
/diffcsv/input6.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | X,1,1
3 | Y,2,2
4 | Z,9,9
5 | X,1,2
6 | X,3,4
7 | 


--------------------------------------------------------------------------------
/diffcsv/input7.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | X,1,1
3 | Y,2,3
4 | W,3,3


--------------------------------------------------------------------------------
/diffcsv/input8.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | x,1,1
3 |  Y ,2,3
4 |  w ,3,3


--------------------------------------------------------------------------------
/diffcsv/test1.csv:
--------------------------------------------------------------------------------
1 | STATUS,A,B,C
2 | EQ,X,1,1
3 | EQ,Y,2,2
4 | EQ,Z,3,3
5 | 


--------------------------------------------------------------------------------
/diffcsv/test2.csv:
--------------------------------------------------------------------------------
1 | STATUS,A,B,C
2 | IN=2,W,3,3
3 | EQ,X,1,1
4 | DF1=3,Y,2,2
5 | DF2=3,Y,2,3
6 | IN=1,Z,3,3
7 | 


--------------------------------------------------------------------------------
/diffcsv/test3.csv:
--------------------------------------------------------------------------------
1 | STATUS,1-A,2-B,3-C
2 | "i1=2,3",Z,3,3
3 | "i2=2,3",Z,9,9
4 | 


--------------------------------------------------------------------------------
/diffcsv/test6.csv:
--------------------------------------------------------------------------------
1 | STATUS,A,B,C
2 | "DF1=2,3",X,1,1
3 | "DF2=2,3",X,3,4
4 | EQ,Y,2,2
5 | "DF1=2,3",Z,3,3
6 | "DF2=2,3",Z,9,9
7 | 


--------------------------------------------------------------------------------
/diffcsv/test7.csv:
--------------------------------------------------------------------------------
1 | STATUS,A,B,C
2 | IN=f1,W,3,3
3 | IN=f1,X,1,1
4 | EQ,Y,2,3
5 | IN=f2,w,3,3
6 | IN=f2,x,1,1
7 | 


--------------------------------------------------------------------------------
/editcsv/README.md:
--------------------------------------------------------------------------------
 1 | # Editcsv
 2 | This utility will edit a CSV and either update inline or add update 
 3 | as a new column.
 4 | 
 5 | Use -help to show:
 6 | ```
 7 | $ editcsv -help
 8 | Help Message
 9 | 
10 | Usage: editcsv [options] input.csv output.csv
11 |   -add
12 |     	Add replace string as a new column; default, replace in-place
13 |   -addHeader string
14 |     	Header to use for added column (default "ADDED")
15 |   -c string
16 |     	Range spec for columns
17 |   -headers
18 |     	CSV has headers (default true)
19 |   -help
20 |     	Show help message
21 |   -i string
22 |     	Input CSV filename; default STDIN
23 |   -keep
24 |     	Keep CSV headers on output (default true)
25 |   -o string
26 |     	Output CSV filename; default STDOUT
27 |   -pattern string
28 |     	Search pattern
29 |   -replace string
30 |     	Regexp replace expression
31 | ```
32 | 
33 | ## Examples
34 | Put an "x-" in front of any cell value beginning with the letter "a".
35 | ```
36 | $ cat test1.csv
37 | A,B,C
38 | abc,def,Army
39 | one,two,Navy
40 | go,abacus,Marine
41 | Android,Ubuntu,Linux
42 | $ go run editcsv.go -pattern "^(a)" -replace "x-$1" < test1.csv
43 | A,B,C
44 | x-bc,def,Army
45 | one,two,Navy
46 | go,x-bacus,Marine
47 | Android,Ubuntu,Linux
48 | ```
49 | Replace matches with a constant value, in this case "--elided--".
50 | ```
51 | $ go run editcsv.go -pattern "^.*y$" -replace "--elided--" < test1.csv
52 | A,B,C
53 | abc,def,--elided--
54 | one,two,--elided--
55 | go,abacus,Marine
56 | Android,Ubuntu,Linux
57 | ```
58 | Replace matches (cell values in column 2 only) that end in letter "o",
59 | adding a new column named "final" for the updated column 2.
60 | ```
61 | $ editcsv -pattern "^.*o$" -replace "--elided--" -c 2 -add=true -addHeader "final" < test1.csv
62 | A,B,C,final
63 | abc,def,Army,def
64 | one,two,Navy,--elided--
65 | go,abacus,Marine,abacus
66 | Android,Ubuntu,Linux,Ubuntu
67 | ```
68 | 


--------------------------------------------------------------------------------
/editcsv/editcsv.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"encoding/csv"
  5 | 	"flag"
  6 | 	"fmt"
  7 | 	"io"
  8 | 	"log"
  9 | 	"os"
 10 | 	"regexp"
 11 | 
 12 | 	"github.com/mandolyte/csv-utils"
 13 | )
 14 | 
 15 | var cs *rangespec.RangeSpec
 16 | var re *regexp.Regexp
 17 | 
 18 | func main() {
 19 | 	pattern := flag.String("pattern", "", "Search pattern")
 20 | 	replace := flag.String("replace", "", "Regexp replace expression")
 21 | 	addHdr := flag.String("addHeader", "ADDED", "Header to use for added column")
 22 | 	cols := flag.String("c", "", "Range spec for columns")
 23 | 	input := flag.String("i", "", "Input CSV filename; default STDIN")
 24 | 	output := flag.String("o", "", "Output CSV filename; default STDOUT")
 25 | 	headers := flag.Bool("headers", true, "CSV has headers")
 26 | 	keep := flag.Bool("keep", true, "Keep CSV headers on output")
 27 | 	help := flag.Bool("help", false, "Show help message")
 28 | 	add := flag.Bool("add", false, "Add replace string as a new column; default, replace in-place")
 29 | 	flag.Parse()
 30 | 
 31 | 	if *help {
 32 | 		usage("Help Message")
 33 | 		os.Exit(0)
 34 | 	}
 35 | 
 36 | 	/* check parameters */
 37 | 	if *replace == "" {
 38 | 		usage("Required: Missing replace expression")
 39 | 		os.Exit(0)
 40 | 	}
 41 | 
 42 | 	if *pattern == "" {
 43 | 		usage("Required: Missing search expression")
 44 | 		os.Exit(0)
 45 | 	}
 46 | 	re = regexp.MustCompile(*pattern)
 47 | 
 48 | 	if *cols != "" {
 49 | 		var cserr error
 50 | 		cs, cserr = rangespec.New(*cols)
 51 | 		if cserr != nil {
 52 | 			log.Fatalf("Invalid column range spec:%v, Error:\n%v\n", *cols, cserr)
 53 | 		}
 54 | 	}
 55 | 
 56 | 	if *keep {
 57 | 		if !*headers {
 58 | 			log.Fatal("Cannot keep headers you don't have!")
 59 | 		}
 60 | 	}
 61 | 	// open output file
 62 | 	var w *csv.Writer
 63 | 	if *output == "" {
 64 | 		w = csv.NewWriter(os.Stdout)
 65 | 	} else {
 66 | 		fo, foerr := os.Create(*output)
 67 | 		if foerr != nil {
 68 | 			log.Fatal("os.Create() Error:" + foerr.Error())
 69 | 		}
 70 | 		defer fo.Close()
 71 | 		w = csv.NewWriter(fo)
 72 | 	}
 73 | 
 74 | 	// open input file
 75 | 	var r *csv.Reader
 76 | 	if *input == "" {
 77 | 		r = csv.NewReader(os.Stdin)
 78 | 	} else {
 79 | 		fi, fierr := os.Open(*input)
 80 | 		if fierr != nil {
 81 | 			log.Fatal("os.Open() Error:" + fierr.Error())
 82 | 		}
 83 | 		defer fi.Close()
 84 | 		r = csv.NewReader(fi)
 85 | 	}
 86 | 
 87 | 	// ignore expectations of fields per row
 88 | 	r.FieldsPerRecord = -1
 89 | 
 90 | 	// read loop for CSV
 91 | 	var row uint64
 92 | 	for {
 93 | 		// read the csv file
 94 | 		cells, rerr := r.Read()
 95 | 		if rerr == io.EOF {
 96 | 			break
 97 | 		}
 98 | 		if rerr != nil {
 99 | 			log.Fatalf("csv.Read:\n%v\n", rerr)
100 | 		}
101 | 		if (row == 0) && *headers && *keep {
102 | 			row = 1
103 | 			if *add {
104 | 				cells = append(cells, *addHdr)
105 | 			}
106 | 			err := w.Write(cells)
107 | 			if err != nil {
108 | 				log.Fatalf("csv.Write:\n%v\n", err)
109 | 			}
110 | 			continue
111 | 		}
112 | 		row++
113 | 		// test row/columns for a match
114 | 		err := w.Write(patternMatches(cells, re, *replace, *add))
115 | 		if err != nil {
116 | 			log.Fatalf("csv.Write:\n%v\n", err)
117 | 		}
118 | 	}
119 | 	w.Flush()
120 | }
121 | 
122 | func patternMatches(c []string, re *regexp.Regexp, replace string, add bool) []string {
123 | 	for n := range c {
124 | 		if cs == nil {
125 | 			newstring := re.ReplaceAllString(c[n], replace)
126 | 			if add {
127 | 				c = append(c, newstring)
128 | 			} else {
129 | 				c[n] = newstring
130 | 			}
131 | 		} else {
132 | 			if cs.InRange(uint64(n + 1)) {
133 | 				newstring := re.ReplaceAllString(c[n], replace)
134 | 				if add {
135 | 					c = append(c, newstring)
136 | 				} else {
137 | 					c[n] = newstring
138 | 				}
139 | 			}
140 | 		}
141 | 	}
142 | 	return c
143 | }
144 | 
145 | func usage(msg string) {
146 | 	fmt.Println(msg + "\n")
147 | 	fmt.Print("Usage: editcsv [options] input.csv output.csv\n")
148 | 	flag.PrintDefaults()
149 | }
150 | 


--------------------------------------------------------------------------------
/editcsv/test1.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | abc,def,Army
3 | one,two,Navy
4 | go,abacus,Marine
5 | Android,Ubuntu,Linux
6 | 


--------------------------------------------------------------------------------
/obfuscatecsv/README.md:
--------------------------------------------------------------------------------
 1 | # Obfuscatecsv
 2 | *Notes*
 3 | 1. If mulitple columns have the same data, then they will be obfuscated to the same value to preserve identity of same value in two columns
 4 | 2. The "prefix" is required and recommended to be something that is related to the data. For example, if names are being obfuscated, then use "name" as the prefix.
 5 | 3. The sequences are simply the row and column of the first occurence of the value. That gives you a way to work backward if you need to.
 6 | 4. The default delimiter between the row and column sequence number is a dash. If no delimiter is desired just use "" as shown below.
 7 | 
 8 | Use -help to show:
 9 | ```
10 | $ obfuscatecsv -help
11 | Help Message
12 | 
13 | Usage: obfuscatecsv [options]
14 |   -c string
15 |     	Range spec for columns to obfuscate
16 |   -d string
17 |     	Delimiter for sequences (default "-")
18 |   -headers
19 |     	CSV has headers (default true)
20 |   -help
21 |     	Show help message
22 |   -i string
23 |     	Input CSV filename; default STDIN
24 |   -keep
25 |     	Keep CSV headers on output (default true)
26 |   -o string
27 |     	Output CSV filename; default STDOUT
28 |   -prefix string
29 |     	Prefix for obfuscator value
30 | $
31 | ```
32 | 
33 | # Examples
34 | Obfuscate first two columns:
35 | ```
36 | $ cat test1.csv
37 | A,B,C
38 | abc,def,Army
39 | def,abc,Navy
40 | ijk,abc,Navy
41 | zyz,def,Army
42 | abc,abc,AF
43 | $ obfuscatecsv -i test1.csv -prefix XT -c 1,2
44 | A,B,C
45 | XT2-0,XT2-1,Army
46 | XT2-1,XT2-0,Navy
47 | XT4-0,XT2-0,Navy
48 | XT5-0,XT2-1,Army
49 | XT2-0,XT2-0,AF
50 | ```
51 | Chained/piped example that obfuscates all the columns, but with 
52 | different prefixes.
53 | ```
54 | $ obfuscatecsv -i test1.csv -prefix XT -c 1,2 -d "" | obfuscatecsv -prefix DOD -c 3
55 | A,B,C
56 | XT20,XT21,DOD2-2
57 | XT21,XT20,DOD3-2
58 | XT40,XT20,DOD3-2
59 | XT50,XT21,DOD2-2
60 | XT20,XT20,DOD6-2
61 | $
62 | ```
63 | 


--------------------------------------------------------------------------------
/obfuscatecsv/obfuscatecsv.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"encoding/csv"
  5 | 	"flag"
  6 | 	"fmt"
  7 | 	"io"
  8 | 	"log"
  9 | 	"os"
 10 | 
 11 | 	"github.com/mandolyte/csv-utils"
 12 | )
 13 | 
 14 | var cs *rangespec.RangeSpec
 15 | 
 16 | func main() {
 17 | 	prefix := flag.String("prefix", "", "Prefix for obfuscator value")
 18 | 	cols := flag.String("c", "", "Range spec for columns to obfuscate")
 19 | 	input := flag.String("i", "", "Input CSV filename; default STDIN")
 20 | 	output := flag.String("o", "", "Output CSV filename; default STDOUT")
 21 | 	headers := flag.Bool("headers", true, "CSV has headers")
 22 | 	delimiter := flag.String("d", "-", "Delimiter for sequences")
 23 | 	keep := flag.Bool("keep", true, "Keep CSV headers on output")
 24 | 	help := flag.Bool("help", false, "Show help message")
 25 | 	flag.Parse()
 26 | 
 27 | 	if *help {
 28 | 		usage("Help Message")
 29 | 		os.Exit(0)
 30 | 	}
 31 | 
 32 | 	/* check parameters */
 33 | 	if *prefix == "" {
 34 | 		usage("Required: Missing prefix for obfuscation value")
 35 | 		os.Exit(0)
 36 | 	}
 37 | 
 38 | 	if *cols != "" {
 39 | 		var cserr error
 40 | 		cs, cserr = rangespec.New(*cols)
 41 | 		if cserr != nil {
 42 | 			log.Fatalf("Invalid column range spec:%v, Error:\n%v\n", *cols, cserr)
 43 | 		}
 44 | 	}
 45 | 
 46 | 	if *keep {
 47 | 		if !*headers {
 48 | 			log.Fatal("Cannot keep headers you don't have!")
 49 | 		}
 50 | 	}
 51 | 	// open output file
 52 | 	var w *csv.Writer
 53 | 	if *output == "" {
 54 | 		w = csv.NewWriter(os.Stdout)
 55 | 	} else {
 56 | 		fo, foerr := os.Create(*output)
 57 | 		if foerr != nil {
 58 | 			log.Fatal("os.Create() Error:" + foerr.Error())
 59 | 		}
 60 | 		defer fo.Close()
 61 | 		w = csv.NewWriter(fo)
 62 | 	}
 63 | 
 64 | 	// open input file
 65 | 	var r *csv.Reader
 66 | 	if *input == "" {
 67 | 		r = csv.NewReader(os.Stdin)
 68 | 	} else {
 69 | 		fi, fierr := os.Open(*input)
 70 | 		if fierr != nil {
 71 | 			log.Fatal("os.Open() Error:" + fierr.Error())
 72 | 		}
 73 | 		defer fi.Close()
 74 | 		r = csv.NewReader(fi)
 75 | 	}
 76 | 
 77 | 	// ignore expectations of fields per row
 78 | 	r.FieldsPerRecord = -1
 79 | 
 80 | 	// Create value map to store mapping between
 81 | 	// original values and obfuscated values
 82 | 	valmap := make(map[string]string)
 83 | 
 84 | 	// read loop for CSV
 85 | 	var row uint64
 86 | 	for {
 87 | 		// read the csv file
 88 | 		cells, rerr := r.Read()
 89 | 		if rerr == io.EOF {
 90 | 			break
 91 | 		}
 92 | 		if rerr != nil {
 93 | 			log.Fatalf("csv.Read:\n%v\n", rerr)
 94 | 		}
 95 | 		if (row == 0) && *headers && *keep {
 96 | 			row = 1
 97 | 			err := w.Write(cells)
 98 | 			if err != nil {
 99 | 				log.Fatalf("csv.Write:\n%v\n", err)
100 | 			}
101 | 			continue
102 | 		}
103 | 		row++
104 | 		// test row/columns for a match
105 | 		//process(cells, *prefix, valmap, row, *width)
106 | 		for n, v := range cells {
107 | 			if cs.InRange(uint64(n + 1)) {
108 | 				obsv, ok := valmap[v]
109 | 				if ok {
110 | 					cells[n] = obsv
111 | 				} else {
112 | 					valmap[v] = fmt.Sprintf("%s%d%s%d", *prefix, row, *delimiter, n)
113 | 					cells[n] = valmap[v]
114 | 				}
115 | 			}
116 | 		}
117 | 		err := w.Write(cells)
118 | 		if err != nil {
119 | 			log.Fatalf("csv.Write:\n%v\n", err)
120 | 		}
121 | 	}
122 | 	w.Flush()
123 | }
124 | 
125 | /*
126 | func process(c []string, pf string, vm map[string]string, r uint64, w int) {
127 | 
128 | }
129 | */
130 | func usage(msg string) {
131 | 	fmt.Println(msg + "\n")
132 | 	fmt.Print("Usage: obfuscatecsv [options]\n")
133 | 	flag.PrintDefaults()
134 | }
135 | 


--------------------------------------------------------------------------------
/obfuscatecsv/test1.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | abc,def,Army
3 | def,abc,Navy
4 | ijk,abc,Navy
5 | zyz,def,Army
6 | abc,abc,AF
7 | 


--------------------------------------------------------------------------------
/pivotcsv/README.md:
--------------------------------------------------------------------------------
 1 | # Pivotcsv
 2 | Use -help to show:
 3 | ```
 4 | $ pivotcsv -help
 5 |   -c int
 6 |     	Column to pivot (REQUIRED)
 7 |   -headers
 8 |     	CSV must have headers; cannot be false (default true)
 9 |   -help
10 |     	Show help message
11 |   -i string
12 |     	CSV file name to pivot; default STDIN
13 |   -nf string
14 |     	Format to use for numbers (default "%v")
15 |   -nv string
16 |     	String to signal novalue; default is empty string
17 |   -o string
18 |     	CSV output file name; default STDOUT
19 |   -on
20 |     	Only consider numeric data and sum them (default true)
21 |   -os
22 |     	Consider data as strings and concatenate
23 |   -s int
24 |     	Column to sum/concat (REQUIRED)
25 |   -sd string
26 |     	Concatenation delimiter; default is comma (default ",")
27 | ```
28 | ## Examples
29 | ```
30 | $ cat test1.csv
31 | A,B,C,D,E,F
32 | a1,b1,c1,d1,X,1
33 | a2,b2,c2,d2,Y,3
34 | a1,b1,c1,d1,X,3
35 | a2,b2,c2,d2,Y,3
36 | $ go run pivotcsv.go -i test1.csv -c 5 -s 6
37 | A,B,C,D,X,Y
38 | a1,b1,c1,d1,4,
39 | a2,b2,c2,d2,,6
40 | 
41 | $ go run pivotcsv.go -i test1.csv -c 1 -s 2 -os
42 | a1,a2,C,D,E,F
43 | b1,,X,1
44 | b1,,X,3
45 | ,"b2,b2",Y,3
46 | $ cat test1.csv
47 | A,B,C,D,E,F
48 | a1,b1,c1,d1,X,1
49 | a2,b2,c2,d2,Y,3
50 | a1,b1,c1,d1,X,3
51 | a2,b2,c2,d2,Y,3
52 | 
53 | $ cat test2.csv
54 | A,B,C,D,E,F
55 | a1,b1,c1,d1,X,1
56 | a2,b2,c2,d2,X,3
57 | a1,b1,c1,d1,X,3
58 | a2,b2,c2,d2,X,3
59 | a1,b1,c1,d1,Y,2
60 | a2,b2,c2,d2,Y,4
61 | a1,b1,c1,d1,Y,4
62 | a2,b2,c2,d2,Y,4
63 | a1,b1,c1,d1,Z,3
64 | a2,b2,c2,d2,Z,5
65 | a1,b1,c1,d1,Z,5
66 | a2,b2,c2,d2,Z,5
67 | $ go run pivotcsv.go -c 5 -s 6 < test2.csv
68 | A,B,C,D,X,Y,Z
69 | a1,b1,c1,d1,4,6,8
70 | a2,b2,c2,d2,6,8,10
71 | $
72 | 
73 | ```
74 | 


--------------------------------------------------------------------------------
/pivotcsv/pivotcsv.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"encoding/csv"
  6 | 	"flag"
  7 | 	"fmt"
  8 | 	"io"
  9 | 	"log"
 10 | 	"os"
 11 | 	"sort"
 12 | 	"strconv"
 13 | 	"strings"
 14 | )
 15 | 
 16 | type table struct {
 17 | 	records                 [][]string
 18 | 	pivotcol, pivotcolcount int
 19 | }
 20 | 
 21 | func (t *table) Len() int {
 22 | 	return len(t.records)
 23 | }
 24 | 
 25 | func (t *table) Swap(i, j int) {
 26 | 	t.records[i], t.records[j] = t.records[j], t.records[i]
 27 | }
 28 | 
 29 | func (t *table) Less(i, j int) bool {
 30 | 	for n := range t.records[i] {
 31 | 		if n >= t.pivotcol && n < (t.pivotcol+t.pivotcolcount) {
 32 | 			continue
 33 | 		}
 34 | 		if t.records[i][n] < t.records[j][n] {
 35 | 			return true
 36 | 		}
 37 | 	}
 38 | 	return false
 39 | }
 40 | 
 41 | func main() {
 42 | 	pivotcol := flag.Int("c", 0, "Column to pivot (REQUIRED)")
 43 | 	pivotsum := flag.Int("s", 0, "Column to sum/concat (REQUIRED)")
 44 | 	pivotinf := flag.String("i", "", "CSV file name to pivot; default STDIN")
 45 | 	pivotout := flag.String("o", "", "CSV output file name; default STDOUT")
 46 | 	headers := flag.Bool("headers", true, "CSV must have headers; cannot be false")
 47 | 	help := flag.Bool("help", false, "Show help message")
 48 | 	novalue := flag.String("nv", "", "String to signal novalue; default is empty string")
 49 | 	numformat := flag.String("nf", "%v", "Format to use for numbers")
 50 | 	onlynum := flag.Bool("on", true, "Only consider numeric data and sum them")
 51 | 	onlystr := flag.Bool("os", false, "Consider data as strings and concatenate")
 52 | 	strdlm := flag.String("sd", ",", "Concatenation delimiter; default is comma")
 53 | 	flag.Parse()
 54 | 
 55 | 	if *help {
 56 | 		usage("")
 57 | 		os.Exit(0)
 58 | 	}
 59 | 
 60 | 	if len(flag.Args()) > 0 {
 61 | 		usage("Arguments provided when none expected")
 62 | 		os.Exit(1)
 63 | 	}
 64 | 
 65 | 	if *pivotcol == 0 {
 66 | 		usage("Pivot column number must greater than zero")
 67 | 		os.Exit(0)
 68 | 	}
 69 | 
 70 | 	if *pivotsum == 0 {
 71 | 		usage("Pivot sum column number must greater than zero")
 72 | 		os.Exit(0)
 73 | 	}
 74 | 
 75 | 	if !*headers {
 76 | 		usage("Headers are required; add them before using")
 77 | 		os.Exit(0)
 78 | 	}
 79 | 
 80 | 	if *onlystr {
 81 | 		*onlynum = false
 82 | 	}
 83 | 
 84 | 	// open output file
 85 | 	var w *csv.Writer
 86 | 	if *pivotout == "" {
 87 | 		w = csv.NewWriter(os.Stdout)
 88 | 	} else {
 89 | 		fo, foerr := os.Create(*pivotout)
 90 | 		if foerr != nil {
 91 | 			log.Fatal("os.Create() Error:" + foerr.Error())
 92 | 		}
 93 | 		defer fo.Close()
 94 | 		w = csv.NewWriter(fo)
 95 | 		defer w.Flush()
 96 | 	}
 97 | 
 98 | 	// open input file
 99 | 	var r *csv.Reader
100 | 	if *pivotinf == "" {
101 | 		r = csv.NewReader(os.Stdin)
102 | 	} else {
103 | 		fi, fierr := os.Open(*pivotinf)
104 | 		if fierr != nil {
105 | 			log.Fatal("os.Open() Error:" + fierr.Error())
106 | 		}
107 | 		defer fi.Close()
108 | 		r = csv.NewReader(fi)
109 | 	}
110 | 
111 | 	// ignore expectations of fields per row
112 | 	r.FieldsPerRecord = -1
113 | 
114 | 	// read into memory
115 | 	csvall, raerr := r.ReadAll()
116 | 	if raerr != nil {
117 | 		log.Fatal("r.ReadAll() Error:" + raerr.Error())
118 | 	}
119 | 
120 | 	// analyze the pivot column
121 | 	// a. get list of distinct values
122 | 	// b. use this to calculate width of new CSV table
123 | 	var row int
124 | 	pivotHdrs := make(map[string]int)
125 | 	for n := range csvall {
126 | 		if row == 0 {
127 | 			row++
128 | 			continue
129 | 		}
130 | 		pivotHdrs[csvall[n][*pivotcol-1]]++
131 | 	}
132 | 	//log.Printf("Number of pivot headers:%v", len(pivotHdrs))
133 | 
134 | 	// sort the new pivot headers
135 | 	var phkeys []string
136 | 	for phk := range pivotHdrs {
137 | 		phkeys = append(phkeys, phk)
138 | 	}
139 | 	sort.Strings(phkeys)
140 | 
141 | 	// I have enough to make the new header row now!
142 | 	// make the output slice table
143 | 	var orecs [][]string
144 | 
145 | 	// let's create the header row by:
146 | 	// a. appending to a slice the non pivot and sum columns
147 | 	// b. append the phkeys from above
148 | 	var hdrrow []string
149 | 	for n, v := range csvall[0] {
150 | 		if n+1 == *pivotcol {
151 | 			// insert here the new pivot headers
152 | 			for _, w := range phkeys {
153 | 				hdrrow = append(hdrrow, w)
154 | 			}
155 | 			continue
156 | 		}
157 | 		if n+1 == *pivotsum {
158 | 			continue
159 | 		}
160 | 		hdrrow = append(hdrrow, v)
161 | 	}
162 | 	// now make the headers the first append to the table
163 | 	orecs = append(orecs, hdrrow)
164 | 
165 | 	// idea:
166 | 	// create a key based on all columns EXCEPT pivot and sum
167 | 	// by letting csv package write (reduced) row to a string buffer
168 | 	// use this as a map key
169 | 
170 | 	// the value for the map would be a slice of type struct:
171 | 	// type sumconcat struct {
172 | 	//   float64 -- to sum up numbers
173 | 	//   []string -- to collect non-numbers
174 | 	// }
175 | 	// the slice would be one per pivot column header value
176 | 	// or maybe a map also with header value as key and struct as value
177 | 	type sumconcat struct {
178 | 		sumnum float64
179 | 		sumstr []string
180 | 		ncount uint64
181 | 	}
182 | 	pivot := make(map[string](map[string]*sumconcat))
183 | 	for _, v := range csvall[1:] {
184 | 		//
185 | 		// step 1. create the []string for the key
186 | 		//
187 | 		var tmp []string
188 | 		for x, y := range v {
189 | 			// skip the pivot and sum columns
190 | 			if x+1 == *pivotcol {
191 | 				continue
192 | 			}
193 | 			if x+1 == *pivotsum {
194 | 				continue
195 | 			}
196 | 			tmp = append(tmp, y)
197 | 		}
198 | 		//
199 | 		// step 2. let CSV package create the key
200 | 		//
201 | 		var b bytes.Buffer
202 | 		w2 := csv.NewWriter(&b)
203 | 		err := w2.Write(tmp)
204 | 		w2.Flush()
205 | 		if err != nil {
206 | 			log.Fatal("w2.Write() Error:" + err.Error())
207 | 		}
208 | 		skey := b.String()
209 | 
210 | 		// step 3. update key value
211 | 		mapsc, ok := pivot[skey]
212 | 		//fmt.Printf("Summing:%v\n", v[*pivotsum-1])
213 | 		//fmt.Printf("Pivot col value is:%v\n", v[*pivotcol-1])
214 | 		if ok {
215 | 			// if key exists already in the pivot map
216 | 			// update the values and continue
217 | 			// try to convert pivotsum column value to a float64
218 | 			/*
219 | 				fmt.Print("Working on map:\n")
220 | 
221 | 				for debugk, debugv := range mapsc {
222 | 					fmt.Printf("Key: %v -- Val: %v\n", debugk, debugv)
223 | 				}
224 | 			*/
225 | 			if *onlynum {
226 | 				if f, err := strconv.ParseFloat(v[*pivotsum-1], 64); err == nil {
227 | 					tmpsc, ok := mapsc[v[*pivotcol-1]]
228 | 					if ok {
229 | 						mapsc[v[*pivotcol-1]].sumnum += f
230 | 						mapsc[v[*pivotcol-1]].ncount++
231 | 					} else {
232 | 						tmpsc = new(sumconcat)
233 | 						tmpsc.sumnum = f
234 | 						tmpsc.ncount++
235 | 						mapsc[v[*pivotcol-1]] = tmpsc
236 | 					}
237 | 				}
238 | 			} else {
239 | 				tmpsc, ok := mapsc[v[*pivotcol-1]]
240 | 				if ok {
241 | 					mapsc[v[*pivotcol-1]].sumstr =
242 | 						append(mapsc[v[*pivotcol-1]].sumstr, v[*pivotsum-1])
243 | 				} else {
244 | 					tmpsc = new(sumconcat)
245 | 					tmpsc.sumstr = append(tmpsc.sumstr, v[*pivotsum-1])
246 | 					mapsc[v[*pivotcol-1]] = tmpsc
247 | 				}
248 | 			}
249 | 		} else {
250 | 			//
251 | 			// step 3b. fill out the struct val for map
252 | 			//
253 | 			sc := new(sumconcat)
254 | 			if *onlynum {
255 | 				if f, err := strconv.ParseFloat(v[*pivotsum-1], 64); err == nil {
256 | 					sc.sumnum = f
257 | 				}
258 | 			} else {
259 | 				sc.sumstr = append(sc.sumstr, v[*pivotsum-1])
260 | 			}
261 | 			tmpmap := make(map[string]*sumconcat)
262 | 			tmpmap[v[*pivotcol-1]] = sc
263 | 			pivot[skey] = tmpmap
264 | 		}
265 | 
266 | 	}
267 | 	csvall = nil
268 | 	// now create the output table
269 | 	for k, v := range pivot {
270 | 		// untangle the CSV formatted key using CSV package
271 | 		//fmt.Printf("Pivot Key is /%v/\n", k)
272 | 		b := bytes.NewBufferString(k)
273 | 		r := csv.NewReader(b)
274 | 		row, rerr := r.Read()
275 | 		if rerr != nil {
276 | 			if rerr != io.EOF {
277 | 				log.Fatal("r.Read Error:" + rerr.Error())
278 | 			}
279 | 		}
280 | 		// append to a new row, inserting pivot columns in correct spot
281 | 		var newrow []string
282 | 		for i := 0; i < *pivotcol-1; i++ {
283 | 			if i == (*pivotsum - 1) {
284 | 				continue
285 | 			}
286 | 			newrow = append(newrow, row[i])
287 | 		}
288 | 		// now for the pivot columns
289 | 		// use the sorted slice to pick them in sorted order
290 | 		for _, vsc := range phkeys {
291 | 			//fmt.Printf("phkey is /%v/\n", vsc)
292 | 			sc, ok := v[vsc]
293 | 			if ok {
294 | 				//fmt.Printf("Found v[vsc] /%v/\n", sc)
295 | 				if *onlynum {
296 | 					if sc.ncount == 0 {
297 | 						newrow = append(newrow, *novalue)
298 | 					} else {
299 | 						newrow = append(newrow, fmt.Sprintf(*numformat, sc.sumnum))
300 | 					}
301 | 				} else {
302 | 					newrow = append(newrow, strings.Join(sc.sumstr, *strdlm))
303 | 				}
304 | 			} else {
305 | 				//fmt.Printf("NOT Found v[vsc] /%v/\n", sc)
306 | 				// nothing for this header key - put out empty strings
307 | 				newrow = append(newrow, *novalue)
308 | 			}
309 | 		}
310 | 		// now append the rest of the columns after *pivotcol
311 | 		for i := *pivotcol; i < len(row); i++ {
312 | 			if i == (*pivotsum - 1) {
313 | 				continue
314 | 			}
315 | 			newrow = append(newrow, row[i])
316 | 		}
317 | 
318 | 		// append row to orecs table
319 | 		orecs = append(orecs, newrow)
320 | 	}
321 | 
322 | 	// write out the header row
323 | 	werr := w.Write(orecs[0])
324 | 	if werr != nil {
325 | 		log.Fatal("w.Write() Error:" + werr.Error())
326 | 	}
327 | 
328 | 	// now, let's sort the table
329 | 	tbl := &table{records: orecs[1:], pivotcol: *pivotcol - 1, pivotcolcount: len(phkeys)}
330 | 
331 | 	sort.Sort(tbl)
332 | 
333 | 	werr = w.WriteAll(tbl.records)
334 | 	if werr != nil {
335 | 		log.Fatal("w.WriteAll() Error:" + werr.Error())
336 | 	}
337 | }
338 | 
339 | func usage(msg string) {
340 | 	if msg != "" {
341 | 		fmt.Println(msg)
342 | 	}
343 | 	flag.PrintDefaults()
344 | }
345 | 


--------------------------------------------------------------------------------
/pivotcsv/test1.csv:
--------------------------------------------------------------------------------
1 | A,B,C,D,E,F
2 | a1,b1,c1,d1,X,1
3 | a2,b2,c2,d2,Y,3
4 | a1,b1,c1,d1,X,3
5 | a2,b2,c2,d2,Y,3
6 | 


--------------------------------------------------------------------------------
/pivotcsv/test2.csv:
--------------------------------------------------------------------------------
 1 | A,B,C,D,E,F
 2 | a1,b1,c1,d1,X,1
 3 | a2,b2,c2,d2,X,3
 4 | a1,b1,c1,d1,X,3
 5 | a2,b2,c2,d2,X,3
 6 | a1,b1,c1,d1,Y,2
 7 | a2,b2,c2,d2,Y,4
 8 | a1,b1,c1,d1,Y,4
 9 | a2,b2,c2,d2,Y,4
10 | a1,b1,c1,d1,Z,3
11 | a2,b2,c2,d2,Z,5
12 | a1,b1,c1,d1,Z,5
13 | a2,b2,c2,d2,Z,5
14 | 


--------------------------------------------------------------------------------
/rangespec.go:
--------------------------------------------------------------------------------
  1 | package rangespec
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"math"
  6 | 	"strconv"
  7 | 	"strings"
  8 | )
  9 | 
 10 | // RangeSpec parses a range specification, such as:
 11 | // 1,3,5-8,12-
 12 | // It will return a slice of RangeSpec, being two ints,
 13 | // a start and a stop
 14 | // Ranges start at 1, not zero.
 15 | type RangeSpec struct {
 16 | 	pairs []pair
 17 | 	spec  string
 18 | 	Max   uint64
 19 | }
 20 | type pair struct {
 21 | 	start, stop uint64
 22 | }
 23 | 
 24 | // New takes a range specification string and
 25 | // returns a slice of RangeSpec structs
 26 | func New(r string) (*RangeSpec, error) {
 27 | 	// remove any whitespace as a convenience
 28 | 	r = strings.Replace(r, " ", "", -1)
 29 | 	ret := new(RangeSpec)
 30 | 	ret.pairs = make([]pair, 0)
 31 | 	ret.spec = r
 32 | 	tokens := strings.Split(r, ",")
 33 | 	for n, val := range tokens {
 34 | 		//fmt.Printf("Working on %v at %v\n", val, n)
 35 | 		// does val have a dash?
 36 | 		if strings.Contains(val, "-") {
 37 | 			// split on dash
 38 | 			ends := strings.Split(val, "-")
 39 | 			if len(ends) > 2 {
 40 | 				return nil, fmt.Errorf("RangeSpec: malformed specification:%v", val)
 41 | 			}
 42 | 			if ends[1] != "" {
 43 | 				//fmt.Print("ends[] greater than 1\n")
 44 | 				end1, err := strconv.ParseUint(ends[0], 10, 64)
 45 | 				if err != nil {
 46 | 					return nil, fmt.Errorf("RangeSpec: not a number:%v\n%v", ends[0], err)
 47 | 				}
 48 | 				end2, err := strconv.ParseUint(ends[1], 10, 64)
 49 | 				if err != nil {
 50 | 					return nil, fmt.Errorf("RangeSpec: not a number:%v\n%v", ends[1], err)
 51 | 				}
 52 | 				var rs pair
 53 | 				rs.start = end1
 54 | 				rs.stop = end2
 55 | 				ret.pairs = append(ret.pairs, rs)
 56 | 			} else {
 57 | 				//fmt.Print("ends[] == 1\n")
 58 | 				if n+1 != len(tokens) {
 59 | 					return nil, fmt.Errorf("RangeSpec: open range must be last:%v", val)
 60 | 				}
 61 | 				end1, err := strconv.ParseUint(ends[0], 10, 64)
 62 | 				if err != nil {
 63 | 					return nil, fmt.Errorf("RangeSpec: not a number:%v\n%v", ends[0], err)
 64 | 				}
 65 | 				var rs pair
 66 | 				rs.start = end1
 67 | 				rs.stop = math.MaxUint64
 68 | 				ret.pairs = append(ret.pairs, rs)
 69 | 			}
 70 | 			continue
 71 | 		} else {
 72 | 			end1, err := strconv.ParseUint(val, 10, 64)
 73 | 			if err != nil {
 74 | 				return nil, fmt.Errorf("RangeSpec: not a number:%v\n%v", val, err)
 75 | 			}
 76 | 			var rs pair
 77 | 			rs.start = end1
 78 | 			rs.stop = end1
 79 | 			ret.pairs = append(ret.pairs, rs)
 80 | 		}
 81 | 	}
 82 | 	// ensure ascending specification
 83 | 	for i := 0; i < len(ret.pairs); i++ {
 84 | 		if i == 0 {
 85 | 			if ret.pairs[i].start == 0 {
 86 | 				return nil, fmt.Errorf("RangeSpec: range must be larger zero: %v", ret.pairs[i].start)
 87 | 			}
 88 | 		}
 89 | 		if ret.pairs[i].start > ret.pairs[i].stop {
 90 | 			return nil, fmt.Errorf("RangeSpec: start (%v) must be equal or less than stop (%v)", ret.pairs[i].start, ret.pairs[i].stop)
 91 | 		}
 92 | 		if i > 0 {
 93 | 			if ret.pairs[i].start <= ret.pairs[i-1].stop {
 94 | 				return nil, fmt.Errorf("RangeSpec: start (%v) must be greater than previous stop (%v)", ret.pairs[i].start, ret.pairs[i-1].stop)
 95 | 			}
 96 | 		}
 97 | 	}
 98 | 	// set the maximum row number
 99 | 	ret.Max = ret.pairs[len(ret.pairs)-1].stop
100 | 	return ret, nil
101 | }
102 | 
103 | // InRange will test whehter a number is in the range specification
104 | func (rs *RangeSpec) InRange(num uint64) bool {
105 | 	for _, val := range rs.pairs {
106 | 		if num >= val.start && num <= val.stop {
107 | 			return true
108 | 		}
109 | 	}
110 | 	return false
111 | }
112 | 


--------------------------------------------------------------------------------
/rangespec_test.go:
--------------------------------------------------------------------------------
 1 | package rangespec
 2 | 
 3 | import (
 4 | 	"testing"
 5 | 	"math"
 6 | 	"fmt"
 7 | )
 8 | 
 9 | func TestRangeSpec(t *testing.T) {
10 |   input := " 1, 3, 5 - 8 , 12 - "
11 |   expected := []pair{{1,1},{3,3},{5,8},{12,math.MaxUint64}}
12 | 	result,err := New(input)
13 | 	if err != nil {
14 | 		t.Fatalf("[fail] expected no error for: %s, got %#v\n", input, err)
15 | 	}
16 | 
17 | 	for n := range result.pairs {
18 | 		if result.pairs[n].start != expected[n].start {
19 | 			t.Fatalf("[fail] Start[%v] should be: %v, got %v\n", n, result.pairs[n].start, expected[n].start)
20 | 		}
21 | 		if result.pairs[n].stop != expected[n].stop {
22 | 			t.Fatalf("[fail] Stop[%v] should be: %v, got %v\n", n, result.pairs[n].stop, expected[n].stop)
23 | 		}
24 | 	}
25 | 
26 | 	input = "1,3,5-,12-"
27 |   expected = nil
28 | 	result,err = New(input)
29 | 	if err == nil {
30 | 		t.Fatalf("[fail] expected error for invalid input: %s\n", input)
31 | 	}
32 | 	if result != nil {
33 | 		t.Fatalf("[fail] expected result to be nil for invalid input: %s\n", input)
34 | 	}
35 | 	fmt.Printf("Invalid error message for %v was:\n	%v\n\n",input,err)
36 | 
37 | 	input = "1,A,5-,12-"
38 |   expected = nil
39 | 	result,err = New(input)
40 | 	if err == nil {
41 | 		t.Fatalf("[fail] expected error for invalid input: %s\n", input)
42 | 	}
43 | 	if result != nil {
44 | 		t.Fatalf("[fail] expected result to be nil for invalid input: %s\n", input)
45 | 	}
46 | 	fmt.Printf("Invalid error message for %v was:\n	%v\n\n",input,err)
47 | 
48 | 	input = "1,5,3-,12-"
49 |   expected = nil
50 | 	result,err = New(input)
51 | 	if err == nil {
52 | 		t.Fatalf("[fail] expected error for invalid input: %s\n", input)
53 | 	}
54 | 	if result != nil {
55 | 		t.Fatalf("[fail] expected result to be nil for invalid input: %s\n", input)
56 | 	}
57 | 	fmt.Printf("Invalid error message for %v was:\n	%v\n\n",input,err)
58 | 
59 | 	input = "1,3,5-4,12-"
60 |   expected = nil
61 | 	result,err = New(input)
62 | 	if err == nil {
63 | 		t.Fatalf("[fail] expected error for invalid input: %s\n", input)
64 | 	}
65 | 	if result != nil {
66 | 		t.Fatalf("[fail] expected result to be nil for invalid input: %s\n", input)
67 | 	}
68 | 	fmt.Printf("Invalid error message for %v was:\n	%v\n\n",input,err)
69 | 
70 | 	input = "1,5,5-6,12-"
71 |   expected = nil
72 | 	result,err = New(input)
73 | 	if err == nil {
74 | 		t.Fatalf("[fail] expected error for invalid input: %s\n", input)
75 | 	}
76 | 	if result != nil {
77 | 		t.Fatalf("[fail] expected result to be nil for invalid input: %s\n", input)
78 | 	}
79 | 	fmt.Printf("Invalid error message for %v was:\n	%v\n\n",input,err)
80 | 
81 | 	input = " 1,3,5-8,12-"
82 | 	result,err = New(input)
83 | 	if err != nil {
84 | 		t.Fatalf("[fail] expected no error for: %s, got %#v\n", input, err)
85 | 	}
86 | 
87 | 	tests := []uint64{1,2,3,5,6,8,12,13,99}
88 | 	exptd := []bool{true,false,true,true,true,true,true,true,true}
89 | 	for n,x := range tests {
90 | 		if result.InRange(x) != exptd[n] {
91 | 			t.Fatalf("[fail] InRange error, range %v, for %v got %v\n", input, x, result.InRange(x))
92 | 		}
93 | 	}
94 | 
95 | }
96 | 


--------------------------------------------------------------------------------
/recursecsv/README.md:
--------------------------------------------------------------------------------
 1 | # Recursecsv
 2 | *Notes*
 3 | 1. It will always output the normal hierarchical columns in this order: level, root, parent, child, path, and cycle (a Yes/No)
 4 | 2. Note defaults shown in the help message below
 5 | 3. At present it can only take two columns of data, the parent and child columns. If these have other associated values, they will have to be added back in to this output.
 6 | 4. The input must have column headers, since they are re-used in the output CSV.
 7 | 
 8 | 
 9 | Use -help to show:
10 | ```
11 | $ recursecsv -help
12 | Help Message
13 | 
14 |   -child int
15 |     	Child column; default 2 (default 2)
16 |   -delimiter string
17 |     	String for path delimiter (default ">")
18 |   -help
19 |     	Show usage message
20 |   -i string
21 |     	Input CSV filename; default STDIN
22 |   -o string
23 |     	Output CSV filename; default STDOUT
24 |   -parent int
25 |     	Parent column; default 1 (default 1)
26 |   -start string
27 |     	Start value of hierarchy
28 | ```
29 | 
30 | ## Examples
31 | Example with a cyclic condition.
32 | ```
33 | $ cat test1.csv
34 | parent,child
35 | A,X
36 | A,B
37 | B,C
38 | D,E
39 | C,D
40 | X,Y
41 | Y,B
42 | E,C
43 | $ recursecsv -i test1.csv -start A
44 | 2017/12/01 09:56:39 Start at 2017-12-01 14:56:39.064464694 +0000 UTC
45 | 2017/12/01 09:56:39 Data loaded and ready to start recursing
46 | 2017/12/01 09:56:39 Working on A
47 | 2017/12/01 09:56:39 . elasped 66.33µs
48 | 2017/12/01 09:56:39 End at 2017-12-01 14:56:39.087153217 +0000 UTC
49 | 2017/12/01 09:56:39 Elapsed time 22.688732ms
50 | Level,Root,parent,child,Path,Leaf,Cycle
51 | 1,A,A,B,>A>B>,No,No
52 | 2,A,B,C,>A>B>C>,No,No
53 | 3,A,C,D,>A>B>C>D>,No,No
54 | 4,A,D,E,>A>B>C>D>E>,No,No
55 | 5,A,E,C,>A>B>C>D>E>C>,No,Yes
56 | 1,A,A,X,>A>X>,No,No
57 | 2,A,X,Y,>A>X>Y>,No,No
58 | 3,A,Y,B,>A>X>Y>B>,No,No
59 | 4,A,B,C,>A>X>Y>B>C>,No,No
60 | 5,A,C,D,>A>X>Y>B>C>D>,No,No
61 | 6,A,D,E,>A>X>Y>B>C>D>E>,No,No
62 | 7,A,E,C,>A>X>Y>B>C>D>E>C>,No,Yes
63 | $ 
64 | ```
65 | Simple no cycle test.
66 | ```
67 | $ recursecsv -i test2.csv -start A
68 | 2017/12/01 09:58:39 Start at 2017-12-01 14:58:39.319162864 +0000 UTC
69 | 2017/12/01 09:58:39 Data loaded and ready to start recursing
70 | 2017/12/01 09:58:39 Working on A
71 | 2017/12/01 09:58:39 . elasped 87.756µs
72 | 2017/12/01 09:58:39 End at 2017-12-01 14:58:39.319813 +0000 UTC
73 | 2017/12/01 09:58:39 Elapsed time 650.482µs
74 | Level,Root,parent,child,Path,Leaf,Cycle
75 | 1,A,A,B,>A>B>,No,No
76 | 2,A,B,C,>A>B>C>,No,No
77 | 3,A,C,D,>A>B>C>D>,No,No
78 | 4,A,D,E,>A>B>C>D>E>,Yes,No
79 | ```
80 | 


--------------------------------------------------------------------------------
/recursecsv/recursecsv.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"bufio"
  5 | 	"encoding/csv"
  6 | 	"flag"
  7 | 	"fmt"
  8 | 	"io"
  9 | 	"log"
 10 | 	"os"
 11 | 	"sort"
 12 | 	"strings"
 13 | 	"time"
 14 | )
 15 | 
 16 | var w *csv.Writer
 17 | 
 18 | var parent = flag.Int("parent", 1, "Parent column; default 1")
 19 | var child = flag.Int("child", 2, "Child column; default 2")
 20 | var start = flag.String("start", "", "Start value of hierarchy;\nif first letter is ampersand, use as a file with a list of values to process")
 21 | var delimiter = flag.String("delimiter", ">", "String for path delimiter")
 22 | var input = flag.String("i", "", "Input CSV filename; default STDIN")
 23 | var output = flag.String("o", "", "Output CSV filename; default STDOUT")
 24 | var headers = flag.Bool("headers", true, "Input CSV has headers")
 25 | var help = flag.Bool("help", false, "Show usage message")
 26 | var info = flag.Bool("info", true, "Show info messages during processing")
 27 | 
 28 | func main() {
 29 | 	flag.Parse()
 30 | 
 31 | 	if *help {
 32 | 		usage("Help Message")
 33 | 	}
 34 | 
 35 | 	if *start == "" {
 36 | 		usage("Start value is missing")
 37 | 	}
 38 | 	now := time.Now().UTC()
 39 | 	display(fmt.Sprintf("Start at %v", now))
 40 | 
 41 | 	var startvals []string
 42 | 	if strings.HasPrefix(*start, "@") {
 43 | 		f, ferr := os.Open((*start)[1:])
 44 | 		if ferr != nil {
 45 | 			log.Fatalf("os.Open() error on %v\n:%v", (*start)[1:], ferr)
 46 | 		}
 47 | 		defer f.Close()
 48 | 		scanner := bufio.NewScanner(f)
 49 | 		for scanner.Scan() {
 50 | 			startvals = append(startvals, scanner.Text())
 51 | 		}
 52 | 	} else {
 53 | 		startvals = append(startvals, *start)
 54 | 	}
 55 | 
 56 | 	// open output file
 57 | 	if *output == "" {
 58 | 		w = csv.NewWriter(os.Stdout)
 59 | 	} else {
 60 | 		fo, foerr := os.Create(*output)
 61 | 		if foerr != nil {
 62 | 			log.Fatal("os.Create() Error:" + foerr.Error())
 63 | 		}
 64 | 		defer fo.Close()
 65 | 		w = csv.NewWriter(fo)
 66 | 	}
 67 | 
 68 | 	// open input file
 69 | 	var r *csv.Reader
 70 | 	if *input == "" {
 71 | 		r = csv.NewReader(os.Stdin)
 72 | 	} else {
 73 | 		fi, fierr := os.Open(*input)
 74 | 		if fierr != nil {
 75 | 			log.Fatal("os.Open() Error:" + fierr.Error())
 76 | 		}
 77 | 		defer fi.Close()
 78 | 		r = csv.NewReader(fi)
 79 | 	}
 80 | 
 81 | 	// ignore expectations of fields per row
 82 | 	r.FieldsPerRecord = 2
 83 | 
 84 | 	// read loop for CSV to load into memory
 85 | 	var row uint64
 86 | 	pcol := *parent - 1
 87 | 	ccol := *child - 1
 88 | 	parents := make(map[string][]string)
 89 | 	for {
 90 | 		// read the csv file
 91 | 		cells, rerr := r.Read()
 92 | 		if rerr == io.EOF {
 93 | 			break
 94 | 		}
 95 | 		if rerr != nil {
 96 | 			log.Fatalf("csv.Read [row %v]:\n%v\n", row, rerr)
 97 | 		}
 98 | 		if row == 0 {
 99 | 			if *headers == false {
100 | 				recurseHeaders[2] = "Parent"
101 | 				recurseHeaders[3] = "Child"
102 | 			} else {
103 | 				recurseHeaders[2] = cells[pcol]
104 | 				recurseHeaders[3] = cells[ccol]
105 | 			}
106 | 			writeRow(recurseHeaders[0], recurseHeaders[1],
107 | 				recurseHeaders[2], recurseHeaders[3],
108 | 				recurseHeaders[4], recurseHeaders[5], recurseHeaders[6],
109 | 				true)
110 | 			row++
111 | 			continue
112 | 		}
113 | 		_, ok := parents[cells[pcol]]
114 | 		if ok {
115 | 			parents[cells[pcol]] = append(parents[cells[pcol]], cells[ccol])
116 | 		} else {
117 | 			parents[cells[pcol]] = make([]string, 0)
118 | 			parents[cells[pcol]] = append(parents[cells[pcol]], cells[ccol])
119 | 		}
120 | 		row++
121 | 	}
122 | 
123 | 	display("Data loaded and ready to start recursing")
124 | 	for _, v := range startvals {
125 | 		begin := time.Now().UTC()
126 | 		display(fmt.Sprintf("Working on %v", v))
127 | 		recurse(0, v, v, *delimiter+v, parents)
128 | 		display(fmt.Sprintf(". elasped %v", time.Since(begin)))
129 | 	}
130 | 	stop := time.Now().UTC()
131 | 	elapsed := time.Since(now)
132 | 	display(fmt.Sprintf("End at %v", stop))
133 | 	display(fmt.Sprintf("Elapsed time %v", elapsed))
134 | 	w.Flush()
135 | }
136 | 
137 | func recurse(level int, root, start, path string, parents map[string][]string) {
138 | 	// get value from map for start node
139 | 	//v, ok := parents[start]
140 | 	//if !ok {
141 | 	//	return // at a leaf node
142 | 	//}
143 | 
144 | 	// sort the children
145 | 	v := parents[start]
146 | 	sort.Strings(v)
147 | 
148 | 	level++ // increment depth
149 | 	for _, child := range v {
150 | 		looptest := *delimiter + child + *delimiter
151 | 		cycle := "No"
152 | 		if strings.Contains(path, looptest) {
153 | 			cycle = "Yes"
154 | 		}
155 | 		sLevel := fmt.Sprintf("%v", level)
156 | 		sPath := path + *delimiter + child
157 | 		leaf := "Yes"
158 | 		_, ok := parents[child]
159 | 		if ok {
160 | 			leaf = "No"
161 | 		}
162 | 		writeRow(sLevel, root, start, child, sPath, leaf, cycle, false)
163 | 		if cycle == "No" && ok {
164 | 			recurse(level, root, child, sPath, parents)
165 | 		}
166 | 	}
167 | 
168 | }
169 | 
170 | func writeRow(level, root, parent, child, path, leaf, cycle string, headerrow bool) {
171 | 	var cells []string
172 | 	cells = append(cells, level)
173 | 	cells = append(cells, root)
174 | 	cells = append(cells, parent)
175 | 	cells = append(cells, child)
176 | 	if headerrow {
177 | 		cells = append(cells, path)
178 | 	} else {
179 | 		cells = append(cells, path+*delimiter)
180 | 	}
181 | 	cells = append(cells, leaf)
182 | 	cells = append(cells, cycle)
183 | 
184 | 	err := w.Write(cells)
185 | 	if err != nil {
186 | 		log.Fatalf("csv.Write:\n%v\n", err)
187 | 	}
188 | 
189 | }
190 | 
191 | func usage(msg string) {
192 | 	fmt.Println(msg + "\n")
193 | 	flag.PrintDefaults()
194 | 	os.Exit(0)
195 | }
196 | 
197 | func display(msg string) {
198 | 	if *info {
199 | 		log.Print(msg + "\n")
200 | 	}
201 | }
202 | 
203 | var recurseHeaders []string
204 | 
205 | func init() {
206 | 	recurseHeaders = append(recurseHeaders,
207 | 		"Level", "Root", "", "", "Path", "Leaf", "Cycle")
208 | }
209 | 


--------------------------------------------------------------------------------
/recursecsv/test1.csv:
--------------------------------------------------------------------------------
 1 | parent,child
 2 | A,X
 3 | A,B
 4 | B,C
 5 | D,E
 6 | C,D
 7 | X,Y
 8 | Y,B
 9 | E,C
10 | 


--------------------------------------------------------------------------------
/recursecsv/test2.csv:
--------------------------------------------------------------------------------
1 | parent,child
2 | A,B
3 | B,C
4 | D,E
5 | C,D
6 | 


--------------------------------------------------------------------------------
/recursedata/recursedata.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"bufio"
  5 | 	"encoding/csv"
  6 | 	"encoding/json"
  7 | 	"flag"
  8 | 	"fmt"
  9 | 	"io"
 10 | 	"log"
 11 | 	"os"
 12 | 	"sort"
 13 | 	"strconv"
 14 | 	"strings"
 15 | 	"time"
 16 | )
 17 | 
 18 | var w *csv.Writer
 19 | var wpath *csv.Writer
 20 | 
 21 | var parent = flag.Int("parent", 1, "Parent column; default 1")
 22 | var child = flag.Int("child", 2, "Child column; default 2")
 23 | var start = flag.String("start", "", "Start value of hierarchy;\nif first letter is ampersand, use as a file with a list of values to process")
 24 | var delimiter = flag.String("delimiter", ">", "String for path delimiter")
 25 | var input = flag.String("i", "", "Input CSV filename; default STDIN")
 26 | var output = flag.String("o", "", "Output CSV filename; default STDOUT")
 27 | var headers = flag.Bool("headers", true, "Input CSV has headers")
 28 | var help = flag.Bool("help", false, "Show usage message")
 29 | var info = flag.Bool("info", true, "Show info messages during processing")
 30 | var data = flag.String("data", "", "Comma list of child data columns to include")
 31 | var pathfile = flag.String("path", "", "Output CSV file for path data")
 32 | 
 33 | func main() {
 34 | 	flag.Parse()
 35 | 
 36 | 	if *help {
 37 | 		usage("Help Message")
 38 | 	}
 39 | 
 40 | 	if *start == "" {
 41 | 		usage("Start value is missing")
 42 | 	}
 43 | 	var dataVals []string
 44 | 	var dataVal []int
 45 | 	if *data != "" {
 46 | 		// split into the ints and store away for use later
 47 | 		dataVals = strings.Split(*data, ",")
 48 | 		dataVal = make([]int, len(dataVals))
 49 | 		for i := range dataVals {
 50 | 			n, err := strconv.Atoi(dataVals[i])
 51 | 			if err != nil {
 52 | 				log.Fatalf("strconv.Atoi() error on %v\n:%v", dataVals[i], err)
 53 | 			}
 54 | 			dataVal[i] = n
 55 | 		}
 56 | 		if *pathfile == "" {
 57 | 			log.Fatal("Cannot specify data columns without a path CSV filename")
 58 | 		}
 59 | 	}
 60 | 
 61 | 	var startvals []string
 62 | 	if strings.HasPrefix(*start, "@") {
 63 | 		f, ferr := os.Open((*start)[1:])
 64 | 		if ferr != nil {
 65 | 			log.Fatalf("os.Open() error on %v\n:%v", (*start)[1:], ferr)
 66 | 		}
 67 | 		defer f.Close()
 68 | 		scanner := bufio.NewScanner(f)
 69 | 		for scanner.Scan() {
 70 | 			startvals = append(startvals, scanner.Text())
 71 | 		}
 72 | 	} else {
 73 | 		startvals = append(startvals, *start)
 74 | 	}
 75 | 
 76 | 	// open output file
 77 | 	if *output == "" {
 78 | 		w = csv.NewWriter(os.Stdout)
 79 | 	} else {
 80 | 		fo, foerr := os.Create(*output)
 81 | 		if foerr != nil {
 82 | 			log.Fatal("os.Create() Error:" + foerr.Error())
 83 | 		}
 84 | 		defer fo.Close()
 85 | 		w = csv.NewWriter(fo)
 86 | 	}
 87 | 
 88 | 	// open output path file
 89 | 	if *pathfile != "" {
 90 | 		if *data == "" {
 91 | 			log.Fatal("Cannot specify path CSV filename without data columns")
 92 | 		}
 93 | 		pfo, pfoerr := os.Create(*pathfile)
 94 | 		if pfoerr != nil {
 95 | 			log.Fatal("os.Create() Error:" + pfoerr.Error())
 96 | 		}
 97 | 		defer pfo.Close()
 98 | 		wpath = csv.NewWriter(pfo)
 99 | 		// write the headers
100 | 		err := wpath.Write(pathHeaders)
101 | 		if err != nil {
102 | 			log.Fatal("wpath.Write(pathHeaders) Error:" + err.Error())
103 | 		}
104 | 	}
105 | 
106 | 	// open input file
107 | 	var r *csv.Reader
108 | 	if *input == "" {
109 | 		r = csv.NewReader(os.Stdin)
110 | 	} else {
111 | 		fi, fierr := os.Open(*input)
112 | 		if fierr != nil {
113 | 			log.Fatal("os.Open() Error:" + fierr.Error())
114 | 		}
115 | 		defer fi.Close()
116 | 		r = csv.NewReader(fi)
117 | 	}
118 | 
119 | 	// ignore expectations of fields per row
120 | 	r.FieldsPerRecord = -1
121 | 
122 | 	now := time.Now().UTC()
123 | 	display(fmt.Sprintf("Start at %v", now))
124 | 
125 | 	// read loop for CSV to load into memory
126 | 	var row uint64
127 | 	pcol := *parent - 1
128 | 	ccol := *child - 1
129 | 	parents := make(map[string]map[string][][]string)
130 | 	for {
131 | 		// read the csv file
132 | 		cells, rerr := r.Read()
133 | 		if rerr == io.EOF {
134 | 			break
135 | 		}
136 | 		if rerr != nil {
137 | 			log.Fatalf("csv.Read [row %v]:\n%v\n", row, rerr)
138 | 		}
139 | 		if row == 0 {
140 | 			if *headers == false {
141 | 				recurseHeaders[2] = "Parent"
142 | 				recurseHeaders[3] = "Child"
143 | 			} else {
144 | 				recurseHeaders[2] = cells[pcol]
145 | 				recurseHeaders[3] = cells[ccol]
146 | 			}
147 | 			writeRow(recurseHeaders[0], recurseHeaders[1],
148 | 				recurseHeaders[2], recurseHeaders[3],
149 | 				nil, recurseHeaders[5], recurseHeaders[6],
150 | 				true)
151 | 			row++
152 | 			continue
153 | 		}
154 | 		childmap, ok := parents[cells[pcol]]
155 | 
156 | 		if ok {
157 | 			// does the child exist in the map?
158 | 			_, childOk := childmap[cells[ccol]]
159 | 			if childOk {
160 | 				// is a child table needed?
161 | 				if *data == "" {
162 | 					// no table needed
163 | 					// child is in the map already
164 | 					// nothing to do!
165 | 				} else {
166 | 					childTable := childmap[cells[ccol]]
167 | 					// child data table exists, add a new row
168 | 					newrow := make([]string, 0)
169 | 					for i := range dataVal {
170 | 						newrow = append(newrow, cells[dataVal[i]-1])
171 | 					}
172 | 					childTable = append(childTable, newrow)
173 | 					// put it back
174 | 					childmap[cells[ccol]] = childTable
175 | 				}
176 | 			} else {
177 | 				// Child is not in the map; add it
178 | 				// is a child table needed?
179 | 				if *data == "" {
180 | 					// no table needed
181 | 					childmap[cells[ccol]] = nil
182 | 				} else {
183 | 					// child data table not exists, create it first
184 | 					childTable := make([][]string, 0)
185 | 					// now make the first row for this new table
186 | 					newrow := make([]string, 0)
187 | 					for i := range dataVal {
188 | 						newrow = append(newrow, cells[dataVal[i]-1])
189 | 					}
190 | 					childTable = append(childTable, newrow)
191 | 					// put it back
192 | 					childmap[cells[ccol]] = childTable
193 | 				}
194 | 			}
195 | 			// put it back into the parent map
196 | 			parents[cells[pcol]] = childmap // do I need this??
197 | 		} else {
198 | 			// child map does not exist
199 | 			newChildMap := make(map[string][][]string)
200 | 			if *data == "" {
201 | 				// no table needed
202 | 				newChildMap[cells[ccol]] = nil
203 | 			} else {
204 | 				// child data table needed, create it first
205 | 				childTable := make([][]string, 0)
206 | 				// now make the first row for this new table
207 | 				newrow := make([]string, 0)
208 | 				for i := range dataVal {
209 | 					newrow = append(newrow, cells[dataVal[i]-1])
210 | 				}
211 | 				childTable = append(childTable, newrow)
212 | 				// put it back
213 | 				newChildMap[cells[ccol]] = childTable
214 | 			}
215 | 			// add to parent map
216 | 			parents[cells[pcol]] = newChildMap
217 | 		}
218 | 		row++
219 | 	}
220 | 
221 | 	display("Data loaded and ready to start recursing")
222 | 	for _, v := range startvals {
223 | 		begin := time.Now().UTC()
224 | 		display(fmt.Sprintf("Working on %v", v))
225 | 		if *data == "" {
226 | 			recurse(0, v, v, nil, nil, parents)
227 | 		} else {
228 | 			initpath := make([]string, 0)
229 | 			initpath = append(initpath, v)
230 | 			initChildData := make([]childData, 0)
231 | 			recurse(0, v, v, initpath, initChildData, parents)
232 | 		}
233 | 		display(fmt.Sprintf(". elasped %v", time.Since(begin)))
234 | 	}
235 | 	stop := time.Now().UTC()
236 | 	elapsed := time.Since(now)
237 | 	w.Flush()
238 | 	if wpath != nil {
239 | 		wpath.Flush()
240 | 	}
241 | 	display(fmt.Sprintf("End at %v", stop))
242 | 	display(fmt.Sprintf("Elapsed time %v", elapsed))
243 | }
244 | 
245 | type childData struct {
246 | 	child string
247 | 	data  [][]string
248 | }
249 | 
250 | func recurse(level int, root, start string, path []string,
251 | 	pathData []childData,
252 | 	parents map[string]map[string][][]string) {
253 | 
254 | 	// sort the children
255 | 	childmap := parents[start]
256 | 	var keys []string
257 | 	for k := range childmap {
258 | 		keys = append(keys, k)
259 | 	}
260 | 	sort.Strings(keys)
261 | 
262 | 	level++ // increment depth
263 | 	for _, child := range keys {
264 | 		cycle := contains(path, child)
265 | 		sLevel := fmt.Sprintf("%v", level)
266 | 		sPath := make([]string, len(path))
267 | 		copy(sPath, path)
268 | 		sPath = append(sPath, child)
269 | 		leaf := "Yes"
270 | 		_, ok := parents[child]
271 | 		if ok {
272 | 			leaf = "No"
273 | 		}
274 | 		writeRow(sLevel, root, start, child, sPath, leaf, cycle, false)
275 | 		var newPathData []childData
276 | 		if pathData != nil {
277 | 			newPathData = make([]childData, len(pathData))
278 | 			copy(newPathData, pathData)
279 | 			newChildData := childData{}
280 | 			newChildData.child = child
281 | 			newChildData.data = make([][]string, len(childmap[child]))
282 | 			copy(newChildData.data, childmap[child])
283 | 			newPathData = append(newPathData, newChildData)
284 | 			writePath(root, newPathData)
285 | 		}
286 | 		if cycle == "No" && ok {
287 | 			recurse(level, root, child, sPath, newPathData, parents)
288 | 		}
289 | 	}
290 | 
291 | }
292 | 
293 | func writeRow(level, root, parent, child string,
294 | 	path []string, leaf, cycle string, headerrow bool) {
295 | 
296 | 	var cells []string
297 | 	cells = append(cells, level)
298 | 	cells = append(cells, root)
299 | 	cells = append(cells, parent)
300 | 	cells = append(cells, child)
301 | 	if headerrow {
302 | 		cells = append(cells, recurseHeaders[4])
303 | 	} else {
304 | 		pathString := strings.Join(path, *delimiter)
305 | 		// put a delimiter at beginning and end
306 | 		cells = append(cells, *delimiter+pathString+*delimiter)
307 | 	}
308 | 	cells = append(cells, leaf)
309 | 	cells = append(cells, cycle)
310 | 
311 | 	err := w.Write(cells)
312 | 	if err != nil {
313 | 		log.Fatalf("csv.Write:\n%v\n", err)
314 | 	}
315 | }
316 | 
317 | func writePath(root string, pathData []childData) {
318 | 	cells := make([]string, 0)
319 | 	cells = append(cells, root)
320 | 	cells = append(cells, pathData[len(pathData)-1].child)
321 | 	for _, cdata := range pathData {
322 | 		jsonVal, jsonErr := json.Marshal(cdata.data)
323 | 		if jsonErr != nil {
324 | 			log.Fatalf("json.Marshal:\n%v\n", jsonErr)
325 | 		}
326 | 		cells = append(cells, string(jsonVal))
327 | 		cells = append(cells, cdata.child)
328 | 	}
329 | 	err := wpath.Write(cells)
330 | 	if err != nil {
331 | 		log.Fatalf("csv.Write:\n%v\n", err)
332 | 	}
333 | }
334 | 
335 | func usage(msg string) {
336 | 	fmt.Println(msg + "\n")
337 | 	flag.PrintDefaults()
338 | 	os.Exit(0)
339 | }
340 | 
341 | func display(msg string) {
342 | 	if *info {
343 | 		log.Print(msg + "\n")
344 | 	}
345 | }
346 | 
347 | var recurseHeaders []string
348 | var pathHeaders []string
349 | 
350 | func init() {
351 | 	recurseHeaders = append(recurseHeaders,
352 | 		"Level", "Root", "", "", "Path", "Leaf", "Cycle")
353 | 	pathHeaders = append(pathHeaders, "root", "child",
354 | 		"data1", "child1",
355 | 		"data2", "child2",
356 | 		"data3", "child3",
357 | 		"data4", "child4",
358 | 		"data5", "child5",
359 | 		"data6", "child6",
360 | 		"data7", "child7",
361 | 		"data8", "child8",
362 | 		"data9", "child9",
363 | 		"data10", "child10",
364 | 		"data11", "child11",
365 | 		"data12", "child12",
366 | 		"data13", "child13",
367 | 		"data14", "child14",
368 | 		"data15", "child15",
369 | 	)
370 | }
371 | 
372 | func contains(path []string, value string) string {
373 | 	for _, v := range path {
374 | 		if v == value {
375 | 			return "Yes"
376 | 		}
377 | 	}
378 | 	return "No"
379 | }
380 | 
381 | /* Code Graveyard
382 | func writePathRow(c []string, d []string, child string) {
383 | 	numcols := len(c) + len(d) + 1
384 | 	row := make([]string, numcols)
385 | 	i := 0
386 | 	for _, v := range c {
387 | 		row[i] = v
388 | 		i++
389 | 	}
390 | 	for _, v := range d {
391 | 		row[i] = v
392 | 		i++
393 | 	}
394 | 	row[i] = child
395 | 	err := wpath.Write(row)
396 | 	if err != nil {
397 | 		log.Fatalf("csv.Write:\n%v\n", err)
398 | 	}
399 | 
400 | }
401 | 
402 | func writePath(root string, pathData []childData) {
403 | 	cells := make([]string, 0)
404 | 	cells = append(cells, root)
405 | 	for _, cdata := range pathData {
406 | 		for _, val := range cdata.data {
407 | 			cells = append(cells, val...)
408 | 		}
409 | 		cells = append(cells, cdata.child)
410 | 	}
411 | 	err := wpath.Write(cells)
412 | 	if err != nil {
413 | 		log.Fatalf("csv.Write:\n%v\n", err)
414 | 	}
415 | }
416 | 
417 | func writePath(root string, pathData []childData) {
418 | 	cells := make([]string, 0)
419 | 	cells = append(cells, root)
420 | 	for _, cdata := range pathData {
421 | 		jsonVal, jsonErr := json.Marshal(cdata.data)
422 | 		if jsonErr != nil {
423 | 			log.Fatalf("json.Marshal:\n%v\n", jsonErr)
424 | 		}
425 | 		cells = append(cells, string(jsonVal))
426 | 		cells = append(cells, cdata.child)
427 | 	}
428 | 	err := wpath.Write(cells)
429 | 	if err != nil {
430 | 		log.Fatalf("csv.Write:\n%v\n", err)
431 | 	}
432 | }
433 | 
434 | */
435 | 


--------------------------------------------------------------------------------
/reordercsv/README.md:
--------------------------------------------------------------------------------
 1 | # Reordercsv
 2 | Use -help to show:
 3 | ```
 4 | $ reordercsv -help
 5 | Help Message
 6 | 
 7 |   -c string
 8 |     	Order of columns from input
 9 |   -headers
10 |     	CSV has headers (default true)
11 |   -help
12 |     	Show usage message
13 |   -i string
14 |     	Input CSV filename; default STDIN
15 |   -keep
16 |     	Keep CSV headers on output (default true)
17 |   -o string
18 |     	Output CSV filename; default STDOUT
19 | $
20 | ```
21 | Example:
22 | ```
23 | $ cat test1.csv
24 | A,B,C,D,E,F,G,H,I
25 | 1,1,1,1,1,1,1,1,1
26 | 2,2,2,2,2,2,2,2,2
27 | 3,3,3,3,3,3,3,3,3
28 | 4,4,4,4,4,4,4,4,4
29 | 5,5,5,5,5,5,5,5,5
30 | 6,6,6,6,6,6,6,6,6
31 | 7,7,7,7,7,7,7,7,7
32 | 8,8,8,8,8,8,8,8,8
33 | 9,9,9,9,9,9,9,9,9
34 | $ reordercsv -i test1.csv -c 3,2,1,1
35 | C,B,A,A
36 | 1,1,1,1
37 | 2,2,2,2
38 | 3,3,3,3
39 | 4,4,4,4
40 | 5,5,5,5
41 | 6,6,6,6
42 | 7,7,7,7
43 | 8,8,8,8
44 | 9,9,9,9
45 | $
46 | ```
47 | 
48 | 


--------------------------------------------------------------------------------
/reordercsv/reordercsv.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"encoding/csv"
  5 | 	"flag"
  6 | 	"fmt"
  7 | 	"io"
  8 | 	"log"
  9 | 	"os"
 10 | 	"strconv"
 11 | 	"strings"
 12 | )
 13 | 
 14 | func main() {
 15 | 	cols := flag.String("c", "", "Order of columns from input")
 16 | 	input := flag.String("i", "", "Input CSV filename; default STDIN")
 17 | 	output := flag.String("o", "", "Output CSV filename; default STDOUT")
 18 | 	headers := flag.Bool("headers", true, "CSV has headers")
 19 | 	keep := flag.Bool("keep", true, "Keep CSV headers on output")
 20 | 	help := flag.Bool("help", false, "Show usage message")
 21 | 	flag.Parse()
 22 | 
 23 | 	if *help {
 24 | 		usage("Help Message")
 25 | 	}
 26 | 
 27 | 	if *cols == "" {
 28 | 		usage("Missing new order of columns")
 29 | 	}
 30 | 
 31 | 	tokens := strings.Split(*cols, ",")
 32 | 	outn := make([]int, len(tokens))
 33 | 
 34 | 	for n := range tokens {
 35 | 		i, err := strconv.Atoi(tokens[n])
 36 | 		if err != nil {
 37 | 			log.Fatalf("Value not a number:%v\n", tokens[n])
 38 | 		}
 39 | 		if i < 1 {
 40 | 			log.Fatalf("Columns start at one:%v\n", tokens[n])
 41 | 		}
 42 | 		outn[n] = i
 43 | 	}
 44 | 
 45 | 	if *keep {
 46 | 		if !*headers {
 47 | 			log.Fatal("Cannot keep headers you don't have!")
 48 | 		}
 49 | 	}
 50 | 	// open output file
 51 | 	var w *csv.Writer
 52 | 	if *output == "" {
 53 | 		w = csv.NewWriter(os.Stdout)
 54 | 	} else {
 55 | 		fo, foerr := os.Create(*output)
 56 | 		if foerr != nil {
 57 | 			log.Fatal("os.Create() Error:" + foerr.Error())
 58 | 		}
 59 | 		defer fo.Close()
 60 | 		w = csv.NewWriter(fo)
 61 | 	}
 62 | 
 63 | 	// open input file
 64 | 	var r *csv.Reader
 65 | 	if *input == "" {
 66 | 		r = csv.NewReader(os.Stdin)
 67 | 	} else {
 68 | 		fi, fierr := os.Open(*input)
 69 | 		if fierr != nil {
 70 | 			log.Fatal("os.Open() Error:" + fierr.Error())
 71 | 		}
 72 | 		defer fi.Close()
 73 | 		r = csv.NewReader(fi)
 74 | 	}
 75 | 
 76 | 	// ignore expectations of fields per row
 77 | 	r.FieldsPerRecord = -1
 78 | 
 79 | 	// read loop for CSV
 80 | 	outs := make([]string, len(tokens))
 81 | 
 82 | 	var row uint64
 83 | 	for {
 84 | 		// read the csv file
 85 | 		cells, rerr := r.Read()
 86 | 		if rerr == io.EOF {
 87 | 			break
 88 | 		}
 89 | 		if rerr != nil {
 90 | 			log.Fatalf("csv.Read:\n%v\n", rerr)
 91 | 		}
 92 | 		if row == 0 {
 93 | 			if *headers && *keep {
 94 | 			} else {
 95 | 				row++
 96 | 				continue
 97 | 			}
 98 | 		}
 99 | 		for n, m := range outn {
100 | 			outs[n] = cells[m-1]
101 | 		}
102 | 		err := w.Write(outs)
103 | 		if err != nil {
104 | 			log.Fatalf("csv.Write:\n%v\n", err)
105 | 		}
106 | 		row++
107 | 	}
108 | 	w.Flush()
109 | }
110 | 
111 | func usage(msg string) {
112 | 	fmt.Println(msg + "\n")
113 | 	flag.PrintDefaults()
114 | 	os.Exit(0)
115 | }
116 | 


--------------------------------------------------------------------------------
/reordercsv/test1.csv:
--------------------------------------------------------------------------------
 1 | A,B,C,D,E,F,G,H,I
 2 | 1,1,1,1,1,1,1,1,1
 3 | 2,2,2,2,2,2,2,2,2
 4 | 3,3,3,3,3,3,3,3,3
 5 | 4,4,4,4,4,4,4,4,4
 6 | 5,5,5,5,5,5,5,5,5
 7 | 6,6,6,6,6,6,6,6,6
 8 | 7,7,7,7,7,7,7,7,7
 9 | 8,8,8,8,8,8,8,8,8
10 | 9,9,9,9,9,9,9,9,9
11 | 


--------------------------------------------------------------------------------
/searchcsv/README.md:
--------------------------------------------------------------------------------
 1 | # Searchcsv
 2 | Use the -help argument to show:
 3 | 
 4 | ```
 5 | $ searchcsv -help
 6 | Help Message
 7 | 
 8 | Usage: searchcsv [options]
 9 |   -c string
10 |     	Range spec for columns
11 |   -headers
12 |     	CSV has headers (default true)
13 |   -help
14 |     	Show help message
15 |   -i string
16 |     	Input CSV filename; default STDIN
17 |   -keep
18 |     	Keep CSV headers on output (default true)
19 |   -o string
20 |     	Output CSV filename; default STDOUT
21 |   -pattern string
22 |     	Search pattern
23 |   -re
24 |     	Search pattern is a regular expression
25 |   -v	Omit rather than include matched rows
26 | ```
27 | Examples:
28 | ```
29 | $ cat test1.csv
30 | A,B,C
31 | abc,def,Army
32 | one,two,Navy
33 | go,abacus,Marine
34 | Android,Ubuntu,Linux
35 | $ searchcsv -c 1 -pattern "y$" < test1.csv
36 | A,B,C
37 | $ searchcsv -c 3 -pattern "y$" < test1.csv
38 | A,B,C
39 | $ searchcsv -c 3 -pattern "y$" -re=true < test1.csv
40 | A,B,C
41 | abc,def,Army
42 | one,two,Navy
43 | $ searchcsv -c 3 -pattern "[mu][xy]$" -re=true < test1.csv
44 | A,B,C
45 | abc,def,Army
46 | Android,Ubuntu,Linux
47 | $ searchcsv -v -c 3 -pattern "[mu][xy]$" -re=true < test1.csv
48 | A,B,C
49 | one,two,Navy
50 | go,abacus,Marine
51 | ```
52 | 
53 | 
54 | 


--------------------------------------------------------------------------------
/searchcsv/searchcsv.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"encoding/csv"
  5 | 	"flag"
  6 | 	"fmt"
  7 | 	"io"
  8 | 	"log"
  9 | 	"os"
 10 | 	"regexp"
 11 | 	"strings"
 12 | 
 13 | 	"github.com/mandolyte/csv-utils"
 14 | )
 15 | 
 16 | var cs *rangespec.RangeSpec
 17 | var re *regexp.Regexp
 18 | 
 19 | func main() {
 20 | 	pattern := flag.String("pattern", "", "Search pattern")
 21 | 	suppress := flag.Bool("v", false, "Omit rather than include matched rows")
 22 | 	cols := flag.String("c", "", "Range spec for columns")
 23 | 	input := flag.String("i", "", "Input CSV filename; default STDIN")
 24 | 	output := flag.String("o", "", "Output CSV filename; default STDOUT")
 25 | 	headers := flag.Bool("headers", true, "CSV has headers")
 26 | 	keep := flag.Bool("keep", true, "Keep CSV headers on output")
 27 | 	regex := flag.Bool("re", false, "Search pattern is a regular expression")
 28 | 	help := flag.Bool("help", false, "Show help message")
 29 | 	flag.Parse()
 30 | 
 31 | 	if *help {
 32 | 		usage("Help Message")
 33 | 		os.Exit(0)
 34 | 	}
 35 | 
 36 | 	/* check parameters */
 37 | 	if *pattern == "" {
 38 | 		usage("Required: Missing pattern for search")
 39 | 		os.Exit(0)
 40 | 	}
 41 | 
 42 | 	if *regex {
 43 | 		re = regexp.MustCompile(*pattern)
 44 | 	}
 45 | 
 46 | 	if *cols != "" {
 47 | 		var cserr error
 48 | 		cs, cserr = rangespec.New(*cols)
 49 | 		if cserr != nil {
 50 | 			log.Fatalf("Invalid column range spec:%v, Error:\n%v\n", *cols, cserr)
 51 | 		}
 52 | 	}
 53 | 
 54 | 	if *keep {
 55 | 		if !*headers {
 56 | 			log.Fatal("Cannot keep headers you don't have!")
 57 | 		}
 58 | 	}
 59 | 	// open output file
 60 | 	var w *csv.Writer
 61 | 	if *output == "" {
 62 | 		w = csv.NewWriter(os.Stdout)
 63 | 	} else {
 64 | 		fo, foerr := os.Create(*output)
 65 | 		if foerr != nil {
 66 | 			log.Fatal("os.Create() Error:" + foerr.Error())
 67 | 		}
 68 | 		defer fo.Close()
 69 | 		w = csv.NewWriter(fo)
 70 | 	}
 71 | 
 72 | 	// open input file
 73 | 	var r *csv.Reader
 74 | 	if *input == "" {
 75 | 		r = csv.NewReader(os.Stdin)
 76 | 	} else {
 77 | 		fi, fierr := os.Open(*input)
 78 | 		if fierr != nil {
 79 | 			log.Fatal("os.Open() Error:" + fierr.Error())
 80 | 		}
 81 | 		defer fi.Close()
 82 | 		r = csv.NewReader(fi)
 83 | 	}
 84 | 
 85 | 	// ignore expectations of fields per row
 86 | 	r.FieldsPerRecord = -1
 87 | 
 88 | 	// read loop for CSV
 89 | 	var row uint64
 90 | 	for {
 91 | 		// read the csv file
 92 | 		cells, rerr := r.Read()
 93 | 		if rerr == io.EOF {
 94 | 			break
 95 | 		}
 96 | 		if rerr != nil {
 97 | 			log.Fatalf("csv.Read:\n%v\n", rerr)
 98 | 		}
 99 | 		if (row == 0) && *headers && *keep {
100 | 			row = 1
101 | 			err := w.Write(cells)
102 | 			if err != nil {
103 | 				log.Fatalf("csv.Write:\n%v\n", err)
104 | 			}
105 | 			continue
106 | 		}
107 | 		row++
108 | 		// test row/columns for a match
109 | 		if patternMatches(cells, *pattern, *suppress) {
110 | 			err := w.Write(cells)
111 | 			if err != nil {
112 | 				log.Fatalf("csv.Write:\n%v\n", err)
113 | 			}
114 | 		}
115 | 	}
116 | 	w.Flush()
117 | }
118 | 
119 | func patternMatches(c []string, pattern string, suppress bool) bool {
120 | 	found := false
121 | 	for n, v := range c {
122 | 		if cs == nil {
123 | 			if re == nil {
124 | 				found = strings.Contains(v, pattern)
125 | 			} else {
126 | 				found = re.MatchString(v)
127 | 			}
128 | 		} else {
129 | 			if cs.InRange(uint64(n + 1)) {
130 | 				if re == nil {
131 | 					found = strings.Contains(v, pattern)
132 | 				} else {
133 | 					found = re.MatchString(v)
134 | 				}
135 | 			}
136 | 		}
137 | 		if found {
138 | 			if suppress {
139 | 				return false
140 | 			}
141 | 			return true
142 | 		}
143 | 	}
144 | 	if suppress {
145 | 		return true
146 | 	}
147 | 	return false
148 | }
149 | 
150 | func usage(msg string) {
151 | 	fmt.Println(msg + "\n")
152 | 	fmt.Print("Usage: searchcsv [options]\n")
153 | 	flag.PrintDefaults()
154 | }
155 | 


--------------------------------------------------------------------------------
/searchcsv/test1.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | abc,def,Army
3 | one,two,Navy
4 | go,abacus,Marine
5 | Android,Ubuntu,Linux
6 | 


--------------------------------------------------------------------------------
/sortcsv/README.md:
--------------------------------------------------------------------------------
 1 | # Sortcsv
 2 | This utility will sort a CSV file. However, it is done in-memory
 3 | and has limits.
 4 | 
 5 | ## Information
 6 | Use the -help argument to show:
 7 | 
 8 | ```
 9 | $ go run sortcsv.go -help
10 |   -c string
11 |         Comma delimited list of columns to sort (default "1")
12 |   -headers
13 |         CSV has headers (default true)
14 |   -help
15 |         Show help message
16 |   -i string
17 |         CSV file name to sort; default STDIN
18 |   -o string
19 |         CSV output file name; default STDOUT
20 |   -s string
21 |         Comma delimited list of letters 'a' or 'd', for ascending or descending (default is ascending)
22 | ```
23 | 
24 | Example:
25 | ```
26 | $ cat test1.csv 
27 | A,B,C
28 | 1,2,3
29 | 4,1,0
30 | 2,1,2
31 | 3,3,1
32 | 3,3,2
33 | $ go run sortcsv.go -c 1,3 -s a,d -i test1.csv 
34 | A,B,C
35 | 1,2,3
36 | 2,1,2
37 | 3,3,2
38 | 3,3,1
39 | 4,1,0
40 | $ go run sortcsv.go -c 1,3 -s a,a -i test1.csv 
41 | A,B,C
42 | 1,2,3
43 | 2,1,2
44 | 3,3,1
45 | 3,3,2
46 | 4,1,0
47 | $ $
48 | ```
49 | 
50 | 
51 | 


--------------------------------------------------------------------------------
/sortcsv/sortcsv.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"encoding/csv"
  5 | 	"flag"
  6 | 	"log"
  7 | 	"os"
  8 | 	"sort"
  9 | 	"strconv"
 10 | 	"strings"
 11 | )
 12 | 
 13 | type table struct {
 14 | 	records [][]string
 15 | 	seq     []bool
 16 | 	col     []int
 17 | }
 18 | 
 19 | func (t *table) Len() int {
 20 | 	return len(t.records)
 21 | }
 22 | 
 23 | func (t *table) Swap(i, j int) {
 24 | 	t.records[i], t.records[j] = t.records[j], t.records[i]
 25 | }
 26 | 
 27 | func (t *table) Less(i, j int) bool {
 28 | 	isless := false
 29 | 	for n := range t.col {
 30 | 		ith := t.records[i][t.col[n]-1]
 31 | 		jth := t.records[j][t.col[n]-1]
 32 | 		if ith == jth {
 33 | 			continue
 34 | 		}
 35 | 		//log.Printf("Compare %v vs %v\n", ith, jth)
 36 | 		if ith < jth {
 37 | 			if t.seq[n] {
 38 | 				isless = true
 39 | 			} else {
 40 | 				isless = false
 41 | 			}
 42 | 			break
 43 | 		} else {
 44 | 			if t.seq[n] {
 45 | 				isless = false
 46 | 			} else {
 47 | 				isless = true
 48 | 			}
 49 | 			break
 50 | 		}
 51 | 	}
 52 | 	//log.Printf("Returning %v\n", isless)
 53 | 	return isless
 54 | }
 55 | 
 56 | func main() {
 57 | 	sortseq := flag.String("s", "", "Comma delimited list of letters 'a' or 'd', for ascending or descending (default is ascending)")
 58 | 	sortcol := flag.String("c", "1", "Comma delimited list of columns to sort")
 59 | 	sortinf := flag.String("i", "", "CSV file name to sort; default STDIN")
 60 | 	sortout := flag.String("o", "", "CSV output file name; default STDOUT")
 61 | 	headers := flag.Bool("headers", true, "CSV has headers")
 62 | 	help := flag.Bool("help", false, "Show help message")
 63 | 	flag.Parse()
 64 | 
 65 | 	if *help {
 66 | 		usage()
 67 | 	}
 68 | 
 69 | 	// open output file
 70 | 	var w *csv.Writer
 71 | 	if *sortout == "" {
 72 | 		w = csv.NewWriter(os.Stdout)
 73 | 	} else {
 74 | 		fo, foerr := os.Create(*sortout)
 75 | 		if foerr != nil {
 76 | 			log.Fatal("os.Create() Error:" + foerr.Error())
 77 | 		}
 78 | 		defer fo.Close()
 79 | 		w = csv.NewWriter(fo)
 80 | 	}
 81 | 
 82 | 	// open input file
 83 | 	var r *csv.Reader
 84 | 	if *sortinf == "" {
 85 | 		r = csv.NewReader(os.Stdin)
 86 | 	} else {
 87 | 		fi, fierr := os.Open(*sortinf)
 88 | 		if fierr != nil {
 89 | 			log.Fatal("os.Open() Error:" + fierr.Error())
 90 | 		}
 91 | 		defer fi.Close()
 92 | 		r = csv.NewReader(fi)
 93 | 	}
 94 | 
 95 | 	// ignore expectations of fields per row
 96 | 	r.FieldsPerRecord = -1
 97 | 
 98 | 	// read into memory
 99 | 	csvall, raerr := r.ReadAll()
100 | 	if raerr != nil {
101 | 		log.Fatal("r.ReadAll() Error:" + raerr.Error())
102 | 	}
103 | 
104 | 	if *headers {
105 | 		werr := w.Write(csvall[0])
106 | 		if werr != nil {
107 | 			log.Fatal("w.Write() Error:" + werr.Error())
108 | 		}
109 | 		csvall = csvall[1:]
110 | 	}
111 | 
112 | 	// parse columns input
113 | 	collist := strings.Split(*sortcol, ",")
114 | 	seqlist := strings.Split(*sortseq, ",")
115 | 	clist := make([]int, len(collist))
116 | 	slist := make([]bool, len(collist))
117 | 	for i := range collist {
118 | 		x, err := strconv.Atoi(collist[i])
119 | 		if err != nil {
120 | 			log.Fatalf("Element of column sort list is not an integer:%v\n", collist[i])
121 | 		}
122 | 		if x == 0 {
123 | 			log.Fatal("Column numbers begin at 1 not zero\n")
124 | 		}
125 | 		clist[i] = x
126 | 		if clist[i] > len(csvall[0]) {
127 | 			log.Fatalf("Column is larger than number of cells in row:%v\n", clist[i])
128 | 		}
129 | 		// now set the sort sequence for the column
130 | 		if i < len(seqlist) {
131 | 			if seqlist[i] == "a" || seqlist[i] == "" {
132 | 				slist[i] = true
133 | 			} else if seqlist[i] == "d" {
134 | 				slist[i] = false
135 | 			} else {
136 | 				log.Fatal("Sort sequence must 'a' for ascending or 'd' for descending\n")
137 | 			}
138 | 		} else {
139 | 			slist[i] = true
140 | 		}
141 | 	}
142 | 
143 | 	/* debugging */
144 | 	/*
145 | 		log.Printf("Sort columns:%v\n", clist)
146 | 		log.Printf("Sequence columns: %v\n", slist)
147 | 	*/
148 | 	t := &table{records: csvall, seq: slist, col: clist}
149 | 
150 | 	//sort.Sort(t)
151 | 	sort.Stable(t)
152 | 	werr := w.WriteAll(t.records)
153 | 	if werr != nil {
154 | 		log.Fatal("w.WriteAll() Error:" + werr.Error())
155 | 	}
156 | 	w.Flush()
157 | 
158 | }
159 | 
160 | func usage() {
161 | 	flag.PrintDefaults()
162 | 	os.Exit(0)
163 | }
164 | 


--------------------------------------------------------------------------------
/sortcsv/test1.csv:
--------------------------------------------------------------------------------
1 | A,B,C
2 | 1,2,3
3 | 4,1,0
4 | 2,1,2
5 | 3,3,1
6 | 3,3,2
7 | 


--------------------------------------------------------------------------------
/splitcsv/README.md:
--------------------------------------------------------------------------------
 1 | # Splitcsv
 2 | Use the -help argument to show:
 3 | ```
 4 | $ go run splitcsv.go -help
 5 | Help Message
 6 | 
 7 | Usage: splitcsv [options] input.csv output.csv
 8 |   -c string
 9 |     	Range spec for columns
10 |   -headers
11 |     	CSV has headers (default true)
12 |   -help
13 |     	Show usage message
14 |   -i string
15 |     	Input CSV filename; default STDIN
16 |   -keep
17 |     	Keep CSV headers on output (default true)
18 |   -o string
19 |     	Output CSV filename; default STDOUT
20 |   -r string
21 |     	Range spec for rows
22 | $ cat test1.csv
23 | A,B,C,D,E,F,G,H,I
24 | 1,1,1,1,1,1,1,1,1
25 | 2,2,2,2,2,2,2,2,2
26 | 3,3,3,3,3,3,3,3,3
27 | 4,4,4,4,4,4,4,4,4
28 | 5,5,5,5,5,5,5,5,5
29 | 6,6,6,6,6,6,6,6,6
30 | 7,7,7,7,7,7,7,7,7
31 | 8,8,8,8,8,8,8,8,8
32 | 9,9,9,9,9,9,9,9,9
33 | $ go run splitcsv.go -c 4-6 -r 4-6 < test1.csv
34 | D,E,F
35 | 4,4,4
36 | 5,5,5
37 | 6,6,6
38 | $
39 | ```
40 | 
41 | To upgrade to the new mod system:
42 | 
43 | 1. created a subfolder named rangespec
44 | 2. copied the rangespec.go from project into it.
45 | 3. ran the "go mod" command:
46 | ```
47 | $ go mod init github.com/mandolyte/csv-utils/splitcsv
48 | ```
49 | 4. then changed my import to be:
50 | ```go
51 | import (
52 |         "encoding/csv"
53 |         "flag"
54 |         "fmt"
55 |         "io"
56 |         "log"
57 |         "os"
58 |         "github.com/mandolyte/csv-utils/splitcsv/rangespec"
59 | )
60 | ```
61 | 
62 | 


--------------------------------------------------------------------------------
/splitcsv/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/mandolyte/csv-utils/splitcsv
2 | 
3 | go 1.19
4 | 


--------------------------------------------------------------------------------
/splitcsv/rangespec/rangespec.go:
--------------------------------------------------------------------------------
  1 | package rangespec
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"math"
  6 | 	"strconv"
  7 | 	"strings"
  8 | )
  9 | 
 10 | // RangeSpec parses a range specification, such as:
 11 | // 1,3,5-8,12-
 12 | // It will return a slice of RangeSpec, being two ints,
 13 | // a start and a stop
 14 | // Ranges start at 1, not zero.
 15 | type RangeSpec struct {
 16 | 	pairs []pair
 17 | 	spec  string
 18 | 	Max   uint64
 19 | }
 20 | type pair struct {
 21 | 	start, stop uint64
 22 | }
 23 | 
 24 | // New takes a range specification string and
 25 | // returns a slice of RangeSpec structs
 26 | func New(r string) (*RangeSpec, error) {
 27 | 	// remove any whitespace as a convenience
 28 | 	r = strings.Replace(r, " ", "", -1)
 29 | 	ret := new(RangeSpec)
 30 | 	ret.pairs = make([]pair, 0)
 31 | 	ret.spec = r
 32 | 	tokens := strings.Split(r, ",")
 33 | 	for n, val := range tokens {
 34 | 		//fmt.Printf("Working on %v at %v\n", val, n)
 35 | 		// does val have a dash?
 36 | 		if strings.Contains(val, "-") {
 37 | 			// split on dash
 38 | 			ends := strings.Split(val, "-")
 39 | 			if len(ends) > 2 {
 40 | 				return nil, fmt.Errorf("RangeSpec: malformed specification:%v", val)
 41 | 			}
 42 | 			if ends[1] != "" {
 43 | 				//fmt.Print("ends[] greater than 1\n")
 44 | 				end1, err := strconv.ParseUint(ends[0], 10, 64)
 45 | 				if err != nil {
 46 | 					return nil, fmt.Errorf("RangeSpec: not a number:%v\n%v", ends[0], err)
 47 | 				}
 48 | 				end2, err := strconv.ParseUint(ends[1], 10, 64)
 49 | 				if err != nil {
 50 | 					return nil, fmt.Errorf("RangeSpec: not a number:%v\n%v", ends[1], err)
 51 | 				}
 52 | 				var rs pair
 53 | 				rs.start = end1
 54 | 				rs.stop = end2
 55 | 				ret.pairs = append(ret.pairs, rs)
 56 | 			} else {
 57 | 				//fmt.Print("ends[] == 1\n")
 58 | 				if n+1 != len(tokens) {
 59 | 					return nil, fmt.Errorf("RangeSpec: open range must be last:%v", val)
 60 | 				}
 61 | 				end1, err := strconv.ParseUint(ends[0], 10, 64)
 62 | 				if err != nil {
 63 | 					return nil, fmt.Errorf("RangeSpec: not a number:%v\n%v", ends[0], err)
 64 | 				}
 65 | 				var rs pair
 66 | 				rs.start = end1
 67 | 				rs.stop = math.MaxUint64
 68 | 				ret.pairs = append(ret.pairs, rs)
 69 | 			}
 70 | 			continue
 71 | 		} else {
 72 | 			end1, err := strconv.ParseUint(val, 10, 64)
 73 | 			if err != nil {
 74 | 				return nil, fmt.Errorf("RangeSpec: not a number:%v\n%v", val, err)
 75 | 			}
 76 | 			var rs pair
 77 | 			rs.start = end1
 78 | 			rs.stop = end1
 79 | 			ret.pairs = append(ret.pairs, rs)
 80 | 		}
 81 | 	}
 82 | 	// ensure ascending specification
 83 | 	for i := 0; i < len(ret.pairs); i++ {
 84 | 		if i == 0 {
 85 | 			if ret.pairs[i].start == 0 {
 86 | 				return nil, fmt.Errorf("RangeSpec: range must be larger zero: %v", ret.pairs[i].start)
 87 | 			}
 88 | 		}
 89 | 		if ret.pairs[i].start > ret.pairs[i].stop {
 90 | 			return nil, fmt.Errorf("RangeSpec: start (%v) must be equal or less than stop (%v)", ret.pairs[i].start, ret.pairs[i].stop)
 91 | 		}
 92 | 		if i > 0 {
 93 | 			if ret.pairs[i].start <= ret.pairs[i-1].stop {
 94 | 				return nil, fmt.Errorf("RangeSpec: start (%v) must be greater than previous stop (%v)", ret.pairs[i].start, ret.pairs[i-1].stop)
 95 | 			}
 96 | 		}
 97 | 	}
 98 | 	// set the maximum row number
 99 | 	ret.Max = ret.pairs[len(ret.pairs)-1].stop
100 | 	return ret, nil
101 | }
102 | 
103 | // InRange will test whehter a number is in the range specification
104 | func (rs *RangeSpec) InRange(num uint64) bool {
105 | 	for _, val := range rs.pairs {
106 | 		if num >= val.start && num <= val.stop {
107 | 			return true
108 | 		}
109 | 	}
110 | 	return false
111 | }
112 | 


--------------------------------------------------------------------------------
/splitcsv/splitcsv.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"encoding/csv"
  5 | 	"flag"
  6 | 	"fmt"
  7 | 	"io"
  8 | 	"log"
  9 | 	"os"
 10 | 	"github.com/mandolyte/csv-utils/splitcsv/rangespec"
 11 | )
 12 | 
 13 | var rs *rangespec.RangeSpec
 14 | var cs *rangespec.RangeSpec
 15 | 
 16 | func main() {
 17 | 	rows := flag.String("r", "1-", "Range spec for rows")
 18 | 	cols := flag.String("c", "1-", "Range spec for columns")
 19 | 	input := flag.String("i", "", "Input CSV filename; default STDIN")
 20 | 	output := flag.String("o", "", "Output CSV filename; default STDOUT")
 21 | 	headers := flag.Bool("headers", true, "CSV has headers")
 22 | 	keep := flag.Bool("keep", true, "Keep CSV headers on output")
 23 | 	help := flag.Bool("help", false, "Show usage message")
 24 | 	flag.Parse()
 25 | 
 26 | 	if *help {
 27 | 		usage("Help Message")
 28 | 		os.Exit(0)
 29 | 	}
 30 | 
 31 | 	/* check parameters */
 32 | 	if *rows == "" {
 33 | 		usage("Required: Missing range specification for rows")
 34 | 		os.Exit(0)
 35 | 	}
 36 | 
 37 | 	rs, rserr := rangespec.New(*rows)
 38 | 	if rserr != nil {
 39 | 		log.Fatalf("Invalid row range spec:%v, Error:\n%v\n", *rows, rserr)
 40 | 	}
 41 | 
 42 | 	if *cols != "" {
 43 | 		var cserr error
 44 | 		cs, cserr = rangespec.New(*cols)
 45 | 		if cserr != nil {
 46 | 			log.Fatalf("Invalid column range spec:%v, Error:\n%v\n", *cols, cserr)
 47 | 		}
 48 | 	}
 49 | 
 50 | 	if *keep {
 51 | 		if !*headers {
 52 | 			log.Fatal("Cannot keep headers you don't have!")
 53 | 		}
 54 | 	}
 55 | 	// open output file
 56 | 	var w *csv.Writer
 57 | 	if *output == "" {
 58 | 		w = csv.NewWriter(os.Stdout)
 59 | 	} else {
 60 | 		fo, foerr := os.Create(*output)
 61 | 		if foerr != nil {
 62 | 			log.Fatal("os.Create() Error:" + foerr.Error())
 63 | 		}
 64 | 		defer fo.Close()
 65 | 		w = csv.NewWriter(fo)
 66 | 	}
 67 | 
 68 | 	// open input file
 69 | 	var r *csv.Reader
 70 | 	if *input == "" {
 71 | 		r = csv.NewReader(os.Stdin)
 72 | 	} else {
 73 | 		fi, fierr := os.Open(*input)
 74 | 		if fierr != nil {
 75 | 			log.Fatal("os.Open() Error:" + fierr.Error())
 76 | 		}
 77 | 		defer fi.Close()
 78 | 		r = csv.NewReader(fi)
 79 | 	}
 80 | 
 81 | 	// ignore expectations of fields per row
 82 | 	r.FieldsPerRecord = -1
 83 | 	r.LazyQuotes = true
 84 | 
 85 | 	// read loop for CSV
 86 | 	var row uint64
 87 | 	for {
 88 | 		// read the csv file
 89 | 		cells, rerr := r.Read()
 90 | 		if rerr == io.EOF {
 91 | 			break
 92 | 		}
 93 | 		if rerr != nil {
 94 | 			log.Fatalf("csv.Read:\n%v\n", rerr)
 95 | 		}
 96 | 		if (row == 0) && *headers && *keep {
 97 | 			row = 1
 98 | 			err := writeRow(w, cells, cs)
 99 | 			if err != nil {
100 | 				log.Fatalf("csv.Write:\n%v\n", err)
101 | 			}
102 | 			continue
103 | 		}
104 | 		row++
105 | 		if rs.InRange(row - 1) {
106 | 			err := writeRow(w, cells, cs)
107 | 			if err != nil {
108 | 				log.Fatalf("csv.Write:\n%v\n", err)
109 | 			}
110 | 		}
111 | 		if row > rs.Max {
112 | 			break
113 | 		}
114 | 	}
115 | 	w.Flush()
116 | }
117 | 
118 | func writeRow(w *csv.Writer, cells []string, cs *rangespec.RangeSpec) error {
119 | 	if cs == nil {
120 | 		err := w.Write(cells)
121 | 		if err != nil {
122 | 			return err
123 | 		}
124 | 		return nil
125 | 	}
126 | 	var outcells []string
127 | 	for m, c := range cells {
128 | 		if cs.InRange(uint64(m + 1)) {
129 | 			outcells = append(outcells, c)
130 | 		}
131 | 	}
132 | 	if len(outcells) == 0 {
133 | 		return fmt.Errorf("Column range outside actual columns:%v\n\n", cs)
134 | 	}
135 | 	err := w.Write(outcells)
136 | 	if err != nil {
137 | 		return err
138 | 	}
139 | 	return nil
140 | }
141 | 
142 | func usage(msg string) {
143 | 	fmt.Println(msg + "\n")
144 | 	fmt.Print("Usage: splitcsv [options] input.csv output.csv\n")
145 | 	flag.PrintDefaults()
146 | }
147 | 


--------------------------------------------------------------------------------
/splitcsv/test1.csv:
--------------------------------------------------------------------------------
 1 | A,B,C,D,E,F,G,H,I
 2 | 1,1,1,1,1,1,1,1,1
 3 | 2,2,2,2,2,2,2,2,2
 4 | 3,3,3,3,3,3,3,3,3
 5 | 4,4,4,4,4,4,4,4,4
 6 | 5,5,5,5,5,5,5,5,5
 7 | 6,6,6,6,6,6,6,6,6
 8 | 7,7,7,7,7,7,7,7,7
 9 | 8,8,8,8,8,8,8,8,8
10 | 9,9,9,9,9,9,9,9,9
11 | 


--------------------------------------------------------------------------------
/splitcsv/test2.csv:
--------------------------------------------------------------------------------
 1 | A,B,C,D,E,F,G,H,I
 2 | 1,1,1,1,1,1,1,1,1
 3 | 2,2,2,2,2,2,2,2,2
 4 | ="0003",0003,'0003,3,3,3,3,3,3
 5 | 4,4,4,4,4,4,4,4,4
 6 | 5,5,5,5,5,5,5,5,5
 7 | 6,6,6,6,6,6,6,6,6
 8 | 7,7,7,7,7,7,7,7,7
 9 | 8,8,8,8,8,8,8,8,8
10 | 9,9,9,9,9,9,9,9,9
11 | 


--------------------------------------------------------------------------------
/transformcsv/README.md:
--------------------------------------------------------------------------------
 1 | # Transformcsv
 2 | This utility will take an input CSV and transform it using a text template.
 3 | The template is applied to every row in the CSV. The column headers are 
 4 | required. The column header names are used as map keys to the values
 5 | used by the template.
 6 | 
 7 | Use the -help argument to show:
 8 | ```
 9 | $ go run transformcsv.go -help
10 | Help Message
11 | 
12 |   -help
13 |         Show usage message
14 |   -i string
15 |         Input CSV filename; default STDIN
16 |   -m string
17 |         Name of map in template; default is m (default "m")
18 |   -o string
19 |         Output filename; default STDOUT
20 |   -t string
21 |         Template to use for transformation
22 | $ 
23 | ```
24 | 
25 | Given template:
26 | ```
27 | $ cat template1.txt 
28 | INSERT INTO atable (column1, column2, column3)
29 | VALUES ('{{index .mp "column1"}}', '{{index .mp "column2"}}', '{{index .mp "column3"}}')
30 | ;
31 | ```
32 | 
33 | Given input CSV:
34 | ```
35 | $ cat test1.csv 
36 | column1,column2,column3
37 | v1.1,v1.2,v1.3
38 | v2.1,v2.1,v2.3
39 | $ 
40 | ```
41 | 
42 | Then this command will generate SQL INSERT statements for each row
43 | in the CSV file.
44 | ```
45 | $ go run transformcsv.go -i test1.csv -t template1.txt -m mp -o trans1.sql
46 | $ cat trans1.sql
47 | INSERT INTO atable (column1, column2, column3)
48 | VALUES ('v1.1', 'v1.2', 'v1.3')
49 | ;
50 | INSERT INTO atable (column1, column2, column3)
51 | VALUES ('v2.1', 'v2.1', 'v2.3')
52 | ;
53 | $ 
54 | ```


--------------------------------------------------------------------------------
/transformcsv/template1.txt:
--------------------------------------------------------------------------------
1 | INSERT INTO atable (column1, column2, column3)
2 | VALUES ('{{index .mp "column1"}}', '{{index .mp "column2"}}', '{{index .mp "column3"}}')
3 | ;
4 | 


--------------------------------------------------------------------------------
/transformcsv/test1.csv:
--------------------------------------------------------------------------------
1 | column1,column2,column3
2 | v1.1,v1.2,v1.3
3 | v2.1,v2.1,v2.3
4 | 


--------------------------------------------------------------------------------
/transformcsv/trans1.sql:
--------------------------------------------------------------------------------
1 | INSERT INTO atable (column1, column2, column3)
2 | VALUES ('v1.1', 'v1.2', 'v1.3')
3 | ;
4 | INSERT INTO atable (column1, column2, column3)
5 | VALUES ('v2.1', 'v2.1', 'v2.3')
6 | ;
7 | 


--------------------------------------------------------------------------------
/transformcsv/transformcsv.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"bufio"
  5 | 	"encoding/csv"
  6 | 	"flag"
  7 | 	"fmt"
  8 | 	"io"
  9 | 	"io/ioutil"
 10 | 	"log"
 11 | 	"text/template"
 12 | 	"os"
 13 | )
 14 | 
 15 | 
 16 | func main() {
 17 | 	input := flag.String("i", "", "Input CSV filename; default STDIN")
 18 | 	tmplfile := flag.String("t","", "Template to use for transformation")
 19 | 	output := flag.String("o", "", "Output filename; default STDOUT")
 20 | 	mapname := flag.String("m", "m", "Name of map in template; default is m")
 21 | 	help := flag.Bool("help", false, "Show usage message")
 22 | 	flag.Parse()
 23 |  
 24 | 	if *help {
 25 | 		usage("Help Message")
 26 | 	}
 27 | 
 28 | 	if *tmplfile == "" {
 29 | 		usage("Template file name missing")
 30 | 	}
 31 | 	templatebytes, terr := ioutil.ReadFile(*tmplfile)
 32 |     if terr != nil {
 33 |         log.Fatal("Template file read error:"+terr.Error())
 34 | 	}
 35 | 	template := string(templatebytes)
 36 | 
 37 | 
 38 | 	// open output file
 39 | 	var w *bufio.Writer
 40 | 	if *output == "" {
 41 | 		w = bufio.NewWriter(os.Stdout)
 42 | 	} else {
 43 | 		fo, foerr := os.Create(*output)
 44 | 		if foerr != nil {
 45 | 			log.Fatal("os.Create() Error:" + foerr.Error())
 46 | 		}
 47 | 		defer fo.Close()
 48 | 		w = bufio.NewWriter(fo)
 49 | 	}
 50 | 
 51 | 	// open input file
 52 | 	var r *csv.Reader
 53 | 	if *input == "" {
 54 | 		r = csv.NewReader(os.Stdin)
 55 | 	} else {
 56 | 		fi, fierr := os.Open(*input)
 57 | 		if fierr != nil {
 58 | 			log.Fatal("os.Open() Error:" + fierr.Error())
 59 | 		}
 60 | 		defer fi.Close()
 61 | 		r = csv.NewReader(fi)
 62 | 	}
 63 | 
 64 | 	// ignore expectations of fields per row
 65 | 	r.FieldsPerRecord = -1
 66 | 	r.LazyQuotes = true
 67 | 
 68 | 	// read loop for CSV
 69 | 	var hdrs []string
 70 | 	var row uint64
 71 | 	for {
 72 | 		// read the csv file
 73 | 		cells, rerr := r.Read()
 74 | 		if rerr == io.EOF {
 75 | 			break
 76 | 		}
 77 | 		if rerr != nil {
 78 | 			log.Fatalf("csv.Read:\n%v\n", rerr)
 79 | 		}
 80 | 		if (row == 0) {
 81 | 			row = 1
 82 | 			hdrs = append(hdrs, cells...)
 83 | 			continue
 84 | 		}
 85 | 		row++
 86 | 		err := writeTemplate(w, template, *mapname, hdrs, cells)
 87 | 		if err != nil {
 88 | 			log.Fatal("Write error to output:"+err.Error())
 89 | 		}
 90 | 	}
 91 | 	w.Flush()
 92 | }
 93 | 
 94 | func writeTemplate(w io.Writer, tmpltext,amap string, hdrs, cells []string) error {
 95 | 	// logic flow 
 96 | 	// 1. create a map using the hdrs as keys and cells as values
 97 | 	// 2. apply the map to the template
 98 | 	// 3. write it out
 99 | 
100 | 	m := make(map[string]string)
101 | 	for i := range hdrs {
102 | 		m[hdrs[i]] = cells [i]
103 | 	}
104 | 
105 | 	t := template.Must(template.New("").Parse(tmpltext))
106 | 	err := t.Execute(w, map[string]interface{}{amap: m})
107 | 	if err != nil {
108 | 		log.Fatal("Template Execute() error:"+err.Error())
109 | 	}
110 | 	return nil
111 | }
112 | 
113 | func usage(msg string) {
114 | 	fmt.Println(msg + "\n")
115 | 	flag.PrintDefaults()
116 | 	os.Exit(0)
117 | }
118 | 


--------------------------------------------------------------------------------