├── .gitignore ├── .goreleaser.yml ├── .travis.yml ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── _config.yml ├── benchmark ├── README.md └── data-table.r ├── cmd ├── config.go ├── config_test.go ├── formatter.go ├── formatter_test.go ├── root.go ├── root_test.go └── version.go ├── codecov.yml ├── docker-push.sh ├── examples ├── base-small.csv ├── delta-small.csv ├── lazy_quotes.csv ├── lazy_quotes_delta.csv ├── no_comma.csv └── no_comma_delta.csv ├── go.mod ├── go.sum ├── install ├── install.sh ├── main.go ├── pkg └── digest │ ├── config.go │ ├── diff.go │ ├── diff_test.go │ ├── digest.go │ ├── digest_benchmark_test.go │ ├── digest_test.go │ ├── engine.go │ ├── engine_test.go │ ├── file_digest.go │ ├── file_digest_test.go │ ├── positions.go │ ├── positions_test.go │ ├── utils.go │ └── utils_test.go └── release.sh /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/go 3 | 4 | ### Go ### 5 | # Binaries for programs and plugins 6 | *.exe 7 | *.exe~ 8 | *.dll 9 | *.so 10 | *.dylib 11 | 12 | # Test binary, build with `go test -c` 13 | *.test 14 | 15 | # Output of the go coverage tool, specifically when used with LiteIDE 16 | *.out 17 | 18 | .idea/* 19 | 20 | out/ 21 | 22 | # End of https://www.gitignore.io/api/go 23 | 24 | vendor/ 25 | coverage.txt 26 | 27 | majestic_million*.csv 28 | 29 | # Output binary 30 | csvdiff -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | project_name: csvdiff 2 | 3 | release: 4 | github: 5 | owner: aswinkarthik 6 | name: csvdiff 7 | 8 | builds: 9 | - main: ./main.go 10 | binary: csvdiff 11 | goos: 12 | - windows 13 | - darwin 14 | - linux 15 | goarch: 16 | - amd64 17 | nfpms: 18 | - file_name_template: "{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}" 19 | replacements: 20 | amd64: 64-bit 21 | 386: 32-bit 22 | darwin: macOS 23 | linux: linux 24 | vendor: aswinkarthik 25 | homepage: https://github.com/aswinkarthik/csvdiff 26 | maintainer: aswinkarthik 27 | description: A Blazingly fast diff tool for comparing csv files. 
28 | license: MIT 29 | formats: 30 | - deb 31 | - rpm -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | addons: 4 | apt: 5 | packages: 6 | - rpm 7 | 8 | go: 9 | - 1.x 10 | 11 | env: 12 | global: 13 | - GO111MODULE=on 14 | - GORELEASER_ON=1 15 | 16 | gobuild_args: -ldflags "-X main.version=${TRAVIS_TAG}" 17 | 18 | script: 19 | - make lint test 20 | - GOOS=linux go build 21 | 22 | after_success: 23 | - curl -sL https://codecov.io/bash | bash 24 | 25 | deploy: 26 | - provider: script 27 | script: curl -sL https://git.io/goreleaser | bash 28 | skip_cleanup: true 29 | on: 30 | branch: master 31 | tags: true 32 | repo: aswinkarthik/csvdiff 33 | condition: $GORELEASER_ON = 1 34 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine 2 | 3 | COPY csvdiff /csvdiff 4 | 5 | CMD /csvdiff 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright © 2018 aswinkarthik 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | -include .env 2 | 3 | VERSION := $(shell git describe --tags) 4 | BUILD := $(shell git rev-parse --short HEAD) 5 | PROJECTNAME := $(shell basename "$(PWD)") 6 | 7 | # Go related variables. 8 | GOBASE := $(shell pwd) 9 | GOPATH := $(GOBASE)/vendor:$(GOBASE) 10 | GOBIN := $(GOBASE)/out 11 | GOFILES := $(wildcard *.go) 12 | 13 | # Use linker flags to provide version/build settings 14 | LDFLAGS=-ldflags "-X=main.Version=$(VERSION) -X=main.Build=$(BUILD)" 15 | 16 | # Make is verbose in Linux. Make it silent. 17 | MAKEFLAGS += --silent 18 | 19 | ## install: Install missing dependencies. Runs `go get` internally. e.g; make install get=github.com/foo/bar 20 | install: go-get 21 | 22 | ## lint: Lint the codebase using golangci-lint 23 | lint: 24 | ifeq (,$(wildcard ./out/golangci-lint)) 25 | curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b $(GOBIN) 26 | endif 27 | $(GOBIN)/golangci-lint run -v ./... 
28 | 29 | ## test: Run all tests 30 | test: go-test 31 | 32 | ## compile: Compile the binary. 33 | compile: 34 | @-$(MAKE) -s go-compile 35 | 36 | ## exec: Run given command, wrapped with custom GOPATH. e.g. make exec run="go test ./..." 37 | exec: 38 | @GOPATH=$(GOPATH) GOBIN=$(GOBIN) $(run) 39 | 40 | ## clean: Clean build files. Runs `go clean` internally. 41 | clean: 42 | @-rm $(GOBIN)/$(PROJECTNAME) 2> /dev/null 43 | @-$(MAKE) go-clean 44 | 45 | go-compile: go-get go-build 46 | 47 | go-build: 48 | @echo " > Building binary..." 49 | @GOPATH=$(GOPATH) GOBIN=$(GOBIN) go build $(LDFLAGS) -o $(GOBIN)/$(PROJECTNAME) $(GOFILES) 50 | 51 | go-generate: 52 | @echo " > Generating dependency files..." 53 | @GOPATH=$(GOPATH) GOBIN=$(GOBIN) go generate $(generate) 54 | 55 | go-get: 56 | @echo " > Checking if there are any missing dependencies..." 57 | @GOPATH=$(GOPATH) GOBIN=$(GOBIN) go get $(get) 58 | 59 | go-install: 60 | @GOPATH=$(GOPATH) GOBIN=$(GOBIN) go mod tidy 61 | 62 | go-vendor: 63 | @GOPATH=$(GOPATH) GOBIN=$(GOBIN) go mod vendor 64 | 65 | go-test: 66 | @GOPATH=$(GOPATH) GOBIN=$(GOBIN) go test -race -coverprofile=coverage.txt -covermode=atomic -v ./... 67 | 68 | richgo-test: 69 | @GOPATH=$(GOPATH) GOBIN=$(GOBIN) richgo test -v ./... 70 | 71 | go-clean: 72 | @echo " > Cleaning build cache" 73 | @GOPATH=$(GOPATH) GOBIN=$(GOBIN) go clean 74 | 75 | .PHONY: help 76 | all: help 77 | help: Makefile 78 | @echo 79 | @echo " Choose a command to run in "$(PROJECTNAME)":" 80 | @echo 81 | @sed -n 's/^##//p' $< | column -t -s ':' | sed -e 's/^/ /' 82 | @echo 83 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # csvdiff 2 | 3 | [![Build Status](https://travis-ci.org/aswinkarthik/csvdiff.svg?branch=master)](https://travis-ci.org/aswinkarthik/csvdiff) 4 | [![Go Doc](https://godoc.org/github.com/aswinkarthik/csvdiff?status.svg)](https://godoc.org/github.com/aswinkarthik/csvdiff) 5 | [![Go Report Card](https://goreportcard.com/badge/github.com/aswinkarthik/csvdiff)](https://goreportcard.com/report/github.com/aswinkarthik/csvdiff) 6 | [![codecov](https://codecov.io/gh/aswinkarthik/csvdiff/branch/master/graph/badge.svg)](https://codecov.io/gh/aswinkarthik/csvdiff) 7 | [![Downloads](https://img.shields.io/github/downloads/aswinkarthik/csvdiff/total.svg)](https://github.com/aswinkarthik/csvdiff/releases) 8 | [![Latest release](https://img.shields.io/github/release/aswinkarthik/csvdiff.svg)](https://github.com/aswinkarthik/csvdiff/releases) 9 | 10 | A fast diff tool for comparing csv files. 11 | 12 | ## What is csvdiff? 13 | 14 | Csvdiff is a diff tool to compute changes between two csv files. 15 | 16 | - It is not a traditional diff tool. It is **most suitable** for comparing csv files dumped from **database tables**; the GNU diff tool is orders of magnitude faster for plain line-by-line comparison. 17 | - Supports selective comparison of fields in a row. 18 | - Supports specifying a group of columns as the primary key, i.e. the columns that uniquely identify a row. 19 | - Supports ignoring columns, e.g. columns like `created_at` timestamps. 20 | - Compares csv files with a million records in under 2 seconds. 21 | - Supports a number of output formats, e.g. colored git-style output or JSON for post-processing. 22 | 23 | ## Why? 24 | 25 | I wanted to compare the rows of a table before and after a given point in time and see what new changes came in.
Also, I wanted to selectively compare columns, ignoring columns like `created_at` and `updated_at`. All I had were the dumped csv files. 26 | 27 | ## Demo 28 | 29 | [![asciicast](https://asciinema.org/a/YNO5G0b2qL92MZWmb2IeiXveN.svg)](https://asciinema.org/a/YNO5G0b2qL92MZWmb2IeiXveN?speed=2&autoplay=1&size=medium&rows=20&cols=150) 30 | 31 | ## Usage 32 | 33 | ```diff 34 | $ csvdiff base.csv delta.csv 35 | # Additions (1) 36 | + 24564,907,completely-newsite.com,com,19827,32902,completely-newsite.com,com,1621,909,19787,32822 37 | # Modifications (1) 38 | - 69,48,aol.com,com,97543,225532,aol.com,com,70,49,97328,224491 39 | + 69,1048,aol.com,com,97543,225532,aol.com,com,70,49,97328,224491 40 | # Deletions (1) 41 | - 1618,907,deleted-website.com,com,19827,32902,deleted-website.com,com,1621,909,19787,32822 42 | ``` 43 | 44 | 45 | ```bash 46 | Differentiates two csv files and finds out the additions and modifications. 47 | Most suitable for csv files created from database tables 48 | 49 | Usage: 50 | csvdiff <base-csv> <delta-csv> [flags] 51 | 52 | Flags: 53 | --columns ints Selectively compare positions in CSV Eg: 1,2. Default is entire row 54 | -o, --format string Available (rowmark|json|legacy-json|diff|word-diff|color-words) (default "diff") 55 | -h, --help help for csvdiff 56 | --ignore-columns ints Inverse of --columns flag. This cannot be used if --columns are specified 57 | --include ints Include positions in CSV to display Eg: 1,2. Default is entire row 58 | -p, --primary-key ints Primary key positions of the Input CSV as comma separated values Eg: 1,2 (default [0]) 59 | -s, --separator string use specific separator (\t, or any one character string) (default ",") 60 | --time Measure time 61 | -t, --toggle Help message for toggle 62 | --version version for csvdiff 63 | ``` 64 | 65 | ## Installation 66 | 67 | ### Homebrew 68 | 69 | ```bash 70 | brew tap thecasualcoder/stable 71 | brew install csvdiff 72 | ``` 73 | 74 | ### Using binaries 75 | 76 | ```bash 77 | # binary will be $GOPATH/bin/csvdiff 78 | curl -sfL https://raw.githubusercontent.com/aswinkarthik/csvdiff/master/install.sh | sh -s -- -b $GOPATH/bin 79 | 80 | # or install it into ./bin/ 81 | curl -sfL https://raw.githubusercontent.com/aswinkarthik/csvdiff/master/install.sh | sh -s 82 | 83 | # On Alpine Linux (as it does not ship with curl by default) 84 | wget -O - -q https://raw.githubusercontent.com/aswinkarthik/csvdiff/master/install.sh | sh -s 85 | ``` 86 | 87 | ### Using source code 88 | 89 | ```bash 90 | go get -u github.com/aswinkarthik/csvdiff 91 | ``` 92 | 93 | ## Use case 94 | 95 | - Cases where you have a base database dump as a csv. If you receive the changes as another database dump, this tool can figure out the additions and modifications to the original dump. The `additions.csv` can be used to create an `insert.sql`, and the `modifications.csv` an `update.sql` data migration (see the `pkg/digest` sketch below for doing the same from Go code). 96 | - The delta file can either contain just the changes or the entire table dump along with the changes. 97 | 98 | ## Supported 99 | 100 | - Additions 101 | - Modifications 102 | - Deletions 103 | - Non-comma separators 104 | 105 | ## Not Supported 106 | 107 | - Cannot be used as a generic diff tool. It requires one or more columns of the csv to act as a primary key.
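## Using as a library

The CLI is a thin wrapper over the `pkg/digest` package (see `runContext` in `cmd/root.go`), so the same diff can be computed from Go code. The sketch below is illustrative only: the field names are taken from `cmd/config.go`'s `BaseDigestConfig`, but the exact requirements and defaults of `digest.Config` are an assumption and should be checked against the package.

```go
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/aswinkarthik/csvdiff/pkg/digest"
)

func main() {
	base, err := os.Open("examples/base-small.csv")
	if err != nil {
		log.Fatal(err)
	}
	defer base.Close()

	delta, err := os.Open("examples/delta-small.csv")
	if err != nil {
		log.Fatal(err)
	}
	defer delta.Close()

	// Column 0 acts as the primary key; leaving Value unset compares the
	// entire row, mirroring the CLI default when --columns is not given.
	baseConfig := digest.Config{Reader: base, Key: digest.Positions{0}, Separator: ','}
	deltaConfig := digest.Config{Reader: delta, Key: digest.Positions{0}, Separator: ','}

	diff, err := digest.Diff(baseConfig, deltaConfig)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Printf("additions=%d modifications=%d deletions=%d\n",
		len(diff.Additions), len(diff.Modifications), len(diff.Deletions))
}
```

This mirrors what `runContext` in `cmd/root.go` does with the file handles opened in `cmd/config.go`; the formatters in `cmd/formatter.go` then render the returned `digest.Differences`.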
108 | 109 | ## Formats 110 | 111 | There are a number of supported formats: 112 | 113 | - `diff`: Git's diff style 114 | - `word-diff`: Git's --word-diff style 115 | - `color-words`: Git's --color-words style 116 | - `json`: JSON serialization of the result 117 | - `legacy-json`: JSON serialization of the result in the old format 118 | - `rowmark`: Marks each row with an ADDED, MODIFIED, or DELETED status. 119 | 120 | ## Miscellaneous features 121 | 122 | - The `--primary-key` flag is an integer array. Specify comma-separated positions if the table has a compound key. Using this primary key, csvdiff can figure out modifications. If the primary key itself changes, the row is reported as an addition. 123 | 124 | ```bash 125 | % csvdiff base.csv delta.csv --primary-key 0,1 126 | ``` 127 | 128 | - If you want to compare only a few columns of the csv when computing the hash: 129 | 130 | ```bash 131 | % csvdiff base.csv delta.csv --primary-key 0,1 --columns 2 132 | ``` 133 | 134 | - Supports JSON output for post-processing: 135 | 136 | ```bash 137 | % csvdiff examples/base-small.csv examples/delta-small.csv --format json | jq '.' 138 | { 139 | "Additions": [ 140 | "24564,907,completely-newsite.com,com,19827,32902,completely-newsite.com,com,1621,909,19787,32822" 141 | ], 142 | "Modifications": [{ 143 | "Original": "69,1048,aol.com,com,97543,225532,aol.com,com,70,49,97328,224491", 144 | "Current": "69,1049,aol.com,com,97543,225532,aol.com,com,70,49,97328,224491" 145 | }], 146 | "Deletions": [ 147 | "1615,905,deleted-website.com,com,19833,33110,deleted-website.com,com,1613,902,19835,33135" 148 | ] 149 | } 150 | ``` 151 | 152 | ## Build locally 153 | 154 | ```bash 155 | $ git clone https://github.com/aswinkarthik/csvdiff 156 | $ go get ./... 157 | $ go build 158 | 159 | # To run tests 160 | $ go get github.com/stretchr/testify/assert 161 | $ go test -v ./... 162 | ``` 163 | 164 | ## Algorithm 165 | 166 | - Creates a map for both the base and the delta file 167 | - `key` is a hash of the primary key values as csv 168 | - `value` is a hash of the entire row 169 | - Two maps as the initial processing output 170 | - base-map 171 | - delta-map 172 | - The delta map is compared with the base map. As long as the primary key is unchanged, the row will have the same `key`. An entry in the delta map is a 173 | - **Addition**, if the base-map has no entry for that `key`. 174 | - **Modification**, if the base-map's `value` is different. 175 | - **Deletion**, if an entry in the base-map has no matching `key` in the delta map. 176 | 177 | ## Credits 178 | 179 | - Uses the 64-bit [xxHash](https://cyan4973.github.io/xxHash/) algorithm, an extremely fast non-cryptographic hash algorithm, for creating the hashes. Implementation from [cespare](https://github.com/cespare/xxhash) 180 | - Used [Majestic million](https://blog.majestic.com/development/majestic-million-csv-daily/) data for the demo. 181 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /benchmark/README.md: -------------------------------------------------------------------------------- 1 | # Comparison with other tools 2 | 3 | ## Setup 4 | 5 | * Using the majestic million data. (Source in the credits section) 6 | * Both files have 998390 rows and 12 columns. 7 | * Only one modification between both files. 8 | * Ran on an Intel Core i7 2.5 GHz, 4 cores, 16 GB RAM 9 | 10 | ## Baseline 11 | 12 | 0. 
csvdiff (this tool) : *0m1.159s* 13 | 14 | ```bash 15 | time csvdiff majestic_million.csv majestic_million_diff.csv 16 | Additions 0 17 | Modifications 1 18 | ... 19 | 20 | real 0m1.159s 21 | user 0m2.167s 22 | sys 0m0.222s 23 | ``` 24 | 25 | ## Other tools 26 | 27 | 1. [data.table](https://github.com/Rdatatable/data.table) : *0m4.284s* 28 | 29 | * Join both csvs using the `id` column. 30 | * Check for inequality between the corresponding columns. 31 | * Rscript in [data-table.r](/benchmark/data-table.r) (Can it be written better? New to R) 32 | 33 | ```bash 34 | time Rscript data-table.r 35 | 36 | real 0m4.284s 37 | user 0m3.887s 38 | sys 0m0.284s 39 | ``` 40 | 41 | 2. [csvdiff](https://pypi.org/project/csvdiff/) written in Python : *0m48.115s* 42 | 43 | ```bash 44 | time csvdiff --style=summary id majestic_million.csv majestic_million_diff.csv 45 | 0 rows removed (0.0%) 46 | 0 rows added (0.0%) 47 | 1 rows changed (0.0%) 48 | 49 | real 0m48.115s 50 | user 0m42.895s 51 | sys 0m3.948s 52 | ``` 53 | 54 | 3. GNU diff (Fastest) : *0m0.297s* 55 | 56 | * Seems to be the fastest; csvdiff couldn't even come close here. 57 | * However, it does a line-by-line diff. It does not support compound keys of a csv or selective comparison of columns. Hence the disclaimer: csvdiff cannot be used as a generic diff tool. 58 | * On another note, let's see if we can reach this. 59 | 60 | ```bash 61 | time diff majestic_million.csv majestic_million_diff.csv 62 | 63 | real 0m0.297s 64 | user 0m0.144s 65 | sys 0m0.147s 66 | ``` 67 | 68 | ## Go Benchmark Results 69 | 70 | The benchmark test can be found [here](https://github.com/aswinkarthik/csvdiff/blob/master/pkg/digest/digest_benchmark_test.go). 71 | 72 | ```bash 73 | $ cd ./pkg/digest 74 | $ go test -bench=. -v -benchmem -benchtime=5s -cover 75 | ``` 76 | 77 | ``` 78 | BenchmarkCreate1-8 200000 31794 ns/op 116163 B/op 24 allocs/op 79 | BenchmarkCreate10-8 200000 43351 ns/op 119993 B/op 79 allocs/op 80 | BenchmarkCreate100-8 50000 142645 ns/op 160577 B/op 634 allocs/op 81 | BenchmarkCreate1000-8 10000 907308 ns/op 621694 B/op 6085 allocs/op 82 | BenchmarkCreate10000-8 1000 7998083 ns/op 5117977 B/op 60345 allocs/op 83 | BenchmarkCreate100000-8 100 81260585 ns/op 49106849 B/op 604563 allocs/op 84 | BenchmarkCreate1000000-8 10 788485738 ns/op 520115434 B/op 6042650 allocs/op 85 | BenchmarkCreate10000000-8 1 7878009695 ns/op 5029061632 B/op 60346535 allocs/op 86 | ``` -------------------------------------------------------------------------------- /benchmark/data-table.r: -------------------------------------------------------------------------------- 1 | library(data.table) 2 | 3 | csv1 = fread('majestic_million.csv') 4 | csv2 = fread('majestic_million_diff.csv') 5 | 6 | setkey(csv1,id) 7 | setkey(csv2,id) 8 | 9 | result <- merge(csv2, csv1, all.x=TRUE) 10 | 11 | diff <- result[result$"col-1.x" != result$"col-1.y" | result$"col-2.x" != result$"col-2.y" | result$"col-3.x" != result$"col-3.y" | result$"col-4.x" != result$"col-4.y" | result$"col-5.x" != result$"col-5.y" | result$"col-6.x" != result$"col-6.y" | result$"col-7.x" != result$"col-7.y" | result$"col-8.x" != result$"col-8.y" | result$"col-9.x" != result$"col-9.y" | result$"col-10.x" != result$"col-10.y" | result$"col-11.x" != result$"col-11.y"] 12 | 13 | diff 14 | -------------------------------------------------------------------------------- /cmd/config.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "io" 7 | "strings" 8 | 9 | "github.com/spf13/afero" 10 | 11 | 
"github.com/aswinkarthik/csvdiff/pkg/digest" 12 | ) 13 | 14 | // Context is to store all command line Flags. 15 | type Context struct { 16 | fs afero.Fs 17 | primaryKeyPositions []int 18 | valueColumnPositions []int 19 | includeColumnPositions []int 20 | format string 21 | baseFilename string 22 | deltaFilename string 23 | baseFile afero.File 24 | deltaFile afero.File 25 | recordCount int 26 | separator rune 27 | lazyQuotes bool 28 | } 29 | 30 | // NewContext can take all CLI flags and create a cmd.Context 31 | // Validations are done as part of this. 32 | // File pointers are created too. 33 | func NewContext( 34 | fs afero.Fs, 35 | primaryKeyPositions []int, 36 | valueColumnPositions []int, 37 | ignoreValueColumnPositions []int, 38 | includeColumnPositions []int, 39 | format string, 40 | baseFilename string, 41 | deltaFilename string, 42 | separator rune, 43 | lazyQuotes bool, 44 | ) (*Context, error) { 45 | baseRecordCount, err := getColumnsCount(fs, baseFilename, separator, lazyQuotes) 46 | if err != nil { 47 | return nil, fmt.Errorf("error in base-file: %v", err) 48 | } 49 | 50 | deltaRecordCount, err := getColumnsCount(fs, deltaFilename, separator, lazyQuotes) 51 | if err != nil { 52 | return nil, fmt.Errorf("error in delta-file: %v", err) 53 | } 54 | 55 | if baseRecordCount != deltaRecordCount { 56 | return nil, fmt.Errorf("base-file and delta-file columns count do not match") 57 | } 58 | 59 | if len(ignoreValueColumnPositions) > 0 && len(valueColumnPositions) > 0 { 60 | return nil, fmt.Errorf("only one of --columns or --ignore-columns") 61 | } 62 | if len(ignoreValueColumnPositions) > 0 { 63 | valueColumnPositions = inferValueColumns(baseRecordCount, ignoreValueColumnPositions) 64 | } 65 | 66 | baseFile, err := fs.Open(baseFilename) 67 | if err != nil { 68 | return nil, err 69 | } 70 | deltaFile, err := fs.Open(deltaFilename) 71 | if err != nil { 72 | return nil, err 73 | } 74 | ctx := &Context{ 75 | fs: fs, 76 | primaryKeyPositions: primaryKeyPositions, 77 | valueColumnPositions: valueColumnPositions, 78 | includeColumnPositions: includeColumnPositions, 79 | format: format, 80 | baseFilename: baseFilename, 81 | deltaFilename: deltaFilename, 82 | baseFile: baseFile, 83 | deltaFile: deltaFile, 84 | recordCount: baseRecordCount, 85 | separator: separator, 86 | lazyQuotes: lazyQuotes, 87 | } 88 | 89 | if err := ctx.validate(); err != nil { 90 | return nil, fmt.Errorf("validation failed: %v", err) 91 | } 92 | 93 | return ctx, nil 94 | } 95 | 96 | // GetPrimaryKeys is to return the --primary-key flags as digest.Positions array. 97 | func (c *Context) GetPrimaryKeys() digest.Positions { 98 | if len(c.primaryKeyPositions) > 0 { 99 | return c.primaryKeyPositions 100 | } 101 | return []int{0} 102 | } 103 | 104 | // GetValueColumns is to return the --columns flags as digest.Positions array. 105 | func (c *Context) GetValueColumns() digest.Positions { 106 | if len(c.valueColumnPositions) > 0 { 107 | return c.valueColumnPositions 108 | } 109 | return []int{} 110 | } 111 | 112 | // GetIncludeColumnPositions is to return the --include flags as digest.Positions array. 113 | // If empty, it is value columns 114 | func (c Context) GetIncludeColumnPositions() digest.Positions { 115 | if len(c.includeColumnPositions) > 0 { 116 | return c.includeColumnPositions 117 | } 118 | return c.GetValueColumns() 119 | } 120 | 121 | // validate validates the context object 122 | // and returns error if not valid. 
123 | func (c *Context) validate() error { 124 | { 125 | // format validation 126 | 127 | formatFound := false 128 | for _, format := range allFormats { 129 | if strings.ToLower(c.format) == format { 130 | formatFound = true 131 | } 132 | } 133 | if !formatFound { 134 | return fmt.Errorf("specified format is not valid") 135 | } 136 | } 137 | 138 | { 139 | comparator := func(element int) bool { 140 | return element < c.recordCount 141 | } 142 | 143 | if !assertAll(c.primaryKeyPositions, comparator) { 144 | return fmt.Errorf("--primary-key positions are out of bounds") 145 | } 146 | if !assertAll(c.includeColumnPositions, comparator) { 147 | return fmt.Errorf("--include positions are out of bounds") 148 | } 149 | if !assertAll(c.valueColumnPositions, comparator) { 150 | return fmt.Errorf("--columns positions are out of bounds") 151 | } 152 | } 153 | 154 | return nil 155 | } 156 | 157 | func inferValueColumns(recordCount int, ignoreValueColumns []int) digest.Positions { 158 | lookupMap := make(map[int]struct{}) 159 | for _, pos := range ignoreValueColumns { 160 | lookupMap[pos] = struct{}{} 161 | } 162 | 163 | valueColumns := make(digest.Positions, 0) 164 | if len(ignoreValueColumns) > 0 { 165 | for i := 0; i < recordCount; i++ { 166 | if _, exists := lookupMap[i]; !exists { 167 | valueColumns = append(valueColumns, i) 168 | } 169 | } 170 | } 171 | 172 | return valueColumns 173 | } 174 | 175 | func assertAll(elements []int, assertFn func(element int) bool) bool { 176 | for _, el := range elements { 177 | if !assertFn(el) { 178 | return false 179 | } 180 | } 181 | return true 182 | } 183 | 184 | func getColumnsCount(fs afero.Fs, filename string, separator rune, lazyQuotes bool) (int, error) { 185 | base, err := fs.Open(filename) 186 | if err != nil { 187 | return 0, err 188 | } 189 | defer base.Close() 190 | csvReader := csv.NewReader(base) 191 | csvReader.Comma = separator 192 | csvReader.LazyQuotes = lazyQuotes 193 | record, err := csvReader.Read() 194 | if err != nil { 195 | if err == io.EOF { 196 | return 0, fmt.Errorf("unable to process headers from csv file. EOF reached. 
invalid CSV file") 197 | } 198 | return 0, err 199 | } 200 | 201 | return len(record), nil 202 | } 203 | 204 | // BaseDigestConfig creates a digest.Context from cmd.Context 205 | // that is needed to start the diff process 206 | func (c *Context) BaseDigestConfig() (digest.Config, error) { 207 | return digest.Config{ 208 | Reader: c.baseFile, 209 | Value: c.valueColumnPositions, 210 | Key: c.primaryKeyPositions, 211 | Include: c.includeColumnPositions, 212 | Separator: c.separator, 213 | LazyQuotes: c.lazyQuotes, 214 | }, nil 215 | } 216 | 217 | // DeltaDigestConfig creates a digest.Context from cmd.Context 218 | // that is needed to start the diff process 219 | func (c *Context) DeltaDigestConfig() (digest.Config, error) { 220 | return digest.Config{ 221 | Reader: c.deltaFile, 222 | Value: c.valueColumnPositions, 223 | Key: c.primaryKeyPositions, 224 | Include: c.includeColumnPositions, 225 | Separator: c.separator, 226 | LazyQuotes: c.lazyQuotes, 227 | }, nil 228 | } 229 | 230 | // Close all file handles 231 | func (c *Context) Close() { 232 | if c.baseFile != nil { 233 | _ = c.baseFile.Close() 234 | } 235 | if c.deltaFile != nil { 236 | _ = c.deltaFile.Close() 237 | } 238 | } 239 | -------------------------------------------------------------------------------- /cmd/config_test.go: -------------------------------------------------------------------------------- 1 | package cmd_test 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | 7 | "github.com/aswinkarthik/csvdiff/cmd" 8 | "github.com/spf13/afero" 9 | 10 | "github.com/aswinkarthik/csvdiff/pkg/digest" 11 | "github.com/stretchr/testify/assert" 12 | ) 13 | 14 | func TestPrimaryKeyPositions(t *testing.T) { 15 | type testCase struct { 16 | name string 17 | in []int 18 | out digest.Positions 19 | } 20 | testCases := []testCase{ 21 | { 22 | name: "should return primary key columns", 23 | in: []int{0, 1}, 24 | out: []int{0, 1}, 25 | }, 26 | { 27 | name: "should return primary key columns as default input is empty", 28 | in: []int{}, 29 | out: []int{0}, 30 | }, 31 | { 32 | name: "should return primary key columns as default input is nil", 33 | in: []int{}, 34 | out: []int{0}, 35 | }, 36 | } 37 | for _, tt := range testCases { 38 | t.Run(tt.name, func(t *testing.T) { 39 | fs := afero.NewMemMapFs() 40 | setupFiles(t, fs) 41 | ctx, err := cmd.NewContext(fs, 42 | tt.in, 43 | nil, 44 | nil, 45 | nil, 46 | "json", 47 | "/base.csv", 48 | "/delta.csv", 49 | ',', 50 | false, 51 | ) 52 | assert.NoError(t, err) 53 | assert.Equal(t, tt.out, ctx.GetPrimaryKeys()) 54 | 55 | }) 56 | } 57 | } 58 | 59 | func TestValueColumnPositions(t *testing.T) { 60 | type testCase struct { 61 | name string 62 | in []int 63 | out digest.Positions 64 | } 65 | testCases := []testCase{ 66 | { 67 | name: "should return value columns", 68 | in: []int{0, 1}, 69 | out: []int{0, 1}, 70 | }, 71 | { 72 | name: "should return value columns as empty if input is empty", 73 | in: []int{}, 74 | out: []int{}, 75 | }, 76 | { 77 | name: "should return value columns as empty if input is nil", 78 | in: []int{}, 79 | out: []int{}, 80 | }, 81 | } 82 | for _, tt := range testCases { 83 | t.Run(tt.name, func(t *testing.T) { 84 | fs := afero.NewMemMapFs() 85 | setupFiles(t, fs) 86 | ctx, err := cmd.NewContext(fs, 87 | nil, 88 | tt.in, 89 | nil, 90 | nil, 91 | "json", 92 | "/base.csv", 93 | "/delta.csv", 94 | ',', 95 | false, 96 | ) 97 | assert.NoError(t, err) 98 | assert.Equal(t, tt.out, ctx.GetValueColumns()) 99 | 100 | }) 101 | } 102 | } 103 | 104 | func TestNewContext(t *testing.T) { 105 | 106 | 
t.Run("should validate format", func(t *testing.T) { 107 | fs := afero.NewMemMapFs() 108 | 109 | setupFiles(t, fs) 110 | 111 | t.Run("empty format", func(t *testing.T) { 112 | _, err := cmd.NewContext( 113 | fs, 114 | nil, 115 | nil, 116 | nil, 117 | nil, 118 | "", 119 | "/base.csv", 120 | "/delta.csv", 121 | ',', 122 | false, 123 | ) 124 | 125 | assert.EqualError(t, err, "validation failed: specified format is not valid") 126 | }) 127 | 128 | t.Run("valid format", func(t *testing.T) { 129 | _, err := cmd.NewContext( 130 | fs, 131 | nil, 132 | nil, 133 | nil, 134 | nil, 135 | "rowmark", 136 | "/base.csv", 137 | "/delta.csv", 138 | ',', 139 | false, 140 | ) 141 | 142 | assert.NoError(t, err) 143 | }) 144 | 145 | t.Run("case-insensitive valid format", func(t *testing.T) { 146 | _, err := cmd.NewContext( 147 | fs, 148 | nil, 149 | nil, 150 | nil, 151 | nil, 152 | "jSOn", 153 | "/base.csv", 154 | "/delta.csv", 155 | ',', 156 | false, 157 | ) 158 | 159 | assert.NoError(t, err) 160 | }) 161 | 162 | }) 163 | 164 | t.Run("should validate base file existence", func(t *testing.T) { 165 | fs := afero.NewMemMapFs() 166 | _, err := cmd.NewContext( 167 | fs, 168 | nil, 169 | nil, 170 | nil, 171 | nil, 172 | "json", 173 | "/base.csv", 174 | "/delta.csv", 175 | ',', 176 | false, 177 | ) 178 | assert.EqualError(t, err, "error in base-file: open "+string(os.PathSeparator)+"base.csv: file does not exist") 179 | }) 180 | 181 | t.Run("should validate if base file is a csv file", func(t *testing.T) { 182 | fs := afero.NewMemMapFs() 183 | { 184 | err := fs.Mkdir("/base.csv", os.ModePerm) 185 | assert.NoError(t, err) 186 | } 187 | 188 | _, err := cmd.NewContext( 189 | fs, 190 | nil, 191 | nil, 192 | nil, 193 | nil, 194 | "json", 195 | "/base.csv", 196 | "/delta.csv", 197 | ',', 198 | false, 199 | ) 200 | assert.EqualError(t, err, "error in base-file: unable to process headers from csv file. EOF reached. invalid CSV file") 201 | }) 202 | t.Run("should validate if delta file is a csv file", func(t *testing.T) { 203 | fs := afero.NewMemMapFs() 204 | { 205 | assert.NoError(t, afero.WriteFile(fs, "/base.csv", []byte("id"), os.ModePerm)) 206 | err := fs.Mkdir("/delta.csv", os.ModePerm) 207 | assert.NoError(t, err) 208 | } 209 | 210 | _, err := cmd.NewContext( 211 | fs, 212 | nil, 213 | nil, 214 | nil, 215 | nil, 216 | "json", 217 | "/base.csv", 218 | "/delta.csv", 219 | ',', 220 | false, 221 | ) 222 | assert.EqualError(t, err, "error in delta-file: unable to process headers from csv file. EOF reached. 
invalid CSV file") 223 | }) 224 | 225 | t.Run("should validate if both base and delta file exist", func(t *testing.T) { 226 | fs := afero.NewMemMapFs() 227 | setupFiles(t, fs) 228 | 229 | _, err := cmd.NewContext( 230 | fs, 231 | nil, 232 | nil, 233 | nil, 234 | nil, 235 | "json", 236 | "/base.csv", 237 | "/delta.csv", 238 | ',', 239 | false, 240 | ) 241 | assert.NoError(t, err) 242 | }) 243 | 244 | t.Run("should validate if positions are within the limits of the csv file", func(t *testing.T) { 245 | fs := afero.NewMemMapFs() 246 | { 247 | baseContent := []byte("id,name,age,desc") 248 | err := afero.WriteFile(fs, "/base.csv", baseContent, os.ModePerm) 249 | assert.NoError(t, err) 250 | } 251 | { 252 | deltaContent := []byte("id,name,age,desc") 253 | err := afero.WriteFile(fs, "/delta.csv", deltaContent, os.ModePerm) 254 | assert.NoError(t, err) 255 | } 256 | 257 | t.Run("primary key positions", func(t *testing.T) { 258 | _, err := cmd.NewContext( 259 | fs, 260 | []int{4}, 261 | nil, 262 | nil, 263 | nil, 264 | "json", 265 | "/base.csv", 266 | "/delta.csv", 267 | ',', 268 | false, 269 | ) 270 | 271 | assert.EqualError(t, err, "validation failed: --primary-key positions are out of bounds") 272 | }) 273 | 274 | t.Run("include positions", func(t *testing.T) { 275 | _, err := cmd.NewContext( 276 | fs, 277 | nil, 278 | nil, 279 | nil, 280 | []int{4}, 281 | "json", 282 | "/base.csv", 283 | "/delta.csv", 284 | ',', 285 | false, 286 | ) 287 | 288 | assert.EqualError(t, err, "validation failed: --include positions are out of bounds") 289 | }) 290 | 291 | t.Run("value positions", func(t *testing.T) { 292 | _, err := cmd.NewContext( 293 | fs, 294 | nil, 295 | []int{4}, 296 | nil, 297 | nil, 298 | "json", 299 | "/base.csv", 300 | "/delta.csv", 301 | ',', 302 | false, 303 | ) 304 | 305 | assert.EqualError(t, err, "validation failed: --columns positions are out of bounds") 306 | }) 307 | 308 | t.Run("inequal base and delta files", func(t *testing.T) { 309 | { 310 | deltaContent := []byte("id,name,age,desc,size") 311 | err := afero.WriteFile(fs, "/delta.csv", deltaContent, os.ModePerm) 312 | assert.NoError(t, err) 313 | } 314 | 315 | _, err := cmd.NewContext( 316 | fs, 317 | nil, 318 | nil, 319 | nil, 320 | nil, 321 | "json", 322 | "/base.csv", 323 | "/delta.csv", 324 | ',', 325 | false, 326 | ) 327 | assert.EqualError(t, err, "base-file and delta-file columns count do not match") 328 | }) 329 | }) 330 | 331 | t.Run("should pass only one of columns or ignore columns", func(t *testing.T) { 332 | fs := afero.NewMemMapFs() 333 | setupFiles(t, fs) 334 | 335 | _, err := cmd.NewContext( 336 | fs, 337 | nil, 338 | []int{0}, 339 | []int{0}, 340 | nil, 341 | "jSOn", 342 | "/base.csv", 343 | "/delta.csv", 344 | ',', 345 | false, 346 | ) 347 | 348 | assert.EqualError(t, err, "only one of --columns or --ignore-columns") 349 | }) 350 | } 351 | 352 | func TestConfig_DigestConfig(t *testing.T) { 353 | t.Run("should create digest ctx", func(t *testing.T) { 354 | fs := afero.NewMemMapFs() 355 | setupFiles(t, fs) 356 | 357 | valueColumns := digest.Positions{0, 1, 2} 358 | primaryColumns := digest.Positions{0, 1} 359 | includeColumns := digest.Positions{2} 360 | ctx, err := cmd.NewContext( 361 | fs, 362 | primaryColumns, 363 | valueColumns, 364 | nil, 365 | includeColumns, 366 | "jSOn", 367 | "/base.csv", 368 | "/delta.csv", 369 | ',', 370 | false, 371 | ) 372 | assert.NoError(t, err) 373 | 374 | baseConfig, err := ctx.BaseDigestConfig() 375 | 376 | assert.NoError(t, err) 377 | assert.NotNil(t, baseConfig.Reader) 378 | 
assert.Equal(t, valueColumns, baseConfig.Value) 379 | assert.Equal(t, primaryColumns, baseConfig.Key) 380 | assert.Equal(t, includeColumns, baseConfig.Include) 381 | 382 | deltaConfig, err := ctx.DeltaDigestConfig() 383 | 384 | assert.NoError(t, err) 385 | assert.NotNil(t, deltaConfig.Reader) 386 | assert.Equal(t, valueColumns, deltaConfig.Value) 387 | assert.Equal(t, primaryColumns, deltaConfig.Key) 388 | assert.Equal(t, includeColumns, deltaConfig.Include) 389 | }) 390 | t.Run("should infer values columns as inverse of ignore columns digest ctx", func(t *testing.T) { 391 | fs := afero.NewMemMapFs() 392 | setupFiles(t, fs) 393 | 394 | ignoreValueColumns := digest.Positions{0, 1, 2} 395 | primaryColumns := digest.Positions{0, 1} 396 | ctx, err := cmd.NewContext( 397 | fs, 398 | primaryColumns, 399 | nil, 400 | ignoreValueColumns, 401 | nil, 402 | "jSOn", 403 | "/base.csv", 404 | "/delta.csv", 405 | ',', 406 | false, 407 | ) 408 | assert.NoError(t, err) 409 | 410 | baseConfig, err := ctx.BaseDigestConfig() 411 | 412 | assert.NoError(t, err) 413 | assert.NotNil(t, baseConfig.Reader) 414 | assert.Equal(t, digest.Positions{3}, baseConfig.Value) 415 | assert.Equal(t, primaryColumns, baseConfig.Key) 416 | 417 | deltaConfig, err := ctx.DeltaDigestConfig() 418 | 419 | assert.NoError(t, err) 420 | assert.NotNil(t, deltaConfig.Reader) 421 | assert.Equal(t, digest.Positions{3}, deltaConfig.Value) 422 | assert.Equal(t, primaryColumns, deltaConfig.Key) 423 | }) 424 | } 425 | func setupFiles(t *testing.T, fs afero.Fs) { 426 | { 427 | baseContent := []byte("id,name,age,desc") 428 | err := afero.WriteFile(fs, "/base.csv", baseContent, os.ModePerm) 429 | assert.NoError(t, err) 430 | } 431 | { 432 | deltaContent := []byte("id,name,age,desc") 433 | err := afero.WriteFile(fs, "/delta.csv", deltaContent, os.ModePerm) 434 | assert.NoError(t, err) 435 | } 436 | } 437 | -------------------------------------------------------------------------------- /cmd/formatter.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "github.com/aswinkarthik/csvdiff/pkg/digest" 7 | "github.com/fatih/color" 8 | "io" 9 | ) 10 | 11 | const ( 12 | rowmark = "rowmark" 13 | jsonFormat = "json" 14 | legacyJSONFormat = "legacy-json" 15 | lineDiff = "diff" 16 | wordDiff = "word-diff" 17 | colorWords = "color-words" 18 | ) 19 | 20 | var allFormats = []string{rowmark, jsonFormat, legacyJSONFormat, lineDiff, wordDiff, colorWords} 21 | 22 | // Formatter can print the differences to stdout 23 | // and accompanying metadata to stderr 24 | type Formatter struct { 25 | stdout io.Writer 26 | stderr io.Writer 27 | ctx Context 28 | } 29 | 30 | // NewFormatter can be used to create a new formatter 31 | func NewFormatter(stdout, stderr io.Writer, ctx Context) *Formatter { 32 | if ctx.separator == rune(0) { 33 | ctx.separator = ',' 34 | } 35 | return &Formatter{stdout: stdout, stderr: stderr, ctx: ctx} 36 | } 37 | 38 | // Format can be used to format the differences based on ctx 39 | // to appropriate writers 40 | func (f *Formatter) Format(diff digest.Differences) error { 41 | switch f.ctx.format { 42 | case legacyJSONFormat: 43 | return f.legacyJSON(diff) 44 | case jsonFormat: 45 | return f.json(diff) 46 | case rowmark: 47 | return f.rowMark(diff) 48 | case lineDiff: 49 | return f.lineDiff(diff) 50 | case wordDiff: 51 | return f.wordDiff(diff) 52 | case colorWords: 53 | return f.colorWords(diff) 54 | default: 55 | return fmt.Errorf("formatter not found") 
56 | } 57 | } 58 | 59 | // legacyJSON formats the diff as a JSON object in the legacy format: 60 | // { "Additions": [...], "Modifications": [...] } 61 | func (f *Formatter) legacyJSON(diff digest.Differences) error { 62 | // jsonDifference is a struct to represent legacy JSON format 63 | type jsonDifference struct { 64 | Additions []string 65 | Modifications []string 66 | Deletions []string 67 | } 68 | 69 | includes := f.ctx.GetIncludeColumnPositions() 70 | 71 | additions := make([]string, 0, len(diff.Additions)) 72 | for _, addition := range diff.Additions { 73 | additions = append(additions, includes.String(addition, f.ctx.separator)) 74 | } 75 | 76 | modifications := make([]string, 0, len(diff.Modifications)) 77 | for _, modification := range diff.Modifications { 78 | modifications = append(modifications, includes.String(modification.Current, f.ctx.separator)) 79 | } 80 | 81 | deletions := make([]string, 0, len(diff.Deletions)) 82 | for _, deletion := range diff.Deletions { 83 | deletions = append(deletions, includes.String(deletion, f.ctx.separator)) 84 | } 85 | 86 | jsonDiff := jsonDifference{Additions: additions, Modifications: modifications, Deletions: deletions} 87 | data, err := json.MarshalIndent(jsonDiff, "", " ") 88 | 89 | if err != nil { 90 | return fmt.Errorf("error when serializing with JSON formatter: %v", err) 91 | } 92 | 93 | _, err = f.stdout.Write(data) 94 | 95 | if err != nil { 96 | return fmt.Errorf("error when writing to writer with JSON formatter: %v", err) 97 | } 98 | 99 | return nil 100 | } 101 | 102 | // json formats the diff as a JSON object: 103 | // { "Additions": [...], "Modifications": [{ "Original": [...], "Current": [...]}]} 104 | func (f *Formatter) json(diff digest.Differences) error { 105 | includes := f.ctx.GetIncludeColumnPositions() 106 | 107 | additions := make([]string, 0, len(diff.Additions)) 108 | for _, addition := range diff.Additions { 109 | additions = append(additions, includes.String(addition, f.ctx.separator)) 110 | } 111 | 112 | deletions := make([]string, 0, len(diff.Deletions)) 113 | for _, deletion := range diff.Deletions { 114 | deletions = append(deletions, includes.String(deletion, f.ctx.separator)) 115 | } 116 | 117 | type modification struct { 118 | Original string 119 | Current string 120 | } 121 | 122 | type jsonDifference struct { 123 | Additions []string 124 | Modifications []modification 125 | Deletions []string 126 | } 127 | 128 | modifications := make([]modification, 0, len(diff.Modifications)) 129 | for _, mods := range diff.Modifications { 130 | m := modification{Original: includes.String(mods.Original, f.ctx.separator), Current: includes.String(mods.Current, f.ctx.separator)} 131 | modifications = append(modifications, m) 132 | } 133 | 134 | data, err := json.MarshalIndent(jsonDifference{Additions: additions, Modifications: modifications, Deletions: deletions}, "", " ") 135 | 136 | if err != nil { 137 | return fmt.Errorf("error when serializing with JSON formatter: %v", err) 138 | } 139 | 140 | _, err = f.stdout.Write(data) 141 | 142 | if err != nil { 143 | return fmt.Errorf("error when writing to writer with JSON formatter: %v", err) 144 | } 145 | 146 | return nil 147 | } 148 | 149 | // rowMark formats the diff by marking each row as 150 | // ADDED, MODIFIED, or DELETED; the status is appended to the row as a new column in the output. 
151 | func (f *Formatter) rowMark(diff digest.Differences) error { 152 | _, _ = fmt.Fprintf(f.stderr, "Additions %d\n", len(diff.Additions)) 153 | _, _ = fmt.Fprintf(f.stderr, "Modifications %d\n", len(diff.Modifications)) 154 | _, _ = fmt.Fprintf(f.stderr, "Deletions %d\n", len(diff.Deletions)) 155 | _, _ = fmt.Fprintf(f.stderr, "Rows:\n") 156 | 157 | includes := f.ctx.GetIncludeColumnPositions() 158 | 159 | additions := make([]string, 0, len(diff.Additions)) 160 | for _, addition := range diff.Additions { 161 | additions = append(additions, includes.String(addition, f.ctx.separator)) 162 | } 163 | 164 | modifications := make([]string, 0, len(diff.Modifications)) 165 | for _, modification := range diff.Modifications { 166 | modifications = append(modifications, includes.String(modification.Current, f.ctx.separator)) 167 | } 168 | 169 | deletions := make([]string, 0, len(diff.Deletions)) 170 | for _, deletion := range diff.Deletions { 171 | deletions = append(deletions, includes.String(deletion, f.ctx.separator)) 172 | } 173 | 174 | for _, added := range additions { 175 | _, _ = fmt.Fprintf(f.stdout, "%s,%s\n", added, "ADDED") 176 | } 177 | 178 | for _, modified := range modifications { 179 | _, _ = fmt.Fprintf(f.stdout, "%s,%s\n", modified, "MODIFIED") 180 | } 181 | 182 | for _, deleted := range deletions { 183 | _, _ = fmt.Fprintf(f.stdout, "%s,%s\n", deleted, "DELETED") 184 | } 185 | 186 | return nil 187 | } 188 | 189 | // lineDiff is git-style line diff 190 | func (f *Formatter) lineDiff(diff digest.Differences) error { 191 | includes := f.ctx.GetIncludeColumnPositions() 192 | 193 | blue := color.New(color.FgBlue).FprintfFunc() 194 | red := color.New(color.FgRed).FprintfFunc() 195 | green := color.New(color.FgGreen).FprintfFunc() 196 | 197 | blue(f.stderr, "# Additions (%d)\n", len(diff.Additions)) 198 | for _, addition := range diff.Additions { 199 | green(f.stdout, "+ %s\n", includes.String(addition, f.ctx.separator)) 200 | } 201 | blue(f.stderr, "# Modifications (%d)\n", len(diff.Modifications)) 202 | for _, modification := range diff.Modifications { 203 | red(f.stdout, "- %s\n", includes.String(modification.Original, f.ctx.separator)) 204 | green(f.stdout, "+ %s\n", includes.String(modification.Current, f.ctx.separator)) 205 | } 206 | blue(f.stderr, "# Deletions (%d)\n", len(diff.Deletions)) 207 | for _, deletion := range diff.Deletions { 208 | red(f.stdout, "- %s\n", includes.String(deletion, f.ctx.separator)) 209 | } 210 | 211 | return nil 212 | } 213 | 214 | // wordDiff is git-style --word-diff 215 | func (f *Formatter) wordDiff(diff digest.Differences) error { 216 | return f.wordLevelDiffs(diff, "[-%s-]", "{+%s+}") 217 | } 218 | 219 | // colorWords is git-style --color-words 220 | func (f *Formatter) colorWords(diff digest.Differences) error { 221 | return f.wordLevelDiffs(diff, "%s", "%s") 222 | } 223 | 224 | func (f *Formatter) wordLevelDiffs(diff digest.Differences, deletionFormat, additionFormat string) error { 225 | includes := f.ctx.GetIncludeColumnPositions() 226 | if len(includes) <= 0 { 227 | includes = f.ctx.GetValueColumns() 228 | } 229 | blue := color.New(color.FgBlue).SprintfFunc() 230 | red := color.New(color.FgRed).SprintfFunc() 231 | green := color.New(color.FgGreen).SprintfFunc() 232 | 233 | _, _ = fmt.Fprintln(f.stderr, blue("# Additions (%d)", len(diff.Additions))) 234 | for _, addition := range diff.Additions { 235 | _, _ = fmt.Fprintln(f.stdout, green(additionFormat, includes.String(addition, f.ctx.separator))) 236 | } 237 | 238 | _, _ = 
fmt.Fprintln(f.stderr, blue("# Modifications (%d)", len(diff.Modifications))) 239 | for _, modification := range diff.Modifications { 240 | result := make([]string, 0, len(modification.Current)) 241 | for i := 0; i < len(includes) || i < len(modification.Current); i++ { 242 | if modification.Original[i] != modification.Current[i] { 243 | removed := red(deletionFormat, modification.Original[i]) 244 | added := green(additionFormat, modification.Current[i]) 245 | result = append(result, fmt.Sprintf("%s%s", removed, added)) 246 | } else { 247 | result = append(result, modification.Current[i]) 248 | } 249 | } 250 | _, _ = fmt.Fprintln(f.stdout, includes.String(result, f.ctx.separator)) 251 | } 252 | 253 | _, _ = fmt.Fprintln(f.stderr, blue("# Deletions (%d)", len(diff.Deletions))) 254 | for _, deletion := range diff.Deletions { 255 | _, _ = fmt.Fprintln(f.stdout, red(deletionFormat, includes.String(deletion, f.ctx.separator))) 256 | } 257 | 258 | return nil 259 | 260 | } 261 | -------------------------------------------------------------------------------- /cmd/formatter_test.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | 7 | "github.com/aswinkarthik/csvdiff/pkg/digest" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func TestLegacyJSONFormat(t *testing.T) { 13 | diff := digest.Differences{ 14 | Additions: []digest.Addition{[]string{"additions"}}, 15 | Modifications: []digest.Modification{{Current: []string{"modification"}}}, 16 | Deletions: []digest.Deletion{[]string{"deletions"}}, 17 | } 18 | expected := `{ 19 | "Additions": [ 20 | "additions" 21 | ], 22 | "Modifications": [ 23 | "modification" 24 | ], 25 | "Deletions": [ 26 | "deletions" 27 | ] 28 | }` 29 | 30 | var stdout bytes.Buffer 31 | var stderr bytes.Buffer 32 | 33 | formatter := NewFormatter(&stdout, &stderr, Context{format: "legacy-json"}) 34 | 35 | err := formatter.Format(diff) 36 | assert.NoError(t, err) 37 | assert.Equal(t, expected, stdout.String()) 38 | } 39 | 40 | func TestJSONFormat(t *testing.T) { 41 | diff := digest.Differences{ 42 | Additions: []digest.Addition{[]string{"additions"}}, 43 | Modifications: []digest.Modification{{Original: []string{"original"}, Current: []string{"modification"}}}, 44 | Deletions: []digest.Deletion{[]string{"deletions"}}, 45 | } 46 | expected := `{ 47 | "Additions": [ 48 | "additions" 49 | ], 50 | "Modifications": [ 51 | { 52 | "Original": "original", 53 | "Current": "modification" 54 | } 55 | ], 56 | "Deletions": [ 57 | "deletions" 58 | ] 59 | }` 60 | 61 | var stdout bytes.Buffer 62 | var stderr bytes.Buffer 63 | 64 | formatter := NewFormatter(&stdout, &stderr, Context{format: "json"}) 65 | 66 | err := formatter.Format(diff) 67 | assert.NoError(t, err) 68 | assert.Equal(t, expected, stdout.String()) 69 | } 70 | func TestRowMarkFormatter(t *testing.T) { 71 | diff := digest.Differences{ 72 | Additions: []digest.Addition{[]string{"additions"}}, 73 | Modifications: []digest.Modification{{Current: []string{"modification"}}}, 74 | Deletions: []digest.Deletion{[]string{"deletions"}}, 75 | } 76 | expectedStdout := `additions,ADDED 77 | modification,MODIFIED 78 | deletions,DELETED 79 | ` 80 | expectedStderr := `Additions 1 81 | Modifications 1 82 | Deletions 1 83 | Rows: 84 | ` 85 | 86 | var stdout bytes.Buffer 87 | var stderr bytes.Buffer 88 | 89 | formatter := NewFormatter(&stdout, &stderr, Context{format: "rowmark"}) 90 | 91 | err := formatter.Format(diff) 92 | 93 | assert.NoError(t, 
err) 94 | assert.Equal(t, expectedStdout, stdout.String()) 95 | assert.Equal(t, expectedStderr, stderr.String()) 96 | } 97 | 98 | func TestLineDiff(t *testing.T) { 99 | t.Run("should show line diff with comma by default", func(t *testing.T) { 100 | diff := digest.Differences{ 101 | Additions: []digest.Addition{[]string{"additions"}}, 102 | Modifications: []digest.Modification{ 103 | { 104 | Original: []string{"original", "comma,separated,value"}, 105 | Current: []string{"modification", "comma,separated,value-2"}, 106 | }, 107 | }, 108 | Deletions: []digest.Deletion{{"deletion", "this-row-was-deleted"}}, 109 | } 110 | expectedStdout := `+ additions 111 | - original,"comma,separated,value" 112 | + modification,"comma,separated,value-2" 113 | - deletion,this-row-was-deleted 114 | ` 115 | expectedStderr := `# Additions (1) 116 | # Modifications (1) 117 | # Deletions (1) 118 | ` 119 | 120 | var stdout bytes.Buffer 121 | var stderr bytes.Buffer 122 | 123 | formatter := NewFormatter(&stdout, &stderr, Context{format: "diff"}) 124 | 125 | err := formatter.Format(diff) 126 | 127 | assert.NoError(t, err) 128 | assert.Equal(t, expectedStdout, stdout.String()) 129 | assert.Equal(t, expectedStderr, stderr.String()) 130 | }) 131 | 132 | t.Run("should show line diff with custom separator", func(t *testing.T) { 133 | diff := digest.Differences{ 134 | Additions: []digest.Addition{[]string{"additions"}}, 135 | Modifications: []digest.Modification{ 136 | { 137 | Original: []string{"original", "comma,separated,value"}, 138 | Current: []string{"modification", "comma,separated,value-2"}, 139 | }, 140 | }, 141 | Deletions: []digest.Deletion{{"deletion", "this-row-was-deleted"}}, 142 | } 143 | expectedStdout := `+ additions 144 | - original|comma,separated,value 145 | + modification|comma,separated,value-2 146 | - deletion|this-row-was-deleted 147 | ` 148 | expectedStderr := `# Additions (1) 149 | # Modifications (1) 150 | # Deletions (1) 151 | ` 152 | 153 | var stdout bytes.Buffer 154 | var stderr bytes.Buffer 155 | 156 | formatter := NewFormatter(&stdout, &stderr, Context{format: "diff", separator: '|'}) 157 | 158 | err := formatter.Format(diff) 159 | 160 | assert.NoError(t, err) 161 | assert.Equal(t, expectedStdout, stdout.String()) 162 | assert.Equal(t, expectedStderr, stderr.String()) 163 | }) 164 | 165 | } 166 | 167 | func TestWordDiff(t *testing.T) { 168 | t.Run("should cover single column happy path", func(t *testing.T) { 169 | diff := digest.Differences{ 170 | Additions: []digest.Addition{[]string{"additions"}}, 171 | Modifications: []digest.Modification{{Original: []string{"original"}, Current: []string{"modification"}}}, 172 | Deletions: []digest.Deletion{{"deletions"}}, 173 | } 174 | expectedStdout := `{+additions+} 175 | [-original-]{+modification+} 176 | [-deletions-] 177 | ` 178 | expectedStderr := `# Additions (1) 179 | # Modifications (1) 180 | # Deletions (1) 181 | ` 182 | 183 | var stdout bytes.Buffer 184 | var stderr bytes.Buffer 185 | 186 | formatter := NewFormatter(&stdout, &stderr, Context{format: "word-diff"}) 187 | 188 | err := formatter.Format(diff) 189 | 190 | assert.NoError(t, err) 191 | assert.Equal(t, expectedStdout, stdout.String()) 192 | assert.Equal(t, expectedStderr, stderr.String()) 193 | }) 194 | 195 | t.Run("should ouput only selective columns", func(t *testing.T) { 196 | diff := digest.Differences{ 197 | Additions: []digest.Addition{[]string{"additions", "ignored-column"}}, 198 | Modifications: []digest.Modification{ 199 | {Original: []string{"original", "ignored-column"}, 
Current: []string{"modification", "ignored-column"}}, 200 | }, 201 | Deletions: []digest.Deletion{{"deletions", "ignored-column"}}, 202 | } 203 | expectedStdout := `{+additions+} 204 | [-original-]{+modification+} 205 | [-deletions-] 206 | ` 207 | expectedStderr := `# Additions (1) 208 | # Modifications (1) 209 | # Deletions (1) 210 | ` 211 | 212 | var stdout bytes.Buffer 213 | var stderr bytes.Buffer 214 | 215 | formatter := NewFormatter(&stdout, &stderr, Context{ 216 | format: "word-diff", 217 | includeColumnPositions: digest.Positions{0}, 218 | }) 219 | 220 | err := formatter.Format(diff) 221 | 222 | assert.NoError(t, err) 223 | assert.Equal(t, expectedStdout, stdout.String()) 224 | assert.Equal(t, expectedStderr, stderr.String()) 225 | 226 | }) 227 | } 228 | 229 | func TestColorWords(t *testing.T) { 230 | diff := digest.Differences{ 231 | Additions: []digest.Addition{[]string{"additions"}}, 232 | Modifications: []digest.Modification{{Original: []string{"original"}, Current: []string{"modification"}}}, 233 | Deletions: []digest.Deletion{{"deletions"}}, 234 | } 235 | expectedStdout := `additions 236 | originalmodification 237 | deletions 238 | ` 239 | expectedStderr := `# Additions (1) 240 | # Modifications (1) 241 | # Deletions (1) 242 | ` 243 | 244 | var stdout bytes.Buffer 245 | var stderr bytes.Buffer 246 | 247 | formatter := NewFormatter(&stdout, &stderr, Context{format: "color-words"}) 248 | 249 | err := formatter.Format(diff) 250 | 251 | assert.NoError(t, err) 252 | assert.Equal(t, expectedStdout, stdout.String()) 253 | assert.Equal(t, expectedStderr, stderr.String()) 254 | } 255 | 256 | func TestWrongFormatter(t *testing.T) { 257 | diff := digest.Differences{} 258 | formatter := NewFormatter(nil, nil, Context{format: "random-str"}) 259 | 260 | err := formatter.Format(diff) 261 | 262 | assert.Error(t, err) 263 | } 264 | -------------------------------------------------------------------------------- /cmd/root.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2018 aswinkarthik 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 
20 | 21 | package cmd 22 | 23 | import ( 24 | "fmt" 25 | "io" 26 | "os" 27 | "strings" 28 | "time" 29 | "unicode/utf8" 30 | 31 | "github.com/fatih/color" 32 | "github.com/spf13/afero" 33 | 34 | "github.com/aswinkarthik/csvdiff/pkg/digest" 35 | "github.com/spf13/cobra" 36 | ) 37 | 38 | var ( 39 | timed bool 40 | ) 41 | 42 | // rootCmd represents the base command when called without any subcommands 43 | var rootCmd = &cobra.Command{ 44 | Use: "csvdiff ", 45 | SilenceUsage: true, 46 | SilenceErrors: true, 47 | Short: "A diff tool for database tables dumped as csv files", 48 | Long: `Differentiates two csv files and finds out the additions and modifications. 49 | Most suitable for csv files created from database tables`, 50 | PreRunE: func(cmd *cobra.Command, args []string) error { 51 | // validate args 52 | if len(args) != 2 { 53 | return fmt.Errorf("pass 2 files. Usage: csvdiff ") 54 | } 55 | 56 | return nil 57 | }, 58 | RunE: func(cmd *cobra.Command, args []string) error { 59 | if timed { 60 | defer timeTrack(time.Now(), "csvdiff") 61 | } 62 | fs := afero.NewOsFs() 63 | baseFilename := args[0] 64 | deltaFilename := args[1] 65 | runeSeparator, err := parseSeparator(separator) 66 | if err != nil { 67 | return err 68 | } 69 | ctx, err := NewContext( 70 | fs, 71 | primaryKeyPositions, 72 | valueColumnPositions, 73 | ignoreValueColumnPositions, 74 | includeColumnPositions, 75 | format, 76 | baseFilename, 77 | deltaFilename, 78 | runeSeparator, 79 | lazyQuotes, 80 | ) 81 | 82 | if err != nil { 83 | return err 84 | } 85 | defer ctx.Close() 86 | 87 | return runContext(ctx, os.Stdout, os.Stderr) 88 | }, 89 | } 90 | 91 | func runContext(ctx *Context, outputStream, errorStream io.Writer) error { 92 | baseConfig, err := ctx.BaseDigestConfig() 93 | if err != nil { 94 | return fmt.Errorf("error opening base-file %s: %v", ctx.baseFilename, err) 95 | } 96 | deltaConfig, err := ctx.DeltaDigestConfig() 97 | if err != nil { 98 | return fmt.Errorf("error opening delta-file %s: %v", ctx.deltaFilename, err) 99 | } 100 | defer ctx.Close() 101 | 102 | diff, err := digest.Diff(baseConfig, deltaConfig) 103 | 104 | if err != nil { 105 | return err 106 | } 107 | 108 | return NewFormatter(outputStream, errorStream, *ctx).Format(diff) 109 | } 110 | 111 | // Execute adds all child commands to the root command and sets flags appropriately. 112 | // This is called by main.main(). It only needs to happen once to the rootCmd. 113 | func Execute() { 114 | rootCmd.Version = Version() 115 | if err := rootCmd.Execute(); err != nil { 116 | _, _ = fmt.Fprint(os.Stderr, color.RedString("csvdiff: command failed - %v\n\n", err)) 117 | _ = rootCmd.Help() 118 | os.Exit(1) 119 | } 120 | } 121 | 122 | var ( 123 | primaryKeyPositions []int 124 | valueColumnPositions []int 125 | ignoreValueColumnPositions []int 126 | includeColumnPositions []int 127 | format string 128 | separator string 129 | lazyQuotes bool 130 | ) 131 | 132 | func init() { 133 | rootCmd.Flags().BoolP("toggle", "t", false, "Help message for toggle") 134 | 135 | rootCmd.Flags().IntSliceVarP(&primaryKeyPositions, "primary-key", "p", []int{0}, "Primary key positions of the Input CSV as comma separated values Eg: 1,2") 136 | rootCmd.Flags().IntSliceVarP(&valueColumnPositions, "columns", "", []int{}, "Selectively compare positions in CSV Eg: 1,2. Default is entire row") 137 | rootCmd.Flags().IntSliceVarP(&ignoreValueColumnPositions, "ignore-columns", "", []int{}, "Inverse of --columns flag. 
This cannot be used if --columns are specified") 138 | rootCmd.Flags().IntSliceVarP(&includeColumnPositions, "include", "", []int{}, "Include positions in CSV to display Eg: 1,2. Default is entire row") 139 | rootCmd.Flags().StringVarP(&format, "format", "o", "diff", fmt.Sprintf("Available (%s)", strings.Join(allFormats, "|"))) 140 | rootCmd.Flags().StringVarP(&separator, "separator", "s", ",", "use specific separator (\\t, or any one character string)") 141 | 142 | rootCmd.Flags().BoolVarP(&timed, "time", "", false, "Measure time") 143 | rootCmd.Flags().BoolVar(&lazyQuotes, "lazyquotes", false, "allow unescaped quotes") 144 | } 145 | 146 | func timeTrack(start time.Time, name string) { 147 | elapsed := time.Since(start) 148 | _, _ = fmt.Fprintln(os.Stderr, fmt.Sprintf("%s took %s", name, elapsed)) 149 | } 150 | 151 | func parseSeparator(sep string) (rune, error) { 152 | if strings.HasPrefix(sep, "\\t") { 153 | return '\t', nil 154 | } 155 | 156 | runesep, _ := utf8.DecodeRuneInString(sep) 157 | if runesep == utf8.RuneError { 158 | return ' ', fmt.Errorf("unable to use %v (%q) as a separator", separator, separator) 159 | } 160 | 161 | return runesep, nil 162 | } 163 | -------------------------------------------------------------------------------- /cmd/root_test.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "bytes" 5 | "os" 6 | "testing" 7 | 8 | "github.com/aswinkarthik/csvdiff/pkg/digest" 9 | "github.com/spf13/afero" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func TestRunContext(t *testing.T) { 14 | t.Run("should find diff in happy path", func(t *testing.T) { 15 | fs := afero.NewMemMapFs() 16 | { 17 | baseContent := []byte(`id,name,age,desc 18 | 0,tom,2,developer 19 | 2,ryan,20,qa 20 | 4,emin,40,pm 21 | 22 | `) 23 | err := afero.WriteFile(fs, "/base.csv", baseContent, os.ModePerm) 24 | assert.NoError(t, err) 25 | } 26 | { 27 | deltaContent := []byte(`id,name,age,desc 28 | 0,tom,2,developer 29 | 1,caprio,3,developer 30 | 2,ryan,23,qa 31 | `) 32 | err := afero.WriteFile(fs, "/delta.csv", deltaContent, os.ModePerm) 33 | assert.NoError(t, err) 34 | } 35 | 36 | ctx, err := NewContext( 37 | fs, 38 | digest.Positions{0}, 39 | digest.Positions{1, 2}, 40 | nil, 41 | digest.Positions{0, 1, 2}, 42 | "json", 43 | "/base.csv", 44 | "/delta.csv", 45 | ',', 46 | false, 47 | ) 48 | assert.NoError(t, err) 49 | 50 | outStream := &bytes.Buffer{} 51 | errStream := &bytes.Buffer{} 52 | 53 | err = runContext(ctx, outStream, errStream) 54 | expected := `{ 55 | "Additions": [ 56 | "1,caprio,3" 57 | ], 58 | "Modifications": [ 59 | { 60 | "Original": "2,ryan,20", 61 | "Current": "2,ryan,23" 62 | } 63 | ], 64 | "Deletions": [ 65 | "4,emin,40" 66 | ] 67 | }` 68 | 69 | assert.NoError(t, err) 70 | assert.Equal(t, expected, outStream.String()) 71 | 72 | }) 73 | } 74 | -------------------------------------------------------------------------------- /cmd/version.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | const defaultVersion = "1.0-dev" 4 | 5 | var version = defaultVersion 6 | 7 | // SetVersion will set the version of the cmd package 8 | func SetVersion(_version string) { 9 | if _version == "" { 10 | version = defaultVersion 11 | return 12 | } 13 | 14 | version = _version 15 | } 16 | 17 | // Version will return the set version of cmd package 18 | func Version() string { 19 | if version == "" { 20 | return defaultVersion 21 | } 22 | 23 | return version 24 | } 25 | 
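SetVersion and Execute above are the only entry points a main package needs; as a rough sketch (the contents of main.go are not shown in this section, so the variable name and the ldflags wiring below are assumptions, not the project's confirmed setup), the binary could be wired up like this:

package main

import "github.com/aswinkarthik/csvdiff/cmd"

// version is assumed to be stamped at build time, e.g.
//   go build -ldflags "-X main.version=v1.2.3"
// An empty value falls back to the "1.0-dev" default handled by cmd.SetVersion.
var version string

func main() {
	cmd.SetVersion(version) // hand the build-time version to the cmd package
	cmd.Execute()           // run the root cobra command
}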
-------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: 2 | layout: "reach, diff, flags, files" 3 | behavior: default 4 | require_changes: false # if true: only post the comment if coverage changes 5 | require_base: no # [yes :: must have a base report to post] 6 | require_head: yes # [yes :: must have a head report to post] 7 | branches: null 8 | -------------------------------------------------------------------------------- /docker-push.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -el 2 | 3 | set -e 4 | 5 | VERSION=${TRAVIS_TAG} 6 | REPO_NAME="csvdiff" 7 | GROUP="aswinkarthik" 8 | 9 | docker build -t ${REPO_NAME}:${VERSION} . 10 | 11 | docker tag ${REPO_NAME}:${VERSION} ${GROUP}/${REPO_NAME}:latest 12 | docker tag ${REPO_NAME}:${VERSION} ${GROUP}/${REPO_NAME}:${VERSION} 13 | 14 | docker push ${GROUP}/${REPO_NAME}:latest 15 | docker push ${GROUP}/${REPO_NAME}:${VERSION} 16 | -------------------------------------------------------------------------------- /examples/base-small.csv: -------------------------------------------------------------------------------- 1 | 15,12,wordpress.com,com,207790,792348,wordpress.com,com,15,12,207589,791634 2 | 43,1,europa.eu,eu,116613,353412,europa.eu,eu,41,1,119129,359818 3 | 69,48,aol.com,com,97543,225532,aol.com,com,70,49,97328,224491 4 | 1615,905,proboards.com,com,19833,33110,proboards.com,com,1613,902,19835,33135 5 | 1616,906,ccleaner.com,com,19831,32507,ccleaner.com,com,1614,903,19834,32463 6 | 1617,907,doodle.com,com,19827,32902,doodle.com,com,1621,909,19787,32822 7 | -------------------------------------------------------------------------------- /examples/delta-small.csv: -------------------------------------------------------------------------------- 1 | 15,12,wordpress.com,com,207790,792348,wordpress.com,com,15,12,207589,791634 2 | 43,1,europa.eu,eu,116613,353412,europa.eu,eu,41,1,119129,359818 3 | 69,1048,aol.com,com,97543,225532,aol.com,com,70,49,97328,224491 4 | 24564,907,completely-newsite.com,com,19827,32902,completely-newsite.com,com,1621,909,19787,32822 5 | -------------------------------------------------------------------------------- /examples/lazy_quotes.csv: -------------------------------------------------------------------------------- 1 | 15 12 wordpress".com com 207790 792348 wordpress".com com 15 12 207589 791634 2 | 43 1 europa.eu eu 116613 353412 europa.eu eu 41 1 119129 359818 3 | 69 48 "aol.com com 97543 225532 "aol.com com 70 49 97328 224491 4 | 1615 905 proboards.com com 19833 33110 proboards.com com 1613 902 19835 33135 5 | 1616 906 ccleaner.com com 19831 32507 ccleaner.com com 1614 903 19834 32463 6 | 1617 907 doodle.com com 19827 32902 doodle.com com 1621 909 19787 32822 7 | -------------------------------------------------------------------------------- /examples/lazy_quotes_delta.csv: -------------------------------------------------------------------------------- 1 | 15 12 wordpress".com com 207790 792348 wordpress".com com 15 12 207589 791634 2 | 43 1 europa.eu eu 116613 353412 europa.eu eu 41 1 119129 359818 3 | 69 1048 "aol.com com 97543 225532 "aol.com com 70 49 97328 224491 4 | 24564 907 completely-newsite.com com 19827 32902 completely-newsite.com com 1621 909 19787 32822 5 | -------------------------------------------------------------------------------- /examples/no_comma.csv: 
-------------------------------------------------------------------------------- 1 | 15 12 wordpress.com com 207790 792348 wordpress.com com 15 12 207589 791634 2 | 43 1 europa.eu eu 116613 353412 europa.eu eu 41 1 119129 359818 3 | 69 48 aol.com com 97543 225532 aol.com com 70 49 97328 224491 4 | 1615 905 proboards.com com 19833 33110 proboards.com com 1613 902 19835 33135 5 | 1616 906 ccleaner.com com 19831 32507 ccleaner.com com 1614 903 19834 32463 6 | 1617 907 doodle.com com 19827 32902 doodle.com com 1621 909 19787 32822 7 | -------------------------------------------------------------------------------- /examples/no_comma_delta.csv: -------------------------------------------------------------------------------- 1 | 15 12 wordpress.com com 207790 792348 wordpress.com com 15 12 207589 791634 2 | 43 1 europa.eu eu 116613 353412 europa.eu eu 41 1 119129 359818 3 | 69 1048 aol.com com 97543 225532 aol.com com 70 49 97328 224491 4 | 24564 907 completely-newsite.com com 19827 32902 completely-newsite.com com 1621 909 19787 32822 5 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/aswinkarthik/csvdiff 2 | 3 | require ( 4 | github.com/OneOfOne/xxhash v1.2.5 // indirect 5 | github.com/cespare/xxhash v1.1.0 6 | github.com/fatih/color v1.7.0 7 | github.com/mattn/go-colorable v0.1.2 // indirect 8 | github.com/spaolacci/murmur3 v1.1.0 // indirect 9 | github.com/spf13/afero v1.1.2 10 | github.com/spf13/cobra v0.0.5 11 | github.com/stretchr/testify v1.4.0 12 | golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa // indirect 13 | ) 14 | 15 | go 1.13 16 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 2 | github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= 3 | github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= 4 | github.com/OneOfOne/xxhash v1.2.5 h1:zl/OfRA6nftbBK9qTohYBJ5xvw6C/oNKizR7cZGl3cI= 5 | github.com/OneOfOne/xxhash v1.2.5/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q= 6 | github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= 7 | github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= 8 | github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= 9 | github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= 10 | github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= 11 | github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= 12 | github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= 13 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= 14 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 15 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 16 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 17 | github.com/fatih/color v1.7.0 h1:DkWD4oS2D8LGGgTQ6IvwJJXSL5Vp2ffcQg58nFV38Ys= 18 | github.com/fatih/color v1.7.0/go.mod 
h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= 19 | github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= 20 | github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= 21 | github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= 22 | github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= 23 | github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= 24 | github.com/mattn/go-colorable v0.1.2 h1:/bC9yWikZXAL9uJdulbSfyVNIR3n3trXl+v8+1sx8mU= 25 | github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= 26 | github.com/mattn/go-isatty v0.0.8 h1:HLtExJ+uU2HOZ+wI0Tt5DtUDrx8yhUqDcp7fYERX4CE= 27 | github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= 28 | github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= 29 | github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= 30 | github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= 31 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 32 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 33 | github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= 34 | github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ= 35 | github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= 36 | github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= 37 | github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= 38 | github.com/spf13/afero v1.1.2 h1:m8/z1t7/fwjysjQRYbP0RD+bUIF/8tJwPdEZsI83ACI= 39 | github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= 40 | github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= 41 | github.com/spf13/cobra v0.0.5 h1:f0B+LkLX6DtmRH1isoNA9VTtNUK9K8xYd28JNNfOv/s= 42 | github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= 43 | github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= 44 | github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= 45 | github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= 46 | github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= 47 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 48 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 49 | github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= 50 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 51 | github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= 52 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 53 | github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= 54 | github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= 55 | golang.org/x/crypto 
v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= 56 | golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 57 | golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223 h1:DH4skfRX4EBpamg7iV4ZlCpblAHI6s6TDM39bFZumv8= 58 | golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 59 | golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa h1:KIDDMLT1O0Nr7TSxp8xM5tJcdn8tgyAONntO829og1M= 60 | golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 61 | golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= 62 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 63 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 64 | gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= 65 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 66 | -------------------------------------------------------------------------------- /install: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | # Code generated by godownloader on 2018-10-26T05:33:59Z. DO NOT EDIT. 4 | # 5 | 6 | usage() { 7 | this=$1 8 | cat </dev/null 118 | } 119 | echoerr() { 120 | echo "$@" 1>&2 121 | } 122 | log_prefix() { 123 | echo "$0" 124 | } 125 | _logp=6 126 | log_set_priority() { 127 | _logp="$1" 128 | } 129 | log_priority() { 130 | if test -z "$1"; then 131 | echo "$_logp" 132 | return 133 | fi 134 | [ "$1" -le "$_logp" ] 135 | } 136 | log_tag() { 137 | case $1 in 138 | 0) echo "emerg" ;; 139 | 1) echo "alert" ;; 140 | 2) echo "crit" ;; 141 | 3) echo "err" ;; 142 | 4) echo "warning" ;; 143 | 5) echo "notice" ;; 144 | 6) echo "info" ;; 145 | 7) echo "debug" ;; 146 | *) echo "$1" ;; 147 | esac 148 | } 149 | log_debug() { 150 | log_priority 7 || return 0 151 | echoerr "$(log_prefix)" "$(log_tag 7)" "$@" 152 | } 153 | log_info() { 154 | log_priority 6 || return 0 155 | echoerr "$(log_prefix)" "$(log_tag 6)" "$@" 156 | } 157 | log_err() { 158 | log_priority 3 || return 0 159 | echoerr "$(log_prefix)" "$(log_tag 3)" "$@" 160 | } 161 | log_crit() { 162 | log_priority 2 || return 0 163 | echoerr "$(log_prefix)" "$(log_tag 2)" "$@" 164 | } 165 | uname_os() { 166 | os=$(uname -s | tr '[:upper:]' '[:lower:]') 167 | case "$os" in 168 | msys_nt) os="windows" ;; 169 | esac 170 | echo "$os" 171 | } 172 | uname_arch() { 173 | arch=$(uname -m) 174 | case $arch in 175 | x86_64) arch="amd64" ;; 176 | x86) arch="386" ;; 177 | i686) arch="386" ;; 178 | i386) arch="386" ;; 179 | aarch64) arch="arm64" ;; 180 | armv5*) arch="armv5" ;; 181 | armv6*) arch="armv6" ;; 182 | armv7*) arch="armv7" ;; 183 | esac 184 | echo ${arch} 185 | } 186 | uname_os_check() { 187 | os=$(uname_os) 188 | case "$os" in 189 | darwin) return 0 ;; 190 | dragonfly) return 0 ;; 191 | freebsd) return 0 ;; 192 | linux) return 0 ;; 193 | android) return 0 ;; 194 | nacl) return 0 ;; 195 | netbsd) return 0 ;; 196 | openbsd) return 0 ;; 197 | plan9) return 0 ;; 198 | solaris) return 0 ;; 199 | windows) return 0 ;; 200 | esac 201 | log_crit "uname_os_check '$(uname -s)' got converted to '$os' which is not a GOOS value. 
Please file bug at https://github.com/client9/shlib" 202 | return 1 203 | } 204 | uname_arch_check() { 205 | arch=$(uname_arch) 206 | case "$arch" in 207 | 386) return 0 ;; 208 | amd64) return 0 ;; 209 | arm64) return 0 ;; 210 | armv5) return 0 ;; 211 | armv6) return 0 ;; 212 | armv7) return 0 ;; 213 | ppc64) return 0 ;; 214 | ppc64le) return 0 ;; 215 | mips) return 0 ;; 216 | mipsle) return 0 ;; 217 | mips64) return 0 ;; 218 | mips64le) return 0 ;; 219 | s390x) return 0 ;; 220 | amd64p32) return 0 ;; 221 | esac 222 | log_crit "uname_arch_check '$(uname -m)' got converted to '$arch' which is not a GOARCH value. Please file bug report at https://github.com/client9/shlib" 223 | return 1 224 | } 225 | untar() { 226 | tarball=$1 227 | case "${tarball}" in 228 | *.tar.gz | *.tgz) tar -xzf "${tarball}" ;; 229 | *.tar) tar -xf "${tarball}" ;; 230 | *.zip) unzip "${tarball}" ;; 231 | *) 232 | log_err "untar unknown archive format for ${tarball}" 233 | return 1 234 | ;; 235 | esac 236 | } 237 | mktmpdir() { 238 | test -z "$TMPDIR" && TMPDIR="$(mktemp -d)" 239 | mkdir -p "${TMPDIR}" 240 | echo "${TMPDIR}" 241 | } 242 | http_download_curl() { 243 | local_file=$1 244 | source_url=$2 245 | header=$3 246 | if [ -z "$header" ]; then 247 | code=$(curl -w '%{http_code}' -sL -o "$local_file" "$source_url") 248 | else 249 | code=$(curl -w '%{http_code}' -sL -H "$header" -o "$local_file" "$source_url") 250 | fi 251 | if [ "$code" != "200" ]; then 252 | log_debug "http_download_curl received HTTP status $code" 253 | return 1 254 | fi 255 | return 0 256 | } 257 | http_download_wget() { 258 | local_file=$1 259 | source_url=$2 260 | header=$3 261 | if [ -z "$header" ]; then 262 | wget -q -O "$local_file" "$source_url" 263 | else 264 | wget -q --header "$header" -O "$local_file" "$source_url" 265 | fi 266 | } 267 | http_download() { 268 | log_debug "http_download $2" 269 | if is_command curl; then 270 | http_download_curl "$@" 271 | return 272 | elif is_command wget; then 273 | http_download_wget "$@" 274 | return 275 | fi 276 | log_crit "http_download unable to find wget or curl" 277 | return 1 278 | } 279 | http_copy() { 280 | tmp=$(mktemp) 281 | http_download "${tmp}" "$1" "$2" || return 1 282 | body=$(cat "$tmp") 283 | rm -f "${tmp}" 284 | echo "$body" 285 | } 286 | github_release() { 287 | owner_repo=$1 288 | version=$2 289 | test -z "$version" && version="latest" 290 | giturl="https://github.com/${owner_repo}/releases/${version}" 291 | json=$(http_copy "$giturl" "Accept:application/json") 292 | test -z "$json" && return 1 293 | version=$(echo "$json" | tr -s '\n' ' ' | sed 's/.*"tag_name":"//' | sed 's/".*//') 294 | test -z "$version" && return 1 295 | echo "$version" 296 | } 297 | hash_sha256() { 298 | TARGET=${1:-/dev/stdin} 299 | if is_command gsha256sum; then 300 | hash=$(gsha256sum "$TARGET") || return 1 301 | echo "$hash" | cut -d ' ' -f 1 302 | elif is_command sha256sum; then 303 | hash=$(sha256sum "$TARGET") || return 1 304 | echo "$hash" | cut -d ' ' -f 1 305 | elif is_command shasum; then 306 | hash=$(shasum -a 256 "$TARGET" 2>/dev/null) || return 1 307 | echo "$hash" | cut -d ' ' -f 1 308 | elif is_command openssl; then 309 | hash=$(openssl -dst openssl dgst -sha256 "$TARGET") || return 1 310 | echo "$hash" | cut -d ' ' -f a 311 | else 312 | log_crit "hash_sha256 unable to find command to compute sha-256 hash" 313 | return 1 314 | fi 315 | } 316 | hash_sha256_verify() { 317 | TARGET=$1 318 | checksums=$2 319 | if [ -z "$checksums" ]; then 320 | log_err "hash_sha256_verify checksum file not 
specified in arg2" 321 | return 1 322 | fi 323 | BASENAME=${TARGET##*/} 324 | want=$(grep "${BASENAME}" "${checksums}" 2>/dev/null | tr '\t' ' ' | cut -d ' ' -f 1) 325 | if [ -z "$want" ]; then 326 | log_err "hash_sha256_verify unable to find checksum for '${TARGET}' in '${checksums}'" 327 | return 1 328 | fi 329 | got=$(hash_sha256 "$TARGET") 330 | if [ "$want" != "$got" ]; then 331 | log_err "hash_sha256_verify checksum for '$TARGET' did not verify ${want} vs $got" 332 | return 1 333 | fi 334 | } 335 | cat /dev/null </dev/null 118 | } 119 | echoerr() { 120 | echo "$@" 1>&2 121 | } 122 | log_prefix() { 123 | echo "$0" 124 | } 125 | _logp=6 126 | log_set_priority() { 127 | _logp="$1" 128 | } 129 | log_priority() { 130 | if test -z "$1"; then 131 | echo "$_logp" 132 | return 133 | fi 134 | [ "$1" -le "$_logp" ] 135 | } 136 | log_tag() { 137 | case $1 in 138 | 0) echo "emerg" ;; 139 | 1) echo "alert" ;; 140 | 2) echo "crit" ;; 141 | 3) echo "err" ;; 142 | 4) echo "warning" ;; 143 | 5) echo "notice" ;; 144 | 6) echo "info" ;; 145 | 7) echo "debug" ;; 146 | *) echo "$1" ;; 147 | esac 148 | } 149 | log_debug() { 150 | log_priority 7 || return 0 151 | echoerr "$(log_prefix)" "$(log_tag 7)" "$@" 152 | } 153 | log_info() { 154 | log_priority 6 || return 0 155 | echoerr "$(log_prefix)" "$(log_tag 6)" "$@" 156 | } 157 | log_err() { 158 | log_priority 3 || return 0 159 | echoerr "$(log_prefix)" "$(log_tag 3)" "$@" 160 | } 161 | log_crit() { 162 | log_priority 2 || return 0 163 | echoerr "$(log_prefix)" "$(log_tag 2)" "$@" 164 | } 165 | uname_os() { 166 | os=$(uname -s | tr '[:upper:]' '[:lower:]') 167 | case "$os" in 168 | msys_nt) os="windows" ;; 169 | esac 170 | echo "$os" 171 | } 172 | uname_arch() { 173 | arch=$(uname -m) 174 | case $arch in 175 | x86_64) arch="amd64" ;; 176 | x86) arch="386" ;; 177 | i686) arch="386" ;; 178 | i386) arch="386" ;; 179 | aarch64) arch="arm64" ;; 180 | armv5*) arch="armv5" ;; 181 | armv6*) arch="armv6" ;; 182 | armv7*) arch="armv7" ;; 183 | esac 184 | echo ${arch} 185 | } 186 | uname_os_check() { 187 | os=$(uname_os) 188 | case "$os" in 189 | darwin) return 0 ;; 190 | dragonfly) return 0 ;; 191 | freebsd) return 0 ;; 192 | linux) return 0 ;; 193 | android) return 0 ;; 194 | nacl) return 0 ;; 195 | netbsd) return 0 ;; 196 | openbsd) return 0 ;; 197 | plan9) return 0 ;; 198 | solaris) return 0 ;; 199 | windows) return 0 ;; 200 | esac 201 | log_crit "uname_os_check '$(uname -s)' got converted to '$os' which is not a GOOS value. Please file bug at https://github.com/client9/shlib" 202 | return 1 203 | } 204 | uname_arch_check() { 205 | arch=$(uname_arch) 206 | case "$arch" in 207 | 386) return 0 ;; 208 | amd64) return 0 ;; 209 | arm64) return 0 ;; 210 | armv5) return 0 ;; 211 | armv6) return 0 ;; 212 | armv7) return 0 ;; 213 | ppc64) return 0 ;; 214 | ppc64le) return 0 ;; 215 | mips) return 0 ;; 216 | mipsle) return 0 ;; 217 | mips64) return 0 ;; 218 | mips64le) return 0 ;; 219 | s390x) return 0 ;; 220 | amd64p32) return 0 ;; 221 | esac 222 | log_crit "uname_arch_check '$(uname -m)' got converted to '$arch' which is not a GOARCH value. 
Please file bug report at https://github.com/client9/shlib" 223 | return 1 224 | } 225 | untar() { 226 | tarball=$1 227 | case "${tarball}" in 228 | *.tar.gz | *.tgz) tar -xzf "${tarball}" ;; 229 | *.tar) tar -xf "${tarball}" ;; 230 | *.zip) unzip "${tarball}" ;; 231 | *) 232 | log_err "untar unknown archive format for ${tarball}" 233 | return 1 234 | ;; 235 | esac 236 | } 237 | mktmpdir() { 238 | test -z "$TMPDIR" && TMPDIR="$(mktemp -d)" 239 | mkdir -p "${TMPDIR}" 240 | echo "${TMPDIR}" 241 | } 242 | http_download_curl() { 243 | local_file=$1 244 | source_url=$2 245 | header=$3 246 | if [ -z "$header" ]; then 247 | code=$(curl -w '%{http_code}' -sL -o "$local_file" "$source_url") 248 | else 249 | code=$(curl -w '%{http_code}' -sL -H "$header" -o "$local_file" "$source_url") 250 | fi 251 | if [ "$code" != "200" ]; then 252 | log_debug "http_download_curl received HTTP status $code" 253 | return 1 254 | fi 255 | return 0 256 | } 257 | http_download_wget() { 258 | local_file=$1 259 | source_url=$2 260 | header=$3 261 | if [ -z "$header" ]; then 262 | wget -q -O "$local_file" "$source_url" 263 | else 264 | wget -q --header "$header" -O "$local_file" "$source_url" 265 | fi 266 | } 267 | http_download() { 268 | log_debug "http_download $2" 269 | if is_command curl; then 270 | http_download_curl "$@" 271 | return 272 | elif is_command wget; then 273 | http_download_wget "$@" 274 | return 275 | fi 276 | log_crit "http_download unable to find wget or curl" 277 | return 1 278 | } 279 | http_copy() { 280 | tmp=$(mktemp) 281 | http_download "${tmp}" "$1" "$2" || return 1 282 | body=$(cat "$tmp") 283 | rm -f "${tmp}" 284 | echo "$body" 285 | } 286 | github_release() { 287 | owner_repo=$1 288 | version=$2 289 | test -z "$version" && version="latest" 290 | giturl="https://github.com/${owner_repo}/releases/${version}" 291 | json=$(http_copy "$giturl" "Accept:application/json") 292 | test -z "$json" && return 1 293 | version=$(echo "$json" | tr -s '\n' ' ' | sed 's/.*"tag_name":"//' | sed 's/".*//') 294 | test -z "$version" && return 1 295 | echo "$version" 296 | } 297 | hash_sha256() { 298 | TARGET=${1:-/dev/stdin} 299 | if is_command gsha256sum; then 300 | hash=$(gsha256sum "$TARGET") || return 1 301 | echo "$hash" | cut -d ' ' -f 1 302 | elif is_command sha256sum; then 303 | hash=$(sha256sum "$TARGET") || return 1 304 | echo "$hash" | cut -d ' ' -f 1 305 | elif is_command shasum; then 306 | hash=$(shasum -a 256 "$TARGET" 2>/dev/null) || return 1 307 | echo "$hash" | cut -d ' ' -f 1 308 | elif is_command openssl; then 309 | hash=$(openssl -dst openssl dgst -sha256 "$TARGET") || return 1 310 | echo "$hash" | cut -d ' ' -f a 311 | else 312 | log_crit "hash_sha256 unable to find command to compute sha-256 hash" 313 | return 1 314 | fi 315 | } 316 | hash_sha256_verify() { 317 | TARGET=$1 318 | checksums=$2 319 | if [ -z "$checksums" ]; then 320 | log_err "hash_sha256_verify checksum file not specified in arg2" 321 | return 1 322 | fi 323 | BASENAME=${TARGET##*/} 324 | want=$(grep "${BASENAME}" "${checksums}" 2>/dev/null | tr '\t' ' ' | cut -d ' ' -f 1) 325 | if [ -z "$want" ]; then 326 | log_err "hash_sha256_verify unable to find checksum for '${TARGET}' in '${checksums}'" 327 | return 1 328 | fi 329 | got=$(hash_sha256 "$TARGET") 330 | if [ "$want" != "$got" ]; then 331 | log_err "hash_sha256_verify checksum for '$TARGET' did not verify ${want} vs $got" 332 | return 1 333 | fi 334 | } 335 | cat /dev/null <