├── .gitignore ├── .goreleaser.yml ├── .travis.yml ├── Dockerfile ├── LICENSE ├── Makefile ├── README.md ├── _config.yml ├── benchmark ├── README.md └── data-table.r ├── cmd ├── config.go ├── config_test.go ├── formatter.go ├── formatter_test.go ├── root.go ├── root_test.go └── version.go ├── codecov.yml ├── docker-push.sh ├── examples ├── base-small.csv ├── delta-small.csv ├── lazy_quotes.csv ├── lazy_quotes_delta.csv ├── no_comma.csv └── no_comma_delta.csv ├── go.mod ├── go.sum ├── install ├── install.sh ├── main.go ├── pkg └── digest │ ├── config.go │ ├── diff.go │ ├── diff_test.go │ ├── digest.go │ ├── digest_benchmark_test.go │ ├── digest_test.go │ ├── engine.go │ ├── engine_test.go │ ├── file_digest.go │ ├── file_digest_test.go │ ├── positions.go │ ├── positions_test.go │ ├── utils.go │ └── utils_test.go └── release.sh /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/go 3 | 4 | ### Go ### 5 | # Binaries for programs and plugins 6 | *.exe 7 | *.exe~ 8 | *.dll 9 | *.so 10 | *.dylib 11 | 12 | # Test binary, build with `go test -c` 13 | *.test 14 | 15 | # Output of the go coverage tool, specifically when used with LiteIDE 16 | *.out 17 | 18 | .idea/* 19 | 20 | out/ 21 | 22 | # End of https://www.gitignore.io/api/go 23 | 24 | vendor/ 25 | coverage.txt 26 | 27 | majestic_million*.csv 28 | 29 | # Output binary 30 | csvdiff -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | project_name: csvdiff 2 | 3 | release: 4 | github: 5 | owner: aswinkarthik 6 | name: csvdiff 7 | 8 | builds: 9 | - main: ./main.go 10 | binary: csvdiff 11 | goos: 12 | - windows 13 | - darwin 14 | - linux 15 | goarch: 16 | - amd64 17 | nfpms: 18 | - file_name_template: "{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}" 19 | replacements: 20 | amd64: 64-bit 21 | 386: 32-bit 22 | darwin: macOS 23 | linux: linux 24 | vendor: aswinkarthik 25 | homepage: https://github.com/aswinkarthik/csvdiff 26 | maintainer: aswinkarthik 27 | description: A Blazingly fast diff tool for comparing csv files. 
28 | license: MIT 29 | formats: 30 | - deb 31 | - rpm -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | addons: 4 | apt: 5 | packages: 6 | - rpm 7 | 8 | go: 9 | - 1.x 10 | 11 | env: 12 | global: 13 | - GO111MODULE=on 14 | - GORELEASER_ON=1 15 | 16 | gobuild_args: -ldflags "-X main.version=${TRAVIS_TAG}" 17 | 18 | script: 19 | - make lint test 20 | - GOOS=linux go build 21 | 22 | after_success: 23 | - curl -sL https://codecov.io/bash | bash 24 | 25 | deploy: 26 | - provider: script 27 | script: curl -sL https://git.io/goreleaser | bash 28 | skip_cleanup: true 29 | on: 30 | branch: master 31 | tags: true 32 | repo: aswinkarthik/csvdiff 33 | condition: $GORELEASER_ON = 1 34 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM alpine 2 | 3 | COPY csvdiff /csvdiff 4 | 5 | CMD /csvdiff 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright © 2018 aswinkarthik 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | -include .env 2 | 3 | VERSION := $(shell git describe --tags) 4 | BUILD := $(shell git rev-parse --short HEAD) 5 | PROJECTNAME := $(shell basename "$(PWD)") 6 | 7 | # Go related variables. 8 | GOBASE := $(shell pwd) 9 | GOPATH := $(GOBASE)/vendor:$(GOBASE) 10 | GOBIN := $(GOBASE)/out 11 | GOFILES := $(wildcard *.go) 12 | 13 | # Use linker flags to provide version/build settings 14 | LDFLAGS=-ldflags "-X=main.Version=$(VERSION) -X=main.Build=$(BUILD)" 15 | 16 | # Make is verbose in Linux. Make it silent. 17 | MAKEFLAGS += --silent 18 | 19 | ## install: Install missing dependencies. Runs `go get` internally. e.g; make install get=github.com/foo/bar 20 | install: go-get 21 | 22 | ## lint: Lint the codebase using golangci-lint 23 | lint: 24 | ifeq (,$(wildcard ./out/golangci-lint)) 25 | curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b $(GOBIN) 26 | endif 27 | $(GOBIN)/golangci-lint run -v ./... 
28 | 29 | ## test: Run all tests 30 | test: go-test 31 | 32 | ## compile: Compile the binary. 33 | compile: 34 | @-$(MAKE) -s go-compile 35 | 36 | ## exec: Run given command, wrapped with custom GOPATH. e.g. make exec run="go test ./..." 37 | exec: 38 | @GOPATH=$(GOPATH) GOBIN=$(GOBIN) $(run) 39 | 40 | ## clean: Clean build files. Runs `go clean` internally. 41 | clean: 42 | @-rm $(GOBIN)/$(PROJECTNAME) 2> /dev/null 43 | @-$(MAKE) go-clean 44 | 45 | go-compile: go-get go-build 46 | 47 | go-build: 48 | @echo " > Building binary..." 49 | @GOPATH=$(GOPATH) GOBIN=$(GOBIN) go build $(LDFLAGS) -o $(GOBIN)/$(PROJECTNAME) $(GOFILES) 50 | 51 | go-generate: 52 | @echo " > Generating dependency files..." 53 | @GOPATH=$(GOPATH) GOBIN=$(GOBIN) go generate $(generate) 54 | 55 | go-get: 56 | @echo " > Checking if there are any missing dependencies..." 57 | @GOPATH=$(GOPATH) GOBIN=$(GOBIN) go get $(get) 58 | 59 | go-install: 60 | @GOPATH=$(GOPATH) GOBIN=$(GOBIN) go mod tidy 61 | 62 | go-vendor: 63 | @GOPATH=$(GOPATH) GOBIN=$(GOBIN) go mod vendor 64 | 65 | go-test: 66 | @GOPATH=$(GOPATH) GOBIN=$(GOBIN) go test -race -coverprofile=coverage.txt -covermode=atomic -v ./... 67 | 68 | richgo-test: 69 | @GOPATH=$(GOPATH) GOBIN=$(GOBIN) richgo test -v ./... 70 | 71 | go-clean: 72 | @echo " > Cleaning build cache" 73 | @GOPATH=$(GOPATH) GOBIN=$(GOBIN) go clean 74 | 75 | .PHONY: help 76 | all: help 77 | help: Makefile 78 | @echo 79 | @echo " Choose a command to run in "$(PROJECTNAME)":" 80 | @echo 81 | @sed -n 's/^##//p' $< | column -t -s ':' | sed -e 's/^/ /' 82 | @echo 83 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # csvdiff 2 | 3 | [![Build Status](https://travis-ci.org/aswinkarthik/csvdiff.svg?branch=master)](https://travis-ci.org/aswinkarthik/csvdiff) 4 | [![Go Doc](https://godoc.org/github.com/aswinkarthik/csvdiff?status.svg)](https://godoc.org/github.com/aswinkarthik/csvdiff) 5 | [![Go Report Card](https://goreportcard.com/badge/github.com/aswinkarthik/csvdiff)](https://goreportcard.com/report/github.com/aswinkarthik/csvdiff) 6 | [![codecov](https://codecov.io/gh/aswinkarthik/csvdiff/branch/master/graph/badge.svg)](https://codecov.io/gh/aswinkarthik/csvdiff) 7 | [![Downloads](https://img.shields.io/github/downloads/aswinkarthik/csvdiff/total.svg)](https://github.com/aswinkarthik/csvdiff/releases) 8 | [![Latest release](https://img.shields.io/github/release/aswinkarthik/csvdiff.svg)](https://github.com/aswinkarthik/csvdiff/releases) 9 | 10 | A fast diff tool for comparing csv files. 11 | 12 | ## What is csvdiff? 13 | 14 | Csvdiff is a diff tool to compute changes between two csv files. 15 | 16 | - It is not a traditional diff tool. It is **most suitable** for comparing csv files dumped from **database tables**; the GNU diff tool is orders of magnitude faster for plain line-by-line comparison. 17 | - Supports selective comparison of fields in a row. 18 | - Supports specifying a group of columns as the primary key, i.e. the columns that uniquely identify a row. 19 | - Supports ignoring columns, e.g. columns like `created_at` timestamps. 20 | - Compares csv files with a million records in under 2 seconds. 21 | - Supports a number of output formats, e.g. colored git-style output or JSON for post-processing. 22 | 23 | ## Why? 24 | 25 | I wanted to compare the rows of a table before and after a given point in time and see what new changes came in.
Also, I wanted to selectively compare columns, ignoring columns like `created_at` and `updated_at`. All I had were the dumped csv files. 26 | 27 | ## Demo 28 | 29 | [![asciicast](https://asciinema.org/a/YNO5G0b2qL92MZWmb2IeiXveN.svg)](https://asciinema.org/a/YNO5G0b2qL92MZWmb2IeiXveN?speed=2&autoplay=1&size=medium&rows=20&cols=150) 30 | 31 | ## Usage 32 | 33 | ```diff 34 | $ csvdiff base.csv delta.csv 35 | # Additions (1) 36 | + 24564,907,completely-newsite.com,com,19827,32902,completely-newsite.com,com,1621,909,19787,32822 37 | # Modifications (1) 38 | - 69,48,aol.com,com,97543,225532,aol.com,com,70,49,97328,224491 39 | + 69,1048,aol.com,com,97543,225532,aol.com,com,70,49,97328,224491 40 | # Deletions (1) 41 | - 1618,907,deleted-website.com,com,19827,32902,deleted-website.com,com,1621,909,19787,32822 42 | ``` 43 | 44 | 45 | ```bash 46 | Differentiates two csv files and finds out the additions and modifications. 47 | Most suitable for csv files created from database tables 48 | 49 | Usage: 50 | csvdiff <base-csv> <delta-csv> [flags] 51 | 52 | Flags: 53 | --columns ints Selectively compare positions in CSV Eg: 1,2. Default is entire row 54 | -o, --format string Available (rowmark|json|legacy-json|diff|word-diff|color-words) (default "diff") 55 | -h, --help help for csvdiff 56 | --ignore-columns ints Inverse of --columns flag. This cannot be used if --columns are specified 57 | --include ints Include positions in CSV to display Eg: 1,2. Default is entire row 58 | -p, --primary-key ints Primary key positions of the Input CSV as comma separated values Eg: 1,2 (default [0]) 59 | -s, --separator string use specific separator (\t, or any one character string) (default ",") 60 | --time Measure time 61 | -t, --toggle Help message for toggle 62 | --version version for csvdiff 63 | ``` 64 | 65 | ## Installation 66 | 67 | ### Homebrew 68 | 69 | ```bash 70 | brew tap thecasualcoder/stable 71 | brew install csvdiff 72 | ``` 73 | 74 | ### Using binaries 75 | 76 | ```bash 77 | # binary will be $GOPATH/bin/csvdiff 78 | curl -sfL https://raw.githubusercontent.com/aswinkarthik/csvdiff/master/install.sh | sh -s -- -b $GOPATH/bin 79 | 80 | # or install it into ./bin/ 81 | curl -sfL https://raw.githubusercontent.com/aswinkarthik/csvdiff/master/install.sh | sh -s 82 | 83 | # On Alpine Linux (as it does not ship with curl by default) 84 | wget -O - -q https://raw.githubusercontent.com/aswinkarthik/csvdiff/master/install.sh | sh -s 85 | ``` 86 | 87 | ### Using source code 88 | 89 | ```bash 90 | go get -u github.com/aswinkarthik/csvdiff 91 | ``` 92 | 93 | ## Use case 94 | 95 | - Cases where you have a base database dump as a csv. If you receive the changes as another database dump, this tool can figure out the additions and modifications to the original dump. The `additions.csv` can be used to create an `insert.sql`, and the `modifications.csv` an `update.sql` data migration (see the `pkg/digest` sketch below for doing the same from Go code). 96 | - The delta file can either contain just the changes or the entire table dump along with the changes. 97 | 98 | ## Supported 99 | 100 | - Additions 101 | - Modifications 102 | - Deletions 103 | - Non-comma separators 104 | 105 | ## Not Supported 106 | 107 | - Cannot be used as a generic diff tool. It requires one or more columns of the csv to act as a primary key.
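## Using as a library

The CLI is a thin wrapper over the `pkg/digest` package (see `runContext` in `cmd/root.go`), so the same diff can be computed from Go code. The sketch below is illustrative only: the field names are taken from `cmd/config.go`'s `BaseDigestConfig`, but the exact requirements and defaults of `digest.Config` are an assumption and should be checked against the package.

```go
package main

import (
	"fmt"
	"log"
	"os"

	"github.com/aswinkarthik/csvdiff/pkg/digest"
)

func main() {
	base, err := os.Open("examples/base-small.csv")
	if err != nil {
		log.Fatal(err)
	}
	defer base.Close()

	delta, err := os.Open("examples/delta-small.csv")
	if err != nil {
		log.Fatal(err)
	}
	defer delta.Close()

	// Column 0 acts as the primary key; leaving Value unset compares the
	// entire row, mirroring the CLI default when --columns is not given.
	baseConfig := digest.Config{Reader: base, Key: digest.Positions{0}, Separator: ','}
	deltaConfig := digest.Config{Reader: delta, Key: digest.Positions{0}, Separator: ','}

	diff, err := digest.Diff(baseConfig, deltaConfig)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Printf("additions=%d modifications=%d deletions=%d\n",
		len(diff.Additions), len(diff.Modifications), len(diff.Deletions))
}
```

This mirrors what `runContext` in `cmd/root.go` does with the file handles opened in `cmd/config.go`; the formatters in `cmd/formatter.go` then render the returned `digest.Differences`.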
108 | 109 | ## Formats 110 | 111 | There are a number of supported formats: 112 | 113 | - `diff`: Git's diff style 114 | - `word-diff`: Git's --word-diff style 115 | - `color-words`: Git's --color-words style 116 | - `json`: JSON serialization of the result 117 | - `legacy-json`: JSON serialization of the result in the old format 118 | - `rowmark`: Marks each row with an ADDED, MODIFIED, or DELETED status. 119 | 120 | ## Miscellaneous features 121 | 122 | - The `--primary-key` flag is an integer array. Specify comma-separated positions if the table has a compound key. Using this primary key, csvdiff can figure out modifications. If the primary key itself changes, the row is reported as an addition. 123 | 124 | ```bash 125 | % csvdiff base.csv delta.csv --primary-key 0,1 126 | ``` 127 | 128 | - If you want to compare only a few columns of the csv when computing the hash: 129 | 130 | ```bash 131 | % csvdiff base.csv delta.csv --primary-key 0,1 --columns 2 132 | ``` 133 | 134 | - Supports JSON output for post-processing: 135 | 136 | ```bash 137 | % csvdiff examples/base-small.csv examples/delta-small.csv --format json | jq '.' 138 | { 139 | "Additions": [ 140 | "24564,907,completely-newsite.com,com,19827,32902,completely-newsite.com,com,1621,909,19787,32822" 141 | ], 142 | "Modifications": [{ 143 | "Original": "69,1048,aol.com,com,97543,225532,aol.com,com,70,49,97328,224491", 144 | "Current": "69,1049,aol.com,com,97543,225532,aol.com,com,70,49,97328,224491" 145 | }], 146 | "Deletions": [ 147 | "1615,905,deleted-website.com,com,19833,33110,deleted-website.com,com,1613,902,19835,33135" 148 | ] 149 | } 150 | ``` 151 | 152 | ## Build locally 153 | 154 | ```bash 155 | $ git clone https://github.com/aswinkarthik/csvdiff 156 | $ go get ./... 157 | $ go build 158 | 159 | # To run tests 160 | $ go get github.com/stretchr/testify/assert 161 | $ go test -v ./... 162 | ``` 163 | 164 | ## Algorithm 165 | 166 | - Creates a map for both the base and the delta file 167 | - `key` is a hash of the primary key values as csv 168 | - `value` is a hash of the entire row 169 | - Two maps as the initial processing output 170 | - base-map 171 | - delta-map 172 | - The delta map is compared with the base map. As long as the primary key is unchanged, the row will have the same `key`. An entry in the delta map is a 173 | - **Addition**, if the base-map has no entry for that `key`. 174 | - **Modification**, if the base-map's `value` is different. 175 | - **Deletion**, if an entry in the base-map has no matching `key` in the delta map. 176 | 177 | ## Credits 178 | 179 | - Uses the 64-bit [xxHash](https://cyan4973.github.io/xxHash/) algorithm, an extremely fast non-cryptographic hash algorithm, for creating the hashes. Implementation from [cespare](https://github.com/cespare/xxhash) 180 | - Used [Majestic million](https://blog.majestic.com/development/majestic-million-csv-daily/) data for the demo. 181 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman -------------------------------------------------------------------------------- /benchmark/README.md: -------------------------------------------------------------------------------- 1 | # Comparison with other tools 2 | 3 | ## Setup 4 | 5 | * Using the majestic million data. (Source in the credits section) 6 | * Both files have 998390 rows and 12 columns. 7 | * Only one modification between both files. 8 | * Ran on an Intel Core i7 2.5 GHz, 4 cores, 16 GB RAM 9 | 10 | ## Baseline 11 | 12 | 0. 
csvdiff (this tool) : *0m1.159s* 13 | 14 | ```bash 15 | time csvdiff majestic_million.csv majestic_million_diff.csv 16 | Additions 0 17 | Modifications 1 18 | ... 19 | 20 | real 0m1.159s 21 | user 0m2.167s 22 | sys 0m0.222s 23 | ``` 24 | 25 | ## Other tools 26 | 27 | 1. [data.table](https://github.com/Rdatatable/data.table) : *0m4.284s* 28 | 29 | * Join both csvs using the `id` column. 30 | * Check for inequality between the corresponding columns. 31 | * Rscript in [data-table.r](/benchmark/data-table.r) (Can it be written better? New to R) 32 | 33 | ```bash 34 | time Rscript data-table.r 35 | 36 | real 0m4.284s 37 | user 0m3.887s 38 | sys 0m0.284s 39 | ``` 40 | 41 | 2. [csvdiff](https://pypi.org/project/csvdiff/) written in Python : *0m48.115s* 42 | 43 | ```bash 44 | time csvdiff --style=summary id majestic_million.csv majestic_million_diff.csv 45 | 0 rows removed (0.0%) 46 | 0 rows added (0.0%) 47 | 1 rows changed (0.0%) 48 | 49 | real 0m48.115s 50 | user 0m42.895s 51 | sys 0m3.948s 52 | ``` 53 | 54 | 3. GNU diff (Fastest) : *0m0.297s* 55 | 56 | * Seems to be the fastest; csvdiff couldn't even come close here. 57 | * However, it does a line-by-line diff. It does not support compound keys of a csv or selective comparison of columns. Hence the disclaimer: csvdiff cannot be used as a generic diff tool. 58 | * On another note, let's see if we can reach this. 59 | 60 | ```bash 61 | time diff majestic_million.csv majestic_million_diff.csv 62 | 63 | real 0m0.297s 64 | user 0m0.144s 65 | sys 0m0.147s 66 | ``` 67 | 68 | ## Go Benchmark Results 69 | 70 | The benchmark test can be found [here](https://github.com/aswinkarthik/csvdiff/blob/master/pkg/digest/digest_benchmark_test.go). 71 | 72 | ```bash 73 | $ cd ./pkg/digest 74 | $ go test -bench=. -v -benchmem -benchtime=5s -cover 75 | ``` 76 | 77 | ``` 78 | BenchmarkCreate1-8 200000 31794 ns/op 116163 B/op 24 allocs/op 79 | BenchmarkCreate10-8 200000 43351 ns/op 119993 B/op 79 allocs/op 80 | BenchmarkCreate100-8 50000 142645 ns/op 160577 B/op 634 allocs/op 81 | BenchmarkCreate1000-8 10000 907308 ns/op 621694 B/op 6085 allocs/op 82 | BenchmarkCreate10000-8 1000 7998083 ns/op 5117977 B/op 60345 allocs/op 83 | BenchmarkCreate100000-8 100 81260585 ns/op 49106849 B/op 604563 allocs/op 84 | BenchmarkCreate1000000-8 10 788485738 ns/op 520115434 B/op 6042650 allocs/op 85 | BenchmarkCreate10000000-8 1 7878009695 ns/op 5029061632 B/op 60346535 allocs/op 86 | ``` -------------------------------------------------------------------------------- /benchmark/data-table.r: -------------------------------------------------------------------------------- 1 | library(data.table) 2 | 3 | csv1 = fread('majestic_million.csv') 4 | csv2 = fread('majestic_million_diff.csv') 5 | 6 | setkey(csv1,id) 7 | setkey(csv2,id) 8 | 9 | result <- merge(csv2, csv1, all.x=TRUE) 10 | 11 | diff <- result[result$"col-1.x" != result$"col-1.y" | result$"col-2.x" != result$"col-2.y" | result$"col-3.x" != result$"col-3.y" | result$"col-4.x" != result$"col-4.y" | result$"col-5.x" != result$"col-5.y" | result$"col-6.x" != result$"col-6.y" | result$"col-7.x" != result$"col-7.y" | result$"col-8.x" != result$"col-8.y" | result$"col-9.x" != result$"col-9.y" | result$"col-10.x" != result$"col-10.y" | result$"col-11.x" != result$"col-11.y"] 12 | 13 | diff 14 | -------------------------------------------------------------------------------- /cmd/config.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "encoding/csv" 5 | "fmt" 6 | "io" 7 | "strings" 8 | 9 | "github.com/spf13/afero" 10 | 11 | 
"github.com/aswinkarthik/csvdiff/pkg/digest" 12 | ) 13 | 14 | // Context is to store all command line Flags. 15 | type Context struct { 16 | fs afero.Fs 17 | primaryKeyPositions []int 18 | valueColumnPositions []int 19 | includeColumnPositions []int 20 | format string 21 | baseFilename string 22 | deltaFilename string 23 | baseFile afero.File 24 | deltaFile afero.File 25 | recordCount int 26 | separator rune 27 | lazyQuotes bool 28 | } 29 | 30 | // NewContext can take all CLI flags and create a cmd.Context 31 | // Validations are done as part of this. 32 | // File pointers are created too. 33 | func NewContext( 34 | fs afero.Fs, 35 | primaryKeyPositions []int, 36 | valueColumnPositions []int, 37 | ignoreValueColumnPositions []int, 38 | includeColumnPositions []int, 39 | format string, 40 | baseFilename string, 41 | deltaFilename string, 42 | separator rune, 43 | lazyQuotes bool, 44 | ) (*Context, error) { 45 | baseRecordCount, err := getColumnsCount(fs, baseFilename, separator, lazyQuotes) 46 | if err != nil { 47 | return nil, fmt.Errorf("error in base-file: %v", err) 48 | } 49 | 50 | deltaRecordCount, err := getColumnsCount(fs, deltaFilename, separator, lazyQuotes) 51 | if err != nil { 52 | return nil, fmt.Errorf("error in delta-file: %v", err) 53 | } 54 | 55 | if baseRecordCount != deltaRecordCount { 56 | return nil, fmt.Errorf("base-file and delta-file columns count do not match") 57 | } 58 | 59 | if len(ignoreValueColumnPositions) > 0 && len(valueColumnPositions) > 0 { 60 | return nil, fmt.Errorf("only one of --columns or --ignore-columns") 61 | } 62 | if len(ignoreValueColumnPositions) > 0 { 63 | valueColumnPositions = inferValueColumns(baseRecordCount, ignoreValueColumnPositions) 64 | } 65 | 66 | baseFile, err := fs.Open(baseFilename) 67 | if err != nil { 68 | return nil, err 69 | } 70 | deltaFile, err := fs.Open(deltaFilename) 71 | if err != nil { 72 | return nil, err 73 | } 74 | ctx := &Context{ 75 | fs: fs, 76 | primaryKeyPositions: primaryKeyPositions, 77 | valueColumnPositions: valueColumnPositions, 78 | includeColumnPositions: includeColumnPositions, 79 | format: format, 80 | baseFilename: baseFilename, 81 | deltaFilename: deltaFilename, 82 | baseFile: baseFile, 83 | deltaFile: deltaFile, 84 | recordCount: baseRecordCount, 85 | separator: separator, 86 | lazyQuotes: lazyQuotes, 87 | } 88 | 89 | if err := ctx.validate(); err != nil { 90 | return nil, fmt.Errorf("validation failed: %v", err) 91 | } 92 | 93 | return ctx, nil 94 | } 95 | 96 | // GetPrimaryKeys is to return the --primary-key flags as digest.Positions array. 97 | func (c *Context) GetPrimaryKeys() digest.Positions { 98 | if len(c.primaryKeyPositions) > 0 { 99 | return c.primaryKeyPositions 100 | } 101 | return []int{0} 102 | } 103 | 104 | // GetValueColumns is to return the --columns flags as digest.Positions array. 105 | func (c *Context) GetValueColumns() digest.Positions { 106 | if len(c.valueColumnPositions) > 0 { 107 | return c.valueColumnPositions 108 | } 109 | return []int{} 110 | } 111 | 112 | // GetIncludeColumnPositions is to return the --include flags as digest.Positions array. 113 | // If empty, it is value columns 114 | func (c Context) GetIncludeColumnPositions() digest.Positions { 115 | if len(c.includeColumnPositions) > 0 { 116 | return c.includeColumnPositions 117 | } 118 | return c.GetValueColumns() 119 | } 120 | 121 | // validate validates the context object 122 | // and returns error if not valid. 
123 | func (c *Context) validate() error { 124 | { 125 | // format validation 126 | 127 | formatFound := false 128 | for _, format := range allFormats { 129 | if strings.ToLower(c.format) == format { 130 | formatFound = true 131 | } 132 | } 133 | if !formatFound { 134 | return fmt.Errorf("specified format is not valid") 135 | } 136 | } 137 | 138 | { 139 | comparator := func(element int) bool { 140 | return element < c.recordCount 141 | } 142 | 143 | if !assertAll(c.primaryKeyPositions, comparator) { 144 | return fmt.Errorf("--primary-key positions are out of bounds") 145 | } 146 | if !assertAll(c.includeColumnPositions, comparator) { 147 | return fmt.Errorf("--include positions are out of bounds") 148 | } 149 | if !assertAll(c.valueColumnPositions, comparator) { 150 | return fmt.Errorf("--columns positions are out of bounds") 151 | } 152 | } 153 | 154 | return nil 155 | } 156 | 157 | func inferValueColumns(recordCount int, ignoreValueColumns []int) digest.Positions { 158 | lookupMap := make(map[int]struct{}) 159 | for _, pos := range ignoreValueColumns { 160 | lookupMap[pos] = struct{}{} 161 | } 162 | 163 | valueColumns := make(digest.Positions, 0) 164 | if len(ignoreValueColumns) > 0 { 165 | for i := 0; i < recordCount; i++ { 166 | if _, exists := lookupMap[i]; !exists { 167 | valueColumns = append(valueColumns, i) 168 | } 169 | } 170 | } 171 | 172 | return valueColumns 173 | } 174 | 175 | func assertAll(elements []int, assertFn func(element int) bool) bool { 176 | for _, el := range elements { 177 | if !assertFn(el) { 178 | return false 179 | } 180 | } 181 | return true 182 | } 183 | 184 | func getColumnsCount(fs afero.Fs, filename string, separator rune, lazyQuotes bool) (int, error) { 185 | base, err := fs.Open(filename) 186 | if err != nil { 187 | return 0, err 188 | } 189 | defer base.Close() 190 | csvReader := csv.NewReader(base) 191 | csvReader.Comma = separator 192 | csvReader.LazyQuotes = lazyQuotes 193 | record, err := csvReader.Read() 194 | if err != nil { 195 | if err == io.EOF { 196 | return 0, fmt.Errorf("unable to process headers from csv file. EOF reached. 
invalid CSV file") 197 | } 198 | return 0, err 199 | } 200 | 201 | return len(record), nil 202 | } 203 | 204 | // BaseDigestConfig creates a digest.Context from cmd.Context 205 | // that is needed to start the diff process 206 | func (c *Context) BaseDigestConfig() (digest.Config, error) { 207 | return digest.Config{ 208 | Reader: c.baseFile, 209 | Value: c.valueColumnPositions, 210 | Key: c.primaryKeyPositions, 211 | Include: c.includeColumnPositions, 212 | Separator: c.separator, 213 | LazyQuotes: c.lazyQuotes, 214 | }, nil 215 | } 216 | 217 | // DeltaDigestConfig creates a digest.Context from cmd.Context 218 | // that is needed to start the diff process 219 | func (c *Context) DeltaDigestConfig() (digest.Config, error) { 220 | return digest.Config{ 221 | Reader: c.deltaFile, 222 | Value: c.valueColumnPositions, 223 | Key: c.primaryKeyPositions, 224 | Include: c.includeColumnPositions, 225 | Separator: c.separator, 226 | LazyQuotes: c.lazyQuotes, 227 | }, nil 228 | } 229 | 230 | // Close all file handles 231 | func (c *Context) Close() { 232 | if c.baseFile != nil { 233 | _ = c.baseFile.Close() 234 | } 235 | if c.deltaFile != nil { 236 | _ = c.deltaFile.Close() 237 | } 238 | } 239 | -------------------------------------------------------------------------------- /cmd/config_test.go: -------------------------------------------------------------------------------- 1 | package cmd_test 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | 7 | "github.com/aswinkarthik/csvdiff/cmd" 8 | "github.com/spf13/afero" 9 | 10 | "github.com/aswinkarthik/csvdiff/pkg/digest" 11 | "github.com/stretchr/testify/assert" 12 | ) 13 | 14 | func TestPrimaryKeyPositions(t *testing.T) { 15 | type testCase struct { 16 | name string 17 | in []int 18 | out digest.Positions 19 | } 20 | testCases := []testCase{ 21 | { 22 | name: "should return primary key columns", 23 | in: []int{0, 1}, 24 | out: []int{0, 1}, 25 | }, 26 | { 27 | name: "should return primary key columns as default input is empty", 28 | in: []int{}, 29 | out: []int{0}, 30 | }, 31 | { 32 | name: "should return primary key columns as default input is nil", 33 | in: []int{}, 34 | out: []int{0}, 35 | }, 36 | } 37 | for _, tt := range testCases { 38 | t.Run(tt.name, func(t *testing.T) { 39 | fs := afero.NewMemMapFs() 40 | setupFiles(t, fs) 41 | ctx, err := cmd.NewContext(fs, 42 | tt.in, 43 | nil, 44 | nil, 45 | nil, 46 | "json", 47 | "/base.csv", 48 | "/delta.csv", 49 | ',', 50 | false, 51 | ) 52 | assert.NoError(t, err) 53 | assert.Equal(t, tt.out, ctx.GetPrimaryKeys()) 54 | 55 | }) 56 | } 57 | } 58 | 59 | func TestValueColumnPositions(t *testing.T) { 60 | type testCase struct { 61 | name string 62 | in []int 63 | out digest.Positions 64 | } 65 | testCases := []testCase{ 66 | { 67 | name: "should return value columns", 68 | in: []int{0, 1}, 69 | out: []int{0, 1}, 70 | }, 71 | { 72 | name: "should return value columns as empty if input is empty", 73 | in: []int{}, 74 | out: []int{}, 75 | }, 76 | { 77 | name: "should return value columns as empty if input is nil", 78 | in: []int{}, 79 | out: []int{}, 80 | }, 81 | } 82 | for _, tt := range testCases { 83 | t.Run(tt.name, func(t *testing.T) { 84 | fs := afero.NewMemMapFs() 85 | setupFiles(t, fs) 86 | ctx, err := cmd.NewContext(fs, 87 | nil, 88 | tt.in, 89 | nil, 90 | nil, 91 | "json", 92 | "/base.csv", 93 | "/delta.csv", 94 | ',', 95 | false, 96 | ) 97 | assert.NoError(t, err) 98 | assert.Equal(t, tt.out, ctx.GetValueColumns()) 99 | 100 | }) 101 | } 102 | } 103 | 104 | func TestNewContext(t *testing.T) { 105 | 106 | 
t.Run("should validate format", func(t *testing.T) { 107 | fs := afero.NewMemMapFs() 108 | 109 | setupFiles(t, fs) 110 | 111 | t.Run("empty format", func(t *testing.T) { 112 | _, err := cmd.NewContext( 113 | fs, 114 | nil, 115 | nil, 116 | nil, 117 | nil, 118 | "", 119 | "/base.csv", 120 | "/delta.csv", 121 | ',', 122 | false, 123 | ) 124 | 125 | assert.EqualError(t, err, "validation failed: specified format is not valid") 126 | }) 127 | 128 | t.Run("valid format", func(t *testing.T) { 129 | _, err := cmd.NewContext( 130 | fs, 131 | nil, 132 | nil, 133 | nil, 134 | nil, 135 | "rowmark", 136 | "/base.csv", 137 | "/delta.csv", 138 | ',', 139 | false, 140 | ) 141 | 142 | assert.NoError(t, err) 143 | }) 144 | 145 | t.Run("case-insensitive valid format", func(t *testing.T) { 146 | _, err := cmd.NewContext( 147 | fs, 148 | nil, 149 | nil, 150 | nil, 151 | nil, 152 | "jSOn", 153 | "/base.csv", 154 | "/delta.csv", 155 | ',', 156 | false, 157 | ) 158 | 159 | assert.NoError(t, err) 160 | }) 161 | 162 | }) 163 | 164 | t.Run("should validate base file existence", func(t *testing.T) { 165 | fs := afero.NewMemMapFs() 166 | _, err := cmd.NewContext( 167 | fs, 168 | nil, 169 | nil, 170 | nil, 171 | nil, 172 | "json", 173 | "/base.csv", 174 | "/delta.csv", 175 | ',', 176 | false, 177 | ) 178 | assert.EqualError(t, err, "error in base-file: open "+string(os.PathSeparator)+"base.csv: file does not exist") 179 | }) 180 | 181 | t.Run("should validate if base file is a csv file", func(t *testing.T) { 182 | fs := afero.NewMemMapFs() 183 | { 184 | err := fs.Mkdir("/base.csv", os.ModePerm) 185 | assert.NoError(t, err) 186 | } 187 | 188 | _, err := cmd.NewContext( 189 | fs, 190 | nil, 191 | nil, 192 | nil, 193 | nil, 194 | "json", 195 | "/base.csv", 196 | "/delta.csv", 197 | ',', 198 | false, 199 | ) 200 | assert.EqualError(t, err, "error in base-file: unable to process headers from csv file. EOF reached. invalid CSV file") 201 | }) 202 | t.Run("should validate if delta file is a csv file", func(t *testing.T) { 203 | fs := afero.NewMemMapFs() 204 | { 205 | assert.NoError(t, afero.WriteFile(fs, "/base.csv", []byte("id"), os.ModePerm)) 206 | err := fs.Mkdir("/delta.csv", os.ModePerm) 207 | assert.NoError(t, err) 208 | } 209 | 210 | _, err := cmd.NewContext( 211 | fs, 212 | nil, 213 | nil, 214 | nil, 215 | nil, 216 | "json", 217 | "/base.csv", 218 | "/delta.csv", 219 | ',', 220 | false, 221 | ) 222 | assert.EqualError(t, err, "error in delta-file: unable to process headers from csv file. EOF reached. 
invalid CSV file") 223 | }) 224 | 225 | t.Run("should validate if both base and delta file exist", func(t *testing.T) { 226 | fs := afero.NewMemMapFs() 227 | setupFiles(t, fs) 228 | 229 | _, err := cmd.NewContext( 230 | fs, 231 | nil, 232 | nil, 233 | nil, 234 | nil, 235 | "json", 236 | "/base.csv", 237 | "/delta.csv", 238 | ',', 239 | false, 240 | ) 241 | assert.NoError(t, err) 242 | }) 243 | 244 | t.Run("should validate if positions are within the limits of the csv file", func(t *testing.T) { 245 | fs := afero.NewMemMapFs() 246 | { 247 | baseContent := []byte("id,name,age,desc") 248 | err := afero.WriteFile(fs, "/base.csv", baseContent, os.ModePerm) 249 | assert.NoError(t, err) 250 | } 251 | { 252 | deltaContent := []byte("id,name,age,desc") 253 | err := afero.WriteFile(fs, "/delta.csv", deltaContent, os.ModePerm) 254 | assert.NoError(t, err) 255 | } 256 | 257 | t.Run("primary key positions", func(t *testing.T) { 258 | _, err := cmd.NewContext( 259 | fs, 260 | []int{4}, 261 | nil, 262 | nil, 263 | nil, 264 | "json", 265 | "/base.csv", 266 | "/delta.csv", 267 | ',', 268 | false, 269 | ) 270 | 271 | assert.EqualError(t, err, "validation failed: --primary-key positions are out of bounds") 272 | }) 273 | 274 | t.Run("include positions", func(t *testing.T) { 275 | _, err := cmd.NewContext( 276 | fs, 277 | nil, 278 | nil, 279 | nil, 280 | []int{4}, 281 | "json", 282 | "/base.csv", 283 | "/delta.csv", 284 | ',', 285 | false, 286 | ) 287 | 288 | assert.EqualError(t, err, "validation failed: --include positions are out of bounds") 289 | }) 290 | 291 | t.Run("value positions", func(t *testing.T) { 292 | _, err := cmd.NewContext( 293 | fs, 294 | nil, 295 | []int{4}, 296 | nil, 297 | nil, 298 | "json", 299 | "/base.csv", 300 | "/delta.csv", 301 | ',', 302 | false, 303 | ) 304 | 305 | assert.EqualError(t, err, "validation failed: --columns positions are out of bounds") 306 | }) 307 | 308 | t.Run("inequal base and delta files", func(t *testing.T) { 309 | { 310 | deltaContent := []byte("id,name,age,desc,size") 311 | err := afero.WriteFile(fs, "/delta.csv", deltaContent, os.ModePerm) 312 | assert.NoError(t, err) 313 | } 314 | 315 | _, err := cmd.NewContext( 316 | fs, 317 | nil, 318 | nil, 319 | nil, 320 | nil, 321 | "json", 322 | "/base.csv", 323 | "/delta.csv", 324 | ',', 325 | false, 326 | ) 327 | assert.EqualError(t, err, "base-file and delta-file columns count do not match") 328 | }) 329 | }) 330 | 331 | t.Run("should pass only one of columns or ignore columns", func(t *testing.T) { 332 | fs := afero.NewMemMapFs() 333 | setupFiles(t, fs) 334 | 335 | _, err := cmd.NewContext( 336 | fs, 337 | nil, 338 | []int{0}, 339 | []int{0}, 340 | nil, 341 | "jSOn", 342 | "/base.csv", 343 | "/delta.csv", 344 | ',', 345 | false, 346 | ) 347 | 348 | assert.EqualError(t, err, "only one of --columns or --ignore-columns") 349 | }) 350 | } 351 | 352 | func TestConfig_DigestConfig(t *testing.T) { 353 | t.Run("should create digest ctx", func(t *testing.T) { 354 | fs := afero.NewMemMapFs() 355 | setupFiles(t, fs) 356 | 357 | valueColumns := digest.Positions{0, 1, 2} 358 | primaryColumns := digest.Positions{0, 1} 359 | includeColumns := digest.Positions{2} 360 | ctx, err := cmd.NewContext( 361 | fs, 362 | primaryColumns, 363 | valueColumns, 364 | nil, 365 | includeColumns, 366 | "jSOn", 367 | "/base.csv", 368 | "/delta.csv", 369 | ',', 370 | false, 371 | ) 372 | assert.NoError(t, err) 373 | 374 | baseConfig, err := ctx.BaseDigestConfig() 375 | 376 | assert.NoError(t, err) 377 | assert.NotNil(t, baseConfig.Reader) 378 | 
assert.Equal(t, valueColumns, baseConfig.Value) 379 | assert.Equal(t, primaryColumns, baseConfig.Key) 380 | assert.Equal(t, includeColumns, baseConfig.Include) 381 | 382 | deltaConfig, err := ctx.DeltaDigestConfig() 383 | 384 | assert.NoError(t, err) 385 | assert.NotNil(t, deltaConfig.Reader) 386 | assert.Equal(t, valueColumns, deltaConfig.Value) 387 | assert.Equal(t, primaryColumns, deltaConfig.Key) 388 | assert.Equal(t, includeColumns, deltaConfig.Include) 389 | }) 390 | t.Run("should infer values columns as inverse of ignore columns digest ctx", func(t *testing.T) { 391 | fs := afero.NewMemMapFs() 392 | setupFiles(t, fs) 393 | 394 | ignoreValueColumns := digest.Positions{0, 1, 2} 395 | primaryColumns := digest.Positions{0, 1} 396 | ctx, err := cmd.NewContext( 397 | fs, 398 | primaryColumns, 399 | nil, 400 | ignoreValueColumns, 401 | nil, 402 | "jSOn", 403 | "/base.csv", 404 | "/delta.csv", 405 | ',', 406 | false, 407 | ) 408 | assert.NoError(t, err) 409 | 410 | baseConfig, err := ctx.BaseDigestConfig() 411 | 412 | assert.NoError(t, err) 413 | assert.NotNil(t, baseConfig.Reader) 414 | assert.Equal(t, digest.Positions{3}, baseConfig.Value) 415 | assert.Equal(t, primaryColumns, baseConfig.Key) 416 | 417 | deltaConfig, err := ctx.DeltaDigestConfig() 418 | 419 | assert.NoError(t, err) 420 | assert.NotNil(t, deltaConfig.Reader) 421 | assert.Equal(t, digest.Positions{3}, deltaConfig.Value) 422 | assert.Equal(t, primaryColumns, deltaConfig.Key) 423 | }) 424 | } 425 | func setupFiles(t *testing.T, fs afero.Fs) { 426 | { 427 | baseContent := []byte("id,name,age,desc") 428 | err := afero.WriteFile(fs, "/base.csv", baseContent, os.ModePerm) 429 | assert.NoError(t, err) 430 | } 431 | { 432 | deltaContent := []byte("id,name,age,desc") 433 | err := afero.WriteFile(fs, "/delta.csv", deltaContent, os.ModePerm) 434 | assert.NoError(t, err) 435 | } 436 | } 437 | -------------------------------------------------------------------------------- /cmd/formatter.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "github.com/aswinkarthik/csvdiff/pkg/digest" 7 | "github.com/fatih/color" 8 | "io" 9 | ) 10 | 11 | const ( 12 | rowmark = "rowmark" 13 | jsonFormat = "json" 14 | legacyJSONFormat = "legacy-json" 15 | lineDiff = "diff" 16 | wordDiff = "word-diff" 17 | colorWords = "color-words" 18 | ) 19 | 20 | var allFormats = []string{rowmark, jsonFormat, legacyJSONFormat, lineDiff, wordDiff, colorWords} 21 | 22 | // Formatter can print the differences to stdout 23 | // and accompanying metadata to stderr 24 | type Formatter struct { 25 | stdout io.Writer 26 | stderr io.Writer 27 | ctx Context 28 | } 29 | 30 | // NewFormatter can be used to create a new formatter 31 | func NewFormatter(stdout, stderr io.Writer, ctx Context) *Formatter { 32 | if ctx.separator == rune(0) { 33 | ctx.separator = ',' 34 | } 35 | return &Formatter{stdout: stdout, stderr: stderr, ctx: ctx} 36 | } 37 | 38 | // Format can be used to format the differences based on ctx 39 | // to appropriate writers 40 | func (f *Formatter) Format(diff digest.Differences) error { 41 | switch f.ctx.format { 42 | case legacyJSONFormat: 43 | return f.legacyJSON(diff) 44 | case jsonFormat: 45 | return f.json(diff) 46 | case rowmark: 47 | return f.rowMark(diff) 48 | case lineDiff: 49 | return f.lineDiff(diff) 50 | case wordDiff: 51 | return f.wordDiff(diff) 52 | case colorWords: 53 | return f.colorWords(diff) 54 | default: 55 | return fmt.Errorf("formatter not found") 
56 | } 57 | } 58 | 59 | // legacyJSON formats the diff as a JSON object in the legacy format: 60 | // { "Additions": [...], "Modifications": [...] } 61 | func (f *Formatter) legacyJSON(diff digest.Differences) error { 62 | // jsonDifference is a struct to represent legacy JSON format 63 | type jsonDifference struct { 64 | Additions []string 65 | Modifications []string 66 | Deletions []string 67 | } 68 | 69 | includes := f.ctx.GetIncludeColumnPositions() 70 | 71 | additions := make([]string, 0, len(diff.Additions)) 72 | for _, addition := range diff.Additions { 73 | additions = append(additions, includes.String(addition, f.ctx.separator)) 74 | } 75 | 76 | modifications := make([]string, 0, len(diff.Modifications)) 77 | for _, modification := range diff.Modifications { 78 | modifications = append(modifications, includes.String(modification.Current, f.ctx.separator)) 79 | } 80 | 81 | deletions := make([]string, 0, len(diff.Deletions)) 82 | for _, deletion := range diff.Deletions { 83 | deletions = append(deletions, includes.String(deletion, f.ctx.separator)) 84 | } 85 | 86 | jsonDiff := jsonDifference{Additions: additions, Modifications: modifications, Deletions: deletions} 87 | data, err := json.MarshalIndent(jsonDiff, "", " ") 88 | 89 | if err != nil { 90 | return fmt.Errorf("error when serializing with JSON formatter: %v", err) 91 | } 92 | 93 | _, err = f.stdout.Write(data) 94 | 95 | if err != nil { 96 | return fmt.Errorf("error when writing to writer with JSON formatter: %v", err) 97 | } 98 | 99 | return nil 100 | } 101 | 102 | // json formats the diff as a JSON object: 103 | // { "Additions": [...], "Modifications": [{ "Original": [...], "Current": [...]}]} 104 | func (f *Formatter) json(diff digest.Differences) error { 105 | includes := f.ctx.GetIncludeColumnPositions() 106 | 107 | additions := make([]string, 0, len(diff.Additions)) 108 | for _, addition := range diff.Additions { 109 | additions = append(additions, includes.String(addition, f.ctx.separator)) 110 | } 111 | 112 | deletions := make([]string, 0, len(diff.Deletions)) 113 | for _, deletion := range diff.Deletions { 114 | deletions = append(deletions, includes.String(deletion, f.ctx.separator)) 115 | } 116 | 117 | type modification struct { 118 | Original string 119 | Current string 120 | } 121 | 122 | type jsonDifference struct { 123 | Additions []string 124 | Modifications []modification 125 | Deletions []string 126 | } 127 | 128 | modifications := make([]modification, 0, len(diff.Modifications)) 129 | for _, mods := range diff.Modifications { 130 | m := modification{Original: includes.String(mods.Original, f.ctx.separator), Current: includes.String(mods.Current, f.ctx.separator)} 131 | modifications = append(modifications, m) 132 | } 133 | 134 | data, err := json.MarshalIndent(jsonDifference{Additions: additions, Modifications: modifications, Deletions: deletions}, "", " ") 135 | 136 | if err != nil { 137 | return fmt.Errorf("error when serializing with JSON formatter: %v", err) 138 | } 139 | 140 | _, err = f.stdout.Write(data) 141 | 142 | if err != nil { 143 | return fmt.Errorf("error when writing to writer with JSON formatter: %v", err) 144 | } 145 | 146 | return nil 147 | } 148 | 149 | // rowMark formats the diff by marking each row as 150 | // ADDED, MODIFIED, or DELETED; the status is appended to the row as a new column in the output. 
151 | func (f *Formatter) rowMark(diff digest.Differences) error { 152 | _, _ = fmt.Fprintf(f.stderr, "Additions %d\n", len(diff.Additions)) 153 | _, _ = fmt.Fprintf(f.stderr, "Modifications %d\n", len(diff.Modifications)) 154 | _, _ = fmt.Fprintf(f.stderr, "Deletions %d\n", len(diff.Deletions)) 155 | _, _ = fmt.Fprintf(f.stderr, "Rows:\n") 156 | 157 | includes := f.ctx.GetIncludeColumnPositions() 158 | 159 | additions := make([]string, 0, len(diff.Additions)) 160 | for _, addition := range diff.Additions { 161 | additions = append(additions, includes.String(addition, f.ctx.separator)) 162 | } 163 | 164 | modifications := make([]string, 0, len(diff.Modifications)) 165 | for _, modification := range diff.Modifications { 166 | modifications = append(modifications, includes.String(modification.Current, f.ctx.separator)) 167 | } 168 | 169 | deletions := make([]string, 0, len(diff.Deletions)) 170 | for _, deletion := range diff.Deletions { 171 | deletions = append(deletions, includes.String(deletion, f.ctx.separator)) 172 | } 173 | 174 | for _, added := range additions { 175 | _, _ = fmt.Fprintf(f.stdout, "%s,%s\n", added, "ADDED") 176 | } 177 | 178 | for _, modified := range modifications { 179 | _, _ = fmt.Fprintf(f.stdout, "%s,%s\n", modified, "MODIFIED") 180 | } 181 | 182 | for _, deleted := range deletions { 183 | _, _ = fmt.Fprintf(f.stdout, "%s,%s\n", deleted, "DELETED") 184 | } 185 | 186 | return nil 187 | } 188 | 189 | // lineDiff is git-style line diff 190 | func (f *Formatter) lineDiff(diff digest.Differences) error { 191 | includes := f.ctx.GetIncludeColumnPositions() 192 | 193 | blue := color.New(color.FgBlue).FprintfFunc() 194 | red := color.New(color.FgRed).FprintfFunc() 195 | green := color.New(color.FgGreen).FprintfFunc() 196 | 197 | blue(f.stderr, "# Additions (%d)\n", len(diff.Additions)) 198 | for _, addition := range diff.Additions { 199 | green(f.stdout, "+ %s\n", includes.String(addition, f.ctx.separator)) 200 | } 201 | blue(f.stderr, "# Modifications (%d)\n", len(diff.Modifications)) 202 | for _, modification := range diff.Modifications { 203 | red(f.stdout, "- %s\n", includes.String(modification.Original, f.ctx.separator)) 204 | green(f.stdout, "+ %s\n", includes.String(modification.Current, f.ctx.separator)) 205 | } 206 | blue(f.stderr, "# Deletions (%d)\n", len(diff.Deletions)) 207 | for _, deletion := range diff.Deletions { 208 | red(f.stdout, "- %s\n", includes.String(deletion, f.ctx.separator)) 209 | } 210 | 211 | return nil 212 | } 213 | 214 | // wordDiff is git-style --word-diff 215 | func (f *Formatter) wordDiff(diff digest.Differences) error { 216 | return f.wordLevelDiffs(diff, "[-%s-]", "{+%s+}") 217 | } 218 | 219 | // colorWords is git-style --color-words 220 | func (f *Formatter) colorWords(diff digest.Differences) error { 221 | return f.wordLevelDiffs(diff, "%s", "%s") 222 | } 223 | 224 | func (f *Formatter) wordLevelDiffs(diff digest.Differences, deletionFormat, additionFormat string) error { 225 | includes := f.ctx.GetIncludeColumnPositions() 226 | if len(includes) <= 0 { 227 | includes = f.ctx.GetValueColumns() 228 | } 229 | blue := color.New(color.FgBlue).SprintfFunc() 230 | red := color.New(color.FgRed).SprintfFunc() 231 | green := color.New(color.FgGreen).SprintfFunc() 232 | 233 | _, _ = fmt.Fprintln(f.stderr, blue("# Additions (%d)", len(diff.Additions))) 234 | for _, addition := range diff.Additions { 235 | _, _ = fmt.Fprintln(f.stdout, green(additionFormat, includes.String(addition, f.ctx.separator))) 236 | } 237 | 238 | _, _ = 
fmt.Fprintln(f.stderr, blue("# Modifications (%d)", len(diff.Modifications))) 239 | for _, modification := range diff.Modifications { 240 | result := make([]string, 0, len(modification.Current)) 241 | for i := 0; i < len(includes) || i < len(modification.Current); i++ { 242 | if modification.Original[i] != modification.Current[i] { 243 | removed := red(deletionFormat, modification.Original[i]) 244 | added := green(additionFormat, modification.Current[i]) 245 | result = append(result, fmt.Sprintf("%s%s", removed, added)) 246 | } else { 247 | result = append(result, modification.Current[i]) 248 | } 249 | } 250 | _, _ = fmt.Fprintln(f.stdout, includes.String(result, f.ctx.separator)) 251 | } 252 | 253 | _, _ = fmt.Fprintln(f.stderr, blue("# Deletions (%d)", len(diff.Deletions))) 254 | for _, deletion := range diff.Deletions { 255 | _, _ = fmt.Fprintln(f.stdout, red(deletionFormat, includes.String(deletion, f.ctx.separator))) 256 | } 257 | 258 | return nil 259 | 260 | } 261 | -------------------------------------------------------------------------------- /cmd/formatter_test.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | 7 | "github.com/aswinkarthik/csvdiff/pkg/digest" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | func TestLegacyJSONFormat(t *testing.T) { 13 | diff := digest.Differences{ 14 | Additions: []digest.Addition{[]string{"additions"}}, 15 | Modifications: []digest.Modification{{Current: []string{"modification"}}}, 16 | Deletions: []digest.Deletion{[]string{"deletions"}}, 17 | } 18 | expected := `{ 19 | "Additions": [ 20 | "additions" 21 | ], 22 | "Modifications": [ 23 | "modification" 24 | ], 25 | "Deletions": [ 26 | "deletions" 27 | ] 28 | }` 29 | 30 | var stdout bytes.Buffer 31 | var stderr bytes.Buffer 32 | 33 | formatter := NewFormatter(&stdout, &stderr, Context{format: "legacy-json"}) 34 | 35 | err := formatter.Format(diff) 36 | assert.NoError(t, err) 37 | assert.Equal(t, expected, stdout.String()) 38 | } 39 | 40 | func TestJSONFormat(t *testing.T) { 41 | diff := digest.Differences{ 42 | Additions: []digest.Addition{[]string{"additions"}}, 43 | Modifications: []digest.Modification{{Original: []string{"original"}, Current: []string{"modification"}}}, 44 | Deletions: []digest.Deletion{[]string{"deletions"}}, 45 | } 46 | expected := `{ 47 | "Additions": [ 48 | "additions" 49 | ], 50 | "Modifications": [ 51 | { 52 | "Original": "original", 53 | "Current": "modification" 54 | } 55 | ], 56 | "Deletions": [ 57 | "deletions" 58 | ] 59 | }` 60 | 61 | var stdout bytes.Buffer 62 | var stderr bytes.Buffer 63 | 64 | formatter := NewFormatter(&stdout, &stderr, Context{format: "json"}) 65 | 66 | err := formatter.Format(diff) 67 | assert.NoError(t, err) 68 | assert.Equal(t, expected, stdout.String()) 69 | } 70 | func TestRowMarkFormatter(t *testing.T) { 71 | diff := digest.Differences{ 72 | Additions: []digest.Addition{[]string{"additions"}}, 73 | Modifications: []digest.Modification{{Current: []string{"modification"}}}, 74 | Deletions: []digest.Deletion{[]string{"deletions"}}, 75 | } 76 | expectedStdout := `additions,ADDED 77 | modification,MODIFIED 78 | deletions,DELETED 79 | ` 80 | expectedStderr := `Additions 1 81 | Modifications 1 82 | Deletions 1 83 | Rows: 84 | ` 85 | 86 | var stdout bytes.Buffer 87 | var stderr bytes.Buffer 88 | 89 | formatter := NewFormatter(&stdout, &stderr, Context{format: "rowmark"}) 90 | 91 | err := formatter.Format(diff) 92 | 93 | assert.NoError(t, 
err) 94 | assert.Equal(t, expectedStdout, stdout.String()) 95 | assert.Equal(t, expectedStderr, stderr.String()) 96 | } 97 | 98 | func TestLineDiff(t *testing.T) { 99 | t.Run("should show line diff with comma by default", func(t *testing.T) { 100 | diff := digest.Differences{ 101 | Additions: []digest.Addition{[]string{"additions"}}, 102 | Modifications: []digest.Modification{ 103 | { 104 | Original: []string{"original", "comma,separated,value"}, 105 | Current: []string{"modification", "comma,separated,value-2"}, 106 | }, 107 | }, 108 | Deletions: []digest.Deletion{{"deletion", "this-row-was-deleted"}}, 109 | } 110 | expectedStdout := `+ additions 111 | - original,"comma,separated,value" 112 | + modification,"comma,separated,value-2" 113 | - deletion,this-row-was-deleted 114 | ` 115 | expectedStderr := `# Additions (1) 116 | # Modifications (1) 117 | # Deletions (1) 118 | ` 119 | 120 | var stdout bytes.Buffer 121 | var stderr bytes.Buffer 122 | 123 | formatter := NewFormatter(&stdout, &stderr, Context{format: "diff"}) 124 | 125 | err := formatter.Format(diff) 126 | 127 | assert.NoError(t, err) 128 | assert.Equal(t, expectedStdout, stdout.String()) 129 | assert.Equal(t, expectedStderr, stderr.String()) 130 | }) 131 | 132 | t.Run("should show line diff with custom separator", func(t *testing.T) { 133 | diff := digest.Differences{ 134 | Additions: []digest.Addition{[]string{"additions"}}, 135 | Modifications: []digest.Modification{ 136 | { 137 | Original: []string{"original", "comma,separated,value"}, 138 | Current: []string{"modification", "comma,separated,value-2"}, 139 | }, 140 | }, 141 | Deletions: []digest.Deletion{{"deletion", "this-row-was-deleted"}}, 142 | } 143 | expectedStdout := `+ additions 144 | - original|comma,separated,value 145 | + modification|comma,separated,value-2 146 | - deletion|this-row-was-deleted 147 | ` 148 | expectedStderr := `# Additions (1) 149 | # Modifications (1) 150 | # Deletions (1) 151 | ` 152 | 153 | var stdout bytes.Buffer 154 | var stderr bytes.Buffer 155 | 156 | formatter := NewFormatter(&stdout, &stderr, Context{format: "diff", separator: '|'}) 157 | 158 | err := formatter.Format(diff) 159 | 160 | assert.NoError(t, err) 161 | assert.Equal(t, expectedStdout, stdout.String()) 162 | assert.Equal(t, expectedStderr, stderr.String()) 163 | }) 164 | 165 | } 166 | 167 | func TestWordDiff(t *testing.T) { 168 | t.Run("should cover single column happy path", func(t *testing.T) { 169 | diff := digest.Differences{ 170 | Additions: []digest.Addition{[]string{"additions"}}, 171 | Modifications: []digest.Modification{{Original: []string{"original"}, Current: []string{"modification"}}}, 172 | Deletions: []digest.Deletion{{"deletions"}}, 173 | } 174 | expectedStdout := `{+additions+} 175 | [-original-]{+modification+} 176 | [-deletions-] 177 | ` 178 | expectedStderr := `# Additions (1) 179 | # Modifications (1) 180 | # Deletions (1) 181 | ` 182 | 183 | var stdout bytes.Buffer 184 | var stderr bytes.Buffer 185 | 186 | formatter := NewFormatter(&stdout, &stderr, Context{format: "word-diff"}) 187 | 188 | err := formatter.Format(diff) 189 | 190 | assert.NoError(t, err) 191 | assert.Equal(t, expectedStdout, stdout.String()) 192 | assert.Equal(t, expectedStderr, stderr.String()) 193 | }) 194 | 195 | t.Run("should ouput only selective columns", func(t *testing.T) { 196 | diff := digest.Differences{ 197 | Additions: []digest.Addition{[]string{"additions", "ignored-column"}}, 198 | Modifications: []digest.Modification{ 199 | {Original: []string{"original", "ignored-column"}, 
Current: []string{"modification", "ignored-column"}}, 200 | }, 201 | Deletions: []digest.Deletion{{"deletions", "ignored-column"}}, 202 | } 203 | expectedStdout := `{+additions+} 204 | [-original-]{+modification+} 205 | [-deletions-] 206 | ` 207 | expectedStderr := `# Additions (1) 208 | # Modifications (1) 209 | # Deletions (1) 210 | ` 211 | 212 | var stdout bytes.Buffer 213 | var stderr bytes.Buffer 214 | 215 | formatter := NewFormatter(&stdout, &stderr, Context{ 216 | format: "word-diff", 217 | includeColumnPositions: digest.Positions{0}, 218 | }) 219 | 220 | err := formatter.Format(diff) 221 | 222 | assert.NoError(t, err) 223 | assert.Equal(t, expectedStdout, stdout.String()) 224 | assert.Equal(t, expectedStderr, stderr.String()) 225 | 226 | }) 227 | } 228 | 229 | func TestColorWords(t *testing.T) { 230 | diff := digest.Differences{ 231 | Additions: []digest.Addition{[]string{"additions"}}, 232 | Modifications: []digest.Modification{{Original: []string{"original"}, Current: []string{"modification"}}}, 233 | Deletions: []digest.Deletion{{"deletions"}}, 234 | } 235 | expectedStdout := `additions 236 | originalmodification 237 | deletions 238 | ` 239 | expectedStderr := `# Additions (1) 240 | # Modifications (1) 241 | # Deletions (1) 242 | ` 243 | 244 | var stdout bytes.Buffer 245 | var stderr bytes.Buffer 246 | 247 | formatter := NewFormatter(&stdout, &stderr, Context{format: "color-words"}) 248 | 249 | err := formatter.Format(diff) 250 | 251 | assert.NoError(t, err) 252 | assert.Equal(t, expectedStdout, stdout.String()) 253 | assert.Equal(t, expectedStderr, stderr.String()) 254 | } 255 | 256 | func TestWrongFormatter(t *testing.T) { 257 | diff := digest.Differences{} 258 | formatter := NewFormatter(nil, nil, Context{format: "random-str"}) 259 | 260 | err := formatter.Format(diff) 261 | 262 | assert.Error(t, err) 263 | } 264 | -------------------------------------------------------------------------------- /cmd/root.go: -------------------------------------------------------------------------------- 1 | // Copyright © 2018 aswinkarthik 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 
20 | 21 | package cmd 22 | 23 | import ( 24 | "fmt" 25 | "io" 26 | "os" 27 | "strings" 28 | "time" 29 | "unicode/utf8" 30 | 31 | "github.com/fatih/color" 32 | "github.com/spf13/afero" 33 | 34 | "github.com/aswinkarthik/csvdiff/pkg/digest" 35 | "github.com/spf13/cobra" 36 | ) 37 | 38 | var ( 39 | timed bool 40 | ) 41 | 42 | // rootCmd represents the base command when called without any subcommands 43 | var rootCmd = &cobra.Command{ 44 | Use: "csvdiff ", 45 | SilenceUsage: true, 46 | SilenceErrors: true, 47 | Short: "A diff tool for database tables dumped as csv files", 48 | Long: `Differentiates two csv files and finds out the additions and modifications. 49 | Most suitable for csv files created from database tables`, 50 | PreRunE: func(cmd *cobra.Command, args []string) error { 51 | // validate args 52 | if len(args) != 2 { 53 | return fmt.Errorf("pass 2 files. Usage: csvdiff ") 54 | } 55 | 56 | return nil 57 | }, 58 | RunE: func(cmd *cobra.Command, args []string) error { 59 | if timed { 60 | defer timeTrack(time.Now(), "csvdiff") 61 | } 62 | fs := afero.NewOsFs() 63 | baseFilename := args[0] 64 | deltaFilename := args[1] 65 | runeSeparator, err := parseSeparator(separator) 66 | if err != nil { 67 | return err 68 | } 69 | ctx, err := NewContext( 70 | fs, 71 | primaryKeyPositions, 72 | valueColumnPositions, 73 | ignoreValueColumnPositions, 74 | includeColumnPositions, 75 | format, 76 | baseFilename, 77 | deltaFilename, 78 | runeSeparator, 79 | lazyQuotes, 80 | ) 81 | 82 | if err != nil { 83 | return err 84 | } 85 | defer ctx.Close() 86 | 87 | return runContext(ctx, os.Stdout, os.Stderr) 88 | }, 89 | } 90 | 91 | func runContext(ctx *Context, outputStream, errorStream io.Writer) error { 92 | baseConfig, err := ctx.BaseDigestConfig() 93 | if err != nil { 94 | return fmt.Errorf("error opening base-file %s: %v", ctx.baseFilename, err) 95 | } 96 | deltaConfig, err := ctx.DeltaDigestConfig() 97 | if err != nil { 98 | return fmt.Errorf("error opening delta-file %s: %v", ctx.deltaFilename, err) 99 | } 100 | defer ctx.Close() 101 | 102 | diff, err := digest.Diff(baseConfig, deltaConfig) 103 | 104 | if err != nil { 105 | return err 106 | } 107 | 108 | return NewFormatter(outputStream, errorStream, *ctx).Format(diff) 109 | } 110 | 111 | // Execute adds all child commands to the root command and sets flags appropriately. 112 | // This is called by main.main(). It only needs to happen once to the rootCmd. 113 | func Execute() { 114 | rootCmd.Version = Version() 115 | if err := rootCmd.Execute(); err != nil { 116 | _, _ = fmt.Fprint(os.Stderr, color.RedString("csvdiff: command failed - %v\n\n", err)) 117 | _ = rootCmd.Help() 118 | os.Exit(1) 119 | } 120 | } 121 | 122 | var ( 123 | primaryKeyPositions []int 124 | valueColumnPositions []int 125 | ignoreValueColumnPositions []int 126 | includeColumnPositions []int 127 | format string 128 | separator string 129 | lazyQuotes bool 130 | ) 131 | 132 | func init() { 133 | rootCmd.Flags().BoolP("toggle", "t", false, "Help message for toggle") 134 | 135 | rootCmd.Flags().IntSliceVarP(&primaryKeyPositions, "primary-key", "p", []int{0}, "Primary key positions of the Input CSV as comma separated values Eg: 1,2") 136 | rootCmd.Flags().IntSliceVarP(&valueColumnPositions, "columns", "", []int{}, "Selectively compare positions in CSV Eg: 1,2. Default is entire row") 137 | rootCmd.Flags().IntSliceVarP(&ignoreValueColumnPositions, "ignore-columns", "", []int{}, "Inverse of --columns flag. 
This cannot be used if --columns are specified") 138 | rootCmd.Flags().IntSliceVarP(&includeColumnPositions, "include", "", []int{}, "Include positions in CSV to display Eg: 1,2. Default is entire row") 139 | rootCmd.Flags().StringVarP(&format, "format", "o", "diff", fmt.Sprintf("Available (%s)", strings.Join(allFormats, "|"))) 140 | rootCmd.Flags().StringVarP(&separator, "separator", "s", ",", "use specific separator (\\t, or any one character string)") 141 | 142 | rootCmd.Flags().BoolVarP(&timed, "time", "", false, "Measure time") 143 | rootCmd.Flags().BoolVar(&lazyQuotes, "lazyquotes", false, "allow unescaped quotes") 144 | } 145 | 146 | func timeTrack(start time.Time, name string) { 147 | elapsed := time.Since(start) 148 | _, _ = fmt.Fprintln(os.Stderr, fmt.Sprintf("%s took %s", name, elapsed)) 149 | } 150 | 151 | func parseSeparator(sep string) (rune, error) { 152 | if strings.HasPrefix(sep, "\\t") { 153 | return '\t', nil 154 | } 155 | 156 | runesep, _ := utf8.DecodeRuneInString(sep) 157 | if runesep == utf8.RuneError { 158 | return ' ', fmt.Errorf("unable to use %v (%q) as a separator", separator, separator) 159 | } 160 | 161 | return runesep, nil 162 | } 163 | -------------------------------------------------------------------------------- /cmd/root_test.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "bytes" 5 | "os" 6 | "testing" 7 | 8 | "github.com/aswinkarthik/csvdiff/pkg/digest" 9 | "github.com/spf13/afero" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func TestRunContext(t *testing.T) { 14 | t.Run("should find diff in happy path", func(t *testing.T) { 15 | fs := afero.NewMemMapFs() 16 | { 17 | baseContent := []byte(`id,name,age,desc 18 | 0,tom,2,developer 19 | 2,ryan,20,qa 20 | 4,emin,40,pm 21 | 22 | `) 23 | err := afero.WriteFile(fs, "/base.csv", baseContent, os.ModePerm) 24 | assert.NoError(t, err) 25 | } 26 | { 27 | deltaContent := []byte(`id,name,age,desc 28 | 0,tom,2,developer 29 | 1,caprio,3,developer 30 | 2,ryan,23,qa 31 | `) 32 | err := afero.WriteFile(fs, "/delta.csv", deltaContent, os.ModePerm) 33 | assert.NoError(t, err) 34 | } 35 | 36 | ctx, err := NewContext( 37 | fs, 38 | digest.Positions{0}, 39 | digest.Positions{1, 2}, 40 | nil, 41 | digest.Positions{0, 1, 2}, 42 | "json", 43 | "/base.csv", 44 | "/delta.csv", 45 | ',', 46 | false, 47 | ) 48 | assert.NoError(t, err) 49 | 50 | outStream := &bytes.Buffer{} 51 | errStream := &bytes.Buffer{} 52 | 53 | err = runContext(ctx, outStream, errStream) 54 | expected := `{ 55 | "Additions": [ 56 | "1,caprio,3" 57 | ], 58 | "Modifications": [ 59 | { 60 | "Original": "2,ryan,20", 61 | "Current": "2,ryan,23" 62 | } 63 | ], 64 | "Deletions": [ 65 | "4,emin,40" 66 | ] 67 | }` 68 | 69 | assert.NoError(t, err) 70 | assert.Equal(t, expected, outStream.String()) 71 | 72 | }) 73 | } 74 | -------------------------------------------------------------------------------- /cmd/version.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | const defaultVersion = "1.0-dev" 4 | 5 | var version = defaultVersion 6 | 7 | // SetVersion will set the version of the cmd package 8 | func SetVersion(_version string) { 9 | if _version == "" { 10 | version = defaultVersion 11 | return 12 | } 13 | 14 | version = _version 15 | } 16 | 17 | // Version will return the set version of cmd package 18 | func Version() string { 19 | if version == "" { 20 | return defaultVersion 21 | } 22 | 23 | return version 24 | } 25 | 
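SetVersion and Execute above are the only entry points a main package needs; as a rough sketch (the contents of main.go are not shown in this section, so the variable name and the ldflags wiring below are assumptions, not the project's confirmed setup), the binary could be wired up like this:

package main

import "github.com/aswinkarthik/csvdiff/cmd"

// version is assumed to be stamped at build time, e.g.
//   go build -ldflags "-X main.version=v1.2.3"
// An empty value falls back to the "1.0-dev" default handled by cmd.SetVersion.
var version string

func main() {
	cmd.SetVersion(version) // hand the build-time version to the cmd package
	cmd.Execute()           // run the root cobra command
}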
-------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: 2 | layout: "reach, diff, flags, files" 3 | behavior: default 4 | require_changes: false # if true: only post the comment if coverage changes 5 | require_base: no # [yes :: must have a base report to post] 6 | require_head: yes # [yes :: must have a head report to post] 7 | branches: null 8 | -------------------------------------------------------------------------------- /docker-push.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -el 2 | 3 | set -e 4 | 5 | VERSION=${TRAVIS_TAG} 6 | REPO_NAME="csvdiff" 7 | GROUP="aswinkarthik" 8 | 9 | docker build -t ${REPO_NAME}:${VERSION} . 10 | 11 | docker tag ${REPO_NAME}:${VERSION} ${GROUP}/${REPO_NAME}:latest 12 | docker tag ${REPO_NAME}:${VERSION} ${GROUP}/${REPO_NAME}:${VERSION} 13 | 14 | docker push ${GROUP}/${REPO_NAME}:latest 15 | docker push ${GROUP}/${REPO_NAME}:${VERSION} 16 | -------------------------------------------------------------------------------- /examples/base-small.csv: -------------------------------------------------------------------------------- 1 | 15,12,wordpress.com,com,207790,792348,wordpress.com,com,15,12,207589,791634 2 | 43,1,europa.eu,eu,116613,353412,europa.eu,eu,41,1,119129,359818 3 | 69,48,aol.com,com,97543,225532,aol.com,com,70,49,97328,224491 4 | 1615,905,proboards.com,com,19833,33110,proboards.com,com,1613,902,19835,33135 5 | 1616,906,ccleaner.com,com,19831,32507,ccleaner.com,com,1614,903,19834,32463 6 | 1617,907,doodle.com,com,19827,32902,doodle.com,com,1621,909,19787,32822 7 | -------------------------------------------------------------------------------- /examples/delta-small.csv: -------------------------------------------------------------------------------- 1 | 15,12,wordpress.com,com,207790,792348,wordpress.com,com,15,12,207589,791634 2 | 43,1,europa.eu,eu,116613,353412,europa.eu,eu,41,1,119129,359818 3 | 69,1048,aol.com,com,97543,225532,aol.com,com,70,49,97328,224491 4 | 24564,907,completely-newsite.com,com,19827,32902,completely-newsite.com,com,1621,909,19787,32822 5 | -------------------------------------------------------------------------------- /examples/lazy_quotes.csv: -------------------------------------------------------------------------------- 1 | 15 12 wordpress".com com 207790 792348 wordpress".com com 15 12 207589 791634 2 | 43 1 europa.eu eu 116613 353412 europa.eu eu 41 1 119129 359818 3 | 69 48 "aol.com com 97543 225532 "aol.com com 70 49 97328 224491 4 | 1615 905 proboards.com com 19833 33110 proboards.com com 1613 902 19835 33135 5 | 1616 906 ccleaner.com com 19831 32507 ccleaner.com com 1614 903 19834 32463 6 | 1617 907 doodle.com com 19827 32902 doodle.com com 1621 909 19787 32822 7 | -------------------------------------------------------------------------------- /examples/lazy_quotes_delta.csv: -------------------------------------------------------------------------------- 1 | 15 12 wordpress".com com 207790 792348 wordpress".com com 15 12 207589 791634 2 | 43 1 europa.eu eu 116613 353412 europa.eu eu 41 1 119129 359818 3 | 69 1048 "aol.com com 97543 225532 "aol.com com 70 49 97328 224491 4 | 24564 907 completely-newsite.com com 19827 32902 completely-newsite.com com 1621 909 19787 32822 5 | -------------------------------------------------------------------------------- /examples/no_comma.csv: 
-------------------------------------------------------------------------------- 1 | 15 12 wordpress.com com 207790 792348 wordpress.com com 15 12 207589 791634 2 | 43 1 europa.eu eu 116613 353412 europa.eu eu 41 1 119129 359818 3 | 69 48 aol.com com 97543 225532 aol.com com 70 49 97328 224491 4 | 1615 905 proboards.com com 19833 33110 proboards.com com 1613 902 19835 33135 5 | 1616 906 ccleaner.com com 19831 32507 ccleaner.com com 1614 903 19834 32463 6 | 1617 907 doodle.com com 19827 32902 doodle.com com 1621 909 19787 32822 7 | -------------------------------------------------------------------------------- /examples/no_comma_delta.csv: -------------------------------------------------------------------------------- 1 | 15 12 wordpress.com com 207790 792348 wordpress.com com 15 12 207589 791634 2 | 43 1 europa.eu eu 116613 353412 europa.eu eu 41 1 119129 359818 3 | 69 1048 aol.com com 97543 225532 aol.com com 70 49 97328 224491 4 | 24564 907 completely-newsite.com com 19827 32902 completely-newsite.com com 1621 909 19787 32822 5 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/aswinkarthik/csvdiff 2 | 3 | require ( 4 | github.com/OneOfOne/xxhash v1.2.5 // indirect 5 | github.com/cespare/xxhash v1.1.0 6 | github.com/fatih/color v1.7.0 7 | github.com/mattn/go-colorable v0.1.2 // indirect 8 | github.com/spaolacci/murmur3 v1.1.0 // indirect 9 | github.com/spf13/afero v1.1.2 10 | github.com/spf13/cobra v0.0.5 11 | github.com/stretchr/testify v1.4.0 12 | golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa // indirect 13 | ) 14 | 15 | go 1.13 16 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 2 | github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= 3 | github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= 4 | github.com/OneOfOne/xxhash v1.2.5 h1:zl/OfRA6nftbBK9qTohYBJ5xvw6C/oNKizR7cZGl3cI= 5 | github.com/OneOfOne/xxhash v1.2.5/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q= 6 | github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= 7 | github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= 8 | github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= 9 | github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= 10 | github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= 11 | github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= 12 | github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= 13 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= 14 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 15 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 16 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 17 | github.com/fatih/color v1.7.0 h1:DkWD4oS2D8LGGgTQ6IvwJJXSL5Vp2ffcQg58nFV38Ys= 18 | github.com/fatih/color v1.7.0/go.mod 
h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= 19 | github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= 20 | github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= 21 | github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= 22 | github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= 23 | github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= 24 | github.com/mattn/go-colorable v0.1.2 h1:/bC9yWikZXAL9uJdulbSfyVNIR3n3trXl+v8+1sx8mU= 25 | github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= 26 | github.com/mattn/go-isatty v0.0.8 h1:HLtExJ+uU2HOZ+wI0Tt5DtUDrx8yhUqDcp7fYERX4CE= 27 | github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= 28 | github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= 29 | github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= 30 | github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= 31 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 32 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 33 | github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= 34 | github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ= 35 | github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= 36 | github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= 37 | github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= 38 | github.com/spf13/afero v1.1.2 h1:m8/z1t7/fwjysjQRYbP0RD+bUIF/8tJwPdEZsI83ACI= 39 | github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= 40 | github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= 41 | github.com/spf13/cobra v0.0.5 h1:f0B+LkLX6DtmRH1isoNA9VTtNUK9K8xYd28JNNfOv/s= 42 | github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= 43 | github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= 44 | github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg= 45 | github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= 46 | github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= 47 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 48 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 49 | github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= 50 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 51 | github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= 52 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 53 | github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= 54 | github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= 55 | golang.org/x/crypto 
v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= 56 | golang.org/x/sys v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 57 | golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223 h1:DH4skfRX4EBpamg7iV4ZlCpblAHI6s6TDM39bFZumv8= 58 | golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 59 | golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa h1:KIDDMLT1O0Nr7TSxp8xM5tJcdn8tgyAONntO829og1M= 60 | golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 61 | golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= 62 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 63 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 64 | gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= 65 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 66 | -------------------------------------------------------------------------------- /install: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | # Code generated by godownloader on 2018-10-26T05:33:59Z. DO NOT EDIT. 4 | # 5 | 6 | usage() { 7 | this=$1 8 | cat </dev/null 118 | } 119 | echoerr() { 120 | echo "$@" 1>&2 121 | } 122 | log_prefix() { 123 | echo "$0" 124 | } 125 | _logp=6 126 | log_set_priority() { 127 | _logp="$1" 128 | } 129 | log_priority() { 130 | if test -z "$1"; then 131 | echo "$_logp" 132 | return 133 | fi 134 | [ "$1" -le "$_logp" ] 135 | } 136 | log_tag() { 137 | case $1 in 138 | 0) echo "emerg" ;; 139 | 1) echo "alert" ;; 140 | 2) echo "crit" ;; 141 | 3) echo "err" ;; 142 | 4) echo "warning" ;; 143 | 5) echo "notice" ;; 144 | 6) echo "info" ;; 145 | 7) echo "debug" ;; 146 | *) echo "$1" ;; 147 | esac 148 | } 149 | log_debug() { 150 | log_priority 7 || return 0 151 | echoerr "$(log_prefix)" "$(log_tag 7)" "$@" 152 | } 153 | log_info() { 154 | log_priority 6 || return 0 155 | echoerr "$(log_prefix)" "$(log_tag 6)" "$@" 156 | } 157 | log_err() { 158 | log_priority 3 || return 0 159 | echoerr "$(log_prefix)" "$(log_tag 3)" "$@" 160 | } 161 | log_crit() { 162 | log_priority 2 || return 0 163 | echoerr "$(log_prefix)" "$(log_tag 2)" "$@" 164 | } 165 | uname_os() { 166 | os=$(uname -s | tr '[:upper:]' '[:lower:]') 167 | case "$os" in 168 | msys_nt) os="windows" ;; 169 | esac 170 | echo "$os" 171 | } 172 | uname_arch() { 173 | arch=$(uname -m) 174 | case $arch in 175 | x86_64) arch="amd64" ;; 176 | x86) arch="386" ;; 177 | i686) arch="386" ;; 178 | i386) arch="386" ;; 179 | aarch64) arch="arm64" ;; 180 | armv5*) arch="armv5" ;; 181 | armv6*) arch="armv6" ;; 182 | armv7*) arch="armv7" ;; 183 | esac 184 | echo ${arch} 185 | } 186 | uname_os_check() { 187 | os=$(uname_os) 188 | case "$os" in 189 | darwin) return 0 ;; 190 | dragonfly) return 0 ;; 191 | freebsd) return 0 ;; 192 | linux) return 0 ;; 193 | android) return 0 ;; 194 | nacl) return 0 ;; 195 | netbsd) return 0 ;; 196 | openbsd) return 0 ;; 197 | plan9) return 0 ;; 198 | solaris) return 0 ;; 199 | windows) return 0 ;; 200 | esac 201 | log_crit "uname_os_check '$(uname -s)' got converted to '$os' which is not a GOOS value. 
Please file bug at https://github.com/client9/shlib" 202 | return 1 203 | } 204 | uname_arch_check() { 205 | arch=$(uname_arch) 206 | case "$arch" in 207 | 386) return 0 ;; 208 | amd64) return 0 ;; 209 | arm64) return 0 ;; 210 | armv5) return 0 ;; 211 | armv6) return 0 ;; 212 | armv7) return 0 ;; 213 | ppc64) return 0 ;; 214 | ppc64le) return 0 ;; 215 | mips) return 0 ;; 216 | mipsle) return 0 ;; 217 | mips64) return 0 ;; 218 | mips64le) return 0 ;; 219 | s390x) return 0 ;; 220 | amd64p32) return 0 ;; 221 | esac 222 | log_crit "uname_arch_check '$(uname -m)' got converted to '$arch' which is not a GOARCH value. Please file bug report at https://github.com/client9/shlib" 223 | return 1 224 | } 225 | untar() { 226 | tarball=$1 227 | case "${tarball}" in 228 | *.tar.gz | *.tgz) tar -xzf "${tarball}" ;; 229 | *.tar) tar -xf "${tarball}" ;; 230 | *.zip) unzip "${tarball}" ;; 231 | *) 232 | log_err "untar unknown archive format for ${tarball}" 233 | return 1 234 | ;; 235 | esac 236 | } 237 | mktmpdir() { 238 | test -z "$TMPDIR" && TMPDIR="$(mktemp -d)" 239 | mkdir -p "${TMPDIR}" 240 | echo "${TMPDIR}" 241 | } 242 | http_download_curl() { 243 | local_file=$1 244 | source_url=$2 245 | header=$3 246 | if [ -z "$header" ]; then 247 | code=$(curl -w '%{http_code}' -sL -o "$local_file" "$source_url") 248 | else 249 | code=$(curl -w '%{http_code}' -sL -H "$header" -o "$local_file" "$source_url") 250 | fi 251 | if [ "$code" != "200" ]; then 252 | log_debug "http_download_curl received HTTP status $code" 253 | return 1 254 | fi 255 | return 0 256 | } 257 | http_download_wget() { 258 | local_file=$1 259 | source_url=$2 260 | header=$3 261 | if [ -z "$header" ]; then 262 | wget -q -O "$local_file" "$source_url" 263 | else 264 | wget -q --header "$header" -O "$local_file" "$source_url" 265 | fi 266 | } 267 | http_download() { 268 | log_debug "http_download $2" 269 | if is_command curl; then 270 | http_download_curl "$@" 271 | return 272 | elif is_command wget; then 273 | http_download_wget "$@" 274 | return 275 | fi 276 | log_crit "http_download unable to find wget or curl" 277 | return 1 278 | } 279 | http_copy() { 280 | tmp=$(mktemp) 281 | http_download "${tmp}" "$1" "$2" || return 1 282 | body=$(cat "$tmp") 283 | rm -f "${tmp}" 284 | echo "$body" 285 | } 286 | github_release() { 287 | owner_repo=$1 288 | version=$2 289 | test -z "$version" && version="latest" 290 | giturl="https://github.com/${owner_repo}/releases/${version}" 291 | json=$(http_copy "$giturl" "Accept:application/json") 292 | test -z "$json" && return 1 293 | version=$(echo "$json" | tr -s '\n' ' ' | sed 's/.*"tag_name":"//' | sed 's/".*//') 294 | test -z "$version" && return 1 295 | echo "$version" 296 | } 297 | hash_sha256() { 298 | TARGET=${1:-/dev/stdin} 299 | if is_command gsha256sum; then 300 | hash=$(gsha256sum "$TARGET") || return 1 301 | echo "$hash" | cut -d ' ' -f 1 302 | elif is_command sha256sum; then 303 | hash=$(sha256sum "$TARGET") || return 1 304 | echo "$hash" | cut -d ' ' -f 1 305 | elif is_command shasum; then 306 | hash=$(shasum -a 256 "$TARGET" 2>/dev/null) || return 1 307 | echo "$hash" | cut -d ' ' -f 1 308 | elif is_command openssl; then 309 | hash=$(openssl -dst openssl dgst -sha256 "$TARGET") || return 1 310 | echo "$hash" | cut -d ' ' -f a 311 | else 312 | log_crit "hash_sha256 unable to find command to compute sha-256 hash" 313 | return 1 314 | fi 315 | } 316 | hash_sha256_verify() { 317 | TARGET=$1 318 | checksums=$2 319 | if [ -z "$checksums" ]; then 320 | log_err "hash_sha256_verify checksum file not 
specified in arg2" 321 | return 1 322 | fi 323 | BASENAME=${TARGET##*/} 324 | want=$(grep "${BASENAME}" "${checksums}" 2>/dev/null | tr '\t' ' ' | cut -d ' ' -f 1) 325 | if [ -z "$want" ]; then 326 | log_err "hash_sha256_verify unable to find checksum for '${TARGET}' in '${checksums}'" 327 | return 1 328 | fi 329 | got=$(hash_sha256 "$TARGET") 330 | if [ "$want" != "$got" ]; then 331 | log_err "hash_sha256_verify checksum for '$TARGET' did not verify ${want} vs $got" 332 | return 1 333 | fi 334 | } 335 | cat /dev/null </dev/null 118 | } 119 | echoerr() { 120 | echo "$@" 1>&2 121 | } 122 | log_prefix() { 123 | echo "$0" 124 | } 125 | _logp=6 126 | log_set_priority() { 127 | _logp="$1" 128 | } 129 | log_priority() { 130 | if test -z "$1"; then 131 | echo "$_logp" 132 | return 133 | fi 134 | [ "$1" -le "$_logp" ] 135 | } 136 | log_tag() { 137 | case $1 in 138 | 0) echo "emerg" ;; 139 | 1) echo "alert" ;; 140 | 2) echo "crit" ;; 141 | 3) echo "err" ;; 142 | 4) echo "warning" ;; 143 | 5) echo "notice" ;; 144 | 6) echo "info" ;; 145 | 7) echo "debug" ;; 146 | *) echo "$1" ;; 147 | esac 148 | } 149 | log_debug() { 150 | log_priority 7 || return 0 151 | echoerr "$(log_prefix)" "$(log_tag 7)" "$@" 152 | } 153 | log_info() { 154 | log_priority 6 || return 0 155 | echoerr "$(log_prefix)" "$(log_tag 6)" "$@" 156 | } 157 | log_err() { 158 | log_priority 3 || return 0 159 | echoerr "$(log_prefix)" "$(log_tag 3)" "$@" 160 | } 161 | log_crit() { 162 | log_priority 2 || return 0 163 | echoerr "$(log_prefix)" "$(log_tag 2)" "$@" 164 | } 165 | uname_os() { 166 | os=$(uname -s | tr '[:upper:]' '[:lower:]') 167 | case "$os" in 168 | msys_nt) os="windows" ;; 169 | esac 170 | echo "$os" 171 | } 172 | uname_arch() { 173 | arch=$(uname -m) 174 | case $arch in 175 | x86_64) arch="amd64" ;; 176 | x86) arch="386" ;; 177 | i686) arch="386" ;; 178 | i386) arch="386" ;; 179 | aarch64) arch="arm64" ;; 180 | armv5*) arch="armv5" ;; 181 | armv6*) arch="armv6" ;; 182 | armv7*) arch="armv7" ;; 183 | esac 184 | echo ${arch} 185 | } 186 | uname_os_check() { 187 | os=$(uname_os) 188 | case "$os" in 189 | darwin) return 0 ;; 190 | dragonfly) return 0 ;; 191 | freebsd) return 0 ;; 192 | linux) return 0 ;; 193 | android) return 0 ;; 194 | nacl) return 0 ;; 195 | netbsd) return 0 ;; 196 | openbsd) return 0 ;; 197 | plan9) return 0 ;; 198 | solaris) return 0 ;; 199 | windows) return 0 ;; 200 | esac 201 | log_crit "uname_os_check '$(uname -s)' got converted to '$os' which is not a GOOS value. Please file bug at https://github.com/client9/shlib" 202 | return 1 203 | } 204 | uname_arch_check() { 205 | arch=$(uname_arch) 206 | case "$arch" in 207 | 386) return 0 ;; 208 | amd64) return 0 ;; 209 | arm64) return 0 ;; 210 | armv5) return 0 ;; 211 | armv6) return 0 ;; 212 | armv7) return 0 ;; 213 | ppc64) return 0 ;; 214 | ppc64le) return 0 ;; 215 | mips) return 0 ;; 216 | mipsle) return 0 ;; 217 | mips64) return 0 ;; 218 | mips64le) return 0 ;; 219 | s390x) return 0 ;; 220 | amd64p32) return 0 ;; 221 | esac 222 | log_crit "uname_arch_check '$(uname -m)' got converted to '$arch' which is not a GOARCH value. 
Please file bug report at https://github.com/client9/shlib" 223 | return 1 224 | } 225 | untar() { 226 | tarball=$1 227 | case "${tarball}" in 228 | *.tar.gz | *.tgz) tar -xzf "${tarball}" ;; 229 | *.tar) tar -xf "${tarball}" ;; 230 | *.zip) unzip "${tarball}" ;; 231 | *) 232 | log_err "untar unknown archive format for ${tarball}" 233 | return 1 234 | ;; 235 | esac 236 | } 237 | mktmpdir() { 238 | test -z "$TMPDIR" && TMPDIR="$(mktemp -d)" 239 | mkdir -p "${TMPDIR}" 240 | echo "${TMPDIR}" 241 | } 242 | http_download_curl() { 243 | local_file=$1 244 | source_url=$2 245 | header=$3 246 | if [ -z "$header" ]; then 247 | code=$(curl -w '%{http_code}' -sL -o "$local_file" "$source_url") 248 | else 249 | code=$(curl -w '%{http_code}' -sL -H "$header" -o "$local_file" "$source_url") 250 | fi 251 | if [ "$code" != "200" ]; then 252 | log_debug "http_download_curl received HTTP status $code" 253 | return 1 254 | fi 255 | return 0 256 | } 257 | http_download_wget() { 258 | local_file=$1 259 | source_url=$2 260 | header=$3 261 | if [ -z "$header" ]; then 262 | wget -q -O "$local_file" "$source_url" 263 | else 264 | wget -q --header "$header" -O "$local_file" "$source_url" 265 | fi 266 | } 267 | http_download() { 268 | log_debug "http_download $2" 269 | if is_command curl; then 270 | http_download_curl "$@" 271 | return 272 | elif is_command wget; then 273 | http_download_wget "$@" 274 | return 275 | fi 276 | log_crit "http_download unable to find wget or curl" 277 | return 1 278 | } 279 | http_copy() { 280 | tmp=$(mktemp) 281 | http_download "${tmp}" "$1" "$2" || return 1 282 | body=$(cat "$tmp") 283 | rm -f "${tmp}" 284 | echo "$body" 285 | } 286 | github_release() { 287 | owner_repo=$1 288 | version=$2 289 | test -z "$version" && version="latest" 290 | giturl="https://github.com/${owner_repo}/releases/${version}" 291 | json=$(http_copy "$giturl" "Accept:application/json") 292 | test -z "$json" && return 1 293 | version=$(echo "$json" | tr -s '\n' ' ' | sed 's/.*"tag_name":"//' | sed 's/".*//') 294 | test -z "$version" && return 1 295 | echo "$version" 296 | } 297 | hash_sha256() { 298 | TARGET=${1:-/dev/stdin} 299 | if is_command gsha256sum; then 300 | hash=$(gsha256sum "$TARGET") || return 1 301 | echo "$hash" | cut -d ' ' -f 1 302 | elif is_command sha256sum; then 303 | hash=$(sha256sum "$TARGET") || return 1 304 | echo "$hash" | cut -d ' ' -f 1 305 | elif is_command shasum; then 306 | hash=$(shasum -a 256 "$TARGET" 2>/dev/null) || return 1 307 | echo "$hash" | cut -d ' ' -f 1 308 | elif is_command openssl; then 309 | hash=$(openssl -dst openssl dgst -sha256 "$TARGET") || return 1 310 | echo "$hash" | cut -d ' ' -f a 311 | else 312 | log_crit "hash_sha256 unable to find command to compute sha-256 hash" 313 | return 1 314 | fi 315 | } 316 | hash_sha256_verify() { 317 | TARGET=$1 318 | checksums=$2 319 | if [ -z "$checksums" ]; then 320 | log_err "hash_sha256_verify checksum file not specified in arg2" 321 | return 1 322 | fi 323 | BASENAME=${TARGET##*/} 324 | want=$(grep "${BASENAME}" "${checksums}" 2>/dev/null | tr '\t' ' ' | cut -d ' ' -f 1) 325 | if [ -z "$want" ]; then 326 | log_err "hash_sha256_verify unable to find checksum for '${TARGET}' in '${checksums}'" 327 | return 1 328 | fi 329 | got=$(hash_sha256 "$TARGET") 330 | if [ "$want" != "$got" ]; then 331 | log_err "hash_sha256_verify checksum for '$TARGET' did not verify ${want} vs $got" 332 | return 1 333 | fi 334 | } 335 | cat /dev/null <