├── .gitignore ├── .travis.yml ├── go.mod ├── bin └── xxhashdir.go ├── go.sum ├── .goreleaser.yml ├── xxhashdir_test.go ├── LICENSE ├── README.md └── xxhashdir.go /.gitignore: -------------------------------------------------------------------------------- 1 | xxhashdir 2 | dist 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | go: 3 | - "1.x" 4 | script: go test -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/razum2um/xxhashdir 2 | 3 | require github.com/cespare/xxhash v1.1.0 4 | -------------------------------------------------------------------------------- /bin/xxhashdir.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | 7 | "github.com/razum2um/xxhashdir" 8 | ) 9 | 10 | func main() { 11 | out := make(chan xxhashdir.Entry) 12 | xxhashdir.Hashdir(os.Args[1], out) 13 | for entry := range out { 14 | fmt.Printf("%-21d %s\n", entry.Xxhash, entry.Path) 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= 2 | github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= 3 | github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= 4 | github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= 5 | github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ= 6 | github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= 7 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | # This is an example goreleaser.yaml file with some sane defaults. 2 | # Make sure to check the documentation at http://goreleaser.com 3 | before: 4 | hooks: 5 | # you may remove this if you don't use vgo 6 | - go mod download 7 | # you may remove this if you don't need go generate 8 | - go generate ./... 9 | builds: 10 | - 11 | id: "xxhashdir" 12 | main: ./bin/xxhashdir.go 13 | binary: xxhashdir 14 | env: 15 | - CGO_ENABLED=0 16 | goos: 17 | - linux 18 | - darwin 19 | - windows 20 | goarch: 21 | - amd64 22 | - arm64 23 | archives: 24 | - replacements: 25 | darwin: Darwin 26 | linux: Linux 27 | windows: Windows 28 | 386: i386 29 | amd64: x86_64 30 | checksum: 31 | name_template: 'checksums.txt' 32 | snapshot: 33 | name_template: "{{ .Tag }}-next" 34 | changelog: 35 | sort: asc 36 | filters: 37 | exclude: 38 | - '^docs:' 39 | - '^test:' 40 | -------------------------------------------------------------------------------- /xxhashdir_test.go: -------------------------------------------------------------------------------- 1 | package xxhashdir 2 | 3 | import ( 4 | "path/filepath" 5 | "testing" 6 | ) 7 | 8 | func checkOut(t *testing.T, out chan Entry) { 9 | got := make([]Entry, 0) 10 | for path := range out { 11 | got = append(got, path) 12 | } 13 | 14 | if len(got) < 1 { 15 | t.Fatalf("no entries to hash, expected one") 16 | } 17 | 18 | if len(got) > 1 { 19 | t.Fatalf("extra entry to hash: %v", got[1]) 20 | } 21 | 22 | expectedXxhash := uint64(6467850080536788703) 23 | if got[0].Xxhash != expectedXxhash { 24 | t.Fatalf("got: %v; expected: %v", got[0].Xxhash, expectedXxhash) 25 | } 26 | 27 | expectedPath := "bin/xxhashdir.go" 28 | if filepath.ToSlash(got[0].Path) != expectedPath { 29 | t.Fatalf("got: %v; expected: %v", got[0].Path, expectedPath) 30 | } 31 | } 32 | 33 | func TestAll(t *testing.T) { 34 | t.Run("Test directory", func(t *testing.T) { 35 | out := make(chan Entry) 36 | Hashdir("bin", out) 37 | checkOut(t, out) 38 | }) 39 | } 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Lunatic Cat 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # xxhashdir 2 | 3 | [![Build Status](https://travis-ci.com/lunatic-cat/xxhashdir.svg?branch=master)](https://travis-ci.org/lunatic-cat/xxhashdir) 4 | 5 | ## Usage 6 | 7 | this package does fast filesystem fingerprinting using [xxHash](http://cyan4973.github.io/xxHash/) 8 | 9 | ```sh 10 | # instead of "find . -type f -exec xxhsum {} \+" 11 | $ ./xxhashdir . 12 | ... 13 | 880788507839261490 README.md 14 | 11541949788444589007 .travis.yml 15 | 6467850080536788703 bin/xxhashdir.go 16 | ... 17 | ``` 18 | 19 | typical CLI use: 20 | 21 | ```sh 22 | ./xxhashdir dir > before 23 | # modify fs 24 | ./xxhashdir dir > after 25 | diff <(sort before) <(sort after) | sort -nk3 26 | ``` 27 | 28 | ## Speed 29 | 30 | Times faster than find + exec. Digesting xcode-10.2 with >250K files: 31 | 32 | | Time | Cmd | 33 | | --- | --- | 34 | | 656 sec | time find /Applications/Xcode.app -type f -exec xxhsum {} \; > xxhsum.txt | 35 | | 88 sec | time find /Applications/Xcode.app -type f -exec xxhsum {} \+ > xxhsum.txt | 36 | | 45 sec | time ./xxhashdir /Applications/Xcode.app > xxhsumdir.txt | 37 | 38 | ## Golang api 39 | 40 | ```go 41 | func Hashdir(root string, out chan Entry) 42 | ``` 43 | 44 | where 45 | 46 | ```go 47 | type Entry struct { 48 | Path string 49 | Xxhash uint64 50 | } 51 | ``` 52 | -------------------------------------------------------------------------------- /xxhashdir.go: -------------------------------------------------------------------------------- 1 | package xxhashdir 2 | 3 | import ( 4 | "io" 5 | "os" 6 | "path/filepath" 7 | "runtime" 8 | "sync" 9 | 10 | "github.com/cespare/xxhash" 11 | ) 12 | 13 | // Entry of out chan 14 | type Entry struct { 15 | Path string 16 | Xxhash uint64 17 | } 18 | 19 | func hashFile(path string) (uint64, error) { 20 | var ( 21 | hash = xxhash.New() 22 | file, err = os.Open(path) 23 | ) 24 | 25 | if file != nil { 26 | defer file.Close() 27 | _, err = io.Copy(hash, file) 28 | } 29 | 30 | return hash.Sum64(), err 31 | } 32 | 33 | func produce(root string, in chan string) { 34 | defer close(in) 35 | filepath.Walk(root, func(path string, info os.FileInfo, err error) error { 36 | fi, err := os.Stat(path) 37 | if (err == nil) && (fi.Mode().IsRegular()) { 38 | in <- path 39 | } 40 | return nil 41 | }) 42 | } 43 | 44 | func consume(in chan string, out chan Entry, wg *sync.WaitGroup) { 45 | defer wg.Done() 46 | 47 | for path := range in { 48 | hash, err := hashFile(path) 49 | if err == nil { 50 | out <- Entry{Path: path, Xxhash: hash} 51 | } 52 | } 53 | } 54 | 55 | func stop(out chan Entry, wg *sync.WaitGroup) { 56 | wg.Wait() 57 | close(out) 58 | } 59 | 60 | // Hashdir prints all directory contents with xxhash sums 61 | func Hashdir(root string, out chan Entry) { 62 | in := make(chan string) 63 | wg := &sync.WaitGroup{} 64 | go produce(root, in) 65 | 66 | for i := 0; i < runtime.NumCPU(); i++ { 67 | wg.Add(1) 68 | go consume(in, out, wg) 69 | } 70 | go stop(out, wg) 71 | } 72 | --------------------------------------------------------------------------------