├── README.md ├── .gitignore ├── go.mod ├── go.sum ├── LICENSE ├── lookup_test.go ├── lookup.go ├── scan_test.go ├── fpcalc.go ├── main_test.go ├── db_test.go ├── db.go ├── scan.go └── main.go /README.md: -------------------------------------------------------------------------------- 1 | # Moved to 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /soundalike 2 | /soundalike.exe 3 | /soundalike-*.tar.gz 4 | /soundalike-*.zip 5 | /testdata 6 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/derat/soundalike 2 | 3 | go 1.15 4 | 5 | require github.com/mattn/go-sqlite3 v1.14.11 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/mattn/go-sqlite3 v1.14.11 h1:gt+cp9c0XGqe9S/wAHTL3n/7MqY+siPWgWJgqdsFrzQ= 2 | github.com/mattn/go-sqlite3 v1.14.11/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2022, Daniel Erat 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /lookup_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Daniel Erat. 2 | // All rights reserved. 
3 | 4 | package main 5 | 6 | import ( 7 | "reflect" 8 | "sort" 9 | "testing" 10 | ) 11 | 12 | func TestLookupTable(t *testing.T) { 13 | table := newLookupTable() 14 | for _, f := range []struct { 15 | id fileID 16 | fprint []uint32 17 | }{ 18 | {1, []uint32{0x44442222, 0x44441111, 0x33332222, 0x55553333}}, 19 | {2, []uint32{0x44442222, 0x44442222, 0x44441111, 0x55553333}}, 20 | {3, []uint32{0x33332222, 0x33331111, 0x33334444, 0x44442222}}, 21 | } { 22 | table.add(f.id, f.fprint) 23 | } 24 | 25 | for _, tc := range []struct { 26 | fprint []uint32 27 | thresh int 28 | want []fileID 29 | }{ 30 | {[]uint32{0x44442222, 0x44441111, 0x33332222, 0x55553333}, 4, []fileID{1}}, 31 | {[]uint32{0x44441111, 0x44448888, 0x33331111, 0x55551111}, 4, []fileID{1}}, 32 | {[]uint32{0x44442222, 0x44441111, 0x33332222, 0x55553333}, 3, []fileID{1, 2}}, 33 | {[]uint32{0x44442222, 0x44441111, 0x33332222, 0x55553333}, 2, []fileID{1, 2, 3}}, 34 | {[]uint32{0x44442222, 0x44442222, 0x44442222, 0x44442222}, 4, []fileID{}}, 35 | {[]uint32{0x44442222, 0x44442222, 0x44442222, 0x44442222}, 3, []fileID{2}}, 36 | {[]uint32{0x99999999, 0x99999999, 0x99999999, 0x99999999}, 1, []fileID{}}, 37 | {[]uint32{0x33333333, 0x33333333, 0x33333333, 0x33333333}, 4, []fileID{}}, 38 | {[]uint32{0x33333333, 0x33333333, 0x33333333, 0x33333333}, 3, []fileID{3}}, 39 | {[]uint32{0x33333333, 0x33333333, 0x33333333, 0x33333333}, 2, []fileID{3}}, 40 | {[]uint32{0x33333333, 0x33333333, 0x33333333, 0x33333333}, 1, []fileID{1, 3}}, 41 | } { 42 | got := table.find(tc.fprint, tc.thresh) 43 | sort.Slice(got, func(i, j int) bool { return got[i] < got[j] }) 44 | if !reflect.DeepEqual(got, tc.want) { 45 | t.Errorf("find(%v, %d) = %v; want %v", tc.fprint, tc.thresh, got, tc.want) 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /lookup.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Daniel Erat. 
2 | // All rights reserved. 3 | 4 | package main 5 | 6 | // lookupTable is used to quickly find approximate matches for a given fingerprint. 7 | // 32-bit fingerprint values are truncated to 16 bits to conserve space. 8 | type lookupTable struct { 9 | m map[uint16]map[fileID]int16 // truncated fingerprint value -> file -> count 10 | } 11 | 12 | func newLookupTable() *lookupTable { return &lookupTable{make(map[uint16]map[fileID]int16)} } 13 | 14 | // add adds the supplied file to the table. 15 | func (t *lookupTable) add(id fileID, fprint []uint32) { 16 | for _, v := range fprint { 17 | key := uint16(v >> 16) 18 | counts := t.m[key] 19 | if counts == nil { 20 | counts = make(map[fileID]int16) 21 | t.m[key] = counts 22 | } 23 | counts[id]++ 24 | } 25 | } 26 | 27 | // find returns files that share at least thresh truncated values with fprint. 28 | func (t *lookupTable) find(fprint []uint32, thresh int) []fileID { 29 | // For each file, maintain a map from truncated fingerprint value to the 30 | // number of hits we've had so far. This makes sure that we don't overcount 31 | // the number of hits: if fprint contains two copies of value 4 but 4 only 32 | // appears once in a given file, we don't want to double-count it. 33 | hits := make(map[fileID]map[uint16]int16) 34 | 35 | for _, v := range fprint { 36 | key := uint16(v >> 16) 37 | for id, cnt := range t.m[key] { 38 | if seen := hits[id][key]; seen < cnt { 39 | m := hits[id] 40 | if m == nil { 41 | m = make(map[uint16]int16) 42 | hits[id] = m 43 | } 44 | m[key]++ 45 | } 46 | } 47 | } 48 | 49 | // Sum the hits for each file and keep the ones that reached the threshold. 
50 | ids := make([]fileID, 0, len(hits)) 51 | for id, m := range hits { 52 | var cnt int 53 | for _, v := range m { 54 | cnt += int(v) 55 | } 56 | if cnt >= thresh { 57 | ids = append(ids, id) 58 | } 59 | } 60 | return ids 61 | } 62 | -------------------------------------------------------------------------------- /scan_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Daniel Erat. 2 | // All rights reserved. 3 | 4 | package main 5 | 6 | import ( 7 | "reflect" 8 | "sort" 9 | "testing" 10 | ) 11 | 12 | func TestCompareFingerprints(t *testing.T) { 13 | for _, tc := range []struct { 14 | a, b []uint32 15 | minLength bool 16 | score float64 17 | aoff, boff int 18 | }{ 19 | { 20 | []uint32{0x0000ffe4}, 21 | []uint32{0xffff0f14}, 22 | false, 8.0 / 32, 0, 0, 23 | }, 24 | { 25 | []uint32{0xfffffffe, 0x80000001}, 26 | []uint32{0x7fffffff, 0xf0000001}, 27 | false, 59.0 / 64, 0, 0, 28 | }, 29 | { 30 | []uint32{0x00000000, 0x01010101, 0xffffffff, 0xcafebeef}, 31 | []uint32{0x01010101, 0xffffffff, 0xcafebeef, 0x00000000}, 32 | false, 96.0 / 128, 1, 0, 33 | }, 34 | { 35 | []uint32{0xffffffff, 0x01010101}, 36 | []uint32{0x00000000, 0xffffffff, 0x01010101}, 37 | false, 64.0 / 96, 0, 1, 38 | }, 39 | { 40 | []uint32{0x00000000, 0xffffffff, 0x01010101}, 41 | []uint32{0xffffffff, 0x01010101}, 42 | true, 64.0 / 64, 1, 0, 43 | }, 44 | } { 45 | if score, aoff, boff := compareFingerprints(tc.a, tc.b, tc.minLength); score != tc.score || aoff != tc.aoff || boff != tc.boff { 46 | t.Errorf("compareFingerprints(%v, %v, %v) = (%0.3f, %d, %d); want (%0.3f, %d, %d)", 47 | tc.a, tc.b, tc.minLength, score, aoff, boff, tc.score, tc.aoff, tc.boff) 48 | } 49 | } 50 | } 51 | 52 | func TestComponents(t *testing.T) { 53 | edges := make(map[fileID][]fileID) 54 | add := func(a, b fileID) { 55 | edges[a] = append(edges[a], b) 56 | edges[b] = append(edges[b], a) 57 | } 58 | add(1, 2) 59 | add(1, 3) 60 | add(2, 3) 61 | add(3, 4) 62 | add(5, 6) 63 | 
// fpcalcSettings contains command-line settings for the fpcalc utility.
// The quoted text on each field is fpcalc's own description of the flag.
type fpcalcSettings struct {
	length    float64 // "-length SECS  Restrict the duration of the processed input audio (default 120)"
	chunk     float64 // "-chunk SECS  Split the input audio into chunks of this duration"
	algorithm int     // "-algorithm NUM  Set the algorithm method (default 2)"
	overlap   bool    // "-overlap  Overlap the chunks slightly to make sure audio on the edges is fingerprinted"
}

// defaultFpcalcSettings returns the settings used when no flags override them:
// 15 seconds of audio, no chunking, algorithm 2 and no overlap.
func defaultFpcalcSettings() *fpcalcSettings {
	s := new(fpcalcSettings) // chunk and overlap keep their zero values
	s.length = 15
	s.algorithm = 2
	return s
}

// String returns a compact, deterministic description of s. newAudioDB stores
// this string and compares it against the settings of later runs.
func (s *fpcalcSettings) String() string {
	const layout = "length=%0.3f,chunk=%0.3f,algorithm=%d,overlap=%v"
	return fmt.Sprintf(layout, s.length, s.chunk, s.algorithm, s.overlap)
}

// haveFpcalc returns false if fpcalc isn't in $PATH.
func haveFpcalc() bool {
	if _, err := exec.LookPath("fpcalc"); err != nil {
		return false
	}
	return true
}
45 | func getFpcalcVersion() (string, error) { 46 | if !haveFpcalc() { 47 | return "", errors.New("fpcalc executable not found") 48 | } 49 | var stderr bytes.Buffer 50 | cmd := exec.Command("fpcalc", "-version") 51 | cmd.Stderr = &stderr 52 | out, err := cmd.Output() 53 | if err != nil { 54 | return "", fmt.Errorf("%v (%q)", err, strings.Split(stderr.String(), "\n")[0]) 55 | } 56 | return strings.TrimSpace(string(out)), nil 57 | } 58 | 59 | // fpcalcResult contains the result of running fpcalc against a file. 60 | type fpcalcResult struct { 61 | Fingerprint []uint32 `json:"fingerprint"` 62 | Duration float64 `json:"duration"` 63 | } 64 | 65 | // emptyFingerprintError is returned by runFpcalc when an audio file is too short 66 | // to be fingerprinted. 67 | var errEmptyFingerprint = errors.New("empty fingerprint") 68 | 69 | // runFpcalc runs fpcalc to compute a fingerprint for path per settings. 70 | func runFpcalc(path string, settings *fpcalcSettings) (*fpcalcResult, error) { 71 | args := []string{ 72 | "-raw", 73 | "-json", 74 | "-length", strconv.FormatFloat(settings.length, 'f', 3, 64), 75 | "-algorithm", strconv.Itoa(settings.algorithm), 76 | } 77 | if settings.chunk > 0 { 78 | args = append(args, "-chunk", strconv.FormatFloat(settings.chunk, 'f', 3, 64)) 79 | } 80 | if settings.overlap { 81 | args = append(args, "-overlap") 82 | } 83 | args = append(args, path) 84 | 85 | out, err := exec.Command("fpcalc", args...).Output() 86 | if err != nil { 87 | // Try to get some additional info from stderr. 
88 | if exit, ok := err.(*exec.ExitError); ok { 89 | if stderr := strings.SplitN(string(exit.Stderr), "\n", 2)[0]; stderr != "" { 90 | if strings.TrimSpace(stderr) == "ERROR: Empty fingerprint" { 91 | return nil, errEmptyFingerprint 92 | } 93 | err = fmt.Errorf("%v (%v)", err, stderr) 94 | } 95 | } 96 | return nil, err 97 | } 98 | var res fpcalcResult 99 | if err := json.Unmarshal(out, &res); err != nil { 100 | return nil, err 101 | } 102 | return &res, nil 103 | } 104 | -------------------------------------------------------------------------------- /main_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Daniel Erat. 2 | // All rights reserved. 3 | 4 | package main 5 | 6 | import ( 7 | "errors" 8 | "os" 9 | "os/exec" 10 | "path/filepath" 11 | "strconv" 12 | "strings" 13 | "testing" 14 | ) 15 | 16 | func checkTestEnv() error { 17 | if _, err := exec.LookPath("soundalike"); err != nil { 18 | return errors.New("soundalike executable not in path") 19 | } 20 | if _, err := os.Stat("testdata"); os.IsNotExist(err) { 21 | return errors.New("testdata/ should be https://github.com/derat/soundalike-testdata checkout") 22 | } else if err != nil { 23 | return err 24 | } 25 | return nil 26 | } 27 | 28 | func TestMain_Scan(t *testing.T) { 29 | if err := checkTestEnv(); err != nil { 30 | t.Fatal("Bad test environment: ", err) 31 | } 32 | 33 | want := strings.TrimLeft(` 34 | 64/Fanfare for Space.mp3 35 | orig/Fanfare for Space.mp3 36 | pad/Fanfare for Space.mp3 37 | 38 | 64/Honey Bee.mp3 39 | orig/Honey Bee.mp3 40 | pad/Honey Bee.mp3 41 | `, "\n") 42 | 43 | db := filepath.Join(t.TempDir(), "test.db") 44 | scanCmd := exec.Command( 45 | "soundalike", 46 | "-db="+db, 47 | "-log-sec=0", 48 | "-print-file-info=false", 49 | "-fpcalc-length=45", 50 | "testdata", 51 | ) 52 | if got, err := scanCmd.Output(); err != nil { 53 | t.Errorf("%s failed: %v", scanCmd, err) 54 | } else if string(got) != want { 55 | t.Errorf("%s printed 
unexpected output:\n got: %q\n want: %q", scanCmd, string(got), want) 56 | } 57 | 58 | // Exclude the second group. 59 | excludeCmd := exec.Command( 60 | "soundalike", 61 | "-db="+db, 62 | "-fpcalc-length=45", 63 | "-exclude", 64 | "64/Honey Bee.mp3", 65 | "orig/Honey Bee.mp3", 66 | "pad/Honey Bee.mp3", 67 | ) 68 | if err := excludeCmd.Run(); err != nil { 69 | t.Errorf("%s failed: %v", excludeCmd, err) 70 | } 71 | 72 | // Do another scan and check that only the first group is printed. 73 | want2 := strings.Split(want, "\n\n")[0] + "\n" 74 | scanCmd = exec.Command(scanCmd.Args[0], scanCmd.Args[1:]...) 75 | if got, err := scanCmd.Output(); err != nil { 76 | t.Errorf("%s failed: %v", scanCmd, err) 77 | } else if string(got) != want2 { 78 | t.Errorf("%s printed unexpected output:\n got: %q\n want: %q", scanCmd, string(got), want2) 79 | } 80 | } 81 | 82 | func TestMain_Compare(t *testing.T) { 83 | if err := checkTestEnv(); err != nil { 84 | t.Fatal("Bad test environment: ", err) 85 | } 86 | 87 | type result int 88 | const ( 89 | identical result = iota 90 | similar 91 | different 92 | ) 93 | const thresh = 0.95 // threshold for "similar" songs 94 | 95 | const ( 96 | file1 = "Honey Bee.mp3" 97 | file2 = "Fanfare for Space.mp3" 98 | ) 99 | 100 | for _, tc := range []struct { 101 | a, b string // paths under testdata/ 102 | res result 103 | }{ 104 | {"orig/" + file1, "orig/" + file1, identical}, 105 | {"orig/" + file1, "orig/" + file2, different}, 106 | {"orig/" + file1, "64/" + file1, similar}, 107 | {"orig/" + file1, "pad/" + file1, similar}, 108 | } { 109 | cmd := exec.Command( 110 | "soundalike", 111 | "-compare", 112 | filepath.Join("testdata", tc.a), 113 | filepath.Join("testdata", tc.b), 114 | ) 115 | out, err := cmd.Output() 116 | if err != nil { 117 | t.Errorf("%s failed: %v", cmd, err) 118 | continue 119 | } 120 | got, err := strconv.ParseFloat(strings.TrimSpace(string(out)), 64) 121 | if err != nil { 122 | t.Errorf("%s printed bad output %q: %v", cmd, 
string(out), err) 123 | continue 124 | } 125 | if tc.res == identical && got != 1.0 { 126 | t.Errorf("%s returned %0.3f; want 1.0", cmd, got) 127 | } else if tc.res == similar && (got < thresh || got >= 1.0) { 128 | t.Errorf("%s returned %0.3f; want [%0.3f, 1.0)", cmd, got, thresh) 129 | } else if tc.res == different && (got < 0 || got >= thresh) { 130 | t.Errorf("%s returned %0.3f; want [0.0, %0.3f)", cmd, got, thresh) 131 | } 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /db_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Daniel Erat. 2 | // All rights reserved. 3 | 4 | package main 5 | 6 | import ( 7 | "path/filepath" 8 | "reflect" 9 | "testing" 10 | ) 11 | 12 | func TestNewAudioDB(t *testing.T) { 13 | p := filepath.Join(t.TempDir(), "test.db") 14 | settings := defaultFpcalcSettings() 15 | if db, err := newAudioDB(p, settings); err != nil { 16 | t.Fatal("newAudioDB with new file failed: ", err) 17 | } else if err := db.close(); err != nil { 18 | t.Fatal("close failed: ", err) 19 | } 20 | 21 | if db, err := newAudioDB(p, settings); err != nil { 22 | t.Fatal("newAudioDB with existing file failed: ", err) 23 | } else if err := db.close(); err != nil { 24 | t.Fatal("close failed: ", err) 25 | } 26 | 27 | settings.length *= 2 28 | if db, err := newAudioDB(p, settings); err == nil { 29 | db.close() 30 | t.Fatal("newAudioDB with different settings unexpectedly succeeded") 31 | } 32 | } 33 | 34 | func TestAudioDB_Save_Get(t *testing.T) { 35 | // Save a fingerprint to the database. 
36 | p := filepath.Join(t.TempDir(), "test.db") 37 | settings := defaultFpcalcSettings() 38 | db, err := newAudioDB(p, settings) 39 | if err != nil { 40 | t.Fatal("newAudioDB failed: ", err) 41 | } 42 | const ( 43 | path = "artist/album/01-title.mp3" 44 | size = 2 * 1024 * 1024 45 | dur = 103.4 46 | ) 47 | fprint := []uint32{ 48 | 2835786340, 2835868260, 2836164325, 2903256545, 3976998131, 3976543474, 49 | 3980795026, 4156954754, 4135987330, 4135991426, 3532003458, 3532019842, 50 | } 51 | id, err := db.save(&fileInfo{0, path, size, dur, fprint}) 52 | if err != nil { 53 | db.close() 54 | t.Fatal("save failed: ", err) 55 | } 56 | if err := db.close(); err != nil { 57 | t.Fatal("close failed: ", err) 58 | } 59 | 60 | // Reopen the database and read the file info back. 61 | if db, err = newAudioDB(p, settings); err != nil { 62 | t.Fatal("newAudioDB failed: ", err) 63 | } 64 | defer db.close() 65 | 66 | want := fileInfo{id, path, size, dur, fprint} 67 | if got, err := db.get(0, path); err != nil { 68 | t.Errorf("get(0, %q) failed: %v", path, err) 69 | } else if got == nil { 70 | t.Errorf("get(0, %q) returned nil", path) 71 | } else if !reflect.DeepEqual(*got, want) { 72 | t.Errorf("get(0, %q) = %+v; want %+v", path, *got, want) 73 | } 74 | if got, err := db.get(id, ""); err != nil { 75 | t.Errorf(`get(%d, "") failed: %v`, id, err) 76 | } else if got == nil { 77 | t.Errorf(`get(%d, "") returned nil`, id) 78 | } else if !reflect.DeepEqual(*got, want) { 79 | t.Errorf(`get(%d, "") = %+v; want %+v`, id, *got, want) 80 | } 81 | 82 | // Check that nil is returned for missing fingerprints. 
83 | const path2 = "some-other-song.mp3" 84 | if got, err := db.get(0, path2); err != nil { 85 | t.Errorf("get(0, %q) failed: %v", path2, err) 86 | } else if got != nil { 87 | t.Errorf("get(0, %q) = %+v; want 0 nil", path2, *got) 88 | } 89 | } 90 | 91 | func TestAudioDB_ExcludedPairs(t *testing.T) { 92 | p := filepath.Join(t.TempDir(), "test.db") 93 | settings := defaultFpcalcSettings() 94 | db, err := newAudioDB(p, settings) 95 | if err != nil { 96 | t.Fatal("newAudioDB failed: ", err) 97 | } 98 | 99 | const ( 100 | a = "a.mp3" 101 | b = "b.mp3" 102 | c = "c.mp3" 103 | ) 104 | 105 | if ok, err := db.isExcludedPair(a, b); err != nil { 106 | t.Fatalf("isExcludedPair(%q, %q) failed: %v", a, b, err) 107 | } else if ok { 108 | t.Fatalf("isExcludedPair(%q, %q) = %v; want %v", a, b, ok, false) 109 | } 110 | if err := db.saveExcludedPair(a, b); err != nil { 111 | t.Fatalf("saveExcludedPair(%q, %q) failed: %v", a, b, err) 112 | } 113 | if ok, err := db.isExcludedPair(a, b); err != nil { 114 | t.Fatalf("isExcludedPair(%q, %q) failed: %v", a, b, err) 115 | } else if !ok { 116 | t.Fatalf("isExcludedPair(%q, %q) = %v; want %v", a, b, ok, true) 117 | } 118 | if ok, err := db.isExcludedPair(b, a); err != nil { 119 | t.Fatalf("isExcludedPair(%q, %q) failed: %v", b, a, err) 120 | } else if !ok { 121 | t.Fatalf("isExcludedPair(%q, %q) = %v; want %v", b, a, ok, true) 122 | } 123 | if ok, err := db.isExcludedPair(a, c); err != nil { 124 | t.Fatalf("isExcludedPair(%q, %q) failed: %v", a, c, err) 125 | } else if ok { 126 | t.Fatalf("isExcludedPair(%q, %q) = %v; want %v", a, c, ok, false) 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /db.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Daniel Erat. 2 | // All rights reserved. 
3 | 4 | package main 5 | 6 | import ( 7 | "bytes" 8 | "database/sql" 9 | "encoding/binary" 10 | "fmt" 11 | "math" 12 | "os" 13 | 14 | _ "github.com/mattn/go-sqlite3" 15 | ) 16 | 17 | var dbByteOrder = binary.LittleEndian 18 | 19 | // audioDB holds previously-computed audio fingerprints. 20 | type audioDB struct{ db *sql.DB } 21 | 22 | // newAudioDB opens or creates a audioDB at path with the supplied settings. 23 | // An error is returned if an existing database was created with different settings. 24 | func newAudioDB(path string, settings *fpcalcSettings) (*audioDB, error) { 25 | if _, err := os.Stat(path); err != nil && !os.IsNotExist(err) { 26 | return nil, err 27 | } 28 | db, err := sql.Open("sqlite3", path) 29 | if err != nil { 30 | return nil, err 31 | } 32 | defer func() { 33 | if db != nil { 34 | db.Close() 35 | } 36 | }() 37 | 38 | for _, q := range []string{ 39 | `CREATE TABLE IF NOT EXISTS Settings (Desc STRING PRIMARY KEY NOT NULL)`, 40 | `CREATE TABLE IF NOT EXISTS Files ( 41 | Path STRING PRIMARY KEY NOT NULL, 42 | Duration FLOAT NOT NULL, 43 | Size INTEGER NOT NULL, 44 | Fingerprint BLOB NOT NULL)`, 45 | `CREATE TABLE IF NOT EXISTS ExcludedPairs ( 46 | PathA STRING NOT NULL, 47 | PathB STRING NOT NULL, 48 | PRIMARY KEY (PathA, PathB))`, 49 | } { 50 | if _, err = db.Exec(q); err != nil { 51 | return nil, err 52 | } 53 | } 54 | 55 | // Check that the database wasn't created with different settings from what we're using now. 
56 | var dbSettings string 57 | if err := db.QueryRow(`SELECT Desc FROM Settings`).Scan(&dbSettings); err == nil { 58 | if s := settings.String(); dbSettings != s { 59 | return nil, fmt.Errorf("database settings (%v) don't match current settings (%v)", dbSettings, s) 60 | } 61 | } else if err == sql.ErrNoRows { 62 | if _, err := db.Exec(`INSERT INTO Settings (Desc) VALUES(?)`, settings.String()); err != nil { 63 | return nil, err 64 | } 65 | } else { 66 | return nil, err 67 | } 68 | 69 | adb := &audioDB{db} 70 | db = nil // disarm Close() call 71 | return adb, nil 72 | } 73 | 74 | func (adb *audioDB) close() error { return adb.db.Close() } 75 | 76 | // fileID uniquely identifies a file in audioDB. 77 | type fileID int32 78 | 79 | // fileInfo contains information about a file stored in audioDB. 80 | type fileInfo struct { 81 | id fileID // unique ID 82 | path string // relative to music dir 83 | size int64 // bytes 84 | duration float64 // seconds 85 | fprint []uint32 86 | } 87 | 88 | // get returns information about the file with the specified ID or relative path. 89 | // If the file is not present in the database, nil is returned. 
90 | func (adb *audioDB) get(id fileID, path string) (*fileInfo, error) { 91 | // ROWID is automatically assigned by SQLite: https://www.sqlite.org/autoinc.html 92 | pre := `SELECT ROWID, Path, Size, Duration, Fingerprint FROM Files WHERE ` 93 | var row *sql.Row 94 | if id > 0 { 95 | row = adb.db.QueryRow(pre+`ROWID = ?`, id) 96 | } else { 97 | row = adb.db.QueryRow(pre+`Path = ?`, path) 98 | } 99 | 100 | var b []byte 101 | var info fileInfo 102 | if err := row.Scan(&info.id, &info.path, &info.size, &info.duration, &b); err == sql.ErrNoRows { 103 | return nil, nil 104 | } else if err != nil { 105 | return nil, err 106 | } 107 | 108 | if len(b)%4 != 0 { 109 | return nil, fmt.Errorf("invalid fingerprint size %v", len(b)) 110 | } 111 | info.fprint = make([]uint32, 0, len(b)/4) 112 | for i := 0; i < len(b); i += 4 { 113 | info.fprint = append(info.fprint, dbByteOrder.Uint32(b[i:i+4])) 114 | } 115 | return &info, nil 116 | } 117 | 118 | // save saves the supplied file information to the database. 119 | // info.id is ignored. 120 | func (adb *audioDB) save(info *fileInfo) (id fileID, err error) { 121 | var b bytes.Buffer 122 | if err := binary.Write(&b, dbByteOrder, info.fprint); err != nil { 123 | return 0, err 124 | } 125 | res, err := adb.db.Exec(`INSERT INTO Files (Path, Size, Duration, Fingerprint) VALUES(?, ?, ?, ?)`, 126 | info.path, info.size, info.duration, b.Bytes()) 127 | if err != nil { 128 | return 0, err 129 | } 130 | id64, err := res.LastInsertId() 131 | if err != nil { 132 | return 0, err 133 | } 134 | // This is a hack to save space. ROWID is really an int64, but int32 seems like 135 | // more than enough here since IDs are apparently assigned in increasing order. 136 | if id64 <= 0 || id64 > math.MaxInt32 { 137 | return 0, fmt.Errorf("invalid id %v", id64) 138 | } 139 | return fileID(id64), nil 140 | } 141 | 142 | // saveExcludedPair records that the supplied paths are not duplicates of each other. 
143 | func (adb *audioDB) saveExcludedPair(pa, pb string) error { 144 | if pb < pa { 145 | pa, pb = pb, pa 146 | } 147 | _, err := adb.db.Exec(`REPLACE INTO ExcludedPairs (PathA, PathB) VALUES(?, ?)`, pa, pb) 148 | return err 149 | } 150 | 151 | // isExcludedPair returns true if the supplied paths have previously been recorded as 152 | // not being duplicates of each other. 153 | func (adb *audioDB) isExcludedPair(pa, pb string) (bool, error) { 154 | if pb < pa { 155 | pa, pb = pb, pa 156 | } 157 | if rows, err := adb.db.Query(`SELECT * FROM ExcludedPairs WHERE PathA = ? AND PathB = ?`, pa, pb); err != nil { 158 | return false, err 159 | } else { 160 | defer rows.Close() 161 | return rows.Next(), nil 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /scan.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Daniel Erat. 2 | // All rights reserved. 3 | 4 | package main 5 | 6 | import ( 7 | "fmt" 8 | "log" 9 | "math/bits" 10 | "os" 11 | "path/filepath" 12 | "regexp" 13 | "sort" 14 | "strings" 15 | "time" 16 | ) 17 | 18 | // scanOptions contains options for scanFiles. 19 | type scanOptions struct { 20 | dir string // directory containing audio files 21 | fileString string // uncompiled fileRegexp 22 | fileRegexp *regexp.Regexp // matches files to scan 23 | logSec int // logging frequency 24 | lookupThresh float64 // threshold for lookup table in (0.0, 1.0] 25 | matchThresh float64 // threshold for bitwise comparisons in (0.0, 1.0] 26 | matchMinLength bool // use min length (instead of max) for bitwise comparisons 27 | skipBadFiles bool // skip files that can't be fingerprinted by fpcalc 28 | skipNewFiles bool // skip files that aren't in database 29 | } 30 | 31 | func defaultScanOptions() *scanOptions { 32 | return &scanOptions{ 33 | // TODO: I'm just guessing what should be included here. 
See 34 | // https://en.wikipedia.org/wiki/Audio_file_format#List_of_formats and 35 | // https://en.wikipedia.org/wiki/FFmpeg#Supported_codecs_and_formats. 36 | fileString: `(?i)\.(aiff|flac|m4a|mp3|oga|ogg|opus|wav|wma)$`, 37 | logSec: 10, 38 | lookupThresh: 0.25, 39 | matchThresh: 0.95, 40 | skipBadFiles: true, 41 | } 42 | } 43 | 44 | func (o *scanOptions) finish() error { 45 | if o.dir != "" { 46 | o.dir = strings.TrimRight(o.dir, "/") 47 | if fi, err := os.Stat(o.dir); err != nil { 48 | return err 49 | } else if !fi.IsDir() { 50 | return fmt.Errorf("%v is not a directory", o.dir) 51 | } 52 | } 53 | 54 | if o.lookupThresh <= 0 || o.lookupThresh > 1.0 { 55 | return fmt.Errorf("bad lookup threshold %v", o.lookupThresh) 56 | } 57 | if o.matchThresh <= 0 || o.matchThresh > 1.0 { 58 | return fmt.Errorf("bad match threshold %v", o.matchThresh) 59 | } 60 | 61 | var err error 62 | if o.fileRegexp, err = regexp.Compile(o.fileString); err != nil { 63 | return fmt.Errorf("bad file regexp: %v", err) 64 | } 65 | 66 | return nil 67 | } 68 | 69 | // scanFiles scans opts.dir and returns groups of similar files. 70 | func scanFiles(opts *scanOptions, db *audioDB, fps *fpcalcSettings) ([][]*fileInfo, error) { 71 | // filepath.Walk doesn't follow symlinks, so do it manually first. 
72 | dir, err := filepath.EvalSymlinks(opts.dir) 73 | if err != nil { 74 | return nil, err 75 | } 76 | 77 | lookup := newLookupTable() 78 | edges := make(map[fileID][]fileID) 79 | 80 | lastLog := time.Now() 81 | var scanned int 82 | if err := filepath.Walk(dir, func(p string, fi os.FileInfo, err error) error { 83 | if p == dir || fi.IsDir() || !opts.fileRegexp.MatchString(filepath.Base(p)) { 84 | return nil 85 | } 86 | 87 | rel, err := filepath.Rel(dir, p) 88 | if err != nil { 89 | return err 90 | } 91 | 92 | info, err := db.get(0, rel) 93 | if err != nil { 94 | return fmt.Errorf("get %q: %v", rel, err) 95 | } else if info == nil { 96 | if opts.skipNewFiles { 97 | return nil 98 | } 99 | finfo, err := runFpcalc(p, fps) 100 | if err == errEmptyFingerprint { 101 | return nil // skip short files 102 | } else if err != nil { 103 | if opts.skipBadFiles { 104 | log.Printf("Skipping %v: %v", p, err) 105 | return nil 106 | } 107 | return fmt.Errorf("%v: %v", p, err) 108 | } 109 | info = &fileInfo{ 110 | path: rel, 111 | size: fi.Size(), 112 | duration: finfo.Duration, 113 | fprint: finfo.Fingerprint, 114 | } 115 | if info.id, err = db.save(info); err != nil { 116 | return fmt.Errorf("save %q: %v", rel, err) 117 | } 118 | } 119 | 120 | thresh := int(float64(len(info.fprint)) * opts.lookupThresh) 121 | for _, oid := range lookup.find(info.fprint, thresh) { 122 | oinfo, err := db.get(oid, "") 123 | if err != nil { 124 | return err 125 | } else if oinfo == nil { 126 | return fmt.Errorf("%d not in database", oid) 127 | } 128 | if ok, err := db.isExcludedPair(info.path, oinfo.path); err != nil { 129 | return fmt.Errorf("check %q and %q: %v", info.path, oinfo.path, err) 130 | } else if ok { 131 | continue 132 | } 133 | score, _, _ := compareFingerprints(info.fprint, oinfo.fprint, opts.matchMinLength) 134 | if score >= opts.matchThresh { 135 | edges[info.id] = append(edges[info.id], oid) 136 | edges[oid] = append(edges[oid], info.id) 137 | } 138 | } 139 | 140 | lookup.add(info.id, 
info.fprint) 141 | 142 | scanned++ 143 | if opts.logSec > 0 && time.Now().Sub(lastLog).Seconds() >= float64(opts.logSec) { 144 | log.Printf("Scanned %d files", scanned) 145 | lastLog = time.Now() 146 | } 147 | 148 | return nil 149 | }); err != nil { 150 | return nil, err 151 | } 152 | 153 | if opts.logSec > 0 { 154 | log.Printf("Finished scanning %d files", scanned) 155 | } 156 | 157 | var groups [][]*fileInfo 158 | GroupLoop: 159 | for _, comp := range components(edges) { 160 | group := make([]*fileInfo, len(comp)) 161 | for i, id := range comp { 162 | info, err := db.get(id, "") 163 | if err != nil { 164 | return nil, fmt.Errorf("getting info for %d: %v", id, err) 165 | } else if info == nil { 166 | return nil, fmt.Errorf("no info for %d", id) 167 | } 168 | group[i] = info 169 | } 170 | // It's possible for a previously-excluded pair to get joined into the same group 171 | // by a newly-added song. Throw the whole group out if any of its members were 172 | // previously excluded. 173 | for i := 0; i < len(group)-1; i++ { 174 | for j := i + 1; j < len(group); j++ { 175 | if ok, err := db.isExcludedPair(group[i].path, group[j].path); err != nil { 176 | return nil, err 177 | } else if ok { 178 | continue GroupLoop 179 | } 180 | } 181 | } 182 | sort.Slice(group, func(i, j int) bool { return group[i].path < group[j].path }) 183 | groups = append(groups, group) 184 | } 185 | sort.Slice(groups, func(i, j int) bool { return groups[i][0].path < groups[j][0].path }) 186 | return groups, nil 187 | } 188 | 189 | // compareFingerprints returns the ratio of identical bits in a and b to the 190 | // total bits in the longer (or shorter if minLength is true) of the two. 191 | // All possible alignments are checked, and the highest ratio is returned. 
192 | func compareFingerprints(a, b []uint32, minLength bool) (ratio float64, aoff, boff int) { 193 | if len(a) == 0 || len(b) == 0 { 194 | return 0, 0, 0 195 | } 196 | 197 | count := func(a, b []uint32) int { 198 | var cnt int 199 | for i := 0; i < len(a) && i < len(b); i++ { 200 | cnt += 32 - bits.OnesCount32(a[i]^b[i]) 201 | } 202 | return cnt 203 | } 204 | 205 | best := count(a, b) 206 | for i := 1; i < len(a); i++ { 207 | if cnt := count(a[i:], b); cnt > best { 208 | best = cnt 209 | aoff, boff = i, 0 210 | } 211 | } 212 | for i := 1; i < len(b); i++ { 213 | if cnt := count(a, b[i:]); cnt > best { 214 | best = cnt 215 | aoff, boff = 0, i 216 | } 217 | } 218 | 219 | total := len(a) 220 | if (minLength && len(b) < total) || (!minLength && len(b) > total) { 221 | total = len(b) 222 | } 223 | return float64(best) / float64(32*total), aoff, boff 224 | } 225 | 226 | // components returns all components from the undirected graph described by edges. 227 | func components(edges map[fileID][]fileID) [][]fileID { 228 | visited := make(map[fileID]struct{}) 229 | 230 | var search func(fileID) []fileID 231 | search = func(src fileID) []fileID { 232 | if _, ok := visited[src]; ok { 233 | return nil 234 | } 235 | visited[src] = struct{}{} 236 | comp := []fileID{src} 237 | for _, dst := range edges[src] { 238 | comp = append(comp, search(dst)...) 239 | } 240 | return comp 241 | } 242 | 243 | var comps [][]fileID 244 | for src := range edges { 245 | if _, ok := visited[src]; !ok { 246 | comps = append(comps, search(src)) 247 | } 248 | } 249 | return comps 250 | } 251 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Daniel Erat. 2 | // All rights reserved. 
3 | 4 | package main 5 | 6 | import ( 7 | "flag" 8 | "fmt" 9 | "io/ioutil" 10 | "math/bits" 11 | "os" 12 | "os/exec" 13 | "runtime" 14 | "strconv" 15 | "strings" 16 | ) 17 | 18 | var buildVersion = "[non-release]" // injected by create_release.sh 19 | 20 | func main() { 21 | fps := defaultFpcalcSettings() 22 | opts := defaultScanOptions() 23 | 24 | flag.Usage = func() { 25 | fmt.Fprintln(flag.CommandLine.Output(), "Usage: soundalike [flag]... ") 26 | fmt.Fprintln(flag.CommandLine.Output(), "Find duplicate audio files within a directory.") 27 | fmt.Fprintln(flag.CommandLine.Output()) 28 | flag.PrintDefaults() 29 | } 30 | compare := flag.Bool("compare", false, `Compare two files given via positional args instead of scanning directory`+ 31 | "\n(increases -fpcalc-length by default)") 32 | compareInterval := flag.Int("compare-interval", 0, `Score interval for -compare (0 to print overall score)`) 33 | dbPath := flag.String("db", "", `SQLite database file for storing file info (temp file if unset)`) 34 | exclude := flag.Bool("exclude", false, `Update database to exclude files in positional args from being grouped together`) 35 | flag.StringVar(&opts.fileString, "file-regexp", opts.fileString, "Regular expression for audio files") 36 | flag.IntVar(&fps.algorithm, "fpcalc-algorithm", fps.algorithm, `Fingerprint algorithm`) 37 | flag.Float64Var(&fps.chunk, "fpcalc-chunk", fps.chunk, `Audio chunk duration in seconds`) 38 | flag.Float64Var(&fps.length, "fpcalc-length", fps.length, `Max audio duration in seconds to process`) 39 | flag.BoolVar(&fps.overlap, "fpcalc-overlap", fps.overlap, `Overlap audio chunks in fingerprints`) 40 | flag.IntVar(&opts.logSec, "log-sec", opts.logSec, `Logging frequency in seconds (0 or negative to disable logging)`) 41 | flag.Float64Var(&opts.lookupThresh, "lookup-threshold", opts.lookupThresh, `Threshold for lookup table in (0.0, 1.0]`) 42 | flag.Float64Var(&opts.matchThresh, "match-threshold", opts.matchThresh, `Threshold for bitwise 
comparisons in (0.0, 1.0]`) 43 | flag.BoolVar(&opts.matchMinLength, "match-min-length", opts.matchMinLength, 44 | `Use shorter fingerprint length when scoring bitwise comparisons`) 45 | printFileInfo := flag.Bool("print-file-info", true, `Print file sizes and durations`) 46 | printFullPaths := flag.Bool("print-full-paths", false, `Print absolute file paths (rather than relative to dir)`) 47 | flag.BoolVar(&opts.skipBadFiles, "skip-bad-files", opts.skipBadFiles, `Skip files that can't be fingerprinted by fpcalc`) 48 | flag.BoolVar(&opts.skipNewFiles, "skip-new-files", opts.skipNewFiles, `Skip files not already in database given via -db`) 49 | printVersion := flag.Bool("version", false, `Print version and exit`) 50 | flag.Parse() 51 | 52 | os.Exit(func() int { 53 | if *printVersion { 54 | doVersion() 55 | return 0 56 | } 57 | 58 | // Perform some initial checks before creating the database file. 59 | if *compare { 60 | if flag.NArg() != 2 { 61 | flag.Usage() 62 | return 2 63 | } 64 | } else if *exclude { 65 | if flag.NArg() < 2 { 66 | flag.Usage() 67 | return 2 68 | } 69 | if *dbPath == "" { 70 | fmt.Fprintln(os.Stderr, "-exclude requires -db") 71 | return 2 72 | } 73 | } else { 74 | if flag.NArg() != 1 { 75 | flag.Usage() 76 | return 2 77 | } 78 | opts.dir = flag.Arg(0) 79 | } 80 | if err := opts.finish(); err != nil { 81 | fmt.Fprintln(os.Stderr, err) 82 | return 2 83 | } 84 | 85 | if !haveFpcalc() { 86 | advice := "install from https://github.com/acoustid/chromaprint/releases" 87 | if _, err := exec.LookPath("apt"); err == nil { 88 | advice = "apt install libchromaprint-tools" 89 | } 90 | fmt.Fprintf(os.Stderr, "fpcalc not in path (%v)\n", advice) 91 | return 1 92 | } 93 | 94 | if *compare { 95 | // If -fpcalc-length wasn't specified, make it default to a larger 96 | // value so we'll fingerprint the files in their entirety. 
97 | if !flagWasSet("fpcalc-length") { 98 | fps.length = 7200 99 | } 100 | return doCompare(flag.Arg(0), flag.Arg(1), opts, fps, *compareInterval) 101 | } 102 | 103 | if *dbPath == "" { 104 | f, err := ioutil.TempFile("", "soundalike.db.*") 105 | if err != nil { 106 | fmt.Fprintln(os.Stderr, "Failed creating temp file for database:", err) 107 | return 1 108 | } 109 | f.Close() 110 | *dbPath = f.Name() 111 | defer os.Remove(*dbPath) 112 | } 113 | db, err := newAudioDB(*dbPath, fps) 114 | if err != nil { 115 | fmt.Fprintln(os.Stderr, "Failed opening database:", err) 116 | return 1 117 | } 118 | defer func() { 119 | if err := db.close(); err != nil { 120 | fmt.Fprintln(os.Stderr, "Failed closing database:", err) 121 | } 122 | }() 123 | 124 | if *exclude { 125 | // Save all possible pairs within the group. 126 | for i := 0; i < flag.NArg()-1; i++ { 127 | for j := i + 1; j < flag.NArg(); j++ { 128 | if err := db.saveExcludedPair(flag.Arg(i), flag.Arg(j)); err != nil { 129 | fmt.Fprintln(os.Stderr, "Failed saving excluded pair:", err) 130 | return 1 131 | } 132 | } 133 | } 134 | return 0 135 | } 136 | 137 | groups, err := scanFiles(opts, db, fps) 138 | if err != nil { 139 | fmt.Fprintln(os.Stderr, "Failed scanning files:", err) 140 | return 1 141 | } 142 | 143 | var pre string 144 | if *printFullPaths { 145 | pre = opts.dir + "/" 146 | } 147 | for i, infos := range groups { 148 | if i != 0 { 149 | fmt.Println() 150 | } 151 | if *printFileInfo { 152 | for _, ln := range formatFiles(infos, pre) { 153 | fmt.Println(ln) 154 | } 155 | } else { 156 | for _, info := range infos { 157 | fmt.Println(pre + info.path) 158 | } 159 | } 160 | } 161 | 162 | return 0 163 | }()) 164 | } 165 | 166 | // flagWasSet returns true if the specified flag was passed on the command line. 
167 | func flagWasSet(name string) bool { 168 | var found bool 169 | flag.Visit(func(f *flag.Flag) { 170 | if f.Name == name { 171 | found = true 172 | } 173 | }) 174 | return found 175 | } 176 | 177 | // doVersion prints the soundalike and fpcalc versions to stdout. 178 | func doVersion() { 179 | fmt.Printf("soundalike version %v compiled with %v for %v/%v\n", 180 | buildVersion, runtime.Version(), runtime.GOOS, runtime.GOARCH) 181 | if ver, err := getFpcalcVersion(); err != nil { 182 | fmt.Printf("Failed getting fpcalc version: %v\n", err) 183 | } else { 184 | fmt.Println(ver) 185 | } 186 | } 187 | 188 | // doCompare compares the files at pa and pb on behalf of the -compare flag. 189 | func doCompare(pa, pb string, opts *scanOptions, fps *fpcalcSettings, interval int) int { 190 | ra, err := runFpcalc(pa, fps) 191 | if err != nil { 192 | fmt.Fprintf(os.Stderr, "Failed fingerprinting %v: %v\n", pa, err) 193 | return 1 194 | } 195 | rb, err := runFpcalc(pb, fps) 196 | if err != nil { 197 | fmt.Fprintf(os.Stderr, "Failed fingerprinting %v: %v\n", pb, err) 198 | return 1 199 | } 200 | score, aoff, boff := compareFingerprints(ra.Fingerprint, rb.Fingerprint, opts.matchMinLength) 201 | if interval <= 0 { 202 | fmt.Printf("%0.3f\n", score) 203 | } else { 204 | if aoff > boff { 205 | fmt.Printf("[%d only in b]\n", aoff-boff) 206 | } else if boff > aoff { 207 | fmt.Printf("[%d only in a]\n", boff-aoff) 208 | } 209 | a := ra.Fingerprint[aoff:] 210 | b := rb.Fingerprint[boff:] 211 | var i, ncmp, nbits int 212 | for ; i < len(a) && i < len(b); i++ { 213 | if i%interval == 0 && ncmp > 0 { 214 | fmt.Printf("%4d: %0.3f\n", i, float64(nbits)/float64(32*ncmp)) 215 | nbits = 0 216 | ncmp = 0 217 | } 218 | nbits += 32 - bits.OnesCount32(a[i]^b[i]) 219 | ncmp++ 220 | } 221 | if ncmp > 0 { 222 | fmt.Printf("%4d: %0.3f\n", i, float64(nbits)/float64(32*ncmp)) 223 | } 224 | if na, nb := len(a), len(b); na > nb { 225 | fmt.Printf("[%d only in a]\n", na-nb) 226 | } else if nb > na { 227 | 
fmt.Printf("[%d only in b]\n", nb-na) 228 | } 229 | } 230 | return 0 231 | } 232 | 233 | // formatFiles returns column-aligned lines describing each supplied file. 234 | func formatFiles(infos []*fileInfo, pathPrefix string) []string { 235 | if len(infos) == 0 { 236 | return nil 237 | } 238 | 239 | var rows [][]string 240 | lens := make([]int, 3) 241 | for _, info := range infos { 242 | row := []string{ 243 | pathPrefix + info.path, 244 | strconv.FormatFloat(float64(info.size)/(1024*1024), 'f', 2, 64), 245 | strconv.FormatFloat(info.duration, 'f', 2, 64), 246 | } 247 | rows = append(rows, row) 248 | for i, max := range lens { 249 | if ln := len(row[i]); ln > max { 250 | lens[i] = ln 251 | } 252 | } 253 | } 254 | lines := make([]string, len(rows)) 255 | fs := strings.Join([]string{ 256 | "%" + strconv.Itoa(-lens[0]) + "s", // path 257 | "%" + strconv.Itoa(lens[1]) + "s MB", // size 258 | "%" + strconv.Itoa(lens[2]) + "s sec", // duration 259 | }, " ") 260 | for i, row := range rows { 261 | lines[i] = fmt.Sprintf(fs, row[0], row[1], row[2]) 262 | } 263 | return lines 264 | } 265 | --------------------------------------------------------------------------------