├── CONTRIBUTING.md ├── internal ├── dfs │ ├── test_files │ │ ├── fileThree.bin │ │ ├── fileTwo.bin │ │ ├── fileOne.bin │ │ ├── fileFour.bin │ │ └── fileFourCpy.bin │ ├── detectother.go │ ├── detectmacos.go │ ├── detectlinux.go │ ├── detectwin.go │ ├── dfs_test.go │ ├── fsinfo_test.go │ ├── fsinfo.go │ └── dfs.go ├── dwalk │ ├── test_files │ │ ├── fileThree.bin │ │ ├── fileTwo.bin │ │ ├── test_dir │ │ │ ├── file1.txt │ │ │ └── file2.txt │ │ ├── fileFour.bin │ │ ├── fileOne.bin │ │ └── fileFourCpy.bin │ ├── inode_other.go │ ├── inode_unix.go │ ├── dwalk_hardlink_unix_test.go │ ├── dwalk_test.go │ └── dwalk.go ├── dmap │ ├── test_files │ ├── export_other.go │ ├── fileszmap.go │ ├── export_unix.go │ ├── export.go │ ├── dmap.go │ └── dmap_test.go ├── config │ ├── config_test.go │ └── config.go ├── ui │ ├── ui_test.go │ └── ui.go ├── dsklog │ ├── dsklog_test.go │ └── dsklog.go └── bench │ └── bench_test.go ├── ss ├── ss-tui.png ├── dskDitto-ss.png ├── ss-confirm.png ├── ss-pretty.png ├── dskDitto-ss-old.png ├── dskDitto-ss-one.png └── dskDitto-ss-two.png ├── docs ├── ditto.png ├── gnome.png ├── gnome-small.png ├── dskditto_gnome_128.png ├── dskditto_gnome_256.png ├── dskditto_gnome_32.png └── dskditto_gnome_64.png ├── cmd └── dskDitto │ ├── main_test.go │ └── main.go ├── tools ├── xtime.zsh └── genfiles.go ├── .gitignore ├── pkg └── utils │ ├── utils_test.go │ ├── utils.go │ ├── ansiart_test.go │ └── ansiart.go ├── Makefile ├── go.mod ├── README.md ├── LICENSE └── go.sum /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /internal/dfs/test_files/fileThree.bin: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /internal/dfs/test_files/fileTwo.bin: -------------------------------------------------------------------------------- 
1 | J0M -------------------------------------------------------------------------------- /internal/dwalk/test_files/fileThree.bin: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /internal/dwalk/test_files/fileTwo.bin: -------------------------------------------------------------------------------- 1 | J0M -------------------------------------------------------------------------------- /internal/dmap/test_files: -------------------------------------------------------------------------------- 1 | ../dwalk/test_files/ -------------------------------------------------------------------------------- /internal/config/config_test.go: -------------------------------------------------------------------------------- 1 | package config 2 | -------------------------------------------------------------------------------- /internal/dwalk/test_files/test_dir/file1.txt: -------------------------------------------------------------------------------- 1 | test1 2 | -------------------------------------------------------------------------------- /internal/dwalk/test_files/test_dir/file2.txt: -------------------------------------------------------------------------------- 1 | test2 2 | -------------------------------------------------------------------------------- /ss/ss-tui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/ss/ss-tui.png -------------------------------------------------------------------------------- /docs/ditto.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/docs/ditto.png -------------------------------------------------------------------------------- /docs/gnome.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/docs/gnome.png -------------------------------------------------------------------------------- /ss/dskDitto-ss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/ss/dskDitto-ss.png -------------------------------------------------------------------------------- /ss/ss-confirm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/ss/ss-confirm.png -------------------------------------------------------------------------------- /ss/ss-pretty.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/ss/ss-pretty.png -------------------------------------------------------------------------------- /docs/gnome-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/docs/gnome-small.png -------------------------------------------------------------------------------- /ss/dskDitto-ss-old.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/ss/dskDitto-ss-old.png -------------------------------------------------------------------------------- /ss/dskDitto-ss-one.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/ss/dskDitto-ss-one.png -------------------------------------------------------------------------------- /ss/dskDitto-ss-two.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/ss/dskDitto-ss-two.png 
-------------------------------------------------------------------------------- /docs/dskditto_gnome_128.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/docs/dskditto_gnome_128.png -------------------------------------------------------------------------------- /docs/dskditto_gnome_256.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/docs/dskditto_gnome_256.png -------------------------------------------------------------------------------- /docs/dskditto_gnome_32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/docs/dskditto_gnome_32.png -------------------------------------------------------------------------------- /docs/dskditto_gnome_64.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/docs/dskditto_gnome_64.png -------------------------------------------------------------------------------- /cmd/dskDitto/main_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "testing" 4 | 5 | func TestNewDupView(t *testing.T) { 6 | 7 | } 8 | -------------------------------------------------------------------------------- /internal/dfs/test_files/fileOne.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/internal/dfs/test_files/fileOne.bin -------------------------------------------------------------------------------- /internal/dfs/test_files/fileFour.bin: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/internal/dfs/test_files/fileFour.bin -------------------------------------------------------------------------------- /internal/dwalk/test_files/fileFour.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/internal/dwalk/test_files/fileFour.bin -------------------------------------------------------------------------------- /internal/dwalk/test_files/fileOne.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/internal/dwalk/test_files/fileOne.bin -------------------------------------------------------------------------------- /internal/dfs/test_files/fileFourCpy.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/internal/dfs/test_files/fileFourCpy.bin -------------------------------------------------------------------------------- /internal/dwalk/test_files/fileFourCpy.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdefrancesco/dskDitto/HEAD/internal/dwalk/test_files/fileFourCpy.bin -------------------------------------------------------------------------------- /tools/xtime.zsh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zsh 2 | 3 | # Use gnu-time. macOS time utility isn't as robust it seems. 4 | /opt/homebrew/bin/gtime -f '%Uu %Ss %er %MkB %C' "$@" 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binary name. 
2 | *.prof 3 | *.svg 4 | .DS_Store 5 | *.out 6 | *.log 7 | files/ 8 | app.log 9 | bin/ 10 | .vscode/ 11 | testdata/ 12 | test_files/ 13 | 14 | ./bin/dskDitto 15 | bench.test 16 | -------------------------------------------------------------------------------- /internal/dfs/detectother.go: -------------------------------------------------------------------------------- 1 | //go:build !linux && !darwin && !freebsd && !openbsd && !netbsd && !dragonfly && !windows 2 | 3 | package dfs 4 | 5 | import "fmt" 6 | 7 | func detectFilesystem(path string) (string, error) { 8 | return "", fmt.Errorf("filesystem detection not supported on this OS") 9 | } 10 | -------------------------------------------------------------------------------- /internal/dwalk/inode_other.go: -------------------------------------------------------------------------------- 1 | //go:build !unix 2 | 3 | package dwalk 4 | 5 | import "os" 6 | 7 | // fileIdentity is a no-op placeholder on non-Unix platforms where we don't 8 | // currently attempt inode-based hardlink deduplication. 
9 | type fileIdentity struct{} 10 | 11 | func getFileIdentity(info os.FileInfo) (fileIdentity, bool) { 12 | return fileIdentity{}, false 13 | } 14 | -------------------------------------------------------------------------------- /internal/dmap/export_other.go: -------------------------------------------------------------------------------- 1 | //go:build !unix 2 | 3 | package dmap 4 | 5 | import ( 6 | "fmt" 7 | "os" 8 | ) 9 | 10 | func openFileSecure(absPath, dirPath, fileName string) (*os.File, error) { 11 | file, err := os.OpenFile(absPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o600) 12 | if err != nil { 13 | return nil, fmt.Errorf("open output file %s: %w", absPath, err) 14 | } 15 | return file, nil 16 | } 17 | -------------------------------------------------------------------------------- /internal/dfs/detectmacos.go: -------------------------------------------------------------------------------- 1 | //go:build darwin || freebsd || openbsd || netbsd || dragonfly 2 | 3 | package dfs 4 | 5 | import ( 6 | "syscall" 7 | ) 8 | 9 | // extract name from fixed-size C array 10 | func bsdNameToString(arr []int8) string { 11 | buf := make([]byte, 0, len(arr)) 12 | for _, c := range arr { 13 | if c == 0 { 14 | break 15 | } 16 | buf = append(buf, byte(c)) 17 | } 18 | return string(buf) 19 | } 20 | 21 | func detectFilesystem(path string) (string, error) { 22 | var stat syscall.Statfs_t 23 | if err := syscall.Statfs(path, &stat); err != nil { 24 | return "", err 25 | } 26 | 27 | return bsdNameToString(stat.Fstypename[:]), nil 28 | } 29 | -------------------------------------------------------------------------------- /internal/ui/ui_test.go: -------------------------------------------------------------------------------- 1 | package ui 2 | 3 | import "testing" 4 | 5 | // TestGenerateConfirmationCodes tests the GenConfirmationCode function 6 | func TestGenerateConfirmationCodes(t *testing.T) { 7 | 8 | for i := range 100 { 9 | code := GenConfirmationCode() 10 | 11 | if 
len(code) < 5 || len(code) > 8 { 12 | t.Errorf("Generated code length out of bounds: got %d, want between 5 and 8", len(code)) 13 | } 14 | for _, c := range code { 15 | if !((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')) { 16 | t.Errorf("Generated code contains invalid character: %q", c) 17 | } 18 | } 19 | 20 | if i%10 == 0 { 21 | t.Logf("Sample generated code: %s", code) 22 | } 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /internal/dfs/detectlinux.go: -------------------------------------------------------------------------------- 1 | //go:build linux 2 | 3 | package dfs 4 | 5 | import ( 6 | "fmt" 7 | "syscall" 8 | ) 9 | 10 | func detectFilesystem(path string) (string, error) { 11 | var stat syscall.Statfs_t 12 | if err := syscall.Statfs(path, &stat); err != nil { 13 | return "", err 14 | } 15 | 16 | magic := uint32(stat.Type) 17 | 18 | known := map[uint32]string{ 19 | 0xEF53: "ext2/ext3/ext4", 20 | 0x9123683E: "btrfs", 21 | 0x58465342: "xfs", 22 | 0x5346544e: "ntfs", 23 | 0x01021994: "tmpfs", 24 | 0x73717368: "squashfs", 25 | 0x2fc12fc1: "zfs", 26 | 0x62656572: "f2fs", 27 | 0x6462671f: "debugfs", 28 | 0x858458f6: "ramfs", 29 | } 30 | 31 | if fs, ok := known[magic]; ok { 32 | return fs, nil 33 | } 34 | 35 | return fmt.Sprintf("unknown (magic=0x%x)", magic), nil 36 | } 37 | -------------------------------------------------------------------------------- /internal/dfs/detectwin.go: -------------------------------------------------------------------------------- 1 | //go:build windows 2 | 3 | package dfs 4 | 5 | import ( 6 | "path/filepath" 7 | 8 | "golang.org/x/sys/windows" 9 | ) 10 | 11 | func detectFilesystem(path string) (string, error) { 12 | full, err := filepath.Abs(path) 13 | if err != nil { 14 | return "", err 15 | } 16 | 17 | p, err := windows.UTF16PtrFromString(full) 18 | if err != nil { 19 | return "", err 20 | } 21 | 22 | volName := make([]uint16, windows.MAX_PATH) 23 | 
fsName := make([]uint16, windows.MAX_PATH) 24 | var serial, maxCompLen, flags uint32 25 | 26 | err = windows.GetVolumeInformation( 27 | p, 28 | &volName[0], 29 | uint32(len(volName)), 30 | &serial, 31 | &maxCompLen, 32 | &flags, 33 | &fsName[0], 34 | uint32(len(fsName)), 35 | ) 36 | if err != nil { 37 | return "", err 38 | } 39 | 40 | return windows.UTF16ToString(fsName), nil 41 | } 42 | -------------------------------------------------------------------------------- /internal/dwalk/inode_unix.go: -------------------------------------------------------------------------------- 1 | //go:build unix 2 | 3 | package dwalk 4 | 5 | import ( 6 | "os" 7 | "syscall" 8 | ) 9 | 10 | // fileIdentity uniquely identifies a file by device and inode on Unix systems. 11 | type fileIdentity struct { 12 | dev uint64 13 | ino uint64 14 | } 15 | 16 | // getFileIdentity extracts a stable identity for a regular file based on its 17 | // underlying Stat_t. If the platform or info does not expose this, the 18 | // second return value is false. 19 | func getFileIdentity(info os.FileInfo) (fileIdentity, bool) { 20 | stat, ok := info.Sys().(*syscall.Stat_t) 21 | if !ok || stat == nil { 22 | return fileIdentity{}, false 23 | } 24 | return fileIdentity{ 25 | dev: uint64(stat.Dev), // #nosec G115 -- platform-defined but safely representable in uint64 26 | ino: uint64(stat.Ino), // #nosec G115 -- platform-defined but safely representable in uint64 27 | }, true 28 | } 29 | -------------------------------------------------------------------------------- /internal/dmap/fileszmap.go: -------------------------------------------------------------------------------- 1 | // DFileSize cache is a simple KV store of files and sizes we have seen so far. We won't 2 | // need to hash any file without an entry in the map. 
3 | package dmap 4 | 5 | import ( 6 | "fmt" 7 | ) 8 | 9 | type DFileSizeCache struct { 10 | // A small store that keeps file sizes cached so we reference 11 | // it in order to decide if hashing the entire file is necessary. 12 | // i.e if file has size 100, the entry will be the file size as key 13 | // value the number of files with that size. If entry has more than one 14 | // file of specific size we may need to hash things or filter through 15 | // another heuristic. 16 | sizeMap map[uint64]uint64 17 | } 18 | 19 | func NewDFileSizeCache() *DFileSizeCache { 20 | fileCache := &DFileSizeCache{} 21 | 22 | fileCache.sizeMap = make(map[uint64]uint64) 23 | return fileCache 24 | } 25 | 26 | func (b *DFileSizeCache) displayMap() { 27 | fmt.Printf("%v+", b.sizeMap) 28 | } 29 | -------------------------------------------------------------------------------- /internal/config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import "github.com/jdefrancesco/dskDitto/internal/dfs" 4 | 5 | type Config struct { 6 | // Skip over empty files. 7 | SkipEmpty bool 8 | // Ignore Symbolic Links. 9 | SkipSymLinks bool 10 | // Ignore Hard Links. 11 | SkipHardLinks bool 12 | // SkipHidden controls whether hidden dotfiles and directories are skipped. 13 | SkipHidden bool 14 | // SkipVirtualFS controls whether well-known virtual filesystem mount points are skipped. 15 | SkipVirtualFS bool 16 | // MaxDepth limits how deeply the walker will recurse into subdirectories. A value of -1 means unlimited. 17 | MaxDepth int 18 | // File size limits. 19 | MinFileSize uint 20 | MaxFileSize uint 21 | // MinDuplicates controls the minimum number of files required for a duplicate group to be surfaced. 22 | MinDuplicates uint 23 | // HashAlgorithm selects which digest is used when hashing file contents. 
24 | HashAlgorithm dfs.HashAlgorithm 25 | } 26 | -------------------------------------------------------------------------------- /internal/dmap/export_unix.go: -------------------------------------------------------------------------------- 1 | //go:build unix 2 | 3 | package dmap 4 | 5 | import ( 6 | "fmt" 7 | "os" 8 | "path/filepath" 9 | 10 | "golang.org/x/sys/unix" 11 | ) 12 | 13 | func openFileSecure(absPath, dirPath, fileName string) (*os.File, error) { 14 | cleaned := filepath.Clean(fileName) 15 | if cleaned == "" || cleaned == "." || cleaned == ".." || cleaned != fileName { 16 | return nil, fmt.Errorf("invalid output filename %q", fileName) 17 | } 18 | 19 | // #nosec G304 -- dirPath and fileName are validated and originate from user-supplied path after cleaning 20 | dirHandle, err := os.Open(dirPath) 21 | if err != nil { 22 | return nil, fmt.Errorf("open directory %s: %w", dirPath, err) 23 | } 24 | defer dirHandle.Close() 25 | 26 | fd, err := unix.Openat(int(dirHandle.Fd()), cleaned, unix.O_WRONLY|unix.O_CREAT|unix.O_TRUNC|unix.O_CLOEXEC, 0o600) 27 | if err != nil { 28 | return nil, fmt.Errorf("open output file %s: %w", absPath, err) 29 | } 30 | 31 | return os.NewFile(uintptr(fd), absPath), nil 32 | } 33 | -------------------------------------------------------------------------------- /internal/dwalk/dwalk_hardlink_unix_test.go: -------------------------------------------------------------------------------- 1 | //go:build unix 2 | 3 | package dwalk 4 | 5 | import ( 6 | "os" 7 | "path/filepath" 8 | "testing" 9 | 10 | "github.com/jdefrancesco/dskDitto/internal/config" 11 | "github.com/jdefrancesco/dskDitto/internal/dfs" 12 | "github.com/jdefrancesco/dskDitto/internal/dsklog" 13 | ) 14 | 15 | // TestHardlinkDeduplicationUnix verifies that multiple hardlinks to the same 16 | // inode are treated as a single file by the walker. 
17 | func TestHardlinkDeduplicationUnix(t *testing.T) { 18 | dsklog.InitializeDlogger("/dev/null") 19 | 20 | root := t.TempDir() 21 | orig := filepath.Join(root, "orig.txt") 22 | link := filepath.Join(root, "link.txt") 23 | 24 | if err := os.WriteFile(orig, []byte("hello"), 0o644); err != nil { 25 | t.Fatalf("failed to create original file: %v", err) 26 | } 27 | 28 | if err := os.Link(orig, link); err != nil { 29 | // If the platform does not support hard links for some reason, skip. 30 | t.Skipf("hard links not supported: %v", err) 31 | } 32 | 33 | cfg := config.Config{ 34 | HashAlgorithm: dfs.HashSHA256, 35 | SkipVirtualFS: true, 36 | MaxDepth: -1, 37 | } 38 | 39 | paths := collectRelativePaths(t, root, cfg) 40 | if len(paths) != 1 { 41 | t.Fatalf("expected hardlinked files to be treated as one; got %d paths: %v", len(paths), paths) 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /tools/genfiles.go: -------------------------------------------------------------------------------- 1 | //go:build tools 2 | // +build tools 3 | 4 | package main 5 | 6 | import ( 7 | "crypto/rand" 8 | "fmt" 9 | "io" 10 | "os" 11 | "path/filepath" 12 | ) 13 | 14 | var sizes = []int64{ 15 | 1024, // 1KB 16 | 1024 * 1024, // 1MB 17 | 100 * 1024 * 1024, // 100MB 18 | } 19 | 20 | func randBytes(size int64) []byte { 21 | b := make([]byte, size) 22 | _, _ = rand.Read(b) 23 | return b 24 | } 25 | 26 | func createFiles(dir string, n int) error { 27 | fmt.Println("createFiles running...") 28 | for i := 0; i < n; i++ { 29 | size := sizes[i%len(sizes)] 30 | fname := fmt.Sprintf("file_%d.dat", i) 31 | path := filepath.Join(dir, fname) 32 | 33 | fmt.Printf("Creating: %s\n", path) 34 | f, err := os.Create(path) 35 | if err != nil { 36 | return err 37 | } 38 | defer f.Close() 39 | 40 | _, err = io.Copy(f, &randReader{remaining: size}) 41 | if err != nil { 42 | return err 43 | } 44 | } 45 | return nil 46 | } 47 | 48 | type randReader struct { 49 | 
remaining int64 50 | } 51 | 52 | func (r *randReader) Read(p []byte) (int, error) { 53 | if r.remaining <= 0 { 54 | return 0, io.EOF 55 | } 56 | if int64(len(p)) > r.remaining { 57 | p = p[:r.remaining] 58 | } 59 | n, err := rand.Read(p) 60 | r.remaining -= int64(n) 61 | return n, err 62 | } 63 | 64 | func main() { 65 | dir := "./files" 66 | os.MkdirAll(dir, 0755) 67 | n := 100 68 | if err := createFiles(dir, n); err != nil { 69 | fmt.Fprintf(os.Stderr, "failed: %v\n", err) 70 | os.Exit(1) 71 | } 72 | fmt.Println("Created", n, "test files in", dir) 73 | } 74 | -------------------------------------------------------------------------------- /internal/dsklog/dsklog_test.go: -------------------------------------------------------------------------------- 1 | package dsklog 2 | 3 | import ( 4 | "bytes" 5 | "path/filepath" 6 | "testing" 7 | 8 | "github.com/sirupsen/logrus" 9 | ) 10 | 11 | // Test initialization of the global logger for testing (Dlogger) 12 | func TestGlobalLoggerInitialization(t *testing.T) { 13 | t.Setenv(logLevelEnvVar, "debug") 14 | 15 | logPath := filepath.Join(t.TempDir(), "test.log") 16 | InitializeDlogger(logPath) 17 | Dlogger.Debug("Test message") 18 | 19 | // Ensure logger is not nil 20 | if Dlogger == nil { 21 | t.Fatal("Dlogger is not initialized") 22 | } 23 | 24 | // Verify the log level 25 | if Dlogger.GetLevel() != logrus.DebugLevel { 26 | t.Fatalf("Expected log level to be Debug, got %v", Dlogger.GetLevel()) 27 | } 28 | 29 | // Test logging output to a buffer instead of file 30 | var buf bytes.Buffer 31 | Dlogger.Out = &buf 32 | 33 | // Log something 34 | Dlogger.Debug("Test message") 35 | 36 | // Check the buffer for the logged message 37 | if !bytes.Contains(buf.Bytes(), []byte("Test message")) { 38 | t.Errorf("Expected log message not found in buffer") 39 | } 40 | } 41 | 42 | func TestSetLevel(t *testing.T) { 43 | t.Setenv(logLevelEnvVar, "") 44 | 45 | logPath := filepath.Join(t.TempDir(), "test.log") 46 | InitializeDlogger(logPath) 47 
| 48 | if err := SetLevel("error"); err != nil { 49 | t.Fatalf("SetLevel returned error: %v", err) 50 | } 51 | 52 | if Dlogger.GetLevel() != logrus.ErrorLevel { 53 | t.Fatalf("Expected log level to be Error, got %v", Dlogger.GetLevel()) 54 | } 55 | 56 | if err := SetLevel("invalid"); err == nil { 57 | t.Fatalf("SetLevel should fail for invalid level") 58 | } 59 | 60 | if Dlogger.GetLevel() != logrus.ErrorLevel { 61 | t.Fatalf("Log level should remain Error after invalid SetLevel attempt, got %v", Dlogger.GetLevel()) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /pkg/utils/utils_test.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | // Test the DisplaySize function 9 | func TestDisplaySize(t *testing.T) { 10 | tests := []struct { 11 | bytes uint64 12 | want string 13 | }{ 14 | {0, "0 B"}, 15 | {1, "1 B"}, 16 | {1024, "1.00 KiB"}, 17 | {1024 * 1024, "1.00 MiB"}, 18 | {1024 * 1024 * 1024, "1.00 GiB"}, 19 | {1024 * 1024 * 1024 * 1024, "1.00 TiB"}, 20 | {1024 * 1024 * 1024 * 1024 * 1024, "1.00 PiB"}, 21 | {1024 * 1024 * 1024 * 1024 * 1024 * 1024, "1.00 EiB"}, 22 | {234, "234 B"}, 23 | {200034, "195.35 KiB"}, 24 | } 25 | 26 | for _, test := range tests { 27 | got := DisplaySize(test.bytes) 28 | if got != test.want { 29 | t.Errorf("DisplaySize(%d) = %q; want %q", test.bytes, got, test.want) 30 | } 31 | fmt.Printf("Success. 
DisplaySize(%d) = %s\n", test.bytes, got) 32 | } 33 | } 34 | 35 | func TestParseSize(t *testing.T) { 36 | tests := []struct { 37 | input string 38 | want uint64 39 | }{ 40 | {"0", 0}, 41 | {"1024", 1024}, 42 | {"1K", KB}, 43 | {"1KB", KB}, 44 | {"1KiB", uint64(KiB)}, 45 | {"1.5G", GB + GB/2}, 46 | {"2Gi", 2 * uint64(GiB)}, 47 | {"2GB", 2 * GB}, 48 | {"750MiB", 750 * uint64(MiB)}, 49 | {"1e3", 1000}, 50 | {"512b", 512}, 51 | {"2 GiB", 2 * uint64(GiB)}, 52 | } 53 | 54 | for _, tc := range tests { 55 | got, err := ParseSize(tc.input) 56 | if err != nil { 57 | t.Fatalf("ParseSize(%q) returned error: %v", tc.input, err) 58 | } 59 | if got != tc.want { 60 | t.Errorf("ParseSize(%q) = %d; want %d", tc.input, got, tc.want) 61 | } 62 | } 63 | 64 | invalid := []string{"", "abc", "-1", "1XB"} 65 | for _, input := range invalid { 66 | if _, err := ParseSize(input); err == nil { 67 | t.Errorf("ParseSize(%q) expected error, got nil", input) 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /internal/dfs/dfs_test.go: -------------------------------------------------------------------------------- 1 | package dfs 2 | 3 | import ( 4 | "path/filepath" 5 | "testing" 6 | ) 7 | 8 | func TestNewDfile(t *testing.T) { 9 | var tests = []struct { 10 | fileName string 11 | fileSize int64 12 | fileHash string 13 | }{ 14 | {"test_files/fileOne.bin", 100, "3fa2a6033f2b531361adf2bf300774fd1b75a5db13828e387d6e4c3c03400d61"}, 15 | {"test_files/fileTwo.bin", 3, "f2e0e2beb73c21338a1dc872cd7b900c24c4547b6d9ae882e02bcd4257ac7bd4"}, 16 | {"test_files/fileThree.bin", 0, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"}, 17 | {"test_files/fileFour.bin", 1, "5ee0dd4d4840229fab4a86438efbcaf1b9571af94f5ace5acc94de19e98ea9ab"}, 18 | } 19 | 20 | for _, test := range tests { 21 | df, err := NewDfile(test.fileName, test.fileSize, HashSHA256) 22 | if err != nil { 23 | t.Errorf("Failed to read file %s: %v", test.fileName, err) 24 | } 25 | 
26 | fileSize := df.FileSize() 27 | fileName := df.FileName() 28 | fileBaseName := df.BaseName() 29 | fileHash := df.HashString() // Use HashString() for comparison with hex string 30 | 31 | testBaseFileName := filepath.Base(test.fileName) 32 | 33 | testFullFileName, err := filepath.Abs(test.fileName) 34 | if err != nil { 35 | t.Errorf("filepath.Base() error: %s\n", err) 36 | } 37 | 38 | if testFullFileName != fileName { 39 | t.Errorf("testFullFileName want = %s, got = %s\n", testFullFileName, fileName) 40 | } 41 | 42 | if testBaseFileName != fileBaseName { 43 | t.Errorf("t.fileName want = %s, got = %s\n", testBaseFileName, fileBaseName) 44 | } 45 | 46 | if test.fileSize != fileSize { 47 | t.Errorf("t.fileSize want = %d, got = %d\n", test.fileSize, fileSize) 48 | } 49 | 50 | if test.fileHash != fileHash { 51 | t.Errorf("t.fileHash want = %s, got = %s\n", test.fileHash, fileHash) 52 | } 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /internal/dfs/fsinfo_test.go: -------------------------------------------------------------------------------- 1 | package dfs 2 | 3 | import ( 4 | _ "fmt" 5 | "os" 6 | "testing" 7 | 8 | "github.com/jdefrancesco/dskDitto/internal/dsklog" 9 | ) 10 | 11 | // Need to initialize logger 12 | func TestMain(m *testing.M) { 13 | dsklog.InitializeDlogger("/dev/null") 14 | os.Exit(m.Run()) 15 | } 16 | 17 | // Check if we can correctly enumerate disk information. 
18 | func TestListFileSystems(t *testing.T) { 19 | ListFileSystems() 20 | } 21 | 22 | func TestGetFileUidGid(t *testing.T) { 23 | test_files := []string{"./test_files/fileOne.bin", 24 | "./test_files/fileTwo.bin"} 25 | 26 | curr_uid := os.Getuid() 27 | // XXX: Need to fix this test and get UID from env variable 28 | for _, test := range test_files { 29 | uid, _ := GetFileUidGid(test) 30 | if uid != curr_uid { 31 | t.Errorf("uid incorrect (%v)", uid) 32 | } 33 | } 34 | } 35 | func TestGetFileSize(t *testing.T) { 36 | // Create a temporary file 37 | tmpFile, err := os.CreateTemp("", "testfile") 38 | if err != nil { 39 | t.Fatalf("failed to create temp file: %v", err) 40 | } 41 | defer os.Remove(tmpFile.Name()) 42 | 43 | // Write some data to the file 44 | data := []byte("hello world") 45 | if _, err := tmpFile.Write(data); err != nil { 46 | t.Fatalf("failed to write to temp file: %v", err) 47 | } 48 | tmpFile.Close() 49 | 50 | // Test: valid file 51 | got := GetFileSize(tmpFile.Name()) 52 | want := uint64(len(data)) 53 | if got != want { 54 | t.Errorf("GetFileSize(%q) = %d, want %d", tmpFile.Name(), got, want) 55 | } 56 | 57 | // Test: empty file name 58 | got = GetFileSize("") 59 | if got != 0 { 60 | t.Errorf("GetFileSize(\"\") = %d, want 0", got) 61 | } 62 | 63 | // Test: non-existent file 64 | got = GetFileSize("non_existent_file.txt") 65 | if got != 0 { 66 | t.Errorf("GetFileSize(non_existent_file.txt) = %d, want 0", got) 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /internal/dsklog/dsklog.go: -------------------------------------------------------------------------------- 1 | // dsklog package is just a simple wrapper around logrus 2 | package dsklog 3 | 4 | import ( 5 | "errors" 6 | "os" 7 | "strings" 8 | 9 | "github.com/sirupsen/logrus" 10 | ) 11 | 12 | // Global logger instance 13 | var Dlogger *logrus.Logger 14 | 15 | // Set log-level via env variable. 
16 | const logLevelEnvVar = "DSKDITTO_LOG_LEVEL" 17 | 18 | // InitializeDlogger initializes or resets the global logger (Dlogger) 19 | func InitializeDlogger(logFile string) { 20 | Dlogger = logrus.New() 21 | 22 | // #nosec G304 23 | file, err := os.OpenFile(logFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o600) 24 | if err != nil { 25 | logrus.Fatalf("Failed to open log file: %v", err) 26 | } 27 | 28 | // Set the logger output to the log file 29 | Dlogger.Out = file 30 | // Set the log format (can be JSON or TextFormatter) 31 | Dlogger.SetFormatter(&logrus.TextFormatter{ 32 | FullTimestamp: true, 33 | }) 34 | 35 | level := resolveLevel(os.Getenv(logLevelEnvVar)) 36 | Dlogger.SetLevel(level) 37 | } 38 | 39 | // SetLevel allows callers to adjust the global logger level at runtime. 40 | func SetLevel(level string) error { 41 | if Dlogger == nil { 42 | return errors.New("logger not initialized") 43 | } 44 | 45 | parsed, err := parseLevel(level) 46 | if err != nil { 47 | return err 48 | } 49 | 50 | Dlogger.SetLevel(parsed) 51 | return nil 52 | } 53 | 54 | // resolveLevel calls parseLevel; handling any potential error cases. It helps keep 55 | // the InitializeDlogger a bit cleaner. 56 | func resolveLevel(raw string) logrus.Level { 57 | parsed, err := parseLevel(raw) 58 | if err != nil { 59 | return logrus.InfoLevel 60 | } 61 | return parsed 62 | } 63 | 64 | // parseLevel processes the environment variable string. If no string is found or the log 65 | // level isn't valid we return INFO. 66 | func parseLevel(raw string) (logrus.Level, error) { 67 | // No env variable set. Default to INFO. 
68 | if strings.TrimSpace(raw) == "" { 69 | return logrus.InfoLevel, nil 70 | } 71 | 72 | level, err := logrus.ParseLevel(strings.ToLower(strings.TrimSpace(raw))) 73 | if err != nil { 74 | return logrus.InfoLevel, err 75 | } 76 | return level, nil 77 | } 78 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | GOCMD = go 2 | GOBUILD= $(GOCMD) build 3 | GOCLEAN = $(GOCMD) clean 4 | GOTEST = $(GOCMD) test 5 | GOGET = $(GOCMD) get 6 | BINARY_NAME = dskDitto 7 | 8 | BENCH_BIN = ./bin/bench.test 9 | CPU_PROFILE ?= cpu.prof 10 | MEM_PROFILE ?= mem.prof 11 | PROFILE ?= $(CPU_PROFILE) 12 | PPROF_ADDR ?= localhost:6060 13 | 14 | PREFIX = /usr/local/bin 15 | 16 | all: test build 17 | 18 | debug: 19 | # Get rid of exposed profile webserver warning for now. 20 | gosec -exclude=G108,G104 ./... 21 | $(GOBUILD) -o ./bin/$(BINARY_NAME) -v -gcflags "all=-N -l" ./cmd/$(BINARY_NAME) 22 | 23 | build: 24 | gosec -exclude=G104,G108 ./... 25 | go build -o ./bin/dskDitto ./cmd/$(BINARY_NAME) 26 | 27 | .PHONY: build-darwin-arm64 28 | build-darwin-arm64: 29 | GOOS=darwin GOARCH=arm64 go build -o ./bin/dskDitto ./cmd/$(BINARY_NAME) 30 | 31 | .PHONY: test 32 | test: 33 | $(GOTEST) -v ./... 34 | 35 | .PHONY: bench 36 | bench: 37 | $(GOTEST) -bench=. -benchmem ./internal/bench/ 38 | 39 | 40 | .PHONY: bench-build 41 | bench-build: 42 | mkdir -p $(dir $(BENCH_BIN)) 43 | $(GOTEST) -c -o $(BENCH_BIN) ./internal/bench 44 | 45 | .PHONY: bench-profile 46 | bench-profile: bench-build 47 | $(BENCH_BIN) -test.run=^$$ -test.bench=. -test.benchmem -test.cpuprofile=$(CPU_PROFILE) -test.memprofile=$(MEM_PROFILE) 48 | @echo "CPU profile written to $(CPU_PROFILE)" 49 | @echo "Memory profile written to $(MEM_PROFILE)" 50 | @echo "Inspect profiles with: make pprof-web PROFILE=$(CPU_PROFILE)" 51 | 52 | .PHONY: pprof-web 53 | pprof-web: bench-build 54 | @if [ ! 
-f $(PROFILE) ]; then \ 55 | echo "Profile '$(PROFILE)' not found. Run 'make bench-profile' or set PROFILE=."; \ 56 | exit 1; \ 57 | fi 58 | go tool pprof -http=$(PPROF_ADDR) $(BENCH_BIN) $(PROFILE) 59 | 60 | 61 | .PHONY: gosec 62 | gosec: 63 | gosec -exclude=G104 ./... 64 | 65 | .PHONY: install 66 | install: 67 | cp ./dskDitto $(PREFIX)/dskDitto 68 | 69 | 70 | 71 | .PHONY: clean 72 | clean: 73 | $(GOCLEAN) 74 | @if [ -e ./bin/$(BINARY_NAME) ]; then rm ./bin/$(BINARY_NAME); fi 75 | @if [ -e ./bin/bench.test ]; then rm ./bin/bench.test; fi 76 | @if ls *.log >/dev/null 2>&1; then rm *.log; fi 77 | @if ls *.prof >/dev/null 2>&1; then rm *.prof; fi 78 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/jdefrancesco/dskDitto 2 | 3 | go 1.24.9 4 | 5 | require ( 6 | github.com/charmbracelet/bubbletea v1.3.10 7 | github.com/charmbracelet/lipgloss v1.1.0 8 | github.com/cloudfoundry/gosigar v1.3.112 9 | github.com/mattn/go-runewidth v0.0.19 10 | github.com/pterm/pterm v0.12.82 11 | github.com/sirupsen/logrus v1.9.3 12 | golang.org/x/sync v0.19.0 13 | golang.org/x/sys v0.39.0 14 | lukechampine.com/blake3 v1.4.1 15 | ) 16 | 17 | require ( 18 | atomicgo.dev/cursor v0.2.0 // indirect 19 | atomicgo.dev/keyboard v0.2.9 // indirect 20 | atomicgo.dev/schedule v0.1.0 // indirect 21 | github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect 22 | github.com/charmbracelet/colorprofile v0.3.3 // indirect 23 | github.com/charmbracelet/x/ansi v0.11.2 // indirect 24 | github.com/charmbracelet/x/cellbuf v0.0.14 // indirect 25 | github.com/charmbracelet/x/term v0.2.2 // indirect 26 | github.com/clipperhouse/displaywidth v0.6.1 // indirect 27 | github.com/clipperhouse/stringish v0.1.1 // indirect 28 | github.com/clipperhouse/uax29/v2 v2.3.0 // indirect 29 | github.com/containerd/console v1.0.5 // indirect 30 | github.com/erikgeiser/coninput 
v0.0.0-20211004153227-1c3628e74d0f // indirect 31 | github.com/google/pprof v0.0.0-20251208000136-3d256cb9ff16 // indirect 32 | github.com/gookit/color v1.6.0 // indirect 33 | github.com/klauspost/cpuid/v2 v2.3.0 // indirect 34 | github.com/lithammer/fuzzysearch v1.1.8 // indirect 35 | github.com/lucasb-eyer/go-colorful v1.3.0 // indirect 36 | github.com/mattn/go-isatty v0.0.20 // indirect 37 | github.com/mattn/go-localereader v0.0.1 // indirect 38 | github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 // indirect 39 | github.com/muesli/cancelreader v0.2.2 // indirect 40 | github.com/muesli/termenv v0.16.0 // indirect 41 | github.com/onsi/ginkgo/v2 v2.27.3 // indirect 42 | github.com/onsi/gomega v1.38.3 // indirect 43 | github.com/rivo/uniseg v0.4.7 // indirect 44 | github.com/sergi/go-diff v1.4.0 // indirect 45 | github.com/stretchr/testify v1.11.1 // indirect 46 | github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect 47 | golang.org/x/exp v0.0.0-20251125195548-87e1e737ad39 // indirect 48 | golang.org/x/net v0.48.0 // indirect 49 | golang.org/x/term v0.38.0 // indirect 50 | golang.org/x/text v0.32.0 // indirect 51 | golang.org/x/tools v0.40.0 // indirect 52 | ) 53 | -------------------------------------------------------------------------------- /internal/dmap/export.go: -------------------------------------------------------------------------------- 1 | package dmap 2 | 3 | import ( 4 | "encoding/csv" 5 | "encoding/json" 6 | "errors" 7 | "fmt" 8 | "os" 9 | "path/filepath" 10 | "sort" 11 | "strconv" 12 | 13 | "github.com/jdefrancesco/dskDitto/internal/dfs" 14 | ) 15 | 16 | type exportFile struct { 17 | Path string `json:"path"` 18 | Size uint64 `json:"size"` 19 | } 20 | 21 | type exportGroup struct { 22 | Hash string `json:"hash"` 23 | DuplicateCount int `json:"duplicate_count"` 24 | Files []exportFile `json:"files"` 25 | } 26 | 27 | type exportSummary struct { 28 | GroupCount int `json:"group_count"` 29 | Groups []exportGroup `json:"groups"` 
30 | } 31 | 32 | // WriteJSON writes duplicate groups that satisfy the minimum duplicate threshold to a JSON file. 33 | func (d *Dmap) WriteJSON(path string) error { 34 | summary := d.collectExportSummary() 35 | data, err := json.MarshalIndent(summary, "", " ") 36 | if err != nil { 37 | return fmt.Errorf("marshal JSON: %w", err) 38 | } 39 | 40 | file, err := secureOutputFile(path) 41 | if err != nil { 42 | return err 43 | } 44 | defer file.Close() 45 | 46 | if _, err := file.Write(data); err != nil { 47 | return fmt.Errorf("write JSON file %s: %w", path, err) 48 | } 49 | return nil 50 | } 51 | 52 | // WriteCSV writes duplicate groups that satisfy the minimum duplicate threshold to a CSV file. 53 | func (d *Dmap) WriteCSV(path string) error { 54 | summary := d.collectExportSummary() 55 | file, err := secureOutputFile(path) 56 | if err != nil { 57 | return err 58 | } 59 | defer file.Close() 60 | 61 | writer := csv.NewWriter(file) 62 | if err := writer.Write([]string{"hash", "duplicate_count", "path", "size_bytes"}); err != nil { 63 | return fmt.Errorf("write CSV header: %w", err) 64 | } 65 | 66 | for _, group := range summary.Groups { 67 | count := strconv.Itoa(group.DuplicateCount) 68 | for _, f := range group.Files { 69 | if err := writer.Write([]string{group.Hash, count, f.Path, strconv.FormatUint(f.Size, 10)}); err != nil { 70 | return fmt.Errorf("write CSV row: %w", err) 71 | } 72 | } 73 | } 74 | 75 | writer.Flush() 76 | if err := writer.Error(); err != nil { 77 | return fmt.Errorf("flush CSV writer: %w", err) 78 | } 79 | return nil 80 | } 81 | 82 | // collectExportSummary builds an exportSummary containing deduplicated file groups, 83 | // filtered by the minimum duplicate threshold, sorted by hash, and enriched with file sizes. 
84 | func (d *Dmap) collectExportSummary() exportSummary { 85 | if d == nil { 86 | return exportSummary{} 87 | } 88 | 89 | type groupData struct { 90 | hash string 91 | files []string 92 | } 93 | 94 | groups := make([]groupData, 0, len(d.filesMap)) 95 | for digest, files := range d.filesMap { 96 | if uint(len(files)) < d.minDuplicates { 97 | continue 98 | } 99 | groups = append(groups, groupData{ 100 | hash: fmt.Sprintf("%x", digest), 101 | files: append([]string(nil), files...), 102 | }) 103 | } 104 | 105 | sort.Slice(groups, func(i, j int) bool { 106 | return groups[i].hash < groups[j].hash 107 | }) 108 | 109 | exportGroups := make([]exportGroup, 0, len(groups)) 110 | for _, g := range groups { 111 | item := exportGroup{ 112 | Hash: g.hash, 113 | DuplicateCount: len(g.files), 114 | Files: make([]exportFile, 0, len(g.files)), 115 | } 116 | for _, path := range g.files { 117 | item.Files = append(item.Files, exportFile{ 118 | Path: path, 119 | Size: dfs.GetFileSize(path), 120 | }) 121 | } 122 | exportGroups = append(exportGroups, item) 123 | } 124 | 125 | return exportSummary{ 126 | GroupCount: len(exportGroups), 127 | Groups: exportGroups, 128 | } 129 | } 130 | 131 | func secureOutputFile(path string) (*os.File, error) { 132 | if path == "" { 133 | return nil, errors.New("output path is empty") 134 | } 135 | 136 | clean := filepath.Clean(path) 137 | abs, err := filepath.Abs(clean) 138 | if err != nil { 139 | return nil, fmt.Errorf("resolve output path %s: %w", path, err) 140 | } 141 | 142 | info, err := os.Stat(abs) 143 | if err == nil { 144 | if info.IsDir() { 145 | return nil, fmt.Errorf("output path %s is a directory", abs) 146 | } 147 | } 148 | 149 | dirPath := filepath.Dir(abs) 150 | base := filepath.Base(abs) 151 | 152 | return openFileSecure(abs, dirPath, base) 153 | } 154 | -------------------------------------------------------------------------------- /internal/dfs/fsinfo.go: 
-------------------------------------------------------------------------------- 1 | // Utility functions and types foir querying information about 2 | // File System we wish to run unditto on. 3 | package dfs 4 | 5 | import ( 6 | "fmt" 7 | "os" 8 | "path/filepath" 9 | "syscall" 10 | 11 | "github.com/jdefrancesco/dskDitto/internal/dsklog" 12 | 13 | sigar "github.com/cloudfoundry/gosigar" 14 | ) 15 | 16 | const OutputFormat = "%-15s %4s %4s %5s %4s %-15s\n" 17 | 18 | // DFileStat holds our filesystem specific Stat 19 | // information. We are currently only concerned 20 | // with Darwin. 21 | type DFileStat struct { 22 | Dev uint64 23 | Inode uint64 24 | Nlink uint64 25 | Mode uint32 26 | Uid uint32 27 | Gid uint32 28 | Rdev uint64 29 | Size int64 30 | Blksize int64 31 | Blocks int64 32 | } 33 | 34 | // GetDFileState will return Stat information of file. 35 | // This includes UID/Filesize/etc. 36 | func GetDFileStat(info os.FileInfo) (*DFileStat, error) { 37 | if stat, ok := info.Sys().(*syscall.Stat_t); ok { 38 | statDev := stat.Dev 39 | if stat.Dev < 0 || stat.Rdev < 0 { 40 | return nil, fmt.Errorf("device values are negative: dev=%d rdev=%d", stat.Dev, stat.Rdev) 41 | } 42 | return &DFileStat{ 43 | Dev: uint64(statDev), 44 | Inode: stat.Ino, 45 | Nlink: uint64(stat.Nlink), 46 | Mode: uint32(stat.Mode), 47 | Uid: stat.Uid, 48 | Gid: stat.Gid, 49 | Rdev: uint64(stat.Rdev), 50 | Size: stat.Size, 51 | Blksize: int64(stat.Blksize), 52 | Blocks: stat.Blocks, 53 | }, nil 54 | } 55 | 56 | return nil, fmt.Errorf("unable to get file stat for %#v", info) 57 | 58 | } 59 | 60 | // List filesystems on machine 61 | func ListFileSystems() bool { 62 | 63 | fsList := sigar.FileSystemList{} 64 | fsList.Get() 65 | 66 | fmt.Fprintf(os.Stdout, OutputFormat, 67 | "Filesystem", "Size", "Used", "Avail", "Use%", "Mounted On") 68 | 69 | for _, fs := range fsList.List { 70 | dirName := fs.DirName 71 | usage := sigar.FileSystemUsage{} 72 | usage.Get(dirName) 73 | 74 | fmt.Fprintf(os.Stdout, 
OutputFormat, 75 | fs.DevName, 76 | formatSize(usage.Total), 77 | formatSize(usage.Used), 78 | formatSize(usage.Avail), 79 | sigar.FormatPercent(usage.UsePercent()), 80 | dirName) 81 | 82 | } 83 | 84 | // For our Example test harness 85 | return true 86 | } 87 | 88 | // formatSize will make out sizes more human friendly 89 | func formatSize(size uint64) string { 90 | return sigar.FormatSize(size * 1024) 91 | } 92 | 93 | // GetFileUidGid calls Stat on a function and grabs the Uid and Gid 94 | // of a file. 95 | func GetFileUidGid(filename string) (Uid, Gid int) { 96 | 97 | info, err := os.Stat(filename) 98 | if err != nil { 99 | err := fmt.Errorf("error os.Stat %v", err) 100 | fmt.Println(err) 101 | return -1, -1 102 | } 103 | stat, ok := info.Sys().(*syscall.Stat_t) 104 | if !ok { 105 | fmt.Println("error calling Stat") 106 | return -1, -1 107 | } 108 | 109 | Uid = int(stat.Uid) 110 | Gid = int(stat.Gid) 111 | return Uid, Gid 112 | } 113 | 114 | // Check if we have proper permissions for investigating 115 | // a file. Performs additional fast safety checks to avoid 116 | // symlink traversal and non-regular special files. 117 | // TODO: These additional checks need to be refactored out. Not sure what I was 118 | // 119 | // thinking when I put them there. 120 | func CheckFilePerms(path string) bool { 121 | cleanPath := filepath.Clean(path) 122 | absPath, err := filepath.Abs(cleanPath) 123 | if err != nil { 124 | return false 125 | } 126 | 127 | // Fast path checks without following symlinks. 128 | fi, err := os.Lstat(absPath) 129 | if err != nil { 130 | return false 131 | } 132 | // Disallow symlinks 133 | if fi.Mode()&os.ModeSymlink != 0 { 134 | return false 135 | } 136 | // Only allow regular files (exclude sockets, devices, pipes, etc.) 
137 | if !fi.Mode().IsRegular() { 138 | return false 139 | } 140 | 141 | // #nosec G304 142 | // Use O_NOFOLLOW for additional safety (avoid TOCTOU on symlinks) 143 | fd, err := syscall.Open(absPath, syscall.O_RDONLY|syscall.O_NOFOLLOW, 0) 144 | if err != nil { 145 | return false 146 | } 147 | _ = syscall.Close(fd) 148 | return true 149 | } 150 | 151 | // GetFileSize will return size of file in bytes. 152 | // If file name isn't provided we return zero. Will 153 | // refactor later for better error handling. 154 | func GetFileSize(file_name string) uint64 { 155 | if len(file_name) == 0 { 156 | dsklog.Dlogger.Warn("Empty file name provided") 157 | return 0 158 | } 159 | 160 | file, err := os.Stat(file_name) 161 | if err != nil { 162 | dsklog.Dlogger.Warnf("Error calling os.Stat on %s: %v", file_name, err) 163 | return 0 164 | } 165 | 166 | size := file.Size() 167 | // This shouldn't happen. 168 | if size < 0 { 169 | return 0 170 | } 171 | return uint64(size) 172 | } 173 | -------------------------------------------------------------------------------- /internal/dfs/dfs.go: -------------------------------------------------------------------------------- 1 | package dfs 2 | 3 | import ( 4 | "crypto/sha256" 5 | "errors" 6 | "fmt" 7 | "hash" 8 | "io" 9 | "os" 10 | "path/filepath" 11 | "strings" 12 | "sync" 13 | 14 | "github.com/jdefrancesco/dskDitto/internal/dsklog" 15 | 16 | "lukechampine.com/blake3" 17 | ) 18 | 19 | // For now this will be our max open-file descriptor limit. This value 20 | // is used by hashing function semaphore logic. 21 | // TODO: Make tuneable. 22 | const OpenFileDescLimMax = 2048 23 | 24 | // HashAlgorithm identifies which digest to use when hashing files. 25 | type HashAlgorithm string 26 | 27 | // XXX: Removal of Blake3 until I find issue with implementation or my usage. 28 | const ( 29 | HashSHA256 HashAlgorithm = "sha256" 30 | HashBLAKE3 HashAlgorithm = "blake3" 31 | ) 32 | 33 | // Dfile structure will describe a given file. 
We 34 | // only care about the few file properties that will 35 | // allow us to detect a duplicate. 36 | type Dfile struct { 37 | fileName string 38 | fileSize int64 39 | algo HashAlgorithm 40 | fileHash [32]byte 41 | } 42 | 43 | // New creates a new Dfile. 44 | func NewDfile(fName string, fSize int64, algo HashAlgorithm) (*Dfile, error) { 45 | 46 | if fName == "" { 47 | fmt.Printf("File name must be specified\n") 48 | return nil, errors.New("file name needs to be specified") 49 | } 50 | 51 | fullFileName, err := filepath.Abs(fName) 52 | if err != nil { 53 | fmt.Printf("couldn't get absolute filename for %s\n", fName) 54 | return nil, err 55 | } 56 | 57 | d := &Dfile{ 58 | fileName: fullFileName, 59 | fileSize: fSize, 60 | algo: algo, 61 | } 62 | 63 | if err = d.hashFile(); err != nil { 64 | return d, errors.New("failed to hash file") 65 | } 66 | 67 | dsklog.Dlogger.Debugf("Hash algorithm chosen is %s", d.algo) 68 | return d, nil 69 | } 70 | 71 | // FileName will return the name of the file currently described by the dfile 72 | func (d *Dfile) FileName() string { return d.fileName } 73 | 74 | // BaseName returns the base filename only instead of the full pathname. 75 | func (d *Dfile) BaseName() string { return filepath.Base(d.fileName) } 76 | 77 | // FileSize will return the size of the file described by dfile object. 78 | func (d *Dfile) FileSize() int64 { return d.fileSize } 79 | 80 | // Algorithm returns the hashing algorithm used to create this Dfile. 81 | func (d *Dfile) Algorithm() HashAlgorithm { return d.algo } 82 | 83 | // GetHash will return hash bytes as fixed-size array. 84 | func (d *Dfile) Hash() [32]byte { return d.fileHash } 85 | 86 | // GetHashString will return SHA256 Hash as hex string for display purposes. 87 | func (d *Dfile) HashString() string { return fmt.Sprintf("%x", d.fileHash) } 88 | 89 | // GetPerms will give us UNIX permissions we need to ensure we can access 90 | // a file. 
91 | func (d *Dfile) GetPerms() string { 92 | return "" 93 | } 94 | 95 | // Semaphore that controls how many open file descriptors we can have at once.. 96 | var sema = make(chan struct{}, OpenFileDescLimMax) 97 | 98 | // We want to re-use a pool of buffers to make things easier on GC. We hash files 99 | // in quick succession. Instead of creating a new buffer for each file we can re-use what we have 100 | // available. 101 | var bufPool = sync.Pool{ 102 | New: func() any { 103 | var arr [1 << 20]byte 104 | return &arr 105 | }, 106 | } 107 | 108 | func (d *Dfile) hashFile() error { 109 | sema <- struct{}{} 110 | defer func() { <-sema }() 111 | 112 | bufPtr := bufPool.Get().(*[1 << 20]byte) 113 | defer bufPool.Put(bufPtr) 114 | 115 | f, err := os.Open(d.fileName) 116 | if err != nil { 117 | return fmt.Errorf("failed to open file %s: %w", d.fileName, err) 118 | } 119 | defer f.Close() 120 | 121 | h, err := newHash(d.algo) 122 | if err != nil { 123 | return err 124 | } 125 | 126 | if _, err := io.CopyBuffer(h, f, bufPtr[:]); err != nil { 127 | return fmt.Errorf("failed to copy file %s into hash buffer for processing: %w", d.fileName, err) 128 | } 129 | 130 | sum := h.Sum(nil) 131 | copy(d.fileHash[:], sum) 132 | return nil 133 | } 134 | 135 | func newHash(algo HashAlgorithm) (hash.Hash, error) { 136 | switch algo { 137 | case HashBLAKE3: 138 | return blake3.New(32, nil), nil 139 | case HashSHA256, "": 140 | return sha256.New(), nil 141 | default: 142 | return nil, fmt.Errorf("unsupported hash algorithm: %s", algo) 143 | } 144 | } 145 | 146 | // ParseHashAlgorithm returns the supported hash algorithm constant for the supplied string. 
147 | func ParseHashAlgorithm(name string) (HashAlgorithm, error) { 148 | switch HashAlgorithm(strings.ToLower(name)) { 149 | case HashSHA256, "": 150 | return HashSHA256, nil 151 | case HashBLAKE3: 152 | return HashBLAKE3, nil 153 | default: 154 | return "", fmt.Errorf("unsupported hash algorithm %q", name) 155 | } 156 | } 157 | 158 | // DetectFilesystem returns the filesystem type for the path. 159 | func DetectFilesystem(path string) (string, error) { 160 | return detectFilesystem(path) 161 | } 162 | -------------------------------------------------------------------------------- /pkg/utils/utils.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "math" 6 | "strconv" 7 | "strings" 8 | "unicode" 9 | ) 10 | 11 | // Our size constants. Powers of two! 12 | const ( 13 | _ = iota 14 | KiB = 1 << (10 * iota) 15 | MiB 16 | GiB 17 | TiB 18 | PiB 19 | EiB 20 | ) 21 | 22 | const ( 23 | KB uint64 = 1000 24 | MB = KB * 1000 25 | GB = MB * 1000 26 | TB = GB * 1000 27 | PB = TB * 1000 28 | EB = PB * 1000 29 | ) 30 | 31 | var sizeSuffixMultipliers = map[string]uint64{ 32 | "": 1, 33 | "b": 1, 34 | "byte": 1, 35 | "bytes": 1, 36 | "k": KB, 37 | "kb": KB, 38 | "kbyte": KB, 39 | "kbytes": KB, 40 | "kilobyte": KB, 41 | "kilobytes": KB, 42 | "m": MB, 43 | "mb": MB, 44 | "mbyte": MB, 45 | "mbytes": MB, 46 | "megabyte": MB, 47 | "megabytes": MB, 48 | "g": GB, 49 | "gb": GB, 50 | "gbyte": GB, 51 | "gbytes": GB, 52 | "gigabyte": GB, 53 | "gigabytes": GB, 54 | "t": TB, 55 | "tb": TB, 56 | "terabyte": TB, 57 | "terabytes": TB, 58 | "p": PB, 59 | "pb": PB, 60 | "petabyte": PB, 61 | "petabytes": PB, 62 | "e": EB, 63 | "eb": EB, 64 | "exabyte": EB, 65 | "exabytes": EB, 66 | "ki": uint64(KiB), 67 | "kib": uint64(KiB), 68 | "kibi": uint64(KiB), 69 | "kibibyte": uint64(KiB), 70 | "kibibytes": uint64(KiB), 71 | "mi": uint64(MiB), 72 | "mib": uint64(MiB), 73 | "mibi": uint64(MiB), 74 | "mibibyte": uint64(MiB), 75 | 
"mibibytes": uint64(MiB), 76 | "gi": uint64(GiB), 77 | "gib": uint64(GiB), 78 | "gibi": uint64(GiB), 79 | "gibibyte": uint64(GiB), 80 | "gibibytes": uint64(GiB), 81 | "ti": uint64(TiB), 82 | "tib": uint64(TiB), 83 | "tibi": uint64(TiB), 84 | "tibibyte": uint64(TiB), 85 | "tibibytes": uint64(TiB), 86 | "pi": uint64(PiB), 87 | "pib": uint64(PiB), 88 | "pibi": uint64(PiB), 89 | "pibibyte": uint64(PiB), 90 | "pibibytes": uint64(PiB), 91 | "ei": uint64(EiB), 92 | "eib": uint64(EiB), 93 | "eibi": uint64(EiB), 94 | "eibibyte": uint64(EiB), 95 | "eibibytes": uint64(EiB), 96 | } 97 | 98 | // ParseSize converts human-readable size strings (e.g. "10M", "4GiB", "1.5T") to bytes. 99 | // Supported suffixes default to binary multiples (KiB, MiB, GiB, etc.). 100 | func ParseSize(input string) (uint64, error) { 101 | trimmed := strings.TrimSpace(input) 102 | if trimmed == "" { 103 | return 0, fmt.Errorf("size string is empty") 104 | } 105 | 106 | normalized := strings.ToLower(trimmed) 107 | normalized = strings.ReplaceAll(normalized, " ", "") 108 | normalized = strings.ReplaceAll(normalized, "_", "") 109 | normalized = strings.ReplaceAll(normalized, ",", "") 110 | 111 | if f, err := strconv.ParseFloat(normalized, 64); err == nil { 112 | if f < 0 { 113 | return 0, fmt.Errorf("size must be non-negative: %s", input) 114 | } 115 | if f > float64(math.MaxUint64) { 116 | return 0, fmt.Errorf("size %s overflows uint64", input) 117 | } 118 | return uint64(f), nil 119 | } 120 | 121 | if strings.HasPrefix(normalized, "-") { 122 | return 0, fmt.Errorf("size must be non-negative: %s", input) 123 | } 124 | 125 | normalized = strings.TrimPrefix(normalized, "+") 126 | 127 | if normalized == "" { 128 | return 0, fmt.Errorf("size string is empty") 129 | } 130 | 131 | idx := 0 132 | for idx < len(normalized) { 133 | r := normalized[idx] 134 | if (r >= '0' && r <= '9') || r == '.' 
{ 135 | idx++ 136 | continue 137 | } 138 | break 139 | } 140 | 141 | numPart := normalized[:idx] 142 | suffix := normalized[idx:] 143 | if numPart == "" { 144 | return 0, fmt.Errorf("invalid size %q", input) 145 | } 146 | 147 | value, err := strconv.ParseFloat(numPart, 64) 148 | if err != nil { 149 | return 0, fmt.Errorf("invalid size %q: %w", input, err) 150 | } 151 | 152 | multiplier, err := lookupMultiplier(suffix) 153 | if err != nil { 154 | return 0, err 155 | } 156 | 157 | product := value * float64(multiplier) 158 | if product < 0 || product > float64(math.MaxUint64) { 159 | return 0, fmt.Errorf("size %s overflows uint64", input) 160 | } 161 | if math.IsInf(product, 1) || math.IsNaN(product) { 162 | return 0, fmt.Errorf("size %s is not a finite number", input) 163 | } 164 | 165 | return uint64(product), nil 166 | } 167 | 168 | func lookupMultiplier(suffix string) (uint64, error) { 169 | candidates := []string{ 170 | suffix, 171 | strings.TrimSuffix(suffix, "s"), 172 | strings.TrimSuffix(suffix, "byte"), 173 | strings.TrimSuffix(suffix, "bytes"), 174 | strings.TrimSuffix(suffix, "b"), 175 | } 176 | 177 | for _, candidate := range candidates { 178 | if multiplier, ok := sizeSuffixMultipliers[candidate]; ok { 179 | return multiplier, nil 180 | } 181 | } 182 | 183 | if suffix == "" { 184 | return 1, nil 185 | } 186 | 187 | return 0, fmt.Errorf("unknown size suffix %q", suffix) 188 | } 189 | 190 | // DisplaySize takes a number of bytes and returns a human-readable string 191 | func DisplaySize(bytes uint64) string { 192 | 193 | switch { 194 | case bytes < KiB: 195 | return fmt.Sprintf("%d B", bytes) 196 | case bytes < MiB: 197 | return fmt.Sprintf("%.2f KiB", float64(bytes)/float64(KiB)) 198 | case bytes < GiB: 199 | return fmt.Sprintf("%.2f MiB", float64(bytes)/float64(MiB)) 200 | case bytes < TiB: 201 | return fmt.Sprintf("%.2f GiB", float64(bytes)/float64(GiB)) 202 | case bytes < PiB: 203 | return fmt.Sprintf("%.2f TiB", float64(bytes)/float64(TiB)) 204 | case 
bytes < EiB: 205 | return fmt.Sprintf("%.2f PiB", float64(bytes)/float64(PiB)) 206 | default: 207 | return fmt.Sprintf("%.2f EiB", float64(bytes)/float64(EiB)) 208 | } 209 | } 210 | 211 | // IsAlphanumeric checks if a rune is alphanumeric (letter or digit) 212 | func IsAlphanumeric(r rune) bool { 213 | return unicode.IsLetter(r) || unicode.IsDigit(r) 214 | } 215 | -------------------------------------------------------------------------------- /internal/dwalk/dwalk_test.go: -------------------------------------------------------------------------------- 1 | package dwalk 2 | 3 | import ( 4 | "bytes" 5 | "context" 6 | "fmt" 7 | "os" 8 | "path/filepath" 9 | "sort" 10 | "testing" 11 | "time" 12 | 13 | "github.com/jdefrancesco/dskDitto/internal/config" 14 | "github.com/jdefrancesco/dskDitto/internal/dfs" 15 | "github.com/jdefrancesco/dskDitto/internal/dsklog" 16 | ) 17 | 18 | // Test basic walking... 19 | func TestNewDWalk(t *testing.T) { 20 | 21 | // Initialize logger for testing 22 | dsklog.InitializeDlogger("test.log") 23 | 24 | rootDirs := []string{"test_files"} 25 | 26 | dFiles := make(chan *dfs.Dfile) 27 | walker := NewDWalker(rootDirs, dFiles, config.Config{SkipHidden: true, SkipVirtualFS: true, HashAlgorithm: dfs.HashSHA256, MaxDepth: -1}) 28 | 29 | // walker 30 | ctx, cancel := context.WithCancel(context.Background()) 31 | defer cancel() 32 | walker.Run(ctx) 33 | 34 | var nfiles int64 35 | tick := time.Tick(500 * time.Millisecond) 36 | 37 | loop: 38 | for { 39 | 40 | select { 41 | case _, ok := <-dFiles: 42 | if !ok { 43 | break loop 44 | } 45 | // Test dir and subdirs should only have 14 files 46 | nfiles++ 47 | 48 | case <-tick: 49 | fmt.Println("Tick...") 50 | 51 | } 52 | } 53 | 54 | fmt.Printf("%d files\n", nfiles) 55 | 56 | } 57 | 58 | func TestSkipHiddenFiles(t *testing.T) { 59 | // Ensure logger is initialized once for tests. 
60 | dsklog.InitializeDlogger("/dev/null") 61 | 62 | t.Run("SkipHidden", func(t *testing.T) { 63 | names := collectFiles(t, true) 64 | if contains(names, ".hidden.txt") { 65 | t.Fatalf("expected hidden file to be skipped, names=%v", names) 66 | } 67 | if !contains(names, "visible.txt") { 68 | t.Fatalf("expected visible file to be processed, names=%v", names) 69 | } 70 | }) 71 | 72 | t.Run("IncludeHidden", func(t *testing.T) { 73 | names := collectFiles(t, false) 74 | if !contains(names, ".hidden.txt") { 75 | t.Fatalf("expected hidden file to be included when skipHidden=false, names=%v", names) 76 | } 77 | if !contains(names, "visible.txt") { 78 | t.Fatalf("expected visible file to be processed, names=%v", names) 79 | } 80 | }) 81 | } 82 | 83 | func collectFiles(t *testing.T, skipHidden bool) []string { 84 | t.Helper() 85 | 86 | dir := t.TempDir() 87 | 88 | visible := filepath.Join(dir, "visible.txt") 89 | hidden := filepath.Join(dir, ".hidden.txt") 90 | 91 | if err := os.WriteFile(visible, []byte("visible"), 0o644); err != nil { 92 | t.Fatalf("failed to write visible file: %v", err) 93 | } 94 | if err := os.WriteFile(hidden, []byte("hidden"), 0o644); err != nil { 95 | t.Fatalf("failed to write hidden file: %v", err) 96 | } 97 | 98 | dFiles := make(chan *dfs.Dfile, 4) 99 | walker := NewDWalker([]string{dir}, dFiles, config.Config{SkipHidden: skipHidden, SkipVirtualFS: true, HashAlgorithm: dfs.HashSHA256, MaxDepth: -1}) 100 | ctx, cancel := context.WithCancel(context.Background()) 101 | defer cancel() 102 | 103 | walker.Run(ctx) 104 | 105 | var names []string 106 | for df := range dFiles { 107 | names = append(names, filepath.Base(df.FileName())) 108 | } 109 | return names 110 | } 111 | 112 | func contains(list []string, target string) bool { 113 | for _, item := range list { 114 | if item == target { 115 | return true 116 | } 117 | } 118 | return false 119 | } 120 | 121 | func TestSkipVirtualFSToggle(t *testing.T) { 122 | dsklog.InitializeDlogger("/dev/null") 123 | 
124 | dFiles := make(chan *dfs.Dfile) 125 | 126 | walkerSkip := NewDWalker([]string{"/"}, dFiles, config.Config{SkipVirtualFS: true, HashAlgorithm: dfs.HashSHA256, MaxDepth: -1}) 127 | if !walkerSkip.shouldSkipDir("/proc") { 128 | t.Fatalf("expected /proc to be skipped when SkipVirtualFS is true") 129 | } 130 | 131 | walkerInclude := NewDWalker([]string{"/"}, dFiles, config.Config{SkipVirtualFS: false, HashAlgorithm: dfs.HashSHA256, MaxDepth: -1}) 132 | if walkerInclude.shouldSkipDir("/proc") { 133 | t.Fatalf("expected /proc not to be skipped when SkipVirtualFS is false") 134 | } 135 | } 136 | 137 | func TestMaxDepthLimit(t *testing.T) { 138 | dsklog.InitializeDlogger("/dev/null") 139 | 140 | root := t.TempDir() 141 | level1 := filepath.Join(root, "level1") 142 | level2 := filepath.Join(level1, "level2") 143 | 144 | if err := os.MkdirAll(level2, 0o755); err != nil { 145 | t.Fatalf("failed to create directories: %v", err) 146 | } 147 | 148 | files := []struct { 149 | path string 150 | data string 151 | }{ 152 | {filepath.Join(root, "root.txt"), "root"}, 153 | {filepath.Join(level1, "one.txt"), "level1"}, 154 | {filepath.Join(level2, "two.txt"), "level2"}, 155 | } 156 | 157 | for _, f := range files { 158 | if err := os.WriteFile(f.path, []byte(f.data), 0o644); err != nil { 159 | t.Fatalf("failed to write %s: %v", f.path, err) 160 | } 161 | } 162 | 163 | collect := func(depth int) []string { 164 | cfg := config.Config{ 165 | HashAlgorithm: dfs.HashSHA256, 166 | SkipVirtualFS: true, 167 | MaxDepth: depth, 168 | } 169 | return collectRelativePaths(t, root, cfg) 170 | } 171 | 172 | all := collect(-1) 173 | expectPathsEqual(t, all, []string{"level1/level2/two.txt", "level1/one.txt", "root.txt"}) 174 | 175 | depth0 := collect(0) 176 | expectPathsEqual(t, depth0, []string{"root.txt"}) 177 | 178 | depth1 := collect(1) 179 | expectPathsEqual(t, depth1, []string{"level1/one.txt", "root.txt"}) 180 | } 181 | 182 | func TestMaxFileSizeLimit(t *testing.T) { 183 | 
dsklog.InitializeDlogger("/dev/null") 184 | 185 | root := t.TempDir() 186 | small := filepath.Join(root, "small.dat") 187 | large := filepath.Join(root, "large.dat") 188 | 189 | if err := os.WriteFile(small, bytes.Repeat([]byte("a"), 1024), 0o644); err != nil { 190 | t.Fatalf("failed to create small file: %v", err) 191 | } 192 | if err := os.WriteFile(large, bytes.Repeat([]byte("b"), 5*1024*1024), 0o644); err != nil { 193 | t.Fatalf("failed to create large file: %v", err) 194 | } 195 | 196 | cfg := config.Config{ 197 | HashAlgorithm: dfs.HashSHA256, 198 | SkipVirtualFS: true, 199 | MaxFileSize: 2048, // 2 KiB 200 | } 201 | 202 | paths := collectRelativePaths(t, root, cfg) 203 | expectPathsEqual(t, paths, []string{"small.dat"}) 204 | } 205 | 206 | func collectRelativePaths(t *testing.T, root string, cfg config.Config) []string { 207 | t.Helper() 208 | 209 | dFiles := make(chan *dfs.Dfile, 16) 210 | walker := NewDWalker([]string{root}, dFiles, cfg) 211 | ctx, cancel := context.WithCancel(context.Background()) 212 | defer cancel() 213 | 214 | walker.Run(ctx) 215 | 216 | var names []string 217 | for df := range dFiles { 218 | rel, err := filepath.Rel(root, df.FileName()) 219 | if err != nil { 220 | t.Fatalf("failed to compute relative path: %v", err) 221 | } 222 | names = append(names, filepath.ToSlash(rel)) 223 | } 224 | 225 | sort.Strings(names) 226 | return names 227 | } 228 | 229 | func expectPathsEqual(t *testing.T, got []string, want []string) { 230 | t.Helper() 231 | if len(got) != len(want) { 232 | t.Fatalf("unexpected path count: got %d want %d (values=%v)", len(got), len(want), got) 233 | } 234 | for i := range want { 235 | if got[i] != want[i] { 236 | t.Fatalf("unexpected paths: got %v want %v", got, want) 237 | } 238 | } 239 | } 240 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dskDitto 2 | 3 | [![Go 
Reference](https://pkg.go.dev/badge/github.com/jdefrancesco/dskDitto.svg)](https://pkg.go.dev/github.com/jdefrancesco/dskDitto) 4 | [![Go Report Card](https://goreportcard.com/badge/github.com/jdefrancesco/dskDitto)](https://goreportcard.com/report/github.com/jdefrancesco/dskDitto) 5 | 6 | ![dskDitto gnome logo](./docs/gnome-small.png) 7 | 8 | `dskDitto` is a fast, parallel duplicate-file detector with an optional interactive terminal UI that lets you review, keep, or safely delete redundant files. 9 | 10 | ## Features 11 | 12 | - Concurrent directory walker tuned for large trees and multi-core systems 13 | - Pluggable hashing with SHA-256 (default) or BLAKE3 14 | - Multiple output modes: TUI, bullet lists, or text-friendly dumps 15 | - CSV and JSON output supported via flags. 16 | - Optional automated duplicate removal with confirmation safety rails 17 | - Profiling toggles and micro-benchmarks for power users 18 | 19 | ## Install 20 | 21 | Install straight from source using Go 1.22+: 22 | 23 | ```bash 24 | go install github.com/jdefrancesco/dskDitto/cmd/dskDitto@latest 25 | ``` 26 | 27 | This drops the binary at `$(go env GOPATH)/bin/dskDitto` (or `~/go/bin` by default). 28 | 29 | Prefer cloning? Build locally via Make: 30 | 31 | ```bash 32 | git clone https://github.com/jdefrancesco/dskDitto 33 | cd dskDitto 34 | make 35 | ``` 36 | 37 | The resulting binary lives in `bin/dskDitto`. Add it to your `$PATH` or run it from the repo root. 38 | 39 | ## Usage 40 | 41 | ```bash 42 | dskDitto [options] PATH... 
43 | ``` 44 | 45 | Common flags: 46 | 47 | | Flag | Description | 48 | | ---- | ----------- | 49 | | `--min-size ` | Ignore files smaller than the provided size | 50 | | `--max-size ` | Skip files larger than the provided size (default 4 GiB) | 51 | | `--hidden` | Include dot files and dot-directories | 52 | | `--no-symlinks` | Skip symbolic links | 53 | | `--empty` | Include zero-byte files | 54 | | `--include-vfs` | Include virtual filesystem directories such as `/proc` or `/dev` | 55 | | `--no-recurse` | Restrict the scan to the provided paths only | 56 | | `--depth ` | Limit recursion to `` directories below the starting paths | 57 | | `--text`, `--bullet` | Render duplicates without launching the TUI | 58 | | `--remove ` | Operate on duplicates, keeping the first `` entries per group | 59 | | `--link` | With `--remove`, convert extra duplicates to symlinks instead of deleting them | 60 | | `--hash ` | Select hash algorithm: `sha256` (default) or `blake3` | 61 | 62 | Press `Ctrl+C` at any time to abort a scan. When duplicates are removed or converted, a confirmation dialog prevents accidental mass changes. 63 | 64 | ### Duplicate removal and symlink conversion 65 | 66 | `dskDitto` never deletes or rewrites anything unless you explicitly ask it to with `--remove`. 67 | 68 | - **Dry / interactive modes:** by default (or with `--text` / `--bullet`) the tool only reports duplicates. 69 | - **Delete extras:** use `--remove ` to delete all but `` files in each duplicate group. 70 | - **Convert extras to symlinks:** combine `--remove --link` to replace extra duplicates with symlinks pointing at one kept file per group. 71 | 72 | On Unix-like systems, multiple hard links to the same underlying file are treated as a single entry during scanning: `dskDitto` hashes the content once and does not report those hard-link paths as separate space-wasting duplicates. 
73 | 74 | When using `--link`, the on-disk layout after the operation looks like this for a group of 3 identical files and `--remove 1 --link`: 75 | 76 | ```text 77 | /path/to/keep/file.txt # original file kept 78 | /path/to/dup/file-copy.txt -> /path/to/keep/file.txt (symlink) 79 | /another/location/file.txt -> /path/to/keep/file.txt (symlink) 80 | ``` 81 | 82 | In the TUI, files that are symlinks are annotated with a `[symlink]` suffix so you can see which entries were converted. 83 | 84 | ### Hash algorithms 85 | 86 | By default, `dskDitto` uses SHA-256 for content hashing: 87 | 88 | - **SHA-256 (`--hash sha256`)**: conservative, widely-supported choice with strong collision guarantees. 89 | - **BLAKE3 (`--hash blake3`)**: Under many circumstances this is significantly faster on modern CPUs. However, on macOS `SHA256` is fine tuned and out performs `BLAKE3` most of the time. Thus, we leave `SHA-256` as the default for now. 90 | 91 | 92 | ## Examples 93 | 94 | Scan your home directory and interactively review duplicates: 95 | 96 | ```bash 97 | dskDitto $HOME 98 | ``` 99 | 100 | List duplicates for scripting or grepping, without launching the TUI: 101 | 102 | ```bash 103 | dskDitto --text ~/Pictures ~/Movies | grep "\.jpg$" 104 | ``` 105 | 106 | Find and safely delete duplicates larger than 100 MiB, keeping one copy per group: 107 | 108 | ```bash 109 | dskDitto --min-size 100MiB --remove 1 /mnt/big-disk 110 | ``` 111 | 112 | Shrink a media library by converting duplicates into symlinks instead of deleting them: 113 | 114 | ```bash 115 | dskDitto --remove 1 --link ~/Media 116 | ``` 117 | 118 | Export duplicate information to CSV or JSON for offline analysis: 119 | 120 | ```bash 121 | dskDitto --csv-out dupes.csv ~/Photos 122 | dskDitto --json-out dupes.json ~/Projects 123 | ``` 124 | 125 | ### Recipes 126 | 127 | - **Clean a downloads folder but keep one copy of each installer:** 128 | 129 | ```bash 130 | dskDitto --min-size 10MiB --remove 1 ~/Downloads 131 | ``` 
132 | 133 | - **Deduplicate a photo drive while preserving directory layout with symlinks:** 134 | 135 | ```bash 136 | dskDitto --remove 1 --link /Volumes/photo-archive 137 | ``` 138 | 139 | - **Hunt for big redundant media files only:** 140 | 141 | ```bash 142 | dskDitto --min-size 500MiB --text ~/Movies ~/TV 143 | ``` 144 | 145 | - **Use BLAKE3** 146 | 147 | >*NOTE:* On *macOS*, `Blake3` will actually perform **worse** than `SHA256` hence, we leave it as default for time being. `Blake3's` implementation may improve in the future, possibly out performing `SHA256`. 148 | 149 | ```bash 150 | dskDitto --hash blake3 --min-size 10MiB --text /mnt/big-disk 151 | ``` 152 | 153 | - **Feed duplicate groups into another tool via CSV:** 154 | 155 | ```bash 156 | dskDitto --csv-out dupes.csv /data 157 | ``` 158 | 159 | ## Configuration 160 | 161 | - **Log level:** set `DSKDITTO_LOG_LEVEL` to `debug`, `info`, `warn`, etc. 162 | - **Default options:** wrap `dskDitto` in a shell alias or script with your favorite defaults. 163 | - **Profiling:** supply `--pprof host:port` to expose Go's `pprof` endpoints while the tool runs. 164 | 165 | ## Screenshots 166 | 167 | ### `dskDitto` rendered as a table 168 | 169 | ![Screenshot: pretty table output](./ss/ss-pretty.png) 170 | 171 | ### TUI for interactively selecting files to remove or keep 172 | 173 | ![Screenshot: interactive TUI](./ss/ss-tui.png) 174 | 175 | ### Confirmation window keeps you from deleting the wrong files 176 | 177 | ![Confirmation dialog screenshot](./ss/ss-confirm.png) 178 | 179 | ### Legacy UI shots 180 | 181 | ![Legacy screenshot 3](./ss/dskDitto-ss-one.png) 182 | 183 | ![Legacy screenshot 4](./ss/dskDitto-ss-two.png) 184 | 185 | ## Development 186 | 187 | ```bash 188 | make debug # Create development build 189 | make test # go test ./... 
190 | make bench # run benchmarks (adds -benchmem) 191 | make bench-profile # capture cpu.prof and mem.prof into the repo root 192 | make pprof-web # launch go tool pprof with HTTP UI for the latest profile 193 | ``` 194 | 195 | ## Contributing 196 | 197 | Issues and PRs are welcome. Open an issue if you have ideas for improvements, new output modes, or performance tweaks. 198 | 199 | ## License 200 | 201 | This project is released under the Apache license. See [`LICENSE`](LICENSE) for details. 202 | -------------------------------------------------------------------------------- /internal/dmap/dmap.go: -------------------------------------------------------------------------------- 1 | // Implement our primary data structure Dmap. 2 | // 3 | // Dmap will be a hash map with roughly the following simple structure: 4 | // 5 | // { HashDigest --> [fileClone1, fileClone2, etc...]} 6 | // 7 | // That is, SHA256 hash of file will serve as our hash map key, which maps to a simple list of file names. 8 | package dmap 9 | 10 | import ( 11 | "encoding/hex" 12 | "errors" 13 | "fmt" 14 | "math" 15 | "os" 16 | 17 | "github.com/jdefrancesco/dskDitto/internal/dfs" 18 | "github.com/jdefrancesco/dskDitto/internal/dsklog" 19 | 20 | "github.com/pterm/pterm" 21 | ) 22 | 23 | type Digest [32]byte 24 | 25 | // DigestFromHex converts a hex string to Digest 26 | func DigestFromHex(hexStr string) (Digest, error) { 27 | var hash Digest 28 | bytes, err := hex.DecodeString(hexStr) 29 | if err != nil { 30 | return hash, err 31 | } 32 | if len(bytes) != 32 { 33 | return hash, fmt.Errorf("invalid hash length: expected 32 bytes, got %d", len(bytes)) 34 | } 35 | copy(hash[:], bytes) 36 | return hash, nil 37 | } 38 | 39 | // Initial size of our map. This will grow, but reasonable starting size helps performance. 40 | const mapInitSize = 4096 41 | 42 | // Dmap structure will hold our file duplication data. 
// It is the primary data structure that will house the results
// that will eventually be returned to the user.
type Dmap struct {
	// Primary map structure. Keys are content digests; values are the
	// paths of every scanned file observed with that digest.
	filesMap map[Digest][]string

	// Files deferred for reasons such as size are stored here for later processing.
	deferredFiles []string
	// Number of files in our map.
	fileCount uint
	// Batches of duplicate files.
	// batchCount uint
	// Minimum group size before a digest's files count as duplicates
	// (NewDmap clamps this to at least 2).
	minDuplicates uint
}
107 | func (d *Dmap) ShowResultsBullet() { 108 | 109 | var bl []pterm.BulletListItem 110 | for hash, files := range d.filesMap { 111 | 112 | if uint(len(files)) < d.minDuplicates { 113 | continue 114 | } 115 | h := fmt.Sprintf("%x", hash) 116 | pterm.Println(pterm.Green("Hash: ") + pterm.Cyan(h)) 117 | for _, f := range files { 118 | blContent := pterm.BulletListItem{Level: 0, Text: f} 119 | bl = append(bl, blContent) 120 | } 121 | pterm.DefaultBulletList.WithItems(bl).Render() 122 | bl = nil 123 | } 124 | 125 | } 126 | 127 | func (d *Dmap) IsEmpty() bool { 128 | return d.MapSize() == 0 129 | } 130 | 131 | // MapSize returns number of entries in the map. 132 | func (d *Dmap) MapSize() int { 133 | return len(d.filesMap) 134 | } 135 | 136 | // FileCount will return the number of files our map currently 137 | // references. 138 | func (d *Dmap) FileCount() uint { 139 | return d.fileCount 140 | } 141 | 142 | // Get will get slice of files associated with hash. 143 | func (d *Dmap) Get(hash Digest) (files []string, err error) { 144 | res, ok := d.filesMap[hash] 145 | if !ok { 146 | return []string{}, err 147 | } 148 | 149 | return res, nil 150 | } 151 | 152 | // GetMap will return the map. 153 | func (d *Dmap) GetMap() map[Digest][]string { 154 | return d.filesMap 155 | } 156 | 157 | // MinDuplicates returns the current threshold for displaying duplicate groups. 158 | func (d *Dmap) MinDuplicates() uint { 159 | return d.minDuplicates 160 | } 161 | 162 | // RemoveDuplicates removes duplicates, leaving at most "keep" files per group. Returns removed file paths. 
func (d *Dmap) RemoveDuplicates(keep uint) ([]string, error) {
	// Refuse to delete every copy of a file: at least one must survive.
	if keep == 0 {
		return nil, errors.New("keep count must be greater than zero")
	}

	// Guard against integer overflow
	if keep > uint(math.MaxInt) {
		dsklog.Dlogger.Debug("keep value overflow")
		return nil, fmt.Errorf("keep count of %d exceeds maximum %d", keep, math.MaxInt)
	}
	keepThreshold := int(keep)

	var removed []string
	var errs []error

	for hash, files := range d.filesMap {
		// Groups already at or below the keep limit are left untouched.
		if uint(len(files)) <= keep {
			continue
		}

		keepCount := keepThreshold
		if keepCount > len(files) {
			keepCount = len(files)
		}

		// The first keepCount entries of the group survive; files whose
		// removal fails are re-added to this slice below.
		survivors := append([]string(nil), files[:keepCount]...)

		for _, path := range files[keepCount:] {
			if err := os.Remove(path); err != nil {
				// Keep the path in the group so the map still reflects disk state.
				errs = append(errs, fmt.Errorf("remove %s: %w", path, err))
				survivors = append(survivors, path)
				continue
			}
			dsklog.Dlogger.Infof("Removed duplicate file: %s", path)
			removed = append(removed, path)
			if d.fileCount > 0 {
				d.fileCount--
			}
		}

		if len(survivors) == 0 {
			delete(d.filesMap, hash)
			continue
		}

		d.filesMap[hash] = survivors
	}

	// Partial success is possible: removed paths are returned alongside
	// the joined errors for the ones that failed.
	if len(errs) > 0 {
		return removed, errors.Join(errs...)
	}

	return removed, nil
}

// LinkDuplicates converts duplicates to symbolic links, leaving at most "keep" real files per group.
// It removes each extra duplicate and recreates it as a symlink pointing to one of the kept files.
// It returns the paths that were successfully converted to symlinks.
221 | func (d *Dmap) LinkDuplicates(keep uint) ([]string, error) { 222 | if keep == 0 { 223 | return nil, errors.New("keep count must be greater than zero") 224 | } 225 | 226 | // Guard against integer overflow 227 | if keep > uint(math.MaxInt) { 228 | dsklog.Dlogger.Debug("keep value overflow") 229 | return nil, fmt.Errorf("keep count of %d exceeds maximum %d", keep, math.MaxInt) 230 | } 231 | keepThreshold := int(keep) 232 | 233 | var linked []string 234 | var errs []error 235 | 236 | for hash, files := range d.filesMap { 237 | if uint(len(files)) <= keep { 238 | continue 239 | } 240 | 241 | keepCount := keepThreshold 242 | if keepCount > len(files) { 243 | keepCount = len(files) 244 | } 245 | 246 | // Survivors remain as real files. We point all converted symlinks at the first survivor. 247 | survivors := append([]string(nil), files[:keepCount]...) 248 | target := survivors[0] 249 | 250 | for _, path := range files[keepCount:] { 251 | if err := os.Remove(path); err != nil { 252 | errs = append(errs, fmt.Errorf("remove %s: %w", path, err)) 253 | survivors = append(survivors, path) 254 | continue 255 | } 256 | if err := os.Symlink(target, path); err != nil { 257 | errs = append(errs, fmt.Errorf("symlink %s -> %s: %w", path, target, err)) 258 | // Try to preserve logical membership if the symlink creation fails. 259 | survivors = append(survivors, path) 260 | continue 261 | } 262 | dsklog.Dlogger.Infof("Converted duplicate to symlink: %s -> %s", path, target) 263 | linked = append(linked, path) 264 | } 265 | 266 | if len(survivors) == 0 { 267 | delete(d.filesMap, hash) 268 | continue 269 | } 270 | 271 | d.filesMap[hash] = survivors 272 | } 273 | 274 | if len(errs) > 0 { 275 | return linked, errors.Join(errs...) 
276 | } 277 | 278 | return linked, nil 279 | } 280 | -------------------------------------------------------------------------------- /pkg/utils/ansiart_test.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "image" 7 | "image/color" 8 | "io" 9 | "math" 10 | "os" 11 | "strconv" 12 | "strings" 13 | "testing" 14 | ) 15 | 16 | func TestResizeNearest(t *testing.T) { 17 | src := image.NewRGBA(image.Rect(0, 0, 2, 2)) 18 | src.Set(0, 0, color.RGBA{10, 0, 0, 255}) 19 | src.Set(1, 0, color.RGBA{0, 20, 0, 255}) 20 | src.Set(0, 1, color.RGBA{0, 0, 30, 255}) 21 | src.Set(1, 1, color.RGBA{40, 50, 60, 255}) 22 | 23 | dst := resizeNearest(src, 4, 4) 24 | 25 | if got, want := dst.Bounds().Dx(), 4; got != want { 26 | t.Fatalf("unexpected width: got %d want %d", got, want) 27 | } 28 | if got, want := dst.Bounds().Dy(), 4; got != want { 29 | t.Fatalf("unexpected height: got %d want %d", got, want) 30 | } 31 | 32 | assertColorClose(t, color.RGBAModel.Convert(dst.At(0, 0)).(color.RGBA), src.RGBAAt(0, 0)) 33 | assertColorClose(t, color.RGBAModel.Convert(dst.At(3, 3)).(color.RGBA), src.RGBAAt(1, 1)) 34 | } 35 | 36 | func TestApplyShadow(t *testing.T) { 37 | src := image.NewRGBA(image.Rect(0, 0, 5, 5)) 38 | baseColor := color.RGBA{100, 150, 200, 255} 39 | src.Set(1, 1, baseColor) 40 | 41 | out := applyShadow(src) 42 | 43 | if got := color.RGBAModel.Convert(out.At(1, 1)).(color.RGBA); got != baseColor { 44 | t.Fatalf("original pixel altered: got %v want %v", got, baseColor) 45 | } 46 | 47 | shadowPixel := out.At(3, 2) 48 | r, g, b, a := shadowPixel.RGBA() 49 | if a == 0 { 50 | t.Fatalf("expected shadow pixel alpha to be >0, got 0") 51 | } 52 | if r == 0 && g == 0 && b == 0 { 53 | t.Fatalf("expected shadow pixel to carry color information, got %v", shadowPixel) 54 | } 55 | } 56 | 57 | func TestQuantizeWithDither(t *testing.T) { 58 | got := quantizeWithDither(128, 2, 0, 0) 59 | const want = 
uint8(64) 60 | if got != want { 61 | t.Fatalf("unexpected quantized value: got %d want %d", got, want) 62 | } 63 | } 64 | 65 | func TestRenderImageANSI(t *testing.T) { 66 | img := image.NewRGBA(image.Rect(0, 0, 1, 2)) 67 | img.Set(0, 0, color.RGBA{255, 0, 0, 255}) 68 | img.Set(0, 1, color.RGBA{0, 0, 255, 255}) 69 | 70 | opts := Options{Width: 1, EnableShadow: false, PaletteBits: 8} 71 | output := renderImageANSIToString(t, img, opts) 72 | var topR, topG, topB, bottomR, bottomG, bottomB int 73 | format := "\x1b[38;2;%d;%d;%dm\x1b[48;2;%d;%d;%dm▀\x1b[0m\n\x1b[0m" 74 | if _, err := fmt.Sscanf(output, format, &topR, &topG, &topB, &bottomR, &bottomG, &bottomB); err != nil { 75 | t.Fatalf("unexpected ANSI sequence: %q (parse error: %v)", output, err) 76 | } 77 | if topR < 240 || topG != 0 || topB != 0 { 78 | t.Fatalf("unexpected top color values: R=%d G=%d B=%d", topR, topG, topB) 79 | } 80 | if bottomR != 0 || bottomG != 0 || bottomB < 240 { 81 | t.Fatalf("unexpected bottom color values: R=%d G=%d B=%d", bottomR, bottomG, bottomB) 82 | } 83 | } 84 | 85 | func TestRenderImageANSIWithPNG(t *testing.T) { 86 | img := mustLoadFixtureImageByName(t, "image.png") 87 | opts := Options{Width: 80, EnableShadow: false, PaletteBits: 8} 88 | output := renderImageANSIToString(t, img, opts) 89 | t.Logf("\n%s", output) 90 | if !strings.Contains(output, "▀") { 91 | t.Fatalf("expected output to contain block character, got %q", output) 92 | } 93 | 94 | expectedLines := expectedLineCount(img, opts) 95 | 96 | actualLines := strings.Count(output, "\n") 97 | if actualLines != expectedLines { 98 | t.Fatalf("unexpected line count: got %d want %d", actualLines, expectedLines) 99 | } 100 | 101 | } 102 | 103 | func TestRenderImageANSIWithWarmPalette(t *testing.T) { 104 | img := mustLoadFixtureImageByName(t, "warm_palette.png") 105 | opts := Options{Width: 80, EnableShadow: false, PaletteBits: 8} 106 | output := renderImageANSIToString(t, img, opts) 107 | 108 | colors := parseFGColors(output) 109 | 
if len(colors) == 0 { 110 | t.Fatalf("expected foreground colors in output") 111 | } 112 | 113 | var minR, maxR, minG, maxG, minB, maxB int = 255, 0, 255, 0, 255, 0 114 | for _, c := range colors { 115 | if c[0] < minR { 116 | minR = c[0] 117 | } 118 | if c[0] > maxR { 119 | maxR = c[0] 120 | } 121 | if c[1] < minG { 122 | minG = c[1] 123 | } 124 | if c[1] > maxG { 125 | maxG = c[1] 126 | } 127 | if c[2] < minB { 128 | minB = c[2] 129 | } 130 | if c[2] > maxB { 131 | maxB = c[2] 132 | } 133 | } 134 | 135 | if maxR < 220 || maxR-minR < 30 { 136 | t.Fatalf("expected warm reds with some variation, got range %d-%d", minR, maxR) 137 | } 138 | if maxG < 80 || minG > 70 { 139 | t.Fatalf("expected green range around warm palette, got %d-%d", minG, maxG) 140 | } 141 | if maxB > 150 || minB < 20 { 142 | t.Fatalf("expected constrained blues, got %d-%d", minB, maxB) 143 | } 144 | } 145 | 146 | func TestRenderImageANSIDithering(t *testing.T) { 147 | img := generateGradientImage(32, 32) 148 | 149 | high := renderImageANSIToString(t, img, Options{Width: 32, EnableShadow: false, PaletteBits: 8}) 150 | low := renderImageANSIToString(t, img, Options{Width: 32, EnableShadow: false, PaletteBits: 3}) 151 | 152 | if high == low { 153 | t.Fatalf("expected quantization to change ANSI output") 154 | } 155 | 156 | highColors := parseFGColorSet(high) 157 | lowColors := parseFGColorSet(low) 158 | if len(lowColors) >= len(highColors) { 159 | t.Fatalf("expected fewer unique colors with reduced palette: high=%d low=%d", len(highColors), len(lowColors)) 160 | } 161 | } 162 | 163 | func TestRenderImageANSIToTerminal(t *testing.T) { 164 | if _, ok := os.LookupEnv("DSKDITTO_PRINT_ANSI_ART"); !ok { 165 | t.Skip("set DSKDITTO_PRINT_ANSI_ART=1 to view ANSI art output") 166 | } 167 | 168 | tty, err := os.OpenFile("/dev/tty", os.O_WRONLY, 0) 169 | if err != nil { 170 | t.Skipf("unable to open /dev/tty: %v", err) 171 | } 172 | defer tty.Close() 173 | 174 | origStdout := os.Stdout 175 | defer func() { 
os.Stdout = origStdout }() 176 | os.Stdout = tty 177 | 178 | img := mustLoadFixtureImageByName(t, "image.png") 179 | opts := Options{Width: 80, EnableShadow: false, PaletteBits: 8} 180 | RenderImageANSI(img, opts) 181 | fmt.Fprintln(tty) 182 | } 183 | 184 | func expectedLineCount(img image.Image, opts Options) int { 185 | b := img.Bounds() 186 | width := b.Dx() 187 | height := b.Dy() 188 | 189 | outW := opts.Width 190 | if outW <= 0 || outW > width { 191 | outW = width 192 | } 193 | 194 | scale := float64(outW) / float64(width) 195 | outH := int(math.Round(float64(height) * scale)) 196 | if outH%2 != 0 { 197 | outH++ 198 | } 199 | 200 | resized := resizeNearest(img, outW, outH) 201 | if opts.EnableShadow { 202 | resized = applyShadow(resized) 203 | } 204 | cropped := cropOpaqueRegion(resized) 205 | croppedH := cropped.Bounds().Dy() 206 | if croppedH%2 != 0 { 207 | croppedH++ 208 | } 209 | return croppedH / 2 210 | } 211 | 212 | func assertColorClose(t *testing.T, got, want color.RGBA) { 213 | t.Helper() 214 | if abs(int(got.R)-int(want.R)) > 3 || abs(int(got.G)-int(want.G)) > 3 || abs(int(got.B)-int(want.B)) > 3 || abs(int(got.A)-int(want.A)) > 3 { 215 | t.Fatalf("color mismatch: got %v want %v", got, want) 216 | } 217 | } 218 | 219 | func abs(v int) int { 220 | if v < 0 { 221 | return -v 222 | } 223 | return v 224 | } 225 | 226 | func renderImageANSIToString(t testing.TB, img image.Image, opts Options) string { 227 | t.Helper() 228 | 229 | r, w, err := os.Pipe() 230 | if err != nil { 231 | t.Fatalf("failed to create pipe: %v", err) 232 | } 233 | defer r.Close() 234 | 235 | origStdout := os.Stdout 236 | defer func() { os.Stdout = origStdout }() 237 | os.Stdout = w 238 | 239 | var buf bytes.Buffer 240 | copyDone := make(chan error, 1) 241 | go func() { 242 | _, err := io.Copy(&buf, r) 243 | copyDone <- err 244 | }() 245 | 246 | RenderImageANSI(img, opts) 247 | 248 | w.Close() 249 | if err := <-copyDone; err != nil { 250 | t.Fatalf("failed to read render output: %v", 
// parseFGColors extracts every 24-bit foreground color from the ANSI
// escape sequences in output, returning each as an [r, g, b] triple.
// Malformed or truncated sequences are silently skipped.
func parseFGColors(output string) [][]int {
	segments := strings.Split(output, "\x1b[38;2;")
	colors := make([][]int, 0, len(segments))
	for _, seg := range segments[1:] {
		end := strings.Index(seg, "m")
		if end < 0 {
			continue
		}
		fields := strings.Split(seg[:end], ";")
		if len(fields) < 3 {
			continue
		}
		rgb := make([]int, 3)
		valid := true
		for i := 0; i < 3; i++ {
			v, err := strconv.Atoi(fields[i])
			if err != nil {
				valid = false
				break
			}
			rgb[i] = v
		}
		if !valid {
			continue
		}
		colors = append(colors, rgb)
	}
	return colors
}
| } 326 | } 327 | return img 328 | } 329 | -------------------------------------------------------------------------------- /internal/dwalk/dwalk.go: -------------------------------------------------------------------------------- 1 | // dwalk is a parallel, fast directory walker written for the needs of dskditto 2 | package dwalk 3 | 4 | import ( 5 | "context" 6 | "os" 7 | "path/filepath" 8 | "runtime" 9 | "strings" 10 | "sync" 11 | 12 | "github.com/jdefrancesco/dskDitto/internal/config" 13 | "github.com/jdefrancesco/dskDitto/internal/dfs" 14 | "github.com/jdefrancesco/dskDitto/internal/dsklog" 15 | 16 | "golang.org/x/sync/semaphore" 17 | ) 18 | 19 | const ( 20 | MAX_FILE_SIZE uint = 1024 * 1024 * 1024 * 4 // 4GB 21 | DEFAULT_DIR_CONCURRENCY = 50 // Optimal balance for directory reading 22 | ) 23 | 24 | // These are generally special VFS directories a user normally wouldn't want to recurse into nor touch. 25 | // In future versions we can expand user config abilities. 26 | var defaultSkipDirPrefixes = []string{ 27 | "/proc", 28 | "/sys", 29 | "/dev", 30 | "/run", 31 | "/var/run", 32 | } 33 | 34 | // DWalk is our primary object for traversing filesystem 35 | // in a parallel manner. 36 | type DWalk struct { 37 | rootDirs []string 38 | wg sync.WaitGroup 39 | 40 | // Channel used to communicate with main monitor goroutine. 41 | dFiles chan<- *dfs.Dfile 42 | sem *semaphore.Weighted 43 | skipHidden bool 44 | skipEmpty bool 45 | skipSymLinks bool 46 | minFileSize uint 47 | maxFileSize uint 48 | hashAlgo dfs.HashAlgorithm 49 | skipDirPrefixes []string 50 | maxDepth int 51 | 52 | // seenFiles tracks unique files by device+inode so multiple hardlinks 53 | // are treated as a single file during scanning. 54 | seenMu sync.Mutex 55 | seenFiles map[fileIdentity]struct{} 56 | } 57 | 58 | // NewDWalker returns a new DWalk instance that accepts traversal options. 59 | // TODO: Fix blake3 support. Currently only SHA256 is properly implemented. 
60 | func NewDWalker(rootDirs []string, dFiles chan<- *dfs.Dfile, cfg config.Config) *DWalk { 61 | 62 | hashAlgo := cfg.HashAlgorithm 63 | if hashAlgo == "" { 64 | hashAlgo = dfs.HashSHA256 65 | } 66 | 67 | maxSize := cfg.MaxFileSize 68 | if maxSize == 0 { 69 | maxSize = MAX_FILE_SIZE 70 | } 71 | 72 | maxDepth := cfg.MaxDepth 73 | if maxDepth < 0 { 74 | maxDepth = -1 75 | } 76 | 77 | var skipPrefixes []string 78 | if cfg.SkipVirtualFS { 79 | skipPrefixes = normalizeSkipPrefixes(defaultSkipDirPrefixes) 80 | } 81 | 82 | walker := &DWalk{ 83 | rootDirs: rootDirs, 84 | dFiles: dFiles, 85 | skipHidden: cfg.SkipHidden, 86 | skipEmpty: cfg.SkipEmpty, 87 | skipSymLinks: cfg.SkipSymLinks, 88 | minFileSize: cfg.MinFileSize, 89 | maxFileSize: maxSize, 90 | hashAlgo: hashAlgo, 91 | skipDirPrefixes: skipPrefixes, 92 | maxDepth: maxDepth, 93 | seenFiles: make(map[fileIdentity]struct{}), 94 | } 95 | 96 | // Set semaphore to optimal value based on system resources 97 | optimalConcurrency := getOptimalConcurrency() 98 | dsklog.Dlogger.Infof("Setting directory concurrency to %d (based on %d CPUs)", optimalConcurrency, runtime.NumCPU()) 99 | walker.sem = semaphore.NewWeighted(int64(optimalConcurrency)) 100 | return walker 101 | 102 | } 103 | 104 | // Run method kicks off filesystem crawl for file dupes. 105 | func (d *DWalk) Run(ctx context.Context) { 106 | 107 | for _, root := range d.rootDirs { 108 | if d.shouldSkipDir(root) { 109 | dsklog.Dlogger.Infof("Skipping directory %s due to restricted filesystem", root) 110 | continue 111 | } 112 | d.wg.Add(1) 113 | go walkDir(ctx, root, 0, d, d.dFiles) 114 | } 115 | 116 | // Wait for all goroutines to finish. 117 | go func() { 118 | d.wg.Wait() 119 | close(d.dFiles) 120 | }() 121 | 122 | } 123 | 124 | // cancelled polls, checking for cancellation. 
// cancelled polls, checking for cancellation.
func cancelled(ctx context.Context) bool {

	// Non-blocking check: report true only if ctx is already done.
	select {
	case <-ctx.Done():
		return true
	default:
		return false
	}

}

// walkDir recursively walk directories and send files to our monitor go routine
// (in main.go) to be added to the duplication map.
func walkDir(ctx context.Context, dir string, depth int, d *DWalk, dFiles chan<- *dfs.Dfile) {
	// Always mark this goroutine done, and contain any panic so one bad
	// directory cannot take down the whole crawl.
	defer func() {
		if r := recover(); r != nil {
			dsklog.Dlogger.Errorf("Recovered panic while walking directory %s: %v", dir, r)
		}
		d.wg.Done()
	}()

	// Check for cancellation.
	if cancelled(ctx) {
		return
	}

	if d.shouldSkipDir(dir) {
		dsklog.Dlogger.Debugf("Skipping directory %s due to restricted filesystem", dir)
		return
	}

	for _, entry := range dirEntries(ctx, dir, d) {
		// Handle processing of dotfiles (hidden)
		name := entry.Name()
		if d.skipHidden && strings.HasPrefix(name, ".") {
			dsklog.Dlogger.Debugf("Skipping hidden entry: %s", filepath.Join(dir, name))
			continue
		}

		// First symlink check uses the DirEntry type bits (no extra stat).
		if d.skipSymLinks && entry.Type()&os.ModeSymlink != 0 {
			dsklog.Dlogger.Debugf("Skipping symlink: %s", filepath.Join(dir, name))
			continue
		}

		if entry.IsDir() {
			subDir := filepath.Join(dir, name)
			if d.shouldSkipDir(subDir) {
				dsklog.Dlogger.Debugf("Skipping directory %s due to restricted filesystem", subDir)
				continue
			}
			// Depth check happens before descending: maxDepth < 0 means unlimited.
			if d.maxDepth >= 0 && depth >= d.maxDepth {
				dsklog.Dlogger.Debugf("Skipping directory %s due to max depth %d", subDir, d.maxDepth)
				continue
			}
			d.wg.Add(1)
			go walkDir(ctx, subDir, depth+1, d, d.dFiles)
			continue
		}

		info, err := entry.Info()
		if err != nil {
			dsklog.Dlogger.Debugf("Error getting file info for %s: %v", name, err)
			continue
		}

		// Second symlink check against the resolved FileInfo mode.
		if d.skipSymLinks && info.Mode()&os.ModeSymlink != 0 {
			dsklog.Dlogger.Debugf("Skipping symlink (resolved): %s", filepath.Join(dir, name))
			continue
		}

		// Skip non-regular files (sockets, pipes, device files, etc.)
		if !info.Mode().IsRegular() {
			dsklog.Dlogger.Debugf("Skipping non-regular file: %s (mode: %s)", entry.Name(), info.Mode())
			continue
		}

		// Treat multiple hardlinks to the same underlying inode as a single file
		// to avoid redundant hashing and duplicate entries.
		if id, ok := getFileIdentity(info); ok {
			d.seenMu.Lock()
			if _, exists := d.seenFiles[id]; exists {
				d.seenMu.Unlock()
				dsklog.Dlogger.Debugf("Skipping hardlink duplicate: %s", filepath.Join(dir, name))
				continue
			}
			d.seenFiles[id] = struct{}{}
			d.seenMu.Unlock()
		}

		// Check file size properties the user set.
		fileSize := uint(max(info.Size(), 0)) // #nosec G115
		if d.skipEmpty && fileSize == 0 {
			dsklog.Dlogger.Debugf("Skipping empty file: %s", name)
			continue
		}
		if d.minFileSize > 0 && fileSize < d.minFileSize {
			dsklog.Dlogger.Debugf("File %s smaller than minimum. Skipping", name)
			continue
		}
		if d.maxFileSize > 0 && fileSize >= d.maxFileSize {
			dsklog.Dlogger.Infof("File %s larger than maximum. Skipping", name)
			continue
		}

		absFileName := filepath.Join(dir, name)
		if !dfs.CheckFilePerms(absFileName) {
			dsklog.Dlogger.Debugf("Cannot access file. Invalid permissions: %s", absFileName)
			continue
		}

		// NOTE(review): NewDfile errors are silently dropped here — files
		// that fail to hash simply never reach the duplicate map.
		dFileEntry, err := dfs.NewDfile(absFileName, info.Size(), d.hashAlgo)
		if err == nil {
			dFiles <- dFileEntry
		}
	}
}

// dirEntries returns contents of a directory specified by dir.
// The semaphore limits concurrency; preventing system resource
// exhaustion.
245 | func dirEntries(ctx context.Context, dir string, d *DWalk) []os.DirEntry { 246 | 247 | if cancelled(ctx) { 248 | return nil 249 | } 250 | 251 | // Semaphore helps control concurrency. 252 | if err := d.sem.Acquire(ctx, 1); err != nil { 253 | return nil 254 | } 255 | defer d.sem.Release(1) 256 | 257 | entries, err := os.ReadDir(dir) 258 | if err != nil { 259 | dsklog.Dlogger.Errorf("Directory read error: %v", err) 260 | return nil 261 | } 262 | 263 | return entries 264 | } 265 | 266 | // getOptimalConcurrency returns optimal concurrency based on system resources 267 | func getOptimalConcurrency() int { 268 | procs := runtime.GOMAXPROCS(0) 269 | if procs < 1 { 270 | procs = runtime.NumCPU() 271 | } 272 | concurrency := min(procs*4, 128) 273 | 274 | dsklog.Dlogger.Debugf("Directory walker concurrency: %d (procs=%d)", concurrency, procs) 275 | return concurrency 276 | } 277 | 278 | // normalizeSkipPrefixes filters and deduplicates skip prefixes by cleaning their absolute paths, 279 | // discarding empty entries and root-only paths before returning the normalized list. 
280 | func normalizeSkipPrefixes(prefixes []string) []string { 281 | cleaned := make([]string, 0, len(prefixes)) 282 | seen := make(map[string]struct{}, len(prefixes)) 283 | for _, p := range prefixes { 284 | if p == "" { 285 | continue 286 | } 287 | normalized := cleanAbsPath(p) 288 | if normalized == "" || normalized == string(os.PathSeparator) { 289 | continue 290 | } 291 | if _, ok := seen[normalized]; ok { 292 | continue 293 | } 294 | seen[normalized] = struct{}{} 295 | cleaned = append(cleaned, normalized) 296 | } 297 | return cleaned 298 | } 299 | 300 | func (d *DWalk) shouldSkipDir(path string) bool { 301 | normalized := cleanAbsPath(path) 302 | if normalized == "" { 303 | return false 304 | } 305 | for _, prefix := range d.skipDirPrefixes { 306 | if normalized == prefix || strings.HasPrefix(normalized, prefix+string(os.PathSeparator)) { 307 | return true 308 | } 309 | } 310 | return false 311 | } 312 | 313 | func cleanAbsPath(path string) string { 314 | if path == "" { 315 | return "" 316 | } 317 | abs, err := filepath.Abs(path) 318 | if err != nil { 319 | abs = filepath.Clean(path) 320 | } else { 321 | abs = filepath.Clean(abs) 322 | } 323 | return abs 324 | } 325 | -------------------------------------------------------------------------------- /internal/dmap/dmap_test.go: -------------------------------------------------------------------------------- 1 | package dmap 2 | 3 | import ( 4 | "crypto/sha256" 5 | "encoding/csv" 6 | "encoding/json" 7 | "errors" 8 | "fmt" 9 | "os" 10 | "path/filepath" 11 | "strconv" 12 | "testing" 13 | 14 | "github.com/jdefrancesco/dskDitto/internal/dfs" 15 | "github.com/jdefrancesco/dskDitto/internal/dsklog" 16 | ) 17 | 18 | // setupLogging initializes the logger and other necessary components 19 | func setupLogging() { 20 | // Initialize the logger to prevent nil pointer dereference 21 | dsklog.InitializeDlogger("/dev/null") 22 | } 23 | 24 | // Test Dmap type.. Eventually I should make these tests far more robust. 
25 | // For now, lets just get things working so I can see all the pieces in place. 26 | func TestNewDmap(t *testing.T) { 27 | 28 | setupLogging() 29 | 30 | dmap, err := NewDmap(0) 31 | if err != nil { 32 | t.Errorf("Couldn't create new dmap: %s", err) 33 | } 34 | 35 | var dfiles = []struct { 36 | fileName string 37 | fileSize int64 38 | fileHash string 39 | }{ 40 | {"test_files/fileOne.bin", 101, "3fa2a6033f2b531361adf2bf300774fd1b75a5db13828e387d6e4c3c03400d61"}, 41 | {"test_files/fileTwo.bin", 3, "f2e0e2beb73c21338a1dc872cd7b900c24c4547b6d9ae882e02bcd4257ac7bd4"}, 42 | {"test_files/fileThree.bin", 0, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"}, 43 | {"test_files/fileFour.bin", 1, "5ee0dd4d4840229fab4a86438efbcaf1b9571af94f5ace5acc94de19e98ea9ab"}, 44 | } 45 | 46 | for _, f := range dfiles { 47 | df, dfErr := dfs.NewDfile(f.fileName, f.fileSize, dfs.HashSHA256) 48 | if dfErr != nil { 49 | t.Errorf("Failed to read file %s: %v", f.fileName, dfErr) 50 | } 51 | 52 | dmap.Add(df) 53 | } 54 | 55 | dmap.PrintDmap() 56 | 57 | size := dmap.MapSize() 58 | 59 | // Size should be four because we have one duplciate entry. 
60 | if size != 4 { 61 | t.Errorf("Size incorrect got %d\n", size) 62 | } 63 | 64 | fmt.Println("Testing dmap.Get()") 65 | hash, hexErr := DigestFromHex("3fa2a6033f2b531361adf2bf300774fd1b75a5db13828e387d6e4c3c03400d61") 66 | if hexErr != nil { 67 | t.Errorf("Error converting hex to hash: %v", hexErr) 68 | } 69 | files, getErr := dmap.Get(hash) 70 | if getErr != nil { 71 | t.Errorf("Error gettings hash from map") 72 | } 73 | 74 | if len(files) != len(dfiles) { 75 | fmt.Println(files) 76 | } 77 | 78 | } 79 | 80 | func TestNewFileCount(t *testing.T) { 81 | 82 | fmap := NewDFileSizeCache() 83 | if fmap == nil { 84 | t.Errorf("Couldn't create object.") 85 | } 86 | 87 | } 88 | 89 | func FuzzDmapAdd(f *testing.F) { 90 | // Add seed inputs for the fuzzer 91 | f.Add("file.txt", int64(123)) 92 | f.Add("test.bin", int64(0)) 93 | f.Add("large_file.dat", int64(1024*1024)) 94 | f.Add("", int64(0)) 95 | f.Add("very_long_filename_that_might_cause_issues.txt", int64(999999)) 96 | 97 | f.Fuzz(func(t *testing.T, name string, size int64) { 98 | // Skip invalid inputs that would cause issues 99 | if len(name) > 512 || size < 0 || size > 1024*1024*1024 { 100 | return 101 | } 102 | 103 | // Create a new Dmap for each test 104 | dm, err := NewDmap(0) 105 | if err != nil { 106 | t.Fatalf("Failed to create Dmap: %v", err) 107 | } 108 | 109 | // Create a temporary file for testing if name is not empty 110 | if name == "" { 111 | name = "fuzz_temp_file" 112 | } 113 | 114 | // Test that Add doesn't panic with various inputs 115 | defer func() { 116 | if r := recover(); r != nil { 117 | t.Errorf("Dmap operations panicked with input name=%q, size=%d: %v", name, size, r) 118 | } 119 | }() 120 | 121 | testHash := Digest{0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 122 | 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0x10, 123 | 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 124 | 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20} 125 | 126 | // Test adding files to the map 127 | dm.filesMap[testHash] = 
append(dm.filesMap[testHash], name) 128 | 129 | // Test Get operation 130 | files, err := dm.Get(testHash) 131 | if err != nil { 132 | t.Errorf("Get failed: %v", err) 133 | } 134 | 135 | if len(files) == 0 { 136 | t.Errorf("Expected at least one file, got none") 137 | } 138 | 139 | // Test MapSize 140 | mapSize := dm.MapSize() 141 | if mapSize < 0 { 142 | t.Errorf("MapSize returned negative value: %d", mapSize) 143 | } 144 | }) 145 | } 146 | 147 | func FuzzDigestFromHex(f *testing.F) { 148 | f.Add("3fa2a6033f2b531361adf2bf300774fd1b75a5db13828e387d6e4c3c03400d61") 149 | f.Add("deadbeef1234567890abcdef1234567890abcdef1234567890abcdef12345678") 150 | f.Add("") 151 | f.Add("invalid_hex") 152 | f.Add("too_short") 153 | f.Add("way_too_long_hex_string_that_exceeds_normal_hash_length_by_far_and_should_be_rejected") 154 | 155 | f.Fuzz(func(t *testing.T, hexStr string) { 156 | defer func() { 157 | if r := recover(); r != nil { 158 | t.Errorf("DigestFromHex panicked with input %q: %v", hexStr, r) 159 | } 160 | }() 161 | 162 | hash, err := DigestFromHex(hexStr) 163 | 164 | // Case 1: Expect valid only when it's a valid 64-char hex string 165 | isValidHex := len(hexStr) == 64 166 | if isValidHex { 167 | for _, c := range hexStr { 168 | if !((c >= '0' && c <= '9') || 169 | (c >= 'a' && c <= 'f') || 170 | (c >= 'A' && c <= 'F')) { 171 | isValidHex = false 172 | break 173 | } 174 | } 175 | } 176 | 177 | if isValidHex { 178 | if err != nil { 179 | t.Errorf("Expected valid hex %q to succeed, got error: %v", hexStr, err) 180 | } 181 | 182 | // No non-zero requirement! Zero digest is valid. 
183 | if len(hash) != sha256.Size { 184 | t.Errorf("Expected digest size %d, got %d", sha256.Size, len(hash)) 185 | } 186 | 187 | } else { 188 | if err == nil { 189 | t.Errorf("Expected invalid hex %q to fail, but got success", hexStr) 190 | } 191 | } 192 | }) 193 | } 194 | 195 | func TestRemoveDuplicates(t *testing.T) { 196 | setupLogging() 197 | 198 | dm, err := NewDmap(2) 199 | if err != nil { 200 | t.Fatalf("NewDmap failed: %v", err) 201 | } 202 | 203 | tmp := t.TempDir() 204 | const keep uint = 1 205 | var dfiles []*dfs.Dfile 206 | for i := 0; i < 3; i++ { 207 | path := filepath.Join(tmp, fmt.Sprintf("dup_%d.dat", i)) 208 | if writeErr := os.WriteFile(path, []byte("duplicate"), 0o644); writeErr != nil { 209 | t.Fatalf("write %s: %v", path, writeErr) 210 | } 211 | df, dfErr := dfs.NewDfile(path, int64(len("duplicate")), dfs.HashSHA256) 212 | if dfErr != nil { 213 | t.Fatalf("NewDfile(%s): %v", path, dfErr) 214 | } 215 | dfiles = append(dfiles, df) 216 | dm.Add(df) 217 | } 218 | 219 | removed, removeErr := dm.RemoveDuplicates(keep) 220 | if removeErr != nil { 221 | t.Fatalf("RemoveDuplicates returned error: %v", removeErr) 222 | } 223 | if len(removed) != 2 { 224 | t.Fatalf("expected 2 files removed, got %d", len(removed)) 225 | } 226 | 227 | for _, path := range removed { 228 | if _, statErr := os.Stat(path); !errors.Is(statErr, os.ErrNotExist) { 229 | t.Fatalf("expected %s to be removed, stat err: %v", path, statErr) 230 | } 231 | } 232 | 233 | hashKey := Digest(dfiles[0].Hash()) 234 | remaining := dm.GetMap()[hashKey] 235 | if len(remaining) != int(keep) { 236 | t.Fatalf("expected %d survivor, got %d", keep, len(remaining)) 237 | } 238 | 239 | if dm.FileCount() != keep { 240 | t.Fatalf("expected fileCount %d, got %d", keep, dm.FileCount()) 241 | } 242 | } 243 | 244 | func TestRemoveDuplicatesZeroKeep(t *testing.T) { 245 | setupLogging() 246 | 247 | dm, err := NewDmap(0) 248 | if err != nil { 249 | t.Fatalf("NewDmap failed: %v", err) 250 | } 251 | 252 | if _, 
removeErr := dm.RemoveDuplicates(0); removeErr == nil { 253 | t.Fatalf("expected error when keep is zero") 254 | } 255 | } 256 | 257 | func TestExportJSONAndCSV(t *testing.T) { 258 | setupLogging() 259 | 260 | dm, err := NewDmap(2) 261 | if err != nil { 262 | t.Fatalf("NewDmap failed: %v", err) 263 | } 264 | 265 | tmp := t.TempDir() 266 | fileA := filepath.Join(tmp, "dupA.txt") 267 | fileB := filepath.Join(tmp, "dupB.txt") 268 | 269 | if writeErr := os.WriteFile(fileA, []byte("hello"), 0o644); writeErr != nil { 270 | t.Fatalf("write %s: %v", fileA, writeErr) 271 | } 272 | if writeErr := os.WriteFile(fileB, []byte("greetings"), 0o644); writeErr != nil { 273 | t.Fatalf("write %s: %v", fileB, writeErr) 274 | } 275 | 276 | var digest Digest 277 | digest[0] = 0x1 278 | dm.filesMap[digest] = []string{fileA, fileB} 279 | 280 | jsonPath := filepath.Join(tmp, "dups.json") 281 | if err := dm.WriteJSON(jsonPath); err != nil { 282 | t.Fatalf("WriteJSON failed: %v", err) 283 | } 284 | 285 | jsonData, err := os.ReadFile(jsonPath) 286 | if err != nil { 287 | t.Fatalf("ReadFile JSON: %v", err) 288 | } 289 | 290 | var summary exportSummary 291 | if err := json.Unmarshal(jsonData, &summary); err != nil { 292 | t.Fatalf("Unmarshal JSON: %v", err) 293 | } 294 | 295 | if summary.GroupCount != 1 { 296 | t.Fatalf("expected 1 group, got %d", summary.GroupCount) 297 | } 298 | if len(summary.Groups) != 1 { 299 | t.Fatalf("expected 1 group entry, got %d", len(summary.Groups)) 300 | } 301 | group := summary.Groups[0] 302 | if group.Hash != fmt.Sprintf("%x", digest) { 303 | t.Fatalf("unexpected hash: %s", group.Hash) 304 | } 305 | if group.DuplicateCount != 2 { 306 | t.Fatalf("expected duplicate count 2, got %d", group.DuplicateCount) 307 | } 308 | if len(group.Files) != 2 { 309 | t.Fatalf("expected 2 file entries, got %d", len(group.Files)) 310 | } 311 | 312 | filesSeen := make(map[string]uint64) 313 | for _, f := range group.Files { 314 | filesSeen[f.Path] = f.Size 315 | } 316 | if 
filesSeen[fileA] != 5 { 317 | t.Fatalf("expected size 5 for %s, got %d", fileA, filesSeen[fileA]) 318 | } 319 | if filesSeen[fileB] != 9 { 320 | t.Fatalf("expected size 9 for %s, got %d", fileB, filesSeen[fileB]) 321 | } 322 | 323 | csvPath := filepath.Join(tmp, "dups.csv") 324 | if err := dm.WriteCSV(csvPath); err != nil { 325 | t.Fatalf("WriteCSV failed: %v", err) 326 | } 327 | 328 | csvFile, err := os.Open(csvPath) 329 | if err != nil { 330 | t.Fatalf("Open CSV: %v", err) 331 | } 332 | defer csvFile.Close() 333 | 334 | reader := csv.NewReader(csvFile) 335 | rows, err := reader.ReadAll() 336 | if err != nil { 337 | t.Fatalf("ReadAll CSV: %v", err) 338 | } 339 | 340 | if len(rows) != 3 { // header + 2 rows 341 | t.Fatalf("expected 3 rows, got %d", len(rows)) 342 | } 343 | 344 | header := rows[0] 345 | expectedHeader := []string{"hash", "duplicate_count", "path", "size_bytes"} 346 | for i, col := range expectedHeader { 347 | if header[i] != col { 348 | t.Fatalf("unexpected header column %d: %s", i, header[i]) 349 | } 350 | } 351 | 352 | expectedHash := fmt.Sprintf("%x", digest) 353 | for _, row := range rows[1:] { 354 | if row[0] != expectedHash { 355 | t.Fatalf("unexpected hash in CSV row: %s", row[0]) 356 | } 357 | if row[1] != "2" { 358 | t.Fatalf("unexpected duplicate count in CSV row: %s", row[1]) 359 | } 360 | path := row[2] 361 | size, err := strconv.ParseUint(row[3], 10, 64) 362 | if err != nil { 363 | t.Fatalf("parse size: %v", err) 364 | } 365 | if filesSeen[path] != size { 366 | t.Fatalf("CSV size mismatch for %s: got %d want %d", path, size, filesSeen[path]) 367 | } 368 | } 369 | } 370 | -------------------------------------------------------------------------------- /pkg/utils/ansiart.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "fmt" 5 | "image" 6 | "image/color" 7 | "image/draw" 8 | _ "image/jpeg" 9 | _ "image/png" 10 | "log" 11 | "math" 12 | "os" 13 | "strconv" 14 | ) 15 
| 16 | // EXAMPLE 17 | // [go:embed] assets/help-icon.png 18 | // var helpIconPNG []byte 19 | 20 | // func printHelpIcon() { 21 | // img, _, err := image.Decode(bytes.NewReader(helpIconPNG)) 22 | // if err != nil { 23 | // log.Printf("help icon decode failed: %v", err) 24 | // return 25 | // } 26 | // utils.RenderImageANSI(img, utils.Options{Width: 60, PaletteBits: 4}) 27 | // fmt.Println() 28 | // } 29 | 30 | // Options controls rendering behaviour. 31 | type Options struct { 32 | Width int // target width in terminal cells 33 | EnableShadow bool 34 | PaletteBits int // bits per channel (set <8 to enable ordered dithering) 35 | } 36 | 37 | // resizeNearest does a basic nearest-neighbour resize (no deps). 38 | func resizeNearest(src image.Image, newW, newH int) *image.RGBA { 39 | dst := image.NewRGBA(image.Rect(0, 0, newW, newH)) 40 | b := src.Bounds() 41 | 42 | if newW == b.Dx() && newH == b.Dy() { 43 | draw.Draw(dst, dst.Bounds(), src, b.Min, draw.Src) 44 | return dst 45 | } 46 | 47 | scaleX := float64(b.Dx()) / float64(newW) 48 | scaleY := float64(b.Dy()) / float64(newH) 49 | 50 | sample := func(x, y int) color.NRGBA { 51 | x = clamp(x, b.Min.X, b.Max.X-1) 52 | y = clamp(y, b.Min.Y, b.Max.Y-1) 53 | return color.NRGBAModel.Convert(src.At(x, y)).(color.NRGBA) 54 | } 55 | 56 | for y := 0; y < newH; y++ { 57 | srcY := float64(b.Min.Y) + (float64(y)+0.5)*scaleY - 0.5 58 | y0 := int(math.Floor(srcY)) 59 | y1 := y0 + 1 60 | fy := srcY - float64(y0) 61 | 62 | for x := 0; x < newW; x++ { 63 | srcX := float64(b.Min.X) + (float64(x)+0.5)*scaleX - 0.5 64 | x0 := int(math.Floor(srcX)) 65 | x1 := x0 + 1 66 | fx := srcX - float64(x0) 67 | 68 | c00 := sample(x0, y0) 69 | c10 := sample(x1, y0) 70 | c01 := sample(x0, y1) 71 | c11 := sample(x1, y1) 72 | 73 | r := bilerp(c00.R, c10.R, c01.R, c11.R, fx, fy) 74 | g := bilerp(c00.G, c10.G, c01.G, c11.G, fx, fy) 75 | bCol := bilerp(c00.B, c10.B, c01.B, c11.B, fx, fy) 76 | a := bilerp(c00.A, c10.A, c01.A, c11.A, fx, fy) 77 | 78 | 
dst.Set(x, y, color.NRGBA{uint8(r), uint8(g), uint8(bCol), uint8(a)}) 79 | } 80 | } 81 | return dst 82 | } 83 | 84 | // applyShadow composites a dark, slightly blurred offset copy behind src. 85 | func applyShadow(src *image.RGBA) *image.RGBA { 86 | w := src.Bounds().Dx() 87 | h := src.Bounds().Dy() 88 | 89 | shadow := image.NewRGBA(src.Bounds()) 90 | 91 | // Create dark offset copy. 92 | const ( 93 | offsetX = 2 94 | offsetY = 1 95 | // Value 0 to 1 controls how dark. 96 | intensity = 0.4 97 | ) 98 | 99 | for y := range h { 100 | for x := range w { 101 | r, g, b, a := src.At(x, y).RGBA() 102 | if a == 0 { 103 | continue 104 | } 105 | sx := x + offsetX 106 | sy := y + offsetY 107 | if sx >= w || sy >= h { 108 | continue 109 | } 110 | dr := uint8(float64(r>>8) * intensity) 111 | dg := uint8(float64(g>>8) * intensity) 112 | db := uint8(float64(b>>8) * intensity) 113 | shadow.Set(sx, sy, color.RGBA{dr, dg, db, 255}) 114 | } 115 | } 116 | 117 | // Very small box blur to soften the shadow edges. 118 | blur := func(src *image.RGBA) *image.RGBA { 119 | dst := image.NewRGBA(src.Bounds()) 120 | for y := 0; y < h; y++ { 121 | for x := 0; x < w; x++ { 122 | var rs, gs, bs, as, count uint32 123 | for dy := -1; dy <= 1; dy++ { 124 | for dx := -1; dx <= 1; dx++ { 125 | nx := x + dx 126 | ny := y + dy 127 | if nx < 0 || ny < 0 || nx >= w || ny >= h { 128 | continue 129 | } 130 | r, g, b, a := src.At(nx, ny).RGBA() 131 | rs += r 132 | gs += g 133 | bs += b 134 | as += a 135 | count++ 136 | } 137 | } 138 | if count == 0 { 139 | continue 140 | } 141 | avgR := rs / count 142 | avgG := gs / count 143 | avgB := bs / count 144 | avgA := as / count 145 | dst.Set(x, y, color.RGBA{ 146 | R: fromRGBA16(avgR), 147 | G: fromRGBA16(avgG), 148 | B: fromRGBA16(avgB), 149 | A: fromRGBA16(avgA), 150 | }) 151 | } 152 | } 153 | return dst 154 | } 155 | shadow = blur(shadow) 156 | 157 | // Composite shadow behind src (simple "over" with opaque fg). 
158 | out := image.NewRGBA(src.Bounds()) 159 | for y := 0; y < h; y++ { 160 | for x := 0; x < w; x++ { 161 | fr, fg, fb, fa := src.At(x, y).RGBA() 162 | sr, sg, sb, sa := shadow.At(x, y).RGBA() 163 | 164 | if fa>>8 > 0 { // foreground pixel wins 165 | out.Set(x, y, color.RGBA{ 166 | R: fromRGBA16(fr), 167 | G: fromRGBA16(fg), 168 | B: fromRGBA16(fb), 169 | A: 255, 170 | }) 171 | } else if sa>>8 > 0 { 172 | out.Set(x, y, color.RGBA{ 173 | R: fromRGBA16(sr), 174 | G: fromRGBA16(sg), 175 | B: fromRGBA16(sb), 176 | A: 255, 177 | }) 178 | } else { 179 | out.Set(x, y, color.RGBA{0, 0, 0, 0}) 180 | } 181 | } 182 | } 183 | return out 184 | } 185 | 186 | // 4x4 Bayer matrix for ordered dithering (0..15). 187 | var bayer4 = [4][4]float64{ 188 | {0, 8, 2, 10}, 189 | {12, 4, 14, 6}, 190 | {3, 11, 1, 9}, 191 | {15, 7, 13, 5}, 192 | } 193 | 194 | // quantizeWithDither crushes a color channel with an ordered dither. 195 | // bitsPerChan: 1–4 (higher = more colors, less retro). 196 | func quantizeWithDither(v uint8, bitsPerChan int, x, y int) uint8 { 197 | if bitsPerChan <= 0 { 198 | return v 199 | } 200 | if bitsPerChan > 8 { 201 | bitsPerChan = 8 202 | } 203 | levels := 1 << bitsPerChan // e.g. 8 for 3 bits 204 | step := 256.0 / float64(levels) // size of each bucket 205 | b := bayer4[y&3][x&3]/16.0 - 0.5 // -0.5..+0.5 206 | val := float64(v) + b*step*0.75 // tweak factor for strength 207 | if val < 0 { 208 | val = 0 209 | } 210 | if val > 255 { 211 | val = 255 212 | } 213 | q := int(val / step) 214 | if q >= levels { 215 | q = levels - 1 216 | } 217 | return uint8(float64(q) * step) 218 | } 219 | 220 | // RenderImageANSI prints an image as 8-bit-style ANSI art using foreground / 221 | // background colors and the ▀ character (top half block). It applies optional 222 | // shadow + ordered dithering. 
223 | func RenderImageANSI(img image.Image, opts Options) { 224 | if opts.PaletteBits <= 0 { 225 | opts.PaletteBits = 8 // default to full color unless caller opts in to dithering 226 | } 227 | b := img.Bounds() 228 | w := b.Dx() 229 | h := b.Dy() 230 | 231 | outW := opts.Width 232 | if outW <= 0 || outW > w { 233 | outW = w 234 | } 235 | 236 | // Height in terminal rows (2 image pixels per cell vertically). 237 | scale := float64(outW) / float64(w) 238 | outH := int(math.Round(float64(h) * scale)) 239 | if outH%2 != 0 { 240 | outH++ 241 | } 242 | 243 | resized := resizeNearest(img, outW, outH) 244 | if opts.EnableShadow { 245 | resized = applyShadow(resized) 246 | } 247 | resized = cropOpaqueRegion(resized) 248 | outW = resized.Bounds().Dx() 249 | outH = resized.Bounds().Dy() 250 | 251 | useQuant := opts.PaletteBits > 0 && opts.PaletteBits < 8 252 | 253 | for y := 0; y < outH; y += 2 { 254 | for x := 0; x < outW; x++ { 255 | top := color.NRGBAModel.Convert(resized.At(x, y)).(color.NRGBA) 256 | bottom := color.NRGBA{A: 0} 257 | if y+1 < outH { 258 | bottom = color.NRGBAModel.Convert(resized.At(x, y+1)).(color.NRGBA) 259 | } 260 | 261 | ta := top.A 262 | ba := bottom.A 263 | 264 | // Quantise visible pixels only so transparent regions stay untouched. 
265 | var tr, tg, tb, br, bg, bb uint8 266 | if ta > 0 { 267 | if useQuant { 268 | tr = quantizeWithDither(top.R, opts.PaletteBits, x, y) 269 | tg = quantizeWithDither(top.G, opts.PaletteBits, x, y) 270 | tb = quantizeWithDither(top.B, opts.PaletteBits, x, y) 271 | } else { 272 | tr, tg, tb = top.R, top.G, top.B 273 | } 274 | } 275 | if ba > 0 { 276 | if useQuant { 277 | br = quantizeWithDither(bottom.R, opts.PaletteBits, x, y+1) 278 | bg = quantizeWithDither(bottom.G, opts.PaletteBits, x, y+1) 279 | bb = quantizeWithDither(bottom.B, opts.PaletteBits, x, y+1) 280 | } else { 281 | br, bg, bb = bottom.R, bottom.G, bottom.B 282 | } 283 | } 284 | 285 | switch { 286 | case ta == 0 && ba == 0: 287 | fmt.Print("\x1b[0m ") 288 | case ta == 0: 289 | // Only bottom pixel is visible → draw lower half block. 290 | fmt.Printf("\x1b[0m\x1b[38;2;%d;%d;%dm▄", br, bg, bb) 291 | case ba == 0: 292 | // Only top pixel visible → draw upper half block and reset bg. 293 | fmt.Printf("\x1b[38;2;%d;%d;%dm\x1b[49m▀", tr, tg, tb) 294 | default: 295 | // Both visible → regular top-half block with background colour. 
296 | fmt.Printf("\x1b[38;2;%d;%d;%dm\x1b[48;2;%d;%d;%dm▀", 297 | tr, tg, tb, 298 | br, bg, bb, 299 | ) 300 | } 301 | } 302 | fmt.Print("\x1b[0m\n") 303 | } 304 | fmt.Print("\x1b[0m") // final reset 305 | } 306 | 307 | func main() { 308 | if len(os.Args) < 2 { 309 | log.Fatalf("usage: %s image.(png|jpg) [width]\n", os.Args[0]) 310 | } 311 | 312 | f, err := os.Open(os.Args[1]) 313 | if err != nil { 314 | log.Fatal(err) 315 | } 316 | defer f.Close() 317 | 318 | img, _, err := image.Decode(f) 319 | if err != nil { 320 | log.Fatal(err) 321 | } 322 | 323 | opts := Options{ 324 | Width: 64, // default 325 | EnableShadow: true, 326 | PaletteBits: 3, // 3 bits/channel ≈ 512 colors, nice retro 327 | } 328 | if len(os.Args) >= 3 { 329 | if w, err := strconv.Atoi(os.Args[2]); err == nil && w > 0 { 330 | opts.Width = w 331 | } 332 | } 333 | 334 | RenderImageANSI(img, opts) 335 | } 336 | 337 | func bilerp(c00, c10, c01, c11 uint8, fx, fy float64) float64 { 338 | c00f := float64(c00) 339 | c10f := float64(c10) 340 | c01f := float64(c01) 341 | c11f := float64(c11) 342 | 343 | return c00f*(1-fx)*(1-fy) + c10f*fx*(1-fy) + c01f*(1-fx)*fy + c11f*fx*fy 344 | } 345 | 346 | // cropOpaqueRegion trims fully transparent borders so the rendered art focuses on 347 | // visible pixels. 
348 | func cropOpaqueRegion(img *image.RGBA) *image.RGBA { 349 | b := img.Bounds() 350 | minX, minY := b.Max.X, b.Max.Y 351 | maxX, maxY := b.Min.X-1, b.Min.Y-1 352 | found := false 353 | 354 | for y := b.Min.Y; y < b.Max.Y; y++ { 355 | for x := b.Min.X; x < b.Max.X; x++ { 356 | _, _, _, a := img.At(x, y).RGBA() 357 | if a>>8 > 0 { 358 | if x < minX { 359 | minX = x 360 | } 361 | if y < minY { 362 | minY = y 363 | } 364 | if x > maxX { 365 | maxX = x 366 | } 367 | if y > maxY { 368 | maxY = y 369 | } 370 | found = true 371 | } 372 | } 373 | } 374 | 375 | if !found { 376 | return img 377 | } 378 | 379 | rect := image.Rect(minX, minY, maxX+1, maxY+1) 380 | if rect == b { 381 | return img 382 | } 383 | 384 | dst := image.NewRGBA(image.Rect(0, 0, rect.Dx(), rect.Dy())) 385 | draw.Draw(dst, dst.Bounds(), img, rect.Min, draw.Src) 386 | return dst 387 | } 388 | 389 | func clamp(v, min, max int) int { 390 | if v < min { 391 | return min 392 | } 393 | if v > max { 394 | return max 395 | } 396 | return v 397 | } 398 | 399 | func fromRGBA16(v uint32) uint8 { 400 | // Values produced by color.RGBA() occupy the 0-65535 range; clamp before narrowing to a byte. 401 | val := v >> 8 402 | if val > 0xFF { 403 | return 0xFF 404 | } 405 | return byte(val) 406 | } 407 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /cmd/dskDitto/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "flag" 6 | "fmt" 7 | "math" 8 | _ "net/http/pprof" 9 | "os" 10 | "os/signal" 11 | "runtime/pprof" 12 | "syscall" 13 | "time" 14 | 15 | "github.com/jdefrancesco/dskDitto/internal/config" 16 | "github.com/jdefrancesco/dskDitto/internal/dfs" 17 | "github.com/jdefrancesco/dskDitto/internal/dmap" 18 | "github.com/jdefrancesco/dskDitto/internal/dsklog" 19 | "github.com/jdefrancesco/dskDitto/internal/dwalk" 20 | "github.com/jdefrancesco/dskDitto/internal/ui" 21 | "github.com/jdefrancesco/dskDitto/pkg/utils" 22 | 23 | "github.com/pterm/pterm" 24 | "github.com/pterm/pterm/putils" 25 | ) 26 | 27 | // Version 28 | const ver = "0.1" 29 | 30 | func init() { 31 | 32 | // Custom help message 33 | flag.Usage = func() { 34 | showHeader() 35 | fmt.Fprintf(os.Stderr, "Usage: dskDitto [options] PATHS\n\n") 36 | fmt.Fprintf(os.Stderr, "Options:\n") 37 | fmt.Fprintf(os.Stderr, " --no-banner Do not show the 
dskDitto banner.\n") 38 | fmt.Fprintf(os.Stderr, " --version Display version information.\n") 39 | fmt.Fprintf(os.Stderr, " --profile Write CPU profile to disk for analysis.\n") 40 | fmt.Fprintf(os.Stderr, " --time-only Report scan duration only (for development).\n") 41 | fmt.Fprintf(os.Stderr, " --min-size Skip files smaller than the given size (e.g. 512K, 5MiB).\n") 42 | fmt.Fprintf(os.Stderr, " --max-size Skip files larger than the given size (default 4GiB).\n") 43 | fmt.Fprintf(os.Stderr, " --text Emit duplicate results in text-friendly format.\n") 44 | fmt.Fprintf(os.Stderr, " --bullet Show duplicates as a formatted bullet list.\n") 45 | fmt.Fprintf(os.Stderr, " --empty Include empty files (default: ignore).\n") 46 | fmt.Fprintf(os.Stderr, " --no-symlinks Skip symbolic links (default true).\n") 47 | fmt.Fprintf(os.Stderr, " --hidden Include hidden dotfiles and directories (default: ignore).\n") 48 | fmt.Fprintf(os.Stderr, " --current Do not descend into subdirectories.\n") 49 | fmt.Fprintf(os.Stderr, " --depth Limit recursion to directories below the start paths.\n") 50 | fmt.Fprintf(os.Stderr, " --include-vfs Include virtual filesystem directories like /proc or /dev.\n") 51 | fmt.Fprintf(os.Stderr, " --dups Require at least this many files per duplicate group (default 2).\n") 52 | fmt.Fprintf(os.Stderr, " --remove Operate on duplicates, keeping only files per group.\n") 53 | fmt.Fprintf(os.Stderr, " --link With --remove, convert extra duplicates into symlinks instead of deleting them.\n") 54 | fmt.Fprintf(os.Stderr, " --hash Hash algorithm: sha256 (default) or blake3.\n") 55 | fmt.Fprintf(os.Stderr, " --csv-out Write duplicate groups to a CSV file.\n") 56 | fmt.Fprintf(os.Stderr, " --json-out Write duplicate groups to a JSON file.\n") 57 | fmt.Fprintf(os.Stderr, " --fs-detect Detect and display the filesystem containing path.\n\n") 58 | fmt.Fprintf(os.Stderr, "Notes:\n") 59 | fmt.Fprintf(os.Stderr, " Display-oriented options like --bullet only render 
results; no files are removed.\n") 60 | } 61 | } 62 | 63 | // signalHandler will handle SIGINT and others in order to 64 | // gracefully shutdown. 65 | func signalHandler(ctx context.Context, sig os.Signal) { 66 | dsklog.Dlogger.Infoln("Signal received") 67 | 68 | // The terminal settings might be in a state that messes up 69 | // future output. To be safe I reset them. 70 | ui.StopTUI() 71 | 72 | switch sig { 73 | case syscall.SIGINT: 74 | fmt.Fprintf(os.Stderr, "\r[!] SIGINT! Quitting...\n") 75 | ctx.Done() 76 | os.Exit(1) 77 | default: 78 | fmt.Fprintf(os.Stderr, "\r[!] Unhandled/Unknown signal.\n") 79 | ctx.Done() 80 | os.Exit(1) 81 | } 82 | } 83 | 84 | func main() { 85 | 86 | // Initialize logger 87 | dsklog.InitializeDlogger(".dskditto.log") 88 | dsklog.Dlogger.Info("Logger initialized") 89 | 90 | // Setup signal handler 91 | sigChan := make(chan os.Signal, 1) 92 | signal.Notify(sigChan, syscall.SIGINT) 93 | 94 | // Create a context. 95 | ctx, cancel := context.WithCancel(context.Background()) 96 | defer cancel() 97 | 98 | go func() { 99 | for { 100 | sig := <-sigChan 101 | signalHandler(ctx, sig) 102 | } 103 | }() 104 | 105 | // Parse command flags. 106 | // Note these messages aren't what user sees any longer. See flUsage for that. 107 | var ( 108 | flNoBanner = flag.Bool("no-banner", false, "Do not show the dskDitto banner.") 109 | flShowVersion = flag.Bool("version", false, "Display version") 110 | flCpuProfile = flag.String("profile", "", "Write CPU profile to disk for analysis.") 111 | flTimeOnly = flag.Bool("time-only", false, "Use to show only the time taken to scan directory for duplicates.") 112 | flMinFileSize = flag.String("min-size", "", "Skip files smaller than this size (supports suffixes like 512K, 5MiB).") 113 | flMaxFileSize = flag.String("max-size", "", "Skip files larger than this size (default 4GiB).") 114 | flTextOutput = flag.Bool("text", false, "Dump results in grep/text friendly format. 
Useful for scripting.") 115 | flShowBullets = flag.Bool("bullet", false, "Show duplicates as formatted bullet list.") 116 | flIncludeEmpty = flag.Bool("empty", false, "Include empty files (0 bytes).") 117 | flSkipSymLinks = flag.Bool("no-symlinks", true, "Skip symbolic links. This is on by default.") 118 | flIncludeHidden = flag.Bool("hidden", false, "Include hidden files and directories (dotfiles).") 119 | flNoRecurse = flag.Bool("current", false, "Only scan the provided directories without descending into subdirectories.") 120 | flDepth = flag.Int("depth", -1, "Maximum recursion depth; 0 inspects only the provided paths, -1 means unlimited.") 121 | flIncludeVFS = flag.Bool("include-vfs", false, "Include virtual filesystem mount points such as /proc and /dev.") 122 | flMinDups = flag.Uint("dups", 2, "Minimum number of duplicates required to display a group.") 123 | flHashAlgo = flag.String("hash", "sha256", "Hash algorithm to use: sha256 (default) or blake3.") 124 | flKeep = flag.Uint("remove", 0, "Operate on duplicates, keeping only this many files per group.") 125 | flLinkMode = flag.Bool("link", false, "Convert extra duplicates into symlinks instead of deleting them (use with --remove).") 126 | flCSVOut = flag.String("csv-out", "", "Write duplicate groups to the specified CSV file.") 127 | flJSONOut = flag.String("json-out", "", "Write duplicate groups to the specified JSON file.") 128 | flDetectFS = flag.String("fs-detect", "", "Detect filesystem in use by specified path") 129 | ) 130 | flag.Parse() 131 | 132 | // Enable CPU profiling 133 | if *flCpuProfile != "" { 134 | f, err := os.Create(*flCpuProfile) 135 | if err != nil { 136 | dsklog.Dlogger.Info("profile failed") 137 | os.Exit(1) 138 | } 139 | pprof.StartCPUProfile(f) 140 | } 141 | 142 | if !*flNoBanner { 143 | showHeader() 144 | } 145 | 146 | fmt.Printf("[!] Press CTRL+C to stop dskDitto at any time.\n") 147 | 148 | // Just show version then quit. 
149 | if *flShowVersion { 150 | showVersion() 151 | os.Exit(0) 152 | } 153 | 154 | if *flDetectFS != "" { 155 | fs, err := dfs.DetectFilesystem(".") 156 | if err != nil { 157 | panic(err) 158 | } 159 | fmt.Printf("Filesystem: %s\n\n", fs) 160 | } 161 | 162 | // Maximum uint size. 163 | maxUint := ^uint(0) 164 | MinFileSize := uint(0) 165 | 166 | if *flMinFileSize != "" { 167 | value, err := utils.ParseSize(*flMinFileSize) 168 | if err != nil { 169 | fmt.Fprintf(os.Stderr, "invalid value for --min-size: %v\n", err) 170 | os.Exit(1) 171 | } 172 | if value > uint64(math.MaxUint) { 173 | fmt.Fprintf(os.Stderr, "--min-size %s exceeds platform limit (%d bytes)\n", *flMinFileSize, maxUint) 174 | os.Exit(1) 175 | } 176 | 177 | MinFileSize = uint(value) 178 | if MinFileSize > 0 { 179 | fmt.Printf("Skipping files smaller than: ~ %s.\n", utils.DisplaySize(uint64(MinFileSize))) 180 | } 181 | dsklog.Dlogger.Debugf("Min file size set to %d bytes.\n", MinFileSize) 182 | } 183 | 184 | MaxFileSize := dwalk.MAX_FILE_SIZE // Default is 4 GiB. 
185 | if *flMaxFileSize != "" { 186 | value, err := utils.ParseSize(*flMaxFileSize) 187 | if err != nil { 188 | fmt.Fprintf(os.Stderr, "invalid value for --max-size: %v\n", err) 189 | os.Exit(1) 190 | } 191 | if value > uint64(math.MaxUint) { 192 | fmt.Fprintf(os.Stderr, "--max-size %s exceeds platform limit (%d bytes)\n", *flMaxFileSize, maxUint) 193 | os.Exit(1) 194 | } 195 | if value > 0 { 196 | MaxFileSize = uint(value) 197 | fmt.Printf("Skipping files larger than: %s (%d bytes).\n", utils.DisplaySize(uint64(MaxFileSize)), MaxFileSize) 198 | } 199 | dsklog.Dlogger.Debugf("Max file size set to %d bytes.\n", MaxFileSize) 200 | } 201 | 202 | if *flDepth < -1 { 203 | dsklog.Dlogger.Debugf("Invalid depth of %d \n", *flDepth) 204 | fmt.Fprintf(os.Stderr, "invalid depth %d; must be -1 or greater\n", *flDepth) 205 | os.Exit(1) 206 | } 207 | 208 | maxDepth := -1 209 | if *flDepth >= 0 { 210 | maxDepth = *flDepth 211 | } 212 | if *flNoRecurse { 213 | maxDepth = 0 214 | } 215 | 216 | if maxDepth == 0 && (*flNoRecurse || *flDepth >= 0) { 217 | dsklog.Dlogger.Debug("Recursion disabled. Invoked with current flag. Only checking current directory for dups.") 218 | } else if maxDepth > 0 { 219 | dsklog.Dlogger.Debugf("Limiting recursion depth to %d level(s).\n", maxDepth) 220 | } 221 | 222 | hashAlgo, err := dfs.ParseHashAlgorithm(*flHashAlgo) 223 | if err != nil { 224 | fmt.Fprintf(os.Stderr, "unsupported hash algorithm %q; must be 'sha256' or 'blake3'\n", *flHashAlgo) 225 | os.Exit(1) 226 | } 227 | dsklog.Dlogger.Debugf("Using hash algorithm: %s", hashAlgo) 228 | 229 | rootDirs := flag.Args() 230 | if len(rootDirs) == 0 { 231 | rootDirs = []string{"."} 232 | } 233 | 234 | // Dmap stores duplicate file information. Failure is fatal. 
235 | minDups := *flMinDups 236 | if minDups < 2 { 237 | fmt.Fprintf(os.Stderr, "invalid duplicate threshold %d; must be at least 2\n", minDups) 238 | os.Exit(1) 239 | } 240 | 241 | keepCount := *flKeep 242 | if keepCount == 0 { 243 | dsklog.Dlogger.Debug("No removal requested. keepCount is zero") 244 | } 245 | 246 | // Hold app config. 247 | appCfg := config.Config{ 248 | SkipEmpty: !*flIncludeEmpty, 249 | SkipSymLinks: *flSkipSymLinks, 250 | SkipHidden: !*flIncludeHidden, 251 | SkipVirtualFS: !*flIncludeVFS, 252 | MaxDepth: maxDepth, 253 | MinFileSize: MinFileSize, 254 | MaxFileSize: MaxFileSize, 255 | MinDuplicates: minDups, 256 | HashAlgorithm: hashAlgo, 257 | } 258 | 259 | dMap, err := dmap.NewDmap(appCfg.MinDuplicates) 260 | if err != nil { 261 | dsklog.Dlogger.Fatal("Failed to make new Dmap: ", err) 262 | os.Exit(1) 263 | } 264 | 265 | // Receive files we need to process via this channel. 266 | // Settled on using 1 for high throughput and no hidden back pressure that 267 | // I may have been hiding with buffered channel. 268 | dFiles := make(chan *dfs.Dfile, 1) 269 | 270 | walker := dwalk.NewDWalker(rootDirs, dFiles, appCfg) 271 | walker.Run(ctx) 272 | 273 | start := time.Now() 274 | 275 | // Show progress to user at intervals specified by tick. 276 | tick := time.Tick(time.Duration(500) * time.Millisecond) 277 | infoSpinner, _ := pterm.DefaultSpinner.Start() 278 | 279 | // Number of files we have processed so far. 280 | var nfiles uint 281 | 282 | MainLoop: 283 | for { 284 | select { 285 | case <-ctx.Done(): 286 | // Drain dFiles. 287 | for range dFiles { 288 | } 289 | break MainLoop 290 | 291 | case dFile, ok := <-dFiles: 292 | if !ok { 293 | break MainLoop 294 | } 295 | 296 | if dFile == nil { 297 | dsklog.Dlogger.Warn("Received nil dFile, skipping...") 298 | continue 299 | } 300 | // Add the file to our map. 301 | dMap.Add(dFile) 302 | nfiles++ 303 | 304 | case <-tick: 305 | // Display progress information. 
306 | progressMsg := fmt.Sprintf("Processed %d files...", nfiles) 307 | infoSpinner.UpdateText(progressMsg) 308 | } 309 | } 310 | 311 | infoSpinner.Stop() 312 | duration := time.Since(start) 313 | 314 | // Stop profiling after this point. Profile data should now be 315 | // written to disk. 316 | pprof.StopCPUProfile() 317 | 318 | // Status bar update 319 | finalInfo := "Total of " + pterm.LightWhite(nfiles) + " files processed in " + 320 | pterm.LightWhite(duration) 321 | pterm.Success.Println(finalInfo) 322 | 323 | // Dump to CSV, then exit without dropping into TUI 324 | if *flCSVOut != "" { 325 | pterm.Success.Printf("CSV file %s being written to disk...", *flCSVOut) 326 | if err := dMap.WriteCSV(*flCSVOut); err != nil { 327 | fmt.Fprintf(os.Stderr, "failed to write CSV output: %v\n", err) 328 | os.Exit(1) 329 | } 330 | os.Exit(0) 331 | } 332 | 333 | // Dump files to JSON then exit. 334 | if *flJSONOut != "" { 335 | pterm.Success.Printf("JSON file %s being written to disk...", *flJSONOut) 336 | if err := dMap.WriteJSON(*flJSONOut); err != nil { 337 | fmt.Fprintf(os.Stderr, "failed to write JSON output: %v\n", err) 338 | os.Exit(1) 339 | } 340 | os.Exit(0) 341 | } 342 | 343 | // Zero value for moveKeep means don't remove or relink anything. 
344 | if keepCount > 0 { 345 | if *flLinkMode { 346 | linkedPaths, linkErr := dMap.LinkDuplicates(keepCount) 347 | fmt.Printf("Converted %d duplicate files to symlinks, kept %d real file(s) per group.\n", len(linkedPaths), keepCount) 348 | if linkErr != nil { 349 | fmt.Fprintf(os.Stderr, "Linking completed with errors: %v\n", linkErr) 350 | os.Exit(1) 351 | } 352 | } else { 353 | removedPaths, removeErr := dMap.RemoveDuplicates(keepCount) 354 | fmt.Printf("Removed %d duplicate files, kept %d per group.\n", len(removedPaths), keepCount) 355 | if removeErr != nil { 356 | fmt.Fprintf(os.Stderr, "Removal completed with errors: %v\n", removeErr) 357 | os.Exit(1) 358 | } 359 | } 360 | os.Exit(0) 361 | } 362 | 363 | // For debugging to test speed 364 | if *flTimeOnly { 365 | os.Exit(0) 366 | } 367 | 368 | fmt.Println() 369 | // Dump results in various format. No interactive results are shown. These 370 | // options are better for scripting or grepping through. 371 | switch { 372 | case *flTextOutput: 373 | dMap.PrintDmap() 374 | os.Exit(0) 375 | case *flShowBullets: 376 | dMap.ShowResultsBullet() 377 | os.Exit(0) 378 | } 379 | 380 | // Show TUI interactive interface. 381 | ui.LaunchTUI(dMap) 382 | } 383 | 384 | // showHeader prints colorful dskDitto banner. 385 | func showHeader() { 386 | 387 | fmt.Println("") 388 | 389 | pterm.DefaultBigText.WithLetters( 390 | putils.LettersFromStringWithStyle("dsk", pterm.NewStyle(pterm.FgLightGreen)), 391 | putils.LettersFromStringWithStyle("Ditto", pterm.NewStyle(pterm.FgLightWhite))). 392 | Render() 393 | } 394 | 395 | func showVersion() { 396 | fmt.Printf("Version: %s\n", ver) 397 | fmt.Printf("Github: https://github.com/jdefrancesco/dskDitto") 398 | } 399 | -------------------------------------------------------------------------------- /internal/bench/bench_test.go: -------------------------------------------------------------------------------- 1 | // This package contains benchmark related logic/tests. 
2 | package bench 3 | 4 | import ( 5 | "bytes" 6 | "context" 7 | "fmt" 8 | "os" 9 | "path/filepath" 10 | "runtime" 11 | "sync" 12 | "testing" 13 | 14 | "github.com/jdefrancesco/dskDitto/internal/config" 15 | "github.com/jdefrancesco/dskDitto/internal/dfs" 16 | "github.com/jdefrancesco/dskDitto/internal/dmap" 17 | "github.com/jdefrancesco/dskDitto/internal/dsklog" 18 | "github.com/jdefrancesco/dskDitto/internal/dwalk" 19 | ) 20 | 21 | var benchmarkInit sync.Once 22 | 23 | const defaultHashAlgorithm = dfs.HashSHA256 24 | 25 | // setupBenchmark ensures the logger is initialised exactly once for benchmarks. 26 | func setupBenchmark(tb testing.TB) { 27 | tb.Helper() 28 | benchmarkInit.Do(func() { 29 | dsklog.InitializeDlogger("/dev/null") 30 | }) 31 | } 32 | 33 | // BenchmarkNewDfile benchmarks overhead of creating a new Dfile. A Dfile 34 | // is the abstraction we use for files we crawl and analyze. 35 | func BenchmarkNewDfile(b *testing.B) { 36 | benchmarkNewDfile(b, defaultHashAlgorithm) 37 | } 38 | 39 | // BenchmarkNewDfileBLAKE3 measures Dfile creation when using the BLAKE3 digest. 40 | func BenchmarkNewDfileBLAKE3(b *testing.B) { 41 | benchmarkNewDfile(b, dfs.HashBLAKE3) 42 | } 43 | 44 | func benchmarkNewDfile(b *testing.B, algo dfs.HashAlgorithm) { 45 | setupBenchmark(b) 46 | 47 | dir := b.TempDir() 48 | path := makeSizedFile(b, dir, "benchfile.dat", 1<<20) // 1 MiB 49 | info, err := os.Stat(path) 50 | if err != nil { 51 | b.Fatal(err) 52 | } 53 | 54 | b.ResetTimer() 55 | for b.Loop() { 56 | if _, err := dfs.NewDfile(path, info.Size(), algo); err != nil { 57 | b.Fatalf("NewDfile failed: %v", err) 58 | } 59 | } 60 | } 61 | 62 | // BenchmarkHashFileSHA256 measures hashing throughput for SHA-256 across file sizes. 63 | func BenchmarkHashFileSHA256(b *testing.B) { 64 | benchmarkHashFile(b, dfs.HashSHA256) 65 | } 66 | 67 | // BenchmarkHashFileBLAKE3 measures hashing throughput for BLAKE3 across file sizes. 
func BenchmarkHashFileBLAKE3(b *testing.B) {
	benchmarkHashFile(b, dfs.HashBLAKE3)
}

// benchmarkHashFile runs sub-benchmarks that hash files of several fixed
// sizes (4 KiB through 8 MiB) using the given algorithm, so per-size
// throughput can be compared.
func benchmarkHashFile(b *testing.B, algo dfs.HashAlgorithm) {
	setupBenchmark(b)

	tests := []struct {
		name string
		size int
	}{
		{"4KiB", 4 * 1024},
		{"64KiB", 64 * 1024},
		{"1MiB", 1 << 20},
		{"8MiB", 8 << 20},
	}

	for _, tc := range tests {
		tc := tc // copy loop variable (pre-Go 1.22 idiom; harmless now)
		b.Run(tc.name, func(b *testing.B) {
			// Fixture is created once per sub-benchmark, outside the timed loop.
			dir := b.TempDir()
			path := makeSizedFile(b, dir, "hash.dat", tc.size)
			info, err := os.Stat(path)
			if err != nil {
				b.Fatal(err)
			}

			b.ResetTimer()
			for b.Loop() {
				if _, err := dfs.NewDfile(path, info.Size(), algo); err != nil {
					b.Fatalf("hash failed: %v", err)
				}
			}
		})
	}
}

// BenchmarkDWalkRun exercises the directory walker under different tree shapes.
func BenchmarkDWalkRun(b *testing.B) {
	setupBenchmark(b)

	scenarios := []struct {
		name        string
		depth       int
		breadth     int
		filesPerDir int
		fileSize    int
	}{
		{"Shallow", 1, 4, 8, 4 * 1024},
		{"Deep", 3, 2, 4, 2 * 1024},
		{"LargeFiles", 2, 3, 3, 512 * 1024},
	}

	for _, scenario := range scenarios {
		scenario := scenario // copy loop variable (pre-Go 1.22 idiom)
		b.Run(scenario.name, func(b *testing.B) {
			root := b.TempDir()
			paths := createDirectoryTree(b, root, scenario.depth, scenario.breadth, scenario.filesPerDir, scenario.fileSize)
			expected := len(paths)

			b.ResetTimer()
			for b.Loop() {
				// Channel is sized to hold every result so the walker never
				// blocks; the walk itself is what is being timed.
				dFiles := make(chan *dfs.Dfile, expected)
				walker := dwalk.NewDWalker(
					[]string{root},
					dFiles,
					config.Config{HashAlgorithm: defaultHashAlgorithm, SkipHidden: true, SkipVirtualFS: true, MaxDepth: -1},
				)
				ctx := context.Background()
				walker.Run(ctx)

				// Drain the channel; the walker closes it when done.
				count := 0
				for range dFiles {
					count++
				}

				if count != expected {
					b.Fatalf("unexpected file count: got %d want %d", count, expected)
				}
			}
		})
	}
}

// BenchmarkMonitorLoop benchmarks the monitor loop that processes files and builds the duplicate map.
func BenchmarkMonitorLoop(b *testing.B) {
	setupBenchmark(b)

	// Shared fixtures: 128 duplicate pairs, stat'd and prehashed once.
	root := b.TempDir()
	paths := createDuplicateFiles(b, root, 128, 8*1024)
	infos := mustStatPaths(b, paths)
	prehashed := mustMakeDfiles(b, paths, infos, defaultHashAlgorithm)
	expected := uint(len(prehashed))

	// Prehashed: measures only channel feed + Dmap.Add, no hashing cost.
	b.Run("Prehashed", func(b *testing.B) {
		b.ResetTimer()
		for b.Loop() {
			result := runMonitorLoop(b, prehashed, nil)
			if result != expected {
				b.Fatalf("unexpected file count: got %d want %d", result, expected)
			}
		}
	})

	// WithHashing: rebuilds (re-hashes) every Dfile inside the timed loop.
	b.Run("WithHashing", func(b *testing.B) {
		b.ResetTimer()
		for b.Loop() {
			result := runMonitorLoop(b, nil, func() []*dfs.Dfile {
				files := make([]*dfs.Dfile, 0, len(paths))
				for idx, path := range paths {
					df, err := dfs.NewDfile(path, infos[idx].Size(), defaultHashAlgorithm)
					if err != nil {
						b.Fatalf("NewDfile failed: %v", err)
					}
					files = append(files, df)
				}
				return files
			})
			if result != expected {
				b.Fatalf("unexpected file count: got %d want %d", result, expected)
			}
		}
	})

	// ConcurrentProducers: multiple goroutines feed one consumer.
	b.Run("ConcurrentProducers", func(b *testing.B) {
		workers := runtime.GOMAXPROCS(0)
		b.ResetTimer()
		for b.Loop() {
			result := runMonitorLoopConcurrent(b, prehashed, workers)
			if result != expected {
				b.Fatalf("unexpected file count: got %d want %d", result, expected)
			}
		}
	})
}

// BenchmarkMonitorLoopBLAKE3 mirrors BenchmarkMonitorLoop but exercises the pipeline using BLAKE3.
205 | func BenchmarkMonitorLoopBLAKE3(b *testing.B) { 206 | setupBenchmark(b) 207 | 208 | root := b.TempDir() 209 | paths := createDuplicateFiles(b, root, 128, 8*1024) 210 | infos := mustStatPaths(b, paths) 211 | prehashed := mustMakeDfiles(b, paths, infos, dfs.HashBLAKE3) 212 | expected := uint(len(prehashed)) 213 | 214 | b.Run("Prehashed", func(b *testing.B) { 215 | b.ResetTimer() 216 | for b.Loop() { 217 | result := runMonitorLoopWithAlgorithm(b, dfs.HashBLAKE3, prehashed, nil) 218 | if result != expected { 219 | b.Fatalf("unexpected file count: got %d want %d", result, expected) 220 | } 221 | } 222 | }) 223 | 224 | b.Run("WithHashing", func(b *testing.B) { 225 | b.ResetTimer() 226 | for b.Loop() { 227 | result := runMonitorLoopWithAlgorithm(b, dfs.HashBLAKE3, nil, func() []*dfs.Dfile { 228 | files := make([]*dfs.Dfile, 0, len(paths)) 229 | for idx, path := range paths { 230 | df, err := dfs.NewDfile(path, infos[idx].Size(), dfs.HashBLAKE3) 231 | if err != nil { 232 | b.Fatalf("NewDfile failed: %v", err) 233 | } 234 | files = append(files, df) 235 | } 236 | return files 237 | }) 238 | if result != expected { 239 | b.Fatalf("unexpected file count: got %d want %d", result, expected) 240 | } 241 | } 242 | }) 243 | 244 | b.Run("ConcurrentProducers", func(b *testing.B) { 245 | workers := runtime.GOMAXPROCS(0) 246 | b.ResetTimer() 247 | for b.Loop() { 248 | result := runMonitorLoopConcurrentWithAlgorithm(b, dfs.HashBLAKE3, prehashed, workers) 249 | if result != expected { 250 | b.Fatalf("unexpected file count: got %d want %d", result, expected) 251 | } 252 | } 253 | }) 254 | } 255 | 256 | // BenchmarkDmapOperations benchmarks core Dmap operations. 
257 | func BenchmarkDmapOperations(b *testing.B) { 258 | setupBenchmark(b) 259 | 260 | root := b.TempDir() 261 | paths := createDuplicateFiles(b, root, 256, 4*1024) 262 | infos := mustStatPaths(b, paths) 263 | prehashed := mustMakeDfiles(b, paths, infos, defaultHashAlgorithm) 264 | 265 | b.ResetTimer() 266 | for b.Loop() { 267 | dMap, err := dmap.NewDmap(2) 268 | if err != nil { 269 | b.Fatal(err) 270 | } 271 | 272 | for _, df := range prehashed { 273 | dMap.Add(df) 274 | } 275 | 276 | _ = dMap.MapSize() 277 | _ = dMap.GetMap() 278 | } 279 | } 280 | 281 | // runMonitorLoop is a helper that runs the monitor loop using either a fixed slice 282 | // of dfiles or a factory function that returns a fresh slice. 283 | func runMonitorLoop(b *testing.B, cached []*dfs.Dfile, factory func() []*dfs.Dfile) uint { 284 | return runMonitorLoopWithAlgorithm(b, defaultHashAlgorithm, cached, factory) 285 | } 286 | 287 | func runMonitorLoopWithAlgorithm(b *testing.B, algo dfs.HashAlgorithm, cached []*dfs.Dfile, factory func() []*dfs.Dfile) uint { 288 | b.Helper() 289 | 290 | dMap, err := dmap.NewDmap(2) 291 | if err != nil { 292 | b.Fatal(err) 293 | } 294 | 295 | var files []*dfs.Dfile 296 | if cached != nil { 297 | files = cached 298 | } else { 299 | files = factory() 300 | } 301 | 302 | dFiles := make(chan *dfs.Dfile, len(files)) 303 | var wg sync.WaitGroup 304 | wg.Add(1) 305 | go func() { 306 | defer wg.Done() 307 | for df := range dFiles { 308 | dMap.Add(df) 309 | } 310 | }() 311 | 312 | for _, df := range files { 313 | dFiles <- df 314 | } 315 | close(dFiles) 316 | wg.Wait() 317 | 318 | return dMap.FileCount() 319 | } 320 | 321 | // runMonitorLoopConcurrent stresses the monitor loop with multiple producers feeding the channel. 
322 | func runMonitorLoopConcurrent(b *testing.B, files []*dfs.Dfile, workers int) uint { 323 | return runMonitorLoopConcurrentWithAlgorithm(b, defaultHashAlgorithm, files, workers) 324 | } 325 | 326 | // runMonitorLoopConcurrentWithAlgorithm concurrently feeds the provided Dfiles into a shared dmap, using the given worker pool size, and returns 327 | // the total number of files recorded after all producers and the single consumer complete. 328 | func runMonitorLoopConcurrentWithAlgorithm(b *testing.B, algo dfs.HashAlgorithm, files []*dfs.Dfile, workers int) uint { 329 | b.Helper() 330 | 331 | dMap, err := dmap.NewDmap(2) 332 | if err != nil { 333 | b.Fatal(err) 334 | } 335 | 336 | if workers <= 0 { 337 | workers = 1 338 | } 339 | 340 | // Set channel to 1 so we uncover backpressure. 341 | dFiles := make(chan *dfs.Dfile, 1) 342 | var consumer sync.WaitGroup 343 | consumer.Add(1) 344 | go func() { 345 | defer consumer.Done() 346 | for df := range dFiles { 347 | dMap.Add(df) 348 | } 349 | }() 350 | 351 | chunk := chunkSize(len(files), workers) 352 | var producers sync.WaitGroup 353 | for i := 0; i < workers; i++ { 354 | start := i * chunk 355 | if start >= len(files) { 356 | break 357 | } 358 | end := start + chunk 359 | if end > len(files) { 360 | end = len(files) 361 | } 362 | slice := files[start:end] 363 | producers.Add(1) 364 | go func(batch []*dfs.Dfile) { 365 | defer producers.Done() 366 | for _, df := range batch { 367 | dFiles <- df 368 | } 369 | }(slice) 370 | } 371 | 372 | producers.Wait() 373 | close(dFiles) 374 | consumer.Wait() 375 | 376 | return dMap.FileCount() 377 | } 378 | 379 | // createDirectoryTree builds a directory tree with predictable fanout. 
380 | func createDirectoryTree(tb testing.TB, root string, depth, breadth, filesPerDir, fileSize int) []string { 381 | tb.Helper() 382 | 383 | var paths []string 384 | var build func(level int, dir string) 385 | build = func(level int, dir string) { 386 | if err := os.MkdirAll(dir, 0o755); err != nil { 387 | tb.Fatalf("mkdir %s: %v", dir, err) 388 | } 389 | 390 | for i := 0; i < filesPerDir; i++ { 391 | name := fmt.Sprintf("file_%d_%d.dat", level, i) 392 | paths = append(paths, makeSizedFile(tb, dir, name, fileSize)) 393 | } 394 | 395 | if level >= depth { 396 | return 397 | } 398 | 399 | for i := 0; i < breadth; i++ { 400 | sub := filepath.Join(dir, fmt.Sprintf("dir_%d_%d", level+1, i)) 401 | build(level+1, sub) 402 | } 403 | } 404 | 405 | build(0, root) 406 | return paths 407 | } 408 | 409 | // createDuplicateFiles creates matching pairs of files so that hashing and monitor loop benchmarks see duplicates. 410 | func createDuplicateFiles(tb testing.TB, dir string, duplicates, size int) []string { 411 | tb.Helper() 412 | 413 | if err := os.MkdirAll(dir, 0o755); err != nil { 414 | tb.Fatalf("mkdir %s: %v", dir, err) 415 | } 416 | 417 | var paths []string 418 | for i := range duplicates { 419 | content := bytes.Repeat([]byte{byte(i % 251)}, size) 420 | left := filepath.Join(dir, fmt.Sprintf("dupA_%03d.dat", i)) 421 | right := filepath.Join(dir, fmt.Sprintf("dupB_%03d.dat", i)) 422 | 423 | if err := os.WriteFile(left, content, 0o644); err != nil { 424 | tb.Fatalf("write %s: %v", left, err) 425 | } 426 | if err := os.WriteFile(right, content, 0o644); err != nil { 427 | tb.Fatalf("write %s: %v", right, err) 428 | } 429 | 430 | paths = append(paths, left, right) 431 | } 432 | 433 | return paths 434 | } 435 | 436 | // makeSizedFile writes a file of the requested size using deterministic content. 
437 | func makeSizedFile(tb testing.TB, dir, name string, size int) string { 438 | tb.Helper() 439 | 440 | if err := os.MkdirAll(dir, 0o755); err != nil { 441 | tb.Fatalf("mkdir %s: %v", dir, err) 442 | } 443 | 444 | path := filepath.Join(dir, name) 445 | file, err := os.Create(path) 446 | if err != nil { 447 | tb.Fatalf("create %s: %v", path, err) 448 | } 449 | defer file.Close() 450 | 451 | if size == 0 { 452 | return path 453 | } 454 | 455 | const chunkSize = 32 * 1024 456 | chunk := bytes.Repeat([]byte{0xA5}, min(size, chunkSize)) 457 | remaining := size 458 | for remaining > 0 { 459 | writeLen := min(remaining, chunkSize) 460 | if _, err := file.Write(chunk[:writeLen]); err != nil { 461 | tb.Fatalf("write %s: %v", path, err) 462 | } 463 | remaining -= writeLen 464 | } 465 | 466 | return path 467 | } 468 | 469 | // mustStatPaths retrieves os.FileInfo for each path. 470 | func mustStatPaths(tb testing.TB, paths []string) []os.FileInfo { 471 | tb.Helper() 472 | 473 | infos := make([]os.FileInfo, len(paths)) 474 | for i, path := range paths { 475 | info, err := os.Stat(path) 476 | if err != nil { 477 | tb.Fatalf("stat %s: %v", path, err) 478 | } 479 | infos[i] = info 480 | } 481 | return infos 482 | } 483 | 484 | // mustMakeDfiles precomputes dfs.Dfile instances for the supplied paths. 485 | func mustMakeDfiles(tb testing.TB, paths []string, infos []os.FileInfo, algo dfs.HashAlgorithm) []*dfs.Dfile { 486 | tb.Helper() 487 | 488 | files := make([]*dfs.Dfile, 0, len(paths)) 489 | for i, path := range paths { 490 | df, err := dfs.NewDfile(path, infos[i].Size(), algo) 491 | if err != nil { 492 | tb.Fatalf("NewDfile(%s) failed: %v", path, err) 493 | } 494 | files = append(files, df) 495 | } 496 | return files 497 | } 498 | 499 | // chunkSize calculates work distribution for concurrent producers. 
500 | func chunkSize(total, workers int) int { 501 | if workers <= 0 { 502 | return total 503 | } 504 | size := total / workers 505 | if size*workers < total { 506 | size++ 507 | } 508 | if size == 0 { 509 | size = 1 510 | } 511 | return size 512 | } 513 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | atomicgo.dev/assert v0.0.2 h1:FiKeMiZSgRrZsPo9qn/7vmr7mCsh5SZyXY4YGYiYwrg= 2 | atomicgo.dev/assert v0.0.2/go.mod h1:ut4NcI3QDdJtlmAxQULOmA13Gz6e2DWbSAS8RUOmNYQ= 3 | atomicgo.dev/cursor v0.2.0 h1:H6XN5alUJ52FZZUkI7AlJbUc1aW38GWZalpYRPpoPOw= 4 | atomicgo.dev/cursor v0.2.0/go.mod h1:Lr4ZJB3U7DfPPOkbH7/6TOtJ4vFGHlgj1nc+n900IpU= 5 | atomicgo.dev/keyboard v0.2.9 h1:tOsIid3nlPLZ3lwgG8KZMp/SFmr7P0ssEN5JUsm78K8= 6 | atomicgo.dev/keyboard v0.2.9/go.mod h1:BC4w9g00XkxH/f1HXhW2sXmJFOCWbKn9xrOunSFtExQ= 7 | atomicgo.dev/schedule v0.1.0 h1:nTthAbhZS5YZmgYbb2+DH8uQIZcTlIrd4eYr3UQxEjs= 8 | atomicgo.dev/schedule v0.1.0/go.mod h1:xeUa3oAkiuHYh8bKiQBRojqAMq3PXXbJujjb0hw8pEU= 9 | github.com/MarvinJWendt/testza v0.1.0/go.mod h1:7AxNvlfeHP7Z/hDQ5JtE3OKYT3XFUeLCDE2DQninSqs= 10 | github.com/MarvinJWendt/testza v0.2.1/go.mod h1:God7bhG8n6uQxwdScay+gjm9/LnO4D3kkcZX4hv9Rp8= 11 | github.com/MarvinJWendt/testza v0.2.8/go.mod h1:nwIcjmr0Zz+Rcwfh3/4UhBp7ePKVhuBExvZqnKYWlII= 12 | github.com/MarvinJWendt/testza v0.2.10/go.mod h1:pd+VWsoGUiFtq+hRKSU1Bktnn+DMCSrDrXDpX2bG66k= 13 | github.com/MarvinJWendt/testza v0.2.12/go.mod h1:JOIegYyV7rX+7VZ9r77L/eH6CfJHHzXjB69adAhzZkI= 14 | github.com/MarvinJWendt/testza v0.3.0/go.mod h1:eFcL4I0idjtIx8P9C6KkAuLgATNKpX4/2oUqKc6bF2c= 15 | github.com/MarvinJWendt/testza v0.4.2/go.mod h1:mSdhXiKH8sg/gQehJ63bINcCKp7RtYewEjXsvsVUPbE= 16 | github.com/MarvinJWendt/testza v0.5.2 h1:53KDo64C1z/h/d/stCYCPY69bt/OSwjq5KpFNwi+zB4= 17 | github.com/MarvinJWendt/testza v0.5.2/go.mod h1:xu53QFE5sCdjtMCKk8YMQ2MnymimEctc4n3EjyIYvEY= 18 | 
github.com/Masterminds/semver/v3 v3.4.0 h1:Zog+i5UMtVoCU8oKka5P7i9q9HgrJeGzI9SA1Xbatp0= 19 | github.com/Masterminds/semver/v3 v3.4.0/go.mod h1:4V+yj/TJE1HU9XfppCwVMZq3I84lprf4nC11bSS5beM= 20 | github.com/atomicgo/cursor v0.0.1/go.mod h1:cBON2QmmrysudxNBFthvMtN32r3jxVRIvzkUiF/RuIk= 21 | github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= 22 | github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= 23 | github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw= 24 | github.com/charmbracelet/bubbletea v1.3.10/go.mod h1:ORQfo0fk8U+po9VaNvnV95UPWA1BitP1E0N6xJPlHr4= 25 | github.com/charmbracelet/colorprofile v0.3.3 h1:DjJzJtLP6/NZ8p7Cgjno0CKGr7wwRJGxWUwh2IyhfAI= 26 | github.com/charmbracelet/colorprofile v0.3.3/go.mod h1:nB1FugsAbzq284eJcjfah2nhdSLppN2NqvfotkfRYP4= 27 | github.com/charmbracelet/lipgloss v1.1.0 h1:vYXsiLHVkK7fp74RkV7b2kq9+zDLoEU4MZoFqR/noCY= 28 | github.com/charmbracelet/lipgloss v1.1.0/go.mod h1:/6Q8FR2o+kj8rz4Dq0zQc3vYf7X+B0binUUBwA0aL30= 29 | github.com/charmbracelet/x/ansi v0.11.2 h1:XAG3FSjiVtFvgEgGrNBkCNNYrsucAt8c6bfxHyROLLs= 30 | github.com/charmbracelet/x/ansi v0.11.2/go.mod h1:9tY2bzX5SiJCU0iWyskjBeI2BRQfvPqI+J760Mjf+Rg= 31 | github.com/charmbracelet/x/cellbuf v0.0.14 h1:iUEMryGyFTelKW3THW4+FfPgi4fkmKnnaLOXuc+/Kj4= 32 | github.com/charmbracelet/x/cellbuf v0.0.14/go.mod h1:P447lJl49ywBbil/KjCk2HexGh4tEY9LH0/1QrZZ9rA= 33 | github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk= 34 | github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI= 35 | github.com/clipperhouse/displaywidth v0.6.1 h1:/zMlAezfDzT2xy6acHBzwIfyu2ic0hgkT83UX5EY2gY= 36 | github.com/clipperhouse/displaywidth v0.6.1/go.mod h1:R+kHuzaYWFkTm7xoMmK1lFydbci4X2CicfbGstSGg0o= 37 | github.com/clipperhouse/stringish v0.1.1 h1:+NSqMOr3GR6k1FdRhhnXrLfztGzuG+VuFDfatpWHKCs= 38 | github.com/clipperhouse/stringish 
v0.1.1/go.mod h1:v/WhFtE1q0ovMta2+m+UbpZ+2/HEXNWYXQgCt4hdOzA= 39 | github.com/clipperhouse/uax29/v2 v2.3.0 h1:SNdx9DVUqMoBuBoW3iLOj4FQv3dN5mDtuqwuhIGpJy4= 40 | github.com/clipperhouse/uax29/v2 v2.3.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g= 41 | github.com/cloudfoundry/gosigar v1.3.112 h1:cGGZ2sj1GKyiwSxzouIR7ATNbgAkC4zqwWDxYQ2ObPc= 42 | github.com/cloudfoundry/gosigar v1.3.112/go.mod h1:Ldc+tVw3dfqPwasZ9om1LT2aRwpjC1eFfbWKfv2WbDI= 43 | github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U= 44 | github.com/containerd/console v1.0.5 h1:R0ymNeydRqH2DmakFNdmjR2k0t7UPuiOV/N/27/qqsc= 45 | github.com/containerd/console v1.0.5/go.mod h1:YynlIjWYF8myEu6sdkwKIvGQq+cOckRm6So2avqoYAk= 46 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 47 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 48 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 49 | github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4= 50 | github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f/go.mod h1:vw97MGsxSvLiUE2X8qFplwetxpGLQrlU1Q9AUEIzCaM= 51 | github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= 52 | github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= 53 | github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= 54 | github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= 55 | github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= 56 | github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= 57 | github.com/google/pprof v0.0.0-20251208000136-3d256cb9ff16 h1:ptucaU8cwiAc+/jqDblz0kb1ECLqPTeX/qQym8OBYzY= 58 | github.com/google/pprof v0.0.0-20251208000136-3d256cb9ff16/go.mod 
h1:67FPmZWbr+KDT/VlpWtw6sO9XSjpJmLuHpoLmWiTGgY= 59 | github.com/gookit/assert v0.1.1 h1:lh3GcawXe/p+cU7ESTZ5Ui3Sm/x8JWpIis4/1aF0mY0= 60 | github.com/gookit/assert v0.1.1/go.mod h1:jS5bmIVQZTIwk42uXl4lyj4iaaxx32tqH16CFj0VX2E= 61 | github.com/gookit/color v1.4.2/go.mod h1:fqRyamkC1W8uxl+lxCQxOT09l/vYfZ+QeiX3rKQHCoQ= 62 | github.com/gookit/color v1.5.0/go.mod h1:43aQb+Zerm/BWh2GnrgOQm7ffz7tvQXEKV6BFMl7wAo= 63 | github.com/gookit/color v1.6.0 h1:JjJXBTk1ETNyqyilJhkTXJYYigHG24TM9Xa2M1xAhRA= 64 | github.com/gookit/color v1.6.0/go.mod h1:9ACFc7/1IpHGBW8RwuDm/0YEnhg3dwwXpoMsmtyHfjs= 65 | github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= 66 | github.com/klauspost/cpuid/v2 v2.0.10/go.mod h1:g2LTdtYhdyuGPqyWyv7qRAmj1WBqxuObKfj5c0PQa7c= 67 | github.com/klauspost/cpuid/v2 v2.0.12/go.mod h1:g2LTdtYhdyuGPqyWyv7qRAmj1WBqxuObKfj5c0PQa7c= 68 | github.com/klauspost/cpuid/v2 v2.3.0 h1:S4CRMLnYUhGeDFDqkGriYKdfoFlDnMtqTiI/sFzhA9Y= 69 | github.com/klauspost/cpuid/v2 v2.3.0/go.mod h1:hqwkgyIinND0mEev00jJYCxPNVRVXFQeu1XKlok6oO0= 70 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 71 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 72 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 73 | github.com/lithammer/fuzzysearch v1.1.8 h1:/HIuJnjHuXS8bKaiTMeeDlW2/AyIWk2brx1V8LFgLN4= 74 | github.com/lithammer/fuzzysearch v1.1.8/go.mod h1:IdqeyBClc3FFqSzYq/MXESsS4S0FsZ5ajtkr5xPLts4= 75 | github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag= 76 | github.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= 77 | github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= 78 | github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 79 | github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4= 80 | 
github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+EiG4R1k4Cjx5p88= 81 | github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= 82 | github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw= 83 | github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= 84 | github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6 h1:ZK8zHtRHOkbHy6Mmr5D264iyp3TiX5OmNcI5cIARiQI= 85 | github.com/muesli/ansi v0.0.0-20230316100256-276c6243b2f6/go.mod h1:CJlz5H+gyd6CUWT45Oy4q24RdLyn7Md9Vj2/ldJBSIo= 86 | github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA= 87 | github.com/muesli/cancelreader v0.2.2/go.mod h1:3XuTXfFS2VjM+HTLZY9Ak0l6eUKfijIfMUZ4EgX0QYo= 88 | github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc= 89 | github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk= 90 | github.com/onsi/ginkgo/v2 v2.27.3 h1:ICsZJ8JoYafeXFFlFAG75a7CxMsJHwgKwtO+82SE9L8= 91 | github.com/onsi/ginkgo/v2 v2.27.3/go.mod h1:ArE1D/XhNXBXCBkKOLkbsb2c81dQHCRcF5zwn/ykDRo= 92 | github.com/onsi/gomega v1.38.3 h1:eTX+W6dobAYfFeGC2PV6RwXRu/MyT+cQguijutvkpSM= 93 | github.com/onsi/gomega v1.38.3/go.mod h1:ZCU1pkQcXDO5Sl9/VVEGlDyp+zm0m1cmeG5TOzLgdh4= 94 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 95 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 96 | github.com/pterm/pterm v0.12.27/go.mod h1:PhQ89w4i95rhgE+xedAoqous6K9X+r6aSOI2eFF7DZI= 97 | github.com/pterm/pterm v0.12.29/go.mod h1:WI3qxgvoQFFGKGjGnJR849gU0TsEOvKn5Q8LlY1U7lg= 98 | github.com/pterm/pterm v0.12.30/go.mod h1:MOqLIyMOgmTDz9yorcYbcw+HsgoZo3BQfg2wtl3HEFE= 99 | github.com/pterm/pterm v0.12.31/go.mod h1:32ZAWZVXD7ZfG0s8qqHXePte42kdz8ECtRyEejaWgXU= 100 | github.com/pterm/pterm v0.12.33/go.mod h1:x+h2uL+n7CP/rel9+bImHD5lF3nM9vJj80k9ybiiTTE= 101 | 
github.com/pterm/pterm v0.12.36/go.mod h1:NjiL09hFhT/vWjQHSj1athJpx6H8cjpHXNAK5bUw8T8= 102 | github.com/pterm/pterm v0.12.40/go.mod h1:ffwPLwlbXxP+rxT0GsgDTzS3y3rmpAO1NMjUkGTYf8s= 103 | github.com/pterm/pterm v0.12.82 h1:+D9wYhCaeaK0FIQoZtqbNQuNpe2lB2tajKKsTd5paVQ= 104 | github.com/pterm/pterm v0.12.82/go.mod h1:TyuyrPjnxfwP+ccJdBTeWHtd/e0ybQHkOS/TakajZCw= 105 | github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 106 | github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= 107 | github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= 108 | github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= 109 | github.com/sergi/go-diff v1.4.0 h1:n/SP9D5ad1fORl+llWyN+D6qoUETXNZARKjyY2/KVCw= 110 | github.com/sergi/go-diff v1.4.0/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= 111 | github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= 112 | github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= 113 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 114 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 115 | github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 116 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 117 | github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= 118 | github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= 119 | github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778/go.mod h1:2MuV+tbUrU1zIOPMxZ5EncGwgmMJsa+9ucAQZXxsObs= 120 | github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= 121 | github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= 
122 | github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= 123 | go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= 124 | go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= 125 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 126 | golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= 127 | golang.org/x/exp v0.0.0-20251125195548-87e1e737ad39 h1:DHNhtq3sNNzrvduZZIiFyXWOL9IWaDPHqTnLJp+rCBY= 128 | golang.org/x/exp v0.0.0-20251125195548-87e1e737ad39/go.mod h1:46edojNIoXTNOhySWIWdix628clX9ODXwPsQuG6hsK0= 129 | golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= 130 | golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= 131 | golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI= 132 | golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg= 133 | golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 134 | golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= 135 | golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= 136 | golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= 137 | golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU= 138 | golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY= 139 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 140 | golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 141 | golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 142 | 
golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= 143 | golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= 144 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 145 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 146 | golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 147 | golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 148 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 149 | golang.org/x/sys v0.0.0-20210809222454-d867a43fc93e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 150 | golang.org/x/sys v0.0.0-20211013075003-97ac67df715c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 151 | golang.org/x/sys v0.0.0-20220319134239-a9b59b0215f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 152 | golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 153 | golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 154 | golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 155 | golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 156 | golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 157 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 158 | golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= 159 | golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= 160 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 161 | golang.org/x/term 
v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 162 | golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 163 | golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= 164 | golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= 165 | golang.org/x/term v0.38.0 h1:PQ5pkm/rLO6HnxFR7N2lJHOZX6Kez5Y1gDSJla6jo7Q= 166 | golang.org/x/term v0.38.0/go.mod h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg= 167 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 168 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 169 | golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= 170 | golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= 171 | golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= 172 | golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= 173 | golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= 174 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 175 | golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= 176 | golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= 177 | golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= 178 | golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA= 179 | golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc= 180 | golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 181 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 182 | gopkg.in/check.v1 
v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 183 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 184 | gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 185 | gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= 186 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 187 | gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 188 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 189 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 190 | lukechampine.com/blake3 v1.4.1 h1:I3Smz7gso8w4/TunLKec6K2fn+kyKtDxr/xcQEN84Wg= 191 | lukechampine.com/blake3 v1.4.1/go.mod h1:QFosUxmjB8mnrWFSNwKmvxHpfY72bmD2tQ0kBMM3kwo= 192 | -------------------------------------------------------------------------------- /internal/ui/ui.go: -------------------------------------------------------------------------------- 1 | package ui 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "os" 7 | "path/filepath" 8 | "strings" 9 | "sync" 10 | "time" 11 | "unicode" 12 | 13 | "github.com/jdefrancesco/dskDitto/internal/dfs" 14 | "github.com/jdefrancesco/dskDitto/internal/dmap" 15 | "github.com/jdefrancesco/dskDitto/internal/dsklog" 16 | 17 | "github.com/jdefrancesco/dskDitto/pkg/utils" 18 | 19 | tea "github.com/charmbracelet/bubbletea" 20 | "github.com/charmbracelet/lipgloss" 21 | runewidth "github.com/mattn/go-runewidth" 22 | ) 23 | 24 | // LaunchTUI builds and runs the Bubble Tea program that visualizes duplicate files. 
25 | func LaunchTUI(dMap *dmap.Dmap) { 26 | if dMap == nil { 27 | dsklog.Dlogger.Warn("nil duplicate map supplied to LaunchTUI") 28 | return 29 | } 30 | 31 | program := tea.NewProgram(newModel(dMap), tea.WithAltScreen(), tea.WithMouseCellMotion()) 32 | setCurrentProgram(program) 33 | defer clearCurrentProgram(program) 34 | 35 | if _, err := program.Run(); err != nil { 36 | panic(err) 37 | } 38 | } 39 | 40 | // StopTUI signals the currently running Bubble Tea program (if any) to quit. 41 | func StopTUI() { 42 | programMu.Lock() 43 | defer programMu.Unlock() 44 | if currentProgram != nil { 45 | currentProgram.Quit() 46 | } 47 | } 48 | 49 | var ( 50 | programMu sync.Mutex 51 | currentProgram *tea.Program 52 | ) 53 | 54 | // Color scheme to make things look good! 55 | var ( 56 | titleStyle = lipgloss.NewStyle(). 57 | Foreground(lipgloss.Color("#A6E22E")). 58 | Bold(true). 59 | PaddingBottom(0) 60 | 61 | dividerStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("#3F3F46")) 62 | cursorActiveStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("#FF5555")).Bold(true) 63 | cursorInactiveStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("#5C6370")) 64 | groupStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("#FFD866")).Bold(false) 65 | groupCollapsedStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("#FFD866")) 66 | fileStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("#E5E5E5")) 67 | // selectedLineStyle = lipgloss.NewStyle().Background(lipgloss.Color("#1F2937")) 68 | markedStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("#50FA7B")).Bold(true) 69 | unmarkedStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("#4B5563")) 70 | statusDeletedStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("#50FA7B")).Bold(true) 71 | statusErrorStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("#FF5555")).Bold(true) 72 | statusInfoStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("#7F848E")) 73 | footerStyle = 
lipgloss.NewStyle().Foreground(lipgloss.Color("#9CA3AF")) 74 | resultStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("#FFB86C")) 75 | emptyStateStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("#6C7086")).Italic(true) 76 | 77 | confirmPanelStyle = lipgloss.NewStyle(). 78 | Border(lipgloss.RoundedBorder()). 79 | BorderForeground(lipgloss.Color("#86fb71ff")). 80 | Padding(1, 2) 81 | 82 | confirmCodeStyle = lipgloss.NewStyle().Bold(true).Foreground(lipgloss.Color("#FBBF24")) 83 | confirmInputStyle = lipgloss.NewStyle().Bold(true).Foreground(lipgloss.Color("#E2E8F0")) 84 | errorTextStyle = lipgloss.NewStyle().Foreground(lipgloss.Color("#F87171")).Bold(true) 85 | ) 86 | 87 | func setCurrentProgram(p *tea.Program) { 88 | programMu.Lock() 89 | defer programMu.Unlock() 90 | currentProgram = p 91 | } 92 | 93 | func clearCurrentProgram(p *tea.Program) { 94 | programMu.Lock() 95 | defer programMu.Unlock() 96 | if currentProgram == p { 97 | currentProgram = nil 98 | } 99 | } 100 | 101 | type viewMode int 102 | 103 | const ( 104 | modeTree viewMode = iota 105 | modeConfirm 106 | ) 107 | 108 | type nodeType int 109 | 110 | const ( 111 | nodeGroup nodeType = iota 112 | nodeFile 113 | ) 114 | 115 | type fileStatus int 116 | 117 | const ( 118 | fileStatusPending fileStatus = iota 119 | fileStatusDeleted 120 | fileStatusError 121 | ) 122 | 123 | // fileEntry represents a file tracked by the UI, capturing its path, marked state, 124 | // status, and any associated message. 125 | type fileEntry struct { 126 | Path string 127 | Marked bool 128 | Status fileStatus 129 | Message string 130 | } 131 | 132 | // These are batches of file dups 133 | type duplicateGroup struct { 134 | Hash dmap.Digest 135 | Title string 136 | Files []*fileEntry 137 | Expanded bool 138 | } 139 | 140 | type nodeRef struct { 141 | // typ tracks the classification of the current node within the UI layer. 
142 | typ nodeType 143 | group int 144 | file int 145 | } 146 | 147 | // model struct for Bubble Tea. This core structure 148 | // holds bulk of what is needed to render TUI. 149 | // See Bubble Tea github page for tutorial. 150 | type model struct { 151 | groups []*duplicateGroup 152 | visible []nodeRef 153 | cursor int 154 | scroll int 155 | minDuplicates uint 156 | 157 | // double-click tracking 158 | lastClickIdx int 159 | lastClickAt time.Time 160 | 161 | mode viewMode 162 | 163 | confirmCode string 164 | confirmInput string 165 | confirmError string 166 | 167 | deleteResult string 168 | 169 | width int 170 | height int 171 | } 172 | 173 | var _ tea.Model = (*model)(nil) 174 | 175 | // newModel constructs a model initialized with duplicate groups derived from the provided dmap, 176 | // filtering out groups below the minimum duplicate threshold, preparing file entries, and 177 | // rebuilding the visible UI nodes before returning the result. 178 | func newModel(dMap *dmap.Dmap) *model { 179 | m := &model{ 180 | mode: modeTree, 181 | minDuplicates: dMap.MinDuplicates(), 182 | } 183 | 184 | for hash, files := range dMap.GetMap() { 185 | if uint(len(files)) < m.minDuplicates { 186 | continue 187 | } 188 | 189 | group := &duplicateGroup{ 190 | Hash: hash, 191 | Title: formatGroupTitle(hash, files), 192 | Expanded: true, 193 | } 194 | 195 | for _, file := range files { 196 | group.Files = append(group.Files, &fileEntry{Path: file}) 197 | } 198 | 199 | autoMarkGroup(group) 200 | m.groups = append(m.groups, group) 201 | } 202 | 203 | m.rebuildVisibleNodes() 204 | return m 205 | } 206 | 207 | func (m *model) Init() tea.Cmd { 208 | return nil 209 | } 210 | 211 | // Update our Bubble Tea view. Handle key presses and mouse 212 | // activity as well. 
213 | func (m *model) Update(msg tea.Msg) (tea.Model, tea.Cmd) { 214 | 215 | switch msg := msg.(type) { 216 | 217 | case tea.KeyMsg: 218 | if m.mode == modeConfirm { 219 | return m.handleConfirmKeys(msg) 220 | } 221 | return m.handleTreeKeys(msg) 222 | 223 | case tea.MouseMsg: 224 | return m.handleMouse(msg) 225 | 226 | case tea.WindowSizeMsg: 227 | m.width = msg.Width 228 | m.height = msg.Height 229 | m.adjustScroll() 230 | } 231 | 232 | return m, nil 233 | } 234 | 235 | // View is a primary method used by Bubble Tea. Similiar to MVC. 236 | func (m *model) View() string { 237 | // Modal window that pops up when user triggers file removal 238 | // process. 239 | if m.mode == modeConfirm { 240 | return m.renderConfirmView() 241 | } 242 | 243 | // Interactive tree view for viewing results and selecting course of action. 244 | return m.renderTreeView() 245 | } 246 | 247 | // handleTreeKeys allows user to navigate the TUI. 248 | // It wraps the finer detail handling so we can keep our Update 249 | // method clean. We provide some vim key binding too! 250 | func (m *model) handleTreeKeys(msg tea.KeyMsg) (tea.Model, tea.Cmd) { 251 | 252 | // Prefer string-based matching for common keys. 
253 | switch msg.String() { 254 | 255 | // Quit application 256 | case "ctrl+c", "esc", "q": 257 | return m, tea.Quit 258 | 259 | // Move cursor up with arrow and vim like binding 260 | case "up", "k": 261 | m.moveCursor(-1) 262 | 263 | case "down", "j": 264 | m.moveCursor(1) 265 | 266 | case "left", "h": 267 | m.collapseCurrentGroup() 268 | 269 | case "right", "l": 270 | m.expandCurrentGroup() 271 | 272 | case "pgup": 273 | m.pageMove(-1) 274 | 275 | case "pgdown", "pgdn": 276 | m.pageMove(1) 277 | 278 | case "ctrl+u": 279 | m.halfPageMove(-1) 280 | 281 | case "ctrl+d": 282 | m.halfPageMove(1) 283 | 284 | case "a", "A", "ctrl+a": 285 | m.markAllFiles() 286 | 287 | case "u", "U": 288 | m.unmarkAllFiles() 289 | 290 | case "enter": 291 | m.toggleCurrentGroup() 292 | 293 | // m and space bar will mark a file. 294 | case "m", " ": 295 | m.toggleCurrentFileMark() 296 | 297 | case "d": 298 | m.startConfirmationPrompt() 299 | } 300 | 301 | // Also catch PageUp/PageDown by key type for wider terminal support. 302 | switch msg.Type { 303 | 304 | case tea.KeyPgUp: 305 | m.pageMove(-1) 306 | 307 | case tea.KeyPgDown: 308 | m.pageMove(1) 309 | } 310 | 311 | return m, nil 312 | } 313 | 314 | // handleMouse supports scroll wheel and selecting a row by clicking. 315 | // TODO: Refactor and remove any deprecated Bubble Tea types. 316 | func (m *model) handleMouse(msg tea.MouseMsg) (tea.Model, tea.Cmd) { 317 | 318 | if m.mode != modeTree { 319 | return m, nil 320 | } 321 | 322 | // Refactor later as some of these methods seem to be deprecated. 323 | switch msg.Action { 324 | case tea.MouseActionPress: 325 | switch msg.Button { 326 | case tea.MouseButtonWheelUp: 327 | // Scroll up a few lines per tick. 328 | m.moveCursor(-3) 329 | return m, nil 330 | case tea.MouseButtonWheelDown: 331 | m.moveCursor(3) 332 | return m, nil 333 | case tea.MouseButtonLeft: 334 | // Map Y position to list row. 
335 | row := msg.Y - m.listTopOffset() 336 | if row >= 0 && row < m.listAreaHeight() { 337 | idx := m.scroll + row 338 | if idx >= 0 && idx < len(m.visible) { 339 | // Detect double-click on the same row within a short threshold 340 | const dbl = 350 * time.Millisecond 341 | now := time.Now() 342 | if idx == m.lastClickIdx && now.Sub(m.lastClickAt) <= dbl { 343 | // Double-click: toggle group if clicking on a group header 344 | ref := m.visible[idx] 345 | if ref.typ == nodeGroup { 346 | // Keep cursor on the group and toggle expansion 347 | m.cursor = idx 348 | m.toggleCurrentGroup() 349 | } 350 | // Reset to avoid repeated toggles on subsequent events 351 | m.lastClickIdx = -1 352 | m.lastClickAt = time.Time{} 353 | } else { 354 | // Single click: move cursor and record for potential double-click 355 | m.cursor = idx 356 | m.adjustScroll() 357 | m.lastClickIdx = idx 358 | m.lastClickAt = now 359 | } 360 | } 361 | } 362 | } 363 | } 364 | 365 | return m, nil 366 | } 367 | 368 | // handleConfirmKeys ensures the user doesn't shoot themselves in the foot. The files will 369 | // be removed only if they type the code correctly. 370 | func (m *model) handleConfirmKeys(msg tea.KeyMsg) (tea.Model, tea.Cmd) { 371 | 372 | switch msg.Type { 373 | 374 | case tea.KeyEsc: 375 | m.mode = modeTree 376 | m.confirmError = "" 377 | m.confirmInput = "" 378 | 379 | case tea.KeyEnter: 380 | if m.confirmInput == m.confirmCode { 381 | m.processDeletion() 382 | } else { 383 | m.confirmError = "Incorrect code. Try again." 
384 | m.confirmInput = "" 385 | } 386 | 387 | case tea.KeyBackspace: 388 | if len(m.confirmInput) > 0 { 389 | m.confirmInput = m.confirmInput[:len(m.confirmInput)-1] 390 | } 391 | 392 | case tea.KeyRunes: 393 | if len(m.confirmInput) >= len(m.confirmCode) { 394 | return m, nil 395 | } 396 | for _, r := range msg.Runes { 397 | if isAlphaNumeric(r) { 398 | m.confirmInput += string(r) 399 | } 400 | } 401 | } 402 | 403 | return m, nil 404 | } 405 | 406 | // renderTreeView provides View with primary tree based TUI the user interacts with. 407 | func (m *model) renderTreeView() string { 408 | 409 | width := m.effectiveWidth() 410 | divider := dividerStyle.Render(strings.Repeat("─", width)) 411 | 412 | var sections []string 413 | 414 | // Title at top of view 415 | title := "dskDitto • Interactive Results" 416 | sections = append(sections, 417 | titleStyle.Width(width).Render(runewidth.Truncate(title, width, "…"))) 418 | sections = append(sections, divider) 419 | 420 | if len(m.visible) == 0 { 421 | sections = append(sections, emptyStateStyle.Render("No duplicate groups found. Press q to exit.")) 422 | } else { 423 | // Render only the portion of the list that fits in the viewport. 424 | contentH := max(m.listAreaHeight(), 1) 425 | start := max(m.scroll, 0) 426 | end := min(start+contentH, len(m.visible)) 427 | for i := start; i < end; i++ { 428 | ref := m.visible[i] 429 | sections = append(sections, m.renderNodeLine(ref, i == m.cursor)) 430 | } 431 | } 432 | 433 | sections = append(sections, divider) 434 | countStr := fmt.Sprintf("%d", m.countMarked()) 435 | markedLabel := "marked files: " 436 | countStyled := lipgloss.NewStyle().Foreground(lipgloss.Color("#50FA7B")).Render(countStr) 437 | sections = append(sections, 438 | footerStyle.Render(markedLabel)+countStyled, 439 | ) 440 | if m.deleteResult != "" { 441 | sections = append(sections, resultStyle.Render(m.deleteResult)) 442 | } 443 | // Navigation instructions. 
444 | sections = append(sections, footerStyle.Render("enter expand/fold • arrows/j/k nav. list • m toggle selection • a select all • u clear selection • d delete marked • esc/q exit")) 445 | 446 | return strings.Join(sections, "\n") 447 | } 448 | 449 | // renderConfirmView is our modal box that prevents the user from "shooting themelves in the foot" 450 | // In order to delete files they have selected, they must first enter small code. Dunno how far or useful 451 | // this type of thing really is but it satisfies my OCD for time being. 452 | func (m *model) renderConfirmView() string { 453 | width := m.effectiveWidth() 454 | content := []string{ 455 | titleStyle.Render("Confirm Deletion"), 456 | statusInfoStyle.Render(fmt.Sprintf("You are about to delete %d file(s).", m.countMarked())), 457 | "", 458 | fmt.Sprintf("Confirmation code: %s", confirmCodeStyle.Render(m.confirmCode)), 459 | fmt.Sprintf("Your input: %s", confirmInputStyle.Render(m.confirmInput)), 460 | } 461 | 462 | if m.confirmError != "" { 463 | content = append(content, "", errorTextStyle.Render(m.confirmError)) 464 | } 465 | 466 | content = append(content, "", footerStyle.Render("Enter confirms • Esc cancels")) 467 | panel := confirmPanelStyle.Width(min(width, 80)).Render(strings.Join(content, "\n")) 468 | renderWidth := max(width, lipgloss.Width(panel)) 469 | return lipgloss.Place(renderWidth, lipgloss.Height(panel), lipgloss.Center, lipgloss.Center, panel) 470 | } 471 | 472 | // moveCursor moves the indicator on the left of the listed items. 473 | func (m *model) moveCursor(delta int) { 474 | if len(m.visible) == 0 { 475 | m.cursor = 0 476 | return 477 | } 478 | 479 | m.cursor += delta 480 | if m.cursor < 0 { 481 | m.cursor = 0 482 | } 483 | if m.cursor >= len(m.visible) { 484 | m.cursor = len(m.visible) - 1 485 | } 486 | m.adjustScroll() 487 | } 488 | 489 | // pageMove moves the cursor up or down by one viewport height and adjusts scroll. 
490 | func (m *model) pageMove(direction int) { 491 | if len(m.visible) == 0 { 492 | return 493 | } 494 | amount := max(m.listAreaHeight(), 1) 495 | if direction < 0 { 496 | m.cursor -= amount 497 | } else { 498 | m.cursor += amount 499 | } 500 | if m.cursor < 0 { 501 | m.cursor = 0 502 | } 503 | if m.cursor >= len(m.visible) { 504 | m.cursor = len(m.visible) - 1 505 | } 506 | m.adjustScroll() 507 | } 508 | 509 | // halfPageMove moves the cursor by half the viewport height. 510 | // Ctrl+D/U will let the user navigate by half page up or down. 511 | func (m *model) halfPageMove(direction int) { 512 | if len(m.visible) == 0 { 513 | return 514 | } 515 | amount := max(m.listAreaHeight()/2, 1) 516 | if direction < 0 { 517 | m.cursor -= amount 518 | } else { 519 | m.cursor += amount 520 | } 521 | if m.cursor < 0 { 522 | m.cursor = 0 523 | } 524 | if m.cursor >= len(m.visible) { 525 | m.cursor = len(m.visible) - 1 526 | } 527 | m.adjustScroll() 528 | } 529 | 530 | func (m *model) currentNode() *nodeRef { 531 | if len(m.visible) == 0 || m.cursor < 0 || m.cursor >= len(m.visible) { 532 | return nil 533 | } 534 | return &m.visible[m.cursor] 535 | } 536 | 537 | // collapseCurrentGroup collapses the currently selected group node, 538 | // ensuring its expanded state is false and refreshing the visible node list when needed. 539 | func (m *model) collapseCurrentGroup() { 540 | node := m.currentNode() 541 | if node == nil || node.typ != nodeGroup { 542 | return 543 | } 544 | group := m.groups[node.group] 545 | if group.Expanded { 546 | group.Expanded = false 547 | m.rebuildVisibleNodes() 548 | } 549 | } 550 | 551 | // expandCurrentGroup marks the currently selected group as expanded. 
552 | func (m *model) expandCurrentGroup() { 553 | node := m.currentNode() 554 | if node == nil || node.typ != nodeGroup { 555 | return 556 | } 557 | group := m.groups[node.group] 558 | if !group.Expanded { 559 | group.Expanded = true 560 | m.rebuildVisibleNodes() 561 | } 562 | } 563 | 564 | // toggleCurrentGroup toggles between collapsed and expanded view. 565 | func (m *model) toggleCurrentGroup() { 566 | node := m.currentNode() 567 | if node == nil || node.typ != nodeGroup { 568 | return 569 | } 570 | group := m.groups[node.group] 571 | group.Expanded = !group.Expanded 572 | m.rebuildVisibleNodes() 573 | } 574 | 575 | func (m *model) toggleCurrentFileMark() { 576 | node := m.currentNode() 577 | if node == nil || node.typ != nodeFile { 578 | return 579 | } 580 | 581 | entry := m.groups[node.group].Files[node.file] 582 | if entry.Status == fileStatusDeleted { 583 | return 584 | } 585 | 586 | entry.Marked = !entry.Marked 587 | m.deleteResult = "" 588 | } 589 | 590 | // markAllFiles marks every non-deleted file in all groups. 591 | func (m *model) markAllFiles() { 592 | for _, group := range m.groups { 593 | for _, entry := range group.Files { 594 | if entry.Status == fileStatusDeleted { 595 | continue 596 | } 597 | entry.Marked = true 598 | } 599 | } 600 | m.deleteResult = "" 601 | } 602 | 603 | // unmarkAllFiles clears the marked flag for every file. 
604 | func (m *model) unmarkAllFiles() { 605 | for _, group := range m.groups { 606 | for _, entry := range group.Files { 607 | entry.Marked = false 608 | } 609 | } 610 | m.deleteResult = "" 611 | } 612 | 613 | // startConfirmationPrompt is modal window to tell user what is about to happen 614 | // and asking them to confirm moving forward with file removal 615 | func (m *model) startConfirmationPrompt() { 616 | if m.countMarked() == 0 { 617 | return 618 | } 619 | m.confirmCode = GenConfirmationCode() 620 | m.confirmInput = "" 621 | m.confirmError = "" 622 | m.deleteResult = "" 623 | m.mode = modeConfirm 624 | } 625 | 626 | // processDeletion actually removes the duplicate files. 627 | // TODO: Factor out logic that removes file. 628 | func (m *model) processDeletion() { 629 | m.mode = modeTree 630 | m.confirmInput = "" 631 | m.confirmError = "" 632 | 633 | if len(m.groups) == 0 { 634 | return 635 | } 636 | 637 | var deleted, failures int 638 | for _, entry := range m.markedEntries() { 639 | err := os.Remove(entry.Path) 640 | if err != nil { 641 | entry.Status = fileStatusError 642 | entry.Message = err.Error() 643 | dsklog.Dlogger.Errorf("Failed to delete file %s: %v", entry.Path, err) 644 | failures++ 645 | } else { 646 | entry.Status = fileStatusDeleted 647 | entry.Message = fmt.Sprintf("deleted (%s)", filepath.Base(entry.Path)) 648 | dsklog.Dlogger.Infof("Successfully deleted file: %s", entry.Path) 649 | deleted++ 650 | } 651 | entry.Marked = false 652 | } 653 | 654 | switch { 655 | case deleted == 0 && failures == 0: 656 | m.deleteResult = "No files were deleted." 
657 | case failures == 0: 658 | m.deleteResult = fmt.Sprintf("Deleted %d file(s).", deleted) 659 | case deleted == 0: 660 | m.deleteResult = fmt.Sprintf("Failed to delete %d file(s).", failures) 661 | default: 662 | m.deleteResult = fmt.Sprintf("Deleted %d file(s); %d error(s) occurred.", deleted, failures) 663 | } 664 | } 665 | 666 | // markedEntries return a slice of files selected (marked) for removal. 667 | func (m *model) markedEntries() []*fileEntry { 668 | var entries []*fileEntry 669 | for _, group := range m.groups { 670 | for _, entry := range group.Files { 671 | if entry.Marked { 672 | entries = append(entries, entry) 673 | } 674 | } 675 | } 676 | return entries 677 | } 678 | 679 | func (m *model) countMarked() int { 680 | count := 0 681 | for _, group := range m.groups { 682 | for _, entry := range group.Files { 683 | if entry.Marked { 684 | count++ 685 | } 686 | } 687 | } 688 | return count 689 | } 690 | 691 | func (m *model) rebuildVisibleNodes() { 692 | m.visible = m.visible[:0] 693 | for gi, group := range m.groups { 694 | m.visible = append(m.visible, nodeRef{typ: nodeGroup, group: gi}) 695 | if group.Expanded { 696 | for fi := range group.Files { 697 | m.visible = append(m.visible, nodeRef{typ: nodeFile, group: gi, file: fi}) 698 | } 699 | } 700 | } 701 | if len(m.visible) == 0 { 702 | m.cursor = 0 703 | m.scroll = 0 704 | return 705 | } 706 | if m.cursor >= len(m.visible) { 707 | m.cursor = len(m.visible) - 1 708 | } 709 | if m.cursor < 0 { 710 | m.cursor = 0 711 | } 712 | m.adjustScroll() 713 | } 714 | 715 | // renderNodeLine renders the visual line for the provided node reference, applying the appropriate 716 | // cursor state, group or file styling, truncation, and status formatting based on the available width 717 | // and whether the node is currently selected. 
718 | func (m *model) renderNodeLine(ref nodeRef, selected bool) string { 719 | cursor := cursorInactiveStyle.Render(" ") 720 | if selected { 721 | cursor = cursorActiveStyle.Render("▸ ") 722 | } 723 | 724 | var content string 725 | width := m.effectiveWidth() 726 | avail := width - lipgloss.Width(cursor) 727 | switch ref.typ { 728 | case nodeGroup: 729 | group := m.groups[ref.group] 730 | indicator := groupCollapsedStyle.Render("▸") 731 | if group.Expanded { 732 | indicator = groupStyle.Render("▾") 733 | } 734 | // Truncate group title to avoid line wrapping. 735 | // Reserve 2 for indicator + space. 736 | titleMax := max(avail-(lipgloss.Width(indicator)+1), 0) 737 | title := group.Title 738 | if runewidth.StringWidth(title) > titleMax { 739 | title = runewidth.Truncate(title, titleMax, "…") 740 | } 741 | body := lipgloss.JoinHorizontal(lipgloss.Left, indicator, " ", groupStyle.Render(title)) 742 | content = body 743 | 744 | case nodeFile: 745 | entry := m.groups[ref.group].Files[ref.file] 746 | mark := unmarkedStyle.Render("□") 747 | if entry.Marked { 748 | mark = markedStyle.Render("■") 749 | } 750 | // Add a bit of space between the checkbox and filename for readability. 751 | markStr := " " + mark + " " 752 | 753 | // First, estimate a status width budget as a third of available after mark. 754 | markW := lipgloss.Width(markStr) 755 | baseAvail := max(avail-markW, 1) 756 | // Make sure we have enough room to display "DELETED" 757 | statusBudget := max(baseAvail/3, 8) 758 | statusStr := formatFileStatus(entry, statusBudget) 759 | 760 | // Now compute remaining width for the path and recompute status if needed. 761 | used := lipgloss.Width(markStr) + lipgloss.Width(statusStr) 762 | pathMax := max(avail-used, 1) 763 | path := entry.Path 764 | // If this path is currently a symlink on disk, annotate it so the user 765 | // can distinguish converted duplicates. 
766 | if fi, err := os.Lstat(entry.Path); err == nil && fi.Mode()&os.ModeSymlink != 0 { 767 | path += " [symlink]" 768 | } 769 | if runewidth.StringWidth(path) > pathMax { 770 | path = runewidth.Truncate(path, pathMax, "…") 771 | } 772 | 773 | // Recompute status with the final remaining width after mark+path (in case status was too big). 774 | usedAfterPath := lipgloss.Width(markStr) + lipgloss.Width(fileStyle.Render(path)) 775 | rem := max(avail-usedAfterPath, 0) 776 | statusStr = formatFileStatus(entry, rem) 777 | body := lipgloss.JoinHorizontal(lipgloss.Left, 778 | markStr, 779 | fileStyle.Render(path), 780 | statusStr, 781 | ) 782 | content = body 783 | } 784 | 785 | line := lipgloss.JoinHorizontal(lipgloss.Left, cursor, content) 786 | return line 787 | } 788 | 789 | func formatFileStatus(entry *fileEntry, maxWidth int) string { 790 | if maxWidth <= 0 { 791 | return "" 792 | } 793 | switch entry.Status { 794 | case fileStatusDeleted: 795 | text := "DELETED" 796 | if runewidth.StringWidth(text) > maxWidth { 797 | text = runewidth.Truncate(text, maxWidth, "…") 798 | } 799 | return " " + statusDeletedStyle.Render(text) 800 | case fileStatusError: 801 | text := "ERROR" 802 | if entry.Message != "" { 803 | text = "ERROR: " + entry.Message 804 | } 805 | if runewidth.StringWidth(text) > maxWidth { 806 | text = runewidth.Truncate(text, maxWidth, "…") 807 | } 808 | return " " + statusErrorStyle.Render(text) 809 | default: 810 | return "" 811 | } 812 | } 813 | 814 | // effectiveWidth returns the model's width constrained between 80 and 120, 815 | // defaulting to 80 when the current width is non-positive. 816 | func (m *model) effectiveWidth() int { 817 | switch { 818 | case m.width <= 0: 819 | return 80 820 | case m.width > 120: 821 | return 120 822 | default: 823 | return m.width 824 | } 825 | } 826 | 827 | // listAreaHeight returns how many rows are available to render the list 828 | // given the current terminal height and static header/footer rows. 
829 | func (m *model) listAreaHeight() int { 830 | h := m.height 831 | if h <= 0 { 832 | h = 24 833 | } 834 | // Static rows: title (1) + top divider (1) + bottom divider (1) 835 | // + marked footer (1) + instructions (1) = 5 836 | reserved := 5 837 | if m.deleteResult != "" { 838 | reserved++ // extra line when we show the deletion result 839 | } 840 | return max(1, h-reserved) 841 | } 842 | 843 | // listTopOffset returns the number of rows occupied above the list. 844 | func (m *model) listTopOffset() int { 845 | // title (1) + top divider (1) 846 | return 2 847 | } 848 | 849 | // adjustScroll ensures the scroll offset keeps the cursor within the viewport 850 | // and clamps both cursor and scroll to valid ranges. 851 | // TODO: Refactor to use min/max 852 | func (m *model) adjustScroll() { 853 | if len(m.visible) == 0 || m.mode != modeTree { 854 | m.scroll = 0 855 | return 856 | } 857 | contentH := max(m.listAreaHeight(), 1) 858 | 859 | // Clamp cursor to valid range. 860 | if m.cursor < 0 { 861 | m.cursor = 0 862 | } 863 | if m.cursor >= len(m.visible) { 864 | m.cursor = len(m.visible) - 1 865 | } 866 | 867 | // Clamp scroll to [0, maxScroll]. 868 | maxScroll := max(len(m.visible)-contentH, 0) 869 | if m.scroll < 0 { 870 | m.scroll = 0 871 | } 872 | if m.scroll > maxScroll { 873 | m.scroll = maxScroll 874 | } 875 | 876 | // Ensure cursor is visible inside [scroll, scroll+contentH-1]. 877 | if m.cursor < m.scroll { 878 | m.scroll = m.cursor 879 | } else if m.cursor >= m.scroll+contentH { 880 | m.scroll = max(m.cursor-contentH+1, 0) 881 | m.scroll = min(m.scroll, maxScroll) 882 | } 883 | } 884 | 885 | func formatGroupTitle(hash dmap.Digest, files []string) string { 886 | if len(files) == 0 { 887 | return "Empty group" 888 | } 889 | 890 | const tmpl = "%s - %d files - (approx. size %s)" 891 | fileSize := dfs.GetFileSize(files[0]) 892 | totalSize := uint64(fileSize) * uint64(len(files)) 893 | // Show 16 hex chars of SHA-256 hash. 
894 | hashHex := fmt.Sprintf("%x", hash[:16]) 895 | return fmt.Sprintf(tmpl, hashHex, len(files), utils.DisplaySize(totalSize)) 896 | } 897 | 898 | // autoMarkGroup marks all but one in the duplicate group. For UX, assumes users will want 899 | // to probably keep at least one of the files. 900 | func autoMarkGroup(group *duplicateGroup) { 901 | if group == nil { 902 | return 903 | } 904 | for i, entry := range group.Files { 905 | if i == 0 { 906 | continue 907 | } 908 | entry.Marked = true 909 | dsklog.Dlogger.Debugf("Auto-marked file for deletion: %s", entry.Path) 910 | } 911 | } 912 | 913 | func isAlphaNumeric(r rune) bool { 914 | return unicode.IsLetter(r) || unicode.IsDigit(r) 915 | } 916 | 917 | // GenConfirmationCode generates a random alphanumeric confirmation code. 918 | func GenConfirmationCode() string { 919 | const charset = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" 920 | // #nosec G404 921 | r := rand.New(rand.NewSource(time.Now().UnixNano())) 922 | 923 | codeLen := r.Intn(4) + 5 // between 5 and 8 characters 924 | code := make([]byte, codeLen) 925 | for i := range code { 926 | code[i] = charset[r.Intn(len(charset))] 927 | } 928 | return string(code) 929 | } 930 | --------------------------------------------------------------------------------