├── .editorconfig ├── .github ├── FUNDING.yml └── workflows │ ├── ci.yml │ └── release.yml ├── .gitignore ├── LICENSE ├── README.md ├── cmd └── chkbit │ ├── dedup.go │ ├── help.go │ ├── main.go │ └── util │ ├── fm.go │ ├── lang.go │ ├── progress.go │ ├── progress_test.go │ ├── ratecalc.go │ ├── sparkline.go │ ├── sparkline_test.go │ ├── spinner.go │ ├── strings.go │ └── strings_test.go ├── common.go ├── context.go ├── dedup.go ├── fuse.go ├── go.mod ├── go.sum ├── hashfile.go ├── ignore.go ├── ignore_test.go ├── index.go ├── indexstore.go ├── intutil └── size.go ├── os.go ├── os_darwin.go ├── os_linux.go ├── scripts ├── build ├── chkfmt ├── lint ├── maketestsample ├── maketestsample.go ├── run_test.go ├── tests └── xbuild ├── status.go └── worker.go /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig helps developers define and maintain consistent 2 | # coding styles between different editors and IDEs 3 | # editorconfig.org 4 | 5 | # top-most EditorConfig file 6 | root = true 7 | 8 | [*] 9 | insert_final_newline = true 10 | charset = utf-8 11 | trim_trailing_whitespace = true 12 | indent_style = space 13 | indent_size = 4 14 | 15 | [*.md] 16 | indent_size = 2 17 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: laktak 2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | 3 | on: 4 | push: 5 | branches: [] 6 | pull_request: 7 | 8 | jobs: 9 | 10 | build: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: actions/setup-go@v5 15 | with: 16 | go-version: "1.22" 17 | 18 | - name: chkfmt 19 | run: scripts/chkfmt 20 | 21 | - name: tests 22 | run: | 23 | scripts/tests 24 | 25 | 
- name: xbuild 26 | run: scripts/xbuild 27 | 28 | - name: artifacts 29 | uses: actions/upload-artifact@v4 30 | with: 31 | name: prerelease-artifacts 32 | path: dist/* 33 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | push: 5 | tags: ["v*"] 6 | 7 | jobs: 8 | 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | - uses: actions/setup-go@v5 14 | with: 15 | go-version: "1.22" 16 | 17 | - name: chkfmt 18 | run: scripts/chkfmt 19 | 20 | - name: tests 21 | run: | 22 | scripts/tests 23 | 24 | - name: xbuild 25 | run: version=${GITHUB_REF#$"refs/tags/v"} scripts/xbuild 26 | 27 | - name: release 28 | uses: softprops/action-gh-release@v2 29 | with: 30 | draft: true 31 | files: dist/* 32 | 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # bin 2 | /chkbit 3 | dist 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Christian Zangl 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # chkbit 3 | 4 | chkbit alerts you to data corruption in your files, especially during transfers, backups, and after recovery. It helps detect issues like disk damage, filesystem errors, and malware interference. 5 | 6 | The latest version can also detect duplicate files and run deduplication on supported systems. 7 | 8 | ![gif of chkbit](https://raw.githubusercontent.com/wiki/laktak/chkbit/readme/chkbit.gif "chkbit") 9 | 10 | [Chkbit Introduction](https://laktak.github.io/chkbit/) 11 | - [Install](https://laktak.github.io/chkbit/get/) 12 | - [How does it work?](https://laktak.github.io/chkbit/how/) 13 | - [File Deduplication](https://laktak.github.io/chkbit/dedup/) 14 | - [Usage](https://laktak.github.io/chkbit/usage/) 15 | - [FAQ](https://laktak.github.io/chkbit/faq/) 16 | 17 | ## Latest Releases 18 | 19 | ### version 6.3 20 | 21 | - new file deduplication command! 22 | 23 | ### version 6 24 | 25 | - chkbit adds a new `atom` mode to store all indices in a single file 26 | - there is a new `fuse` command to merge split indexes into an atom 27 | - If you come from an old version, please check out the new simplified CLI syntax 28 | - Note that some commands have suboption (e.g. 
to skip checking existing hashes, see `chkbit update -h`) 29 | 30 | -------------------------------------------------------------------------------- /cmd/chkbit/dedup.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "math" 7 | "os" 8 | "os/signal" 9 | "strings" 10 | "time" 11 | 12 | "github.com/laktak/chkbit/v6" 13 | "github.com/laktak/chkbit/v6/cmd/chkbit/util" 14 | "github.com/laktak/chkbit/v6/intutil" 15 | "github.com/laktak/lterm" 16 | ) 17 | 18 | func (m *Main) handleDedupProgress(mode1 bool) { 19 | 20 | abortChan := make(chan os.Signal, 1) 21 | signal.Notify(abortChan, os.Interrupt) 22 | 23 | last := time.Now().Add(-updateInterval) 24 | spinnerChan := util.Spinner(500 * time.Millisecond) 25 | spin := " " 26 | stat := "" 27 | for { 28 | select { 29 | case <-abortChan: 30 | if m.dedup.DidAbort() { 31 | m.printStderr("Immediate abort!") 32 | os.Exit(1) 33 | } 34 | m.dedup.Abort() 35 | m.dedup.LogQueue <- &chkbit.LogEvent{Stat: chkbit.StatusPanic, 36 | Message: "Aborting after current operation (press again for immediate exit)"} 37 | case item := <-m.dedup.LogQueue: 38 | if item == nil { 39 | if m.progress == Fancy { 40 | lterm.Printline("") 41 | } 42 | return 43 | } 44 | if m.logStatus(item.Stat, item.Message) { 45 | if m.progress == Fancy { 46 | lterm.Write(termBG, termFG1, stat, lterm.ClearLine(0), lterm.Reset, "\r") 47 | } else { 48 | fmt.Print(m.dedup.NumTotal(), "\r") 49 | } 50 | } 51 | case perf := <-m.dedup.PerfQueue: 52 | now := time.Now() 53 | m.fps.Push(now, perf.NumFiles) 54 | if last.Add(updateInterval).Before(now) { 55 | last = now 56 | if m.progress == Fancy { 57 | pa, pb := util.Progress(perf.Percent, int(math.Min(12, float64(m.termWidth/4)))) 58 | stat = fmt.Sprintf("[$%s$%s$]$ %5.0f%% ", pa, pb, perf.Percent*100) 59 | 60 | if mode1 { 61 | stat += fmt.Sprintf("$ # %7d ", m.dedup.NumTotal()) 62 | statF := fmt.Sprintf("%d files/s", m.fps.Last()) 
63 | stat += fmt.Sprintf("$ %s $%-13s ", util.Sparkline(m.fps.Stats), statF) 64 | } else { 65 | stat += fmt.Sprintf("$ # %d ", m.dedup.NumTotal()) 66 | stat += fmt.Sprintf("$ %sB reclaimed ", intutil.FormatSize(m.dedup.ReclaimedTotal())) 67 | } 68 | 69 | stat = util.LeftTruncate(stat, m.termWidth-1+5) // extra for col tokens 70 | 71 | stat = strings.Replace(stat, "$", termFG2, 1) // progress1 72 | stat = strings.Replace(stat, "$", termFG3, 1) // progress2 73 | stat = strings.Replace(stat, "$", termFG1, 1) // ] 74 | stat = strings.Replace(stat, "$", termFG1, 1) // text 75 | stat = strings.Replace(stat, "$", termSepFG+termSep+termFG1, 1) // count 76 | stat = strings.Replace(stat, "$", termSepFG+termSep+termFG2, 1) 77 | if mode1 { 78 | stat = strings.Replace(stat, "$", termFG1, 1) // text 79 | } 80 | } 81 | } 82 | case spin = <-spinnerChan: 83 | if m.progress == Fancy { 84 | lterm.Write(termBG, termFG1, stat, spin, lterm.ClearLine(0), lterm.Reset, "\r") 85 | } else if m.progress == Plain { 86 | fmt.Print(m.dedup.NumTotal(), "\r") 87 | } 88 | } 89 | } 90 | } 91 | 92 | func (m *Main) showDedupStatus(list []*chkbit.DedupBag, showDetails bool) { 93 | 94 | chash := uint64(0) 95 | cfile := uint64(0) 96 | minsize := uint64(0) 97 | maxsize := uint64(0) 98 | actsize := uint64(0) 99 | extUnknownCount := 0 100 | for i, bag := range list { 101 | 102 | bagLen := uint64(len(bag.ItemList)) 103 | chash += 1 104 | cfile += bagLen 105 | minsize += bag.Size 106 | maxsize += bag.Size * bagLen 107 | actsize += bag.SizeExclusive 108 | bagUnknown := bag.ExtUnknown != nil && *bag.ExtUnknown 109 | if bagUnknown { 110 | extUnknownCount++ 111 | } 112 | 113 | if showDetails { 114 | if !bagUnknown { 115 | fmt.Printf("#%d %s [%s, shared=%s, exclusive=%s]\n", 116 | i, bag.Hash, intutil.FormatSize(bag.Size), 117 | intutil.FormatSize(bag.SizeShared), intutil.FormatSize(bag.SizeExclusive)) 118 | } else { 119 | fmt.Printf("#%d %s [%s*]\n", 120 | i, bag.Hash, intutil.FormatSize(bag.Size)) 121 | } 122 | 
for _, item := range bag.ItemList { 123 | c := "-" 124 | if item.Merged { 125 | c = "+" 126 | } 127 | fmt.Println(c, item.Path) 128 | } 129 | } 130 | } 131 | 132 | fmt.Println() 133 | fmt.Printf("Detected %d hashes that are shared by %d files:\n", chash, cfile) 134 | if extUnknownCount*2 > len(list) { 135 | fmt.Printf("- Used space: %s\n", intutil.FormatSize(actsize)) 136 | fmt.Printf("\n*) failed to load file-extents on this OS/filesystem for\n"+ 137 | " %.2f%% of files, cannot show details and reclaimable\n space\n", (float64(extUnknownCount)/float64(len(list)))*100) 138 | } else { 139 | fmt.Printf("- Minimum required space: %s\n", intutil.FormatSize(minsize)) 140 | fmt.Printf("- Maximum required space: %s\n", intutil.FormatSize(maxsize)) 141 | fmt.Printf("- Actual used space: %s\n", intutil.FormatSize(actsize)) 142 | fmt.Printf("- Reclaimable space: %s\n", intutil.FormatSize(actsize-minsize)) 143 | if maxsize-minsize > 0 { 144 | fmt.Printf("- Efficiency: %.2f%%\n", (1-(float64(actsize-minsize)/float64(maxsize-minsize)))*100) 145 | } 146 | if extUnknownCount > 0 { 147 | fmt.Printf("\n*) failed to load file-extents on this OS/filesystem for\n"+ 148 | " %.2f%% of files, shown data is not accurate\n", (float64(extUnknownCount)/float64(len(list)))*100) 149 | 150 | } 151 | } 152 | } 153 | 154 | func (m *Main) runDedup(command string, dd *CLIDedup, indexName string) int { 155 | var err error 156 | 157 | var argPath string 158 | switch command { 159 | case cmdDedupDetect: 160 | argPath = dd.Detect.Path 161 | case cmdDedupShow: 162 | argPath = dd.Show.Path 163 | case cmdDedupRun: 164 | argPath = dd.Run.Path 165 | } 166 | 167 | st, root, err := chkbit.LocateIndex(argPath, chkbit.IndexTypeAny, indexName) 168 | if err != nil { 169 | m.printError(err) 170 | return 1 171 | } 172 | if st != chkbit.IndexTypeAtom { 173 | m.printErr("error: dedup requires an atom index; you can create one with `chkbit fuse` while leaving your split index in place") 174 | return 1 175 | } 176 | 
177 | m.dedup, err = chkbit.NewDedup(root, indexName, command == cmdDedupDetect) 178 | if err != nil { 179 | m.printError(err) 180 | if command != cmdDedupDetect && os.IsNotExist(err) { 181 | m.printStderr("Did you forget to run `chkbit detect`?") 182 | } 183 | return 1 184 | } 185 | defer m.dedup.Finish() 186 | 187 | mode1 := true 188 | printUpdated := func() { 189 | m.logInfo("", "last updated "+m.dedup.LastUpdated().Format(time.DateTime)) 190 | } 191 | 192 | switch command { 193 | case cmdDedupShow: 194 | if list, err := m.dedup.Show(); err == nil { 195 | if dd.Show.Json { 196 | if data, err := json.Marshal(&list); err == nil { 197 | fmt.Println(string(data)) 198 | } 199 | } else { 200 | m.logInfo("", "chkbit dedup show "+argPath) 201 | printUpdated() 202 | m.showDedupStatus(list, dd.Show.Details) 203 | } 204 | } 205 | return 0 206 | case cmdDedupDetect: 207 | m.logInfo("", "chkbit dedup detect "+argPath) 208 | fmt.Println(abortTip) 209 | case cmdDedupRun, cmdDedupRun2: 210 | m.logInfo("", fmt.Sprintf("chkbit dedup run %s %s", argPath, dd.Run.Hashes)) 211 | printUpdated() 212 | fmt.Println(abortTip) 213 | mode1 = false 214 | } 215 | 216 | resultCh := make(chan error, 1) 217 | go func() { 218 | var err error 219 | switch command { 220 | case cmdDedupDetect: 221 | err = m.dedup.DetectDupes(dd.Detect.MinSize, m.verbose) 222 | case cmdDedupRun, cmdDedupRun2: 223 | err = m.dedup.Dedup(dd.Run.Hashes, m.verbose) 224 | } 225 | resultCh <- err 226 | m.dedup.LogQueue <- nil 227 | }() 228 | m.handleDedupProgress(mode1) 229 | 230 | if err = <-resultCh; err != nil { 231 | m.printError(err) 232 | if !chkbit.IsAborted(err) { 233 | return 1 234 | } 235 | } 236 | 237 | switch command { 238 | case cmdDedupDetect, cmdDedupRun, cmdDedupRun2: 239 | if m.progress == Fancy { 240 | elapsed := time.Since(m.fps.Start) 241 | elapsedS := elapsed.Seconds() 242 | m.logInfo("", fmt.Sprintf("- %s elapsed", elapsed.Truncate(time.Second))) 243 | m.logInfo("", fmt.Sprintf("- %s processed",
util.LangNum1Choice(m.dedup.NumTotal(), "file", "files"))) 244 | m.logInfo("", fmt.Sprintf("- %.2f files/second", (float64(m.fps.Total)+float64(m.fps.Current))/elapsedS)) 245 | if m.dedup.ReclaimedTotal() > 0 { 246 | m.logInfo("", fmt.Sprintf("- %sB reclaimed", intutil.FormatSize(m.dedup.ReclaimedTotal()))) 247 | } 248 | } 249 | } 250 | 251 | switch command { 252 | case cmdDedupDetect: 253 | if list, err := m.dedup.Show(); err == nil { 254 | m.showDedupStatus(list, dd.Show.Details) 255 | } 256 | } 257 | 258 | return 0 259 | } 260 | -------------------------------------------------------------------------------- /cmd/chkbit/help.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | var headerHelp = `Alerts you of data corruption in your files, especially during transfers, backups and after recovery. 4 | 5 | For help tips run "chkbit -H" or go to 6 | https://github.com/laktak/chkbit 7 | Author: Christian Zangl 8 | ` 9 | 10 | var helpTips = ` 11 | .chkbitignore rules: 12 | - each line should contain exactly one name 13 | - you may use Unix shell-style wildcards 14 | - * matches everything except / 15 | - ? 
matches any single character except / 16 | - [seq] matches any character/range in seq 17 | - [^seq] matches any character/range not in seq 18 | - \\ escape to match the following character 19 | - lines starting with '#' are skipped 20 | - lines starting with '/' are only applied to the current directory 21 | 22 | Status codes: 23 | PNC: exception/panic, unable to continue 24 | DMG: error, data damage detected 25 | ERX: error, index damaged 26 | old: warning, file replaced by an older version 27 | upd: file updated 28 | new: new file 29 | ok : checked and ok (verbose) 30 | del: file/directory removed (-m) 31 | ign: ignored (see .chkbitignore) 32 | msg: message 33 | 34 | Configuration file (json): 35 | - location 36 | - key names are the option names with '-' replaced by '_' 37 | - for example --include-dot is written as: 38 | { "include_dot": true } 39 | 40 | Performance: 41 | - chkbit uses 5 workers by default. To speed it up tune it with the --workers flag. 42 | - Note: slow/spinning disks work best with just 1 worker! 
43 | 44 | Deduplication requires a supported OS: 45 | - Linux with a CoW filesystem like btrfs (full support) 46 | - macOS with a APFS (detect supported) 47 | - for details see https://laktak.github.io/chkbit/dedup/ 48 | ` 49 | -------------------------------------------------------------------------------- /cmd/chkbit/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "log" 7 | "os" 8 | "os/signal" 9 | slpath "path" 10 | "path/filepath" 11 | "strings" 12 | "sync" 13 | "time" 14 | 15 | "github.com/alecthomas/kong" 16 | "github.com/laktak/chkbit/v6" 17 | "github.com/laktak/chkbit/v6/cmd/chkbit/util" 18 | "github.com/laktak/chkbit/v6/intutil" 19 | "github.com/laktak/lterm" 20 | ) 21 | 22 | type Progress int 23 | 24 | const ( 25 | Quiet Progress = iota 26 | Summary 27 | Plain 28 | Fancy 29 | ) 30 | 31 | const ( 32 | updateInterval = time.Millisecond * 700 33 | sizeMB int64 = 1024 * 1024 34 | abortTip = "> you can abort by pressing control+c" 35 | ) 36 | 37 | const ( 38 | cmdCheck = "check " 39 | cmdAdd = "add " 40 | cmdUpdate = "update " 41 | cmdShowIgnored = "show-ignored " 42 | cmdInit = "init " 43 | cmdFuse = "fuse " 44 | cmdDedupDetect = "dedup detect " 45 | cmdDedupShow = "dedup show " 46 | cmdDedupRun = "dedup run " 47 | cmdDedupRun2 = "dedup run " 48 | cmdUtilFileext = "util fileext " 49 | cmdUtilFilededup = "util filededup " 50 | cmdTips = "tips" 51 | cmdVersion = "version" 52 | ) 53 | 54 | var appVersion = "vdev" 55 | var ( 56 | termBG = lterm.Bg8(240) 57 | termSep = "|" 58 | termSepFG = lterm.Fg8(235) 59 | termFG1 = lterm.Fg8(255) 60 | termFG2 = lterm.Fg8(228) 61 | termFG3 = lterm.Fg8(202) 62 | termOKFG = lterm.Fg4(2) 63 | termAlertFG = lterm.Fg4(1) 64 | termDimFG = lterm.Fg8(240) 65 | ) 66 | 67 | type CLI struct { 68 | Check struct { 69 | Paths []string `arg:"" name:"paths" help:"directories to check"` 70 | SkipNew bool `short:"s" help:"verify index only, do not 
report new files"` 71 | } `cmd:"" help:"chkbit will verify files in readonly mode"` 72 | 73 | Add struct { 74 | Paths []string `arg:"" name:"paths" help:"directories to add"` 75 | } `cmd:"" help:"add and update modified files (alias for chkbit update -s)"` 76 | 77 | Update struct { 78 | Paths []string `arg:"" name:"paths" help:"directories to update"` 79 | SkipExisting bool `short:"s" help:"only add new and modified files, do not check existing (quicker)"` 80 | Force bool `help:"force update of damaged items (advanced usage only)"` 81 | } `cmd:"" help:"add and update modified files, also checking existing ones (see chkbit update -h)"` 82 | 83 | Init struct { 84 | Mode string `arg:"" enum:"split,atom" help:"{split|atom} split mode creates one index per directory while in atom mode a single index is created at the given path"` 85 | Path string `arg:"" help:"directory for the index"` 86 | Force bool `help:"force init if a index already exists"` 87 | } `cmd:"" help:"initialize a new index at the given path that manages the path and all its subfolders (see chkbit init -h)"` 88 | 89 | Fuse struct { 90 | Path string `arg:"" help:"directory for the index"` 91 | Force bool `help:"force overwrite if a index already exists"` 92 | } `cmd:"" help:"merge all indexes (split&atom) under this path into a single atom index, will NOT delete the source indexes"` 93 | 94 | Dedup CLIDedup `cmd:"" help:"Deduplication commands"` 95 | 96 | Util struct { 97 | Fileext struct { 98 | Paths []string `arg:"" name:"paths" help:"files to check"` 99 | } `cmd:"" help:"check if the given files occupy the same block on disk"` 100 | 101 | Filededup struct { 102 | Paths []string `arg:"" name:"paths" help:"files to dedup"` 103 | } `cmd:"" help:"run deduplication for the given files, makes all duplicate file blocks point to the same space"` 104 | } `cmd:"" help:"Utility functions; requires supported OS & filesystem (see tips)"` 105 | 106 | ShowIgnored struct { 107 | Paths []string `arg:"" name:"paths" 
help:"directories to list"` 108 | } `cmd:"" help:"show ignored files (see tips)"` 109 | 110 | Tips struct { 111 | } `cmd:"" help:"show tips"` 112 | 113 | Version struct { 114 | } `cmd:"" help:"show version information"` 115 | 116 | LogDeleted bool `short:"x" help:"log deleted/missing files/directories since the last run" negatable:""` 117 | IncludeDot bool `short:"d" help:"include dot files" negatable:""` 118 | SkipSymlinks bool `short:"S" help:"do not follow symlinks" negatable:""` 119 | NoRecurse bool `short:"R" help:"do not recurse into subdirectories" negatable:""` 120 | NoDirInIndex bool `short:"D" help:"do not track directories in the index" negatable:""` 121 | NoConfig bool `help:"ignore the config file"` 122 | MaxDepth int `default:0 help:"process a directory only if it is N or fewer levels below the specified path(s); 0 for no limit"` 123 | LogFile string `short:"l" help:"write to a logfile if specified"` 124 | LogVerbose bool `help:"verbose logging" negatable:""` 125 | Algo string `default:"blake3" help:"hash algorithm: md5, sha512, blake3"` 126 | IndexName string `default:".chkbit" help:"filename where chkbit stores its hashes, needs to start with '.'"` 127 | IgnoreName string `default:".chkbitignore" help:"filename that chkbit reads its ignore list from, needs to start with '.'"` 128 | Workers int `short:"w" default:"5" help:"number of workers to use. 
For slow IO (like on a spinning disk) --workers=1 will be faster"` 129 | Plain bool `help:"show plain status instead of being fancy" negatable:""` 130 | Quiet bool `short:"q" help:"quiet, don't show progress/information" negatable:""` 131 | Verbose bool `short:"v" help:"verbose output" negatable:""` 132 | } 133 | 134 | type CLIDedup struct { 135 | Detect struct { 136 | Path string `arg:"" help:"directory for the index"` 137 | MinSize uint64 `default:8192 help:"minimum file size"` 138 | } `cmd:"" help:"use the atom index to detect duplicates"` 139 | 140 | Show struct { 141 | Path string `arg:"" help:"directory for the index"` 142 | Details bool `short:"f" help:"show file details" negatable:""` 143 | Json bool `short:"j" help:"output json" negatable:""` 144 | } `cmd:"" help:"show detected duplicate status"` 145 | 146 | Run struct { 147 | Path string `arg:"" help:"directory for the index"` 148 | Hashes []string `arg:"" optional:"" name:"hashes" help:"hashes to select (all if not specified)"` 149 | } `cmd:"" help:"run deduplication, makes all duplicate file blocks point to the same space; requires supported OS & filesystem (see tips)"` 150 | } 151 | 152 | func toSlash(paths []string) []string { 153 | for i, path := range paths { 154 | paths[i] = filepath.ToSlash(path) 155 | } 156 | return paths 157 | } 158 | 159 | func (cli *CLI) toSlash() { 160 | cli.Check.Paths = toSlash(cli.Check.Paths) 161 | cli.Add.Paths = toSlash(cli.Add.Paths) 162 | cli.Update.Paths = toSlash(cli.Update.Paths) 163 | cli.Init.Path = filepath.ToSlash(cli.Init.Path) 164 | cli.Fuse.Path = filepath.ToSlash(cli.Fuse.Path) 165 | cli.Dedup.Detect.Path = filepath.ToSlash(cli.Dedup.Detect.Path) 166 | cli.Dedup.Show.Path = filepath.ToSlash(cli.Dedup.Show.Path) 167 | cli.Dedup.Run.Path = filepath.ToSlash(cli.Dedup.Run.Path) 168 | cli.Util.Fileext.Paths = toSlash(cli.Util.Fileext.Paths) 169 | cli.Util.Filededup.Paths = toSlash(cli.Util.Filededup.Paths) 170 | cli.ShowIgnored.Paths = 
toSlash(cli.ShowIgnored.Paths) 171 | } 172 | 173 | type Main struct { 174 | context *chkbit.Context 175 | dedup *chkbit.Dedup 176 | dmgList []string 177 | errList []string 178 | verbose bool 179 | hideNew bool 180 | logger *log.Logger 181 | logVerbose bool 182 | progress Progress 183 | termWidth int 184 | fps *util.RateCalc 185 | bps *util.RateCalc 186 | } 187 | 188 | func (m *Main) log(text string) { 189 | m.logger.Println(time.Now().UTC().Format("2006-01-02 15:04:05"), text) 190 | } 191 | 192 | func (m *Main) logInfo(col, text string) { 193 | if m.progress != Quiet { 194 | if m.progress == Fancy { 195 | lterm.Printline(col, text, lterm.Reset) 196 | } else { 197 | fmt.Println(text) 198 | } 199 | } 200 | m.log(text) 201 | } 202 | 203 | func (m *Main) printStderr(msg string) { 204 | fmt.Fprintln(os.Stderr, msg) 205 | } 206 | 207 | func (m *Main) printErr(text string) { 208 | if m.progress == Fancy { 209 | lterm.Write(termAlertFG) 210 | m.printStderr(text) 211 | lterm.Write(lterm.Reset) 212 | } else { 213 | m.printStderr(text) 214 | } 215 | } 216 | 217 | func (m *Main) printError(err error) { 218 | m.printErr("error: " + err.Error()) 219 | } 220 | 221 | func (m *Main) logStatus(stat chkbit.Status, message string) bool { 222 | if stat == chkbit.StatusUpdateIndex || m.hideNew && stat == chkbit.StatusNew { 223 | return false 224 | } 225 | 226 | if stat == chkbit.StatusErrorDamage { 227 | m.dmgList = append(m.dmgList, message) 228 | } else if stat == chkbit.StatusPanic { 229 | m.errList = append(m.errList, message) 230 | } 231 | 232 | if m.logVerbose || !stat.IsVerbose() { 233 | m.log(stat.String() + " " + message) 234 | } 235 | 236 | if m.verbose || !stat.IsVerbose() { 237 | 238 | if m.progress == Quiet && stat == chkbit.StatusInfo { 239 | return false 240 | } 241 | 242 | col := lterm.Reset 243 | col1 := termDimFG 244 | if stat.IsErrorOrWarning() { 245 | col = termAlertFG 246 | col1 = col 247 | } 248 | lterm.Printline(col1, stat.String(), " ", col, message, lterm.Reset) 
249 | return true 250 | } 251 | return false 252 | } 253 | 254 | func (m *Main) handleProgress() { 255 | 256 | abortChan := make(chan os.Signal, 1) 257 | signal.Notify(abortChan, os.Interrupt) 258 | 259 | last := time.Now().Add(-updateInterval) 260 | stat := "" 261 | for { 262 | select { 263 | case <-abortChan: 264 | m.context.Abort() 265 | case item := <-m.context.LogQueue: 266 | if item == nil { 267 | if m.progress == Fancy { 268 | lterm.Printline("") 269 | } 270 | return 271 | } 272 | if m.logStatus(item.Stat, item.Message) { 273 | if m.progress == Fancy { 274 | lterm.Write(termBG, termFG1, stat, lterm.ClearLine(0), lterm.Reset, "\r") 275 | } else { 276 | fmt.Print(m.context.NumTotal, "\r") 277 | } 278 | } 279 | case perf := <-m.context.PerfQueue: 280 | now := time.Now() 281 | m.fps.Push(now, perf.NumFiles) 282 | m.bps.Push(now, perf.NumBytes) 283 | if last.Add(updateInterval).Before(now) { 284 | last = now 285 | if m.progress == Fancy { 286 | statF := fmt.Sprintf("%d files/s", m.fps.Last()) 287 | statB := fmt.Sprintf("%d MB/s", m.bps.Last()/sizeMB) 288 | stat = "RW" 289 | if !m.context.UpdateIndex { 290 | stat = "RO" 291 | } 292 | stat = fmt.Sprintf("[%s:%d] %5d files $ %s %-13s $ %s %-13s", 293 | stat, m.context.NumWorkers, m.context.NumTotal, 294 | util.Sparkline(m.fps.Stats), statF, 295 | util.Sparkline(m.bps.Stats), statB) 296 | stat = util.LeftTruncate(stat, m.termWidth-1) 297 | stat = strings.Replace(stat, "$", termSepFG+termSep+termFG2, 1) 298 | stat = strings.Replace(stat, "$", termSepFG+termSep+termFG3, 1) 299 | lterm.Write(termBG, termFG1, stat, lterm.ClearLine(0), lterm.Reset, "\r") 300 | } else if m.progress == Plain { 301 | fmt.Print(m.context.NumTotal, "\r") 302 | } 303 | } 304 | } 305 | } 306 | } 307 | 308 | func (m *Main) runCmd(command string, cli CLI) int { 309 | var err error 310 | m.context, err = chkbit.NewContext(cli.Workers, cli.Algo, cli.IndexName, cli.IgnoreName) 311 | if err != nil { 312 | m.printError(err) 313 | return 1 314 | } 315 | 
316 | var pathList []string 317 | switch command { 318 | case cmdCheck: 319 | pathList = cli.Check.Paths 320 | m.log("chkbit check " + strings.Join(pathList, ", ")) 321 | m.hideNew = cli.Check.SkipNew 322 | case cmdUpdate: 323 | pathList = cli.Update.Paths 324 | m.context.UpdateIndex = true 325 | m.context.UpdateSkipCheck = cli.Update.SkipExisting 326 | m.context.ForceUpdateDmg = cli.Update.Force 327 | m.log("chkbit update " + strings.Join(pathList, ", ")) 328 | case cmdShowIgnored: 329 | pathList = cli.ShowIgnored.Paths 330 | m.verbose = true 331 | m.context.ShowIgnoredOnly = true 332 | m.log("chkbit show-ignored " + strings.Join(pathList, ", ")) 333 | } 334 | 335 | m.context.LogDeleted = cli.LogDeleted 336 | m.context.IncludeDot = cli.IncludeDot 337 | m.context.SkipSymlinks = cli.SkipSymlinks 338 | m.context.SkipSubdirectories = cli.NoRecurse 339 | m.context.TrackDirectories = !cli.NoDirInIndex 340 | m.context.MaxDepth = cli.MaxDepth 341 | 342 | st, root, err := chkbit.LocateIndex(pathList[0], chkbit.IndexTypeAny, m.context.IndexFilename) 343 | if err != nil { 344 | m.printError(err) 345 | return 1 346 | } 347 | 348 | if st == chkbit.IndexTypeAtom { 349 | pathList, err = m.context.UseAtomIndexStore(root, pathList) 350 | if err == nil { 351 | // pathList is relative to root 352 | if err = os.Chdir(root); err != nil { 353 | m.printError(err) 354 | return 1 355 | } 356 | m.logInfo("", "Using atom-index in "+root) 357 | } else { 358 | m.printError(err) 359 | return 1 360 | } 361 | } 362 | 363 | var wg sync.WaitGroup 364 | wg.Add(1) 365 | go func() { 366 | defer wg.Done() 367 | m.handleProgress() 368 | }() 369 | m.context.Process(pathList) 370 | wg.Wait() 371 | 372 | if command == cmdShowIgnored { 373 | return 0 374 | } 375 | 376 | // result 377 | numIdxUpd := m.context.NumIdxUpd 378 | numNew := m.context.NumNew 379 | numUpd := m.context.NumUpd 380 | if m.hideNew { 381 | numNew = 0 382 | } 383 | 384 | didUpdate := m.context.UpdateIndex 385 | if m.context.DidAbort() { 
386 | if m.context.GetIndexType() == chkbit.IndexTypeAtom { 387 | didUpdate = false 388 | } 389 | } 390 | 391 | if m.progress != Quiet { 392 | mode := "" 393 | if !m.context.UpdateIndex { 394 | mode = " in readonly mode" 395 | } 396 | status := fmt.Sprintf("Processed %s%s", util.LangNum1MutateSuffix(m.context.NumTotal, "file"), mode) 397 | m.logInfo(termOKFG, status) 398 | 399 | if m.progress == Fancy && m.context.NumTotal > 0 { 400 | elapsed := time.Since(m.fps.Start) 401 | elapsedS := elapsed.Seconds() 402 | m.logInfo("", fmt.Sprintf("- %s elapsed", elapsed.Truncate(time.Second))) 403 | m.logInfo("", fmt.Sprintf("- %.2f files/second", (float64(m.fps.Total)+float64(m.fps.Current))/elapsedS)) 404 | m.logInfo("", fmt.Sprintf("- %.2f MB/second", (float64(m.bps.Total)+float64(m.bps.Current))/float64(sizeMB)/elapsedS)) 405 | } 406 | 407 | if didUpdate { 408 | if numIdxUpd > 0 { 409 | m.logInfo(termOKFG, fmt.Sprintf("- %s updated", util.LangNum1Choice(numIdxUpd, "directory was", "directories were"))) 410 | m.logInfo(termOKFG, fmt.Sprintf("- %s added", util.LangNum1Choice(numNew, "file hash was", "file hashes were"))) 411 | m.logInfo(termOKFG, fmt.Sprintf("- %s updated", util.LangNum1Choice(numUpd, "file hash was", "file hashes were"))) 412 | if m.context.NumDel > 0 { 413 | m.logInfo(termOKFG, fmt.Sprintf("- %s been removed", util.LangNum1Choice(m.context.NumDel, "file/directory has", "files/directories have"))) 414 | } 415 | } 416 | } else if numNew+numUpd+m.context.NumDel > 0 { 417 | m.logInfo(termAlertFG, "No changes were made") 418 | m.logInfo(termAlertFG, fmt.Sprintf("- %s would have been added", util.LangNum1MutateSuffix(numNew, "file"))) 419 | m.logInfo(termAlertFG, fmt.Sprintf("- %s would have been updated", util.LangNum1MutateSuffix(numUpd, "file"))) 420 | if m.context.NumDel > 0 { 421 | m.logInfo(termAlertFG, fmt.Sprintf("- %s would have been removed", util.LangNum1Choice(m.context.NumDel, "file/directory", "files/directories"))) 422 | } 423 | } 424 | } 425 | 
426 | // summarize errors 427 | if len(m.dmgList) > 0 { 428 | m.printErr("chkbit detected damage in these files:") 429 | for _, item := range m.dmgList { 430 | m.printStderr(item) 431 | } 432 | n := len(m.dmgList) 433 | status := fmt.Sprintf("error: detected %s with damage!", util.LangNum1MutateSuffix(n, "file")) 434 | m.log(status) 435 | m.printErr(status) 436 | } 437 | 438 | if len(m.errList) > 0 { 439 | status := "chkbit ran into errors" 440 | m.log(status + "!") 441 | m.printErr(status + ":") 442 | for _, item := range m.errList { 443 | m.printStderr(item) 444 | } 445 | } 446 | 447 | if m.context.DidAbort() { 448 | m.logInfo(termAlertFG, "Aborted") 449 | } 450 | 451 | if len(m.dmgList) > 0 || len(m.errList) > 0 { 452 | return 1 453 | } 454 | return 0 455 | } 456 | 457 | func (m *Main) run() int { 458 | 459 | if len(os.Args) < 2 { 460 | os.Args = append(os.Args, "--help") 461 | } 462 | 463 | var configPath = "chkbit-config.json" 464 | configRoot, err := os.UserConfigDir() 465 | if err == nil { 466 | configPath = slpath.Join(configRoot, "chkbit/config.json") 467 | } 468 | 469 | var cli CLI 470 | var ctx *kong.Context 471 | kongOptions := []kong.Option{ 472 | kong.Name("chkbit"), 473 | kong.Description(headerHelp), 474 | kong.ConfigureHelp(kong.HelpOptions{Tree: true, FlagsLast: true}), 475 | kong.UsageOnError(), 476 | } 477 | 478 | ctx = kong.Parse(&cli, append(kongOptions, kong.Configuration(kong.JSON, configPath))...) 479 | 480 | if cli.NoConfig { 481 | cli = CLI{} 482 | ctx = kong.Parse(&cli, kongOptions...) 
483 | } 484 | 485 | cli.toSlash() 486 | 487 | if cli.Quiet { 488 | m.progress = Quiet 489 | } else if fileInfo, err := os.Stdout.Stat(); err == nil && (fileInfo.Mode()&os.ModeCharDevice) == 0 { 490 | m.progress = Summary 491 | } else if cli.Plain { 492 | m.progress = Plain 493 | } else { 494 | m.progress = Fancy 495 | } 496 | 497 | m.verbose = cli.Verbose 498 | if cli.LogFile != "" { 499 | m.logVerbose = cli.LogVerbose 500 | f, err := os.OpenFile(cli.LogFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) 501 | if err != nil { 502 | m.printError(err) 503 | return 1 504 | } 505 | defer f.Close() 506 | m.logger = log.New(f, "", 0) 507 | } 508 | 509 | cmd := ctx.Command() 510 | switch cmd { 511 | case cmdCheck, cmdUpdate, cmdShowIgnored: 512 | return m.runCmd(cmd, cli) 513 | case cmdAdd: 514 | cli.Update.Paths = cli.Add.Paths 515 | cli.Update.SkipExisting = true 516 | return m.runCmd(cmdUpdate, cli) 517 | case cmdInit: 518 | m.logInfo("", fmt.Sprintf("chkbit init %s %s", cli.Init.Mode, cli.Init.Path)) 519 | st := chkbit.IndexTypeSplit 520 | if cli.Init.Mode == "atom" { 521 | st = chkbit.IndexTypeAtom 522 | } 523 | if err := chkbit.InitializeIndexStore(st, cli.Init.Path, cli.IndexName, cli.Init.Force); err != nil { 524 | text := chkbit.StatusPanic.String() + " " + err.Error() 525 | m.printErr(text) 526 | m.log(text) 527 | return 1 528 | } 529 | return 0 530 | case cmdFuse: 531 | m.logInfo("", fmt.Sprintf("chkbit fuse %s", cli.Fuse.Path)) 532 | log := func(text string) { 533 | m.logInfo("", text) 534 | } 535 | if err := chkbit.FuseIndexStore(cli.Fuse.Path, cli.IndexName, cli.SkipSymlinks, cli.Verbose, cli.Fuse.Force, log); err != nil { 536 | text := chkbit.StatusPanic.String() + " " + err.Error() 537 | m.printErr(text) 538 | m.log(text) 539 | return 1 540 | } 541 | return 0 542 | case cmdDedupDetect, cmdDedupShow, cmdDedupRun, cmdDedupRun2: 543 | return m.runDedup(cmd, &cli.Dedup, cli.IndexName) 544 | 545 | case cmdUtilFileext: 546 | paths := cli.Util.Fileext.Paths 547 | 
allMatch := true 548 | var first chkbit.FileExtentList 549 | for i, path := range paths { 550 | blocks, err := chkbit.GetFileExtents(path) 551 | if err != nil { 552 | m.printError(err) 553 | return 1 554 | } 555 | if i == 0 { 556 | first = blocks 557 | } else { 558 | if !chkbit.ExtentsMatch(first, blocks) { 559 | m.printErr(fmt.Sprintf("Files do not occupie the same blocks (%s, %s).", paths[0], path)) 560 | allMatch = false 561 | } 562 | } 563 | if m.verbose || len(paths) == 1 { 564 | fmt.Println(path) 565 | fmt.Print(chkbit.ShowExtents(blocks)) 566 | } 567 | } 568 | if len(paths) > 1 && allMatch { 569 | fmt.Println("Files occupie the same blocks.") 570 | return 0 571 | } 572 | return 1 573 | 574 | case cmdUtilFilededup: 575 | paths := cli.Util.Filededup.Paths 576 | if len(paths) < 2 { 577 | fmt.Println("error: supply two or more paths") 578 | return 1 579 | } 580 | var reclaimedTotal uint64 581 | var first string 582 | for i, path := range paths { 583 | if i == 0 { 584 | first = path 585 | } else { 586 | if reclaimed, err := chkbit.DeduplicateFiles(first, path); err != nil { 587 | m.printErr(fmt.Sprintf("Unable to deduplicate (%s, %s): %s", paths[0], path, err.Error())) 588 | return 1 589 | } else { 590 | reclaimedTotal += reclaimed 591 | } 592 | } 593 | } 594 | fmt.Printf("Dedup success, reclaimed %s.\n", intutil.FormatSize(reclaimedTotal)) 595 | return 0 596 | 597 | case cmdTips: 598 | fmt.Println(strings.ReplaceAll(helpTips, "", configPath)) 599 | return 0 600 | case cmdVersion: 601 | fmt.Println("github.com/laktak/chkbit") 602 | fmt.Println(appVersion) 603 | return 0 604 | default: 605 | fmt.Println("unknown: " + cmd) 606 | return 1 607 | } 608 | } 609 | 610 | func main() { 611 | defer func() { 612 | if r := recover(); r != nil { 613 | // panic 614 | fmt.Fprintln(os.Stderr, r) 615 | os.Exit(1) 616 | } 617 | }() 618 | 619 | termWidth := lterm.GetWidth() 620 | m := &Main{ 621 | logger: log.New(io.Discard, "", 0), 622 | termWidth: termWidth, 623 | fps: 
// Minimum returns the smallest value in series.
// For an empty series it returns math.MaxInt64.
func Minimum(series []int64) int64 {
	best := int64(math.MaxInt64)
	for _, value := range series {
		best = min(best, value)
	}
	return best
}

// Maximum returns the largest value in series.
// For an empty series it returns math.MinInt64.
func Maximum(series []int64) int64 {
	best := int64(math.MinInt64)
	for _, value := range series {
		best = max(best, value)
	}
	return best
}

// Clamp limits n to the inclusive range [min, max].
func Clamp(min int64, max int64, n int64) int64 {
	switch {
	case n < min:
		return min
	case n > max:
		return max
	default:
		return n
	}
}

// LangNum1MutateSuffix formats num with the unit u, appending a plural
// "s" for any count other than one (e.g. "1 file", "2 files").
func LangNum1MutateSuffix(num int, u string) string {
	unit := u
	if num != 1 {
		unit += "s"
	}
	return fmt.Sprintf("%d %s", num, unit)
}

// LangNum1Choice formats num with the singular unit u1 when num is
// exactly one and with the plural unit u2 otherwise.
func LangNum1Choice(num int, u1, u2 string) string {
	if num == 1 {
		return fmt.Sprintf("%d %s", num, u1)
	}
	return fmt.Sprintf("%d %s", num, u2)
}

// Progress renders percent as a two-part text bar (done, remaining) of
// combined length width; widths below 5 yield two empty strings.
func Progress(percent float64, width int) (string, string) {
	if width < 5 {
		return "", ""
	}
	done := int(percent * float64(width))
	return strings.Repeat("#", done), strings.Repeat(":", width-done)
}
-------------------------------------------------------------------------------- /cmd/chkbit/util/progress_test.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestProgress(t *testing.T) { 8 | expected := "###########:::::::::::::::::::" 9 | a, b := Progress(.375, 30) 10 | actual := a + b 11 | if expected != actual { 12 | t.Error("expected:", expected, "actual:", actual) 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /cmd/chkbit/util/ratecalc.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | type RateCalc struct { 8 | Interval time.Duration 9 | MaxStat int 10 | Start time.Time 11 | Updated time.Time 12 | Total int64 13 | Current int64 14 | Stats []int64 15 | } 16 | 17 | func NewRateCalc(interval time.Duration, maxStat int) *RateCalc { 18 | if maxStat < 10 { 19 | maxStat = 10 20 | } 21 | rc := &RateCalc{ 22 | Interval: interval, 23 | MaxStat: maxStat, 24 | } 25 | rc.Reset() 26 | return rc 27 | } 28 | 29 | func (rc *RateCalc) Reset() { 30 | rc.Start = time.Now() 31 | rc.Updated = rc.Start 32 | rc.Total = 0 33 | rc.Current = 0 34 | rc.Stats = make([]int64, rc.MaxStat) 35 | } 36 | 37 | func (rc *RateCalc) Last() int64 { 38 | return rc.Stats[len(rc.Stats)-1] 39 | } 40 | 41 | func (rc *RateCalc) Push(ts time.Time, value int64) { 42 | for rc.Updated.Add(rc.Interval).Before(ts) { 43 | rc.Stats = append(rc.Stats, rc.Current) 44 | if len(rc.Stats) > rc.MaxStat { 45 | rc.Stats = rc.Stats[len(rc.Stats)-rc.MaxStat:] 46 | } 47 | rc.Total += rc.Current 48 | rc.Current = 0 49 | rc.Updated = rc.Updated.Add(rc.Interval) 50 | } 51 | rc.Current += value 52 | } 53 | -------------------------------------------------------------------------------- /cmd/chkbit/util/sparkline.go: 
-------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "math" 5 | ) 6 | 7 | var sparkChars = []rune{'▁', '▂', '▃', '▄', '▅', '▆', '▇', '█'} 8 | 9 | func Sparkline(series []int64) string { 10 | out := make([]rune, len(series)) 11 | min := Minimum(series) 12 | max := Maximum(series) 13 | dataRange := max - min 14 | if dataRange == 0 { 15 | for i := range series { 16 | out[i] = sparkChars[0] 17 | } 18 | } else { 19 | step := float64(len(sparkChars)-1) / float64(dataRange) 20 | for i, n := range series { 21 | idx := int(math.Round(float64(Clamp(min, max, n)-min) * step)) 22 | if idx < 0 { 23 | out[i] = ' ' 24 | } else if idx > len(sparkChars) { 25 | out[i] = sparkChars[len(sparkChars)-1] 26 | } else { 27 | out[i] = sparkChars[idx] 28 | } 29 | } 30 | } 31 | return string(out) 32 | } 33 | -------------------------------------------------------------------------------- /cmd/chkbit/util/sparkline_test.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestSpark(t *testing.T) { 8 | expected := "▁▁▂▄▅▇██▆▄▂" 9 | actual := Sparkline([]int64{5, 12, 35, 73, 80, 125, 150, 142, 118, 61, 19}) 10 | if expected != actual { 11 | t.Error("expected:", expected, "actual:", actual) 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /cmd/chkbit/util/spinner.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | func Spinner(timeout time.Duration) <-chan string { 8 | spinnerChars := []string{"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"} 9 | ch := make(chan string) 10 | go func() { 11 | for i := 0; ; i++ { 12 | ch <- spinnerChars[i%len(spinnerChars)] 13 | time.Sleep(timeout) 14 | } 15 | }() 16 | return ch 17 | } 18 | 
// LeftTruncate returns s shortened to at most nMax runes (not bytes),
// keeping the left side. Multi-byte characters are never split.
func LeftTruncate(s string, nMax int) string {
	seen := 0
	for i := range s {
		if seen >= nMax {
			return s[:i]
		}
		seen++
	}
	return s
}
bool 24 | SkipSubdirectories bool 25 | IndexFilename string 26 | IgnoreFilename string 27 | MaxDepth int 28 | 29 | WorkQueue chan *WorkItem 30 | LogQueue chan *LogEvent 31 | PerfQueue chan *PerfEvent 32 | 33 | store *indexStore 34 | 35 | mutex sync.Mutex 36 | doAbort bool 37 | NumTotal int 38 | NumIdxUpd int 39 | NumNew int 40 | NumUpd int 41 | NumDel int 42 | } 43 | 44 | func (context *Context) Abort() { 45 | context.doAbort = true 46 | } 47 | 48 | func (context *Context) DidAbort() bool { 49 | return context.doAbort 50 | } 51 | 52 | func (context *Context) GetIndexType() IndexType { 53 | if context.store.atom { 54 | return IndexTypeAtom 55 | } 56 | return IndexTypeSplit 57 | } 58 | 59 | func NewContext(numWorkers int, hashAlgo string, indexFilename string, ignoreFilename string) (*Context, error) { 60 | if indexFilename[0] != '.' { 61 | return nil, errors.New("the index filename must start with a dot") 62 | } 63 | if ignoreFilename[0] != '.' { 64 | return nil, errors.New("the ignore filename must start with a dot") 65 | } 66 | if hashAlgo != "md5" && hashAlgo != "sha512" && hashAlgo != "blake3" { 67 | return nil, errors.New(hashAlgo + " is unknown") 68 | } 69 | if numWorkers < 1 { 70 | return nil, errors.New("expected numWorkers >= 1") 71 | } 72 | logQueue := make(chan *LogEvent, numWorkers*100) 73 | return &Context{ 74 | NumWorkers: numWorkers, 75 | HashAlgo: hashAlgo, 76 | IndexFilename: indexFilename, 77 | IgnoreFilename: ignoreFilename, 78 | WorkQueue: make(chan *WorkItem, numWorkers*10), 79 | LogQueue: logQueue, 80 | PerfQueue: make(chan *PerfEvent, numWorkers*10), 81 | store: &indexStore{logErr: func(message string) { logQueue <- &LogEvent{StatusPanic, "indexstore: " + message} }}, 82 | }, nil 83 | } 84 | 85 | func (context *Context) log(stat Status, message string) { 86 | context.mutex.Lock() 87 | defer context.mutex.Unlock() 88 | switch stat { 89 | case StatusErrorDamage: 90 | context.NumTotal++ 91 | case StatusUpdateIndex: 92 | context.NumIdxUpd++ 93 | 
	// (continuation of Context.log) update the counters for this status
	case StatusUpdateWarnOld:
		context.NumTotal++
		context.NumUpd++
	case StatusUpdate:
		context.NumTotal++
		context.NumUpd++
	case StatusNew:
		context.NumTotal++
		context.NumNew++
	case StatusOK:
		if !context.UpdateSkipCheck {
			context.NumTotal++
		}
	case StatusDeleted:
		context.NumDel++
	}

	context.LogQueue <- &LogEvent{stat, message}
}

// logErr reports err for path as a panic-status log event.
func (context *Context) logErr(path string, err error) {
	context.log(StatusPanic, path+": "+err.Error())
}

// perfMonFiles reports a number of processed files to the perf queue.
func (context *Context) perfMonFiles(numFiles int64) {
	context.PerfQueue <- &PerfEvent{numFiles, 0}
}

// perfMonBytes reports a number of processed bytes to the perf queue.
func (context *Context) perfMonBytes(numBytes int64) {
	context.PerfQueue <- &PerfEvent{0, numBytes}
}

// addWork queues one directory (its files and subdirectories) for the workers.
func (context *Context) addWork(path string, filesToIndex []string, dirList []string, ignore *Ignore) {
	context.WorkQueue <- &WorkItem{path, filesToIndex, dirList, ignore}
}

// endWork queues a nil work item (one per worker is sent on shutdown).
func (context *Context) endWork() {
	context.WorkQueue <- nil
}

// isChkbitFile reports whether name is one of chkbit's own files and
// must therefore be excluded from indexing.
func (context *Context) isChkbitFile(name string) bool {
	// any file with the index prefix is ignored (to allow for .bak and -db files)
	return strings.HasPrefix(name, context.IndexFilename) || name == context.IgnoreFilename
}

// Process resets all counters, opens the index store, starts the worker
// pool and feeds it by scanning every path in pathList.
func (context *Context) Process(pathList []string) {
	context.NumTotal = 0
	context.NumIdxUpd = 0
	context.NumNew = 0
	context.NumUpd = 0
	context.NumDel = 0

	// NOTE(review): the first argument presumably opens the store readonly
	// when not updating — confirm against indexStore.Open
	err := context.store.Open(!context.UpdateIndex, context.NumWorkers*10)
	if err != nil {
		context.logErr("index", err)
		context.LogQueue <- nil
		return
	}

	var wg sync.WaitGroup
	wg.Add(context.NumWorkers)
	for i := 0; i < context.NumWorkers; i++ {
		go func(id int) {
			defer wg.Done()
			context.runWorker(id)
		}(i)
	}
	// producer: scan all roots, then send one nil sentinel per worker
	go func() {
		for _, path := range pathList {
162 | context.scanDir(path, nil, 1) 163 | } 164 | for i := 0; i < context.NumWorkers; i++ { 165 | context.endWork() 166 | } 167 | }() 168 | wg.Wait() 169 | 170 | if _, err := context.store.Finish(context.doAbort); err != nil { 171 | context.logErr("indexstore", err) 172 | } 173 | context.LogQueue <- nil 174 | } 175 | 176 | func (context *Context) scanDir(root string, parentIgnore *Ignore, depth int) { 177 | 178 | if context.doAbort { 179 | return 180 | } 181 | 182 | files, err := os.ReadDir(root) 183 | if err != nil { 184 | context.logErr(root+"/", err) 185 | return 186 | } 187 | 188 | var dirList []string 189 | var filesToIndex []string 190 | 191 | ignore, err := GetIgnore(context, root, parentIgnore) 192 | if err != nil { 193 | context.logErr(root+"/", err) 194 | } 195 | 196 | for _, file := range files { 197 | path := slpath.Join(root, file.Name()) 198 | if isDir(file, path, context.SkipSymlinks) { 199 | if !ignore.shouldIgnore(file.Name()) { 200 | dirList = append(dirList, file.Name()) 201 | } else { 202 | context.log(StatusIgnore, file.Name()+"/") 203 | } 204 | } else if file.Type().IsRegular() { 205 | filesToIndex = append(filesToIndex, file.Name()) 206 | } 207 | } 208 | 209 | context.addWork(root, filesToIndex, dirList, ignore) 210 | 211 | if !context.SkipSubdirectories && (context.MaxDepth == 0 || depth < context.MaxDepth) { 212 | for _, name := range dirList { 213 | context.scanDir(slpath.Join(root, name), ignore, depth+1) 214 | } 215 | } 216 | } 217 | 218 | func (context *Context) UseAtomIndexStore(root string, pathList []string) (relativePathList []string, err error) { 219 | 220 | for _, path := range pathList { 221 | path, err = filepath.Abs(path) 222 | if err != nil { 223 | return nil, err 224 | } 225 | 226 | // below root? 
227 | if !strings.HasPrefix(filepath.ToSlash(path), root) { 228 | return nil, fmt.Errorf("path %s is not below the atom index in %s", path, root) 229 | } 230 | 231 | relativePath, err := filepath.Rel(root, path) 232 | if err != nil { 233 | return nil, err 234 | } 235 | relativePath = filepath.ToSlash(relativePath) 236 | relativePathList = append(relativePathList, relativePath) 237 | } 238 | 239 | context.store.UseAtom(root, context.IndexFilename, len(relativePathList) == 1 && relativePathList[0] == ".") 240 | 241 | return 242 | } 243 | 244 | func isDir(file os.DirEntry, path string, skipSymlinks bool) bool { 245 | if file.IsDir() { 246 | return true 247 | } 248 | ft := file.Type() 249 | if !skipSymlinks && ft&os.ModeSymlink != 0 { 250 | rpath, err := filepath.EvalSymlinks(path) 251 | if err == nil { 252 | fi, err := os.Lstat(rpath) 253 | return err == nil && fi.IsDir() 254 | } 255 | } 256 | return false 257 | } 258 | -------------------------------------------------------------------------------- /dedup.go: -------------------------------------------------------------------------------- 1 | package chkbit 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "os" 8 | slpath "path" 9 | "slices" 10 | "time" 11 | 12 | "github.com/laktak/chkbit/v6/intutil" 13 | bolt "go.etcd.io/bbolt" 14 | ) 15 | 16 | type Dedup struct { 17 | rootPath string 18 | indexName string 19 | 20 | LogQueue chan *LogEvent 21 | PerfQueue chan *DedupPerfEvent 22 | 23 | status ddStatus 24 | conn *bolt.DB 25 | 26 | doAbort bool 27 | numTotal int 28 | reclaimedTotal uint64 29 | } 30 | 31 | type ddStatus struct { 32 | Gen int `json:"gen"` 33 | Updated time.Time `json:"mod"` 34 | } 35 | 36 | type ddBag struct { 37 | Gen int `json:"gen"` 38 | Size int64 `json:"size"` 39 | SizeShared uint64 `json:"shared"` 40 | SizeExclusive uint64 `json:"exclusive"` 41 | ExtUnknown *bool `json:"extUnknown,omitempty"` 42 | ItemList []*DedupItem `json:"item"` 43 | } 44 | 45 | type DedupBag struct { 46 | Hash 
string       `json:"hash"`
	Size          uint64       `json:"size"`
	SizeShared    uint64       `json:"shared"`
	SizeExclusive uint64       `json:"exclusive"`
	ExtUnknown    *bool        `json:"extUnknown,omitempty"`
	ItemList      []*DedupItem `json:"item"`
}

// DedupItem is one file participating in a duplicate set.
type DedupItem struct {
	Path   string `json:"path"`
	Merged bool   `json:"merged"`
}

const (
	// dedupSuffix names the bolt database kept next to the index file
	dedupSuffix = "-dedup.db"
)

var (
	ddStatusBucketName = []byte("status")
	ddStatusName       = []byte("1")
	ddItemBucketName   = []byte("item")

	errAborted = errors.New("aborted")
)

// IsAborted reports whether err is the sentinel produced after Abort.
func IsAborted(err error) bool {
	return err == errAborted
}

// Abort requests that the running operation stop at the next checkpoint.
func (d *Dedup) Abort() {
	d.doAbort = true
}

// DidAbort reports whether Abort was called.
func (d *Dedup) DidAbort() bool {
	return d.doAbort
}

// NumTotal returns the number of files processed so far.
func (d *Dedup) NumTotal() int {
	return d.numTotal
}

// ReclaimedTotal returns the number of bytes reclaimed by deduplication.
func (d *Dedup) ReclaimedTotal() uint64 {
	return d.reclaimedTotal
}

// LastUpdated returns the time the dedup status was last advanced.
func (d *Dedup) LastUpdated() time.Time {
	return d.status.Updated
}

// log emits a log event with the given status.
func (d *Dedup) log(stat Status, message string) {
	d.LogQueue <- &LogEvent{stat, message}
}

// logMsg emits an informational log event.
func (d *Dedup) logMsg(message string) {
	d.log(StatusInfo, message)
}

// perfMonFiles adds numFiles to the total and reports the progress
// fraction i/l to the perf queue (0 when l is 0).
func (d *Dedup) perfMonFiles(numFiles int, i float64, l int) {
	d.numTotal += numFiles
	pc := 0.0
	if l > 0 {
		pc = i / float64(l)
	}
	d.PerfQueue <- &DedupPerfEvent{int64(numFiles), pc}
}

// NewDedup opens the dedup database stored beside the index in path,
// creating it only when createIfNotExists is set.
func NewDedup(path string, indexName string, createIfNotExists bool) (*Dedup, error) {
	var err error
	d := &Dedup{
		rootPath:  path,
		indexName: indexName,
		LogQueue:  make(chan *LogEvent, 100),
		PerfQueue: make(chan *DedupPerfEvent, 100),
	}
	dedupFile := slpath.Join(path, d.indexName+dedupSuffix)

	// a missing file is only acceptable when we are allowed to create it
	_, err = os.Stat(dedupFile)
	if err != nil {
		if !os.IsNotExist(err) || !createIfNotExists {
			return nil, err
		}
	}
d.conn, err = bolt.Open(dedupFile, 0600, getBoltOptions(false)) 129 | if err != nil { 130 | return nil, err 131 | } 132 | if err = d.conn.Update(func(tx *bolt.Tx) error { 133 | sb, err := tx.CreateBucketIfNotExists(ddStatusBucketName) 134 | if err != nil { 135 | return err 136 | } 137 | 138 | status := sb.Get(ddStatusName) 139 | if status != nil { 140 | json.Unmarshal(status, &d.status) 141 | } 142 | 143 | _, err = tx.CreateBucketIfNotExists(ddItemBucketName) 144 | return err 145 | }); err != nil { 146 | return nil, err 147 | } 148 | return d, nil 149 | } 150 | 151 | func (d *Dedup) Finish() error { 152 | if d.conn != nil { 153 | if err := d.conn.Close(); err != nil { 154 | return err 155 | } 156 | } 157 | d.conn = nil 158 | return nil 159 | } 160 | 161 | func (d *Dedup) nextGen(tx *bolt.Tx) error { 162 | if sb := tx.Bucket(ddStatusBucketName); sb != nil { 163 | d.status.Gen += 1 164 | d.status.Updated = time.Now() 165 | if data, err := json.Marshal(&d.status); err == nil { 166 | return sb.Put(ddStatusName, data) 167 | } else { 168 | return err 169 | } 170 | } 171 | return errors.New("missing bucket") 172 | } 173 | 174 | func (d *Dedup) DetectDupes(minSize uint64, verbose bool) (err error) { 175 | 176 | file, err := os.Open(getAtomFile(d.rootPath, d.indexName, "")) 177 | if err != nil { 178 | return err 179 | } 180 | defer file.Close() 181 | 182 | decoder := json.NewDecoder(file) 183 | 184 | if t, err := decoder.Token(); err != nil || t != json.Delim('{') { 185 | return errors.New("invalid json (start)") 186 | } 187 | 188 | if err = verifyAtomJsonHead(decoder); err != nil { 189 | return err 190 | } 191 | 192 | d.logMsg(fmt.Sprintf("collect matching hashes (min=%s)", intutil.FormatSize(minSize))) 193 | all := make(map[string]*ddBag) 194 | for { 195 | if d.doAbort { 196 | return errAborted 197 | } 198 | 199 | t, err := decoder.Token() 200 | if err != nil { 201 | return err 202 | } 203 | if t == json.Delim('}') { 204 | break 205 | } 206 | key, ok := t.(string) 207 | 
if !ok { 208 | return errors.New("invalid json (loop)") 209 | } 210 | 211 | if key != "" { 212 | key += "/" 213 | } 214 | 215 | var value json.RawMessage 216 | if err = decoder.Decode(&value); err != nil { 217 | return err 218 | } 219 | 220 | index, err := loadIndexFile(value) 221 | if err != nil { 222 | return err 223 | } 224 | 225 | for k, v := range index.fileList { 226 | 227 | if v.Size != nil && *v.Size >= 0 && uint64(*v.Size) < minSize { 228 | continue 229 | } 230 | 231 | bag := all[*v.Hash] 232 | if bag == nil { 233 | bag = &ddBag{ 234 | Size: -1, 235 | } 236 | } 237 | if bag.Size == -1 && v.Size != nil && *v.Size >= 0 { 238 | bag.Size = *v.Size 239 | } 240 | bag.ItemList = append(bag.ItemList, 241 | &DedupItem{ 242 | Path: key + k, 243 | }) 244 | all[*v.Hash] = bag 245 | } 246 | } 247 | 248 | if t, err := decoder.Token(); err != nil || t != json.Delim('}') { 249 | return errors.New("invalid json (end)") 250 | } 251 | 252 | // legacy index items don't contain a file size 253 | d.logMsg("update file sizes (for legacy indexes)") 254 | for hash, bag := range all { 255 | if bag.Size == -1 { 256 | for _, p := range bag.ItemList { 257 | if s, err := os.Stat(slpath.Join(d.rootPath, p.Path)); err == nil { 258 | bag.Size = s.Size() 259 | break 260 | } 261 | } 262 | } 263 | if bag.Size < int64(minSize) { 264 | delete(all, hash) 265 | } 266 | } 267 | 268 | markDelete := [][]byte{} 269 | 270 | // now check resultset for exclusive/shared space 271 | d.logMsg("collect matching files") 272 | if err = d.conn.Update(func(tx *bolt.Tx) error { 273 | 274 | if err := d.nextGen(tx); err != nil { 275 | return err 276 | } 277 | 278 | b := tx.Bucket(ddItemBucketName) 279 | i := 0 280 | d.perfMonFiles(0, 0, len(all)) 281 | for hash, bag := range all { 282 | i += 1 283 | 284 | if d.doAbort { 285 | return errAborted 286 | } 287 | 288 | if len(bag.ItemList) <= 1 { 289 | continue 290 | } 291 | bhash := []byte(hash) 292 | 293 | // combine with old status 294 | // todo, ignore for now 295 
| /* 296 | prevData := b.Get(bhash) 297 | if prevData != nil { 298 | var prevItem ddBag 299 | err := json.Unmarshal(prevData, &prevItem) 300 | if err == nil { 301 | for _, o := range prevItem.ItemList { 302 | for i, p := range item.ItemList { 303 | if o.Path == p.Path { 304 | item.ItemList[i].Merged = o.Merged 305 | } 306 | } 307 | } 308 | } // else ignore 309 | } 310 | */ 311 | 312 | type match struct { 313 | id int 314 | el FileExtentList 315 | item *DedupItem 316 | } 317 | 318 | extUnknown := false 319 | var matches []match 320 | d.perfMonFiles(len(bag.ItemList), float64(i), len(all)) 321 | for _, item := range bag.ItemList { 322 | if res, err := GetFileExtents(slpath.Join(d.rootPath, item.Path)); err == nil { 323 | matches = append(matches, match{-1, res, item}) 324 | } else if IsNotSupported(err) { 325 | matches = append(matches, match{-1, nil, item}) 326 | extUnknown = true 327 | } else { 328 | // file is ignored 329 | if !os.IsNotExist(err) { 330 | d.log(StatusPanic, err.Error()) 331 | } 332 | } 333 | } 334 | 335 | // compare extents and set id for matching 336 | for i := range matches { 337 | if matches[i].id != -1 { 338 | continue 339 | } 340 | matches[i].id = i 341 | for j := i + 1; j < len(matches); j++ { 342 | if matches[j].id == -1 && ExtentsMatch(matches[i].el, matches[j].el) { 343 | matches[j].id = i 344 | } 345 | } 346 | } 347 | 348 | // count matches and get maxId 349 | maxId := -1 350 | maxCount := 1 351 | count := map[int]int{} 352 | for _, o := range matches { 353 | count[o.id] += 1 354 | } 355 | for id, c := range count { 356 | if c > maxCount { 357 | maxId = id 358 | maxCount = c 359 | } 360 | } 361 | if extUnknown { 362 | bag.ExtUnknown = &extUnknown 363 | } 364 | bag.SizeShared = 0 365 | bag.SizeExclusive = 0 366 | bag.ItemList = []*DedupItem{} 367 | for i := range matches { 368 | merged := matches[i].id == maxId 369 | 370 | matches[i].item.Merged = merged 371 | bag.ItemList = append(bag.ItemList, matches[i].item) 372 | if merged { 373 | 
bag.SizeShared += uint64(bag.Size) 374 | } 375 | if matches[i].id == i { 376 | bag.SizeExclusive += uint64(bag.Size) 377 | } 378 | } 379 | 380 | if len(bag.ItemList) < 2 { 381 | // remove because of missing files 382 | markDelete = append(markDelete, bhash) 383 | continue 384 | } 385 | 386 | bag.Gen = d.status.Gen 387 | if data, err := json.Marshal(bag); err != nil { 388 | return err 389 | } else { 390 | if err = b.Put(bhash, data); err != nil { 391 | return err 392 | } 393 | } 394 | } 395 | 396 | // remove old gen (don't use c.Delete()) 397 | c := b.Cursor() 398 | for k, v := c.First(); k != nil; k, v = c.Next() { 399 | var bag ddBag 400 | if err := json.Unmarshal(v, &bag); err != nil { 401 | return err 402 | } 403 | if bag.Gen != d.status.Gen { 404 | markDelete = append(markDelete, k) 405 | } 406 | } 407 | for _, k := range markDelete { 408 | if err := b.Delete(k); err != nil { 409 | return err 410 | } 411 | } 412 | 413 | return nil 414 | }); err != nil { 415 | return err 416 | } 417 | 418 | return nil 419 | } 420 | 421 | func (d *Dedup) Show() ([]*DedupBag, error) { 422 | var list []*DedupBag 423 | if err := d.conn.View(func(tx *bolt.Tx) error { 424 | b := tx.Bucket(ddItemBucketName) 425 | c := b.Cursor() 426 | for k, v := c.First(); k != nil; k, v = c.Next() { 427 | var bag ddBag 428 | if err := json.Unmarshal(v, &bag); err != nil { 429 | return err 430 | } 431 | list = append(list, &DedupBag{ 432 | Hash: string(k), 433 | Size: uint64(bag.Size), 434 | SizeShared: bag.SizeShared, 435 | SizeExclusive: bag.SizeExclusive, 436 | ExtUnknown: bag.ExtUnknown, 437 | ItemList: bag.ItemList, 438 | }) 439 | } 440 | return nil 441 | }); err != nil { 442 | return nil, err 443 | } 444 | slices.SortFunc(list, func(a, b *DedupBag) int { 445 | r := int64(b.Size) - int64(a.Size) 446 | if r < 0 { 447 | return -1 448 | } else if r > 0 { 449 | return 1 450 | } else { 451 | return 0 452 | } 453 | }) 454 | return list, nil 455 | } 456 | 457 | func (d *Dedup) Dedup(hashes []string, 
verbose bool) error { 458 | 459 | if len(hashes) == 0 { 460 | if bags, err := d.Show(); err == nil { 461 | for _, o := range bags { 462 | hashes = append(hashes, o.Hash) 463 | } 464 | } else { 465 | return err 466 | } 467 | } 468 | 469 | if err := d.conn.Update(func(tx *bolt.Tx) error { 470 | b := tx.Bucket(ddItemBucketName) 471 | done := 0 472 | d.perfMonFiles(0, 0, len(hashes)) 473 | 474 | for _, hash := range hashes { 475 | 476 | if d.doAbort { 477 | return errAborted 478 | } 479 | 480 | var bag ddBag 481 | bhash := []byte(hash) 482 | v := b.Get(bhash) 483 | if err := json.Unmarshal(v, &bag); err != nil { 484 | return err 485 | } 486 | list := bag.ItemList 487 | slices.SortFunc(list, func(a, b *DedupItem) int { 488 | if a.Merged == b.Merged { 489 | return 0 490 | } 491 | if a.Merged { 492 | return -1 493 | } 494 | return 1 495 | }) 496 | todoCount := 0.0 497 | for i := 1; i < len(list); i++ { 498 | if !list[i].Merged { 499 | todoCount += 1 500 | } 501 | } 502 | listDone := 0.0 503 | 504 | // merged are at the top 505 | for i := 1; i < len(list); i++ { 506 | if d.doAbort { 507 | return errAborted 508 | } 509 | 510 | if !list[i].Merged { 511 | a := slpath.Join(d.rootPath, list[0].Path) 512 | b := slpath.Join(d.rootPath, list[i].Path) 513 | if verbose { 514 | d.logMsg(fmt.Sprintf("dedup %s %s \"%s\" -- \"%s\"", hash, intutil.FormatSize(uint64(bag.Size)), a, b)) 515 | } else { 516 | d.logMsg(fmt.Sprintf("dedup %s %s", intutil.FormatSize(uint64(bag.Size)), a)) 517 | } 518 | if reclaimed, err := DeduplicateFiles(a, b); err == nil { 519 | if !list[0].Merged { 520 | bag.SizeShared += uint64(bag.Size) 521 | } 522 | list[0].Merged = true 523 | list[i].Merged = true 524 | bag.SizeShared += uint64(bag.Size) 525 | bag.SizeExclusive -= uint64(bag.Size) 526 | d.reclaimedTotal += reclaimed 527 | } else if IsNotSupported(err) { 528 | d.log(StatusPanic, "Dedupliate is not supported for this OS/fs, please see https://laktak.github.io/chkbit/dedup/") 529 | return err 530 | } else { 
531 | d.log(StatusPanic, err.Error()) 532 | } 533 | listDone += 1 534 | d.perfMonFiles(1, float64(done)+listDone/todoCount, len(hashes)) 535 | } 536 | } 537 | done += 1 538 | 539 | if data, err := json.Marshal(&bag); err == nil { 540 | if err := b.Put(bhash, data); err != nil { 541 | return err 542 | } 543 | } else { 544 | return err 545 | } 546 | } 547 | d.perfMonFiles(0, float64(done), len(hashes)) 548 | return nil 549 | }); err != nil { 550 | return err 551 | } 552 | return nil 553 | } 554 | -------------------------------------------------------------------------------- /fuse.go: -------------------------------------------------------------------------------- 1 | package chkbit 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "os" 8 | slpath "path" 9 | ) 10 | 11 | type fuseStore struct { 12 | indexName string 13 | skipSymlinks bool 14 | verbose bool 15 | store *indexStore 16 | count int 17 | log FuseLogFunc 18 | } 19 | 20 | type FuseLogFunc func(string) 21 | 22 | func FuseIndexStore(path, indexName string, skipSymlinks, verbose bool, force bool, log FuseLogFunc) error { 23 | 24 | if err := InitializeIndexStore(IndexTypeAtom, path, indexName, force); err != nil { 25 | return err 26 | } 27 | 28 | fileName := getMarkerFile(IndexTypeAtom, path, indexName) 29 | if _, err := os.Stat(fileName); err != nil { 30 | return errMissingIndex 31 | } 32 | 33 | fuse := &fuseStore{ 34 | indexName: indexName, 35 | skipSymlinks: skipSymlinks, 36 | verbose: verbose, 37 | log: log, 38 | } 39 | 40 | store := &indexStore{ 41 | logErr: func(message string) { fuse.logErr("(indexstore) " + message) }, 42 | } 43 | fuse.store = store 44 | 45 | store.UseAtom(path, indexName, false) 46 | if err := store.Open(false, 100); err != nil { 47 | return err 48 | } 49 | 50 | fuse.fuseScanDir(path, "") 51 | 52 | if _, err := store.Finish(false); err != nil { 53 | return err 54 | } 55 | 56 | fuse.log(fmt.Sprintf("fused %d indexes", fuse.count)) 57 | return nil 58 | } 59 | 60 | func (f 
// fuseScanDir recursively walks root, fusing any atom or split index found
// in each subdirectory into the target store. prefix is the slash-terminated
// path of the current directory relative to the fuse root and becomes part
// of the stored keys. Errors are logged via logErr and do not abort the scan.
func (f *fuseStore) fuseScanDir(root, prefix string) {
	files, err := os.ReadDir(root)
	if err != nil {
		f.logErr(root + "/:" + err.Error())
		return
	}

	for _, file := range files {
		path := slpath.Join(root, file.Name())
		if isDir(file, path, f.skipSymlinks) {
			newPrefix := prefix + file.Name() + "/"
			// an atom index in a directory takes precedence over a split index
			if fileName, ok, _ := existsMarkerFile(IndexTypeAtom, path, f.indexName); ok {
				if err = f.fuseAtom(fileName, newPrefix); err != nil {
					f.logErr("fuse " + path + "/:" + err.Error())
				}
			} else if fileName, ok, _ := existsMarkerFile(IndexTypeSplit, path, f.indexName); ok {
				if err = f.fuseSplit(fileName, newPrefix); err != nil {
					f.logErr("fuse " + path + "/:" + err.Error())
				}
			}
			// always descend, even if this directory had no (valid) index
			f.fuseScanDir(path, newPrefix)
		}
	}
}
err 138 | } 139 | 140 | f.count++ 141 | } 142 | 143 | if t, err := decoder.Token(); err != nil || t != json.Delim('}') { 144 | return errors.New("invalid json (end)") 145 | } 146 | 147 | return nil 148 | } 149 | 150 | func (f *fuseStore) fuseSplit(fileName, prefix string) error { 151 | if f.verbose { 152 | f.log("fusing " + fileName + " prefix: " + prefix) 153 | } 154 | 155 | value, err := os.ReadFile(fileName) 156 | if err != nil { 157 | return err 158 | } 159 | 160 | // test 161 | if _, err = loadIndexFile(value); err != nil { 162 | return err 163 | } 164 | 165 | key := prefix + f.indexName 166 | 167 | if err = f.store.Save(key, value); err != nil { 168 | return err 169 | } 170 | 171 | f.count++ 172 | 173 | return nil 174 | } 175 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/laktak/chkbit/v6 2 | 3 | go 1.22.3 4 | 5 | require ( 6 | github.com/alecthomas/kong v1.6.0 7 | github.com/laktak/lterm v1.1.0 8 | go.etcd.io/bbolt v1.3.11 9 | golang.org/x/sys v0.27.0 10 | lukechampine.com/blake3 v1.3.0 11 | ) 12 | 13 | require ( 14 | github.com/klauspost/cpuid/v2 v2.0.9 // indirect 15 | golang.org/x/term v0.26.0 // indirect 16 | ) 17 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/alecthomas/assert/v2 v2.11.0 h1:2Q9r3ki8+JYXvGsDyBXwH3LcJ+WK5D0gc5E8vS6K3D0= 2 | github.com/alecthomas/assert/v2 v2.11.0/go.mod h1:Bze95FyfUr7x34QZrjL+XP+0qgp/zg8yS+TtBj1WA3k= 3 | github.com/alecthomas/kong v1.6.0 h1:mwOzbdMR7uv2vul9J0FU3GYxE7ls/iX1ieMg5WIM6gE= 4 | github.com/alecthomas/kong v1.6.0/go.mod h1:p2vqieVMeTAnaC83txKtXe8FLke2X07aruPWXyMPQrU= 5 | github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc= 6 | github.com/alecthomas/repr v0.4.0/go.mod 
h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= 7 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 8 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 9 | github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= 10 | github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= 11 | github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= 12 | github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= 13 | github.com/laktak/lterm v1.1.0 h1:14irGudDwcgo/Hu8klxTQVybQL8No348Aw1vY+1Ld5w= 14 | github.com/laktak/lterm v1.1.0/go.mod h1:zwGyZi5PNuySqsDsRVNvBBYANy9k61oYgW6Flsm2AZg= 15 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 16 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 17 | github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= 18 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 19 | go.etcd.io/bbolt v1.3.11 h1:yGEzV1wPz2yVCLsD8ZAiGHhHVlczyC9d1rP43/VCRJ0= 20 | go.etcd.io/bbolt v1.3.11/go.mod h1:dksAq7YMXoljX0xu6VF5DMZGbhYYoLUalEiSySYAS4I= 21 | golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= 22 | golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= 23 | golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= 24 | golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 25 | golang.org/x/term v0.26.0 h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU= 26 | golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E= 27 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 28 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 29 | lukechampine.com/blake3 v1.3.0 
h1:sJ3XhFINmHSrYCgl958hscfIa3bw8x4DqMP3u1YvoYE= 30 | lukechampine.com/blake3 v1.3.0/go.mod h1:0OFRp7fBtAylGVCO40o87sbupkyIGgbpv1+M1k1LM6k= 31 | -------------------------------------------------------------------------------- /hashfile.go: -------------------------------------------------------------------------------- 1 | package chkbit 2 | 3 | import ( 4 | "crypto/md5" 5 | "crypto/sha512" 6 | "encoding/hex" 7 | "errors" 8 | "hash" 9 | "io" 10 | "os" 11 | 12 | "lukechampine.com/blake3" 13 | ) 14 | 15 | const BLOCKSIZE = 2 << 10 << 7 // kb 16 | 17 | func Hashfile(path string, hashAlgo string, perfMonBytes func(int64)) (string, error) { 18 | var h hash.Hash 19 | switch hashAlgo { 20 | case "md5": 21 | h = md5.New() 22 | case "sha512": 23 | h = sha512.New() 24 | case "blake3": 25 | h = blake3.New(32, nil) 26 | default: 27 | return "", errors.New("algo '" + hashAlgo + "' is unknown.") 28 | } 29 | 30 | file, err := os.Open(path) 31 | if err != nil { 32 | return "", err 33 | } 34 | defer file.Close() 35 | 36 | buf := make([]byte, BLOCKSIZE) 37 | for { 38 | bytesRead, err := file.Read(buf) 39 | if err != nil && err != io.EOF { 40 | return "", err 41 | } 42 | if bytesRead == 0 { 43 | break 44 | } 45 | h.Write(buf[:bytesRead]) 46 | if perfMonBytes != nil { 47 | perfMonBytes(int64(bytesRead)) 48 | } 49 | } 50 | return hex.EncodeToString(h.Sum(nil)), nil 51 | } 52 | 53 | func hashMd5(data []byte) string { 54 | h := md5.New() 55 | h.Write(data) 56 | return hex.EncodeToString(h.Sum(nil)) 57 | } 58 | -------------------------------------------------------------------------------- /ignore.go: -------------------------------------------------------------------------------- 1 | package chkbit 2 | 3 | import ( 4 | "bufio" 5 | "os" 6 | slpath "path" 7 | "strings" 8 | ) 9 | 10 | type Ignore struct { 11 | parentIgnore *Ignore 12 | context *Context 13 | path string 14 | name string 15 | itemList []string 16 | } 17 | 18 | func GetIgnore(context *Context, path string, parentIgnore 
// loadIgnore reads the ignore file for this directory (if present) and
// collects its non-empty, non-comment lines into itemList. A missing ignore
// file is not an error.
func (ignore *Ignore) loadIgnore() error {
	if _, err := os.Stat(ignore.getIgnoreFilepath()); err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return err
	}

	file, err := os.Open(ignore.getIgnoreFilepath())
	if err != nil {
		return err
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		// lines starting with '#' are comments
		if line != "" && line[0] != '#' {
			ignore.itemList = append(ignore.itemList, line)
		}
	}
	return scanner.Err()
}
// shouldIgnore2 matches name against this ignore file's patterns and then
// walks up through the parent ignore files. fullname is the path of the item
// relative to the ignore file currently being consulted ("" on the initial,
// same-directory call); each recursion prepends this directory's name.
// Patterns starting with '/' are anchored to the ignore file's own directory
// and are skipped once a parent is being consulted (fullname set).
// Note: itemList entries are guaranteed non-empty by loadIgnore, so item[0]
// is safe.
func (ignore *Ignore) shouldIgnore2(name string, fullname string) bool {
	for _, item := range ignore.itemList {
		if item[0] == '/' {
			if len(fullname) > 0 {
				// anchored pattern, does not apply below this directory
				continue
			} else {
				item = item[1:]
			}
		}
		if match, _ := slpath.Match(item, name); match {
			return true
		}
		if fullname != "" {
			// also match the relative path (e.g. "berlin/oslo")
			if match, _ := slpath.Match(item, fullname); match {
				return true
			}
		}
	}
	if ignore.parentIgnore != nil {
		if fullname != "" {
			return ignore.parentIgnore.shouldIgnore2(name, ignore.name+fullname)
		} else {
			return ignore.parentIgnore.shouldIgnore2(name, ignore.name+name)
		}
	}
	return false
}
const VERSION = 2 // index version

// idxInfo is the per-file entry stored in an index.
type idxInfo struct {
	// modification time in unix milliseconds (see getMtS)
	ModTime int64 `json:"mod"`
	// hash algorithm used for Hash ("md5", "sha512", "blake3")
	Algo *string `json:"a,omitempty"`
	// hex-encoded digest
	Hash *string `json:"h,omitempty"`
	// 2025-02-16: file size in bytes
	Size *int64 `json:"s,omitempty"`
	// legacy python format (md5 digest); converted on load
	LegacyHash *string `json:"md5,omitempty"`
}

// indexFile is the on-disk JSON layout of a (split) index file.
type indexFile struct {
	V int `json:"v"`
	// IdxRaw -> map[string]idxInfo; kept raw so the stored idx_hash can be
	// verified against the exact bytes
	IdxRaw  json.RawMessage `json:"idx"`
	IdxHash string          `json:"idx_hash"`
	// 2024-08 optional, list of subdirectories
	Dir []string `json:"dirlist,omitempty"`
}

// indexLoadResult is the outcome of parsing an index file.
type indexLoadResult struct {
	fileList map[string]idxInfo
	dirList  []string
	// converted: a legacy format was converted on load
	converted bool
	// verified: the embedded idx_hash matched the data
	verified bool
}
45 | curDirList []string 46 | newDirList []string 47 | modified bool 48 | readonly bool 49 | } 50 | 51 | func newIndex(context *Context, path string, files []string, dirList []string, readonly bool) *Index { 52 | slices.Sort(dirList) 53 | return &Index{ 54 | context: context, 55 | path: path, 56 | files: files, 57 | cur: make(map[string]idxInfo), 58 | new: make(map[string]idxInfo), 59 | curDirList: make([]string, 0), 60 | newDirList: dirList, 61 | readonly: readonly, 62 | } 63 | } 64 | 65 | func getMtS(path string) (mtime, size int64, err error) { 66 | var info os.FileInfo 67 | if info, err = os.Stat(path); err == nil { 68 | mtime = int64(info.ModTime().UnixNano() / 1e6) 69 | size = info.Size() 70 | } 71 | return 72 | } 73 | 74 | func (i *Index) getIndexFilepath() string { 75 | return slpath.Join(i.path, i.context.IndexFilename) 76 | } 77 | 78 | func (i *Index) logFilePanic(name string, message string) { 79 | i.context.log(StatusPanic, slpath.Join(i.path, name)+": "+message) 80 | } 81 | 82 | func (i *Index) logFile(stat Status, name string) { 83 | i.context.log(stat, slpath.Join(i.path, name)) 84 | } 85 | 86 | func (i *Index) logDir(stat Status, name string) { 87 | i.context.log(stat, slpath.Join(i.path, name)+"/") 88 | } 89 | 90 | func (i *Index) calcHashes(ignore *Ignore) { 91 | for _, name := range i.files { 92 | if ignore.shouldIgnore(name) { 93 | if !ignore.context.isChkbitFile(name) { 94 | i.logFile(StatusIgnore, name) 95 | } 96 | continue 97 | } 98 | 99 | var err error 100 | var info *idxInfo 101 | algo := i.context.HashAlgo 102 | if val, ok := i.cur[name]; ok { 103 | // existing file 104 | if val.Algo != nil { 105 | algo = *val.Algo 106 | } 107 | if i.context.UpdateSkipCheck && !i.mtimeChanged(name, val) { 108 | info = &val 109 | } else { 110 | info, err = i.calcFile(name, algo) 111 | } 112 | } else { 113 | // new file 114 | if i.readonly { 115 | info = &idxInfo{Algo: &algo} 116 | } else { 117 | info, err = i.calcFile(name, algo) 118 | } 119 | } 120 | if 
// checkFix compares the freshly computed entries (new) against the stored
// entries (cur), logs a status for each file and directory, and sets
// i.modified when the index needs to be written:
//   - not in cur: new file
//   - same hash: OK (mtime-only change still marks the index modified)
//   - same mtime, different hash: damage; the old entry is kept unless
//     forceUpdateDmg is set
//   - newer/older mtime with different hash: update / update with warning
//
// Deleted files and directories are detected afterwards.
func (i *Index) checkFix(forceUpdateDmg bool) {
	for name, b := range i.new {
		if a, ok := i.cur[name]; !ok {
			i.logFile(StatusNew, name)
			i.modified = true
		} else {
			amod := int64(a.ModTime)
			bmod := int64(b.ModTime)
			if a.Hash != nil && b.Hash != nil && *a.Hash == *b.Hash {
				i.logFile(StatusOK, name)
				if amod != bmod {
					// content unchanged but mtime differs; refresh the index
					i.modified = true
				}
				continue
			}

			if amod == bmod {
				i.logFile(StatusErrorDamage, name)
				if !forceUpdateDmg {
					// keep DMG entry
					i.new[name] = a
				} else {
					i.modified = true
				}
			} else if amod < bmod {
				i.logFile(StatusUpdate, name)
				i.modified = true
			} else if amod > bmod {
				i.logFile(StatusUpdateWarnOld, name)
				i.modified = true
			}
		}
	}
	// track deleted files
	for name := range i.cur {
		if _, ok := i.new[name]; !ok {
			i.modified = true
			if i.context.LogDeleted {
				i.logFile(StatusDeleted, name)
			}
		}
	}

	// dirs
	m := make(map[string]bool)
	for _, n := range i.newDirList {
		m[n] = true
	}
	for _, name := range i.curDirList {
		if !m[name] {
			i.modified = true
			if i.context.LogDeleted {
				// NOTE(review): logDir already appends "/", so this logs a
				// trailing "//" — confirm whether that is intended
				i.logDir(StatusDeleted, name+"/")
			}
		}
	}
	if len(i.newDirList) != len(i.curDirList) {
		// added
		i.modified = true
	}
}
// loadIndexFile parses the raw bytes of an index file in any supported
// format: the current versioned format (idx/idx_hash), the legacy python
// format (per-entry "md5" field) and the legacy js format ("data" object).
// Legacy entries are converted to idxInfo; res.converted reports a format
// conversion and res.verified reports whether the embedded idx_hash matched.
func loadIndexFile(fileData []byte) (*indexLoadResult, error) {

	// legacy js format entry and wrapper
	type idxInfo1 struct {
		ModTime int64  `json:"mod"`
		Hash    string `json:"md5"`
	}

	type indexFile1 struct {
		Data map[string]idxInfo1 `json:"data"`
	}

	var legacyAlgoMd5 = "md5"

	if fileData == nil {
		return nil, errors.New("fileData is nil")
	}
	res := &indexLoadResult{}

	var data indexFile
	err := json.Unmarshal(fileData, &data)
	if err != nil {
		return nil, err
	}
	if data.IdxRaw != nil {
		err = json.Unmarshal(data.IdxRaw, &res.fileList)
		if err != nil {
			return nil, err
		}
		text := data.IdxRaw
		if data.IdxHash != hashMd5(text) {
			// old versions may have saved the JSON encoded with extra spaces
			text, _ = json.Marshal(data.IdxRaw)
		}
		res.verified = data.IdxHash == hashMd5(text)

		// convert from py1/md5 to new format
		for name, item := range res.fileList {
			if item.LegacyHash != nil {
				item2 := idxInfo{
					ModTime: item.ModTime,
					Algo:    &legacyAlgoMd5,
					Hash:    item.LegacyHash,
				}
				res.fileList[name] = item2
			}
		}
	} else {
		var data1 indexFile1
		// best-effort parse of the legacy js format; error intentionally
		// ignored, an unparseable file yields an empty fileList
		json.Unmarshal(fileData, &data1)
		res.fileList = make(map[string]idxInfo)
		if data1.Data != nil {
			// convert from js to new format
			for name, item := range data1.Data {
				res.fileList[name] = idxInfo{
					ModTime: item.ModTime,
					Algo:    &legacyAlgoMd5,
					Hash:    &item.Hash,
				}
			}
		}
		res.converted = true
		res.verified = true
	}

	res.dirList = data.Dir
	return res, nil
}
// UseAtom configures the store to operate on a single atom database file
// located in path (named indexName + atomSuffix) instead of per-directory
// index files. refresh requests that writes go to a fresh database instead
// of updating the imported cache (see Open).
func (s *indexStore) UseAtom(path string, indexName string, refresh bool) {
	s.atomPath = path
	s.indexName = indexName
	s.atom = true
	s.refresh = refresh
}
// Finish shuts the store down: it stops the db worker, closes the bolt
// connections, and (for a dirty, writable atom store that is not aborting)
// exports the cache db to a new atom file, rotates the old atom file to a
// .bak and moves the new file into place. Temporary cache files are removed
// in all cases. updated reports whether any data was written to the store.
func (s *indexStore) Finish(abort bool) (updated bool, err error) {

	if !s.atom {
		// split mode writes directly to files; nothing to finalize
		return
	}

	if !s.readOnly {
		// nil is the worker's shutdown signal; wait for it to drain
		s.storeDbQueue <- nil
		s.storeDbWg.Wait()
	}

	if s.connR != nil {
		if err = s.connR.Close(); err != nil {
			return
		}
		if !s.readOnly && s.refresh {
			// connW is a separate db only in refresh mode
			if err = s.connW.Close(); err != nil {
				return
			}
		}
	}
	s.connR = nil
	s.connW = nil

	if !s.readOnly && s.dirty {

		// in refresh mode the write cache holds the data to export
		cacheFile := s.cacheFileR
		if s.cacheFileW != "" {
			cacheFile = s.cacheFileW
		}

		if !abort {
			var newFile string
			if newFile, err = s.exportCache(cacheFile, newSuffix); err != nil {
				return
			}

			// rotate: current atom -> .bak, new export -> current atom
			atomFile := getAtomFile(s.atomPath, s.indexName, "")
			if err = os.Rename(atomFile, getAtomFile(s.atomPath, s.indexName, bakSuffix)); err != nil {
				return
			}
			if err = os.Rename(newFile, atomFile); err != nil {
				return
			}
		}

		updated = true
	}

	// clean up temporary cache databases
	if s.cacheFileR != "" {
		os.Remove(s.cacheFileR)
		s.cacheFileR = ""
	}
	if s.cacheFileW != "" {
		os.Remove(s.cacheFileW)
		s.cacheFileW = ""
	}

	return
}
// storeDbWorker is the single writer goroutine for the atom db. It drains
// storeDbQueue, batching Puts into one bolt write transaction that is
// committed and reopened after dbTxTimeoutSec. A nil queue item is the
// shutdown signal; the final transaction is committed on exit and any error
// is reported through logErr.
func (s *indexStore) storeDbWorker() {

	var tx *bolt.Tx
	var b *bolt.Bucket
	var txExpires time.Time
	var err error
	defer s.storeDbWg.Done()

	for item := range s.storeDbQueue {

		if item == nil {
			// shutdown signal from Finish
			break
		}

		// commit a long-lived transaction to bound data loss / lock time
		if tx != nil && time.Now().After(txExpires) {
			if err = tx.Commit(); err != nil {
				break
			}
			tx = nil
		}

		if tx == nil {
			txExpires = time.Now().Add(dbTxTimeoutSec * time.Second)
			if tx, err = s.connW.Begin(true); err != nil {
				break
			}
			b = tx.Bucket(indexStoreDataBucketName)
		}

		if err = b.Put(item.key, item.value); err != nil {
			break
		}
	}

	if err != nil {
		s.logErr(err.Error())
	} else if tx != nil {
		if err = tx.Commit(); err != nil {
			s.logErr(err.Error())
		}
	}
}
// importCache streams the JSON atom file into the bolt cache database at
// dbFile. The atom format is the fixed layout written by exportCache
// ({"type":"chkbit","version":6,"data":{...}}); each data key becomes a db
// key with the index filename appended, each value is stored raw.
func (s *indexStore) importCache(dbFile string) error {

	connW, err := bolt.Open(dbFile, 0600, getBoltOptions(false))
	if err != nil {
		return err
	}
	defer connW.Close()

	file, err := os.Open(getAtomFile(s.atomPath, s.indexName, ""))
	if err != nil {
		return err
	}
	defer file.Close()

	decoder := json.NewDecoder(file)

	if t, err := decoder.Token(); err != nil || t != json.Delim('{') {
		return errors.New("invalid json (start)")
	}

	// validates the fixed type/version/data header
	if err = verifyAtomJsonHead(decoder); err != nil {
		return err
	}

	if err = connW.Update(func(tx *bolt.Tx) error {
		b, err := tx.CreateBucketIfNotExists(indexStoreDataBucketName)
		if err != nil {
			return err
		}

		for {
			t, err := decoder.Token()
			if err != nil {
				return err
			}
			if t == json.Delim('}') {
				// end of the data object
				break
			}
			key, ok := t.(string)
			if !ok {
				return errors.New("invalid json (loop)")
			}

			// append index filename for compatibility with file based version
			if key != "" {
				key += "/"
			}
			key += s.indexName

			var value json.RawMessage
			if err = decoder.Decode(&value); err != nil {
				return err
			}

			if err = b.Put([]byte(key), value); err != nil {
				return err
			}
		}
		return nil
	}); err != nil {
		return err
	}

	if t, err := decoder.Token(); err != nil || t != json.Delim('}') {
		return errors.New("invalid json (end)")
	}

	return nil
}
errors.New("invalid json (data)") 427 | } 428 | if t, err := decoder.Token(); err != nil || t != json.Delim('{') { 429 | return errors.New("invalid json (data start)") 430 | } 431 | return nil 432 | } 433 | 434 | func getAtomFile(path, indexName, suffix string) string { 435 | return slpath.Join(path, indexName+atomSuffix+suffix) 436 | } 437 | 438 | func getMarkerFile(st IndexType, path, indexName string) string { 439 | if st == IndexTypeSplit { 440 | return slpath.Join(path, indexName) 441 | } else { 442 | return getAtomFile(path, indexName, "") 443 | } 444 | } 445 | 446 | func existsMarkerFile(st IndexType, path, indexName string) (fileName string, ok bool, err error) { 447 | fileName = getMarkerFile(st, path, indexName) 448 | _, err = os.Stat(fileName) 449 | if err == nil { 450 | ok = true 451 | } else if os.IsNotExist(err) { 452 | err = nil 453 | } 454 | return 455 | } 456 | 457 | func getTempDbFile(indexName string) (string, error) { 458 | tempFile, err := os.CreateTemp("", "*"+indexName) 459 | if err == nil { 460 | tempFile.Close() 461 | } 462 | return tempFile.Name(), err 463 | } 464 | 465 | func getBoltOptions(readOnly bool) *bolt.Options { 466 | return &bolt.Options{ 467 | ReadOnly: readOnly, 468 | Timeout: 0, 469 | NoGrowSync: false, 470 | FreelistType: bolt.FreelistArrayType, 471 | } 472 | } 473 | 474 | func InitializeIndexStore(st IndexType, path, indexName string, force bool) error { 475 | if !slices.Contains(IndexTypeList, st) { 476 | return errors.New("invalid type") 477 | } 478 | fileName := getMarkerFile(st, path, indexName) 479 | _, err := os.Stat(fileName) 480 | if !os.IsNotExist(err) { 481 | if force { 482 | if err := os.Remove(fileName); err != nil { 483 | return err 484 | } 485 | } else { 486 | return errors.New("index already exists") 487 | } 488 | } 489 | file, err := os.Create(fileName) 490 | if err != nil { 491 | return err 492 | } 493 | defer file.Close() 494 | init := atomDataPrefix + atomDataSuffix 495 | if st == IndexTypeSplit { 496 | 
// ParseSize takes a string representation of a size (e.g. "1G", "10M",
// "512") and returns the size in bytes as an uint64. A missing unit
// suffix means bytes. An empty string, a negative value, or an unknown
// unit results in an error.
func ParseSize(sizeStr string) (uint64, error) {
	units := map[string]uint64{"B": 1, "K": 1 << 10, "M": 1 << 20, "G": 1 << 30, "T": 1 << 40}
	sizeStr = strings.ToUpper(strings.TrimSpace(sizeStr))
	if sizeStr == "" {
		// previously this indexed sizeStr[len-1:] and panicked
		return 0, fmt.Errorf("empty size string")
	}
	// split off the unit suffix if present; a bare number means bytes
	num := sizeStr
	mult := uint64(1)
	if m, ok := units[sizeStr[len(sizeStr)-1:]]; ok {
		mult = m
		num = sizeStr[:len(sizeStr)-1]
	}
	// an unknown suffix (e.g. "5X") is left in num and rejected here;
	// previously units[unit] silently evaluated to 0 and "5X" parsed as 0
	size, err := strconv.ParseFloat(num, 64)
	if err != nil {
		return 0, err
	}
	if size < 0 {
		return 0, fmt.Errorf("negative size: %s", sizeStr)
	}
	return uint64(size * float64(mult)), nil
}
// FormatSize takes an uint64 representation of a size in bytes and returns
// a string representation with a binary unit (e.g. "1G", "10M"). Whole
// values are printed without decimals, otherwise one decimal place is used.
// Values beyond the largest unit are returned as a plain number.
func FormatSize(size uint64) string {
	units := []string{"", "K", "M", "G", "T", "P"}
	if size == 0 {
		// math.Log(0) is -Inf and converting -Inf to int is
		// implementation-defined in Go, so handle zero explicitly
		return "0"
	}
	unitIndex := int(math.Log(float64(size)) / math.Log(1024))
	if unitIndex < 0 || unitIndex >= len(units) {
		return fmt.Sprintf("%d", size)
	}
	value := float64(size) / math.Pow(1024, float64(unitIndex))
	if value == math.Floor(value) {
		return fmt.Sprintf("%d%s", uint64(value), units[unitIndex])
	}
	return fmt.Sprintf("%.1f%s", value, units[unitIndex])
}
for _, o := range fe { 29 | if o.Logical == offs { 30 | return &o 31 | } 32 | } 33 | return nil 34 | } 35 | 36 | func getFileExtentsFp(file *os.File) (FileExtentList, os.FileInfo, error) { 37 | 38 | fileInfo, err := file.Stat() 39 | if err != nil { 40 | return nil, nil, err 41 | } 42 | 43 | var all []fiemapExtent 44 | start := uint64(0) 45 | size := uint64(fileInfo.Size()) 46 | maxReq := uint64(100 * 1024 * 1024) 47 | 48 | // don't use syscall.Log2phys_t as it's alignment is incorrect 49 | 50 | type Log2phys_t2 struct { 51 | // IN: number of bytes to be queried; OUT: number of contiguous bytes allocated at this position 52 | Contigbytes uint64 53 | // IN: bytes into file; OUT: bytes into device 54 | Devoffset uint64 55 | } 56 | 57 | buf := make([]byte, 8*3) 58 | for { 59 | 60 | // skip flags 61 | lp := (*Log2phys_t2)(unsafe.Pointer(&buf[4])) 62 | lp.Contigbytes = maxReq 63 | lp.Devoffset = start 64 | 65 | rc, err := unix.FcntlInt(file.Fd(), syscall.F_LOG2PHYS_EXT, int(uintptr(unsafe.Pointer(&buf[0])))) 66 | if err != nil { 67 | return nil, nil, err 68 | } 69 | if rc < 0 { 70 | return nil, nil, errors.New("log2phys failed") 71 | } 72 | 73 | all = append(all, 74 | fiemapExtent{ 75 | Logical: start, 76 | Physical: lp.Devoffset, 77 | Length: lp.Contigbytes, 78 | }) 79 | 80 | start += lp.Contigbytes 81 | if start >= size { 82 | return all, fileInfo, nil 83 | } 84 | } 85 | } 86 | 87 | func GetFileExtents(filePath string) (FileExtentList, error) { 88 | file, err := os.Open(filePath) 89 | if err != nil { 90 | return nil, err 91 | } 92 | file.Sync() 93 | defer file.Close() 94 | fe, _, err := getFileExtentsFp(file) 95 | if err != nil { 96 | return nil, fmt.Errorf("failed to get fileextents for %s: %v", filePath, err) 97 | } 98 | return fe, err 99 | } 100 | 101 | func ExtentsMatch(extList1, extList2 FileExtentList) bool { 102 | if len(extList1) != len(extList2) { 103 | return false 104 | } 105 | for i := range extList1 { 106 | a := extList1[i] 107 | b := extList2[i] 108 | if 
!a.matches(&b) { 109 | return false 110 | } 111 | } 112 | 113 | return true 114 | } 115 | 116 | func ShowExtents(extList FileExtentList) string { 117 | res := "" 118 | for _, b := range extList { 119 | res += fmt.Sprintf("offs=%x len=%x phys=%x\n", b.Logical, b.Length, b.Physical) 120 | } 121 | return res 122 | } 123 | 124 | func DeduplicateFiles(file1, file2 string) (uint64, error) { 125 | return 0, errors.New("deduplicate is not supported on this OS") 126 | } 127 | -------------------------------------------------------------------------------- /os_linux.go: -------------------------------------------------------------------------------- 1 | package chkbit 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "os" 7 | "syscall" 8 | "unsafe" 9 | 10 | "golang.org/x/sys/unix" 11 | ) 12 | 13 | // https://www.kernel.org/doc/html/latest/filesystems/fiemap.html 14 | 15 | const ( 16 | sizeOfFiemap = 32 17 | sizeOfExtent = 56 18 | fs_IOC_FIEMAP = 0xc020660b 19 | fiemap_FLAG_SYNC = 0x0001 // sync the file before mapping 20 | fiemap_EXTENT_LAST = 0x0001 // last extent in file 21 | ) 22 | 23 | type fiemap struct { 24 | start uint64 // byte offset (inclusive) at which to start mapping (in) 25 | length uint64 // logical length of mapping which userspace wants (in) 26 | flags uint32 // FIEMAP_FLAG_* flags for request (in/out) 27 | mappedExtents uint32 // number of extents that were mapped (out) 28 | extentCount uint32 // size of fm_extents array (in) 29 | } 30 | 31 | type fiemapExtent struct { 32 | Logical uint64 // byte offset of the extent in the file 33 | Physical uint64 // byte offset of extent on disk 34 | Length uint64 // length in bytes for this extent 35 | reserved1 uint64 36 | reserved2 uint64 37 | Flags uint32 // FIEMAP_EXTENT_* flags for this extent 38 | } 39 | 40 | type FileExtentList []fiemapExtent 41 | 42 | func (fe *fiemapExtent) matches(o *fiemapExtent) bool { 43 | return fe.Logical == o.Logical && fe.Physical == o.Physical && fe.Length == o.Length 44 | } 45 | 46 | func 
(fe FileExtentList) find(offs uint64) *fiemapExtent { 47 | for _, o := range fe { 48 | if o.Logical == offs { 49 | return &o 50 | } 51 | } 52 | return nil 53 | } 54 | 55 | func ioctlFileMap(file *os.File, start uint64, length uint64) ([]fiemapExtent, bool, error) { 56 | 57 | if length == 0 { 58 | return nil, true, nil 59 | } 60 | 61 | extentCount := uint32(50) 62 | buf := make([]byte, sizeOfFiemap+extentCount*sizeOfExtent) 63 | fm := (*fiemap)(unsafe.Pointer(&buf[0])) 64 | fm.start = start 65 | fm.length = length 66 | fm.flags = fiemap_FLAG_SYNC 67 | fm.extentCount = extentCount 68 | _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, file.Fd(), fs_IOC_FIEMAP, uintptr(unsafe.Pointer(&buf[0]))) 69 | if errno != 0 { 70 | return nil, true, fmt.Errorf("fiemap errno %v", errno) 71 | } 72 | 73 | extents := make([]fiemapExtent, fm.mappedExtents) 74 | done := fm.mappedExtents == 0 75 | lastOffs := start 76 | for i := range fm.mappedExtents { 77 | rawinfo := (*fiemapExtent)(unsafe.Pointer(uintptr(unsafe.Pointer(&buf[0])) + uintptr(sizeOfFiemap) + uintptr(i*sizeOfExtent))) 78 | if rawinfo.Logical < lastOffs { 79 | // return nil, true, errors.New("invalid order") 80 | return nil, true, fmt.Errorf("invalid order %v", rawinfo.Logical) 81 | } 82 | lastOffs = rawinfo.Logical 83 | extents[i].Logical = rawinfo.Logical 84 | extents[i].Physical = rawinfo.Physical 85 | extents[i].Length = rawinfo.Length 86 | extents[i].Flags = rawinfo.Flags 87 | done = rawinfo.Flags&fiemap_EXTENT_LAST != 0 88 | } 89 | 90 | return extents, done, nil 91 | } 92 | 93 | func getFileExtentsFp(file *os.File) (FileExtentList, os.FileInfo, error) { 94 | 95 | fileInfo, err := file.Stat() 96 | if err != nil { 97 | return nil, nil, err 98 | } 99 | 100 | var all []fiemapExtent 101 | start := uint64(0) 102 | size := uint64(fileInfo.Size()) 103 | for { 104 | part, done, err := ioctlFileMap(file, start, size-start) 105 | if err != nil { 106 | return nil, nil, err 107 | } 108 | 109 | all = append(all, part...) 
// umin returns the smaller of two uint64 values.
// The file already relies on Go 1.22 features, so the built-in min
// (Go 1.21+) replaces the hand-rolled comparison.
func umin(x, y uint64) uint64 {
	return min(x, y)
}
fileextents for %s: %v", file1, err) 186 | } 187 | 188 | reclaimed := uint64(0) 189 | size := uint64(fileInfo1.Size()) 190 | var offs uint64 = 0 191 | for { 192 | if offs >= size { 193 | break 194 | } 195 | 196 | el2, _, err := getFileExtentsFp(f2) 197 | if err != nil { 198 | return reclaimed, fmt.Errorf("failed to get fileextents for %s: %v", file2, err) 199 | } 200 | 201 | dlen := size - offs 202 | e1 := el1.find(offs) 203 | e2 := el2.find(offs) 204 | if e1 != nil { 205 | dlen = umin(e1.Length, dlen) 206 | if e2 != nil { 207 | if e1.matches(e2) { 208 | offs += e1.Length 209 | continue 210 | } else if e2.Length < e1.Length { 211 | dlen = umin(e2.Length, dlen) 212 | } 213 | } 214 | } 215 | 216 | dedupe := unix.FileDedupeRange{ 217 | Src_offset: offs, 218 | Src_length: dlen, 219 | Info: []unix.FileDedupeRangeInfo{ 220 | unix.FileDedupeRangeInfo{ 221 | Dest_fd: int64(f2.Fd()), 222 | Dest_offset: offs, 223 | }, 224 | }} 225 | 226 | if err = unix.IoctlFileDedupeRange(int(f1.Fd()), &dedupe); err != nil { 227 | return reclaimed, fmt.Errorf("deduplication failed (offs=%x, len=%x): %s", offs, dlen, err) 228 | } 229 | 230 | if dedupe.Info[0].Status < 0 { 231 | errno := unix.Errno(-dedupe.Info[0].Status) 232 | if errno == unix.EOPNOTSUPP { 233 | return reclaimed, errNotSupported 234 | } else if errno == unix.EINVAL { 235 | return reclaimed, errors.New("deduplication status failed: EINVAL;") 236 | } 237 | return reclaimed, fmt.Errorf("deduplication status failed: %s", unix.ErrnoName(errno)) 238 | } else if dedupe.Info[0].Status == unix.FILE_DEDUPE_RANGE_DIFFERS { 239 | return reclaimed, fmt.Errorf("deduplication unexpected different range (offs=%x, len=%x)", offs, dlen) 240 | } 241 | done := dedupe.Info[0].Bytes_deduped 242 | reclaimed += done 243 | if offs+done == size { 244 | break 245 | } else if offs+done < size { 246 | // continue 247 | offs += done 248 | } else { 249 | return reclaimed, fmt.Errorf("deduplication unexpected amount of bytes deduped (offs=%x, len=%x)", 
offs, dlen) 250 | } 251 | } 252 | 253 | return reclaimed, nil 254 | } 255 | -------------------------------------------------------------------------------- /scripts/build: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eE -o pipefail 3 | 4 | script_dir=$(dirname "$(realpath "$0")") 5 | cd $script_dir/.. 6 | 7 | version=$(git describe --tags --always) 8 | echo build $version 9 | CGO_ENABLED=0 go build -ldflags="-X main.appVersion=$version" ./cmd/chkbit 10 | -------------------------------------------------------------------------------- /scripts/chkfmt: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eE -o pipefail 3 | 4 | script_dir=$(dirname "$(realpath "$0")") 5 | cd $script_dir/.. 6 | 7 | res="$(gofmt -l . 2>&1)" 8 | 9 | if [ -n "$res" ]; then 10 | echo "gofmt check failed:" 11 | echo "${res}" 12 | exit 1 13 | fi 14 | -------------------------------------------------------------------------------- /scripts/lint: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eE -o pipefail 3 | 4 | script_dir=$(dirname "$(realpath "$0")") 5 | cd $script_dir/.. 6 | 7 | go vet -structtag=false -composites=false ./... 
8 | -------------------------------------------------------------------------------- /scripts/maketestsample: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | script_dir=$(dirname "$(realpath "$0")") 5 | 6 | go run $script_dir/maketestsample.go -root /tmp/sample 7 | 8 | echo 9 | echo '$ ls -l /tmp/sample/root' 10 | ls -l /tmp/sample/root 11 | -------------------------------------------------------------------------------- /scripts/maketestsample.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | "path/filepath" 8 | "time" 9 | ) 10 | 11 | // perform integration test using the compiled binary 12 | 13 | // misc files 14 | 15 | var ( 16 | startList = []string{"time", "year", "people", "way", "day", "thing"} 17 | wordList = []string{"life", "world", "school", "state", "family", "student", "group", "country", "problem", "hand", "part", "place", "case", "week", "company", "system", "program", "work", "government", "number", "night", "point", "home", "water", "room", "mother", "area", "money", "story", "fact", "month", "lot", "right", "study", "book", "eye", "job", "word", "business", "issue", "side", "kind", "head", "house", "service", "friend", "father", "power", "hour", "game", "line", "end", "member", "law", "car", "city", "community", "name", "president", "team", "minute", "idea", "kid", "body", "information", "back", "face", "others", "level", "office", "door", "health", "person", "art", "war", "history", "party", "result", "change", "morning", "reason", "research", "moment", "air", "teacher", "force", "education"} 18 | extList = []string{"txt", "md", "pdf", "jpg", "jpeg", "png", "mp4", "mp3", "csv"} 19 | startDate = time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC) 20 | endDate = time.Date(2024, 12, 1, 0, 0, 0, 0, time.UTC) 21 | dateList = []time.Time{} 22 | ) 23 | 24 | type genContext struct { 25 | 
wordIdx int 26 | extIdx int 27 | dateIdx int 28 | } 29 | 30 | func init() { 31 | var c int64 = 50 32 | interval := (int64)(endDate.Sub(startDate).Seconds()) / c 33 | for i := range make([]int64, c) { 34 | dateList = append(dateList, startDate.Add(time.Duration(interval*(int64)(i))*time.Second)) 35 | } 36 | } 37 | 38 | func (g *genContext) nextWord() string { 39 | word := wordList[g.wordIdx%len(wordList)] 40 | g.wordIdx++ 41 | return word 42 | } 43 | 44 | func (g *genContext) nextExt() string { 45 | ext := extList[g.extIdx%len(extList)] 46 | g.extIdx++ 47 | return ext 48 | } 49 | 50 | func (g *genContext) setDate(filename string, r int) { 51 | date := dateList[g.dateIdx%len(dateList)] 52 | m := 17 * g.dateIdx / len(dateList) 53 | date = date.Add(time.Duration(m) * time.Hour) 54 | g.dateIdx++ 55 | os.Chtimes(filename, date, date) 56 | } 57 | 58 | func (g *genContext) genFile(path string, size int) { 59 | os.WriteFile(path, make([]byte, size), 0644) 60 | g.setDate(path, size*size) 61 | } 62 | 63 | func (g *genContext) genFiles(dir string, a int) { 64 | os.MkdirAll(dir, 0755) 65 | for i := 1; i <= 5; i++ { 66 | size := a*i*g.wordIdx*100 + g.extIdx 67 | file := g.nextWord() + "-" + g.nextWord() 68 | 69 | if i%3 == 0 { 70 | file += "-" + g.nextWord() 71 | } 72 | 73 | file += "." 
+ g.nextExt() 74 | g.genFile(filepath.Join(dir, file), size) 75 | } 76 | } 77 | 78 | func (g *genContext) genDir(root string) { 79 | for _, start := range startList { 80 | 81 | for i := 1; i <= 5; i++ { 82 | dir := filepath.Join(root, start, g.nextWord()) 83 | g.genFiles(dir, 1) 84 | 85 | if g.wordIdx%3 == 0 { 86 | dir = filepath.Join(dir, g.nextWord()) 87 | g.genFiles(dir, 1) 88 | } 89 | } 90 | } 91 | } 92 | 93 | func (g *genContext) makeTestSampleFiles(testDir string) { 94 | 95 | if err := os.RemoveAll(testDir); err != nil { 96 | fmt.Println("Failed to clean", err) 97 | panic(err) 98 | } 99 | 100 | root := filepath.Join(testDir, "root") 101 | g.genDir(root) 102 | 103 | os.MkdirAll(filepath.Join(root, "day/car/empty"), 0755) 104 | 105 | rootPeople := filepath.Join(root, "people") 106 | testPeople := filepath.Join(testDir, "people") 107 | 108 | err := os.Rename(rootPeople, testPeople) 109 | if err != nil { 110 | fmt.Println("Rename failed", err) 111 | panic(err) 112 | } 113 | 114 | err = os.Symlink(testPeople, rootPeople) 115 | if err != nil { 116 | fmt.Println("Symlink failed", err) 117 | panic(err) 118 | } 119 | } 120 | 121 | func main() { 122 | root := flag.String("root", "", "root path to sample data (will be cleared)") 123 | flag.Parse() 124 | if *root == "" { 125 | fmt.Println("error: root parameter is required") 126 | os.Exit(1) 127 | } 128 | fmt.Printf("Clearing and generating test data in %s\n", *root) 129 | g := genContext{} 130 | g.makeTestSampleFiles(*root) 131 | } 132 | -------------------------------------------------------------------------------- /scripts/run_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "os/exec" 7 | "path/filepath" 8 | "runtime" 9 | "strings" 10 | "testing" 11 | "time" 12 | ) 13 | 14 | // perform integration test using the compiled binary 15 | 16 | const testDirBase = "/tmp/chkbit" 17 | 18 | func runCmd(args ...string) *exec.Cmd { 
19 | _, filename, _, _ := runtime.Caller(0) 20 | prjRoot := filepath.Dir(filepath.Dir(filename)) 21 | tool := filepath.Join(prjRoot, "chkbit") 22 | args = append([]string{"--no-config"}, args...) 23 | return exec.Command(tool, args...) 24 | } 25 | 26 | func checkOut(t *testing.T, sout string, expected string) { 27 | if !strings.Contains(sout, expected) { 28 | t.Errorf("Expected '%s' in output, got '%s'\n", expected, sout) 29 | } 30 | } 31 | 32 | func checkNotOut(t *testing.T, sout string, notExpected string) { 33 | if strings.Contains(sout, notExpected) { 34 | t.Errorf("Did not expect '%s' in output, got '%s'\n", notExpected, sout) 35 | } 36 | } 37 | 38 | func initIndexStore(t *testing.T, indexType, root string) { 39 | t.Run("init", func(t *testing.T) { 40 | cmd := runCmd("init", indexType, root) 41 | out, err := cmd.Output() 42 | if err != nil { 43 | t.Fatalf("failed with '%s'\n", err) 44 | } 45 | sout := string(out) 46 | checkOut(t, sout, "chkbit init "+indexType) 47 | checkNotOut(t, sout, "EXC") 48 | }) 49 | } 50 | 51 | func testRoot(t *testing.T, indexType string) { 52 | 53 | testDir := filepath.Join(testDirBase, indexType) 54 | root := filepath.Join(testDir, "root") 55 | g := genContext{} 56 | g.makeTestSampleFiles(testDir) 57 | 58 | checkPrefix := "/tmp/chkbit/split/root/" 59 | if indexType == "atom" { 60 | checkPrefix = "" 61 | } 62 | 63 | initIndexStore(t, indexType, root) 64 | 65 | // update index, no recourse 66 | t.Run("no-recourse", func(t *testing.T) { 67 | cmd := runCmd("update", "--log-deleted", "--no-recurse", filepath.Join(root, "day/office")) 68 | out, err := cmd.Output() 69 | if err != nil { 70 | t.Fatalf("failed with '%s'\n", err) 71 | } 72 | sout := string(out) 73 | checkOut(t, sout, "Processed 5 files") 74 | checkOut(t, sout, "- 1 directory was updated") 75 | checkOut(t, sout, "- 5 file hashes were added") 76 | checkOut(t, sout, "- 0 file hashes were updated") 77 | checkNotOut(t, sout, "removed") 78 | }) 79 | 80 | // update remaining index 
from root 81 | t.Run("update-remaining", func(t *testing.T) { 82 | cmd := runCmd("update", "--log-deleted", root) 83 | out, err := cmd.Output() 84 | if err != nil { 85 | t.Fatalf("failed with '%s'\n", err) 86 | } 87 | sout := string(out) 88 | checkOut(t, sout, "Processed 300 files") 89 | checkOut(t, sout, "- 66 directories were updated") 90 | checkOut(t, sout, "- 295 file hashes were added") 91 | checkOut(t, sout, "- 0 file hashes were updated") 92 | checkNotOut(t, sout, "removed") 93 | }) 94 | 95 | // delete files, check for missing 96 | t.Run("delete", func(t *testing.T) { 97 | os.RemoveAll(filepath.Join(root, "thing/change")) 98 | os.Remove(filepath.Join(root, "time/hour/minute/body-information.csv")) 99 | 100 | cmd := runCmd("check", "--log-deleted", root) 101 | out, err := cmd.Output() 102 | if err != nil { 103 | t.Fatalf("failed with '%s'\n", err) 104 | } 105 | sout := string(out) 106 | checkOut(t, sout, "del "+checkPrefix+"thing/change/") 107 | checkOut(t, sout, "2 files/directories would have been removed") 108 | }) 109 | 110 | // do not report missing without --log-deleted 111 | t.Run("no-missing", func(t *testing.T) { 112 | cmd := runCmd("check", root) 113 | out, err := cmd.Output() 114 | if err != nil { 115 | t.Fatalf("failed with '%s'\n", err) 116 | } 117 | sout := string(out) 118 | checkNotOut(t, sout, "del ") 119 | checkNotOut(t, sout, "removed") 120 | }) 121 | 122 | // check for missing and update 123 | t.Run("missing", func(t *testing.T) { 124 | cmd := runCmd("update", "--log-deleted", root) 125 | out, err := cmd.Output() 126 | if err != nil { 127 | t.Fatalf("failed with '%s'\n", err) 128 | } 129 | sout := string(out) 130 | checkOut(t, sout, "del "+checkPrefix+"thing/change/") 131 | checkOut(t, sout, "2 files/directories have been removed") 132 | }) 133 | 134 | // check again 135 | t.Run("repeat", func(t *testing.T) { 136 | for i := 0; i < 10; i++ { 137 | cmd := runCmd("update", "-v", root) 138 | out, err := cmd.Output() 139 | if err != nil { 140 | 
t.Fatalf("failed with '%s'\n", err) 141 | } 142 | sout := string(out) 143 | checkOut(t, sout, "Processed 289 files") 144 | checkNotOut(t, sout, "removed") 145 | checkNotOut(t, sout, "updated") 146 | checkNotOut(t, sout, "added") 147 | } 148 | }) 149 | 150 | // add files only 151 | t.Run("add-only", func(t *testing.T) { 152 | 153 | g.genFiles(filepath.Join(root, "way/add"), 99) 154 | g.genFile(filepath.Join(root, "time/add-file.txt"), 500) 155 | 156 | cmd := runCmd("update", "--skip-existing", root) 157 | out, err := cmd.Output() 158 | if err != nil { 159 | t.Fatalf("failed with '%s'\n", err) 160 | } 161 | sout := string(out) 162 | checkOut(t, sout, "Processed 6 files") 163 | checkOut(t, sout, "- 3 directories were updated") 164 | checkOut(t, sout, "- 6 file hashes were added") 165 | checkOut(t, sout, "- 0 file hashes were updated") 166 | }) 167 | 168 | // add modified files only 169 | t.Run("add-only-mod", func(t *testing.T) { 170 | 171 | // modify existing 172 | g.genFile(filepath.Join(root, "way/job/word-business.mp3"), 500) 173 | 174 | cmd := runCmd("update", "--skip-existing", root) 175 | out, err := cmd.Output() 176 | if err != nil { 177 | t.Fatalf("failed with '%s'\n", err) 178 | } 179 | sout := string(out) 180 | checkOut(t, sout, "old "+checkPrefix+"way/job/word-business.mp3") 181 | checkOut(t, sout, "Processed 1 file") 182 | checkOut(t, sout, "- 1 directory was updated") 183 | checkOut(t, sout, "- 0 file hashes were added") 184 | checkOut(t, sout, "- 1 file hash was updated") 185 | }) 186 | 187 | // update remaining 188 | t.Run("update-remaining-add", func(t *testing.T) { 189 | cmd := runCmd("update", root) 190 | out, err := cmd.Output() 191 | if err != nil { 192 | t.Fatalf("failed with '%s'\n", err) 193 | } 194 | sout := string(out) 195 | checkOut(t, sout, "Processed 295 files") 196 | }) 197 | 198 | // ignore dot 199 | t.Run("ignore-dot", func(t *testing.T) { 200 | 201 | g.genFiles(filepath.Join(root, "way/.hidden"), 99) 202 | g.genFile(filepath.Join(root, 
"time/.ignored"), 999) 203 | 204 | cmd := runCmd("update", root) 205 | out, err := cmd.Output() 206 | if err != nil { 207 | t.Fatalf("failed with '%s'\n", err) 208 | } 209 | sout := string(out) 210 | checkOut(t, sout, "Processed 295 files") 211 | }) 212 | 213 | // include dot 214 | t.Run("include-dot", func(t *testing.T) { 215 | 216 | cmd := runCmd("update", "--include-dot", root) 217 | out, err := cmd.Output() 218 | if err != nil { 219 | t.Fatalf("failed with '%s'\n", err) 220 | } 221 | sout := string(out) 222 | checkOut(t, sout, "Processed 301 files") 223 | checkOut(t, sout, "- 3 directories were updated") 224 | checkOut(t, sout, "- 6 file hashes were added") 225 | checkOut(t, sout, "- 0 file hashes were updated") 226 | }) 227 | } 228 | 229 | func testDMG(t *testing.T, indexType string) { 230 | 231 | testDmg := filepath.Join(testDirBase, "test_dmg", indexType) 232 | if err := os.RemoveAll(testDmg); err != nil { 233 | fmt.Println("Failed to clean", err) 234 | panic(err) 235 | } 236 | if err := os.MkdirAll(testDmg, 0755); err != nil { 237 | fmt.Println("Failed to create test directory", err) 238 | panic(err) 239 | } 240 | 241 | if err := os.Chdir(testDmg); err != nil { 242 | fmt.Println("Failed to cd test directory", err) 243 | panic(err) 244 | } 245 | 246 | initIndexStore(t, indexType, ".") 247 | 248 | testFile := filepath.Join(testDmg, "test.txt") 249 | t1, _ := time.Parse(time.RFC3339, "2022-02-01T11:00:00Z") 250 | t2, _ := time.Parse(time.RFC3339, "2022-02-01T12:00:00Z") 251 | t3, _ := time.Parse(time.RFC3339, "2022-02-01T13:00:00Z") 252 | 253 | // create test and set the modified time" 254 | t.Run("create", func(t *testing.T) { 255 | os.WriteFile(testFile, []byte("foo1"), 0644) 256 | os.Chtimes(testFile, t2, t2) 257 | 258 | cmd := runCmd("update", ".") 259 | if out, err := cmd.Output(); err != nil { 260 | t.Fatalf("failed with '%s'\n", err) 261 | } else { 262 | checkOut(t, string(out), "new test.txt") 263 | } 264 | }) 265 | 266 | // update test with different 
content & old modified (expect 'old')" 267 | t.Run("expect-old", func(t *testing.T) { 268 | os.WriteFile(testFile, []byte("foo2"), 0644) 269 | os.Chtimes(testFile, t1, t1) 270 | 271 | cmd := runCmd("update", ".") 272 | if out, err := cmd.Output(); err != nil { 273 | t.Fatalf("failed with '%s'\n", err) 274 | } else { 275 | checkOut(t, string(out), "old test.txt") 276 | } 277 | }) 278 | 279 | // update test & new modified (expect 'upd')" 280 | t.Run("expect-upd", func(t *testing.T) { 281 | os.WriteFile(testFile, []byte("foo3"), 0644) 282 | os.Chtimes(testFile, t3, t3) 283 | 284 | cmd := runCmd("update", ".") 285 | if out, err := cmd.Output(); err != nil { 286 | t.Fatalf("failed with '%s'\n", err) 287 | } else { 288 | checkOut(t, string(out), "upd test.txt") 289 | } 290 | }) 291 | 292 | // Now update test with the same modified to simulate damage (expect DMG)" 293 | t.Run("expect-DMG", func(t *testing.T) { 294 | os.WriteFile(testFile, []byte("foo4"), 0644) 295 | os.Chtimes(testFile, t3, t3) 296 | 297 | cmd := runCmd("update", ".") 298 | if out, err := cmd.Output(); err != nil { 299 | if cmd.ProcessState.ExitCode() != 1 { 300 | t.Fatalf("expected to fail with exit code 1 vs %d!", cmd.ProcessState.ExitCode()) 301 | } 302 | checkOut(t, string(out), "DMG test.txt") 303 | } else { 304 | t.Fatal("expected to fail!") 305 | } 306 | }) 307 | } 308 | 309 | func TestRootAtom(t *testing.T) { 310 | testRoot(t, "atom") 311 | } 312 | 313 | func TestRootSplit(t *testing.T) { 314 | testRoot(t, "split") 315 | } 316 | 317 | func TestDmgAtom(t *testing.T) { 318 | testDMG(t, "atom") 319 | } 320 | 321 | func TestDmgSplit(t *testing.T) { 322 | testDMG(t, "split") 323 | } 324 | -------------------------------------------------------------------------------- /scripts/tests: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | export NO_COLOR=1 4 | 5 | script_dir=$(dirname "$(realpath "$0")") 6 | cd $script_dir/.. 
7 | 8 | echo "# test module" 9 | go test -v . 10 | echo "# test util" 11 | go test -v ./cmd/chkbit/util -count=1 12 | 13 | echo "# build" 14 | $script_dir/build 15 | 16 | echo "# test files" 17 | go test -v ./scripts -count=1 18 | -------------------------------------------------------------------------------- /scripts/xbuild: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eE -o pipefail 3 | 4 | script_dir=$(dirname "$(realpath "$0")") 5 | cd $script_dir/.. 6 | 7 | if [ -z "$version" ]; then 8 | version=$(git rev-parse HEAD) 9 | fi 10 | 11 | echo "building version $version" 12 | 13 | mkdir -p dist 14 | rm -f dist/* 15 | 16 | build() { 17 | echo "- $1-$2" 18 | rm -f dist/chkbit 19 | CGO_ENABLED=0 GOOS="$1" GOARCH="$2" go build -o dist -ldflags="-X main.appVersion=$version" ./cmd/chkbit 20 | 21 | pushd dist 22 | 23 | case "$1" in 24 | windows) 25 | outfile="chkbit-$1-$2.zip" 26 | zip "$outfile" chkbit.exe --move 27 | ;; 28 | *) 29 | outfile="chkbit-$1-$2.tar.gz" 30 | tar -czf "$outfile" chkbit --remove-files 31 | ;; 32 | esac 33 | 34 | popd 35 | } 36 | 37 | if [[ -z $2 ]]; then 38 | build android arm64 39 | build darwin amd64 40 | build darwin arm64 41 | build freebsd amd64 42 | build freebsd arm64 43 | build freebsd riscv64 44 | build linux amd64 45 | build linux arm64 46 | build linux riscv64 47 | build netbsd amd64 48 | build netbsd arm64 49 | build openbsd amd64 50 | build openbsd arm64 51 | build windows amd64 52 | build windows arm64 53 | else 54 | build $1 $2 55 | fi 56 | -------------------------------------------------------------------------------- /status.go: -------------------------------------------------------------------------------- 1 | package chkbit 2 | 3 | type Status string 4 | 5 | const ( 6 | StatusPanic Status = "PNC" 7 | StatusErrorDamage Status = "DMG" 8 | StatusErrorIdx Status = "ERX" 9 | StatusUpdateWarnOld Status = "old" 10 | StatusUpdate Status = "upd" 11 | StatusNew Status = 
// runWorker consumes WorkItems from the context's WorkQueue until it
// receives a nil item or doAbort is set. For each item it loads the
// directory's index, then either reports ignored files only or hashes
// and verifies the files, saving index changes when updating.
func (context *Context) runWorker(_ int) {
	for {
		item := <-context.WorkQueue
		// a nil item is the shutdown signal for this worker
		if item == nil || context.doAbort {
			break
		}

		// the final argument is !UpdateIndex — presumably a read-only
		// flag for the index; confirm against newIndex
		index := newIndex(context, item.path, item.filesToIndex, item.dirList, !context.UpdateIndex)
		err := index.load()
		if err != nil {
			// log the load failure but continue; calcHashes/checkFix
			// below still run on whatever state the index has
			context.logErr(index.getIndexFilepath(), err)
		}

		if context.ShowIgnoredOnly {
			index.showIgnoredOnly(item.ignore)
		} else {
			index.calcHashes(item.ignore)
			index.checkFix(context.ForceUpdateDmg)

			if context.UpdateIndex {
				if changed, err := index.save(); err != nil {
					context.logErr(item.path, err)
				} else if changed {
					// internal status marking that the index was written
					context.log(StatusUpdateIndex, "")
				}
			}
		}
	}
}