├── .github └── workflows │ ├── main.yml │ └── release.yml ├── .gitignore ├── LICENSE ├── README.md ├── diskrsync ├── main.go └── progress.go ├── go.mod ├── go.sum ├── sync.go └── sync_test.go /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - "**" 5 | tags-ignore: 6 | - "**" 7 | pull_request: 8 | 9 | name: Test 10 | jobs: 11 | test: 12 | strategy: 13 | matrix: 14 | go-version: [1.16.x, 1.x] 15 | os: [ubuntu-latest] 16 | arch: ["", "386"] 17 | fail-fast: false 18 | runs-on: ${{ matrix.os }} 19 | steps: 20 | - name: Install Go 21 | uses: actions/setup-go@v2 22 | with: 23 | go-version: ${{ matrix.go-version }} 24 | - name: Checkout code 25 | uses: actions/checkout@v2 26 | - name: Run tests 27 | env: 28 | GOARCH: ${{ matrix.arch }} 29 | run: go test ./... 30 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | on: 2 | release: 3 | types: [created] 4 | name: Release 5 | jobs: 6 | releases-matrix: 7 | name: Release Go Binary 8 | runs-on: ubuntu-latest 9 | strategy: 10 | matrix: 11 | goos: [linux, darwin] 12 | goarch: [amd64, arm64, arm] 13 | exclude: 14 | - goarch: arm 15 | goos: darwin 16 | steps: 17 | - uses: actions/checkout@v2 18 | - uses: wangyoucao577/go-release-action@v1.25 19 | with: 20 | github_token: ${{ secrets.GITHUB_TOKEN }} 21 | goos: ${{ matrix.goos }} 22 | goarch: ${{ matrix.goarch }} 23 | project_path: "./diskrsync" 24 | binary_name: "diskrsync" 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.iml 3 | testdata 4 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Dmitry Panov 2 | 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and 5 | associated documentation files (the "Software"), to deal in the Software without restriction, 6 | including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all copies or substantial 11 | portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT 14 | LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 15 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 16 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 17 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 18 | 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | diskrsync -- rsync for block devices and disk images 2 | === 3 | 4 | This is a utility for remote backup of disk image files or devices. It uses ssh as a transport 5 | and tries to minimise the amount of data transferred. This is done by building a Merkle tree of 6 | [blake2](https://blake2.net/) hashes on both the source and the target hosts and then 7 | traversing the tree only transmitting those blocks where the hashes differ. 8 | 9 | It is important that the source file does not change during the process. 
Typically this would 10 | be achieved by creating an LVM snapshot or by freezing the VM if that's not possible. 11 | 12 | By default the resulting file is compressed using the [spgz](https://github.com/dop251/spgz) library (this can be disabled by 13 | using --no-compress flag). Note this only works on filesystems that support punching holes in 14 | files like xfs or ext4. 15 | 16 | The utility handles sparse files (or just files with a lot of zeros) efficiently. The resulting 17 | file will be sparse (even if not compressed). 18 | 19 | Size changes are also supported (both shrinks and expansions). 20 | 21 | Installation 22 | --- 23 | 1. Install go. Version 1.16 is minimum required but the newer the better. 24 | If your distribution lacks the required version, check backports or updates (e.g. for [debian](https://packages.debian.org/search?keywords=golang) or [ubuntu](https://packages.ubuntu.com/search?keywords=golang)) 25 | Alternatively, install [manually](https://golang.org/doc/install). 26 | 27 | 2. Run the following commands: 28 | ```shell 29 | mkdir workspace 30 | cd workspace 31 | GOPATH=$(pwd) go install github.com/dop251/diskrsync/diskrsync@latest 32 | sudo cp -a bin/diskrsync /usr/local/bin 33 | ``` 34 | 35 | 3. Make sure the binary is copied to the remote machine as well. 36 | If the remote machine has a different CPU or OS you may want to 37 | use [cross-compilation](https://dave.cheney.net/2015/08/22/cross-compilation-with-go-1-5). 38 | For example if you want to build a binary for ARM: 39 | ```shell 40 | GOPATH=$(pwd) GOARCH=arm go install github.com/dop251/diskrsync/diskrsync@latest 41 | ls -l bin/linux_arm/diskrsync 42 | ``` 43 | 44 | 45 | Usage examples 46 | --- 47 | 48 | ```shell 49 | diskrsync /dev/vg00/lv_snap ruser@backuphost:/mnt/backup/disk 50 | ``` 51 | 52 | This ensures that /mnt/backup/disk is up-to-date with the LV snapshot. 
The file will be compressed 53 | using spgz and can be recovered using the following command: 54 | 55 | ```shell 56 | spgz -x /mnt/backup/disk /dev/vg00/.... 57 | ``` 58 | 59 | 60 | 61 | ```shell 62 | diskrsync --verbose --no-compress --ssh-flags="-i id_file" /var/lib/libvirt/images/disk.img ruser@rbackuphost:/mnt/backup/ 63 | ``` 64 | 65 | This ensures that /mnt/backup/disk.img is up-to-date with the source file. 66 | -------------------------------------------------------------------------------- /diskrsync/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "log" 10 | "os" 11 | "os/exec" 12 | "path/filepath" 13 | "strings" 14 | 15 | "github.com/dop251/diskrsync" 16 | "github.com/dop251/spgz" 17 | 18 | flag "github.com/spf13/pflag" 19 | "github.com/vbauerster/mpb/v7" 20 | ) 21 | 22 | const ( 23 | modeSource = iota 24 | modeTarget 25 | ) 26 | 27 | type options struct { 28 | sshFlags string 29 | cmdPath string 30 | noCompress bool 31 | verbose bool 32 | } 33 | 34 | type proc interface { 35 | Start(cmdReader io.Reader, cmdWriter io.WriteCloser, errChan chan error, calcPl, syncPl diskrsync.ProgressListener) error 36 | IsLocal() bool 37 | } 38 | 39 | type localProc struct { 40 | p string 41 | mode int 42 | opts *options 43 | } 44 | 45 | type remoteProc struct { 46 | p string 47 | mode int 48 | opts *options 49 | host string 50 | cmd *exec.Cmd 51 | } 52 | 53 | // used to prevent output to stderr while the progress bars are active 54 | var bufStderr = bufferedOut{ 55 | w: os.Stderr, 56 | } 57 | 58 | func usage() { 59 | _, _ = fmt.Fprintf(os.Stderr, "Usage: %s [flags] \nsrc and dst is [[user@]host:]path\n\nFlags:\n", os.Args[0]) 60 | flag.PrintDefaults() 61 | os.Exit(2) 62 | } 63 | 64 | func split(arg string) (host, path string) { 65 | a := strings.SplitN(arg, ":", 2) 66 | if len(a) == 2 { 67 | host = a[0] 68 | path = a[1] 69 | if path == 
"" { 70 | path = "./" 71 | } 72 | } else { 73 | path = arg 74 | } 75 | return 76 | } 77 | 78 | func createProc(arg string, mode int, opts *options) proc { 79 | host, path := split(arg) 80 | if host != "" { 81 | return createRemoteProc(host, path, mode, opts) 82 | } 83 | return createLocalProc(path, mode, opts) 84 | } 85 | 86 | func createRemoteProc(host, path string, mode int, opts *options) proc { 87 | return &remoteProc{ 88 | host: host, 89 | p: path, 90 | mode: mode, 91 | opts: opts, 92 | } 93 | } 94 | 95 | func createLocalProc(p string, mode int, opts *options) proc { 96 | return &localProc{ 97 | p: p, 98 | mode: mode, 99 | opts: opts, 100 | } 101 | } 102 | 103 | func (p *localProc) Start(cmdReader io.Reader, cmdWriter io.WriteCloser, errChan chan error, calcPl, syncPl diskrsync.ProgressListener) error { 104 | go p.run(cmdReader, cmdWriter, errChan, calcPl, syncPl) 105 | return nil 106 | } 107 | 108 | func (p *localProc) IsLocal() bool { 109 | return true 110 | } 111 | 112 | func (p *localProc) run(cmdReader io.Reader, cmdWriter io.WriteCloser, errChan chan error, calcPl, syncPl diskrsync.ProgressListener) { 113 | var err error 114 | if p.mode == modeSource { 115 | err = doSource(p.p, cmdReader, cmdWriter, p.opts, calcPl, syncPl) 116 | } else { 117 | err = doTarget(p.p, cmdReader, cmdWriter, p.opts, calcPl, syncPl) 118 | } 119 | 120 | cerr := cmdWriter.Close() 121 | if err == nil { 122 | err = cerr 123 | } 124 | errChan <- err 125 | } 126 | 127 | func (p *remoteProc) pipeCopy(dst io.WriteCloser, src io.Reader) { 128 | _, err := io.Copy(dst, src) 129 | if err != nil { 130 | log.Printf("pipe copy failed: %v", err) 131 | } 132 | err = dst.Close() 133 | if err != nil { 134 | log.Printf("close failed after pipe copy: %v", err) 135 | } 136 | } 137 | 138 | func (p *remoteProc) Start(cmdReader io.Reader, cmdWriter io.WriteCloser, errChan chan error, calcPl, syncPl diskrsync.ProgressListener) error { 139 | cmd := exec.Command("ssh") 140 | p.cmd = cmd 141 | args := 
cmd.Args 142 | 143 | if p.opts.sshFlags != "" { 144 | flags := strings.Split(p.opts.sshFlags, " ") 145 | args = append(args, flags...) 146 | } 147 | 148 | var cmdPath string 149 | if cp := p.opts.cmdPath; cp != "" { 150 | cmdPath = cp 151 | } else { 152 | cmdPath = os.Args[0] 153 | } 154 | args = append(args, p.host, cmdPath) 155 | 156 | if p.mode == modeSource { 157 | args = append(args, "--source") 158 | } else { 159 | args = append(args, "--target") 160 | if p.opts.noCompress { 161 | args = append(args, " --no-compress") 162 | } 163 | } 164 | if p.opts.verbose && calcPl == nil { 165 | args = append(args, " --verbose") 166 | } 167 | if calcPl == nil { 168 | cmd.Stderr = os.Stderr 169 | } else { 170 | stderr, err := cmd.StderrPipe() 171 | if err != nil { 172 | return err 173 | } 174 | args = append(args, "--calc-progress") 175 | if syncPl != nil { 176 | args = append(args, "--sync-progress") 177 | } 178 | 179 | go func() { 180 | r := bufio.NewReader(stderr) 181 | readStart := func() (string, error) { 182 | for { 183 | line, err := r.ReadString('\n') 184 | if name := strings.TrimPrefix(line, "[Start "); name != line && len(name) > 1 { 185 | return name[:len(name)-2], nil 186 | } 187 | if len(line) > 0 { 188 | _, werr := bufStderr.Write([]byte(line)) 189 | if werr != nil { 190 | return "", werr 191 | } 192 | } 193 | if err != nil { 194 | return "", err 195 | } 196 | } 197 | } 198 | pr := &progressReader{ 199 | r: r, 200 | w: &bufStderr, 201 | pl: calcPl, 202 | } 203 | name, err := readStart() 204 | if err != nil { 205 | return 206 | } 207 | if name == "calc" { 208 | err := pr.read() 209 | if err != nil { 210 | return 211 | } 212 | if syncPl != nil { 213 | name, err = readStart() 214 | if err != nil { 215 | return 216 | } 217 | } 218 | } 219 | if syncPl != nil && name == "sync" { 220 | pr.pl = syncPl 221 | err = pr.read() 222 | if err != nil { 223 | return 224 | } 225 | } 226 | _, _ = io.Copy(os.Stderr, r) 227 | }() 228 | } 229 | 230 | args = append(args, p.p) 231 | 
cmd.Args = args 232 | cmd.Stdin = cmdReader 233 | 234 | r, err := cmd.StdoutPipe() 235 | if err != nil { 236 | return err 237 | } 238 | 239 | err = cmd.Start() 240 | if err != nil { 241 | return err 242 | } 243 | go p.run(cmdWriter, r, errChan) 244 | return nil 245 | } 246 | 247 | func (p *remoteProc) IsLocal() bool { 248 | return false 249 | } 250 | 251 | func (p *remoteProc) run(w io.WriteCloser, r io.Reader, errChan chan error) { 252 | p.pipeCopy(w, r) 253 | errChan <- p.cmd.Wait() 254 | } 255 | 256 | func doSource(p string, cmdReader io.Reader, cmdWriter io.WriteCloser, opts *options, calcPl, syncPl diskrsync.ProgressListener) error { 257 | f, err := os.Open(p) 258 | if err != nil { 259 | return err 260 | } 261 | 262 | defer f.Close() 263 | 264 | var src io.ReadSeeker 265 | 266 | // Try to open as an spgz file 267 | sf, err := spgz.NewFromFile(f, os.O_RDONLY) 268 | if err != nil { 269 | if err != spgz.ErrInvalidFormat { 270 | return err 271 | } 272 | src = f 273 | } else { 274 | src = sf 275 | } 276 | 277 | size, err := src.Seek(0, io.SeekEnd) 278 | if err != nil { 279 | return err 280 | } 281 | 282 | _, err = src.Seek(0, io.SeekStart) 283 | if err != nil { 284 | return err 285 | } 286 | 287 | err = diskrsync.Source(src, size, cmdReader, cmdWriter, true, opts.verbose, calcPl, syncPl) 288 | cerr := cmdWriter.Close() 289 | if err == nil { 290 | err = cerr 291 | } 292 | return err 293 | } 294 | 295 | func doTarget(p string, cmdReader io.Reader, cmdWriter io.WriteCloser, opts *options, calcPl, syncPl diskrsync.ProgressListener) (err error) { 296 | var w spgz.SparseFile 297 | useReadBuffer := false 298 | 299 | f, err := os.OpenFile(p, os.O_RDWR|os.O_CREATE, 0666) 300 | if err != nil { 301 | return 302 | } 303 | 304 | info, err := f.Stat() 305 | if err != nil { 306 | _ = f.Close() 307 | return 308 | } 309 | 310 | if info.Mode()&(os.ModeDevice|os.ModeCharDevice) != 0 { 311 | w = spgz.NewSparseFileWithoutHolePunching(f) 312 | useReadBuffer = true 313 | } else if 
!opts.noCompress { 314 | sf, err := spgz.NewFromFileSize(f, os.O_RDWR|os.O_CREATE, diskrsync.DefTargetBlockSize) 315 | if err != nil { 316 | if err != spgz.ErrInvalidFormat { 317 | if err == spgz.ErrPunchHoleNotSupported { 318 | err = fmt.Errorf("target does not support compression. Try with -no-compress option (error was '%v')", err) 319 | } 320 | _ = f.Close() 321 | return err 322 | } 323 | } else { 324 | w = &diskrsync.FixingSpgzFileWrapper{SpgzFile: sf} 325 | } 326 | } 327 | 328 | if w == nil { 329 | w = spgz.NewSparseFileWithFallback(f) 330 | useReadBuffer = true 331 | } 332 | 333 | defer func() { 334 | cerr := w.Close() 335 | if err == nil { 336 | err = cerr 337 | } 338 | }() 339 | 340 | size, err := w.Seek(0, io.SeekEnd) 341 | if err != nil { 342 | return err 343 | } 344 | 345 | _, err = w.Seek(0, io.SeekStart) 346 | 347 | if err != nil { 348 | return err 349 | } 350 | 351 | err = diskrsync.Target(w, size, cmdReader, cmdWriter, useReadBuffer, opts.verbose, calcPl, syncPl) 352 | cerr := cmdWriter.Close() 353 | if err == nil { 354 | err = cerr 355 | } 356 | 357 | return 358 | } 359 | 360 | func doCmd(opts *options) (err error) { 361 | src := createProc(flag.Arg(0), modeSource, opts) 362 | 363 | path := flag.Arg(1) 364 | if _, p := split(path); strings.HasSuffix(p, "/") { 365 | path += filepath.Base(flag.Arg(0)) 366 | } 367 | 368 | dst := createProc(path, modeTarget, opts) 369 | 370 | srcErrChan := make(chan error, 1) 371 | dstErrChan := make(chan error, 1) 372 | 373 | srcReader, dstWriter := io.Pipe() 374 | dstReader, srcWriter := io.Pipe() 375 | 376 | sr := &diskrsync.CountingReader{Reader: srcReader} 377 | sw := &diskrsync.CountingWriteCloser{WriteCloser: srcWriter} 378 | 379 | var ( 380 | p *mpb.Progress 381 | cancel func() 382 | srcCalcPl, syncPl diskrsync.ProgressListener 383 | ) 384 | 385 | if opts.verbose { 386 | var ctx context.Context 387 | ctx, cancel = context.WithCancel(context.Background()) 388 | defer func() { 389 | if cancel != nil { 390 | 
cancel() 391 | } 392 | bufStderr.Release() 393 | }() 394 | p = mpb.NewWithContext(ctx) 395 | if src.IsLocal() && !dst.IsLocal() { 396 | syncPl = newSyncProgressBarListener(p) 397 | } 398 | srcCalcPl = newCalcProgressBarListener(p, "Source Checksums") 399 | log.SetOutput(&bufStderr) 400 | } 401 | err = src.Start(sr, sw, srcErrChan, srcCalcPl, syncPl) 402 | 403 | if err != nil { 404 | return fmt.Errorf("could not start source: %w", err) 405 | } 406 | 407 | var dstCalcPl, dstSyncPl diskrsync.ProgressListener 408 | 409 | if opts.verbose { 410 | dstCalcPl = newCalcProgressBarListener(p, "Target Checksums") 411 | if syncPl == nil { 412 | dstSyncPl = newSyncProgressBarListener(p) 413 | } 414 | } 415 | err = dst.Start(dstReader, dstWriter, dstErrChan, dstCalcPl, dstSyncPl) 416 | 417 | if err != nil { 418 | return fmt.Errorf("could not start target: %w", err) 419 | } 420 | 421 | L: 422 | for srcErrChan != nil || dstErrChan != nil { 423 | select { 424 | case dstErr := <-dstErrChan: 425 | if dstErr != nil { 426 | if !errors.Is(dstErr, io.EOF) { 427 | err = fmt.Errorf("target error: %w", dstErr) 428 | break L 429 | } 430 | } 431 | dstErrChan = nil 432 | case srcErr := <-srcErrChan: 433 | if srcErr != nil { 434 | if !errors.Is(srcErr, io.EOF) { 435 | err = fmt.Errorf("source error: %w", srcErr) 436 | break L 437 | } 438 | } 439 | srcErrChan = nil 440 | } 441 | } 442 | 443 | if cancel != nil { 444 | if err == nil { 445 | p.Wait() 446 | } 447 | cancel() 448 | cancel = nil 449 | } 450 | 451 | if opts.verbose { 452 | log.Printf("Read: %d, wrote: %d\n", sr.Count(), sw.Count()) 453 | } 454 | return 455 | } 456 | 457 | func main() { 458 | // These flags are for the remote command mode, not to be used directly. 
459 | var sourceMode = flag.Bool("source", false, "Source mode") 460 | var targetMode = flag.Bool("target", false, "Target mode") 461 | var calcProgress = flag.Bool("calc-progress", false, "Write calc progress") 462 | var syncProgress = flag.Bool("sync-progress", false, "Write sync progress") 463 | flag.CommandLine.VisitAll(func(f *flag.Flag) { 464 | f.Hidden = true 465 | }) 466 | 467 | var opts options 468 | 469 | flag.StringVar(&opts.sshFlags, "ssh-flags", "", "SSH flags") 470 | flag.StringVar(&opts.cmdPath, "cmd-path", "", "Remote command path (defaults to argv[0])") 471 | flag.BoolVar(&opts.noCompress, "no-compress", false, "Store target as a raw file") 472 | flag.BoolVar(&opts.verbose, "verbose", false, "Print statistics, progress, and some debug info") 473 | 474 | flag.Parse() 475 | 476 | if *sourceMode || *targetMode { 477 | var calcPl, syncPl diskrsync.ProgressListener 478 | 479 | if *calcProgress { 480 | calcPl = &progressWriter{ 481 | name: "calc", 482 | w: os.Stderr, 483 | } 484 | } 485 | 486 | if *syncProgress { 487 | syncPl = &progressWriter{ 488 | name: "sync", 489 | w: os.Stderr, 490 | } 491 | } 492 | 493 | if *sourceMode { 494 | err := doSource(flag.Arg(0), os.Stdin, os.Stdout, &opts, calcPl, syncPl) 495 | if err != nil { 496 | log.Fatalf("Source failed: %s", err.Error()) 497 | } 498 | } else { 499 | err := doTarget(flag.Arg(0), os.Stdin, os.Stdout, &opts, calcPl, syncPl) 500 | if err != nil { 501 | log.Fatalf("Target failed: %s", err.Error()) 502 | } 503 | } 504 | } else { 505 | if flag.Arg(0) == "" || flag.Arg(1) == "" { 506 | usage() 507 | } 508 | err := doCmd(&opts) 509 | if err != nil { 510 | log.Fatal(err) 511 | } 512 | } 513 | 514 | } 515 | -------------------------------------------------------------------------------- /diskrsync/progress.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "fmt" 7 | "io" 8 | "strconv" 9 | "strings" 10 | "sync" 11 | 
"time" 12 | 13 | "github.com/dop251/diskrsync" 14 | "github.com/vbauerster/mpb/v7" 15 | "github.com/vbauerster/mpb/v7/decor" 16 | ) 17 | 18 | type bufferedOut struct { 19 | sync.Mutex 20 | buf bytes.Buffer 21 | w io.Writer 22 | 23 | blocked bool 24 | } 25 | 26 | func (b *bufferedOut) Write(p []byte) (int, error) { 27 | b.Lock() 28 | defer b.Unlock() 29 | if !b.blocked { 30 | return b.w.Write(p) 31 | } 32 | return b.buf.Write(p) 33 | } 34 | 35 | func (b *bufferedOut) Block() { 36 | b.Lock() 37 | b.blocked = true 38 | b.Unlock() 39 | } 40 | 41 | func (b *bufferedOut) Release() { 42 | b.Lock() 43 | defer b.Unlock() 44 | if b.blocked { 45 | _, _ = b.w.Write(b.buf.Bytes()) 46 | b.buf.Reset() 47 | b.blocked = false 48 | } 49 | } 50 | 51 | type calcProgressBar struct { 52 | p *mpb.Progress 53 | bar *mpb.Bar 54 | name string 55 | lastUpdate time.Time 56 | } 57 | 58 | func newCalcProgressBarListener(p *mpb.Progress, name string) *calcProgressBar { 59 | return &calcProgressBar{ 60 | p: p, 61 | name: name, 62 | } 63 | } 64 | 65 | func (pb *calcProgressBar) Start(size int64) { 66 | bufStderr.Block() 67 | pb.bar = pb.p.New(size, 68 | mpb.BarStyle().Rbound("|").Padding(" "), 69 | mpb.BarPriority(1), 70 | mpb.PrependDecorators( 71 | decor.Name(pb.name, decor.WC{C: decor.DidentRight | decor.DextraSpace | decor.DSyncWidth}), 72 | decor.CountersKibiByte("% .2f / % .2f", decor.WCSyncSpace), 73 | ), 74 | mpb.AppendDecorators( 75 | decor.OnComplete( 76 | decor.EwmaETA(decor.ET_STYLE_GO, 60, decor.WC{W: 4}), "done", 77 | ), 78 | decor.Name(" ] "), 79 | decor.EwmaSpeed(decor.UnitKiB, "% .2f", 60, decor.WCSyncSpace), 80 | ), 81 | ) 82 | pb.lastUpdate = time.Now() 83 | } 84 | 85 | func (pb *calcProgressBar) Update(pos int64) { 86 | pb.bar.SetCurrent(pos) 87 | now := time.Now() 88 | pb.bar.DecoratorEwmaUpdate(now.Sub(pb.lastUpdate)) 89 | pb.lastUpdate = now 90 | } 91 | 92 | type syncProgressBar struct { 93 | p *mpb.Progress 94 | bar *mpb.Bar 95 | } 96 | 97 | func 
newSyncProgressBarListener(p *mpb.Progress) *syncProgressBar { 98 | return &syncProgressBar{ 99 | p: p, 100 | } 101 | } 102 | 103 | func (pb *syncProgressBar) Start(size int64) { 104 | const name = "Sync" 105 | bufStderr.Block() 106 | pb.bar = pb.p.New(size, 107 | mpb.BarStyle().Padding(" "), 108 | mpb.BarPriority(2), 109 | mpb.PrependDecorators( 110 | decor.Name(name, decor.WC{C: decor.DidentRight | decor.DextraSpace | decor.DSyncWidth}), 111 | decor.CountersKibiByte("% .2f / % .2f", decor.WCSyncSpace), 112 | ), 113 | mpb.AppendDecorators( 114 | decor.Percentage(decor.WCSyncSpace), 115 | ), 116 | ) 117 | } 118 | 119 | func (pb *syncProgressBar) Update(pos int64) { 120 | pb.bar.SetCurrent(pos) 121 | } 122 | 123 | type progressWriter struct { 124 | name string 125 | w io.Writer 126 | 127 | size int64 128 | lastWrite time.Time 129 | } 130 | 131 | func (pw *progressWriter) Start(size int64) { 132 | pw.size = size 133 | pw.lastWrite = time.Now() 134 | fmt.Fprintf(pw.w, "[Start %s]\nSize: %d\n", pw.name, size) 135 | } 136 | 137 | func (pw *progressWriter) Update(pos int64) { 138 | if pw.size <= 0 { 139 | return 140 | } 141 | now := time.Now() 142 | if pos >= pw.size || now.Sub(pw.lastWrite) >= 250*time.Millisecond { 143 | fmt.Fprintf(pw.w, "Update: %d\n", pos) 144 | pw.lastWrite = now 145 | } 146 | } 147 | 148 | type progressReader struct { 149 | r *bufio.Reader 150 | w io.Writer 151 | pl diskrsync.ProgressListener 152 | } 153 | 154 | func (pr *progressReader) read() error { 155 | var size int64 156 | for { 157 | str, err := pr.r.ReadString('\n') 158 | if s := strings.TrimPrefix(str, "Size: "); s != str && len(s) > 0 { 159 | sz, err := strconv.ParseInt(s[:len(s)-1], 10, 64) 160 | if err == nil { 161 | pr.pl.Start(sz) 162 | size = sz 163 | break 164 | } 165 | } 166 | if len(str) > 0 { 167 | _, werr := pr.w.Write([]byte(str)) 168 | if werr != nil { 169 | return werr 170 | } 171 | } 172 | if err != nil { 173 | return err 174 | } 175 | } 176 | if size <= 0 { 177 | return 
nil 178 | } 179 | for { 180 | str, err := pr.r.ReadString('\n') 181 | if s := strings.TrimPrefix(str, "Update: "); s != str && len(s) > 0 { 182 | pos, err := strconv.ParseInt(s[:len(s)-1], 10, 64) 183 | if err == nil { 184 | pr.pl.Update(pos) 185 | if pos >= size { 186 | break 187 | } 188 | continue 189 | } 190 | } 191 | if len(str) > 0 { 192 | _, werr := pr.w.Write([]byte(str)) 193 | if werr != nil { 194 | return werr 195 | } 196 | } 197 | if err != nil { 198 | return err 199 | } 200 | } 201 | return nil 202 | } 203 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/dop251/diskrsync 2 | 3 | go 1.16 4 | 5 | require ( 6 | github.com/dop251/spgz v1.2.0 7 | github.com/spf13/pflag v1.0.5 8 | github.com/vbauerster/mpb/v7 v7.4.1 9 | golang.org/x/crypto v0.0.0-20220312131142-6068a2e6cfdc 10 | golang.org/x/sys v0.0.0-20220310020820-b874c991c1a5 // indirect 11 | ) 12 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow= 2 | github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4= 3 | github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d h1:licZJFw2RwpHMqeKTCYkitsPqHNxTmd4SNR5r94FGM8= 4 | github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d/go.mod h1:asat636LX7Bqt5lYEZ27JNDcqxfjdBQuJ/MM4CN/Lzo= 5 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 6 | github.com/dop251/buse v1.1.0/go.mod h1:MGzyYwutwcAUZa2KlHLEhUaYBr6JpZf4sqyh1v1lEZs= 7 | github.com/dop251/nbd v0.0.0-20170916130042-b8933b281cb7/go.mod h1:/YqO/I24sucjxhCgQHgDrnffSwg5HzoYHQASayZnYl8= 8 | github.com/dop251/spgz v1.2.0 h1:/VXInlcNmrhdehE228zLnTK9jTdpnNxtxG/t6XlFn14= 9 | 
github.com/dop251/spgz v1.2.0/go.mod h1:TvZEdiTP+5fkWTBiO9Po3zlegP9MXzwVKw9O97IJijQ= 10 | github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU= 11 | github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= 12 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 13 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 14 | github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY= 15 | github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= 16 | github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= 17 | github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= 18 | github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= 19 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 20 | github.com/vbauerster/mpb/v7 v7.4.1 h1:NhLMWQ3gNg2KJR8oeA9lO8Xvq+eNPmixDmB6JEQOUdA= 21 | github.com/vbauerster/mpb/v7 v7.4.1/go.mod h1:Ygg2mV9Vj9sQBWqsK2m2pidcf9H3s6bNKtqd3/M4gBo= 22 | golang.org/x/crypto v0.0.0-20220312131142-6068a2e6cfdc h1:i6Z9eOQAdM7lvsbkT3fwFNtSAAC+A59TYilFj53HW+E= 23 | golang.org/x/crypto v0.0.0-20220312131142-6068a2e6cfdc/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= 24 | golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= 25 | golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 26 | golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 27 | golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 28 | golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 29 | golang.org/x/sys 
v0.0.0-20220209214540-3681064d5158/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 30 | golang.org/x/sys v0.0.0-20220310020820-b874c991c1a5 h1:y/woIyUBFbpQGKS0u1aHF/40WUDnek3fPOyD08H5Vng= 31 | golang.org/x/sys v0.0.0-20220310020820-b874c991c1a5/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 32 | golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= 33 | golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 34 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 35 | -------------------------------------------------------------------------------- /sync.go: -------------------------------------------------------------------------------- 1 | package diskrsync 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "encoding/binary" 7 | "errors" 8 | "fmt" 9 | "hash" 10 | "io" 11 | "log" 12 | "math" 13 | 14 | "github.com/dop251/spgz" 15 | "golang.org/x/crypto/blake2b" 16 | ) 17 | 18 | const ( 19 | hdrMagic = "BSNC0002" 20 | ) 21 | 22 | const ( 23 | hashSize = 64 24 | 25 | DefTargetBlockSize = 128 * 1024 26 | ) 27 | 28 | const ( 29 | cmdHole byte = iota 30 | cmdBlock 31 | cmdEqual 32 | cmdNotEqual 33 | ) 34 | 35 | var ( 36 | ErrInvalidFormat = errors.New("invalid data format") 37 | ) 38 | 39 | type ProgressListener interface { 40 | Start(size int64) 41 | Update(position int64) 42 | } 43 | 44 | type hashPool []hash.Hash 45 | 46 | type workCtx struct { 47 | buf []byte 48 | n *node 49 | hash hash.Hash 50 | 51 | avail, hashReady chan struct{} 52 | } 53 | 54 | type node struct { 55 | buf [hashSize]byte 56 | parent *node 57 | idx int 58 | 59 | children []*node 60 | 61 | size int 62 | 63 | hash hash.Hash 64 | sum []byte 65 | } 66 | 67 | type tree struct { 68 | root *node 69 | size int64 70 | reader io.ReadSeeker 71 | useBuffer bool 72 | } 73 | 74 | type base struct { 75 | t tree 76 | buf []byte 77 | cmdReader io.Reader 78 | cmdWriter 
io.Writer 79 | 80 | syncProgressListener ProgressListener 81 | } 82 | 83 | type source struct { 84 | base 85 | reader io.ReadSeeker 86 | } 87 | 88 | type target struct { 89 | base 90 | writer *batchingWriter 91 | } 92 | 93 | // Accumulates successive writes into a large buffer so that the writes into the underlying spgz.SpgzFile 94 | // cover compressed blocks completely, so they are not read and unpacked before writing. 95 | type batchingWriter struct { 96 | writer spgz.SparseFile 97 | maxSize int 98 | 99 | offset int64 100 | holeSize int64 101 | buf []byte 102 | } 103 | 104 | func (w *batchingWriter) Flush() error { 105 | if w.holeSize > 0 { 106 | err := w.writer.PunchHole(w.offset, w.holeSize) 107 | if err == nil { 108 | w.offset += w.holeSize 109 | w.holeSize = 0 110 | } 111 | return err 112 | } 113 | if len(w.buf) == 0 { 114 | return nil 115 | } 116 | n, err := w.writer.WriteAt(w.buf, w.offset) 117 | if err != nil { 118 | return err 119 | } 120 | w.buf = w.buf[:0] 121 | w.offset += int64(n) 122 | return nil 123 | } 124 | 125 | func (w *batchingWriter) prepareWrite() error { 126 | if w.holeSize > 0 { 127 | err := w.Flush() 128 | if err != nil { 129 | return err 130 | } 131 | } 132 | if cap(w.buf) < w.maxSize { 133 | buf := make([]byte, w.maxSize) 134 | copy(buf, w.buf) 135 | w.buf = buf[:len(w.buf)] 136 | } 137 | return nil 138 | } 139 | 140 | func (w *batchingWriter) Write(p []byte) (int, error) { 141 | if err := w.prepareWrite(); err != nil { 142 | return 0, err 143 | } 144 | written := 0 145 | for len(p) > 0 { 146 | if len(p) >= w.maxSize && len(w.buf) == 0 { 147 | residue := len(p) % w.maxSize 148 | n, err := w.writer.WriteAt(p[:len(p)-residue], w.offset) 149 | written += n 150 | w.offset += int64(n) 151 | if err != nil { 152 | return written, err 153 | } 154 | p = p[n:] 155 | } else { 156 | n := copy(w.buf[len(w.buf):w.maxSize], p) 157 | w.buf = w.buf[:len(w.buf)+n] 158 | if len(w.buf) == w.maxSize { 159 | n1, err := w.writer.WriteAt(w.buf, w.offset) 160 | 
			// (continuation of batchingWriter.Write — the method's head is above
			// this chunk) The internal buffer was just flushed via WriteAt:
			// n1 = bytes actually written, n = caller bytes copied into the
			// buffer on this pass.
			w.offset += int64(n1)
			// n2 = how many of the *caller's* bytes (as opposed to previously
			// buffered ones) reached the underlying writer.
			n2 := n1 - (len(w.buf) - n)
			w.buf = w.buf[:0]
			if n2 < 0 {
				n2 = 0
			}
			written += n2
			if err != nil {
				return written, err
			}
		} else {
			written += n
		}
		p = p[n:]
	}
	}

	return written, nil
}

// ReadFrom fills the internal buffer from src, flushing every time the
// buffer reaches maxSize. io.EOF from src is treated as normal completion.
func (w *batchingWriter) ReadFrom(src io.Reader) (int64, error) {
	if err := w.prepareWrite(); err != nil {
		return 0, err
	}

	var read int64
	for {
		n, err := src.Read(w.buf[len(w.buf):w.maxSize])
		read += int64(n)
		w.buf = w.buf[:len(w.buf)+n]
		if err == io.EOF {
			return read, nil
		}
		if err != nil {
			return read, err
		}
		if len(w.buf) == w.maxSize {
			err = w.Flush()
			if err != nil {
				return read, err
			}
		}
	}
}

// WriteHole records a hole (a run of zeros that should be skipped rather
// than written). Adjacent holes are coalesced into holeSize; any buffered
// data is flushed first so ordering is preserved.
func (w *batchingWriter) WriteHole(size int64) error {
	if w.holeSize == 0 {
		err := w.Flush()
		if err != nil {
			return err
		}
	}
	w.holeSize += size
	return nil
}

// Seek repositions the writer. The logical position o accounts for any
// pending hole or buffered bytes; seeking away from it flushes first.
// For io.SeekEnd the offset is resolved by the underlying writer.
func (w *batchingWriter) Seek(offset int64, whence int) (int64, error) {
	var o int64
	if w.holeSize > 0 {
		o = w.offset + w.holeSize
	} else {
		o = w.offset + int64(len(w.buf))
	}
	switch whence {
	case io.SeekStart:
		// no-op
	case io.SeekCurrent:
		offset = o + offset
	case io.SeekEnd:
		var err error
		offset, err = w.writer.Seek(offset, whence)
		if err != nil {
			return offset, err
		}
	}
	if offset != o {
		// Flush pending data/hole at the old position before moving.
		err := w.Flush()
		w.offset = offset
		if err != nil {
			return offset, err
		}
	}
	return offset, nil
}

// counting is an embeddable byte counter shared by the counting
// reader/writer wrappers below.
type counting struct {
	count int64
}

// CountingReader wraps an io.Reader and counts bytes read through it.
type CountingReader struct {
	io.Reader
	counting
}

// CountingWriteCloser wraps an io.WriteCloser and counts bytes written
// through it.
type CountingWriteCloser struct {
	io.WriteCloser
	counting
}

// get returns a reset hash from the pool, or a fresh BLAKE2b-512 when the
// pool is empty. The popped slot is nilled so the hash is not retained.
func (p *hashPool) get() (h hash.Hash) {
	l := len(*p)
	if l > 0 {
		l--
		h = (*p)[l]
		(*p)[l] = nil
		*p = (*p)[:l]
		h.Reset()
	} else {
		h, _ = blake2b.New512(nil)
	}
	return
}

// put returns a hash to the pool for later reuse by get.
func (p *hashPool) put(h hash.Hash) {
	*p = append(*p, h)
}

// Count returns the number of bytes that have passed through the wrapper.
func (c *counting) Count() int64 {
	return c.count
}

// Read delegates to the wrapped Reader and accumulates the byte count.
func (r *CountingReader) Read(buf []byte) (n int, err error) {
	n, err = r.Reader.Read(buf)
	r.count += int64(n)
	return
}

// Write delegates to the wrapped WriteCloser and accumulates the byte count.
func (r *CountingWriteCloser) Write(buf []byte) (n int, err error) {
	n, err = r.WriteCloser.Write(buf)
	r.count += int64(n)
	return
}

// next returns the next leaf-order sibling in the tree: the following
// child of the same parent, or the first child of the parent's successor.
// Returns nil at the end of the level (or for the root).
func (n *node) next() *node {
	if n.parent != nil {
		if n.idx < len(n.parent.children)-1 {
			return n.parent.children[n.idx+1]
		}
		nn := n.parent.next()
		if nn != nil {
			return nn.children[0]
		}
	}
	return nil
}

// childReady folds a finished child's digest into this node's running
// hash. On the first child a hash is acquired (reusing h if offered,
// otherwise from the pool); when the last child arrives the node's own
// sum is finalized and propagated to its parent, and the running hash is
// released (handed up or left for the parent via the pool).
func (n *node) childReady(child *node, pool *hashPool, h hash.Hash) {
	if n.hash == nil {
		if h != nil {
			h.Reset()
			n.hash = h
		} else {
			n.hash = pool.get()
		}
	} else {
		if h != nil {
			pool.put(h)
		}
	}
	n.hash.Write(child.sum)
	if child.idx == len(n.children)-1 {
		n.sum = n.hash.Sum(n.buf[:0])
		if n.parent != nil {
			n.parent.childReady(n, pool, n.hash)
		}
		n.hash = nil
	}
}

// buffer returns a scratch slice of exactly size bytes, growing the
// backing array only when needed (with one spare byte headroom).
func (b *base) buffer(size int64) []byte {
	if int64(cap(b.buf)) < size {
		b.buf = make([]byte, size+1)
	}
	return b.buf[:size]
}

// build constructs a hash-tree skeleton of the given order and depth over
// [offset, offset+length). Interior levels split the range proportionally
// (integer arithmetic distributes the remainder); leaves record their size.
func (t *tree) build(offset, length int64, order, level int) *node {
	n := &node{}
	level--
	if level > 0 {
		n.children = make([]*node, order)
		b := offset
		for i := 0; i < order; i++ {
			l := offset + (length * int64(i+1) / int64(order)) - b
			child := t.build(b, l, order, level)
			child.parent = n
			child.idx = i
			n.children[i] = child
			b += l
		}
	} else {
		n.size = int(length)
	}
	return n
}

// first descends to the leftmost leaf under n.
func (t *tree) first(n *node) *node {
	if len(n.children) > 0 {
		return t.first(n.children[0])
	}
	return n
}

// calc chooses the tree geometry (order and depth yielding leaf blocks
// near targetBlockSize), builds the tree, then streams the file hashing
// each leaf with BLAKE2b-512. Hashing is pipelined: the main goroutine
// reads leaf data into one of two work contexts while a collector
// goroutine folds completed leaf sums into parent nodes in leaf order.
func (t *tree) calc(verbose bool, progressListener ProgressListener) error {

	var targetBlockSize int64 = DefTargetBlockSize

	// Cap the number of blocks at ~1M by growing the target block size.
	for t.size/targetBlockSize > 1048576 {
		targetBlockSize <<= 1
	}

	blocks := t.size / targetBlockSize

	levels := 8
	order := 1

	if blocks > 0 {
		// Search for the order whose 7-level tree gives a leaf size
		// closest to targetBlockSize (d tracks the best distance so far).
		var d int64 = -1
		for {
			b := int64(math.Pow(float64(order+1), 7))
			bs := t.size / b
			if bs < targetBlockSize/2 {
				break
			}
			nd := targetBlockSize - bs
			if nd < 0 {
				nd = -nd
			}
			// log.Printf("b: %d, d: %d\n", b, nd)
			if d != -1 && nd > d {
				break
			}
			d = nd
			order++
		}
		if order < 2 {
			// Small file: fall back to a binary tree sized by block count.
			order = 2
			levels = int(math.Log2(float64(blocks))) + 1
		}
	} else {
		// File smaller than one target block: a single leaf.
		levels = 1
		order = 1
	}

	bs := int(float64(t.size) / math.Pow(float64(order), float64(levels-1)))

	if verbose {
		log.Printf("Levels: %d, order: %d, target block size: %d, block size: %d\n", levels, order, targetBlockSize, bs)
	}

	t.root = t.build(0, t.size, order, levels)

	rr := int64(0)

	var reader io.Reader

	if t.useBuffer {
		// Round the buffer size up to a power of two >= the leaf size.
		var bufSize int
		for bufSize = DefTargetBlockSize; bufSize < bs; bufSize <<= 1 {
		}

		reader = bufio.NewReaderSize(t.reader, bufSize)
	} else {
		reader = t.reader
	}

	var pool hashPool = make([]hash.Hash, 0, levels)

	// Two work contexts double-buffer the read/hash pipeline. avail
	// signals a context is free for the reader; hashReady signals its
	// digest is done and may be folded into the tree.
	workItems := make([]*workCtx, 2)
	for i := range workItems {
		workItems[i] = &workCtx{
			buf:       make([]byte, bs+1),
			avail:     make(chan struct{}, 1),
			hashReady: make(chan struct{}, 1),
		}
		workItems[i].hash, _ = blake2b.New512(nil)
		workItems[i].avail <- struct{}{}
	}

	// Collector: consumes hashReady in round-robin (i.e. leaf) order so
	// parent hashes see children in the correct sequence; a nil node is
	// the termination signal.
	go func() {
		idx := 0
		for {
			wi := workItems[idx]
			<-wi.hashReady
			if wi.n == nil {
				break
			}
			if wi.n.parent != nil {
				wi.n.parent.childReady(wi.n, &pool, nil)
			}
			wi.avail <- struct{}{}
			idx++
			if idx >= len(workItems) {
				idx = 0
			}
		}
	}()

	workIdx := 0

	if progressListener != nil {
		progressListener.Start(t.size)
	}

	for n := t.first(t.root); n != nil; n = n.next() {
		if n.size == 0 {
			panic("Leaf node size is zero")
		}

		wi := workItems[workIdx]

		<-wi.avail

		b := wi.buf[:n.size]
		r, err := io.ReadFull(reader, b)
		if err != nil {
			return fmt.Errorf("in calc at %d (expected %d, read %d): %w", rr, len(b), r, err)
		}
		rr += int64(r)
		if progressListener != nil {
			progressListener.Update(rr)
		}

		wi.n = n

		// Hash this leaf concurrently with the next read.
		go func() {
			wi.hash.Write(b)
			wi.n.sum = wi.hash.Sum(wi.n.buf[:0])
			wi.hash.Reset()
			wi.hashReady <- struct{}{}
		}()

		workIdx++
		if workIdx >= len(workItems) {
			workIdx = 0
		}

	}

	// wait until fully processed
	for i := range workItems {
		<-workItems[i].avail
	}

	// finish the goroutine
	workItems[workIdx].n = nil
	workItems[workIdx].hashReady <- struct{}{}

	if rr < t.size {
		return fmt.Errorf("read less data (%d) than expected (%d)", rr, t.size)
	}

	return nil
}

// readHeader reads and validates the protocol header (magic string
// followed by a little-endian uint64 file size).
func readHeader(reader io.Reader) (size int64, err error) {
	buf := make([]byte, len(hdrMagic)+8)
	_, err = io.ReadFull(reader, buf)
	if err != nil {
		return
	}

	if string(buf[:len(hdrMagic)]) != hdrMagic {
		err = ErrInvalidFormat
		return
	}

	size = int64(binary.LittleEndian.Uint64(buf[len(hdrMagic):]))
	return
}

// writeHeader writes the protocol header: magic string plus the local
// file size as a little-endian uint64.
func writeHeader(writer io.Writer, size int64) (err error) {
	buf := make([]byte, len(hdrMagic)+8)
	copy(buf, hdrMagic)
	binary.LittleEndian.PutUint64(buf[len(hdrMagic):], uint64(size))
	_, err = writer.Write(buf)
	return
}

// Source runs the sending side of the sync. It exchanges size headers
// with the target, hashes the common prefix and walks the hash tree
// sending only differing blocks (see source.subtree), then streams any
// local tail beyond the common size as cmdBlock/cmdHole records.
func Source(reader io.ReadSeeker, size int64, cmdReader io.Reader, cmdWriter io.Writer, useBuffer bool, verbose bool, calcPl, syncPl ProgressListener) (err error) {
	err = writeHeader(cmdWriter, size)
	if err != nil {
		return
	}
	var remoteSize int64
	remoteSize, err = readHeader(cmdReader)
	if err != nil {
		return fmt.Errorf("could not read header: %w", err)
	}

	var commonSize int64

	if remoteSize < size {
		commonSize = remoteSize
	} else {
		commonSize = size
	}

	if commonSize > 0 {
		s := source{
			base: base{
				t: tree{
					reader:    reader,
					size:      commonSize,
					useBuffer: useBuffer,
				},
				cmdReader: cmdReader,
				cmdWriter: cmdWriter,
			},
			reader: reader,
		}

		err = s.t.calc(verbose, calcPl)
		if err != nil {
			return
		}

		if syncPl != nil {
			s.syncProgressListener = syncPl
			syncPl.Start(size)
		}

		err = s.subtree(s.t.root, 0, commonSize)
		if err != nil {
			return
		}
	} else {
		if syncPl != nil {
			syncPl.Start(size)
		}
	}

	if size > commonSize {
		// Write the tail
		_, err = reader.Seek(commonSize, io.SeekStart)
		if err != nil {
			return
		}

		holeStart := int64(-1)
		curPos := commonSize
		buf := make([]byte, DefTargetBlockSize)
		bw := bufio.NewWriterSize(cmdWriter, DefTargetBlockSize*2)

		for {
			var r int
			var stop bool
			r, err = io.ReadFull(reader, buf)
			if err != nil {
				if err == io.EOF {
					break
				}
				if err != io.ErrUnexpectedEOF {
					return fmt.Errorf("source, reading tail: %w", err)
				}
				// Short final block: shrink buf and finish after this pass.
				buf = buf[:r]
				stop = true
				err = nil
			}
			if spgz.IsBlockZero(buf) {
				// Zero block: start (or extend) a pending hole.
				if holeStart == -1 {
					holeStart = curPos
				}
			} else {
				if holeStart != -1 {
					// Emit the accumulated hole before the data block.
					err = bw.WriteByte(cmdHole)
					if err != nil {
						return
					}

					err = binary.Write(bw, binary.LittleEndian, curPos-holeStart)
					if err != nil {
						return
					}

					holeStart = -1
				}
				err = bw.WriteByte(cmdBlock)
				if err != nil {
					return
				}
				_, err = bw.Write(buf)
				if err != nil {
					return
				}

			}
			if err != nil {
				return
			}
			curPos += int64(r)
			if syncPl != nil {
				syncPl.Update(curPos)
			}
			if stop {
				break
			}
		}
		if holeStart != -1 {
			// Flush a hole that ran to EOF.
			err = bw.WriteByte(cmdHole)
			if err != nil {
				return
			}

			err = binary.Write(bw, binary.LittleEndian, curPos-holeStart)
			if err != nil {
				return
			}
		}
		err = bw.Flush()
	}

	return
}

// subtree handles one node of the source-side tree walk: it reads the
// target's hash for this span, replies cmdEqual on a match, and otherwise
// either sends the leaf data (cmdNotEqual/cmdHole) or recurses into the
// children, splitting the span the same way build did.
func (s *source) subtree(root *node, offset, size int64) (err error) {
	remoteHash := make([]byte, hashSize)

	_, err = io.ReadFull(s.cmdReader, remoteHash)
	if err != nil {
		return fmt.Errorf("source/subtree, reading hash: %w", err)
	}

	if bytes.Equal(root.sum, remoteHash) {
		err = binary.Write(s.cmdWriter, binary.LittleEndian, cmdEqual)
		if s.syncProgressListener != nil {
			s.syncProgressListener.Update(offset + size)
		}
		return
	}

	if root.size > 0 {
		// log.Printf("Blocks at %d don't match\n", offset)

		if int64(root.size) != size {
			panic("Leaf node size mismatch")
		}

		_, err = s.reader.Seek(offset, io.SeekStart)
		if err != nil {
			return
		}

		buf := s.buffer(size)
		_, err = io.ReadFull(s.reader, buf)
		if err != nil {
			return fmt.Errorf("source read failed at %d: %w", offset, err)
		}

		if spgz.IsBlockZero(buf) {
			// All-zero leaf: tell the target to punch a hole instead of
			// shipping the data.
			err = binary.Write(s.cmdWriter, binary.LittleEndian, cmdHole)
		} else {
			err = binary.Write(s.cmdWriter, binary.LittleEndian, cmdNotEqual)
			if err != nil {
				return
			}

			_, err = s.cmdWriter.Write(buf)
		}
		if s.syncProgressListener != nil {
			s.syncProgressListener.Update(offset + size)
		}
	} else {
		err = binary.Write(s.cmdWriter, binary.LittleEndian, cmdNotEqual)
		if err != nil {
			return
		}

		b := offset
		order := byte(len(root.children))
		for i := byte(0); i < order; i++ {
			l := offset + (size * int64(i+1) / int64(order)) - b
			err = s.subtree(root.children[i], b, l)
			if err != nil {
				return
			}
			b += l
		}
	}

	return
}

// TargetFile is the full set of capabilities the sync target file must
// provide: random-access read/write, closing, and truncation.
type TargetFile interface {
	io.ReadWriteSeeker
	io.WriterAt
	io.Closer
	spgz.Truncatable
}

// FixingSpgzFileWrapper conceals read errors caused by compressed data corruption by re-writing the corrupt
// blocks with zeros. Such errors are usually caused by abrupt termination of the writing process.
// This wrapper is used as the sync target so the corrupt blocks will be updated during the sync process.
type FixingSpgzFileWrapper struct {
	*spgz.SpgzFile
}

// checkErr turns a corrupt-compressed-block error into a repair: a
// zero-size corrupt block at the tail is truncated away, otherwise the
// block is overwritten with zeros (the sync will re-send it). Any other
// error is returned unchanged.
func (rw *FixingSpgzFileWrapper) checkErr(err error) error {
	var ce *spgz.ErrCorruptCompressedBlock
	if errors.As(err, &ce) {
		if ce.Size() == 0 {
			return rw.SpgzFile.Truncate(ce.Offset())
		}

		buf := make([]byte, ce.Size())
		_, err = rw.SpgzFile.WriteAt(buf, ce.Offset())
	}
	return err
}

// Read retries after a successful corruption fix; the loop guards
// against returning (0, nil) when the first attempt was repaired.
func (rw *FixingSpgzFileWrapper) Read(p []byte) (n int, err error) {
	for n == 0 && err == nil { // avoid returning (0, nil) after a fix
		n, err = rw.SpgzFile.Read(p)
		if err != nil {
			err = rw.checkErr(err)
		}
	}
	return
}

// Seek retries once after a successful corruption fix.
func (rw *FixingSpgzFileWrapper) Seek(offset int64, whence int) (int64, error) {
	o, err := rw.SpgzFile.Seek(offset, whence)
	if err != nil {
		err = rw.checkErr(err)
		if err == nil {
			o, err = rw.SpgzFile.Seek(offset, whence)
		}
	}
	return o, err
}

// Target runs the receiving side of the sync. Headers are exchanged (the
// local one written concurrently to avoid a pipe deadlock), the common
// prefix is hashed and reconciled via target.subtree through a batching
// writer, then the file is extended with the source's streamed tail or
// truncated, depending on the size difference.
func Target(writer spgz.SparseFile, size int64, cmdReader io.Reader, cmdWriter io.Writer, useReadBuffer bool, verbose bool, calcPl, syncPl ProgressListener) (err error) {

	ch := make(chan error)
	go func() {
		ch <- writeHeader(cmdWriter, size)
	}()

	var remoteSize int64
	remoteSize, err = readHeader(cmdReader)
	if err != nil {
		return
	}

	err = <-ch
	if err != nil {
		return
	}

	commonSize := size
	if remoteSize < commonSize {
		commonSize = remoteSize
	}

	if commonSize > 0 {
		t := target{
			base: base{
				t: tree{
					reader:    writer,
					size:      commonSize,
					useBuffer: useReadBuffer,
				},
				cmdReader: cmdReader,
				cmdWriter: cmdWriter,
			},
			// Batch small writes into DefTargetBlockSize*16 chunks.
			writer: &batchingWriter{writer: writer, maxSize: DefTargetBlockSize * 16},
		}
		err = t.t.calc(verbose, calcPl)
		if err != nil {
			return
		}

		if syncPl != nil {
			t.syncProgressListener = syncPl
			syncPl.Start(remoteSize)
		}

		err = t.subtree(t.t.root, 0, commonSize)
		if err != nil {
			return
		}
		err = t.writer.Flush()
		if err != nil {
			return
		}
		if syncPl != nil {
			syncPl.Update(commonSize)
		}
	} else {
		if syncPl != nil {
			syncPl.Start(remoteSize)
		}
	}

	if size < remoteSize {
		// Read the tail
		pos := commonSize
		_, err = writer.Seek(pos, io.SeekStart)
		if err != nil {
			return
		}

		hole := false
		rd := bufio.NewReaderSize(cmdReader, DefTargetBlockSize*2)

		for {
			var cmd byte
			cmd, err = rd.ReadByte()
			if err != nil {
				if err == io.EOF {
					err = nil
					break
				}
				return fmt.Errorf("target: while reading tail block header: %w", err)
			}

			if cmd == cmdBlock {
				// A data block; the final one may be short (io.EOF).
				var n int64
				n, err = io.CopyN(writer, rd, DefTargetBlockSize)
				pos += n

				hole = false
				if err != nil {
					if err == io.EOF {
						err = nil
						if syncPl != nil {
							syncPl.Update(pos)
						}
						break
					} else {
						return fmt.Errorf("target: while copying block: %w", err)
					}
				}
			} else {
				if cmd == cmdHole {
					// A hole: skip forward instead of writing zeros.
					var holeSize int64
					err = binary.Read(rd, binary.LittleEndian, &holeSize)
					if err != nil {
						return fmt.Errorf("target: while reading hole size: %w", err)
					}
					_, err = writer.Seek(holeSize, io.SeekCurrent)
					if err != nil {
						return
					}
					hole = true
					pos += holeSize
				} else {
					return fmt.Errorf("unexpected cmd: %d", cmd)
				}
			}
			if syncPl != nil {
				syncPl.Update(pos)
			}
		}

		if hole {
			// A trailing hole only moved the file pointer; truncate up to
			// materialize the final size.
			if f, ok := writer.(spgz.Truncatable); ok {
				err = f.Truncate(remoteSize)
			}
		}

	} else if size > remoteSize {
		// Truncate target
		if f, ok := writer.(spgz.Truncatable); ok {
			err = f.Truncate(commonSize)
		}
	}

	return
}

// subtree handles one node of the target-side walk: it sends the local
// hash for this span and acts on the source's verdict — nothing on
// cmdEqual, copy incoming data or punch a hole for a differing leaf, or
// recurse into children for a differing interior node.
func (t *target) subtree(root *node, offset, size int64) (err error) {
	_, err = t.cmdWriter.Write(root.sum)
	if err != nil {
		return
	}

	var cmd byte
	err = binary.Read(t.cmdReader, binary.LittleEndian, &cmd)
	if err != nil {
		return fmt.Errorf("target: while reading block header at %d: %w", offset, err)
	}

	// log.Printf("offset: %d, size: %d, cmd: %d\n", offset, size, cmd)

	if cmd == cmdNotEqual || cmd == cmdHole {
		if root.size > 0 {
			_, err = t.writer.Seek(offset, io.SeekStart)
			if err != nil {
				return
			}

			if cmd == cmdNotEqual {
				_, err = io.CopyN(t.writer, t.cmdReader, size)
				if err != nil {
					err = fmt.Errorf("while copying block data at %d: %w", offset, err)
				}
			} else {
				err = t.writer.WriteHole(size)
			}
		} else {
			// Interior node: recurse with the same span split as build.
			b := offset
			order := byte(len(root.children))
			for i := byte(0); i < order; i++ {
				l := offset + (size * int64(i+1) / int64(order)) - b
				err = t.subtree(root.children[i], b, l)
				if err != nil {
					return
				}
				b += l
			}
		}
	}

	return
}
--------------------------------------------------------------------------------
/sync_test.go:
--------------------------------------------------------------------------------
package diskrsync

import (
	"bytes"
	"crypto/sha1"
	"errors"
	"hash"
	"io"
	"math/rand"
	"os"
	"reflect"
	"testing"
	"time"

	"github.com/dop251/spgz"

	"golang.org/x/crypto/blake2b"
)

// memSparseFile is an in-memory stand-in for a sparse file, used as both
// sync endpoint and spgz backing store in the tests.
type memSparseFile struct {
	data   []byte
	offset int64
}

// Read implements io.Reader over the in-memory data.
func (s *memSparseFile) Read(buf []byte) (n int, err error) {
	if s.offset >= int64(len(s.data)) {
		err = io.EOF
		return
	}
	n = copy(buf, s.data[s.offset:])
	s.offset += int64(n)
	return
}

// ensureSize grows the backing slice to newSize, reusing capacity when
// possible and zeroing the exposed region.
func (s *memSparseFile) ensureSize(newSize int64) {
	if newSize > int64(len(s.data)) {
		if newSize <= int64(cap(s.data)) {
			l := int64(len(s.data))
			s.data = s.data[:newSize]
			// NOTE(review): zeroes only up to s.offset, not newSize —
			// presumably callers always write the rest immediately; verify.
			for i := l; i < s.offset; i++ {
				s.data[i] = 0
			}
		} else {
			d := make([]byte, newSize)
			copy(d, s.data)
			s.data = d
		}
	}
}

// Write implements io.Writer at the current offset, growing as needed.
func (s *memSparseFile) Write(buf []byte) (n int, err error) {
	s.ensureSize(s.offset + int64(len(buf)))
	n = copy(s.data[s.offset:], buf)
	if n < len(buf) {
		err = io.ErrShortWrite
	}
	s.offset += int64(n)
	return
}

// Seek implements io.Seeker; seeking past the end is allowed.
func (s *memSparseFile) Seek(offset int64, whence int) (int64, error) {
	switch whence {
	case io.SeekStart:
		s.offset = offset
		return s.offset, nil
	case io.SeekCurrent:
		s.offset += offset
		return s.offset, nil
	case io.SeekEnd:
		s.offset = int64(len(s.data)) + offset
		return s.offset, nil
	}
	return s.offset, errors.New("invalid whence")
}

// Truncate grows (zero-filling) or shrinks the data to size.
func (s *memSparseFile) Truncate(size int64) error {
	if size > int64(len(s.data)) {
		if size <= int64(cap(s.data)) {
			l := len(s.data)
			s.data = s.data[:size]
			for i := l; i < len(s.data); i++ {
				s.data[i] = 0
			}
		} else {
			d := make([]byte, size)
			copy(d, s.data)
			s.data = d
		}
	}	else if size < int64(len(s.data)) {
		s.data = s.data[:size]
	}
	return nil
}

// PunchHole zeroes [offset, offset+size), clipped to the current length.
func (s *memSparseFile) PunchHole(offset, size int64) error {
	if offset < int64(len(s.data)) {
		d := offset + size - int64(len(s.data))
		if d > 0 {
			size -= d
		}
		for i := offset; i < offset+size; i++ {
			s.data[i] = 0
		}
	}
	return nil
}

// ReadAt implements io.ReaderAt; short reads report io.EOF.
func (s *memSparseFile) ReadAt(p []byte, off int64) (n int, err error) {
	if off < int64(len(s.data)) {
		n = copy(p, s.data[off:])
	}
	if n < len(p) {
		err = io.EOF
	}
	return
}

// WriteAt implements io.WriterAt, growing the data as needed.
func (s *memSparseFile) WriteAt(p []byte, off int64) (n int, err error) {
	s.ensureSize(off + int64(len(p)))
	n = copy(s.data[off:], p)
	return
}

// Close is a no-op.
func (s *memSparseFile) Close() error {
	return nil
}

// Sync is a no-op.
func (s *memSparseFile) Sync() error {
	return nil
}

// Bytes exposes the raw backing slice (used by getBytes).
func (s *memSparseFile) Bytes() []byte {
	return s.data
}

// TestZero syncs after zeroing a range in the source.
func TestZero(t *testing.T) {
	src := make([]byte, 1*1024*1024)
	dst := make([]byte, 1*1024*1024)
	for i := 0; i < len(src); i++ {
		src[i] = byte(i)
	}

	copy(dst, src)

	for i := 33; i < 133000; i++ {
		src[i] = 0
	}

	syncAndCheckEqual(src, dst, t)
}

// TestRandomChange syncs after randomizing a range in the source.
func TestRandomChange(t *testing.T) {
	src := make([]byte, 2*1024*1024)
	dst := make([]byte, 2*1024*1024)

	for i := 0; i < len(src); i++ {
		src[i] = byte(rand.Int31n(256))
	}

	copy(dst, src)

	for i := 333; i < 133000; i++ {
		src[i] = byte(rand.Int31n(256))
	}

	syncAndCheckEqual(src, dst, t)
}

// TestExpand syncs when the source is larger than the target.
func TestExpand(t *testing.T) {
	dst := make([]byte, 2*1024*1024)

	for i := 0; i < len(dst); i++ {
		dst[i] = byte(rand.Int31n(256))
	}

	src := make([]byte, 2*1024*1024+333333)

	copy(src, dst)

	for i := len(dst); i < len(src); i++ {
		src[i] = byte(rand.Int31n(256))
	}

	syncAndCheckEqual(src, dst, t)
}

// TestExpandWithZeros expands with an all-zero tail (exercises cmdHole).
func TestExpandWithZeros(t *testing.T) {
	dst := make([]byte, 2*1024*1024)

	for i := 0; i < len(dst); i++ {
		dst[i] = byte(rand.Int31n(256))
	}

	src := make([]byte, 2*1024*1024+333333)

	copy(src, dst)

	syncAndCheckEqual(src, dst, t)
}

// TestShrink syncs when the source is smaller than the target.
func TestShrink(t *testing.T) {
	dst := make([]byte, 2*1024*1024+333333)

	for i := 0; i < len(dst); i++ {
		dst[i] = byte(rand.Int31n(256))
	}

	src := make([]byte, 2*1024*1024)

	copy(src, dst)

	syncAndCheckEqual(src, dst, t)
}

// TestNoChange pins the exact wire overhead of a no-op sync
// (17 bytes sent by the target, 80 received: header + root hash exchange).
func TestNoChange(t *testing.T) {
	src := make([]byte, 2*1024*1024)
	dst := make([]byte, 2*1024*1024)

	for i := 0; i < len(src); i++ {
		src[i] = byte(rand.Int31n(256))
	}

	copy(dst, src)

	sent, received := syncAndCheckEqual(src, dst, t)
	if sent != 17 {
		t.Fatalf("Sent %d bytes (expected 17)", sent)
	}
	if received != 80 {
		t.Fatalf("Received %d bytes (expected 80)", received)
	}
}

// TestSmallFile syncs a file smaller than one target block.
func TestSmallFile(t *testing.T) {
	src := make([]byte, 128)
	dst := make([]byte, 128)

	for i := range src {
		src[i] = 'x'
	}

	syncAndCheckEqual(src, dst, t)
}

// TestCorruptCompressedBlock corrupts an interior spgz block and checks
// that FixingSpgzFileWrapper lets the sync repair it.
func TestCorruptCompressedBlock(t *testing.T) {
	var f memSparseFile
	sf, err := spgz.NewFromSparseFileSize(&f, os.O_RDWR|os.O_CREATE, 3*4096)
	if err != nil {
		t.Fatal(err)
	}
	src := make([]byte, 2*1024*1024)

	for i := 0; i < len(src); i++ {
		src[i] = 'x'
	}

	_, err = sf.WriteAt(src, 0)
	if err != nil {
		t.Fatal(err)
	}

	err = sf.Close()
	if err != nil {
		t.Fatal(err)
	}

	// Sanity-check the block header byte before flipping a data byte.
	if f.data[4096] != 1 {
		t.Fatalf("data: %d", f.data[4096])
	}

	f.data[4098] = ^f.data[4098]

	_, _ = f.Seek(0, io.SeekStart)

	sf, err = spgz.NewFromSparseFileSize(&f, os.O_RDWR, 4096)
	if err != nil {
		t.Fatal(err)
	}

	syncAndCheckEqual1(&memSparseFile{data: src}, &FixingSpgzFileWrapper{SpgzFile: sf}, t)
}

// TestCorruptLastCompressedBlock is the same scenario targeting the
// final compressed block.
func TestCorruptLastCompressedBlock(t *testing.T) {
	var f memSparseFile
	sf, err := spgz.NewFromSparseFileSize(&f, os.O_RDWR|os.O_CREATE, 3*4096)
	if err != nil {
		t.Fatal(err)
	}
	src := make([]byte, 2*1024*1024)

	for i := 0; i < len(src); i++ {
		src[i] = 'x'
	}

	_, err = sf.WriteAt(src, 0)
	if err != nil {
		t.Fatal(err)
	}

	err = sf.Close()
	if err != nil {
		t.Fatal(err)
	}

	// Offset of the last block's header within the container.
	offset := 4096 + len(src)/(3*4096-1)*(3*4096)

	if f.data[offset] != 1 {
		t.Fatalf("data: %d", f.data[offset])
	}

	f.data[offset+2] = ^f.data[offset+2]

	_, _ = f.Seek(0, io.SeekStart)

	sf, err = spgz.NewFromSparseFileSize(&f, os.O_RDWR, 4096)
	if err != nil {
		t.Fatal(err)
	}

	syncAndCheckEqual1(&memSparseFile{data: src}, &FixingSpgzFileWrapper{SpgzFile: sf}, t)
}

// TestRandomFiles syncs two fully random files through an spgz container.
func TestRandomFiles(t *testing.T) {
	var srcFile, dstFile memSparseFile
	sf, err := spgz.NewFromSparseFile(&dstFile, os.O_RDWR|os.O_CREATE)
	if err != nil {
		t.Fatal(err)
	}
	rand.Seed(1234567890)
	buf := make([]byte, 100*DefTargetBlockSize)
	rand.Read(buf)
	_, err = sf.WriteAt(buf, 0)
	if err != nil {
		t.Fatal(err)
	}

	// WriteAt must not move the file position.
	o, err := sf.Seek(0, io.SeekCurrent)
	if err != nil {
		t.Fatal(err)
	}
	if o != 0 {
		t.Fatalf("o: %d", o)
	}

	rand.Read(buf)
	_, err = srcFile.WriteAt(buf, 0)
	if err != nil {
		t.Fatal(err)
	}

	syncAndCheckEqual1(&srcFile, sf, t)
}

// testOplogItem records one WriteAt call (offset and length).
type testOplogItem struct {
	offset int64
	length int
}

// testLoggingSparseFile wraps a SparseFile and logs every WriteAt,
// letting TestBatchingWriter assert on write coalescing.
type testLoggingSparseFile struct {
	spgz.SparseFile
	wrlog []testOplogItem
}

// WriteAt logs the call and delegates to the wrapped file.
func (f *testLoggingSparseFile) WriteAt(buf []byte, offset int64) (int, error) {
	f.wrlog = append(f.wrlog, testOplogItem{
		offset: offset,
		length: len(buf),
	})
	return f.SparseFile.WriteAt(buf, offset)
}

// TestBatchingWriter checks that batchingWriter coalesces small writes,
// passes through max-size chunks, and flushes correctly around seeks.
func TestBatchingWriter(t *testing.T) {
	var sf memSparseFile
	lsf := &testLoggingSparseFile{
		SparseFile: &sf,
	}
	wr := &batchingWriter{
		writer:  lsf,
		maxSize: 100,
	}

	reset := func(t *testing.T) {
		_, err := wr.Seek(0, io.SeekStart)
		if err != nil {
			t.Fatal(err)
		}
		lsf.wrlog = lsf.wrlog[:0]
		sf.data = nil
		sf.offset = 0
	}

	t.Run("large_chunk", func(t *testing.T) {
		// 502 bytes with maxSize 100: expect one 500-byte write plus a
		// 2-byte remainder after Flush.
		buf := make([]byte, 502)
		rand.Read(buf)
		reset(t)
		n, err := wr.Write(buf)
		if err != nil {
			t.Fatal(err)
		}
		if n != len(buf) {
			t.Fatal(n)
		}
		err = wr.Flush()
		if err != nil {
			t.Fatal(err)
		}
		if !bytes.Equal(sf.Bytes(), buf) {
			t.Fatal("not equal")
		}
		if !reflect.DeepEqual(lsf.wrlog, []testOplogItem{
			{offset: 0, length: 500},
			{offset: 500, length: 2},
		}) {
			t.Fatalf("Oplog: %#v", lsf.wrlog)
		}
	})

	t.Run("exact", func(t *testing.T) {
		// Exactly maxSize bytes: a single write.
		buf := make([]byte, 100)
		rand.Read(buf)
		reset(t)
		n, err := wr.Write(buf)
		if err != nil {
			t.Fatal(err)
		}
		if n != len(buf) {
			t.Fatal(n)
		}
		err = wr.Flush()
		if err != nil {
			t.Fatal(err)
		}
		if !bytes.Equal(sf.Bytes(), buf) {
			t.Fatal("not equal")
		}
		if !reflect.DeepEqual(lsf.wrlog, []testOplogItem{
			{offset: 0, length: 100},
		}) {
			t.Fatalf("Oplog: %#v", lsf.wrlog)
		}
	})

	t.Run("two_small", func(t *testing.T) {
		// Two 50-byte writes must coalesce into one 100-byte WriteAt.
		buf := make([]byte, 100)
		rand.Read(buf)
		reset(t)
		n, err := wr.Write(buf[:50])
		if err != nil {
			t.Fatal(err)
		}
		if n != 50 {
			t.Fatal(n)
		}

		n, err = wr.Write(buf[50:])
		if err != nil {
			t.Fatal(err)
		}
		if n != 50 {
			t.Fatal(n)
		}

		err = wr.Flush()
		if err != nil {
			t.Fatal(err)
		}
		if !bytes.Equal(sf.Bytes(), buf) {
			t.Fatal("not equal")
		}
		if !reflect.DeepEqual(lsf.wrlog, []testOplogItem{
			{offset: 0, length: 100},
		}) {
			t.Fatalf("Oplog: %#v", lsf.wrlog)
		}
	})

	t.Run("seek", func(t *testing.T) {
		// A seek away from the buffered position must flush the first
		// chunk, leaving a gap between the two writes.
		buf := make([]byte, 100)
		rand.Read(buf)
		reset(t)
		n, err := wr.Write(buf[:50])
		if err != nil {
			t.Fatal(err)
		}
		if n != 50 {
			t.Fatal(n)
		}

		o, err := wr.Seek(0, io.SeekCurrent)
		if err != nil {
			t.Fatal(err)
		}
		if o != 50 {
			t.Fatal(o)
		}

		o, err = wr.Seek(55, io.SeekStart)
		if err != nil {
			t.Fatal(err)
		}
		if o != 55 {
			t.Fatal(o)
		}

		n, err = wr.Write(buf[50:])
		if err != nil {
			t.Fatal(err)
		}
		if n != 50 {
			t.Fatal(n)
		}
		err = wr.Flush()
		if err != nil {
			t.Fatal(err)
		}

		exp := make([]byte, 105)
		copy(exp, buf[:50])
		copy(exp[55:], buf[50:])

		if !bytes.Equal(sf.Bytes(), exp) {
			t.Fatal("not equal")
		}
		if !reflect.DeepEqual(lsf.wrlog, []testOplogItem{
			{offset: 0, length: 50},
			{offset: 55, length: 50},
		}) {
			t.Fatalf("Oplog: %#v", lsf.wrlog)
		}
	})
}

// TestFuzz runs many randomized sync rounds (random sizes, mutated
// blocks of random/constant/zero data) against both a plain byte slice
// and an spgz container, and cross-checks the traffic counts agree.
func TestFuzz(t *testing.T) {
	const (
		fileSize = 30 * 1024 * 1024

		numBlocks      = 50
		numBlocksDelta = 128

		blockSize      = 64 * 1024
		blockSizeDelta = 32 * 1024
	)

	if testing.Short() {
		t.Skip()
	}
	seed := time.Now().UnixNano()
	t.Logf("Seed: %d", seed)
	rnd := rand.New(rand.NewSource(seed))
	roll := func(mean, delta int) int {
		return mean + int(rnd.Int31n(int32(delta))) - delta/2
	}

	srcBuf := make([]byte, fileSize)
	dstBuf := make([]byte, fileSize)

	blockBuf := make([]byte, 0, blockSize+blockSizeDelta/2)
	zeroBlockBuf := make([]byte, 0, blockSize+blockSizeDelta/2)

	// mutateBlock overwrites a random range with random bytes, a constant
	// fill, or zeros (weighted by typ).
	mutateBlock := func(buf []byte) {
		size := roll(blockSize, blockSizeDelta)
		offset := int(rnd.Int31n(int32(len(buf))))
		blk := blockBuf[:size]
		typ := rnd.Int31n(16)
		if typ >= 5 {
			rnd.Read(blk)
		} else if typ >= 3 {
			for i := range blk {
				blk[i] = 'x'
			}
		} else {
			blk = zeroBlockBuf[:size]
		}
		copy(buf[offset:], blk)
	}

	for i := 0; i < 50; i++ {
		t.Logf("Running file %d", i)
		dice := rnd.Int31n(16)
		var srcSize, dstSize int
		srcSize = int(rnd.Int31n(fileSize))
		if dice > 4 {
			dstSize = int(rnd.Int31n(fileSize))
		} else {
			dstSize = srcSize
		}
		srcBuf = srcBuf[:srcSize]
		dstBuf = dstBuf[:dstSize]
		rnd.Read(srcBuf)
		nBlocks := roll(numBlocks, numBlocksDelta)
		for i := 0; i < nBlocks; i++ {
			mutateBlock(srcBuf)
		}

		copy(dstBuf, srcBuf)

		nBlocks = roll(numBlocks, numBlocksDelta)
		for i := 0; i < nBlocks; i++ {
			mutateBlock(dstBuf)
		}

		var mf memSparseFile
		sf, err := spgz.NewFromSparseFile(&mf, os.O_RDWR|os.O_CREATE)
		if err != nil {
			t.Fatal(err)
		}
		_, err = sf.WriteAt(dstBuf, 0)
		if err != nil {
			t.Fatal(err)
		}
		sent, received := syncAndCheckEqual(srcBuf, dstBuf, t)
		t.Logf("src size: %d, sent: %d, received: %d", len(srcBuf), sent, received)
		sent1, received1 := syncAndCheckEqual1(&memSparseFile{data: srcBuf}, sf, t)
		if sent != sent1 {
			t.Fatalf("Sent counts did not match: %d, %d", sent, sent1)
		}
		if received != received1 {
			t.Fatalf("Received counts did not match: %d, %d", received, received1)
		}
	}
}

// syncAndCheckEqual is the []byte convenience form of syncAndCheckEqual1.
func syncAndCheckEqual(src, dst []byte, t *testing.T) (sent, received int64) {
	return syncAndCheckEqual1(&memSparseFile{data: src}, &memSparseFile{data: dst}, t)
}

func getSize(s io.Seeker) (int64, error) { 628 | o, err := s.Seek(0, io.SeekCurrent) 629 | if err != nil { 630 | return 0, err 631 | } 632 | size, err := s.Seek(0, io.SeekEnd) 633 | if err != nil { 634 | return 0, err 635 | } 636 | _, err = s.Seek(o, io.SeekStart) 637 | return size, err 638 | } 639 | 640 | func getBytes(r io.ReadSeeker) ([]byte, error) { 641 | if b, ok := r.(interface { 642 | Bytes() []byte 643 | }); ok { 644 | return b.Bytes(), nil 645 | } 646 | 647 | _, err := r.Seek(0, io.SeekStart) 648 | if err != nil { 649 | return nil, err 650 | } 651 | 652 | return io.ReadAll(r) 653 | } 654 | 655 | func syncAndCheckEqual1(src io.ReadSeeker, dst spgz.SparseFile, t *testing.T) (sent, received int64) { 656 | srcReader, dstWriter := io.Pipe() 657 | dstReader, srcWriter := io.Pipe() 658 | 659 | dstReaderC := &CountingReader{Reader: dstReader} 660 | dstWriterC := &CountingWriteCloser{WriteCloser: dstWriter} 661 | 662 | srcErrChan := make(chan error, 1) 663 | 664 | srcSize, err := getSize(src) 665 | if err != nil { 666 | t.Fatal(err) 667 | } 668 | dstSize, err := getSize(dst) 669 | if err != nil { 670 | t.Fatal(err) 671 | } 672 | 673 | go func() { 674 | err := Source(src, srcSize, srcReader, srcWriter, false, false, nil, nil) 675 | cerr := srcWriter.Close() 676 | if err == nil { 677 | err = cerr 678 | } 679 | srcErrChan <- err 680 | }() 681 | 682 | err = Target(dst, dstSize, dstReaderC, dstWriterC, false, false, nil, nil) 683 | cerr := dstWriter.Close() 684 | if err == nil { 685 | err = cerr 686 | } 687 | 688 | if err != nil { 689 | t.Fatal(err) 690 | } 691 | 692 | if err = <-srcErrChan; err != nil { 693 | t.Fatal(err) 694 | } 695 | 696 | srcBytes, err := getBytes(src) 697 | if err != nil { 698 | t.Fatal(err) 699 | } 700 | 701 | dstBytes, err := getBytes(dst) 702 | if err != nil { 703 | t.Fatal(err) 704 | } 705 | 706 | if len(srcBytes) != len(dstBytes) { 707 | t.Fatalf("Len not equal: %d, %d", len(srcBytes), len(dstBytes)) 708 | } 709 | for i := 0; i < 
len(srcBytes); i++ { 710 | if srcBytes[i] != dstBytes[i] { 711 | t.Fatalf("Data mismatch at %d: %d, %d", i, srcBytes[i], dstBytes[i]) 712 | } 713 | } 714 | /*if !bytes.Equal(srcBytes, dstBytes) { 715 | t.Fatal("Not equal") 716 | }*/ 717 | 718 | return dstReaderC.Count(), dstWriterC.Count() 719 | } 720 | 721 | func BenchmarkBlake2(b *testing.B) { 722 | b.StopTimer() 723 | h, _ := blake2b.New512(nil) 724 | buf := make([]byte, 4096) 725 | for i := 0; i < len(buf); i++ { 726 | buf[i] = byte(i) 727 | } 728 | 729 | b.StartTimer() 730 | for i := 0; i < b.N; i++ { 731 | h.Write(buf) 732 | } 733 | } 734 | 735 | func BenchmarkBlake256(b *testing.B) { 736 | b.StopTimer() 737 | h, _ := blake2b.New256(nil) 738 | buf := make([]byte, 4096) 739 | for i := 0; i < len(buf); i++ { 740 | buf[i] = byte(i) 741 | } 742 | 743 | b.StartTimer() 744 | for i := 0; i < b.N; i++ { 745 | h.Write(buf) 746 | } 747 | 748 | } 749 | 750 | func BenchmarkSHA1(b *testing.B) { 751 | b.StopTimer() 752 | h := sha1.New() 753 | buf := make([]byte, 4096) 754 | for i := 0; i < len(buf); i++ { 755 | buf[i] = byte(i) 756 | } 757 | 758 | b.StartTimer() 759 | for i := 0; i < b.N; i++ { 760 | h.Write(buf) 761 | } 762 | 763 | } 764 | 765 | func BenchmarkSequential(b *testing.B) { 766 | b.StopTimer() 767 | hashes := make([]hash.Hash, 8) 768 | for i := 0; i < len(hashes); i++ { 769 | hashes[i], _ = blake2b.New512(nil) 770 | } 771 | buf := make([]byte, 32768) 772 | for i := 0; i < len(buf); i++ { 773 | buf[i] = byte(i) 774 | } 775 | 776 | b.StartTimer() 777 | for i := 0; i < b.N; i++ { 778 | for j := 0; j < len(hashes); j++ { 779 | hashes[j].Write(buf) 780 | } 781 | } 782 | } 783 | 784 | func BenchmarkParallel(b *testing.B) { 785 | b.StopTimer() 786 | hashes := make([]hash.Hash, 8) 787 | for i := 0; i < len(hashes); i++ { 788 | hashes[i], _ = blake2b.New512(nil) 789 | } 790 | buf := make([]byte, 32768) 791 | for i := 0; i < len(buf); i++ { 792 | buf[i] = byte(i) 793 | } 794 | 795 | ch := make(chan int, 8) 796 | 797 | 
b.StartTimer() 798 | for i := 0; i < b.N; i++ { 799 | for j := 0; j < len(hashes); j++ { 800 | go func(idx int) { 801 | hashes[idx].Write(buf) 802 | ch <- 1 803 | }(j) 804 | } 805 | 806 | for j := 0; j < len(hashes); j++ { 807 | <-ch 808 | } 809 | 810 | } 811 | } 812 | --------------------------------------------------------------------------------