├── README.md
└── blocksync.py

/README.md:
--------------------------------------------------------------------------------
## About
This script synchronizes (large) files to a local or remote destination using an incremental algorithm. Block devices are treated like regular files and can be synchronized, too.

blocksync.py is also a workaround for a limitation of [rsync](https://rsync.samba.org): rsync cannot synchronize a *device* using its incremental algorithm. blocksync.py can sync a device file block by block to a remote SSH destination. When called multiple times it only copies the blocks that were modified - this speeds up the copy process and saves a lot of bandwidth.

## Use cases
* Moving physical machines to virtual ones ([p2v](https://en.wikipedia.org/wiki/Physical-to-Virtual))
* Backing up the hard drives of failed machines
* Synchronizing large files to a (remote) destination using a fast and efficient algorithm

## Requirements
* SSH client on the source server
* SSH server on the destination server, with root permissions (direct root login or sudo) if syncing to a device file
* Python on both the source and the destination server
* blocksync.py in the home directory of the destination server (executable)

## Usage
Please make sure that the source file is not changed during the sync: blocksync.py will **not** notice changes made at file positions that have already been copied. You may want to boot a live Linux ([grml](https://grml.org/), [knoppix](http://www.knoppix.org), [systemrescuecd](http://www.system-rescue-cd.org) etc.) if you want to sync the system drives of a running machine.

### Synchronize to a file on a remote server
`root@source# python blocksync.py /dev/source/file user@destination.example.com /path/to/destination/file`

### Synchronize to a local file
`root@source# python blocksync.py /dev/source/file localhost /path/to/destination/file`

## Command line options
Please run `python blocksync.py` without any arguments to get the full list of available options.

## Contributing
Please feel free to file a bug report here on GitHub or open a pull request - all help is welcome!
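## How it works
For illustration, here is a minimal sketch of the block-comparison idea described in the About section, assuming two local files of equal size. The helper names `changed_blocks` and `patch` are invented for this example and are not part of blocksync.py, which streams only the block hashes (and the differing blocks) over SSH instead of reading both copies locally:

```python
import hashlib

BLOCKSIZE = 1024 * 1024  # 1 MiB, the script's default block size

def changed_blocks(src_path, dst_path, blocksize=BLOCKSIZE):
    """Yield (offset, block) for every source block whose digest differs from the destination's."""
    with open(src_path, 'rb') as src, open(dst_path, 'rb') as dst:
        offset = 0
        while True:
            s_block = src.read(blocksize)
            if not s_block:
                break
            d_block = dst.read(blocksize)
            # Only the digests are compared; the full block is copied only
            # when they differ (sha512 is the script's default hash).
            if hashlib.sha512(s_block).digest() != hashlib.sha512(d_block).digest():
                yield offset, s_block
            offset += blocksize

def patch(src_path, dst_path):
    """Overwrite only the differing blocks of dst with the corresponding blocks of src."""
    with open(dst_path, 'rb+') as dst:
        for offset, block in changed_blocks(src_path, dst_path):
            dst.seek(offset)
            dst.write(block)
```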
--------------------------------------------------------------------------------
/blocksync.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python2
"""
Synchronise block devices over the network

Copyright 2006-2008 Justin Azoff
Copyright 2011 Robert Coup
Copyright 2012 Holger Ernst
Copyright 2014 Robert McQueen
Copyright 2016 Theodor-Iulian Ciobanu
License: GPL

Getting started:

* Copy blocksync.py to the home directory on the remote host & make it executable
* Make sure your remote user is either root or can sudo (use -s for sudo)
* Make sure your local user can ssh to the remote host (use -i for a SSH key)
* Invoke:
    python blocksync.py /dev/source [user@]remotehost [/dev/dest]

* Specify localhost for local usage:
    python blocksync.py /dev/source localhost /dev/dest
"""

from __future__ import print_function
import os
import sys
import signal
import hashlib
from math import ceil
import subprocess
import time
from datetime import timedelta

SAME = b"0"
DIFF = b"1"
COMPLEN = len(SAME)  # SAME/DIFF length

LOCAL_FADVISE = 1
REMOTE_FADVISE = 2

if callable(getattr(os, "posix_fadvise", False)):
    from os import posix_fadvise, POSIX_FADV_NOREUSE, POSIX_FADV_DONTNEED
    fadvise = lambda fileobj, offset, length, advice: posix_fadvise(fileobj.fileno(), offset, length, advice)
else:
    try:
        from fadvise import set_advice, POSIX_FADV_NOREUSE, POSIX_FADV_DONTNEED
        fadvise = lambda fileobj, offset, length, advice: set_advice(fileobj, advice, offset, length)
    except:
        fadvise = None

if fadvise:
    USE_DONTNEED = sys.platform.startswith('linux')
    USE_NOREUSE = not(USE_DONTNEED)
else:
    USE_NOREUSE = USE_DONTNEED = False


def do_create(f, size):
    f = open(f, 'a', 0)
    f.truncate(size)
    f.close()


def do_open(f, mode):
    f = open(f, mode)
    if USE_NOREUSE:
        fadvise(f, 0, 0, POSIX_FADV_NOREUSE)
    f.seek(0, 2)
    size = f.tell()
    f.seek(0)
    return f, size


def getblocks(f, blocksize):
    while 1:
        block = f.read(blocksize)
        if not block:
            break
        if USE_DONTNEED:
            fadvise(f, f.tell() - blocksize, blocksize, POSIX_FADV_DONTNEED)
        yield block


def server(dev, deleteonexit, options):
    global USE_NOREUSE, USE_DONTNEED

    blocksize = options.blocksize

    hash1 = getattr(hashlib, options.hash.lower())
    hash2 = getattr(hashlib, options.addhash.lower()) if options.addhash else False

    print('init')
    sys.stdout.flush()

    if (options.fadvise & REMOTE_FADVISE == 0):
        print('Disabled')
        USE_NOREUSE = USE_DONTNEED = False
    elif USE_NOREUSE:
        print('NOREUSE')
    elif USE_DONTNEED:
        print('DONTNEED')
    else:
        print('None')
    sys.stdout.flush()

    size = int(sys.stdin.readline().strip())
    if size > 0:
        do_create(dev, size)

    print(dev, blocksize)
    f, size = do_open(dev, 'rb+')
    print(size)
    sys.stdout.flush()

    startpos = int(sys.stdin.readline().strip())
    maxblock = int(sys.stdin.readline().strip()) - 1

    f.seek(startpos)

    if getattr(sys.stdin, "buffer", False):
        stdin = sys.stdin.buffer
        stdout = sys.stdout.buffer
    else:
        stdin = sys.stdin
        stdout = sys.stdout
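
    # Per-block protocol: send the digest(s) of the local (destination) block,
    # read back a one-byte SAME/DIFF verdict from the source side, and on DIFF
    # receive the source's block and overwrite the local one in place.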
    for i, block in enumerate(getblocks(f, blocksize)):
        stdout.write(hash1(block).digest())
        if hash2:
            stdout.write(hash2(block).digest())
        stdout.flush()
        res = stdin.read(COMPLEN)
        if res == DIFF:
            newblock = stdin.read(blocksize)
            newblocklen = len(newblock)
            f.seek(-newblocklen, 1)
            f.write(newblock)
            if USE_DONTNEED:
                fadvise(f, f.tell() - newblocklen, newblocklen, POSIX_FADV_DONTNEED)
        if i == maxblock:
            break

    if deleteonexit:
        os.remove(__file__)


def copy_self(workerid, remotecmd):
    with open(__file__) as srcfile:
        cmd = remotecmd + ['/usr/bin/env', 'sh', '-c', '"SCRIPTNAME=\`mktemp -q\`; cat >\$SCRIPTNAME; echo \$SCRIPTNAME"', '<

    if size > remote_size:
        print("[worker %d] Source device size (%d) doesn't fit into remote device size (%d)!" % (workerid, size, remote_size), file = options.outfile)
        sys.exit(1)
    elif size < remote_size:
        print("[worker %d] Source device size (%d) is smaller than remote device size (%d), proceeding anyway" % (workerid, size, remote_size), file = options.outfile)

    same_blocks = diff_blocks = last_blocks = 0
    interactive = os.isatty(sys.stdout.fileno())

    t0 = time.time()
    t_last = t0
    f.seek(startpos)
    size_blocks = ceil(chunksize / float(blocksize))
    p_in.write(bytes(("%d\n%d\n" % (startpos, size_blocks)).encode("UTF-8")))
    p_in.flush()
    print("[worker %d] Start syncing %d blocks..." % (workerid, size_blocks), file = options.outfile)
    for l_block in getblocks(f, blocksize):
        l1_sum = hash1(l_block).digest()
        r1_sum = p_out.read(hash1len)
        if hash2:
            l2_sum = hash2(l_block).digest()
            r2_sum = p_out.read(hash2len)
            r2_match = (l2_sum == r2_sum)
        else:
            r2_match = True
        if (l1_sum == r1_sum) and r2_match:
            same_blocks += 1
            p_in.write(SAME)
            p_in.flush()
        else:
            diff_blocks += 1
            if dryrun:
                p_in.write(SAME)
                p_in.flush()
            else:
                p_in.write(DIFF)
                p_in.flush()
                p_in.write(l_block)
                p_in.flush()

        if pause_ms:
            time.sleep(pause_ms)

        if not interactive:
            continue

        t1 = float(time.time())
        if (t1 - t_last) >= interval:
            done_blocks = same_blocks + diff_blocks
            delta_blocks = done_blocks - last_blocks
            rate = delta_blocks * blocksize / (1024 * 1024 * (t1 - t_last))
            print("[worker %d] same: %d, diff: %d, %d/%d, %5.1f MB/s (%s remaining)" % (workerid, same_blocks, diff_blocks, done_blocks, size_blocks, rate, timedelta(seconds = ceil((size_blocks - done_blocks) * (t1 - t0) / done_blocks))), file = options.outfile)
            last_blocks = done_blocks
            t_last = t1

        if (same_blocks + diff_blocks) == size_blocks:
            break

    rate = size_blocks * blocksize / (1024.0 * 1024) / (time.time() - t0)
    print("[worker %d] same: %d, diff: %d, %d/%d, %5.1f MB/s" % (workerid, same_blocks, diff_blocks, same_blocks + diff_blocks, size_blocks, rate), file = options.outfile)

    print("[worker %d] Completed in %s" % (workerid, timedelta(seconds = ceil(time.time() - t0))), file = options.outfile)

    return same_blocks, diff_blocks

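# Entry point: parse the command line, then either run in server mode
# ("server"/"tmpserver", normally started on the remote host) or fork one or
# more local workers that each run sync().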
"--workers", dest = "workers", type = "int", help = "number of workers to fork (defaults to 1)", default = 1) 352 | parser.add_option("-l", "--splay", dest = "splay", type = "int", help = "sleep between creating workers (ms, defaults to 0)", default = 250) 353 | parser.add_option("-b", "--blocksize", dest = "blocksize", type = "int", help = "block size (bytes, defaults to 1MB)", default = 1024 * 1024) 354 | parser.add_option("-1", "--hash", dest = "hash", help = "hash used for block comparison (defaults to \"sha512\")", default = "sha512") 355 | parser.add_option("-2", "--additionalhash", dest = "addhash", help = "second hash used for extra comparison (default is none)") 356 | parser.add_option("-d", "--fadvise", dest = "fadvise", type = "int", help = "lower cache pressure by using posix_fadivse (requires Python 3 or python-fadvise; 0 = off, 1 = local on, 2 = remote on, 3 = both on; defaults to 3)", default = 3) 357 | parser.add_option("-p", "--pause", dest = "pause", type="int", help = "pause between processing blocks, reduces system load (ms, defaults to 0)", default = 0) 358 | parser.add_option("-c", "--cipher", dest = "cipher", help = "cipher specification for SSH (defaults to blowfish)", default = "blowfish") 359 | parser.add_option("-C", "--compress", dest = "compress", action = "store_true", help = "enable compression over SSH (defaults to on)", default = True) 360 | parser.add_option("-i", "--id", dest = "keyfile", help = "SSH public key file") 361 | parser.add_option("-P", "--pass", dest = "passenv", help = "environment variable containing SSH password (requires sshpass)") 362 | parser.add_option("-s", "--sudo", dest = "sudo", action = "store_true", help = "use sudo on the remote end (defaults to off)", default = False) 363 | parser.add_option("-x", "--extraparams", dest = "sshparams", help = "additional parameters to pass to SSH") 364 | parser.add_option("-n", "--dryrun", dest = "dryrun", action = "store_true", help = "do a dry run (don't write anything, just report differences)", default = False) 365 | parser.add_option("-T", "--createdest", dest = "createdest", action = "store_true", help = "create destination file using truncate(2). 
    parser.add_option("-C", "--compress", dest = "compress", action = "store_true", help = "enable compression over SSH (defaults to on)", default = True)
    parser.add_option("-i", "--id", dest = "keyfile", help = "SSH public key file")
    parser.add_option("-P", "--pass", dest = "passenv", help = "environment variable containing SSH password (requires sshpass)")
    parser.add_option("-s", "--sudo", dest = "sudo", action = "store_true", help = "use sudo on the remote end (defaults to off)", default = False)
    parser.add_option("-x", "--extraparams", dest = "sshparams", help = "additional parameters to pass to SSH")
    parser.add_option("-n", "--dryrun", dest = "dryrun", action = "store_true", help = "do a dry run (don't write anything, just report differences)", default = False)
    parser.add_option("-T", "--createdest", dest = "createdest", action = "store_true", help = "create destination file using truncate(2). Should be safe for subsequent syncs as truncate only modifies the file when the size differs", default = False)
    parser.add_option("-S", "--script", dest = "script", help = "location of script on remote host (otherwise current script is sent over)")
    parser.add_option("-I", "--interpreter", dest = "interpreter", help = "[full path to] interpreter used to invoke remote server (defaults to python2)", default = "python2")
    parser.add_option("-t", "--interval", dest = "interval", type = "int", help = "interval between stats output (seconds, defaults to 1)", default = 1)
    parser.add_option("-o", "--output", dest = "outfile", help = "send output to file instead of console")
    parser.add_option("-f", "--force", dest = "force", action = "store_true", help = "force sync and DO NOT ask for confirmation if the destination file already exists")
    (options, args) = parser.parse_args()

    if len(args) < 2:
        parser.print_help()
        print(__doc__)
        sys.exit(1)

    aborting = False

    if options.outfile:
        options.outfile = open(options.outfile, 'a', 1)

    if args[0] == 'server':
        dstdev = args[1]
        server(dstdev, False, options)
    elif args[0] == 'tmpserver':
        dstdev = args[1]
        server(dstdev, True, options)
    else:
        srcdev = args[0]
        dsthost = args[1]
        if len(args) > 2:
            dstdev = args[2]
        else:
            dstdev = None

        if options.dryrun:
            print("Dryrun - will only report differences, no data will be written", file = options.outfile)
        else:
            if not options.force:
                print("\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!", file = options.outfile)
                print("!!!                                          !!!", file = options.outfile)
                print("!!! DESTINATION WILL BE PERMANENTLY CHANGED! !!!", file = options.outfile)
                print("!!!         PRESS CTRL-C NOW TO EXIT         !!!", file = options.outfile)
                print("!!!                                          !!!", file = options.outfile)
                print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n", file = options.outfile)
                time.sleep(5)

        splay_ms = 0
        if options.splay:
            # sleep() wants seconds...
            splay_ms = options.splay / 1000.0
        workers = {}
        for i in range(options.workers):
            pid = os.fork()
            if pid == 0:
                sync(i, srcdev, dsthost, dstdev, options)
                sys.exit(0)
            else:
                workers[pid] = i
            if splay_ms:
                time.sleep(splay_ms)

        for i in range(options.workers):
            pid, err = os.wait()
            print("Worker #%d exited with %d" % (workers[pid], err), file = options.outfile)
            if (err != 0) and not aborting:
                aborting = True
                print("Worker #%d caused ABORT" % workers[pid])
                del workers[pid]
                for pid in workers:
                    print("Terminating worker #%d" % workers[pid])
                    os.kill(pid, signal.SIGTERM)

    if options.outfile:
        options.outfile.close()

    if aborting:
        sys.exit(1)

--------------------------------------------------------------------------------