├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.rst ├── backup_vm ├── __init__.py ├── backup.py ├── builder.py ├── multi.py ├── parse.py └── snapshot.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | backup_vm/_version.py 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | .tox/ 41 | .cache 42 | 43 | # Sphinx documentation 44 | docs/_build/ 45 | 46 | # pyenv 47 | .python-version 48 | 49 | # Environments 50 | .env 51 | .venv 52 | bvm-env/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Milkey Mouse 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | backup-vm 2 | ========= 3 | 4 | Back up your libvirt-based VMs using Borg_! 5 | 6 | .. _Borg: https://github.com/borgbackup/borg 7 | 8 | Features 9 | -------- 10 | 11 | * Backup running VMs 12 | 13 | * Automatically creates a `COW snapshot`_ of virtual disks to avoid corruption and pivots_ them back afterwards 14 | * From the perspective of the VM, restoring from a live backup is like a sudden power-off 15 | 16 | * Chances of file corruption are still low with a `guest agent`_ installed 17 | 18 | * Can back up multiple VM disks 19 | 20 | * Supports disk images backed by a file or a block device 21 | 22 | * Can back up to multiple Borg repositories at once 23 | 24 | * Only one snapshot operation needed for multiple backups 25 | * Auto-answers subsequent prompts from other borg processes 26 | * Shows total backup progress % (even with multiple backups) 27 | 28 | * Pass extra arguments straight to Borg on the command line 29 | 30 | * Different settings (e.g. compression) can be passed to each instance 31 | 32 | .. _COW snapshot: https://wiki.libvirt.org/page/Snapshots 33 | .. _pivots: https://wiki.libvirt.org/page/Live-disk-backup-with-active-blockcommit 34 | .. _guest agent: https://wiki.libvirt.org/page/Qemu_guest_agent 35 | 36 | Examples 37 | -------- 38 | 39 | Backup 40 | ^^^^^^ 41 | 42 | Back up a virtual machine to a single Borg repo:: 43 | 44 | backup-vm myVM myrepo::myBackup 45 | 46 | Back up a virtual machine ``webserver`` to an onsite and an offsite Borg repository with varying compression settings:: 47 | 48 | backup-vm webserver onsite::webserver-{now:%Y-%m-%d} --borg-args --compression lz4 offsite::webserver-{now:%Y-%m-%d} --borg-args --compression zlib,9 49 | 50 | Back up only the system drive of a Windows VM:: 51 | 52 | backup-vm win10 sda myrepo::win10-{now:%Y-%m-%d} 53 | 54 | Restore 55 | ^^^^^^^ 56 | 57 | A script for automatic restoration is `in development`_; however, the backups are saved with a simple directory structure that makes manual restoration easy. Each backup has the image of each disk clearly named in the root directory (e.g. ``sda.raw``, ``hdb.qcow2``). The legacy `bash script`_ for restoring follows a similar process to what the Python version will, with the notable exception that it does not handle multiple disks. 58 | 59 | .. _in development: https://github.com/milkey-mouse/backup-vm/issues/1 60 | .. _bash script: https://github.com/milkey-mouse/backup-vm/blob/bash-script/restore-vm.sh 61 | 62 | Usage 63 | ----- 64 | 65 | .. BEGIN AUTO-GENERATED USAGE 66 | :: 67 | 68 | usage: backup-vm [-hpv] domain [disk [disk ...]] archive 69 | [--borg-args ...] [archive [--borg-args ...] ...] 70 | 71 | Back up a libvirt-based VM using borg. 72 | 73 | positional arguments: 74 | domain libvirt domain to back up 75 | disk a domain block device to back up (default: all disks) 76 | archive a borg archive path (same format as borg create) 77 | 78 | optional arguments: 79 | -h, --help show this help message and exit 80 | -v, --version show version of the backup-vm package 81 | -p, --progress force progress display even if stdout isn't a tty 82 | --borg-args ... extra arguments passed straight to borg 83 | 84 | :: 85 | 86 | usage: borg-multi [-hpv] [--path PATH] [--borg-cmd SUBCOMMAND] 87 | archive [--borg-args ...] [archive [--borg-args ...] ...] 88 | 89 | Batch multiple borg commands into one. 90 | 91 | positional arguments: 92 | archive a borg archive path (same format as borg create) 93 | 94 | optional arguments: 95 | -h, --help show this help message and exit 96 | -v, --version show version of the backup-vm package 97 | -l, --path path for borg to archive (default: .) 98 | -p, --progress force progress display even if stdout isn't a tty 99 | -c, --borg-cmd alternate borg subcommand to run (default: create) 100 | --borg-args ... extra arguments passed straight to borg 101 | 102 | .. END AUTO-GENERATED USAGE 103 | 104 | Installation 105 | ------------ 106 | 107 | Python ≥3.5 is required, as well as the Python libvirt bindings. If possible, install them from the system package manager (``apt install python3-libvirt``); otherwise, use pip (``pip install libvirt-python``). To install the script, copy it into ``/usr/local/bin`` and optionally remove the ``.py`` extension. 108 | 109 | For offline backups, ``qemu-img`` is required, although it is normally installed along with libvirt. 110 | -------------------------------------------------------------------------------- /backup_vm/__init__.py: -------------------------------------------------------------------------------- 1 | from ._version import version as __version__ 2 | -------------------------------------------------------------------------------- /backup_vm/backup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os.path 4 | import sys 5 | import libvirt 6 | from . import parse 7 | from . import multi 8 | from . import builder 9 | from . import snapshot 10 | 11 | 12 | def main(): 13 | args = parse.BVMArgumentParser() 14 | conn = libvirt.open() 15 | if conn is None: 16 | print("Failed to open connection to libvirt", file=sys.stderr) 17 | sys.exit(1) 18 | try: 19 | dom = conn.lookupByName(args.domain) 20 | except libvirt.libvirtError: 21 | print("Domain '{}' not found".format(args.domain)) 22 | sys.exit(1) 23 | 24 | all_disks = set(parse.Disk.get_disks(dom)) 25 | if len(all_disks) == 0: 26 | print("Domain has no disks(!)", file=sys.stderr) 27 | sys.exit(1) 28 | 29 | disks_to_backup = args.disks and {x for x in all_disks if x.target in args.disks} or all_disks 30 | if len(disks_to_backup) != len(args.disks or all_disks): 31 | print("Some disks to be backed up don't exist on the domain:", 32 | *sorted(x.target for x in all_disks if x.target not in args.disks), file=sys.stderr) 33 | sys.exit(1) 34 | 35 | for disk in all_disks: 36 | filename = args.domain + "-" + disk.target + "-tempsnap.qcow2" 37 | if disk not in disks_to_backup: 38 | disk.snapshot_path = None 39 | elif disk.type == "dev": 40 | # we probably can't write the temporary snapshot to the same directory 41 | # as the original disk, so use the default libvirt images directory 42 | disk.snapshot_path = os.path.join("/var/lib/libvirt/images", filename) 43 | else: 44 | disk.snapshot_path = os.path.join(os.path.dirname(disk.path), filename) 45 | 46 | for archive in args.archives: 47 | archive.extra_args.append("--read-special") 48 | 49 | with snapshot.Snapshot(dom, all_disks, args.progress), \ 50 | builder.ArchiveBuilder(disks_to_backup) as archive_dir: 51 | if args.progress: 52 | borg_failed = multi.assimilate(args.archives, archive_dir.total_size) 53 | else: 54 | borg_failed = multi.assimilate(args.archives) 55 | 56 | # bug in libvirt python wrapper(?): sometimes it tries to delete 57 | # the connection object before the domain, which references it 58 | del dom 59 | del conn 60 | 61 | sys.exit(borg_failed or any(disk.failed for disk in disks_to_backup)) 62 | -------------------------------------------------------------------------------- /backup_vm/builder.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import tempfile 3 | import os.path 4 | 5 | 6 | class ArchiveBuilder(tempfile.TemporaryDirectory): 7 | 8 | """Creates the folder to be turned into a VM backup. 9 | 10 | Creates a temporary folder populated with symlinks to each disk to backup. 11 | Essentially lays out the contents of the archive to be created. 12 | 13 | Attributes: 14 | name: The path of the temporary directory. 15 | total_size: The total size of every disk linked to in the directory. 16 | """ 17 | 18 | def __init__(self, disks, *args, **kwargs): 19 | super().__init__(*args, **kwargs) 20 | self.total_size = 0 21 | self.disks = disks 22 | self.old_cwd = os.getcwd() 23 | os.chdir(self.name) 24 | 25 | def __enter__(self): 26 | for disk in self.disks: 27 | realpath = os.path.realpath(disk.path) 28 | if self.total_size is not None: 29 | try: 30 | with open(realpath) as f: 31 | # add size of disk to total 32 | f.seek(0, os.SEEK_END) 33 | self.total_size += f.tell() 34 | except (PermissionError, OSError): 35 | self.total_size = None 36 | linkpath = disk.target + "." + disk.format 37 | with open(linkpath, "w") as f: 38 | # simulate 'touch' 39 | pass 40 | # following symlinks for --read-special is still broken :( 41 | # when issue gets fixed should switch to symlinks: 42 | # https://github.com/borgbackup/borg/issues/1215 43 | subprocess.run(["mount", "--bind", realpath, linkpath], check=True) 44 | return self 45 | 46 | def cleanup(self): 47 | for disk in self.disks: 48 | linkpath = disk.target + "." + disk.format 49 | subprocess.run(["umount", linkpath], check=True) 50 | os.chdir(self.old_cwd) 51 | return super().cleanup() 52 | -------------------------------------------------------------------------------- /backup_vm/multi.py: -------------------------------------------------------------------------------- 1 | from distutils.version import LooseVersion 2 | from base64 import b64encode 3 | from getpass import getpass 4 | from pty import openpty 5 | from copy import copy 6 | import subprocess 7 | import selectors 8 | import termios 9 | import fcntl 10 | import json 11 | import sys 12 | import pty 13 | import os 14 | from . import parse 15 | 16 | 17 | def get_passphrases(archives): 18 | """Prompts the user for their archive passphrases. 19 | 20 | Checks for archives that won't open without a (non-blank, non-random) 21 | BORG_PASSPHRASE and prompts the user for their passphrases. 22 | 23 | Args: 24 | archives: A list of Location objects to check the repositories of. 25 | 26 | Returns: 27 | A dictionary mapping archives to their (purported) passphrases. The 28 | entered passphrases are not checked to actually open the archives. 29 | """ 30 | passphrases = {} 31 | env = os.environ.copy() 32 | for archive in archives: 33 | repo = copy(archive) 34 | repo.archive = None 35 | # check if we need a password as recommended by the docs: 36 | # https://borgbackup.readthedocs.io/en/stable/internals/frontends.html#passphrase-prompts 37 | if len({"BORG_PASSPHRASE", "BORG_PASSCOMMAND", "BORG_NEWPASSPHRASE"} - set(env)) == 3: 38 | # generate random password that would be incorrect were it needed 39 | env["BORG_PASSPHRASE"] = b64encode(os.urandom(16)).decode("utf-8") 40 | with subprocess.Popen(["borg", "list", str(repo)], stdin=subprocess.PIPE, 41 | stdout=subprocess.DEVNULL, stderr=subprocess.PIPE, env=env) as proc: 42 | # manually close stdin instead of /dev/null so borg knows it won't get input 43 | proc.stdin.close() 44 | proc.stdin = None 45 | err = proc.communicate(input)[1].decode("utf-8").rstrip("\n").split("\n")[-1] 46 | if proc.poll() != 0: 47 | # exact error message changes between borg versions 48 | if err.startswith("passphrase supplied") and err.endswith("is incorrect."): 49 | passphrases[archive] = getpass("Enter passphrase for key {!s}: ".format(repo)) 50 | return passphrases 51 | 52 | 53 | def log(name, msg, *args, file=sys.stderr, end="\n", **kwargs): 54 | """Logs a string to a file, prepending a "tag" to each line. 55 | 56 | Logs a string to a file (by default stderr), with a "tag" added to the 57 | beginning of each line, in the format of this example:: 58 | 59 | [repo::archive] Hello world! 60 | 61 | Args: 62 | name: The text to be put in the "tag" part of each line. 63 | msg: The string to be tagged & logged. 64 | end: The ending of the last line printed. 65 | 66 | Any other arguments passed will be passed onto print(). 67 | """ 68 | for l in msg[:-1]: 69 | print("[{}] {}".format(name, l), file=file, **kwargs) 70 | print("[{}] {}".format(name, msg[-1]), file=file, end=end, **kwargs) 71 | 72 | 73 | def process_line(p, line, total_size=None, prompt_answers={}): 74 | """Process a line coming from a borg process. 75 | 76 | Processes JSON emitted by a borg process with --log-json turned on. The 77 | lines are cached, so 1 line does not have to equal 1 JSON message. 78 | 79 | Args: 80 | p: The process the line came from (with some extra properties added to 81 | the Popen object). 82 | line: The line read from the process's stdout or stderr. If it contains 83 | progress information, update the stored progress value. If it is a 84 | prompt for the user, ask for and return the answer (& cache it for 85 | later.) If it is a log message or some other non-JSON, print it out. 86 | total_size: The total size of all files being backed up. This can be set 87 | to None to disable progress calculation. 88 | prompt_answers: A dictionary of previous answers from users' prompts. 89 | Prompts with msgids in the dictionary will be automatically answered 90 | with the value given (ostensibly from an earlier prompt). 91 | """ 92 | if len(p.json_buf) > 0 or line.startswith("{"): 93 | p.json_buf.append(line) 94 | if len(p.json_buf) > 0 and line.endswith("}"): 95 | try: 96 | msg = json.loads("\n".join(p.json_buf)) 97 | p.json_buf = [] 98 | if msg["type"] == "archive_progress" and total_size is not None: 99 | p.progress = msg["original_size"] / total_size 100 | elif msg["type"] == "log_message": 101 | log(p.archive.orig, msg["message"].split("\n")) 102 | elif msg["type"].startswith("question"): 103 | if "msgid" in msg: 104 | prompt_id = msg["msgid"] 105 | elif "message" in msg: 106 | prompt_id = msg["message"] 107 | else: 108 | raise ValueError("No msgid or message for prompt") 109 | if msg.get("is_prompt", False) or msg["type"].startswith("question_prompt"): 110 | if prompt_id not in prompt_answers: 111 | log(p.archive.orig, msg["message"].split("\n"), end="") 112 | try: 113 | prompt_answers[prompt_id] = input() 114 | print(prompt_answers[prompt_id], file=p.stdin, flush=True) 115 | except EOFError: 116 | p.stdin.close() 117 | elif not msg["type"].startswith("question_accepted"): 118 | log(p.archive.orig, msg["message"].split("\n")) 119 | except json.decoder.JSONDecodeError: 120 | log(p.archive.orig, p.json_buf) 121 | p.json_buf = [] 122 | elif line.startswith("Enter passphrase for key "): 123 | log(p.archive.orig, [line], end="") 124 | passphrase = getpass("") 125 | print(passphrase, file=p.stdin, flush=True) 126 | print("", file=sys.stderr) 127 | elif line != "": 128 | # line is not json? 129 | log(p.archive.orig, [line]) 130 | # TODO: process password here for efficiency & simplicity 131 | 132 | 133 | def get_borg_version(): 134 | """ 135 | Get the version of the system borg. 136 | 137 | Returns: 138 | The version of the system borg as a distutils.version.LooseVersion (for 139 | easy comparison with other versions). 140 | """ 141 | version_bytes = subprocess.run(["borg", "--version"], stdout=subprocess.PIPE, check=True).stdout 142 | return LooseVersion(version_bytes.decode("utf-8").split(" ")[1]) 143 | 144 | 145 | def assimilate(archives, total_size=None, dir_to_archive=".", passphrases=None, verb="create"): 146 | """ 147 | Run and manage multiple `borg create` commands. 148 | 149 | Args: 150 | archives: A list containing Location objects for the archives to create. 151 | total_size: The total size of all files being backed up. As borg 152 | normally only makes one pass over the data, it can't calculate 153 | percentages on its own. Setting this to None disables progress 154 | calculation. 155 | dir_to_archive: The directory to archive. Defaults to the current 156 | directory. 157 | 158 | Returns: 159 | A boolean indicating if any borg processes failed (True = failed). 160 | """ 161 | 162 | if dir_to_archive is None: 163 | dir_to_archive = [] 164 | else: 165 | dir_to_archive = [dir_to_archive] 166 | 167 | if passphrases is None: 168 | passphrases = get_passphrases(archives) if sys.stdout.isatty() else {} 169 | 170 | if get_borg_version() < LooseVersion("1.1.0"): 171 | # borg <1.1 doesn't support --log-json for the progress display 172 | print("You are using an old version of borg, progress indication is disabled", file=sys.stderr) 173 | recent_borg = False 174 | progress = False 175 | else: 176 | recent_borg = True 177 | progress = total_size is not None 178 | 179 | borg_processes = [] 180 | borg_failed = False 181 | try: 182 | with selectors.DefaultSelector() as sel: 183 | for idx, archive in enumerate(archives): 184 | if progress: 185 | archive.extra_args.append("--progress") 186 | if recent_borg: 187 | archive.extra_args.append("--log-json") 188 | env = os.environ.copy() 189 | passphrase = passphrases.get(archive, os.environ.get("BORG_PASSPHRASE")) 190 | if passphrase is not None: 191 | env["BORG_PASSPHRASE"] = passphrase 192 | master, slave = openpty() 193 | settings = termios.tcgetattr(master) 194 | settings[3] &= ~termios.ECHO 195 | termios.tcsetattr(master, termios.TCSADRAIN, settings) 196 | proc = subprocess.Popen(["borg", verb, str(archive), *dir_to_archive, *archive.extra_args], env=env, 197 | stdout=slave, stderr=slave, stdin=slave, close_fds=True, start_new_session=True) 198 | fl = fcntl.fcntl(master, fcntl.F_GETFL) 199 | fcntl.fcntl(master, fcntl.F_SETFL, fl | os.O_NONBLOCK) 200 | proc.stdin = os.fdopen(master, "w") 201 | proc.stdout = os.fdopen(master, "r") 202 | proc.archive = archive 203 | proc.json_buf = [] 204 | proc.progress = 0 205 | borg_processes.append(proc) 206 | sel.register(proc.stdout, selectors.EVENT_READ, data=proc) 207 | 208 | if progress: 209 | print("backup progress: 0%".ljust(25), end="\u001b[25D", flush=True) 210 | else: 211 | # give the user some feedback so the program doesn't look frozen 212 | print("starting backup", flush=True) 213 | while len(sel.get_map()) > 0: 214 | for key, mask in sel.select(1): 215 | for line in iter(key.fileobj.readline, ""): 216 | process_line(key.data, line.rstrip("\n"), total_size) 217 | for key in [*sel.get_map().values()]: 218 | if key.data.poll() is not None: 219 | key.data.wait() 220 | key.data.progress = 1 221 | if key.data.returncode != 0: 222 | borg_failed = True 223 | sel.unregister(key.fileobj) 224 | if progress: 225 | total_progress = sum(p.progress for p in borg_processes) 226 | print("backup progress: {}%".format( 227 | int(total_progress / len(borg_processes) * 100)).ljust(25), end="\u001b[25D") 228 | if progress: 229 | print() 230 | finally: 231 | for p in borg_processes: 232 | if p.poll() is not None: 233 | p.kill() 234 | try: 235 | p.communicate() 236 | except (ValueError, OSError): 237 | p.wait() 238 | return borg_failed 239 | 240 | 241 | def main(): 242 | args = parse.MultiArgumentParser() 243 | if args.command != "create" and "--path" not in sys.argv[1:]: 244 | # path needs to be explicitly specified to be included in command 245 | # if the verb is not the default 246 | args.dir = None 247 | return assimilate(args.archives, dir_to_archive=args.dir, verb=args.command) 248 | -------------------------------------------------------------------------------- /backup_vm/parse.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from xml.etree import ElementTree 3 | from textwrap import dedent 4 | import itertools 5 | import sys 6 | import os 7 | import re 8 | from . import __version__ 9 | 10 | 11 | class Location: 12 | # see https://github.com/borgbackup/borg/blob/5e2de8b/src/borg/helpers/parseformat.py#L277 13 | proto = user = _host = port = path = archive = None 14 | optional_user_re = r""" 15 | (?:(?P[^@:/]+)@)? 16 | """ 17 | scp_path_re = r""" 18 | (?!(:|//|ssh://)) 19 | (?P([^:]|(:(?!:)))+) 20 | """ 21 | file_path_re = r""" 22 | (?P(([^/]*)/([^:]|(:(?!:)))+)) 23 | """ 24 | abs_path_re = r""" 25 | (?P(/([^:]|(:(?!:)))+)) 26 | """ 27 | optional_archive_re = r""" 28 | (?: 29 | :: 30 | (?P[^/]+) 31 | )?$""" 32 | ssh_re = re.compile(r""" 33 | (?Pssh):// 34 | """ + optional_user_re + r""" 35 | (?P([^:/]+|\[[0-9a-fA-F:.]+\]))(?::(?P\d+))? 36 | """ + abs_path_re + optional_archive_re, re.VERBOSE) 37 | file_re = re.compile(r""" 38 | (?Pfile):// 39 | """ + file_path_re + optional_archive_re, re.VERBOSE) 40 | scp_re = re.compile(r""" 41 | ( 42 | """ + optional_user_re + r""" 43 | (?P([^:/]+|\[[0-9a-fA-F:.]+\])): 44 | )? 45 | """ + scp_path_re + optional_archive_re, re.VERBOSE) 46 | env_re = re.compile(r""" 47 | (?:::$) 48 | | 49 | """ + optional_archive_re, re.VERBOSE) 50 | 51 | def __init__(self, text=""): 52 | self.orig = text 53 | self.extra_args = [] 54 | if not self.parse(self.orig): 55 | raise ValueError("Location: parse failed: %s" % self.orig) 56 | 57 | def parse(self, text): 58 | # text = replace_placeholders(text) 59 | valid = self._parse(text) 60 | if valid: 61 | return True 62 | m = self.env_re.match(text) 63 | if not m: 64 | return False 65 | repo = os.environ.get("BORG_REPO") 66 | if repo is None: 67 | return False 68 | valid = self._parse(repo) 69 | if not valid: 70 | return False 71 | self.archive = m.group("archive") 72 | return True 73 | 74 | def _parse(self, text): 75 | def normpath_special(p): 76 | # avoid that normpath strips away our relative path hack and even 77 | # makes p absolute 78 | relative = p.startswith("/./") 79 | p = os.path.normpath(p) 80 | return ("/." + p) if relative else p 81 | 82 | m = self.ssh_re.match(text) 83 | if m: 84 | self.proto = m.group("proto") 85 | self.user = m.group("user") 86 | self._host = m.group("host") 87 | self.port = m.group("port") and int(m.group("port")) or None 88 | self.path = normpath_special(m.group("path")) 89 | self.archive = m.group("archive") 90 | return True 91 | m = self.file_re.match(text) 92 | if m: 93 | self.proto = m.group("proto") 94 | self.path = normpath_special(m.group("path")) 95 | self.archive = m.group("archive") 96 | return True 97 | m = self.scp_re.match(text) 98 | if m: 99 | self.user = m.group("user") 100 | self._host = m.group("host") 101 | self.path = normpath_special(m.group("path")) 102 | self.archive = m.group("archive") 103 | self.proto = self._host and "ssh" or "file" 104 | return True 105 | return False 106 | 107 | @classmethod 108 | def try_location(cls, text): 109 | try: 110 | return Location(text) 111 | except ValueError: 112 | return None 113 | 114 | def canonicalize_path(self, cwd=None): 115 | if self.proto == "file" and not os.path.isabs(self.path): 116 | if cwd is None: 117 | cwd = os.getcwd() 118 | self.path = os.path.normpath(os.path.join(cwd, self.path)) 119 | 120 | def __str__(self): 121 | # https://borgbackup.readthedocs.io/en/stable/usage/general.html#repository-urls 122 | # the path needs to be re-created instead of returning self.orig because 123 | # we change values to make paths absolute, etc. 124 | if self.proto == "file": 125 | repo = self.path 126 | elif self.proto == "ssh": 127 | _user = self.user + "@" if self.user is not None else "" 128 | if self.port is not None: 129 | # URI form needs "./" prepended to relative dirs 130 | if os.path.isabs(self.path): 131 | _path = self.path 132 | else: 133 | _path = os.path.join(".", self.path) 134 | repo = "ssh://{}{}:{}/{}".format(_user, self._host, self.port, _path) 135 | else: 136 | repo = "{}{}:{}".format(_user, self._host, self.path) 137 | if self.archive is not None: 138 | return repo + "::" + self.archive 139 | else: 140 | return repo 141 | 142 | def __hash__(self): 143 | return hash(str(self)) 144 | 145 | 146 | class Disk: 147 | 148 | """Holds information about a single disk on a libvirt domain. 149 | 150 | Attributes: 151 | xml: The original XML element representing the disk. 152 | format: The format of the disk image (qcow2, raw, etc.) 153 | target: The block device name on the guest (sda, xvdb, etc.) 154 | type: The type of storage backing the disk (file, block, etc.) 155 | path: The location of the disk storage (image file, block device, etc.) 156 | """ 157 | 158 | def __init__(self, xml): 159 | self.xml = xml 160 | self.target = xml.find("target").get("dev") 161 | # sometimes there won't be a source entry, e.g. a cd drive without a 162 | # virtual cd in it 163 | if xml.find("source") is not None: 164 | self.type, self.path = next(iter(xml.find("source").attrib.items())) 165 | else: 166 | self.type = self.path = None 167 | # apparently in some cd drives created by virt-manager, can 168 | # also be completely missing: 169 | # https://github.com/milkey-mouse/backup-vm/issues/11#issuecomment-351478233 170 | if xml.find("driver") is not None: 171 | self.format = xml.find("driver").attrib.get("type", "unknown") 172 | else: 173 | self.format = "unknown" 174 | 175 | def __repr__(self): 176 | if self.type == "file": 177 | type = "file" 178 | elif self.type == "dev": 179 | type = "block device" 180 | else: 181 | type = "unknown type" 182 | 183 | return "<{} ({}) ({} format)>".format(self.path, type, self.format) 184 | 185 | @classmethod 186 | def get_disks(cls, dom): 187 | """Generates a list of Disks representing the disks on a libvirt domain. 188 | 189 | Args: 190 | dom: A libvirt domain object. 191 | 192 | Yields: 193 | Disk objects representing each disk on the domain. 194 | """ 195 | tree = ElementTree.fromstring(dom.XMLDesc(0)) 196 | yield from {d for d in map(cls, tree.findall("devices/disk")) if d.type is not None} 197 | 198 | 199 | # TODO: reimplement this mess with getopt (argparse doesn't support --borg-args stuff) 200 | class ArgumentParser(metaclass=ABCMeta): 201 | 202 | """Base class for backup-vm parsers. 203 | 204 | Parses arguments common to all scripts in the backup-vm package (with 205 | --borg-args, multiple archive locations, etc.). 206 | """ 207 | 208 | def __init__(self, default_name, args=sys.argv): 209 | try: 210 | self.prog = os.path.basename(args[0]) 211 | except Exception: 212 | self.prog = default_name 213 | self.progress = sys.stdout.isatty() 214 | self.disks = set() 215 | self.archives = [] 216 | self.parse_args(args[1:]) 217 | 218 | def parse_arg(self, arg, needs_archive=True, lookahead=None): 219 | """Parses a single argument. 220 | 221 | Args: 222 | arg: A string representing a single argument. 223 | 224 | Returns: 225 | True if the argument was processed, False if it was not recognized 226 | """ 227 | if arg in {"-h", "--help"}: 228 | self.help() 229 | sys.exit() 230 | elif arg in {"-v", "--version"} and not self.parsing_borg_args: 231 | self.version() 232 | sys.exit() 233 | l = Location.try_location(arg) 234 | if needs_archive and l is not None and l.path is not None and \ 235 | (l.proto == "file" or l._host is not None) and l.archive is not None: 236 | self.parsing_borg_args = False 237 | l.canonicalize_path() 238 | self.archives.append(l) 239 | elif arg == "--borg-args": 240 | if len(self.archives) == 0: 241 | self.error("--borg-args must come after an archive path") 242 | else: 243 | self.parsing_borg_args = True 244 | elif not needs_archive and lookahead is not None and lookahead == "--borg-args" and \ 245 | l is not None and l.path is not None and (l.proto == "file" or l._host is not None): 246 | self.parsing_borg_args = False 247 | l.canonicalize_path() 248 | self.archives.append(l) 249 | elif self.parsing_borg_args: 250 | self.archives[-1].extra_args.append(arg) 251 | elif arg in {"-p", "--progress"}: 252 | self.progress = True 253 | else: 254 | return False 255 | return True 256 | 257 | def parse_args(self, args): 258 | if len(args) == 0: 259 | self.help() 260 | sys.exit(2) 261 | self.parsing_borg_args = False 262 | for arg, lookahead in itertools.zip_longest(args, args[1:]): 263 | if arg.startswith("-") and not arg.startswith("--") and "=" not in arg: 264 | for c in arg[1:]: 265 | if not self.parse_arg("-" + c, lookahead=lookahead): 266 | self.error("unrecognized argument: '-{}'".format(c)) 267 | else: 268 | if not self.parse_arg(arg, lookahead=lookahead): 269 | self.error("unrecognized argument: '{}'".format(arg)) 270 | if len(self.archives) == 0: 271 | self.error("at least one archive path is required") 272 | 273 | def error(self, msg): 274 | self.help(short=True) 275 | print(self.prog + ": error: " + msg, file=sys.stderr) 276 | sys.exit(2) 277 | 278 | @abstractmethod 279 | def help(self, short=False): 280 | pass 281 | 282 | def version(self): 283 | print(self.prog, __version__) 284 | 285 | 286 | class MultiArgumentParser(ArgumentParser): 287 | 288 | """Argument parser for borg-multi. 289 | 290 | Parses common arguments (--borg-args, multiple archive locations, etc.) as 291 | well as those of borg-multi (--borg-cmd, --path). 292 | """ 293 | 294 | def __init__(self, default_name="borg-multi", args=sys.argv): 295 | self.command = "create" 296 | self.dir = "." 297 | super().__init__(default_name, args) 298 | 299 | def parse_arg(self, arg, *args, **kwargs): 300 | if self.command is None: 301 | self.command = arg 302 | elif self.dir is None: 303 | self.dir = arg 304 | elif arg in {"-c", "--borg-cmd"}: 305 | self.command = None 306 | elif arg.startswith("-c"): 307 | self.command = arg[2:] 308 | elif arg.startswith("--borg-cmd="): 309 | try: 310 | self.command = arg.split("=")[1] 311 | except IndexError: 312 | self.command = None 313 | elif arg in {"-l", "--path"}: 314 | self.dir = None 315 | elif arg.startswith("-l"): 316 | self.dir = arg[2:] 317 | elif arg.startswith("--path="): 318 | try: 319 | self.dir = arg.split("=")[1] 320 | except IndexError: 321 | self.dir = None 322 | elif super().parse_arg(arg, needs_archive=False, *args, **kwargs): 323 | return True 324 | else: 325 | return False 326 | return True 327 | 328 | def parse_args(self, args): 329 | super().parse_args(args) 330 | if self.command is None: 331 | self.error("--borg-args must precede a borg subcommand") 332 | elif len(self.archives) == 0: 333 | self.error("the following arguments are required: archive") 334 | 335 | def help(self, short=False): 336 | print(dedent(""" 337 | usage: {} [-hpv] [--path PATH] [--borg-cmd SUBCOMMAND] 338 | archive [--borg-args ...] [archive [--borg-args ...] ...] 339 | """.format(self.prog).lstrip("\n"))) 340 | if not short: 341 | print(dedent(""" 342 | Batch multiple borg commands into one. 343 | 344 | positional arguments: 345 | archive a borg archive path (same format as borg create) 346 | 347 | optional arguments: 348 | -h, --help show this help message and exit 349 | -v, --version show version of the backup-vm package 350 | -l, --path path for borg to archive (default: .) 351 | -p, --progress force progress display even if stdout isn't a tty 352 | -c, --borg-cmd alternate borg subcommand to run (default: create) 353 | --borg-args ... extra arguments passed straight to borg 354 | """).strip("\n")) 355 | 356 | 357 | class BVMArgumentParser(ArgumentParser): 358 | 359 | """Argument parser for backup-vm. 360 | 361 | Parses common arguments (--borg-args, multiple archive locations, etc.) as 362 | well as those of backup-vm (domain). 363 | """ 364 | 365 | def __init__(self, default_name="backup-vm", args=sys.argv): 366 | self.domain = None 367 | super().__init__(default_name, args) 368 | 369 | def parse_arg(self, arg, *args, **kwargs): 370 | if not super().parse_arg(arg, *args, **kwargs): 371 | if self.domain is None: 372 | self.domain = arg 373 | else: 374 | self.disks.add(arg) 375 | return True 376 | 377 | def parse_args(self, args): 378 | super().parse_args(args) 379 | if self.domain is None or len(self.archives) == 0: 380 | self.error("the following arguments are required: domain, archive") 381 | 382 | def help(self, short=False): 383 | print(dedent(""" 384 | usage: {} [-hpv] domain [disk [disk ...]] archive 385 | [--borg-args ...] [archive [--borg-args ...] ...] 386 | """.format(self.prog).lstrip("\n"))) 387 | if not short: 388 | print(dedent(""" 389 | Back up a libvirt-based VM using borg. 390 | 391 | positional arguments: 392 | domain libvirt domain to back up 393 | disk a domain block device to back up (default: all disks) 394 | archive a borg archive path (same format as borg create) 395 | 396 | optional arguments: 397 | -h, --help show this help message and exit 398 | -v, --version show version of the backup-vm package 399 | -p, --progress force progress display even if stdout isn't a tty 400 | --borg-args ... extra arguments passed straight to borg 401 | """).strip("\n")) 402 | -------------------------------------------------------------------------------- /backup_vm/snapshot.py: -------------------------------------------------------------------------------- 1 | from xml.etree import ElementTree 2 | import subprocess 3 | import time 4 | import sys 5 | import os 6 | import libvirt 7 | 8 | 9 | def error_handler(ctx, err): 10 | if err[0] not in libvirt.ignored_errors: 11 | print("libvirt: error code {0}: {2}".format(*err), file=sys.stderr) 12 | 13 | 14 | libvirt.ignored_errors = [] 15 | libvirt.registerErrorHandler(error_handler, None) 16 | 17 | 18 | class Snapshot: 19 | 20 | def __init__(self, dom, disks, progress=True): 21 | self.dom = dom 22 | self.disks = disks 23 | self.progress = progress 24 | self.snapshotted = False 25 | self._do_snapshot() 26 | 27 | def _do_snapshot(self): 28 | snapshot_flags = libvirt.VIR_DOMAIN_SNAPSHOT_CREATE_NO_METADATA \ 29 | | libvirt.VIR_DOMAIN_SNAPSHOT_CREATE_ATOMIC \ 30 | | libvirt.VIR_DOMAIN_SNAPSHOT_CREATE_DISK_ONLY 31 | libvirt.ignored_errors = [ 32 | libvirt.VIR_ERR_OPERATION_INVALID, 33 | libvirt.VIR_ERR_ARGUMENT_UNSUPPORTED 34 | ] 35 | try: 36 | self.dom.fsFreeze() 37 | guest_agent_installed = True 38 | except libvirt.libvirtError: 39 | guest_agent_installed = False 40 | libvirt.ignored_errors = [] 41 | try: 42 | snapshot_xml = self.generate_snapshot_xml() 43 | self.dom.snapshotCreateXML(snapshot_xml, snapshot_flags) 44 | except libvirt.libvirtError: 45 | print("Failed to create domain snapshot", file=sys.stderr) 46 | sys.exit(1) 47 | finally: 48 | if guest_agent_installed: 49 | self.dom.fsThaw() 50 | self.snapshotted = True 51 | 52 | def generate_snapshot_xml(self): 53 | root_xml = ElementTree.Element("domainsnapshot") 54 | name_xml = ElementTree.SubElement(root_xml, "name") 55 | name_xml.text = self.dom.name() + "-tempsnap" 56 | desc_xml = ElementTree.SubElement(root_xml, "description") 57 | desc_xml.text = "Temporary snapshot used while backing up " + self.dom.name() 58 | memory_xml = ElementTree.SubElement(root_xml, "memory") 59 | memory_xml.attrib["snapshot"] = "no" 60 | disks_xml = ElementTree.SubElement(root_xml, "disks") 61 | for disk in self.disks: 62 | disk_xml = ElementTree.SubElement(disks_xml, "disk") 63 | if disk.snapshot_path is not None: 64 | disk_xml.attrib["name"] = disk.path 65 | source_xml = ElementTree.SubElement(disk_xml, "source") 66 | source_xml.attrib["file"] = disk.snapshot_path 67 | driver_xml = ElementTree.SubElement(disk_xml, "driver") 68 | driver_xml.attrib["type"] = "qcow2" 69 | else: 70 | disk_xml.attrib["name"] = disk.target 71 | disk_xml.attrib["snapshot"] = "no" 72 | return ElementTree.tostring(root_xml).decode("utf-8") 73 | 74 | def blockcommit(self, disks): 75 | for idx, disk in enumerate(disks): 76 | for commit_try in range(3): 77 | disk.failed = False 78 | if self.dom.blockCommit( 79 | disk.target, None, None, 80 | flags=libvirt.VIR_DOMAIN_BLOCK_COMMIT_ACTIVE 81 | | libvirt.VIR_DOMAIN_BLOCK_COMMIT_SHALLOW) < 0: 82 | print("Failed to start block commit for disk '{}'".format( 83 | disk.target).ljust(65), file=sys.stderr) 84 | disk.failed = True 85 | try: 86 | while True: 87 | info = self.dom.blockJobInfo(disk.target, 0) 88 | if info is not None and self.progress: 89 | progress = (idx + info["cur"] / info["end"]) / len(disks) 90 | print("block commit progress ({}): {}%".format( 91 | disk.target, int(100 * progress)).ljust(65), end="\u001b[65D") 92 | elif info is None: 93 | print("Failed to query block jobs for disk '{}'".format( 94 | disk.target).ljust(65), file=sys.stderr) 95 | disk.failed = True 96 | break 97 | if info["cur"] == info["end"]: 98 | break 99 | time.sleep(1) 100 | if not disk.failed: 101 | break 102 | finally: 103 | if self.progress: 104 | print("...pivoting...".ljust(65), end="\u001b[65D") 105 | if self.dom.blockJobAbort(disk.target, libvirt.VIR_DOMAIN_BLOCK_JOB_ABORT_PIVOT) < 0: 106 | suffix = "retrying..." if commit_try != 2 else "it may be in an inconsistent state" 107 | print("Pivot failed for disk '{}', {}".format(disk.target, suffix).ljust(65), file=sys.stderr) 108 | disk.failed = True 109 | time.sleep(5) 110 | else: 111 | try: 112 | os.remove(disk.snapshot_path) 113 | except PermissionError: 114 | print("Couldn't delete snapshot image '{}', please run as root".format( 115 | disk.snapshot_path).ljust(65), file=sys.stderr) 116 | break 117 | 118 | def offline_commit(self, disks): 119 | if self.progress: 120 | print("image commit progress: 0%".ljust(65), end="\u001b[65D") 121 | else: 122 | print("committing disk images") 123 | for idx, disk in enumerate(disks): 124 | for commit_try in range(3): 125 | disk.failed = False 126 | try: 127 | subprocess.run(["qemu-img", "commit", disk.snapshot_path], 128 | stdout=subprocess.DEVNULL, check=True) 129 | # restore the original image in domain definition 130 | # this is done automatically when pivoting for live commit 131 | new_xml = ElementTree.tostring(disk.xml).decode("utf-8") 132 | try: 133 | self.dom.updateDeviceFlags(new_xml) 134 | except libvirt.libvirtError: 135 | print("Device flags update failed for disk '{}'".format( 136 | disk.target).ljust(65), file=sys.stderr) 137 | print("Try replacing the path manually with 'virsh edit'", file=sys.stderr) 138 | disk.failed = True 139 | continue 140 | try: 141 | os.remove(disk.snapshot_path) 142 | except PermissionError: 143 | print("Couldn't delete snapshot image '{}', please run as root".format( 144 | disk.snapshot_path).ljust(65), file=sys.stderr) 145 | if self.progress: 146 | progress = (idx + 1) / len(disks) 147 | print("image commit progress ({}): {}%".format( 148 | disk.target, int(100 * progress)).ljust(65), end="\u001b[65D") 149 | break 150 | except FileNotFoundError: 151 | # not very likely as the qemu-img tool is normally installed 152 | # along with the libvirt/virsh stuff 153 | print("Install qemu-img to commit changes offline".ljust(65), file=sys.stderr) 154 | disk.failed = True 155 | return 156 | except subprocess.CalledProcessError: 157 | if commit_try == 2: 158 | failed_str = "Commit failed for disk '{}', retrying..." 159 | else: 160 | failed_str = "Commit failed for disk '{}'" 161 | print(failed_str.format(disk.target).ljust(65), file=sys.stderr) 162 | disk.failed = True 163 | time.sleep(5) 164 | 165 | def __enter__(self): 166 | return self 167 | 168 | def __exit__(self, *args): 169 | if not self.snapshotted: 170 | return False 171 | disks_to_backup = [x for x in self.disks if x.snapshot_path is not None] 172 | if self.dom.isActive(): 173 | # the domain is online. we can use libvirt's blockcommit feature 174 | # to commit the contents & automatically pivot afterwards 175 | self.blockcommit(disks_to_backup) 176 | else: 177 | # the domain is offline, use qemu-img for offline commit instead. 178 | # libvirt doesn't support external snapshots as well as internal, 179 | # hence this workaround 180 | self.offline_commit(disks_to_backup) 181 | if self.progress: 182 | print() 183 | return False 184 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from pkg_resources import EntryPoint 2 | from setuptools import Command 3 | from setuptools import setup 4 | from itertools import chain 5 | import contextlib 6 | import sys 7 | import io 8 | 9 | 10 | class build_usage(Command): 11 | description = "update usage section in README" 12 | user_options = [] 13 | 14 | def initialize_options(self): 15 | pass 16 | 17 | def finalize_options(self): 18 | pass 19 | 20 | def run(self): 21 | with open("README.rst", "r+") as f: 22 | lines = list(self.format_readme(f)) 23 | f.seek(0) 24 | f.writelines(lines) 25 | f.truncate() 26 | 27 | def format_readme(self, lines): 28 | skipping = False 29 | for line in lines: 30 | if line == ".. END AUTO-GENERATED USAGE\n": 31 | skipping = False 32 | if not skipping: 33 | yield line 34 | if line == ".. BEGIN AUTO-GENERATED USAGE\n": 35 | yield from self.generate_usage() 36 | skipping = True 37 | 38 | 39 | def generate_usage(self): 40 | old_argv = sys.argv 41 | scripts = self.distribution.entry_points["console_scripts"] 42 | for pkg in self.distribution.packages: 43 | for ep in EntryPoint.parse_group(pkg, scripts).values(): 44 | rs = io.StringIO() 45 | sys.argv = [None, "--help"] 46 | with contextlib.redirect_stdout(rs), contextlib.suppress(SystemExit): 47 | ep.resolve()() 48 | rs.seek(0) 49 | yield "::\n\n" 50 | for line in rs.readlines(): 51 | yield (" " if line != "\n" else "") + line 52 | yield "\n" 53 | sys.argv = old_argv 54 | yield from [] 55 | 56 | 57 | def readme(): 58 | with open("README.rst") as f: 59 | return f.read() 60 | 61 | 62 | setup(name="backup-vm", 63 | use_scm_version={ 64 | "write_to": "backup_vm/_version.py", 65 | }, 66 | description="Backup libvirt VMs with borg", 67 | long_description=readme(), 68 | classifiers=[ 69 | "Development Status :: 4 - Beta", 70 | "Environment :: Console", 71 | "Intended Audience :: System Administrators", 72 | "License :: OSI Approved :: MIT License", 73 | "Operating System :: POSIX :: Linux", 74 | "Programming Language :: Python :: 3 :: Only", 75 | "Programming Language :: Python :: 3.4", 76 | "Programming Language :: Python :: 3.5", 77 | "Programming Language :: Python :: 3.6", 78 | "Topic :: System :: Archiving :: Backup", 79 | ], 80 | keywords="borg backup libvirt vm snapshot", 81 | url="https://github.com/milkey-mouse/backup-vm", 82 | author="Milkey Mouse", 83 | author_email="milkeymouse@meme.institute", 84 | license="MIT", 85 | packages=["backup_vm"], 86 | setup_requires=["setuptools_scm>=1.7"], 87 | install_requires=[ 88 | "libvirt-python", 89 | ], 90 | entry_points={ 91 | "console_scripts": [ 92 | "backup-vm=backup_vm.backup:main", 93 | "borg-multi=backup_vm.multi:main", 94 | ], 95 | }, 96 | cmdclass={"build_usage": build_usage}, 97 | include_package_data=True, 98 | zip_safe=False) 99 | --------------------------------------------------------------------------------