├── debian
│   ├── compat
│   ├── backurne.manpages
│   ├── copyright
│   ├── backurne.install
│   ├── rules
│   ├── control
│   └── changelog
├── src
│   └── backurne
│       ├── __init__.py
│       ├── __main__.py
│       ├── pretty.py
│       ├── stats.py
│       ├── config.py
│       ├── api.py
│       ├── log.py
│       ├── backup.py
│       ├── proxmox.py
│       ├── disk.py
│       ├── ceph.py
│       ├── restore.py
│       └── backurne.py
├── .gitignore
├── graph_duration.png
├── graph_in_progress.png
├── graph
│   ├── telegraf
│   │   ├── backurne_inprogress
│   │   └── telegraf.conf
│   └── grafana-backurne.json
├── conf
│   ├── uwsgi.ini
│   └── backurne.conf
├── setup.py
├── sample-api.py
├── .pre-commit-config.yaml
├── bash
│   └── backurne
├── api.md
├── Changelog.md
├── man
│   └── backurne.1
├── cli.md
├── README.md
└── LICENSE
--------------------------------------------------------------------------------
/debian/compat:
--------------------------------------------------------------------------------
1 | 11
2 |
--------------------------------------------------------------------------------
/src/backurne/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | *.swp
--------------------------------------------------------------------------------
/debian/backurne.manpages:
--------------------------------------------------------------------------------
1 | man/backurne.1
2 |
--------------------------------------------------------------------------------
/src/backurne/__main__.py:
--------------------------------------------------------------------------------
1 | from .backurne import main
2 |
3 | main()
4 |
--------------------------------------------------------------------------------
/graph_duration.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JackSlateur/backurne/HEAD/graph_duration.png
--------------------------------------------------------------------------------
/graph_in_progress.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JackSlateur/backurne/HEAD/graph_in_progress.png
--------------------------------------------------------------------------------
/graph/telegraf/backurne_inprogress:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ps aux | grep /bin/sh | grep -ci '[i]mport-diff'
--------------------------------------------------------------------------------
/debian/copyright:
--------------------------------------------------------------------------------
1 | License: GPL-2
2 | Copyright: 2017-2019 Alexandre Bruyelles
--------------------------------------------------------------------------------
/debian/backurne.install:
--------------------------------------------------------------------------------
1 | conf/backurne.conf /etc/backurne/
2 | bash/backurne usr/share/bash-completion/completions
--------------------------------------------------------------------------------
/debian/rules:
--------------------------------------------------------------------------------
1 | #!/usr/bin/make -f
2 |
3 | #export DH_VERBOSE=1
4 |
5 | %:
6 | 	dh $@ --with python3 --buildsystem=pybuild
--------------------------------------------------------------------------------
/graph/telegraf/telegraf.conf:
--------------------------------------------------------------------------------
1 | #
2 | [[inputs.exec]]
3 |   commands = [ "/usr/local/bin/backurne_inprogress" ]
4 |   name_override = "backurne_inprogress"
5 |   data_format = "value"
--------------------------------------------------------------------------------
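The telegraf probe above is a one-line process count. For illustration only, the same measurement can be taken with psutil (already a dependency of `disk.py`); this sketch is not part of the repository and simply counts processes whose command line mentions `import-diff`:

```python
#!/usr/bin/python3
# Illustrative sketch only: count processes whose command line mentions
# "import-diff", mirroring what graph/telegraf/backurne_inprogress greps for.
import psutil


def count_in_progress():
    count = 0
    for proc in psutil.process_iter(attrs=["cmdline"]):
        cmdline = proc.info["cmdline"] or []
        if any("import-diff" in arg for arg in cmdline):
            count += 1
    return count


if __name__ == "__main__":
    print(count_in_progress())
```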
"/usr/local/bin/backurne_inprogress" ] 4 | name_override = "backurne_inprogress" 5 | data_format = "value" 6 | -------------------------------------------------------------------------------- /conf/uwsgi.ini: -------------------------------------------------------------------------------- 1 | [uwsgi] 2 | vhost = backurne-api.fqdn 3 | plugins = python3, syslog 4 | module = backurne.api 5 | callable = app 6 | workers = 4 7 | logger = syslog:uwsgi 8 | uid = root 9 | gid = root 10 | socket = 127.0.0.1:7777 11 | -------------------------------------------------------------------------------- /src/backurne/pretty.py: -------------------------------------------------------------------------------- 1 | from prettytable import PrettyTable 2 | from termcolor import colored 3 | 4 | 5 | def bold(text): 6 | return colored(text, attrs=["bold"]) 7 | 8 | 9 | def Pt(header): 10 | header = [bold(i) for i in header] 11 | pt = PrettyTable(header) 12 | pt.align = "l" 13 | pt.padding_width = 2 14 | return pt 15 | -------------------------------------------------------------------------------- /src/backurne/stats.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | 3 | import humanize 4 | 5 | from .ceph import Ceph 6 | from .config import config 7 | 8 | 9 | def print_stats(): 10 | ceph = Ceph(None) 11 | 12 | result = {} 13 | 14 | with multiprocessing.Pool(config["backup_worker"]) as p: 15 | for sizes in p.imap_unordered(ceph.du, ceph.ls()): 16 | sizes = sizes["images"] 17 | for i in sizes: 18 | try: 19 | result[i["name"]] += i["used_size"] 20 | except KeyError: 21 | result[i["name"]] = i["used_size"] 22 | 23 | result = [(k, result[k]) for k in sorted(result, key=result.get)] 24 | for key, value in result: 25 | print(key, humanize.naturalsize(value)) 26 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | with open("README.md") as fh: 4 | long_description = fh.read() 5 | 6 | setuptools.setup( 7 | name="backurne", 8 | version="2.4.0", 9 | author="Alexandre Bruyelles", 10 | author_email="backurne@jack.fr.eu.org", 11 | description="Backup Ceph's RBD on Ceph, with Proxmox integration", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | url="https://github.com/JackSlateur/backurne", 15 | packages=setuptools.find_packages("src"), 16 | package_dir={"": "src"}, 17 | classifiers=[ 18 | "Programming Language :: Python :: 3", 19 | "License :: OSI Approved :: GNU General Public License v2 (GPLv2)", 20 | "Operating System :: POSIX :: Linux", 21 | ], 22 | entry_points={ 23 | "console_scripts": [ 24 | "backurne = backurne.backurne:main", 25 | ] 26 | }, 27 | python_requires=">=3.5", 28 | install_requires=[ 29 | "termcolor", 30 | "PTable", 31 | "requests", 32 | "proxmoxer", 33 | "sh", 34 | "python-dateutil", 35 | "filelock", 36 | "setproctitle", 37 | "progressbar", 38 | ], 39 | ) 40 | -------------------------------------------------------------------------------- /sample-api.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import json 3 | 4 | from flask import Flask 5 | from flask import request 6 | 7 | app = Flask(__name__) 8 | 9 | 10 | def send_json(data, code=200): 11 | return json.dumps(data), 200, {"Content-Type": "application/json"} 12 | 13 | 14 | @app.route("/", methods=["POST"]) 15 | def profile(): 16 | 
/setup.py:
--------------------------------------------------------------------------------
1 | import setuptools
2 |
3 | with open("README.md") as fh:
4 |     long_description = fh.read()
5 |
6 | setuptools.setup(
7 |     name="backurne",
8 |     version="2.4.0",
9 |     author="Alexandre Bruyelles",
10 |     author_email="backurne@jack.fr.eu.org",
11 |     description="Backup Ceph's RBD on Ceph, with Proxmox integration",
12 |     long_description=long_description,
13 |     long_description_content_type="text/markdown",
14 |     url="https://github.com/JackSlateur/backurne",
15 |     packages=setuptools.find_packages("src"),
16 |     package_dir={"": "src"},
17 |     classifiers=[
18 |         "Programming Language :: Python :: 3",
19 |         "License :: OSI Approved :: GNU General Public License v2 (GPLv2)",
20 |         "Operating System :: POSIX :: Linux",
21 |     ],
22 |     entry_points={
23 |         "console_scripts": [
24 |             "backurne = backurne.backurne:main",
25 |         ]
26 |     },
27 |     python_requires=">=3.7",
28 |     install_requires=[
29 |         "termcolor",
30 |         "PTable",
31 |         "requests",
32 |         "proxmoxer",
33 |         "sh",
34 |         "python-dateutil",
35 |         "filelock",
36 |         "setproctitle",
37 |         "progressbar",
38 |     ],
39 | )
--------------------------------------------------------------------------------
/sample-api.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | import json
3 |
4 | from flask import Flask
5 | from flask import request
6 |
7 | app = Flask(__name__)
8 |
9 |
10 | def send_json(data, code=200):
11 |     return json.dumps(data), code, {"Content-Type": "application/json"}
12 |
13 |
14 | @app.route("/", methods=["POST"])
15 | def profile():
16 |     # data is fed with something like:
17 |     # {'cluster': {
18 |     #     'fqdn': 'supercluster.fqdn.org', 'name': 'supercluster', 'type': 'proxmox'},
19 |     #  'vm': {'name': 'super-server', 'vmid': 115},
20 |     #  'disk': {'rbd': 'vm-115-disk-1', 'ceph': 'cephcluster'}
21 |     # }
22 |     data = request.get_json()
23 |
24 |     # Add your logic here
25 |     # As a sample, we only add profiles if the VM's name is 'super-server'
26 |     if data["vm"]["name"] == "super-server":
27 |         # A sample output, which is roughly the same as config's profiles
28 |         # Each profile will be added to the config's
29 |         # Thus, there is no replacement nor override
30 |         result = {
31 |             "profiles": {
32 |                 "daily": {
33 |                     "count": 365,
34 |                     "max_on_live": 10,
35 |                 },
36 |                 "hourly": {
37 |                     "count": 48,
38 |                     "max_on_live": 0,
39 |                     "priority": "high",
40 |                 },
41 |             }
42 |         }
43 |     else:
44 |         # An empty dict means "no additional profile"
45 |         result = {}
46 |
47 |     # Additionally, we can disable backups by setting 'backup' to False
48 |     # Any other value is meaningless
49 |     if data["vm"]["vmid"] == 1234:
50 |         result["backup"] = False
51 |
52 |     return send_json(result)
53 |
--------------------------------------------------------------------------------
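Backurne itself is the client of this endpoint: it POSTs a description of the cluster, VM and disk, and merges the returned profiles into the configured ones. A quick way to exercise the sample above — the URL is an assumption, and the payload values are taken from the comments in `profile()`:

```python
# Manual test for sample-api.py; the URL and payload values are assumptions
# matching the comments in profile() above.
import requests

payload = {
    "cluster": {"fqdn": "supercluster.fqdn.org", "name": "supercluster", "type": "proxmox"},
    "vm": {"name": "super-server", "vmid": 115},
    "disk": {"rbd": "vm-115-disk-1", "ceph": "cephcluster"},
}

resp = requests.post("http://localhost:5000/", json=payload)
print(resp.json())  # {'profiles': {'daily': {...}, 'hourly': {...}}}
```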
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | ---
2 | repos:
3 |   - repo: https://github.com/pre-commit/pre-commit-hooks
4 |     rev: v6.0.0
5 |     hooks:
6 |       - id: check-yaml
7 |       - id: end-of-file-fixer
8 |       - id: trailing-whitespace
9 |       - id: check-builtin-literals
10 |       - id: check-added-large-files
11 |       - id: check-ast
12 |       - id: check-toml
13 |       - id: check-case-conflict
14 |       - id: name-tests-test
15 |       - id: detect-private-key
16 |         exclude: |
17 |           (?x)^(
18 |               README.md|
19 |               .config.yml.default
20 |           )$
21 |       - id: pretty-format-json
22 |         args: ['--autofix']
23 |       - id: mixed-line-ending
24 |         args: ['--fix=lf']
25 |         description: Forces replacement of line endings with the UNIX 'lf' character.
26 |   - repo: https://github.com/astral-sh/ruff-pre-commit
27 |     # Ruff version.
28 |     rev: v0.12.10
29 |     hooks:
30 |       # Run the linter.
31 |       - id: ruff
32 |         args: [--fix]
33 |       # Run the formatter.
34 |       - id: ruff-format
35 |         args: ["--line-length", "88"]
36 |   - repo: https://github.com/compilerla/conventional-pre-commit
37 |     rev: v4.2.0
38 |     hooks:
39 |       - id: conventional-pre-commit
40 |         stages: [commit-msg]
41 |         args: []
42 |   - repo: https://github.com/codespell-project/codespell
43 |     rev: v2.4.1
44 |     hooks:
45 |       - id: codespell
46 |         additional_dependencies:
47 |           - tomli
48 |         args:
49 |           - "-L"
50 |           - "BU"
51 |   - repo: https://github.com/asottile/pyupgrade
52 |     rev: v3.20.0
53 |     hooks:
54 |       - id: pyupgrade
55 |   - repo: https://github.com/asottile/reorder-python-imports
56 |     rev: v3.15.0
57 |     hooks:
58 |       - id: reorder-python-imports
59 |         args:
60 |           - --py39-plus
61 |   - repo: https://github.com/google/yamlfmt
62 |     rev: v0.17.2
63 |     hooks:
64 |       - id: yamlfmt
65 |         args:
66 |           - -formatter
67 |           - include_document_start=true
--------------------------------------------------------------------------------
/bash/backurne:
--------------------------------------------------------------------------------
1 | _backurne(){
2 | 	local cur prev opts
3 | 	COMPREPLY=()
4 | 	cur="${COMP_WORDS[COMP_CWORD]}"
5 | 	prev="${COMP_WORDS[COMP_CWORD-1]}"
6 | 	subcmd="${COMP_WORDS[1]}"
7 |
8 | 	if [ "$prev" == "backurne" ]
9 | 	then
10 | 		opts="backup check check-snap list-mapped ls map unmap stats"
11 | 		COMPREPLY=($(compgen -W "$opts" -- ${cur}))
12 | 		return 0
13 | 	fi
14 |
15 | 	if [ "$subcmd" == "unmap" ]
16 | 	then
17 | 		if [ "$prev" == "unmap" ]
18 | 		then
19 | 			opts="$(backurne list-mapped --json | jq -r .[].parent_image)"
20 | 		else
21 | 			prev=$(echo $prev | tr -d '\\')
22 | 			opts="$(backurne list-mapped --json | jq -r ".[] | select(.parent_image | contains(\"$prev\")) | .parent_snap")"
23 | 		fi
24 | 		COMPREPLY=($(compgen -W "$opts" -- ${cur}))
25 | 		return 0
26 | 	fi
27 |
28 | 	if [ "$subcmd" == "ls" ]
29 | 	then
30 | 		if [ "$cur" == "" ]
31 | 		then
32 | 			opts="$(backurne ls --json | jq -r .[].uuid)"
33 | 			COMPREPLY=($(compgen -W "$opts" -- ${cur}))
34 | 		else
35 | 			opts="$(backurne ls --json | jq -r .[].uuid | grep $cur)"
36 | 			COMPREPLY=($opts $(compgen -W "$opts" -- ${cur}))
37 | 		fi
38 | 		return 0
39 | 	fi
40 |
41 | 	if [ "$subcmd" == "map" ]
42 | 	then
43 | 		if [ "$prev" == "map" ]
44 | 		then
45 | 			if [ "$cur" == "" ]
46 | 			then
47 | 				opts="$(backurne ls --json | jq -r .[].uuid)"
48 | 				COMPREPLY=($(compgen -W "$opts" -- ${cur}))
49 | 			else
50 | 				opts="$(backurne ls --json | jq -r .[].uuid | grep $cur)"
51 | 				COMPREPLY=($opts $(compgen -W "$opts" -- ${cur}))
52 | 			fi
53 | 		else
54 | 			prev=$(echo $prev | tr -d '\\')
55 | 			opts="$(backurne ls --json $prev | jq -r .[].uuid)"
56 | 			COMPREPLY=($(compgen -W "$opts" -- ${cur}))
57 | 		fi
58 | 		return 0
59 | 	fi
60 |
61 | 	if [ "$subcmd" == "backup" ]
62 | 	then
63 | 		if [ "$prev" == "--cluster" ]
64 | 		then
65 | 			opts=$(python3 - <
--------------------------------------------------------------------------------
/src/backurne/api.py:
--------------------------------------------------------------------------------
39 | @app.route("/backup/<host>/")
40 | def get(host):
41 |     restore = Restore()
42 |     data = restore.ls()
43 |
44 |     result = []
45 |     for i in data:
46 |         if i["ident"] == host:
47 |             result.append(
48 |                 {
49 |                     "ident": i["ident"],
50 |                     "disk": i["disk"],
51 |                     "uuid": i["uuid"],
52 |                 }
53 |             )
54 |
55 |     return send_json(result)
56 |
57 |
58 | @app.route("/backup/<rbd>/")
59 | def ls_snaps(rbd):
60 |     rbd = urllib.parse.unquote(rbd)
61 |     restore = Restore(rbd)
62 |     data = restore.ls()
63 |
64 |     result = []
65 |     for i in data:
66 |         result.append(
67 |             {
68 |                 "creation_date": str(i["creation"]),
69 |                 "uuid": i["uuid"],
70 |             }
71 |         )
72 |
73 |     return send_json(result)
74 |
75 |
76 | @app.route("/map/<rbd>/<snap>/")
77 | def map(rbd, snap):
78 |     restore = Restore(rbd, snap)
79 |     status = restore.mount()
80 |     if status is None:
send_json({"success": False, "path": None}, code=500) 82 | else: 83 | status = status.replace("/tmp/", "") 84 | return send_json({"success": True, "path": status}) 85 | 86 | 87 | @app.route("/unmap///") 88 | def unmap(rbd, snap): 89 | restore = Restore(rbd, snap) 90 | restore.umount() 91 | return send_json({"success": True}) 92 | 93 | 94 | @app.route("/mapped/") 95 | def mapped(): 96 | data = get_mapped(extended=False) 97 | result = [] 98 | for tree in data: 99 | result.append(prepare_tree_to_json(tree)) 100 | return send_json(result) 101 | 102 | 103 | auto_bp = Blueprint("auto_bp", __name__) 104 | # FIXME: use config or something 105 | AutoIndexBlueprint(auto_bp, browse_root="/tmp/") 106 | 107 | app.register_blueprint(auto_bp, url_prefix="/explore") 108 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: backurne 2 | Section: admin 3 | Priority: optional 4 | Maintainer: Alexandre Bruyelles 5 | Build-Depends: debhelper (>= 11), 6 | dh-python, 7 | python3-setuptools, 8 | python3, 9 | Standards-Version: 4.3.0 10 | Rules-Requires-Root: no 11 | Homepage: https://github.com/JackSlateur/backurne 12 | 13 | Package: backurne 14 | Architecture: any 15 | Depends: python3 (>= 3.7), 16 | python3-termcolor, 17 | python3-pkg-resources, 18 | python3-ptable | python3-prettytable, 19 | python3-requests, 20 | python3-proxmoxer, 21 | python3-sh, 22 | python3-dateutil, 23 | python3-filelock, 24 | python3-setproctitle, 25 | python3-progressbar, 26 | python3-psutil, 27 | python3-humanize, 28 | ceph-common (>= 12.2.0), 29 | rbd-nbd (>= 12.2.0), 30 | kpartx, 31 | xxhash, 32 | ${misc:Depends}, 33 | ${python3:Depends}, 34 | ${shlibs:Depends} 35 | Suggests: jq, 36 | vmfs-tools, 37 | vmfs6-tools 38 | Description: Backup Ceph's RBD on Ceph, with Proxmox integration 39 | backurne is a handy tool for backuping RBD's image on RBD. 40 | Yep ! What is better, for backuping a Ceph cluster, than another Ceph cluster ? 41 | . 42 | It does not do much by itself, though, but orchestrate and relies 43 | heavily on other tools. 44 | It has a fine integration with Proxmox, but is able to backup "plain" 45 | (or "raw RBD") cluster as well. 46 | . 47 | Supported features 48 | - Snapshot-based backup, with no agent strictly required on the VM. 49 | . 50 | - Backup inspection and restoration via command line interface as well as 51 | via REST API. 52 | . 53 | - Support multiple retention policy efficiently (both in term of storage 54 | and network bandwidth), dynamically configurable per host (proxmox-only) 55 | via REST API. 56 | . 57 | - Auto cleanup : deletion is never generated by a human, thus no human 58 | mistakes. 59 | . 60 | - Compression and encryption "on the wire" for enhanced efficiency 61 | and security. 62 | . 63 | - Peaceful integration with other snapshots (via Proxmox web interface 64 | or whatever). 65 | . 66 | - Multiple cluster support, with mixed type ("proxmox" and "plain"). 67 | . 68 | - A couple of backups can be stored on the live clusters, for faster recovery. 69 | . 70 | - Optional fsfreeze support (proxmox-only) via Qemu-quest-agent. 71 | . 72 | - Backup deactivation via Proxmox's web interface. 73 | . 74 | - VM tracking, for those who uses a single Proxmox cluster with 75 | multiple Ceph backend. 76 | . 
/debian/control:
--------------------------------------------------------------------------------
1 | Source: backurne
2 | Section: admin
3 | Priority: optional
4 | Maintainer: Alexandre Bruyelles <backurne@jack.fr.eu.org>
5 | Build-Depends: debhelper (>= 11),
6 |                dh-python,
7 |                python3-setuptools,
8 |                python3,
9 | Standards-Version: 4.3.0
10 | Rules-Requires-Root: no
11 | Homepage: https://github.com/JackSlateur/backurne
12 |
13 | Package: backurne
14 | Architecture: any
15 | Depends: python3 (>= 3.7),
16 |          python3-termcolor,
17 |          python3-pkg-resources,
18 |          python3-ptable | python3-prettytable,
19 |          python3-requests,
20 |          python3-proxmoxer,
21 |          python3-sh,
22 |          python3-dateutil,
23 |          python3-filelock,
24 |          python3-setproctitle,
25 |          python3-progressbar,
26 |          python3-psutil,
27 |          python3-humanize,
28 |          ceph-common (>= 12.2.0),
29 |          rbd-nbd (>= 12.2.0),
30 |          kpartx,
31 |          xxhash,
32 |          ${misc:Depends},
33 |          ${python3:Depends},
34 |          ${shlibs:Depends}
35 | Suggests: jq,
36 |           vmfs-tools,
37 |           vmfs6-tools
38 | Description: Backup Ceph's RBD on Ceph, with Proxmox integration
39 |  backurne is a handy tool for backing up RBD images on RBD.
40 |  Yep! What is better, for backing up a Ceph cluster, than another Ceph cluster?
41 |  .
42 |  It does not do much by itself, though, but orchestrates and relies
43 |  heavily on other tools.
44 |  It has a fine integration with Proxmox, but is able to backup "plain"
45 |  (or "raw RBD") clusters as well.
46 |  .
47 |  Supported features
48 |  - Snapshot-based backup, with no agent strictly required on the VM.
49 |  .
50 |  - Backup inspection and restoration via command line interface as well as
51 |  via REST API.
52 |  .
53 |  - Supports multiple retention policies efficiently (both in terms of storage
54 |  and network bandwidth), dynamically configurable per host (proxmox-only)
55 |  via REST API.
56 |  .
57 |  - Auto cleanup: deletion is never generated by a human, thus no human
58 |  mistakes.
59 |  .
60 |  - Compression and encryption "on the wire" for enhanced efficiency
61 |  and security.
62 |  .
63 |  - Peaceful integration with other snapshots (via Proxmox web interface
64 |  or whatever).
65 |  .
66 |  - Multiple cluster support, with mixed types ("proxmox" and "plain").
67 |  .
68 |  - A couple of backups can be stored on the live clusters, for faster recovery.
69 |  .
70 |  - Optional fsfreeze support (proxmox-only) via Qemu-guest-agent.
71 |  .
72 |  - Backup deactivation via Proxmox's web interface.
73 |  .
74 |  - VM tracking, for those who use a single Proxmox cluster with
75 |  multiple Ceph backends.
76 |  .
77 |  - Encryption and compression at rest are also seamlessly supported
78 |  via Bluestore OSDs (see https://ceph.com/community/new-luminous-bluestore/)
--------------------------------------------------------------------------------
/api.md:
--------------------------------------------------------------------------------
1 | # Rest API documentation
2 |
3 | #### Note
4 | No authentication or authorization is performed in any way. You should use a proxy, with basic auth and TLS.\
5 | Lastly, the API code **must** be run as root (well, it must have CAP_SYS_ADMIN), because it will handle block devices, mount filesystems etc.
6 |
7 | ## Listing backed up disks
8 | ```
9 | 12% [jack@jack:~]curl -s http://localhost:5000/backup/ | python -mjson.tool
10 | [
11 |     {
12 |         "disk": "vm-136-disk-1",
13 |         "ident": "test-backurne",
14 |         "uuid": "8eb4f698-afdc-45bb-9f6c-1833c42ae368;vm-136-disk-1;test-backurne"
15 |     }
16 | ]
17 | ```
18 |
19 | ## Listing snapshots for a disk
20 | ```
21 | 11% [jack@jack:~]curl -s "http://localhost:5000/backup/8eb4f698-afdc-45bb-9f6c-1833c42ae368;vm-136-disk-1;test-backurne/" | python -mjson.tool
22 | [
23 |     {
24 |         "creation_date": "2018-06-01 15:44:26.072348",
25 |         "uuid": "backup;daily;30;2018-06-01T15:44:26.072348"
26 |     },
27 |     {
28 |         "creation_date": "2018-06-01 15:44:26.499066",
29 |         "uuid": "backup;hourly;48;2018-06-01T15:44:26.499066"
30 |     }
31 | ]
32 | ```
33 |
34 | ## Map a snapshot
35 | ```
36 | 11% [jack@jack:~]curl -s "http://localhost:5000/map/8eb4f698-afdc-45bb-9f6c-1833c42ae368;vm-136-disk-1;test-backurne/backup;hourly;48;2018-06-01T15:44:26.499066/" | python -mjson.tool
37 | {
38 |     "path": "tmp4_6ipuaw",
39 |     "success": true
40 | }
41 | ```
42 | The files can then be explored via a webgui at http://localhost:5000/explore/tmp4_6ipuaw/
43 |
44 | ## Listing currently mounted snapshots
45 | ```
46 | 11% [jack@jack:~]curl -s "http://localhost:5000/mapped/" | python -mjson.tool
47 | [
48 |     {
49 |         "dev": "/dev/nbd0",
50 |         "fstype": null,
51 |         "mountpoint": null,
52 |         "image": "restore-1",
53 |         "parent_image": "8eb4f698-afdc-45bb-9f6c-1833c42ae368;vm-136-disk-1",
54 |         "parent_snap": "test-backurne/backup;hourly;48;2018-06-01T15:44:26.499066",
55 |         "mapped": null,
56 |         "size": null,
57 |         "children": [
58 |             {
59 |                 "dev": "/dev/nbd0",
60 |                 "fstype": null,
61 |                 "mountpoint": null,
62 |                 "image": null,
63 |                 "parent_image": null,
64 |                 "parent_snap": null,
65 |                 "mapped": null,
66 |                 "size": "20G",
67 |                 "children": [
68 |                     {
69 |                         "dev": "/dev/nbd0p1",
70 |                         "fstype": "xfs",
71 |                         "mountpoint": "/tmp/tmp4_6ipuaw",
72 |                         "image": null,
73 |                         "parent_image": null,
74 |                         "parent_snap": null,
75 |                         "mapped": null,
76 |                         "size": "20G",
77 |                         "children": []
78 |                     }
79 |                 ]
80 |             }
81 |         ]
82 |     }
83 | ]
84 | ```
85 |
86 | ## Cleaning things up
87 | ```
88 | 18% [jack@jack:~]curl -s "http://localhost:5000/unmap/8eb4f698-afdc-45bb-9f6c-1833c42ae368;vm-136-disk-1;test-backurne/backup;hourly;48;2018-06-01T15:44:26.499066/" | python -mjson.tool
89 | {
90 |     "success": true
91 | }
92 | ```
--------------------------------------------------------------------------------
/debian/changelog:
--------------------------------------------------------------------------------
1 | backurne (2.4.0) UNRELEASED; urgency=medium
2 |
3 |   * Add support for RBD namespaces
4 |
5 |  -- Alexandre Bruyelles <backurne@jack.fr.eu.org>  Thu, 28 Aug 2025 12:54:19 +0200
6 |
7 | backurne (2.3.3) UNRELEASED; urgency=medium
8 |
9 |   * Improve the API
10 |
11 |  -- Alexandre Bruyelles <backurne@jack.fr.eu.org>  Wed, 16 Jul 2025 15:30:19 +0100
12 |
13 | backurne (2.3.2) UNRELEASED; urgency=medium
14 |
15 |   * Avoid deprecated use of `rbd nbd` command
16 |
17 |  -- Alexandre Bruyelles <backurne@jack.fr.eu.org>  Thu, 22 May 2025 17:08:19 +0100
18 |
19 | backurne (2.3.1) UNRELEASED; urgency=medium
20 |
21 |   * Support python 3.12
22 |
23 |  -- Alexandre Bruyelles <backurne@jack.fr.eu.org>  Sat, 13 Jul 2024 17:08:19 +0100
24 |
25 | backurne (2.3.0) UNRELEASED; urgency=medium
26 |
27 |   * Report time to influxdb
28 |   * Support Microsoft dynamic disks (LDM)
29 |   * Implement task priorities
30 |   * Reimplement the worker logic, with a per-cluster pool
31 |
32 |  -- Alexandre Bruyelles <backurne@jack.fr.eu.org>  Sat, 30 Jul 2022 11:08:19 +0100
33 |
34 | backurne (2.2.1) UNRELEASED; urgency=medium
35 |
36 |   * gzip has been replaced by zstd.
37 |   * fix unmap when an LV is spread across multiple PVs inside the same vmdk
38 |   * per-backup progress is now shown in the proctitle
39 |   * add a warning if some snapshots could not be deleted in time
40 |
41 |  -- Alexandre Bruyelles <backurne@jack.fr.eu.org>  Mon, 12 Apr 2021 10:01:19 +0100
42 |
43 | backurne (2.2.0) UNRELEASED; urgency=medium
44 |
45 |   * add a --cleanup option to the `backup` subcommand.
46 |   * fix vmfs6 support.
47 |   * add a --debug option for one-shot verbosity.
48 |   * rework the `map` subcommand with enhancements to the vmdk support (especially in conjunction with lvm).
49 |   * 'Plain' clusters can now be reached not only via SSH, but also via any user-defined way. Kubernetes is the main target here, yet it should work with anything.
50 |
51 |  -- Alexandre Bruyelles <backurne@jack.fr.eu.org>  Sat, 24 Aug 2020 09:40:19 +0100
52 |
53 | backurne (2.1.0) UNRELEASED; urgency=medium
54 |
55 |   * Backing up only a subset of images in a `backurne backup` invocation is now possible, as well as forcing a backup (even if it is considered unneeded according to the profile).
56 |   * **Backurne** now reports the time elapsed to process each backup, either to a plain file or via syslog. See the `report_time` configuration entry.
57 |
58 |  -- Alexandre Bruyelles <backurne@jack.fr.eu.org>  Sat, 04 Jul 2020 13:50:19 +0100
59 |
60 | backurne (2.0.0) UNRELEASED; urgency=medium
61 |
62 |   * The `list-mapped` subcommand has been reworked to support complex mappings. Command outputs (both cli & api) have been altered to support those changes.
63 |   * **Backurne** now supports LVM. See README.md for its specific configuration.
64 |   * **Backurne** now supports vmware. Also see README.md.
65 |
66 |  -- Alexandre Bruyelles <backurne@jack.fr.eu.org>  Mon, 13 Jan 2020 13:40:19 +0100
67 |
68 | backurne (1.1.0) UNRELEASED; urgency=medium
69 |
70 |   * Add hook support
71 |
72 |  -- Alexandre Bruyelles <backurne@jack.fr.eu.org>  Mon, 01 Dec 2019 13:40:19 +0100
73 |
74 | backurne (1.0.0) UNRELEASED; urgency=medium
75 |
76 |   * First release
77 |
78 |  -- Alexandre Bruyelles <backurne@jack.fr.eu.org>  Mon, 30 Sep 2019 23:40:19 +0100
--------------------------------------------------------------------------------
/Changelog.md:
--------------------------------------------------------------------------------
1 | PENDING
2 | ---
3 |
4 | **Notable changes**:
5 |
6 | Version 2.4.0
7 | ---
8 |
9 | **Notable changes**:
10 | * Add support for RBD namespaces
11 |
12 | Version 2.3.3
13 | ---
14 |
15 | **Notable changes**:
16 | * Improve the API
17 |
18 | Version 2.3.2
19 | ---
20 |
21 | **Notable changes**:
22 | * Avoid deprecated use of `rbd nbd` command
23 |
24 | Version 2.3.1
25 | ---
26 |
27 | **Notable changes**:
28 | * Support python 3.12
29 |
30 | Version 2.3.0
31 | ---
32 |
33 | **Notable changes**:
34 | * Report time to influxdb
35 | * Support Microsoft dynamic disks (LDM)
36 | * Implement task priorities
37 | * Reimplement the worker logic, with a per-cluster pool
38 |
39 | Thanks to Cyllene (https://www.groupe-cyllene.com/) for sponsoring this work!
40 |
41 | Version 2.2.1
42 | ---
43 |
44 | **Notable changes**:
45 | * gzip has been replaced by zstd.
46 | * fix unmap when an LV is spread across multiple PVs inside the same vmdk
47 | * per-backup progress is now shown in the proctitle
48 | * add a warning if some snapshots could not be deleted in time
49 |
50 | Thanks to Cyllene (https://www.groupe-cyllene.com/) for sponsoring this work!
51 |
52 |
53 |
54 | Version 2.2.0
55 | ---
56 |
57 | **Notable changes**:
58 | * add a --cleanup option to the `backup` subcommand.
59 | * fix vmfs6 support.
60 | * add a --debug option for one-shot verbosity.
61 | * rework the `map` subcommand with enhancements to the vmdk support (especially in conjunction with lvm).
62 | * 'Plain' clusters can now be reached not only via SSH, but also via any user-defined way. Kubernetes is the main target here, yet it should work with anything.
63 |
64 | Version 2.1.0
65 | ---
66 |
67 | **Notable changes**:
68 | * Backing up only a subset of images in a `backurne backup` invocation is now possible, as well as forcing a backup (even if it is considered unneeded according to the profile). See [cli.md](cli.md).
69 | * **Backurne** now reports the time elapsed to process each backup, either to a plain file or via syslog. See the `report_time` configuration entry.
70 |
71 | Version 2.0.0
72 | ---
73 |
74 | **Notable changes**:
75 | * The `list-mapped` subcommand has been reworked to support complex mappings. Command outputs (both cli & api) have been altered to support those changes.
76 | * **Backurne** now supports LVM. See [README.md](README.md) for its specific configuration.
77 | * **Backurne** now supports vmware. Also see [README.md](README.md).
78 |
79 | Version 1.1.0
80 | ---
81 |
82 | **Notable changes**:
83 | * **Backurne** now supports a hook infrastructure. Actions can be performed before and after specific events: for instance, stopping a database slave before backup, and starting it after.
84 |
85 | Version 1.0.0
86 | ---
87 |
88 | This version is centered around ease of use and reporting. The core algorithm has not changed much, but the release is supposed to be easier for people to use, simpler to understand etc.
89 |
90 | **Notable changes**:
91 | * **Backurne** now supports per-image locks. Multiple **Backurne** instances can now run at the same time, safely. However, worker count is per instance (backup_worker and live_worker).
92 | * The source tree has been reworked to use python3-setuptools. Debian packaging is supported, for easier install / updates.
93 | * Status reporting has been greatly improved: output is more concise, progress is shown as much as possible. Each process's current task is shown in **ps**, **htop** etc.
94 | * Options parsing has been reworked and is more bulletproof.
95 |
--------------------------------------------------------------------------------
/src/backurne/log.py:
--------------------------------------------------------------------------------
1 | import datetime
2 | import logging.handlers
3 | import sys
4 | import syslog
5 |
6 | from termcolor import colored
7 |
8 | from .config import config
9 |
10 |
11 | class ConsoleFormatter(logging.Formatter):
12 |     def format(self, record):
13 |         if record.levelno == logging.DEBUG:
14 |             msg = (
15 |                 f"[{record.filename}:{record.lineno}:{record.funcName}()] {record.msg}"
16 |             )
17 |         else:
18 |             msg = record.msg
19 |         if record.levelno >= logging.ERROR:  # also covers CRITICAL
20 |             front = colored(" CRIT: ", "red")
21 |         if record.levelno == logging.WARNING:
22 |             front = colored(" WARN: ", "yellow")
23 |         if record.levelno == logging.INFO:
24 |             front = colored(" INFO: ", "green")
25 |         if record.levelno == logging.DEBUG:
26 |             front = colored(" DEBUG: ", "green")
27 |
28 |         msg = f"{front}{msg}"
29 |
30 |         record.msg = msg
31 |
32 |         return logging.Formatter.format(self, record)
33 |
34 |
35 | def report_to_influx(image, endpoint, duration):
36 |     from influxdb import InfluxDBClient
37 |
38 |     conf = config["influxdb"]
39 |
40 |     if conf["host"] is None or conf["db"] is None:
41 |         log.warning("influxdb: host or db are not defined, cannot do proper reporting")
42 |         return
43 |
44 |     if conf["mtls"] is None:
45 |         influx = InfluxDBClient(
46 |             conf["host"],
47 |             conf["port"],
48 |             database=conf["db"],
49 |             ssl=conf["tls"],
50 |             verify_ssl=conf["verify_tls"],
51 |         )
52 |     else:
53 |         influx = InfluxDBClient(
54 |             conf["host"],
55 |             conf["port"],
56 |             database=conf["db"],
57 |             ssl=conf["tls"],
58 |             verify_ssl=conf["verify_tls"],
59 |             cert=conf["mtls"],
60 |         )
61 |
62 |     data = [
63 |         {
64 |             "measurement": "backurne",
65 |             "tags": {
66 |                 "image": image,
67 |                 "endpoint": endpoint,
68 |             },
69 |             "time": datetime.datetime.now().replace(microsecond=0).isoformat(),
70 |             "fields": {
71 |                 "duration": int(duration.total_seconds()),
72 |             },
73 |         }
74 |     ]
75 |
76 |     influx.write_points(data)
77 |
78 |
79 | def report_time(image, endpoint, duration):
80 |     if config["report_time"] is None:
81 |         return
82 |
83 |     msg = f"Image {image} from {endpoint} backed up, elapsed time: {duration}"
84 |     msg = f"{datetime.datetime.now()}: {msg}"
85 |     if config["report_time"] == "syslog":
86 |         syslog.syslog(syslog.LOG_INFO, msg)
87 |     elif config["report_time"] == "influxdb":
88 |         report_to_influx(image, endpoint, duration)
89 |     else:
90 |         with open(config["report_time"], "a") as f:
91 |             f.write(f"{msg}\n")
92 |
93 |
94 | def has_debug(log):
95 |     return log.level == logging.DEBUG
96 |
97 |
98 | log = logging.getLogger("backurne")
99 |
100 | slog = logging.handlers.SysLogHandler(address="/dev/log")
101 | detailed_formatter = logging.Formatter(
102 |     "%(name)s[%(process)d]: %(levelname)s: [%(filename)s:%(lineno)s:%(funcName)s()] %(message)s"
103 | )
104 | slog.setFormatter(detailed_formatter)
105 | log.addHandler(slog)
106 |
107 | if sys.stdout.isatty():
108 |     console = logging.StreamHandler()
109 |     if config["pretty_colors"] is True:
110 |         console.setFormatter(ConsoleFormatter())
111 |     log.addHandler(console)
112 |
113 | if config["log_level"] == "debug":
114 |     log.setLevel(logging.DEBUG)
115 | elif config["log_level"] == "info":
116 |     log.setLevel(logging.INFO)
117 | elif config["log_level"] == "warn":
118 |     log.setLevel(logging.WARNING)
119 | else:
120 |     log.setLevel(logging.ERROR)
--------------------------------------------------------------------------------
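`report_time()` is invoked by the backup path (not part of this excerpt) with the image name, the endpoint and a `datetime.timedelta`; depending on the `report_time` configuration key, the measurement goes to syslog, to InfluxDB, or is appended to a plain file. An assumed call pattern:

```python
# Assumed call pattern for report_time(); the real caller lives in the
# backup path, which is not part of this excerpt.
import datetime

from backurne.log import report_time

start = datetime.datetime.now()
# ... run the actual export/import here ...
duration = datetime.datetime.now() - start

# With config["report_time"] = "syslog", this logs one line via syslog;
# with "influxdb", it writes a "backurne" measurement point instead.
report_time("vm-136-disk-1", "infraceph1", duration)
```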
/src/backurne/backup.py:
--------------------------------------------------------------------------------
1 | import datetime
2 |
3 | import dateutil.parser
4 |
5 | from .config import config
6 | from .log import log as Log
7 |
8 |
9 | class Bck:
10 |     def __init__(self, name, ceph, rbd, vm=None, adapter=None):
11 |         self.name = name
12 |         self.ceph = ceph
13 |         self.rbd = rbd
14 |         self.vm = vm
15 |         self.adapter = adapter
16 |
17 |         self.source = f"{self.name}:{self.rbd}"
18 |
19 |         self.dest = self.__build_dest()
20 |
21 |         # Store here the last snapshot created via this object
22 |         # It is not yet on the backup cluster
23 |         self.last_created_snap = None
24 |
25 |     def __str__(self):
26 |         if self.vm is not None:
27 |             return "{}/{}".format(self.vm["name"], self.rbd)
28 |         else:
29 |             return "{}/{}".format(self.name, self.rbd)
30 |
31 |     def __build_dest(self):
32 |         ident = self.name
33 |         comment = None
34 |
35 |         if self.vm is not None:
36 |             comment = self.vm["name"]
37 |             if self.vm["px"].px_config["use_smbios"]:
38 |                 if self.vm["smbios"] is not None:
39 |                     ident = self.vm["smbios"]
40 |             dest = f"{ident};{self.adapter};{comment}"
41 |             return dest
42 |
43 |         dest = f"{ident};{self.rbd};{comment}"
44 |         return dest
45 |
46 |     def __snap_name(self, profile, value):
47 |         name = f"{profile};{value}"
48 |         Log.debug(f"Processing {self.source} ({name})")
49 |         name = f"{config['snap_prefix']};{name}"
50 |         return name
51 |
52 |     def __last_snap_profile(self, profile):
53 |         snaps = self.ceph.backup.snap(self.dest)
54 |         good = []
55 |         for snap in snaps:
56 |             split = snap.split(";")
57 |             if split[1] != profile:
58 |                 continue
59 |             good.append(snap)
60 |         return self.ceph.get_last_snap(good)
61 |
62 |     def dl_snap(self, snap_name, dest, last_snap):
63 |         Log.debug(f"Exporting {self.source} {snap_name}")
64 |         if not self.ceph.backup.exists(dest):
65 |             # Create a dummy image, on our backup cluster,
66 |             # which will receive a full snapshot
67 |             self.ceph.backup("create", dest, "-s", "1")
68 |
69 |         self.ceph.do_backup(self.rbd, snap_name, dest, last_snap)
70 |         Log.debug(f"Export {self.source} {snap_name} complete")
71 |
72 |     def check_profile(self, profile):
73 |         try:
74 |             last_profile = self.__last_snap_profile(profile)
75 |         except Exception:
76 |             # Image does not exist?
77 |             return True
78 |
79 |         if profile == "daily":
80 |             delta = datetime.timedelta(days=1)
81 |         elif profile == "hourly":
82 |             delta = datetime.timedelta(hours=1)
83 |         elif profile == "monthly":
84 |             delta = datetime.timedelta(days=30)
85 |         else:  # weekly
86 |             delta = datetime.timedelta(days=7)
87 |         not_after = datetime.datetime.now() - delta
88 |         if last_profile is not None:
89 |             last_time = last_profile.split(";")[3]
90 |             last_time = dateutil.parser.parse(last_time)
91 |             if last_time > not_after:
92 |                 Log.debug("Our last backup is still young, nothing to do")
93 |                 return False
94 |         return True
95 |
96 |     def make_snap(self, profile, value):
97 |         dest = self.dest
98 |         self.snap_name = self.__snap_name(profile, value)
99 |
100 |         self.ceph.backup.update_desc(self.source, dest)
101 |
102 |         last_snap = None
103 |         if self.last_created_snap is not None:
104 |             last_snap = self.last_created_snap
105 |         elif len(self.ceph.snap(self.rbd)) == 0:
106 |             Log.debug(f"No snaps found on {self.source}")
107 |         elif not self.ceph.backup.exists(dest):
108 |             Log.debug(f"backup:{dest} does not exist")
109 |         elif len(self.ceph.backup.snap(dest)) == 0:
110 |             Log.debug(f"No snaps found for backup:{dest}")
111 |         else:
112 |             last_snap = self.ceph.get_last_shared_snap(self.rbd, dest)
113 |
114 |         if last_snap is None:
115 |             Log.debug(f"{self.source}: doing full backup")
116 |         else:
117 |             Log.debug(f"{self.source}: doing incremental backup based on {last_snap}")
118 |
119 |         now = datetime.datetime.now().isoformat()
120 |         snap_name = f"{self.snap_name};{now}"
121 |         self.last_created_snap = snap_name
122 |
123 |         self.ceph.mk_snap(self.rbd, snap_name, self.vm)
124 |
125 |         return dest, last_snap, snap_name
--------------------------------------------------------------------------------
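Snapshots are thus named `<snap_prefix>;<profile>;<count>;<ISO timestamp>`: `check_profile()` relies on field 3 being a parseable date, and compares it against `now - delta`. Dissecting a real name taken from api.md:

```python
# Parse a snapshot name in the format produced by __snap_name() plus the
# timestamp appended by make_snap(); the example value comes from api.md.
import datetime

import dateutil.parser

name = "backup;daily;30;2018-06-01T15:44:26.072348"
prefix, profile, count, created = name.split(";")

age = datetime.datetime.now() - dateutil.parser.parse(created)
fresh = age < datetime.timedelta(days=1)  # the "daily" rule in check_profile()
print(profile, count, created, "fresh:", fresh)
```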
/man/backurne.1:
--------------------------------------------------------------------------------
1 | .TH backurne 1 "28 Aug 2025" "2.4.0" "backurne man page"
2 | .SH NAME
3 | backurne \- backup Ceph's RBD on Ceph, with Proxmox integration
4 | .SH SYNOPSIS
5 | .B backurne
6 | .RI backup
7 | .I [--cluster <cluster>]
8 | .I [--vmid <vmid>]
9 | .I [--profile <profile>]
10 | .I [--force]
11 | .I [--no-cleanup]
12 | .I [--cleanup]
13 | .br
14 | .B backurne
15 | .RI precheck
16 | .br
17 | .B backurne
18 | .RI check
19 | .br
20 | .B backurne
21 | .RI check-snap
22 | .br
23 | .B backurne
24 | .RI ls
25 | .I [rbd]
26 | .I [--json]
27 | .br
28 | .B backurne
29 | .RI map
30 | .I rbd
31 | .I snapshot
32 | .I [vmdk]
33 | .br
34 | .B backurne
35 | .RI unmap
36 | .I rbd
37 | .I snapshot
38 | .br
39 | .B backurne
40 | .RI list-mapped
41 | .I [--json]
42 | .br
43 | .B backurne
44 | .RI version
45 |
46 | .SH DESCRIPTION
47 | backurne is a handy tool for backing up RBD images on RBD.
48 | .br
49 | .SH COMMAND DESCRIPTIONS
50 | .IP "\fBbackup\fR" 4
51 | backup
52 | .IP
53 | Create backups.
54 | .br
55 | By default,
56 | .B Backurne
57 | connects to every cluster defined in its configuration, processes the required backups and scans every existing backup for potential cleanup.
58 | .br
59 | You can process only a subset of images using a combination of
60 | .I --cluster, --vmid
61 | and
62 | .I --profile.
63 | .br
64 | Please note that
65 | .I --vmid
66 | cannot be used without
67 | .I --cluster,
68 | because those IDs are not globally unique.
69 | .br
70 | Also, when you process only such a subset, cleaning up existing backups is not done. You may use the
71 | .I --cleanup
72 | option to change that behavior.
73 | .br
74 | Similarly, if you do not want to clean up with a simple invocation, you can use the
75 | .I --no-cleanup
76 | option.
77 | .IP "\fBprecheck\fR" 4
78 | precheck
79 | .IP
80 | Preprocesses check results. Checking the current status of backups is a relatively slow operation, which makes it unfriendly for checks using icinga2 or other nagios-like probes. Prechecking is supposed to be done regularly via cron.
81 | .IP "\fBcheck\fR" 4
82 | check
83 | .IP
84 | Print the status of backups, preprocessed by the \fBprecheck\fR sub-command. Any image not backed up for too long is reported as an error. The command produces its return code in a nagios-like fashion.
85 | .IP "\fBcheck-snap\fR" 4
86 | check-snap
87 | .IP
88 | Check the coherency of backups, using a hash comparison between backups and production data. This is a very slow operation, as it reads 100% of the backup storage.
89 | .IP "\fBls\fR" 4
90 | ls
91 | .I [rbd]
92 | .I [--json]
93 | .IP
94 | List backed-up images.
95 | .br
96 | Using the
97 | .I [rbd]
98 | argument, you can list backups for a specific image.
99 | .br
100 | The command produces human-friendly output by default.
101 | .br
102 | The
103 | .I [--json]
104 | argument lets it produce machine-readable json.
105 | .IP "\fBmap\fR" 4
106 | map
107 | .I rbd
108 | .I snapshot
109 | .I [vmdk]
110 | .IP
111 | Map a snapshot. A mapped snapshot allows the user to explore and restore parts of a snapshot. Volatile modifications are seamlessly allowed (the backup is never modified).
112 | .br
113 | .B Backurne
114 | will make the backup visible in /dev. Also, partitions found in the backup (if present) are also mapped.
115 | .br
116 | Finally, if a single partition is found (or no partition table),
117 | .B backurne
118 | will try to mount that filesystem using
119 | .B mount(8).
120 | .br
121 | If
122 | .B backurne
123 | did not manage to mount a filesystem, a message is printed and the user is left with manual handling. As printed, the backup shall be present at /dev/nbdX, and partitions (if any) at /dev/mapper/nbdXpY.
124 | .br
125 | By default, vmdk files are not mapped. The vmfs block device will be mounted, but vmdk will only be listed.
126 | .br
127 | You can use the optional
128 | .I vmdk
129 | parameter to also map a vmdk.
130 | .IP "\fBunmap\fR" 4
131 | unmap
132 | .I rbd
133 | .I snapshot
134 | .IP
135 | Unmap a previously mapped snapshot, removing volatile modifications in the process.
136 | .br
137 | If the mapping was not fully handled by
138 | .I backurne,
139 | the user must clean up their own actions before calling
140 | .I unmap.
141 | If not,
142 | .I unmap
143 | will fail and will have to be rerun.
144 | .IP "\fBlist-mapped\fR" 4
145 | list-mapped
146 | .I [--json]
147 | .IP
148 | List mapped backups.
149 | .br
150 | By default, a human-friendly output is produced. Use
151 | .I --json
152 | for machine-readable output.
153 | .IP "\fBversion\fR" 4
154 | version
155 | .IP
156 | Print the current version
157 |
158 | .SH DEBUGGING
159 | .P
160 | You can pass
161 | .I --debug
162 | as a first option to increase verbosity, or increase the
163 | .I log_level
164 | entry in the configuration.
165 |
166 |
167 | .SH BUGS
168 | No known bugs.
169 | .SH AUTHOR
170 | Alexandre Bruyelles <backurne@jack.fr.eu.org>
171 |
--------------------------------------------------------------------------------
/src/backurne/proxmox.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from proxmoxer import ProxmoxAPI
4 |
5 | from .backup import Bck
6 | from .ceph import Ceph
7 | from .config import config
8 | from .log import log as Log
9 |
10 |
11 | class Proxmox:
12 |     def __init__(self, px_config):
13 |         self.px_config = px_config
14 |         self.px = ProxmoxAPI(
15 |             px_config["fqdn"],
16 |             user=px_config["user"],
17 |             password=px_config["passwd"],
18 |             verify_ssl=px_config["tls"],
19 |         )
20 |         self.ceph_storage = self.__get_ceph_storage()
21 |
22 |     def __get_ceph_endpoint(self, storage):
23 |         px = self.px_config["name"]
24 |         ceph = storage
25 |         if px in config["ceph_endpoint"]:
26 |             if ceph in config["ceph_endpoint"][px]:
27 |                 return config["ceph_endpoint"][px][ceph]
28 |         if "default" in config["ceph_endpoint"]:
29 |             if ceph in config["ceph_endpoint"]["default"]:
30 |                 return config["ceph_endpoint"]["default"][ceph]
31 |         return storage
32 |
33 |     def __get_ceph_storage(self):
34 |         result = {}
35 |         for storage in self.px.storage.get():
36 |             if storage["type"] != "rbd":
37 |                 continue
38 |             name = storage["storage"]
39 |             endpoint = self.__get_ceph_endpoint(name)
40 |             result[name] = Ceph(
41 |                 storage["pool"], namespace=storage.get("namespace"), endpoint=endpoint
42 |             )
43 |         return result
44 |
45 |     def nodes(self):
46 |         nodes = self.px.nodes.get()
47 |         return [i["node"] for i in nodes]
48 |
49 |     def vms(self):
50 |         vms = []
51 |         for vm in self.px.cluster.resources.get(type="vm"):
52 |             self.vmid = vm["vmid"]
53 |             vm["px"] = self
54 |             vm["config"] = self.px.nodes(vm["node"]).qemu(vm["vmid"]).pending.get()
55 |             tmp = {}
56 |             for i in vm["config"]:
57 |                 if "value" not in i:
58 |                     continue
59 |                 tmp[i["key"]] = i["value"]
60 |             vm["config"] = tmp
61 |             vm["smbios"] = self.get_smbios(vm["config"])
62 |             vm["to_backup"] = []
63 |             for disk in self.get_disks(vm["config"]):
64 |                 ceph = self.ceph_storage[disk["ceph"]]
65 |                 bck = Bck(
66 |                     disk["ceph"], ceph, disk["rbd"], vm=vm, adapter=disk["adapter"]
67 |                 )
68 |                 vm["to_backup"].append([disk, ceph, bck])
69 |             if "agent" in vm["config"]:
70 |                 vm["qemu_agent"] = vm["config"]["agent"]
71 |             vms.append(vm)
72 |         return vms
73 |
74 |     def get_smbios(self, conf):
75 |         for key, value in conf.items():
76 |             if not re.match("^smbios", key):
77 |                 continue
78 |             return value.split("=")[1]
79 |         return None
80 |
81 |     def __extract_disk(self, key, value):
82 |         disk = False
83 |         if re.match("^virtio[0-9]+$", key):
84 |             disk = True
85 |         elif re.match("^ide[0-9]+$", key):
86 |             disk = True
87 |         elif re.match("^scsi[0-9]+$", key):
88 |             disk = True
89 |         elif re.match("^sata[0-9]+$", key):
90 |             disk = True
91 |
92 |         # Exclude cdrom
93 |         if re.match(".*media=.*", str(value)):
94 |             disk = False
95 |
96 |         # "No backup" is set
97 |         if re.match(".*backup=0.*", str(value)):
98 |             return None, None, None
99 |
100 |         if not disk:
101 |             return None, None, None
102 |
103 |         storage, volume = value.split(":")
104 |         if storage not in self.ceph_storage:
105 |             return None, None, None
106 |
107 |         volume = volume.split(",")[0]
108 |
109 |         match = re.match("vm-([0-9]+)-disk-[0-9]+", volume)
110 |         if match is None:
111 |             match = re.match("base-([0-9]+)-disk-[0-9]+", volume)
112 |         if match is None or match.group(1) != str(self.vmid):
113 |             return None, None, None
114 |
115 |         return storage, volume, key
116 |
117 |     def get_disks(self, conf):
118 |         result = []
119 |         for key, value in conf.items():
120 |             storage, volume, adapter = self.__extract_disk(key, value)
121 |             if storage is None:
122 |                 continue
123 |             result.append({"ceph": storage, "rbd": volume, "adapter": adapter})
124 |         return result
125 |
126 |     def is_running(self, qemu):
127 |         status = qemu.status.get("current")["status"]
128 |         return status == "running"
129 |
130 |     def freeze(self, node, vm):
131 |         if not config["fsfreeze"] or "qemu_agent" not in vm:
132 |             return
133 |         if vm["qemu_agent"] != 1:
134 |             return
135 |         qemu = self.px.nodes(node).qemu(vm["vmid"])
136 |         if not self.is_running(qemu):
137 |             return
138 |
139 |         try:
140 |             Log.debug(f"Freezing {vm['vmid']}")
141 |             qemu.agent.post("fsfreeze-freeze")
142 |         except Exception as e:
143 |             Log.warning(f"{e} thrown while freezing {vm['vmid']}")
144 |
145 |     def thaw(self, node, vm):
146 |         if not config["fsfreeze"] or "qemu_agent" not in vm:
147 |             return
148 |         if vm["qemu_agent"] != 1:
149 |             return
150 |
151 |         qemu = self.px.nodes(node).qemu(vm["vmid"])
152 |         if not self.is_running(qemu):
153 |             return
154 |
155 |         try:
156 |             Log.debug(f"Thawing {vm['vmid']}")
157 |             qemu.agent.post("fsfreeze-thaw")
158 |         except Exception as e:
159 |             Log.warning(f"{e} thrown while thawing {vm['vmid']}")
--------------------------------------------------------------------------------
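`__extract_disk()` keeps only virtio/ide/scsi/sata entries that live on a known RBD storage, skips cdroms and disks flagged `backup=0`, and checks that the volume belongs to the VM being processed. A standalone illustration of those rules, on hypothetical config values for vmid 115:

```python
# Standalone re-illustration of the filtering rules in __extract_disk(),
# run on hypothetical Proxmox config values for vmid 115.
import re

config = {
    "scsi0": "ceph-rbd:vm-115-disk-1,size=32G",   # kept
    "scsi1": "ceph-rbd:vm-115-disk-2,backup=0",   # skipped: backup=0
    "ide2": "local:iso/debian.iso,media=cdrom",   # skipped: cdrom
    "virtio0": "local-lvm:vm-115-disk-0",         # skipped: not an RBD storage
}
rbd_storages = {"ceph-rbd"}
vmid = 115

for key, value in config.items():
    if not re.match("^(virtio|ide|scsi|sata)[0-9]+$", key):
        continue
    if re.match(".*media=.*", value) or re.match(".*backup=0.*", value):
        continue
    storage, volume = value.split(":")
    if storage not in rbd_storages:
        continue
    volume = volume.split(",")[0]
    match = re.match("(?:vm|base)-([0-9]+)-disk-[0-9]+", volume)
    if match is None or match.group(1) != str(vmid):
        continue
    print(key, "->", storage, volume)  # scsi0 -> ceph-rbd vm-115-disk-1
```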
/cli.md:
--------------------------------------------------------------------------------
1 | # Command line interface
2 |
3 | First of all, we should create some backups. Here, we have two backup policies: a daily for 30 days, and an hourly for 48 hours; this is the default:
4 | ```
5 | 35% [jack:~/backurne]./backurne backup
6 | INFO: Processing proxmox: infrakvm1
7 | INFO: Processing infraceph1:vm-136-disk-1 (daily;30)
8 | DEBUG: No snaps found on infraceph1:vm-136-disk-1
9 | INFO: infraceph1:vm-136-disk-1: doing full backup
10 | INFO: Processing infraceph1:vm-136-disk-1 (hourly;48)
11 | INFO: infraceph1:vm-136-disk-1: doing incremental backup based on backup;daily;30;2018-06-01T15:44:26.072348
12 | INFO: I will now download 2 snaps from px infrakvm1
13 | INFO: Exporting infraceph1:vm-136-disk-1
14 | Exporting image: 100% complete...done.
15 | Importing image diff: 100% complete...done.
16 | INFO: Export infraceph1:vm-136-disk-1 complete
17 | INFO: Exporting infraceph1:vm-136-disk-1
18 | Exporting image: 100% complete...done.
19 | Importing image diff: 100% complete...done.
20 | INFO: Export infraceph1:vm-136-disk-1 complete
21 | INFO: Deleting vm-136-disk-1@backup;daily;30;2018-06-01T15:44:26.072348 ..
22 | INFO: Expiring our snapshots
23 | ```
24 | As you can see, the first backup is "full", the other is incremental (based on the full made seconds ago, thus very efficient).\
25 | This is why using multiple policies does not cost much.
26 |
27 |
28 | Let's run the command again:
29 | ```
30 | 16% [jack:~/backurne]./backurne backup
31 | INFO: Processing proxmox: infrakvm1
32 | INFO: Our last backup is still young, nothing to do
33 | INFO: Our last backup is still young, nothing to do
34 | INFO: I will now download 0 snaps from px infrakvm1
35 | INFO: Expiring our snapshots
36 | ```
37 | Nothing to do!\
38 | You can run this command many times, as it will avoid doing backups if the previous one is not old enough.
39 |
40 | By default, everything is processed. You can filter things using the following backup options:
41 | - `--cluster`
42 | - `--vmid`
43 | - `--profile`
44 |
45 | Also, you can force the processing of a backup, even if the previous one is not old enough, using the `--force` option.
46 |
47 |
48 | Now, we should list our backed-up disks:
49 | ```
50 | 17% [jack:~/backurne]./backurne ls
51 | +-----------------+---------+--------------------------------------------------------------------+
52 | |  Ident          |  Disk   |  UUID                                                              |
53 | +-----------------+---------+--------------------------------------------------------------------+
54 | |  test-backurne  |  scsi0  |  8eb4f698-afdc-45bb-9f6c-1833c42ae368;vm-136-disk-1;test-backurne  |
55 | +-----------------+---------+--------------------------------------------------------------------+
56 | ```
57 | - `Ident` is used as an identifier for humans: for Proxmox's backups, this is the VM's name from the last run.
58 | - `Disk` is the disk adapter for proxmox, or the rbd image name for plain.
59 | - Finally, `UUID` is the real RBD image, as defined on Ceph, and is used as a primary key.
60 |
61 |
62 | We can list the backups for this disk:
63 | ```
64 | 32% [jack:~/backurne]./backurne ls '8eb4f698-afdc-45bb-9f6c-1833c42ae368;vm-136-disk-1;test-backurne'
65 | +------------------------------+-----------------------------------------------+
66 | |  Creation date               |  UUID                                         |
67 | +------------------------------+-----------------------------------------------+
68 | |  2018-06-01 15:44:26.072348  |  backup;daily;30;2018-06-01T15:44:26.072348   |
69 | |  2018-06-01 15:44:26.499066  |  backup;hourly;48;2018-06-01T15:44:26.499066  |
70 | +------------------------------+-----------------------------------------------+
71 | ```
72 | We see that both snapshots were created almost at the same time.
73 |
74 |
75 | Now, we would like to inspect a snapshot's content.
76 | ```
77 | 32% [jack:~/backurne]sudo ./
78 | backurne map 28b868e3-c145-4ea7-8dff-e5ae3b8093af\;scsi0\;nsint5 backup\;daily\;30\;2019-12-30T06\:00\:04.802699
79 | INFO: Mapping 28b868e3-c145-4ea7-8dff-e5ae3b8093af;scsi0;nsint5@backup;daily;30;2019-12-30T06:00:04.802699 ..
80 | INFO: rbd 28b868e3-c145-4ea7-8dff-e5ae3b8093af;scsi0;nsint5 / snap backup;daily;30;2019-12-30T06:00:04.802699
81 | INFO: └── /dev/nbd0 (fstype None, size 20G)
82 | INFO:     └── /dev/nbd0p1 on /tmp/tmp09nri0sh (fstype xfs, size 20G)
83 | 32% [jack:~/backurne]ls /tmp/tmp09nri0sh
84 | bin  boot  dev  dlm  etc  home  initrd.img  initrd.img.old  lib  lib32  lib64  media  mnt  opt  proc  root  run  sbin  shared  srv  sys  tmp  usr  var  vmlinuz  vmlinuz.old
85 | ```
86 |
87 | The `map` subcommand clones a specific snapshot, maps it, maps the partitions (if any) and tries to mount the filesystems.
88 | Some things to consider:
89 | - the subcommand must be run with CAP_SYS_ADMIN, as it will handle block devices and mount filesystems.
90 | - the mounted filesystem (or mapped block devices) is a clone of the snapshot, not the snapshot itself. It is thus writable, and will be deleted later: you can remove files or do whatever you want here without impacting the backup.
91 |
92 | Wait, what is mounted here already?
93 | ``` 94 | 32% [jack:~/backurne]sudo ./backurne list-mapped 95 | INFO: rbd 28b868e3-c145-4ea7-8dff-e5ae3b8093af;scsi0;nsint5 / snap backup;daily;30;2019-12-30T06:00:04.802699 96 | INFO: └── /dev/nbd0 (fstype None, size 20G) 97 | INFO: └── /dev/nbd0p1 on /tmp/tmp09nri0sh (fstype xfs, size 20G) 98 | ``` 99 | 100 | Once you have recovered your files, you should do some cleanups: 101 | ``` 102 | 32% [jack:~/backurne]./backurne unmap '8eb4f698-afdc-45bb-9f6c-1833c42ae368;vm-136-disk-1;test-backurne' 'backup;hourly;48;2018-06-01T15:44:26.499066' 103 | INFO: Unmapping 8eb4f698-afdc-45bb-9f6c-1833c42ae368;vm-136-disk-1;test-backurne@backup;hourly;48;2018-06-01T15:44:26.499066 .. 104 | INFO: 8eb4f698-afdc-45bb-9f6c-1833c42ae368;vm-136-disk-1;test-backurne@backup;hourly;48;2018-06-01T15:44:26.499066 currently mapped on /dev/nbd0 105 | INFO: Deleting restore-1 .. 106 | ``` 107 | 108 | Finally, there are three subcommands for checks: 109 | - `precheck` crawls images and computes the actual arrors, if there is images on the live cluster without the daily snapshot. This subcommand may be slow, depending on the dataset. 110 | - `check` shows errors from `precheck`. 111 | - `check-snap` hashes images to check if the data on the backup cluster is the same as on the live cluster (but it is slow ..) 112 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # backurne 2 | 3 | `backurne` is a handy tool for backuping RBD's image on RBD.\ 4 | Yep ! What is better, for backuping a Ceph cluster, than another Ceph cluster ? 5 | 6 | 7 | It does not do much by itself, though, but orchestrate and relies heavily on other tools.\ 8 | It has a fine integration with Proxmox, but is able to backup "plain" (or "raw RBD") cluster as well. 9 | 10 | Supported features 11 | --- 12 | - **Snapshot-based backup**, with no agent strictly required on the VM. 13 | - Backup inspection and restoration via **command line interface** as well as via **REST API**. 14 | - **Support multiple retention policy** efficiently (both in term of storage and network bandwidth), dynamically configurable per host (proxmox-only) via REST API. 15 | - Auto cleanup : deletion is never generated by a human, thus **no human mistakes**. 16 | - **Compression** and **encryption** "on the wire" for enhanced efficiency and security. 17 | - Peaceful integration with other snapshots (via Proxmox web interface or whatever). 18 | - Multiple cluster support, with mixed type ("proxmox" and "plain"). 19 | - A couple of backups can be stored on the live clusters, for faster recovery. 20 | - Optional **fsfreeze** support (proxmox-only) via Qemu-quest-agent. 21 | - Backup deactivation via Proxmox's web interface. 22 | - External custom processing via hooks. 23 | - LVM support: backup's lvs are detected and mapped (if possible) for further exploration. See below. 24 | - vmware support: vmfs are detected and supported. Each vmdk are also mapped and mounted. See below. 25 | - Microsoft dynamic disks support: each logical disk will be mapped and mounted. See below. 26 | - VM tracking, for those who uses a single Proxmox cluster with multiple Ceph backend. 
27 |
28 | Encryption and compression at rest are also seamlessly supported via Bluestore OSDs (see https://ceph.com/community/new-luminous-bluestore/)
29 |
30 | Required packages
31 | ---
32 |
33 | Core: python (>=3.7), python3-dateutil, python3-termcolor, python3-prettytable, python3-requests, python3-proxmoxer, python3-psutil, python3-anytree (from https://github.com/c0fec0de/anytree, .deb for buster attached for convenience), zstd for compression \
34 | For mapping (optional): kpartx, rbd-nbd (Mimic or later), lvm2, vmfs-tools, vmfs6-tools, ldmtool\
35 | For the REST API: python3-flask, python3-flask-autoindex\
36 | For bash autocompletion: jq
37 |
38 |
39 | Installation
40 | ---
41 |
42 | - Check out the **Authentication** part below.
43 | - Clone the source, edit the configuration
44 | - Set up a Ceph cluster, used to store the backups
45 | - Profit?
46 |
47 | Configuration
48 | ---
49 |
50 | See [conf/backurne.conf](conf/backurne.conf)
51 |
52 | Authentication, and where should I run what
53 | ---
54 |
55 | `backurne` interacts with the backup cluster via the `rbd` command line. It must have the required configuration at /etc/ceph/ceph.conf and the needed keyring.\
56 | It is assumed that `backurne` will be run on a Ceph node (perhaps a monitor), but this is not strictly required (those communications will not be encrypted nor compressed).
57 |
58 | `backurne` connects to proxmox clusters via their HTTP API. No data is exchanged via this link; it is purely used for "control" (listing VMs, listing disks, fetching information etc).
59 |
60 | `backurne` connects to every "live" Ceph cluster via SSH. For each cluster, it will connect to a single node, always the same, defined in Proxmox (and / or overwritten via the configuration).\
61 | Neither SSH authentication nor authorization is handled by `backurne` in any way.\
62 | It is up to you to configure ssh: either accept or ignore the host keys, place your public key on the required hosts etc.
63 |
64 | Command line interface
65 | ---
66 |
67 | See [cli.md](cli.md)
68 |
69 | REST API
70 | ---
71 |
72 | See [api.md](api.md)
73 |
74 | Used technology
75 | ---
76 |
77 | - `RBD` is the core technology used by `backurne`: it provides snapshot export, import, diff, mapping etc.
78 | - `ssh` is used to transfer the snapshots between the live clusters and the backup cluster. `RBD` can be manipulated over TCP/IP, but without encryption or compression, thus that solution was not kept.
79 | - `xxhash` (or another, see the configuration) is used to check the consistency between snapshots.
80 | - `rbd-nbd` is used to map a specific backup and inspect its content.
81 | - `kpartx`, `qemu-img`, `qemu-nbd`, `vmfs-tools` and `vmfs6-tools` are used for vmware exploration, `ldmtool` is used to map microsoft dynamic disks.
82 |
83 |
84 | vmware support
85 | ---
86 |
87 | The assumption is that the rbd image you back up is a single datastore. It contains multiple vmdk, each of them being a VM disk.\
88 | Datastores use a specific filesystem: VMFS. There are several versions, as of today. You will need `vmfs-tools` to mount VMFS up to version 5. For version 6 support, `vmfs6-tools` is required.\
89 | When `backurne` detects a VMFS, it will try each version until success. If no `vmfs*-tools` is available, the block device is left as is.\
90 | Once a VMFS device is mounted, each vmdk found inside will be mapped and mounted, recursively. In theory, you could have a VMFS containing a VM disk (vmdk), which is itself a datastore with more vmdk inside .. This behavior is not tested, though.\
91 |
92 |
93 | LVM support
94 | ---
95 |
96 | The same device may be seen at many layers by the device-mapper code.\
97 | To activate some LVs, especially if they live inside a vmdk (see vmware support), you will need to tell LVM to allow such behavior.\
98 | By default, LVM refuses to activate LVs that show up in multiple PVs.\
99 | To allow this, edit `/etc/lvm/lvm.conf`, and set `allow_changes_with_duplicate_pvs` to `1`.\
100 |
101 |
102 | Microsoft LDM support
103 | ---
104 |
105 | Microsoft dynamic disks are supported. You will need `ldmtool` to map those.\
106 | A single dynamic disk, as well as a dynamic disk spread across multiple block devices (inside a VMFS for instance), are supported.\
107 | However, mapping multiple unrelated dynamic disks is not supported. For instance, if you map a backup A, and an unrelated backup B,\
108 | while both of them contain dynamic disks, the behavior is undefined.\
109 |
110 |
111 | "Bare-metal" restore
112 | ---
113 |
114 | Restoring a complete image is out of `backurne`'s scope.\
115 | If you are using `proxmox`, you may first need to restore the configuration in `/etc/pve/`.\
116 | Anyway, once you know the target rbd image name, you will have to:\
117 | - find the desired backup image, using `backurne ls`
118 | - find the desired backup snapshot, using `backurne ls <image>`
119 | - export and import the image, using `rbd export <image> --snap <snap> - | ssh <host> rbd import - <image>`
120 |
121 |
122 | Graph and reporting
123 | ---
124 |
125 | ![alt text](https://github.com/JackSlateur/backurne/blob/master/graph_in_progress.png?raw=true)
126 | ![alt text](https://github.com/JackSlateur/backurne/blob/master/graph_duration.png?raw=true)
127 |
128 | An ugly grafana dashboard is provided in `graph/grafana-backurne.json`; data is stored in an influxdb database.\
129 | It provides two pieces of information:\
130 | - the number of backups currently running, using data from telegraf (both the script and the config can be found in `graph/telegraf/*`).
131 | - the duration of each backup
132 |
133 | Merge requests or ideas of improvement are most welcome here.
134 |
135 |
136 | Note
137 | ---
138 | On Proxmox, LXC is not yet supported. Only Qemu so far :/
139 |
140 | The project is developed mainly for Debian Buster and Proxmox, and is used here on these technologies.\
141 | The "plain" feature, as well as running `backurne` on other operating systems, is less tested, and may be less bug-proof.\
142 | Bug reports, merge requests and feature requests are welcome: some things are not implemented simply because I do not need them, not because they cannot be done nor because I do not want to code them.
143 | -------------------------------------------------------------------------------- /src/backurne/disk.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import os 4 | from collections import namedtuple 5 | 6 | import humanize 7 | import psutil 8 | import sh 9 | from anytree import Node 10 | from anytree import RenderTree 11 | from sh import lsblk 12 | from sh import rbd 13 | 14 | from .ceph import Ceph 15 | from .log import has_debug 16 | from .log import log as Log 17 | 18 | 19 | fields = [ 20 | "dev", 21 | "fstype", 22 | "mountpoint", 23 | "vmfs_fuse", 24 | "image", 25 | "parent_image", 26 | "parent_snap", 27 | "mapped", 28 | "qemu_nbd", 29 | "size", 30 | "ldm", 31 | ] 32 | Part = namedtuple("FS", fields, defaults=(None,) * len(fields)) 33 | 34 | 35 | def get_fs_info(dev): 36 | if dev is None: 37 | return [] 38 | info = lsblk( 39 | "-I", 8, "-p", "-o", "+NAME,FSTYPE,SIZE,MOUNTPOINT,PARTTYPE", "-J", dev 40 | ) 41 | info = json.loads(info.stdout) 42 | return info["blockdevices"] 43 | 44 | 45 | # vmfs-fuse shows itself as /dev/fuse, in /proc/mounts 46 | # Thus, lsblk cannot resolve the device 47 | # However, the cmdline is straightforward: vmfs-fuse 48 | # We will try to list all running processes, and catch the fuse daemon 49 | def resolv_vmfs(dev): 50 | for i in psutil.process_iter( 51 | attrs=[ 52 | "cmdline", 53 | ] 54 | ): 55 | i = i.info["cmdline"] 56 | if len(i) == 0: 57 | continue 58 | if "vmfs-fuse" not in i[0] and "vmfs6-fuse" not in i[0]: 59 | continue 60 | if i[1] != dev: 61 | continue 62 | return i[2] 63 | 64 | 65 | def resolv_qemu_nbd(dev): 66 | for i in psutil.process_iter( 67 | attrs=[ 68 | "cmdline", 69 | ] 70 | ): 71 | i = i.info["cmdline"] 72 | if len(i) == 0: 73 | continue 74 | if "qemu-nbd" not in i[0]: 75 | continue 76 | if i[3] != dev: 77 | continue 78 | return i[2] 79 | 80 | 81 | def get_next_nbd(): 82 | path = "/sys/class/block/" 83 | for i in glob.glob(f"{path}/nbd*"): 84 | dev = i.split("/")[-1] 85 | if "p" in dev: 86 | continue 87 | if os.path.exists(f"{path}/{dev}/pid"): 88 | continue 89 | return f"/dev/{dev}" 90 | 91 | 92 | def get_file_size(path): 93 | size = os.stat(path).st_size 94 | return humanize.naturalsize(size, binary=True) 95 | 96 | 97 | def get_vg_uuid(path): 98 | config = f'devices{{filter = [ "a|{path}|", "r|.*|" ]}}' 99 | config = f"--config={config}" 100 | raw = sh.Command("vgs")("--noheadings", "-o", "uuid,pv_name,vg_name", config) 101 | raw = raw.stdout.decode("utf-8") 102 | for data in raw.split("\n"): 103 | data = data.lstrip().rstrip() 104 | if data == "": 105 | continue 106 | data = data.split(" ") 107 | data = [i for i in data if i != ""] 108 | uuid, pv_name, vg_name = data 109 | if pv_name != path: 110 | continue 111 | return uuid, vg_name 112 | return None, None 113 | 114 | 115 | def deactivate_vg(path): 116 | uuid, _ = get_vg_uuid(path) 117 | if uuid is None: 118 | return 119 | 120 | Log.debug(f"vgchange -an --select vg_uuid={uuid}") 121 | try: 122 | sh.Command("vgchange")("-an", "--select", f"vg_uuid={uuid}") 123 | except Exception: 124 | pass 125 | 126 | 127 | def add_part(part, parent, extended, qemu_nbd=None): 128 | if part["fstype"] == "LVM2_member": 129 | node = Node( 130 | Part( 131 | dev=part["name"], 132 | mountpoint=part["mountpoint"], 133 | fstype=part["fstype"], 134 | size=part["size"], 135 | qemu_nbd=qemu_nbd, 136 | ), 137 | parent=parent, 138 | qemu_nbd=qemu_nbd, 139 | ) 140 | if "children" not in part: 141 | return 142 | for child in 
part["children"]: 143 | add_part(child, node, extended) 144 | elif part["fstype"] != "VMFS_volume_member": 145 | node = Node( 146 | Part( 147 | dev=part["name"], 148 | mountpoint=part["mountpoint"], 149 | fstype=part["fstype"], 150 | size=part["size"], 151 | qemu_nbd=qemu_nbd, 152 | ), 153 | parent=parent, 154 | qemu_nbd=qemu_nbd, 155 | ) 156 | else: 157 | part["mountpoint"] = resolv_vmfs(part["name"]) 158 | node = Node( 159 | Part( 160 | dev=part["name"], 161 | mountpoint=part["mountpoint"], 162 | fstype=part["fstype"], 163 | size=part["size"], 164 | vmfs_fuse=True, 165 | qemu_nbd=qemu_nbd, 166 | ), 167 | parent=parent, 168 | qemu_nbd=qemu_nbd, 169 | ) 170 | vmdks = "{}/*/*-flat.vmdk".format(part["mountpoint"]) 171 | for vmdk in glob.glob(vmdks): 172 | vmdk_size = get_file_size(vmdk) 173 | vmdk_short = vmdk.split("/")[-1] 174 | qcow2 = glob.glob(f"/tmp/*{vmdk_short}.qcow2") 175 | if len(qcow2) == 0: 176 | sub = Node( 177 | Part(dev=vmdk, size=vmdk_size), parent=node, qemu_nbd=qemu_nbd 178 | ) 179 | continue 180 | qcow2 = qcow2[0] 181 | nbd = resolv_qemu_nbd(qcow2) 182 | sub = Node( 183 | Part(dev=qcow2, size=vmdk_size, qemu_nbd=nbd), 184 | parent=node, 185 | qemu_nbd=qemu_nbd, 186 | ) 187 | get_partitions(nbd, sub, extended=extended, mapped=True) 188 | 189 | 190 | def filter_children(children, mapped): 191 | if mapped is False or mapped is None: 192 | return children 193 | 194 | # We know that the device is mapped 195 | # We will ignore non-mapped devices, to avoid duplicates 196 | result = [] 197 | for child in children: 198 | maj = child["maj:min"].split(":")[0] 199 | if maj.startswith("25") or child["fstype"] == "VMFS_volume_member": 200 | result.append(child) 201 | return result 202 | 203 | 204 | def get_partitions(dev, node, extended=True, mapped=None, qemu_nbd=None): 205 | for part in get_fs_info(dev): 206 | if part["fstype"] is not None: 207 | add_part(part, node, extended, qemu_nbd) 208 | continue 209 | if "children" not in part: 210 | continue 211 | 212 | # A microsoft dynamic disk has a single partition with type 0x42 213 | if part["children"][0]["parttype"] == "0x42": 214 | ldm = True 215 | else: 216 | ldm = None 217 | 218 | if extended is False: 219 | sub_node = node 220 | else: 221 | sub_node = Node( 222 | Part( 223 | dev=dev, 224 | mapped=mapped, 225 | qemu_nbd=qemu_nbd, 226 | size=part["size"], 227 | ldm=ldm, 228 | ), 229 | parent=node, 230 | ) 231 | part["children"] = filter_children(part["children"], mapped) 232 | if not ldm or True: 233 | for part in part["children"]: 234 | get_partitions(part["name"], sub_node, extended, mapped) 235 | 236 | 237 | def wait_dev(dev): 238 | Log.debug(f"udevadm trigger {dev} -w") 239 | sh.Command("udevadm")("trigger", dev, "-w") 240 | sh.Command("udevadm")("settle") 241 | 242 | 243 | def print_node(pre, _node): 244 | node = _node.name 245 | if node.parent_image is not None: 246 | Log.info("{}rbd {} / snap {}".format(pre, node.parent_image, node.parent_snap)) 247 | return 248 | 249 | if node.mountpoint is not None: 250 | msg = "on {} ".format(node.mountpoint) 251 | else: 252 | msg = "" 253 | 254 | if node.dev.endswith(".vmdk"): 255 | dev = node.dev.split("/")[-2] 256 | dev = "vmdk {}".format(dev) 257 | fstype = "vmfs file" 258 | else: 259 | dev = node.dev 260 | fstype = "fstype {}".format(node.fstype) 261 | if has_debug(Log): 262 | Log.info( 263 | "%s%s %s(%s, size %s, nbd %s, vmfs %s, mnt %s, real dev %s, ldm %s)" 264 | % ( 265 | pre, 266 | dev, 267 | msg, 268 | fstype, 269 | node.size, 270 | node.qemu_nbd, 271 | node.vmfs_fuse, 272 | 
node.mountpoint,
273 |                 node.dev,
274 |                 node.ldm,
275 |             )
276 |         )
277 |     else:
278 |         Log.info("{}{} {}({}, size {})".format(pre, dev, msg, fstype, node.size))
279 | 
280 | 
281 | def print_mapped(mapped):
282 |     for tree in mapped:
283 |         for pre, fill, node in RenderTree(tree):
284 |             print_node(pre, node)
285 | 
286 | 
287 | def prepare_tree_to_json(mapped):
288 |     result = mapped.name._asdict()
289 |     result["children"] = []
290 |     for child in mapped.children:
291 |         result["children"].append(prepare_tree_to_json(child))
292 |     return result
293 | 
294 | 
295 | def get_rbd_mapped():
296 |     result = []
297 |     mapped = rbd("--format", "json", "-t", "nbd", "device", "list")
298 |     for mapped in json.loads(mapped.stdout):
299 |         info = Ceph(None).info(mapped["image"])["parent"]
300 |         part = Part(
301 |             dev=mapped["device"],
302 |             image=mapped["image"],
303 |             parent_image=info["image"],
304 |             parent_snap=info["snapshot"],
305 |         )
306 |         result.append(part)
307 |     return result
308 | 
309 | 
310 | def get_mapped(extended=True):
311 |     extended = True
312 |     result = []
313 |     for i in get_rbd_mapped():
314 |         node = Node(i)
315 |         get_partitions(i.dev, node, extended=extended)
316 |         result.append(node)
317 |     return result
318 | 
--------------------------------------------------------------------------------
/conf/backurne.conf:
--------------------------------------------------------------------------------
1 | #
2 | config = {
3 | 	# All our snapshots, on live, will use this prefix.
4 | 	# Every snapshot prefixed by this prefix will be handled
5 | 	# (and possibly deleted)
6 | 	# Must not contain a single quote ( "'" ) nor a semicolon ( ";" )
7 | 	#'snap_prefix': 'backup',
8 | 
9 | 	# Define our backup policy: when should we make a snap?
10 | 	# How many backups should we store?
11 | 	# How many backups should be kept on live (for faster restore)?
12 | 	# Regardless of this setting, the last snapshot will
13 | 	# always be kept on live (for incremental purposes)
14 | 	# An optional argument called 'priority' can be set on a profile, with
15 | 	# only one meaningful value: 'high'. If set to high, those backups will
16 | 	# always be backed up first.
17 | 	# Default: empty
18 | 	#'profiles': {
19 | 	#	'daily': {
20 | 	#		'count': 30,
21 | 	#		'max_on_live': 0,
22 | 	#	},
23 | 	#	'hourly': {
24 | 	#		'count': 48,
25 | 	#		'max_on_live': 0,
26 | 	#		'priority': 'high',
27 | 	#	},
28 | 	#	'weekly': {
29 | 	#		'count': 52,
30 | 	#		'max_on_live': 0,
31 | 	#	},
32 | 	#},
33 | 
34 | 	# Fetch additional profiles, per VM
35 | 	# If set to None, the default, no fetch is made
36 | 	# Else, it must be a URL, http or https
37 | 	# Backurne POSTs a JSON document with the VM's information, and
38 | 	# expects a JSON dict which contains additional profiles,
39 | 	# or an empty dict
40 | 	# See sample-api-profile.py for a simple implementation
41 | 	#'profiles_api': None,
42 | 
43 | 	# Where should we store the backups?
44 | 	# The pool is dedicated
45 | 	#'backup_cluster': {
46 | 	#	'pool': 'rbd',
47 | 	#},
48 | 
49 | 	# List of live clusters to back up
50 | 	#'live_clusters': [
51 | 	#	# A sample Proxmox cluster
52 | 	#	# We will connect to it using http(s)
53 | 	#	{
54 | 	#		# A handy name, which MUST be unique
55 | 	#		'name': 'clusterpx',
56 | 	#		'fqdn': 'clusterpx.fqdn.org',
57 | 	#		'user': 'root@pam',
58 | 	#		'passwd': 'awesome-passwd',
59 | 	#		'tls': True,
60 | 	#		'use_smbios': True,
61 | 	#		'type': 'proxmox',
62 | 	#	},
63 | 	#
64 | 	#	# A sample plain cluster
65 | 	#	# By default, we will connect to it using ssh
66 | 	#	# In that case, you have to ensure backurne
67 | 	#	# can connect to it using ssh keys
68 | 	#	{
69 | 	#		# A handy name, which MUST be unique
70 | 	#		'name': 'cute-cluster',
71 | 	#		'fqdn': 'ceph.fqdn.org',
72 | 	#		'type': 'plain',
73 | 	#		'pool': 'vms'
74 | 	#	},
75 | 	#
76 | 	#	# Alternatively, you may set 'fqdn' to None, and
77 | 	#	# define a helper to tell backurne how to reach the cluster
78 | 	#	# See the example below, which shows how to reach
79 | 	#	# a rook cluster inside kubernetes
80 | 	#	{
81 | 	#		# A handy name, which MUST be unique
82 | 	#		'name': 'cute-cluster-with-helper',
83 | 	#		'fqdn': None,
84 | 	#		'type': 'plain',
85 | 	#		'pool': 'vms',
86 | 	#		# The get_helper is optional
87 | 	#		'get_helper': {
88 | 	#			'cmd': 'kubectl',
89 | 	#			'args': [
90 | 	#				'--kubeconfig',
91 | 	#				'/path/to/kubeconfig',
92 | 	#				'-n',
93 | 	#				'rook-ceph',
94 | 	#				'get',
95 | 	#				'pod',
96 | 	#				'-l',
97 | 	#				'app=rook-ceph-tools',
98 | 	#				'-o',
99 | 	#				'jsonpath={.items[0].metadata.name}',
100 | 	#			],
101 | 	#		},
102 | 	#		'use_helper': {
103 | 	#			'cmd': 'kubectl',
104 | 	#			'args': [
105 | 	#				'--kubeconfig',
106 | 	#				'/path/to/kubeconfig',
107 | 	#				'-n',
108 | 	#				'rook-ceph',
109 | 	#				'exec',
110 | 	#				'-i',
111 | 	#				# %HELPERNAME% will be replaced by
112 | 	#				# the output of 'get_helper', if any
113 | 	#				'%HELPERNAME%',
114 | 	#				'--',
115 | 	#			],
116 | 	#		}
117 | 	#	},
118 | 	#],
119 | 
120 | 	# Extra retention time for the last remaining backup, in days.
121 | 	# When an image is deleted from the live cluster,
122 | 	# its backup image will slowly fade away with time
123 | 	# (each hour / day, a backup will be deleted)
124 | 	# Thus, with a 30 daily profile, the last backup will
125 | 	# be deleted 30 days after the deletion on live
126 | 	# This setting increases the retention time, only for that
127 | 	# last backup.
128 | 	# If set to 30 and with a 30 daily profile, data will
129 | 	# be kept around for 60 days.
130 | 	#'extra_retention_time': 0,
131 | 
132 | 	# Override ceph's endpoint
133 | 	# We need to connect to the live Ceph cluster
134 | 	# Identification we have: proxmox's name, and Ceph's name (from proxmox)
135 | 	# That name does not necessarily map to a domain name, thus some mapping
136 | 	# may be required (editing /etc/hosts may work too)
137 | 	# Plus, you can have the same name on multiple proxmox clusters, but
138 | 	# pointing to different Ceph clusters
139 | 	# This entry is a dict of dicts:
140 | 	# - the first level is the proxmox's name, or 'default' as a catch-all
141 | 	# - the second level is the Ceph's name inside that proxmox cluster
142 | 	# ceph_endpoint[proxmox][ceph] has the precedence
143 | 	# Then ceph_endpoint['default'][ceph]
144 | 	# If nothing matches, the Ceph's name is used as-is, and must
145 | 	# resolve
146 | 	# Default: empty
147 | 	#'ceph_endpoint': {
148 | 	#	'default': {
149 | 	#		'ceph1': 'cluster1.fqdn.org',
150 | 	#	},
151 | 	#	'proxmox32-lab': {
152 | 	#		'pool-ssd': 'cephlab.fqdn.org',
153 | 	#		'pool-hdd': 'cephlab.fqdn.org',
154 | 	#	}
155 | 	#},
156 | 
157 | 	# If set to True, snapshots are compressed during transfer
158 | 	# Useful if you have a low bandwidth
159 | 	#'download_compression': False,
160 | 
161 | 	# Should we freeze the VM before snapshotting?
162 | 	# This requires qemu-guest-agent
163 | 	# Beware, a current bug lives in proxmox: if qemu-guest-agent
164 | 	# is enabled on the VM, but the daemon inside that VM is dead,
165 | 	# then the proxmox API will be stuck in an endless loop for
166 | 	# ~1H
167 | 	#'fsfreeze': False,
168 | 
169 | 	# If we set use_smbios to True, but encounter a VM
170 | 	# without smbios, what should we do?
171 | 	# If True, we fall back, as if use_smbios were False, for this VM
172 | 	# If False, we drop an error and skip the VM
173 | 	# If the fallback is used, and an uuid is defined afterward,
174 | 	# you will lose this VM's backup history
175 | 	# (as if it was newly created)
176 | 	#'uuid_fallback': True,
177 | 
178 | 	# Print pretty colors, if stdout is a tty
179 | 	#'pretty_colors': True,
180 | 
181 | 	# Log level
182 | 	# Can be any of 'debug', 'info', 'warn', 'err'
183 | 	#'log_level': 'debug',
184 | 
185 | 	# How many workers should be used when we parallelize
186 | 	# tasks on the backup cluster
187 | 	#'backup_worker': 24,
188 | 
189 | 	# How many workers should be used when we parallelize
190 | 	# tasks on the live cluster
191 | 	#'live_worker': 12,
192 | 
193 | 	# Hash binary used to compare snapshots
194 | 	# You can use any executable that meets the following requirements:
195 | 	# - it reads data from stdin
196 | 	# - it requires no argument
197 | 	# - it outputs the hash to stdout as the first word
198 | 	# The output may contain other words (a space-separated list),
199 | 	# which will be ignored
200 | 	# This can be an absolute path, yet a $PATH lookup can be used
201 | 	# Defaults to xxhsum
202 | 	# This executable must live on every Ceph cluster, as well as on the backup
203 | 	# node, because hashing is done remotely
204 | 	#'hash_binary': 'xxhsum',
205 | 
206 | 	# Sqlite3 database used to track "failed" backups
207 | 	# We have to distinguish between a failed backup and a missing backup
208 | 	# (some newly created disk not yet backed up)
209 | 	#'check_db': '/tmp/backurne.db',
210 | 
211 | 	# Backurne can run commands before and after some actions
212 | 	# Each command will get parameters as arguments: its type, the VM name
213 | 	# (for proxmox; undef otherwise) and the disk name
214 | 	# Four hooks are defined:
215 | 	# - the pre_vm hook, which will run once per VM per run, before
216 | 	#   any snapshot is made on that VM's disk, and only if some
217 | 	#   snapshot *will* be made.
218 | 	# - the pre_disk hook, which will run once per disk, before creating
219 | 	#   a snapshot
220 | 	# - the post_disk hook, run just after the snapshot creation
221 | 	# - the post_vm hook, run only once per VM per run, after all
222 | 	#   needed snapshots are created
223 | 	# pre_vm and pre_disk hooks may return a non-zero return code.
224 | 	# If pre_vm or pre_disk returns a non-zero code, further processing is
225 | 	# cancelled. In that case, please note that the associated post_vm or
226 | 	# post_disk hook will not be run. A warning shall be emitted, containing
227 | 	# information about the hook, its parameters, and its output.
228 | 	# On success, the hooks' output (both stdout and stderr) is ignored.
229 | 	# Hooks shall clean up after themselves, and shall always die in a timely fashion,
230 | 	# as a stuck hook will block Backurne (no timeout is set).
231 | 	# By default, no hooks are used. You must set each hook's path.
232 | 	#'hooks': {
233 | 	#	'pre_vm': None,
234 | 	#	'pre_disk': None,
235 | 	#	'post_disk': None,
236 | 	#	'post_vm': None,
237 | 	#},
238 | 
239 | 	# Report the time taken to process (download and apply) a backup
240 | 	# Each disk will generate a one-line record, in a human-readable format,
241 | 	# with the disk name (rbd image), the cluster from which it is imported
242 | 	# and the elapsed time (excluding queue time, if present)
243 | 	# Can be:
244 | 	# - None, to disable the feature
245 | 	# - syslog
246 | 	# - influxdb
247 | 	# - some absolute file path
248 | 	# In that last case, the file will be opened, appended to and closed for each record.
249 | 	# If set to influxdb, you will need to configure the 'influxdb' stanza below.
250 | 	#'report_time': 'syslog',
251 | 
252 | 	# Influxdb instance used by report_time.
253 | 	# The required entries are:
254 | 	# - host: either an IP or a resolvable FQDN that points to the influxdb instance
255 | 	# - db: the database to connect to
256 | 	# Some optional entries exist:
257 | 	# - port
258 | 	# - tls: should we connect using http (the default) or https?
259 | 	# - verify_tls: if we are using https, should we verify the validity of the instance's
260 | 	#   certificate?
261 | 	# - mtls: you can enable mutual tls authentication by passing a tuple here, using a format
262 | 	#   such as ('/path/to/cert', '/path/to/private/key'). If those files are not valid, the
263 | 	#   instance may reject the connection.
264 | #'influxdb': { 265 | # 'host': None, 266 | # 'db': None, 267 | # 'port': 8086, 268 | # 'tls': False, 269 | # 'verify_tls': True, 270 | # 'mtls': None, 271 | #}, 272 | } 273 | -------------------------------------------------------------------------------- /graph/grafana-backurne.json: -------------------------------------------------------------------------------- 1 | { 2 | "__inputs": [ 3 | { 4 | "description": "", 5 | "label": "telegraf_hosting", 6 | "name": "DS_TELEGRAF_HOSTING", 7 | "pluginId": "influxdb", 8 | "pluginName": "InfluxDB", 9 | "type": "datasource" 10 | } 11 | ], 12 | "__requires": [ 13 | { 14 | "id": "grafana", 15 | "name": "Grafana", 16 | "type": "grafana", 17 | "version": "7.3.1" 18 | }, 19 | { 20 | "id": "graph", 21 | "name": "Graph", 22 | "type": "panel", 23 | "version": "" 24 | }, 25 | { 26 | "id": "influxdb", 27 | "name": "InfluxDB", 28 | "type": "datasource", 29 | "version": "1.0.0" 30 | } 31 | ], 32 | "annotations": { 33 | "list": [ 34 | { 35 | "builtIn": 1, 36 | "datasource": "-- Grafana --", 37 | "enable": true, 38 | "hide": true, 39 | "iconColor": "rgba(0, 211, 255, 1)", 40 | "name": "Annotations & Alerts", 41 | "type": "dashboard" 42 | } 43 | ] 44 | }, 45 | "editable": true, 46 | "gnetId": null, 47 | "graphTooltip": 0, 48 | "id": null, 49 | "iteration": 1619168286909, 50 | "links": [], 51 | "panels": [ 52 | { 53 | "aliasColors": {}, 54 | "bars": false, 55 | "dashLength": 10, 56 | "dashes": false, 57 | "datasource": "${DS_TELEGRAF_HOSTING}", 58 | "decimals": null, 59 | "fieldConfig": { 60 | "defaults": { 61 | "custom": {} 62 | }, 63 | "overrides": [] 64 | }, 65 | "fill": 1, 66 | "fillGradient": 0, 67 | "gridPos": { 68 | "h": 9, 69 | "w": 23, 70 | "x": 0, 71 | "y": 0 72 | }, 73 | "hiddenSeries": false, 74 | "id": 2, 75 | "legend": { 76 | "alignAsTable": true, 77 | "avg": false, 78 | "current": true, 79 | "max": true, 80 | "min": true, 81 | "rightSide": true, 82 | "show": true, 83 | "total": false, 84 | "values": true 85 | }, 86 | "lines": true, 87 | "linewidth": 1, 88 | "nullPointMode": "connected", 89 | "options": { 90 | "alertThreshold": true 91 | }, 92 | "percentage": false, 93 | "pluginVersion": "7.3.1", 94 | "pointradius": 2, 95 | "points": false, 96 | "renderer": "flot", 97 | "seriesOverrides": [], 98 | "spaceLength": 10, 99 | "stack": false, 100 | "steppedLine": false, 101 | "targets": [ 102 | { 103 | "alias": "In progress", 104 | "groupBy": [ 105 | { 106 | "params": [ 107 | "$__interval" 108 | ], 109 | "type": "time" 110 | }, 111 | { 112 | "params": [ 113 | "null" 114 | ], 115 | "type": "fill" 116 | } 117 | ], 118 | "measurement": "backurne_inprogress", 119 | "orderByTime": "ASC", 120 | "policy": "default", 121 | "refId": "A", 122 | "resultFormat": "time_series", 123 | "select": [ 124 | [ 125 | { 126 | "params": [ 127 | "value" 128 | ], 129 | "type": "field" 130 | }, 131 | { 132 | "params": [], 133 | "type": "mean" 134 | } 135 | ] 136 | ], 137 | "tags": [] 138 | } 139 | ], 140 | "thresholds": [], 141 | "timeFrom": null, 142 | "timeRegions": [], 143 | "timeShift": null, 144 | "title": "Backups in progress", 145 | "tooltip": { 146 | "shared": true, 147 | "sort": 0, 148 | "value_type": "individual" 149 | }, 150 | "type": "graph", 151 | "xaxis": { 152 | "buckets": null, 153 | "mode": "time", 154 | "name": null, 155 | "show": true, 156 | "values": [] 157 | }, 158 | "yaxes": [ 159 | { 160 | "format": "short", 161 | "label": null, 162 | "logBase": 1, 163 | "max": null, 164 | "min": null, 165 | "show": true 166 | }, 167 | { 168 | "format": "short", 169 | 
"label": null, 170 | "logBase": 1, 171 | "max": null, 172 | "min": null, 173 | "show": true 174 | } 175 | ], 176 | "yaxis": { 177 | "align": false, 178 | "alignLevel": null 179 | } 180 | }, 181 | { 182 | "aliasColors": {}, 183 | "bars": false, 184 | "dashLength": 10, 185 | "dashes": false, 186 | "datasource": "${DS_TELEGRAF_HOSTING}", 187 | "decimals": null, 188 | "fieldConfig": { 189 | "defaults": { 190 | "custom": {} 191 | }, 192 | "overrides": [] 193 | }, 194 | "fill": 1, 195 | "fillGradient": 0, 196 | "gridPos": { 197 | "h": 9, 198 | "w": 23, 199 | "x": 0, 200 | "y": 9 201 | }, 202 | "hiddenSeries": false, 203 | "id": 3, 204 | "legend": { 205 | "alignAsTable": true, 206 | "avg": false, 207 | "current": true, 208 | "max": true, 209 | "min": true, 210 | "rightSide": true, 211 | "show": true, 212 | "total": false, 213 | "values": true 214 | }, 215 | "lines": true, 216 | "linewidth": 1, 217 | "nullPointMode": "connected", 218 | "options": { 219 | "alertThreshold": true 220 | }, 221 | "percentage": false, 222 | "pluginVersion": "7.3.1", 223 | "pointradius": 2, 224 | "points": false, 225 | "renderer": "flot", 226 | "seriesOverrides": [], 227 | "spaceLength": 10, 228 | "stack": false, 229 | "steppedLine": false, 230 | "targets": [ 231 | { 232 | "alias": "Duration", 233 | "groupBy": [ 234 | { 235 | "params": [ 236 | "$__interval" 237 | ], 238 | "type": "time" 239 | }, 240 | { 241 | "params": [ 242 | "null" 243 | ], 244 | "type": "fill" 245 | } 246 | ], 247 | "measurement": "backurne", 248 | "orderByTime": "ASC", 249 | "policy": "default", 250 | "refId": "A", 251 | "resultFormat": "time_series", 252 | "select": [ 253 | [ 254 | { 255 | "params": [ 256 | "duration" 257 | ], 258 | "type": "field" 259 | }, 260 | { 261 | "params": [], 262 | "type": "mean" 263 | } 264 | ] 265 | ], 266 | "tags": [ 267 | { 268 | "key": "image", 269 | "operator": "=~", 270 | "value": "/^$image$/" 271 | }, 272 | { 273 | "condition": "AND", 274 | "key": "endpoint", 275 | "operator": "=~", 276 | "value": "/^$endpoint$/" 277 | } 278 | ] 279 | } 280 | ], 281 | "thresholds": [], 282 | "timeFrom": null, 283 | "timeRegions": [], 284 | "timeShift": null, 285 | "title": "Backup duration", 286 | "tooltip": { 287 | "shared": true, 288 | "sort": 0, 289 | "value_type": "individual" 290 | }, 291 | "type": "graph", 292 | "xaxis": { 293 | "buckets": null, 294 | "mode": "time", 295 | "name": null, 296 | "show": true, 297 | "values": [] 298 | }, 299 | "yaxes": [ 300 | { 301 | "$$hashKey": "object:592", 302 | "format": "s", 303 | "label": null, 304 | "logBase": 1, 305 | "max": null, 306 | "min": null, 307 | "show": true 308 | }, 309 | { 310 | "$$hashKey": "object:593", 311 | "format": "short", 312 | "label": null, 313 | "logBase": 1, 314 | "max": null, 315 | "min": null, 316 | "show": true 317 | } 318 | ], 319 | "yaxis": { 320 | "align": false, 321 | "alignLevel": null 322 | } 323 | } 324 | ], 325 | "refresh": "10s", 326 | "schemaVersion": 26, 327 | "style": "dark", 328 | "tags": [], 329 | "templating": { 330 | "list": [ 331 | { 332 | "allValue": null, 333 | "current": {}, 334 | "datasource": "${DS_TELEGRAF_HOSTING}", 335 | "definition": "", 336 | "error": null, 337 | "hide": 0, 338 | "includeAll": true, 339 | "label": null, 340 | "multi": false, 341 | "name": "image", 342 | "options": [], 343 | "query": "SHOW TAG VALUES FROM \"backurne_duration\" WITH KEY = \"image\"", 344 | "refresh": 1, 345 | "regex": "", 346 | "skipUrlSync": false, 347 | "sort": 0, 348 | "tagValuesQuery": "", 349 | "tags": [], 350 | "tagsQuery": "", 351 | "type": 
"query", 352 | "useTags": false 353 | }, 354 | { 355 | "allValue": null, 356 | "current": {}, 357 | "datasource": "${DS_TELEGRAF_HOSTING}", 358 | "definition": "", 359 | "error": null, 360 | "hide": 0, 361 | "includeAll": true, 362 | "label": null, 363 | "multi": false, 364 | "name": "endpoint", 365 | "options": [], 366 | "query": "SHOW TAG VALUES FROM \"backurne\" WITH KEY = \"endpoint\"", 367 | "refresh": 1, 368 | "regex": "", 369 | "skipUrlSync": false, 370 | "sort": 0, 371 | "tagValuesQuery": "", 372 | "tags": [], 373 | "tagsQuery": "", 374 | "type": "query", 375 | "useTags": false 376 | } 377 | ] 378 | }, 379 | "time": { 380 | "from": "now-2d", 381 | "to": "now" 382 | }, 383 | "timepicker": {}, 384 | "timezone": "", 385 | "title": "Backurne", 386 | "uid": "GZwWmKuMz", 387 | "version": 13 388 | } 389 | -------------------------------------------------------------------------------- /src/backurne/ceph.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import re 4 | import threading 5 | import time 6 | from subprocess import DEVNULL 7 | from subprocess import PIPE 8 | from subprocess import Popen 9 | 10 | import dateutil.parser 11 | import setproctitle 12 | import sh 13 | 14 | from .config import config 15 | from .log import log as Log 16 | from .log import report_time 17 | 18 | 19 | class Ceph: 20 | def __init__(self, pool, namespace=None, endpoint=None, cluster_conf={}): 21 | self.endpoint = endpoint 22 | self.cluster = cluster_conf 23 | self.compress = config["download_compression"] 24 | 25 | if pool is None: 26 | pool = config["backup_cluster"]["pool"] 27 | self.pool = pool 28 | self.cmd = sh.Command("rbd").bake("-p", pool) 29 | self.esc = False 30 | else: 31 | self.backup = Ceph(None) 32 | self.pool = pool 33 | 34 | self.__get_helper__() 35 | self.cmd = self.helper.bake("rbd", "-p", pool) 36 | if namespace is not None: 37 | self.namespace = namespace 38 | self.cmd = self.cmd.bake("--namespace", namespace) 39 | 40 | self.json = self.cmd.bake("--format", "json") 41 | 42 | def __get_helper__(self): 43 | if self.endpoint is not None: 44 | self.helper = sh.Command("ssh").bake("-n", self.endpoint) 45 | self.esc = True 46 | return 47 | 48 | if self.cluster.get("get_helper") is not None: 49 | get_helper_cmd = self.cluster["get_helper"]["cmd"] 50 | get_helper_args = self.cluster["get_helper"]["args"] 51 | helper_name = sh.Command(get_helper_cmd)(*get_helper_args) 52 | helper_name = helper_name.stdout.decode("utf-8") 53 | 54 | if self.cluster.get("use_helper") is None: 55 | Log.error(f"One of fqdn or use_helper must be defined ({self.cluster}") 56 | exit(1) 57 | 58 | use_helper_cmd = self.cluster["use_helper"]["cmd"] 59 | use_helper_args = self.cluster["use_helper"]["args"] 60 | use_helper_args = [ 61 | i if i != "%HELPERNAME%" else helper_name for i in use_helper_args 62 | ] 63 | self.helper = sh.Command(use_helper_cmd).bake(*use_helper_args) 64 | self.esc = False 65 | self.compress = False 66 | 67 | def __str__(self): 68 | result = f"pool {self.pool} using config {self.cluster}" 69 | return result 70 | 71 | def __call__(self, *args): 72 | return self.cmd(args) 73 | 74 | def __fetch(self, *args): 75 | result = self.json(args) 76 | result = json.loads(result.stdout.decode("utf-8")) 77 | return result 78 | 79 | def __esc(self, snap): 80 | if self.esc is True: 81 | return f"'{snap}'" 82 | else: 83 | return snap 84 | 85 | def info(self, image): 86 | return self.__fetch("info", image) 87 | 88 | def ls(self): 89 | return 
self.__fetch("ls") 90 | 91 | def du(self, image): 92 | return self.__fetch("du", image) 93 | 94 | def snap(self, image): 95 | snap = self.__fetch("snap", "ls", image) 96 | snap = [i["name"] for i in snap] 97 | snap = [i for i in snap if i.startswith(config["snap_prefix"])] 98 | return snap 99 | 100 | def protect(self, extsnap): 101 | info = self.info(extsnap) 102 | if info["protected"] == "true": 103 | return 104 | self("snap", "protect", extsnap) 105 | 106 | def unprotect(self, extsnap): 107 | info = self.info(extsnap) 108 | if info["protected"] == "false": 109 | return 110 | self("snap", "unprotect", extsnap) 111 | 112 | def clone(self, extsnap): 113 | for i in range(1, 100): 114 | clone = f"restore-{i}" 115 | if not self.exists(clone): 116 | break 117 | self("clone", extsnap, f"{self.pool}/{clone}") 118 | return clone 119 | 120 | def map(self, image): 121 | # lazy import to avoid circular imports 122 | from .disk import get_rbd_mapped 123 | 124 | if self.esc is True: 125 | Log.error("BUG: cannot map via ssh") 126 | exit(1) 127 | 128 | cmd = ["device", "-t", "nbd", "map", image] 129 | cmd = str(self.cmd).split(" ") + cmd 130 | 131 | Popen(cmd, stdout=DEVNULL, stderr=DEVNULL) 132 | 133 | # Should be enough .. right ? 134 | time.sleep(1) 135 | for mapped in get_rbd_mapped(): 136 | if mapped.image == image: 137 | return mapped.dev 138 | 139 | def unmap(self, dev): 140 | if self.esc is True: 141 | Log.error("BUG: cannot unmap via ssh") 142 | exit(1) 143 | 144 | self.cmd("device", "-t", "nbd", "unmap", dev) 145 | 146 | # Wait a bit to make sure the dev is effectively gone 147 | time.sleep(1) 148 | 149 | def rm(self, image): 150 | Log.debug(f"Deleting image {image} ..") 151 | try: 152 | self("rm", image) 153 | except sh.ErrorReturnCode: 154 | Log.debug(f"{image} cannot be removed, maybe someone mapped it") 155 | 156 | def rm_snap(self, image, snap): 157 | Log.debug(f"Deleting snapshot {image}@{snap} .. ") 158 | snap = self.__esc(snap) 159 | try: 160 | self("snap", "rm", "--snap", snap, image) 161 | except sh.ErrorReturnCode: 162 | Log.debug(f"Cannot rm {image}@{snap}, may be held by something") 163 | 164 | def mk_snap(self, image, snap, vm=None): 165 | snap = self.__esc(snap) 166 | 167 | Log.debug(f"Creating snapshot {image}@{snap} .. 
") 168 | 169 | if vm is None: 170 | self("snap", "create", "--snap", snap, image) 171 | return 172 | 173 | self("snap", "create", "--snap", snap, image) 174 | 175 | def exists(self, image): 176 | try: 177 | self.cmd("info", image) 178 | return True 179 | except sh.ErrorReturnCode: 180 | return False 181 | 182 | def enqueue_output(self, out): 183 | original = setproctitle.getproctitle() 184 | 185 | regexp = re.compile(r"\w* \w*: (.*)%") 186 | line = "" 187 | for char in iter(lambda: out.read(1), b""): 188 | char = char.decode("utf-8") 189 | if ord(char) != 13: 190 | line += char 191 | continue 192 | 193 | if line == "": 194 | continue 195 | 196 | progress = regexp.match(line) 197 | if progress is not None: 198 | progress = f"{progress.group(1)}% complete" 199 | else: 200 | progress = line 201 | setproctitle.setproctitle(f"{original} ({progress})") 202 | line = "" 203 | out.close() 204 | 205 | def do_backup(self, image, snap, dest, last_snap=None): 206 | # On this function, we burden ourselves with Popen 207 | # I have not figured out how do fast data transfer 208 | # between processes with python3-sh 209 | snap = self.__esc(snap) 210 | export = ["export-diff", image, "--snap", snap] 211 | export = str(self.cmd).split(" ") + export 212 | if last_snap is None: 213 | export += [ 214 | "-", 215 | ] 216 | else: 217 | last_snap = self.__esc(last_snap) 218 | export += ["--from-snap", last_snap, "-"] 219 | 220 | if self.compress is True: 221 | export += ["|", "zstd"] 222 | imp = f'zstdcat | {self.backup.cmd} import-diff --no-progress - "{dest}"' 223 | else: 224 | imp = f'{self.backup.cmd} import-diff --no-progress - "{dest}"' 225 | 226 | start = datetime.datetime.now() 227 | 228 | p1 = Popen(export, stdout=PIPE, stderr=PIPE, bufsize=0) 229 | 230 | p2 = Popen(imp, stdin=p1.stdout, shell=True) 231 | t = threading.Thread(target=self.enqueue_output, args=(p1.stderr,)) 232 | t.start() 233 | 234 | p1.stdout.close() 235 | p2.communicate() 236 | t.join() 237 | end = datetime.datetime.now() 238 | report_time(image, self.endpoint, end - start) 239 | 240 | def get_last_snap(self, snaps): 241 | last_date = datetime.datetime.fromtimestamp(0) 242 | last = None 243 | for snap in snaps: 244 | split = snap.split(";") 245 | date = dateutil.parser.parse(split[3]) 246 | if date > last_date: 247 | last_date = date 248 | last = snap 249 | return last 250 | 251 | def get_last_shared_snap(self, image, dest): 252 | live_snaps = self.snap(image) 253 | backup_snaps = self.backup.snap(dest) 254 | 255 | inter = list(set(live_snaps).intersection(backup_snaps)) 256 | return self.get_last_snap(inter) 257 | 258 | def update_desc(self, source, dest): 259 | split = dest.split(";") 260 | found = False 261 | for i in self.ls(): 262 | snap = i.split(";") 263 | if snap[0] != split[0] or snap[1] != split[1]: 264 | continue 265 | 266 | if snap[2] == split[2]: 267 | # This is my image, nothing to do 268 | continue 269 | 270 | if found is True: 271 | Log.error(f"{i} matches {dest}, but we already found a match") 272 | found = True 273 | self("mv", i, dest) 274 | 275 | def checksum(self, image, snap): 276 | snap = self.__esc(snap) 277 | cmd = ["export", image, "--snap", snap, "-"] 278 | cmd = str(self.cmd).split(" ") + cmd 279 | 280 | if self.esc is True: 281 | # via ssh 282 | cmd += ["|", config["hash_binary"]] 283 | p1 = Popen(cmd, stdout=PIPE, stderr=DEVNULL) 284 | else: 285 | p2 = Popen(cmd, stdout=PIPE, stderr=DEVNULL) 286 | p1 = Popen( 287 | [ 288 | config["hash_binary"], 289 | ], 290 | stdin=p2.stdout, 291 | stdout=PIPE, 292 | 
stderr=DEVNULL, 293 | ) 294 | out = p1.communicate()[0] 295 | out = out.decode("utf-8").split(" ")[0] 296 | return out 297 | -------------------------------------------------------------------------------- /src/backurne/restore.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os.path 3 | import tempfile 4 | 5 | import dateutil.parser 6 | import sh 7 | 8 | from .ceph import Ceph 9 | from .disk import deactivate_vg 10 | from .disk import filter_children 11 | from .disk import get_fs_info 12 | from .disk import get_mapped 13 | from .disk import get_next_nbd 14 | from .disk import resolv_qemu_nbd 15 | from .disk import resolv_vmfs 16 | from .disk import wait_dev 17 | from .log import log as Log 18 | 19 | 20 | class Restore: 21 | def __init__(self, rbd=None, snap=None, vmdk=None): 22 | self.ceph = Ceph(None) 23 | self.dev = None 24 | 25 | self.rbd = rbd 26 | self.snap = snap 27 | self.vmdk = vmdk 28 | self.extsnap = f"{self.rbd}@{self.snap}" 29 | self.umounted = [] 30 | 31 | def ls(self): 32 | result = [] 33 | if self.rbd is None: 34 | for i in self.ceph.ls(): 35 | if i.startswith("restore-"): 36 | continue 37 | split = i.split(";") 38 | if len(split) != 3: 39 | Log.warning(f"Unknown image: {i}") 40 | continue 41 | result.append( 42 | { 43 | "ident": split[2], 44 | "disk": split[1], 45 | "uuid": i, 46 | } 47 | ) 48 | else: 49 | for i in self.ceph.snap(self.rbd): 50 | split = i.split(";") 51 | creation = dateutil.parser.parse(split[3]) 52 | result.append( 53 | { 54 | "creation": creation, 55 | "uuid": i, 56 | } 57 | ) 58 | return result 59 | 60 | def get_tmpdir(self): 61 | tmp_dir = tempfile.mkdtemp() 62 | return tmp_dir 63 | 64 | def __map_vmdks(self, path): 65 | if self.vmdk is None: 66 | Log.debug("No vmdk specified, not mapping those") 67 | return 68 | 69 | for vmdk in glob.glob(f"{path}/{self.vmdk}/*-flat.vmdk"): 70 | self.__map_vmdk(vmdk) 71 | 72 | def __map_vmdk(self, vmdk): 73 | vmdk_file = vmdk.split("/")[-1] 74 | vmdk_overlay = f"/tmp/{self.clone}-{vmdk_file}.qcow2" 75 | Log.debug(f"qemu-img create {vmdk_overlay} over {vmdk}") 76 | sh.Command("qemu-img")("create", "-f", "qcow2", "-b", vmdk, vmdk_overlay) 77 | next_nbd = get_next_nbd() 78 | Log.debug(f"qemu-nbd {vmdk_overlay} as {next_nbd}") 79 | sh.Command("qemu-nbd")("--connect", next_nbd, vmdk_overlay) 80 | wait_dev(next_nbd) 81 | try: 82 | sh.Command("kpartx")("-av", next_nbd) 83 | self.mount_dev(next_nbd) 84 | except Exception: 85 | pass 86 | 87 | def __mount_vmfs(self, path, tmp_dir): 88 | for cmd in ("vmfs-fuse", "vmfs6-fuse"): 89 | try: 90 | Log.debug(f"{cmd} {path} {tmp_dir}") 91 | sh.Command(cmd)(path, tmp_dir) 92 | self.__map_vmdks(tmp_dir) 93 | return 94 | except Exception: 95 | pass 96 | 97 | def mount_dev(self, dev, ignore_mapped=False): 98 | wait_dev(dev) 99 | info = get_fs_info(dev)[0] 100 | if info["fstype"] == "VMFS_volume_member": 101 | info["mountpoint"] = resolv_vmfs(dev) 102 | 103 | if info["fstype"] == "swap": 104 | return False 105 | 106 | if info["parttype"] == "0x42": 107 | Log.debug("windows dynamic disk detected: scanning and creating devices") 108 | sh.Command("ldmtool")("scan") 109 | sh.Command("ldmtool")("create", "all") 110 | return False 111 | if ( 112 | info["fstype"] is not None 113 | and info["mountpoint"] is None 114 | and info["fstype"] != "LVM2_member" 115 | ): 116 | tmp_dir = self.get_tmpdir() 117 | if info["fstype"] == "VMFS_volume_member": 118 | self.__mount_vmfs(dev, tmp_dir) 119 | return True 120 | Log.debug(f"mounting {dev} as 
{info['fstype']} into {tmp_dir}") 121 | if info["fstype"] == "xfs": 122 | Log.debug(f"xfs_repair -L {dev}") 123 | sh.Command("xfs_repair")("-L", dev) 124 | Log.debug(f"mount {dev} {tmp_dir}") 125 | try: 126 | sh.Command("mount")(dev, tmp_dir) 127 | except Exception as e: 128 | os.rmdir(tmp_dir) 129 | if info["fstype"] == "ntfs": 130 | Log.debug(e) 131 | else: 132 | Log.warn(e) 133 | pass 134 | 135 | return True 136 | 137 | if info["fstype"] == "VMFS_volume_member": 138 | changed = False 139 | for vmdk in glob.glob(f"{info['mountpoint']}/{self.vmdk}/*-flat.vmdk"): 140 | vmdk_file = vmdk.split("/")[-1] 141 | vmdk_overlay = f"/tmp/{self.clone}-{vmdk_file}.qcow2" 142 | nbd = resolv_qemu_nbd(vmdk_overlay) 143 | if nbd is None: 144 | self.__map_vmdk(vmdk) 145 | return True 146 | wait_dev(nbd) 147 | result = self.mount_dev(nbd, ignore_mapped=True) 148 | if result is True: 149 | changed = True 150 | if changed is True: 151 | return True 152 | 153 | if "children" not in info: 154 | return False 155 | 156 | info["children"] = filter_children(info["children"], ignore_mapped) 157 | for child in info["children"]: 158 | result = self.mount_dev(child["name"]) 159 | if result is True: 160 | return True 161 | return False 162 | 163 | def clone_image(self): 164 | for i in get_mapped(extended=False): 165 | if i.name.parent_image != self.rbd or i.name.parent_snap != self.snap: 166 | continue 167 | self.clone = i.name.image 168 | self.dev = i.name.dev 169 | return 170 | 171 | Log.info(f"Cloning {self.extsnap} ..") 172 | self.ceph.protect(self.extsnap) 173 | self.clone = self.ceph.clone(self.extsnap) 174 | self.dev = self.ceph.map(self.clone) 175 | 176 | def mount(self): 177 | if self.vmdk is None: 178 | Log.info(f"Mapping {self.extsnap} ..") 179 | else: 180 | Log.info(f"Mapping {self.extsnap} with vmdk {self.vmdk} ..") 181 | self.clone_image() 182 | 183 | if self.dev is None: 184 | Log.error(f"Cannot map {self.clone} (cloned from {self.extsnap})") 185 | return 186 | 187 | while self.mount_dev(self.dev): 188 | Log.debug("Some progress was made, keep running") 189 | pass 190 | 191 | return 192 | 193 | def has_pv(self, tree): 194 | for i in tree.descendants: 195 | if i.name.fstype == "LVM2_member": 196 | return True 197 | return False 198 | 199 | def umount_tree(self, tree, first_pass=False): 200 | for child in tree.children: 201 | if child.name.dev.endswith(".vmdk"): 202 | self.umount_tree(child, first_pass=first_pass) 203 | 204 | ldm = False 205 | for child in tree.children: 206 | if child.name.ldm is True and first_pass is True: 207 | ldm = True 208 | if child.name.dev.endswith(".vmdk"): 209 | continue 210 | self.umount_tree(child, first_pass=first_pass) 211 | if tree.name.fstype == "LVM2_member": 212 | deactivate_vg(tree.name.dev) 213 | 214 | if ldm is True: 215 | Log.debug("windows dynamic disk detected: removing all devices") 216 | sh.Command("ldmtool")("remove", "all") 217 | 218 | if first_pass is True and self.has_pv(tree): 219 | Log.debug(f"{tree.name.dev}: pv found, return") 220 | return 221 | 222 | if tree.name.mountpoint is not None: 223 | if tree.name.mountpoint in self.umounted: 224 | Log.debug(f"We already umounted {tree.name.mountpoint}") 225 | return 226 | 227 | self.umounted.append(tree.name.mountpoint) 228 | Log.debug(f"\t{tree.name.dev}: umount {tree.name.mountpoint}") 229 | sh.Command("umount")(tree.name.mountpoint) 230 | Log.debug(f"\t{tree.name.dev}: rmdir {tree.name.mountpoint}") 231 | os.rmdir(tree.name.mountpoint) 232 | return 233 | 234 | if tree.name.qemu_nbd is not None: 235 | 
Log.debug(f"\t{tree.name.dev}: kpartx -dv {tree.name.qemu_nbd}") 236 | sh.Command("kpartx")("-dv", tree.name.qemu_nbd) 237 | Log.debug(f"\t{tree.name.dev}: qemu-nbd --disconnect {tree.name.qemu_nbd}") 238 | sh.Command("qemu-nbd")("--disconnect", tree.name.qemu_nbd) 239 | Log.debug(f"\t{tree.name.dev}: rm {tree.name.dev}") 240 | try: 241 | os.unlink(tree.name.dev) 242 | except FileNotFoundError: 243 | pass 244 | return 245 | 246 | if tree.name.image is not None and first_pass is False: 247 | Log.debug(f"\t{tree.name.dev}: rbd unmap {tree.name.image}") 248 | self.ceph.unmap(tree.name.dev) 249 | Log.debug(f"\t{tree.name.dev}: rbd rm {tree.name.image}") 250 | self.ceph.rm(tree.name.image) 251 | Log.debug( 252 | f"\t{tree.name.dev}: rbd unprotect --snap {tree.name.parent_snap} {tree.name.parent_image}" 253 | ) 254 | self.ceph.unprotect(f"{tree.name.parent_image}@{tree.name.parent_snap}") 255 | return 256 | Log.debug(f"{tree.name.dev}: Nothing to do ?") 257 | 258 | def umount(self, recursed=False): 259 | Log.info(f"Unmapping {self.extsnap} ..") 260 | for i in get_mapped(): 261 | part = i.name 262 | if part.parent_image != self.rbd or part.parent_snap != self.snap: 263 | continue 264 | Log.debug("First pass: skip devices which contains PV") 265 | self.umount_tree(i, first_pass=True) 266 | Log.debug("Second pass: process all remaining devices") 267 | self.umount_tree(i, first_pass=False) 268 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. 
And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 
97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. 
For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 
209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. 
SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 292 | 293 | <one line to give the program's name and a brief idea of what it does.> 294 | Copyright (C) <year>  <name of author> 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker.
331 | 332 | <signature of Ty Coon>, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | -------------------------------------------------------------------------------- /src/backurne/backurne.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import atexit 3 | import datetime 4 | import json 5 | import multiprocessing 6 | import queue 7 | import signal 8 | import sqlite3 9 | import time 10 | from functools import wraps 11 | 12 | import dateutil.parser 13 | import filelock 14 | import progressbar 15 | import requests 16 | import setproctitle 17 | import sh 18 | 19 | from . import pretty 20 | from . import stats 21 | from .backup import Bck 22 | from .ceph import Ceph 23 | from .config import config 24 | from .disk import get_mapped 25 | from .disk import prepare_tree_to_json 26 | from .disk import print_mapped 27 | from .log import log as Log 28 | from .proxmox import Proxmox 29 | from .restore import Restore 30 | 31 | 32 | VERSION = "2.4.0" 33 | 34 | 35 | def handle_exc(func): 36 | @wraps(func) 37 | def wrapper(*args, **kwargs): 38 | try: 39 | return func(*args, **kwargs) 40 | except filelock.Timeout as e: 41 | Log.debug(e) 42 | except Exception as e: 43 | Log.warning(f"{e} thrown while running {func.__name__}()") 44 | 45 | return wrapper 46 | 47 | 48 | class Check: 49 | def __init__(self, cluster): 50 | self.cluster = cluster 51 | self.err = [] 52 | 53 | def add_err(self, msg): 54 | if msg is None: 55 | return 56 | msg["cluster"] = self.cluster["name"] 57 | self.err.append(msg) 58 | 59 | @handle_exc 60 | def check_img(self, args): 61 | ceph = args["ceph"] 62 | backup = args["backup"] 63 | rbd = args["image"] 64 | 65 | if not ceph.backup.exists(backup.dest): 66 | msg = f"No backup found for {backup} at {ceph} (image does not exist)" 67 | return {"image": rbd, "msg": msg} 68 | 69 | last = ceph.get_last_shared_snap(rbd, backup.dest) 70 | if last is None: 71 | msg = f"No backup found for {backup} at {ceph} (no shared snap)" 72 | return {"image": rbd, "msg": msg} 73 | 74 | when = last.split(";")[3] 75 | when = dateutil.parser.parse(when) 76 | deadline = datetime.timedelta(days=1) + datetime.timedelta(hours=6) 77 | deadline = datetime.datetime.now() - deadline 78 | if when < deadline: 79 | msg = ( 80 | f"Backup found for {backup} at {ceph}, yet too old (created at {when})" 81 | ) 82 | return {"image": rbd, "msg": msg} 83 | 84 | snaps = ceph.backup.snap(backup.dest) 85 | for snap in snaps: 86 | if not Backup.is_expired(snap): 87 | continue 88 | msg = f"Snapshot {backup.dest} / {snap} was not deleted in time, please investigate (may be protected or mapped)."
89 | return {"image": rbd, "msg": msg} 90 | 91 | def cmp_snap(self, backup, ceph, rbd): 92 | live_snaps = ceph.snap(rbd) 93 | try: 94 | backup_snaps = ceph.backup.snap(backup.dest) 95 | except Exception: 96 | backup_snaps = [] 97 | inter = list(set(live_snaps).intersection(backup_snaps)) 98 | for snap in inter: 99 | Log.debug(f"checking {rbd} @ {snap}") 100 | live = ceph.checksum(rbd, snap) 101 | back = ceph.backup.checksum(backup.dest, snap) 102 | if live == back: 103 | continue 104 | 105 | err = { 106 | "image": rbd, 107 | "msg": f"ERR: shared snapshot {snap} does not match\n\tOn live (image: {rbd}): {live}\n\tOn backup (image: {backup.dest}): {back}", 108 | } 109 | self.add_err(err) 110 | 111 | 112 | class CheckProxmox(Check): 113 | def __init__(self, cluster): 114 | super().__init__(cluster) 115 | self.px = Proxmox(cluster) 116 | 117 | def check(self): 118 | data = [] 119 | for vm in self.px.vms(): 120 | for disk, ceph, bck in vm["to_backup"]: 121 | data.append({"ceph": ceph, "backup": bck, "image": disk["rbd"]}) 122 | 123 | self.err = [] 124 | with multiprocessing.Pool() as pool: 125 | for msg in pool.imap_unordered(self.check_img, data): 126 | self.add_err(msg) 127 | 128 | return self.err 129 | 130 | def check_snap(self): 131 | for vm in self.px.vms(): 132 | for disk, ceph, bck in vm["to_backup"]: 133 | self.cmp_snap(bck, ceph, disk["rbd"]) 134 | return self.err 135 | 136 | 137 | class CheckPlain(Check): 138 | def __init__(self, cluster): 139 | super().__init__(cluster) 140 | self.ceph = Ceph( 141 | self.cluster["pool"], 142 | namespace=self.cluster.get("namespace"), 143 | endpoint=self.cluster["fqdn"], 144 | cluster_conf=self.cluster, 145 | ) 146 | 147 | def check(self): 148 | data = [] 149 | for rbd in self.ceph.ls(): 150 | bck = Bck(self.cluster["name"], self.ceph, rbd) 151 | data.append({"ceph": self.ceph, "backup": bck, "image": rbd}) 152 | 153 | self.err = [] 154 | with multiprocessing.Pool() as pool: 155 | for msg in pool.imap_unordered(self.check_img, data): 156 | self.add_err(msg) 157 | 158 | return self.err 159 | 160 | def check_snap(self): 161 | for rbd in self.ceph.ls(): 162 | bck = Bck(self.cluster["name"], self.ceph, rbd) 163 | self.cmp_snap(bck, self.ceph, rbd) 164 | return self.err 165 | 166 | 167 | def run_hook(kind, vmname, diskname): 168 | if config["hooks"][kind] is not None: 169 | sh.Command(config["hooks"][kind])(kind, vmname, diskname) 170 | 171 | 172 | class Backup: 173 | def __init__(self, cluster, regular_queue, priority_queue, status_queue, args=None): 174 | self.cluster = cluster 175 | self.regular_queue = regular_queue 176 | self.priority_queue = priority_queue 177 | self.status_queue = status_queue 178 | self.args = args 179 | 180 | def is_expired(snap, last=False): 181 | split = snap.split(";") 182 | created_at = dateutil.parser.parse(split[-1]) 183 | profile = split[-3] 184 | value = int(split[-2]) 185 | if profile == "daily": 186 | expiration = datetime.timedelta(days=value) 187 | elif profile == "hourly": 188 | expiration = datetime.timedelta(hours=value) 189 | elif profile == "weekly": 190 | expiration = datetime.timedelta(days=7 * value) 191 | elif profile == "monthly": 192 | expiration = datetime.timedelta(days=30 * value) 193 | else: 194 | Log.warning(f"Unknown profile found, no action taken: {profile}") 195 | return False 196 | 197 | expired_at = created_at + expiration 198 | if last is True: 199 | expired_at += datetime.timedelta(days=config["extra_retention_time"]) 200 | 201 | now = datetime.datetime.now() 202 | if expired_at > now: 203 | 
return False 204 | return True 205 | 206 | def _create_snap(self, bck, profiles, pre_vm_hook): 207 | todo = [] 208 | is_high_prio = False 209 | 210 | hooked = False 211 | 212 | try: 213 | with Lock(bck.dest): 214 | for profile, value in profiles: 215 | self.status_queue.put("add_item") 216 | if not self.args.force and not bck.check_profile(profile): 217 | self.status_queue.put("done_item") 218 | continue 219 | 220 | if pre_vm_hook is False: 221 | try: 222 | run_hook("pre_vm", bck.vm["name"], bck.rbd) 223 | except Exception as e: 224 | out = ( 225 | e.stdout.decode("utf-8") 226 | + e.stderr.decode("utf-8").rstrip() 227 | ) 228 | Log.warning( 229 | "pre_vm hook failed on %s/%s with code %s : %s" 230 | % (bck.vm["name"], bck.rbd, e.exit_code, out) 231 | ) 232 | self.status_queue.put("done_item") 233 | return None 234 | hooked = True 235 | 236 | try: 237 | if bck.vm is not None: 238 | run_hook("pre_disk", bck.vm["name"], bck.rbd) 239 | else: 240 | run_hook("pre_disk", bck.source, bck.rbd) 241 | except Exception as e: 242 | out = ( 243 | e.stdout.decode("utf-8") + e.stderr.decode("utf-8").rstrip() 244 | ) 245 | Log.warning( 246 | "pre_disk hook failed on %s/%s with code %s : %s" 247 | % (bck.vm["name"] if bck.vm is not None else bck.source, bck.rbd, e.exit_code, out)  # bck.vm is None for plain (non-Proxmox) backups 248 | ) 249 | self.status_queue.put("done_item") 250 | continue 251 | setproctitle.setproctitle( 252 | f"Backurne: snapshooting {bck.rbd} on {bck.name}" 253 | ) 254 | dest, last_snap, snap_name = bck.make_snap(profile, value["count"]) 255 | 256 | try: 257 | run_hook("post_disk", bck.vm["name"] if bck.vm is not None else bck.source, bck.rbd) 258 | except Exception: 259 | pass 260 | 261 | if dest is not None: 262 | todo.append( 263 | { 264 | "dest": dest, 265 | "last_snap": last_snap, 266 | "snap_name": snap_name, 267 | "backup": bck, 268 | } 269 | ) 270 | 271 | priority = value.get("priority") 272 | if priority == "high": 273 | is_high_prio = True 274 | except filelock.Timeout: 275 | Log.info(f"unable to acquire lock for {bck.vm['name'] if bck.vm is not None else bck.source}") 276 | pass 277 | if len(todo) != 0: 278 | if is_high_prio: 279 | self.priority_queue.put(todo) 280 | else: 281 | self.regular_queue.put(todo) 282 | setproctitle.setproctitle("Backurne idle producer") 283 | return hooked 284 | 285 | def create_snaps(self): 286 | items = self.list() 287 | with multiprocessing.Pool(config["live_worker"]) as pool: 288 | for i in pool.imap_unordered(self.create_snap, items): 289 | pass 290 | 291 | def _custom_key(self, item): 292 | return item.split(";")[3] 293 | 294 | def _expire_item(self, ceph, disk, vm=None): 295 | self.status_queue.put("add_item") 296 | self.status_queue.put("done_item") 297 | 298 | if vm is not None: 299 | bck = Bck(disk["ceph"], ceph, disk["rbd"], vm=vm, adapter=disk["adapter"]) 300 | rbd = disk["rbd"] 301 | else: 302 | bck = Bck(self.cluster["name"], ceph, disk) 303 | rbd = disk 304 | 305 | backups = Ceph(None).snap(bck.dest) 306 | 307 | snaps = ceph.snap(rbd) 308 | shared = list(set(backups).intersection(snaps)) 309 | 310 | try: 311 | shared.sort(key=self._custom_key) 312 | shared = shared.pop() 313 | except IndexError: 314 | shared = None 315 | 316 | by_profile = {} 317 | for snap in snaps: 318 | # The last shared snapshot must be kept 319 | # Also, subsequent snaps shall be kept as well, 320 | # because a backup may be pending elsewhere 321 | if shared is None or snap.split(";")[3] >= shared.split(";")[3]: 322 | continue 323 | tmp = snap.split(";") 324 | if tmp[1] not in by_profile: 325 | by_profile[tmp[1]] = [] 326 | i = by_profile[tmp[1]] 327 | i.append(snap) 328 | 329 | to_del = [] 330 | for profile, snaps in
by_profile.items(): 331 | try: 332 | profile = config["profiles"][profile] 333 | except KeyError: 334 | # Profile no longer exists, we can drop all these snaps 335 | to_del += snaps 336 | continue 337 | try: 338 | max_on_live = profile["max_on_live"] 339 | except KeyError: 340 | max_on_live = 1 341 | 342 | for _ in range(0, max_on_live): 343 | try: 344 | snaps.pop() 345 | except IndexError: 346 | # We do not have enough snaps on live 347 | # snaps is now an empty list, nothing to delete 348 | break 349 | 350 | to_del += snaps 351 | for i in to_del: 352 | ceph.rm_snap(rbd, i) 353 | 354 | def expire_live(self): 355 | items = self.list() 356 | with multiprocessing.Pool(config["live_worker"]) as pool: 357 | for i in pool.imap_unordered(self.expire_item, items): 358 | pass 359 | 360 | @handle_exc 361 | def expire_backup(i): 362 | ceph = i["ceph"] 363 | image = i["image"] 364 | i["status_queue"].put("done_item") 365 | 366 | with Lock(image): 367 | snaps = ceph.snap(image) 368 | try: 369 | # Pop the last snapshot 370 | # We will take care of it later 371 | last = snaps.pop() 372 | except IndexError: 373 | # We found an image without snapshot 374 | # Someone is messing around, or this is a bug 375 | # Anyway, the image can be deleted 376 | ceph.rm(image) 377 | return 378 | 379 | for snap in snaps: 380 | if not Backup.is_expired(snap): 381 | continue 382 | ceph.rm_snap(image, snap) 383 | 384 | snaps = ceph.snap(image) 385 | if len(snaps) == 1: 386 | if Backup.is_expired(last, last=True): 387 | ceph.rm_snap(image, snaps[0]) 388 | 389 | if len(ceph.snap(image)) == 0: 390 | Log.debug(f"{image} has no snapshot left, deleting") 391 | ceph.rm(image) 392 | 393 | 394 | class BackupProxmox(Backup): 395 | def __init__(self, cluster, regular_queue, priority_queue, status_queue, args): 396 | super().__init__(cluster, regular_queue, priority_queue, status_queue, args) 397 | 398 | def __fetch_profiles(self, vm, disk): 399 | profiles = list(config["profiles"].items()) 400 | 401 | if config["profiles_api"] is None: 402 | return profiles 403 | 404 | try: 405 | json = { 406 | "cluster": { 407 | "type": "proxmox", 408 | "name": self.cluster["name"], 409 | "fqdn": self.cluster["fqdn"], 410 | }, 411 | "vm": { 412 | "vmid": vm["vmid"], 413 | "name": vm["name"], 414 | }, 415 | "disk": disk, 416 | } 417 | 418 | add = requests.post(config["profiles_api"], json=json, timeout=10) 419 | add.raise_for_status() 420 | add = add.json() 421 | 422 | if "backup" in add and add["backup"] is False: 423 | return [] 424 | 425 | if "profiles" in add: 426 | profiles += list(add["profiles"].items()) 427 | 428 | except Exception as e: 429 | Log.warning(f"{e} thrown while fetching profiles for {vm}") 430 | return profiles 431 | 432 | def list(self): 433 | result = [] 434 | 435 | try: 436 | px = Proxmox(self.cluster) 437 | for vm in px.vms(): 438 | if vm["smbios"] is None and self.cluster["use_smbios"] is True: 439 | if config["uuid_fallback"] is False: 440 | Log.warning("No smbios found, skipping") 441 | continue 442 | result.append(vm) 443 | except Exception as e: 444 | Log.error(f"{e} thrown while listing vm on {self.cluster['name']}") 445 | return result 446 | 447 | def filter_profiles(self, profiles, _filter): 448 | if _filter is None: 449 | return profiles 450 | 451 | result = [] 452 | for profile in profiles: 453 | if profile[0] == _filter: 454 | result.append(profile) 455 | else: 456 | Log.debug(f"Skipping profile {profile[0]}, due to --profile") 457 | return result 458 | 459 | @handle_exc 460 | def create_snap(self, vm): 461 | 
setproctitle.setproctitle("Backurne idle producer") 462 | 463 | if self.args.vmid is not None: 464 | if vm["vmid"] != self.args.vmid: 465 | Log.debug(f"Skipping VM {vm['vmid']}, due to --vmid") 466 | return 467 | 468 | px = Proxmox(self.cluster) 469 | # We freeze the VM once, and create all snaps at the same time 470 | # Exports are done after thawing, because it is time-consuming, 471 | # and we must not keep the VM frozen longer than necessary 472 | px.freeze(vm["node"], vm) 473 | 474 | pre_vm_hook = False 475 | 476 | for disk, ceph, bck in vm["to_backup"]: 477 | profiles = self.__fetch_profiles(vm, disk) 478 | profiles = self.filter_profiles(profiles, self.args.profile) 479 | hooked = self._create_snap(bck, profiles, pre_vm_hook) 480 | if hooked is None: 481 | # pre_vm hook failed, we skip all its disks 482 | break 483 | 484 | if hooked is True: 485 | pre_vm_hook = True 486 | 487 | if pre_vm_hook is True: 488 | run_hook("post_vm", bck.vm["name"], bck.rbd) 489 | 490 | px.thaw(vm["node"], vm) 491 | 492 | @handle_exc 493 | def expire_item(self, vm): 494 | for disk, ceph, bck in vm["to_backup"]: 495 | if self.args.vmid is not None: 496 | if vm["vmid"] != self.args.vmid: 497 | Log.debug(f"Skipping VM {vm['vmid']}, due to --vmid") 498 | return 499 | 500 | with Lock(bck.dest): 501 | self._expire_item(ceph, disk, vm) 502 | 503 | 504 | class BackupPlain(Backup): 505 | def __init__(self, cluster, regular_queue, priority_queue, status_queue, args): 506 | super().__init__(cluster, regular_queue, priority_queue, status_queue, args) 507 | self.ceph = Ceph( 508 | self.cluster["pool"], 509 | namespace=self.cluster.get("namespace"), 510 | endpoint=self.cluster["fqdn"], 511 | cluster_conf=self.cluster, 512 | ) 513 | 514 | def list(self): 515 | try: 516 | return self.ceph.ls() 517 | except Exception as e: 518 | Log.warning(e) 519 | return [] 520 | 521 | @handle_exc 522 | def create_snap(self, rbd): 523 | setproctitle.setproctitle("Backurne idle producer") 524 | bck = Bck(self.cluster["name"], self.ceph, rbd) 525 | self._create_snap(bck, config["profiles"].items(), True) 526 | 527 | @handle_exc 528 | def expire_item(self, rbd): 529 | bck = Bck(self.cluster["name"], self.ceph, rbd) 530 | with Lock(bck.dest): 531 | self._expire_item(self.ceph, rbd) 532 | 533 | 534 | class Status_updater: 535 | class Real_updater: 536 | def __init__(self, status_queue, desc): 537 | self.todo = 0 538 | self.total = 0 539 | self.status_queue = status_queue 540 | self.desc = desc 541 | 542 | if config["log_level"] != "debug": 543 | # progressbar uses signal.SIGWINCH 544 | # It messes with multiprocessing, so we break it 545 | real_signal = signal.signal 546 | signal.signal = None 547 | widget = [ 548 | progressbar.widgets.SimpleProgress(), 549 | " ", 550 | desc, 551 | " (", 552 | progressbar.widgets.Timer(), 553 | ")", 554 | ] 555 | self.bar = progressbar.ProgressBar(maxval=1, widgets=widget) 556 | signal.signal = real_signal 557 | 558 | @handle_exc 559 | def __call__(self): 560 | Log.debug("Real_updater started") 561 | if config["log_level"] != "debug": 562 | self.bar.start() 563 | self.__work__() 564 | if config["log_level"] != "debug": 565 | self.bar.finish() 566 | Log.debug("Real_updater ended") 567 | 568 | def __update(self): 569 | done = self.total - self.todo 570 | msg = f"Backurne : {done}/{self.total} {self.desc}" 571 | setproctitle.setproctitle(msg) 572 | if config["log_level"] != "debug": 573 | self.bar.maxval = self.total 574 | self.bar.update(done) 575 | 576 | def __work__(self): 577 | while True: 578 | try: 579 |
msg = self.status_queue.get(block=False) 580 | except queue.Empty: 581 | self.__update() 582 | time.sleep(1) 583 | continue 584 | if msg == "add_item": 585 | self.total += 1 586 | self.todo += 1 587 | elif msg == "done_item": 588 | self.todo -= 1 589 | else: 590 | Log.error(f"Unknown message received: {msg}") 591 | self.__update() 592 | 593 | def __init__(self, manager, desc): 594 | self.status_queue = manager.Queue() 595 | self.desc = desc 596 | 597 | def __enter__(self): 598 | target = Status_updater.Real_updater(self.status_queue, self.desc) 599 | self.real_updater = multiprocessing.Process(target=target) 600 | atexit.register(self.real_updater.terminate) 601 | self.real_updater.start() 602 | return self.status_queue 603 | 604 | def __exit__(self, type, value, traceback): 605 | self.real_updater.terminate() 606 | print("") 607 | 608 | 609 | class Lock: 610 | def __init__(self, path): 611 | path = path.replace("/", "") 612 | self.path = f"{config['lockdir']}/{path}" 613 | self.lock = filelock.FileLock(self.path, timeout=0) 614 | 615 | def __enter__(self): 616 | Log.debug(f"locking {self.path}") 617 | self.lock.acquire() 618 | 619 | def __exit__(self, type, value, traceback): 620 | Log.debug(f"releasing lock {self.path}") 621 | self.lock.release() 622 | 623 | 624 | class Producer: 625 | def __init__(self, params, args): 626 | self.cluster = params["cluster"] 627 | self.regular_queue = params["regular_q"] 628 | self.priority_queue = params["priority_q"] 629 | self.status_queue = params["status_q"] 630 | self.args = args 631 | 632 | @handle_exc 633 | def __call__(self): 634 | Log.debug("Producer started") 635 | setproctitle.setproctitle("Backurne Producer") 636 | self.__work__() 637 | # We send one None per live_worker 638 | # That way, all of them shall die 639 | for i in range(0, config["live_worker"]): 640 | try: 641 | self.regular_queue.put(None) 642 | self.priority_queue.put(None) 643 | except Exception: 644 | Log.error( 645 | "cannot end a live_worker! 
This is a critical bug, we will never die" 646 | ) 647 | 648 | Log.debug("Producer ended") 649 | 650 | @handle_exc 651 | def __work__(self): 652 | if self.args.cluster is not None: 653 | if self.cluster["name"] != self.args.cluster: 654 | Log.debug(f"Skipping cluster {self.cluster['name']} due to --cluster") 655 | return 656 | Log.debug(f"Backuping {self.cluster['type']}: {self.cluster['name']}") 657 | if self.cluster["type"] == "proxmox": 658 | bidule = BackupProxmox( 659 | self.cluster, 660 | self.regular_queue, 661 | self.priority_queue, 662 | self.status_queue, 663 | self.args, 664 | ) 665 | else: 666 | bidule = BackupPlain( 667 | self.cluster, 668 | self.regular_queue, 669 | self.priority_queue, 670 | self.status_queue, 671 | self.args, 672 | ) 673 | bidule.create_snaps() 674 | 675 | 676 | class Consumer: 677 | def __init__(self, params): 678 | self.id = params["id"] 679 | self.cluster = params["cluster"] 680 | self.regular_queue = params["regular_q"] 681 | self.priority_queue = params["priority_q"] 682 | self.status_queue = params["status_q"] 683 | 684 | # Track the queue status 685 | # When both are dead, the worker can die in peace 686 | self.priority_alive = True 687 | self.regular_alive = True 688 | 689 | @handle_exc 690 | def __call__(self): 691 | Log.debug("Consumer started") 692 | setproctitle.setproctitle("Backurne Consumer") 693 | try: 694 | lockname = f"Consumer-{self.cluster['name']}-{self.id}" 695 | with Lock(lockname): 696 | self.__work__() 697 | except filelock.Timeout: 698 | Log.debug(f"Cannot lock: {lockname}, another instance is running") 699 | Log.debug("Consumer ended") 700 | 701 | def __work__(self): 702 | while True: 703 | setproctitle.setproctitle( 704 | f"Backurne idle consumer ({self.cluster['name']})" 705 | ) 706 | 707 | if self.priority_alive is False and self.regular_alive is False: 708 | break 709 | 710 | snaps = [] 711 | if self.priority_alive is True: 712 | try: 713 | snaps = self.priority_queue.get_nowait() 714 | except queue.Empty: 715 | pass 716 | 717 | if snaps is None: 718 | self.priority_alive = False 719 | continue 720 | 721 | if len(snaps) == 0 and self.regular_alive is True: 722 | try: 723 | snaps = self.regular_queue.get_nowait() 724 | except queue.Empty: 725 | pass 726 | 727 | if snaps is None: 728 | self.regular_alive = False 729 | continue 730 | 731 | if len(snaps) == 0: 732 | time.sleep(1) 733 | continue 734 | 735 | try: 736 | with Lock(snaps[0]["dest"]): 737 | for snap in snaps: 738 | setproctitle.setproctitle( 739 | f"Backurne: fetching {snap['backup'].source} ({snap['snap_name']})" 740 | ) 741 | backup = snap["backup"] 742 | backup.dl_snap( 743 | snap["snap_name"], snap["dest"], snap["last_snap"] 744 | ) 745 | except filelock.Timeout: 746 | pass 747 | except Exception as e: 748 | Log.error(e) 749 | self.status_queue.put("done_item") 750 | setproctitle.setproctitle("Backurne idle consumer") 751 | 752 | 753 | def get_sqlite(): 754 | sql = sqlite3.connect(config["check_db"], isolation_level=None) 755 | sql.execute( 756 | "create table if not exists results (date text, cluster text, disk text, msg text)" 757 | ) 758 | return sql 759 | 760 | 761 | def print_check_results(): 762 | sql = get_sqlite() 763 | 764 | failed = sql.execute( 765 | 'select * from results where date < strftime("%s", "now") - 7200' 766 | ) 767 | failed = [i for i in failed] 768 | 769 | if len(failed) > 0: 770 | print(f"Error: {len(failed)} failed backups found") 771 | for err in failed: 772 | print(f"{err[1]} : {err[3]}") 773 | exit(2) 774 | 775 | print("OK: all things 
are backed up!") 776 | exit(0) 777 | 778 | 779 | def update_check_results(check_results): 780 | sql = get_sqlite() 781 | 782 | failed_db = [i for i in sql.execute("select date, cluster, disk from results")] 783 | for i in failed_db: 784 | found = False 785 | for j in check_results: 786 | if i[1] != j["cluster"]: 787 | continue 788 | if i[2] != j["image"]: 789 | continue 790 | found = True 791 | break 792 | if found is False: 793 | sql.execute( 794 | "delete from results where cluster = ? and disk = ?", (i[1], i[2]) 795 | ) 796 | 797 | for i in check_results: 798 | found = False 799 | for j in failed_db: 800 | if j[1] != i["cluster"]: 801 | continue 802 | if j[2] != i["image"]: 803 | continue 804 | found = True 805 | break 806 | if found is False: 807 | sql.execute( 808 | 'insert into results values(strftime("%s", "now"), ?, ?, ?)', 809 | (i["cluster"], i["image"], i["msg"]), 810 | ) 811 | 812 | 813 | def get_args(): 814 | parser = argparse.ArgumentParser() 815 | parser.add_argument("--debug", action="store_true") 816 | 817 | sub = parser.add_subparsers(dest="action", required=True) 818 | back = sub.add_parser("backup") 819 | back.add_argument("--cluster", dest="cluster", nargs="?") 820 | back.add_argument("--vmid", dest="vmid", nargs="?", type=int) 821 | back.add_argument("--profile", dest="profile", nargs="?") 822 | back.add_argument("--force", action="store_true") 823 | back.add_argument("--no-cleanup", action="store_true") 824 | back.add_argument("--cleanup", action="store_true") 825 | 826 | sub.add_parser("precheck") 827 | sub.add_parser("check") 828 | sub.add_parser("check-snap") 829 | sub.add_parser("stats") 830 | sub.add_parser("version") 831 | 832 | ls = sub.add_parser("list-mapped") 833 | ls.add_argument("--json", action="store_true") 834 | 835 | ls = sub.add_parser("ls") 836 | ls.add_argument(dest="rbd", nargs="?") 837 | ls.add_argument("--json", action="store_true") 838 | 839 | _map = sub.add_parser("map") 840 | _map.add_argument(dest="rbd") 841 | _map.add_argument(dest="snapshot") 842 | _map.add_argument(dest="vmdk", nargs="?") 843 | 844 | unmap = sub.add_parser("unmap") 845 | unmap.add_argument(dest="rbd") 846 | unmap.add_argument(dest="snapshot") 847 | return parser.parse_args() 848 | 849 | 850 | def main(): 851 | args = get_args() 852 | 853 | if args.debug: 854 | import logging 855 | 856 | Log.setLevel(logging.DEBUG) 857 | 858 | if args.action == "stats": 859 | stats.print_stats() 860 | elif args.action == "version": 861 | print(f"Backurne version {VERSION}") 862 | elif args.action == "check": 863 | print_check_results() 864 | elif args.action in ("precheck", "check-snap"): 865 | result = [] 866 | 867 | for cluster in config["live_clusters"]: 868 | Log.info(f"Checking {cluster['type']}: {cluster['name']}") 869 | if cluster["type"] == "proxmox": 870 | check = CheckProxmox(cluster) 871 | else: 872 | check = CheckPlain(cluster) 873 | if args.action == "precheck": 874 | ret = check.check() 875 | else: 876 | ret = check.check_snap() 877 | result += ret 878 | 879 | update_check_results(result) 880 | print_check_results() 881 | elif args.action == "backup": 882 | if args.vmid is not None and args.cluster is None: 883 | Log.error("--vmid has no meaning without --cluster") 884 | exit(1) 885 | 886 | manager = multiprocessing.Manager() 887 | atexit.register(manager.shutdown) 888 | 889 | live_workers = [] 890 | 891 | with Status_updater(manager, "images processed") as status_queue: 892 | for cluster in config["live_clusters"]: 893 | params = { 894 | "cluster": cluster, 895 | 
"regular_q": manager.Queue(), 896 | "priority_q": manager.Queue(), 897 | "status_q": status_queue, 898 | } 899 | 900 | producer = multiprocessing.Process(target=Producer(params, args)) 901 | atexit.register(producer.terminate) 902 | producer.start() 903 | 904 | for i in range(0, config["live_worker"]): 905 | params["id"] = i 906 | pid = multiprocessing.Process(target=Consumer(params)) 907 | atexit.register(pid.terminate) 908 | live_workers.append(pid) 909 | pid.start() 910 | 911 | # Workers will exit upon a None reception 912 | # When all of them are done, we are done 913 | for pid in live_workers: 914 | pid.join() 915 | 916 | if args.no_cleanup is True: 917 | Log.debug("not cleaning up as --no-cleanup is used") 918 | exit(0) 919 | 920 | with Status_updater( 921 | manager, "images cleaned up on live clusters" 922 | ) as status_queue: 923 | for cluster in config["live_clusters"]: 924 | if args.cluster is not None: 925 | if cluster["name"] != args.cluster: 926 | Log.debug( 927 | f"Skipping cluster {cluster['name']} due to --cluster" 928 | ) 929 | continue 930 | 931 | Log.debug( 932 | f"Expire snapshots from live {cluster['type']}: {cluster['name']}" 933 | ) 934 | if cluster["type"] == "proxmox": 935 | bidule = BackupProxmox(cluster, None, None, status_queue, args) 936 | else: 937 | bidule = BackupPlain(cluster, None, None, status_queue, args) 938 | bidule.expire_live() 939 | 940 | if ( 941 | args.cleanup 942 | or args.cluster is None 943 | and args.profile is None 944 | and args.vmid is None 945 | ): 946 | Log.debug("Expiring our snapshots") 947 | # Dummy Ceph object used to retrieve the real backup Object 948 | ceph = Ceph(None) 949 | 950 | with Status_updater( 951 | manager, "images cleaned up on backup cluster" 952 | ) as status_queue: 953 | data = [] 954 | for i in ceph.ls(): 955 | data.append( 956 | {"ceph": ceph, "image": i, "status_queue": status_queue} 957 | ) 958 | status_queue.put("add_item") 959 | with multiprocessing.Pool(config["backup_worker"]) as pool: 960 | for i in pool.imap_unordered(Backup.expire_backup, data): 961 | pass 962 | 963 | manager.shutdown() 964 | elif args.action == "ls": 965 | restore = Restore(args.rbd, None) 966 | data = restore.ls() 967 | if args.rbd is None: 968 | pt = pretty.Pt(["Ident", "Disk", "UUID"]) 969 | 970 | for i in data: 971 | row = [i["ident"], i["disk"], i["uuid"]] 972 | pt.add_row(row) 973 | else: 974 | pt = pretty.Pt(["Creation date", "UUID"]) 975 | 976 | for i in data: 977 | row = [i["creation"], i["uuid"]] 978 | pt.add_row(row) 979 | 980 | if args.json is True: 981 | print(json.dumps(data, default=str)) 982 | else: 983 | print(pt) 984 | elif args.action == "list-mapped": 985 | data = get_mapped(extended=False) 986 | if args.json is True: 987 | result = [] 988 | for tree in data: 989 | result.append(prepare_tree_to_json(tree)) 990 | print(json.dumps(result)) 991 | else: 992 | print_mapped(data) 993 | elif args.action == "map": 994 | Restore(args.rbd, args.snapshot, args.vmdk).mount() 995 | for i in get_mapped(extended=False): 996 | if i.name.parent_image != args.rbd or i.name.parent_snap != args.snapshot: 997 | continue 998 | print_mapped( 999 | [ 1000 | i, 1001 | ] 1002 | ) 1003 | return 1004 | 1005 | elif args.action == "unmap": 1006 | restore = Restore(args.rbd, args.snapshot) 1007 | restore.umount() 1008 | 1009 | 1010 | if __name__ == "__main__": 1011 | main() 1012 | --------------------------------------------------------------------------------