├── .gitignore ├── LICENSE ├── README.md ├── debian ├── changelog ├── control ├── copyright ├── rules ├── source │ ├── format │ └── options └── watch ├── examples ├── pull │ ├── local-replication.yaml │ ├── ssh+netcat-replication.yaml │ └── ssh-replication.yaml └── push │ ├── local-replication.yaml │ ├── parallel-replication.yaml │ ├── ssh+netcat-replication.yaml │ └── ssh-replication.yaml ├── integration-tests ├── configuration │ └── test_configuration.py ├── replication │ ├── test_bad_incremental_base.py │ ├── test_compression.py │ ├── test_creates_intermediate_datasets.py │ ├── test_data_progress.py │ ├── test_dataset_gone.py │ ├── test_dst.py │ ├── test_encrypted_target.py │ ├── test_encryption_inherit.py │ ├── test_keeps_mount_structure.py │ ├── test_mount.py │ ├── test_multiple_source_datasets.py │ ├── test_name_regex.py │ ├── test_nothing_to_replicate.py │ ├── test_only_from_scratch.py │ ├── test_parallel_replication.py │ ├── test_pre_retention.py │ ├── test_preserves_clone_origin.py │ ├── test_preserves_deleted_datasets.py │ ├── test_progress.py │ ├── test_properties.py │ ├── test_property_receive.py │ ├── test_pull_replication.py │ ├── test_push_replication.py │ ├── test_readonly.py │ ├── test_replicate.py │ ├── test_replication_resume.py │ ├── test_replication_retry.py │ ├── test_rewording_is_not_earlier_than_it.py │ ├── test_shell.py │ ├── test_snapshot_gone.py │ ├── test_target_without_snapshots_but_with_data.py │ ├── test_unencrypted_to_encrypted.py │ └── test_zvol_replication.py ├── retention │ ├── test_hold_pending_snapshots.py │ ├── test_local_retention.py │ ├── test_long_running_replication_task.py │ ├── test_remote_retention.py │ ├── test_removal_dates.py │ ├── test_zfs_clone.py │ └── test_zfs_hold.py ├── snapshot │ ├── test_allow_empty.py │ ├── test_destroy.py │ ├── test_legit_step_back.py │ └── test_snapshot_exclude.py └── transport │ ├── test_async_exec_timeout.py │ ├── test_local_async_exec_stop.py │ ├── test_ssh_authentication_error.py │ ├── 
test_ssh_read_stdout.py │ └── test_timeout.py ├── setup.py ├── tests ├── dataset │ ├── test_exclude.py │ └── test_relationship.py ├── replication │ ├── task │ │ ├── test_dataset.py │ │ ├── test_retention_policy.py │ │ ├── test_should_replicate.py │ │ ├── test_snapshot_owner.py │ │ └── test_task.py │ ├── test_monitor.py │ ├── test_partially_complete_state.py │ └── test_run.py ├── retention │ └── test_calculate.py ├── scheduler │ ├── test_clock.py │ ├── test_cron.py │ └── test_tz_clock.py ├── snapshot │ ├── task │ │ ├── test_naming_schema.py │ │ └── test_nonintersecting_sets.py │ ├── test_create.py │ ├── test_destroy.py │ ├── test_empty.py │ ├── test_list.py │ └── test_name.py ├── test_zettarepl.py ├── transport │ └── test_progress_report_mixin.py └── utils │ ├── test_logging.py │ ├── test_re.py │ └── test_shlex.py └── zettarepl ├── __init__.py ├── __main__.py ├── commands ├── __init__.py ├── create_dataset.py ├── list_datasets.py ├── run.py └── utils.py ├── dataset ├── __init__.py ├── create.py ├── data.py ├── exclude.py ├── list.py └── relationship.py ├── definition ├── __init__.py ├── definition.py └── schema │ ├── __init__.py │ ├── logging-level.schema.yaml │ ├── periodic-snapshot-task.schema.yaml │ ├── replication-task.schema.yaml │ ├── schedule.schema.yaml │ ├── schema.yaml │ └── transport.schema.yaml ├── main.py ├── observer.py ├── replication ├── __init__.py ├── dataset_size_observer.py ├── error.py ├── monitor.py ├── partially_complete_state.py ├── pre_retention.py ├── process_runner.py ├── run.py ├── snapshots_to_send.py └── task │ ├── __init__.py │ ├── compression.py │ ├── dataset.py │ ├── direction.py │ ├── encryption.py │ ├── name_pattern.py │ ├── naming_schema.py │ ├── readonly_behavior.py │ ├── retention_policy.py │ ├── should_replicate.py │ ├── snapshot_owner.py │ ├── snapshot_query.py │ └── task.py ├── retention ├── __init__.py ├── calculate.py ├── snapshot_owner.py └── snapshot_removal_date_snapshot_owner.py ├── scheduler ├── __init__.py ├── 
clock.py ├── cron.py ├── scheduler.py └── tz_clock.py ├── snapshot ├── __init__.py ├── create.py ├── destroy.py ├── empty.py ├── list.py ├── name.py ├── snapshot.py └── task │ ├── __init__.py │ ├── nonintersecting_sets.py │ ├── snapshot_owner.py │ └── task.py ├── transport ├── __init__.py ├── async_exec_tee.py ├── base_ssh.py ├── compare.py ├── create.py ├── encryption_context.py ├── interface.py ├── local.py ├── progress_report_mixin.py ├── ssh.py ├── ssh_netcat.py ├── ssh_netcat_helper.py ├── utils.py └── zfscli │ ├── __init__.py │ ├── exception.py │ ├── parse.py │ └── warning.py ├── truenas ├── __init__.py └── removal_dates.py ├── utils ├── __init__.py ├── datetime.py ├── itertools.py ├── lang.py ├── logging.py ├── re.py ├── shlex.py └── test.py ├── zcp ├── __init__.py ├── recursive_snapshot_exclude.lua └── render_zcp.py └── zettarepl.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info/ 2 | __pycache__ 3 | /.pytest_cache 4 | /.idea/ 5 | /build/ 6 | /dist/ 7 | -------------------------------------------------------------------------------- /debian/changelog: -------------------------------------------------------------------------------- 1 | zettarepl (0.0+git20200429-1~truenas+1) bullseye-truenas-unstable; urgency=medium 2 | 3 | * Initial release (Closes: #nnnn) 4 | 5 | -- William Grzybowski Mon, 12 Aug 2019 16:26:27 -0300 6 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: zettarepl 2 | Section: admin 3 | Priority: optional 4 | Maintainer: William Grzybowski 5 | Build-Depends: debhelper-compat (= 12), 6 | dh-python, 7 | python3-all, 8 | python3-croniter, 9 | python3-isodate, 10 | python3-jsonschema, 11 | python3-paramiko, 12 | python3-setuptools, 13 | python3-yaml, 14 | Standards-Version: 4.4.0 15 | Homepage: https://github.com/freenas/zettarepl 16 | 
Vcs-Browser: https://salsa.debian.org/python-team/modules/zettarepl 17 | Vcs-Git: https://salsa.debian.org/python-team/modules/zettarepl.git 18 | Testsuite: autopkgtest-pkg-python 19 | 20 | Package: python3-zettarepl 21 | Architecture: all 22 | Depends: ${misc:Depends}, ${python3:Depends}, lz4, mbuffer, pigz, plzip, xz-utils, xzdec 23 | Description: Zettabyte File System (ZFS) Replication Tool written in Python 24 | zettarepl is a cross-platform ZFS replication solution. It provides: 25 | . 26 | - Snapshot-based PUSH and PULL replication over SSH or high-speed 27 | unencrypted connection 28 | - Extensible snapshot creation and replication schedule, replication of 29 | manually created snapshots 30 | - Consistent recursive snapshots with possibility to exclude certain datasets 31 | - All modern ZFS features support including resumable replication 32 | - Flexible snapshot retention on both local and remote sides 33 | - Comprehensive logging that helps you to understand what is going on and why 34 | - Configuration via simple and clear YAML file 35 | -------------------------------------------------------------------------------- /debian/copyright: -------------------------------------------------------------------------------- 1 | Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ 2 | Upstream-Name: zettarepl 3 | Source: 4 | 5 | Files: * 6 | Copyright: 2018-2019 iXsystems Inc 7 | 2018-2019 Vladimir Vinogradenko 8 | License: BSD-3-Clause 9 | 10 | Files: debian/* 11 | Copyright: 2019 William Grzybowski 12 | License: BSD-3-Clause 13 | 14 | License: BSD-3-Clause 15 | Redistribution and use in source and binary forms, with or without 16 | modification, are permitted provided that the following conditions 17 | are met: 18 | 1. Redistributions of source code must retain the above copyright 19 | notice, this list of conditions and the following disclaimer. 20 | 2. 
Redistributions in binary form must reproduce the above copyright 21 | notice, this list of conditions and the following disclaimer in the 22 | documentation and/or other materials provided with the distribution. 23 | 3. Neither the name of the University nor the names of its contributors 24 | may be used to endorse or promote products derived from this software 25 | without specific prior written permission. 26 | . 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 29 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 30 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE HOLDERS OR 31 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 32 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 33 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 34 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 35 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 36 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 37 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
38 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | #export DH_VERBOSE = 1 3 | 4 | export PYBUILD_NAME=zettarepl 5 | 6 | %: 7 | dh $@ --with python3 --buildsystem=pybuild 8 | 9 | -------------------------------------------------------------------------------- /debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (quilt) 2 | -------------------------------------------------------------------------------- /debian/source/options: -------------------------------------------------------------------------------- 1 | extend-diff-ignore = "^[^/]*[.]egg-info/" 2 | -------------------------------------------------------------------------------- /debian/watch: -------------------------------------------------------------------------------- 1 | version=4 2 | opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*)\.tar\.gz%zettarepl-$1.tar.gz%" \ 3 | https://github.com/freenas/zettarepl/tags \ 4 | (?:.*?/)?v?(\d[\d.]*)\.tar\.gz debian uupdate 5 | -------------------------------------------------------------------------------- /examples/pull/local-replication.yaml: -------------------------------------------------------------------------------- 1 | timezone: "Europe/Moscow" 2 | 3 | periodic-snapshot-tasks: 4 | src: 5 | dataset: data/src 6 | recursive: true 7 | lifetime: P365D 8 | naming-schema: snap-%Y-%m-%d-%H-%M 9 | schedule: 10 | minute: "*" 11 | hour: "*" 12 | day-of-month: "*" 13 | month: "*" 14 | day-of-week: "*" 15 | 16 | replication-tasks: 17 | src: 18 | direction: push 19 | transport: 20 | type: local 21 | source-dataset: data/src 22 | target-dataset: data/dst 23 | recursive: true 24 | periodic-snapshot-tasks: 25 | - src 26 | auto: true 27 | retention-policy: source 28 | -------------------------------------------------------------------------------- 
/examples/pull/ssh+netcat-replication.yaml: -------------------------------------------------------------------------------- 1 | timezone: "Europe/Moscow" 2 | 3 | periodic-snapshot-tasks: 4 | src: 5 | dataset: data/src 6 | recursive: true 7 | exclude: 8 | - data/src/garbage 9 | lifetime: P365D 10 | naming-schema: snap-%Y-%m-%d-%H-%M 11 | schedule: 12 | minute: "*" 13 | hour: "*" 14 | day-of-month: "*" 15 | month: "*" 16 | day-of-week: "*" 17 | 18 | replication-tasks: 19 | src: 20 | direction: push 21 | transport: 22 | type: ssh+netcat 23 | hostname: 192.168.0.187 24 | # ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDr4PsBZ02czxPl/Y/WPlUjBUAGO6C0vbnaISGEZZM1pHZ24IqS5mYbf6jnlvS+jaZSmNa34IqwuRUv978mBmrOUdib4yFGzirvM4Oj8gS3H+4BUozOpZLRmz01F1TVSeGwROOuGFCE0XDHsxRiUHazz6YB24tt6pG5UgqxfODlHP6bdUtaXhHzYDXShmXRQ/IhwQcC0cyAxXIbNBw1SzGyn+g3jSeeReJcdpe1IzGMS048uei3bVsnRlXxEWZe0modeDJNMzmZokzcnCqEwcPa1weh2oJyfG+65a/fWkAH4cpDwgx05FieFmqqz8G3zSBEtC4kGk15fb9wo8MMwJGd 25 | private-key: | 26 | -----BEGIN RSA PRIVATE KEY----- 27 | MIIEowIBAAKCAQEA6+D7AWdNnM8T5f2P1j5VIwVABjugtL252iEhhGWTNaR2duCK 28 | kuZmG3+o55b0vo2mUpjWt+CKsLkVL/e/JgZqzlHYm+MhRs4q7zODo/IEtx/uAVKM 29 | zqWS0Zs9NRdU1UnhsETjrhhQhNFwx7MUYlB2s8+mAduLbeqRuVIKsXzg5Rz+m3VL 30 | Wl4R82A10oZl0UPyIcEHAtHMgMVyGzQcNUsxsp/oN40nnkXiXHaXtSMxjEtOPLno 31 | t21bJ0ZV8RFmXtJqHXgyTTM5maJM3JwqhMHD2tcHodqCcnxvuuWv31pAB+HKQ8IM 32 | dORYnhZqqs/Bt80gRLQuJBpNeX2/cKPDDMCRnQIDAQABAoIBAQCil6+N9R5rw9Ys 33 | iA85GDhpbnoGkd2iGNHeiU3oTHgf1uEN6pO61PR3ahUMpmLIYy3N66q+jxoq3Tm8 34 | meL6HBxNYd+U/Qh4HS89OV45iV80t97ArJ2A6GL+9ypGyXFhoI7giWwEGqCOHSzH 35 | iyq25k4cfjspNqOyval7fBEA7Vq8smAMDJQE7WIJWzqrTbVAmVf9ho4r5dYxYBNW 36 | fXWo84DU8K+p0mE0BTokqqMWhKiA5JJG7OZB/iyeW2BWFOdASXvQmh1hRwMzpU4q 37 | BcZ7cJHz248SNSGMe5R3w7SmLO7PRr1/QkktJNdFmT7o/RGmQh8+KHql6r/vIzMM 38 | ci60OAxlAoGBAPYsZJZF3HK70fK3kARSzOD1LEVBDTCLnpVVzMSp6thG8cQqfCI5 39 | pCfT/NcUsCAP6J+yl6dqdtonXISmGolI1s1KCBihs5D4jEdjbg9KbKh68AsHXaD3 40 | v5L3POJ9hQnI6zJdvCfxniHdUArfyYhqsp1bnCn+85g4ed7BzDqMX2IDAoGBAPVL 41 | 
Y45rALw7lsjxJndyFdffJtyAeuwxgJNwWGuY21xhwqPbuwsgLHsGerHNKB5QAJT8 42 | JOlrcrfC13s6Tt4wmIy/o2h1p9tMaitmVR6pJzEfHyJhSRTbeFybQ9yqlKHuk2tI 43 | jcUZV/59cyRrjhPKWoVym3Fh/P7D1t1kfdTvBrvfAoGAUH0rVkb5UTo/5xBFsmQw 44 | QM1o8CvY2CqOa11mWlcERjrMCcuqUrZuCeeyH9DP1WveL3kBROf2fFWqVmTJAGIk 45 | eXLfOs6EG75of17vOWioJl4r5i8+WccniDH2YkeQHCbpX8puHtFNVt05spSBHG1m 46 | gTTW1pRZqUet8TuEPxBuj2kCgYAVjCrRruqgnmdvfWeQpI/wp6SlSBAEQZD24q6R 47 | vRq/8cKEXGAA6TGfGQGcLtZwWzzB2ahwbMTmCZKeO5AECqbL7mWvXm6BYCQPbeza 48 | Raews/grL/qYf3MCR41djAqEcw22Jeh2QPSu4VxE/cG8UVFEWb335tCvnIp6ZkJ7 49 | ewfPZwKBgEnc8HH1aq8IJ6vRBePNu6M9ON6PB9qW+ZHHcy47bcGogvYRQk1Ng77G 50 | LdZpyjWzzmb0Z4kjEYcrlGdbNQf9iaT0r+SJPzwBDG15+fRqK7EJI00UhjB0T67M 51 | otrkElxOBGqHSOl0jfUBrpSkSHiy0kDc3/cTAWKn0gowaznSlR9N 52 | -----END RSA PRIVATE KEY----- 53 | host-key: "ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEg6bOfeVvexN5JgzlKVzM/2NgD6qNpeq9aF396qDiJR56bAx8ru2F0+fnrLnDywW1mYI0jxjbdofSjh0BYDWkM=" 54 | active-side: local 55 | source-dataset: data/src 56 | target-dataset: data/dst 57 | recursive: true 58 | exclude: 59 | - data/src/garbage 60 | periodic-snapshot-tasks: 61 | - src 62 | auto: true 63 | allow-from-scratch: true 64 | retention-policy: source 65 | -------------------------------------------------------------------------------- /examples/pull/ssh-replication.yaml: -------------------------------------------------------------------------------- 1 | timezone: "Europe/Moscow" 2 | 3 | replication-tasks: 4 | src: 5 | direction: pull 6 | transport: 7 | type: ssh 8 | hostname: 192.168.0.187 9 | # ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDr4PsBZ02czxPl/Y/WPlUjBUAGO6C0vbnaISGEZZM1pHZ24IqS5mYbf6jnlvS+jaZSmNa34IqwuRUv978mBmrOUdib4yFGzirvM4Oj8gS3H+4BUozOpZLRmz01F1TVSeGwROOuGFCE0XDHsxRiUHazz6YB24tt6pG5UgqxfODlHP6bdUtaXhHzYDXShmXRQ/IhwQcC0cyAxXIbNBw1SzGyn+g3jSeeReJcdpe1IzGMS048uei3bVsnRlXxEWZe0modeDJNMzmZokzcnCqEwcPa1weh2oJyfG+65a/fWkAH4cpDwgx05FieFmqqz8G3zSBEtC4kGk15fb9wo8MMwJGd 10 | private-key: | 11 | -----BEGIN RSA 
PRIVATE KEY----- 12 | MIIEowIBAAKCAQEA6+D7AWdNnM8T5f2P1j5VIwVABjugtL252iEhhGWTNaR2duCK 13 | kuZmG3+o55b0vo2mUpjWt+CKsLkVL/e/JgZqzlHYm+MhRs4q7zODo/IEtx/uAVKM 14 | zqWS0Zs9NRdU1UnhsETjrhhQhNFwx7MUYlB2s8+mAduLbeqRuVIKsXzg5Rz+m3VL 15 | Wl4R82A10oZl0UPyIcEHAtHMgMVyGzQcNUsxsp/oN40nnkXiXHaXtSMxjEtOPLno 16 | t21bJ0ZV8RFmXtJqHXgyTTM5maJM3JwqhMHD2tcHodqCcnxvuuWv31pAB+HKQ8IM 17 | dORYnhZqqs/Bt80gRLQuJBpNeX2/cKPDDMCRnQIDAQABAoIBAQCil6+N9R5rw9Ys 18 | iA85GDhpbnoGkd2iGNHeiU3oTHgf1uEN6pO61PR3ahUMpmLIYy3N66q+jxoq3Tm8 19 | meL6HBxNYd+U/Qh4HS89OV45iV80t97ArJ2A6GL+9ypGyXFhoI7giWwEGqCOHSzH 20 | iyq25k4cfjspNqOyval7fBEA7Vq8smAMDJQE7WIJWzqrTbVAmVf9ho4r5dYxYBNW 21 | fXWo84DU8K+p0mE0BTokqqMWhKiA5JJG7OZB/iyeW2BWFOdASXvQmh1hRwMzpU4q 22 | BcZ7cJHz248SNSGMe5R3w7SmLO7PRr1/QkktJNdFmT7o/RGmQh8+KHql6r/vIzMM 23 | ci60OAxlAoGBAPYsZJZF3HK70fK3kARSzOD1LEVBDTCLnpVVzMSp6thG8cQqfCI5 24 | pCfT/NcUsCAP6J+yl6dqdtonXISmGolI1s1KCBihs5D4jEdjbg9KbKh68AsHXaD3 25 | v5L3POJ9hQnI6zJdvCfxniHdUArfyYhqsp1bnCn+85g4ed7BzDqMX2IDAoGBAPVL 26 | Y45rALw7lsjxJndyFdffJtyAeuwxgJNwWGuY21xhwqPbuwsgLHsGerHNKB5QAJT8 27 | JOlrcrfC13s6Tt4wmIy/o2h1p9tMaitmVR6pJzEfHyJhSRTbeFybQ9yqlKHuk2tI 28 | jcUZV/59cyRrjhPKWoVym3Fh/P7D1t1kfdTvBrvfAoGAUH0rVkb5UTo/5xBFsmQw 29 | QM1o8CvY2CqOa11mWlcERjrMCcuqUrZuCeeyH9DP1WveL3kBROf2fFWqVmTJAGIk 30 | eXLfOs6EG75of17vOWioJl4r5i8+WccniDH2YkeQHCbpX8puHtFNVt05spSBHG1m 31 | gTTW1pRZqUet8TuEPxBuj2kCgYAVjCrRruqgnmdvfWeQpI/wp6SlSBAEQZD24q6R 32 | vRq/8cKEXGAA6TGfGQGcLtZwWzzB2ahwbMTmCZKeO5AECqbL7mWvXm6BYCQPbeza 33 | Raews/grL/qYf3MCR41djAqEcw22Jeh2QPSu4VxE/cG8UVFEWb335tCvnIp6ZkJ7 34 | ewfPZwKBgEnc8HH1aq8IJ6vRBePNu6M9ON6PB9qW+ZHHcy47bcGogvYRQk1Ng77G 35 | LdZpyjWzzmb0Z4kjEYcrlGdbNQf9iaT0r+SJPzwBDG15+fRqK7EJI00UhjB0T67M 36 | otrkElxOBGqHSOl0jfUBrpSkSHiy0kDc3/cTAWKn0gowaznSlR9N 37 | -----END RSA PRIVATE KEY----- 38 | host-key: "ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEg6bOfeVvexN5JgzlKVzM/2NgD6qNpeq9aF396qDiJR56bAx8ru2F0+fnrLnDywW1mYI0jxjbdofSjh0BYDWkM=" 39 | source-dataset: data/dst 
40 | target-dataset: data/pull 41 | recursive: true 42 | exclude: 43 | - data/src/garbage 44 | naming-schema: 45 | - snap-%Y-%m-%d-%H-%M 46 | auto: true 47 | schedule: 48 | minute: "*" 49 | hour: "*" 50 | day-of-month: "*" 51 | month: "*" 52 | day-of-week: "*" 53 | only-matching-schedule: true 54 | allow-from-scratch: true 55 | retention-policy: source 56 | speed-limit: 102400 57 | -------------------------------------------------------------------------------- /examples/push/local-replication.yaml: -------------------------------------------------------------------------------- 1 | timezone: "Europe/Moscow" 2 | 3 | periodic-snapshot-tasks: 4 | src: 5 | dataset: data/src 6 | recursive: true 7 | lifetime: P365D 8 | naming-schema: snap-%Y-%m-%d-%H-%M 9 | schedule: 10 | minute: "*" 11 | hour: "*" 12 | day-of-month: "*" 13 | month: "*" 14 | day-of-week: "*" 15 | 16 | replication-tasks: 17 | src: 18 | direction: push 19 | transport: 20 | type: local 21 | source-dataset: data/src 22 | target-dataset: data/dst 23 | recursive: true 24 | periodic-snapshot-tasks: 25 | - src 26 | auto: true 27 | retention-policy: source 28 | -------------------------------------------------------------------------------- /examples/push/ssh+netcat-replication.yaml: -------------------------------------------------------------------------------- 1 | timezone: "Europe/Moscow" 2 | 3 | periodic-snapshot-tasks: 4 | src: 5 | dataset: data/src 6 | recursive: true 7 | exclude: 8 | - data/src/garbage 9 | lifetime: P365D 10 | naming-schema: snap-%Y-%m-%d-%H-%M 11 | schedule: 12 | minute: "*" 13 | hour: "*" 14 | day-of-month: "*" 15 | month: "*" 16 | day-of-week: "*" 17 | 18 | replication-tasks: 19 | src: 20 | direction: push 21 | transport: 22 | type: ssh+netcat 23 | hostname: 192.168.0.187 24 | # ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAABAQDr4PsBZ02czxPl/Y/WPlUjBUAGO6C0vbnaISGEZZM1pHZ24IqS5mYbf6jnlvS+jaZSmNa34IqwuRUv978mBmrOUdib4yFGzirvM4Oj8gS3H+4BUozOpZLRmz01F1TVSeGwROOuGFCE0XDHsxRiUHazz6YB24tt6pG5UgqxfODlHP6bdUtaXhHzYDXShmXRQ/IhwQcC0cyAxXIbNBw1SzGyn+g3jSeeReJcdpe1IzGMS048uei3bVsnRlXxEWZe0modeDJNMzmZokzcnCqEwcPa1weh2oJyfG+65a/fWkAH4cpDwgx05FieFmqqz8G3zSBEtC4kGk15fb9wo8MMwJGd 25 | private-key: | 26 | -----BEGIN RSA PRIVATE KEY----- 27 | MIIEowIBAAKCAQEA6+D7AWdNnM8T5f2P1j5VIwVABjugtL252iEhhGWTNaR2duCK 28 | kuZmG3+o55b0vo2mUpjWt+CKsLkVL/e/JgZqzlHYm+MhRs4q7zODo/IEtx/uAVKM 29 | zqWS0Zs9NRdU1UnhsETjrhhQhNFwx7MUYlB2s8+mAduLbeqRuVIKsXzg5Rz+m3VL 30 | Wl4R82A10oZl0UPyIcEHAtHMgMVyGzQcNUsxsp/oN40nnkXiXHaXtSMxjEtOPLno 31 | t21bJ0ZV8RFmXtJqHXgyTTM5maJM3JwqhMHD2tcHodqCcnxvuuWv31pAB+HKQ8IM 32 | dORYnhZqqs/Bt80gRLQuJBpNeX2/cKPDDMCRnQIDAQABAoIBAQCil6+N9R5rw9Ys 33 | iA85GDhpbnoGkd2iGNHeiU3oTHgf1uEN6pO61PR3ahUMpmLIYy3N66q+jxoq3Tm8 34 | meL6HBxNYd+U/Qh4HS89OV45iV80t97ArJ2A6GL+9ypGyXFhoI7giWwEGqCOHSzH 35 | iyq25k4cfjspNqOyval7fBEA7Vq8smAMDJQE7WIJWzqrTbVAmVf9ho4r5dYxYBNW 36 | fXWo84DU8K+p0mE0BTokqqMWhKiA5JJG7OZB/iyeW2BWFOdASXvQmh1hRwMzpU4q 37 | BcZ7cJHz248SNSGMe5R3w7SmLO7PRr1/QkktJNdFmT7o/RGmQh8+KHql6r/vIzMM 38 | ci60OAxlAoGBAPYsZJZF3HK70fK3kARSzOD1LEVBDTCLnpVVzMSp6thG8cQqfCI5 39 | pCfT/NcUsCAP6J+yl6dqdtonXISmGolI1s1KCBihs5D4jEdjbg9KbKh68AsHXaD3 40 | v5L3POJ9hQnI6zJdvCfxniHdUArfyYhqsp1bnCn+85g4ed7BzDqMX2IDAoGBAPVL 41 | Y45rALw7lsjxJndyFdffJtyAeuwxgJNwWGuY21xhwqPbuwsgLHsGerHNKB5QAJT8 42 | JOlrcrfC13s6Tt4wmIy/o2h1p9tMaitmVR6pJzEfHyJhSRTbeFybQ9yqlKHuk2tI 43 | jcUZV/59cyRrjhPKWoVym3Fh/P7D1t1kfdTvBrvfAoGAUH0rVkb5UTo/5xBFsmQw 44 | QM1o8CvY2CqOa11mWlcERjrMCcuqUrZuCeeyH9DP1WveL3kBROf2fFWqVmTJAGIk 45 | eXLfOs6EG75of17vOWioJl4r5i8+WccniDH2YkeQHCbpX8puHtFNVt05spSBHG1m 46 | gTTW1pRZqUet8TuEPxBuj2kCgYAVjCrRruqgnmdvfWeQpI/wp6SlSBAEQZD24q6R 47 | vRq/8cKEXGAA6TGfGQGcLtZwWzzB2ahwbMTmCZKeO5AECqbL7mWvXm6BYCQPbeza 48 | Raews/grL/qYf3MCR41djAqEcw22Jeh2QPSu4VxE/cG8UVFEWb335tCvnIp6ZkJ7 49 | 
ewfPZwKBgEnc8HH1aq8IJ6vRBePNu6M9ON6PB9qW+ZHHcy47bcGogvYRQk1Ng77G 50 | LdZpyjWzzmb0Z4kjEYcrlGdbNQf9iaT0r+SJPzwBDG15+fRqK7EJI00UhjB0T67M 51 | otrkElxOBGqHSOl0jfUBrpSkSHiy0kDc3/cTAWKn0gowaznSlR9N 52 | -----END RSA PRIVATE KEY----- 53 | host-key: "ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEg6bOfeVvexN5JgzlKVzM/2NgD6qNpeq9aF396qDiJR56bAx8ru2F0+fnrLnDywW1mYI0jxjbdofSjh0BYDWkM=" 54 | active-side: local 55 | source-dataset: data/src 56 | target-dataset: data/dst 57 | recursive: true 58 | exclude: 59 | - data/src/garbage 60 | periodic-snapshot-tasks: 61 | - src 62 | auto: true 63 | allow-from-scratch: true 64 | retention-policy: source 65 | -------------------------------------------------------------------------------- /examples/push/ssh-replication.yaml: -------------------------------------------------------------------------------- 1 | timezone: "Europe/Moscow" 2 | 3 | periodic-snapshot-tasks: 4 | src: 5 | dataset: data/src 6 | recursive: true 7 | exclude: 8 | - data/src/garbage 9 | lifetime: P365D 10 | naming-schema: snap-%Y-%m-%d-%H-%M 11 | schedule: 12 | minute: "*" 13 | hour: "*" 14 | day-of-month: "*" 15 | month: "*" 16 | day-of-week: "*" 17 | 18 | replication-tasks: 19 | src: 20 | direction: push 21 | transport: 22 | type: ssh 23 | hostname: 192.168.0.187 24 | # ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDr4PsBZ02czxPl/Y/WPlUjBUAGO6C0vbnaISGEZZM1pHZ24IqS5mYbf6jnlvS+jaZSmNa34IqwuRUv978mBmrOUdib4yFGzirvM4Oj8gS3H+4BUozOpZLRmz01F1TVSeGwROOuGFCE0XDHsxRiUHazz6YB24tt6pG5UgqxfODlHP6bdUtaXhHzYDXShmXRQ/IhwQcC0cyAxXIbNBw1SzGyn+g3jSeeReJcdpe1IzGMS048uei3bVsnRlXxEWZe0modeDJNMzmZokzcnCqEwcPa1weh2oJyfG+65a/fWkAH4cpDwgx05FieFmqqz8G3zSBEtC4kGk15fb9wo8MMwJGd 25 | private-key: | 26 | -----BEGIN RSA PRIVATE KEY----- 27 | MIIEowIBAAKCAQEA6+D7AWdNnM8T5f2P1j5VIwVABjugtL252iEhhGWTNaR2duCK 28 | kuZmG3+o55b0vo2mUpjWt+CKsLkVL/e/JgZqzlHYm+MhRs4q7zODo/IEtx/uAVKM 29 | zqWS0Zs9NRdU1UnhsETjrhhQhNFwx7MUYlB2s8+mAduLbeqRuVIKsXzg5Rz+m3VL 30 | 
Wl4R82A10oZl0UPyIcEHAtHMgMVyGzQcNUsxsp/oN40nnkXiXHaXtSMxjEtOPLno 31 | t21bJ0ZV8RFmXtJqHXgyTTM5maJM3JwqhMHD2tcHodqCcnxvuuWv31pAB+HKQ8IM 32 | dORYnhZqqs/Bt80gRLQuJBpNeX2/cKPDDMCRnQIDAQABAoIBAQCil6+N9R5rw9Ys 33 | iA85GDhpbnoGkd2iGNHeiU3oTHgf1uEN6pO61PR3ahUMpmLIYy3N66q+jxoq3Tm8 34 | meL6HBxNYd+U/Qh4HS89OV45iV80t97ArJ2A6GL+9ypGyXFhoI7giWwEGqCOHSzH 35 | iyq25k4cfjspNqOyval7fBEA7Vq8smAMDJQE7WIJWzqrTbVAmVf9ho4r5dYxYBNW 36 | fXWo84DU8K+p0mE0BTokqqMWhKiA5JJG7OZB/iyeW2BWFOdASXvQmh1hRwMzpU4q 37 | BcZ7cJHz248SNSGMe5R3w7SmLO7PRr1/QkktJNdFmT7o/RGmQh8+KHql6r/vIzMM 38 | ci60OAxlAoGBAPYsZJZF3HK70fK3kARSzOD1LEVBDTCLnpVVzMSp6thG8cQqfCI5 39 | pCfT/NcUsCAP6J+yl6dqdtonXISmGolI1s1KCBihs5D4jEdjbg9KbKh68AsHXaD3 40 | v5L3POJ9hQnI6zJdvCfxniHdUArfyYhqsp1bnCn+85g4ed7BzDqMX2IDAoGBAPVL 41 | Y45rALw7lsjxJndyFdffJtyAeuwxgJNwWGuY21xhwqPbuwsgLHsGerHNKB5QAJT8 42 | JOlrcrfC13s6Tt4wmIy/o2h1p9tMaitmVR6pJzEfHyJhSRTbeFybQ9yqlKHuk2tI 43 | jcUZV/59cyRrjhPKWoVym3Fh/P7D1t1kfdTvBrvfAoGAUH0rVkb5UTo/5xBFsmQw 44 | QM1o8CvY2CqOa11mWlcERjrMCcuqUrZuCeeyH9DP1WveL3kBROf2fFWqVmTJAGIk 45 | eXLfOs6EG75of17vOWioJl4r5i8+WccniDH2YkeQHCbpX8puHtFNVt05spSBHG1m 46 | gTTW1pRZqUet8TuEPxBuj2kCgYAVjCrRruqgnmdvfWeQpI/wp6SlSBAEQZD24q6R 47 | vRq/8cKEXGAA6TGfGQGcLtZwWzzB2ahwbMTmCZKeO5AECqbL7mWvXm6BYCQPbeza 48 | Raews/grL/qYf3MCR41djAqEcw22Jeh2QPSu4VxE/cG8UVFEWb335tCvnIp6ZkJ7 49 | ewfPZwKBgEnc8HH1aq8IJ6vRBePNu6M9ON6PB9qW+ZHHcy47bcGogvYRQk1Ng77G 50 | LdZpyjWzzmb0Z4kjEYcrlGdbNQf9iaT0r+SJPzwBDG15+fRqK7EJI00UhjB0T67M 51 | otrkElxOBGqHSOl0jfUBrpSkSHiy0kDc3/cTAWKn0gowaznSlR9N 52 | -----END RSA PRIVATE KEY----- 53 | host-key: "ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEg6bOfeVvexN5JgzlKVzM/2NgD6qNpeq9aF396qDiJR56bAx8ru2F0+fnrLnDywW1mYI0jxjbdofSjh0BYDWkM=" 54 | source-dataset: data/src 55 | target-dataset: data/dst 56 | recursive: true 57 | exclude: 58 | - data/src/garbage 59 | periodic-snapshot-tasks: 60 | - src 61 | auto: true 62 | allow-from-scratch: true 63 | retention-policy: source 64 | speed-limit: 1024 65 | 
-------------------------------------------------------------------------------- /integration-tests/replication/test_bad_incremental_base.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | import subprocess 3 | import textwrap 4 | 5 | import pytest 6 | import yaml 7 | 8 | from zettarepl.utils.test import transports, create_dataset, run_replication_test 9 | 10 | 11 | @pytest.mark.parametrize("transport", transports()) 12 | def test_bad_incremental_base(transport): 13 | subprocess.call("zfs destroy -r data/src", shell=True) 14 | subprocess.call("zfs destroy -r data/dst", shell=True) 15 | 16 | create_dataset("data/src") 17 | subprocess.check_call("zfs snapshot -r data/src@2018-10-01_01-00", shell=True) 18 | subprocess.check_call("zfs snapshot -r data/src@2018-10-01_02-00", shell=True) 19 | 20 | create_dataset("data/dst") 21 | subprocess.check_call("zfs snapshot -r data/dst@2018-10-01_01-00", shell=True) 22 | 23 | definition = yaml.safe_load(textwrap.dedent("""\ 24 | timezone: "UTC" 25 | 26 | periodic-snapshot-tasks: 27 | src: 28 | dataset: data/src 29 | recursive: true 30 | lifetime: PT1H 31 | naming-schema: "%Y-%m-%d_%H-%M" 32 | schedule: 33 | minute: "0" 34 | 35 | replication-tasks: 36 | src: 37 | direction: push 38 | source-dataset: data/src 39 | target-dataset: data/dst 40 | recursive: true 41 | periodic-snapshot-tasks: 42 | - src 43 | auto: true 44 | retention-policy: none 45 | retries: 1 46 | """)) 47 | definition["replication-tasks"]["src"]["transport"] = transport 48 | 49 | assert ( 50 | "does not match incremental source" in run_replication_test(definition, success=False).error.replace("\n", " ") 51 | ) 52 | -------------------------------------------------------------------------------- /integration-tests/replication/test_compression.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | import subprocess 3 | import textwrap 4 | 5 | 
import pytest 6 | import yaml 7 | 8 | from zettarepl.utils.test import set_localhost_transport_options, create_dataset, run_replication_test 9 | 10 | 11 | @pytest.mark.parametrize("compression", ["pigz", "plzip", "lz4", "xz"]) 12 | def test_push_replication(compression): 13 | subprocess.call("zfs destroy -r data/src", shell=True) 14 | subprocess.call("zfs destroy -r data/dst", shell=True) 15 | 16 | create_dataset("data/src") 17 | subprocess.check_call("zfs snapshot -r data/src@2018-10-01_01-00", shell=True) 18 | 19 | definition = yaml.safe_load(textwrap.dedent("""\ 20 | timezone: "UTC" 21 | 22 | periodic-snapshot-tasks: 23 | src: 24 | dataset: data/src 25 | recursive: true 26 | lifetime: PT1H 27 | naming-schema: "%Y-%m-%d_%H-%M" 28 | schedule: 29 | minute: "0" 30 | 31 | replication-tasks: 32 | src: 33 | direction: push 34 | transport: 35 | type: ssh 36 | hostname: 127.0.0.1 37 | source-dataset: data/src 38 | target-dataset: data/dst 39 | recursive: true 40 | periodic-snapshot-tasks: 41 | - src 42 | auto: true 43 | retention-policy: none 44 | retries: 1 45 | """)) 46 | set_localhost_transport_options(definition["replication-tasks"]["src"]["transport"]) 47 | definition["replication-tasks"]["src"]["compression"] = compression 48 | 49 | run_replication_test(definition) 50 | -------------------------------------------------------------------------------- /integration-tests/replication/test_creates_intermediate_datasets.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | import subprocess 3 | import textwrap 4 | 5 | import yaml 6 | 7 | from zettarepl.snapshot.list import list_snapshots 8 | from zettarepl.transport.local import LocalShell 9 | from zettarepl.utils.test import run_replication_test 10 | 11 | 12 | def test_creates_intermediate_datasets(): 13 | subprocess.call("zfs destroy -r data/src", shell=True) 14 | subprocess.call("zfs receive -A data/deeply", shell=True) 15 | subprocess.call("zfs destroy -r 
data/deeply", shell=True) 16 | 17 | subprocess.check_call("zfs create -V 1M data/src", shell=True) 18 | subprocess.check_call("zfs snapshot -r data/src@2018-10-01_01-00", shell=True) 19 | subprocess.check_call("zfs snapshot -r data/src@2018-10-01_02-00", shell=True) 20 | 21 | definition = yaml.safe_load(textwrap.dedent("""\ 22 | timezone: "UTC" 23 | 24 | replication-tasks: 25 | src: 26 | direction: push 27 | transport: 28 | type: local 29 | source-dataset: data/src 30 | target-dataset: data/deeply/nested/dst 31 | recursive: true 32 | also-include-naming-schema: 33 | - "%Y-%m-%d_%H-%M" 34 | auto: false 35 | retention-policy: none 36 | retries: 1 37 | """)) 38 | run_replication_test(definition) 39 | 40 | local_shell = LocalShell() 41 | assert len(list_snapshots(local_shell, "data/deeply/nested/dst", False)) == 2 42 | -------------------------------------------------------------------------------- /integration-tests/replication/test_data_progress.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | import subprocess 3 | import textwrap 4 | from unittest.mock import Mock, patch 5 | 6 | import yaml 7 | 8 | from zettarepl.definition.definition import Definition 9 | from zettarepl.replication.task.task import ReplicationTask 10 | from zettarepl.observer import ReplicationTaskDataProgress 11 | from zettarepl.utils.itertools import select_by_class 12 | from zettarepl.utils.test import set_localhost_transport_options, create_zettarepl, wait_replication_tasks_to_complete 13 | 14 | 15 | def test_replication_data_progress(): 16 | subprocess.call("zfs destroy -r data/src", shell=True) 17 | subprocess.call("zfs destroy -r data/dst", shell=True) 18 | 19 | subprocess.check_call("zfs create data/src", shell=True) 20 | 21 | subprocess.check_call("dd if=/dev/urandom of=/mnt/data/src/blob bs=1M count=1", shell=True) 22 | subprocess.check_call("zfs snapshot data/src@2018-10-01_01-00", shell=True) 23 | 24 | definition = 
yaml.safe_load(textwrap.dedent("""\ 25 | timezone: "UTC" 26 | 27 | replication-tasks: 28 | src: 29 | direction: push 30 | transport: 31 | type: ssh 32 | hostname: 127.0.0.1 33 | source-dataset: 34 | - data/src 35 | target-dataset: data/dst 36 | recursive: true 37 | also-include-naming-schema: 38 | - "%Y-%m-%d_%H-%M" 39 | auto: false 40 | retention-policy: none 41 | retries: 1 42 | """)) 43 | set_localhost_transport_options(definition["replication-tasks"]["src"]["transport"]) 44 | definition["replication-tasks"]["src"]["speed-limit"] = 10240 * 9 45 | 46 | with patch("zettarepl.replication.run.DatasetSizeObserver.INTERVAL", 5): 47 | definition = Definition.from_data(definition) 48 | zettarepl = create_zettarepl(definition) 49 | zettarepl._spawn_replication_tasks(Mock(), select_by_class(ReplicationTask, definition.tasks)) 50 | wait_replication_tasks_to_complete(zettarepl) 51 | 52 | calls = [call for call in zettarepl.observer.call_args_list 53 | if call[0][0].__class__ == ReplicationTaskDataProgress] 54 | 55 | assert len(calls) == 2 56 | 57 | assert 1024 * 1024 * 0.8 <= calls[0][0][0].src_size <= 1024 * 1024 * 1.2 58 | assert 0 <= calls[0][0][0].dst_size <= 10240 * 1.2 59 | 60 | assert 1024 * 1024 * 0.8 <= calls[1][0][0].src_size <= 1024 * 1024 * 1.2 61 | assert 10240 * 6 * 0.8 <= calls[1][0][0].dst_size <= 10240 * 6 * 1.2 62 | -------------------------------------------------------------------------------- /integration-tests/replication/test_dataset_gone.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | import subprocess 3 | import textwrap 4 | from unittest.mock import Mock, patch 5 | 6 | import pytest 7 | import yaml 8 | 9 | from zettarepl.definition.definition import Definition 10 | from zettarepl.observer import ReplicationTaskSuccess 11 | from zettarepl.snapshot.list import list_snapshots 12 | from zettarepl.replication.run import resume_replications 13 | from zettarepl.replication.task.task 
@pytest.mark.parametrize("transport", transports())
def test_dataset_gone(transport):
    """A source dataset disappearing mid-replication must not fail the task.

    One of the recursively replicated children is destroyed after the initial
    dataset listing; with retries configured the task should recover and
    finish successfully for the remaining datasets.
    """
    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    subprocess.check_call("zfs create data/src", shell=True)
    subprocess.check_call("zfs create data/src/a", shell=True)
    subprocess.check_call("zfs create data/src/b", shell=True)
    subprocess.check_call("zfs snapshot -r data/src@2018-10-01_01-00", shell=True)
    subprocess.check_call("zfs snapshot -r data/src@2018-10-01_02-00", shell=True)

    definition = yaml.safe_load(textwrap.dedent("""\
        timezone: "UTC"

        periodic-snapshot-tasks:
          src:
            dataset: data/src
            recursive: true
            lifetime: PT1H
            naming-schema: "%Y-%m-%d_%H-%M"
            schedule:
              minute: "0"

        replication-tasks:
          src:
            direction: push
            source-dataset: data/src
            target-dataset: data/dst
            recursive: true
            periodic-snapshot-tasks:
            - src
            auto: true
            retention-policy: none
            retries: 2
    """))
    definition["replication-tasks"]["src"]["transport"] = transport
    definition = Definition.from_data(definition)

    local_shell = LocalShell()
    zettarepl = Zettarepl(Mock(), local_shell)
    zettarepl._spawn_retention = Mock()
    observer = Mock()
    zettarepl.set_observer(observer)
    zettarepl.set_tasks(definition.tasks)

    removed_child = False

    def delete_child_then_resume(*args, **kwargs):
        nonlocal removed_child
        # Datasets are already listed, and now we remove one of them to
        # simulate removing a dataset during the replication. Only do this
        # once.
        if not removed_child:
            subprocess.check_call("zfs destroy -r data/src/b", shell=True)
            removed_child = True

        return resume_replications(*args, **kwargs)

    with patch("zettarepl.replication.run.resume_replications", delete_child_then_resume):
        zettarepl._spawn_replication_tasks(Mock(), select_by_class(ReplicationTask, definition.tasks))
        wait_replication_tasks_to_complete(zettarepl)

    # The last observer notification must report success despite the vanished
    # child dataset.
    error = observer.call_args_list[-1][0][0]
    assert isinstance(error, ReplicationTaskSuccess), error

    assert len(list_snapshots(local_shell, "data/dst/a", False)) == 2
"*" 42 | task2: 43 | dataset: data/src 44 | recursive: true 45 | naming-schema: "{naming_schemas[1]}" 46 | schedule: 47 | minute: "*" 48 | hour: "*" 49 | day-of-month: "*" 50 | month: "*" 51 | day-of-week: "*" 52 | """)) 53 | 54 | run_periodic_snapshot_test( 55 | definition, 56 | datetime(2010, 10, 30, 22, 0, 0, tzinfo=pytz.UTC).astimezone(pytz.timezone("Europe/Moscow")) 57 | ) 58 | 59 | local_shell = LocalShell() 60 | assert list_snapshots(local_shell, "data/src", False) == [ 61 | Snapshot("data/src", "auto-2010-10-31-02-00"), 62 | Snapshot("data/src", "auto-2010-10-31-02-00:0400"), 63 | ] 64 | 65 | run_periodic_snapshot_test( 66 | definition, 67 | datetime(2010, 10, 30, 23, 0, 0, tzinfo=pytz.UTC).astimezone(pytz.timezone("Europe/Moscow")), 68 | False, 69 | ) 70 | 71 | assert list_snapshots(local_shell, "data/src", False) == [ 72 | Snapshot("data/src", "auto-2010-10-31-02-00"), 73 | Snapshot("data/src", "auto-2010-10-31-02-00:0300"), 74 | Snapshot("data/src", "auto-2010-10-31-02-00:0400"), 75 | ] 76 | 77 | definition = yaml.safe_load(textwrap.dedent("""\ 78 | timezone: "UTC" 79 | 80 | replication-tasks: 81 | src: 82 | direction: push 83 | transport: 84 | type: local 85 | source-dataset: data/src 86 | target-dataset: data/dst 87 | recursive: true 88 | also-include-naming-schema: 89 | - "auto-%Y-%m-%d-%H-%M" 90 | - "auto-%Y-%m-%d-%H-%M%z" 91 | auto: false 92 | retention-policy: none 93 | retries: 1 94 | """)) 95 | run_replication_test(definition) 96 | 97 | assert list_snapshots(local_shell, "data/dst", False) == [ 98 | Snapshot("data/dst", "auto-2010-10-31-02-00"), 99 | Snapshot("data/dst", "auto-2010-10-31-02-00:0300"), 100 | Snapshot("data/dst", "auto-2010-10-31-02-00:0400"), 101 | ] 102 | -------------------------------------------------------------------------------- /integration-tests/replication/test_encryption_inherit.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | import subprocess 3 | 
def test_inherit_encryption_when_parent_is_not_encrypted():
    """`encryption: inherit` must error out when the target's parent is plain."""
    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    create_dataset("data/src")
    subprocess.check_call("zfs snapshot -r data/src@2018-10-01_01-00", shell=True)

    # Unencrypted parent for the would-be encrypted child.
    create_dataset("data/dst")

    definition = yaml.safe_load(textwrap.dedent("""\
        timezone: "UTC"

        replication-tasks:
          src:
            direction: push
            transport:
              type: local
            source-dataset: data/src
            target-dataset: data/dst/child/grandchild
            recursive: false
            encryption: inherit
            also-include-naming-schema:
            - "%Y-%m-%d_%H-%M"
            auto: false
            retention-policy: none
            retries: 1
    """))

    result = run_replication_test(definition, success=False)
    assert result.error == (
        "Encryption inheritance requested for destination dataset 'data/dst/child/grandchild', but its existing parent "
        "is not encrypted."
    )


def test_inherit_encryption_when_parent_is_encrypted():
    """`encryption: inherit` must succeed when the target's parent is encrypted."""
    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    create_dataset("data/src")
    subprocess.check_call("zfs snapshot -r data/src@2018-10-01_01-00", shell=True)

    create_dataset("data/dst")
    # Encrypted parent: the replicated grandchild can inherit encryption.
    create_dataset("data/dst/child", encrypted=True)

    definition = yaml.safe_load(textwrap.dedent("""\
        timezone: "UTC"

        replication-tasks:
          src:
            direction: push
            transport:
              type: local
            source-dataset: data/src
            target-dataset: data/dst/child/grandchild
            recursive: false
            encryption: inherit
            also-include-naming-schema:
            - "%Y-%m-%d_%H-%M"
            auto: false
            retention-policy: none
            retries: 1
    """))

    run_replication_test(definition)
def test_multiple_source_datasets():
    """Replicating several source datasets into one pre-seeded target works.

    The target is primed with a `zfs send -R | zfs recv` copy of the common
    ancestor, then an incremental replication of two sibling datasets must
    land both snapshot generations under the target.
    """
    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs receive -A data/dst", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    for dataset in [
        "data/src",
        "data/src/internal",
        "data/src/internal/DISK1",
        "data/src/internal/DISK1/Apps",
        "data/src/internal/DISK1/ISO",
    ]:
        subprocess.check_call(f"zfs create {dataset}", shell=True)
    subprocess.check_call("zfs snapshot -r data/src@2018-10-01_01-00", shell=True)

    # Seed the destination with the first snapshot generation.
    subprocess.check_call("zfs create data/dst", shell=True)
    subprocess.check_call("zfs create data/dst/core", shell=True)
    subprocess.check_call("zfs send -R data/src/internal/DISK1@2018-10-01_01-00 | "
                          "zfs recv data/dst/core/tsaukpaetra", shell=True)

    subprocess.check_call("zfs snapshot -r data/src@2018-10-01_02-00", shell=True)

    definition = yaml.safe_load(textwrap.dedent("""\
        timezone: "UTC"

        replication-tasks:
          src:
            direction: push
            transport:
              type: local
            source-dataset:
            - data/src/internal/DISK1/Apps
            - data/src/internal/DISK1/ISO
            target-dataset: data/dst/core/tsaukpaetra
            recursive: false
            also-include-naming-schema:
            - "%Y-%m-%d_%H-%M"
            auto: false
            retention-policy: none
            retries: 1
    """))

    run_replication_test(definition)

    # Both source datasets must now carry both snapshot generations.
    shell = LocalShell()
    assert len(list_snapshots(shell, "data/dst/core/tsaukpaetra/Apps", False)) == 2
    assert len(list_snapshots(shell, "data/dst/core/tsaukpaetra/ISO", False)) == 2
@pytest.mark.parametrize("has_dst", [0, 1, 2])
def test_only_from_scratch(has_dst):
    """`only-from-scratch: true` must refuse to replicate onto an existing target.

    Parametrized over the target's initial state:
    0 - target dataset absent (replication must succeed),
    1 - target dataset exists but is empty (must fail),
    2 - target dataset exists and already has a snapshot (must fail).
    """
    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs receive -A data/dst", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    subprocess.check_call("zfs create data/src", shell=True)
    subprocess.check_call("zfs snapshot data/src@2018-10-01_01-00", shell=True)

    if has_dst:
        subprocess.check_call("zfs create data/dst", shell=True)
        if has_dst == 2:
            subprocess.check_call("zfs snapshot data/dst@2018-10-01_01-00", shell=True)

    # Fix: this literal was an f-string with no placeholders; the useless `f`
    # prefix was dropped so any braces in the YAML can never be misread as
    # format fields.
    definition = yaml.safe_load(textwrap.dedent("""\
        timezone: "Europe/Moscow"

        replication-tasks:
          src:
            direction: push
            transport:
              type: local
            source-dataset: data/src
            target-dataset: data/dst
            recursive: true
            also-include-naming-schema:
            - "%Y-%m-%d_%H-%M"
            auto: false
            only-from-scratch: true
            retention-policy: none
            retries: 1
    """))
    if has_dst:
        error = run_replication_test(definition, success=False)
        assert error.error == "Target dataset 'data/dst' already exists"
    else:
        run_replication_test(definition)

        local_shell = LocalShell()
        assert list_snapshots(local_shell, "data/dst", False) == [
            Snapshot("data/dst", "2018-10-01_01-00"),
        ]
def test_preserves_clone_origin():
    """Full-tree (`replicate: true`) push must preserve clone/origin relations.

    A clone created on the source shares blocks with its origin snapshot;
    after replication the clone on the target must reference the rewritten
    origin and still occupy only a negligible amount of space.
    """
    def zfs_get(options, dataset):
        # First output line of `zfs get -H` is "<name>\t<prop>\t<value>\t<source>".
        output = subprocess.check_output(
            f"zfs get {options} {dataset}",
            encoding="utf-8", shell=True
        )
        return output.split("\n")[0].split("\t")[2]

    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs receive -A data/dst", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    subprocess.check_call("zfs create data/src", shell=True)
    subprocess.check_call("zfs create data/src/iocage", shell=True)
    subprocess.check_call("zfs create data/src/iocage/child", shell=True)
    subprocess.check_call("zfs create data/src/iocage/child/dataset", shell=True)
    subprocess.check_call("dd if=/dev/urandom of=/mnt/data/src/iocage/child/dataset/blob bs=1M count=1", shell=True)
    subprocess.check_call("zfs snapshot -r data/src@2019-11-08_14-00", shell=True)
    subprocess.check_call("zfs create data/src/iocage/another", shell=True)
    subprocess.check_call("zfs create data/src/iocage/another/child", shell=True)
    subprocess.check_call("zfs clone data/src/iocage/child/dataset@2019-11-08_14-00 "
                          "data/src/iocage/another/child/clone", shell=True)
    subprocess.check_call("zfs snapshot -r data/src@2019-11-08_15-00", shell=True)

    # Sanity check: the clone points at its origin snapshot and, sharing its
    # blocks, uses far less space than the 1M blob it was cloned from.
    assert zfs_get("-H origin", "data/src/iocage/another/child/clone") == (
        "data/src/iocage/child/dataset@2019-11-08_14-00"
    )
    assert int(zfs_get("-H -p used", "data/src/iocage/another/child/clone")) < 2e6

    definition = yaml.safe_load(textwrap.dedent("""\
        timezone: "UTC"

        replication-tasks:
          src:
            direction: push
            transport:
              type: local
            source-dataset: data/src
            target-dataset: data/dst
            recursive: true
            properties: true
            replicate: true
            also-include-naming-schema:
            - "%Y-%m-%d_%H-%M"
            auto: false
            retention-policy: none
            retries: 1
    """))

    run_replication_test(definition)

    # The replicated clone must point at the origin rewritten to the target
    # tree and must still be block-sharing (tiny `used`).
    assert zfs_get("-H origin", "data/dst/iocage/another/child/clone") == (
        "data/dst/iocage/child/dataset@2019-11-08_14-00"
    )
    assert int(zfs_get("-H -p used", "data/dst/iocage/another/child/clone")) < 2e6
data/src@2018-10-01_01-00", shell=True) 24 | 25 | subprocess.check_call("zfs create data/dst", shell=True) 26 | 27 | definition = yaml.safe_load(textwrap.dedent("""\ 28 | timezone: "UTC" 29 | 30 | periodic-snapshot-tasks: 31 | src: 32 | dataset: data/src 33 | recursive: true 34 | lifetime: PT1H 35 | naming-schema: "%Y-%m-%d_%H-%M" 36 | schedule: 37 | minute: "0" 38 | 39 | replication-tasks: 40 | src: 41 | direction: push 42 | transport: 43 | type: local 44 | source-dataset: data/src 45 | target-dataset: data/dst 46 | recursive: true 47 | periodic-snapshot-tasks: 48 | - src 49 | auto: true 50 | retention-policy: none 51 | """)) 52 | 53 | definition = Definition.from_data(definition) 54 | zettarepl = create_zettarepl(definition) 55 | zettarepl._spawn_replication_tasks(Mock(), select_by_class(ReplicationTask, definition.tasks)) 56 | wait_replication_tasks_to_complete(zettarepl) 57 | 58 | assert sum(1 for m in zettarepl.observer.call_args_list if isinstance(m[0][0], ReplicationTaskSuccess)) == 1 59 | 60 | subprocess.check_call("zfs destroy -r data/src/child", shell=True) 61 | subprocess.check_call("zfs snapshot data/src@2018-10-01_02-00", shell=True) 62 | 63 | zettarepl._spawn_replication_tasks(Mock(), select_by_class(ReplicationTask, definition.tasks)) 64 | wait_replication_tasks_to_complete(zettarepl) 65 | 66 | assert sum(1 for m in zettarepl.observer.call_args_list if isinstance(m[0][0], ReplicationTaskSuccess)) == 2 67 | 68 | local_shell = LocalShell() 69 | assert len(list_snapshots(local_shell, "data/dst/child", False)) == 1 70 | -------------------------------------------------------------------------------- /integration-tests/replication/test_property_receive.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | import subprocess 3 | import textwrap 4 | 5 | import pytest 6 | import yaml 7 | 8 | from zettarepl.utils.test import run_replication_test, transports 9 | 10 | 11 | 
@pytest.mark.parametrize("transport", transports(netcat=False, unprivileged=True))
def test_property_receive(transport):
    """An unprivileged receiver that cannot set a property yields a warning.

    The unprivileged `user` may receive into data/dst/dst but lacks the
    `userprop` permission, so the custom `truenas:customproperty` cannot be
    applied — replication must still succeed, surfacing the denial as a
    warning, on both the initial and a follow-up incremental run.
    """
    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs receive -A data/dst", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    subprocess.check_call("zfs create data/src", shell=True)
    subprocess.check_call("zfs snapshot -r data/src@2021-03-10_12-00", shell=True)
    subprocess.check_call("zfs set truenas:customproperty=1 data/src", shell=True)
    subprocess.check_call("zfs snapshot -r data/src@2021-03-10_12-01", shell=True)

    # Seed the target with the first snapshot and hand it to the unprivileged
    # user with only receive/create/mount delegated.
    subprocess.check_call("zfs create data/dst", shell=True)
    subprocess.check_call("zfs create data/dst/dst", shell=True)
    subprocess.check_call("zfs allow user receive,create,mount data/dst/dst", shell=True)
    subprocess.check_call("zfs send data/src@2021-03-10_12-00 | zfs recv -s -F data/dst/dst", shell=True)
    subprocess.check_call("zfs umount data/dst/dst", shell=True)
    subprocess.check_call("chown user:user /mnt/data/dst/dst", shell=True)

    definition = yaml.safe_load(textwrap.dedent("""\
        timezone: "UTC"

        replication-tasks:
          src:
            direction: push
            source-dataset: data/src
            target-dataset: data/dst/dst
            recursive: false
            properties: true
            also-include-naming-schema:
            - "%Y-%m-%d_%H-%M"
            auto: false
            retention-policy: none
            retries: 2
    """))
    definition["replication-tasks"]["src"]["transport"] = transport

    expected_warning = "cannot receive truenas:customproperty property on data/dst/dst: permission denied"

    # Run twice: initial incremental, then another after a fresh snapshot.
    for second_round in (False, True):
        if second_round:
            subprocess.check_call("zfs snapshot -r data/src@2021-03-10_12-02", shell=True)

        assert expected_warning in run_replication_test(definition).warnings
@pytest.mark.parametrize("transport", transports())
def test_pull_replication(transport):
    """Basic pull replication moves all matching snapshots to the target."""
    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs receive -A data/dst", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    subprocess.check_call("zfs create data/src", shell=True)
    for snapshot_name in ("2018-10-01_01-00", "2018-10-01_02-00"):
        subprocess.check_call(f"zfs snapshot data/src@{snapshot_name}", shell=True)

    subprocess.check_call("zfs create data/dst", shell=True)

    definition = yaml.safe_load(textwrap.dedent("""\
        timezone: "UTC"

        replication-tasks:
          src:
            direction: pull
            source-dataset: data/src
            target-dataset: data/dst
            recursive: true
            naming-schema:
            - "%Y-%m-%d_%H-%M"
            auto: false
            retention-policy: none
    """))
    definition["replication-tasks"]["src"]["transport"] = transport

    run_replication_test(definition)

    # Both source snapshots must have been pulled onto the target.
    shell = LocalShell()
    assert len(list_snapshots(shell, "data/dst", False)) == 2
@pytest.mark.parametrize("snapshot_to_destroy,error_text", [
    # Fake incomplete `zfs send -R` by removing one of the children's most recent snapshots.
    # Fix: this message was needlessly an f-string (no placeholders); the `f`
    # prefix was dropped so literal braces can never be misread as format fields.
    (
        "data/dst/child2@2021-08-23_19-30",
        (
            "Last full ZFS replication failed to transfer all the children of the snapshot data/src@2021-08-23_19-30. "
            "The snapshot data/dst/child2@2021-08-23_19-30 was not transferred. Please run "
            "`zfs destroy -r data/dst@2021-08-23_19-30` on the target system and run replication again."
        ),
    ),
    # Older child snapshots might have been removed by retention or manually, we should not care about them.
    ("data/dst/child2@2021-08-23_19-25", None),
])
@pytest.mark.parametrize("snapshot_match_options", [
    {"also-include-naming-schema": ["%Y-%m-%d_%H-%M"]},
    {"name-regex": ".+"},
])
@pytest.mark.parametrize("take_new_snapshot", [True, False])
def test_replicate(snapshot_to_destroy, error_text, snapshot_match_options, take_new_snapshot):
    """`replicate: true` must detect an incomplete previous full replication.

    A child snapshot is destroyed on the target to simulate a partial
    `zfs send -R`; if the destroyed snapshot is the most recent one, the task
    must fail with an actionable error, otherwise it must proceed.
    """
    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs receive -A data/dst", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    subprocess.check_call("zfs create data/src", shell=True)
    subprocess.check_call("zfs create data/src/child1", shell=True)
    subprocess.check_call("zfs create data/src/child2", shell=True)
    subprocess.check_call("zfs snapshot -r data/src@2021-08-23_19-25", shell=True)
    subprocess.check_call("zfs snapshot -r data/src@2021-08-23_19-30", shell=True)
    subprocess.check_call("zfs send -R data/src@2021-08-23_19-25 | zfs recv data/dst", shell=True)
    subprocess.check_call("zfs send -R -i data/src@2021-08-23_19-25 data/src@2021-08-23_19-30 | "
                          "zfs recv data/dst", shell=True)
    # Simulate the interrupted replication.
    subprocess.check_call(f"zfs destroy {snapshot_to_destroy}", shell=True)

    if take_new_snapshot:
        # Fix: dropped a pointless `f` prefix from this constant command string.
        subprocess.check_call("zfs snapshot -r data/src@2021-08-23_19-35", shell=True)

    definition = yaml.safe_load(textwrap.dedent("""\
        timezone: "UTC"

        replication-tasks:
          src:
            direction: push
            transport:
              type: local
            source-dataset: data/src
            target-dataset: data/dst
            recursive: true
            replicate: true
            auto: false
            retention-policy: none
            retries: 1
    """))
    definition["replication-tasks"]["src"].update(snapshot_match_options)

    error = run_replication_test(definition, success=error_text is None)
    if error_text is not None:
        assert error.error == error_text
30 | definition = yaml.safe_load(textwrap.dedent("""\ 31 | timezone: "UTC" 32 | 33 | periodic-snapshot-tasks: 34 | src: 35 | dataset: data/src 36 | recursive: true 37 | lifetime: PT1H 38 | naming-schema: "%Y-%m-%d_%H-%M" 39 | schedule: 40 | minute: "0" 41 | 42 | replication-tasks: 43 | src: 44 | transport: 45 | type: ssh 46 | hostname: 127.0.0.1 47 | source-dataset: data/src 48 | target-dataset: data/dst 49 | recursive: true 50 | auto: false 51 | retention-policy: none 52 | speed-limit: 200000 53 | retries: 2 54 | """)) 55 | definition["replication-tasks"]["src"]["direction"] = direction 56 | if direction == "push": 57 | definition["replication-tasks"]["src"]["periodic-snapshot-tasks"] = ["src"] 58 | else: 59 | definition["replication-tasks"]["src"]["naming-schema"] = ["%Y-%m-%d_%H-%M"] 60 | set_localhost_transport_options(definition["replication-tasks"]["src"]["transport"]) 61 | definition = Definition.from_data(definition) 62 | 63 | caplog.set_level(logging.INFO) 64 | zettarepl = create_zettarepl(definition) 65 | zettarepl._spawn_replication_tasks(Mock(), select_by_class(ReplicationTask, definition.tasks)) 66 | 67 | time.sleep(2) 68 | if direction == "push": 69 | subprocess.check_output("kill $(pgrep -f '^zfs recv')", shell=True) 70 | else: 71 | subprocess.check_output("kill $(pgrep -f '^(zfs send|zfs: sending)')", shell=True) 72 | 73 | wait_replication_tasks_to_complete(zettarepl) 74 | 75 | assert any( 76 | " recoverable replication error" in record.message 77 | for record in caplog.get_records("call") 78 | ) 79 | assert any( 80 | "Resuming replication for destination dataset" in record.message 81 | for record in caplog.get_records("call") 82 | ) 83 | 84 | success = zettarepl.observer.call_args_list[-1][0][0] 85 | assert isinstance(success, ReplicationTaskSuccess), success 86 | 87 | local_shell = LocalShell() 88 | assert len(list_snapshots(local_shell, "data/dst", False)) == 1 89 | -------------------------------------------------------------------------------- 
@pytest.mark.parametrize("transport", transports())
@pytest.mark.parametrize("direction", ["push", "pull"])
def test_rewording_is_not_earlier_than_it(transport, direction):
    """Snapshots taken out of chronological order must produce a clear error.

    data/src@2018-10-01_02-00 is created *after* @2018-10-01_03-00, so it is
    newer on disk but carries an older date; replication must refuse with a
    message explaining exactly that.
    """
    if transport["type"] == "ssh+netcat":
        system_info = os.uname()
        # FIXME: https://jira.ixsystems.com/browse/NAS-106452
        if system_info.sysname == "FreeBSD" and system_info.release.startswith("12"):
            return

    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs receive -A data/dst", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    subprocess.check_call("zfs create data/src", shell=True)
    # Deliberately take the 03-00 snapshot before the 02-00 one.
    subprocess.check_call("zfs snapshot data/src@2018-10-01_01-00", shell=True)
    subprocess.check_call("zfs snapshot data/src@2018-10-01_03-00", shell=True)
    subprocess.check_call("zfs snapshot data/src@2018-10-01_02-00", shell=True)

    definition = yaml.safe_load(textwrap.dedent("""\
        timezone: "UTC"

        replication-tasks:
          src:
            source-dataset: data/src
            target-dataset: data/dst
            recursive: true
            auto: false
            retention-policy: none
            retries: 1
    """))
    task = definition["replication-tasks"]["src"]
    task["direction"] = direction
    task["transport"] = transport
    if direction == "push":
        task["also-include-naming-schema"] = "%Y-%m-%d_%H-%M"
    else:
        task["naming-schema"] = "%Y-%m-%d_%H-%M"

    error = run_replication_test(definition, success=False)
    assert "is newer than" in error.error
    assert "but has an older date" in error.error
@pytest.mark.parametrize("transport", transports())
def test_snapshot_gone(transport):
    """Replication must retry and succeed when a snapshot disappears between
    being listed and being sent.

    `resume_replications` is wrapped so that, on its first invocation (i.e.
    after the snapshot listing has already happened), the oldest source
    snapshot is destroyed. With `retries: 2` the task should recover and
    replicate the remaining snapshot.
    """
    # Clean up datasets possibly left over from previous runs.
    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    subprocess.check_call("zfs create data/src", shell=True)
    subprocess.check_call("zfs snapshot data/src@2018-10-01_01-00", shell=True)
    subprocess.check_call("zfs snapshot data/src@2018-10-01_02-00", shell=True)

    definition = yaml.safe_load(textwrap.dedent("""\
        timezone: "UTC"

        periodic-snapshot-tasks:
          src:
            dataset: data/src
            recursive: true
            lifetime: PT1H
            naming-schema: "%Y-%m-%d_%H-%M"
            schedule:
              minute: "0"

        replication-tasks:
          src:
            direction: push
            source-dataset: data/src
            target-dataset: data/dst
            recursive: true
            periodic-snapshot-tasks:
              - src
            auto: true
            retention-policy: none
            retries: 2
    """))
    definition["replication-tasks"]["src"]["transport"] = transport
    definition = Definition.from_data(definition)

    local_shell = LocalShell()
    zettarepl = Zettarepl(Mock(), local_shell)
    # Retention is irrelevant to this scenario; stub it out.
    zettarepl._spawn_retention = Mock()
    observer = Mock()
    zettarepl.set_observer(observer)
    zettarepl.set_tasks(definition.tasks)

    deleted = False
    def resume_replications_mock(*args, **kwargs):
        nonlocal deleted
        if not deleted:
            # Snapshots are already listed, and now we remove one of them to simulate PULL replication
            # from remote system that has `allow_empty_snapshots: false`. Only do this once.
            subprocess.check_call("zfs destroy data/src@2018-10-01_01-00", shell=True)
            deleted = True

        return resume_replications(*args, **kwargs)

    with patch("zettarepl.replication.run.resume_replications", resume_replications_mock):
        zettarepl._spawn_replication_tasks(Mock(), select_by_class(ReplicationTask, definition.tasks))
        wait_replication_tasks_to_complete(zettarepl)

    # The final observer event must be a success despite the vanished snapshot.
    error = observer.call_args_list[-1][0][0]
    assert isinstance(error, ReplicationTaskSuccess), error

    # Only the surviving snapshot made it to the target.
    assert len(list_snapshots(local_shell, "data/dst", False)) == 1
@pytest.mark.parametrize("transport", transports())
@pytest.mark.parametrize("properties", [False, True])
@pytest.mark.parametrize("encryption", [
    None,
    {
        "key": "password",
        "key-format": "passphrase",
        "key-location": "$TrueNAS",
    },
    "inherit",
])
@pytest.mark.parametrize("source_encrypted", [False, True])
def test_unencrypted_to_encrypted(transport, properties, encryption, source_encrypted):
    """Replicating into an encrypted destination hierarchy.

    The task must succeed when the stream itself carries encryption
    (`properties: true` with an encrypted source) or when the task configures
    its own encryption; otherwise it must be refused with an explanatory
    error.
    """
    if properties and encryption and source_encrypted:
        # Re-encrypting already encrypted source dataset 'data/src' while preserving its properties is not supported
        return

    subprocess.call("zfs destroy -r data/src", shell=True)
    subprocess.call("zfs destroy -r data/dst", shell=True)

    create_dataset("data/src", encrypted=source_encrypted)
    subprocess.check_call("zfs snapshot -r data/src@2018-10-01_01-00", shell=True)

    # The destination root is always encrypted; the replication target sits
    # two levels below it so intermediate datasets are also exercised.
    create_dataset("data/dst", encrypted=True)

    definition = yaml.safe_load(textwrap.dedent("""\
        timezone: "UTC"

        replication-tasks:
          src:
            direction: push
            source-dataset: data/src
            target-dataset: data/dst/child/grandchild
            recursive: false
            also-include-naming-schema:
              - "%Y-%m-%d_%H-%M"
            auto: false
            retention-policy: none
            retries: 1
    """))
    definition["replication-tasks"]["src"]["transport"] = transport
    definition["replication-tasks"]["src"]["properties"] = properties
    definition["replication-tasks"]["src"]["encryption"] = encryption

    if (properties and source_encrypted) or encryption:
        run_replication_test(definition)

        # With "inherit" the target shares the ancestor's encryption root
        # (data/dst); explicit task encryption makes the target its own
        # encryption root.
        if encryption == "inherit":
            encryptionroot = "data/dst"
        else:
            encryptionroot = "data/dst/child/grandchild"

        # `zfs get -H -p` output is tab-separated: name, property, value, source.
        assert subprocess.check_output(
            "zfs get -H -p encryptionroot data/dst/child/grandchild",
            encoding="utf-8", shell=True
        ).split("\n")[0].split("\t")[2] == encryptionroot
    else:
        error = run_replication_test(definition, success=False)

        if properties:
            assert error.error == (
                "Destination dataset 'data/dst/child/grandchild' must be encrypted (as one of its ancestors is "
                "encrypted). Refusing to transfer unencrypted source dataset 'data/src'. Please, set up replication "
                "task encryption in order to replicate this dataset."
            )
        else:
            assert error.error == (
                "Destination dataset 'data/dst/child/grandchild' must be encrypted (as one of its ancestors is "
                "encrypted). Refusing to transfer source dataset 'data/src' without properties and without replication "
                "task encryption."
            )
@pytest.mark.parametrize("hold", [
    [],
    [1],
    [1, 2],
    [1, 3],
    [0, 1, 2, 3],
])
def test_zfs_hold(hold):
    """Snapshots with a `zfs hold` must survive `destroy_snapshots`; all
    unheld snapshots must be destroyed."""
    held = [snapshots[i] for i in hold]
    try:
        for cleanup_cmd in ("zfs destroy -r data/src", "zfs destroy -r data/dst"):
            subprocess.call(cleanup_cmd, shell=True)

        subprocess.check_call("zfs create data/dst", shell=True)
        for snap in snapshots:
            subprocess.check_call(f"zfs snapshot {snap.dataset}@{snap.name}", shell=True)
        for snap in held:
            subprocess.check_call(f"zfs hold keep {snap.dataset}@{snap.name}", shell=True)

        shell = LocalShell()
        destroy_snapshots(shell, snapshots)

        assert list_snapshots(shell, "data/dst", False) == held
    finally:
        # Release every hold so subsequent tests can destroy these snapshots.
        for snap in snapshots:
            subprocess.call(f"zfs release keep {snap.dataset}@{snap.name}", shell=True)
def test_legit_step_back():
    """A legitimate wall-clock step back (DST end) must not break periodic
    snapshots.

    The UTC clock ticks 22:00 and then 23:00 on 2010-10-30; Europe/Moscow
    maps both to 02:00 local because DST ends in between, so the second run
    computes the same snapshot name as the first. Both task runs must still
    report success, while only one snapshot actually exists.

    NOTE: this function was previously named `test_snapshot_exclude`, a
    copy-paste from test_snapshot_exclude.py that collided with that file's
    test name; renamed to match this file (test_legit_step_back.py).
    """
    subprocess.call("zfs destroy -r data/src", shell=True)

    subprocess.check_call("zfs create data/src", shell=True)

    definition = yaml.safe_load(textwrap.dedent("""\
        timezone: "Europe/Moscow"

        periodic-snapshot-tasks:
          src:
            dataset: data/src
            recursive: true
            naming-schema: "%Y-%m-%d-%H-%M-%S"
            schedule:
              minute: "0"
              hour: "*"
              day-of-month: "*"
              month: "*"
              day-of-week: "*"
    """))

    definition = Definition.from_data(definition)
    clock = Mock()
    # UTC ticks; 22:00 and 23:00 both map to 02:00 local (DST step back),
    # 22:01 does not match the schedule, None stops the run loop.
    clock.tick.side_effect = [
        datetime(2010, 10, 30, 22, 0, 0),
        datetime(2010, 10, 30, 22, 1, 0),
        datetime(2010, 10, 30, 23, 0, 0),
        None,
    ]
    tz_clock = TzClock(definition.timezone, datetime(2010, 10, 30, 21, 59, 59))
    zettarepl = create_zettarepl(definition, Scheduler(clock, tz_clock))
    zettarepl.run()

    # Two full start/success cycles were observed...
    assert isinstance(zettarepl.observer.call_args_list[0][0][0], PeriodicSnapshotTaskStart)
    assert isinstance(zettarepl.observer.call_args_list[1][0][0], PeriodicSnapshotTaskSuccess)
    assert isinstance(zettarepl.observer.call_args_list[2][0][0], PeriodicSnapshotTaskStart)
    assert isinstance(zettarepl.observer.call_args_list[3][0][0], PeriodicSnapshotTaskSuccess)

    # ...but both runs produced the same snapshot name, so only one exists.
    local_shell = LocalShell()
    assert len(list_snapshots(local_shell, "data/src", False)) == 1
@pytest.mark.parametrize("transport", transports(False))
@pytest.mark.parametrize("stdout", [True, False])
def test__async_exec_timeout(transport, stdout):
    """`exec(..., timeout=5)` must raise TimeoutError within the expected
    wall-clock bound and, for the local transport, must not leave the child
    process running.
    """
    # Remote transports are allowed extra wall-clock time (presumably channel
    # teardown overhead — confirm); the local transport should honor the
    # timeout almost exactly.
    if transport["type"] == "local":
        expected_timeout = 5
    else:
        expected_timeout = 10

    # Kill stray marker processes left over from previous (failed) runs.
    subprocess.run(
        "kill -9 $(ps axw | grep ZETTAREPL_TEST_MARKER_1 | grep -v grep | awk '{print $1}')", shell=True,
    )

    # NOTE(review): the `stdout` parameter is never used in the body; it only
    # doubles the parametrized matrix. Presumably it was meant to toggle
    # queue-based stdout handling — confirm or drop it.
    transport_inst = create_transport(copy.deepcopy(transport))
    shell = transport_inst.shell(transport_inst)

    start = time.monotonic()
    with pytest.raises(TimeoutError):
        # The marker string literal lets us find (and kill) the child via ps.
        shell.exec(["python", "-c", "'ZETTAREPL_TEST_MARKER_1'; import time; time.sleep(15)"], timeout=5)
    end = time.monotonic()
    # Allow 10% jitter in either direction.
    assert expected_timeout * 0.9 < end - start < expected_timeout * 1.1

    if transport["type"] == "local":
        # The local transport must have killed the child upon timeout.
        assert int(subprocess.check_output(
            "ps axw | grep ZETTAREPL_TEST_MARKER_1 | grep -v grep | wc -l", shell=True, encoding="utf-8",
        ).strip()) == 0
@pytest.mark.parametrize("bufsize", [1, 9000])
@pytest.mark.parametrize("stdout_to_queue", [False, True])
def test__ssh_read_stdout(bufsize, stdout_to_queue):
    """Stdout over SSH arrives completely and in order, both as the exec
    return value and via an optional queue (terminated by None)."""
    data = dict(hostname="127.0.0.1", port=22, username="root")
    set_localhost_transport_options(data)
    transport = SshTransport.from_data(data)

    # Three distinguishable blobs, each `bufsize` characters long.
    f1, f2, f3 = "0" * bufsize, "1" * bufsize, "2" * bufsize

    q = Queue() if stdout_to_queue else None

    start = time.monotonic()
    result = transport.shell(transport).exec(["sh", "-c", f"echo {f1}; sleep 15; echo {f2}; sleep 15; echo {f3}"],
                                             stdout=q)
    if stdout_to_queue:
        # Drain the queue until the None sentinel.
        assert list(iter(q.get, None)) == [f"{f1}\n", f"{f2}\n", f"{f3}\n"]
    else:
        assert result == f"{f1}\n{f2}\n{f3}\n"

    # Two 15-second sleeps plus 10% slack.
    assert time.monotonic() - start <= (15 + 15) * 1.1
@pytest.mark.parametrize("dataset,exclude,result", [
    ("data", ["data/.system"], False),
    ("data/.system", ["data/.system"], True),
    ("data/.system/cores", ["data/.system"], True),
    ("data/.system-settings", ["data/.system"], False),
    ("my-data", ["data/.system"], False),
    ("my-data/.system", ["data/.system"], False),
    ("data/.system/cores", ["data/*/cores"], True),
    ("data/.system/cores2", ["data/*/cores"], False),
])
def test__should_exclude(dataset, exclude, result):
    """An exclude entry matches itself and its descendants on whole path
    components (no prefix false-positives); `*` matches one component."""
    assert result == should_exclude(dataset, exclude)
@pytest.mark.parametrize("child,parent,result", [
    ("data", "data/.system", False),
    ("data/.system", "data/.system", True),
    ("data/.system/cores", "data/.system", True),
    ("data/.system-settings", "data/.system", False),
    ("my-data", "data/.system", False),
    ("my-data/.system", "data/.system", False),
])
def test__is_child(child, parent, result):
    """`is_child` is true for the dataset itself and its descendants; it
    compares whole path components, so "data/.system-settings" is not a child
    of "data/.system"."""
    assert result == is_child(child, parent)
def test__custom_snapshot_retention_policy():
    """`custom` retention: a default lifetime (P1D) plus longer lifetimes for
    snapshots whose timestamps match the daily/weekly schedules."""
    policy = TargetSnapshotRetentionPolicy.from_data({
        "retention-policy": "custom",
        "lifetime": "P1D",
        "lifetimes": {
            "daily": {
                "schedule": {"hour": "0"},
                "lifetime": "P14D",
            },
            "weekly": {
                "schedule": {"hour": "0", "day-of-week": "1"},
                "lifetime": "P30D",
            },
        }
    })

    now = datetime(2021, 4, 21, 13, 00)
    snapshots = parse_snapshots_names([
        "2021-04-20-19-00",  # keeps: taken less than a day ago
        "2021-04-20-01-00",  # goes: older than the default one-day lifetime
        "2021-04-20-00-00",  # keeps: matches "daily", lives 14 days
        "2021-04-06-00-00",  # goes: "daily" but taken more than 14 days ago
        "2021-04-05-00-00",  # keeps: matches "weekly" (Monday), lives 30 days
    ], "%Y-%m-%d-%H-%M")

    expected = [snapshots[i].name for i in (1, 3)]
    assert policy.calculate_delete_snapshots(now, None, snapshots) == expected
@pytest.mark.parametrize("dataset,result", [
    # data/work/ix maps under the task's target and is not excluded.
    ("repl/work/ix", True),
    # data/work/garbage is excluded, so its target counterpart is not replicated.
    ("repl/work/garbage", False),
])
def test__replication_task_replicates_target_dataset(dataset, result):
    """A target dataset is replicated iff it maps under the task's target
    dataset and its source counterpart is not excluded."""
    task = Mock(
        source_datasets=["data/work"],
        target_dataset="repl/work",
        recursive=True,
        exclude=["data/work/garbage"],
    )
    assert replication_task_replicates_target_dataset(task, dataset) == result
def test__replication_monitor__ok():
    """The monitor reports success when the receive_resume_token keeps
    changing (replication is making progress): with poll history
    [None, "a", "a", "a", "a", "b"] the token changes before the monitor's
    give-up threshold (5 identical polls) is hit.
    """
    logging.getLogger().setLevel(logging.DEBUG)

    get_receive_resume_token = Mock(side_effect=[None, "a", "a", "a", "a", "b"])

    with patch("zettarepl.replication.monitor.get_receive_resume_token", get_receive_resume_token):
        with patch("zettarepl.replication.monitor.threading.Event") as Event:
            event = Mock()
            # One False per poll iteration, then True to stop the loop.
            event.wait.side_effect = [False, False, False, False, False, False, True]
            Event.return_value = event
            # `is True` instead of `== True`: run() returns a bool (E712).
            assert ReplicationMonitor(Mock(), Mock(), 60.0, 5).run() is True
def test__contains_partially_complete__contains_partially_complete_state__forever():
    """If every retry keeps raising ContainsPartiallyCompleteState, the helper
    must eventually give up and re-raise instead of retrying forever.

    `time.sleep` is patched out so the retry loop spins instantly.
    """
    run = Mock(side_effect=[ERROR] * 60)

    # Dropped the unused `as e` binding: the raised exception is not inspected.
    with pytest.raises(ContainsPartiallyCompleteState):
        with patch("zettarepl.replication.partially_complete_state.time.sleep"):
            retry_contains_partially_complete_state(run)
# -*- coding=utf-8 -*-
from datetime import datetime, timedelta
import pytest
from unittest.mock import Mock

from zettarepl.snapshot.snapshot import Snapshot
from zettarepl.snapshot.task.snapshot_owner import PeriodicSnapshotTaskSnapshotOwner
from zettarepl.retention.calculate import calculate_snapshots_to_remove, calculate_dataset_snapshots_to_remove


def test__calculate_snapshots_to_remove():
    """A snapshot is removed only once every owner that matches it lets it expire."""
    now = datetime(2019, 5, 30, 21, 52)
    long_lived_owner = PeriodicSnapshotTaskSnapshotOwner(
        now,
        Mock(dataset="dst/work",
             recursive=False,
             exclude=[],
             lifetime=timedelta(days=14),
             naming_schema="auto-%Y-%m-%d_%H-%M")
    )
    short_lived_owner = PeriodicSnapshotTaskSnapshotOwner(
        now,
        Mock(dataset="dst/work",
             recursive=False,
             exclude=[],
             lifetime=timedelta(hours=1),
             naming_schema="snap%d%m%Y%H%M")
    )
    snapshots = [Snapshot("dst/work", "snap300520191856"), Snapshot("dst/work", "snap300520191857")]

    removed = calculate_snapshots_to_remove([long_lived_owner, short_lived_owner], snapshots)

    assert removed == [Snapshot("dst/work", "snap300520191856")]


@pytest.mark.parametrize("owners,dataset,snapshots,result", [
    (
        [
            Mock(
                get_naming_schemas=Mock(return_value=["snap-%Y-%m-%d_%H-%M-%S"]),
                owns_dataset=Mock(return_value=True),
                owns_snapshot=Mock(return_value=True),
                should_retain=Mock(side_effect=lambda dataset, parsed_snapshot_name:
                                   parsed_snapshot_name.datetime >= datetime(2018, 8, 21, 23, 0))
            ),
            Mock(
                get_naming_schemas=Mock(return_value=["snap-%Y-%m-%d_%H-%M-%S"]),
                owns_dataset=Mock(return_value=True),
                owns_snapshot=Mock(side_effect=lambda dataset, parsed_snapshot_name:
                                   parsed_snapshot_name.datetime.minute % 2 == 0),
                should_retain=Mock(side_effect=lambda dataset, parsed_snapshot_name:
                                   parsed_snapshot_name.datetime >= datetime(2018, 8, 11, 23, 0))
            ),
        ],
        "data",
        [
            "snap-2018-08-21_22-58-00",
            "snap-2018-08-21_22-59-00",
            "snap-2018-08-21_23-00-00",
            "snap-2018-08-21_23-01-00",
            "snap-2018-08-21_23-02-00",
        ],
        [
            "snap-2018-08-21_22-59-00"
        ]
    )
])
def test__calculate_dataset_snapshots_to_remove(owners, dataset, snapshots, result):
    """A snapshot survives as long as at least one owning task still retains it."""
    assert calculate_dataset_snapshots_to_remove(owners, dataset, snapshots) == result
# -*- coding=utf-8 -*-
from datetime import datetime

from unittest.mock import Mock, patch
import pytest

from zettarepl.scheduler.clock import Clock


@pytest.fixture()
def sleep(monkeypatch):
    """Replace threading.Event with a mock so Clock's internal sleeps can be asserted on."""
    event_mock = Mock()
    event_mock.wait.return_value = None
    monkeypatch.setattr("threading.Event", Mock(return_value=event_mock))
    return event_mock.wait


def _pinned_clock(dt_module, now, utcnow):
    """Create a Clock whose `now` is `now` while the patched datetime reports `utcnow`."""
    clock = Clock()
    clock.now = now
    dt_module.utcnow.return_value = utcnow
    return clock


def test__time_backward(sleep):
    """A backwards time jump just resets `now` without sleeping or ticking."""
    with patch("zettarepl.scheduler.clock.datetime") as dt:
        clock = _pinned_clock(dt, datetime(2018, 8, 31, 13, 20, 25), datetime(2018, 8, 31, 13, 20, 20))

        assert clock._tick() is None

        sleep.assert_not_called()
        assert clock.now == datetime(2018, 8, 31, 13, 20, 20)


def test__sleep_max_10s(sleep):
    """The clock never sleeps longer than 10 seconds in one tick."""
    with patch("zettarepl.scheduler.clock.datetime") as dt:
        clock = _pinned_clock(dt, datetime(2018, 8, 31, 13, 20, 25), datetime(2018, 8, 31, 13, 20, 35))

        assert clock._tick() is None

        sleep.assert_called_once_with(10)
        assert clock.now == datetime(2018, 8, 31, 13, 20, 35)


def test__sleep_at_the_end_of_the_minute(sleep):
    """Near a minute boundary the clock sleeps exactly the remaining fraction."""
    with patch("zettarepl.scheduler.clock.datetime") as dt:
        clock = _pinned_clock(dt, datetime(2018, 8, 31, 13, 20, 45), datetime(2018, 8, 31, 13, 20, 55, 500000))

        assert clock._tick() is None

        sleep.assert_called_once_with(4.5)
        assert clock.now == datetime(2018, 8, 31, 13, 20, 55, 500000)


def test__time_forward(sleep):
    """Crossing a minute boundary makes the tick yield the new time without sleeping."""
    with patch("zettarepl.scheduler.clock.datetime") as dt:
        clock = _pinned_clock(dt, datetime(2018, 8, 31, 13, 20, 50), datetime(2018, 8, 31, 13, 21, 1, 2))

        assert clock._tick() == datetime(2018, 8, 31, 13, 21, 1, 2)

        sleep.assert_not_called()
        assert clock.now == datetime(2018, 8, 31, 13, 21, 1, 2)
# -*- coding=utf-8 -*-
import pytest

from zettarepl.snapshot.name import *


@pytest.mark.parametrize("naming_schema", [
    "snap_%Y%m%d_%H%M",
    "snap_%s",
])
def test__validate_snapshot_naming_schema(naming_schema):
    """Well-formed naming schemas are accepted without raising."""
    validate_snapshot_naming_schema(naming_schema)


@pytest.mark.parametrize("naming_schema,error", [
    ("snap_%y%m%d_%H%M", "%Y must be present in snapshot naming schema"),
    ("snap_%y%m%d_%H%M%%Y", "% is not an allowed character in ZFS snapshot name"),
    ("snap_%Y%m%d_%H%M$", "$ is not an allowed character in ZFS snapshot name"),
    ("snap_%Y%m%d_%H%M$&", "$& are not allowed characters in ZFS snapshot name"),
    ("snap_%s%z", "No other placeholder can be used with %s in naming schema"),
    ("snap_%Y%m%d%H%M%M", "Invalid naming schema: redefinition of group name 'M' as group 6; was group 5"),
    ("snap_%s%s", "Invalid naming schema: redefinition of group name 's' as group 2; was group 1"),
])
def test__validate_snapshot_naming_schema__error(naming_schema, error):
    """Malformed schemas raise ValueError carrying the exact expected message."""
    with pytest.raises(ValueError) as exc_info:
        validate_snapshot_naming_schema(naming_schema)

    assert exc_info.value.args[0] == error
# -*- coding=utf-8 -*-
import pytest
import textwrap
from unittest.mock import ANY, Mock, call

from zettarepl.snapshot.create import *
from zettarepl.snapshot.snapshot import Snapshot
from zettarepl.transport.interface import ExecException


def test__create_snapshot__zfscli_no_properties():
    """A recursive snapshot without properties maps to a plain `zfs snapshot -r`."""
    shell_mock = Mock()

    create_snapshot(shell_mock, Snapshot("data/src", "snap-1"), True, [], {})

    shell_mock.exec.assert_called_once_with(["zfs", "snapshot", "-r", "data/src@snap-1"])


def test__create_snapshot__zfscli_properties():
    """Snapshot properties are forwarded via `-o key=value` on the CLI."""
    shell_mock = Mock()

    create_snapshot(shell_mock, Snapshot("data/src", "snap-1"), True, [], {"freenas:vmsynced": "Y"})

    shell_mock.exec.assert_called_once_with(
        ["zfs", "snapshot", "-r", "-o", "freenas:vmsynced=Y", "data/src@snap-1"]
    )


def test__create_snapshot__zcp_ok():
    """With excluded children, creation lists datasets then runs a ZFS channel program."""
    shell_mock = Mock()
    shell_mock.exec.return_value = "Channel program fully executed with no return value."

    create_snapshot(shell_mock, Snapshot("data/src", "snap-1"), True, ["data/src/garbage", "data/src/temp"], {})

    shell_mock.exec.assert_has_calls([
        call(["zfs", "list", "-t", "filesystem,volume", "-H", "-o", "name", "-s", "name", "-r", "data/src"]),
        call(["zfs", "program", "data", ANY]),
    ])
# -*- coding=utf-8 -*-
from unittest.mock import call, Mock, patch

from zettarepl.snapshot.destroy import destroy_snapshots
from zettarepl.snapshot.snapshot import Snapshot


def test__destroy_snapshots__works():
    """Snapshots are grouped per dataset into comma-separated `zfs destroy` calls."""
    shell_mock = Mock()

    destroy_snapshots(
        shell_mock,
        [Snapshot("data", "snap-1"), Snapshot("data/work", "snap-1"), Snapshot("data", "snap-2")],
    )

    assert shell_mock.exec.call_count == 2
    shell_mock.exec.assert_has_calls([
        call(["zfs", "destroy", "data@snap-1,snap-2"]),
        call(["zfs", "destroy", "data/work@snap-1"]),
    ], any_order=True)


def test__destroy_snapshots__arg_max():
    """When the command line would exceed ARG_MAX, destruction is split into batches."""
    shell_mock = Mock()

    with patch("zettarepl.snapshot.destroy.ARG_MAX", 20):
        destroy_snapshots(shell_mock, [Snapshot("data", "snap-1"),
                                       Snapshot("data", "snap-2"),
                                       Snapshot("data", "snap-3")])

    assert shell_mock.exec.call_count == 2
    shell_mock.exec.assert_has_calls([
        call(["zfs", "destroy", "data@snap-1,snap-2"]),
        call(["zfs", "destroy", "data@snap-3"]),
    ], any_order=True)
# -*- coding=utf-8 -*-
import pytest
from unittest.mock import Mock, patch

from zettarepl.snapshot.empty import get_empty_snapshots_for_deletion, get_task_snapshots
from zettarepl.snapshot.snapshot import Snapshot


@pytest.mark.parametrize("datasets,tasks_with_snapshot_names,result", [
    (
        ["data/src", "data/src/work"],
        [
            (Mock(dataset="data/src", recursive=True, exclude=[], allow_empty=True), "snap-1"),
            (Mock(dataset="data/src/work", recursive=False, exclude=[], allow_empty=False), "snap-1"),
        ],
        []
    ),
    (
        ["data/src", "data/src/garbage", "data/src/work"],
        [
            (Mock(dataset="data/src", recursive=True, exclude=[], allow_empty=False), "snap-1"),
            (Mock(dataset="data/src", recursive=True, exclude=["data/src/garbage"], allow_empty=True), "snap-1"),
        ],
        [Snapshot("data/src/garbage", "snap-1")]
    ),
])
def test__get_empty_snapshots_for_deletion__1(datasets, tasks_with_snapshot_names, result):
    """An empty snapshot is deleted only when no covering task allows empties for it."""
    with patch("zettarepl.snapshot.empty.list_datasets", Mock(return_value=datasets)):
        with patch("zettarepl.snapshot.empty.is_empty_snapshot", Mock(return_value=True)):
            assert get_empty_snapshots_for_deletion(Mock(), tasks_with_snapshot_names) == result


@pytest.mark.parametrize("all_datasets,task,task_datasets", [
    (
        ["data/src", "data/src/work", "data/dst"],
        Mock(dataset="data/src", recursive=True, exclude=[]),
        ["data/src", "data/src/work"],
    ),
    (
        ["data/src", "data/src/garbage", "data/src/work", "data/dst"],
        Mock(dataset="data/src", recursive=True, exclude=["data/src/garbage"]),
        ["data/src", "data/src/work"],
    ),
    (
        ["data/src", "data/src/work", "data2"],
        Mock(dataset="data/src", recursive=True, exclude=[]),
        ["data/src", "data/src/work"],
    ),
    (
        ["data/src", "data/src/work", "data2"],
        Mock(dataset="data/src", recursive=False, exclude=[]),
        ["data/src"],
    ),
])
def test__get_task_snapshots(all_datasets, task, task_datasets):
    """Task snapshots cover exactly the task's datasets, honoring recursion and excludes."""
    snapshots = get_task_snapshots(all_datasets, task, "")
    assert [snap.dataset for snap in snapshots] == task_datasets
# -*- coding=utf-8 -*-
from unittest.mock import Mock, patch

import pytest

from zettarepl.utils.logging import LongStringsFilter


@pytest.mark.parametrize("length,args_in,args_out", [
    ("16", "aaaabbbbccccddddeeee", "aaaabb....ddeeee"),
    ("20", "aaaabbbbccccddddeeee", "aaaabbbbccccddddeeee"),
    ("32", "aaaabbbbccccddddeeee", "aaaabbbbccccddddeeee"),
    (
        "8",
        ("aaaabbbbcccc", 1, ["ddddeeeeffff"], {"gg": "hhhhiiiikkkk"}),
        ("aa....cc", 1, ["dd....ff"], {"gg": "hh....kk"})
    ),
])
def test__long_strings_filter(length, args_in, args_out):
    """Strings beyond the configured limit are shortened with a `....` middle, recursively."""
    with patch("zettarepl.utils.logging.os.environ.get", Mock(return_value=length)):
        record = Mock(args=args_in)
        LongStringsFilter().filter(record)
        assert record.args == args_out
# -*- coding=utf-8 -*-
import subprocess

import pytest

from zettarepl.utils.shlex import pipe


@pytest.mark.parametrize("bad_command", [None] + list(range(4)))
def test__pipe(bad_command):
    """`pipe` preserves the stream and surfaces the failing stage's exit code."""
    commands = [
        "echo a",
        "sed 's/a/)/'",
        "sed 's/)/\"/'",
        "sed 's/\"/d/'",
    ]
    expected_code = 0
    if bad_command is not None:
        expected_code = bad_command * 10 + 1
        # Make the chosen stage print to stderr and fail with a distinctive exit code
        commands[bad_command] += f"; echo ERROR 1>&2; exit {bad_command * 10 + 1}"

    piped = pipe(*[["sh", "-c", cmd] for cmd in commands])

    cp = subprocess.run(piped, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding="utf-8")

    assert cp.returncode == expected_code
    assert cp.stdout == "d\n"
    if bad_command is not None:
        assert cp.stderr == "ERROR\n"
# -*- coding=utf-8 -*-
import logging

from zettarepl.dataset.create import create_dataset as create_dataset_on_shell

from .utils import get_transport

logger = logging.getLogger(__name__)

__all__ = ["create_dataset"]


def create_dataset(args):
    """CLI entry point: create dataset ``args.name`` through the selected transport."""
    transport = get_transport(args.definition_path, args.transport)
    shell = transport.shell(transport)
    create_dataset_on_shell(shell, args.name)
# -*- coding=utf-8 -*-
import jsonschema.exceptions
import logging
import sys

import yaml

from zettarepl.definition.definition import Definition
from zettarepl.transport.create import create_transport

logger = logging.getLogger(__name__)

__all__ = ["load_definition", "load_definition_raw", "get_transport"]


def load_definition(path):
    """Load a definition file and fully parse it into a Definition object."""
    return process_definition(path, Definition.from_data)


def load_definition_raw(path):
    """Load a definition file, validate it against the schema, and return the raw data."""
    def validate_only(data):
        Definition.validate(data)
        return data

    return process_definition(path, validate_only)


def process_definition(path, cb):
    """Parse YAML from `path`, run `cb` on it, and exit(1) with a message on any failure."""
    try:
        data = yaml.safe_load(path)
    except yaml.YAMLError as e:
        sys.stderr.write(f"Definition syntax error: {e!s}\n")
        sys.exit(1)

    try:
        return cb(data)
    except yaml.YAMLError as e:
        sys.stderr.write(f"Definition syntax error: {e!s}\n")
        sys.exit(1)
    except jsonschema.exceptions.ValidationError as e:
        sys.stderr.write(f"Definition validation error: {e!s}\n")
        sys.exit(1)
    except ValueError as e:
        sys.stderr.write(f"{e!s}\n")
        sys.exit(1)


def get_transport(definition_path, transport):
    """Resolve a named transport from the definition; default to the local transport."""
    definition = load_definition_raw(definition_path)

    if not transport:
        return create_transport({"type": "local"})

    try:
        transport_definition = definition.get("transports", {})[transport]
    except KeyError:
        sys.stderr.write(f"Invalid transport {transport!r}\n")
        sys.exit(1)

    return create_transport(transport_definition)
def should_exclude(dataset: str, exclude: [str]):
    """Return True when `dataset` matches any exclude rule.

    A rule matches when `dataset` equals the rule or is a child dataset of it,
    or when the rule is an fnmatch-style glob pattern that matches `dataset`.
    """
    for rule in exclude:
        if zettarepl.dataset.relationship.is_child(dataset, rule):
            return True
        if fnmatch.fnmatch(dataset, rule):
            return True
    return False
def list_datasets_with_properties(shell: Shell, dataset: str=None, recursive: bool=True, properties=None):
    """List ZFS datasets (filesystems and volumes) with the requested properties.

    :param shell: shell to run `zfs list` on
    :param dataset: restrict the listing to this dataset (all datasets when None)
    :param recursive: include all descendants (`-r`) instead of only the dataset itself (`-d 0`)
    :param properties: mapping of property name -> type used by `parse_property`;
        the "name" property is always included
    :return: list of dicts, one per dataset line, keyed by property name
    """
    # Bug fix: copy the mapping so the mandatory "name" column is not injected
    # into the caller's dict (the original mutated the `properties` argument in place).
    properties = dict(properties or {})

    properties["name"] = str

    args = ["zfs", "list", "-t", "filesystem,volume", "-H", "-o", ",".join(properties.keys()), "-s", "name"]
    if recursive:
        args.extend(["-r"])
    else:
        # `-d 0` limits depth to the named dataset itself
        args.extend(["-d", "0"])
    if dataset is not None:
        args.append(dataset)

    with ZfsCliExceptionHandler():
        output = shell.exec(args)

    return [
        {
            property: parse_property(value, properties[property])
            for property, value in zip(properties, line.split("\t"))
        }
        for line in filter(None, output.split("\n"))
    ]
def belongs_to_tree(dataset: str, root: str, recursive: bool, exclude: [str]):
    """Tell whether `dataset` is covered by a task rooted at `root`.

    A non-recursive task covers only its root dataset; a recursive task covers
    every descendant of the root that is not excluded.
    """
    if not recursive:
        return dataset == root
    if not is_child(dataset, root):
        return False
    return not zettarepl.dataset.exclude.should_exclude(dataset, exclude)
class Definition:
    """Parsed zettarepl configuration: the full task list plus global settings.

    Instances are normally built via `from_data`.  `errors` collects the
    per-task parsing problems that were tolerated when `raise_on_error`
    is false, so callers can report partial failures.
    """

    def __init__(self, tasks, max_parallel_replication_tasks, timezone, use_removal_dates, errors):
        self.tasks = tasks
        self.max_parallel_replication_tasks = max_parallel_replication_tasks
        self.timezone = timezone
        self.use_removal_dates = use_removal_dates

        self.errors = errors

    @classmethod
    def validate(cls, data):
        """Raise a jsonschema validation error if `data` does not match the schema."""
        schema_validator.validate(data)

    @classmethod
    def from_data(cls, data, raise_on_error=True):
        """Build a `Definition` from a raw configuration dict.

        :param data: configuration as loaded from YAML (deep-copied, never mutated)
        :param raise_on_error: when True (default), raise `DefinitionErrors` if any
            task failed to parse; when False, collect them in `self.errors`
        :raises DefinitionErrors: aggregated per-task errors (if `raise_on_error`)
        """
        data = copy.deepcopy(data)

        cls.validate(data)

        errors = []

        max_parallel_replication_tasks = data.get("max-parallel-replication-tasks")

        timezone = tzlocal()
        if "timezone" in data:
            try:
                timezone = pytz.timezone(data["timezone"])
            except pytz.exceptions.UnknownTimeZoneError:
                # BUG FIX: this was a plain string, so the literal text
                # "{data['timezone']!r}" appeared in the error message instead
                # of the offending timezone name.
                errors.append(DefinitionError(f"Unknown timezone: {data['timezone']!r}"))

        periodic_snapshot_tasks = []
        for id, task in data.get("periodic-snapshot-tasks", {}).items():
            try:
                periodic_snapshot_tasks.append(PeriodicSnapshotTask.from_data(id, task))
            except ValueError as e:
                errors.append(PeriodicSnapshotTaskDefinitionError(id, e))

        transports = data.get("transports", {})

        replication_tasks = []
        for id, task in data.get("replication-tasks", {}).items():
            # A string transport is a reference into the top-level `transports` map.
            if not isinstance(task["transport"], dict):
                try:
                    task["transport"] = transports[task["transport"]]
                except KeyError:
                    # BUG FIX: missing f-prefix; the message previously contained
                    # the literal text "{task['transport']!r}".
                    e = ValueError(f"Invalid transport {task['transport']!r}")
                    errors.append(ReplicationTaskDefinitionError(id, e))
                    continue

            try:
                replication_tasks.append(ReplicationTask.from_data(id, task, periodic_snapshot_tasks))
            except ValueError as e:
                errors.append(ReplicationTaskDefinitionError(id, e))

        if errors and raise_on_error:
            raise DefinitionErrors(errors)

        return cls(periodic_snapshot_tasks + replication_tasks, max_parallel_replication_tasks, timezone,
                   data.get("use-removal-dates", False), errors)
/zettarepl/definition/schema/logging-level.schema.yaml: -------------------------------------------------------------------------------- 1 | $id: http://freenas.org/zettarepl/logging-level.schema.json 2 | $schema: http://json-schema.org/draft-07/schema# 3 | type: string 4 | enum: 5 | - notset 6 | - debug 7 | - info 8 | - warning 9 | - error 10 | -------------------------------------------------------------------------------- /zettarepl/definition/schema/periodic-snapshot-task.schema.yaml: -------------------------------------------------------------------------------- 1 | $id: http://freenas.org/zettarepl/periodic-snapshot-task.schema.json 2 | $schema: http://json-schema.org/draft-07/schema# 3 | type: object 4 | required: 5 | - dataset 6 | - recursive 7 | - naming-schema 8 | - schedule 9 | additionalProperties: false 10 | properties: 11 | dataset: 12 | type: string 13 | recursive: 14 | type: boolean 15 | exclude: 16 | type: array 17 | items: 18 | type: string 19 | lifetime: 20 | type: string 21 | naming-schema: 22 | type: string 23 | schedule: 24 | $ref: http://freenas.org/zettarepl/schedule.schema.json 25 | allow-empty: 26 | type: boolean 27 | -------------------------------------------------------------------------------- /zettarepl/definition/schema/replication-task.schema.yaml: -------------------------------------------------------------------------------- 1 | $id: http://freenas.org/zettarepl/replication-task.schema.json 2 | $schema: http://json-schema.org/draft-07/schema# 3 | type: object 4 | required: 5 | - direction 6 | - transport 7 | - source-dataset 8 | - target-dataset 9 | - recursive 10 | - auto 11 | - retention-policy 12 | additionalProperties: false 13 | properties: 14 | direction: 15 | enum: 16 | - push 17 | - pull 18 | type: string 19 | transport: 20 | anyOf: 21 | - {} 22 | - $ref: http://freenas.org/zettarepl/transport.schema.json 23 | source-dataset: 24 | anyOf: 25 | - type: array 26 | items: 27 | type: string 28 | - type: string 29 | 
target-dataset: 30 | type: string 31 | recursive: 32 | type: boolean 33 | exclude: 34 | type: array 35 | items: 36 | type: string 37 | properties: 38 | type: boolean 39 | properties-exclude: 40 | type: array 41 | items: 42 | type: string 43 | properties-override: 44 | type: object 45 | additionalProperties: true 46 | replicate: 47 | type: boolean 48 | encryption: 49 | anyOf: 50 | - type: "null" 51 | - const: "inherit" 52 | - type: object 53 | required: 54 | - key 55 | - key-format 56 | - key-location 57 | additionalProperties: false 58 | properties: 59 | key: 60 | type: string 61 | key-format: 62 | enum: 63 | - hex 64 | - passphrase 65 | type: string 66 | key-location: 67 | type: string 68 | periodic-snapshot-tasks: 69 | type: array 70 | items: {} 71 | naming-schema: 72 | anyOf: 73 | - type: array 74 | items: 75 | type: string 76 | - type: string 77 | also-include-naming-schema: 78 | anyOf: 79 | - type: array 80 | items: 81 | type: string 82 | - type: string 83 | name-regex: 84 | type: string 85 | auto: 86 | type: boolean 87 | schedule: 88 | $ref: http://freenas.org/zettarepl/schedule.schema.json 89 | restrict-schedule: 90 | $ref: http://freenas.org/zettarepl/schedule.schema.json 91 | only-matching-schedule: 92 | type: boolean 93 | readonly: 94 | type: string 95 | enum: 96 | - ignore 97 | - set 98 | - require 99 | mount: 100 | type: boolean 101 | allow-from-scratch: 102 | type: boolean 103 | only-from-scratch: 104 | type: boolean 105 | hold-pending-snapshots: 106 | type: boolean 107 | retention-policy: 108 | type: string 109 | enum: 110 | - source 111 | - custom 112 | - none 113 | lifetime: 114 | type: string 115 | lifetimes: 116 | type: object 117 | additionalProperties: false 118 | patternProperties: 119 | ^.+$: 120 | type: object 121 | required: 122 | - schedule 123 | - lifetime 124 | additionalProperties: false 125 | properties: 126 | schedule: 127 | $ref: http://freenas.org/zettarepl/schedule.schema.json 128 | lifetime: 129 | type: string 130 | compression: 
131 | type: string 132 | enum: 133 | - lz4 134 | - pigz 135 | - plzip 136 | - xz 137 | speed-limit: 138 | type: integer 139 | dedup: 140 | type: boolean 141 | large-block: 142 | type: boolean 143 | embed: 144 | type: boolean 145 | compressed: 146 | type: boolean 147 | retries: 148 | type: integer 149 | logging-level: 150 | $ref: http://freenas.org/zettarepl/logging-level.schema.json 151 | -------------------------------------------------------------------------------- /zettarepl/definition/schema/schedule.schema.yaml: -------------------------------------------------------------------------------- 1 | $id: http://freenas.org/zettarepl/schedule.schema.json 2 | $schema: http://json-schema.org/draft-07/schema# 3 | type: object 4 | additionalProperties: false 5 | properties: 6 | minute: 7 | type: string 8 | hour: 9 | type: string 10 | day-of-month: 11 | type: string 12 | month: 13 | type: string 14 | day-of-week: 15 | type: string 16 | begin: 17 | type: string 18 | end: 19 | type: string 20 | -------------------------------------------------------------------------------- /zettarepl/definition/schema/schema.yaml: -------------------------------------------------------------------------------- 1 | $id: http://freenas.org/zettarepl/schema.json 2 | $schema: http://json-schema.org/draft-07/schema# 3 | type: object 4 | additionalProperties: false 5 | properties: 6 | max-parallel-replication-tasks: 7 | anyOf: 8 | - type: integer 9 | minimum: 1 10 | - type: "null" 11 | timezone: 12 | type: string 13 | use-removal-dates: 14 | type: boolean 15 | periodic-snapshot-tasks: 16 | type: object 17 | additionalProperties: false 18 | patternProperties: 19 | ^.+$: 20 | $ref: http://freenas.org/zettarepl/periodic-snapshot-task.schema.json 21 | transports: 22 | type: object 23 | additionalProperties: false 24 | patternProperties: 25 | ^.+$: 26 | $ref: http://freenas.org/zettarepl/transport.schema.json 27 | replication-tasks: 28 | type: object 29 | additionalProperties: false 30 | 
patternProperties: 31 | ^.+$: 32 | $ref: http://freenas.org/zettarepl/replication-task.schema.json 33 | -------------------------------------------------------------------------------- /zettarepl/definition/schema/transport.schema.yaml: -------------------------------------------------------------------------------- 1 | $id: http://freenas.org/zettarepl/transport.schema.json 2 | $schema: http://json-schema.org/draft-07/schema# 3 | type: object 4 | additionalProperties: false 5 | anyOf: 6 | - $ref: "#/definitions/ssh-transport" 7 | - $ref: "#/definitions/ssh-netcat-transport" 8 | - $ref: "#/definitions/local-transport" 9 | 10 | definitions: 11 | base-ssh-transport: 12 | type: object 13 | required: 14 | - hostname 15 | - private-key 16 | - host-key 17 | properties: 18 | hostname: 19 | type: string 20 | port: 21 | type: integer 22 | username: 23 | type: string 24 | private-key: 25 | type: string 26 | host-key: 27 | type: string 28 | connect-timeout: 29 | type: integer 30 | sudo: 31 | type: boolean 32 | ssh-transport: 33 | allOf: 34 | - $ref: "#/definitions/base-ssh-transport" 35 | - required: 36 | - type 37 | properties: 38 | type: 39 | type: string 40 | enum: 41 | - ssh 42 | cipher: 43 | type: string 44 | enum: 45 | - standard 46 | - fast 47 | - disabled 48 | ssh-netcat-transport: 49 | allOf: 50 | - $ref: "#/definitions/base-ssh-transport" 51 | - required: 52 | - type 53 | - active-side 54 | properties: 55 | type: 56 | type: string 57 | enum: 58 | - ssh+netcat 59 | active-side: 60 | type: string 61 | enum: 62 | - local 63 | - remote 64 | active-side-listen-address: 65 | type: string 66 | active-side-min-port: 67 | type: integer 68 | active-side-max-port: 69 | type: integer 70 | passive-side-connect-address: 71 | type: string 72 | local-transport: 73 | type: object 74 | required: 75 | - type 76 | properties: 77 | type: 78 | type: string 79 | enum: 80 | - local 81 | -------------------------------------------------------------------------------- /zettarepl/main.py: 
class LoggingConfiguration:
    """Parsed value of the `--logging` command line option.

    The option is a comma-separated list of either bare level names
    ("info", "debug") that set the default level, or "logger:level"
    pairs that configure a specific logger (e.g. "debug,paramiko:info").
    """

    def __init__(self, value):
        self.default_level = logging.INFO
        self.loggers = []  # list of (logger_name, level) pairs

        for v in value.split(","):
            if ":" in v:
                logger_name, level_name = v.split(":", 1)
                self.loggers.append((logger_name, self._parse_level(level_name)))
            else:
                self.default_level = self._parse_level(v)

    @staticmethod
    def _parse_level(level_name):
        """Map a level name like "debug" to its `logging` constant.

        :raises argparse.ArgumentTypeError: on an unknown level name
        """
        try:
            return logging._nameToLevel[level_name.upper()]
        except KeyError:
            raise argparse.ArgumentTypeError(f"Unknown logging level: {level_name!r}") from None
E.g.: "info", "warning" or "debug,paramiko:info"') 45 | 46 | subparsers = parser.add_subparsers() 47 | subparsers.required = True 48 | subparsers.dest = "command" 49 | 50 | list_datasets_parser = subparsers.add_parser("list_datasets", help="List datasets") 51 | list_datasets_parser.add_argument("definition_path", type=argparse.FileType("r")) 52 | list_datasets_parser.add_argument("transport", nargs="?") 53 | list_datasets_parser.set_defaults(func=list_datasets) 54 | 55 | run_parser = subparsers.add_parser("create_dataset", help="Create dataset") 56 | run_parser.add_argument("definition_path", type=argparse.FileType("r")) 57 | run_parser.add_argument("name") 58 | run_parser.add_argument("transport", nargs="?") 59 | run_parser.set_defaults(func=create_dataset) 60 | 61 | run_parser = subparsers.add_parser("run", help="Continuously run scheduled replication tasks") 62 | run_parser.add_argument("definition_path", type=argparse.FileType("r")) 63 | run_parser.add_argument("--once", action="store_true", 64 | help="Run replication tasks scheduled for current moment of time and exit") 65 | run_parser.set_defaults(func=run) 66 | 67 | args = parser.parse_args() 68 | 69 | logging_format = "[%(asctime)s] %(levelname)-8s [%(threadName)s] [%(name)s] %(message)s" 70 | logging.basicConfig(level=logging.DEBUG, format=logging_format) 71 | if sys.stdout.isatty(): 72 | coloredlogs.install(level=logging.DEBUG, fmt=logging_format) 73 | for name, level in args.logging.loggers: 74 | logging.getLogger(name).setLevel(level) 75 | for handler in logging.getLogger().handlers: 76 | handler.addFilter(LongStringsFilter()) 77 | handler.addFilter(ReplicationTaskLoggingLevelFilter(args.logging.default_level)) 78 | 79 | args.func(args) 80 | -------------------------------------------------------------------------------- /zettarepl/observer.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | import logging 3 | 4 | logger = 
def notify(observer, message):
    """Deliver `message` to `observer` (if any) and return a response.

    Exceptions raised by the observer are logged, never propagated.  When the
    observer produced no result and the message declares a response type, a
    default response instance is constructed and returned instead.
    """
    outcome = None

    if observer is not None:
        try:
            outcome = observer(message)
        except Exception:
            logger.error("Unhandled exception in observer %r", observer, exc_info=True)

    if outcome is None and message.response is not None:
        outcome = message.response()

    return outcome
self.dataset = dataset 70 | self.snapshot = snapshot 71 | self.snapshots_sent = snapshots_sent 72 | self.snapshots_total = snapshots_total 73 | 74 | 75 | class ReplicationTaskSnapshotProgress(ObserverMessage): 76 | def __init__(self, task_id, dataset, snapshot, snapshots_sent, snapshots_total, bytes_sent, bytes_total): 77 | self.task_id = task_id 78 | self.dataset = dataset 79 | self.snapshot = snapshot 80 | self.snapshots_sent = snapshots_sent 81 | self.snapshots_total = snapshots_total 82 | self.bytes_sent = bytes_sent 83 | self.bytes_total = bytes_total 84 | 85 | 86 | class ReplicationTaskSnapshotSuccess(ObserverMessage): 87 | def __init__(self, task_id, dataset, snapshot, snapshots_sent, snapshots_total): 88 | self.task_id = task_id 89 | self.dataset = dataset 90 | self.snapshot = snapshot 91 | self.snapshots_sent = snapshots_sent 92 | self.snapshots_total = snapshots_total 93 | 94 | 95 | class ReplicationTaskDataProgress(ObserverMessage): 96 | def __init__(self, task_id, dataset, src_size, dst_size): 97 | self.task_id = task_id 98 | self.dataset = dataset 99 | self.src_size = src_size 100 | self.dst_size = dst_size 101 | 102 | 103 | class ReplicationTaskSuccess(ObserverMessage): 104 | def __init__(self, task_id, warnings): 105 | self.task_id = task_id 106 | self.warnings = warnings 107 | 108 | 109 | class ReplicationTaskError(ObserverMessage): 110 | def __init__(self, task_id, error): 111 | self.task_id = task_id 112 | self.error = error 113 | -------------------------------------------------------------------------------- /zettarepl/replication/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truenas/zettarepl/1345a696653c313ef75901018fd9edf9b4fc29bf/zettarepl/replication/__init__.py -------------------------------------------------------------------------------- /zettarepl/replication/dataset_size_observer.py: 
class DatasetSizeObserver:
    """Periodically samples the `used` property of the source and destination
    datasets in a background thread and reports both values to `observer`.

    Use as a context manager: entering starts the polling thread, exiting
    stops it.
    """

    # Seconds between samples (also the delay before the first sample).
    INTERVAL = 30

    def __init__(self, src_shell, dst_shell, src_dataset, dst_dataset, observer):
        self.src_shell = src_shell
        self.dst_shell = dst_shell
        self.src_dataset = src_dataset
        self.dst_dataset = dst_dataset
        # Callable invoked as observer(src_used, dst_used) on every sample.
        self.observer = observer
        # Set to request the polling thread to stop.
        self.event = threading.Event()
        # Serializes observer delivery against shutdown (see __exit__).
        self.lock = threading.Lock()

    def __enter__(self):
        # Daemon thread so a leaked observer cannot keep the process alive.
        threading.Thread(
            daemon=True,
            name=f"{threading.current_thread().name}.dataset_size_observer",
            target=self._run,
        ).start()

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Taking the lock first guarantees an in-flight `_run_once` either
        # finishes its observer call before we stop, or sees the event set
        # and skips the call entirely — never a call after shutdown.
        with self.lock:
            self.event.set()

    def _run(self):
        # Wait before the first sample so it is taken INTERVAL seconds after
        # the replication starts, not immediately.
        self.event.wait(self.INTERVAL)

        while not self.event.is_set():
            try:
                self._run_once()
            except (socket.error, paramiko.ssh_exception.SSHException, OSError) as e:
                # Transport hiccups are expected; log and retry next interval.
                logger.error("Dataset size observer error: %r", e)
            except Exception:
                logger.error("Unhandled exception in dataset size observer", exc_info=True)

            self.event.wait(self.INTERVAL)

    def _run_once(self):
        # Take one sample of both dataset sizes and report it.
        src_used = get_property(self.src_shell, self.src_dataset, "used", int)

        try:
            dst_used = get_property(self.dst_shell, self.dst_dataset, "used", int)
        except DatasetDoesNotExistException:
            # Replication may not have created the destination dataset yet.
            logger.info("Destination dataset %r on shell %r does not exist yet", self.dst_dataset, self.dst_shell)
            dst_used = 0

        with self.lock:
            if not self.event.is_set():
                self.observer(src_used, dst_used)
class ReplicationMonitor:
    """Watches a receiving dataset for a stuck `zfs recv`.

    While `run` polls, the dataset's receive_resume_token is sampled every
    `poll_interval` seconds.  If the token is non-None and has not changed for
    `fail_on_repeat_count` consecutive samples, the replication is considered
    stuck and `run` returns False; a call to `stop` makes it return True.
    """

    def __init__(self, shell, dataset, poll_interval=600.0, fail_on_repeat_count=6):
        self.shell = shell
        self.dataset = dataset
        self.poll_interval = poll_interval
        self.fail_on_repeat_count = fail_on_repeat_count

        self.stop_event = threading.Event()

    def run(self):
        """Poll until stopped or stuck.

        :return: False if the replication looks stuck, True if stopped normally
        """
        # Bounded deque keeps only the most recent `fail_on_repeat_count` samples.
        receive_resume_tokens = deque([], self.fail_on_repeat_count)
        while not self.stop_event.wait(self.poll_interval):
            receive_resume_tokens.append(get_receive_resume_token(self.shell, self.dataset))
            token_count = len(receive_resume_tokens)
            unique_count = len(set(receive_resume_tokens))
            # BUG FIX: dropped a spurious `f` prefix — the format string uses
            # lazy %-style logging placeholders, not f-string interpolation.
            logger.debug("receive_resume_tokens: count=%d, unique=%d", token_count, unique_count)
            if token_count == self.fail_on_repeat_count and unique_count == 1 and receive_resume_tokens[0] is not None:
                return False

        return True

    def stop(self):
        """Request `run` to exit at its next wakeup."""
        self.stop_event.set()
Allowing " 20 | "ZFS to catch up" 21 | ) 22 | if i >= 60: 23 | raise 24 | 25 | time.sleep(60) 26 | -------------------------------------------------------------------------------- /zettarepl/replication/pre_retention.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | from datetime import datetime 3 | import logging 4 | 5 | from zettarepl.dataset.relationship import is_child 6 | from zettarepl.retention.calculate import calculate_snapshots_to_remove 7 | from zettarepl.snapshot.destroy import destroy_snapshots 8 | from zettarepl.snapshot.snapshot import Snapshot 9 | 10 | from .snapshots_to_send import get_parsed_incremental_base 11 | from .task.dataset import get_source_dataset 12 | from .task.snapshot_owner import ExecutedReplicationTaskSnapshotOwner 13 | from .task.task import ReplicationTask 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | __all__ = ["pre_retention"] 18 | 19 | 20 | class RetentionBeforePushReplicationSnapshotOwner(ExecutedReplicationTaskSnapshotOwner): 21 | def __init__(self, target_dataset: str, *args, **kwargs): 22 | self.target_dataset = target_dataset 23 | super().__init__(*args, **kwargs) 24 | 25 | for dst_dataset, snapshots in self.delete_snapshots.items(): 26 | incremental_base = get_parsed_incremental_base( 27 | self.parsed_src_snapshots_names.get(get_source_dataset(self.replication_task, dst_dataset), []), 28 | self.parsed_dst_snapshots_names[dst_dataset] 29 | ) 30 | if incremental_base: 31 | try: 32 | snapshots.remove(incremental_base) 33 | except ValueError: 34 | pass 35 | 36 | def owns_dataset(self, dataset: str): 37 | # FIXME: Replication tasks that have multiple source datasets are executed as independent parts. 38 | # Retention has to be executed as independent parts too. Part 2 retention will not be executed until part 1 39 | # replication is completed. 
def pre_retention(now: datetime, replication_task: ReplicationTask, source_snapshots: {str: [str]},
                  target_snapshots: {str: [str]}, target_dataset: str, target_shell):
    """Destroy expired snapshots on the target before a push replication starts,
    freeing space that the incoming stream may need."""
    owner = RetentionBeforePushReplicationSnapshotOwner(
        target_dataset, now, replication_task, source_snapshots, target_snapshots,
    )

    # Flatten the per-dataset snapshot name lists into `Snapshot` objects.
    remote_snapshots = [
        Snapshot(dataset, name)
        for dataset, names in target_snapshots.items()
        for name in names
    ]

    snapshots_to_destroy = calculate_snapshots_to_remove([owner], remote_snapshots)
    logger.info("Pre-retention destroying snapshots: %r", snapshots_to_destroy)
    destroy_snapshots(target_shell, snapshots_to_destroy)
# Argv fragments for compressing / decompressing the replication stream.
ReplicationCompression = namedtuple("ReplicationCompression", ["compress", "decompress"])

# Supported `compression` option values mapped to the command pairs that
# implement them.  Note: lz4 uses the legacy `lz4c` binary.
replication_compressions = {
    "pigz": ReplicationCompression(["pigz"], ["pigz", "-d"]),
    "plzip": ReplicationCompression(["plzip"], ["plzip", "-d"]),
    "lz4": ReplicationCompression(["lz4c"], ["lz4c", "-d"]),
    "xz": ReplicationCompression(["xz"], ["xzdec"]),
}
def get_source_dataset_base(replication_task: ReplicationTask):
    """Longest common ancestor path of all of the task's source datasets."""
    prefix = os.path.commonpath(replication_task.source_datasets).rstrip("/")
    covers_all = all(
        source == prefix or source.startswith(f"{prefix}/")
        for source in replication_task.source_datasets
    )
    if not covers_all:
        # The common prefix ended inside a name component; back off to the
        # last full component boundary.
        prefix = prefix[:prefix.rfind("/") + 1]

    return prefix


def relpath(path: str, base: str):
    """`path` relative to `base`; raises `ValueError` if `path` is outside `base`."""
    result = os.path.relpath(path, base)
    if result.startswith(".."):
        raise ValueError(f"Dataset {path!r} is not an ancestor of {base!r}")
    return result


def get_source_dataset(replication_task: ReplicationTask, dst_dataset: str):
    """Map a target dataset name back to the source dataset it came from."""
    base = get_source_dataset_base(replication_task)
    rel = relpath(dst_dataset, replication_task.target_dataset)
    return os.path.normpath(os.path.join(base, rel))


def get_target_dataset(replication_task: ReplicationTask, src_dataset: str):
    """Map a source dataset name to its name on the target."""
    rel = relpath(src_dataset, get_source_dataset_base(replication_task))
    return os.path.normpath(os.path.join(replication_task.target_dataset, rel))
# Parameters describing how the target dataset should be encrypted:
# either inherit encryption from the parent, or use an explicit key with
# the given format, stored at `key_location`.
ReplicationEncryption = namedtuple("ReplicationEncryption", ["inherit", "key", "key_format", "key_location"])


class KeyFormat(enum.Enum):
    """ZFS encryption key formats supported for replication targets."""
    HEX = "hex"
    PASSPHRASE = "passphrase"


def compile_name_regex(name_regex):
    """Compile a user-supplied snapshot-name regex, anchored at the end.

    The pattern is wrapped in a group and `$`-anchored so that a match must
    cover the snapshot name up to its end when used with `.match()`.
    """
    return re.compile(f"({name_regex})$")


def replication_task_naming_schemas(replication_task: ReplicationTask):
    """Return every naming schema the task's snapshots can be named with.

    This is the union of the schemas of all bound periodic snapshot tasks and
    the task's own `also-include-naming-schema` list.
    """
    periodic_schemas = {
        periodic_snapshot_task.naming_schema
        for periodic_snapshot_task in replication_task.periodic_snapshot_tasks
    }
    return periodic_schemas | set(replication_task.also_include_naming_schema)
def replication_task_should_replicate_dataset(replication_task: ReplicationTask, dataset: str):
    """Return True if `dataset` falls under any of the task's source trees."""
    for source_dataset in replication_task.source_datasets:
        if belongs_to_tree(dataset, source_dataset, replication_task.recursive,
                           replication_task.exclude):
            return True
    return False


def replication_task_replicates_target_dataset(replication_task: ReplicationTask, dataset: str):
    """Return True if `dataset` (a target-side name) is produced by the task."""
    mapped_excludes = [get_target_dataset(replication_task, exclude) for exclude in replication_task.exclude]
    for source_dataset in replication_task.source_datasets:
        target = get_target_dataset(replication_task, source_dataset)
        if belongs_to_tree(dataset, target, replication_task.recursive, mapped_excludes):
            return True
    return False


def replication_task_should_replicate_parsed_snapshot(replication_task: ReplicationTask,
                                                      parsed_snapshot: ParsedSnapshotName):
    """Check the snapshot's timestamp against the task's schedule restrictions.

    A snapshot is replicated unless it is filtered out by `restrict_schedule`
    or, when `only_matching_schedule` is set, by the task's own schedule.
    """
    if replication_task.restrict_schedule is not None:
        if not replication_task.restrict_schedule.should_run(parsed_snapshot.datetime):
            return False
    if replication_task.only_matching_schedule:
        if not replication_task.schedule.should_run(parsed_snapshot.datetime):
            return False
    return True


def replication_tasks_source_datasets_queries(replication_tasks: [ReplicationTask]):
    """Build (dataset, recursive) snapshot-listing queries for the source side."""
    queries = []
    for replication_task in replication_tasks:
        for source_dataset in replication_task.source_datasets:
            queries.append((source_dataset, replication_task.recursive))
    return queries


def replication_tasks_target_datasets_queries(replication_tasks: [ReplicationTask]):
    """Build (dataset, recursive) snapshot-listing queries for the target side."""
    queries = []
    for replication_task in replication_tasks:
        for dataset in replication_task.source_datasets:
            queries.append((get_target_dataset(replication_task, dataset), replication_task.recursive))
    return queries
zettarepl.snapshot.snapshot import Snapshot 8 | 9 | from .snapshot_owner import SnapshotOwner 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | __all__ = ["calculate_snapshots_to_remove"] 14 | 15 | 16 | def calculate_snapshots_to_remove(owners: [SnapshotOwner], snapshots: [Snapshot]): 17 | result = [] 18 | for dataset, dataset_snapshots in group_snapshots_by_datasets(snapshots).items(): 19 | dataset_owners = [owner for owner in owners if owner.owns_dataset(dataset)] 20 | result.extend([ 21 | Snapshot(dataset, snapshot) 22 | for snapshot in calculate_dataset_snapshots_to_remove(dataset_owners, dataset, dataset_snapshots) 23 | ]) 24 | return result 25 | 26 | 27 | def calculate_dataset_snapshots_to_remove(owners: [SnapshotOwner], dataset: str, snapshots: [Snapshot]): 28 | try: 29 | parsed_snapshot_names = parse_snapshots_names_with_multiple_schemas( 30 | snapshots, 31 | set().union(*[set(owner.get_naming_schemas()) for owner in owners]) 32 | ) 33 | except ValueError as e: 34 | logger.warning("Error parsing snapshot names for dataset %r: %r", dataset, e) 35 | return [] 36 | 37 | newest_snapshot_for_naming_schema = {} 38 | for parsed_snapshot_name in parsed_snapshot_names: 39 | if parsed_snapshot_name.naming_schema is None: 40 | continue 41 | 42 | if ( 43 | parsed_snapshot_name.naming_schema not in newest_snapshot_for_naming_schema or 44 | ( 45 | newest_snapshot_for_naming_schema[parsed_snapshot_name.naming_schema].parsed_datetime < 46 | parsed_snapshot_name.parsed_datetime 47 | ) 48 | ): 49 | newest_snapshot_for_naming_schema[parsed_snapshot_name.naming_schema] = parsed_snapshot_name 50 | newest_snapshot_for_naming_schema = {k: v.name for k, v in newest_snapshot_for_naming_schema.items()} 51 | 52 | snapshots_left_for_naming_schema = defaultdict(set) 53 | for parsed_snapshot_name in parsed_snapshot_names: 54 | snapshots_left_for_naming_schema[parsed_snapshot_name.naming_schema].add(parsed_snapshot_name.name) 55 | 56 | result = [] 57 | for parsed_snapshot_name in 
parsed_snapshot_names: 58 | snapshot_owners = [ 59 | owner 60 | for owner in owners 61 | if ( 62 | # Owners owning `None` naming schema may own all snapshots 63 | {parsed_snapshot_name.naming_schema, None} & set(owner.get_naming_schemas()) and 64 | owner.owns_snapshot(dataset, parsed_snapshot_name) 65 | ) 66 | ] 67 | if ( 68 | snapshot_owners and 69 | any(owner.wants_to_delete() for owner in snapshot_owners) and 70 | not any(owner.should_retain(dataset, parsed_snapshot_name) for owner in snapshot_owners) 71 | ): 72 | logger.debug("No one of %r retains snapshot %r", snapshot_owners, parsed_snapshot_name.name) 73 | snapshots_left_for_naming_schema[parsed_snapshot_name.naming_schema].discard(parsed_snapshot_name.name) 74 | result.append(parsed_snapshot_name.name) 75 | 76 | for naming_schema, snapshots_left in snapshots_left_for_naming_schema.items(): 77 | if naming_schema is None: 78 | # We do not want this behavior for snapshots with unknown naming schema 79 | continue 80 | 81 | if not snapshots_left: 82 | newest_snapshot = newest_snapshot_for_naming_schema[naming_schema] 83 | logger.info("Not destroying %r as it is the only snapshot left for naming schema %r", 84 | newest_snapshot, naming_schema) 85 | result.remove(newest_snapshot) 86 | 87 | return result 88 | -------------------------------------------------------------------------------- /zettarepl/retention/snapshot_owner.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | import logging 3 | 4 | from zettarepl.snapshot.name import ParsedSnapshotName 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | __all__ = ["SnapshotOwner"] 9 | 10 | 11 | class SnapshotOwner: 12 | def get_naming_schemas(self) -> [str]: 13 | raise NotImplementedError 14 | 15 | def owns_dataset(self, dataset: str): 16 | raise NotImplementedError 17 | 18 | def owns_snapshot(self, dataset: str, parsed_snapshot_name: ParsedSnapshotName): 19 | raise NotImplementedError 20 | 21 | def 
wants_to_delete(self): 22 | raise NotImplementedError() 23 | 24 | def should_retain(self, dataset: str, parsed_snapshot_name: ParsedSnapshotName): 25 | raise NotImplementedError 26 | -------------------------------------------------------------------------------- /zettarepl/retention/snapshot_removal_date_snapshot_owner.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | from datetime import datetime 3 | import logging 4 | 5 | from zettarepl.snapshot.name import ParsedSnapshotName 6 | 7 | from .snapshot_owner import SnapshotOwner 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | __all__ = ["SnapshotRemovalDateSnapshotOwner"] 12 | 13 | 14 | class SnapshotRemovalDateSnapshotOwner(SnapshotOwner): 15 | def __init__(self, now: datetime, removal_dates): 16 | self.now = now 17 | self.removal_dates = removal_dates 18 | self.datasets = {snapshot.split("@", 1)[0] for snapshot in self.removal_dates.keys()} 19 | 20 | def get_naming_schemas(self) -> [str]: 21 | return [None] 22 | 23 | def owns_dataset(self, dataset: str): 24 | return dataset in self.datasets 25 | 26 | def owns_snapshot(self, dataset: str, parsed_snapshot_name: ParsedSnapshotName): 27 | return f"{dataset}@{parsed_snapshot_name.name}" in self.removal_dates 28 | 29 | def wants_to_delete(self): 30 | return True 31 | 32 | def should_retain(self, dataset: str, parsed_snapshot_name: ParsedSnapshotName): 33 | return self.removal_dates[f"{dataset}@{parsed_snapshot_name.name}"].replace(tzinfo=None) > self.now 34 | 35 | def __repr__(self): 36 | return f"<{self.__class__.__name__} {len(self.removal_dates)}>" 37 | -------------------------------------------------------------------------------- /zettarepl/scheduler/__init__.py: -------------------------------------------------------------------------------- 1 | from .clock import * 2 | from .cron import * 3 | from .scheduler import * 4 | from .tz_clock import * 5 | 
logger = logging.getLogger(__name__)

__all__ = ["Clock"]


class Clock:
    """Produces scheduler wake-ups, at most one per wall-clock minute.

    With `once=True` the clock fires exactly one tick (used for one-shot runs);
    otherwise `tick()` blocks until a new minute begins or `interrupt()` is
    called, returning the current UTC datetime.
    """

    def __init__(self, once=False):
        self.once = once

        # Whether the single `once` tick has already been consumed
        self.ticked = False
        self.now = datetime.utcnow()

        # Set by `interrupt()` to wake a blocked `tick()` early
        self.interrupt_event = threading.Event()

    def tick(self):
        """Block until the next tick; return its datetime, or None when exhausted."""
        if self.once:
            if self.ticked:
                return None
            self.ticked = True
            return self.now

        while True:
            result = self._tick()
            if result is not None:
                return result

    def interrupt(self):
        """Wake up a `tick()` that is currently sleeping."""
        self.interrupt_event.set()

    def _tick(self):
        # One bounded wait; returns a datetime when a tick should fire, else None
        now = datetime.utcnow()

        try:
            if now < self.now:
                logger.warning("Time has stepped back (%r -> %r)", self.now, now)
                return

            if self._minutetuple(self.now) == self._minutetuple(now):
                # Still within the same minute: sleep (at most 10s) until the
                # next minute boundary unless we are interrupted first
                next_minute_begin = (now + timedelta(minutes=1)).replace(second=0, microsecond=0)
                if self.interrupt_event.wait(min(10, (next_minute_begin - now).total_seconds())):
                    logger.info("Interrupted")
                    self.interrupt_event.clear()
                    try:
                        return now
                    finally:
                        now = self.now  # To resume from the same moment next time
                return

            return now
        finally:
            self.now = now

    def _minutetuple(self, d: datetime):
        # Two datetimes within the same minute compare equal under this key
        return (d.year, d.month, d.day, d.hour, d.minute)
zettarepl.definition.schema import schedule_validator 9 | from zettarepl.utils.datetime import idealized_datetime 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | __all__ = ["CronSchedule"] 14 | 15 | 16 | class CronSchedule: 17 | def __init__(self, minute, hour, day_of_month, month, day_of_week, begin, end): 18 | self.expr_format = " ".join([str(minute), str(hour), str(day_of_month), str(month), str(day_of_week)]) 19 | self.begin = begin 20 | self.end = end 21 | 22 | @classmethod 23 | def from_data(cls, data): 24 | schedule_validator.validate(data) 25 | 26 | data.setdefault("minute", "*") 27 | data.setdefault("hour", "*") 28 | data.setdefault("day-of-month", "*") 29 | data.setdefault("month", "*") 30 | data.setdefault("day-of-week", "*") 31 | data.setdefault("begin", "00:00") 32 | data.setdefault("end", "23:59") 33 | 34 | return cls(data["minute"], data["hour"], data["day-of-month"], data["month"], data["day-of-week"], 35 | isodate.parse_time(data["begin"]), isodate.parse_time(data["end"])) 36 | 37 | def should_run(self, d: datetime): 38 | idealized = idealized_datetime(d) 39 | if self.begin < self.end: 40 | if not (self.begin <= idealized.time() <= self.end): 41 | return False 42 | else: 43 | if not (idealized.time() >= self.begin or idealized.time() <= self.end): 44 | return False 45 | return croniter(self.expr_format, idealized - timedelta(seconds=1)).get_next(datetime) == idealized 46 | -------------------------------------------------------------------------------- /zettarepl/scheduler/scheduler.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | from collections import namedtuple 3 | import logging 4 | import threading 5 | 6 | logger = logging.getLogger(__name__) 7 | 8 | __all__ = ["Scheduler"] 9 | 10 | SchedulerResult = namedtuple("SchedulerResult", ["datetime", "tasks"]) 11 | 12 | 13 | class Scheduler: 14 | def __init__(self, clock, tz_clock): 15 | self.clock = clock 16 | self.tz_clock 
= tz_clock 17 | 18 | self.tasks = [] 19 | 20 | self.interrupt_lock = threading.Lock() 21 | self.interrupt_tasks = [] 22 | 23 | def set_tasks(self, tasks): 24 | self.tasks = tasks 25 | 26 | def schedule(self): 27 | while True: 28 | utcnow = self.clock.tick() 29 | if utcnow is None: 30 | break 31 | 32 | now = self.tz_clock.tick(utcnow) 33 | 34 | tasks = [] 35 | with self.interrupt_lock: 36 | if self.interrupt_tasks: 37 | tasks = self.interrupt_tasks 38 | self.interrupt_tasks = [] 39 | for task in self.tasks.copy(): 40 | if task.schedule.should_run(now.datetime): 41 | tasks.append(task) 42 | 43 | yield SchedulerResult(now, tasks) 44 | 45 | def interrupt(self, tasks): 46 | with self.interrupt_lock: 47 | self.interrupt_tasks = tasks 48 | self.clock.interrupt() 49 | -------------------------------------------------------------------------------- /zettarepl/scheduler/tz_clock.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | from collections import namedtuple 3 | import logging 4 | 5 | import pytz 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | __all__ = ["TzClockDateTime", "TzClock"] 10 | 11 | TzClockDateTime = namedtuple("TzClockDateTime", ["datetime", "offset_aware_datetime", "utc_datetime", 12 | "legit_step_back"]) 13 | 14 | 15 | class TzClock: 16 | def __init__(self, timezone, utcnow): 17 | self.timezone = timezone 18 | 19 | self.utcnow = utcnow 20 | self.now = self._calculate_now(self.utcnow) 21 | self.now_naive = self.now.replace(tzinfo=None) 22 | 23 | def tick(self, utcnow): 24 | now = self._calculate_now(utcnow) 25 | now_naive = now.replace(tzinfo=None) 26 | try: 27 | if now_naive < self.now_naive and not (utcnow < self.utcnow): 28 | return TzClockDateTime( 29 | now_naive, 30 | now, 31 | utcnow, 32 | (self.now_naive - now_naive) + (utcnow - self.utcnow), 33 | ) 34 | 35 | return TzClockDateTime(now_naive, now, utcnow, None) 36 | finally: 37 | self.utcnow = utcnow 38 | self.now = now 39 | 
logger = logging.getLogger(__name__)

__all__ = ["CreateSnapshotError", "create_snapshot"]


class CreateSnapshotError(Exception):
    # Raised when `zfs snapshot` or the `zfs program` channel program fails
    pass


def iterate_excluded_datasets(exclude_rules: [str], datasets: typing.Iterable):
    """Yield the datasets from `datasets` that match any of `exclude_rules`."""
    for dataset in datasets:
        if should_exclude(dataset, exclude_rules):
            yield dataset


def create_snapshot(shell: Shell, snapshot: Snapshot, recursive: bool, exclude_rules: [str], properties: {str: typing.Any}):
    """Create `snapshot` via `shell`.

    With `exclude_rules`, a ZFS channel program (Lua) is rendered and run with
    `zfs program` so that excluded child datasets are skipped; otherwise a
    plain `zfs snapshot` (optionally `-r` recursive, with `-o` properties) is
    executed.  Raises `CreateSnapshotError` on failure.
    """
    logger.info("On %r creating %s snapshot %r", shell, "recursive" if recursive else "non-recursive", snapshot)

    if exclude_rules:
        # TODO: support adding properties to snapshots created by channel program

        # `zfs program` runs against a pool, not a dataset
        pool_name = snapshot.dataset.split("/")[0]

        snapshot_program = io.BytesIO()
        render_zcp(
            snapshot_program,
            snapshot.dataset,
            snapshot.name,
            iterate_excluded_datasets(exclude_rules, list_datasets(shell, snapshot.dataset, recursive)),
        )
        # Upload the rendered Lua program to the remote side
        program = put_buffer(snapshot_program, "recursive_snapshot_exclude.lua", shell)

        args = ["zfs", "program", pool_name, program]

        try:
            shell.exec(args)
        except ExecException as e:
            logger.debug(e)
            # The channel program reports per-snapshot errno values on stdout;
            # translate them into readable messages.
            # NOTE: the loop variable shadows the `snapshot` parameter here.
            errors = []
            for snapshot, error in re.findall(r"snapshot=(.+?) error=([0-9]+)", e.stdout):
                errors.append((snapshot, os.strerror(int(error))))
            if errors:
                raise CreateSnapshotError(
                    "Failed to create following snapshots:\n" +
                    "\n".join([f"{snapshot!r}: {error}" for snapshot, error in errors])
                ) from None
            else:
                raise CreateSnapshotError(e) from None
    else:
        args = ["zfs", "snapshot"]

        if recursive:
            args.extend(["-r"])

        if properties:
            # Flatten {k: v} into ["-o", "k=v", "-o", ...]
            args.extend(sum([["-o", f"{k}={v}"] for k, v in properties.items()], []))

        args.append(str(snapshot))

        try:
            shell.exec(args)
        except ExecException as e:
            raise CreateSnapshotError(e) from None

    return
logger = logging.getLogger(__name__)

__all__ = ["get_empty_snapshots_for_deletion"]


def get_empty_snapshots_for_deletion(shell: Shell, tasks_with_snapshot_names: [(PeriodicSnapshotTask, str)]):
    """Return the just-created snapshots that should be deleted again.

    A snapshot is eligible when no task that created it has `allow_empty` set
    and the snapshot (together with the snapshots of all child datasets) has
    zero `written` bytes.  On any error while checking, the snapshots are
    conservatively kept.
    """
    datasets = list_datasets(shell)

    # Per dataset: the allow-empty flags of the tasks that snapshotted it,
    # and the snapshots those tasks created on it.
    datasets__allow_empty = defaultdict(list)
    datasets__snapshots = defaultdict(list)
    for task, snapshot_name in tasks_with_snapshot_names:
        for snapshot in get_task_snapshots(datasets, task, snapshot_name):
            datasets__allow_empty[snapshot.dataset].append(task.allow_empty)
            datasets__snapshots[snapshot.dataset].append(snapshot)

    empty_snapshots = []
    # Only datasets where *no* task allows empty snapshots are candidates
    for dataset in [dataset for dataset, allow_empty in datasets__allow_empty.items() if not any(allow_empty)]:
        try:
            # Delete only if this dataset's and every child dataset's snapshots are all empty
            if all(all(is_empty_snapshot(shell, snapshot) for snapshot in datasets__snapshots[ds])
                   for ds in datasets if ds == dataset or is_child(ds, dataset)):
                empty_snapshots.extend(datasets__snapshots[dataset])
        except ExecException as e:
            # Best-effort: on failure assume non-empty rather than risk deleting data.
            # (Fixed garbled message: was "assuming they are is not.")
            logger.warning("Failed to check if snapshots for dataset %r are empty, assuming they are not. Error: %r",
                           dataset, e)

    return empty_snapshots


def get_task_snapshots(datasets: [str], task: PeriodicSnapshotTask, snapshot_name: str):
    """Return the Snapshot objects `task` would create with name `snapshot_name`."""
    if task.recursive:
        return [
            Snapshot(dataset, snapshot_name)
            for dataset in datasets
            if is_child(dataset, task.dataset) and not should_exclude(dataset, task.exclude)
        ]
    else:
        return [Snapshot(task.dataset, snapshot_name)]


def is_empty_snapshot(shell: Shell, snapshot: Snapshot):
    """Return True if the snapshot's `written` property is 0 bytes."""
    return shell.exec(["zfs", "get", "-H", "-o", "value", "written", str(snapshot)]).strip() == "0"
class Snapshot(namedtuple("Snapshot", ["dataset", "name"])):
    """A `dataset@name` ZFS snapshot reference."""

    def __str__(self):
        return f"{self.dataset}@{self.name}"


def simplify_snapshot_list_queries(queries: [(str, bool)]) -> [(str, bool)]:
    """Drop (dataset, recursive) queries already covered by another query.

    Queries are processed in (name, recursive-first) order so that a recursive
    query absorbs both its duplicates and any query on a child dataset.
    """
    simplified = []
    for dataset, recursive in sorted(queries, key=lambda q: (q[0], 0 if q[1] else 1)):
        if recursive:
            # A recursive query can only be covered by another recursive one
            candidates = [q for q in simplified if q[1]]
        else:
            candidates = simplified

        covered = any(
            is_child(dataset, other) if other_recursive else dataset == other
            for other, other_recursive in candidates
        )
        if not covered:
            simplified.append((dataset, recursive))

    return simplified


def group_snapshots_by_datasets(snapshots: [Snapshot]) -> {str: [str]}:
    """Group snapshot names by dataset, datasets sorted alphabetically."""
    grouped = defaultdict(list)
    for snapshot in snapshots:
        grouped[snapshot.dataset].append(snapshot.name)
    return OrderedDict(sorted(grouped.items(), key=lambda item: item[0]))
def calculate_nonintersecting_sets(tasks: [PeriodicSnapshotTask]):
    """Partition tasks into groups of tasks whose dataset trees overlap.

    Tasks within one group may touch the same datasets and therefore must not
    be snapshotted concurrently; tasks in different groups are independent.
    """
    groups = []
    for task in tasks:
        target_group = None
        for group in groups:
            if any(tasks_intersect(task, member) for member in group):
                target_group = group
                break
        if target_group is None:
            groups.append([task])
        else:
            target_group.append(task)

    return groups


def tasks_intersect(t1: PeriodicSnapshotTask, t2: PeriodicSnapshotTask):
    """Return True if the two tasks can operate on a common dataset."""
    if t1.dataset == t2.dataset:
        return True

    commonprefix = os.path.commonpath([t1.dataset, t2.dataset])

    # data/work, data/windows
    if not commonprefix.endswith("/"):
        # data, data/windows: one dataset IS the common path — keep it
        if t1.dataset == commonprefix or t2.dataset == commonprefix:
            pass
        else:
            commonprefix = commonprefix[:commonprefix.rfind("/") + 1]

    # completely different datasets
    if commonprefix == "":
        return False

    # data/a, data/b — siblings never intersect
    if commonprefix.endswith("/"):
        return False

    # One dataset is an ancestor of the other: they intersect only when the
    # shallower task is recursive
    shallower, _deeper = sorted([t1, t2], key=lambda t: len(t.dataset))
    return shallower.recursive
class PeriodicSnapshotTaskSnapshotOwner(SnapshotOwner):
    """Retention owner backed by a periodic snapshot task's lifetime."""

    def __init__(self, now: datetime, periodic_snapshot_task: PeriodicSnapshotTask):
        # Idealize once so every retention comparison uses the same reference point
        self.idealized_now = idealized_datetime(now)
        self.periodic_snapshot_task = periodic_snapshot_task

    def get_naming_schemas(self):
        return [self.periodic_snapshot_task.naming_schema]

    def owns_dataset(self, dataset: str):
        task = self.periodic_snapshot_task
        return belongs_to_tree(dataset, task.dataset, task.recursive, task.exclude)

    def owns_snapshot(self, dataset: str, parsed_snapshot_name: ParsedSnapshotName):
        # The task owns snapshots whose timestamp matches its schedule
        return self.periodic_snapshot_task.schedule.should_run(parsed_snapshot_name.datetime)

    def wants_to_delete(self):
        return True

    def should_retain(self, dataset: str, parsed_snapshot_name: ParsedSnapshotName):
        # Snapshots strictly older than `now - lifetime` become eligible for deletion
        delete_before = self.idealized_now - self.periodic_snapshot_task.lifetime
        return idealized_datetime(parsed_snapshot_name.datetime) > delete_before

    def __repr__(self):
        return f"<{self.__class__.__name__} {self.periodic_snapshot_task.id!r}>"
naming_schema: str, schedule: CronSchedule, allow_empty: bool): 19 | self.id = id 20 | self.dataset = dataset 21 | self.recursive = recursive 22 | self.exclude = exclude 23 | self.lifetime = lifetime 24 | self.naming_schema = naming_schema 25 | self.schedule = schedule 26 | self.allow_empty = allow_empty 27 | 28 | validate_snapshot_naming_schema(self.naming_schema) 29 | 30 | def __repr__(self): 31 | return f"" 32 | 33 | @classmethod 34 | def from_data(cls, id, data): 35 | periodic_snapshot_task_validator.validate(data) 36 | 37 | data.setdefault("exclude", []) 38 | data.setdefault("allow-empty", True) 39 | 40 | if "lifetime" in data: 41 | lifetime = isodate.parse_duration(data["lifetime"]) 42 | else: 43 | # timedelta.max is not good here because operations with it would result in 44 | # OverflowError: date value out of range 45 | lifetime = timedelta(days=36500) 46 | 47 | return cls( 48 | id, data["dataset"], data["recursive"], data["exclude"], lifetime, 49 | data["naming-schema"], CronSchedule.from_data(data["schedule"]), data["allow-empty"]) 50 | -------------------------------------------------------------------------------- /zettarepl/transport/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truenas/zettarepl/1345a696653c313ef75901018fd9edf9b4fc29bf/zettarepl/transport/__init__.py -------------------------------------------------------------------------------- /zettarepl/transport/compare.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | import logging 3 | 4 | from .base_ssh import BaseSshTransport 5 | from .interface import Transport 6 | from .local import LocalTransport 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | __all__ = ["are_same_host"] 11 | 12 | 13 | def are_same_host(t1: Transport, t2: Transport): 14 | if isinstance(t1, LocalTransport) and isinstance(t2, LocalTransport): 15 | return True 16 | 17 | if 
def create_transport(data):
    """Instantiate a transport from its configuration dict.

    `data["type"]` selects the implementation and is consumed (popped) here; the
    remaining keys are handed to the chosen transport's `from_data` constructor.
    Raises `KeyError` for an unknown type.
    """
    transport_classes = {
        "local": LocalTransport,
        "ssh": SshTransport,
        "ssh+netcat": SshNetcatTransport,
    }
    transport_class = transport_classes[data.pop("type")]
    return transport_class.from_data(data)
key_location = self.replication_process.encryption.key_location 35 | 36 | self.shell.put_file(io.BytesIO(self.replication_process.encryption.key.encode("utf-8")), key_location) 37 | 38 | return [], { 39 | "encryption": "on", 40 | "keyformat": self.replication_process.encryption.key_format.value, 41 | "keylocation": f"file://{key_location}" 42 | } 43 | 44 | def exit(self, success): 45 | if self.tmp_key_location is not None: 46 | self.shell.exec(["rm", self.tmp_key_location]) 47 | 48 | if ( 49 | success and 50 | self.replication_process.encryption.key_location == "$TrueNAS" and 51 | self.replication_process.encryption.key_format != KeyFormat.PASSPHRASE 52 | ): 53 | try: 54 | self.shell.exec(["midclt", "call", "pool.dataset.insert_or_update_encrypted_record", json.dumps({ 55 | "name": self.replication_process.target_dataset, 56 | "encryption_key": self.replication_process.encryption.key, 57 | "key_format": self.replication_process.encryption.key_format.value.upper(), 58 | })]) 59 | except ExecException as e: 60 | if e.returncode == 127: # sh: midclt: not found 61 | raise ReplicationError( 62 | 'Replication is configured to store target system dataset encryption key in the TrueNAS ' 63 | 'database, but target system is not a TrueNAS system. Please, use different target dataset ' 64 | 'encryption settings.' 
def parse_zfs_progress(s):
    """Extract (current, total) byte counts from a `zfs send -V` progress line.

    Returns None when the line carries no progress report.

    Fix: the regex had lost its group names (`(?P.+)` is invalid regex syntax and
    raises `re.error` at call time), while `m.group("current")`/`m.group("total")`
    prove named groups were intended. Restored the named groups (the first group's
    name is presumed `snapshot` — it is never read, so the name does not affect
    behavior).
    """
    m = re.search(
        r"zfs: sending (?P<snapshot>.+) \([0-9]+%: (?P<current>[0-9.]+[KMGT]?)/(?P<total>[0-9.]+[KMGT]?)\)",
        s,
    )
    if m:
        current = parse_zfs_progress_value(m.group("current"))
        total = parse_zfs_progress_value(m.group("total"))
        return current, total


def parse_zfs_progress_value(s):
    """Convert a progress figure like `1.5K` or `2M` to an integer byte count.

    Suffixes use decimal (SI) multipliers, matching the original implementation
    (1K == 1000).
    """
    multipliers = {"K": 10 ** 3, "M": 10 ** 6, "G": 10 ** 9, "T": 10 ** 12}
    multiplier = 1
    if s and s[-1] in multipliers:
        multiplier = multipliers[s[-1]]
        s = s[:-1]

    return int(float(s) * multiplier)
_start_progress_observer(self): 73 | self.stop_progress_observer = threading.Event() 74 | 75 | try: 76 | pid = self.async_exec.head(self._get_zettarepl_pid, 10) 77 | except TimeoutError: 78 | raise TimeoutError("Timeout waiting for `zfs send` to start") 79 | 80 | threading.Thread(daemon=True, name=f"{threading.current_thread().name}.progress_observer", 81 | target=self._progress_observer, args=(pid,)).start() 82 | 83 | def _stop_progress_observer(self): 84 | if self.stop_progress_observer: 85 | self.stop_progress_observer.set() 86 | 87 | def _get_zettarepl_pid(self, line): 88 | m = re.match("zettarepl: zfs send PID is ([0-9]+)", line.strip()) 89 | if m: 90 | return int(m.group(1)) 91 | 92 | def _progress_observer(self, pid): 93 | try: 94 | send_shell = self._get_send_shell() 95 | 96 | while True: 97 | if self.stop_progress_observer.wait(10): 98 | return 99 | 100 | try: 101 | s = send_shell.exec(["ps", "-o", "command", "--ppid" if self._send_uses_sudo() else "-p", str(pid)]) 102 | except ExecException as e: 103 | if e.returncode == 1 and e.stdout.strip() == "COMMAND": 104 | logger.debug("zfs send with PID %r is gone", pid) 105 | return 106 | 107 | raise 108 | 109 | if progress := parse_zfs_progress(s): 110 | current, total = progress 111 | if total == 0: 112 | total = current + 1 113 | self.notify_progress_observer(current, total) 114 | else: 115 | logger.debug("Unable to find ZFS send progress in %r", s) 116 | except Exception: 117 | logger.error("Unhandled exception in progress observer", exc_info=True) 118 | -------------------------------------------------------------------------------- /zettarepl/transport/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | import hashlib 3 | import logging 4 | import os 5 | import typing 6 | 7 | from .encryption_context import EncryptionContext 8 | from .interface import ReplicationProcess, Shell 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 
__all__ = ["get_properties_exclude_override", "put_file"] 13 | 14 | 15 | def get_properties_exclude_override(process: ReplicationProcess, encryption_context: EncryptionContext): 16 | properties_exclude = [] 17 | properties_override = {} 18 | 19 | if encryption_context: 20 | context_properties_exclude, context_properties_override = encryption_context.enter() 21 | properties_exclude += context_properties_exclude 22 | properties_override.update(**context_properties_override) 23 | 24 | for property in process.properties_exclude: 25 | if property not in properties_exclude: 26 | properties_exclude.append(property) 27 | properties_override.update(process.properties_override) 28 | 29 | return properties_exclude, properties_override 30 | 31 | 32 | def put_file(name, shell: Shell): 33 | local_path = os.path.join(os.path.dirname(__file__), "..", name) 34 | with open(local_path, "rb") as f: 35 | md5 = hashlib.md5(f.read()).hexdigest() 36 | f.seek(0) 37 | 38 | remote_path = f"/tmp/zettarepl--{name.replace('/', '--')}--{md5}" 39 | if not shell.exists(remote_path): 40 | shell.put_file(f, remote_path) 41 | 42 | return remote_path 43 | 44 | 45 | def put_buffer(buffer: typing.IO[bytes], name: str, shell: Shell): 46 | buffer.seek(0) 47 | md5 = hashlib.md5(buffer.read()).hexdigest() 48 | buffer.seek(0) 49 | 50 | remote_path = f"/tmp/zettarepl--{name.replace('/', '--')}--{md5}" 51 | if not shell.exists(remote_path): 52 | shell.put_file(buffer, remote_path) 53 | 54 | return remote_path 55 | -------------------------------------------------------------------------------- /zettarepl/transport/zfscli/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding=utf-8 -*- 2 | import logging 3 | 4 | from .exception import ZfsCliExceptionHandler 5 | from .parse import zfs_bool 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | __all__ = ["zfs_send", "zfs_recv", "get_receive_resume_token", "get_properties_recursive", "get_properties", 10 | 
"get_property"] 11 | 12 | 13 | def zfs_send(source_dataset: str, 14 | snapshot: str, 15 | properties: bool, 16 | replicate: bool, 17 | incremental_base: str, 18 | include_intermediate: bool, 19 | receive_resume_token: str, 20 | dedup: bool, 21 | large_block: bool, 22 | embed: bool, 23 | compressed: bool, 24 | raw: bool, 25 | report_progress=False): 26 | send = ["zfs", "send"] 27 | 28 | if embed: 29 | send.append("-e") 30 | 31 | if report_progress: 32 | send.append("-V") 33 | 34 | if receive_resume_token is None: 35 | assert snapshot is not None 36 | 37 | if replicate: 38 | send.append("-R") 39 | else: 40 | if properties: 41 | send.append("-p") 42 | 43 | if raw: 44 | send.append("-w") 45 | 46 | if incremental_base is not None: 47 | if include_intermediate: 48 | send.append("-I") 49 | else: 50 | send.append("-i") 51 | 52 | send.append(f"{source_dataset}@{incremental_base}") 53 | 54 | if dedup: 55 | send.append("-D") 56 | 57 | if large_block: 58 | send.append("-L") 59 | 60 | if compressed: 61 | send.append("-c") 62 | 63 | send.append(f"{source_dataset}@{snapshot}") 64 | else: 65 | assert snapshot is None 66 | assert incremental_base is None 67 | 68 | send.extend(["-t", receive_resume_token]) 69 | 70 | return send 71 | 72 | 73 | def zfs_recv(target_dataset, mount: bool, properties_exclude: [str], properties_override: {str: str}): 74 | result = ["zfs", "recv", "-s", "-F"] 75 | 76 | if not mount: 77 | result.append("-u") 78 | 79 | result.extend(sum([["-x", property] for property in properties_exclude], [])) 80 | result.extend(sum([["-o", f"{property}={value}"] for property, value in properties_override.items()], [])) 81 | 82 | result.append(target_dataset) 83 | 84 | return result 85 | 86 | 87 | def get_receive_resume_token(shell, dataset): 88 | return get_property(shell, dataset, "receive_resume_token") 89 | 90 | 91 | def get_properties_recursive(shell, datasets, properties: {str: type}, include_source: bool = False, 92 | recursive: bool = False): 93 | with 
def parse_property(value, type):
    """Convert a raw `zfs get` value to `type`.

    `-` means the property is unset and yields None; bool properties go through
    `zfs_bool` to map on/off and yes/no.
    """
    if value == "-":
        return None

    converter = zfs_bool if type == bool else type
    return converter(value)
def idealized_datetime(d: datetime):
    """Normalize `d` for schedule/retention comparisons: strip the timezone,
    then truncate to whole-minute precision."""
    naive = d.replace(tzinfo=None)
    return naive.replace(second=0, microsecond=0)
def bisect(condition, iterable):
    """Single-pass partition of `iterable` into `(matching, rest)` lists.

    A single pass (rather than two comprehensions) keeps one-shot iterators
    such as generators working.
    """
    matching, rest = [], []
    for item in iterable:
        bucket = matching if condition(item) else rest
        bucket.append(item)

    return matching, rest
def logging_record_replication_task(record: logging.LogRecord):
    """Return the replication task id a log record belongs to, or None.

    The id is encoded either in the emitting thread's name (worker threads are
    named `replication_task__<id>`) or in the logger name of the per-task
    paramiko loggers.

    Fix: the patterns were plain strings, so `\\.` was an invalid escape sequence
    (DeprecationWarning, future SyntaxError) — now raw strings; also stop
    computing the second match when the first already succeeded.
    """
    m = re.match(r"replication_task__([^.]+)", record.threadName)
    if m:
        return m.group(1)

    m = re.match(r"zettarepl\.paramiko\.replication_task__([^.]+)", record.name)
    if m:
        return m.group(1)
def re_search_to(m: dict, *args, **kwargs):
    """`re.search` that stores its results into the dict `m` and returns a bool.

    On a match, `m[0]` gets the whole match, `m[1..n]` the positional groups and
    the named groups are merged in by name. On no match, `m` is left untouched.
    """
    match = re.search(*args, **kwargs)
    if match is None:
        return False

    m[0] = match.group(0)
    for index, value in enumerate(match.groups(), start=1):
        m[index] = value
    m.update(match.groupdict())
    return True
25 | command = "exec 3>&1; " 26 | 27 | # What's inside will print text like: 28 | # pipestatus0=1 29 | # pipestatus1=0 30 | # pipestatus2=0 31 | # We'll eval that to get these variables in our scope 32 | command += "eval $(" 33 | 34 | # We'll print 'pipestatusX=Y' to 4, and we'll to pass it to eval through stdout 35 | # We'll redirect real stdout to 3, parent shell will print it back to stdout 36 | # We'll close fd 3 because we don't need it 37 | command += "exec 4>&1 >&3 3>&-; " 38 | 39 | command += " | ".join([f"{{ {implode(args)} 4>&-; echo \"pipestatus{i}=$?;\" >&4; }}" 40 | for i, args in enumerate(self.cmds)]) 41 | 42 | # close eval 43 | command += "); " 44 | 45 | # Fail with exit code of the first failed command 46 | command += "; ".join([f"[ $pipestatus{i} -ne 0 ] && exit $pipestatus{i}" for i in range(len(self.cmds))]) + "; " 47 | 48 | # No command failed 49 | command += "exit 0" 50 | 51 | return iter(["sh", "-c", command]) 52 | 53 | def __repr__(self): 54 | return f"Pipe({self.cmds!r})" 55 | -------------------------------------------------------------------------------- /zettarepl/zcp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/truenas/zettarepl/1345a696653c313ef75901018fd9edf9b4fc29bf/zettarepl/zcp/__init__.py -------------------------------------------------------------------------------- /zettarepl/zcp/recursive_snapshot_exclude.lua: -------------------------------------------------------------------------------- 1 | snapshots_to_create = {} 2 | function populate_snapshots_to_create(dataset) 3 | table.insert(snapshots_to_create, dataset .. "@" .. 
def render_vars(buffer: typing.IO[bytes], dataset: str, snapshot_name: str, excluded_datasets: typing.Iterable):
    """Write the Lua variable preamble consumed by the channel program.

    Emits `dataset`, `snapshot_name` and the `excluded_datasets` table as
    UTF-8 Lua source into `buffer`.
    """
    parts = [
        f'dataset = "{dataset}"\n',
        f'snapshot_name = "{snapshot_name}"\n',
        'excluded_datasets = {',
    ]
    parts.extend(f'"{excluded_dataset}", ' for excluded_dataset in excluded_datasets)
    parts.append('}\n')

    # NOTE(review): values are interpolated verbatim; assumes ZFS dataset and
    # snapshot names cannot contain `"` (true for valid ZFS names) — confirm.
    buffer.write("".join(parts).encode("utf-8"))
render_vars(buffer, dataset, snapshot_name, excluded_datasets) 27 | buffer.write(zcp_program.encode("utf-8")) 28 | --------------------------------------------------------------------------------