├── .gitignore ├── LICENSE ├── README.md ├── debian ├── README ├── changelog ├── compat ├── control ├── copyright ├── docs ├── rules └── source │ └── format ├── setup.py └── whisperbackup ├── __init__.py ├── disk.py ├── fill.py ├── gcs.py ├── multiprocessinglog.py ├── noop.py ├── pycronscript.py ├── s3.py ├── swift.py └── whisperbackup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pyc 3 | *.pyo 4 | build/ 5 | whisper_backup.egg-info/ 6 | test/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 
30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 
62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | whisper-backup 2 | =============== 3 | 4 | I needed a handy way to backup my Graphite cluster to reliable storage such as 5 | Amazon S3, Google Cloud Storage, or OpenStack Swift. Also, the ability to 6 | restore that data in a sane away. Hence, I wrote `whisper-backup`. 7 | 8 | Examples 9 | -------- 10 | 11 | Backup: 12 | ``` 13 | $ whisper-backup --logfile /opt/graphite/storage/log/whisper-backup/whisper-backup.log \ 14 | --bucket $(hostname -s) \ 15 | --retention 5 \ 16 | --quiet \ 17 | backup swift 18 | ``` 19 | 20 | Restore: 21 | ``` 22 | $ whisper-backup --logfile /opt/graphite/storage/log/whisper-backup/whisper-backup.log \ 23 | --bucket $(hostname -s) \ 24 | --prefix /data/tmp \ 25 | restore swift 26 | ``` 27 | 28 | Goals 29 | ----- 30 | 31 | * Compress WSP data. Space is cash and they compress well. Gzip and Snappy 32 | compression is supported. 33 | * Support storing multiple backups of the same WSP DB over a retention 34 | period. 
35 | * Be able to restore and backup all or part of an existing tree of metrics. 36 | * Don't waste space on duplicate WSP files. 37 | * Verify restored data. 38 | * Allow for manual restores if needed, we simply store Gzip/Snappy versions of 39 | the WSP files in our storage backend. 40 | * Support multiple storage backends. 41 | * Use `flock()` (the same locking method that Whisper uses) to lock each DB 42 | file before uploading it. This ensures we have a copy that wasn't in the 43 | middle of a file update procedure. You have your carbon-cache daemons 44 | set to use locking, right? 45 | * On restore, if the WSP file already exists just backfill in the data 46 | rather than overwrite it. 47 | * File space for temp file copies is limited and is definitely not 48 | large enough to fit an entire backup set into. 49 | * Use multiprocessing to handle large backup sets faster. 50 | 51 | Usage 52 | ----- 53 | 54 | I decided not to design this to store multiple servers worth of WSP files in 55 | a single bucket/container of the storage service. For large clusters this 56 | could be millions of files which may cause slowness with the API and other 57 | issues. So if you have multiple servers you should set each machine to backup 58 | to its own bucket/container. 
59 | 60 | ``` 61 | Usage: whisperbackup.py [options] backup|restore|purge|list disk|gcs|noop|s3|swift [storage args] 62 | 63 | Options: 64 | -p PREFIX, --prefix=PREFIX 65 | Root of where the whisper files live or will be 66 | restored to, default /opt/graphite/storage/whisper 67 | -f PROCESSES, --processes=PROCESSES 68 | Number of worker processes to spawn, default 4 69 | -r RETENTION, --retention=RETENTION 70 | Number of unique backups to retain for each whisper 71 | file, default 5 72 | -x PURGE, --purge=PURGE 73 | Days to keep unknown Whisper file backups, -1 74 | disables, default 45 75 | -n, --noop Do not modify the object store, default False 76 | -b BUCKET, --bucket=BUCKET 77 | The AWS S3 bucket name or Swift container to use, 78 | default graphite-backups 79 | -m METRICS, --metrics=METRICS 80 | Glob pattern of metric names to backup or restore, 81 | default * 82 | -c DATE, --date=DATE String in ISO-8601 date format. The last backup before 83 | this date will be used during the restore. Default is 84 | now or 2019-09-30T17:52:51+00:00. 85 | -a ALGORITHM, --algorithm=ALGORITHM 86 | Compression format to use based on installed Python 87 | modules. 
Choices: gz, sz 88 | --storage-path=STORAGE_PATH 89 | Path in the bucket to store the backup, default 90 | -d, --debug Minimum log level of DEBUG 91 | -q, --quiet Only WARN and above to stdout 92 | --nolog Do not log to LOGFILE 93 | --logfile=LOGFILE File to log to, default /var/log/whisperbackup.py.log 94 | --syslog Log to syslog instead of a file 95 | --nolock Do not use a lockfile 96 | --lockfile=LOCKFILE Lock file, default /var/lock/whisperbackup.py 97 | --nostamp Do not use a success stamp file 98 | --stampfile=STAMPFILE 99 | Success stamp file, default 100 | /var/tmp/whisperbackup.py.success 101 | --locktimeout=LOCKTIMEOUT 102 | Lock timeout in seconds, default 90 103 | --splay=SPLAY Sleep a random time between 0 and N seconds before 104 | starting, default 0 105 | -h, --help show this help message and exit 106 | 107 | ``` 108 | 109 | Notes: 110 | * Purge removes Whisper backups in the datastore for Whisper files not 111 | presently on the server. Such as deleted or moved Whisper files. A setting 112 | of 0 will immediately purge backups for metrics not on the local disk, 113 | -1 will disable purge. 114 | 115 | Compression Algorithms and Notes 116 | -------------------------------- 117 | 118 | Historically this tool has compressed Whisper files with Python's gzip 119 | implementation. This was done so that the compressed files could be manually 120 | pulled and restored if needed. All the compressed Whisper files were readable 121 | by the `gunzip` utility. 122 | 123 | Gzip offers reasonable compression, but is quite slow. If a Graphite cluster 124 | has many Whisper files, this backup utility would take hours or days to 125 | complete a backup cycle due to the time spend gzipping each Whisper file. 126 | Due to this whisper-backup now supports multiple compression algorithms. 
127 | 128 | Each supported algorithm is identified by its file name suffix: 129 | 130 | * Gzip (default): `gz` 131 | * Google Snappy: `sz` 132 | 133 | On a test Graphite data node with only a few thousand metrics, using Gzip 134 | made a runtime of 73+ minutes to complete a backup cycle. With Snappy that 135 | dropped to 8 minutes. 136 | 137 | To decompress a `*.sz` file manually you can use the python-snappy module 138 | that whisper-backup uses: 139 | 140 | python -m snappy -d compressed.wsp.sz cleartext.wsp 141 | 142 | You can compress as well with the `-c` option rather than `-d`. Any tool 143 | that supports the [Snappy Framing Format][1] should be able to decompress 144 | these files. 145 | 146 | Requirements 147 | ------------ 148 | 149 | Required Python packages and the versions of which I've tested with. 150 | 151 | * whisper >= 0.9.12 152 | * carbon >= 0.9.12 153 | * lockfile 154 | 155 | Storage Backends and Requirements 156 | --------------------------------- 157 | 158 | ### Google Snappy Compression 159 | 160 | Installing the `snappy` Python module will enable support in whisper-backup. 161 | 162 | $ pip install snappy 163 | 164 | Some distributions may package this as `python-snappy`. 165 | 166 | ### AWS S3 Backend 167 | 168 | The `boto` package must be installed. 169 | 170 | $ pip install boto 171 | 172 | Make sure your AWS credentials are set so that Boto will find them. This is 173 | normally setting the environment variables `AWS_ACCESS_KEY_ID` and 174 | `AWS_SECRET_ACCESS_KEY`. 175 | 176 | ### OpenStack Swift Backend 177 | 178 | Make sure the `swiftclient` Python package is installed that is version 3.0.0 179 | or better. 180 | 181 | $ pip install swiftclient 182 | 183 | Set the environment variables `ST_AUTH`, `ST_USER`, and `ST_KEY` for 184 | authentication to your Swift endpoint. 185 | 186 | ### Google Cloud Storage Backend 187 | 188 | The `google-cloud-storage` Python package must be installed. 
189 | 190 | $ pip install google-cloud-storage 191 | 192 | This uses the Google's default application credentials system to locate 193 | credentials to use. If this is running in GCP the service account that 194 | the GCE VMs running this code use simply needs the correct access to GCS 195 | buckets. Otherwise the `GOOGLE_APPLICATION_CREDENTIALS` environment variable 196 | should be set to reference the on disk file of GCP credentials. 197 | 198 | Contributions 199 | ------------- 200 | 201 | PRs are welcome. 202 | 203 | To Do 204 | ----- 205 | 206 | * We use multiprocess.Pool for backups, but restores are still single process 207 | * Purge bug: If a metric has been idle for 45 days 208 | then the backup date on that file in the object store hasn't changed. So 209 | once that metric is removed from local disk it will be immediately removed 210 | from the object store rather than 45 days after it was removed from local 211 | disk. 212 | * Signal handler or Control-C to terminate all processes. 213 | 214 | [1]: https://github.com/google/snappy/blob/master/framing_format.txt 215 | -------------------------------------------------------------------------------- /debian/README: -------------------------------------------------------------------------------- 1 | The Debian Package whisper-backup 2 | ---------------------------- 3 | 4 | Some better packaging techniques could be used here: 5 | 6 | * We depend on python-swift or boto -- one or both. 7 | * We depend on whisper for restores. 
8 | 9 | -- Jack Neely Tue, 02 Jun 2015 15:49:25 -0400 10 | -------------------------------------------------------------------------------- /debian/changelog: -------------------------------------------------------------------------------- 1 | whisper-backup (0.3.0-1) unstable; urgency=medium 2 | 3 | * Add Google Cloud Storage support 4 | 5 | -- Jack Neely Tue, 01 Oct 2019 13:34:08 -0400 6 | 7 | whisper-backup (0.2.0-1) unstable; urgency=medium 8 | 9 | * Support Google Snappy compression for faster backup cycles 10 | 11 | -- Jack Neely Wed, 17 May 2017 16:29:37 -0400 12 | 13 | whisper-backup (0.1.3-1) unstable; urgency=medium 14 | 15 | * Fix purges taking for ever to complete. 16 | 17 | -- Jack Neely Tue, 21 Mar 2017 11:38:11 -0400 18 | 19 | whisper-backup (0.1.2-1) unstable; urgency=medium 20 | 21 | * Attempt a 0.1.2 release with some new features that have been 22 | contributed and some logging crazy. 23 | 24 | -- Jack Neely Mon, 20 Mar 2017 16:46:24 -0400 25 | 26 | whisper-backup (0.1.1-1) unstable; urgency=medium 27 | 28 | * Build requiring python-swiftclient >= 3.0.0 which includes support 29 | for timeouts on the HTTP connections 30 | 31 | -- Jack Neely Fri, 13 Jan 2017 15:17:51 -0500 32 | 33 | whisper-backup (0.1.0-1) unstable; urgency=medium 34 | 35 | * Updated pycronscript to 0.2.1 36 | * Old backup purge support 37 | 38 | -- Jack Neely Tue, 18 Aug 2015 11:47:33 -0400 39 | 40 | whisper-backup (0.0.4-1) unstable; urgency=low 41 | 42 | * Initial Release. 
43 | 44 | -- Jack Neely Tue, 02 Jun 2015 15:49:25 -0400 45 | -------------------------------------------------------------------------------- /debian/compat: -------------------------------------------------------------------------------- 1 | 9 2 | -------------------------------------------------------------------------------- /debian/control: -------------------------------------------------------------------------------- 1 | Source: whisper-backup 2 | Section: unknown 3 | Priority: optional 4 | Maintainer: Jack Neely 5 | Build-Depends: debhelper (>= 8.0.0), dh-python, python-all (>= 2.6.6-3~), python-setuptools 6 | Standards-Version: 3.9.4 7 | Homepage: https://github.com/jjneely/whisper-backup 8 | 9 | Package: whisper-backup 10 | Architecture: all 11 | Depends: ${shlibs:Depends}, ${misc:Depends}, python-swiftclient (>= 3.0.0), python-boto 12 | Description: Backup Graphite Whisper files to reliable storage. 13 | Backup Graphite's storage directory full of Whisper files to reliable 14 | storage including AWS S3 and OpenStack Swift. Multi-process and fast 15 | backups. Restores make use of whisper-fill to merge data in with existing 16 | on disk data. 17 | -------------------------------------------------------------------------------- /debian/copyright: -------------------------------------------------------------------------------- 1 | Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ 2 | Upstream-Name: whisper-backup 3 | Source: https://github.com/jjneely/whisper-backup 4 | 5 | Files: whisperbackup/fill.py 6 | Copyright: Booking.com, Fabian Groffen 7 | Scott Sanders 8 | License: Apache-2.0 9 | 10 | Files: * 11 | Copyright: 2015 Jack Neely 12 | License: Apache-2.0 13 | 14 | License: Apache-2.0 15 | Licensed under the Apache License, Version 2.0 (the "License"); 16 | you may not use this file except in compliance with the License. 17 | You may obtain a copy of the License at 18 | . 19 | http://www.apache.org/licenses/LICENSE-2.0 20 | . 
21 | Unless required by applicable law or agreed to in writing, software 22 | distributed under the License is distributed on an "AS IS" BASIS, 23 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 24 | See the License for the specific language governing permissions and 25 | limitations under the License. 26 | . 27 | On Debian systems, the complete text of the Apache version 2.0 license 28 | can be found in "/usr/share/common-licenses/Apache-2.0". 29 | 30 | -------------------------------------------------------------------------------- /debian/docs: -------------------------------------------------------------------------------- 1 | README.md 2 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | # -*- makefile -*- 3 | 4 | # Uncomment this to turn on verbose mode. 5 | #export DH_VERBOSE=1 6 | export PYBUILD_NAME=whisper-fill 7 | 8 | %: 9 | dh $@ --with python2 --buildsystem=pybuild 10 | -------------------------------------------------------------------------------- /debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (native) 2 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright 2019 42 Lines, Inc. 4 | # Original Author: Jack Neely 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | 19 | setup_args = { 20 | "name": "whisper-backup", 21 | "version": "0.3.0", 22 | "platforms": ["any"], 23 | "description": "Backup whisper DB files into S3 or Swift", 24 | "long_description": """\ 25 | Whisper-backup stores compressed WSP files in Amazon S3 or OpenStack Swift 26 | from a Graphite setup. It can backup and restore selected metric globs, 27 | has a retention policy setting, and does not stage backups on the local 28 | server. 29 | """, 30 | "author": "Jack Neely", 31 | "author_email": "jjneely@42lines.net", 32 | "maintainer": "Jack Neely", 33 | "maintainer_email": "jjneely@42lines.net", 34 | "url": 'https://github.com/jjneely/whisper-backup', 35 | "license": "Apache Software License", 36 | "packages": ["whisperbackup"], 37 | "install_requires": ['lockfile', 'whisper'], 38 | "classifiers": [ 39 | "Development Status :: 4 - Beta", 40 | "Intended Audience :: System Administrators", 41 | "License :: OSI Approved :: Apache Software License", 42 | "Operating System :: OS Independent", 43 | "Programming Language :: Python", 44 | "Topic :: System :: Systems Administration" 45 | ], 46 | "entry_points": { 47 | "console_scripts": [ 48 | "whisper-backup = whisperbackup.whisperbackup:main" 49 | ] 50 | } 51 | } 52 | 53 | try: 54 | from setuptools import setup 55 | except ImportError: 56 | from distutils.core import setup 57 | 58 | setup(**setup_args) 59 | -------------------------------------------------------------------------------- /whisperbackup/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jjneely/whisper-backup/e45fb240c59ff2004968fa1e28b35f7b58b282b2/whisperbackup/__init__.py -------------------------------------------------------------------------------- /whisperbackup/disk.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # 4 | # Original Author: Charles Dunbar 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
import __main__
import glob
import logging
import os

logger = logging.getLogger(__main__.__name__)

class Disk(object):
    """Storage backend that keeps backups in a local directory tree.

    ``bucket`` is a directory path on local disk; object keys map
    directly to file paths underneath it. Mirrors the interface of the
    S3/Swift/GCS backends (list/get/put/delete).
    """

    def __init__(self, bucket, noop=False):
        # bucket: root directory for this backup store
        # noop: when True, log mutating operations instead of performing them
        self.bucket = bucket
        self.noop = noop

    def list(self, prefix="*/"):
        """Yield all keys in this bucket matching the glob prefix."""

        list_rep = glob.glob(self.bucket + "/" + prefix + "/*")
        for i in list_rep:
            # Remove preceding bucket name and potential leading slash from
            # returned key value. startswith() is safe on empty strings,
            # unlike indexing i[0].
            i = i.replace(self.bucket, "")
            if i.startswith("/"):
                i = i[1:]
            yield i

    def get(self, src):
        """Return the contents of key src from disk as bytes.

        Returns None if the key does not exist, or an empty string if
        the file exists but could not be read (a warning is logged).
        """

        filename = self.bucket + "/" + src
        # Check the file itself, not just its parent directory, so a
        # missing key is reported as None instead of logging a spurious
        # read warning and returning "".
        if not os.path.exists(filename):
            return None
        k = ""
        try:
            with open(filename, 'rb') as f:
                k = f.read()
        except Exception as e:
            logger.warning("Exception during get: %s" % str(e))
        return k

    def put(self, dst, data):
        """Store the contents of the bytes data at a key named by dst
        on disk, creating parent directories as needed."""

        if self.noop:
            logger.info("No-Op Put: %s" % dst)
        else:
            filename = self.bucket + "/" + dst
            if not os.path.exists(os.path.dirname(filename)):
                os.makedirs(os.path.dirname(filename))
            try:
                with open(filename, 'wb') as f:
                    f.write(data)
            except Exception as e:
                logger.warning("Exception during put: %s" % str(e))

    def delete(self, src):
        """Delete the object on disk referenced by the key name src."""

        if self.noop:
            logger.info("No-Op Delete: %s" % self.bucket + "/" + src)
        else:
            logger.info("Trying to delete %s" % self.bucket + "/" + src)
            os.remove(self.bucket + "/" + src)
#!/usr/bin/env python 2 | 3 | # whisper-fill: unlike whisper-merge, don't overwrite data that's 4 | # already present in the target file, but instead, only add the missing 5 | # data (e.g. where the gaps in the target file are). Because no values 6 | # are overwritten, no data or precision gets lost. Also, unlike 7 | # whisper-merge, try to take the highest-precision archive to provide 8 | # the data, instead of the one with the largest retention. 9 | # Using this script, reconciliation between two replica instances can be 10 | # performed by whisper-fill-ing the data of the other replica with the 11 | # data that exists locally, without introducing the quite remarkable 12 | # gaps that whisper-merge leaves behind (filling a higher precision 13 | # archive with data from a lower precision one) 14 | 15 | # Work performed by Fabian Groffen @grobian while working at Booking.com. 16 | # additional patches are from https://github.com/jssjr/carbonate/ 17 | 18 | from whisper import info, fetch, update_many 19 | 20 | try: 21 | from whisper import operator 22 | HAS_OPERATOR = True 23 | except ImportError: 24 | HAS_OPERATOR = False 25 | 26 | import itertools 27 | import time 28 | import sys 29 | 30 | def itemgetter(*items): 31 | if HAS_OPERATOR: 32 | return operator.itemgetter(*items) 33 | else: 34 | if len(items) == 1: 35 | item = items[0] 36 | 37 | def g(obj): 38 | return obj[item] 39 | else: 40 | 41 | def g(obj): 42 | return tuple(obj[item] for item in items) 43 | return g 44 | 45 | 46 | def fill(src, dst, tstart, tstop): 47 | # fetch range start-stop from src, taking values from the highest 48 | # precision archive, thus optionally requiring multiple fetch + merges 49 | srcHeader = info(src) 50 | 51 | srcArchives = srcHeader['archives'] 52 | srcArchives.sort(key=itemgetter('retention')) 53 | 54 | # find oldest point in time, stored by both files 55 | srcTime = int(time.time()) - srcHeader['maxRetention'] 56 | 57 | if tstart < srcTime and tstop < srcTime: 58 | return 59 | 
60 | # we want to retain as much precision as we can, hence we do backwards 61 | # walk in time 62 | 63 | # skip forward at max 'step' points at a time 64 | for archive in srcArchives: 65 | # skip over archives that don't have any data points 66 | rtime = time.time() - archive['retention'] 67 | if tstop <= rtime: 68 | continue 69 | 70 | untilTime = tstop 71 | fromTime = rtime if rtime > tstart else tstart 72 | 73 | (timeInfo, values) = fetch(src, fromTime, untilTime) 74 | (start, end, archive_step) = timeInfo 75 | pointsToWrite = list(itertools.ifilter( 76 | lambda points: points[1] is not None, 77 | itertools.izip(xrange(start, end, archive_step), values))) 78 | # order points by timestamp, newest first 79 | pointsToWrite.sort(key=lambda p: p[0], reverse=True) 80 | update_many(dst, pointsToWrite) 81 | 82 | tstop = fromTime 83 | 84 | # can stop when there's nothing to fetch any more 85 | if tstart == tstop: 86 | return 87 | 88 | 89 | def fill_archives(src, dst, startFrom): 90 | header = info(dst) 91 | archives = header['archives'] 92 | archives = sorted(archives, key=lambda t: t['retention']) 93 | 94 | for archive in archives: 95 | fromTime = time.time() - archive['retention'] 96 | if fromTime >= startFrom: 97 | continue 98 | 99 | (timeInfo, values) = fetch(dst, fromTime, startFrom) 100 | (start, end, step) = timeInfo 101 | gapstart = None 102 | for v in values: 103 | if not v and not gapstart: 104 | gapstart = start 105 | elif v and gapstart: 106 | # ignore single units lost 107 | if (start - gapstart) > archive['secondsPerPoint']: 108 | fill(src, dst, gapstart - step, start) 109 | gapstart = None 110 | elif gapstart and start == end - step: 111 | fill(src, dst, gapstart - step, start) 112 | 113 | start += step 114 | 115 | startFrom = fromTime 116 | 117 | 118 | def main(argv): 119 | if len(argv) != 2: 120 | print("usage: whisper-fill.py src dst"); 121 | print(" copies data from src in dst, if missing") 122 | sys.exit(1) 123 | 124 | src = argv[0] 125 | dst = 
argv[1] 126 | startFrom = time.time() 127 | 128 | fill_archives(src, dst, startFrom) 129 | 130 | 131 | if __name__ == "__main__": 132 | main(sys.argv[1:]) -------------------------------------------------------------------------------- /whisperbackup/gcs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright 2019 42 Lines, Inc. 4 | # Original Author: Jack Neely 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | 18 | import __main__ 19 | import logging 20 | 21 | from google.cloud import storage 22 | 23 | logger = logging.getLogger(__main__.__name__) 24 | 25 | # Google Cloud Storage 26 | class GCS(object): 27 | 28 | def __init__(self, bucket, project="", region="us", noop=False): 29 | """Setup the GCS storage backend with the bucket we will use and 30 | optional region.""" 31 | if project == "": 32 | self.client = storage.Client() 33 | else: 34 | self.client = storage.Client(project) 35 | 36 | self.noop = noop 37 | 38 | self.bucket = storage.Bucket(self.client, bucket) 39 | self.bucket.location = region 40 | self.bucket.storage_class = "STANDARD" 41 | 42 | # Create the bucket if it doesn't exist 43 | if not self.bucket.exists(): 44 | if not noop: 45 | self.bucket.create() 46 | else: 47 | logger.info("No-Op: Create bucket: %s" % bucket) 48 | 49 | def list(self, prefix=""): 50 | """Return all keys in this bucket.""" 51 | for i in self.client.list_blobs(self.bucket, prefix=prefix): 52 | yield i.name 53 | 54 | def get(self, src): 55 | """Return the contents of src from this bucket as a string.""" 56 | obj = storage.blob.Blob(src, self.bucket) 57 | if not obj.exists(): 58 | return None 59 | 60 | return obj.download_as_string() 61 | 62 | def put(self, dst, data): 63 | """Store the contents of the string data at a key named by dst 64 | in GCS.""" 65 | 66 | if self.noop: 67 | logger.info("No-Op Put: %s" % dst) 68 | else: 69 | obj = storage.blob.Blob(dst, self.bucket) 70 | obj.upload_from_string(data, content_type="application/octet-stream") 71 | 72 | def delete(self, src): 73 | """Delete the object in GCP referenced by the key name src.""" 74 | 75 | if self.noop: 76 | logger.info("No-Op Delete: %s" % src) 77 | else: 78 | obj = storage.blob.Blob(dst, self.bucket) 79 | obj.delete() 80 | 81 | -------------------------------------------------------------------------------- /whisperbackup/multiprocessinglog.py: 
--------------------------------------------------------------------------------
from logging.handlers import RotatingFileHandler
import multiprocessing, threading, logging, sys, traceback

class MultiProcessingLog(logging.Handler):
    # Logging handler that is safe to use from multiple processes: every
    # record is pushed onto a multiprocessing.Queue, and a single daemon
    # thread in the creating process drains the queue into a
    # RotatingFileHandler, so only one process ever writes the log file.

    def __init__(self, filename, mode='a', maxBytes=0, backupCount=0, encoding=None, delay=0):
        logging.Handler.__init__(self)

        # In case our call to RotatingFileHandler blows up we first set
        # the _handler to None
        self._handler = None
        self._handler = RotatingFileHandler(filename, mode, maxBytes, backupCount,
                encoding, delay)
        self.queue = multiprocessing.Queue(-1)

        # Daemon consumer thread: never blocks interpreter shutdown.
        t = threading.Thread(target=self.receive)
        t.daemon = True
        t.start()

    def setFormatter(self, fmt):
        # Keep our own formatter and the wrapped handler's in sync.
        logging.Handler.setFormatter(self, fmt)
        self._handler.setFormatter(fmt)

    def receive(self):
        # Consumer loop (runs in the background thread): hand each queued
        # record to the real file handler.
        while True:
            try:
                record = self.queue.get()
                self._handler.emit(record)
            except (KeyboardInterrupt, SystemExit):
                raise
            except EOFError:
                # The queue's underlying pipe closed; nothing more will arrive.
                break
            except:
                traceback.print_exc(file=sys.stderr)

    def send(self, s):
        # Producer side: put_nowait so the logging caller is never blocked.
        self.queue.put_nowait(s)

    def _format_record(self, record):
        # ensure that exc_info and args
        # have been stringified. Removes any chance of
        # unpickleable things inside and possibly reduces
        # message size sent over the pipe
        if record.args:
            record.msg = record.msg % record.args
            record.args = None
        if record.exc_info:
            # Formatting caches the traceback text on the record, after
            # which the unpicklable exc_info tuple can be dropped.
            dummy = self.format(record)
            record.exc_info = None

        return record

    def emit(self, record):
        # Standard Handler entry point: sanitize the record, then queue it.
        try:
            s = self._format_record(record)
            self.send(s)
        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            self.handleError(record)

    def close(self):
        if self._handler is not None:
            self._handler.close()
        logging.Handler.close(self)


class MultiProcessingLogStream(MultiProcessingLog):
    # Same queue/daemon-thread design as MultiProcessingLog, but drains
    # into a StreamHandler (e.g. stderr) instead of a rotating file.

    def __init__(self, stream=None):
        logging.Handler.__init__(self)

        # In case our call to StreamHandler blows up we first set
        # the _handler to None
        self._handler = None
        self._handler = logging.StreamHandler(stream)
        self.queue = multiprocessing.Queue(-1)

        t = threading.Thread(target=self.receive)
        t.daemon = True
        t.start()
--------------------------------------------------------------------------------
/whisperbackup/noop.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
#
# Copyright 2014 42 Lines, Inc.
# Original Author: Jack Neely
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
17 | 18 | import __main__ 19 | import logging 20 | 21 | logger = logging.getLogger(__main__.__name__) 22 | 23 | class NoOP(object): 24 | 25 | def __init__(self, bucket, noop): 26 | """Setup the S3 storage backend with the bucket we will use and 27 | optional region.""" 28 | self.bucket = bucket 29 | self.noop = noop 30 | 31 | def list(self, prefix=""): 32 | """Return all keys in this bucket.""" 33 | 34 | logger.debug("Call to list('%s') under no-op." % prefix) 35 | return [] 36 | 37 | def get(self, src): 38 | """Return the contents of src from S3 as a string.""" 39 | 40 | logger.debug("Call to get('%s') under no-op." % src) 41 | return None 42 | 43 | def put(self, dst, data): 44 | """Store the contents of the string data at a key named by dst 45 | in S3.""" 46 | 47 | logger.debug("Call to put('%s') under no-op." % dst) 48 | 49 | def delete(self, src): 50 | """Delete the object in S3 referenced by the key name src.""" 51 | 52 | logger.debug("Call to delete('%s') under no-op." % src) 53 | -------------------------------------------------------------------------------- /whisperbackup/pycronscript.py: -------------------------------------------------------------------------------- 1 | ''' Convenience class for writing cron scripts''' 2 | # pylint: disable=R0903 3 | 4 | # Copyright 2014 42Lines, Inc. 5 | # Original Author: Jim Browne 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | import datetime as DT 20 | from lockfile import FileLock, LockFailed, LockTimeout 21 | import logging 22 | import logging.handlers 23 | import __main__ as main 24 | from optparse import OptionParser, make_option 25 | import os 26 | from random import randint 27 | import sys 28 | import time 29 | 30 | # Support for RotateFileHandler in multiple processes 31 | from multiprocessinglog import MultiProcessingLog, MultiProcessingLogStream 32 | 33 | __version__ = '0.2.1' 34 | 35 | 36 | class StdErrFilter(logging.Filter): 37 | ''' Discard all events below a configured level ''' 38 | 39 | def __init__(self, level=logging.WARNING, discard_all=False): 40 | self.level = level 41 | self.discard_all = discard_all 42 | super(StdErrFilter, self).__init__() 43 | 44 | def filter(self, record): 45 | if self.discard_all: 46 | return False 47 | else: 48 | return (record.levelno >= self.level) 49 | 50 | 51 | class CronScript(object): 52 | ''' Convenience class for writing cron scripts ''' 53 | 54 | def __init__(self, args=None, options=None, usage=None, 55 | disable_interspersed_args=False): 56 | self.lock = None 57 | self.start_time = None 58 | self.end_time = None 59 | 60 | if options is None: 61 | options = [] 62 | 63 | if args is None: 64 | args = sys.argv[1:] 65 | 66 | prog = os.path.basename(main.__file__) 67 | logfile = os.path.join('/var/log/', "%s.log" % prog) 68 | lockfile = os.path.join('/var/lock/', "%s" % prog) 69 | stampfile = os.path.join('/var/tmp/', "%s.success" % prog) 70 | options.append(make_option("--debug", "-d", action="store_true", 71 | help="Minimum log level of DEBUG")) 72 | options.append(make_option("--quiet", "-q", action="store_true", 73 | help="Only WARN and above to stdout")) 74 | options.append(make_option("--nolog", action="store_true", 75 | help="Do not log to LOGFILE")) 76 | options.append(make_option("--logfile", type="string", 77 | default=logfile, 78 | help="File to log to, default %default")) 79 | options.append(make_option("--syslog", 
action="store_true", 80 | help="Log to syslog instead of a file")) 81 | options.append(make_option("--nolock", action="store_true", 82 | help="Do not use a lockfile")) 83 | options.append(make_option("--lockfile", type="string", 84 | default=lockfile, 85 | help="Lock file, default %default")) 86 | options.append(make_option("--nostamp", action="store_true", 87 | help="Do not use a success stamp file")) 88 | options.append(make_option("--stampfile", type="string", 89 | default=stampfile, 90 | help="Success stamp file, default %default")) 91 | helpmsg = "Lock timeout in seconds, default %default" 92 | options.append(make_option("--locktimeout", default=90, type="int", 93 | help=helpmsg)) 94 | helpmsg = "Sleep a random time between 0 and N seconds before starting, default %default" 95 | options.append(make_option("--splay", default=0, type="int", 96 | help=helpmsg)) 97 | 98 | parser = OptionParser(option_list=options, usage=usage) 99 | if disable_interspersed_args: 100 | # Stop option parsing at first non-option 101 | parser.disable_interspersed_args() 102 | (self.options, self.args) = parser.parse_args(args) 103 | 104 | self.logger = logging.getLogger(main.__name__) 105 | 106 | if self.options.debug: 107 | self.logger.setLevel(logging.DEBUG) 108 | else: 109 | self.logger.setLevel(logging.INFO) 110 | 111 | # Log to syslog 112 | if self.options.syslog: 113 | syslog_formatter = logging.Formatter("%s: %%(levelname)s %%(message)s" % prog) 114 | handler = logging.handlers.SysLogHandler( 115 | address="/dev/log", 116 | facility=logging.handlers.SysLogHandler.LOG_LOCAL3 117 | ) 118 | handler.setFormatter(syslog_formatter) 119 | self.logger.addHandler(handler) 120 | 121 | default_formatter = logging.Formatter("%(asctime)s;%(levelname)s;%(message)s", 122 | "%Y-%m-%d-%H:%M:%S") 123 | if not self.options.nolog: 124 | # Log to file 125 | try: 126 | handler = MultiProcessingLog( 127 | "%s" % (self.options.logfile), 128 | maxBytes=(50 * 1024 * 1024), 129 | backupCount=10) 130 | 
except IOError: 131 | sys.stderr.write("Fatal: Could not open log file: %s\n" 132 | % self.options.logfile) 133 | sys.exit(1) 134 | 135 | handler.setFormatter(default_formatter) 136 | self.logger.addHandler(handler) 137 | 138 | # If quiet, only WARNING and above go to STDERR; otherwise all 139 | # logging goes to stderr 140 | handler2 = MultiProcessingLogStream(sys.stderr) 141 | if self.options.quiet: 142 | err_filter = StdErrFilter() 143 | handler2.addFilter(err_filter) 144 | handler2.setFormatter(default_formatter) 145 | self.logger.addHandler(handler2) 146 | 147 | self.logger.debug(self.options) 148 | 149 | def __enter__(self): 150 | if self.options.storage_path > 0 and not self.options.storage_path.endswith('/'): 151 | self.options.storage_path = self.options.storage_path + '/' 152 | 153 | if self.options.splay > 0: 154 | splay = randint(0, self.options.splay) 155 | self.logger.debug('Sleeping for %d seconds (splay=%d)' % 156 | (splay, self.options.splay)) 157 | time.sleep(splay) 158 | self.start_time = DT.datetime.today() 159 | if not self.options.nolock: 160 | self.logger.debug('Attempting to acquire lock %s (timeout %s)', 161 | self.options.lockfile, 162 | self.options.locktimeout) 163 | self.lock = FileLock(self.options.lockfile) 164 | try: 165 | self.lock.acquire(timeout=self.options.locktimeout) 166 | except LockFailed as e: 167 | self.logger.error("Lock could not be acquired.") 168 | self.logger.error(str(e)) 169 | sys.exit(1) 170 | except LockTimeout as e: 171 | msg = "Lock could not be acquired. Timeout exceeded." 
172 | self.logger.error(msg) 173 | sys.exit(1) 174 | 175 | def __exit__(self, etype, value, traceback): 176 | self.end_time = DT.datetime.today() 177 | self.logger.debug('Run time: %s', self.end_time - self.start_time) 178 | if not self.options.nolock: 179 | self.logger.debug('Attempting to release lock %s', 180 | self.options.lockfile) 181 | self.lock.release() 182 | if etype is None: 183 | if not self.options.nostamp: 184 | open(self.options.stampfile, "w") 185 | -------------------------------------------------------------------------------- /whisperbackup/s3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright 2014 42 Lines, Inc. 4 | # Original Author: Jack Neely 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | 18 | import boto 19 | import __main__ 20 | import logging 21 | 22 | from boto.s3.key import Key 23 | 24 | logger = logging.getLogger(__main__.__name__) 25 | 26 | class S3(object): 27 | 28 | def __init__(self, bucket, region="us-east-1", noop=False): 29 | """Setup the S3 storage backend with the bucket we will use and 30 | optional region.""" 31 | self.conn = boto.s3.connect_to_region(region) 32 | self.bucket = bucket 33 | self.noop = noop 34 | 35 | b = self.conn.lookup(self.bucket) 36 | if not noop and b is None: 37 | # Create the bucket if it doesn't exist 38 | self.conn.create_bucket(self.bucket, location=region) 39 | 40 | self.__b = self.conn.get_bucket(self.bucket) 41 | 42 | def list(self, prefix=""): 43 | """Return all keys in this bucket.""" 44 | for i in self.__b.list(prefix): 45 | yield i.key 46 | 47 | def get(self, src): 48 | """Return the contents of src from S3 as a string.""" 49 | if self.__b.get_key(src) is None: 50 | return None 51 | 52 | k = Key(self.__b) 53 | k.key = src 54 | return k.get_contents_as_string() 55 | 56 | def put(self, dst, data): 57 | """Store the contents of the string data at a key named by dst 58 | in S3.""" 59 | 60 | if self.noop: 61 | logger.info("No-Op Put: %s" % dst) 62 | else: 63 | k = Key(self.__b) 64 | k.key = dst 65 | k.set_contents_from_string(data) 66 | 67 | def delete(self, src): 68 | """Delete the object in S3 referenced by the key name src.""" 69 | 70 | if self.noop: 71 | logger.info("No-Op Delete: %s" % src) 72 | else: 73 | k = Key(self.__b) 74 | k.key = src 75 | k.delete() 76 | -------------------------------------------------------------------------------- /whisperbackup/swift.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright 2017 42 Lines, Inc. 4 | # Original Author: Jack Neely 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 
8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | 18 | import __main__ 19 | import logging 20 | import os 21 | import sys 22 | 23 | from swiftclient.client import Connection 24 | from swiftclient.exceptions import ClientException 25 | 26 | logger = logging.getLogger(__main__.__name__) 27 | 28 | class Swift(object): 29 | 30 | def __init__(self, bucket, noop): 31 | """Setup the S3 storage backend with the bucket we will use and 32 | optional region.""" 33 | 34 | # This is our Swift container 35 | self.bucket = bucket 36 | self.noop = noop 37 | 38 | # We assume your environment variables are set correctly just like 39 | # you would for the swift command line util 40 | try: 41 | self.conn = Connection(authurl=os.environ["ST_AUTH"], 42 | user=os.environ["ST_USER"], 43 | key=os.environ["ST_KEY"], 44 | timeout=30) 45 | except KeyError: 46 | logger.warning("Missing environment variables for Swift authentication") 47 | logger.warning("Bailing...") 48 | sys.exit(1) 49 | 50 | headers, objs = self.conn.get_account(self.bucket) 51 | for i in objs: 52 | logger.debug("Searching for bucket %s == %s" % (self.bucket, i)) 53 | if not noop and self.bucket not in objs: 54 | self.conn.put_container(self.bucket) 55 | 56 | 57 | def list(self, prefix=None): 58 | """Return all keys in this bucket.""" 59 | 60 | headers, objs = self.conn.get_container(self.bucket, prefix=prefix) 61 | while objs: 62 | # Handle paging 63 | i = {} 64 | for i in objs: 65 | yield i["name"] 66 | headers, objs = self.conn.get_container(self.bucket, 67 | marker=i["name"], prefix=prefix) 68 | 69 | 70 | def get(self, 
src): 71 | """Return the contents of src from S3 as a string.""" 72 | 73 | try: 74 | headers, obj = self.conn.get_object(self.bucket, src) 75 | return obj 76 | except ClientException: 77 | # Request failed....object doesn't exist 78 | return None 79 | 80 | 81 | def put(self, dst, data): 82 | """Store the contents of the string data at a key named by dst 83 | in S3.""" 84 | 85 | if self.noop: 86 | logger.info("No-Op Put: %s" % dst) 87 | else: 88 | self.conn.put_object(self.bucket, dst, data) 89 | 90 | 91 | def delete(self, src): 92 | """Delete the object in S3 referenced by the key name src.""" 93 | 94 | if self.noop: 95 | logger.info("No-Op Delete: %s" % src) 96 | else: 97 | self.conn.delete_object(self.bucket, src) 98 | -------------------------------------------------------------------------------- /whisperbackup/whisperbackup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # Copyright 2014-2017 42 Lines, Inc. 4 | # Original Author: Jack Neely 5 | # 6 | # Licensed under the Apache License, Version 2.0 (the "License"); 7 | # you may not use this file except in compliance with the License. 8 | # You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | 18 | import sys 19 | import os 20 | import os.path 21 | import logging 22 | import fcntl 23 | import gzip 24 | import hashlib 25 | import datetime 26 | import time 27 | import tempfile 28 | import shutil 29 | 30 | from multiprocessing import Pool 31 | from optparse import make_option 32 | from fnmatch import fnmatch 33 | from StringIO import StringIO 34 | 35 | try: 36 | import snappy 37 | except ImportError: 38 | snappy = None 39 | 40 | from fill import fill_archives 41 | from pycronscript import CronScript 42 | 43 | import __main__ 44 | 45 | logger = logging.getLogger(__main__.__name__) 46 | 47 | def listMetrics(storage_dir, storage_path, glob): 48 | storage_dir = storage_dir.rstrip(os.sep) 49 | 50 | for root, dirnames, filenames in os.walk(storage_dir): 51 | for filename in filenames: 52 | if filename.endswith(".wsp"): 53 | root_path = root[len(storage_dir) + 1:] 54 | m_path = os.path.join(root_path, filename) 55 | m_name, m_ext = os.path.splitext(m_path) 56 | m_name = m_name.replace('/', '.') 57 | if glob == "*" or fnmatch(m_name, glob): 58 | # We use globbing on the metric name, not the path 59 | yield storage_path + m_name, os.path.join(root, filename) 60 | 61 | 62 | def toPath(prefix, metric): 63 | """Translate the metric key name in metric to its OS path location 64 | rooted under prefix.""" 65 | 66 | m = metric.replace(".", "/") + ".wsp" 67 | return os.path.join(prefix, m) 68 | 69 | 70 | def storageBackend(script): 71 | if len(script.args) <= 1: 72 | logger.error("Storage backend must be specified, either: disk, gcs, noop, s3, or swift") 73 | sys.exit(1) 74 | if script.args[1].lower() == "disk": 75 | import disk 76 | return disk.Disk(script.options.bucket, script.options.noop) 77 | if script.args[1].lower() == "noop": 78 | import noop 79 | return noop.NoOP(script.options.bucket, script.options.noop) 80 | if script.args[1].lower() == "s3": 81 | import s3 82 | s3args = {"region": "us-east-1"} 83 | for i in script.args[2:]: 84 | fields = i.split("=") 85 | 
if len(fields) > 1: 86 | s3args[fields[0]] = fields[1] 87 | else: 88 | s3args["region"] = fields[0] 89 | return s3.S3(script.options.bucket, s3args["region"], script.options.noop) 90 | if script.args[1].lower() == "swift": 91 | import swift 92 | return swift.Swift(script.options.bucket, script.options.noop) 93 | if script.args[1].lower() == "gcs": 94 | import gcs 95 | gcsargs = {"project": "", "region": "us"} 96 | for i in script.args[2:]: 97 | fields = i.split("=") 98 | if len(fields) > 1: 99 | gcsargs[fields[0]] = fields[1] 100 | else: 101 | gcsargs["region"] = fields[0] 102 | return gcs.GCS(script.options.bucket, gcsargs["project"], 103 | gcsargs["region"], script.options.noop) 104 | 105 | logger.error("Invalid storage backend, must be: disk, gcs, noop, s3, or swift") 106 | sys.exit(1) 107 | 108 | 109 | def utc(): 110 | return datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%S+00:00") 111 | 112 | 113 | def backup(script): 114 | # I want to modify these variables in a sub-function, this is the 115 | # only thing about python 2.x that makes me scream. 
116 | data = {} 117 | data['complete'] = 0 118 | data['length'] = 0 119 | 120 | def init(script): 121 | # The script object isn't pickle-able 122 | globals()['script'] = script 123 | 124 | def cb(result): 125 | # Do some progress tracking when jobs complete 126 | data['complete'] = data['complete'] + 1 127 | if data['complete'] % 5 == 0: 128 | # Some rate limit on logging 129 | logger.info("Progress: %s/%s or %f%%" \ 130 | % (data['complete'], data['length'], 131 | 100 * float(data['complete']) / float(data['length']))) 132 | 133 | logger.info("Scanning filesystem...") 134 | # Unroll the generator so we can calculate length 135 | jobs = [ (k, p) for k, p in listMetrics(script.options.prefix, script.options.storage_path, script.options.metrics) ] 136 | data['length'] = len(jobs) 137 | 138 | workers = Pool(processes=script.options.processes, 139 | initializer=init, initargs=[script]) 140 | logger.info("Starting backup of %d whisper files" % data['length']) 141 | for k, p in jobs: 142 | workers.apply_async(backupWorker, [k, p], callback=cb) 143 | 144 | workers.close() 145 | workers.join() 146 | logger.info("Backup complete") 147 | 148 | purge(script, { k: True for k, p in jobs }) 149 | 150 | 151 | def purge(script, localMetrics): 152 | """Purge backups in our store that are non-existant on local disk and 153 | are more than purge days old as set in the command line options.""" 154 | 155 | # localMetrics must be a dict so we can do fast lookups 156 | 157 | if script.options.purge < 0: 158 | log.debug("Purge is disabled, skipping") 159 | return 160 | 161 | logger.info("Beginning purge operation.") 162 | metrics = search(script) 163 | expireDate = datetime.datetime.utcnow() - datetime.timedelta(days=script.options.purge) 164 | expireStamp = expireDate.strftime("%Y-%m-%dT%H:%M:%S+00:00") 165 | c = 0 166 | 167 | # Search through the in-store metrics 168 | for k, v in metrics.items(): 169 | if k in localMetrics: 170 | continue 171 | for p in v: 172 | ts = p[p.find("/")+1:] 
173 | if ts < expireStamp: 174 | logger.info("Purging %s @ %s" % (k, ts)) 175 | try: 176 | # Delete the WSP file first, if the delete of the SHA1 177 | # causes the error, the next run will get it, rather 178 | # than just leaking the WSP storage space. 179 | t = time.time() 180 | if not script.options.noop: 181 | script.store.delete("%s%s/%s.wsp.%s" 182 | % (script.options.storage_path, k, ts, 183 | script.options.algorithm)) 184 | script.store.delete("%s%s/%s.sha1" 185 | % (script.options.storage_path, k, ts)) 186 | else: 187 | # Do a list to check for 404s 188 | t = "%s%s/%s" % (k, ts) 189 | d = [ i for i in script.store.list("%s.wsp.%s" \ 190 | % (t, script.options.algorithm)) ] 191 | if len(d) == 0: 192 | logger.warn("Purge: Missing file in store: %s.wsp.%s" \ 193 | % (p, script.options.algorithm)) 194 | d = [ i for i in script.store.list("%s.sha1" % t) ] 195 | if len(d) == 0: 196 | logger.warn("Purge: Missing file in store: %s.sha1" % t) 197 | 198 | logger.debug("Purge of %s @ %s took %d seconds" % (k, ts, time.time()-t)) 199 | except KeyboardInterrupt: 200 | raise 201 | except Exception as e: 202 | # On an error here we want to leave files alone. 203 | # This includes file not found (404) errors 204 | logger.warning("Exception during delete: %s" % str(e)) 205 | else: 206 | c += 1 207 | 208 | logger.info("Purge complete -- %d backups removed" % c) 209 | 210 | 211 | def backupWorker(k, p): 212 | # Inside this fuction/process 'script' is global 213 | logger.info("Backup: Processing %s ..." % k) 214 | # We acquire a file lock using the same locks whisper uses. flock() 215 | # exclusive locks are cleared when the file handle is closed. This 216 | # is the same practice that the whisper code uses. 
217 | logger.debug("Locking file...") 218 | try: 219 | with open(p, "rb") as fh: 220 | fcntl.flock(fh.fileno(), fcntl.LOCK_EX) # May block 221 | blob = fh.read() 222 | timestamp = utc() 223 | except IOError as e: 224 | logger.warning("An IOError occured locking %s: %s" \ 225 | % (k, str(e))) 226 | return 227 | except Exception as e: 228 | logger.error("An Unknown exception occurred, skipping metric: %s" 229 | % str(e)) 230 | return 231 | 232 | # SHA1 hash...have we seen this metric DB file before? 233 | logger.debug("Calculating hash and searching data store...") 234 | blobSHA = hashlib.sha1(blob).hexdigest() 235 | knownBackups = [] 236 | for i in script.store.list(k+"/"): 237 | if i.endswith(".sha1"): 238 | knownBackups.append(i) 239 | 240 | knownBackups.sort() 241 | if len(knownBackups) > 0: 242 | i = knownBackups[-1] # The last known backup 243 | logger.debug("Examining %s from data store of %d backups" 244 | % (i, len(knownBackups))) 245 | if script.store.get(i) == blobSHA: 246 | logger.info("Metric DB %s is unchanged from last backup, " \ 247 | "skipping." 
% k) 248 | # We purposely do not check retention in this case 249 | return 250 | 251 | # We're going to backup this file, compress it as a normal .gz 252 | # file so that it can be restored manually if needed 253 | if not script.options.noop: 254 | logger.debug("Compressing data...") 255 | blobgz = StringIO() 256 | if script.options.algorithm == "gz": 257 | fd = gzip.GzipFile(fileobj=blobgz, mode="wb") 258 | fd.write(blob) 259 | fd.close() 260 | elif script.options.algorithm == "sz": 261 | compressor = snappy.StreamCompressor() 262 | blobgz.write(compressor.compress(blob)) 263 | else: 264 | raise StandardError("Unknown compression format requested") 265 | 266 | # Grab our timestamp and assemble final upstream key location 267 | logger.debug("Uploading payload as: %s/%s.wsp.%s" \ 268 | % (k, timestamp, script.options.algorithm)) 269 | logger.debug("Uploading SHA1 as : %s/%s.sha1" % (k, timestamp)) 270 | try: 271 | if not script.options.noop: 272 | t = time.time() 273 | script.store.put("%s/%s.wsp.%s" \ 274 | % (k, timestamp, script.options.algorithm), blobgz.getvalue()) 275 | script.store.put("%s/%s.sha1" % (k, timestamp), blobSHA) 276 | logger.debug("Upload of %s @ %s took %d seconds" 277 | % (k, timestamp, time.time()-t)) 278 | except Exception as e: 279 | logger.warning("Exception during upload: %s" % str(e)) 280 | 281 | # Free Memory 282 | blobgz.close() 283 | del blob 284 | 285 | # Handle our retention policy, we keep at most X backups 286 | while len(knownBackups) + 1 > script.options.retention: 287 | # The oldest (and not current) backup 288 | i = knownBackups[0].replace(".sha1", "") 289 | logger.info("Removing old backup: %s.wsp.%s" % (i, script.options.algorithm)) 290 | logger.debug("Removing old SHA1: %s.sha1" % i) 291 | try: 292 | t = time.time() 293 | if not script.options.noop: 294 | script.store.delete("%s.wsp.%s" % (i, script.options.algorithm)) 295 | script.store.delete("%s.sha1" % i) 296 | else: 297 | # Do a list, we want to log if there's a 404 298 
def findBackup(script, objs, date):
    """Return the UTC ISO 8601 timestamp embedded in the given list of
    backup object paths that is the most recent timestamp strictly before
    date, where date is an ISO 8601 string ("%Y-%m-%dT%H:%M:%S+00:00").

    objs entries look like "metricName/2020-01-01T00:00:00+00:00" (the
    ".sha1" suffix has already been stripped by search()).

    Returns None when no backup in objs predates date."""

    fmt = "%Y-%m-%dT%H:%M:%S+00:00"

    timestamps = []
    for i in objs:
        # Everything after the first "/" is the timestamp portion...
        i = i[i.find("/")+1:]
        # ...minus any file extension that may still be present
        if "." in i:
            i = i[:i.find(".")]
        # So now i is just the ISO8601 timestamp
        # XXX: Should probably actually parse the tz here
        timestamps.append(datetime.datetime.strptime(i, fmt))

    # BUG FIX: honor the date argument rather than always reading
    # script.options.date.  The existing caller passes script.options.date,
    # so behavior is unchanged for current call sites, but the function now
    # actually implements its documented contract.
    refDate = datetime.datetime.strptime(date, fmt)

    # Walk the timestamps newest-first and return the first one that is
    # older than the reference date.
    for ts in sorted(timestamps, reverse=True):
        if refDate > ts:
            return ts.strftime(fmt)

    # Reachable when the requested date is earlier than every backup
    logger.warning("No backup found prior to %s" % date)
    return None
def heal(script, metric, data):
    """Heal the metric in metric with the WSP data stored as a string
    in data.

    The restored blob is staged in a temporary file, merged into any
    existing whisper DB via fill_archives(), and copied straight into
    place when no DB exists or the merge fails."""

    target = toPath(script.options.prefix, metric)
    fill_failed = False

    # Stage the restored whisper blob in a temporary file on disk
    handle, tmpfile = tempfile.mkstemp(prefix="whisper-backup")
    tmp = os.fdopen(handle, "wb")
    tmp.write(data)
    tmp.close()

    # If a whisper DB already exists, merge the backup data into it
    if os.path.exists(target):
        logger.debug("Healing existing whisper file: %s" % target)
        try:
            fill_archives(tmpfile, target, time.time())
        except Exception as e:
            logger.warning("Exception during heal of %s will overwrite." % target)
            logger.warning(str(e))
            fill_failed = True

    # Last ditch effort: drop the restored file straight into place when
    # there is no existing DB or the merge above blew up
    if fill_failed or not os.path.exists(target):
        logger.debug("Copying restored DB file into place")
        try:
            os.makedirs(os.path.dirname(target))
        except os.error:
            # Directory exists
            pass

        shutil.copyfile(tmpfile, target)

    os.unlink(tmpfile)


def search(script):
    """Return a hash such that all keys are metric names found in our
    backup store and metric names match the glob given on the command
    line.  Each value is a list of paths into the backup store of all
    present backups for that metric -- technically the path to the SHA1
    checksum file, with the ".sha1" extension removed."""

    logger.info("Searching remote file store...")

    prefix_len = len(script.options.storage_path)
    found = {}

    for obj in script.store.list(prefix=script.options.storage_path):
        rel = obj[prefix_len:]
        # The SHA1 file is the canary/flag marking a complete backup
        if not rel.endswith(".sha1"):
            continue
        # The metric name is everything before the first /
        name = rel[:rel.find("/")]
        if fnmatch(name, script.options.metrics):
            found.setdefault(name, []).append(rel[:-5])

    return found
def restore(script):
    """Restore whisper databases from the object store.

    For every backed-up metric matching the command-line glob, pick the
    last backup before script.options.date, download it, decompress it
    (gz or sz), verify it against its SHA1 checksum, and heal it into
    place under script.options.prefix."""

    # Build a list of metrics to restore from our object store and globbing
    metrics = search(script)

    # For each metric, find the date we want
    for i in metrics.keys():
        objs = metrics[i]
        d = findBackup(script, objs, script.options.date)
        # NOTE(review): d may be None when no backup predates the date;
        # the store.get() of the resulting "None" key presumably returns
        # None and the metric is skipped below -- confirm backend behavior.
        logger.info("Restoring %s from timestamp %s" % (i, d))

        # Fetch both the compressed payload and its checksum sidecar
        blobgz = script.store.get("%s%s/%s.wsp.%s" \
                % (script.options.storage_path, i, d, script.options.algorithm))
        blobSHA = script.store.get("%s%s/%s.sha1" \
                % (script.options.storage_path, i, d))

        if blobgz is None:
            logger.warning("Skipping missing file in object store: %s/%s.wsp.%s" \
                    % (i, d, script.options.algorithm))
            continue

        # Decompress: wrap the raw bytes in a file-like object so the
        # gzip reader can consume it (Python 2 StringIO)
        blobgz = StringIO(blobgz)
        blob = None
        if script.options.algorithm == "gz":
            fd = gzip.GzipFile(fileobj=blobgz, mode="rb")
            blob = fd.read()
            fd.close()
        elif script.options.algorithm == "sz":
            compressor = snappy.StreamDecompressor()
            blob = compressor.decompress(blobgz.getvalue())
            try:
                # flush() raises if the snappy stream was truncated/corrupt
                compressor.flush()
            except UncompressError as e:
                logger.error("Corrupt file in store: %s%s/%s.wsp.sz Error %s" \
                        % (script.options.storage_path, i, d, str(e)))
                continue

        # Verify the payload against the stored SHA1; a missing checksum
        # is tolerated (restore proceeds unverified), a mismatch skips
        if blobSHA is None:
            logger.warning("Missing SHA1 checksum file...no verification")
        else:
            if hashlib.sha1(blob).hexdigest() != blobSHA:
                logger.warning("Backup does NOT verify, skipping metric %s" \
                        % i)
                continue

        # Merge/copy the whisper data into the local tree
        heal(script, i, blob)

        # Clean up: free the (potentially large) decompressed blob promptly
        del blob
        blobgz.close()
def listbackups(script):
    """Print every backup present in the object store for the configured
    compression algorithm: the metric name followed by the timestamp of
    each backup, then a final count."""

    c = 0
    # This list is sorted, we will use that to our advantage
    key = None
    for i in script.store.list():
        if i.endswith(".wsp.%s" % script.options.algorithm):
            if key is None or key != i:
                key = i
                # Keys look like "metric/<ISO8601 timestamp>.wsp.<alg>".
                # The timestamp rendered by "%Y-%m-%dT%H:%M:%S+00:00" is 25
                # chars and the ".wsp.gz"/".wsp.sz" extension is 7, so
                # stripping "/" + 25 + 7 = 33 chars leaves the metric name.
                print key[:-33]

            # The last 32 chars minus the 7-char extension is the timestamp.
            # NOTE(review): source formatting was lost in transit; this line
            # and the counter are placed at the .wsp-match level -- confirm
            # against upstream indentation.
            print "\tDate: %s" % key[len(key[:-32]):-7]
            c += 1

    print
    if c == 0:
        print "No backups found."
    else:
        print "%s compressed whisper databases found." % c
def main():
    """Entry point: build the option set, parse the command line via
    CronScript, construct the storage backend, and dispatch to the
    backup/restore/purge/list sub-command."""

    usage = "%prog [options] backup|restore|purge|list disk|gcs|noop|s3|swift [storage args]"
    options = []

    # Local whisper tree root (backup source / restore destination)
    options.append(make_option("-p", "--prefix", type="string",
        default="/opt/graphite/storage/whisper",
        help="Root of where the whisper files live or will be restored to, default %default"))
    options.append(make_option("-f", "--processes", type="int",
        default=4,
        help="Number of worker processes to spawn, default %default"))
    options.append(make_option("-r", "--retention", type="int",
        default=5,
        help="Number of unique backups to retain for each whisper file, default %default"))
    options.append(make_option("-x", "--purge", type="int",
        default=45,
        help="Days to keep unknown Whisper file backups, -1 disables, default %default"))
    options.append(make_option("-n", "--noop", action="store_true",
        default=False,
        help="Do not modify the object store, default %default"))
    options.append(make_option("-b", "--bucket", type="string",
        default="graphite-backups",
        help="The AWS S3 bucket name or Swift container to use, default %default"))
    options.append(make_option("-m", "--metrics", type="string",
        default="*",
        help="Glob pattern of metric names to backup or restore, default %default"))
    # NOTE(review): utc() is evaluated once, when the option is built --
    # the default/help timestamp is process start time, not parse time.
    options.append(make_option("-c", "--date", type="string",
        default=utc(),
        help="String in ISO-8601 date format. The last backup before this date will be used during the restore. Default is now or %s." % utc()))
    # Snappy ("sz") is offered only when the python-snappy module imported
    choices = ["gz"]
    if snappy is not None:
        choices.append("sz")
    options.append(make_option("-a", "--algorithm", type="choice",
        default="gz", choices=choices, dest="algorithm",
        help="Compression format to use based on installed Python modules. " \
             "Choices: %s" % ", ".join(choices)))
    options.append(make_option("--storage-path", type="string",
        default="",
        help="Path in the bucket to store the backup, default %default"))

    script = CronScript(usage=usage, options=options)

    # No positional args at all: print a banner and bail
    if len(script.args) == 0:
        logger.info("whisper-backup.py - A Python script for backing up whisper " \
                "database trees as used with Graphite")
        logger.info("Copyright (c) 2014 - 2019 42 Lines, Inc.")
        logger.info("Original Author: Jack Neely ")
        logger.info("See the README for help or use the --help option.")
        sys.exit(1)

    mode = script.args[0].lower()
    if mode == "backup":
        with script:
            # Use splay and lockfile settings
            script.store = storageBackend(script)
            backup(script)
    elif mode == "restore":
        with script:
            # Use splay and lockfile settings
            script.store = storageBackend(script)
            restore(script)
    elif mode == "purge":
        with script:
            # Use splay and lockfile settings
            script.store = storageBackend(script)
            localMetrics = listMetrics(script.options.prefix,
                    script.options.storage_path, script.options.metrics)
            # NOTE(review): assumes listMetrics yields (name, path) pairs;
            # only the names are handed to purge() -- confirm.
            purge(script, { k: True for k, p in localMetrics })
    elif mode == "list":
        # Splay and lockfile settings make no sense here
        script.store = storageBackend(script)
        listbackups(script)
    else:
        logger.error("Command %s unknown. Must be one of backup, restore, " \
                "purge, or list." % script.args[0])
        sys.exit(1)


if __name__ == "__main__":
    main()