├── .gitignore ├── CHANGELOG.rst ├── CHANGES.rst ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── bin └── boto-rsync └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .coverage 3 | MANIFEST 4 | tests/ 5 | build/ 6 | dist/ 7 | *.egg-info/ 8 | -------------------------------------------------------------------------------- /CHANGELOG.rst: -------------------------------------------------------------------------------- 1 | ================================================================================ 2 | Changelog 3 | ================================================================================ 4 | 5 | 6 | v0.8.1 -- 02/15/2012 7 | ================================================================================ 8 | 9 | * Fixed an issue where the contents of local directories didn't get copied into 10 | remote directories if the remote directory existed and was empty. 11 | 12 | 13 | v0.8 -- 02/05/2012 14 | ================================================================================ 15 | 16 | * Added the `--anon` option. 17 | * Added the `-m/--metadata` option. 18 | * Switched from getopt to argparse for argument parsing. 19 | * Changed `boto` dependency to >=2.2.1 (required for the `--anon` option). 20 | 21 | 22 | v0.7 -- 01/09/2012 23 | ================================================================================ 24 | 25 | * Added the `--glob` option. 26 | * Added the `--endpoint` option. 27 | * Long form options were renamed from `--long_name` to `--long-name`. 28 | * Made the creation of remote "directory keys" the default behavior and added 29 | the `--skip-dirkeys` option. 30 | * Fixed issue with `--long-option` arguments not being parsed correctly. 31 | 32 | 33 | v0.6 -- 12/11/2011 34 | ================================================================================ 35 | 36 | * Added the `--no_recurse` option. 37 | * Fixed an issue where output on Windows was improperly formatted. 38 | 39 | 40 | v0.5.1 - v0.5.3 -- 12/08/2011 - 12/10/2011 41 | ================================================================================ 42 | 43 | * Added info on boto's advanced configuration options. 44 | * Fixed boto's version requirement. The "encrypt_key" option requires boto v2.1 45 | or greater. 46 | * Fixed issues with PyPI. 47 | * Fixed/updated setup script. 48 | 49 | 50 | v0.5 -- 12/08/2011 51 | ================================================================================ 52 | 53 | * Added initial support for Google Storage. 54 | 55 | 56 | v0.4 -- 12/04/2011 57 | ================================================================================ 58 | 59 | * Refactored the callback to be more accurate (hopefully). 60 | * Added estimated time remaining / transfer duration to callback. 61 | * Fixed a bug where SIGINT might not exit properly. 62 | 63 | 64 | v0.3 - v0.3.1 -- 11/30/2011 65 | ================================================================================ 66 | 67 | * Added an ASCII spinner to help the user understand that the process hasn't 68 | hung. 69 | * Fixed bugs from the `--ignore_empty` and directory features that were added in 70 | v0.2. 71 | * Fixed a bug where the beginning of file/key names were sometimes stripped. 72 | * Fixed a bug where directory downloads sometimes crashed. 73 | 74 | 75 | v0.2 -- 11/29/2011 76 | ================================================================================ 77 | 78 | * Added the `--ignore_empty` option. 
79 | * Always assume that a key name ending in "/" is an S3 "directory." 80 | 81 | 82 | v0.1 -- 11/27/2011 83 | ================================================================================ 84 | 85 | * Initial release. 86 | -------------------------------------------------------------------------------- /CHANGES.rst: -------------------------------------------------------------------------------- 1 | Latest Changes 2 | ================================================================================ 3 | 4 | 5 | v0.8.1 -- 02/15/2012 6 | -------------------------------------------------------------------------------- 7 | 8 | * Fixed an issue where the contents of local directories didn't get copied into 9 | remote directories if the remote directory existed and was empty. 10 | 11 | 12 | v0.8 -- 02/05/2012 13 | -------------------------------------------------------------------------------- 14 | 15 | * Added the `--anon` option. 16 | * Added the `-m/--metadata` option. 17 | * Switched from getopt to argparse for argument parsing. 18 | * Changed `boto` dependency to >=2.2.1 (required for the `--anon` option). 19 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) 2012 Seth Davis http://www.curiasolutions.com/ 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 7 | of the Software, and to permit persons to whom the Software is furnished to do 8 | so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 20 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.txt *.rst 2 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | ================================================================================ 2 | boto rsync v0.8.1 3 | ================================================================================ 4 | 5 | | Copyright: (c) 2012 Seth Davis 6 | | http://github.com/seedifferently/boto_rsync 7 | 8 | 9 | Synopsis 10 | ================================================================================ 11 | 12 | boto-rsync is a rough adaptation of boto's s3put script which has been 13 | reengineered to more closely mimic rsync. Its goal is to provide a familiar 14 | rsync-like wrapper for boto's S3 and Google Storage interfaces. 
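
For example, a dry run of a local-to-S3 sync (using the same placeholder bucket
and paths as the Examples section below) would look like::

    boto-rsync -n /local/path/ s3://bucketname/remote/path/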
15 | 16 | By default, the script works recursively and differences between files are 17 | checked by comparing file sizes (e.g. rsync's --recursive and --size-only 18 | options). If the file exists on the destination but its size differs from 19 | the source, then it will be overwritten (unless the -w option is used). 20 | 21 | 22 | Installation 23 | ================================================================================ 24 | 25 | To install, simply:: 26 | 27 | pip install boto_rsync 28 | 29 | * You'll need to have `Python`_ 2.5+ and `pip`_ installed. 30 | * You might have to be root (or use sudo) for pip to install the script into a 31 | globally executable directory in your $PATH. 32 | * pip should automatically install boto for you, but the advanced user can find 33 | it here: http://github.com/boto/boto/ 34 | 35 | .. _Python: http://www.python.org 36 | .. _pip: http://www.pip-installer.org 37 | 38 | 39 | Usage 40 | ================================================================================ 41 | 42 | :: 43 | 44 | boto-rsync [OPTIONS] SOURCE DESTINATION 45 | 46 | SOURCE and DESTINATION can be: 47 | 48 | * A local path to a directory or specific file 49 | * A custom S3 or GS URL to a directory or specific key in the format of 50 | s3://bucketname/path/or/key 51 | * A S3 to S3 transfer using two S3 URLs 52 | * A GS to GS transfer using two GS URLs 53 | 54 | 55 | Examples 56 | ================================================================================ 57 | 58 | :: 59 | 60 | boto-rsync [OPTIONS] /local/path/ s3://bucketname/remote/path/ 61 | 62 | or:: 63 | 64 | boto-rsync [OPTIONS] gs://bucketname/remote/path/or/key /local/path/ 65 | 66 | or:: 67 | 68 | boto-rsync [OPTIONS] s3://bucketname/ s3://another_bucket/ 69 | 70 | 71 | Options 72 | ================================================================================ 73 | 74 | :: 75 | 76 | -a KEY, --access-key KEY 77 | Your Access Key ID. If not supplied, boto will look 78 | for an environment variable or a credentials file. 79 | -s SECRET, --secret-key SECRET 80 | Your Secret Key. If not supplied, boto will look for 81 | an environment variable or a credentials file. 82 | --anon Connect without credentials (S3 only). Useful if 83 | working with others' buckets that have a global 84 | read/write ACL. 85 | --endpoint HOST Specify a specific S3 endpoint to connect to via 86 | boto's "host" connection argument (S3 only). 87 | -g GRANT, --grant GRANT 88 | A canned ACL policy that will be granted on each file 89 | transferred to S3/GS. The value provided must be one 90 | of the "canned" ACL policies supported by S3/GS: 91 | private, public-read, public-read-write (S3 only), or 92 | authenticated-read 93 | -m METADATA [METADATA ...], --metadata METADATA [METADATA ...] 94 | One or more "Name: value" pairs specifying what 95 | metadata to set on each file transferred to S3/GS. 96 | Note: Be sure to end your args with "--" if this is 97 | the last argument specified so that SOURCE and 98 | DESTINATION can be read properly. e.g. boto-rsync -m 99 | "Content-Type: audio/mpeg" "Content-Disposition: 100 | attachment" -- ./path/ s3://bucket/ 101 | -r, --reduced Enable reduced redundancy on files copied to S3. 102 | -e, --encrypt-keys Enable server-side encryption on files copied to S3 103 | (only applies when S3 is the destination). 104 | -p, --preserve-acl Copy the ACL from the source key to the destination 105 | key (only applies in S3/S3 and GS/GS transfer modes). 
106 | -w, --no-overwrite No files will be overwritten, if the file/key exists 107 | on the destination it will be kept. Note that this is 108 | not a sync--even if the file has been updated on the 109 | source it will not be updated on the destination. 110 | --glob Interpret the tail end of SOURCE as a filename pattern 111 | and filter transfers accordingly. Note: If globbing a 112 | local path, make sure that your CLI's automatic 113 | filename expansion is disabled (typically accomplished 114 | by enclosing SOURCE in quotes, e.g. "/path/*.zip"). 115 | --no-recurse Do not recurse into directories. 116 | --skip-dirkeys When syncing to S3 or GS, skip the creation of keys 117 | which represent "directories" (an empty key ending in 118 | "/" for S3 or "_$folder$" for GS). 119 | --ignore-empty Ignore empty (0-byte) keys/files/directories. This 120 | will skip the transferring of empty directories and 121 | keys/files whose size is 0. Warning: S3/GS often uses 122 | empty keys with special trailing characters to specify 123 | directories. 124 | --delete Delete extraneous files from destination dirs after 125 | the transfer has finished (e.g. rsync's --delete- 126 | after). 127 | -n, --dry-run No files will be transferred, but informational 128 | messages will be printed about what would have 129 | happened. 130 | -v, --verbose Print additional informational messages. 131 | -d LEVEL, --debug LEVEL 132 | Level 0 means no debug output (default), 1 means 133 | normal debug output from boto, and 2 means boto debug 134 | output plus request/response output from httplib. 135 | --version show program's version number and exit 136 | -h, --help show the help message and exit 137 | 138 | 139 | Advanced Configuration Options 140 | -------------------------------------------------------------------------------- 141 | 142 | boto supports the option to read access/secret keys from the environment or from 143 | a credentials file. Set the AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY or 144 | GS_ACCESS_KEY_ID/GS_SECRET_ACCESS_KEY environment variables or use boto's 145 | advanced configuration options to set up a credentials file. 146 | 147 | More information on boto's advanced configuration options can be found here: 148 | http://boto.cloudhackers.com/en/latest/boto_config_tut.html 149 | 150 | 151 | Known Issues and Limitations 152 | ================================================================================ 153 | 154 | * Differences between keys/files are assumed *only* by checking the size. 155 | * Due to the nature of how directories work in S3/GS, some non-standard folder 156 | structures might not transfer correctly. Empty directories may also be 157 | overlooked in some cases. When in doubt, use "-n" first. 158 | * Simple "globbing" (e.g. ``/path/*.zip``) is supported but may behave strangely 159 | on some systems. See the "--glob" option's help text for more info. 160 | * At this time, the script does not take advantage of boto's "multipart" 161 | transfer methods. (pull requests welcome!) 162 | 163 | 164 | Disclaimers and Warnings 165 | ================================================================================ 166 | 167 | This is Alpha software--always remember to use the "-n" option first! 168 | 169 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 170 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 171 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHOR BE 172 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 173 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 174 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 175 | -------------------------------------------------------------------------------- /bin/boto-rsync: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) 2012 Seth Davis http://www.curiasolutions.com/ 3 | # s3put is Copyright (c) 2006,2007,2008 Mitch Garnaat http://garnaat.org/ 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files (the 7 | # "Software"), to deal in the Software without restriction, including 8 | # without limitation the rights to use, copy, modify, merge, publish, dis- 9 | # tribute, sublicense, and/or sell copies of the Software, and to permit 10 | # persons to whom the Software is furnished to do so, subject to the fol- 11 | # lowing conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included 14 | # in all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- 18 | # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 19 | # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 | # IN THE SOFTWARE. 23 | # 24 | import sys, os, time, datetime, argparse, threading, signal 25 | from fnmatch import fnmatch 26 | import boto 27 | 28 | __version__ = '0.8.1' 29 | 30 | version= """ 31 | %(prog)s v%(version)s 32 | Copyright (c) 2012 Seth Davis 33 | http://github.com/seedifferently/boto_rsync 34 | """ 35 | 36 | description = """ 37 | SOURCE and DESTINATION can either be a local path to a directory or specific 38 | file, a custom S3 or GS URL to a directory or specific key in the format of 39 | s3://bucketname/path/or/key, a S3 to S3 transfer using two S3 URLs, or a GS to 40 | GS transfer using two GS URLs. 41 | 42 | examples: 43 | boto-rsync [OPTIONS] /local/path/ s3://bucketname/remote/path/ 44 | or 45 | boto-rsync [OPTIONS] gs://bucketname/remote/path/or/key /local/path/ 46 | or 47 | boto-rsync [OPTIONS] s3://bucketname/ s3://another_bucket/ 48 | """ 49 | 50 | def usage(parser): 51 | """Prints the usage string and exits.""" 52 | parser.print_help() 53 | sys.exit(2) 54 | 55 | def get_full_path(path): 56 | """ 57 | Returns a full path with special markers such as "~" and "$USER" expanded. 
58 | """ 59 | path = os.path.expanduser(path) 60 | path = os.path.expandvars(path) 61 | if path and path.endswith(os.sep): 62 | path = os.path.abspath(path) + os.sep 63 | else: 64 | path = os.path.abspath(path) 65 | return path 66 | 67 | def convert_bytes(n): 68 | """Converts byte sizes into human readable forms such as KB/MB/etc.""" 69 | for x in ['b','K','M','G','T']: 70 | if n < 1024.0: 71 | return "%.1f%s" % (n, x) 72 | n /= 1024.0 73 | return "%.1f%s" % (n, x) 74 | 75 | def spinner(event, every): 76 | """Animates an ASCII spinner.""" 77 | while True: 78 | if event.isSet(): 79 | sys.stdout.write('\b \b') 80 | sys.stdout.flush() 81 | break 82 | sys.stdout.write('\b\\') 83 | sys.stdout.flush() 84 | event.wait(every) 85 | sys.stdout.write('\b|') 86 | sys.stdout.flush() 87 | event.wait(every) 88 | sys.stdout.write('\b/') 89 | sys.stdout.flush() 90 | event.wait(every) 91 | sys.stdout.write('\b-') 92 | sys.stdout.flush() 93 | event.wait(every) 94 | 95 | def submit_cb(bytes_so_far, total_bytes): 96 | """The "progress" callback for file transfers.""" 97 | global speeds 98 | 99 | # Setup speed calculation 100 | if bytes_so_far < 1: 101 | speeds = [] 102 | speeds.append((bytes_so_far, time.time())) 103 | # Skip processing if our last process was less than 850ms ago 104 | elif bytes_so_far != total_bytes and (time.time() - speeds[-1][1]) < .85: 105 | return 106 | 107 | speeds.append((bytes_so_far, time.time())) 108 | 109 | # Try to get ~5 seconds of data info for speed calculation 110 | s1, t1 = speeds[-1] 111 | for speed in reversed(speeds): 112 | s2, t2 = speed 113 | 114 | if (t1 - t2) > 5: 115 | break 116 | 117 | # Calculate the speed 118 | if bytes_so_far == total_bytes: 119 | # Calculate the overall average speed 120 | seconds = int(round(speeds[-1][1] - speeds[0][1])) 121 | if seconds < 1: 122 | seconds = 1 123 | speed = 1.0 * total_bytes / seconds 124 | else: 125 | # Calculate the current average speed 126 | seconds = t1 - t2 127 | if seconds < 1: 128 | seconds = 1 129 | size = s1 - s2 130 | speed = 1.0 * size / seconds 131 | 132 | # Calculate the duration 133 | try: 134 | if bytes_so_far == total_bytes: 135 | # Calculate time taken 136 | duration = int(round(speeds[-1][1] - speeds[0][1])) 137 | else: 138 | # Calculate remaining time 139 | duration = int(round((total_bytes - bytes_so_far) / speed)) 140 | duration = str(datetime.timedelta(seconds=duration)) 141 | except ZeroDivisionError: 142 | duration = '0:00:00' 143 | 144 | # Calculate the progress 145 | try: 146 | progress = round((1.0 * bytes_so_far / total_bytes) * 100) 147 | except ZeroDivisionError: 148 | progress = 100 149 | 150 | sys.stdout.write(' %6s of %6s %3d%% %6s/s %7s \r' % ( 151 | convert_bytes(bytes_so_far), convert_bytes(total_bytes), progress, 152 | convert_bytes(speed), duration) 153 | ) 154 | sys.stdout.flush() 155 | 156 | def get_key_name(fullpath, prefix): 157 | """Returns a key compatible name for a file.""" 158 | key_name = fullpath[len(prefix):] 159 | l = key_name.split(os.sep) 160 | key_name = '/'.join(l) 161 | return key_name.lstrip('/') 162 | 163 | def signal_handler(signum, frame): 164 | """Handles signals.""" 165 | global ev 166 | 167 | if signum == signal.SIGINT: 168 | if ev: 169 | ev.set() 170 | 171 | sys.stdout.write('\n') 172 | sys.exit(0) 173 | 174 | def main(): 175 | global speeds, ev 176 | 177 | signal.signal(signal.SIGINT, signal_handler) 178 | ev = None 179 | speeds = [] 180 | cb = submit_cb 181 | num_cb = 10 182 | rename = False 183 | copy_file = True 184 | 185 | parser = argparse.ArgumentParser( 
186 | formatter_class=argparse.RawDescriptionHelpFormatter, 187 | usage='%(prog)s [OPTIONS] SOURCE DESTINATION', 188 | description=description, 189 | add_help=False 190 | ) 191 | parser.add_argument( 192 | '-a', '--access-key', metavar='KEY', dest='cloud_access_key_id', 193 | help='Your Access Key ID. If not supplied, boto will look for an ' + \ 194 | 'environment variable or a credentials file (see README.rst ' + \ 195 | 'for more info).' 196 | ) 197 | parser.add_argument( 198 | '-s', '--secret-key', metavar='SECRET', dest='cloud_secret_access_key', 199 | help='Your Secret Key. If not supplied, boto will look for an ' + \ 200 | 'environment variable or a credentials file.' 201 | ) 202 | parser.add_argument( 203 | '--anon', action='store_true', 204 | help='Connect without credentials (S3 only). Useful if working ' + \ 205 | 'with others\' buckets that have a global read/write ACL.' 206 | ) 207 | parser.add_argument( 208 | '--endpoint', metavar='HOST', default='s3.amazonaws.com', 209 | help='Specify a specific S3 endpoint to connect to via boto\'s ' + \ 210 | '"host" connection argument (S3 only).' 211 | ) 212 | parser.add_argument( 213 | '-g', '--grant', 214 | help='A canned ACL policy that will be granted on each file ' + \ 215 | 'transferred to S3/GS. The value provided must be one of the ' + \ 216 | '"canned" ACL policies supported by S3/GS: private, ' + \ 217 | 'public-read, public-read-write (S3 only), or authenticated-read' 218 | ) 219 | parser.add_argument( 220 | '-m', '--metadata', nargs='+', default=dict(), 221 | help='One or more "Name: value" pairs specifying what metadata to ' + \ 222 | 'set on each file transferred to S3/GS. Note: Be sure to end ' + \ 223 | 'your args with "--" if this is the last argument specified ' + \ 224 | 'so that SOURCE and DESTINATION can be read properly. e.g. ' + \ 225 | '%(prog)s -m "Content-Type: audio/mpeg" "Content-Disposition: ' + \ 226 | 'attachment" -- ./path/ s3://bucket/' 227 | ) 228 | parser.add_argument( 229 | '-r', '--reduced', action='store_true', 230 | help='Enable reduced redundancy on files copied to S3.' 231 | ) 232 | parser.add_argument( 233 | '-e', '--encrypt-keys', dest='encrypt', action='store_true', 234 | help='Enable server-side encryption on files copied to S3 (only ' + \ 235 | 'applies when S3 is the destination).' 236 | ) 237 | parser.add_argument( 238 | '-p', '--preserve-acl', dest='preserve', action='store_true', 239 | help='Copy the ACL from the source key to the destination key ' + \ 240 | '(only applies in S3/S3 and GS/GS transfer modes).' 241 | ) 242 | parser.add_argument( 243 | '-w', '--no-overwrite', action='store_true', 244 | help='No files will be overwritten, if the file/key exists on the ' + \ 245 | 'destination it will be kept. Note that this is not a sync--' + \ 246 | 'even if the file has been updated on the source it will not ' + \ 247 | 'be updated on the destination.' 248 | ) 249 | parser.add_argument( 250 | '--glob', action='store_true', 251 | help='Interpret the tail end of SOURCE as a filename pattern and ' + \ 252 | 'filter transfers accordingly. Note: If globbing a local ' + \ 253 | 'path, make sure that your CLI\'s automatic filename ' + \ 254 | 'expansion is disabled (typically accomplished by enclosing ' + \ 255 | 'SOURCE in quotes, e.g. "/path/*.zip").' 256 | ) 257 | parser.add_argument( 258 | '--no-recurse', action='store_true', 259 | help='Do not recurse into directories.' 
260 | ) 261 | parser.add_argument( 262 | '--skip-dirkeys', action='store_true', 263 | help='When syncing to S3 or GS, skip the creation of keys which ' + \ 264 | 'represent "directories" (an empty key ending in "/" for S3 ' + \ 265 | 'or "_$folder$" for GS).' 266 | ) 267 | parser.add_argument( 268 | '--ignore-empty', action='store_true', 269 | help='Ignore empty (0-byte) keys/files/directories. This will skip ' + \ 270 | 'the transferring of empty directories and keys/files whose ' + \ 271 | 'size is 0. Warning: S3/GS often uses empty keys with special ' + \ 272 | 'trailing characters to specify directories.' 273 | ) 274 | parser.add_argument( 275 | '--delete', action='store_true', 276 | help='Delete extraneous files from destination dirs after the ' + \ 277 | 'transfer has finished (e.g. rsync\'s --delete-after).' 278 | ) 279 | parser.add_argument( 280 | '-n', '--dry-run', action='store_true', dest='no_op', 281 | help='No files will be transferred, but informational messages ' + \ 282 | 'will be printed about what would have happened.' 283 | ) 284 | parser.add_argument( 285 | '-v', '--verbose', action='store_false', dest='quiet', 286 | help='Print additional informational messages.' 287 | ) 288 | parser.add_argument( 289 | '-d', '--debug', metavar='LEVEL', choices=[0, 1, 2], default=0, 290 | type=int, 291 | help='Level 0 means no debug output (default), 1 means normal ' + \ 292 | 'debug output from boto, and 2 means boto debug output plus ' + \ 293 | 'request/response output from httplib.' 294 | ) 295 | parser.add_argument( 296 | '--version', action='version', 297 | version=version % dict(prog=parser.prog, version=__version__) 298 | ) 299 | parser.add_argument( 300 | '-h', '--help', action='help', 301 | help='show this help message and exit' 302 | ) 303 | parser.add_argument('SOURCE', help=argparse.SUPPRESS) 304 | parser.add_argument('DESTINATION', help=argparse.SUPPRESS) 305 | 306 | try: 307 | args = parser.parse_args() 308 | except argparse.ArgumentTypeError: 309 | pass 310 | 311 | try: 312 | cloud_access_key_id = args.cloud_access_key_id 313 | cloud_secret_access_key = args.cloud_secret_access_key 314 | anon = args.anon 315 | endpoint = args.endpoint 316 | grant = args.grant 317 | metadata = args.metadata 318 | if not isinstance(metadata, dict): 319 | metadata = dict([meta.split(': ', 1) for meta in metadata]) 320 | reduced = args.reduced 321 | encrypt = args.encrypt 322 | preserve = args.preserve 323 | no_overwrite = args.no_overwrite 324 | glob = args.glob 325 | no_recurse = args.no_recurse or glob 326 | skip_dirkeys = args.skip_dirkeys 327 | ignore_empty = args.ignore_empty 328 | delete = args.delete 329 | no_op = args.no_op 330 | quiet = args.quiet 331 | debug = args.debug 332 | source = args.SOURCE 333 | dest = args.DESTINATION 334 | except: 335 | sys.stdout.write('\nERROR: Improperly formatted arguments.\n\n') 336 | usage(parser) 337 | 338 | if (source.startswith('s3://') and dest.startswith('gs://') or 339 | source.startswith('gs://') and dest.startswith('s3://')): 340 | sys.stdout.write('ERROR: You cannot directly sync between S3 and ' + 341 | 'Google Storage.\n\n') 342 | usage(parser) 343 | elif not source.startswith('s3://') and dest.startswith('s3://'): 344 | # S3 upload sync 345 | cloud_service = 's3' 346 | path = get_full_path(source) 347 | cloud_bucket = dest[5:].split('/')[0] 348 | cloud_path = dest[(len(cloud_bucket) + 5):] 349 | xfer_type = 'upload' 350 | elif source.startswith('s3://') and not dest.startswith('s3://'): 351 | # S3 download sync 352 | cloud_service = 
's3' 353 | cloud_bucket = source[5:].split('/')[0] 354 | cloud_path = source[(len(cloud_bucket) + 5):] 355 | path = get_full_path(dest) 356 | xfer_type = 'download' 357 | elif not source.startswith('gs://') and dest.startswith('gs://'): 358 | # GS upload sync 359 | cloud_service = 'gs' 360 | path = get_full_path(source) 361 | cloud_bucket = dest[5:].split('/')[0] 362 | cloud_path = dest[(len(cloud_bucket) + 5):] 363 | xfer_type = 'upload' 364 | elif source.startswith('gs://') and not dest.startswith('gs://'): 365 | # GS download sync 366 | cloud_service = 'gs' 367 | cloud_bucket = source[5:].split('/')[0] 368 | cloud_path = source[(len(cloud_bucket) + 5):] 369 | path = get_full_path(dest) 370 | xfer_type = 'download' 371 | elif source.startswith('s3://') and dest.startswith('s3://'): 372 | # S3 to S3 sync 373 | cloud_service = 's3' 374 | cloud_bucket = source[5:].split('/')[0] 375 | cloud_path = source[(len(cloud_bucket) + 5):] 376 | cloud_dest_bucket = dest[5:].split('/')[0] 377 | cloud_dest_path = dest[(len(cloud_dest_bucket) + 5):] 378 | xfer_type = 'sync' 379 | elif source.startswith('gs://') and dest.startswith('gs://'): 380 | # GS to GS sync 381 | cloud_service = 'gs' 382 | cloud_bucket = source[5:].split('/')[0] 383 | cloud_path = source[(len(cloud_bucket) + 5):] 384 | cloud_dest_bucket = dest[5:].split('/')[0] 385 | cloud_dest_path = dest[(len(cloud_dest_bucket) + 5):] 386 | xfer_type = 'sync' 387 | else: 388 | usage(parser) 389 | 390 | # Cloud paths shouldn't have a leading slash 391 | cloud_path = cloud_path.lstrip('/') 392 | 393 | if xfer_type in ['download', 'upload']: 394 | if not os.path.isdir(path) and not os.path.split(path)[0]: 395 | sys.stdout.write( 396 | '\nERROR: %s is not a valid path (does it exist?)\n\n' % path 397 | ) 398 | usage(parser) 399 | elif not cloud_bucket or len(cloud_bucket) < 3: 400 | sys.stdout.write('\nERROR: Bucket name is invalid\n\n') 401 | usage(parser) 402 | elif xfer_type in ['sync']: 403 | if not cloud_bucket or len(cloud_bucket) < 3 and \ 404 | not cloud_dest_bucket or len(cloud_dest_bucket) < 3: 405 | sys.stdout.write('\nERROR: Bucket name is invalid\n\n') 406 | usage(parser) 407 | 408 | # Cloud paths shouldn't have a leading slash 409 | cloud_dest_path = cloud_dest_path.lstrip('/') 410 | 411 | 412 | # Connect to Cloud 413 | if cloud_service == 'gs': 414 | c = boto.connect_gs(gs_access_key_id=cloud_access_key_id, 415 | gs_secret_access_key=cloud_secret_access_key) 416 | else: 417 | if anon: 418 | c = boto.connect_s3(host=endpoint, anon=True) 419 | else: 420 | c = boto.connect_s3(aws_access_key_id=cloud_access_key_id, 421 | aws_secret_access_key=cloud_secret_access_key, 422 | host=endpoint) 423 | c.debug = debug 424 | b = c.get_bucket(cloud_bucket) 425 | if xfer_type in ['sync']: 426 | b2 = c.get_bucket(cloud_dest_bucket) 427 | 428 | 429 | if xfer_type == 'upload': 430 | # Perform cloud "upload" 431 | 432 | # Check for globbing 433 | if glob: 434 | glob = path.split(os.sep)[-1] 435 | if glob: 436 | path = path[:-len(glob)] 437 | 438 | if os.path.isdir(path) or glob: 439 | # Possible multi file upload 440 | sys.stdout.write('Scanning for files to transfer... 
') 441 | sys.stdout.flush() 442 | 443 | if cloud_path and not cloud_path.endswith('/'): 444 | cloud_path += '/' 445 | 446 | # Start "spinner" thread 447 | ev = threading.Event() 448 | t1 = threading.Thread(target=spinner, args=(ev, 0.25)) 449 | t1.start() 450 | 451 | try: 452 | keys = {} 453 | for key in b.list(prefix=cloud_path): 454 | if no_recurse and '/' in key.name[len(cloud_path):]: 455 | continue 456 | 457 | if glob and not fnmatch(key.name.split('/')[-1], glob): 458 | continue 459 | 460 | keys[key.name] = key.size 461 | except Exception, e: 462 | raise e 463 | finally: 464 | # End "spinner" thread 465 | ev.set() 466 | t1.join() 467 | 468 | # Clean stdout 469 | sys.stdout.write('\n') 470 | 471 | # "Walk" the directory and upload files 472 | for root, dirs, files in os.walk(path): 473 | if no_recurse: 474 | if root != path: 475 | continue 476 | 477 | # Create "subdirectories" 478 | if root != path and not skip_dirkeys: 479 | create_dirkey = True 480 | 481 | if cloud_service == 'gs': 482 | key_name = cloud_path + get_key_name(root, path) + \ 483 | '_$folder$' 484 | else: 485 | key_name = cloud_path + get_key_name(root, path) + '/' 486 | 487 | if ignore_empty and not files: 488 | if not quiet: 489 | sys.stdout.write( 490 | 'Skipping %s (empty directory)\n' % 491 | key_name.replace('_$folder$', '/') 492 | ) 493 | create_dirkey = False 494 | elif key_name in keys: 495 | if no_overwrite: 496 | if not quiet: 497 | sys.stdout.write( 498 | 'Skipping %s (not overwriting)\n' % 499 | key_name.replace('_$folder$', '/') 500 | ) 501 | create_dirkey = False 502 | elif key_name.endswith('/') or \ 503 | key_name.endswith('_$folder$'): 504 | if not quiet: 505 | sys.stdout.write( 506 | 'Skipping %s (size matches)\n' % 507 | key_name.replace('_$folder$', '/') 508 | ) 509 | create_dirkey = False 510 | 511 | if create_dirkey: 512 | sys.stdout.write( 513 | '%s\n' % 514 | os.path.join(root[len(path):], '').lstrip(os.sep) 515 | ) 516 | if not no_op: 517 | # Setup callback 518 | num_cb = 1 519 | 520 | # Send the directory 521 | k = b.new_key(key_name) 522 | if cloud_service == 'gs': 523 | k.set_contents_from_string( 524 | '', cb=cb, num_cb=num_cb, policy=grant 525 | ) 526 | else: 527 | k.set_contents_from_string( 528 | '', cb=cb, num_cb=num_cb, policy=grant, 529 | reduced_redundancy=reduced, 530 | encrypt_key=encrypt 531 | ) 532 | keys[key_name] = 0 533 | 534 | # Clean stdout 535 | sys.stdout.write('\n') 536 | 537 | for file in files: 538 | if glob and not fnmatch(file, glob): 539 | continue 540 | 541 | fullpath = os.path.join(root, file) 542 | key_name = cloud_path + get_key_name(fullpath, path) 543 | file_size = os.path.getsize(fullpath) 544 | 545 | if file_size == 0: 546 | if ignore_empty: 547 | if not quiet: 548 | sys.stdout.write( 549 | 'Skipping %s (empty file)\n' % 550 | fullpath[len(path):].lstrip(os.sep) 551 | ) 552 | continue 553 | 554 | if key_name in keys: 555 | if no_overwrite: 556 | if not quiet: 557 | sys.stdout.write( 558 | 'Skipping %s (not overwriting)\n' % 559 | fullpath[len(path):].lstrip(os.sep) 560 | ) 561 | continue 562 | elif keys[key_name] == file_size: 563 | if not quiet: 564 | sys.stdout.write( 565 | 'Skipping %s (size matches)\n' % 566 | fullpath[len(path):].lstrip(os.sep) 567 | ) 568 | continue 569 | 570 | sys.stdout.write( 571 | '%s\n' % 572 | fullpath[len(path):].lstrip(os.sep) 573 | ) 574 | 575 | if not no_op: 576 | # Setup callback 577 | num_cb = int(file_size ** .25) 578 | 579 | # Send the file 580 | k = b.new_key(key_name) 581 | k.update_metadata(metadata) 582 | if 
cloud_service == 'gs': 583 | k.set_contents_from_filename( 584 | fullpath, cb=cb, num_cb=num_cb, policy=grant 585 | ) 586 | else: 587 | k.set_contents_from_filename( 588 | fullpath, cb=cb, num_cb=num_cb, 589 | policy=grant, reduced_redundancy=reduced, 590 | encrypt_key=encrypt 591 | ) 592 | keys[key_name] = file_size 593 | 594 | # Clean stdout 595 | sys.stdout.write('\n') 596 | 597 | # If specified, perform deletes 598 | if delete: 599 | if cloud_path and cloud_path in keys: 600 | del(keys[cloud_path]) 601 | 602 | for root, dirs, files in os.walk(path): 603 | if no_recurse: 604 | if root != path: 605 | continue 606 | 607 | for file in files: 608 | fullpath = os.path.join(root, file) 609 | key_name = cloud_path + get_key_name(fullpath, path) 610 | if key_name in keys: 611 | del(keys[key_name]) 612 | 613 | if root != path: 614 | if cloud_service == 'gs': 615 | key_name = cloud_path + get_key_name(root, path) + \ 616 | '_$folder$' 617 | else: 618 | key_name = cloud_path + get_key_name(root, path) + \ 619 | '/' 620 | 621 | if key_name in keys: 622 | del(keys[key_name]) 623 | 624 | for key_name, key_size in keys.iteritems(): 625 | sys.stdout.write( 626 | 'deleting %s\n' % 627 | key_name[len(cloud_path):].replace('_$folder$', '/') 628 | ) 629 | if not no_op: 630 | # Delete the key 631 | b.delete_key(key_name) 632 | 633 | elif os.path.isfile(path): 634 | # Single file upload 635 | if cloud_path and not cloud_path.endswith('/'): 636 | key_name = cloud_path 637 | else: 638 | key_name = cloud_path + os.path.split(path)[1] 639 | filename = os.path.split(path)[1] 640 | file_size = os.path.getsize(path) 641 | 642 | copy_file = True 643 | key = b.get_key(key_name) 644 | 645 | if file_size == 0: 646 | if ignore_empty: 647 | if not quiet: 648 | sys.stdout.write( 649 | 'Skipping %s -> %s (empty file)\n' % 650 | filename, key_name.split('/')[-1] 651 | ) 652 | copy_file = False 653 | 654 | if key: 655 | if no_overwrite: 656 | copy_file = False 657 | if not quiet: 658 | if filename != key_name.split('/')[-1]: 659 | sys.stdout.write( 660 | 'Skipping %s -> %s (not overwriting)\n' % 661 | filename, key_name.split('/')[-1] 662 | ) 663 | else: 664 | sys.stdout.write('Skipping %s (not overwriting)\n' % 665 | filename) 666 | elif key.size == file_size: 667 | copy_file = False 668 | if not quiet: 669 | if filename != key_name.split('/')[-1]: 670 | sys.stdout.write( 671 | 'Skipping %s -> %s (size matches)\n' % 672 | filename, key_name.split('/')[-1] 673 | ) 674 | else: 675 | sys.stdout.write('Skipping %s (size matches)\n' % 676 | filename) 677 | 678 | if copy_file: 679 | if filename != key_name.split('/')[-1]: 680 | sys.stdout.write('%s -> %s\n' % 681 | (filename, key_name.split('/')[-1])) 682 | else: 683 | sys.stdout.write('%s\n' % filename) 684 | 685 | if not no_op: 686 | # Setup callback 687 | num_cb = int(file_size ** .25) 688 | 689 | # Send the file 690 | k = b.new_key(key_name) 691 | k.update_metadata(metadata) 692 | if cloud_service == 'gs': 693 | k.set_contents_from_filename( 694 | path, cb=cb, num_cb=num_cb, policy=grant 695 | ) 696 | else: 697 | k.set_contents_from_filename( 698 | path, cb=cb, num_cb=num_cb, policy=grant, 699 | reduced_redundancy=reduced, encrypt_key=encrypt 700 | ) 701 | 702 | # Clean stdout 703 | sys.stdout.write('\n') 704 | 705 | elif xfer_type == 'download': 706 | # Perform cloud "download" 707 | 708 | cloud_path_key = None 709 | 710 | if cloud_path: 711 | # Check for globbing 712 | if glob: 713 | glob = cloud_path.split('/')[-1] 714 | if glob: 715 | cloud_path = cloud_path[:-len(glob)] 
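            # With any trailing glob pattern stripped from SOURCE, look up the
            # remaining prefix as a key; whether it resolves to a
            # non-"directory" key decides between the single-file and
            # multi-file download paths below.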
716 | 717 | if cloud_path: 718 | cloud_path_key = b.get_key(cloud_path) 719 | else: 720 | glob = False 721 | 722 | if cloud_path_key and not cloud_path_key.name.endswith('/'): 723 | # Single file download 724 | key = cloud_path_key 725 | keypath = key.name.split('/')[-1] 726 | if not os.path.isdir(path) and not path.endswith(os.sep): 727 | rename = True 728 | fullpath = path 729 | else: 730 | fullpath = os.path.join(path, keypath) 731 | 732 | if key.size == 0: 733 | if ignore_empty: 734 | if not quiet: 735 | if rename: 736 | sys.stdout.write( 737 | 'Skipping %s -> %s (empty key)\n' % 738 | keypath, fullpath.split(os.sep)[-1] 739 | ) 740 | else: 741 | sys.stdout.write( 742 | 'Skipping %s (empty key)\n' % 743 | fullpath.split(os.sep)[-1] 744 | ) 745 | copy_file = False 746 | 747 | if not os.path.isdir(os.path.split(fullpath)[0]): 748 | if not quiet: 749 | sys.stdout.write( 750 | 'Creating new directory: %s\n' % 751 | os.path.split(fullpath)[0] 752 | ) 753 | if not no_op: 754 | os.makedirs(os.path.split(fullpath)[0]) 755 | elif os.path.exists(fullpath): 756 | if no_overwrite: 757 | if not quiet: 758 | if rename: 759 | sys.stdout.write( 760 | 'Skipping %s -> %s (not overwriting)\n' % 761 | keypath, fullpath.split(os.sep)[-1] 762 | ) 763 | else: 764 | sys.stdout.write( 765 | 'Skipping %s (not overwriting)\n' % 766 | fullpath.split(os.sep)[-1] 767 | ) 768 | copy_file = False 769 | elif key.size == os.path.getsize(fullpath): 770 | if not quiet: 771 | if rename: 772 | sys.stdout.write( 773 | 'Skipping %s -> %s (size matches)\n' % 774 | keypath.replace('/', os.sep), 775 | fullpath.split(os.sep)[-1] 776 | ) 777 | else: 778 | sys.stdout.write( 779 | 'Skipping %s (size matches)\n' % 780 | fullpath.split(os.sep)[-1] 781 | ) 782 | copy_file = False 783 | 784 | if copy_file: 785 | if rename: 786 | sys.stdout.write( 787 | '%s -> %s\n' % (keypath, fullpath.split(os.sep)[-1]) 788 | ) 789 | else: 790 | sys.stdout.write('%s\n' % keypath) 791 | 792 | if not no_op: 793 | # Setup callback 794 | num_cb = int(key.size ** .25) 795 | 796 | # Get the file 797 | key.get_contents_to_filename(fullpath, cb=cb, num_cb=num_cb) 798 | 799 | # Clean stdout 800 | sys.stdout.write('\n') 801 | 802 | else: 803 | # Possible multi file download 804 | if not cloud_path_key and cloud_path and \ 805 | not cloud_path.endswith('/'): 806 | cloud_path += '/' 807 | 808 | keys = [] 809 | 810 | sys.stdout.write('Scanning for keys to transfer...\n') 811 | 812 | for key in b.list(prefix=cloud_path): 813 | # Skip the key if it is the cloud path 814 | if not key.name[len(cloud_path):] or \ 815 | key.name[len(cloud_path):] == '$folder$': 816 | continue 817 | 818 | if no_recurse and '/' in key.name[len(cloud_path):]: 819 | continue 820 | 821 | if glob and not fnmatch(key.name.split('/')[-1], glob): 822 | continue 823 | 824 | keypath = key.name[len(cloud_path):] 825 | if cloud_service == 'gs': 826 | fullpath = os.path.join( 827 | path, 828 | keypath.replace('_$folder$', os.sep) 829 | ) 830 | else: 831 | fullpath = os.path.join(path, keypath.replace('/', os.sep)) 832 | 833 | keys.append(fullpath) 834 | 835 | if key.size == 0 and ignore_empty: 836 | if not quiet: 837 | sys.stdout.write( 838 | 'Skipping %s (empty key)\n' % 839 | fullpath[len(os.path.join(path, '')):] 840 | ) 841 | continue 842 | 843 | if not os.path.isdir(os.path.split(fullpath)[0]): 844 | if not quiet: 845 | sys.stdout.write( 846 | 'Creating new directory: %s\n' % 847 | os.path.split(fullpath)[0] 848 | ) 849 | if not no_op: 850 | os.makedirs(os.path.split(fullpath)[0]) 851 | elif 
os.path.exists(fullpath): 852 | if no_overwrite: 853 | if not quiet: 854 | sys.stdout.write( 855 | 'Skipping %s (not overwriting)\n' % 856 | fullpath[len(os.path.join(path, '')):] 857 | ) 858 | continue 859 | elif key.size == os.path.getsize(fullpath) or \ 860 | key.name.endswith('/') or \ 861 | key.name.endswith('_$folder$'): 862 | if not quiet: 863 | sys.stdout.write( 864 | 'Skipping %s (size matches)\n' % 865 | fullpath[len(os.path.join(path, '')):] 866 | ) 867 | continue 868 | 869 | if cloud_service == 'gs': 870 | sys.stdout.write('%s\n' % 871 | keypath.replace('_$folder$', os.sep)) 872 | else: 873 | sys.stdout.write('%s\n' % keypath.replace('/', os.sep)) 874 | 875 | if not no_op: 876 | if key.name.endswith('/') or key.name.endswith('_$folder$'): 877 | # Looks like a directory, so just print the status 878 | submit_cb(0, 0) 879 | else: 880 | # Setup callback 881 | num_cb = int(key.size ** .25) 882 | 883 | # Get the file 884 | key.get_contents_to_filename(fullpath, cb=cb, 885 | num_cb=num_cb) 886 | 887 | # Clean stdout 888 | sys.stdout.write('\n') 889 | 890 | # If specified, perform deletes 891 | if delete: 892 | for root, dirs, files in os.walk(path): 893 | if no_recurse: 894 | if root != path: 895 | continue 896 | 897 | if files: 898 | for file in files: 899 | if glob and not fnmatch(file, glob): 900 | continue 901 | 902 | filepath = os.path.join(root, file) 903 | if filepath not in keys: 904 | sys.stdout.write( 905 | 'deleting %s\n' % 906 | filepath[len(os.path.join(path, '')):] 907 | ) 908 | if not no_op: 909 | # Delete the file 910 | os.remove(filepath) 911 | elif root != path: 912 | dirpath = os.path.join(root, '') 913 | if dirpath not in keys: 914 | sys.stdout.write( 915 | 'deleting %s\n' % 916 | dirpath[len(os.path.join(path, '')):] 917 | ) 918 | if not no_op: 919 | # Remove the directory 920 | os.rmdir(dirpath) 921 | else: 922 | # Perform cloud to cloud "sync" 923 | 924 | cloud_path_key = None 925 | 926 | if cloud_path: 927 | # Check for globbing 928 | if glob: 929 | glob = cloud_path.split('/')[-1] 930 | if glob: 931 | cloud_path = cloud_path[:-len(glob)] 932 | 933 | if cloud_path: 934 | cloud_path_key = b.get_key(cloud_path) 935 | else: 936 | glob = False 937 | 938 | if cloud_path_key and not cloud_path_key.name.endswith('/'): 939 | # Single file sync 940 | key = cloud_path_key 941 | keypath = key.name.split('/')[-1] 942 | if cloud_dest_path and not cloud_dest_path.endswith('/'): 943 | rename = True 944 | fullpath = cloud_dest_path 945 | else: 946 | fullpath = cloud_dest_path + keypath 947 | fullpath = fullpath.lstrip('/') 948 | 949 | dest_key = b2.get_key(fullpath) 950 | 951 | if key.size == 0: 952 | if ignore_empty: 953 | if not quiet: 954 | if rename: 955 | sys.stdout.write( 956 | 'Skipping %s -> %s (empty key)\n' % 957 | keypath.split('/')[-1], fullpath.split('/')[-1] 958 | ) 959 | else: 960 | sys.stdout.write( 961 | 'Skipping %s (empty key)\n' % fullpath 962 | ) 963 | copy_file = False 964 | 965 | if dest_key: 966 | # TODO: Check for differing ACL 967 | if no_overwrite: 968 | if not quiet: 969 | if rename: 970 | sys.stdout.write( 971 | 'Skipping %s -> %s (not overwriting)\n' % 972 | keypath.split('/')[-1], fullpath.split('/')[-1] 973 | ) 974 | else: 975 | sys.stdout.write( 976 | 'Skipping %s (not overwriting)\n' % fullpath 977 | ) 978 | copy_file = False 979 | elif key.size == dest_key.size: 980 | if not quiet: 981 | if rename: 982 | sys.stdout.write( 983 | 'Skipping %s -> %s (size matches)\n' % 984 | keypath.split('/')[-1], fullpath.split('/')[-1] 985 | ) 986 | else: 
987 | sys.stdout.write( 988 | 'Skipping %s (size matches)\n' % fullpath 989 | ) 990 | copy_file = False 991 | 992 | if copy_file: 993 | if rename: 994 | sys.stdout.write('%s -> %s... ' % ( 995 | keypath.split('/')[-1], fullpath.split('/')[-1]) 996 | ) 997 | else: 998 | sys.stdout.write('%s... ' % keypath) 999 | sys.stdout.flush() 1000 | if not no_op: 1001 | speeds.append((0, time.time())) 1002 | 1003 | # Start "spinner" thread 1004 | ev = threading.Event() 1005 | t1 = threading.Thread(target=spinner, args=(ev, 0.25)) 1006 | t1.start() 1007 | 1008 | try: 1009 | # Transfer the key 1010 | key.copy(cloud_dest_bucket, fullpath, 1011 | metadata=metadata, reduced_redundancy=reduced, 1012 | preserve_acl=preserve, encrypt_key=encrypt) 1013 | except Exception, e: 1014 | raise e 1015 | finally: 1016 | # End "spinner" thread 1017 | ev.set() 1018 | t1.join() 1019 | 1020 | if rename: 1021 | sys.stdout.write('\r%s -> %s \n' % ( 1022 | keypath.split('/')[-1], fullpath.split('/')[-1] 1023 | )) 1024 | else: 1025 | sys.stdout.write('\r%s \n' % keypath) 1026 | sys.stdout.flush() 1027 | submit_cb(key.size, key.size) 1028 | else: 1029 | if rename: 1030 | sys.stdout.write('\r%s -> %s ' % ( 1031 | keypath.split('/')[-1], fullpath.split('/')[-1]) 1032 | ) 1033 | else: 1034 | sys.stdout.write('\r%s ' % keypath) 1035 | sys.stdout.flush() 1036 | 1037 | # Clean stdout 1038 | sys.stdout.write('\n') 1039 | 1040 | else: 1041 | # Possible multi file sync 1042 | if not cloud_path_key and cloud_path and \ 1043 | not cloud_path.endswith('/'): 1044 | cloud_path += '/' 1045 | if cloud_dest_path and not cloud_dest_path.endswith('/'): 1046 | cloud_dest_path += '/' 1047 | 1048 | keys = [] 1049 | 1050 | sys.stdout.write('Scanning for keys to transfer...\n') 1051 | 1052 | for key in b.list(prefix=cloud_path): 1053 | if no_recurse and '/' in key.name[len(cloud_path):]: 1054 | continue 1055 | 1056 | if glob and not fnmatch(key.name.split('/')[-1], glob): 1057 | continue 1058 | 1059 | if key.name == cloud_path: 1060 | keypath = key.name.split('/')[-2] + '/' 1061 | else: 1062 | keypath = key.name[len(cloud_path):] 1063 | fullpath = cloud_dest_path + keypath 1064 | fullpath = fullpath.lstrip('/') 1065 | 1066 | keys.append(fullpath) 1067 | dest_key = b2.get_key(fullpath) 1068 | 1069 | if key.size == 0: 1070 | if ignore_empty: 1071 | if not quiet: 1072 | sys.stdout.write( 1073 | 'Skipping %s (empty key)\n' % 1074 | fullpath.replace('_$folder$', '/') 1075 | ) 1076 | continue 1077 | 1078 | if dest_key: 1079 | # TODO: Check for differing ACL 1080 | if no_overwrite: 1081 | if not quiet: 1082 | sys.stdout.write( 1083 | 'Skipping %s (not overwriting)\n' % 1084 | fullpath.replace('_$folder$', '/') 1085 | ) 1086 | continue 1087 | elif key.size == dest_key.size: 1088 | if not quiet: 1089 | sys.stdout.write( 1090 | 'Skipping %s (size matches)\n' % 1091 | fullpath.replace('_$folder$', '/') 1092 | ) 1093 | continue 1094 | 1095 | sys.stdout.write('%s... 
' % keypath.replace('_$folder$', '/')) 1096 | sys.stdout.flush() 1097 | if not no_op: 1098 | speeds.append((0, time.time())) 1099 | 1100 | # Start "spinner" thread 1101 | ev = threading.Event() 1102 | t1 = threading.Thread(target=spinner, args=(ev, 0.25)) 1103 | t1.start() 1104 | 1105 | try: 1106 | # Transfer the key 1107 | key.copy(cloud_dest_bucket, fullpath, 1108 | metadata=metadata, reduced_redundancy=reduced, 1109 | preserve_acl=preserve, encrypt_key=encrypt) 1110 | except Exception, e: 1111 | raise e 1112 | finally: 1113 | # End "spinner" thread 1114 | ev.set() 1115 | t1.join() 1116 | 1117 | sys.stdout.write('\r%s \n' % \ 1118 | keypath.replace('_$folder$', '/')) 1119 | sys.stdout.flush() 1120 | submit_cb(key.size, key.size) 1121 | else: 1122 | sys.stdout.write('\r%s ' % \ 1123 | keypath.replace('_$folder$', '/')) 1124 | sys.stdout.flush() 1125 | 1126 | # Clean stdout 1127 | sys.stdout.write('\n') 1128 | 1129 | # If specified, perform deletes 1130 | if delete: 1131 | for key in b2.list(prefix=cloud_dest_path): 1132 | if no_recurse and '/' in key.name[len(cloud_dest_path):]: 1133 | continue 1134 | 1135 | if glob and not fnmatch(key.name.split('/')[-1], glob): 1136 | continue 1137 | 1138 | keypath = key.name[len(cloud_dest_path):] 1139 | 1140 | if key.name not in keys: 1141 | sys.stdout.write( 1142 | 'deleting %s\n' % keypath.replace('_$folder$', '/') 1143 | ) 1144 | if not no_op: 1145 | # Delete the key 1146 | key.delete() 1147 | 1148 | if __name__ == "__main__": 1149 | main() 1150 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | try: 3 | from distutils.core import setup 4 | except ImportError: 5 | from setuptools import setup 6 | 7 | install_requires = ['boto>=2.2.1'] 8 | 9 | if sys.version_info[0] >= 3 or sys.version_info[:2] < (2, 5): 10 | raise RuntimeError('Requires Python 2.5 or above and does not support ' 11 | 'Python 3') 12 | elif sys.version_info[:2] < (2, 7): 13 | install_requires.append('argparse') 14 | 15 | here = os.path.abspath(os.path.dirname(__file__)) 16 | try: 17 | README = open(os.path.join(here, 'README.rst')).read() 18 | CHANGES = open(os.path.join(here, 'CHANGES.rst')).read() 19 | except IOError: 20 | README = "boto-rsync is a rough adaptation of boto's s3put script " + \ 21 | "which has been reengineered to more closely mimic rsync. Its " + \ 22 | "goal is to provide a familiar rsync-like wrapper for boto's " + \ 23 | "S3 and Google Storage interfaces." 
24 | CHANGES = '' 25 | 26 | setup( 27 | name='boto_rsync', 28 | version='0.8.1', 29 | author='Seth Davis', 30 | author_email='seth@curiasolutions.com', 31 | description="An rsync-like wrapper for boto's S3 and Google Storage " + \ 32 | "interfaces.", 33 | long_description=README + '\n\n' + CHANGES, 34 | url='http://github.com/seedifferently/boto_rsync', 35 | keywords='boto amazon aws s3 gs google storage cloud sync rsync', 36 | packages=[], 37 | install_requires=install_requires, 38 | scripts=['bin/boto-rsync'], 39 | license = "MIT", 40 | platforms = "Posix; MacOS X; Windows", 41 | classifiers=[ 42 | 'Development Status :: 3 - Alpha', 43 | 'Environment :: Console', 44 | 'Intended Audience :: Developers', 45 | 'Intended Audience :: System Administrators', 46 | 'License :: OSI Approved :: MIT License', 47 | 'Programming Language :: Python', 48 | 'Programming Language :: Python :: 2', 49 | 'Programming Language :: Python :: 2.5', 50 | 'Programming Language :: Python :: 2.6', 51 | 'Programming Language :: Python :: 2.7', 52 | 'Topic :: Utilities', 53 | 'Topic :: System :: Archiving', 54 | 'Topic :: System :: Archiving :: Backup', 55 | 'Topic :: System :: Archiving :: Mirroring' 56 | ] 57 | ) 58 | --------------------------------------------------------------------------------