├── .gitignore ├── LICENSE ├── README.md ├── pystream ├── __init__.py └── main.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, EverythingMe 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 
13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pystream 2 | 3 | Stream backups directly to/from S3/HDFS without wasting disk space during the process. 4 | This tool is a command-line interface for the [smart_open](https://pypi.python.org/pypi/smart_open/) library. 5 | 6 | ## Installation 7 | 8 | `pip install pystream` 9 | 10 | ## Usage 11 | 12 | Stream `mysqldump` directly to S3 without wasting any additional disk space during the dump process 13 | ``` 14 | mysqldump | gzip | pystream - s3://backups/mysqldump.gz 15 | ``` 16 | 17 | Restore a MySQL backup directly from S3 18 | ``` 19 | pystream s3://backups/mysqldump.gz - | gunzip | mysql 20 | ``` 21 | 22 | Stream a tarball to S3 23 | ``` 24 | tar cz . 
| pystream - s3://backups/backup.tar.gz 25 | ``` 26 | 27 | Stream a tarball from S3 28 | ``` 29 | pystream s3://backups/backup.tar.gz - | tar xz 30 | ``` 31 | 32 | S3 `cat` 33 | ``` 34 | pystream s3://bucket/path/to/key - 35 | ``` 36 | 37 | And the usual `s3cmd cp`-like usage: 38 | ``` 39 | pystream s3://bucket/path/to/key /path/on/filesystem 40 | 41 | pystream /path/on/filesystem s3://bucket/path/to/key 42 | ``` 43 | -------------------------------------------------------------------------------- /pystream/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/EverythingMe/pystream/bed7274ce1b02c3e681ba39d9be33b99e13f9d36/pystream/__init__.py -------------------------------------------------------------------------------- /pystream/main.py: -------------------------------------------------------------------------------- 1 | import click 2 | import smart_open 3 | 4 | 5 | class SmartFile(click.File): 6 | """ 7 | The convenience of click.File with the power of smart_open. 
8 | 9 | If smart_open detects the file as file:// scheme, use the default click.File (accepts "-" for stdin/stdout) 10 | Otherwise, open it with smart_open 11 | """ 12 | 13 | def convert(self, value, param, ctx): 14 | uri = smart_open.ParseUri(value) 15 | 16 | if uri.scheme == 'file': 17 | return super(SmartFile, self).convert(uri.uri_path, param, ctx) 18 | else: 19 | return smart_open.smart_open(value, self.mode) 20 | 21 | 22 | @click.command() 23 | @click.option('--chunk-size', default=8192) 24 | @click.argument('src', type=SmartFile('rb')) 25 | @click.argument('dst', type=SmartFile('wb')) 26 | def main(chunk_size, src, dst): 27 | while True: 28 | chunk = src.read(chunk_size) 29 | 30 | if len(chunk) == 0: 31 | break 32 | 33 | dst.write(chunk) 34 | 35 | dst.close() 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='pystream', 5 | version='0.1', 6 | description='Stream uploads/downloads to/from S3 easily with smart_open', 7 | author='EverythingMe', 8 | author_email='omrib@everything.me', 9 | url='http://github.com/EverythingMe/pystream', 10 | packages=find_packages(), 11 | install_requires=['smart_open', 'click'], 12 | 13 | entry_points={ 14 | 'console_scripts': [ 15 | 'pystream = pystream.main:main' 16 | ] 17 | }, 18 | 19 | classifiers=[ 20 | 'Development Status :: 5 - Production/Stable', 21 | 'Environment :: Console', 22 | 'Intended Audience :: Developers', 23 | 'Intended Audience :: System Administrators', 24 | 'Operating System :: OS Independent', 25 | 'Programming Language :: Python :: 2.7', 26 | 'Topic :: System :: Clustering', 27 | 'Topic :: System :: Systems Administration', 28 | 'Topic :: Utilities' 29 | ] 30 | ) 31 | 32 | --------------------------------------------------------------------------------