├── .gitignore ├── LICENSE ├── README.rst ├── pyproject.toml └── requests_download.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.pyc 3 | /dist/ 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Thomas Kluyver 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | A convenient function to download to a file using requests. 2 | 3 | Basic usage: 4 | 5 | .. 
code-block:: python 6 | 7 | url = "https://github.com/takluyver/requests_download/archive/master.zip" 8 | download(url, "requests_download.zip") 9 | 10 | An optional ``headers=`` parameter is passed through to requests. 11 | 12 | **Trackers** are a lightweight way to monitor the data being downloaded. 13 | Two trackers are included: 14 | 15 | - ``ProgressTracker`` - displays a progress bar, using the `progressbar2 16 | <https://pypi.org/project/progressbar2/>`_ package. 17 | - ``HashTracker`` - wraps a hashlib object to calculate a hash (e.g. sha256 or 18 | md5) of the file as you download it. 19 | 20 | Here's an example of using both of them: 21 | 22 | .. code-block:: python 23 | 24 | import hashlib 25 | # progressbar is provided by progressbar2 on PyPI. 26 | from progressbar import DataTransferBar 27 | from requests_download import download, HashTracker, ProgressTracker 28 | 29 | hasher = HashTracker(hashlib.sha256()) 30 | progress = ProgressTracker(DataTransferBar()) 31 | 32 | download('https://github.com/takluyver/requests_download/archive/master.zip', 33 | 'requests_download.zip', trackers=(hasher, progress)) 34 | 35 | assert hasher.hashobj.hexdigest() == '...' 36 | 37 | To make your own tracker, subclass TrackerBase and define any of these methods: 38 | 39 | .. 
code-block:: python 40 | 41 | from requests_download import TrackerBase 42 | 43 | class MyTracker(TrackerBase): 44 | def on_start(self, response): 45 | """Called with requests.Response object, which has response headers""" 46 | pass 47 | 48 | def on_chunk(self, chunk): 49 | """Called multiple times, with bytestrings of data received""" 50 | pass 51 | 52 | def on_finish(self): 53 | """Called when the download has completed""" 54 | pass 55 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["flit_core >=2,<4"] 3 | build-backend = "flit_core.buildapi" 4 | 5 | [tool.flit.metadata] 6 | module = "requests_download" 7 | author = "Thomas Kluyver" 8 | author-email = "thomas@kluyver.me.uk" 9 | home-page = "https://www.github.com/takluyver/requests_download" 10 | description-file = "README.rst" 11 | classifiers = ["License :: OSI Approved :: MIT License"] 12 | requires = ["requests"] 13 | 14 | -------------------------------------------------------------------------------- /requests_download.py: -------------------------------------------------------------------------------- 1 | """Download files using requests and save them to a target path 2 | 3 | Usage example:: 4 | 5 | import hashlib 6 | # progressbar is provided by progressbar2 on PYPI. 7 | from progressbar import DataTransferBar 8 | from requests_download import download, HashTracker, ProgressTracker 9 | 10 | hasher = HashTracker(hashlib.sha256()) 11 | progress = ProgressTracker(DataTransferBar()) 12 | 13 | download('https://github.com/takluyver/requests_download/archive/master.zip', 14 | 'requests_download.zip', trackers=(hasher, progress)) 15 | 16 | assert hasher.hashobj.hexdigest() == '...' 
import requests

__version__ = '0.1.2'


class TrackerBase(object):
    """Base class for objects that observe a download as it progresses.

    Every hook is a no-op here, so a subclass only needs to override the
    ones it cares about.
    """

    def on_start(self, response):
        """Called once with the requests.Response, before any data is read."""
        pass

    def on_chunk(self, chunk):
        """Called for each non-empty bytestring of data received."""
        pass

    def on_finish(self):
        """Called once after all data has been written to the target file."""
        pass


class ProgressTracker(TrackerBase):
    """Drive a progress bar object from download events.

    *progressbar* must provide ``start(max_value=...)``, ``update(value)``
    and ``finish()`` methods; these are the only calls made on it.
    """

    def __init__(self, progressbar):
        self.progressbar = progressbar
        self.recvd = 0  # running total of bytes passed to on_chunk()

    def on_start(self, response):
        """Start the bar, sized from Content-Length when that is usable."""
        max_value = None
        raw_length = response.headers.get('content-length')
        if raw_length is not None:
            try:
                max_value = int(raw_length)
            except ValueError:
                # A malformed header should not abort the download; fall
                # back to a bar of unknown length instead.
                max_value = None
        self.progressbar.start(max_value=max_value)
        self.recvd = 0

    def on_chunk(self, chunk):
        """Advance the bar by the size of *chunk*."""
        self.recvd += len(chunk)
        try:
            self.progressbar.update(self.recvd)
        except ValueError:
            # Probably the HTTP headers lied.
            pass

    def on_finish(self):
        """Mark the bar as finished."""
        self.progressbar.finish()


class HashTracker(TrackerBase):
    """Feed downloaded data into a hash object.

    *hashobj* only needs an ``update(bytes)`` method (e.g. the result of
    ``hashlib.sha256()``). It stays available as ``self.hashobj`` so the
    caller can read the digest afterwards.
    """

    def __init__(self, hashobj):
        self.hashobj = hashobj

    def on_chunk(self, chunk):
        """Update the wrapped hash object with *chunk*."""
        self.hashobj.update(chunk)


def download(url, target, headers=None, trackers=(), timeout=None):
    """Download a file using requests.

    This is like urllib.request.urlretrieve, but:

    - requests validates SSL certificates by default
    - you can pass tracker objects to e.g. display a progress bar or calculate
      a file hash.

    Parameters:

    - url: the address to fetch with a GET request.
    - target: path of the file to write; it is opened in ``'wb'`` mode.
    - headers: optional mapping of extra request headers. It is copied, never
      modified. A ``requests_download/<version>`` User-Agent is added unless
      the mapping already has a User-Agent under any capitalisation.
    - trackers: iterable of TrackerBase-like objects to notify.
    - timeout: passed straight through to ``requests.get``; the default of
      ``None`` keeps the previous behaviour of waiting indefinitely.

    Errors from requests (including the one raised by ``raise_for_status()``
    for an error status code) and from writing the file propagate to the
    caller. In that case ``on_finish`` is not called on the trackers.
    """
    # Copy so that adding the default User-Agent never leaks into the
    # caller's dict.
    headers = dict(headers) if headers is not None else {}
    # HTTP header names are case-insensitive: do not add a second
    # 'user-agent' entry next to a caller-supplied 'User-Agent'.
    if not any(name.lower() == 'user-agent' for name in headers):
        headers['user-agent'] = 'requests_download/' + __version__

    # The trackers are iterated several times below; materialise them so a
    # one-shot iterable (e.g. a generator) is not exhausted after on_start.
    trackers = tuple(trackers)

    r = requests.get(url, headers=headers, stream=True, timeout=timeout)
    try:
        r.raise_for_status()
        for t in trackers:
            t.on_start(r)

        with open(target, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
                    for t in trackers:
                        t.on_chunk(chunk)
    finally:
        # With stream=True the connection is only released once the body is
        # fully consumed or the response is closed, so close it explicitly,
        # including on the error paths.
        r.close()

    for t in trackers:
        t.on_finish()