├── LICENSE ├── MANIFEST ├── README.md ├── setup.cfg ├── setup.py └── wgetter.py /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | Copyright (c) 2014 Fernando Giannasi 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | The above copyright notice and this permission notice shall be included in all 10 | copies or substantial portions of the Software. 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 17 | 18 | -------------------------------------------------------------------------------- /MANIFEST: -------------------------------------------------------------------------------- 1 | # file GENERATED by distutils, do NOT edit 2 | setup.cfg 3 | setup.py 4 | wgetter.py 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | About 2 | ===== 3 | 4 | Wgetter is another command line download utility written completely in python. 5 | It is based on python-wget (https://bitbucket.org/techtonik/python-wget/src) 6 | with some improvements. 7 | 8 | It works on python >= 2.6 or python >=3.0 9 | Runs on Windows or Linux or Mac 10 | 11 | 12 | Usage 13 | ===== 14 | 15 | python -m wgetter 16 | 17 | 18 | API Usage 19 | ========= 20 | 21 | >>> import wgetter 22 | >>> filename = wgetter.download('https://sites.google.com/site/doctormike/pacman-1.2.tar.gz', outdir='/home/user') 23 | 100 % [====================================================>] 19.9KiB / 19.9KiB 100.0KiB/s eta 0:00:01 24 | >>> filename 25 | '/home/user/pacman-1.2.tar.gz' 26 | 27 | Obs.: If not set, output directory (outdir) defaults to current directory 28 | 29 | Installation 30 | ============ 31 | 32 | Using PIP: 33 | 34 | pip install wgetter 35 | 36 | Manually: 37 | 38 | Get the tarball at 39 | 40 | https://github.com/phoemur/wgetter/tarball/0.6 41 | 42 | or git clone 43 | 44 | git clone https://github.com/phoemur/wgetter.git 45 | 46 | Then 47 | 48 | python setup.py install 49 | 50 | Changelog 51 | ========= 52 | 53 | 0.7 (2017-07-01) 54 | * Some Bug Fixes 55 | 56 | 0.6 (2015-03-07) 57 | * Some Bug Fixes 58 | 59 | 0.5.1 (2014-08-25) 60 | * Added improved bar and estimated transfer time 61 | 62 | 0.3 (2014-03-08) 63 | * Added the option to set download's output directory 64 | 65 | 0.2 (2014-03-06) 66 | * Init version, uses urllib2 instead of urlretrieve (deprecated), reads in chunks with network transfer rate calculation. 67 | Fancy bar. Human readable file-sizes. Checks Md5 if available and download final size. 68 | It's a heavy modification of python-wget made for my needs that i decided to share. 69 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | setup( 4 | name='wgetter', 5 | version='0.7', 6 | author='Fernando Giannasi ', 7 | url='https://github.com/phoemur/wgetter', 8 | download_url = 'https://github.com/phoemur/wgetter/tarball/0.7', 9 | 10 | description="Another command line download utility written in python", 11 | license="MIT", 12 | classifiers=[ 13 | 'Environment :: Console', 14 | 'License :: OSI Approved :: MIT License', 15 | 'Operating System :: OS Independent', 16 | 'Programming Language :: Python :: 2', 17 | 'Programming Language :: Python :: 3', 18 | 'Topic :: Software Development :: Libraries :: Python Modules', 19 | 'Topic :: System :: Networking', 20 | 'Topic :: Utilities', 21 | ], 22 | 23 | py_modules=['wgetter'], 24 | 25 | long_description='''Wgetter is another command line download utility written completely in python. 26 | It is based on python-wget (https://bitbucket.org/techtonik/python-wget/src) 27 | with some improvements. 28 | 29 | It works on python >= 2.6 or python >=3.0 30 | Runs on Windows or Linux or Mac ''', 31 | ) 32 | -------------------------------------------------------------------------------- /wgetter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Wgetter is another command line download utility written completely in python. 5 | It is based on python-wget (https://bitbucket.org/techtonik/python-wget/src) with some improvements. 6 | It works on python >= 2.6 or python >=3.0 Runs on Windows or Linux or Mac 7 | 8 | API Usage: 9 | 10 | >>> import wgetter 11 | >>> filename = wgetter.download('https://sites.google.com/site/doctormike/pacman-1.2.tar.gz', outdir='/home/user') 12 | 100 % [====================================================>] 19.9KiB / 19.9KiB 100.0KiB/s eta 0:00:01 13 | >>> filename 14 | '/home/user/pacman-1.2.tar.gz' 15 | """ 16 | 17 | import sys 18 | import os 19 | import shutil 20 | import tempfile 21 | import hashlib 22 | import datetime 23 | 24 | from time import time 25 | 26 | PY3K = sys.version_info >= (3, 0) 27 | 28 | if PY3K: 29 | import urllib.request as ulib 30 | import urllib.parse as urlparse 31 | import http.cookiejar as cjar 32 | else: 33 | import urllib2 as ulib 34 | import urlparse 35 | import cookielib as cjar 36 | 37 | SUFFIXES = {1000: ['KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'], 38 | 1024: ['KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']} 39 | 40 | 41 | def approximate_size(size, a_kilobyte_is_1024_bytes=True): 42 | ''' 43 | Humansize.py from Dive into Python3 44 | Mark Pilgrim - http://www.diveintopython3.net/ 45 | Copyright (c) 2009, Mark Pilgrim, All rights reserved. 46 | 47 | Convert a file size to human-readable form. 48 | Keyword arguments: 49 | size -- file size in bytes 50 | a_kilobyte_is_1024_bytes -- if True (default), use multiples of 1024 51 | if False, use multiples of 1000 52 | Returns: string 53 | ''' 54 | 55 | size = float(size) 56 | 57 | if size < 0: 58 | raise ValueError('number must be non-negative') 59 | 60 | multiple = 1024 if a_kilobyte_is_1024_bytes else 1000 61 | for suffix in SUFFIXES[multiple]: 62 | size /= multiple 63 | if size < multiple: 64 | return '{0:.1f}{1}'.format(size, suffix) 65 | 66 | raise ValueError('number too large') 67 | 68 | 69 | def get_console_width(): 70 | """Return width of available window area. Autodetection works for 71 | Windows and POSIX platforms. Returns 80 for others 72 | 73 | Code from http://bitbucket.org/techtonik/python-pager 74 | """ 75 | 76 | if os.name == 'nt': 77 | STD_INPUT_HANDLE = -10 78 | STD_OUTPUT_HANDLE = -11 79 | STD_ERROR_HANDLE = -12 80 | 81 | # get console handle 82 | from ctypes import windll, Structure, byref 83 | try: 84 | from ctypes.wintypes import SHORT, WORD, DWORD 85 | except ImportError: 86 | # workaround for missing types in Python 2.5 87 | from ctypes import ( 88 | c_short as SHORT, c_ushort as WORD, c_ulong as DWORD) 89 | console_handle = windll.kernel32.GetStdHandle(STD_OUTPUT_HANDLE) 90 | 91 | # CONSOLE_SCREEN_BUFFER_INFO Structure 92 | class COORD(Structure): 93 | _fields_ = [("X", SHORT), ("Y", SHORT)] 94 | 95 | class SMALL_RECT(Structure): 96 | _fields_ = [("Left", SHORT), ("Top", SHORT), 97 | ("Right", SHORT), ("Bottom", SHORT)] 98 | 99 | class CONSOLE_SCREEN_BUFFER_INFO(Structure): 100 | _fields_ = [("dwSize", COORD), 101 | ("dwCursorPosition", COORD), 102 | ("wAttributes", WORD), 103 | ("srWindow", SMALL_RECT), 104 | ("dwMaximumWindowSize", DWORD)] 105 | 106 | sbi = CONSOLE_SCREEN_BUFFER_INFO() 107 | ret = windll.kernel32.GetConsoleScreenBufferInfo( 108 | console_handle, byref(sbi)) 109 | if ret == 0: 110 | return 0 111 | return sbi.srWindow.Right + 1 112 | 113 | elif os.name == 'posix': 114 | from fcntl import ioctl 115 | from termios import TIOCGWINSZ 116 | from array import array 117 | 118 | winsize = array("H", [0] * 4) 119 | try: 120 | ioctl(sys.stdout.fileno(), TIOCGWINSZ, winsize) 121 | except IOError: 122 | pass 123 | return (winsize[1], winsize[0])[0] 124 | 125 | return 80 126 | 127 | CONSOLE_WIDTH = get_console_width() 128 | 129 | # Need 2 spaces more to avoid linefeed on Windows 130 | AVAIL_WIDTH = CONSOLE_WIDTH - 59 if os.name == 'nt' else CONSOLE_WIDTH - 57 131 | 132 | 133 | def filename_from_url(url): 134 | """:return: detected filename or None""" 135 | fname = os.path.basename(urlparse.urlparse(url).path) 136 | if len(fname.strip(" \n\t.")) == 0: 137 | return None 138 | return fname 139 | 140 | 141 | def filename_from_headers(headers): 142 | """Detect filename from Content-Disposition headers if present. 143 | http://greenbytes.de/tech/tc2231/ 144 | 145 | :param: headers as dict, list or string 146 | :return: filename from content-disposition header or None 147 | """ 148 | if type(headers) == str: 149 | headers = headers.splitlines() 150 | if type(headers) == list: 151 | headers = dict([x.split(':', 1) for x in headers]) 152 | cdisp = headers.get("Content-Disposition") 153 | if not cdisp: 154 | return None 155 | cdtype = cdisp.split(';') 156 | if len(cdtype) == 1: 157 | return None 158 | if cdtype[0].strip().lower() not in ('inline', 'attachment'): 159 | return None 160 | # several filename params is illegal, but just in case 161 | fnames = [x for x in cdtype[1:] if x.strip().startswith('filename=')] 162 | if len(fnames) > 1: 163 | return None 164 | name = fnames[0].split('=')[1].strip(' \t"') 165 | name = os.path.basename(name) 166 | if not name: 167 | return None 168 | return name 169 | 170 | 171 | def filename_fix_existing(filename, dirname): 172 | """Expands name portion of filename with numeric ' (x)' suffix to 173 | return filename that doesn't exist already. 174 | """ 175 | name, ext = filename.rsplit('.', 1) 176 | names = [x for x in os.listdir(dirname) if x.startswith(name)] 177 | names = [x.rsplit('.', 1)[0] for x in names] 178 | suffixes = [x.replace(name, '') for x in names] 179 | # filter suffixes that match ' (x)' pattern 180 | suffixes = [x[2:-1] for x in suffixes 181 | if x.startswith(' (') and x.endswith(')')] 182 | indexes = [int(x) for x in suffixes 183 | if set(x) <= set('0123456789')] 184 | idx = 1 185 | if indexes: 186 | idx += sorted(indexes)[-1] 187 | return '{0}({1}).{2}'.format(name, idx, ext) 188 | 189 | 190 | def report_bar(bytes_so_far, total_size, speed, eta): 191 | ''' 192 | This callback for the download function is used to print the download bar 193 | ''' 194 | percent = int(bytes_so_far * 100 / total_size) 195 | current = approximate_size(bytes_so_far).center(9) 196 | total = approximate_size(total_size).center(9) 197 | shaded = int(float(bytes_so_far) / total_size * AVAIL_WIDTH) 198 | sys.stdout.write( 199 | " {0}% [{1}{2}{3}] {4}/{5} {6} eta{7}".format(str(percent).center(4), 200 | '=' * (shaded - 1), 201 | '>', 202 | ' ' * (AVAIL_WIDTH - shaded), 203 | current, 204 | total, 205 | (approximate_size(speed) + '/s').center(11), 206 | eta.center(10))) 207 | sys.stdout.write("\r") 208 | sys.stdout.flush() 209 | 210 | 211 | def report_unknown(bytes_so_far, total_size, speed, eta): 212 | ''' 213 | This callback for the download function is used 214 | when the total size is unknown 215 | ''' 216 | sys.stdout.write( 217 | "Downloading: {0} / Unknown - {1}/s ".format(approximate_size(bytes_so_far), 218 | approximate_size(speed))) 219 | 220 | sys.stdout.write("\r") 221 | sys.stdout.flush() 222 | 223 | 224 | def report_onlysize(bytes_so_far, total_size, speed, eta): 225 | ''' 226 | This callback for the download function is used when console width 227 | is not enough to print the bar. 228 | It prints only the sizes 229 | ''' 230 | percent = int(bytes_so_far * 100 / total_size) 231 | current = approximate_size(bytes_so_far).center(10) 232 | total = approximate_size(total_size).center(10) 233 | sys.stdout.write('D: {0}% -{1}/{2}'.format(percent, current, total) + "eta {0}".format(eta)) 234 | sys.stdout.write("\r") 235 | sys.stdout.flush() 236 | 237 | 238 | def md5sum(filename, blocksize=8192): 239 | ''' 240 | Returns the MD5 checksum of a file 241 | ''' 242 | with open(filename, 'rb') as fh: 243 | m = hashlib.md5() 244 | while True: 245 | data = fh.read(blocksize) 246 | if not data: 247 | break 248 | m.update(data) 249 | return m.hexdigest() 250 | 251 | 252 | def download(link, outdir='.', chunk_size=4096): 253 | ''' 254 | This is the Main function, which downloads a given link 255 | and saves on outdir (default = current directory) 256 | ''' 257 | url = None 258 | fh = None 259 | eta = 'unknown ' 260 | bytes_so_far = 0 261 | filename = filename_from_url(link) or "." 262 | cj = cjar.CookieJar() 263 | 264 | # get filename for temp file in current directory 265 | (fd_tmp, tmpfile) = tempfile.mkstemp( 266 | ".tmp", prefix=filename + ".", dir=outdir) 267 | os.close(fd_tmp) 268 | os.unlink(tmpfile) 269 | 270 | try: 271 | opener = ulib.build_opener(ulib.HTTPCookieProcessor(cj)) 272 | url = opener.open(link) 273 | fh = open(tmpfile, mode='wb') 274 | 275 | headers = url.info() 276 | try: 277 | total_size = int(headers['Content-Length']) 278 | except (ValueError, KeyError, TypeError): 279 | total_size = 'unknown' 280 | 281 | try: 282 | md5_header = headers['Content-MD5'] 283 | except (ValueError, KeyError, TypeError): 284 | md5_header = None 285 | 286 | # Define which callback we're gonna use 287 | if total_size != 'unknown': 288 | if CONSOLE_WIDTH > 57: 289 | reporthook = report_bar 290 | else: 291 | reporthook = report_onlysize 292 | else: 293 | reporthook = report_unknown 294 | 295 | # Below are the registers to calculate network transfer rate 296 | time_register = time() 297 | speed = 0.0 298 | speed_list = [] 299 | bytes_register = 0.0 300 | eta = 'unknown ' 301 | 302 | # Loop that reads in chunks, calculates speed and does the callback to 303 | # print the progress 304 | while True: 305 | chunk = url.read(chunk_size) 306 | # Update Download Speed every 1 second 307 | if time() - time_register > 0.5: 308 | speed = (bytes_so_far - bytes_register) / \ 309 | (time() - time_register) 310 | speed_list.append(speed) 311 | 312 | # Set register properly for future use 313 | time_register = time() 314 | bytes_register = bytes_so_far 315 | 316 | # Estimative of remaining download time 317 | if total_size != 'unknown' and len(speed_list) == 3: 318 | speed_mean = sum(speed_list) / 3 319 | eta_sec = int((total_size - bytes_so_far) / speed_mean) 320 | eta = str(datetime.timedelta(seconds=eta_sec)) 321 | speed_list = [] 322 | 323 | bytes_so_far += len(chunk) 324 | 325 | if not chunk: 326 | sys.stdout.write('\n') 327 | break 328 | 329 | fh.write(chunk) 330 | reporthook(bytes_so_far, total_size, speed, eta) 331 | except KeyboardInterrupt: 332 | print('\n\nCtrl + C: Download aborted by user') 333 | print('Partial downloaded file:\n{0}'.format(os.path.abspath(tmpfile))) 334 | sys.exit(1) 335 | finally: 336 | if url: 337 | url.close() 338 | if fh: 339 | fh.close() 340 | 341 | filenamealt = filename_from_headers(headers) 342 | if filenamealt: 343 | filename = filenamealt 344 | 345 | # add numeric '(x)' suffix if filename already exists 346 | if os.path.exists(os.path.join(outdir, filename)): 347 | filename = filename_fix_existing(filename, outdir) 348 | filename = os.path.join(outdir, filename) 349 | 350 | shutil.move(tmpfile, filename) 351 | 352 | # Check if sizes matches 353 | if total_size != 'unknown' and total_size != bytes_so_far: 354 | print( 355 | '\n\nWARNING!! Downloaded file size mismatches... Probably corrupted...') 356 | 357 | # Check md5 if it was in html header 358 | if md5_header: 359 | print('\nValidating MD5 checksum...') 360 | if md5_header == md5sum(filename): 361 | print('MD5 checksum passed!') 362 | else: 363 | print('MD5 checksum do NOT passed!!!') 364 | 365 | return filename 366 | 367 | if __name__ == '__main__': 368 | if len(sys.argv) == 1 or sys.argv[1] in {'-h', '--help'}: 369 | print('Usage: {0} '.format(sys.argv[0])) 370 | 371 | args = [str(elem) for elem in sys.argv[1:]] 372 | 373 | for link in args: 374 | print('Downloading ' + link) 375 | filename = download(link) 376 | print('\nSaved under {0}'.format(filename)) 377 | --------------------------------------------------------------------------------