├── AUTHOR
├── README.md
└── mfdl.py

--------------------------------------------------------------------------------
/AUTHOR:
--------------------------------------------------------------------------------
Mangafox Download Script by Kunal Sarkhel and contributors

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Mangafox Download Script
========================

About
-----
Mangafox Download Script is a manga downloader similar to my old Onemanga Download Script (onemanga.com has since shut down). It works by scraping the image URL from every page of a manga chapter and then downloading all of the images. I created this because I prefer reading manga in a viewer such as Comix, and I like keeping manga on my hard drive in case I am not connected to the internet.

Dependencies
------------

* Python 3.3 or later
* BeautifulSoup (``pip install beautifulsoup4``)

Tested on Arch Linux. It should work on any Linux, OS X, or Windows machine as long as the dependencies are installed.

Usage
-----

Mandatory argument:

    -m --manga    Manga to download

Optional arguments:

    -s --start    Chapter to start downloading from
    -e --end      Chapter to end downloading at
    -c --cbz      Create a cbz archive after download
    -r --remove   Remove image files after the creation of a cbz archive

To download an entire series:

    ~ $ python mfdl.py -m MANGA_NAME

To download a specific chapter:

    ~ $ python mfdl.py -m MANGA_NAME -s CHAPTER

To download a range of chapters:

    ~ $ python mfdl.py -m MANGA_NAME -s CHAPTER_START -e CHAPTER_END

Examples
--------
Download all of The World God Only Knows:

    ~ $ python mfdl.py -m "The World God Only Knows"

Download The World God Only Knows chapter 222.5:

    ~ $ python mfdl.py -m "The World God Only Knows" -s 222.5

Download The World God Only Knows chapters 190-205:

    ~ $ python mfdl.py -m "The World God Only Knows" -s 190 -e 205

Notes
-----
Please do not overuse or abuse this script and destroy Mangafox. If you have some cash to spare, why not donate some to them to help cover their server costs? I really would not like people to destroy Mangafox through greedy downloading. Use this wisely and don't be evil.
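
How it works
------------
For the curious, the heart of mfdl.py is a small scraping loop. The sketch below is a simplified illustration (the function name is mine, and the real script additionally handles gzip-compressed responses, retries failed downloads, and can pack chapters into cbz archives):

    import urllib.request
    from bs4 import BeautifulSoup

    def scrape_chapter(chapter_url, pages):
        """Collect the reader image URL from every page of a chapter."""
        image_urls = []
        for page in pages:
            html = urllib.request.urlopen(chapter_url + page + '.html').read()
            soup = BeautifulSoup(html, 'html.parser')
            image = soup.find('img', {'id': 'image'})  # Mangafox's reader image
            if image:
                image_urls.append(image['src'])
        return image_urls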

--------------------------------------------------------------------------------
/mfdl.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# encoding: utf-8

import sys
import argparse
import os
import urllib.request
import urllib.error
import glob
import shutil
import re
import time
import gzip
from itertools import filterfalse
from zipfile import ZipFile
from functools import reduce
from contextlib import closing
from collections import OrderedDict

from bs4 import BeautifulSoup

URL_BASE = "http://mangafox.me/"

def get_page_soup(url):
    """Download a page and return a BeautifulSoup object of the html"""
    response = urllib.request.urlopen(url)

    # Mangafox may serve gzip-compressed pages; decompress before parsing
    if response.info().get('Content-Encoding') == 'gzip':
        gzip_file = gzip.GzipFile(fileobj=response)
        page_content = gzip_file.read()
    else:
        page_content = response.read()

    return BeautifulSoup(page_content, "html.parser")

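# A minimal usage sketch for get_page_soup (the URL below is hypothetical;
# any page that returns html, gzip-compressed or not, is handled the same way):
#
#     soup = get_page_soup('http://mangafox.me/manga/bleach/')
#     print(soup.title.text)
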
def get_chapter_urls(manga_name):
    """Get the chapter list for a manga"""
    replace = lambda s, k: s.replace(k, '_')
    manga_url = reduce(replace, [' ', '-'], manga_name.lower())
    url = '{0}manga/{1}'.format(URL_BASE, manga_url)
    print('Url: ' + url)
    soup = get_page_soup(url)
    manga_does_not_exist = soup.find('form', {'id': 'searchform'})
    if manga_does_not_exist:
        search_sort_options = 'sort=views&order=za'
        url = '{0}search.php?name={1}&{2}'.format(URL_BASE,
                                                  manga_url,
                                                  search_sort_options)
        soup = get_page_soup(url)
        results = soup.findAll('a', {'class': 'series_preview'})
        error_text = 'Error: Manga \'{0}\' does not exist'.format(manga_name)
        error_text += '\nDid you mean one of the following?\n * '
        error_text += '\n * '.join([manga.text for manga in results][:10])
        sys.exit(error_text)
    warning = soup.find('div', {'class': 'warning'})
    if warning and 'licensed' in warning.text:
        sys.exit('Error: ' + warning.text)
    chapters = OrderedDict()
    links = soup.findAll('a', {'class': 'tips'})
    if not links:
        sys.exit('Error: Manga either does not exist or has no chapters')
    # Strip the manga title from each link so only the chapter number remains
    replace_manga_name = re.compile(re.escape(manga_name.replace('_', ' ')),
                                    re.IGNORECASE)
    for link in links:
        chapter_number = float(replace_manga_name.sub('', link.text).strip())
        chapters[chapter_number] = link['href']

    return OrderedDict(sorted(chapters.items()))

def get_page_numbers(soup):
    """Return the list of page numbers from the parsed chapter page"""
    select = soup.findAll('select', {'class': 'm'})[0]
    return (option['value'] for option in select.findAll('option'))

def get_chapter_image_urls(url_fragment):
    """Find and return the image urls of every page in a chapter"""
    chapter_url = os.path.dirname(url_fragment) + '/'
    chapter = get_page_soup(chapter_url)
    pages = get_page_numbers(chapter)
    image_urls = []
    print('Getting image urls...')
    for page in pages:
        print('Getting image url from {0}{1}.html'.format(chapter_url, page))
        page_soup = get_page_soup(chapter_url + page + '.html')
        images = page_soup.findAll('img', {'id': 'image'})
        if images:
            image_urls.append(images[0]['src'])
    return image_urls

def get_chapter_number(url_fragment):
    """Parse the url fragment and return the chapter number."""
    # Chapter urls look like ".../manga/<name>/v01/c001/1.html"; the segments
    # between the manga name and the page number give e.g. "v01c001"
    return ''.join(url_fragment.rsplit("/")[5:-1])

def download_urls(image_urls, manga_name, chapter_number):
    """Download all images from a list"""
    download_dir = '{0}/{1}/'.format(manga_name, chapter_number)
    if os.path.exists(download_dir):
        shutil.rmtree(download_dir)
    os.makedirs(download_dir)
    for i, url in enumerate(image_urls):
        filename = './{0}/{1}/{2:03}.jpg'.format(manga_name, chapter_number, i)
        print('Downloading {0} to {1}'.format(url, filename))
        while True:
            time.sleep(2)  # throttle requests so we do not hammer the server
            try:
                urllib.request.urlretrieve(url, filename)
            except urllib.error.HTTPError as http_err:
                print('HTTP error {0}: {1}'.format(http_err.code,
                                                   http_err.reason))
                if http_err.code == 404:
                    break
            except urllib.error.ContentTooShortError:
                print('The image was only partially retrieved; retrying...')
            except Exception as err:
                print('Unknown error: {0}'.format(err))
            else:
                break

def make_cbz(dirname):
    """Create a CBZ file from all JPEG image files in a directory."""
    zipname = dirname + '.cbz'
    images = sorted(glob.glob(os.path.abspath(dirname) + '/*.jpg'))
    with closing(ZipFile(zipname, 'w')) as zipfile:
        for filename in images:
            print('writing {0} to {1}'.format(filename, zipname))
            # Store only the basename so the archive does not embed the
            # absolute path of every image
            zipfile.write(filename, os.path.basename(filename))

def download_manga(manga_name, range_start=1, range_end=None,
                   b_make_cbz=False, remove=False):
    """Download a range of chapters"""
    chapter_urls = get_chapter_urls(manga_name)

    if range_end is None:
        range_end = max(chapter_urls.keys())

    # filterfalse drops every chapter outside the requested range
    out_of_range = lambda item: item[0] < range_start or item[0] > range_end
    for chapter, url in filterfalse(out_of_range, chapter_urls.items()):
        chapter_number = get_chapter_number(url)
        print('===============================================')
        print('Chapter ' + chapter_number)
        print('===============================================')
        image_urls = get_chapter_image_urls(url)
        download_urls(image_urls, manga_name, chapter_number)
        download_dir = './{0}/{1}'.format(manga_name, chapter_number)
        if b_make_cbz:
            make_cbz(download_dir)
        if remove:
            shutil.rmtree(download_dir)

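# A hypothetical programmatic call, bypassing the command line (the title is
# only an example): download chapters 1-10 of a series, pack each chapter
# into a cbz archive, and delete the loose image files afterwards:
#
#     download_manga('Bleach', 1, 10, b_make_cbz=True, remove=True)
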
def main():
    parser = argparse.ArgumentParser(description='Manga Fox Downloader')

    parser.add_argument('--manga', '-m',
                        required=True,
                        action='store',
                        help='Manga to download')

    # Chapter numbers can be fractional (e.g. 222.5), so parse them as floats
    parser.add_argument('--start', '-s',
                        action='store',
                        type=float,
                        default=1,
                        help='Chapter to start downloading from')

    parser.add_argument('--end', '-e',
                        action='store',
                        type=float,
                        default=None,
                        help='Chapter to end downloading at')

    parser.add_argument('--cbz', '-c',
                        action='store_true',
                        default=False,
                        help='Create cbz archive after download')

    parser.add_argument('--remove', '-r',
                        action='store_true',
                        default=False,
                        help='Remove image files after the creation of a cbz archive')

    args = parser.parse_args()

    print('Getting chapters of {0} from {1} to {2}'.format(args.manga,
                                                           args.start,
                                                           args.end))

    download_manga(args.manga, args.start, args.end, args.cbz, args.remove)

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------