├── .gitignore ├── LICENSE ├── README.md └── catch.py /.gitignore: -------------------------------------------------------------------------------- 1 | Images 2 | log.txt 3 | tags 4 | config 5 | \#config\# 6 | all_tags.txt -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Kanagi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | MoeDownloader 2 | ====== 3 | 基于python的福利图嗅探器,目前可以嗅探草榴、煎蛋和二次萌エロ画像ブログ这三个网站的图片,如果需要加入其他网站也比较容易。 4 | 5 | 基本用法: 6 | ====== 7 | "` 8 | python catch.py [topic] 9 | `" 10 | 11 | 其中,[topic]可以是caoliu、moeimg、jandan三个选项之一 12 | 13 | 更多的用法请输入 14 | "` 15 | python catch.py -h 16 | `" 17 | 来查看 18 | -------------------------------------------------------------------------------- /catch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # 4 | # LICENSE: see LICENSE file 5 | # 6 | # bbs mode: 7 | # You must rewrite Download,GetCurrentDir,CheckThreadsValid, 8 | # GetThreadUrl and GetTitle function. 9 | # single-page mode: 10 | # You must rewrite Download function. 11 | 12 | import sys 13 | import logging 14 | import os 15 | import os.path 16 | import requests 17 | import requesocks 18 | import re 19 | import ConfigParser 20 | import argparse 21 | import imghdr 22 | from HTMLParser import HTMLParser 23 | 24 | def success(val): return val,None 25 | def error(why): return None,why 26 | def get_val(m_val): return m_val[0] 27 | def get_error(m_val): return m_val[1] 28 | 29 | #global variables 30 | init_with_config_file = True 31 | has_log_file = True 32 | 33 | if os.name != 'nt': 34 | WindowsError = OSError 35 | 36 | class Downloader(object): 37 | """docstring for ClassName""" 38 | def __init__(self): 39 | super(Downloader, self).__init__() 40 | 41 | self.type = 'none' 42 | self._isUrlFormat = re.compile(r'https?://([\w-]+\.)+[\w-]+(/[\w\- ./?%&=]*)?'); 43 | self._path = get_val(self.DealDir("Images")) 44 | self.currentDir = "" 45 | self.cf = ConfigParser.ConfigParser() 46 | self.pageNum = 1 47 | self.pageTo = 1 48 | self.isMono = False 49 | self.keepOriginTitle = True 50 | self.numToDownload = -1 51 | self.loggingFile = 'log.txt' 52 | self.retryTimes = 5 53 | self.encode = None 54 | self.useProxy = False 55 | self.httpProxy = '127.0.0.1:1080' 56 | self.httpsProxy = '127.0.0.1:1080' 57 | self.imageCount = 0 58 | self.verbose = False 59 | self.silent = False 60 | self.targetThread = "" # single thread 61 | self.targetThreadRegex = "" 62 | 63 | #moeimg specific 64 | self.moeimgdomain = 'moeimg.net' 65 | self.moeimgTags = False 66 | self.moeimgSortWithTags = False 67 | self.currentTag = 'default' 68 | 69 | #caoliu specific 70 | self.caoliudomain = 't66y.com' 71 | 72 | #jandan specific 73 | self.jandandomain = 'jandan.net' 74 | self.jandanPageToDownload = 1 75 | 76 | global init_with_config_file 77 | global has_log_file 78 | if init_with_config_file: 79 | if not os.path.exists('config'): 80 | self.InternalPrint('No config file. Creating a default one.', False) 81 | self.SetDefaultConfig(); 82 | self.LoadConfig() 83 | #init logging file 84 | if has_log_file: 85 | logging.basicConfig(filename = os.path.join(os.getcwd(), self.loggingFile), level = logging.WARN, filemode = 'a+', format = '%(asctime)s - %(levelname)s: %(message)s') 86 | 87 | def InternalPrint(self, msg, is_verbose): 88 | if not self.silent: 89 | if is_verbose: 90 | if(self.verbose): 91 | print(msg) 92 | else: 93 | print(msg) 94 | 95 | def LoadConfig(self): 96 | self.cf.read("config") 97 | self.pageNum = self.cf.getint('web','page_from') 98 | self.pageTo = self.cf.getint('web','page_to') 99 | self.isMono = self.cf.getboolean('file','mono') 100 | self.numToDownload = self.cf.getint('web','num_to_download') 101 | self.loggingFile = self.cf.get('basic','log_file') 102 | self.retryTimes = self.cf.getint('web','retry_times') 103 | self.caoliudomain = self.cf.get('caoliu','domain') 104 | self.moeimgdomain = self.cf.get('moeimg','domain') 105 | self.keepOriginTitle = self.cf.getboolean('file','keep_origin_title') 106 | self.jandandomain = self.cf.get('jandan','domain') 107 | self.jandanPageToDownload = self.cf.getint('jandan','pages_to_download') 108 | self.moeimgTags = self.cf.getboolean('moeimg','tags') 109 | self.moeimgSortWithTags = self.cf.getboolean('moeimg','sort_with_tags') 110 | self.useProxy = self.cf.getboolean('basic','use_proxy') 111 | self.httpProxy = self.cf.get('basic','http_proxy') 112 | self.httpsProxy = self.cf.get('basic','https_proxy') 113 | 114 | 115 | def SetDefaultConfig(self): 116 | self.cf.add_section('basic') 117 | self.cf.set('basic','log_file','log.txt') 118 | self.cf.set('basic','use_proxy','false') 119 | self.cf.set('basic','http_proxy','127.0.0.1:1080') 120 | self.cf.set('basic','https_proxy','127.0.0.1:1080') 121 | self.cf.add_section('web') 122 | self.cf.set('web','page_from','1') 123 | self.cf.set('web','page_to','1') 124 | self.cf.set('web','num_to_download','-1') 125 | self.cf.set('web','retry_times','5') 126 | self.cf.add_section('caoliu') 127 | self.cf.set('caoliu','domain','t66y.com') 128 | self.cf.add_section('moeimg') 129 | self.cf.set('moeimg','domain','moeimg.net') 130 | self.cf.set('moeimg','tags','false') 131 | self.cf.set('moeimg','sort_with_tags','false') 132 | self.cf.add_section('jandan') 133 | self.cf.set('jandan','domain','jandan.net') 134 | self.cf.set('jandan','pages_to_download','1') 135 | self.cf.add_section('file') 136 | self.cf.set('file','mono','false') 137 | self.cf.set('file','keep_origin_title','true') 138 | with open('config', 'wb') as configfile: 139 | self.cf.write(configfile) 140 | 141 | def StripIllegalChar(self, path): 142 | return path.strip('>').strip('<').strip('*').strip('|').strip('?').strip(':').strip('"').strip('/') 143 | 144 | def DealDir(self, path): 145 | solved = False 146 | while True: 147 | try: 148 | if not os.path.exists(path): 149 | os.mkdir(path) 150 | return success(path) 151 | except WindowsError: 152 | #windows specific 153 | global has_log_file 154 | if has_log_file: 155 | logging.error('Windows error with path %s' % path) 156 | if not solved: 157 | path = self.StripIllegalChar(path) 158 | solved = True 159 | else: 160 | return error('Invalid path name %s' % path) 161 | 162 | def FetchHtml(self, url): 163 | retry = 0 164 | proxies = { 165 | 'http':self.httpProxy, 166 | 'https':self.httpsProxy, 167 | } 168 | headers = { 169 | 'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9', 170 | } 171 | while True: 172 | try: 173 | self.InternalPrint("Fetching HTML: %s" % url, True) 174 | session = requesocks.session() 175 | session.headers = headers; 176 | if self.useProxy: 177 | self.InternalPrint("Using proxy: http %s, https %s" % (self.httpProxy, self.httpsProxy), True) 178 | session.proxies = proxies 179 | else: 180 | self.InternalPrint("No proxy.", True) 181 | response = session.get(url) 182 | if response.status_code != 200: 183 | self.InternalPrint(response.text, True) 184 | return error("Failed to fetch html. CODE:%i" % response.status_code) 185 | elif (response.text) == 0: 186 | return error("Empty html.") 187 | else: 188 | if self.encode != None: 189 | response.encoding = self.encode 190 | return success(response.text) 191 | #except requests.ConnectionError: 192 | except requesocks.exceptions.ConnectionError: 193 | if retry0 and num>=self.numToDownload: 241 | break 242 | 243 | def DoFetchSingleThread(self, url): 244 | self.InternalPrint('Thread:'+url, False) 245 | 246 | self.InternalPrint("Fetching thread html...", True) 247 | res = self.FetchHtml(url) 248 | if get_error(res): 249 | return res 250 | self.InternalPrint("Thread html fetched.", True) 251 | 252 | html = get_val(res) 253 | #get current directory 254 | if self.keepOriginTitle: 255 | # get thread title 256 | #self.currentDir = self.GetTitle(href) 257 | prog = re.compile(self.targetThreadRegex, re.IGNORECASE) 258 | matches = prog.findall(html) 259 | self.currentDir = matches[0] 260 | else: 261 | self.currentDir = url.split('/')[-1].split('.')[-2] 262 | #TODO: gb2312 bug 263 | try: 264 | self.InternalPrint(self.currentDir.encode(sys.getfilesystemencoding())+'/', False) 265 | except UnicodeEncodeError: 266 | global has_log_file 267 | if has_log_file: 268 | logging.warning('Unicode encode error at %s' % url) 269 | self.currentDir = 'tmp' 270 | self.InternalPrint(self.currentDir+'/', False) 271 | 272 | html = get_val(res) 273 | self.currentTag = self.GetThreadTagName(html) 274 | self.FetchImgLinksFromThread(html); 275 | return success(0) 276 | 277 | # need to rewrite 278 | def GetThreadUrl(self, href):pass 279 | def GetTitle(self, href):pass 280 | def CheckThreadsValid(self, href):pass 281 | def GetCurrentDir(self, href):pass 282 | def GetThreadTagName(self, html):return 'default' 283 | def Download(self): 284 | self.init() 285 | 286 | def PreHandleImgLink(self, href): 287 | return href 288 | 289 | def PreHandleTagName(self, local_file): 290 | return local_file 291 | 292 | def FetchThreadHtml(self, threadurl): 293 | self.InternalPrint("Fetching thread html...", True) 294 | res = self.FetchHtml(threadurl) 295 | self.InternalPrint("Thread html fetched.", True) 296 | if get_error(res): 297 | return res 298 | html = get_val(res) 299 | self.currentTag = self.GetThreadTagName(html) 300 | self.FetchImgLinksFromThread(html); 301 | return success(html) 302 | 303 | def FetchImgLinksFromThread(self, htmlSource): 304 | prog = re.compile(self.ImgRegex, re.IGNORECASE) 305 | matchesImgSrc = prog.findall(htmlSource) 306 | global has_log_file 307 | if not self.isMono: 308 | self.imageCount = 0 309 | for href in matchesImgSrc: 310 | self.InternalPrint(href, True) 311 | href = self.PreHandleImgLink(href) 312 | if not self.CheckIsUrlFormat(href): 313 | #warning: requests library does not support non-http(s) url 314 | self.InternalPrint('Invalid url format %s' % href, False) 315 | if has_log_file: 316 | logging.error('Invalid url format %s' % href) 317 | continue; 318 | res = self.download_file(href) 319 | if get_error(res): 320 | self.InternalPrint(get_error(res).encode(sys.getfilesystemencoding()), False) 321 | self.imageCount += 1 322 | 323 | def CheckIsUrlFormat(self, value): 324 | return self._isUrlFormat.match(value) is not None 325 | 326 | def GetImageType(self, img_path): 327 | type = imghdr.what(img_path) 328 | if type != None: 329 | return type 330 | else: 331 | return "jpg" 332 | 333 | def ImageExists(self, path, img_name): 334 | files = os.listdir(path) 335 | for f in files: 336 | if img_name == os.path.splitext(f)[0]: 337 | return True 338 | return False 339 | 340 | def download_file(self, url): 341 | dir = self.type 342 | local_directory = "" 343 | if self.isMono: 344 | local_directory = "Images/"+ dir + '/' 345 | self.DealDir(local_directory) 346 | local_directory = self.PreHandleTagName(local_directory) 347 | else: 348 | local_directory = "Images/" + dir + '/' 349 | self.DealDir(local_directory) 350 | local_directory = self.PreHandleTagName(local_directory) 351 | # deal windows directory error 352 | res = self.DealDir(local_directory + self.currentDir + '/') 353 | if get_error(res): 354 | #self.InternalPrint(get_error(res), False) 355 | self.DealDir(local_directory + 'tmp/') 356 | local_directory += 'tmp/' 357 | else: 358 | local_directory += self.currentDir + '/' 359 | 360 | #local_filename = local_filename + self.StripIllegalChar(url.split('/')[-1])#has bug in windows 361 | image_path = local_directory + str(self.imageCount)# so use image count instead 362 | if self.ImageExists(local_directory, str(self.imageCount)): 363 | if not self.isMono: 364 | return error('\t skip '+image_path) 365 | else: 366 | while(self.ImageExists(local_directory, str(self.imageCount))): 367 | self.imageCount+=1 368 | image_path = local_directory + str(self.imageCount) 369 | 370 | self.InternalPrint('\t=>'+image_path.encode(sys.getfilesystemencoding()), False) 371 | # NOTE the stream=True parameter 372 | retry = 0 373 | proxies = { 374 | 'http':self.httpProxy, 375 | 'https':self.httpsProxy, 376 | } 377 | global has_log_file 378 | while True: 379 | try: 380 | session = requesocks.session() 381 | if self.useProxy: 382 | self.InternalPrint("Using proxy: http %s, https %s" % (self.httpProxy, self.httpsProxy), True) 383 | session.proxies = proxies 384 | #r = requests.get(url, stream=True, proxies=proxies) 385 | #else: 386 | #r = requests.get(url, stream=True) 387 | r = session.get(url) 388 | break 389 | #except requests.ConnectionError: 390 | except requesocks.exceptions.ConnectionError: 391 | if retry]+?)[ \'"]\s*(?:alt="\d*")?\s*class="thumbnail_image"' 421 | #self.ThreadsRegex = r'\s*]+?)[\'"]\s*title=["\']?([^\'"]+?)[\'"]' 422 | self.ThreadsRegex = r'

\s*\s*([^<]+?)\s*\s*

' 423 | self.targetThreadRegex = r'\s*\s*([^<]+?)\s*' 424 | 425 | def Download(self): 426 | if self.moeimgTags: 427 | res = self.LoadTags() 428 | if get_error(res): 429 | self.InternalPrint(get_error(res), False) 430 | return 431 | tags = get_val(res) 432 | else: 433 | tags = ['default'] 434 | self.InternalPrint("=============== start ===============", False) 435 | i = self.pageNum 436 | domain = '' 437 | for tag in tags: 438 | self.currentTag = tag 439 | if self.targetThread == "": 440 | for i in range(self.pageNum, self.pageTo+1): 441 | if not self.moeimgTags: 442 | self.InternalPrint("=============== loading page {0} ===============".format(i), False) 443 | if i == 1: 444 | domain = "http://"+self.moeimgdomain 445 | else: 446 | domain = "http://"+self.moeimgdomain+"/page/{0}".format(i) 447 | else: 448 | self.InternalPrint("=============== loading tag: %s page %i ===============" % (tag.decode('utf-8').encode(sys.getfilesystemencoding()),i), False) 449 | if i == 1: 450 | domain = "http://"+self.moeimgdomain+"/tag/%s" % (tag) 451 | else: 452 | domain = "http://"+self.moeimgdomain+"/tag/%s/page/%i" % (tag,i) 453 | res = self.DoFetch(domain) 454 | if get_error(res): 455 | self.InternalPrint(get_error(res), False) 456 | else: 457 | self.InternalPrint("=============== loading target thread {0} ===============".format(self.targetThread), False) 458 | res = self.DoFetchSingleThread(self.targetThread) 459 | if get_error(res): 460 | self.InternalPrint(get_error(res), False) 461 | self.InternalPrint("=============== end ===============", False) 462 | 463 | def FetchAllTags(self): 464 | res = self.FetchHtml('http://'+self.moeimgdomain+'/taglist') 465 | if get_error(res): 466 | return res 467 | html = get_val(res) 468 | tagRegex = r'([^<]+?)' 469 | prog = re.compile(tagRegex, re.IGNORECASE) 470 | matches = prog.findall(html) 471 | tags = [] 472 | for m in matches: 473 | if re.search('tag', m[0]): 474 | if not m[1] in tags: 475 | tags.append(m[1]) 476 | self.InternalPrint('Fetched %s tags.' % len(tags), True) 477 | return success(tags) 478 | 479 | def LoadTags(self): 480 | if os.path.exists(self.tag_file): 481 | tagsfile = open(self.tag_file, 'r') 482 | else: 483 | return error('No tags file.') 484 | 485 | tags = [] 486 | for tag in tagsfile: 487 | tags.append(tag.strip('\n').strip(';').decode('utf-8').replace(' ', '-').lower()) 488 | self.InternalPrint('Loaded %s tags.' % len(tags), True) 489 | return success(tags) 490 | 491 | def GetCurrentDir(self, href): 492 | dir = href[0].split('/')[-1] 493 | dir = dir.split('.')[-2] 494 | return dir 495 | 496 | def GetThreadTagName(self, html): 497 | #tagRegex = r'\s*]+?)[ \'"]\s*>([^<]*)' 498 | tagRegex = r']+?)[ \'"]\s*rel="tag">([^<]*)' 499 | prog = re.compile(tagRegex, re.IGNORECASE) 500 | matches = prog.findall(html) 501 | for m in matches: 502 | if re.search('http://moeimg.net/tag/',m[0]): 503 | return m[1] 504 | return 'default' 505 | 506 | def PreHandleTagName(self, local_file): 507 | if self.moeimgSortWithTags: 508 | if self.moeimgTags: 509 | local_file += self.currentTag.encode(sys.getfilesystemencoding()) + '/' 510 | else: 511 | local_file += self.currentTag + '/' 512 | self.DealDir(local_file) 513 | return local_file 514 | 515 | def CheckThreadsValid(self, href): 516 | return True 517 | 518 | def GetThreadUrl(self, href): 519 | return href[0] 520 | 521 | def GetTitle(self, href): 522 | return href[1] 523 | 524 | class CaoliuDownloader(Downloader): 525 | def __init__(self): 526 | super(CaoliuDownloader, self).__init__() 527 | 528 | self.type = 'caoliu' 529 | self.encode = 'gbk' 530 | self.ImgRegex = r']+?)[ \'"]\s*type=\'image\'' 531 | self.ThreadsRegex = r'

]+?)[ \'"][^>]*?>(?:)?([^<]*)(?:)?

' 532 | self.targetThreadRegex = r' --> [^<]+?\s*([^<]+?)\s*' 533 | 534 | def Download(self): 535 | self.InternalPrint("=============== start ===============", False) 536 | if self.targetThread == "": 537 | for i in range(self.pageNum, self.pageTo+1): 538 | self.InternalPrint("=============== loading page {0} ===============".format(i), False) 539 | domain = "http://"+self.caoliudomain+"/thread0806.php?fid=16&search=&page={0}".format(i) 540 | res = self.DoFetch(domain) 541 | if get_error(res): 542 | self.InternalPrint(get_error(res), False) 543 | else: 544 | self.InternalPrint("=============== loading target thread {0} ===============".format(self.targetThread), False) 545 | res = self.DoFetchSingleThread(self.targetThread) 546 | if get_error(res): 547 | self.InternalPrint(get_error(res), False) 548 | self.InternalPrint("=============== end ===============", False) 549 | 550 | def GetCurrentDir(self, href): 551 | dir = href[0].split('/')[-3] + href[0].split('/')[-2] + href[0].split('/')[-1] 552 | dir = dir.split('.')[-2] 553 | return dir 554 | 555 | def CheckThreadsValid(self, href): 556 | return href[0][0:8] == "htm_data" 557 | 558 | def GetThreadUrl(self, href): 559 | return 'http://'+self.caoliudomain+'/' + href[0] 560 | 561 | def GetTitle(self, href): 562 | return href[1] 563 | 564 | class MLStripper(HTMLParser): 565 | def __init__(self): 566 | self.reset() 567 | self.fed = [] 568 | def handle_data(self, d): 569 | self.fed.append(d) 570 | def get_data(self): 571 | return ''.join(self.fed) 572 | 573 | class JanDanDownloader(Downloader): 574 | def __init__(self): 575 | super(JanDanDownloader, self).__init__() 576 | 577 | self.isMono = True 578 | 579 | self.type = 'jandan' 580 | self.encode = 'utf-8' 581 | self.ImgRegex = r'

\s*]+?)[ \'"]\s*target="_blank"\s*class="view_img_link"\s*>' 582 | 583 | def Download(self): 584 | #get max 585 | res = self.FetchHtml("http://"+self.jandandomain+"/ooxx") 586 | if get_error(res): 587 | self.InternalPrint(get_error(res), False) 588 | return res 589 | html = get_val(res) 590 | newest = self.get_max(html) 591 | 592 | self.InternalPrint("=============== start ===============", False) 593 | for i in range(newest-self.jandanPageToDownload+1, newest+1): 594 | self.InternalPrint("=============== loading page {0} ===============".format(i), False) 595 | domain = "http://"+self.jandandomain+"/ooxx/page-{0}#comments".format(i) 596 | res = self.FetchThreadHtml(domain) 597 | if get_error(res): 598 | self.InternalPrint(get_error(res), False) 599 | self.InternalPrint("=============== end ===============", False) 600 | 601 | def strip_tags(self, html): 602 | s = MLStripper() 603 | s.feed(html) 604 | return s.get_data() 605 | 606 | def get_max(self, html_code): 607 | m = re.search('.+cp-pagenavi.+', html_code) 608 | m = re.search('\d+', self.strip_tags(m.group(0)).strip()) 609 | return int(m.group(0)) 610 | 611 | def download_file(self, url): 612 | dir = self.type 613 | local_directory = "Images/"+ dir + '/' 614 | self.DealDir(local_directory) 615 | image_path = local_directory + url.split('/')[-1] 616 | if os.path.exists(image_path): 617 | return error('\t skip '+image_path) 618 | self.InternalPrint('\t=>'+image_path.encode(sys.getfilesystemencoding()), False) 619 | # NOTE the stream=True parameter 620 | retry = 0 621 | proxies = { 622 | 'http':self.httpProxy, 623 | 'https':self.httpsProxy, 624 | } 625 | global has_log_file 626 | while True: 627 | try: 628 | if self.useProxy: 629 | r = requests.get(url, stream=True, proxies=proxies) 630 | else: 631 | r = requests.get(url, stream=True) 632 | break 633 | except requests.ConnectionError: 634 | if retry 0: 655 | d.pageTo = d.pageNum + num - 1 656 | 657 | def parse_general_args(obj, args): 658 | if args.no_log: 659 | obj.hasLog = False 660 | if args.threads: 661 | obj.numToDownload = args.threads 662 | if args.single: 663 | obj.targetThread = args.single[0] 664 | if args.proxy: 665 | obj.useProxy = True 666 | obj.httpProxy = args.proxy[0] 667 | obj.httpsProxy = args.proxy[0] 668 | if args.direct: 669 | obj.useProxy = False 670 | if args.retry: 671 | obj.retryTimes = args.retry 672 | if args.mono: 673 | obj.isMono = True 674 | if args.verbose: 675 | obj.verbose = True 676 | if args.quiet: 677 | obj.silent = True 678 | 679 | def caoliu(args): 680 | cl = CaoliuDownloader() 681 | if args.pages: 682 | process_pages(cl, args.pages) 683 | if args.domain: 684 | cl.caoliudomain = args.domain 685 | parse_general_args(cl, args) 686 | cl.InternalPrint("Processing caoliu...", False) 687 | cl.Download() 688 | 689 | def moeimg(args): 690 | moe = MoeimgDownloader() 691 | if args.pages: 692 | process_pages(moe, args.pages) 693 | if args.domain: 694 | moe.moeimgdomain = args.domain 695 | if args.sort_with_tags: 696 | moe.moeimgSortWithTags = True 697 | parse_general_args(moe, args) 698 | moe.InternalPrint("Processing moeimg...", False) 699 | if args.fetch_all_tags: 700 | res = moe.FetchAllTags() 701 | if get_error(res): 702 | print(get_error(res)) 703 | return 704 | tags = get_val(res) 705 | with open('all_tags.txt', 'w') as all_tags_file: 706 | for t in tags: 707 | all_tags_file.write(t + '\n') 708 | print('Fetched all tags.') 709 | elif args.with_tags: 710 | if args.tag_file: 711 | moe.tag_file = args.tag_file 712 | moe.moeimgTags = True 713 | moe.Download() 714 | else: 715 | moe.Download() 716 | 717 | def jandan(args): 718 | j = JanDanDownloader() 719 | if args.pages: 720 | j.jandanPageToDownload = args.pages 721 | if args.domain: 722 | j.jandandomain = args.domain 723 | parse_general_args(j, args) 724 | j.InternalPrint("Processing jandan...", False) 725 | j.Download() 726 | 727 | #def all():pass 728 | 729 | def main(): 730 | global init_with_config_file 731 | global has_log_file 732 | ap = argparse.ArgumentParser(description='This tool can download ooxx image from some websites. :P', 733 | epilog=" Please report bugs to https://github.com/KanagiMiss/MoeDownloader/issues") 734 | sp = ap.add_subparsers(title='subcommands', 735 | description='available subcommands', 736 | help='') 737 | 738 | p_caoliu = sp.add_parser("caoliu", help="download caoliu images") 739 | p_caoliu.set_defaults(func=caoliu) 740 | p_moeimg = sp.add_parser("moeimg", help="download moeimg images") 741 | p_moeimg.set_defaults(func=moeimg) 742 | p_jandan = sp.add_parser("jandan", help="download jandan images") 743 | p_jandan.set_defaults(func=jandan) 744 | # p_all = sp.add_parser("all", help="download all images") 745 | 746 | g1 = ap.add_mutually_exclusive_group() 747 | g2 = ap.add_mutually_exclusive_group() 748 | ap.add_argument("-p", "--pages", type=int, 749 | help="number of pages to download") 750 | 751 | #general options 752 | ap.add_argument("-i", "--ignore_config", action="store_true", help="ignore config file and load with default options") 753 | ap.add_argument("-n", "--no_log", action="store_true", help="run without log") 754 | ap.add_argument("-r", "--retry", type=int, help="retry times if failed") 755 | ap.add_argument("-m", "--mono", action="store_true", help="set if mono file") 756 | ap.add_argument("-t", "--threads", type=int, help="number of threads to download") 757 | ap.add_argument("-S", "--single", nargs=1, help="download single thread") 758 | g1.add_argument("-q", "--quiet", action="store_true", help="run quietly and briefly") 759 | g1.add_argument("-v", "--verbose", action="store_true", help="run verbosely") 760 | g2.add_argument("-d", "--direct", action="store_true", help="connect directly(without proxy)") 761 | g2.add_argument("--proxy", nargs=1, help='set http and https proxy') 762 | ap.add_argument('--version', action='version', version='%(prog)s 1.0') 763 | 764 | #moeimg options 765 | p_moeimg.add_argument("-T", "--fetch_all_tags", action="store_true", help="fetch all tags from site") 766 | p_moeimg.add_argument("-t", "--with_tags", action="store_true", help="download with tags") 767 | p_moeimg.add_argument("-s", "--sort_with_tags", action="store_true", help="sort files with tags") 768 | p_moeimg.add_argument("--domain", nargs=1, help="set domain") 769 | p_moeimg.add_argument("-f", "--tag_file", type=argparse.FileType('r'), help="set specific tag file") 770 | 771 | #caoliu options 772 | p_caoliu.add_argument("--domain", nargs=1, help="set domain") 773 | 774 | #jandan options 775 | p_jandan.add_argument("--domain", nargs=1, help="set domain") 776 | 777 | args = ap.parse_args() 778 | 779 | # run with default config (ignore config file) 780 | if args.ignore_config: 781 | init_with_config_file = False 782 | 783 | # run without log file 784 | if args.no_log: 785 | has_log_file = False 786 | 787 | args.func(args) 788 | 789 | if __name__ == '__main__': 790 | reload(sys) 791 | sys.setdefaultencoding(sys.getfilesystemencoding()) 792 | main() 793 | --------------------------------------------------------------------------------