├── .gitignore
├── README.md
├── download.sh
├── im2pdf.py
├── main.py
├── pdf.sh
├── sign.py
├── sp0.py
├── sp1.py
├── sp2.py
└── yunpan.py

/.gitignore:
--------------------------------------------------------------------------------
__pycache__
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# pmp
A crawler for adult comics.

I recently noticed how many sites serve adult comics and decided to pick one at random and crawl it.

## The current test subject is http://a.qdtechworld.com/ ##

### Hopefully it stays online for a while ###

Most adult-comic sites follow the same design these days:

### the first few chapters are free, and the rest are paywalled in various ways. ###

This site, however, is fairly crude: a daily check-in grants 100 coins, which is enough to read 2 chapters for free.

On top of that, because of how the web frontend is limited, the site identifies users by their User-Agent, so by repeatedly switching the UA and checking in, you can keep reading for free.

That is the basic idea.
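In code, the whole trick is just two requests: check in under a fresh User-Agent to collect the daily coins, then request the chapter with that same UA until `can_read` comes back true. A minimal sketch of the loop, assuming the endpoints behave the way `sign.py` and `sp2.py` expect (the `read_chapter_images` helper, the `BASE` constant and the retry count are only illustrative):

```python
import random
import requests

BASE = 'http://j.jinkongjianshe.com'

def read_chapter_images(comic_id, chapter_id, retries=10):
    ua = 'baipiaoshiwokuaile'
    for _ in range(retries):
        url = '%s/api/comic/read_chapter?comic_id=%d&chapter_id=%d' % (BASE, comic_id, chapter_id)
        data = requests.get(url, headers={'User-Agent': ua}, timeout=10).json()['data']
        if data['can_read']:
            return data['images']
        # Chapter is locked: pose as a brand-new "user" and claim the daily coins.
        ua = str(random.random())
        requests.post('%s/api/user/sign_in' % BASE, headers={'User-Agent': ua}, timeout=10)
    return None
```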
## Anyone who's interested is welcome to join in ##

`python main.py`
--------------------------------------------------------------------------------
/download.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Run a range of the download scripts generated by main.py
# (optionally pass start and end indices as the two arguments).
start=0
end=1000
count=1000
if [ $# -eq 2 ]; then
    start=$1
    end=$2
    count=`expr ${end} - ${start}`
fi
mkdir -p download
script_path=script
scripts=`ls ${script_path} | head -n ${end} | tail -n ${count}`
cd download
for file in $scripts
do
    cmd="sh ../${script_path}/${file}"
    echo $cmd
    $cmd
done
--------------------------------------------------------------------------------
/im2pdf.py:
--------------------------------------------------------------------------------
from PIL import Image
import os
import sys


def rea(path, pdf_name):
    # Collect every image in `path` (except the chapter cover) and merge
    # them, in filename order, into a single PDF.
    file_list = os.listdir(path)
    pic_name = []
    im_list = []
    for x in file_list:
        if "jpg" in x or 'png' in x or 'jpeg' in x:
            if x == 'cover.jpg':
                continue
            pic_name.append(x)
    pic_name.sort()

    im1 = Image.open(os.path.join(path, pic_name[0]))
    pic_name.pop(0)
    for i in pic_name:
        img = Image.open(os.path.join(path, i))
        try:
            img.load()
        except IOError:
            # Skip truncated or corrupted downloads.
            continue
        # PDF pages cannot carry an alpha channel, so flatten RGBA images.
        if img.mode == "RGBA":
            img = img.convert('RGB')
        im_list.append(img)
    im1.save(pdf_name, "PDF", resolution=100.0, save_all=True, append_images=im_list)
    print("Output file:", pdf_name)


if __name__ == '__main__':
    mypath = sys.argv[1]
    pdf_name = sys.argv[2]

    if ".pdf" in pdf_name:
        rea(mypath, pdf_name=pdf_name)
    else:
        rea(mypath, pdf_name="{}.pdf".format(pdf_name))
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
# encoding:utf-8
import os

from sp0 import SP0
from sp1 import SP1
from sp2 import SP2

# Crawling everything in one go takes far too long, so this just writes
# a per-comic download script (curl commands) that can be run later.
if __name__ == '__main__':
    prefix = 'http://ac.wet8955.cn'
    root = 'script'

    try:
        os.makedirs(root)
    except OSError:
        pass

    menu0 = SP0().sp()

    # Starts at index 59, presumably to skip comics handled in an earlier run.
    for index in range(59, len(menu0)):
        menu1 = menu0[index]
        dir0 = menu1['name']
        filename = "%s/%s.sh" % (root, dir0)
        fo = open(filename, "w", encoding='utf-8')
        fo.write("#!/bin/sh\n")
        fo.write("mkdir -p %s\n" % dir0)
        fo.write("curl -o %s/cover.jpg %s/%s\n" % (dir0, prefix, menu1['cover']))
        id = menu1['id']
        menu2 = SP1(id).sp()
        for chapter in menu2:
            comic_id = chapter['comic_id']
            chapter_id = chapter['chapter_id']
            dir1 = '%d-%s' % (chapter_id, chapter['name'])
            fo.write("mkdir -p %s/%s\n" % (dir0, dir1))
            fo.write("curl -o %s/%s/cover.jpg %s/%s\n" % (dir0, dir1, prefix, chapter['cover']))
            data = SP2(comic_id, chapter_id).sp()
            if data is None:
                fo.write('###err here!\n')
                continue
            images = data['images']
            for i in range(len(images)):
                fo.write("curl -o %s/%s/%03d.jpg %s/%s\n" % (dir0, dir1, i, prefix, images[i]))
            print(index, '/', len(menu0), dir0, dir1, 'ok')
        fo.close()
--------------------------------------------------------------------------------
/pdf.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Turn every downloaded chapter directory into a PDF with im2pdf.py.
mkdir -p pdf
download_path=download
files=`ls ${download_path}`
cd pdf
for file in $files
do
    mkdir -p ${file}
    cs=`ls ../${download_path}/${file}`
    for c in $cs
    do
        cmd="python3 ../im2pdf.py ../${download_path}/${file}/${c} ${file}/${c}.pdf"
        echo $cmd
        $cmd
    done
done
--------------------------------------------------------------------------------
/sign.py:
--------------------------------------------------------------------------------
# encoding:utf-8
import requests


class Sign:
    # Performs the daily check-in under the given User-Agent.
    def __init__(self, ua):
        self.url = 'http://j.jinkongjianshe.com/api/user/sign_in'
        self.ua = ua

    def sp(self):
        try:
            headers = {'User-Agent': self.ua}
            # The response is not inspected; the check-in either succeeds or it doesn't.
            requests.post(self.url, headers=headers, timeout=10)
        except Exception as e:
            print(e)


if __name__ == '__main__':
    s = Sign("1a")
    s.sp()
--------------------------------------------------------------------------------
/sp0.py:
--------------------------------------------------------------------------------
# encoding:utf-8
import requests


class SP0:
    # Fetches the site-wide comic catalogue, ranked by popularity.
    def __init__(self):
        self.url = 'http://j.jinkongjianshe.com/api/comic/rank?page=1&limit=1000&sort=popularity'

    def sp(self):
        for retry in range(10):
            try:
                r = requests.get(self.url)
                data = r.json()['data']['data']
                self.data = data
                return data
            except Exception as e:
                print(e)
        return None


if __name__ == '__main__':
    s = SP0()
    s.sp()
--------------------------------------------------------------------------------
/sp1.py:
--------------------------------------------------------------------------------
# encoding:utf-8
import requests


class SP1:
    # Fetches the chapter list of a single comic.
    def __init__(self, comic_id):
        self.url = 'http://j.jinkongjianshe.com/api/comic/comic_detail/?comic_id=%d' % (comic_id)

    def sp(self):
        for retry in range(10):
            try:
                r = requests.get(self.url)
                data = r.json()['data']['chapter_list']
                self.data = data
                return data
            except Exception as e:
                print(e)
        return None


if __name__ == '__main__':
    s = SP1(234)
    print(s.sp())
--------------------------------------------------------------------------------
/sp2.py:
--------------------------------------------------------------------------------
# encoding:utf-8
import requests
from sign import Sign
import random


class SP2:
    # Fetches the image list of one chapter. If the chapter is locked,
    # it checks in under a fresh random User-Agent and tries again.
    def __init__(self, comic_id, chapter_id):
        self.url = 'http://j.jinkongjianshe.com/api/comic/read_chapter?comic_id=%d&chapter_id=%d' % (comic_id, chapter_id)
        self.ua = 'baipiaoshiwokuaile'

    def sp(self):
        for retry in range(10):
            try:
                headers = {'User-Agent': self.ua}
                r = requests.get(self.url, headers=headers)
                data = r.json()['data']
                if not data['can_read']:
                    # Locked chapter: claim the daily coins under a new UA, then retry.
                    Sign(self.randomUA()).sp()
                    continue
                self.data = data
                return data
            except Exception as e:
                print(e)
        return None

    def randomUA(self):
        self.ua = str(random.random())
        return self.ua


if __name__ == '__main__':
    s = SP2(234, 8420)
    print(s.sp())
--------------------------------------------------------------------------------
/yunpan.py:
--------------------------------------------------------------------------------
# encoding:utf-8
# bypy is a command-line client for Baidu Netdisk; this just lists the remote directory.

from bypy import ByPy

bp = ByPy()
bp.list()
--------------------------------------------------------------------------------
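End to end, the pipeline is: `python main.py` writes one shell script per comic into `script/`, `download.sh` runs a slice of those scripts into `download/`, and `pdf.sh` feeds each downloaded chapter to `im2pdf.py` to produce the PDFs. For a quick check that the crawler classes work on their own, a sketch along these lines mirrors a single iteration of main.py (picking the first comic and first chapter is arbitrary, just for illustration):

```python
# encoding:utf-8
# Smoke test: fetch the image list of the first chapter of the first
# comic in the catalogue, without writing any download scripts.
from sp0 import SP0
from sp1 import SP1
from sp2 import SP2

comics = SP0().sp()                  # ranked catalogue (None if the API is unreachable)
comic = comics[0]
chapters = SP1(comic['id']).sp()     # chapter list for that comic
chapter = chapters[0]
data = SP2(chapter['comic_id'], chapter['chapter_id']).sp()
if data is not None:
    print(comic['name'], chapter['name'], len(data['images']), 'images')
```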