# ├── README.md
# └── pixiv.py
#
# /README.md:
# --------------------------------------------------------------------------------
# # Pixiv
# Python3;
# Get image from pixiv.
#
# # Update
# ### [2019/07/15]
# 1. Fix some error.
# 2. Add User ID in file name.
#
# # Default
# ```
# # Get RankLink: https://www.pixiv.net/ranking.php?mode=male
# python3 pixiv.py
# ```
#
# # Usage
# ```
# # By UserID:
# python3 pixiv.py "673179"
# # By RankLink:
# python3 pixiv.py "https://www.pixiv.net/ranking.php?mode=male"
# ```
#
# --------------------------------------------------------------------------------
# /pixiv.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Author: MoeClub.org

from urllib import request
import threading
import queue
import re
import os
import sys


class pixiv:
    """Collect illustration ids from a pixiv page and download their images.

    Typical use (see the script entry point at the bottom of the file)::

        p = pixiv()
        p.get_item("673179")      # or a ranking URL, or None for the default
        p.multi_data(max=25)

    Images are written to ``<directory of this file>/PixivImage`` and named
    ``<user id>_<original file name>``.
    """

    def __init__(self):
        self.folder = 'PixivImage'
        self.web_coding = 'utf-8'
        self.root = os.path.dirname(os.path.abspath(__file__))
        self.DefaultHeader = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0",
            "Accept": "*/*",
            "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3",
            "Accept-Encoding": "",
            "Connection": "keep-alive",
        }
        # Illustration ids still to be downloaded; filled by get_item().
        self.data_low = []
        # Number of image files written so far.
        self.num = 0
        # Guards ``num``: write() is called from the worker threads that
        # multi_data() starts.
        self._lock = threading.Lock()

    def _http(self, url, headers, Obj=False, timeout=60):
        """Issue a GET request.

        :param url: address to fetch.
        :param headers: dict of request headers.
        :param Obj: when true, return the open response object (the caller
            is responsible for closing it); otherwise return the decoded body.
        :param timeout: socket timeout in seconds, so a stalled connection
            cannot block a worker thread forever.
        :return: response object or ``str`` body, depending on ``Obj``.
        """
        req = request.Request(url, headers=headers, method='GET')
        res = request.urlopen(req, timeout=timeout)
        if Obj:
            return res
        with res:
            return res.read().decode(self.web_coding, "ignore")

    def data_image(self, url_id):
        """Look up the image URLs and owner of one illustration.

        :param url_id: illustration id.
        :return: tuple ``(urls, headers, user_id)`` where ``urls`` is the
            de-duplicated list of ``url_big`` values found in the details
            response, ``headers`` are the request headers (including the
            ``Referer`` built for this illustration) to reuse when fetching
            the images, and ``user_id`` is the first ``user_id`` value found.
        :raises IndexError: if the response contains no ``user_id`` field.
        """
        _header = self.DefaultHeader.copy()
        _header["Referer"] = "https://www.pixiv.net/member_illust.php?mode=medium&illust_id={}".format(url_id)
        _url_data = "https://www.pixiv.net/touch/ajax/illust/details?illust_id={}".format(url_id)
        _data_details = self._http(_url_data, _header)
        data_url = self.sort_data(re.findall('"url_big":"[^"]*"', _data_details))
        uid = re.search(r'"user_id":"([^"]*)"', _data_details)
        if uid is None:
            # Same exception type the original indexing raised, but with context.
            raise IndexError("Pixiv Item: no user_id found for illust {}".format(url_id))
        return data_url, _header, uid.group(1)

    def sort_data(self, data):
        """Turn raw ``"key":"value"`` matches into a list of unique values.

        Duplicates are dropped (first occurrence wins, order preserved), all
        backslashes are removed (presumably escaped ``\\/`` sequences in the
        response - not confirmed here), and the text after the first ``:`` is
        returned without its surrounding double quotes.
        """
        unique = dict.fromkeys(data)
        return [str(item).replace('\\', '').split(':', 1)[-1].strip('"') for item in unique]

    @staticmethod
    def _parse_user_items(text):
        """Return the numeric keys of every ``"<digits>":null`` pair in *text*."""
        return re.findall(r'"([0-9]+)":null', text)

    @staticmethod
    def _parse_rank_items(text):
        """Return the id prefix of every ``/img-master/img/`` thumbnail in *text*.

        The id is the part of the file name before its first ``_``.
        """
        return [
            url.rsplit('/', 1)[-1].split('_')[0]
            for url in re.findall(r'data-src="([^"]*)"', text)
            if '/img-master/img/' in url
        ]

    def get_item(self, UserID=None):
        """Fill ``self.data_low`` with the illustration ids to download.

        :param UserID: a user id, or a full URL (anything containing
            ``://``) of a page to scrape for thumbnails. Defaults to the
            "male" ranking page.
        :raises Exception: if the page answers with a status other than 200.

        Ids already present in the download folder are removed afterwards
        (see :meth:`fliter_item`).
        """
        if not UserID:
            UserID = 'https://www.pixiv.net/ranking.php?mode=male'
        by_user = '://' not in str(UserID)
        if by_user:
            _url = "https://www.pixiv.net/ajax/user/{}/profile/all".format(str(UserID))
        else:
            _url = UserID
        # ``with`` closes the response even when the status check raises.
        with self._http(_url, self.DefaultHeader, True) as page:
            if page.code != 200:
                raise Exception("Pixiv Page:", page.code)
            text = page.read().decode(self.web_coding, "ignore")
        found = self._parse_user_items(text) if by_user else self._parse_rank_items(text)
        # Drop repeated ids so the same files are not fetched and written
        # twice by concurrent workers.
        self.data_low = list(dict.fromkeys(found))
        self.fliter_item()

    def fliter_item(self):
        """Remove from ``self.data_low`` every id that already has a file.

        Existing files are expected to be named ``<user id>_<illust id>_...``;
        the second ``_``-separated field is taken as the illustration id.
        Does nothing when the download folder does not exist yet.
        """
        folder = os.path.join(self.root, self.folder)
        if not os.path.isdir(folder):
            return None
        _split = "_"
        _exist = {name.split(_split)[1] for name in os.listdir(folder) if _split in name}
        print("Exist Item:", len(_exist))
        # Slice assignment keeps the same list object, as the original
        # in-place removal did.
        self.data_low[:] = [item for item in self.data_low if item not in _exist]

    def get_data_by_item(self, item):
        """Download and store every image of one illustration.

        :param item: illustration id.
        :raises Exception: if an image request answers with a status other
            than 200.
        """
        data_urls, header, uid = self.data_image(item)
        for data_url in data_urls:
            with self._http(data_url, header, True) as image:
                if image.code != 200:
                    raise Exception("Pixiv Image: [{} | {}]".format(image.code, data_url))
                payload = image.read()
            self.write("{}_{}".format(uid, str(data_url).rsplit('/', 1)[-1]), payload)

    def get_data(self, data_list=None):
        """Download all items one after another (no threads).

        :param data_list: ids to download; defaults to ``self.data_low``.
        """
        if not data_list:
            data_list = self.data_low
        for item in data_list:
            self.get_data_by_item(item)
        print("\nTotal Image: ", self.num)

    def write(self, name, data):
        """Write *data* to ``<root>/<folder>/<name>`` and count the file.

        :param name: file name inside the download folder.
        :param data: bytes to store.
        """
        folder = os.path.join(self.root, self.folder)
        # exist_ok avoids FileExistsError when two workers create the
        # folder at the same moment.
        os.makedirs(folder, exist_ok=True)
        file = os.path.join(folder, str(name))
        with open(file, 'wb') as fp:
            fp.write(data)
        with self._lock:
            self.num += 1
        print("Pixiv Image: [ OK | {} ]".format(file))

    def add_queue(self, _queue, data_list=None):
        """Put every non-empty item of *data_list* into *_queue* as a stripped string.

        :param _queue: object with a ``put`` method (a ``queue.Queue``).
        :param data_list: items to enqueue; defaults to ``self.data_low``.
        """
        if data_list is None:
            data_list = self.data_low
        for item in data_list:
            _item = str(item).strip()
            if item and _item:
                _queue.put(_item)

    def _drain(self, work):
        """Worker loop: process queued items until *work* is empty.

        A failing item is reported and skipped so the worker keeps going.
        """
        while True:
            try:
                item = work.get_nowait()
            except queue.Empty:
                return
            try:
                self.get_data_by_item(item)
            except Exception as error:
                print("Pixiv Item: [ Fail | {} | {} ]".format(item, error))

    def multi_data(self, data_list=None, max=25):
        """Download all items using up to *max* worker threads.

        :param data_list: ids to download; defaults to ``self.data_low``.
        :param max: upper bound on concurrent worker threads (values below
            1 are treated as 1).
        """
        if not data_list:
            data_list = self.data_low
        print("New Item:", len(data_list))
        # Fill the queue completely before any worker starts: workers then
        # stop only when every item has been taken, instead of racing a
        # producer thread on qsize().
        work = queue.Queue()
        self.add_queue(work, data_list)
        limit = max if max and max > 0 else 1
        _threads = [
            threading.Thread(target=self._drain, args=(work,), daemon=True)
            for _ in range(min(limit, work.qsize()))
        ]
        for _task in _threads:
            _task.start()
        for _task in _threads:
            _task.join()
        print("\nTotal Image: ", self.num)


if __name__ == '__main__':
    # Optional first argument: a user id or a ranking URL.
    task = sys.argv[1] if len(sys.argv) > 1 else None
    p = pixiv()
    p.get_item(task)
    p.multi_data(max=25)
--------------------------------------------------------------------------------