├── requirements.txt
├── LICENSE
├── README.md
├── .gitignore
└── main.py

/requirements.txt:
--------------------------------------------------------------------------------
requests[socks]
--------------------------------------------------------------------------------

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 nonPointer

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
# PixivDownloader

PixivDownloader is a simple Python 3 multi-threaded batch tool that downloads all of a given user's works from Pixiv in their original quality.

TODO:

+ [ ] Optimize code
+ [ ] Tag filtering
+ [X] Built-in proxy settings

## Features

+ Automatically creates a subdirectory for each author
+ Downloads original-quality images
+ Multi-threaded downloading

## Usage

0. Install the dependencies: `pip install -r requirements.txt`
1. Paste your Pixiv cookies into `main.py` (**optional; required only for restricted content**, see the configuration sketch below)
2. Run the script
3. Enter the author's user ID (`https://www.pixiv.net/member.php?id=[AUTHOR_UID]`; the `id` parameter of the author's profile URL is the user ID)
4. Wait for the `Job finished` prompt, then enter the next author's user ID
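
For reference, these are the user-editable settings near the top of `main.py`; the cookie and proxy values shown here are placeholders, not working credentials:

```python
# Settings block in main.py (placeholder values)
cookies = "PHPSESSID=xxxx; device_token=xxxx"  # your Pixiv cookies, separated by ";"

enable_proxy = False                # set to True to route traffic through SOCKS5
enable_remote_dns = True            # resolve hostnames through the proxy (socks5h://)
socks5_proxy_address = "127.0.0.1"
socks5_proxy_port = "1080"
```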
--------------------------------------------------------------------------------

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
.idea/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
--------------------------------------------------------------------------------

/main.py:
--------------------------------------------------------------------------------
import requests
import json
import sys
import traceback
import time
import re
import os
import threading

# User-Agent sent with every request
userAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6"

# Write the log to a file as well as stdout
exportLog = False
exportLogfile = "log.txt"

# API addresses
apiAddress = "https://www.pixiv.net/ajax/illust/"
authorPrefix = "https://www.pixiv.net/ajax/user/"
authorSuffix = "/profile/all"

# Cookies
# Use ";" to separate entries
cookies = ""

# Threads started per second (crude rate limit)
threads_per_sec = 10

# Enable SOCKS5 proxy
enable_proxy = False

# Resolve DNS through the proxy instead of locally
enable_remote_dns = True

# Proxy settings
socks5_proxy_address = "127.0.0.1"
socks5_proxy_port = "1080"

if not enable_proxy:
    proxiesDict = {}
else:
    # socks5h:// resolves hostnames on the proxy side; socks5:// resolves them locally
    scheme = "socks5h" if enable_remote_dns else "socks5"
    proxy = scheme + "://" + socks5_proxy_address + ":" + socks5_proxy_port
    proxiesDict = {'http': proxy, 'https': proxy}


def print_log(content):
    line = time.strftime('%Y-%m-%d %H:%M:%S\t', time.localtime()) + str(content)
    print(line)
    sys.stdout.flush()
    if exportLog:
        with open(exportLogfile, "a") as f_log:
            f_log.write(line + '\n')


def mkdir(path):
    if not os.path.exists(path):
        os.makedirs(path)
        print_log("Folder created.")
    else:
        print_log("Folder exists.")
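

# A sketch of the ajax/illust response fields this script relies on. The shape
# below is a simplified assumption inferred from the keys accessed in work(),
# not an authoritative description of the Pixiv API:
#
#   {
#     "error": false,
#     "body": {
#       "illustTitle": "...",
#       "userName": "...",
#       "urls": {"original": "https://i.pximg.net/img-original/img/.../<ID>_p0.png"}
#     }
#   }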
def work(illust_id):
    try:
        contentJSON = requests.get(apiAddress + illust_id, headers=headers, proxies=proxiesDict)
        decodeContent = json.loads(contentJSON.text)
        if decodeContent['error']:
            print_log("Illustration error.")
            return
        original_url = decodeContent['body']['urls']['original']
        # Keep the extension of the original file (.png, .jpg, ...)
        extension = os.path.splitext(original_url)[1] or ".png"
        filepath = os.path.join(foldername, illust_id + extension)
        if os.path.exists(filepath):
            print_log("Skip\t [" + decodeContent['body']['illustTitle'] + "]")
            return
        print_log("Downloading\t [" + decodeContent['body']['illustTitle'] + "]")
        # Pixiv's image server requires a matching Referer header
        headers1 = {
            'Referer': 'https://www.pixiv.net/member_illust.php?mode=medium&illust_id=' + illust_id,
            'cookie': cookies
        }
        content = requests.get(original_url, headers=headers1, proxies=proxiesDict)
        with open(filepath, "wb") as f:
            f.write(content.content)
    except Exception:
        traceback.print_exc()


if __name__ == "__main__":

    headers = {
        "User-Agent": userAgent,
        "cookie": cookies
    }

    while True:
        # Fetch the author's complete work list
        author_id = input("Author user ID: ").strip()
        contentJSON = requests.get(authorPrefix + author_id + authorSuffix, headers=headers, proxies=proxiesDict)
        decodeContent = json.loads(contentJSON.text)

        # Pull the illustration IDs (all numeric tokens) out of the stringified illusts field
        try:
            illusts = re.findall("[0-9]+", str(decodeContent['body']['illusts']))
        except Exception:
            continue
        print_log("Counter\t" + str(len(illusts)))

        # Derive a folder name from the author's name, falling back to the raw user ID
        try:
            foldername = re.findall("'userName': '(.*)', 'userImageUrl'", str(decodeContent['body']['pickup']))[0]
        except Exception:
            try:
                foldername = re.findall("「(.*)」.*",
                                        requests.get("https://www.pixiv.net/member.php?id=" + author_id,
                                                     headers=headers, proxies=proxiesDict).text)[0]
            except Exception:
                foldername = author_id

        print_log(foldername)
        mkdir(foldername)

        # Start one download thread per illustration, pausing after every
        # threads_per_sec threads so the server is not hammered
        threads = []
        waitcount = 0
        for illust_id in illusts:
            t = threading.Thread(target=work, args=(illust_id,))
            t.daemon = False
            t.start()
            threads.append(t)
            waitcount += 1
            if waitcount % threads_per_sec == 0:
                time.sleep(1)
        for thr in threads:
            if thr.is_alive():
                thr.join()
        print_log("Job finished.")
--------------------------------------------------------------------------------