├── requirements.txt
├── LICENSE
├── README.md
├── .gitignore
└── main.py

/requirements.txt:
--------------------------------------------------------------------------------
requests[socks]
--------------------------------------------------------------------------------

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 nonPointer

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
# PixivDownloader

PixivDownloader is a simple Python 3 multi-threaded batch tool that downloads all of a given user's works from Pixiv in their original quality.

TODO:

+ [ ] Optimize code
+ [ ] Tag filtering
+ [X] Built-in proxy settings

## Features

+ Automatically creates a subdirectory for each author
+ Downloads original-quality images
+ Multi-threaded downloading

## Usage

0. Install the dependencies: `pip install -r requirements.txt`
1. Paste your Pixiv cookies into `main.py` (**optional; required only for restricted content**, see the configuration sketch below)
2. Run the script
3. Enter the author's user ID (`https://www.pixiv.net/member.php?id=[AUTHOR_UID]`; the `id` parameter of the author's profile URL is the user ID)
4. Wait for the `Job finished` prompt, then enter the next author's user ID
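
For reference, these are the user-editable settings near the top of `main.py`; the cookie and proxy values shown here are placeholders, not working credentials:

```python
# Settings block in main.py (placeholder values)
cookies = "PHPSESSID=xxxx; device_token=xxxx"  # your Pixiv cookies, separated by ";"

enable_proxy = False                # set to True to route traffic through SOCKS5
enable_remote_dns = True            # resolve hostnames through the proxy (socks5h://)
socks5_proxy_address = "127.0.0.1"
socks5_proxy_port = "1080"
```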
--------------------------------------------------------------------------------

/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
.idea/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/
--------------------------------------------------------------------------------

/main.py:
--------------------------------------------------------------------------------
import requests
import json
import sys
import traceback
import time
import re
import os
import threading

# User-Agent sent with every request
userAgent = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6"

# Write the log to a file as well as stdout
exportLog = False
exportLogfile = "log.txt"

# API addresses
apiAddress = "https://www.pixiv.net/ajax/illust/"
authorPrefix = "https://www.pixiv.net/ajax/user/"
authorSuffix = "/profile/all"

# Cookies
# Use ";" to separate entries
cookies = ""

# Threads started per second (crude rate limit)
threads_per_sec = 10

# Enable SOCKS5 proxy
enable_proxy = False

# Resolve DNS through the proxy instead of locally
enable_remote_dns = True

# Proxy settings
socks5_proxy_address = "127.0.0.1"
socks5_proxy_port = "1080"

if not enable_proxy:
    proxiesDict = {}
else:
    # socks5h:// resolves hostnames on the proxy side; socks5:// resolves them locally
    scheme = "socks5h" if enable_remote_dns else "socks5"
    proxy = scheme + "://" + socks5_proxy_address + ":" + socks5_proxy_port
    proxiesDict = {'http': proxy, 'https': proxy}


def print_log(content):
    line = time.strftime('%Y-%m-%d %H:%M:%S\t', time.localtime()) + str(content)
    print(line)
    sys.stdout.flush()
    if exportLog:
        with open(exportLogfile, "a") as f_log:
            f_log.write(line + '\n')


def mkdir(path):
    if not os.path.exists(path):
        os.makedirs(path)
        print_log("Folder created.")
    else:
        print_log("Folder exists.")
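

# A sketch of the ajax/illust response fields this script relies on. The shape
# below is a simplified assumption inferred from the keys accessed in work(),
# not an authoritative description of the Pixiv API:
#
#   {
#     "error": false,
#     "body": {
#       "illustTitle": "...",
#       "userName": "...",
#       "urls": {"original": "https://i.pximg.net/img-original/img/.../<ID>_p0.png"}
#     }
#   }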
def work(illust_id):
    try:
        contentJSON = requests.get(apiAddress + illust_id, headers=headers, proxies=proxiesDict)
        decodeContent = json.loads(contentJSON.text)
        if decodeContent['error']:
            print_log("Illustration error.")
            return
        original_url = decodeContent['body']['urls']['original']
        # Keep the extension of the original file (.png, .jpg, ...)
        extension = os.path.splitext(original_url)[1] or ".png"
        filepath = os.path.join(foldername, illust_id + extension)
        if os.path.exists(filepath):
            print_log("Skip\t [" + decodeContent['body']['illustTitle'] + "]")
            return
        print_log("Downloading\t [" + decodeContent['body']['illustTitle'] + "]")
        # Pixiv's image server requires a matching Referer header
        headers1 = {
            'Referer': 'https://www.pixiv.net/member_illust.php?mode=medium&illust_id=' + illust_id,
            'cookie': cookies
        }
        content = requests.get(original_url, headers=headers1, proxies=proxiesDict)
        with open(filepath, "wb") as f:
            f.write(content.content)
    except Exception:
        traceback.print_exc()


if __name__ == "__main__":

    headers = {
        "User-Agent": userAgent,
        "cookie": cookies
    }

    while True:
        # Fetch the author's complete work list
        author_id = input("Author user ID: ").strip()
        contentJSON = requests.get(authorPrefix + author_id + authorSuffix, headers=headers, proxies=proxiesDict)
        decodeContent = json.loads(contentJSON.text)

        # Pull the illustration IDs (all numeric tokens) out of the stringified illusts field
        try:
            illusts = re.findall("[0-9]+", str(decodeContent['body']['illusts']))
        except Exception:
            continue
        print_log("Counter\t" + str(len(illusts)))

        # Derive a folder name from the author's name, falling back to the raw user ID
        try:
            foldername = re.findall("'userName': '(.*)', 'userImageUrl'", str(decodeContent['body']['pickup']))[0]
        except Exception:
            try:
                foldername = re.findall("「(.*)」.*",
                                        requests.get("https://www.pixiv.net/member.php?id=" + author_id,
                                                     headers=headers, proxies=proxiesDict).text)[0]
            except Exception:
                foldername = author_id

        print_log(foldername)
        mkdir(foldername)

        # Start one download thread per illustration, pausing after every
        # threads_per_sec threads so the server is not hammered
        threads = []
        waitcount = 0
        for illust_id in illusts:
            t = threading.Thread(target=work, args=(illust_id,))
            t.daemon = False
            t.start()
            threads.append(t)
            waitcount += 1
            if waitcount % threads_per_sec == 0:
                time.sleep(1)
        for thr in threads:
            if thr.is_alive():
                thr.join()
        print_log("Job finished.")
--------------------------------------------------------------------------------