├── .gitignore
├── LICENSE
├── README.md
├── analyze.txt
├── grab_huaban_board.py
├── gui_batchdownload.py
├── logo.ico
├── up2picbed.py
└── version_file.txt


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | #  Usually these files are written by a python script from a template
29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 | 
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 | 
60 | # Scrapy stuff:
61 | .scrapy
62 | 
63 | # Sphinx documentation
64 | docs/_build/
65 | 
66 | # PyBuilder
67 | target/
68 | 
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 | 
72 | # pyenv
73 | .python-version
74 | 
75 | # celery beat schedule file
76 | celerybeat-schedule
77 | 
78 | # dotenv
79 | .env
80 | 
81 | # virtualenv
82 | venv/
83 | ENV/
84 | 
85 | # Spyder project settings
86 | .spyderproject
87 | 
88 | # Rope project settings
89 | .ropeproject
90 | 
91 | boards/
92 | .up2picbed.dat
93 | .up2picbed.dat.db
94 | myAutoUpload.sh
95 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2019, Mr.tao
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | * Neither the name of the copyright holder nor the names of its
17 |   contributors may be used to endorse or promote products derived from
18 |   this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # grab_huaban_board
  2 | 批量下载花瓣网画板、堆糖网专辑
  3 | 
  4 | 
  5 | ## 解析
  6 | 
  7 | * 查看analyze.txt
  8 | 
  9 | 
 10 | ## 使用
 11 | 
 12 | ```
 13 | git clone https://github.com/staugur/grab_huaban_board
 14 | cd grab_huaban_board
 15 | ```
 16 | 
 17 | ### for Python
 18 | 
 19 | 基于python2.7（您需要python环境，不谙此道者建议使用JS版，只需要浏览器即可），测试性地支持py3
 20 | 
 21 | 1. pip install requests
 22 | 
 23 | 2. python grab_huaban_board.py --help
 24 | ```
 25 | usage: grab_huaban_board.py [-h] [-a ACTION] [-u USER] [-p PASSWORD] [-v]
 26 |                             [--board_id BOARD_ID] [--user_id USER_ID]
 27 |                             [--debug] [--proxy] [--proxy_apiurl PROXY_APIURL]
 28 | 
 29 | optional arguments:
 30 |   -h, --help            show this help message and exit
 31 |   -a ACTION, --action ACTION
 32 |                         脚本动作 -> getBoard: 抓取单画板(默认); getUser: 抓取单用户
 33 |   -u USER, --user USER  花瓣网账号-手机/邮箱
 34 |   -p PASSWORD, --password PASSWORD
 35 |                         花瓣网账号对应密码
 36 |   -v, --version         查看版本号
 37 |   -bid BOARD_ID, --board_id BOARD_ID   花瓣网单个画板id, action=getBoard时使用
 38 |   -uid USER_ID, --user_id USER_ID      花瓣网单个用户id, action=getUser时使用
 39 |   --debug               开启debug输出
 40 |   --proxy               开启IP代理池
 41 |   --proxy_apiurl PROXY_APIURL
 42 |                         IP代理池接口：开启IP代理池后，设置此选项使用非默认接口
 43 | ```
 44 | 
 45 | * 温馨提示：开启IP代理池，需要您使用 `proxy_apiurl` 设置一个能输出ip的接口！
 46 | 
 47 | * 详细使用文档请参考: [https://blog.saintic.com/blog/204.html](https://blog.saintic.com/blog/204.html "https://blog.saintic.com/blog/204.html")
 48 | 
 49 | 
 50 | ### for JavaScript(花瓣、堆糖)
 51 | 
 52 | * 详细使用文档请参考：[https://blog.saintic.com/blog/256.html](https://blog.saintic.com/blog/256.html "https://blog.saintic.com/blog/256.html")
 53 | 
 54 | * 花瓣网下载脚本主页及安装地址：[请点击我](https://greasyfork.org/zh-CN/scripts/368427-%E8%8A%B1%E7%93%A3%E7%BD%91%E4%B8%8B%E8%BD%BD "请点击我")
 55 | 
 56 | * 堆糖网下载脚本主页及安装地址：[请点击我](https://greasyfork.org/zh-CN/scripts/369842-%E5%A0%86%E7%B3%96%E7%BD%91%E4%B8%8B%E8%BD%BD "请点击我")
 57 | 
 58 | * 仓库地址：[GitHub](https://github.com/saintic/userscript)
 59 | 
 60 | * 当前仓库下有一个`gui_batchdownload.py`脚本用于这两个油猴脚本文本方式的批量下载，用以一定程度上避免迅雷等下载工具。
 61 | 
 62 |     - 环境： Windows，Py2.7
 63 | 
 64 |     - 依赖： `pip install pyinstaller pywin32`
 65 | 
 66 |     - 打包： `pyinstaller.exe -F gui_batchdownload.py -i logo.ico -w --version-file version_file.txt`
 67 | 
 68 | ## up2picbed
 69 | 
 70 | 这是一个将花瓣网画板图片上传到[picbed](https://github.com/staugur/picbed)的脚本。
 71 | 
 72 | 你需要用`grab_huaban_board.py`下载画板或用户，使用`up2picbed.py`上传画板或
 73 | 用户所有画板，这个脚本会增量上传（即自动跳过已经上传的文件，但此功能基于
 74 | 本地存储文件.up2picbed.dat且文件索引严格，如果删除dat文件则重传，如果文件名
 75 | 改变则重传）。
 76 | 
 77 | ```
 78 | $ python ./up2picbed.py -h
 79 | usage: up2picbed.py [-h] [-b] [-u] [--picbed-url PICBED_URL]
 80 |                     [--picbed-token PICBED_TOKEN]
 81 |                     board_or_user
 82 | 
 83 | positional arguments:
 84 |   board_or_user         画板ID或用户名
 85 | 
 86 | optional arguments:
 87 |   -h, --help            show this help message and exit
 88 |   -b, --board           上传画板，允许逗号选择多个，默认此项
 89 |   -u, --user            上传单个用户下所有画板
 90 |   --picbed-url PICBED_URL
 91 |                         picbed的根域名
 92 |   --picbed-token PICBED_TOKEN
 93 |                         picbed的用户token
 94 | ```
 95 | 
 96 | 示例：
 97 | 
 98 | 1. 上传画板: `./up2picbed.py --picbed-url https://picbed.saintic.com --picbed-token Token 画板ID`
 99 | 
100 | 2. 上传用户: `./up2picbed.py --picbed-url https://picbed.saintic.com --picbed-token Token -u 用户名`
101 | 
102 | ## TODO
103 | 
104 | 1. --board_ids 多画板
105 | 2. --user_ids 多用户
106 | 3. --ignore 指定忽略画板
107 | 4. ~~ip代理池~~
108 | 
109 | But，以上todo暂无计划，py版目前只针对bug
110 | 
111 | 
112 | ## 友情链接
113 | 1. [MacOS GUI 备份程序](https://github.com/ZhuPeng/grab_huaban_board "MacOS GUI 备份程序")
114 | 


--------------------------------------------------------------------------------
/analyze.txt:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | 一、对于python
 3 | 
 4 | 模仿ajax请求(header)
 5 | s=requests.Session()
 6 | s.headers.update({'X-Request': 'JSON', 'X-Requested-With': 'XMLHttpRequest', 'Referer': 'https://huaban.com', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36', 'Accept':'application/json'})
 7 | 
 8 | 
 9 | #### 以下针对画板
10 | 1. post
11 | https://huaban.com/auth/
12 | r1=s.post(url1, headers=headers)
13 | data = dict(email="", password='')
14 | headers["Content-type"] = "application/x-www-form-urlencoded; charset=UTF-8"
15 | 错误密码时返回(JSON)： {"err":403,"msg":"用户密码错误"}
16 | 正确时返回(JSON)：
17 | {u'redirect': u'http://huaban.com/', u'user': {u'username': u'\u5c81\u6708\u5982\u5200\u65a9\u5929\u9a84', u'rating': 8191, u'user_id': 9880671, u'following_count': 43, u'roles': u'', u'location': u'\u5317\u4eac', u'status': {u'newbietask': 0, u'default_board': 13582018, u'past_shiji_guide': 1, u'featuretask': 6, u'share': u'0', u'lr': 1457690110, u'emailvalid': False, u'invites': 0}, u'boards_like_count': 15, u'like_count': 22, u'profile': {}, u'avatar': {u'farm': u'farm1', u'bucket': u'hbimg', u'height': u'700', u'width': u'541', u'key': u'18796173a3f91fcba4b767d0df743cdb041897ec73a4c-XlcHNT', u'frames': u'1', u'type': u'image/png', u'id': 131352797}, u'creations_count': 0, u'follower_count': 78, u'urlname': u'staugur', u'bindings': {u'weibo': u'weibo-3271188341'}, u'commodity_count': 0, u'pin_count': 1207, u'board_count': 2}}
18 | 
19 | 2. get
20 | https://huaban.com/boards/32956845/
21 | r2=s.get(url2,headers=headers)
22 | 返回json['board': '']
23 | 
24 | 3.ajax
25 | max=data['board']['pins'][-1]['pin_id']
26 | url3 = 'https://huaban.com/boards/32956845/?max=max&limit=100&wfl=1'
27 | #Content-Type: application/json; charset=utf-8
28 | r3=s.get(url3, headers=headers)
29 | len(data3['board']['pins'])
30 | 100
31 | data3['board']['pins'][0]['pin_id']
32 | 1131315846
33 | 
34 | data3['board']['pins'][-1]['pin_id']
35 | 973346258
36 | 
37 | 
38 | ######以下针对user
39 | 1. https://huaban.com/staugur
40 | get 返回 json ['user']
41 | 
42 | board_count = data['board_count']
43 | board_ids = [ board['board_id'] for board in  data['boards'] ]
44 | 
45 | 
46 | 2. ajax
47 | last_board_id=data['boards'][-1]['board_id']
48 | https://huaban.com/staugur?jhhft3as&max=last_board_id&limit=10&wfl=1
49 | get 返回 json ['user']
50 | 数据同1中
51 | 
52 | 
53 | 二、对于js
54 | 直接获取url中board_id，使用ajax访问首页及加载后续页即可获取所有json数据，拼接imgUrl通过不同下载方式保存图片即可。
55 | 
56 | 
57 | 三、堆糖网
58 | 利用ajax获取其接口数据，主要参数album_id,limit,start。
59 | 


--------------------------------------------------------------------------------
/grab_huaban_board.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf8 -*-
  3 | 
  4 | __version__ = "5.0.4"
  5 | __author__ = "staugur"
  6 | __doc__ = "https://blog.saintic.com/blog/204.html"
  7 | 
  8 | import os
  9 | import logging
 10 | import requests
 11 | from random import choice
 12 | from time import sleep
 13 | from multiprocessing.dummy import Pool as ThreadPool
 14 | from multiprocessing import Pool as ProcessPool
 15 | 
# Base URL of the Huaban site; currently huaban.com, reachable over http or https.
BASE_URL = 'https://huaban.com'
# Pause between consecutive paging requests, in seconds.
SLEEP_TIME = 1
# Whether to route requests through the IP proxy pool.
WITH_IP_POOL = False
# Endpoint queried by get_proxy() for a proxy address.
IP_POOL_API = "https://open.saintic.com/proxy/get/"
# Enables the extra per-image debug output.
DEBUG = False

# All log records are appended to huaban.log in the current working directory.
logging.basicConfig(
    level=logging.INFO,
    format='[ %(levelname)s ] %(asctime)s %(filename)s:%(threadName)s:%(process)d:%(lineno)d %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    filename='huaban.log',
    filemode='a'
)

# Shared HTTP session; the headers imitate the site's own AJAX requests.
request = requests.Session()
request.verify = True
request.headers.update({
    'X-Request': 'JSON', 'X-Requested-With': 'XMLHttpRequest', 'Referer': BASE_URL,
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'
})
 40 | user_agent_list = [
 41 |     "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
 42 |     "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36",
 43 |     "Mozilla/5.0 (Windows NT 10.0; …) Gecko/20100101 Firefox/61.0",
 44 |     "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36",
 45 |     "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36",
 46 |     "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36",
 47 |     "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)",
 48 |     "Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10.5; en-US; rv:1.9.2.15) Gecko/20110303 Firefox/3.6.15",
 49 | ]
 50 | 
 51 | 
 52 | def get_proxy():
 53 |     resp = dict()
 54 |     if WITH_IP_POOL is True:
 55 |         try:
 56 |             _ip_proxy = request.get(IP_POOL_API, timeout=5).text
 57 |         except requests.exceptions.RequestException as e:
 58 |             logging.warn(e, exc_info=True)
 59 |         else:
 60 |             if not (_ip_proxy.startswith("http://") or _ip_proxy.startswith("https://")):
 61 |                 _ip_proxy = "http://%s" % _ip_proxy
 62 |             resp = {"http": _ip_proxy, "https": _ip_proxy}
 63 |     logging.info("Start ip_proxy_pool, get result: %s" % resp)
 64 |     return resp
 65 | 
 66 | 
 67 | def printcolor(msg, color=None):
 68 |     if color == "green":
 69 |         print('\033[92m%s\033[0m' % msg)
 70 |     elif color == "blue":
 71 |         print('\033[94m%s\033[0m' % msg)
 72 |     elif color == "yellow":
 73 |         print('\033[93m%s\033[0m' % msg)
 74 |     elif color == "red":
 75 |         print('\033[91m%s\033[0m' % msg)
 76 |     else:
 77 |         print(msg)
 78 | 
 79 | 
 80 | def makedir(d):
 81 |     if not os.path.exists(d):
 82 |         os.makedirs(d)
 83 |     if os.path.exists(d):
 84 |         return True
 85 |     else:
 86 |         return False
 87 | 
 88 | 
 89 | def _post_login(email, password):
 90 |     """登录函数"""
 91 |     res = dict(success=False)
 92 |     url = BASE_URL + "/auth/"
 93 |     try:
 94 |         resp = request.post(url, data=dict(email=email, password=password), headers={
 95 |                             'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8'}).json()
 96 |     except Exception as e:
 97 |         logging.error(e, exc_info=True)
 98 |     else:
 99 |         if "user" in resp:
100 |             # 登录成功
101 |             res.update(success=True, data=resp["user"])
102 |         else:
103 |             res.update(resp)
104 |     return res
105 | 
106 | 
def _download_img(pin, retry=True):
    """Download the original image of a single pin.

    The image is saved as ``<board_id>/<pin_id>.<suffix>`` relative to the
    current working directory; an already existing file is skipped.

    Args:
        pin (dict): pin data, required keys:
            {'pin_id': xx, 'suffix': u'png|jpg|jpeg...', 'key': u'xxx-xx', 'board_id': xx}.
            Anything else is silently ignored.
        retry (bool): retry once when the download fails.
    """
    required_keys = ("pin_id", "suffix", "key", "board_id")
    if not (pin and isinstance(pin, dict) and all(k in pin for k in required_keys)):
        return
    imgurl = "https://hbimg.huabanimg.com/{}".format(pin["key"])
    # board_id may arrive as an int; os.path.join needs a string.
    imgdir = str(pin['board_id'])
    imgname = os.path.join(imgdir, '{}.{}'.format(pin["pin_id"], pin["suffix"]))
    if os.path.isfile(imgname):
        if DEBUG:
            printcolor("Skip downloaded images: %s" % imgname)
        return
    try:
        makedir(imgdir)
        # The timeout stops a stalled connection from blocking a pool worker forever.
        req = request.get(imgurl, timeout=60)
        # Do not store an HTTP error page as an image: the file would be
        # treated as "already downloaded" on every later run.
        req.raise_for_status()
        with open(imgname, 'wb') as fp:
            fp.write(req.content)
    except Exception as e:
        logging.warning(e, exc_info=True)
        if retry is True:
            _download_img(pin, False)
        else:
            printcolor("Failed download for {}".format(imgurl), "yellow")
    else:
        if DEBUG:
            printcolor("Successful download for {}, save as {}".format(
                pin["pin_id"], imgname), "blue")
136 | 
137 | 
def _crawl_board(board_id):
    """Collect every pin of one board and download the images.

    The first page is requested from ``/boards/<board_id>/``; further pages
    are requested with ``max=<last pin_id>`` until an empty page is returned
    or the request budget is used up. The images are then downloaded with a
    thread pool.

    Args:
        board_id: id of the board; a falsy value makes the call a no-op.
    """
    if not board_id:
        return
    limit = 100
    board_url = BASE_URL + '/boards/{}/'.format(board_id)
    try:
        try:
            # get first pin data
            r = request.get(board_url).json()
        except requests.ConnectionError:
            # Retry once with another User-Agent. The retry lives inside the
            # outer try so that its result is processed like a first-try
            # success and its failure is reported like any other error.
            request.headers.update({"User-Agent": choice(user_agent_list)})
            r = request.get(board_url).json()
    except Exception as e:
        printcolor("Crawl first page error, board_id: {}".format(
            board_id), "yellow")
        logging.error(e, exc_info=True)
        return
    if "board" not in r:
        printcolor(r.get("msg"))
        return
    board_data = r["board"]
    pin_number = int(board_data["pin_count"])
    board_pins = board_data["pins"]
    printcolor("Current board <{}> pins number is {}, first pins number is {}".format(
        board_id, pin_number, len(board_pins)), 'red')
    if board_pins and len(board_pins) < pin_number:
        # Request budget: twice the number of pages, and never zero, so that
        # boards smaller than one full page are paged as well.
        retry = 2 * (pin_number // limit + 1)
        last_pin = board_pins[-1]['pin_id']
        while retry >= 1:
            # Every attempt, failed or not, consumes budget; otherwise a
            # persistent error would loop forever.
            retry -= 1
            # get ajax pin data
            board_next_url = BASE_URL + \
                "/boards/{}/?max={}&limit={}&wfl=1".format(
                    board_id, last_pin, limit)
            try:
                next_pins = request.get(board_next_url).json()["board"]["pins"]
            except Exception as e:
                logging.error(e, exc_info=True)
                sleep(SLEEP_TIME)
                continue
            board_pins += next_pins
            printcolor("ajax load board with pin_id {}, get pins number is {}, merged".format(
                last_pin, len(next_pins)), "blue")
            if not next_pins:
                break
            last_pin = next_pins[-1]["pin_id"]
            # Reduce the request frequency
            sleep(SLEEP_TIME)
    # Keep only what _download_img needs; pins without file data are skipped
    # and a missing or empty MIME type falls back to "png".
    download_jobs = []
    for pin in board_pins:
        file_info = pin.get('file') or {}
        if 'key' not in file_info:
            continue
        suffix = (file_info.get('type') or "").split('/')[-1] or "png"
        download_jobs.append(dict(
            pin_id=pin['pin_id'], suffix=suffix, key=file_info['key'], board_id=board_id))
    pool = ThreadPool()
    pool.map(_download_img, download_jobs)
    pool.close()
    pool.join()
    printcolor("Current board {}, download over".format(board_id), "green")
196 | 
197 | 
def _crawl_user(user_id):
    """List every board of one user and crawl each board in a process pool.

    The first page is requested from ``/<user_id>``; further pages are
    requested with ``max=<last board_id>`` until an empty page is returned or
    the request budget is used up.

    Args:
        user_id: user name as used in the profile URL; a falsy value makes
            the call a no-op.
    """
    if not user_id:
        return
    user_url = BASE_URL + "/{}".format(user_id)
    limit = 5
    try:
        try:
            # get first board data
            r = request.get(user_url).json()
        except requests.ConnectionError:
            # Retry once with another User-Agent. The retry lives inside the
            # outer try so that its result is processed like a first-try
            # success and its failure is reported like any other error.
            request.headers.update({"User-Agent": choice(user_agent_list)})
            r = request.get(user_url).json()
    except Exception as e:
        printcolor("Crawl first page error, user_id: {}".format(
            user_id), "yellow")
        logging.error(e, exc_info=True)
        return
    if "user" not in r:
        printcolor(r.get("msg"))
        return
    user_data = r["user"]
    board_number = int(user_data['board_count'])
    boards = user_data['boards']
    printcolor("Current user <{}> boards number is {}, first boards number is {}".format(
        user_id, board_number, len(boards)), 'red')
    if boards and len(boards) < board_number:
        # Request budget: twice the number of pages, and never zero.
        retry = 2 * (board_number // limit + 1)
        last_board = boards[-1]['board_id']
        while retry >= 1:
            # Every attempt, failed or not, consumes budget; otherwise a
            # persistent error would loop forever.
            retry -= 1
            # get ajax board data
            user_next_url = BASE_URL + \
                "/{}?jhhft3as&max={}&limit={}&wfl=1".format(
                    user_id, last_board, limit)
            try:
                next_boards = request.get(user_next_url).json()["user"]["boards"]
            except Exception as e:
                logging.error(e, exc_info=True)
                sleep(SLEEP_TIME)
                continue
            boards += next_boards
            printcolor("ajax load user with board_id {}, get boards number is {}, merged".format(
                last_board, len(next_boards)), "blue")
            if not next_boards:
                break
            last_board = next_boards[-1]["board_id"]
            # Reduce the request frequency
            sleep(SLEEP_TIME)
    # A real list (not a lazy map object) of board ids as strings.
    board_ids = [str(board['board_id']) for board in boards]
    pool = ProcessPool()  # create the process pool
    # board_ids: the items to process; _crawl_board: the function applied to each
    pool.map(_crawl_board, board_ids)
    pool.close()  # no further tasks are accepted
    pool.join()  # block until the worker processes have exited
    printcolor("Current user {}, download over".format(user_id), "green")
254 | 
255 | 
def main(parser):
    """Parse the command line and run the requested action.

    Args:
        parser: an ``argparse.ArgumentParser`` that defines the options
            action, user, password, version, board_id, user_id, debug,
            proxy and proxy_apiurl.

    The ``getBoard`` action downloads into ``boards/<board_id>/``; the
    ``getUser`` action downloads into ``<user_id>/<board_id>/``. Both paths
    are relative to the directory the script is started from.
    """
    global WITH_IP_POOL, IP_POOL_API, request, DEBUG
    args = parser.parse_args()
    if not args.action:
        parser.print_help()
        return
    # Answer --version before anything touches the network.
    if args.version:
        printcolor(
            "https://github.com/staugur/grab_huaban_board, v{}".format(__version__))
        return
    action = args.action
    user = args.user
    password = args.password
    board_id = args.board_id
    user_id = args.user_id
    if args.debug is True:
        DEBUG = True
    if args.proxy is True:
        WITH_IP_POOL = args.proxy
        IP_POOL_API = args.proxy_apiurl or IP_POOL_API
        request.proxies.update(get_proxy())
    # User login
    if user and password:
        auth = _post_login(user, password)
        if not auth["success"]:
            # "msg" can be missing when the request itself failed.
            printcolor(auth.get("msg") or "Login failed", "yellow")
            return
    else:
        printcolor("您未设置账号密码，将处于未登录状态，抓取的图片可能有限；设置账号密码后，图片抓取率大部分可达100%！")
    # Main actions
    if action == "getBoard":
        # Crawl a single board
        if not board_id:
            printcolor("请设置board_id参数", "yellow")
            return
        makedir("boards")
        os.chdir("boards")
        _crawl_board(board_id)
    elif action == "getUser":
        # Crawl a single user
        if not user_id:
            printcolor("请设置user_id参数", "yellow")
            return
        makedir(user_id)
        os.chdir(user_id)
        _crawl_user(user_id)
    else:
        parser.print_help()
305 | 
306 | 
# Script entry point: build the command-line parser and hand it to main(),
# which parses the arguments itself.
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    # --action selects what to crawl: "getBoard" (default) or "getUser".
    parser.add_argument("-a", "--action", default="getBoard",
                        help=u"脚本动作 -> getBoard: 抓取单画板(默认); getUser: 抓取单用户")
    # Optional login credentials.
    parser.add_argument("-u", "--user", help=u"花瓣网账号-手机/邮箱")
    parser.add_argument("-p", "--password", help=u"花瓣网账号对应密码")
    parser.add_argument("-v", "--version", help=u"查看版本号", action='store_true')
    # Target of the selected action.
    parser.add_argument("-bid", "--board_id",
                        help=u"花瓣网单个画板id, action=getBoard时使用")
    parser.add_argument("-uid", "--user_id",
                        help=u"花瓣网单个用户id, action=getUser时使用")
    parser.add_argument("--debug", help=u"开启debug输出", action='store_true')
    # Proxy pool switch and an optional override of the pool API URL.
    parser.add_argument("--proxy", help=u"开启IP代理池", action='store_true')
    parser.add_argument(
        "--proxy_apiurl", help=u"IP代理池接口：开启IP代理池后，设置此选项使用非默认接口")
    main(parser)
324 | 


--------------------------------------------------------------------------------
/gui_batchdownload.py:
--------------------------------------------------------------------------------
  1 | #! /usr/bin/env python
  2 | #  -*- coding: utf-8 -*-
  3 | #
  4 | # 依赖： pip install pyinstaller pywin32
  5 | # 打包： pyinstaller.exe -F gui_batchdownload.py -i logo.ico -w --version-file version_file.txt
  6 | #       如需压缩，请到此https://github.com/upx/upx/releases下载对应的包（比如v3.95，win64）解压，打包时带上--upx .\upx-3.95-win64 
  7 | #
  8 | 
  9 | import os
 10 | from base64 import b64decode
 11 | import Tkinter as tk
 12 | from tkFileDialog import askdirectory
 13 | from threading import Timer, Thread
 14 | from tempfile import NamedTemporaryFile
 15 | import tkMessageBox
 16 | from sys import version_info
 17 | 
 18 | PY2 = version_info[0] == 2
 19 | if PY2:
 20 |     from urllib2 import build_opener
 21 | else:
 22 |     from urllib.request import build_opener
 23 | 
 24 | ListEqualSplit  = lambda l,n=100: [ l[i:i+n] for i in range(0,len(l), n) ]
 25 | logo_base64 = '''AAABAAEAICAAAAEAIACoEAAAFgAAACgAAAAgAAAAQAAAAAEAIAAAAAAAABAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAmNcAQGk7wICzP8CAsD5AgKoeAAAqgYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAzAQAAGUYAgLk/wIC/P8CAvz/AgL8/wsL/P8ODvzzCAj7/QIC4b8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAoxwODnHfKipM/wICzf8CAvz/AgL8/xoa+v8sLK25AACfEAAAAAAAAAAAAADrDAIC7tkAAP4KAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKEwJydN/zExT/8oKFX/AgL8/wIC/P8TE/z/Ozt7/zU1af8xMU//FxebsQAAAAAAAAAAAAAAAAMD0qsAAONIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/AhwcU/kvL1L/Ly9S/wgIiv8CAvz/AgL8/3d3/F4AAAAAAAAAANTU/wYdHcFwJSV49zMz/woAAAAAAAAAAAEB2NsICPp6AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACAn9+Li5W/y4uVv8uLlb/AgLg/wIC/P8KCvz/AAAAAAAAAAAAAAAAAAAAAAAAAAAAALYGHByR7woKwRgAAAAAAACqAgIC9/8bG/o4AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABMTXektLVv/LS1b/0dHd/0CAvn/AgL8/y0t++cAAAAAAAAAAAAAAAAAAAAAAQFnpwIC7v8CAujlIiJx/RUV3GAAAAAAAQGfqwMD/P8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAISFg/SsrX/8rK1//tLT+agIC+f8CAvz/OTn8uQAAAAAHB64iAQGZqwMDokwCAvz/AgL8/xMT/P8XF/PdKytf/zs75UQAAAAAAgL7/xgY+u8AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAjI2T/KSlk/ykpZP8AAAAAAgLs/wIC/P86OvyvAAC6GiQkX/8pKWT/KSlk/wIC/P8ICPz/AAAAAAAAAAAeHqTtLCxm/42N/ggCAtT1AgL8/zMz/xQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAACEhav8nJ2n/KChq/wAAAAACArf5AgL8/y0t/MMDA3iTJydp/ycnaf9SU5/zAgL8/yEh+vMAAAAAAAAAAAICjFgnJ2r/MzOw7QAAWlwCAvz/Fxf75wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAHBxu+yUlb/8mJnD/AAAAAAEBh4kCAvz/Hx/78wgIcq8lJW//NTV9/6qq/wICAvv/LS3z83Nz90AAAAAAAAC/BCIibf8lJW//lZX2HAIC/P8EBPz/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAUFHLjJCR1/yQkdf8AAAAAAABzHgIC/P8HB/z/BQV7kyQkdf9wcNLTAAAAAAICvPcfH/zlLy/HdgAAAAAAAAAAGRlz9yQkdf8+QLTzAgL8/wIC/P9kZPtOAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAYGgashInv/ISJ7//7+/goAAAAAAgL4/wIC/P8nJ7RsISJ7/3x84JUAAAAAAAB5KAwM+/sAAAAAAAAAAAAAAAAQEHTfISJ7/yIie/8CAvn/AgL8/0dH+qsAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABAR6OB8fgf8fH4H/gYH8aAAAAAABAbPXAgL8/y4u+6UbG4D/ZWXcuQAAAAAAAAAAAgLn/1VV/wYAAAAAAAAAAAsLdtcfH4H/Hx+B/wIC/P8CAvz/Njb8wwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGRmI/Rwcif8+P7rxAAAAAAAAgTYCAvz/DQ38/wUFlpc4Oa/9AAAAAAAAAAAAAKAiKSn6tQAAAAAAAAAADg5z7xwcif8cHIn/AgL8/wIC/P9FRfy7AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKA2AwOcjwkJmVIFBZ/HGhuR/xsckv////8CAAAAAAIC5PsCAvz/W1v+KhQUkfd/f/8KAAAAAAAAAAAEBMF4Hx//CAAA0hYYGIj/GhuR/wQEp/8CAvz/AgL8/39//HYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP8CBQV63VRUzf1aWnv/LS0x+xcXMP8XGJn/FxiZ/x8fvO0AANQGAACHQAIC+/8sLP3FAABVBiMjw/kAAAAAAAAAAAAAAAADA9WZCgp79xcYmP8EBKX/AgL8/wIC/P8FBfz/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgIetucnPpocnJ18SoqLf0AAAAIFxcZeAgIXP8VFaH/ISGt+QoK7l4FBcmtAgLk+wIC/P8oKP4SAABzCgsLxfMFBZ1gBQWJ3xITl/8PD7D/AgL6/wIC/P8CAvz/AwP8/3R0/Z0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJ8Ia2vdz////wJKSk/3S0tQ/xgYGEj+/v4GNjY8/Q4OoP8SEqj/SEjwagAAmQQNDa/rAgL6/w4O+v0AAAAAAAAAAA0NyMUTE6n/EhKo/xISqP8SEqj/Pj7U9UFB/Ghzc/4KAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMDk0zh4f4qAAAAAFxcYbltbXT/KSksaAAAAABLS1PjExMc/xERrf8XF7f9////AgAAAAADA7zhAgL8/yMj/2YAAAAAAAAAAH9//wY5OfRiSUnqiXl5+1AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAwN+g/7+/hQAAAAAGhoaEhgYG8MAAAAQAAAAAJ6eotFEREz/CQle6w4Otv8qKtm5AAAAAAAAAAADA9frAwP8/wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAH84lJT+SgAAAAAAAAAAAAAAAAAAAAAAAAAAi4uN40VFTf8cHB//Cgqk/w4OvP89PfY+AAAAAAAAAAACAvb/DAz88wAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAfwI4N9bVAAAAAAAAAAAAAAAAAAAAAAAAAABCQkX5RERM/y8vMesWFhy/CAi5/xAQx/9fX/8IAAAAAAAAmRQCAvz/FBT6rwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgIn5n///8CAAAAAAAAAAAAAAAAAAAAADMzOP1DQ0v/NTU35X9/fwIZGR7tBwa58xER1vsAAAAAAAAAAAAAvxQCAvv/DQ38XgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB/BCAg3MsAAAAAAAAAAAAAAAD///8COTlA/0tLUv86Ojy/AAAAAFVVVRgSEhTPAgLhaA0M3v0AAAAAAAAAAAAAzAQCAvT3AwP3RAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAgKfWick92gAAAAAAAAAAAAAAAAyMjf9ZGRp/xERE3YAAAAAAAAAAAAAABAAAAAAAAB/AgkI498FBc0uAAAAAAAAAAACAuV6AgLpeAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQPGwRAN9lwAAAAAAAAAAEJCRNd8fIH/AAAANAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAB8f/xATEuTFICD0dgAAAAAAAAAABwfwkQ0N2kwqKvASAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABgTW0wgG650AAAAA////EFlZX/8AAAAuAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/AgAAkSICApxsBAKjuwUEt+0GBcf9BwXO/wYFzfsEA8XPAgLCYAAAfwQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABgPhowgF6/kCAstaEhIYfg8PEcsAAAAAAAAAAAAA2gYFBY8wAQGWhwQDqdsHBM7/CQXp/wkF7/8JBe//CQXv/wkF7/8JBe//CQXv/wkF7/8JBe//CAXo/wUD3YkAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQXnKgkF9P0IBez/CAXP/wYEzfcHBNr9CAXl/wkF8f8JBfP/CQXz/wkF8/8JBfP/CQXz/wkF8/8JBfP/CQXz/woG8/8XE/X/JCH3/yMg9vsUEfX/CQXz/wcE8u8cHP4IAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQU+TAQDPj7CQX4/wkF+P8JBfj/CQX4/wkF+P8JBfj/CQX4/wkF+P8JBfj/IR75/05N+8mFhftEmZn/Dv///wIAAAAAAAAAAAAAAAAiIu4OBwX5tQoG9/9ERP8OAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/P/8EW1v+MjEv/bEpJ/zzKif9/zc1+/FXV/2prq76Nn9//wgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAC5CiEe9/sAAAAA//j////gH///Ae///gB3//wPu//8D93/+A8M//iNBn/4iDJ/+IA5P/iBOT/4wTg/+Mm4H/zBuB/8Ydgf9GX4P8Ay4D+mI0A/pjGB/+YZ9/92DP///g5//74HP/++I7//3jXf//5+///uf7v/83/gf/m8AA/+AAAH/4AH8//wf/0='''
# Decode the embedded base64 icon into a temporary .ico file. delete=False
# keeps the file on disk after the handle is closed; logo_file holds its path.
with NamedTemporaryFile(mode='w+b', prefix='grab-huaban-duitang-', suffix=".ico", delete=False) as fp:
    fp.write(b64decode(logo_base64))
    logo_file = fp.name
 29 | 
 30 | def _makedir(d):
 31 |     if not os.path.exists(d):
 32 |         os.makedirs(d)
 33 |     if os.path.exists(d):
 34 |         return True
 35 |     else:
 36 |         return False
 37 | 
 38 | def _get_url(url, site=None):
 39 |     """发起原生get请求"""
 40 |     class DO(dict):
 41 |         def __getattr__(self, name):
 42 |             try:
 43 |                 return self[name]
 44 |             except KeyError:
 45 |                 raise AttributeError(name)
 46 |     opener = build_opener()
 47 |     agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'
 48 |     referer = 'https://huaban.com' if site == "huaban" else 'https://www.duitang.com'
 49 |     opener.addheaders = [('User-Agent', agent), ('Referer', referer)]
 50 |     response = opener.open(url)
 51 |     resp = dict(status=response.code, msg=response.msg, type=response.headers.type, maintype=response.headers.maintype, subtype=response.headers.subtype, headers=response.headers.dict, content=response.read())
 52 |     response.close()
 53 |     return DO(resp)
 54 | 
 55 | def _check_download_type(img_url):
 56 |     """根据img_url分析是花瓣网图片还是堆糖网图片，特征：
 57 |     #: 花瓣
 58 |     https://hbimg.b0.upaiyun.com/ec4433d815c45742a4ec1d5b6810c4ce9fe2f4d86e636-SoEBVh
 59 |     #: 堆糖
 60 |     https://b-ssl.duitang.com/uploads/item/201904/26/20190426115802_dUaAA.jpeg
 61 |     """
 62 |     if img_url:
 63 |         if img_url.startswith("https://hbimg.") or img_url.startswith("http://hbimg."):
 64 |             return "huaban"
 65 |         elif "duitang.com" in img_url:
 66 |             return "duitang"
 67 |     return "unknown"
 68 | 
 69 | class DownloadImage(Thread):
 70 | 
 71 |     def __init__(self, img_data, index=0, length=None):
 72 |         Thread.__init__(self)
 73 |         self.img_data = img_data
 74 |         self.index = index
 75 |         self.length = length - 1
 76 | 
 77 |     def run(self):
 78 |         """ 下载单个原图
 79 |         @param img_data dict: 图片所需数据，要求： {'img_url': xx, 'img_dir': xx}
 80 |         @param retry bool: 是否失败重试
 81 |         """
 82 |         imgurl = self.img_data["img_url"]
 83 |         site = _check_download_type(imgurl)
 84 |         name = imgurl.split("/")[-1]
 85 |         imgdir = os.path.join(self.img_data["img_dir"], site)
 86 |         support_showmessage(u"下载: NO.%s -> %s" %(self.index, name))
 87 |         try:
 88 |             if _makedir(imgdir) is True:
 89 |                 response = _get_url(imgurl, site)
 90 |                 if site in ("huaban", "unknown"):
 91 |                     imgname = os.path.join(imgdir, '%s.%s' %(name, response.subtype or "png"))
 92 |                 else:
 93 |                     imgname = os.path.join(imgdir, name)
 94 |                 if not os.path.isfile(imgname):
 95 |                     with open(imgname, 'wb') as fp:
 96 |                         fp.write(response.content)
 97 |         except:
 98 |             pass
 99 |         finally:
100 |             if self.index == self.length:
101 |                 support_showmessage(u"下载完成", True)
102 | 
103 | #: 主窗口辅助模块
def support_showmessage(text, renew=False):
    """Show ``text`` in the message label at the bottom of the window.

    :param text: message to display.
    :param renew: when true, also clear the URL text box and re-enable the
        "start batch download" button (used once a batch has ended).
    """
    w.Label1.configure(text=text)
    if renew:
        # Clear the text box.
        w.Text1.delete('1.0', tk.END)
        # Re-enable the button. "normal" is the enabled resting state;
        # "active" is the pointer-hover state and leaves the button drawn
        # with its highlight colours.
        w.Button1.configure(state="normal")
112 | 
def support_start_thread(data):
    """Mark every thread in ``data`` as a daemon thread and start it."""
    for worker in data:
        worker.daemon = True
        worker.start()
117 | 
def support_download_timer(text_list):
    """Create one download thread per entry and start them in timed batches.

    Does nothing unless ``text_list`` is a non-empty list. After the user
    confirms the info dialog, the threads are handed to ListEqualSplit with
    a group size of 20, and each group it yields is scheduled through
    ``root.after`` two seconds later than the previous one (the first with
    no delay).
    """
    if not (text_list and isinstance(text_list, list)):
        return
    tkMessageBox.showinfo(u"温馨提示", u"点击确定提交下载，期间请不要关闭主窗口，并关注窗口底部消息输出。")
    batch_size = 20
    total = len(text_list)
    workers = [DownloadImage(item, pos, total) for pos, item in enumerate(text_list)]
    delay = 0
    for batch in ListEqualSplit(workers, batch_size):
        root.update()
        root.after(delay, support_start_thread, batch)
        delay += 2000
132 | 
def support_batchDownload():
    """Start a batch download of the URLs pasted into the text box.

    Disables the start button, reads one URL per line (surrounding
    whitespace and blank lines are ignored), drops URLs whose file already
    exists under the download directory, then schedules
    support_download_timer for the rest. The download directory is the path
    shown in Label2, or the current working directory when none is chosen.
    """
    # Disable the button while a batch is being prepared or running.
    w.Button1.configure(state="disabled")
    # Collect the URLs to download; strip() also removes a stray "\r".
    text = w.Text1.get(1.0, tk.END)
    text_list = [line.strip() for line in text.split("\n") if line.strip()]
    # Directory the images are stored in.
    download_dir = w.Label2.cget("text") or os.getcwd()
    if not text_list:
        support_showmessage(u"请输入花瓣网下载或堆糖网下载中复制的文本！")
        # "normal" is the enabled resting state of a Tk button.
        w.Button1.configure(state="normal")
        return
    # Count the URLs per site for the summary message.
    counts = {"huaban": 0, "duitang": 0, "unknown": 0}
    for url in text_list:
        counts[_check_download_type(url)] += 1
    # Names of images that are already on disk. huaban and unknown files are
    # stored as "<name>.<subtype>", so the appended extension is removed
    # before comparing; duitang files keep the URL's own file name.
    existing = set()
    for site in ("huaban", "duitang", "unknown"):
        site_dir = os.path.join(download_dir, site)
        if not os.path.isdir(site_dir):
            continue
        for filename in os.listdir(site_dir):
            if site == "duitang":
                existing.add(filename)
            else:
                existing.add(os.path.splitext(filename)[0])
    data = [
        {"img_url": url, "img_dir": download_dir}
        for url in text_list
        if url.split("/")[-1] not in existing
    ]
    if data:
        support_showmessage(u"本次批量下载概述：花瓣网 %s 条，堆糖网 %s 条，未知 %s 条，有效 %s 条！" %(counts["huaban"], counts["duitang"], counts["unknown"], len(data)))
        root.update()
        root.after(1000, support_download_timer, data)
    else:
        support_showmessage(u"您输入的已经全部下载完成", True)
164 | 
def support_init(top, gui, *args, **kwargs):
    """Publish the window objects as module globals for the helper functions.

    ``top`` is stored as both ``top_level`` and ``root``; ``gui`` (the widget
    container) is stored as ``w``. Extra arguments are accepted and ignored.
    """
    global w, top_level, root, timer
    top_level = root = top
    w = gui
170 | 
def support_destroy_window():
    """Close the main window and delete the temporary icon file.

    Safe to call more than once: an already-closed window and an
    already-deleted icon file are both tolerated.
    """
    global top_level
    if top_level is not None:
        top_level.destroy()
        top_level = None
    try:
        os.remove(logo_file)
    except OSError:
        # The icon file is already gone or cannot be removed; closing the
        # window must still succeed.
        pass
178 | 
179 | #: 启动主窗口
def vp_start_gui():
    """Create the main window and run the Tk event loop.

    Entry point when the module is run as a script: builds the Tk root,
    populates it via Toplevel1, shares both through support_init, installs
    the window-close handler and then blocks in ``mainloop``.
    """
    global w, root
    root = tk.Tk()
    gui = Toplevel1(root)
    support_init(root, gui)
    root.protocol("WM_DELETE_WINDOW", support_destroy_window)
    root.mainloop()
188 | 
189 | #: 主窗口类
class Toplevel1:
    """Main window of the tool: creates and holds every widget.

    Widgets exposed as attributes: Button1 (start download), Button2 (choose
    directory), Label2 (chosen directory), Text1 (URL input) and Label1
    (status message).
    """

    def __init__(self, top=None):
        """Configure the toplevel window and populate it with widgets.

        ``top`` is the toplevel containing window.
        """
        grey = "#d9d9d9"
        black = "#000000"
        dimmed = "#a3a3a3"

        top.geometry("600x450+590+251")
        top.title(u"花瓣网、堆糖网脚本--文本下载工具")
        top.iconbitmap(logo_file)
        top.configure(background=grey)

        # Appearance shared by both push buttons.
        button_style = dict(
            activebackground="#ececec",
            activeforeground=black,
            background=grey,
            disabledforeground=dimmed,
            foreground=black,
            highlightbackground=grey,
            highlightcolor="black",
            pady="0",
        )
        # Appearance shared by both labels; they start out empty.
        label_style = dict(
            background=grey,
            disabledforeground=dimmed,
            foreground=black,
            text=u'',
        )

        # Button that starts the batch download.
        self.Button1 = tk.Button(top)
        self.Button1.place(relx=0.033, rely=0.022, height=28, width=137)
        self.Button1.configure(
            command=support_batchDownload, text=u'开始批量下载', **button_style
        )

        # Button that opens the directory chooser.
        self.Button2 = tk.Button(top)
        self.Button2.place(relx=0.3, rely=0.022, height=28, width=60)
        self.Button2.configure(
            text=u'选择目录', command=self.selectPath, **button_style
        )

        # Label that shows the chosen directory.
        self.Label2 = tk.Label(top)
        self.Label2.place(relx=0.4, rely=0.022, height=28)
        self.Label2.configure(**label_style)

        # Text box the user pastes the URLs into.
        self.Text1 = tk.Text(top)
        self.Text1.place(relx=0.033, rely=0.089, relheight=0.827, relwidth=0.94)
        self.Text1.configure(
            background="white",
            borderwidth="0",
            font="TkTextFont",
            foreground="black",
            highlightbackground=grey,
            highlightcolor="black",
            insertbackground="black",
            selectbackground="#c4c4c4",
            selectforeground="black",
            width=564,
            wrap="word",
        )

        # Status message line at the bottom of the window.
        self.Label1 = tk.Label(top)
        self.Label1.place(relx=0.033, rely=0.933, height=23, width=500)
        self.Label1.configure(**label_style)

    def selectPath(self):
        """Ask for a directory and show it in Label2 unless cancelled."""
        chosen = askdirectory(title=u"请选择或新建一个目录以存储将要下载的图片")
        if chosen:
            self.Label2.configure(text=chosen)

    @staticmethod
    def _post_menu(event):
        """Create an empty styled popup menu and post it at the pointer."""
        menu = tk.Menu(root, tearoff=0)
        menu.configure(
            activebackground="#f9f9f9",
            activeborderwidth="1",
            activeforeground="black",
            background="#d9d9d9",
            borderwidth="1",
            disabledforeground="#a3a3a3",
            font="{Microsoft YaHei UI} 9",
            foreground="black",
        )
        menu.post(event.x_root, event.y_root)

    @staticmethod
    def popup1(event, *args, **kwargs):
        """Post the first popup menu at the position of ``event``."""
        Toplevel1._post_menu(event)

    @staticmethod
    def popup2(event, *args, **kwargs):
        """Post the second popup menu at the position of ``event``."""
        Toplevel1._post_menu(event)
292 | 
293 | if __name__ == '__main__':
294 |     vp_start_gui()
295 | 


--------------------------------------------------------------------------------
/logo.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/staugur/grab_huaban_board/e0ad1120bdd431e4d8eb4667331d5b82df2688ef/logo.ico


--------------------------------------------------------------------------------
/up2picbed.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf8 -*-
  3 | 
  4 | __version__ = "0.1.0"
  5 | __author__ = "staugur"
  6 | __doc__ = "将图片上传到picbed"
  7 | 
  8 | import shelve
  9 | import requests
 10 | from time import time
 11 | from os import listdir, sep
 12 | from os.path import isdir, join, abspath, dirname, isfile
 13 | from sys import version_info
 14 | from grab_huaban_board import request, printcolor, BASE_URL
 15 | 
# Directory that contains this script; the shelve index and the board
# folders are resolved relative to it.
BASE_DIR = dirname(abspath(__file__))
# picbed address and user token; both are filled in by main() from the
# command-line arguments.
PICBED_URL = None
PICBED_TOKEN = None
# File name of the shelve index that LocalStorage opens inside BASE_DIR.
STORAGE_INDEX = ".up2picbed.dat"
# File-name suffixes that board2piced() uploads (compared case-sensitively).
ALLOWED_SUFFIX = ("png", "jpg", "jpeg", "gif", "webp")

# True when running under Python 2.
PY2 = version_info[0] == 2

# Minimal Python 2/3 compatibility shim: a dict item iterator plus the text
# and string types of the running interpreter.
if PY2:  # pragma: nocover

    def iteritems(d):
        """Return an iterator over the (key, value) pairs of ``d``."""
        return d.iteritems()

    text_type = unicode
    string_types = (str, unicode)

else:  # pragma: nocover

    def iteritems(d):
        """Return an iterator over the (key, value) pairs of ``d``."""
        return iter(d.items())

    text_type = str
    string_types = (str,)
 39 | 
 40 | 
 41 | class LocalStorage(object):
 42 |     """Local file system storage based on the shelve module."""
 43 | 
 44 |     def __init__(self):
 45 |         self.index = join(BASE_DIR, STORAGE_INDEX)
 46 | 
 47 |     def _open(self, flag="c"):
 48 |         return shelve.open(
 49 |             filename=abspath(self.index),
 50 |             flag=flag,
 51 |             protocol=2,
 52 |             writeback=False
 53 |         )
 54 | 
 55 |     @property
 56 |     def list(self):
 57 |         """list all data
 58 | 
 59 |         :returns: dict
 60 |         """
 61 |         db = None
 62 |         try:
 63 |             db = self._open("r")
 64 |         except:
 65 |             return {}
 66 |         else:
 67 |             return dict(db)
 68 |         finally:
 69 |             if db:
 70 |                 db.close()
 71 | 
 72 |     def __ck(self, key):
 73 |         if PY2 and isinstance(key, text_type):
 74 |             key = key.encode("utf-8")
 75 |         if not PY2 and not isinstance(key, text_type):
 76 |             key = key.decode("utf-8")
 77 |         return key
 78 | 
 79 |     def set(self, key, value):
 80 |         """Set persistent data with shelve.
 81 | 
 82 |         :param key: str: Index key
 83 | 
 84 |         :param value: All supported data types in python
 85 |         """
 86 |         db = self._open()
 87 |         try:
 88 |             db[self.__ck(key)] = value
 89 |         finally:
 90 |             db.close()
 91 | 
 92 |     def setmany(self, **mapping):
 93 |         if mapping and isinstance(mapping, dict):
 94 |             db = self._open()
 95 |             for k, v in iteritems(mapping):
 96 |                 db[self.__ck(k)] = v
 97 |             db.close()
 98 | 
 99 |     def get(self, key, default=None):
100 |         """Get persistent data from shelve.
101 | 
102 |         :returns: data
103 |         """
104 |         try:
105 |             value = self.list[key]
106 |         except KeyError:
107 |             return default
108 |         else:
109 |             return value
110 | 
111 |     def remove(self, key):
112 |         db = self._open()
113 |         del db[key]
114 | 
115 |     def __len__(self):
116 |         return len(self.list)
117 | 
118 |     def __str__(self):
119 |         return "<%s object at %s, index is %s>" % (
120 |             self.__class__.__name__, hex(id(self)), self.index
121 |         )
122 | 
123 |     __repr__ = __str__
124 | 
125 | 
def board2piced(album, board_path):
    """Upload every allowed image file found in ``board_path`` to picbed.

    Files already recorded in the local index are skipped. Each successful
    upload is recorded (path relative to BASE_DIR mapped to the upload time)
    so later runs do not upload it again. Does nothing when ``board_path``
    is not a directory.

    :param album: album name sent along with every upload.
    :param board_path: directory that contains the board's image files.
    """
    if not isdir(board_path):
        return
    apiurl = "%s/api/upload" % PICBED_URL.rstrip("/")
    headers = {
        "User-Agent": "grab_huaban_board/%s" % __version__,
        "Authorization": "Token %s" % PICBED_TOKEN
    }
    storage = LocalStorage()
    success = []
    try:
        for filename in listdir(board_path):
            suffix = filename.split(".")[-1]
            filepath = join(board_path, filename)
            sindex = filepath.split(BASE_DIR)[-1]
            if storage.get(sindex):
                printcolor("%s 已上传，继续下一个" % filepath, "blue")
                continue
            if not (isfile(filepath) and suffix in ALLOWED_SUFFIX):
                continue
            try:
                # The with-block closes the file handle after the request,
                # whether it succeeded or not.
                with open(filepath, 'rb') as fp:
                    r = requests.post(
                        apiurl,
                        files={
                            'picbed': (filename, fp, 'image/%s' % suffix)
                        },
                        headers=headers,
                        data=dict(album=album),
                        timeout=30
                    )
            except requests.exceptions.RequestException as e:
                # Format the exception itself: ``e.message`` does not exist
                # on Python 3.
                printcolor(
                    u"%s 上传错误：%s" % (filepath, e),
                    "yellow"
                )
                continue
            try:
                result = r.json()
            except ValueError:
                # The server answered with something that is not JSON.
                result = None
            if isinstance(result, dict) and result.get("code") == 0:
                success.append(sindex)
                printcolor(u"%s 上传成功" % filepath, "green")
            else:
                if isinstance(result, dict):
                    reason = result.get("msg")
                else:
                    reason = u"HTTP %s" % r.status_code
                printcolor(
                    u"%s 上传失败：%s" % (filepath, reason),
                    "yellow"
                )
    finally:
        # Record what was uploaded even if the loop was aborted part-way, so
        # finished uploads are not repeated on the next run.
        if success:
            storage.setmany(**{f: time() for f in success})
173 | 
174 | 
def main(parser):
    """Resolve the boards named on the command line and upload each one.

    :param parser: argparse-style parser providing ``parse_args()`` (with
        the attributes board, user, board_or_user, picbed_url and
        picbed_token) and ``print_help()``.

    In user mode every sub-directory of ``BASE_DIR/<user>`` is a board;
    otherwise the comma-separated ids are looked up under
    ``BASE_DIR/boards``. The album name is the board title fetched from the
    site, falling back to the board id when it cannot be fetched.
    """
    global PICBED_URL, PICBED_TOKEN
    args = parser.parse_args()
    is_board = args.board
    is_user = args.user
    if not is_board and not is_user:
        parser.print_help()
        return
    bou = args.board_or_user
    PICBED_URL = args.picbed_url
    PICBED_TOKEN = args.picbed_token
    if not PICBED_URL:
        printcolor("请输入picbed地址")
        return
    if is_user:
        user_dir = join(BASE_DIR, bou)
        if not isdir(user_dir):
            printcolor("用户目录不存在", "red")
            # Stop here: there are no boards to upload.
            return
        # List the directory that was just checked, not ``bou`` relative to
        # the current working directory.
        boards = [
            join(user_dir, d)
            for d in listdir(user_dir)
            if isdir(join(user_dir, d))
        ]
    else:
        boards = [
            join(BASE_DIR, "boards", d)
            for d in bou.split(",")
            if isdir(join(BASE_DIR, "boards", d))
        ]
    for board_path in boards:
        board_id = board_path.split(sep)[-1]
        album = board_id
        try:
            board_info = request.get(
                "%s/boards/%s" % (BASE_URL, board_id),
                timeout=5
            ).json()
        except (requests.exceptions.RequestException, ValueError):
            # Network failure or a non-JSON answer: keep the id as album.
            pass
        else:
            if isinstance(board_info, dict):
                board = board_info.get("board")
                if isinstance(board, dict):
                    album = (
                        board.get("title") or board.get("board_id") or board_id
                    )
        board2piced(album, board_path)
219 | 
220 | 
# Command-line entry point: build the argument parser and hand it to main(),
# which parses the arguments itself.
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    # NOTE(review): store_true combined with default=True makes args.board
    # True whether or not -b is given, so the "neither --board nor --user"
    # help branch in main() cannot be reached.
    parser.add_argument("-b", "--board",  action='store_true', default=True,
                        help=u"上传画板，允许逗号选择多个，默认此项")
    # With -u the positional argument is treated as a user name.
    parser.add_argument("-u", "--user", help=u"上传单个用户下所有画板",
                        action='store_true')
    parser.add_argument("--picbed-url", help=u"picbed的根域名")
    parser.add_argument("--picbed-token", help=u"picbed的用户token")
    # Board id(s) or user name, depending on the mode flags above.
    parser.add_argument("board_or_user", type=str, help=u"画板ID或用户名")
    main(parser)
232 | 


--------------------------------------------------------------------------------
/version_file.txt:
--------------------------------------------------------------------------------
 1 | VSVersionInfo(
 2 |   ffi=FixedFileInfo(
 3 |     filevers=(0, 1, 0, 0),
 4 |     prodvers=(0, 1, 0, 0),
 5 |     mask=0x3f,
 6 |     flags=0x0,
 7 |     OS=0x40004,
 8 |     fileType=0x1,
 9 |     subtype=0x0,
10 |     date=(0, 0)
11 |     ),
12 |   kids=[
13 |     StringFileInfo(
14 |       [
15 |       StringTable(
16 |         u'040904B0',
17 |         [StringStruct(u'CompanyName', u'SaintIC'),
18 |         StringStruct(u'FileDescription', u'花瓣、堆糖油猴脚本文本方式批量下载'),
19 |         StringStruct(u'FileVersion', u'0.1.0.0'),
20 |         StringStruct(u'InternalName', u'cmd'),
21 |         StringStruct(u'LegalCopyright', u'\xa9 SaintIC. All rights reserved.'),
22 |         StringStruct(u'OriginalFilename', u'gui_batchdownload.py'),
23 |         StringStruct(u'ProductName', u'花瓣、堆糖油猴脚本文本方式批量下载'),
24 |         StringStruct(u'ProductVersion', u'0.1.0.0')])
25 |       ]), 
26 |     VarFileInfo([VarStruct(u'Translation', [2052, 1200])])
27 |   ]
28 | )


--------------------------------------------------------------------------------