├── README.md
└── bilibili_batch_downloader.py
/README.md:
--------------------------------------------------------------------------------
1 | 本程序可以批量下载bilibili(b站)公开收藏夹下的所有视频
2 |
3 | #注意收藏夹一定要是公开的
4 | #可以下载前设置为公开,下载完毕后设置为私密
5 |
6 | #默认下载地址为本py文件所在目录下的download文件夹
7 | #默认下载画质为优先1080p
8 |
9 | #由收藏夹地址抓取收藏夹内视频BV号的功能由本人实现
10 | #下载功能完全照搬Henryhaohao的Bilibili_video_download
11 | #BV号转AV号部分的代码,来源于上述项目的issue:'针对BV号的问题',作者CodeForWuyu
12 |
13 | #如果本项目能帮到你的话,欢迎给个star~
14 |
--------------------------------------------------------------------------------
/bilibili_batch_downloader.py:
--------------------------------------------------------------------------------
"""Batch-download every video in a public Bilibili favorites folder."""
2 | from selenium import webdriver
3 | from time import sleep
4 |
5 | import imageio
6 | imageio.plugins.ffmpeg.download()
7 |
8 | import requests, time, hashlib, urllib.request, re, json
9 | from moviepy.editor import *
10 | import os, sys
11 |
12 |
13 |
14 |
15 |
16 |
# Reference timestamp read by the progress callbacks (Schedule_cmd / Schedule)
# when they compute the average download speed.
start_time = time.time()
18 |
def get_play_list(start_url, cid, quality):
    """Query the play-URL API for one cid and return its segment URLs.

    Args:
        start_url: Sent as the ``Referer`` header of the API request.
        cid: Id placed in the ``cid`` query parameter.
        quality: Value placed in both the ``qn`` and ``quality`` parameters.

    Returns:
        A list with the ``url`` field of every entry under the response's
        ``durl`` key, in response order.
    """
    # The "appkey:secret" pair is recovered by reversing this string and
    # adding 2 to every character code.
    obfuscated = 'rbMCKn@KuamXWlPMoJGsKcbiJKUfkPF_8dABscJntvqhRSETg'
    decoded = ''.join(chr(ord(ch) + 2) for ch in reversed(obfuscated))
    appkey, sec = decoded.split(':')

    query = 'appkey=%s&cid=%s&otype=json&qn=%s&quality=%s&type=' % (appkey, cid, quality, quality)
    # The request is signed with the MD5 hex digest of query string + secret.
    signature = hashlib.md5((query + sec).encode('utf8')).hexdigest()
    api_url = 'https://interface.bilibili.com/v2/playurl?%s&sign=%s' % (query, signature)

    request_headers = {
        'Referer': start_url,  # the original author notes the referer must be sent
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
    }
    payload = requests.get(api_url, headers=request_headers).json()
    return [segment['url'] for segment in payload['durl']]
37 |
38 |
39 | # 下载视频
40 | '''
41 | urllib.urlretrieve 的回调函数:
42 | def callbackfunc(blocknum, blocksize, totalsize):
43 | @blocknum: 已经下载的数据块
44 | @blocksize: 数据块的大小
45 | @totalsize: 远程文件的大小
46 | '''
47 |
48 |
def Schedule_cmd(blocknum, blocksize, totalsize):
    """Report hook for ``urlretrieve`` that redraws a one-line progress bar.

    Writes ``<percent>[<50-char bar>] Speed: <size>`` to stdout followed by a
    carriage return, so the next call overwrites the same console line.

    Args:
        blocknum: Number of blocks transferred so far.
        blocksize: Size of one block in bytes.
        totalsize: Total size of the remote file in bytes. When it is zero or
            negative (size unknown) the bar stays at 0%.
    """
    recv_size = blocknum * blocksize

    # Average speed since the module-level start_time; the first callback can
    # arrive with no measurable elapsed time, so avoid dividing by zero.
    elapsed = time.time() - start_time
    speed = recv_size / elapsed if elapsed > 0 else 0
    speed_str = " Speed: %s" % format_size(speed)

    if totalsize > 0:
        # The final block is usually partial, so blocknum * blocksize can
        # overshoot the real size; clamp to 100%.
        fraction = min(recv_size / totalsize, 1.0)
    else:
        fraction = 0.0

    percent_str = "%.2f%%" % (fraction * 100)
    bar = ('#' * round(fraction * 50)).ljust(50, '-')

    out = sys.stdout
    out.write(percent_str.ljust(8, ' ') + '[' + bar + ']' + speed_str)
    out.flush()
    # Return to the start of the line so the next update overwrites this one.
    out.write('\r')
65 |
66 |
def Schedule(blocknum, blocksize, totalsize):
    """Report hook for ``urlretrieve`` that prints one progress line per call.

    Prints ``<percent>- Speed: <size>`` on its own line and then sleeps for
    two seconds.

    Args:
        blocknum: Number of blocks transferred so far.
        blocksize: Size of one block in bytes.
        totalsize: Total size of the remote file in bytes. When it is zero or
            negative (size unknown) the percentage stays at 0.
    """
    recv_size = blocknum * blocksize

    # Average speed since the module-level start_time; guard against a zero
    # elapsed time on the very first callback.
    elapsed = time.time() - start_time
    speed = recv_size / elapsed if elapsed > 0 else 0
    speed_str = " Speed: %s" % format_size(speed)

    if totalsize > 0:
        # Clamp: the last block is usually partial and would exceed 100%.
        fraction = min(recv_size / totalsize, 1.0)
    else:
        fraction = 0.0
    percent_str = "%.2f%%" % (fraction * 100)

    print(percent_str.ljust(6, ' ') + '-' + speed_str)
    sys.stdout.flush()
    # NOTE(review): presumably throttles the line-per-update output; this
    # also blocks the download for two seconds per callback.
    time.sleep(2)
83 |
84 |
85 | # 字节bytes转化K\M\G
def format_size(bytes):
    """Format a byte count as a string in K, M or G with three decimals.

    Args:
        bytes: A number, or anything ``float()`` accepts, giving a size in
            bytes.

    Returns:
        ``"<value>K"``, ``"<value>M"`` or ``"<value>G"`` depending on the
        magnitude, or the string ``"Error"`` (after printing a message) when
        the input cannot be converted to a float.
    """
    try:
        kb = float(bytes) / 1024
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are handled here; a bare ``except`` would
        # also swallow KeyboardInterrupt / SystemExit.
        print("传入的字节格式不对")
        return "Error"

    if kb >= 1024:
        mb = kb / 1024
        if mb >= 1024:
            return "%.3fG" % (mb / 1024)
        return "%.3fM" % mb
    return "%.3fK" % kb
102 |
103 |
104 | # 下载视频
def down_video(video_list, title, start_url, page):
    """Download every segment URL of one video part into ``download/``.

    The target folder is ``download`` under ``sys.path[0]`` and is created if
    missing. With more than one segment the files are named
    ``<title>-<n>.mp4`` (n starting at 1); a single segment is saved as
    ``<title>.mp4``. Progress is reported through ``Schedule_cmd``.

    Args:
        video_list: Segment URLs to fetch, in order.
        title: Base file name for the saved segment(s).
        start_url: Sent as the ``Referer`` header of each download request.
        page: Part number, used only in the console message.
    """
    print('[下载P{}段视频]:'.format(page) + title)
    currentVideoPath = os.path.join(sys.path[0], 'download')
    os.makedirs(currentVideoPath, exist_ok=True)

    # The opener and its headers are identical for every segment, so build and
    # install them once instead of once per loop iteration.
    opener = urllib.request.build_opener()
    opener.addheaders = [
        ('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0) Gecko/20100101 Firefox/56.0'),
        ('Accept', '*/*'),
        ('Accept-Language', 'en-US,en;q=0.5'),
        ('Accept-Encoding', 'gzip, deflate, br'),
        ('Range', 'bytes=0-'),  # per the original author: required to get the complete video
        ('Referer', start_url),  # per the original author: the referer is mandatory
        ('Origin', 'https://www.bilibili.com'),
        ('Connection', 'keep-alive'),
    ]
    urllib.request.install_opener(opener)

    multiple_segments = len(video_list) > 1
    for num, segment_url in enumerate(video_list, start=1):
        if multiple_segments:
            file_name = r'{}-{}.mp4'.format(title, num)
        else:
            file_name = r'{}.mp4'.format(title)
        urllib.request.urlretrieve(
            url=segment_url,
            filename=os.path.join(currentVideoPath, file_name),
            reporthook=Schedule_cmd,
        )
133 |
134 | # 合并视频
def combine_video(video_list, title):
    """Merge the downloaded segments of one video part into ``<title>.mp4``.

    Nothing is merged when ``video_list`` has fewer than two entries. Otherwise
    the ``download`` folder under ``sys.path[0]`` is scanned for files named
    ``<title>-<n>.mp4`` (the names ``down_video`` writes) or ``<title>-<n>.flv``,
    which are concatenated in ascending ``n`` order.

    Args:
        video_list: Segment URLs of the part; only its length is used.
        title: Base name shared by the segment files and the merged output.
    """
    currentVideoPath = os.path.join(sys.path[0], 'download')
    os.makedirs(currentVideoPath, exist_ok=True)

    if len(video_list) < 2:
        # A single segment is already the finished file.
        print('[视频合并完成]:' + title)
        return

    print('[下载完成,正在合并视频...]:' + title)

    # Match only this title's numbered segments. Sorting every file in the
    # folder by a "-<number>" suffix fails on unrelated files (for example
    # earlier single-segment downloads) and would mix in other videos.
    segment_pattern = re.compile('^' + re.escape(title) + r'-(\d+)\.(?:mp4|flv)$')
    segments = []
    for file_name in os.listdir(currentVideoPath):
        matched = segment_pattern.match(file_name)
        if matched:
            segments.append((int(matched.group(1)), file_name))
    segments.sort()

    if not segments:
        print('[未找到可合并的分段视频]:' + title)
        return

    clips = [
        VideoFileClip(os.path.join(currentVideoPath, file_name))
        for _, file_name in segments
    ]
    final_clip = concatenate_videoclips(clips)
    final_clip.to_videofile(
        os.path.join(currentVideoPath, r'{}.mp4'.format(title)),
        fps=24,
        remove_temp=False,
    )
    print('[视频合并完成]' + title)
165 |
def getAid(Bvid):
    """Look up the ``aid`` of a video from its BV id via the web-interface API.

    Args:
        Bvid: BV id string, appended to the ``bvid`` query parameter.

    Returns:
        The value stored at ``["data"]["aid"]`` in the JSON response.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
    }
    endpoint = "https://api.bilibili.com/x/web-interface/view?bvid=" + Bvid
    response = requests.get(endpoint, headers=request_headers)
    payload = json.loads(response.text)
    return payload["data"]["aid"]
176 |
177 |
def download(BVid):
    """Download and merge every part of the video identified by *BVid*.

    Resolves the BV id to an aid, reads the video's metadata, and for each
    entry in its ``pages`` list fetches the segment URLs, downloads them and
    merges them.

    Args:
        BVid: BV id of the video, passed to ``getAid``.
    """
    # The progress callbacks read the module-level start_time; without this
    # declaration the assignment below would only create an unused local and
    # the reported speed would be measured from program start.
    global start_time

    aid = str(getAid(BVid))
    api_url = 'https://api.bilibili.com/x/web-interface/view?aid=' + aid

    # Requested quality code (1080p: 80; 720p: 64; 480p: 32; 360p: 16).
    quality = '80'

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
    }
    data = requests.get(api_url, headers=headers).json()['data']
    video_title = data["title"].replace(" ", "_")

    # The URL is always built from the aid and never carries "?p=", so all
    # parts are downloaded.
    for item in data['pages']:
        cid = str(item['cid'])
        # Fall back to the video title when the part has no name of its own.
        title = item['part'] or video_title
        # Strip characters that are not allowed in file names.
        title = re.sub(r'[\/\\:*?"<>|]', '', title)
        page = str(item['page'])

        # Build the per-part URL from the unchanged base; appending to the
        # same variable each iteration would yield ".../?p=1/?p=2/...".
        page_url = api_url + "/?p=" + page
        video_list = get_play_list(page_url, cid, quality)
        start_time = time.time()
        down_video(video_list, title, page_url, page)
        combine_video(video_list, title)
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
# Headless Chrome session used by nextPage(), getBV() and main(); it is
# started as soon as this module is executed.
# NOTE(review): the `chrome_options=` keyword is presumably deprecated in
# newer Selenium releases in favour of `options=` — confirm against the
# installed Selenium version.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
driver = webdriver.Chrome(chrome_options=chrome_options)
# Video ids collected by getBV() across all pages of the favorites folder.
BVlist=[]
242 |
def nextPage():
    """Click the favorites pager's "next page" button(s) in the shared driver.

    Returns:
        1 when at least one ``be-pager-next`` element was found and every
        click succeeded, 0 when no such element exists or Selenium raised.
    """
    try:
        buttons = driver.find_elements_by_class_name("be-pager-next")
        for button in buttons:
            button.click()
    except Exception:
        # Treat any failure to locate or click the button as "no further
        # page"; unlike a bare ``except`` this lets KeyboardInterrupt through.
        return 0

    if not buttons:
        # With no pager on the page nothing was clicked. Reporting success
        # here would make the ``while(nextPage())`` loop in main() spin forever.
        return 0

    print('-'*40+'翻页'+'-'*40)
    return 1
252 |
253 |
def getBV():
    """Collect the ``data-aid`` value of every ``small-item`` on the page.

    Each value found is printed and appended to the module-level ``BVlist``.
    Elements without a ``data-aid`` value are skipped, because a missing id
    would later break the URL built in ``getAid``.
    """
    for item in driver.find_elements_by_class_name("small-item"):
        # Read the attribute once instead of once for print and once for append.
        video_id = item.get_attribute('data-aid')
        if not video_id:
            continue
        print(video_id)
        BVlist.append(video_id)
258 |
def main(url):
    """Scrape all video ids from a favorites page, then download each one.

    Args:
        url: Address of the favorites folder to open in the shared driver.
    """
    try:
        driver.get(url)
        # NOTE(review): the fixed 3-second pauses presumably give the page
        # time to render before it is scraped — confirm this is sufficient.
        sleep(3)
        getBV()
        while nextPage():
            sleep(3)
            getBV()
    finally:
        # Always shut the browser down, even if scraping raised; otherwise a
        # headless Chrome process is left running.
        driver.quit()

    total = len(BVlist)
    print('\n抓取到'+str(total)+'个视频')
    for count, video in enumerate(BVlist, start=1):
        print('='*40+' '+str(count)+'/'+str(total)+' '+'='*40)
        download(video)
274 |
275 |
# Address of the favorites folder to download; the folder must be public.
url='https://space.bilibili.com/45110340/favlist?fid=962518540&ftype=create'
# Runs the whole scrape-and-download pipeline whenever this file is executed.
main(url)
279 |
280 | #默认下载地址为本py文件所在目录下的download文件夹
281 | #默认下载画质为优先1080p
282 |
283 | #由收藏夹地址抓取收藏夹内视频BV号的功能由本人实现
284 | #下载功能完全照搬Henryhaohao的Bilibili_video_download
285 | #BV号转AV号部分的代码,来源于上述项目的issue:'针对BV号的问题',作者CodeForWuyu
286 |
287 |
288 |
289 |
290 |
--------------------------------------------------------------------------------