├── README.md
└── bilibili_batch_downloader.py


/README.md:
--------------------------------------------------------------------------------
 1 | 本程序可以批量下载bilibili(b站)公开收藏夹下的所有视频
 2 | 
 3 | #注意收藏夹一定要是公开的
 4 | #可以下载前设置为公开，下载完毕后设置为私密
 5 | 
 6 | #默认下载地址为本py文件所在目录下的download文件夹
 7 | #默认下载画质为优先1080p
 8 | 
 9 | #由收藏夹地址抓取收藏夹内视频BV号的功能由本人实现
10 | #下载功能完全照搬Henryhaohao的Bilibili_video_download
11 | #BV号转AV号部分的代码，来源于上述项目的issue:'针对BV号的问题',作者CodeForWuyu
12 | 
13 | #如果本项目能帮到你的话，欢迎给个star~
14 | 


--------------------------------------------------------------------------------
/bilibili_batch_downloader.py:
--------------------------------------------------------------------------------
"""Batch-download every video in a public bilibili favorites folder."""
  2 | from selenium import webdriver
  3 | from time import sleep
  4 | 
  5 | import imageio
# Ask imageio to download an ffmpeg binary at import time.
# NOTE(review): newer imageio releases appear to have dropped this helper in
# favour of the separate imageio-ffmpeg package -- confirm against the
# installed version.
imageio.plugins.ffmpeg.download()
  7 | 
  8 | import requests, time, hashlib, urllib.request, re, json
  9 | from moviepy.editor import *
 10 | import os, sys
 11 | 
 12 | 
 13 | 
 14 | 
 15 | 
 16 | 
# Reference timestamp read by the progress callbacks (Schedule_cmd / Schedule)
# to compute the average download speed.
start_time = time.time()
 18 | 
 19 | def get_play_list(start_url, cid, quality):
 20 |     entropy = 'rbMCKn@KuamXWlPMoJGsKcbiJKUfkPF_8dABscJntvqhRSETg'
 21 |     appkey, sec = ''.join([chr(ord(i) + 2) for i in entropy[::-1]]).split(':')
 22 |     params = 'appkey=%s&cid=%s&otype=json&qn=%s&quality=%s&type=' % (appkey, cid, quality, quality)
 23 |     chksum = hashlib.md5(bytes(params + sec, 'utf8')).hexdigest()
 24 |     url_api = 'https://interface.bilibili.com/v2/playurl?%s&sign=%s' % (params, chksum)
 25 |     headers = {
 26 |         'Referer': start_url,  # 注意加上referer
 27 |         'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
 28 |     }
 29 |     # print(url_api)
 30 |     html = requests.get(url_api, headers=headers).json()
 31 |     # print(json.dumps(html))
 32 |     video_list = []
 33 |     for i in html['durl']:
 34 |         video_list.append(i['url'])
 35 |     # print(video_list)
 36 |     return video_list
 37 | 
 38 | 
 39 | # 下载视频
 40 | '''
 41 |  urllib.urlretrieve 的回调函数：
 42 | def callbackfunc(blocknum, blocksize, totalsize):
 43 |     @blocknum:  已经下载的数据块
 44 |     @blocksize: 数据块的大小
 45 |     @totalsize: 远程文件的大小
 46 | '''
 47 | 
 48 | 
 49 | def Schedule_cmd(blocknum, blocksize, totalsize):
 50 |     speed = (blocknum * blocksize) / (time.time() - start_time)
 51 |     # speed_str = " Speed: %.2f" % speed
 52 |     speed_str = " Speed: %s" % format_size(speed)
 53 |     recv_size = blocknum * blocksize
 54 | 
 55 |     # 设置下载进度条
 56 |     f = sys.stdout
 57 |     pervent = recv_size / totalsize
 58 |     percent_str = "%.2f%%" % (pervent * 100)
 59 |     n = round(pervent * 50)
 60 |     s = ('#' * n).ljust(50, '-')
 61 |     f.write(percent_str.ljust(8, ' ') + '[' + s + ']' + speed_str)
 62 |     f.flush()
 63 |     # time.sleep(0.1)
 64 |     f.write('\r')
 65 | 
 66 | 
 67 | def Schedule(blocknum, blocksize, totalsize):
 68 |     speed = (blocknum * blocksize) / (time.time() - start_time)
 69 |     # speed_str = " Speed: %.2f" % speed
 70 |     speed_str = " Speed: %s" % format_size(speed)
 71 |     recv_size = blocknum * blocksize
 72 | 
 73 |     # 设置下载进度条
 74 |     f = sys.stdout
 75 |     pervent = recv_size / totalsize
 76 |     percent_str = "%.2f%%" % (pervent * 100)
 77 |     n = round(pervent * 50)
 78 |     s = ('#' * n).ljust(50, '-')
 79 |     print(percent_str.ljust(6, ' ') + '-' + speed_str)
 80 |     f.flush()
 81 |     time.sleep(2)
 82 |     # print('\r')
 83 | 
 84 | 
 85 | # 字节bytes转化K\M\G
 86 | def format_size(bytes):
 87 |     try:
 88 |         bytes = float(bytes)
 89 |         kb = bytes / 1024
 90 |     except:
 91 |         print("传入的字节格式不对")
 92 |         return "Error"
 93 |     if kb >= 1024:
 94 |         M = kb / 1024
 95 |         if M >= 1024:
 96 |             G = M / 1024
 97 |             return "%.3fG" % (G)
 98 |         else:
 99 |             return "%.3fM" % (M)
100 |     else:
101 |         return "%.3fK" % (kb)
102 | 
103 | 
# Download the video segments
def down_video(video_list, title, start_url, page):
    """Download every segment URL of one video part into the download folder.

    Files are written to ``<sys.path[0]>/download``. A single segment is saved
    as ``<title>.mp4``; several segments are saved as ``<title>-<n>.mp4`` with
    ``n`` counting from 1. Progress is reported through ``Schedule_cmd``.

    Args:
        video_list: Segment URLs to fetch.
        title: Base file name, without extension.
        start_url: Sent as the ``Referer`` header of every download request.
        page: Part number; only used in the progress message.
    """
    print('[下载P{}段视频]:'.format(page) + title)
    currentVideoPath = os.path.join(sys.path[0], 'download')
    # Create the target folder once; exist_ok avoids the check-then-create race.
    os.makedirs(currentVideoPath, exist_ok=True)

    # The opener does not depend on the segment, so build and install it once
    # instead of once per loop iteration.
    opener = urllib.request.build_opener()
    opener.addheaders = [
        # ('Host', 'upos-hz-mirrorks3.acgvideo.com'),  # optional per the original author
        ('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0) Gecko/20100101 Firefox/56.0'),
        ('Accept', '*/*'),
        ('Accept-Language', 'en-US,en;q=0.5'),
        ('Accept-Encoding', 'gzip, deflate, br'),
        ('Range', 'bytes=0-'),  # per the original author, needed to receive the whole file
        ('Referer', start_url),  # per the original author, the referer is mandatory
        ('Origin', 'https://www.bilibili.com'),
        ('Connection', 'keep-alive'),
    ]
    urllib.request.install_opener(opener)

    multi_segment = len(video_list) > 1
    for num, segment_url in enumerate(video_list, start=1):
        if multi_segment:
            file_name = r'{}-{}.mp4'.format(title, num)
        else:
            file_name = r'{}.mp4'.format(title)
        urllib.request.urlretrieve(
            url=segment_url,
            filename=os.path.join(currentVideoPath, file_name),
            reporthook=Schedule_cmd,
        )
133 | 
# Merge the downloaded segments
def combine_video(video_list, title):
    """Concatenate the downloaded segments of one video part into one file.

    When ``video_list`` has two or more entries, the segment files
    ``<title>-1.mp4`` .. ``<title>-<n>.mp4`` in ``<sys.path[0]>/download``
    (the names ``down_video`` writes) are joined into ``<title>.mp4`` in the
    same folder. With fewer entries there is nothing to merge and only a
    completion message is printed.

    Args:
        video_list: Segment URLs of the part; only its length is used.
        title: Base file name shared by the segment files and the output.
    """
    currentVideoPath = os.path.join(sys.path[0], 'download')
    os.makedirs(currentVideoPath, exist_ok=True)

    if len(video_list) < 2:
        # A single segment is already the finished file.
        print('[视频合并完成]:' + title)
        return

    print('[下载完成,正在合并视频...]:' + title)
    # Address exactly the files that belong to this title, in segment order.
    # Scanning the whole folder would pick up unrelated videos and break on
    # file names that do not end in "-<number>".
    segment_paths = [
        os.path.join(currentVideoPath, r'{}-{}.mp4'.format(title, num))
        for num in range(1, len(video_list) + 1)
    ]

    clips = []
    try:
        for path in segment_paths:
            clips.append(VideoFileClip(path))
        final_clip = concatenate_videoclips(clips)
        final_clip.to_videofile(os.path.join(currentVideoPath, r'{}.mp4'.format(title)), fps=24, remove_temp=False)
    finally:
        # Release the readers held by each source clip.
        for clip in clips:
            clip.close()
    print('[视频合并完成]' + title)
165 | 
def getAid(Bvid):
    """Look up the ``aid`` of a video from its BV id.

    Args:
        Bvid: BV id string, appended to the view API URL as ``bvid``.

    Returns:
        The value of ``data.aid`` in the API's JSON response.
    """
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
    }
    endpoint = "https://api.bilibili.com/x/web-interface/view?bvid=" + Bvid
    response = requests.get(endpoint, headers=request_headers)
    payload = json.loads(response.text)
    return payload["data"]["aid"]
176 | 
177 | 
def download(BVid):
    """Download every part of one video and merge segmented parts.

    Resolves the BV id to an aid, fetches the video's title and part list
    from the view API, then for each part fetches the segment URLs, downloads
    them and merges them.

    Args:
        BVid: BV id of the video to download.
    """
    # Schedule_cmd reads the module-level start_time; without this declaration
    # the assignment below would only create an unused local variable.
    global start_time

    aid = str(getAid(BVid))
    # View API endpoint: returns the title and the list of parts.
    view_url = 'https://api.bilibili.com/x/web-interface/view?aid=' + aid

    # Quality codes noted by the original author:
    # 80 = 1080P, 64 = 720P, 32 = 480P, 16 = 360P.
    quality = '80'

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
    }
    data = requests.get(view_url, headers=headers).json()['data']
    video_title = data["title"].replace(" ", "_")

    for item in data['pages']:
        cid = str(item['cid'])
        # Fall back to the video title when the part has no name of its own.
        title = item['part'] or video_title
        # Strip characters that are not allowed in file names.
        title = re.sub(r'[\/\\:*?"<>|]', '', title)
        page = str(item['page'])
        # Build the per-part URL from the unchanged base; reassigning the base
        # inside the loop would pile up "/?p=1/?p=2/..." across parts.
        page_url = view_url + "/?p=" + page
        video_list = get_play_list(page_url, cid, quality)
        # Restart the speed measurement for this part.
        start_time = time.time()
        down_video(video_list, title, page_url, page)
        combine_video(video_list, title)
225 | 
226 | 
227 | 
228 | 
229 | 
230 | 
231 | 
232 | 
233 | 
234 | 
235 | 
236 | 
237 | 
# Headless Chrome session used by nextPage(), getBV() and main().
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
# NOTE(review): the `chrome_options=` keyword is deprecated in newer Selenium
# releases in favour of `options=` -- confirm against the installed version.
driver = webdriver.Chrome(chrome_options=chrome_options)
# Ids collected by getBV() and consumed by main().
BVlist=[]
242 | 
def nextPage():
    """Click the pager's "next" button on the current page.

    Returns:
        1 if at least one ``be-pager-next`` element was found and clicked,
        0 if there is no such element or looking it up / clicking it failed.
    """
    try:
        buttons = driver.find_elements_by_class_name("be-pager-next")
        for button in buttons:
            button.click()
    except Exception:
        # A button that cannot be clicked is treated as "no next page".
        return 0
    if not buttons:
        # find_elements_* returns an empty list instead of raising, so without
        # this check a page with no pager would report success forever and
        # main() would never leave its paging loop.
        return 0
    print('-'*40+'翻页'+'-'*40)
    return 1
252 | 
253 | 
def getBV():
    """Print and record the ``data-aid`` attribute of every ``small-item`` element.

    Each value found on the page currently loaded in ``driver`` is printed and
    appended to the module-level ``BVlist``.
    """
    for item in driver.find_elements_by_class_name("small-item"):
        video_id = item.get_attribute('data-aid')
        print(video_id)
        BVlist.append(video_id)
258 | 
def main(url):
    """Scrape every video id from a favorites folder, then download them all.

    Args:
        url: Address of the favorites folder page to open in the browser.
    """
    try:
        driver.get(url)
        sleep(3)  # give the page time to render before scraping
        getBV()
        while nextPage():
            sleep(3)
            getBV()
        total = len(BVlist)
        print('\n抓取到'+str(total)+'个视频')
    finally:
        # Always shut the browser down, even when scraping fails part-way.
        driver.quit()

    for count, video in enumerate(BVlist, start=1):
        print('='*40+' '+str(count)+'/'+str(total)+' '+'='*40)
        download(video)
274 | 
275 | 
# Favorites-folder URL to download; the folder must be public.
url='https://space.bilibili.com/45110340/favlist?fid=962518540&ftype=create'
main(url)

# Downloads go to the "download" folder next to this .py file by default.
# The default quality preference is 1080p.

# Scraping the BV ids from the favorites-folder URL was implemented by the author of this script.
# The download logic is copied from Henryhaohao's Bilibili_video_download.
# The BV-to-AV conversion code comes from that project's issue "针对BV号的问题" ("Regarding the BV-id problem"), by CodeForWuyu.
286 | 
287 | 
288 | 
289 | 
290 | 


--------------------------------------------------------------------------------