"""Batch-download every video in a public bilibili favorites folder.

Notes carried over from the project README:

* The favorites folder must be public. You can make it public before the
  download and switch it back to private afterwards.
* Files are saved to the ``download`` folder next to this script.
* The default quality setting prefers 1080p.
* Scraping the BV ids from the favorites page was written by this project's
  author; the download code is copied from Henryhaohao's
  Bilibili_video_download; the BV-to-AV conversion comes from that project's
  issue "针对BV号的问题" by CodeForWuyu.
* If this project helps you, a star is welcome.
"""
import hashlib
import json
import os
import re
import sys
import time
import urllib.request
from time import sleep

import imageio
import requests
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By

# Old imageio releases fetch an ffmpeg binary here.  Newer releases removed
# the downloader (the call raises) and take ffmpeg from the imageio-ffmpeg
# package instead, so a failure of this bootstrap must not abort the script.
try:
    imageio.plugins.ffmpeg.download()
except (AttributeError, RuntimeError):
    pass

# Must stay after the ffmpeg bootstrap above, as in the original script.
from moviepy.editor import *  # noqa: E402,F401,F403


# Reference point for the transfer-speed readout; reset before each segment.
start_time = time.time()

_USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'


def get_play_list(start_url, cid, quality):
    """Return the list of media URLs for one page (cid) of a video.

    Args:
        start_url: URL sent as the ``Referer`` header of the API request.
        cid: id of the page to resolve, as a string.
        quality: quality code sent as both ``qn`` and ``quality``
            (the script documents 80/64/32/16 for 1080p/720p/480p/360p).

    Returns:
        The ``url`` of every entry in the response's ``durl`` list, in order.

    Raises:
        KeyError: if the API response has no ``durl`` field.
    """
    # The app key and signing secret are stored reversed with every
    # character shifted down by 2; undo that and split on ':'.
    entropy = 'rbMCKn@KuamXWlPMoJGsKcbiJKUfkPF_8dABscJntvqhRSETg'
    appkey, sec = ''.join([chr(ord(i) + 2) for i in entropy[::-1]]).split(':')
    params = 'appkey=%s&cid=%s&otype=json&qn=%s&quality=%s&type=' % (appkey, cid, quality, quality)
    # The request signature is the MD5 of the query string plus the secret.
    chksum = hashlib.md5(bytes(params + sec, 'utf8')).hexdigest()
    url_api = 'https://interface.bilibili.com/v2/playurl?%s&sign=%s' % (params, chksum)
    headers = {
        'Referer': start_url,  # the request is sent with a referer
        'User-Agent': _USER_AGENT,
    }
    html = requests.get(url_api, headers=headers).json()
    if 'durl' not in html:
        raise KeyError('durl missing in playurl response: %r' % (html,))
    return [segment['url'] for segment in html['durl']]


# urllib.request.urlretrieve calls its reporthook as
#     callback(blocknum, blocksize, totalsize)
# blocknum:  number of blocks transferred so far
# blocksize: size of one block in bytes
# totalsize: size of the remote file (may be -1 when the server gives none)


def _progress(blocknum, blocksize, totalsize):
    """Return ``(fraction_done, speed_text)`` for a urlretrieve callback."""
    recv_size = blocknum * blocksize
    elapsed = time.time() - start_time
    # Guard against a zero elapsed time on coarse clocks.
    speed = recv_size / elapsed if elapsed > 0 else 0
    if totalsize > 0:
        # The last block usually overshoots the file size; cap at 100%.
        fraction = min(recv_size / totalsize, 1.0)
    else:
        fraction = 0.0  # unknown or empty size: no meaningful percentage
    return fraction, " Speed: %s" % format_size(speed)


def Schedule_cmd(blocknum, blocksize, totalsize):
    """Draw a single-line progress bar with speed on stdout.

    Intended as the ``reporthook`` of ``urllib.request.urlretrieve``; the
    trailing carriage return makes the next call overwrite the same line.
    """
    fraction, speed_str = _progress(blocknum, blocksize, totalsize)
    percent_str = "%.2f%%" % (fraction * 100)
    bar = ('#' * round(fraction * 50)).ljust(50, '-')
    out = sys.stdout
    out.write(percent_str.ljust(8, ' ') + '[' + bar + ']' + speed_str)
    out.flush()
    out.write('\r')


def Schedule(blocknum, blocksize, totalsize):
    """Print one progress line (percentage and speed) per callback.

    NOTE(review): the 2 second sleep runs on every callback and therefore
    stalls the transfer itself; kept as in the original, where this hook is
    defined but not used.
    """
    fraction, speed_str = _progress(blocknum, blocksize, totalsize)
    percent_str = "%.2f%%" % (fraction * 100)
    print(percent_str.ljust(6, ' ') + '-' + speed_str)
    sys.stdout.flush()
    time.sleep(2)


def format_size(bytes):
    """Format a byte count as a string in K, M or G with three decimals.

    The parameter keeps its original name (which shadows the builtin inside
    this function only) so keyword callers continue to work.

    Returns:
        The formatted size, or ``"Error"`` if the value cannot be converted
        to a float.
    """
    try:
        kb = float(bytes) / 1024
    except (TypeError, ValueError):
        print("传入的字节格式不对")
        return "Error"
    if kb < 1024:
        return "%.3fK" % (kb)
    mb = kb / 1024
    if mb < 1024:
        return "%.3fM" % (mb)
    return "%.3fG" % (mb / 1024)


def down_video(video_list, title, start_url, page):
    """Download every segment of one page into the ``download`` folder.

    Args:
        video_list: media URLs as returned by ``get_play_list``.
        title: file name stem; assumed to be already safe for the filesystem.
        start_url: URL sent as the ``Referer`` header of each download.
        page: page number, used only in the status message.

    A single segment is saved as ``<title>.mp4``; several segments are saved
    as ``<title>-<n>.mp4`` with n starting at 1.
    """
    global start_time
    print('[下载P{}段视频]:'.format(page) + title)
    currentVideoPath = os.path.join(sys.path[0], 'download')
    os.makedirs(currentVideoPath, exist_ok=True)

    # The headers are identical for every segment, so install them once.
    opener = urllib.request.build_opener()
    opener.addheaders = [
        ('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:56.0) Gecko/20100101 Firefox/56.0'),
        ('Accept', '*/*'),
        ('Accept-Language', 'en-US,en;q=0.5'),
        ('Accept-Encoding', 'gzip, deflate, br'),
        ('Range', 'bytes=0-'),  # the range must be bytes=0- to get the complete video
        ('Referer', start_url),  # the referer is required
        ('Origin', 'https://www.bilibili.com'),
        ('Connection', 'keep-alive'),
    ]
    urllib.request.install_opener(opener)

    multi_segment = len(video_list) > 1
    for num, segment_url in enumerate(video_list, start=1):
        if multi_segment:
            filename = r'{}-{}.mp4'.format(title, num)
        else:
            filename = r'{}.mp4'.format(title)
        # The progress hook measures speed from start_time and only counts
        # the bytes of the current segment, so restart the clock here.
        start_time = time.time()
        urllib.request.urlretrieve(url=segment_url,
                                   filename=os.path.join(currentVideoPath, filename),
                                   reporthook=Schedule_cmd)


def combine_video(video_list, title):
    """Merge the downloaded segments of one page into ``<title>.mp4``.

    Args:
        video_list: the media URLs that were downloaded; only its length is
            used, to know how many ``<title>-<n>.mp4`` segments to expect.
        title: file name stem used by ``down_video``.

    With fewer than two segments there is nothing to merge and only the
    completion message is printed. If any expected segment file is missing
    the merge is skipped rather than producing a truncated video.
    """
    currentVideoPath = os.path.join(sys.path[0], 'download')
    os.makedirs(currentVideoPath, exist_ok=True)
    if len(video_list) < 2:
        print('[视频合并完成]:' + title)
        return

    print('[下载完成,正在合并视频...]:' + title)
    # Use exactly the files down_video wrote for this title, in order,
    # instead of scanning (and trying to parse) every file in the folder.
    segment_paths = [
        os.path.join(currentVideoPath, r'{}-{}.mp4'.format(title, n))
        for n in range(1, len(video_list) + 1)
    ]
    missing = [path for path in segment_paths if not os.path.exists(path)]
    if missing:
        print('[未找到可合并的视频分段]:' + ', '.join(missing))
        return

    clips = []
    try:
        for path in segment_paths:
            clips.append(VideoFileClip(path))
        final_clip = concatenate_videoclips(clips)
        final_clip.to_videofile(os.path.join(currentVideoPath, r'{}.mp4'.format(title)),
                                fps=24, remove_temp=False)
    finally:
        # Release the reader resources held by each opened clip.
        for clip in clips:
            clip.close()
    print('[视频合并完成]' + title)


def getAid(Bvid):
    """Look up and return the ``aid`` of the video with the given BV id."""
    url = "https://api.bilibili.com/x/web-interface/view?bvid=" + Bvid
    r = requests.get(url, headers={'User-Agent': _USER_AGENT})
    j = json.loads(r.text)
    return j["data"]["aid"]


def download(BVid):
    """Download (and merge, where needed) every page of one video.

    Args:
        BVid: the BV id of the video.
    """
    api_url = 'https://api.bilibili.com/x/web-interface/view?aid=' + str(getAid(BVid))

    # Quality code: 1080p:80; 720p:64; 480p:32; 360p:16.
    quality = '80'

    # Fetch the video title and its list of pages (each with its own cid).
    html = requests.get(api_url, headers={'User-Agent': _USER_AGENT}).json()
    data = html['data']
    video_title = data["title"].replace(" ", "_")

    for item in data['pages']:
        cid = str(item['cid'])
        # Fall back to the video title when the page has no name of its own.
        title = item['part'] or video_title
        # Drop characters that are not allowed in file names.
        title = re.sub(r'[\/\\:*?"<>|]', '', title)
        if not title:
            title = cid  # nothing usable left; avoid a file called ".mp4"
        page = str(item['page'])
        # Build the per-page URL from the base each time so the "/?p=" suffix
        # does not pile up from one page to the next.
        page_url = api_url + "/?p=" + page
        video_list = get_play_list(page_url, cid, quality)
        down_video(video_list, title, page_url, page)
        combine_video(video_list, title)


# Headless Chrome session used to read the favorites page.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
driver = webdriver.Chrome(options=chrome_options)
# Video ids collected from the favorites page, in page order.
BVlist = []


def nextPage():
    """Click the pager's "next" button.

    Returns:
        1 if a button was clicked, 0 if there is no button or it could not
        be clicked.
    """
    clicked = False
    try:
        for button in driver.find_elements(By.CLASS_NAME, "be-pager-next"):
            button.click()
            clicked = True
    except WebDriverException:
        return 0
    if not clicked:
        # No pager on the page: report "no next page" instead of success.
        return 0
    print('-'*40+'翻页'+'-'*40)
    return 1


def getBV():
    """Append the ids of the videos on the current page to ``BVlist``.

    Each id is read from the ``data-aid`` attribute of the elements with
    class ``small-item``. Missing ids and ids already collected are skipped.

    Returns:
        The number of ids that were newly added.
    """
    added = 0
    for link in driver.find_elements(By.CLASS_NAME, "small-item"):
        bvid = link.get_attribute('data-aid')
        print(bvid)
        if bvid and bvid not in BVlist:
            BVlist.append(bvid)
            added += 1
    return added


def main(url):
    """Collect every video id from the favorites page, then download each.

    Args:
        url: address of a public favorites folder.
    """
    try:
        driver.get(url)
        sleep(3)  # give the page time to render its items
        getBV()
        while nextPage():
            sleep(3)
            if not getBV():
                # A page turn that yields nothing new means the pager did
                # not advance; stop instead of looping forever.
                break
    finally:
        # Always shut the browser down, even if scraping failed.
        driver.quit()

    total = len(BVlist)
    print('\n抓取到'+str(total)+'个视频')
    failed = []
    for count, video in enumerate(BVlist, start=1):
        print('='*40+' '+str(count)+'/'+str(total)+' '+'='*40)
        try:
            download(video)
        except Exception as err:
            # One broken video must not abort the rest of the batch.
            print('[下载失败]:' + video + ' ' + repr(err))
            failed.append(video)
    if failed:
        print('\n下载失败' + str(len(failed)) + '个视频: ' + ', '.join(failed))


# Address of the favorites folder; the folder must be public.
url = 'https://space.bilibili.com/45110340/favlist?fid=962518540&ftype=create'
main(url)