├── misc ├── p1.PNG └── p2.PNG ├── LICENSE ├── README.md └── InvalidVideos.py /misc/p1.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuiwong/Bilibili-Invalid-Fav-Dump/HEAD/misc/p1.PNG -------------------------------------------------------------------------------- /misc/p2.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuiwong/Bilibili-Invalid-Fav-Dump/HEAD/misc/p2.PNG -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Yui Wong 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bilibili-Invalid-Fav-Dump 2 | 抓取Bilibili收藏夹中的无效视频,输出到本地txt文件中。 3 | 4 | 5 | #### 2019.03.06 - 由于B站更新了返回消息内容,失效视频的标题全部为“已失效视频”,也就是说无法再获取标题。目前此脚本已无实际作用,如果有人能告知获取失效AV信息的方法,请PM我。 6 | 7 | #### Intro 8 | 当你逛B站打开自己的收藏夹,会发现这种情况: 9 | ![Demo1 Img] 10 | 11 | 以前Bilibili收藏夹中无效视频还可以看到缩略图和标题,现在只能F12查看json流。 12 | 写了个python小脚本,可以提取无效视频信息并输出到本地。 13 | 14 | #### 前置需求: 15 | - 所抓取的用户有公开的收藏夹 16 | - 需要python运行时,直接[官网](https://www.python.org/downloads/)安装 17 | - 安装requests库 18 | ```cmd 19 | pip install requests 20 | ``` 21 | 22 | #### 简易用法: 23 | - 下载InvalidVideos.py\n 24 | - 查找自己B站的mid 25 | - cmd输入 26 | ```cmd 27 | python InvalidVideos.py 你的mid 28 | ``` 29 | #### 详细用法: 30 | - 安装完整的[python运行时](https://www.python.org/downloads/) 31 | - 开始菜单-输入cmd 回车 32 | - 输入pip install requests 33 | - 等待库下载完成。 34 | - 进入自己在B站的空间,例如:space.bilibili.com/17819768/#/ (数字既为你的mid) 35 | - 下载InvalidVideos.py到某文件夹,比如C:/misc/ 36 | - 进入该文件夹,在里边shift+右键,寻找‘在这里打开控制台’类似的命令 37 | - 在控制台中输入 38 | ```cmd 39 | python InvalidVideos.py 17819768 40 | ``` 41 | #### 输出结果: 42 | 会在当前目录下输出一个invalidFavVideos.txt。 43 | 长这样子。 44 | ![Demo2 Img] 45 | 46 | 赞美太阳 \\[T] / 47 | 48 | 49 | 50 | [Demo1 Img]: https://github.com/yuiwong/Bilibili-Invalid-Fav-Dump/blob/master/misc/p1.PNG 51 | [Demo2 Img]: https://github.com/yuiwong/Bilibili-Invalid-Fav-Dump/blob/master/misc/p2.PNG 52 | -------------------------------------------------------------------------------- /InvalidVideos.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import sys 3 | import requests 4 | import re 5 | import json 6 | 7 | #reload(sys) 8 | #sys.setdefaultencoding('utf-8') 9 | 10 | 11 | agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36' 12 | 13 | fav_list = [] 
fav_name_list = []
output_index = 1


# entry point, parse command line args
def run():
    """Read the Bilibili user id (mid) from the command line and start the dump.

    ``sys.argv[1]`` is interpreted as the user id. An error message is
    printed, and nothing else is done, when the argument is missing, is not
    an integer, or is not a positive number.
    """
    if len(sys.argv) < 2:
        print_f('Error: Please enter Bilibili user id.')
        return
    try:
        user_id = int(sys.argv[1])
    except ValueError:
        # A non-numeric argument used to escape as an uncaught traceback;
        # route it to the regular "invalid id" message below instead.
        user_id = 0
    if user_id <= 0:
        print_f('Error: Please enter a valid Bilibili user id.')
        return
    get_fav_videos_from_user(user_id)


# main function flow: get all fav folders
#   -> loop through fav folder ids
#   -> loop through the pages of each folder
#   -> loop over all video infos to find invalid ones and record them
#   -> write to output file
#   -> done
def get_fav_videos_from_user(uid):
    """Dump the invalid videos found in the fav folders of user *uid*.

    Calls get_fav_folder_list() to populate the module-level ``fav_list``
    (folder ids) and ``fav_name_list`` (folder names), processes every
    folder in order and passes the concatenated text to write_output().
    When no folder was found a hint is printed and no file is written.
    """
    get_fav_folder_list(uid)  # ids are stored in fav_list, names in fav_name_list
    if not fav_list:
        print_f('user mid={user} don\'t have public fav folders. if you own this mid, make your fav folders public.'.format(user=uid))
        return
    # One text chunk per fav folder (it contains all invalid video infos of
    # that folder), joined in folder order.
    output = ''.join(process_fav_folder(uid, i) for i in range(len(fav_list)))

    write_output(output)


#find all fav folders ids and names.
def get_fav_folder_list(uid):
    """Fetch the fav folders of user *uid* and record their ids and names.

    Every folder id is appended to the module-level ``fav_list`` and every
    folder name to ``fav_name_list`` (same order); the collected names are
    then printed. If the response cannot be used, an error is printed by the
    helper and both lists are left as they were.
    """
    url = 'https://api.bilibili.com/x/space/fav/nav?mid={userid}&jsonp=jsonp'.format(userid=uid)
    data = _fetch_json_data(url)

    # 'archive' may be missing or null; treat both as "no folders".
    for fav_folder_obj in data.get('archive') or []:
        fav_list.append(fav_folder_obj['fid'])
        fav_name_list.append(fav_folder_obj['name'])

    print_f('mid={user} has fav folders: {folder}'.format(user=uid, folder=fav_name_list))


def _fetch_json_data(url):
    """Return the ``data`` object of the JSON response at *url*, or ``{}``.

    get_HTML_text() hands back plain, non-JSON text when the request fails,
    which used to crash json.loads() and lose everything collected so far.
    An unparsable response, or one without a ``data`` object, is reported
    with print_f() and replaced by an empty dict.
    """
    resp = get_HTML_text(url, agent)
    try:
        payload = json.loads(resp)
    except ValueError:  # json.JSONDecodeError is a subclass of ValueError
        payload = None
    data = payload.get('data') if isinstance(payload, dict) else None
    if not isinstance(data, dict):
        print_f('Error: Unexpected response from {url}'.format(url=url))
        return {}
    return data


def _fetch_fav_page(uid, fav_id, page_index):
    """Return the ``data`` object for page *page_index* of fav folder *fav_id*."""
    url = 'https://api.bilibili.com/x/space/fav/arc?vmid={userid}&ps=30&fid={favid}&tid=0&keyword=&pn={page_index}&order=fav_time&jsonp=jsonp'.format(userid=uid, favid=fav_id, page_index=page_index)
    return _fetch_json_data(url)


#given a fav folder id, find pages and parse videos info
def process_fav_folder(uid, fav_list_index):
    """Return the report text for all invalid videos of one fav folder.

    *fav_list_index* is the position of the folder in ``fav_list`` /
    ``fav_name_list``. The first page is requested to learn the page count,
    then pages 2..pagecount are requested one by one; every page is handed
    to handle_jobject_per_page() and the returned texts are concatenated.
    A page whose response is unusable contributes no videos.
    """
    fav_id = fav_list[fav_list_index]

    first_page = _fetch_fav_page(uid, fav_id, 1)
    page_count = first_page.get('pagecount') or 0
    print_f('{favid} has {page} pages.'.format(favid=fav_name_list[fav_list_index], page=page_count))

    # 'archives' may be missing or null; pass an empty list in that case.
    chunks = [handle_jobject_per_page(first_page.get('archives') or [], fav_list_index, 1)]
    for page_index in range(2, page_count + 1):
        page = _fetch_fav_page(uid, fav_id, page_index)
        chunks.append(handle_jobject_per_page(page.get('archives') or [], fav_list_index, page_index))

    return ''.join(chunks)


#each page contains 30 videos, I tried passing 1000, do NOT work.
# :(
# parse videos in the given page, write individual video info to output.
# here you can customize what info you want, and the format you want.
# For available elements, print 'jObject.keys()'
def handle_jobject_per_page(page_jobjects, fav_list_index, page_index):
    """Return the report text for the invalid videos of one page.

    A video whose ``state`` is >= 0 is counted as valid and skipped; every
    other video gets a numbered entry (title, AV id, folder name, page,
    index on the page, uploader name, description). The module-level
    ``output_index`` supplies the entry number and is incremented once per
    entry. A per-page summary is printed before returning.
    """
    global output_index

    folder_name = fav_name_list[fav_list_index]
    entries = []
    valid_count = 0
    for i, jObject in enumerate(page_jobjects):
        if int(jObject['state']) >= 0:
            valid_count += 1
            continue
        s = '#{number}{title}\n'.format(number=output_index, title=jObject['title'])
        s += 'AV{vid} 收藏夹:{favFolder},Page:{page},Index:{num}\n'.format(vid=jObject['aid'], favFolder=folder_name, page=page_index, num=i)
        s += 'UP主:{up}\n'.format(up=jObject['owner']['name'])
        s += '注释:\n{desc}\n\n\n'.format(desc=jObject['desc'])
        entries.append(s)
        output_index += 1
    print_f(' {}:Page {} has {} valid videos and {} invalid videos, index reach to \'{}\'. '.format(folder_name, page_index, valid_count, len(entries), output_index))
    return ''.join(entries)


# write infos to output file
def write_output(info):
    """Write *info* to ``invalidFavVideos.txt`` (UTF-8) and print a summary."""
    # The context manager closes the file even if write() raises.
    with open('invalidFavVideos.txt', 'w', encoding='utf-8') as out_file:
        out_file.write(info)
    # output_index is the number the NEXT entry would get, so the number of
    # entries written is one less. The backslash is escaped explicitly:
    # "\[" is an invalid escape sequence (same printed text either way).
    print_f("\nDone! Outputing invalid {} videos. Happy holding and have a nice day~ \\[T] /\n".format(output_index - 1))


def get_HTML_text(url, agent):
    """Return the body text of *url*, requested with *agent* as User-Agent.

    Returns the string 'Error: Unable to query Bilibili server!' when the
    request fails or the server answers with an HTTP error status.
    """
    try:
        headers = {'User-Agent': agent}
        r = requests.get(url, timeout=30, headers=headers)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only request failures are mapped to the error text; a bare except
        # would also swallow KeyboardInterrupt and SystemExit.
        return ('Error: Unable to query Bilibili server!')


def print_f(info):
    """Print *info*, degrading gracefully on consoles that cannot encode it.

    Characters the output stream's encoding cannot represent are replaced
    (with '?') instead of aborting the program with UnicodeEncodeError.
    """
    try:
        print(info)
    except UnicodeEncodeError:
        encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
        print(info.encode(encoding, errors='replace').decode(encoding))


# Run only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    run()