├── .gitignore
├── .idea
├── .gitignore
├── Python_Spider_All.iml
├── inspectionProfiles
│ ├── Project_Default.xml
│ └── profiles_settings.xml
├── misc.xml
├── modules.xml
└── vcs.xml
├── README.md
├── 书旗小说
├── shuqi_novel_search.py
└── 书旗小说详情加密
├── 优酷eid_uid_videoid作者ID转换
└── 转换.py
├── 哔哩哔哩下载
├── bilibili_download.py
├── bilibili_download_base.py
├── bilibili_ocr.py
├── test_option.py
├── 人工核验验证码图片的场景.html
├── 使用说明
├── 视频下架的场景.html
├── 记录哔哩哔哩问题.txt
└── 需要人机识别的场景.html
├── 哔哩哔哩主页采集
└── bilibili_user_getall.py
├── 喜马拉雅FM
├── .idea
│ ├── .gitignore
│ ├── inspectionProfiles
│ │ ├── Project_Default.xml
│ │ └── profiles_settings.xml
│ ├── misc.xml
│ ├── modules.xml
│ ├── vcs.xml
│ └── 喜马拉雅FM.iml
├── 使用教程
└── 喜马拉雅spider.py
├── 微信公众号短链接转长链接
└── short_to_long.py
├── 百度翻译spider
├── .idea
│ ├── .gitignore
│ ├── inspectionProfiles
│ │ ├── Project_Default.xml
│ │ └── profiles_settings.xml
│ ├── misc.xml
│ ├── modules.xml
│ ├── workspace.xml
│ └── 百度翻译spider.iml
├── requirement.txt
├── sign加密参数破解.py
├── 使用教程
└── 百度翻译spider.py
├── 秒拍视频
├── 使用教程
└── 秒拍spider.py
├── 网易云爬虫
├── requirment.txt
├── 使用说明
├── 加密解密代码.py
└── 网易云spider.py
├── 虾米音乐spider
├── .idea
│ ├── .gitignore
│ ├── inspectionProfiles
│ │ ├── Project_Default.xml
│ │ └── profiles_settings.xml
│ ├── misc.xml
│ ├── modules.xml
│ ├── vcs.xml
│ └── 虾米音乐spider.iml
├── requirment.txt
├── xiami_audio_spider.py
├── xiami_test_secret_parms.py
└── 使用说明
└── 起点中文网详情字体加密破解
├── my_font_content.py
├── qidian_novel_info_spider.py
├── 字体文件解析.py
└── 字体解密记录
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | /喜马拉雅FM/.idea/
3 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 |
--------------------------------------------------------------------------------
/.idea/Python_Spider_All.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Python_Spider_All
2 | 每完成一个项目存储一个 欢迎添加
3 | ########################################################################
4 | 注意!!!
5 | config 本地配置文件 使用时请省略,主要包含 代理和翻页参数
6 | ########################################################################
7 |
8 | # 书旗小说
9 |
10 | # 优酷eid_uid_videoid作者ID转换
11 |
12 | # 哔哩哔哩视频下载
13 |
14 | # 哔哩哔哩主页采集
15 | 个人主页公开视频信息
16 | # 虾米音乐
17 |
18 | # 网易云
19 |
20 | # 喜马拉雅FM
21 | 2023 06 修复
22 |
23 | # 微信公众号短链接转长链接
24 | 简单的解析页面进行拼接
25 |
26 | # 百度翻译
27 |
28 | # 秒拍
29 | 老版本已经失效,可借鉴
30 |
31 | # 书旗小说 详情
--------------------------------------------------------------------------------
/书旗小说/shuqi_novel_search.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # Chance favors the prepared mind.
3 | # author : pyl owo,
4 | # time : 2020/9/21
5 | import datetime
6 | import json
7 | import random
8 | import re
9 | import time
10 | from hashlib import md5
11 |
12 | from fake_useragent import UserAgent
13 | import requests
# Proxy hook
def get_proxy():
    """Return the proxy configuration to use for outgoing requests.

    This is a placeholder: it currently returns ``None`` (no proxy).
    Supply your own implementation if requests should go through a proxy.
    """
    return None
17 |
# Unified request helper
def unify_requests(method="GET", url="", headers=None, proxies=None, data=None, verify=False, cookies=None):
    """Send an HTTP request via ``requests`` with a fixed 5-second timeout.

    Args:
        method: ``"GET"`` issues a GET request; any other value issues a POST.
        url: Target URL.
        headers: Optional request headers.
        proxies: Optional proxies mapping understood by ``requests``.
        data: Optional request body / form data.
        verify: TLS verification flag. It is forwarded for POST requests only;
            GET requests keep the ``requests`` default.
        cookies: Optional cookies mapping.

    Returns:
        The ``requests.Response`` object.
    """
    # ``None`` defaults replace the former shared mutable ``{}`` defaults;
    # ``requests`` treats ``None`` and an empty dict the same for these arguments.
    if method == "GET":
        return requests.get(url, headers=headers, proxies=proxies, data=data,
                            cookies=cookies, timeout=5)
    return requests.post(url, headers=headers, proxies=proxies, data=data,
                         verify=verify, cookies=cookies, timeout=5)
26 |
# Shuqi Novel (书旗小说)
class SFQingNovel:
    """Fetch and parse book details from Shuqi Novel (t.shuqi.com).

    The original author noted three anti-scraping measures on the site:
      1. the right-click menu is disabled,
      2. API requests carry a ``sign`` parameter,
      3. request headers carry an ``authorization`` token.
    """

    def __init__(self, use_proxy=True):
        # get_proxy() is only consulted when proxying is requested.
        self.proxy = get_proxy() if use_proxy else None

    ############################################################
    # Signing helpers

    def md5_use(self, text: str) -> str:
        """Return the 32-character hex MD5 digest of *text* (UTF-8 encoded)."""
        return md5(text.encode("utf-8")).hexdigest()

    def shuqi_jiami(self, book_id: str, time_stamp: str = None,
                    use_pwd='37e81a9d8f02596e1b895d07c171d5c9', user_id="8000000"):
        """Compute the ``sign`` request parameter.

        The sign is the MD5 of ``book_id + time_stamp + user_id + use_pwd``
        concatenated in that order.

        Args:
            book_id: Book id as a string.
            time_stamp: Unix timestamp (seconds) as a string. When omitted the
                current time is used, evaluated on every call rather than once
                at import time.
            use_pwd: Secret suffix appended before hashing.
            user_id: User id string.
        """
        if time_stamp is None:
            time_stamp = str(int(time.time()))
        return self.md5_use(book_id + time_stamp + user_id + use_pwd)

    def shuqi_get_header_token(self, book_id: str):
        """Fetch the book cover page and extract the ``token`` value from it.

        Returns the first ``"token":"..."`` match, or ``""`` when none is found.
        """
        response = unify_requests(url="https://t.shuqi.com/cover/{}".format(book_id), proxies=self.proxy)
        tokens = re.findall(r'"token":"(.*?)"', response.text)
        return tokens[0] if tokens else ""

    @staticmethod
    def _resolve_book_id(novel_url, **kwargs):
        """Return the book id from ``qin_quan_id_int`` or the URL, else ``None``."""
        numeric_id = kwargs.get('qin_quan_id_int')
        if numeric_id:
            return str(numeric_id)
        if novel_url:
            # Last path segment of the URL, ignoring any query string.
            return str(novel_url).split('?')[0].split('/')[-1]
        return None

    ############################################################
    # Requests

    def get_response(self, novel_url, time_stamp: str = None, user_id: str = "8000000", **kwargs):
        """Request the book-info API for one book.

        Args:
            novel_url: Book cover URL; its last path segment is the book id.
            time_stamp: Unix timestamp string used for signing; defaults to the
                current time at call time.
            user_id: User id sent with the request.
            **kwargs: ``qin_quan_id_int`` may supply the book id directly and
                takes precedence over ``novel_url``.

        Returns:
            The response object on success, ``{}`` when no book id can be
            determined, or ``False`` when the authorization token is missing.
        """
        book_id = self._resolve_book_id(novel_url, **kwargs)
        if book_id is None:
            return {}
        if time_stamp is None:
            time_stamp = str(int(time.time()))

        # Fetch the token once and reuse it for the header below.
        token = self.shuqi_get_header_token(book_id)
        if not token:
            print("获取token authorization 失败")
            return False

        headers = {
            # NOTE(review): 'User-Agent' and 'user-agent' are both sent; the
            # fixed value below presumably wins once headers are merged
            # case-insensitively - confirm which one is intended.
            'User-Agent': UserAgent().random,
            "Proxy-Tunnel": str(random.randint(1, 10000)),
            'authority': 'ocean.shuqireader.com',
            'accept': 'application/json, text/plain, */*',
            'authorization': "Bearer " + token,
            'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Mobile Safari/537.36',
            'content-type': 'application/x-www-form-urlencoded',
            'origin': 'https://t.shuqi.com',
            'sec-fetch-site': 'cross-site',
            'sec-fetch-mode': 'cors',
            'sec-fetch-dest': 'empty',
            'referer': 'https://t.shuqi.com/cover/7027302',
            'accept-language': 'zh-CN,zh;q=0.9',
        }
        data = {
            'user_id': '%s' % (user_id),
            'bookId': '%s' % (book_id),
            'timestamp': '%s' % (time_stamp),
            'sign': '%s' % (self.shuqi_jiami(book_id, time_stamp, user_id=user_id)),
            'platform': '0'
        }
        return unify_requests(url="https://ocean.shuqireader.com/webapi/bcspub/openapi/book/info",
                              method="POST", headers=headers, data=data, proxies=self.proxy)

    def get_novel_info(self, novel_url, **kwargs):
        """Fetch a book's details and return them as a parsed dict."""
        return self.parse_novel_info(self.get_response(novel_url, **kwargs), novel_url, **kwargs)

    def get_comment(self, novel_url, **kwargs):
        """Request the comment API for one book.

        Returns:
            The response object on success, ``{}`` when no book id can be
            determined, or ``False`` when the authorization token is missing.
        """
        book_id = self._resolve_book_id(novel_url, **kwargs)
        if book_id is None:
            return {}

        token = self.shuqi_get_header_token(book_id)
        if not token:
            print("获取token authorization 失败")
            return False

        headers = {
            'User-Agent': UserAgent().random,
            "Proxy-Tunnel": str(random.randint(1, 10000)),
            'authority': 'ocean.shuqireader.com',
            'accept': 'application/json, text/plain, */*',
            'authorization': "Bearer " + token,
            'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36',
            'origin': 'https://t.shuqi.com',
            'sec-fetch-site': 'cross-site',
            'sec-fetch-mode': 'cors',
            'sec-fetch-dest': 'empty',
            'referer': 'https://t.shuqi.com/',
            'accept-language': 'zh-CN,zh;q=0.9',
        }
        info_base_url = "https://ocean.shuqireader.com/webapi/comment/novel/i.php?do=sp_get&bookId={}&fetch=merge&sqUid=8000000&source=store&size=3&page=1&score=yes&authorId=8000000"
        # The resolved id is used for both the keyword-id and URL cases. The
        # keyword-id case used to insert the literal source text
        # 'kwargs.get("qin_quan_id_int")' into the URL.
        return unify_requests(url=info_base_url.format(book_id), headers=headers, proxies=self.proxy)

    ############################################################
    # Parsing

    def parse_novel_info(self, respose_info, novel_url='', **kwargs) -> dict:
        """Turn the book-info response (plus a comment lookup) into a flat dict.

        Returns ``{}`` when either response cannot be read or decoded as JSON.
        Fields the API does not provide are set to ``None``.
        """
        try:
            print(novel_url)
            response_dict = json.loads(respose_info.text).get('data', {})
            comment_dict = json.loads(self.get_comment(novel_url, **kwargs).text)
        except Exception as e:
            print(e)
            return {}

        # Guard against JSON nulls so the lookups below cannot raise.
        response_dict = response_dict or {}
        comment_dict = comment_dict or {}
        last_chapter = response_dict.get('lastChapter') or {}
        update_ts = last_chapter.get('updateTime')

        try:
            book_update_time = datetime.datetime.fromtimestamp(float(update_ts)).strftime("%Y-%m-%d")
        except (TypeError, ValueError, OverflowError, OSError):
            # Missing or malformed timestamp.
            book_update_time = None

        word_count = response_dict.get('wordCount', '')
        book_status = response_dict.get('state', '')
        status_labels = {"1": "连载", "2": "完结"}
        if isinstance(book_status, str):
            book_status_str = status_labels.get(book_status, "暂无")
        else:
            book_status_str = "暂无"

        novel_dict = dict()
        novel_dict['all_recommend_str'] = None  # total recommendations
        novel_dict['month_recommend_str'] = None  # monthly recommendations
        novel_dict['week_recommend_str'] = None  # weekly recommendations
        novel_dict['all_read_int'] = None  # total reads
        novel_dict['month_read_int'] = None  # monthly reads
        novel_dict['week_read_int'] = None  # weekly reads
        # wordCount is multiplied by 10000 to produce the total word count.
        novel_dict['all_words_number_int'] = int(float(word_count) * 10000) if word_count else None
        novel_dict['book_status_str'] = book_status_str  # serializing / finished / unknown
        novel_dict['book_property_str'] = None  # free / member / limited-free
        novel_dict['author_type_str'] = None  # author tier
        novel_dict['book_lable_str'] = "|".join([i.get('tagName') for i in (response_dict.get('tag') or [])])  # tags joined by "|"
        novel_dict['book_type_str'] = None  # category
        novel_dict['book_update_time'] = book_update_time  # last update date, YYYY-MM-DD
        novel_dict['book_zong_zhang_jie_int'] = None  # total chapter count
        # NOTE(review): this field is labelled "latest chapter name" but is
        # filled with the update timestamp - confirm the intended API key.
        novel_dict['book_zui_xin_zhang_jie_name_str'] = update_ts
        novel_dict['book_introduce_text'] = response_dict.get('desc', '')  # synopsis
        novel_dict['book_cover_image_str'] = response_dict.get('imgUrl', '')  # cover image URL
        novel_dict['book_detail_url_str'] = novel_url  # detail page URL
        novel_dict['book_detail_id_int'] = response_dict.get('bookId', '')  # book id as returned
        novel_dict['book_detail_id_str'] = str(response_dict.get('bookId', ''))  # book id as a string
        novel_dict['book_zhan_dian_str'] = None  # site section
        novel_dict['book_publish_str'] = '书旗小说'  # publisher, defaults to the platform name
        novel_dict['book_commeds_int'] = (comment_dict.get('info') or {}).get('total')  # comment count
        novel_dict['author_grade_float'] = None  # author rating
        novel_dict['author_id_str'] = str(response_dict.get('authorId', ''))  # author id as a string
        novel_dict['author_page_url_str'] = None  # author home page URL
        novel_dict['author_book_number_int'] = None  # number of books by the author
        novel_dict['author_likes_int'] = None  # total likes for the author
        novel_dict['author_all_words_number_str'] = None  # author's cumulative word count
        novel_dict['author_produce_days_str'] = None  # author's cumulative writing days
        novel_dict['author_fens_number_int'] = None  # author follower count
        novel_dict['author_head_image_url_str'] = response_dict.get('authorIcon', '')  # author avatar URL
        return novel_dict
233 |
234 |
# Unified entry point: a bound method of a spider created without a proxy.
search_novel_info = SFQingNovel(use_proxy=False).get_novel_info

if __name__ == "__main__":
    # Demo run against a sample book cover URL.
    print(search_novel_info('https://t.shuqi.com/cover/7329628'))
--------------------------------------------------------------------------------
/书旗小说/书旗小说详情加密:
--------------------------------------------------------------------------------
1 | 直接执行 没加代理,
2 | 加代理
3 | use_proxy = True (代理函数 自己添加)
4 |
5 | 具体能获得的值 已经列好了,主要还是 # 上面的加密的地方,参数加密 执行 函数 shuqi_jiami
6 | data = {
7 | 'user_id': '%s' % (user_id),
8 | 'bookId': '%s' % (bookId),
9 | 'timestamp': '%s' % (time_stamp),
10 | 'sign': '%s' % (self.shuqi_jiami(bookId, time_stamp, user_id=user_id)),
11 | 'platform': '0'
12 | }
13 |
14 | 后记,真的烦 唯独没有测试md5 我以为不会那么简单的,淦!!!
--------------------------------------------------------------------------------
/优酷eid_uid_videoid作者ID转换/转换.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # 享受雷霆感受雨露
3 | # author xyy,time:2022/6/9
4 | #!/usr/bin/env python
5 | # -*- coding: utf-8 -*-
6 |
7 | import base64
8 |
# Author home page: numeric ID -> string ID
def uid2eid(uid):
    """Encode a numeric user ID as its string ID by calling ``encrypt`` with type 1."""
    author_type = 1
    return encrypt(uid, type_str=author_type)
13 |
# Video: numeric ID -> string ID
def uid2vid(uid):
    """Encode a numeric video ID as its string ID by calling ``encrypt`` with type 2."""
    video_type = 2
    return encrypt(uid, type_str=video_type)
18 |
# String ID -> numeric ID
def eid2uid(eid, type_str: int):
    """Decode a string ID back to its numeric ID.

    Args:
        eid: Encoded string ID.
        type_str: ``1`` for an author home-page ID (one leading character is
            dropped before decoding) or ``2`` for a video ID (four leading
            characters are dropped).

    Returns:
        The numeric ID as a decimal string.

    Raises:
        ValueError: If ``type_str`` is not 1 or 2, or the decoded payload is
            not an integer.
    """
    if type_str == 1:
        payload = eid[1:]
    elif type_str == 2:
        payload = eid[4:]
    else:
        # An unsupported type used to fail inside int("") with an opaque message.
        raise ValueError("unsupported type_str: {!r} (expected 1 or 2)".format(type_str))
    # Integer division keeps large IDs exact; float division would round them.
    return str(int(str_to_int(payload)) // 4)
29 |
# Base64 payload -> decoded text
def str_to_int(eid: str):
    """Base64-decode *eid* and return the result as text (not converted to int)."""
    raw_bytes = base64.b64decode(eid)
    return raw_bytes.decode()
34 |
# Numeric ID -> string ID
def encrypt(int_id: int, type_str: int):
    """Encode a numeric ID as a string ID.

    The ID is multiplied by 4, rendered as decimal text and base64-encoded.
    Type 1 prefixes the result with ``"U"``, type 2 with ``"id_X"``; any other
    type yields ``None``.
    """
    quadrupled_text = str(int(int_id) * 4)
    encoded = base64.b64encode(quadrupled_text.encode()).decode()
    for type_code, prefix in ((1, "U"), (2, "id_X")):
        if type_str == type_code:
            return prefix + encoded
    return None
43 |
if __name__ == '__main__':
    # Second argument: 1 = author home page, 2 = video.
    sample_author_eid = "UODExNjMwNTc1Ng=="
    print(eid2uid(sample_author_eid, 1))
    # print(uid2eid("1596252942"))
48 |
--------------------------------------------------------------------------------
/哔哩哔哩下载/bilibili_download.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # 享受雷霆感受雨露
3 | # author xyy,time:2020/9/02
4 | import random
5 | from you_get.common import *
6 | import requests
7 | from Task_Compar_Config import Config_Of_Compar as config
8 | from Task_Compar_Config import proxies
9 | from fake_useragent import UserAgent
10 | from task_tool_unit import match1
11 | from tort_download_unit.bilibili_tort_download.bilibili_ocr import _get_toke_and_img
12 | from tort_download_unit.bilibili_tort_download.bilibili_download_base import download_urls # 给出下载链接 下载
13 |
# Stream formats, ordered from the highest quality code to the lowest.
stream_types = [
    {
        'id': 'flv_p60', 'quality': 116, 'audio_quality': 30280,
        'container': 'FLV', 'video_resolution': '1080p', 'desc': '高清 1080P60',
    },
    {
        'id': 'hdflv2', 'quality': 112, 'audio_quality': 30280,
        'container': 'FLV', 'video_resolution': '1080p', 'desc': '高清 1080P+',
    },
    {
        'id': 'flv', 'quality': 80, 'audio_quality': 30280,
        'container': 'FLV', 'video_resolution': '1080p', 'desc': '高清 1080P',
    },
    {
        'id': 'flv720_p60', 'quality': 74, 'audio_quality': 30280,
        'container': 'FLV', 'video_resolution': '720p', 'desc': '高清 720P60',
    },
    {
        'id': 'flv720', 'quality': 64, 'audio_quality': 30280,
        'container': 'FLV', 'video_resolution': '720p', 'desc': '高清 720P',
    },
    {
        'id': 'hdmp4', 'quality': 48, 'audio_quality': 30280,
        'container': 'MP4', 'video_resolution': '720p', 'desc': '高清 720P (MP4)',
    },
    {
        'id': 'flv480', 'quality': 32, 'audio_quality': 30280,
        'container': 'FLV', 'video_resolution': '480p', 'desc': '清晰 480P',
    },
    {
        'id': 'flv360', 'quality': 16, 'audio_quality': 30216,
        'container': 'FLV', 'video_resolution': '360p', 'desc': '流畅 360P',
    },
    # 'quality': 15?
    {'id': 'mp4', 'quality': 0},

    {'id': 'jpg', 'quality': 0},
]

# Module-level option flags.
# NOTE(review): these presumably mirror the option globals of you_get.common - confirm.
dry_run = False
json_output = False
force = False
skip_existing_file_size_check = False
player = None
extractor_proxy = None
cookies = None
output_filename = None
auto_rename = False
insecure = False
46 | import ssl
47 | import socket
48 | import logging
49 | from urllib import request, error
50 |
def urlopen_with_retry(*args, **kwargs):
    """Call ``urllib.request.urlopen`` with up to three attempts.

    Socket timeouts and HTTP errors are logged at debug level and retried;
    the exception from the third failed attempt is re-raised. When the
    module-level ``insecure`` flag is true, certificate and hostname checks
    are disabled for the request.
    """
    retry_time = 3
    for attempt in range(1, retry_time + 1):
        is_last_attempt = attempt == retry_time
        try:
            if not insecure:
                return request.urlopen(*args, **kwargs)
            # ignore ssl errors
            unverified_ctx = ssl.create_default_context()
            unverified_ctx.check_hostname = False
            unverified_ctx.verify_mode = ssl.CERT_NONE
            return request.urlopen(*args, context=unverified_ctx, **kwargs)
        except socket.timeout as e:
            logging.debug('request attempt %s timeout' % str(attempt))
            if is_last_attempt:
                raise e
        # try to tackle youku CDN fails
        except error.HTTPError as http_error:
            logging.debug('HTTP Error with code{}'.format(http_error.code))
            if is_last_attempt:
                raise http_error
# Download a Bilibili video given its page URL
def bilibili_download_urls(bili_url, title, ext='mp4',proxies=proxies,output_dir=config["system_path"]+"/"+config["tort_path"])->"bool":
    """Resolve the media URLs for *bili_url* and download them.

    Returns:
        ``True`` after a download was started from a non-empty URL list,
        ``False`` when the list is empty or any exception occurs, the integer
        itself when ``bilibili_down_load`` reports an integer error code, and
        ``None`` when the result is neither a list nor an int.
    """
    try:
        media_urls, total_size = bilibili_down_load(bili_url, proxy=proxies)

        if isinstance(media_urls, list):  # normal case
            if not media_urls:
                return False
            request_headers = bilibili_headers(referer=bili_url)
            download_urls(
                media_urls, title, ext, total_size,
                headers=request_headers,
                output_dir=output_dir,
                merge=True,
                av=True,
            )
            return True

        if isinstance(media_urls, int):  # error case: pass the error code through
            return media_urls

        return None
    except Exception as e:
        print(e)
        return False
# Request headers for Bilibili
def bilibili_headers(referer=None, cookie=None):
    """Build request headers with a random User-Agent.

    ``Referer`` and ``Cookie`` are added only when the matching argument is
    not ``None``.
    """
    headers = {
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'User-Agent': '{}'.format(UserAgent().random),
    }
    for header_name, header_value in (('Referer', referer), ('Cookie', cookie)):
        if header_value is not None:
            headers[header_name] = header_value
    return headers
106 |
107 | # 哔哩哔哩下载地址
108 | def bilibili_down_load(bili_url,proxy=proxies):
109 | # 仿照 you-get
110 | stream_qualities = {s['quality']: s for s in stream_types}
111 | headers = {
112 | "Proxy-Tunnel": str(random.randint(1, 10000)),
113 | 'authority': 'www.bilibili.com',
114 | 'cache-control': 'max-age=0',
115 | 'upgrade-insecure-requests': '1',
116 | # 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36',
117 | "user-agent": UserAgent().random,
118 | 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
119 | 'sec-fetch-site': 'same-site',
120 | 'sec-fetch-mode': 'navigate',
121 | 'sec-fetch-user': '?1',
122 | 'sec-fetch-dest': 'document',
123 | 'referer': 'https://search.bilibili.com/all?keyword=beatbox&from_source=nav_search&spm_id_from=333.851.b_696e7465726e6174696f6e616c486561646572.9&order=totalrank&duration=0&tids_1=3&tids_2=193',
124 | 'accept-language': 'zh-CN,zh;q=0.9',
125 | # '$cookie': 'CURRENT_FNVAL=16; _uuid=39019883-BF03-8583-5980-65F1AB32A8B437048infoc; buvid3=A1AF6CF2-8DE1-41D4-82FA-331AAF700F4953938infoc; rpdid=|(u)~lJ|l|lJ0J\'ul))Y)m)uu; LIVE_BUVID=AUTO4115905671332477; sid=lubz9xqt; DedeUserID=101681207; DedeUserID__ckMd5=dfc9ce597d1ee703; SESSDATA=4e33cf62%2C1609722422%2Ca87cf*71; bili_jct=96dcdd930c28d4d499acbf1c31b4ebb7; Hm_lvt_8a6e55dbd2870f0f5bc9194cddf32a02=1594256989; PVID=1; bsource=search_baidu; finger=351232418; blackside_state=1',
126 | }
127 |
128 | if proxy:
129 | response = requests.get(bili_url, headers=headers)
130 |
131 | else:
132 | response = requests.get(bili_url, headers=headers,proxies=proxy)
133 |
134 | # print(response.text)
135 | playinfo_text_ = match1(response.text, r'__playinfo__=(.*?)
视频去哪了呢?_哔哩哔哩 (゜-゜)つロ 干杯~-bilibili