├── README.md ├── ximalaya_search.py └── ximalaya_vip.py /README.md: -------------------------------------------------------------------------------- 1 | # XimalayaFM 2 | 3 | python爬取喜马拉雅音频 4 | 5 | ## TODO 6 | 7 | * 写一个UI界面 8 | * 提供多种爬取选项 9 | 10 | ## 2019-10-12 19:10 11 | 12 | [CSDN](https://blog.csdn.net/weixin_42050513/article/details/101224552)上有人评论说`xm-sign`规则改了,于是去看了看,发现实际只改了一个字母,整体流程任可以看下方正文 13 | 14 | 把`hashlib.md5("ximalaya-{}".format(servertime).encode()`中的`ximalaya`替换成`himalaya`即可 15 | 16 | 改动点如下图 17 | 18 | ![](http://image.joelyings.com/20191012-1.png) 19 | 20 | ![](http://image.joelyings.com/20191012-2.png) 21 | 22 | 改动不大,可能是发现了有人在大量爬取,先小地方改动,随后可能会有较大的规则改动,教程写出来的目的是学习、测试,切勿过度爬取! 23 | 24 | ## 2020-02-02 25 | 26 | 完成VIP音频下载功能 27 | 28 | 使用方法: 29 | 30 | * 首先你已经开通了喜马会员 31 | * 该音频属于会员或者付费可听 32 | * 运行程序,选择`VIP`选项,然后输入音频集的albumID,以及你的token,点击运行即可 33 | 34 | token在下图这里复制 35 | 36 | ![](http://image.joelyings.com/2020-02-02_1.png) 37 | 38 | 就是`1&_token=xxx`,这一部分,不用加`;` 39 | 40 | 代码主要是通过`Scapy`实现的功能,对,`不是Scrapy`,Scapy具有模拟发送数据包、监听解析数据包、互联网协议解析、数据挖掘等多种用处 41 | 42 | 然后发现scapy-http这个模块,二者配合使用后,可以解析抓到的包的url等参数 43 | 44 | ### 安装工具 45 | ``` bash 46 | pip3 install scapy 47 | 48 | pip3 install scapy-http 49 | ``` 50 | 51 | 还要安装winpcap软件,为监控网卡提供接口,[下载地址](https://www.winpcap.org/install/default.htm) 52 | 53 | 注意替换代码中的iface:[iface 参数为你要监听的网卡的名称,参考这里](https://blog.csdn.net/luanpeng825485697/article/details/78379154) 54 | 55 | ## Stargazers over time 56 | 57 | [![Stargazers over time](https://starchart.cc/joelYing/XimalayaFM.svg)](https://starchart.cc/joelYing/XimalayaFM) 58 | -------------------------------------------------------------------------------- /ximalaya_search.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding:utf-8 -*- 3 | # author:joel 19-9-22 4 | 5 | import hashlib 6 | import json 7 | import os 8 | import re 9 | import time 10 | import random 11 | import requests 12 | 13 | """ 14 | 注意运行前请修改 make_dir() 中的下载路径!不要过度爬取,仅供测试学习! 15 | """ 16 | 17 | 18 | class XiMa(object): 19 | def __init__(self): 20 | self.base_url = 'https://www.ximalaya.com' 21 | self.base_api = 'https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&sort=0&pageSize=30' 22 | self.time_api = 'https://www.ximalaya.com/revision/time' 23 | self.header = { 24 | 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:63.0) Gecko/20100101 Firefox/63.0' 25 | } 26 | self.s = requests.session() 27 | 28 | def get_time(self): 29 | """ 30 | 获取服务器时间戳 31 | :return: 32 | """ 33 | r = self.s.get(self.time_api, headers=self.header) 34 | return r.text 35 | 36 | def get_sign(self): 37 | """ 38 | 获取sign: md5(ximalaya-服务器时间戳)(100以内随机数)服务器时间戳(100以内随机数)现在时间戳 39 | :return: xm_sign 40 | """ 41 | nowtime = str(round(time.time() * 1000)) 42 | servertime = self.get_time() 43 | sign = str(hashlib.md5("himalaya-{}".format(servertime).encode()).hexdigest()) + "({})".format( 44 | str(round(random.random() * 100))) + servertime + "({})".format(str(round(random.random() * 100))) + nowtime 45 | self.header["xm-sign"] = sign 46 | # print(sign) 47 | # return sign 48 | 49 | def index_choose(self): 50 | c_num = input(u'请输入对应操作的选项:\n' 51 | u'1、下载整部有声书\n' 52 | u'2、下载单个音源\n' 53 | u'3、返回\n') 54 | if c_num == '1': 55 | xm_id = input(u'请输入要获取喜马拉雅节目的ID:') 56 | xima.get_fm(xm_id) 57 | self.index_choose() 58 | elif c_num == '2': 59 | xm_id = input(u'请输入要获取的音源:') 60 | print(xm_id) 61 | self.index_choose() 62 | elif c_num == '3': 63 | print('结束') 64 | else: 65 | pass 66 | 67 | @staticmethod 68 | def make_dir(xm_fm_id): 69 | # 保存路径,请自行修改,这里是以有声书ID作为文件夹的路径 70 | fm_path = 'F:\\{}\\'.format(xm_fm_id) 71 | f = os.path.exists(fm_path) 72 | if not f: 73 | os.makedirs(fm_path) 74 | print('make file success...') 75 | else: 76 | print('file already exists...') 77 | return fm_path 78 | 79 | def get_fm(self, xm_fm_id): 80 | # 根据有声书ID构造url 81 | fm_url = self.base_url + '/youshengshu/{}'.format(xm_fm_id) 82 | print(fm_url) 83 | r_fm_url = self.s.get(fm_url, headers=self.header) 84 | fm_title = re.findall('

(.*?)

', r_fm_url.text, re.S)[0] 85 | print('书名:' + fm_title) 86 | # 新建有声书ID的文件夹 87 | fm_path = self.make_dir(xm_fm_id) 88 | # 取最大页数 89 | max_page = re.findall(r'', r_fm_url.text, re.S) 91 | if max_page and max_page[0]: 92 | for page in range(1, int(max_page[0]) + 1): 93 | print('第' + str(page) + '页') 94 | self.get_sign() 95 | r = self.s.get(self.base_api.format(xm_fm_id, page), headers=self.header) 96 | # print(json.loads(r.text)) 97 | r_json = json.loads(r.text) 98 | for audio in r_json['data']['tracksAudioPlay']: 99 | audio_title = str(audio['trackName']).replace(' ', '') 100 | audio_src = audio['src'] 101 | self.get_detail(audio_title, audio_src, fm_path) 102 | # 每爬取1页,30个音频,休眠3秒 103 | time.sleep(3) 104 | else: 105 | print(os.error) 106 | 107 | def get_detail(self, title, src, path): 108 | r_audio_src = self.s.get(src, headers=self.header) 109 | m4a_path = path + title + '.m4a' 110 | if not os.path.exists(m4a_path): 111 | with open(m4a_path, 'wb') as f: 112 | f.write(r_audio_src.content) 113 | print(title + '保存完毕...') 114 | else: 115 | print(title + 'm4a已存在') 116 | 117 | 118 | if __name__ == '__main__': 119 | xima = XiMa() 120 | xima.index_choose() 121 | -------------------------------------------------------------------------------- /ximalaya_vip.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding:utf-8 -*- 3 | # author:joel 18-6-5 4 | 5 | import sys 6 | import hashlib 7 | import json 8 | import math 9 | import os 10 | import random 11 | import time 12 | import requests 13 | from scapy.layers.inet import TCP 14 | from scapy.all import sniff 15 | from scapy_http import http 16 | from selenium import webdriver 17 | from selenium.webdriver.chrome.options import Options 18 | from PyQt5.QtWidgets import QFileDialog, QMainWindow, QApplication 19 | from PyQt5 import QtCore, QtGui, QtWidgets 20 | 21 | 22 | class Ui_XiMaDownloader(object): 23 | def setupUi(self, XiMaDownloader): 24 | XiMaDownloader.setObjectName("XiMaDownloader") 25 | XiMaDownloader.resize(600, 563) 26 | self.centralwidget = QtWidgets.QWidget(XiMaDownloader) 27 | self.centralwidget.setObjectName("centralwidget") 28 | self.title_label = QtWidgets.QLabel(self.centralwidget) 29 | self.title_label.setGeometry(QtCore.QRect(20, 20, 191, 41)) 30 | self.title_label.setLineWidth(1) 31 | self.title_label.setScaledContents(False) 32 | self.title_label.setAlignment(QtCore.Qt.AlignCenter) 33 | self.title_label.setWordWrap(True) 34 | self.title_label.setIndent(-1) 35 | self.title_label.setObjectName("title_label") 36 | self.verticalGroupBox = QtWidgets.QGroupBox(self.centralwidget) 37 | self.verticalGroupBox.setGeometry(QtCore.QRect(0, 0, 601, 71)) 38 | self.verticalGroupBox.setAutoFillBackground(True) 39 | self.verticalGroupBox.setStyleSheet("") 40 | self.verticalGroupBox.setObjectName("verticalGroupBox") 41 | self.verticalLayout = QtWidgets.QVBoxLayout(self.verticalGroupBox) 42 | self.verticalLayout.setObjectName("verticalLayout") 43 | self.choose_label = QtWidgets.QLabel(self.centralwidget) 44 | self.choose_label.setGeometry(QtCore.QRect(10, 80, 201, 31)) 45 | self.choose_label.setObjectName("choose_label") 46 | self.free_fm = QtWidgets.QCheckBox(self.centralwidget) 47 | self.free_fm.setGeometry(QtCore.QRect(20, 140, 91, 16)) 48 | self.free_fm.setObjectName("free_fm") 49 | self.vip_fm = QtWidgets.QCheckBox(self.centralwidget) 50 | self.vip_fm.setGeometry(QtCore.QRect(150, 140, 111, 16)) 51 | self.vip_fm.setObjectName("vip_fm") 52 | self.single_fm = QtWidgets.QCheckBox(self.centralwidget) 53 | self.single_fm.setGeometry(QtCore.QRect(290, 140, 71, 16)) 54 | self.single_fm.setObjectName("single_fm") 55 | self.id_label = QtWidgets.QLabel(self.centralwidget) 56 | self.id_label.setGeometry(QtCore.QRect(20, 190, 171, 41)) 57 | self.id_label.setObjectName("id_label") 58 | self.id_input_line = QtWidgets.QLineEdit(self.centralwidget) 59 | self.id_input_line.setGeometry(QtCore.QRect(200, 200, 133, 20)) 60 | self.id_input_line.setObjectName("id_input_line") 61 | self.token_label = QtWidgets.QLabel(self.centralwidget) 62 | self.token_label.setGeometry(QtCore.QRect(20, 250, 161, 41)) 63 | self.token_label.setObjectName("token_label") 64 | self.token_input_line = QtWidgets.QLineEdit(self.centralwidget) 65 | self.token_input_line.setGeometry(QtCore.QRect(200, 260, 361, 20)) 66 | self.token_input_line.setObjectName("token_input_line") 67 | self.d_type_box = QtWidgets.QGroupBox(self.centralwidget) 68 | self.d_type_box.setGeometry(QtCore.QRect(10, 120, 581, 51)) 69 | self.d_type_box.setObjectName("d_type_box") 70 | self.path_input_line = QtWidgets.QLineEdit(self.centralwidget) 71 | self.path_input_line.setGeometry(QtCore.QRect(200, 230, 133, 20)) 72 | self.path_input_line.setObjectName("path_input_line") 73 | self.path_label = QtWidgets.QLabel(self.centralwidget) 74 | self.path_label.setGeometry(QtCore.QRect(20, 220, 171, 41)) 75 | self.path_label.setObjectName("path_label") 76 | self.d_config_box = QtWidgets.QGroupBox(self.centralwidget) 77 | self.d_config_box.setGeometry(QtCore.QRect(10, 180, 581, 111)) 78 | self.d_config_box.setObjectName("d_config_box") 79 | self.choose_file_button = QtWidgets.QPushButton(self.d_config_box) 80 | self.choose_file_button.setGeometry(QtCore.QRect(340, 50, 75, 23)) 81 | self.choose_file_button.setObjectName("choose_file_button") 82 | self.run_button = QtWidgets.QPushButton(self.centralwidget) 83 | self.run_button.setGeometry(QtCore.QRect(510, 320, 75, 23)) 84 | self.run_button.setObjectName("run_button") 85 | self.cancel_button = QtWidgets.QPushButton(self.centralwidget) 86 | self.cancel_button.setGeometry(QtCore.QRect(510, 360, 75, 23)) 87 | self.cancel_button.setObjectName("cancel_button") 88 | self.output_text = QtWidgets.QTextBrowser(self.centralwidget) 89 | self.output_text.setGeometry(QtCore.QRect(10, 300, 491, 241)) 90 | self.output_text.setObjectName("output_text") 91 | self.d_config_box.raise_() 92 | self.d_type_box.raise_() 93 | self.verticalGroupBox.raise_() 94 | self.title_label.raise_() 95 | self.choose_label.raise_() 96 | self.free_fm.raise_() 97 | self.vip_fm.raise_() 98 | self.single_fm.raise_() 99 | self.id_label.raise_() 100 | self.id_input_line.raise_() 101 | self.token_label.raise_() 102 | self.token_input_line.raise_() 103 | self.path_input_line.raise_() 104 | self.path_label.raise_() 105 | self.run_button.raise_() 106 | self.cancel_button.raise_() 107 | self.output_text.raise_() 108 | XiMaDownloader.setCentralWidget(self.centralwidget) 109 | self.statusbar = QtWidgets.QStatusBar(XiMaDownloader) 110 | self.statusbar.setObjectName("statusbar") 111 | XiMaDownloader.setStatusBar(self.statusbar) 112 | 113 | self.retranslateUi(XiMaDownloader) 114 | self.cancel_button.clicked.connect(XiMaDownloader.close) 115 | QtCore.QMetaObject.connectSlotsByName(XiMaDownloader) 116 | 117 | def retranslateUi(self, XiMaDownloader): 118 | _translate = QtCore.QCoreApplication.translate 119 | XiMaDownloader.setWindowTitle(_translate("XiMaDownloader", "XiMaFM下载器")) 120 | self.title_label.setText(_translate("XiMaDownloader", "

XiMaFM下载器

")) 121 | self.choose_label.setText(_translate("XiMaDownloader", "

请选择需要下载的音频类型:

")) 122 | self.free_fm.setText(_translate("XiMaDownloader", "免费有声书")) 123 | self.vip_fm.setText(_translate("XiMaDownloader", "VIP/付费有声书")) 124 | self.single_fm.setText(_translate("XiMaDownloader", "单个音频")) 125 | self.id_label.setText(_translate("XiMaDownloader", "

请输入需要下载的音频ID:

")) 126 | self.token_label.setText(_translate("XiMaDownloader", "

请输入你的会员token:

")) 127 | self.d_type_box.setTitle(_translate("XiMaDownloader", "下载类型")) 128 | self.path_label.setText(_translate("XiMaDownloader", "

请输入保存文件的路径:

")) 129 | self.d_config_box.setTitle(_translate("XiMaDownloader", "下载配置")) 130 | self.choose_file_button.setText(_translate("XiMaDownloader", "选择文件夹")) 131 | self.run_button.setText(_translate("XiMaDownloader", "运行")) 132 | self.cancel_button.setText(_translate("XiMaDownloader", "取消")) 133 | 134 | 135 | class XiMaControl(QMainWindow, Ui_XiMaDownloader): 136 | def __init__(self): 137 | super(XiMaControl, self).__init__() 138 | self.setupUi(self) 139 | self.choose_file_button.clicked.connect(self.open_folder) 140 | self.free_fm.clicked.connect(self.free_check_box) 141 | self.vip_fm.clicked.connect(self.vip_check_box) 142 | self.single_fm.clicked.connect(self.single_check_box) 143 | self.run_button.clicked.connect(self.run) 144 | self.info = 0 145 | self.ximamain = XiMaMain() 146 | 147 | def open_folder(self): 148 | # 选取文件 149 | foldername = QFileDialog.getExistingDirectory(self, "选择文件夹", "F:/") 150 | foldername = str(foldername).replace('/', '\\') 151 | # print(foldername) 152 | self.path_input_line.setText(foldername) 153 | 154 | def free_check_box(self): 155 | if self.free_fm.isChecked() and self.vip_fm.isChecked() is False and self.single_fm.isChecked() is False: 156 | self.info = 1 157 | 158 | def vip_check_box(self): 159 | if self.vip_fm.isChecked() and self.free_fm.isChecked() is False and self.single_fm.isChecked() is False: 160 | self.info = 2 161 | 162 | def single_check_box(self): 163 | if self.single_fm.isChecked() and self.vip_fm.isChecked() is False and self.free_fm.isChecked() is False: 164 | self.info = 3 165 | 166 | def run(self): 167 | try: 168 | xm_id = self.id_input_line.text() 169 | folder_path = self.path_input_line.text() 170 | token = self.token_input_line.text() 171 | # message = str(self.info) + xm_id + folder_path + token 172 | # print_text(message) 173 | if self.info == 1: 174 | self.ximamain.get_free_fm(xm_id, folder_path) 175 | elif self.info == 2: 176 | self.ximamain.get_pay_fm(xm_id, folder_path, token) 177 | elif self.info == 3: 178 | print_text(xm_id) 179 | else: 180 | pass 181 | except Exception as e: 182 | print(e) 183 | 184 | 185 | class XiMa: 186 | def __init__(self): 187 | self.base_url = 'https://www.ximalaya.com' 188 | # 有声书 189 | self.yss_api = 'https://www.ximalaya.com/youshengshu/{}/{}' 190 | # 需要带上sign访问的api,适用于免费的音频的播放源 191 | self.free_sign_api = 'https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&sort=0&pageSize=30' 192 | # 获取单个免费音频api (trackId) 193 | self.free_track_api = 'http://mobile.ximalaya.com/mobile/redirect/free/play/{}/2' 194 | # 时间戳api 195 | self.time_api = 'https://www.ximalaya.com/revision/time' 196 | # 获取节目总音源个数与节目名 197 | self.album_api = 'https://www.ximalaya.com/revision/album?albumId={}' 198 | # 获取指定albumID的每一页音频的ID等track信息 199 | self.album_tracks_api = 'https://www.ximalaya.com/revision/album/v1/getTracksList?albumId={}&pageNum={}' 200 | # APP抓包得到,可用于获取付费节目总音源个数与节目名,获取音集所有音频ID,通过改变pageSize的大小,(albumId, pageSize) 201 | # 2020-02-29 最新测试pageSize最大为1000,所以针对章节大的有声书修改规则 202 | self.pay_size_api = 'http://180.153.255.6/mobile-album/album/page/ts-1569206246849?ac=WIFI&albumId={}' \ 203 | '&device=android&isAsc=true&isQueryInvitationBrand=true&isVideoAsc=true&pageId=1' \ 204 | '&pageSize={}' 205 | self.header = { 206 | 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:63.0) Gecko/20100101 Firefox/63.0' 207 | } 208 | self.s = requests.session() 209 | 210 | def get_time(self): 211 | """ 212 | 获取服务器时间戳 213 | """ 214 | r = self.s.get(self.time_api, headers=self.header) 215 | r_time = r.text 216 | return r_time 217 | 218 | def get_sign(self): 219 | """ 220 | 获取sign: md5(ximalaya-服务器时间戳)(100以内随机数)服务器时间戳(100以内随机数)现在时间戳 221 | """ 222 | nowtime = str(round(time.time() * 1000)) 223 | servertime = self.get_time() 224 | sign = str(hashlib.md5("himalaya-{}".format(servertime).encode()).hexdigest()) + "({})".format( 225 | str(round(random.random() * 100))) + servertime + "({})".format(str(round(random.random() * 100))) + nowtime 226 | self.header["xm-sign"] = sign 227 | 228 | def make_dir(self, xm_fm_id, path): 229 | """ 230 | 保存路径,请自行修改,这里是以有声书ID作为文件夹的路径 231 | """ 232 | fm_path = path + '\\' + xm_fm_id 233 | if str(path).endswith('\\'): 234 | fm_path = path + xm_fm_id 235 | f = os.path.exists(fm_path) 236 | if not f: 237 | os.makedirs(fm_path) 238 | print_text('make file success...') 239 | else: 240 | print_text('file already exists...') 241 | return fm_path 242 | 243 | def get_fm(self, xm_fm_id, path): 244 | """ 245 | 根据albumID解析 免费 fm信息 246 | """ 247 | # 根据有声书ID构造url 248 | r_fm_url = self.s.get(self.album_api.format(xm_fm_id), headers=self.header) 249 | r_fm_json = json.loads(r_fm_url.text) 250 | fm_title = r_fm_json['data']['mainInfo']['albumTitle'] 251 | fm_count = r_fm_json['data']['tracksInfo']['trackTotalCount'] 252 | fm_page_size = r_fm_json['data']['tracksInfo']['pageSize'] 253 | print_text('书名:' + fm_title) 254 | # 新建有声书ID的文件夹 255 | fm_path = self.make_dir(xm_fm_id, path) 256 | # 取最大页数,向上取整 257 | max_page = math.ceil(fm_count/fm_page_size) 258 | return fm_count, fm_path, max_page 259 | 260 | def get_free_sign(self, xm_fm_id, page): 261 | """ 262 | 下载免费的音频的播放源信息 263 | :param xm_fm_id: 264 | :param page: 265 | :return: response 266 | """ 267 | self.get_sign() 268 | response = self.s.get(self.free_sign_api.format(xm_fm_id, page), headers=self.header) 269 | return response 270 | 271 | def get_pay_album(self, xm_fm_id, page_num): 272 | """ 273 | 获取付费的音频的播放源信息 274 | :param xm_fm_id: 275 | :param max_page: 276 | :return: response 277 | """ 278 | response = self.s.get(self.album_tracks_api.format(xm_fm_id, page_num), headers=self.header) 279 | return response 280 | 281 | def save_fm2local(self, title, src, path): 282 | """ 283 | 保存音频到本地 284 | :param title: 285 | :param src: 286 | :param path: 287 | """ 288 | r_audio_src = requests.get(src, headers=self.header) 289 | m4a_path = path + '\\' + title + '.m4a' 290 | if not os.path.exists(m4a_path): 291 | with open(m4a_path, 'wb') as f: 292 | f.write(r_audio_src.content) 293 | print_text(title + '保存完毕...') 294 | else: 295 | print_text(title + '.m4a 已存在') 296 | 297 | 298 | class XiMaMain: 299 | def __init__(self): 300 | self.xmd = XiMa() 301 | 302 | def get_free_fm(self, xm_fm_id, path): 303 | fm_count, fm_path, max_page = self.xmd.get_fm(xm_fm_id, path) 304 | if max_page: 305 | for page in range(1, int(max_page) + 1): 306 | print_text(str('第' + str(page) + '页')) 307 | r = self.xmd.get_free_sign(xm_fm_id, page) 308 | r_json = json.loads(r.text) 309 | for audio in r_json['data']['tracksAudioPlay']: 310 | audio_title = str(audio['trackName']).replace(' ', '') 311 | audio_src = audio['src'] 312 | self.xmd.save_fm2local(audio_title, audio_src, fm_path) 313 | # 每爬取1页,30个音频,休眠3秒 314 | time.sleep(3) 315 | else: 316 | print_text('no max_page') 317 | 318 | def get_pay_fm(self, xm_fm_id, path, token): 319 | fm_count, fm_path, max_page = self.xmd.get_fm(xm_fm_id, path) 320 | if max_page: 321 | # 这里应该是 fm_count 322 | for p in range(1, int(max_page) + 1): 323 | r = self.xmd.get_pay_album(xm_fm_id, p) 324 | r_json = json.loads(r.text) 325 | tracks = r_json['data']['tracks'] 326 | for i, track in enumerate(tracks): 327 | audio_id = track['trackId'] 328 | audio_title = str(track['title']).replace(' ', '') 329 | audio_url = self.xmd.base_url + track['url'] 330 | print_text(str(audio_title + '' + audio_url)) 331 | real_url = self.auto_click(audio_url, token) 332 | self.xmd.save_fm2local(audio_title, real_url, fm_path) 333 | # 每爬取1页,30个音频,休眠1~3秒 334 | time.sleep(random.randint(1, 3)) 335 | else: 336 | print_text('no max_page') 337 | 338 | def auto_click(self, url, token): 339 | """ 340 | 参数url为对应的VIP音频的播放页面,selenium访问页面后,带上cookie(1&_token)模拟登陆再次访问,前提你已经是会员 341 | 等待页面加载完成,通过selenium+Chromedriver的无头浏览器模拟点击音频播放按钮 342 | scapy开始抓点击后音频真实地址的数据包,退出browser,解析包 343 | 注意click与抓包的顺序,先点击再抓包 344 | """ 345 | chrome_options = Options() 346 | chrome_options.add_argument('--headless') 347 | chrome_options.add_argument('--disable-gpu') 348 | browser = webdriver.Chrome(chrome_options=chrome_options) 349 | browser.get(url) 350 | browser.add_cookie({ 351 | # 此处xxx.com前,需要带点,注意domain也是cookie必须的 352 | 'domain': '.ximalaya.com', 353 | 'name': '1&_token', 354 | 'value': token, 355 | }) 356 | browser.get(url) 357 | time.sleep(4) 358 | print_text('开始抓包') 359 | # selenium 点击播放按钮 360 | browser.find_element_by_css_selector(".play-btn.fR_").click() 361 | # 下面的iface是电脑网卡的名称 count是捕获报文的数目 362 | pkts = sniff(filter="tcp and port 80", iface="Qualcomm Atheros AR956x Wireless Network Adapter", count=5) 363 | browser.quit() 364 | for pkt in pkts: 365 | if TCP in pkt and pkt.haslayer(http.HTTPRequest): 366 | http_header = pkt[http.HTTPRequest].fields 367 | req_url = 'http://' + bytes.decode(http_header['Host']) + bytes.decode(http_header['Path']) 368 | return req_url 369 | 370 | 371 | def print_text(msg): 372 | control.output_text.append(msg) 373 | print(msg) 374 | QApplication.processEvents() 375 | 376 | 377 | if __name__ == "__main__": 378 | app = QApplication(sys.argv) 379 | control = XiMaControl() 380 | control.show() 381 | sys.exit(app.exec_()) 382 | 383 | --------------------------------------------------------------------------------