├── .env
├── .gitignore
├── Dockerfile
├── README.md
├── apis
│   ├── __init__.py
│   └── pc_apis.py
├── author
│   ├── qq.jpg
│   ├── wx.png
│   ├── wx_pay.png
│   └── zfb_pay.jpg
├── main.py
├── package-lock.json
├── package.json
├── requirements.txt
├── static
│   ├── xhs_xray.js
│   ├── xhs_xray_pack1.js
│   ├── xhs_xray_pack2.js
│   └── xhs_xs_xsc_56.js
└── xhs_utils
    ├── __init__.py
    ├── common_utils.py
    ├── cookie_util.py
    ├── data_util.py
    └── xhs_util.py
/.env:
--------------------------------------------------------------------------------
1 | COOKIES=''
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | node_modules/
3 | *.so
4 | .Python
5 | build/
6 | develop-eggs/
7 | dist/
8 | downloads/
9 | eggs/
10 | .eggs/
11 | lib/
12 | lib64/
13 | parts/
14 | sdist/
15 | var/
16 | wheels/
17 | MANIFEST
18 | *.manifest
19 | *.spec
20 | .cache
21 | *.log
22 | local_settings.py
23 | db.sqlite3
24 | __pypackages__/
25 | .venv
26 | env/
27 | venv/
28 | ENV/
29 | env.bak/
30 | venv.bak/
31 |
32 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.9
2 |
3 | WORKDIR /app
4 |
5 | COPY . .
6 |
7 | RUN npm install  # NOTE: the python:3.9 base image does not ship Node.js/npm; install Node.js 18+ first or this step will fail
8 | RUN pip install --no-cache-dir -r requirements.txt
9 |
10 | # docker build -t spider_xhs .
11 | # docker run -it spider_xhs bash
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
21 | # Spider_XHS
22 |
23 | **✨ A professional Xiaohongshu (XHS) data-collection solution: crawl notes and save them as Excel or media files**
24 |
25 | **✨ An all-in-one XHS operations toolkit: rewrite notes (images and video) with AI and upload them in one click**
26 |
27 | ## ⭐ Feature List
28 |
29 | **⚠️ Any operation involving data injection is not allowed. This project is for learning and exchange only; you bear all consequences of misuse.**
30 |
31 | | Module | Implemented |
32 | |----------|---------------------------------------------------------------------------------|
33 | | XHS Creator Platform | ✅ QR-code login<br>✅ Phone verification-code login<br>✅ Upload (image-set / video) posts<br>✅ View your own uploaded posts |
34 | | XHS PC Web | ✅ QR-code login<br>✅ Phone verification-code login<br>✅ Get watermark-free images<br>✅ Get watermark-free videos<br>✅ Get all homepage channels<br>✅ Get homepage recommended notes<br>✅ Get a user's info<br>✅ Get your own info<br>✅ Get a user's uploaded notes<br>✅ Get all of a user's liked notes<br>✅ Get all of a user's collected notes<br>✅ Get a note's full content<br>✅ Search notes<br>✅ Search users<br>✅ Get a note's comments<br>✅ Get unread-message info<br>✅ Get received comments and @-mentions<br>✅ Get received likes and collects<br>✅ Get new-follower info |
35 |
36 |
37 | ## 🌟 Features
38 |
39 | - ✅ **Multi-dimensional data collection**
40 |   - User profile information
41 |   - Full note content
42 |   - Search-result scraping
43 | - 🚀 **High-performance architecture**
44 |   - Automatic retry on failure
45 | - 🔒 **Safe and stable**
46 |   - Adapted to the latest XHS APIs
47 |   - Exception handling
48 |   - Proxy support
49 | - 🎨 **Convenient management**
50 |   - Structured directory storage
51 |   - Formatted output (JSON / Excel / media)
52 |
53 | ## 🎨 Screenshots
54 | ### All processed users
55 | 
56 | ### All notes of a single user
57 | 
58 | ### Detailed content of a single note
59 | 
60 | ### The saved Excel file
61 | 
62 |
63 | ## 🛠️ Quick Start
64 | ### ⛳ Runtime environment
65 | - Python 3.7+
66 | - Node.js 18+
67 |
68 | ### 🎯 Install dependencies
69 | ```
70 | pip install -r requirements.txt
71 | npm install
72 | ```
73 |
74 | ### 🎨 Configuration
75 | The configuration lives in the `.env` file in the project root. Put your own logged-in cookie there. To get the cookie ➡️ open DevTools in your browser (F12), go to the Network tab, filter by Fetch/XHR, open any request, and copy the cookie from its request headers.
76 | 
77 |
78 | Paste the cookie into the `.env` file (note: only a cookie captured **after logging in** to Xiaohongshu is valid; an anonymous cookie will not work).
79 | 
80 |
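Once `COOKIES` is set, you can read it in your own script as well. A minimal sketch, assuming `python-dotenv` is used to load the `.env` file (which matches the `.env`-based setup described above; the project's own loading happens inside `init()` in xhs_utils/common_utils.py, not shown here):

```python
import os
from dotenv import load_dotenv  # assumed helper for the .env-based setup above

load_dotenv()                           # read .env from the project root
cookies_str = os.getenv("COOKIES", "")  # the logged-in cookie string copied from the browser
print("cookie loaded:", bool(cookies_str))
```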
81 | ### 🚀 Run the project
82 | ```
83 | python main.py
84 | ```
85 |
86 | ### 🗝️ Notes
87 | - main.py is the crawler's entry point; adapt it to your own needs
88 | - apis/pc_apis.py contains all of the API wrappers and can also be modified as needed (see the sketch below for calling it directly)
89 |
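A minimal sketch of calling the API layer directly instead of going through main.py (the class and method names come from apis/pc_apis.py; the user id and cookie string are placeholders):

```python
from apis.pc_apis import XHS_Apis

cookies_str = "<your logged-in XHS cookie string>"  # placeholder
apis = XHS_Apis()

# every wrapper returns a (success, msg, raw_json) tuple
success, msg, user_info = apis.get_user_info("<user_id>", cookies_str)
print(success, msg)
```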
90 |
91 | ## 🍥 Changelog
92 |
93 | | Date | Notes |
94 | |----------| --------------------------- |
95 | | 23/08/09 | - Initial commit |
96 | | 23/09/13 | - API change: added two fields to params; fixed images failing to download and errors from inaccessible pages |
97 | | 23/09/16 | - Fixed an encoding issue with larger videos; added exception handling |
98 | | 23/09/18 | - Refactored code; added retry on failure |
99 | | 23/09/19 | - Added downloading of search results |
100 | | 23/10/05 | - Added skipping of already-downloaded items; fetch more detailed note and user info |
101 | | 23/10/08 | - Published the code to PyPI; the project can be installed via pip install |
102 | | 23/10/17 | - Search download now supports sort options (1. general 2. hot 3. newest) |
103 | | 23/10/21 | - Added a graphical interface, published in release v2.1.0 |
104 | | 23/10/28 | - Fix bug: fixed a hidden issue in the search feature |
105 | | 25/03/18 | - Updated APIs; fixed assorted issues |
106 |
107 |
108 |
109 | ## 🧸 Additional Notes
110 | 1. Thanks for the stars ⭐ and follows 📰! The project is updated from time to time.
111 | 2. The author's contact details are on the profile page; feel free to reach out with questions.
112 | 3. Check out the author's other projects; PRs and issues are welcome.
113 | 4. Thanks for sponsoring! If this project helps you, buy the author a cup of milk tea~~ (happy all day 😊😊)
114 | 5. thank you~~~
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 | ## 📈 Star Trend
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
--------------------------------------------------------------------------------
/apis/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cv-cat/Spider_XHS/647a7add4b9d1eb3a9c7afa18a6a9205a190dde7/apis/__init__.py
--------------------------------------------------------------------------------
/apis/pc_apis.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | import json
3 | import re
4 | import urllib
5 | import requests
6 | from xhs_utils.xhs_util import splice_str, generate_request_params, generate_x_b3_traceid, get_common_headers
7 | from loguru import logger
8 |
9 | """
10 | Xiaohongshu (XHS) PC web APIs
11 | :param cookies_str: your cookies
12 | """
13 | class XHS_Apis():
14 | def __init__(self):
15 | self.base_url = "https://edith.xiaohongshu.com"
16 |
17 | def get_homefeed_all_channel(self, cookies_str: str, proxies: dict = None):
18 | """
19 | Get all homepage channels
20 | Returns all homepage channels
21 | """
22 | res_json = None
23 | try:
24 | api = "/api/sns/web/v1/homefeed/category"
25 | headers, cookies, data = generate_request_params(cookies_str, api)
26 | response = requests.get(self.base_url + api, headers=headers, cookies=cookies, proxies=proxies)
27 | res_json = response.json()
28 | success, msg = res_json["success"], res_json["msg"]
29 | except Exception as e:
30 | success = False
31 | msg = str(e)
32 | return success, msg, res_json
33 |
34 | def get_homefeed_recommend(self, category, cursor_score, refresh_type, note_index, cookies_str: str, proxies: dict = None):
35 | """
36 | Get homepage recommended notes
37 | :param category: the channel you want to fetch
38 | :param cursor_score: the cursor of the notes you want to fetch
39 | :param refresh_type: the refresh type of the notes you want to fetch
40 | :param note_index: the index of the notes you want to fetch
41 | :param cookies_str: your cookies
42 | Returns homepage recommended notes
43 | """
44 | res_json = None
45 | try:
46 | api = f"/api/sns/web/v1/homefeed"
47 | data = {
48 | "cursor_score": cursor_score,
49 | "num": 20,
50 | "refresh_type": refresh_type,
51 | "note_index": note_index,
52 | "unread_begin_note_id": "",
53 | "unread_end_note_id": "",
54 | "unread_note_count": 0,
55 | "category": category,
56 | "search_key": "",
57 | "need_num": 10,
58 | "image_formats": [
59 | "jpg",
60 | "webp",
61 | "avif"
62 | ],
63 | "need_filter_image": False
64 | }
65 | headers, cookies, trans_data = generate_request_params(cookies_str, api, data)
66 | response = requests.post(self.base_url + api, headers=headers, data=trans_data, cookies=cookies, proxies=proxies)
67 | res_json = response.json()
68 | success, msg = res_json["success"], res_json["msg"]
69 | except Exception as e:
70 | success = False
71 | msg = str(e)
72 | return success, msg, res_json
73 |
74 | def get_homefeed_recommend_by_num(self, category, require_num, cookies_str: str, proxies: dict = None):
75 | """
76 | Get a given number of homepage recommended notes
77 | :param category: the channel you want to fetch
78 | :param require_num: how many notes you want to fetch
79 | :param cookies_str: your cookies
80 | Returns the requested number of homepage recommended notes
81 | """
82 | cursor_score, refresh_type, note_index = "", 1, 0
83 | note_list = []
84 | try:
85 | while True:
86 | success, msg, res_json = self.get_homefeed_recommend(category, cursor_score, refresh_type, note_index, cookies_str, proxies)
87 | if not success:
88 | raise Exception(msg)
89 | if "items" not in res_json["data"]:
90 | break
91 | notes = res_json["data"]["items"]
92 | note_list.extend(notes)
93 | cursor_score = res_json["data"]["cursor_score"]
94 | refresh_type = 3
95 | note_index += 20
96 | if len(note_list) > require_num:
97 | break
98 | except Exception as e:
99 | success = False
100 | msg = str(e)
101 | if len(note_list) > require_num:
102 | note_list = note_list[:require_num]
103 | return success, msg, note_list
104 |
105 | def get_user_info(self, user_id: str, cookies_str: str, proxies: dict = None):
106 | """
107 | Get a user's info
108 | :param user_id: the id of the user you want to fetch
109 | :param cookies_str: your cookies
110 | Returns the user's info
111 | """
112 | res_json = None
113 | try:
114 | api = f"/api/sns/web/v1/user/otherinfo"
115 | params = {
116 | "target_user_id": user_id
117 | }
118 | splice_api = splice_str(api, params)
119 | headers, cookies, data = generate_request_params(cookies_str, splice_api)
120 | response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies)
121 | res_json = response.json()
122 | success, msg = res_json["success"], res_json["msg"]
123 | except Exception as e:
124 | success = False
125 | msg = str(e)
126 | return success, msg, res_json
127 |
128 | def get_user_self_info(self, cookies_str: str, proxies: dict = None):
129 | """
130 | Get your own user info (v1)
131 | :param cookies_str: your cookies
132 | Returns your own user info (v1)
133 | """
134 | res_json = None
135 | try:
136 | api = f"/api/sns/web/v1/user/selfinfo"
137 | headers, cookies, data = generate_request_params(cookies_str, api)
138 | response = requests.get(self.base_url + api, headers=headers, cookies=cookies, proxies=proxies)
139 | res_json = response.json()
140 | success, msg = res_json["success"], res_json["msg"]
141 | except Exception as e:
142 | success = False
143 | msg = str(e)
144 | return success, msg, res_json
145 |
146 |
147 | def get_user_self_info2(self, cookies_str: str, proxies: dict = None):
148 | """
149 | Get your own user info (v2)
150 | :param cookies_str: your cookies
151 | Returns your own user info (v2)
152 | """
153 | res_json = None
154 | try:
155 | api = f"/api/sns/web/v2/user/me"
156 | headers, cookies, data = generate_request_params(cookies_str, api)
157 | response = requests.get(self.base_url + api, headers=headers, cookies=cookies, proxies=proxies)
158 | res_json = response.json()
159 | success, msg = res_json["success"], res_json["msg"]
160 | except Exception as e:
161 | success = False
162 | msg = str(e)
163 | return success, msg, res_json
164 |
165 | def get_user_note_info(self, user_id: str, cursor: str, cookies_str: str, xsec_token='', xsec_source='', proxies: dict = None):
166 | """
167 | Get a user's notes at the given cursor position
168 | :param user_id: the id of the user you want to fetch
169 | :param cursor: the cursor of the notes you want to fetch
170 | :param cookies_str: your cookies
171 | Returns the user's notes at the given cursor position
172 | """
173 | res_json = None
174 | try:
175 | api = f"/api/sns/web/v1/user_posted"
176 | params = {
177 | "num": "30",
178 | "cursor": cursor,
179 | "user_id": user_id,
180 | "image_formats": "jpg,webp,avif",
181 | "xsec_token": xsec_token,
182 | "xsec_source": xsec_source,
183 | }
184 | splice_api = splice_str(api, params)
185 | headers, cookies, data = generate_request_params(cookies_str, splice_api)
186 | response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies)
187 | res_json = response.json()
188 | success, msg = res_json["success"], res_json["msg"]
189 | except Exception as e:
190 | success = False
191 | msg = str(e)
192 | return success, msg, res_json
193 |
194 |
195 | def get_user_all_notes(self, user_url: str, cookies_str: str, proxies: dict = None):
196 | """
197 | Get all of a user's notes
198 | :param user_url: the profile URL of the user you want to fetch
199 | :param cookies_str: your cookies
200 | Returns all of the user's notes
201 | """
202 | cursor = ''
203 | note_list = []
204 | try:
205 | urlParse = urllib.parse.urlparse(user_url)
206 | user_id = urlParse.path.split("/")[-1]
207 | kvs = urlParse.query.split('&')
208 | kvDist = {kv.split('=')[0]: kv.split('=')[1] for kv in kvs}
209 | xsec_token = kvDist['xsec_token'] if 'xsec_token' in kvDist else ""
210 | xsec_source = kvDist['xsec_source'] if 'xsec_source' in kvDist else "pc_search"
211 | while True:
212 | success, msg, res_json = self.get_user_note_info(user_id, cursor, cookies_str, xsec_token, xsec_source, proxies)
213 | if not success:
214 | raise Exception(msg)
215 | notes = res_json["data"]["notes"]
216 | if 'cursor' in res_json["data"]:
217 | cursor = str(res_json["data"]["cursor"])
218 | else:
219 | break
220 | note_list.extend(notes)
221 | if len(notes) == 0 or not res_json["data"]["has_more"]:
222 | break
223 | except Exception as e:
224 | success = False
225 | msg = str(e)
226 | return success, msg, note_list
227 |
228 | def get_user_like_note_info(self, user_id: str, cursor: str, cookies_str: str, xsec_token='', xsec_source='', proxies: dict = None):
229 | """
230 | Get a user's liked notes at the given cursor position
231 | :param user_id: the id of the user you want to fetch
232 | :param cursor: the cursor of the notes you want to fetch
233 | :param cookies_str: your cookies
234 | Returns the user's liked notes at the given cursor position
235 | """
236 | res_json = None
237 | try:
238 | api = f"/api/sns/web/v1/note/like/page"
239 | params = {
240 | "num": "30",
241 | "cursor": cursor,
242 | "user_id": user_id,
243 | "image_formats": "jpg,webp,avif",
244 | "xsec_token": xsec_token,
245 | "xsec_source": xsec_source,
246 | }
247 | splice_api = splice_str(api, params)
248 | headers, cookies, data = generate_request_params(cookies_str, splice_api)
249 | response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies)
250 | res_json = response.json()
251 | success, msg = res_json["success"], res_json["msg"]
252 | except Exception as e:
253 | success = False
254 | msg = str(e)
255 | return success, msg, res_json
256 |
257 | def get_user_all_like_note_info(self, user_url: str, cookies_str: str, proxies: dict = None):
258 | """
259 | Get all of a user's liked notes
260 | :param user_url: the profile URL of the user you want to fetch
261 | :param cookies_str: your cookies
262 | Returns all of the user's liked notes
263 | """
264 | cursor = ''
265 | note_list = []
266 | try:
267 | urlParse = urllib.parse.urlparse(user_url)
268 | user_id = urlParse.path.split("/")[-1]
269 | kvs = urlParse.query.split('&')
270 | kvDist = {kv.split('=')[0]: kv.split('=')[1] for kv in kvs}
271 | xsec_token = kvDist['xsec_token'] if 'xsec_token' in kvDist else ""
272 | xsec_source = kvDist['xsec_source'] if 'xsec_source' in kvDist else "pc_user"
273 | while True:
274 | success, msg, res_json = self.get_user_like_note_info(user_id, cursor, cookies_str, xsec_token,
275 | xsec_source, proxies)
276 | if not success:
277 | raise Exception(msg)
278 | notes = res_json["data"]["notes"]
279 | if 'cursor' in res_json["data"]:
280 | cursor = str(res_json["data"]["cursor"])
281 | else:
282 | break
283 | note_list.extend(notes)
284 | if len(notes) == 0 or not res_json["data"]["has_more"]:
285 | break
286 | except Exception as e:
287 | success = False
288 | msg = str(e)
289 | return success, msg, note_list
290 |
291 | def get_user_collect_note_info(self, user_id: str, cursor: str, cookies_str: str, xsec_token='', xsec_source='', proxies: dict = None):
292 | """
293 | Get a user's collected notes at the given cursor position
294 | :param user_id: the id of the user you want to fetch
295 | :param cursor: the cursor of the notes you want to fetch
296 | :param cookies_str: your cookies
297 | Returns the user's collected notes at the given cursor position
298 | """
299 | res_json = None
300 | try:
301 | api = f"/api/sns/web/v2/note/collect/page"
302 | params = {
303 | "num": "30",
304 | "cursor": cursor,
305 | "user_id": user_id,
306 | "image_formats": "jpg,webp,avif",
307 | "xsec_token": xsec_token,
308 | "xsec_source": xsec_source,
309 | }
310 | splice_api = splice_str(api, params)
311 | headers, cookies, data = generate_request_params(cookies_str, splice_api)
312 | response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies)
313 | res_json = response.json()
314 | success, msg = res_json["success"], res_json["msg"]
315 | except Exception as e:
316 | success = False
317 | msg = str(e)
318 | return success, msg, res_json
319 |
320 | def get_user_all_collect_note_info(self, user_url: str, cookies_str: str, proxies: dict = None):
321 | """
322 | Get all of a user's collected notes
323 | :param user_url: the profile URL of the user you want to fetch
324 | :param cookies_str: your cookies
325 | Returns all of the user's collected notes
326 | """
327 | cursor = ''
328 | note_list = []
329 | try:
330 | urlParse = urllib.parse.urlparse(user_url)
331 | user_id = urlParse.path.split("/")[-1]
332 | kvs = urlParse.query.split('&')
333 | kvDist = {kv.split('=')[0]: kv.split('=')[1] for kv in kvs}
334 | xsec_token = kvDist['xsec_token'] if 'xsec_token' in kvDist else ""
335 | xsec_source = kvDist['xsec_source'] if 'xsec_source' in kvDist else "pc_search"
336 | while True:
337 | success, msg, res_json = self.get_user_collect_note_info(user_id, cursor, cookies_str, xsec_token,
338 | xsec_source, proxies)
339 | if not success:
340 | raise Exception(msg)
341 | notes = res_json["data"]["notes"]
342 | if 'cursor' in res_json["data"]:
343 | cursor = str(res_json["data"]["cursor"])
344 | else:
345 | break
346 | note_list.extend(notes)
347 | if len(notes) == 0 or not res_json["data"]["has_more"]:
348 | break
349 | except Exception as e:
350 | success = False
351 | msg = str(e)
352 | return success, msg, note_list
353 |
354 | def get_note_info(self, url: str, cookies_str: str, proxies: dict = None):
355 | """
356 | Get the details of a note
357 | :param url: the URL of the note you want to fetch
358 | :param cookies_str: your cookies
359 | :param xsec_source: taken from the note URL; defaults to pc_search (also pc_user, pc_feed)
360 | Returns the note details
361 | """
362 | res_json = None
363 | try:
364 | urlParse = urllib.parse.urlparse(url)
365 | note_id = urlParse.path.split("/")[-1]
366 | kvs = urlParse.query.split('&')
367 | kvDist = {kv.split('=')[0]: kv.split('=')[1] for kv in kvs}
368 | api = f"/api/sns/web/v1/feed"
369 | data = {
370 | "source_note_id": note_id,
371 | "image_formats": [
372 | "jpg",
373 | "webp",
374 | "avif"
375 | ],
376 | "extra": {
377 | "need_body_topic": "1"
378 | },
379 | "xsec_source": kvDist['xsec_source'] if 'xsec_source' in kvDist else "pc_search",
380 | "xsec_token": kvDist['xsec_token']
381 | }
382 | headers, cookies, data = generate_request_params(cookies_str, api, data)
383 | response = requests.post(self.base_url + api, headers=headers, data=data, cookies=cookies, proxies=proxies)
384 | res_json = response.json()
385 | success, msg = res_json["success"], res_json["msg"]
386 | except Exception as e:
387 | success = False
388 | msg = str(e)
389 | return success, msg, res_json
390 |
391 |
392 | def get_search_keyword(self, word: str, cookies_str: str, proxies: dict = None):
393 | """
394 | Get search keyword suggestions
395 | :param word: your keyword
396 | :param cookies_str: your cookies
397 | Returns search keyword suggestions
398 | """
399 | res_json = None
400 | try:
401 | api = "/api/sns/web/v1/search/recommend"
402 | params = {
403 | "keyword": urllib.parse.quote(word)
404 | }
405 | splice_api = splice_str(api, params)
406 | headers, cookies, data = generate_request_params(cookies_str, splice_api)
407 | response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies)
408 | res_json = response.json()
409 | success, msg = res_json["success"], res_json["msg"]
410 | except Exception as e:
411 | success = False
412 | msg = str(e)
413 | return success, msg, res_json
414 |
415 | def search_note(self, query: str, cookies_str: str, page=1, sort="general", note_type=0, proxies: dict = None):
416 | """
417 | Get note search results
418 | :param query: the search keyword
419 | :param cookies_str: your cookies
420 | :param page: the page number to search
421 | :param sort: sort order; general: comprehensive, time_descending: newest, popularity_descending: hottest
422 | :param note_type: note type; 0: all, 1: video, 2: image-and-text
423 | Returns the search results
424 | """
425 | res_json = None
426 | try:
427 | api = "/api/sns/web/v1/search/notes"
428 | data = {
429 | "keyword": query,
430 | "page": page,
431 | "page_size": 20,
432 | "search_id": generate_x_b3_traceid(21),
433 | "sort": sort,
434 | "note_type": note_type,
435 | "ext_flags": [],
436 | "image_formats": [
437 | "jpg",
438 | "webp",
439 | "avif"
440 | ]
441 | }
442 | headers, cookies, data = generate_request_params(cookies_str, api, data)
443 | response = requests.post(self.base_url + api, headers=headers, data=data.encode('utf-8'), cookies=cookies, proxies=proxies)
444 | res_json = response.json()
445 | success, msg = res_json["success"], res_json["msg"]
446 | except Exception as e:
447 | success = False
448 | msg = str(e)
449 | return success, msg, res_json
450 |
451 | def search_some_note(self, query: str, require_num: int, cookies_str: str, sort="general", note_type=0, proxies: dict = None):
452 | """
453 | Search for a given number of notes, with configurable sort order and note type
454 | :param query: the search keyword
455 | :param require_num: how many notes to fetch
456 | :param cookies_str: your cookies
457 | :param sort: sort order; general: comprehensive, time_descending: newest, popularity_descending: hottest
458 | :param note_type: note type; 0: all, 1: video, 2: image-and-text
459 | Returns the search results
460 | """
461 | page = 1
462 | note_list = []
463 | try:
464 | while True:
465 | success, msg, res_json = self.search_note(query, cookies_str, page, sort, note_type, proxies)
466 | if not success:
467 | raise Exception(msg)
468 | if "items" not in res_json["data"]:
469 | break
470 | notes = res_json["data"]["items"]
471 | note_list.extend(notes)
472 | page += 1
473 | if len(note_list) >= require_num or not res_json["data"]["has_more"]:
474 | break
475 | except Exception as e:
476 | success = False
477 | msg = str(e)
478 | if len(note_list) > require_num:
479 | note_list = note_list[:require_num]
480 | return success, msg, note_list
481 |
482 | def search_user(self, query: str, cookies_str: str, page=1, proxies: dict = None):
483 | """
484 | Get user search results
485 | :param query: the search keyword
486 | :param cookies_str: your cookies
487 | :param page: the page number to search
488 | Returns the search results
489 | """
490 | res_json = None
491 | try:
492 | api = "/api/sns/web/v1/search/usersearch"
493 | data = {
494 | "search_user_request": {
495 | "keyword": query,
496 | "search_id": "2dn9they1jbjxwawlo4xd",
497 | "page": page,
498 | "page_size": 15,
499 | "biz_type": "web_search_user",
500 | "request_id": "22471139-1723999898524"
501 | }
502 | }
503 | headers, cookies, data = generate_request_params(cookies_str, api, data)
504 | response = requests.post(self.base_url + api, headers=headers, data=data.encode('utf-8'), cookies=cookies, proxies=proxies)
505 | res_json = response.json()
506 | success, msg = res_json["success"], res_json["msg"]
507 | except Exception as e:
508 | success = False
509 | msg = str(e)
510 | return success, msg, res_json
511 |
512 | def search_some_user(self, query: str, require_num: int, cookies_str: str, proxies: dict = None):
513 | """
514 | Search for a given number of users
515 | :param query: the search keyword
516 | :param require_num: how many users to fetch
517 | :param cookies_str: your cookies
518 | Returns the search results
519 | """
520 | page = 1
521 | user_list = []
522 | try:
523 | while True:
524 | success, msg, res_json = self.search_user(query, cookies_str, page, proxies)
525 | if not success:
526 | raise Exception(msg)
527 | if "users" not in res_json["data"]:
528 | break
529 | users = res_json["data"]["users"]
530 | user_list.extend(users)
531 | page += 1
532 | if len(user_list) >= require_num or not res_json["data"]["has_more"]:
533 | break
534 | except Exception as e:
535 | success = False
536 | msg = str(e)
537 | if len(user_list) > require_num:
538 | user_list = user_list[:require_num]
539 | return success, msg, user_list
540 |
541 | def get_note_out_comment(self, note_id: str, cursor: str, xsec_token: str, cookies_str: str, proxies: dict = None):
542 | """
543 | Get a note's top-level comments at the given cursor position
544 | :param note_id: the id of the note
545 | :param cursor: the cursor of the comments to fetch
546 | :param cookies_str: your cookies
547 | Returns the note's top-level comments at the given cursor position
548 | """
549 | res_json = None
550 | try:
551 | api = "/api/sns/web/v2/comment/page"
552 | params = {
553 | "note_id": note_id,
554 | "cursor": cursor,
555 | "top_comment_id": "",
556 | "image_formats": "jpg,webp,avif",
557 | "xsec_token": xsec_token
558 | }
559 | splice_api = splice_str(api, params)
560 | headers, cookies, data = generate_request_params(cookies_str, splice_api)
561 | response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies)
562 | res_json = response.json()
563 | success, msg = res_json["success"], res_json["msg"]
564 | except Exception as e:
565 | success = False
566 | msg = str(e)
567 | return success, msg, res_json
568 |
569 | def get_note_all_out_comment(self, note_id: str, xsec_token: str, cookies_str: str, proxies: dict = None):
570 | """
571 | Get all of a note's top-level comments
572 | :param note_id: the id of the note
573 | :param cookies_str: your cookies
574 | Returns all of the note's top-level comments
575 | """
576 | cursor = ''
577 | note_out_comment_list = []
578 | try:
579 | while True:
580 | success, msg, res_json = self.get_note_out_comment(note_id, cursor, xsec_token, cookies_str, proxies)
581 | if not success:
582 | raise Exception(msg)
583 | comments = res_json["data"]["comments"]
584 | if 'cursor' in res_json["data"]:
585 | cursor = str(res_json["data"]["cursor"])
586 | else:
587 | break
588 | note_out_comment_list.extend(comments)
589 | if len(note_out_comment_list) == 0 or not res_json["data"]["has_more"]:
590 | break
591 | except Exception as e:
592 | success = False
593 | msg = str(e)
594 | return success, msg, note_out_comment_list
595 |
596 | def get_note_inner_comment(self, comment: dict, cursor: str, xsec_token: str, cookies_str: str, proxies: dict = None):
597 | """
598 | Get a note's second-level (reply) comments at the given cursor position
599 | :param comment: the top-level comment whose replies to fetch
600 | :param cursor: the cursor of the comments to fetch
601 | :param cookies_str: your cookies
602 | Returns the note's second-level comments at the given cursor position
603 | """
604 | res_json = None
605 | try:
606 | api = "/api/sns/web/v2/comment/sub/page"
607 | params = {
608 | "note_id": comment['note_id'],
609 | "root_comment_id": comment['id'],
610 | "num": "10",
611 | "cursor": cursor,
612 | "image_formats": "jpg,webp,avif",
613 | "top_comment_id": '',
614 | "xsec_token": xsec_token
615 | }
616 | splice_api = splice_str(api, params)
617 | headers, cookies, data = generate_request_params(cookies_str, splice_api)
618 | response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies)
619 | res_json = response.json()
620 | success, msg = res_json["success"], res_json["msg"]
621 | except Exception as e:
622 | success = False
623 | msg = str(e)
624 | return success, msg, res_json
625 |
626 | def get_note_all_inner_comment(self, comment: dict, xsec_token: str, cookies_str: str, proxies: dict = None):
627 | """
628 | Get all of a note's second-level (reply) comments
629 | :param comment: the top-level comment whose replies to fetch
630 | :param cookies_str: your cookies
631 | Returns all of the note's second-level comments
632 | """
633 | try:
634 | if not comment['sub_comment_has_more']:
635 | return True, 'success', comment
636 | cursor = comment['sub_comment_cursor']
637 | inner_comment_list = []
638 | while True:
639 | success, msg, res_json = self.get_note_inner_comment(comment, cursor, xsec_token, cookies_str, proxies)
640 | if not success:
641 | raise Exception(msg)
642 | comments = res_json["data"]["comments"]
643 | if 'cursor' in res_json["data"]:
644 | cursor = str(res_json["data"]["cursor"])
645 | else:
646 | break
647 | inner_comment_list.extend(comments)
648 | if not res_json["data"]["has_more"]:
649 | break
650 | comment['sub_comments'].extend(inner_comment_list)
651 | except Exception as e:
652 | success = False
653 | msg = str(e)
654 | return success, msg, comment
655 |
656 | def get_note_all_comment(self, url: str, cookies_str: str, proxies: dict = None):
657 | """
658 | Get all comments on a note
659 | :param url: the URL of the note you want to fetch
660 | :param cookies_str: your cookies
661 | Returns all comments on the note
662 | """
663 | out_comment_list = []
664 | try:
665 | urlParse = urllib.parse.urlparse(url)
666 | note_id = urlParse.path.split("/")[-1]
667 | kvs = urlParse.query.split('&')
668 | kvDist = {kv.split('=')[0]: kv.split('=')[1] for kv in kvs}
669 | success, msg, out_comment_list = self.get_note_all_out_comment(note_id, kvDist['xsec_token'], cookies_str, proxies)
670 | if not success:
671 | raise Exception(msg)
672 | for comment in out_comment_list:
673 | success, msg, new_comment = self.get_note_all_inner_comment(comment, kvDist['xsec_token'], cookies_str, proxies)
674 | if not success:
675 | raise Exception(msg)
676 | except Exception as e:
677 | success = False
678 | msg = str(e)
679 | return success, msg, out_comment_list
680 |
681 | def get_unread_message(self, cookies_str: str, proxies: dict = None):
682 | """
683 | Get unread messages
684 | :param cookies_str: your cookies
685 | Returns unread messages
686 | """
687 | res_json = None
688 | try:
689 | api = "/api/sns/web/unread_count"
690 | headers, cookies, data = generate_request_params(cookies_str, api)
691 | response = requests.get(self.base_url + api, headers=headers, cookies=cookies, proxies=proxies)
692 | res_json = response.json()
693 | success, msg = res_json["success"], res_json["msg"]
694 | except Exception as e:
695 | success = False
696 | msg = str(e)
697 | return success, msg, res_json
698 |
699 | def get_metions(self, cursor: str, cookies_str: str, proxies: dict = None):
700 | """
701 | Get comment and @-mention notifications
702 | :param cursor: the cursor of the notifications you want to fetch
703 | :param cookies_str: your cookies
704 | Returns comment and @-mention notifications
705 | """
706 | res_json = None
707 | try:
708 | api = "/api/sns/web/v1/you/mentions"
709 | params = {
710 | "num": "20",
711 | "cursor": cursor
712 | }
713 | splice_api = splice_str(api, params)
714 | headers, cookies, data = generate_request_params(cookies_str, splice_api)
715 | response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies)
716 | res_json = response.json()
717 | success, msg = res_json["success"], res_json["msg"]
718 | except Exception as e:
719 | success = False
720 | msg = str(e)
721 | return success, msg, res_json
722 |
723 | def get_all_metions(self, cookies_str: str, proxies: dict = None):
724 | """
725 | Get all comment and @-mention notifications
726 | :param cookies_str: your cookies
727 | Returns all comment and @-mention notifications
728 | """
729 | cursor = ''
730 | metions_list = []
731 | try:
732 | while True:
733 | success, msg, res_json = self.get_metions(cursor, cookies_str, proxies)
734 | if not success:
735 | raise Exception(msg)
736 | metions = res_json["data"]["message_list"]
737 | if 'cursor' in res_json["data"]:
738 | cursor = str(res_json["data"]["cursor"])
739 | else:
740 | break
741 | metions_list.extend(metions)
742 | if not res_json["data"]["has_more"]:
743 | break
744 | except Exception as e:
745 | success = False
746 | msg = str(e)
747 | return success, msg, metions_list
748 |
749 | def get_likesAndcollects(self, cursor: str, cookies_str: str, proxies: dict = None):
750 | """
751 | Get like and collect notifications
752 | :param cursor: the cursor of the notifications you want to fetch
753 | :param cookies_str: your cookies
754 | Returns like and collect notifications
755 | """
756 | res_json = None
757 | try:
758 | api = "/api/sns/web/v1/you/likes"
759 | params = {
760 | "num": "20",
761 | "cursor": cursor
762 | }
763 | splice_api = splice_str(api, params)
764 | headers, cookies, data = generate_request_params(cookies_str, splice_api)
765 | response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies)
766 | res_json = response.json()
767 | success, msg = res_json["success"], res_json["msg"]
768 | except Exception as e:
769 | success = False
770 | msg = str(e)
771 | return success, msg, res_json
772 |
773 | def get_all_likesAndcollects(self, cookies_str: str, proxies: dict = None):
774 | """
775 | Get all like and collect notifications
776 | :param cookies_str: your cookies
777 | Returns all like and collect notifications
778 | """
779 | cursor = ''
780 | likesAndcollects_list = []
781 | try:
782 | while True:
783 | success, msg, res_json = self.get_likesAndcollects(cursor, cookies_str, proxies)
784 | if not success:
785 | raise Exception(msg)
786 | likesAndcollects = res_json["data"]["message_list"]
787 | if 'cursor' in res_json["data"]:
788 | cursor = str(res_json["data"]["cursor"])
789 | else:
790 | break
791 | likesAndcollects_list.extend(likesAndcollects)
792 | if not res_json["data"]["has_more"]:
793 | break
794 | except Exception as e:
795 | success = False
796 | msg = str(e)
797 | return success, msg, likesAndcollects_list
798 |
799 | def get_new_connections(self, cursor: str, cookies_str: str, proxies: dict = None):
800 | """
801 | Get new-follower notifications
802 | :param cursor: the cursor of the notifications you want to fetch
803 | :param cookies_str: your cookies
804 | Returns new-follower notifications
805 | """
806 | res_json = None
807 | try:
808 | api = "/api/sns/web/v1/you/connections"
809 | params = {
810 | "num": "20",
811 | "cursor": cursor
812 | }
813 | splice_api = splice_str(api, params)
814 | headers, cookies, data = generate_request_params(cookies_str, splice_api)
815 | response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies)
816 | res_json = response.json()
817 | success, msg = res_json["success"], res_json["msg"]
818 | except Exception as e:
819 | success = False
820 | msg = str(e)
821 | return success, msg, res_json
822 |
823 | def get_all_new_connections(self, cookies_str: str, proxies: dict = None):
824 | """
825 | Get all new-follower notifications
826 | :param cookies_str: your cookies
827 | Returns all new-follower notifications
828 | """
829 | cursor = ''
830 | connections_list = []
831 | try:
832 | while True:
833 | success, msg, res_json = self.get_new_connections(cursor, cookies_str, proxies)
834 | if not success:
835 | raise Exception(msg)
836 | connections = res_json["data"]["message_list"]
837 | if 'cursor' in res_json["data"]:
838 | cursor = str(res_json["data"]["cursor"])
839 | else:
840 | break
841 | connections_list.extend(connections)
842 | if not res_json["data"]["has_more"]:
843 | break
844 | except Exception as e:
845 | success = False
846 | msg = str(e)
847 | return success, msg, connections_list
848 |
849 | @staticmethod
850 | def get_note_no_water_video(note_id):
851 | """
852 | Get a note's watermark-free video
853 | :param note_id: the id of the note you want to fetch
854 | Returns the note's watermark-free video
855 | """
856 | success = True
857 | msg = 'success'
858 | video_addr = None
859 | try:
860 | headers = get_common_headers()
861 | url = f"https://www.xiaohongshu.com/explore/{note_id}"
862 | response = requests.get(url, headers=headers)
863 | res = response.text
864 | video_addr = re.findall(r'', res)[0]  # NOTE: the regex pattern appears to have been stripped in this copy; it originally extracted the video URL from the page HTML
865 | except Exception as e:
866 | success = False
867 | msg = str(e)
868 | return success, msg, video_addr
869 |
870 |
871 | @staticmethod
872 | def get_note_no_water_img(img_url):
873 | """
874 | Get a note's watermark-free image
875 | :param img_url: the URL of the image you want to fetch
876 | Returns the watermark-free image URL
877 | """
878 | success = True
879 | msg = 'success'
880 | new_url = None
881 | try:
882 | # https://sns-webpic-qc.xhscdn.com/202403211626/c4fcecea4bd012a1fe8d2f1968d6aa91/110/0/01e50c1c135e8c010010000000018ab74db332_0.jpg!nd_dft_wlteh_webp_3
883 | if '.jpg' in img_url:
884 | img_id = '/'.join([split for split in img_url.split('/')[-3:]]).split('!')[0]
885 | # return f"http://ci.xiaohongshu.com/{img_id}?imageview2/2/w/1920/format/png"
886 | # return f"http://ci.xiaohongshu.com/{img_id}?imageview2/2/w/format/png"
887 | # return f'https://sns-img-hw.xhscdn.com/{img_id}'
888 | new_url = f'https://sns-img-qc.xhscdn.com/{img_id}'
889 |
890 | # 'https://sns-webpic-qc.xhscdn.com/202403231640/ea961053c4e0e467df1cc93afdabd630/spectrum/1000g0k0200n7mj8fq0005n7ikbllol6q50oniuo!nd_dft_wgth_webp_3'
891 | elif 'spectrum' in img_url:
892 | img_id = '/'.join(img_url.split('/')[-2:]).split('!')[0]
893 | # return f'http://sns-webpic.xhscdn.com/{img_id}?imageView2/2/w/1920/format/jpg'
894 | new_url = f'http://sns-webpic.xhscdn.com/{img_id}?imageView2/2/w/format/jpg'
895 | else:
896 | # 'http://sns-webpic-qc.xhscdn.com/202403181511/64ad2ea67ce04159170c686a941354f5/1040g008310cs1hii6g6g5ngacg208q5rlf1gld8!nd_dft_wlteh_webp_3'
897 | img_id = img_url.split('/')[-1].split('!')[0]
898 | # return f"http://ci.xiaohongshu.com/{img_id}?imageview2/2/w/1920/format/png"
899 | # return f"http://ci.xiaohongshu.com/{img_id}?imageview2/2/w/format/png"
900 | # return f'https://sns-img-hw.xhscdn.com/{img_id}'
901 | new_url = f'https://sns-img-qc.xhscdn.com/{img_id}'
902 | except Exception as e:
903 | success = False
904 | msg = str(e)
905 | return success, msg, new_url
906 |
907 | if __name__ == '__main__':
908 | """
909 | Usage examples for the Xiaohongshu APIs
910 | All data-crawling APIs live in this file
911 | Data-injection APIs are against the rules; do not attempt them
912 | """
913 | xhs_apis = XHS_Apis()
914 | cookies_str = r''
915 | # get user info
916 | user_url = 'https://www.xiaohongshu.com/user/profile/67a332a2000000000d008358?xsec_token=ABTf9yz4cLHhTycIlksF0jOi1yIZgfcaQ6IXNNGdKJ8xg=&xsec_source=pc_feed'
917 | success, msg, user_info = xhs_apis.get_user_info('67a332a2000000000d008358', cookies_str)
918 | logger.info(f'get_user_info result {json.dumps(user_info, ensure_ascii=False)}: {success}, msg: {msg}')
919 | success, msg, note_list = xhs_apis.get_user_all_notes(user_url, cookies_str)
920 | logger.info(f'get_user_all_notes result {json.dumps(note_list, ensure_ascii=False)}: {success}, msg: {msg}')
921 | # get note info
922 | note_url = r'https://www.xiaohongshu.com/explore/67d7c713000000000900e391?xsec_token=AB1ACxbo5cevHxV_bWibTmK8R1DDz0NnAW1PbFZLABXtE=&xsec_source=pc_user'
923 | success, msg, note_info = xhs_apis.get_note_info(note_url, cookies_str)
924 | logger.info(f'get_note_info result {json.dumps(note_info, ensure_ascii=False)}: {success}, msg: {msg}')
925 | # get search keyword suggestions
926 | query = "榴莲"
927 | success, msg, search_keyword = xhs_apis.get_search_keyword(query, cookies_str)
928 | logger.info(f'get_search_keyword result {json.dumps(search_keyword, ensure_ascii=False)}: {success}, msg: {msg}')
929 | # search notes
930 | query = "榴莲"
931 | query_num = 10
932 | sort = "general"
933 | note_type = 0
934 | success, msg, notes = xhs_apis.search_some_note(query, query_num, cookies_str, sort, note_type)
935 | logger.info(f'search_some_note result {json.dumps(notes, ensure_ascii=False)}: {success}, msg: {msg}')
936 | # get note comments
937 | note_url = r'https://www.xiaohongshu.com/explore/67d7c713000000000900e391?xsec_token=AB1ACxbo5cevHxV_bWibTmK8R1DDz0NnAW1PbFZLABXtE=&xsec_source=pc_user'
938 | success, msg, note_all_comment = xhs_apis.get_note_all_comment(note_url, cookies_str)
939 | logger.info(f'get_note_all_comment result {json.dumps(note_all_comment, ensure_ascii=False)}: {success}, msg: {msg}')
940 |
941 |
942 |
943 |
944 |
--------------------------------------------------------------------------------
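Note on the `proxies` parameter used throughout pc_apis.py: it is handed directly to `requests`, so it follows the standard requests proxy mapping. A minimal sketch (the proxy address is a placeholder, not something defined by the project):

```python
from apis.pc_apis import XHS_Apis

# standard requests-style proxy mapping; replace the placeholder address with your own proxy
proxies = {
    "http": "http://127.0.0.1:7890",
    "https": "http://127.0.0.1:7890",
}

apis = XHS_Apis()
success, msg, channels = apis.get_homefeed_all_channel("<your cookie string>", proxies=proxies)
print(success, msg)
```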
/author/qq.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cv-cat/Spider_XHS/647a7add4b9d1eb3a9c7afa18a6a9205a190dde7/author/qq.jpg
--------------------------------------------------------------------------------
/author/wx.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cv-cat/Spider_XHS/647a7add4b9d1eb3a9c7afa18a6a9205a190dde7/author/wx.png
--------------------------------------------------------------------------------
/author/wx_pay.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cv-cat/Spider_XHS/647a7add4b9d1eb3a9c7afa18a6a9205a190dde7/author/wx_pay.png
--------------------------------------------------------------------------------
/author/zfb_pay.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cv-cat/Spider_XHS/647a7add4b9d1eb3a9c7afa18a6a9205a190dde7/author/zfb_pay.jpg
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import os
2 | from loguru import logger
3 | from apis.pc_apis import XHS_Apis
4 | from xhs_utils.common_utils import init
5 | from xhs_utils.data_util import handle_note_info, download_note, save_to_xlsx
6 |
7 |
8 | class Data_Spider():
9 | def __init__(self):
10 | self.xhs_apis = XHS_Apis()
11 |
12 | def spider_note(self, note_url: str, cookies_str: str, proxies=None):
13 | """
14 | Crawl the info of a single note
15 | :param note_url:
16 | :param cookies_str:
17 | :return:
18 | """
19 | note_info = None
20 | try:
21 | success, msg, note_info = self.xhs_apis.get_note_info(note_url, cookies_str, proxies)
22 | if success:
23 | note_info = note_info['data']['items'][0]
24 | note_info['url'] = note_url
25 | note_info = handle_note_info(note_info)
26 | except Exception as e:
27 | success = False
28 | msg = e
29 | logger.info(f'Crawled note info {note_url}: {success}, msg: {msg}')
30 | return success, msg, note_info
31 |
32 | def spider_some_note(self, notes: list, cookies_str: str, base_path: dict, save_choice: str, excel_name: str = '', proxies=None):
33 | """
34 | Crawl info for a batch of notes
35 | :param notes:
36 | :param cookies_str:
37 | :param base_path:
38 | :return:
39 | """
40 | if (save_choice == 'all' or save_choice == 'excel') and excel_name == '':
41 | raise ValueError('excel_name cannot be empty')
42 | note_list = []
43 | for note_url in notes:
44 | success, msg, note_info = self.spider_note(note_url, cookies_str, proxies)
45 | if note_info is not None and success:
46 | note_list.append(note_info)
47 | for note_info in note_list:
48 | if save_choice == 'all' or save_choice == 'media':
49 | download_note(note_info, base_path['media'])
50 | if save_choice == 'all' or save_choice == 'excel':
51 | file_path = os.path.abspath(os.path.join(base_path['excel'], f'{excel_name}.xlsx'))
52 | save_to_xlsx(note_list, file_path)
53 |
54 |
55 | def spider_user_all_note(self, user_url: str, cookies_str: str, base_path: dict, save_choice: str, excel_name: str = '', proxies=None):
56 | """
57 | Crawl all notes of a user
58 | :param user_url:
59 | :param cookies_str:
60 | :param base_path:
61 | :return:
62 | """
63 | note_list = []
64 | try:
65 | success, msg, all_note_info = self.xhs_apis.get_user_all_notes(user_url, cookies_str, proxies)
66 | if success:
67 | logger.info(f'User {user_url} has {len(all_note_info)} posts')
68 | for simple_note_info in all_note_info:
69 | note_url = f"https://www.xiaohongshu.com/explore/{simple_note_info['note_id']}?xsec_token={simple_note_info['xsec_token']}"
70 | note_list.append(note_url)
71 | if save_choice == 'all' or save_choice == 'excel':
72 | excel_name = user_url.split('/')[-1].split('?')[0]
73 | self.spider_some_note(note_list, cookies_str, base_path, save_choice, excel_name, proxies)
74 | except Exception as e:
75 | success = False
76 | msg = e
77 | logger.info(f'Crawled all notes of user {user_url}: {success}, msg: {msg}')
78 | return note_list, success, msg
79 |
80 | def spider_some_search_note(self, query: str, require_num: int, cookies_str: str, base_path: dict, save_choice: str, sort="general", note_type=0, excel_name: str = '', proxies=None):
81 | """
82 | Search for a given number of notes and crawl them, with configurable sort order and note type
83 | :param query: the search keyword
84 | :param require_num: how many notes to fetch
85 | :param cookies_str: your cookies
86 | :param base_path: save paths
87 | :param sort: sort order; general: comprehensive, time_descending: newest, popularity_descending: hottest
88 | :param note_type: note type; 0: all, 1: video, 2: image-and-text
89 | Returns the search results
90 | """
91 | note_list = []
92 | try:
93 | success, msg, notes = self.xhs_apis.search_some_note(query, require_num, cookies_str, sort, note_type, proxies)
94 | if success:
95 | notes = list(filter(lambda x: x['model_type'] == "note", notes))
96 | logger.info(f'Search keyword {query}: {len(notes)} notes found')
97 | for note in notes:
98 | note_url = f"https://www.xiaohongshu.com/explore/{note['id']}?xsec_token={note['xsec_token']}"
99 | note_list.append(note_url)
100 | if save_choice == 'all' or save_choice == 'excel':
101 | excel_name = query
102 | self.spider_some_note(note_list, cookies_str, base_path, save_choice, excel_name, proxies)
103 | except Exception as e:
104 | success = False
105 | msg = e
106 | logger.info(f'Searched notes for keyword {query}: {success}, msg: {msg}')
107 | return note_list, success, msg
108 |
109 | if __name__ == '__main__':
110 | """
111 | This file is the crawler's entry point and can be run directly
112 | apis/pc_apis.py is the API layer with all of the Xiaohongshu data endpoints; feel free to build on it. Thanks for the stars and follows!
113 | """
114 | cookies_str, base_path = init()
115 | data_spider = Data_Spider()
116 | # save_choice: all: save everything, media: save videos and images, excel: save to Excel
117 | # when save_choice is excel or all, excel_name must not be empty
118 | # 1
119 | notes = [
120 | r'https://www.xiaohongshu.com/explore/67d7c713000000000900e391?xsec_token=AB1ACxbo5cevHxV_bWibTmK8R1DDz0NnAW1PbFZLABXtE=&xsec_source=pc_user',
121 | ]
122 | data_spider.spider_some_note(notes, cookies_str, base_path, 'all', 'test')
123 |
124 | # 2
125 | user_url = 'https://www.xiaohongshu.com/user/profile/67a332a2000000000d008358?xsec_token=ABTf9yz4cLHhTycIlksF0jOi1yIZgfcaQ6IXNNGdKJ8xg=&xsec_source=pc_feed'
126 | data_spider.spider_user_all_note(user_url, cookies_str, base_path, 'all')
127 |
128 | # 3
129 | query = "榴莲"
130 | query_num = 10
131 | sort = "general"
132 | note_type = 0
133 | data_spider.spider_some_search_note(query, query_num, cookies_str, base_path, 'all', sort, note_type)
134 |
--------------------------------------------------------------------------------
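`init()` from xhs_utils/common_utils.py supplies `cookies_str` and `base_path`, but its implementation is not included in this dump. From the way `spider_some_note` uses it, `base_path` only needs `media` and `excel` keys pointing at existing directories. A minimal sketch of driving `Data_Spider` with an explicitly built `base_path` (the directory names and the note URL are placeholders):

```python
import os
from main import Data_Spider

# spider_some_note reads base_path['media'] and base_path['excel']
base_path = {"media": "./datas/media", "excel": "./datas/excel"}
for path in base_path.values():
    os.makedirs(path, exist_ok=True)

cookies_str = "<your logged-in XHS cookie string>"  # placeholder
spider = Data_Spider()
note_urls = [
    "https://www.xiaohongshu.com/explore/<note_id>?xsec_token=<token>&xsec_source=pc_user",  # placeholder
]
spider.spider_some_note(note_urls, cookies_str, base_path, save_choice="excel", excel_name="demo")
```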
/package-lock.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "Spider_XHS",
3 | "lockfileVersion": 3,
4 | "requires": true,
5 | "packages": {
6 | "": {
7 | "dependencies": {
8 | "jsdom": "^26.0.0"
9 | }
10 | },
11 | "node_modules/@asamuzakjp/css-color": {
12 | "version": "3.1.1",
13 | "resolved": "https://mirrors.cloud.tencent.com/npm/@asamuzakjp/css-color/-/css-color-3.1.1.tgz",
14 | "integrity": "sha512-hpRD68SV2OMcZCsrbdkccTw5FXjNDLo5OuqSHyHZfwweGsDWZwDJ2+gONyNAbazZclobMirACLw0lk8WVxIqxA==",
15 | "dependencies": {
16 | "@csstools/css-calc": "^2.1.2",
17 | "@csstools/css-color-parser": "^3.0.8",
18 | "@csstools/css-parser-algorithms": "^3.0.4",
19 | "@csstools/css-tokenizer": "^3.0.3",
20 | "lru-cache": "^10.4.3"
21 | }
22 | },
23 | "node_modules/@csstools/color-helpers": {
24 | "version": "5.0.2",
25 | "resolved": "https://mirrors.cloud.tencent.com/npm/@csstools/color-helpers/-/color-helpers-5.0.2.tgz",
26 | "integrity": "sha512-JqWH1vsgdGcw2RR6VliXXdA0/59LttzlU8UlRT/iUUsEeWfYq8I+K0yhihEUTTHLRm1EXvpsCx3083EU15ecsA==",
27 | "funding": [
28 | {
29 | "type": "github",
30 | "url": "https://github.com/sponsors/csstools"
31 | },
32 | {
33 | "type": "opencollective",
34 | "url": "https://opencollective.com/csstools"
35 | }
36 | ],
37 | "engines": {
38 | "node": ">=18"
39 | }
40 | },
41 | "node_modules/@csstools/css-calc": {
42 | "version": "2.1.2",
43 | "resolved": "https://mirrors.cloud.tencent.com/npm/@csstools/css-calc/-/css-calc-2.1.2.tgz",
44 | "integrity": "sha512-TklMyb3uBB28b5uQdxjReG4L80NxAqgrECqLZFQbyLekwwlcDDS8r3f07DKqeo8C4926Br0gf/ZDe17Zv4wIuw==",
45 | "funding": [
46 | {
47 | "type": "github",
48 | "url": "https://github.com/sponsors/csstools"
49 | },
50 | {
51 | "type": "opencollective",
52 | "url": "https://opencollective.com/csstools"
53 | }
54 | ],
55 | "engines": {
56 | "node": ">=18"
57 | },
58 | "peerDependencies": {
59 | "@csstools/css-parser-algorithms": "^3.0.4",
60 | "@csstools/css-tokenizer": "^3.0.3"
61 | }
62 | },
63 | "node_modules/@csstools/css-color-parser": {
64 | "version": "3.0.8",
65 | "resolved": "https://mirrors.cloud.tencent.com/npm/@csstools/css-color-parser/-/css-color-parser-3.0.8.tgz",
66 | "integrity": "sha512-pdwotQjCCnRPuNi06jFuP68cykU1f3ZWExLe/8MQ1LOs8Xq+fTkYgd+2V8mWUWMrOn9iS2HftPVaMZDaXzGbhQ==",
67 | "funding": [
68 | {
69 | "type": "github",
70 | "url": "https://github.com/sponsors/csstools"
71 | },
72 | {
73 | "type": "opencollective",
74 | "url": "https://opencollective.com/csstools"
75 | }
76 | ],
77 | "dependencies": {
78 | "@csstools/color-helpers": "^5.0.2",
79 | "@csstools/css-calc": "^2.1.2"
80 | },
81 | "engines": {
82 | "node": ">=18"
83 | },
84 | "peerDependencies": {
85 | "@csstools/css-parser-algorithms": "^3.0.4",
86 | "@csstools/css-tokenizer": "^3.0.3"
87 | }
88 | },
89 | "node_modules/@csstools/css-parser-algorithms": {
90 | "version": "3.0.4",
91 | "resolved": "https://mirrors.cloud.tencent.com/npm/@csstools/css-parser-algorithms/-/css-parser-algorithms-3.0.4.tgz",
92 | "integrity": "sha512-Up7rBoV77rv29d3uKHUIVubz1BTcgyUK72IvCQAbfbMv584xHcGKCKbWh7i8hPrRJ7qU4Y8IO3IY9m+iTB7P3A==",
93 | "funding": [
94 | {
95 | "type": "github",
96 | "url": "https://github.com/sponsors/csstools"
97 | },
98 | {
99 | "type": "opencollective",
100 | "url": "https://opencollective.com/csstools"
101 | }
102 | ],
103 | "engines": {
104 | "node": ">=18"
105 | },
106 | "peerDependencies": {
107 | "@csstools/css-tokenizer": "^3.0.3"
108 | }
109 | },
110 | "node_modules/@csstools/css-tokenizer": {
111 | "version": "3.0.3",
112 | "resolved": "https://mirrors.cloud.tencent.com/npm/@csstools/css-tokenizer/-/css-tokenizer-3.0.3.tgz",
113 | "integrity": "sha512-UJnjoFsmxfKUdNYdWgOB0mWUypuLvAfQPH1+pyvRJs6euowbFkFC6P13w1l8mJyi3vxYMxc9kld5jZEGRQs6bw==",
114 | "funding": [
115 | {
116 | "type": "github",
117 | "url": "https://github.com/sponsors/csstools"
118 | },
119 | {
120 | "type": "opencollective",
121 | "url": "https://opencollective.com/csstools"
122 | }
123 | ],
124 | "engines": {
125 | "node": ">=18"
126 | }
127 | },
128 | "node_modules/agent-base": {
129 | "version": "7.1.3",
130 | "resolved": "https://mirrors.cloud.tencent.com/npm/agent-base/-/agent-base-7.1.3.tgz",
131 | "integrity": "sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==",
132 | "engines": {
133 | "node": ">= 14"
134 | }
135 | },
136 | "node_modules/asynckit": {
137 | "version": "0.4.0",
138 | "resolved": "https://mirrors.cloud.tencent.com/npm/asynckit/-/asynckit-0.4.0.tgz",
139 | "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
140 | },
141 | "node_modules/call-bind-apply-helpers": {
142 | "version": "1.0.2",
143 | "resolved": "https://mirrors.cloud.tencent.com/npm/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz",
144 | "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==",
145 | "dependencies": {
146 | "es-errors": "^1.3.0",
147 | "function-bind": "^1.1.2"
148 | },
149 | "engines": {
150 | "node": ">= 0.4"
151 | }
152 | },
153 | "node_modules/combined-stream": {
154 | "version": "1.0.8",
155 | "resolved": "https://mirrors.cloud.tencent.com/npm/combined-stream/-/combined-stream-1.0.8.tgz",
156 | "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
157 | "dependencies": {
158 | "delayed-stream": "~1.0.0"
159 | },
160 | "engines": {
161 | "node": ">= 0.8"
162 | }
163 | },
164 | "node_modules/cssstyle": {
165 | "version": "4.3.0",
166 | "resolved": "https://mirrors.cloud.tencent.com/npm/cssstyle/-/cssstyle-4.3.0.tgz",
167 | "integrity": "sha512-6r0NiY0xizYqfBvWp1G7WXJ06/bZyrk7Dc6PHql82C/pKGUTKu4yAX4Y8JPamb1ob9nBKuxWzCGTRuGwU3yxJQ==",
168 | "dependencies": {
169 | "@asamuzakjp/css-color": "^3.1.1",
170 | "rrweb-cssom": "^0.8.0"
171 | },
172 | "engines": {
173 | "node": ">=18"
174 | }
175 | },
176 | "node_modules/data-urls": {
177 | "version": "5.0.0",
178 | "resolved": "https://mirrors.cloud.tencent.com/npm/data-urls/-/data-urls-5.0.0.tgz",
179 | "integrity": "sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==",
180 | "dependencies": {
181 | "whatwg-mimetype": "^4.0.0",
182 | "whatwg-url": "^14.0.0"
183 | },
184 | "engines": {
185 | "node": ">=18"
186 | }
187 | },
188 | "node_modules/debug": {
189 | "version": "4.4.0",
190 | "resolved": "https://mirrors.cloud.tencent.com/npm/debug/-/debug-4.4.0.tgz",
191 | "integrity": "sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA==",
192 | "dependencies": {
193 | "ms": "^2.1.3"
194 | },
195 | "engines": {
196 | "node": ">=6.0"
197 | },
198 | "peerDependenciesMeta": {
199 | "supports-color": {
200 | "optional": true
201 | }
202 | }
203 | },
204 | "node_modules/decimal.js": {
205 | "version": "10.5.0",
206 | "resolved": "https://mirrors.cloud.tencent.com/npm/decimal.js/-/decimal.js-10.5.0.tgz",
207 | "integrity": "sha512-8vDa8Qxvr/+d94hSh5P3IJwI5t8/c0KsMp+g8bNw9cY2icONa5aPfvKeieW1WlG0WQYwwhJ7mjui2xtiePQSXw=="
208 | },
209 | "node_modules/delayed-stream": {
210 | "version": "1.0.0",
211 | "resolved": "https://mirrors.cloud.tencent.com/npm/delayed-stream/-/delayed-stream-1.0.0.tgz",
212 | "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
213 | "engines": {
214 | "node": ">=0.4.0"
215 | }
216 | },
217 | "node_modules/dunder-proto": {
218 | "version": "1.0.1",
219 | "resolved": "https://mirrors.cloud.tencent.com/npm/dunder-proto/-/dunder-proto-1.0.1.tgz",
220 | "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==",
221 | "dependencies": {
222 | "call-bind-apply-helpers": "^1.0.1",
223 | "es-errors": "^1.3.0",
224 | "gopd": "^1.2.0"
225 | },
226 | "engines": {
227 | "node": ">= 0.4"
228 | }
229 | },
230 | "node_modules/entities": {
231 | "version": "4.5.0",
232 | "resolved": "https://mirrors.cloud.tencent.com/npm/entities/-/entities-4.5.0.tgz",
233 | "integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
234 | "engines": {
235 | "node": ">=0.12"
236 | },
237 | "funding": {
238 | "url": "https://github.com/fb55/entities?sponsor=1"
239 | }
240 | },
241 | "node_modules/es-define-property": {
242 | "version": "1.0.1",
243 | "resolved": "https://mirrors.cloud.tencent.com/npm/es-define-property/-/es-define-property-1.0.1.tgz",
244 | "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==",
245 | "engines": {
246 | "node": ">= 0.4"
247 | }
248 | },
249 | "node_modules/es-errors": {
250 | "version": "1.3.0",
251 | "resolved": "https://mirrors.cloud.tencent.com/npm/es-errors/-/es-errors-1.3.0.tgz",
252 | "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==",
253 | "engines": {
254 | "node": ">= 0.4"
255 | }
256 | },
257 | "node_modules/es-object-atoms": {
258 | "version": "1.1.1",
259 | "resolved": "https://mirrors.cloud.tencent.com/npm/es-object-atoms/-/es-object-atoms-1.1.1.tgz",
260 | "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==",
261 | "dependencies": {
262 | "es-errors": "^1.3.0"
263 | },
264 | "engines": {
265 | "node": ">= 0.4"
266 | }
267 | },
268 | "node_modules/es-set-tostringtag": {
269 | "version": "2.1.0",
270 | "resolved": "https://mirrors.cloud.tencent.com/npm/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz",
271 | "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==",
272 | "dependencies": {
273 | "es-errors": "^1.3.0",
274 | "get-intrinsic": "^1.2.6",
275 | "has-tostringtag": "^1.0.2",
276 | "hasown": "^2.0.2"
277 | },
278 | "engines": {
279 | "node": ">= 0.4"
280 | }
281 | },
282 | "node_modules/form-data": {
283 | "version": "4.0.2",
284 | "resolved": "https://mirrors.cloud.tencent.com/npm/form-data/-/form-data-4.0.2.tgz",
285 | "integrity": "sha512-hGfm/slu0ZabnNt4oaRZ6uREyfCj6P4fT/n6A1rGV+Z0VdGXjfOhVUpkn6qVQONHGIFwmveGXyDs75+nr6FM8w==",
286 | "dependencies": {
287 | "asynckit": "^0.4.0",
288 | "combined-stream": "^1.0.8",
289 | "es-set-tostringtag": "^2.1.0",
290 | "mime-types": "^2.1.12"
291 | },
292 | "engines": {
293 | "node": ">= 6"
294 | }
295 | },
296 | "node_modules/function-bind": {
297 | "version": "1.1.2",
298 | "resolved": "https://mirrors.cloud.tencent.com/npm/function-bind/-/function-bind-1.1.2.tgz",
299 | "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==",
300 | "funding": {
301 | "url": "https://github.com/sponsors/ljharb"
302 | }
303 | },
304 | "node_modules/get-intrinsic": {
305 | "version": "1.3.0",
306 | "resolved": "https://mirrors.cloud.tencent.com/npm/get-intrinsic/-/get-intrinsic-1.3.0.tgz",
307 | "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==",
308 | "dependencies": {
309 | "call-bind-apply-helpers": "^1.0.2",
310 | "es-define-property": "^1.0.1",
311 | "es-errors": "^1.3.0",
312 | "es-object-atoms": "^1.1.1",
313 | "function-bind": "^1.1.2",
314 | "get-proto": "^1.0.1",
315 | "gopd": "^1.2.0",
316 | "has-symbols": "^1.1.0",
317 | "hasown": "^2.0.2",
318 | "math-intrinsics": "^1.1.0"
319 | },
320 | "engines": {
321 | "node": ">= 0.4"
322 | },
323 | "funding": {
324 | "url": "https://github.com/sponsors/ljharb"
325 | }
326 | },
327 | "node_modules/get-proto": {
328 | "version": "1.0.1",
329 | "resolved": "https://mirrors.cloud.tencent.com/npm/get-proto/-/get-proto-1.0.1.tgz",
330 | "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==",
331 | "dependencies": {
332 | "dunder-proto": "^1.0.1",
333 | "es-object-atoms": "^1.0.0"
334 | },
335 | "engines": {
336 | "node": ">= 0.4"
337 | }
338 | },
339 | "node_modules/gopd": {
340 | "version": "1.2.0",
341 | "resolved": "https://mirrors.cloud.tencent.com/npm/gopd/-/gopd-1.2.0.tgz",
342 | "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==",
343 | "engines": {
344 | "node": ">= 0.4"
345 | },
346 | "funding": {
347 | "url": "https://github.com/sponsors/ljharb"
348 | }
349 | },
350 | "node_modules/has-symbols": {
351 | "version": "1.1.0",
352 | "resolved": "https://mirrors.cloud.tencent.com/npm/has-symbols/-/has-symbols-1.1.0.tgz",
353 | "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==",
354 | "engines": {
355 | "node": ">= 0.4"
356 | },
357 | "funding": {
358 | "url": "https://github.com/sponsors/ljharb"
359 | }
360 | },
361 | "node_modules/has-tostringtag": {
362 | "version": "1.0.2",
363 | "resolved": "https://mirrors.cloud.tencent.com/npm/has-tostringtag/-/has-tostringtag-1.0.2.tgz",
364 | "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==",
365 | "dependencies": {
366 | "has-symbols": "^1.0.3"
367 | },
368 | "engines": {
369 | "node": ">= 0.4"
370 | },
371 | "funding": {
372 | "url": "https://github.com/sponsors/ljharb"
373 | }
374 | },
375 | "node_modules/hasown": {
376 | "version": "2.0.2",
377 | "resolved": "https://mirrors.cloud.tencent.com/npm/hasown/-/hasown-2.0.2.tgz",
378 | "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==",
379 | "dependencies": {
380 | "function-bind": "^1.1.2"
381 | },
382 | "engines": {
383 | "node": ">= 0.4"
384 | }
385 | },
386 | "node_modules/html-encoding-sniffer": {
387 | "version": "4.0.0",
388 | "resolved": "https://mirrors.cloud.tencent.com/npm/html-encoding-sniffer/-/html-encoding-sniffer-4.0.0.tgz",
389 | "integrity": "sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==",
390 | "dependencies": {
391 | "whatwg-encoding": "^3.1.1"
392 | },
393 | "engines": {
394 | "node": ">=18"
395 | }
396 | },
397 | "node_modules/http-proxy-agent": {
398 | "version": "7.0.2",
399 | "resolved": "https://mirrors.cloud.tencent.com/npm/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz",
400 | "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==",
401 | "dependencies": {
402 | "agent-base": "^7.1.0",
403 | "debug": "^4.3.4"
404 | },
405 | "engines": {
406 | "node": ">= 14"
407 | }
408 | },
409 | "node_modules/https-proxy-agent": {
410 | "version": "7.0.6",
411 | "resolved": "https://mirrors.cloud.tencent.com/npm/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz",
412 | "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==",
413 | "dependencies": {
414 | "agent-base": "^7.1.2",
415 | "debug": "4"
416 | },
417 | "engines": {
418 | "node": ">= 14"
419 | }
420 | },
421 | "node_modules/iconv-lite": {
422 | "version": "0.6.3",
423 | "resolved": "https://mirrors.cloud.tencent.com/npm/iconv-lite/-/iconv-lite-0.6.3.tgz",
424 | "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==",
425 | "dependencies": {
426 | "safer-buffer": ">= 2.1.2 < 3.0.0"
427 | },
428 | "engines": {
429 | "node": ">=0.10.0"
430 | }
431 | },
432 | "node_modules/is-potential-custom-element-name": {
433 | "version": "1.0.1",
434 | "resolved": "https://mirrors.cloud.tencent.com/npm/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz",
435 | "integrity": "sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ=="
436 | },
437 | "node_modules/jsdom": {
438 | "version": "26.0.0",
439 | "resolved": "https://mirrors.cloud.tencent.com/npm/jsdom/-/jsdom-26.0.0.tgz",
440 | "integrity": "sha512-BZYDGVAIriBWTpIxYzrXjv3E/4u8+/pSG5bQdIYCbNCGOvsPkDQfTVLAIXAf9ETdCpduCVTkDe2NNZ8NIwUVzw==",
441 | "dependencies": {
442 | "cssstyle": "^4.2.1",
443 | "data-urls": "^5.0.0",
444 | "decimal.js": "^10.4.3",
445 | "form-data": "^4.0.1",
446 | "html-encoding-sniffer": "^4.0.0",
447 | "http-proxy-agent": "^7.0.2",
448 | "https-proxy-agent": "^7.0.6",
449 | "is-potential-custom-element-name": "^1.0.1",
450 | "nwsapi": "^2.2.16",
451 | "parse5": "^7.2.1",
452 | "rrweb-cssom": "^0.8.0",
453 | "saxes": "^6.0.0",
454 | "symbol-tree": "^3.2.4",
455 | "tough-cookie": "^5.0.0",
456 | "w3c-xmlserializer": "^5.0.0",
457 | "webidl-conversions": "^7.0.0",
458 | "whatwg-encoding": "^3.1.1",
459 | "whatwg-mimetype": "^4.0.0",
460 | "whatwg-url": "^14.1.0",
461 | "ws": "^8.18.0",
462 | "xml-name-validator": "^5.0.0"
463 | },
464 | "engines": {
465 | "node": ">=18"
466 | },
467 | "peerDependencies": {
468 | "canvas": "^3.0.0"
469 | },
470 | "peerDependenciesMeta": {
471 | "canvas": {
472 | "optional": true
473 | }
474 | }
475 | },
476 | "node_modules/lru-cache": {
477 | "version": "10.4.3",
478 | "resolved": "https://mirrors.cloud.tencent.com/npm/lru-cache/-/lru-cache-10.4.3.tgz",
479 | "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ=="
480 | },
481 | "node_modules/math-intrinsics": {
482 | "version": "1.1.0",
483 | "resolved": "https://mirrors.cloud.tencent.com/npm/math-intrinsics/-/math-intrinsics-1.1.0.tgz",
484 | "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==",
485 | "engines": {
486 | "node": ">= 0.4"
487 | }
488 | },
489 | "node_modules/mime-db": {
490 | "version": "1.52.0",
491 | "resolved": "https://mirrors.cloud.tencent.com/npm/mime-db/-/mime-db-1.52.0.tgz",
492 | "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
493 | "engines": {
494 | "node": ">= 0.6"
495 | }
496 | },
497 | "node_modules/mime-types": {
498 | "version": "2.1.35",
499 | "resolved": "https://mirrors.cloud.tencent.com/npm/mime-types/-/mime-types-2.1.35.tgz",
500 | "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
501 | "dependencies": {
502 | "mime-db": "1.52.0"
503 | },
504 | "engines": {
505 | "node": ">= 0.6"
506 | }
507 | },
508 | "node_modules/ms": {
509 | "version": "2.1.3",
510 | "resolved": "https://mirrors.cloud.tencent.com/npm/ms/-/ms-2.1.3.tgz",
511 | "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="
512 | },
513 | "node_modules/nwsapi": {
514 | "version": "2.2.18",
515 | "resolved": "https://mirrors.cloud.tencent.com/npm/nwsapi/-/nwsapi-2.2.18.tgz",
516 | "integrity": "sha512-p1TRH/edngVEHVbwqWnxUViEmq5znDvyB+Sik5cmuLpGOIfDf/39zLiq3swPF8Vakqn+gvNiOQAZu8djYlQILA=="
517 | },
518 | "node_modules/parse5": {
519 | "version": "7.2.1",
520 | "resolved": "https://mirrors.cloud.tencent.com/npm/parse5/-/parse5-7.2.1.tgz",
521 | "integrity": "sha512-BuBYQYlv1ckiPdQi/ohiivi9Sagc9JG+Ozs0r7b/0iK3sKmrb0b9FdWdBbOdx6hBCM/F9Ir82ofnBhtZOjCRPQ==",
522 | "dependencies": {
523 | "entities": "^4.5.0"
524 | },
525 | "funding": {
526 | "url": "https://github.com/inikulin/parse5?sponsor=1"
527 | }
528 | },
529 | "node_modules/punycode": {
530 | "version": "2.3.1",
531 | "resolved": "https://mirrors.cloud.tencent.com/npm/punycode/-/punycode-2.3.1.tgz",
532 | "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==",
533 | "engines": {
534 | "node": ">=6"
535 | }
536 | },
537 | "node_modules/rrweb-cssom": {
538 | "version": "0.8.0",
539 | "resolved": "https://mirrors.cloud.tencent.com/npm/rrweb-cssom/-/rrweb-cssom-0.8.0.tgz",
540 | "integrity": "sha512-guoltQEx+9aMf2gDZ0s62EcV8lsXR+0w8915TC3ITdn2YueuNjdAYh/levpU9nFaoChh9RUS5ZdQMrKfVEN9tw=="
541 | },
542 | "node_modules/safer-buffer": {
543 | "version": "2.1.2",
544 | "resolved": "https://mirrors.cloud.tencent.com/npm/safer-buffer/-/safer-buffer-2.1.2.tgz",
545 | "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg=="
546 | },
547 | "node_modules/saxes": {
548 | "version": "6.0.0",
549 | "resolved": "https://mirrors.cloud.tencent.com/npm/saxes/-/saxes-6.0.0.tgz",
550 | "integrity": "sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==",
551 | "dependencies": {
552 | "xmlchars": "^2.2.0"
553 | },
554 | "engines": {
555 | "node": ">=v12.22.7"
556 | }
557 | },
558 | "node_modules/symbol-tree": {
559 | "version": "3.2.4",
560 | "resolved": "https://mirrors.cloud.tencent.com/npm/symbol-tree/-/symbol-tree-3.2.4.tgz",
561 | "integrity": "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw=="
562 | },
563 | "node_modules/tldts": {
564 | "version": "6.1.84",
565 | "resolved": "https://mirrors.cloud.tencent.com/npm/tldts/-/tldts-6.1.84.tgz",
566 | "integrity": "sha512-aRGIbCIF3teodtUFAYSdQONVmDRy21REM3o6JnqWn5ZkQBJJ4gHxhw6OfwQ+WkSAi3ASamrS4N4nyazWx6uTYg==",
567 | "dependencies": {
568 | "tldts-core": "^6.1.84"
569 | },
570 | "bin": {
571 | "tldts": "bin/cli.js"
572 | }
573 | },
574 | "node_modules/tldts-core": {
575 | "version": "6.1.84",
576 | "resolved": "https://mirrors.cloud.tencent.com/npm/tldts-core/-/tldts-core-6.1.84.tgz",
577 | "integrity": "sha512-NaQa1W76W2aCGjXybvnMYzGSM4x8fvG2AN/pla7qxcg0ZHbooOPhA8kctmOZUDfZyhDL27OGNbwAeig8P4p1vg=="
578 | },
579 | "node_modules/tough-cookie": {
580 | "version": "5.1.2",
581 | "resolved": "https://mirrors.cloud.tencent.com/npm/tough-cookie/-/tough-cookie-5.1.2.tgz",
582 | "integrity": "sha512-FVDYdxtnj0G6Qm/DhNPSb8Ju59ULcup3tuJxkFb5K8Bv2pUXILbf0xZWU8PX8Ov19OXljbUyveOFwRMwkXzO+A==",
583 | "dependencies": {
584 | "tldts": "^6.1.32"
585 | },
586 | "engines": {
587 | "node": ">=16"
588 | }
589 | },
590 | "node_modules/tr46": {
591 | "version": "5.1.0",
592 | "resolved": "https://mirrors.cloud.tencent.com/npm/tr46/-/tr46-5.1.0.tgz",
593 | "integrity": "sha512-IUWnUK7ADYR5Sl1fZlO1INDUhVhatWl7BtJWsIhwJ0UAK7ilzzIa8uIqOO/aYVWHZPJkKbEL+362wrzoeRF7bw==",
594 | "dependencies": {
595 | "punycode": "^2.3.1"
596 | },
597 | "engines": {
598 | "node": ">=18"
599 | }
600 | },
601 | "node_modules/w3c-xmlserializer": {
602 | "version": "5.0.0",
603 | "resolved": "https://mirrors.cloud.tencent.com/npm/w3c-xmlserializer/-/w3c-xmlserializer-5.0.0.tgz",
604 | "integrity": "sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==",
605 | "dependencies": {
606 | "xml-name-validator": "^5.0.0"
607 | },
608 | "engines": {
609 | "node": ">=18"
610 | }
611 | },
612 | "node_modules/webidl-conversions": {
613 | "version": "7.0.0",
614 | "resolved": "https://mirrors.cloud.tencent.com/npm/webidl-conversions/-/webidl-conversions-7.0.0.tgz",
615 | "integrity": "sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==",
616 | "engines": {
617 | "node": ">=12"
618 | }
619 | },
620 | "node_modules/whatwg-encoding": {
621 | "version": "3.1.1",
622 | "resolved": "https://mirrors.cloud.tencent.com/npm/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz",
623 | "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==",
624 | "dependencies": {
625 | "iconv-lite": "0.6.3"
626 | },
627 | "engines": {
628 | "node": ">=18"
629 | }
630 | },
631 | "node_modules/whatwg-mimetype": {
632 | "version": "4.0.0",
633 | "resolved": "https://mirrors.cloud.tencent.com/npm/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz",
634 | "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==",
635 | "engines": {
636 | "node": ">=18"
637 | }
638 | },
639 | "node_modules/whatwg-url": {
640 | "version": "14.2.0",
641 | "resolved": "https://mirrors.cloud.tencent.com/npm/whatwg-url/-/whatwg-url-14.2.0.tgz",
642 | "integrity": "sha512-De72GdQZzNTUBBChsXueQUnPKDkg/5A5zp7pFDuQAj5UFoENpiACU0wlCvzpAGnTkj++ihpKwKyYewn/XNUbKw==",
643 | "dependencies": {
644 | "tr46": "^5.1.0",
645 | "webidl-conversions": "^7.0.0"
646 | },
647 | "engines": {
648 | "node": ">=18"
649 | }
650 | },
651 | "node_modules/ws": {
652 | "version": "8.18.1",
653 | "resolved": "https://mirrors.cloud.tencent.com/npm/ws/-/ws-8.18.1.tgz",
654 | "integrity": "sha512-RKW2aJZMXeMxVpnZ6bck+RswznaxmzdULiBr6KY7XkTnW8uvt0iT9H5DkHUChXrc+uurzwa0rVI16n/Xzjdz1w==",
655 | "engines": {
656 | "node": ">=10.0.0"
657 | },
658 | "peerDependencies": {
659 | "bufferutil": "^4.0.1",
660 | "utf-8-validate": ">=5.0.2"
661 | },
662 | "peerDependenciesMeta": {
663 | "bufferutil": {
664 | "optional": true
665 | },
666 | "utf-8-validate": {
667 | "optional": true
668 | }
669 | }
670 | },
671 | "node_modules/xml-name-validator": {
672 | "version": "5.0.0",
673 | "resolved": "https://mirrors.cloud.tencent.com/npm/xml-name-validator/-/xml-name-validator-5.0.0.tgz",
674 | "integrity": "sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==",
675 | "engines": {
676 | "node": ">=18"
677 | }
678 | },
679 | "node_modules/xmlchars": {
680 | "version": "2.2.0",
681 | "resolved": "https://mirrors.cloud.tencent.com/npm/xmlchars/-/xmlchars-2.2.0.tgz",
682 | "integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw=="
683 | }
684 | }
685 | }
686 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "dependencies": {
3 | "jsdom": "^26.0.0"
4 | }
5 | }
6 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | PyExecJS
2 | requests
3 | loguru
4 | python-dotenv
5 | retry
6 | openpyxl
--------------------------------------------------------------------------------
/static/xhs_xray.js:
--------------------------------------------------------------------------------
1 | self = global;
2 | window = global;
3 | var zc666;
4 | !function() {
5 | "use strict";
6 | var e, t, n, r, o, i = {}, u = {};
7 | function f(e) {
8 | var t = u[e];
9 | if (void 0 !== t)
10 | return t.exports;
11 | var n = u[e] = {
12 | id: e,
13 | loaded: !1,
14 | exports: {}
15 | };
16 | console.log(e);
17 | return i[e].call(n.exports, n, n.exports, f),
18 | n.loaded = !0,
19 | n.exports
20 | }
21 | f.m = i,
22 | f.amdO = {},
23 | e = [],
24 | f.O = function(t, n, r, o) {
25 | if (!n) {
26 | var i = 1 / 0;
27 | for (d = 0; d < e.length; d++) {
28 | n = e[d][0],
29 | r = e[d][1],
30 | o = e[d][2];
31 | for (var u = !0, c = 0; c < n.length; c++)
32 | (!1 & o || i >= o) && Object.keys(f.O).every((function(e) {
33 | return f.O[e](n[c])
34 | }
35 | )) ? n.splice(c--, 1) : (u = !1,
36 | o < i && (i = o));
37 | if (u) {
38 | e.splice(d--, 1);
39 | var a = r();
40 | void 0 !== a && (t = a)
41 | }
42 | }
43 | return t
44 | }
45 | o = o || 0;
46 | for (var d = e.length; d > 0 && e[d - 1][2] > o; d--)
47 | e[d] = e[d - 1];
48 | e[d] = [n, r, o]
49 | }
50 | ,
51 | f.n = function(e) {
52 | var t = e && e.__esModule ? function() {
53 | return e.default
54 | }
55 | : function() {
56 | return e
57 | }
58 | ;
59 | return f.d(t, {
60 | a: t
61 | }),
62 | t
63 | }
64 | ,
65 | n = Object.getPrototypeOf ? function(e) {
66 | return Object.getPrototypeOf(e)
67 | }
68 | : function(e) {
69 | return e.__proto__
70 | }
71 | ,
72 | f.t = function(e, r) {
73 | if (1 & r && (e = this(e)),
74 | 8 & r)
75 | return e;
76 | if ("object" == typeof e && e) {
77 | if (4 & r && e.__esModule)
78 | return e;
79 | if (16 & r && "function" == typeof e.then)
80 | return e
81 | }
82 | var o = Object.create(null);
83 | f.r(o);
84 | var i = {};
85 | t = t || [null, n({}), n([]), n(n)];
86 | for (var u = 2 & r && e; "object" == typeof u && !~t.indexOf(u); u = n(u))
87 | Object.getOwnPropertyNames(u).forEach((function(t) {
88 | i[t] = function() {
89 | return e[t]
90 | }
91 | }
92 | ));
93 | return i.default = function() {
94 | return e
95 | }
96 | ,
97 | f.d(o, i),
98 | o
99 | }
100 | ,
101 | f.d = function(e, t) {
102 | for (var n in t)
103 | f.o(t, n) && !f.o(e, n) && Object.defineProperty(e, n, {
104 | enumerable: !0,
105 | get: t[n]
106 | })
107 | }
108 | ,
109 | f.f = {},
110 | f.e = function(e) {
111 | return Promise.all(Object.keys(f.f).reduce((function(t, n) {
112 | return f.f[n](e, t),
113 | t
114 | }
115 | ), []))
116 | }
117 | ,
118 | f.u = function(e) {
119 | return "js/" + ({
120 | 41: "Board",
121 | 94: "Login",
122 | 256: "NPS",
123 | 290: "Notification",
124 | 406: "User",
125 | 464: "FeedToNote",
126 | 540: "Explore",
127 | 692: "Track",
128 | 763: "Search",
129 | 891: "xhs-web-player",
130 | 895: "Note",
131 | 898: "minor"
132 | }[e] || e) + "." + {
133 | 13: "849e078",
134 | 18: "88c4016",
135 | 41: "a4fad25",
136 | 64: "de4ace7",
137 | 92: "1b9e4df",
138 | 94: "01eead2",
139 | 168: "256b43c",
140 | 256: "3c5b745",
141 | 281: "ade9f6a",
142 | 290: "d0e6310",
143 | 334: "afb0229",
144 | 337: "e738619",
145 | 398: "80ce566",
146 | 406: "0477db9",
147 | 426: "fd994fa",
148 | 464: "073bfcc",
149 | 469: "a49ea26",
150 | 474: "738cddb",
151 | 494: "c852c82",
152 | 513: "7ca0915",
153 | 540: "f44da86",
154 | 563: "5fc3402",
155 | 588: "67edf6f",
156 | 591: "ddde7d9",
157 | 692: "0c3ac5e",
158 | 699: "c290318",
159 | 737: "9268c58",
160 | 763: "01c6b25",
161 | 766: "f0a8354",
162 | 772: "50c8fcf",
163 | 787: "385b767",
164 | 871: "d5ef805",
165 | 891: "e811881",
166 | 895: "697ec77",
167 | 898: "868733b"
168 | }[e] + ".chunk.js"
169 | }
170 | ,
171 | f.miniCssF = function(e) {
172 | return "css/" + ({
173 | 41: "Board",
174 | 94: "Login",
175 | 256: "NPS",
176 | 290: "Notification",
177 | 406: "User",
178 | 464: "FeedToNote",
179 | 540: "Explore",
180 | 763: "Search",
181 | 895: "Note",
182 | 898: "minor"
183 | }[e] || e) + "." + {
184 | 41: "b232e5e",
185 | 92: "95cabbe",
186 | 94: "b4971ae",
187 | 256: "5d4f927",
188 | 290: "efde4b1",
189 | 334: "0f69949",
190 | 337: "919c828",
191 | 398: "ffe8b37",
192 | 406: "e3c28d5",
193 | 426: "082db25",
194 | 464: "1bbfe82",
195 | 540: "d6040d3",
196 | 588: "3e8b57e",
197 | 763: "af3c4cd",
198 | 895: "98f4076",
199 | 898: "5a4e17f"
200 | }[e] + ".chunk.css"
201 | }
202 | ,
203 | f.g = function() {
204 | if ("object" == typeof globalThis)
205 | return globalThis;
206 | try {
207 | return this || new Function("return this")()
208 | } catch (e) {
209 | if ("object" == typeof window)
210 | return window
211 | }
212 | }(),
213 | f.o = function(e, t) {
214 | return Object.prototype.hasOwnProperty.call(e, t)
215 | }
216 | ,
217 | r = {},
218 | o = "xhs-pc-web:",
219 | f.l = function(e, t, n, i) {
220 | if (r[e])
221 | r[e].push(t);
222 | else {
223 | var u, c;
224 | if (void 0 !== n)
225 | for (var a = document.getElementsByTagName("script"), d = 0; d < a.length; d++) {
226 | var l = a[d];
227 | if (l.getAttribute("src") == e || l.getAttribute("data-webpack") == o + n) {
228 | u = l;
229 | break
230 | }
231 | }
232 | u || (c = !0,
233 | (u = document.createElement("script")).charset = "utf-8",
234 | u.timeout = 120,
235 | f.nc && u.setAttribute("nonce", f.nc),
236 | u.setAttribute("data-webpack", o + n),
237 | u.src = e),
238 | r[e] = [t];
239 | var s = function(t, n) {
240 | u.onerror = u.onload = null,
241 | clearTimeout(b);
242 | var o = r[e];
243 | if (delete r[e],
244 | u.parentNode && u.parentNode.removeChild(u),
245 | o && o.forEach((function(e) {
246 | return e(n)
247 | }
248 | )),
249 | t)
250 | return t(n)
251 | }
252 | , b = setTimeout(s.bind(null, void 0, {
253 | type: "timeout",
254 | target: u
255 | }), 12e4);
256 | u.onerror = s.bind(null, u.onerror),
257 | u.onload = s.bind(null, u.onload),
258 | c && document.head.appendChild(u)
259 | }
260 | }
261 | ,
262 | f.r = function(e) {
263 | "undefined" != typeof Symbol && Symbol.toStringTag && Object.defineProperty(e, Symbol.toStringTag, {
264 | value: "Module"
265 | }),
266 | Object.defineProperty(e, "__esModule", {
267 | value: !0
268 | })
269 | }
270 | ,
271 | f.nmd = function(e) {
272 | return e.paths = [],
273 | e.children || (e.children = []),
274 | e
275 | }
276 | ,
277 | f.p = "//fe-static.xhscdn.com/formula-static/xhs-pc-web/public/",
278 | function() {
279 | if ("undefined" != typeof document) {
280 | var e = function(e) {
281 | return new Promise((function(t, n) {
282 | var r = f.miniCssF(e)
283 | , o = f.p + r;
284 | if (function(e, t) {
285 | for (var n = document.getElementsByTagName("link"), r = 0; r < n.length; r++) {
286 | var o = (u = n[r]).getAttribute("data-href") || u.getAttribute("href");
287 | if ("stylesheet" === u.rel && (o === e || o === t))
288 | return u
289 | }
290 | var i = document.getElementsByTagName("style");
291 | for (r = 0; r < i.length; r++) {
292 | var u;
293 | if ((o = (u = i[r]).getAttribute("data-href")) === e || o === t)
294 | return u
295 | }
296 | }(r, o))
297 | return t();
298 | !function(e, t, n, r, o) {
299 | var i = document.createElement("link");
300 | i.rel = "stylesheet",
301 | i.type = "text/css",
302 | f.nc && (i.nonce = f.nc),
303 | i.onerror = i.onload = function(n) {
304 | if (i.onerror = i.onload = null,
305 | "load" === n.type)
306 | r();
307 | else {
308 | var u = n && n.type
309 | , f = n && n.target && n.target.href || t
310 | , c = new Error("Loading CSS chunk " + e + " failed.\n(" + u + ": " + f + ")");
311 | c.name = "ChunkLoadError",
312 | c.code = "CSS_CHUNK_LOAD_FAILED",
313 | c.type = u,
314 | c.request = f,
315 | i.parentNode && i.parentNode.removeChild(i),
316 | o(c)
317 | }
318 | }
319 | ,
320 | i.href = t,
321 | n ? n.parentNode.insertBefore(i, n.nextSibling) : document.head.appendChild(i)
322 | }(e, o, null, t, n)
323 | }
324 | ))
325 | }
326 | , t = {
327 | 577: 0
328 | };
329 | f.f.miniCss = function(n, r) {
330 | t[n] ? r.push(t[n]) : 0 !== t[n] && {
331 | 41: 1,
332 | 92: 1,
333 | 94: 1,
334 | 256: 1,
335 | 290: 1,
336 | 334: 1,
337 | 337: 1,
338 | 398: 1,
339 | 406: 1,
340 | 426: 1,
341 | 464: 1,
342 | 540: 1,
343 | 588: 1,
344 | 763: 1,
345 | 895: 1,
346 | 898: 1
347 | }[n] && r.push(t[n] = e(n).then((function() {
348 | t[n] = 0
349 | }
350 | ), (function(e) {
351 | throw delete t[n],
352 | e
353 | }
354 | )))
355 | }
356 | }
357 | }(),
358 | function() {
359 | var e = {
360 | 577: 0
361 | };
362 | f.f.j = function(t, n) {
363 | var r = f.o(e, t) ? e[t] : void 0;
364 | if (0 !== r)
365 | if (r)
366 | n.push(r[2]);
367 | else if (577 != t) {
368 | var o = new Promise((function(n, o) {
369 | r = e[t] = [n, o]
370 | }
371 | ));
372 | n.push(r[2] = o);
373 | var i = f.p + f.u(t)
374 | , u = new Error;
375 | f.l(i, (function(n) {
376 | if (f.o(e, t) && (0 !== (r = e[t]) && (e[t] = void 0),
377 | r)) {
378 | var o = n && ("load" === n.type ? "missing" : n.type)
379 | , i = n && n.target && n.target.src;
380 | u.message = "Loading chunk " + t + " failed.\n(" + o + ": " + i + ")",
381 | u.name = "ChunkLoadError",
382 | u.type = o,
383 | u.request = i,
384 | r[1](u)
385 | }
386 | }
387 | ), "chunk-" + t, t)
388 | } else
389 | e[t] = 0
390 | }
391 | ,
392 | f.O.j = function(t) {
393 | return 0 === e[t]
394 | }
395 | ;
396 | var t = function(t, n) {
397 | var r, o, i = n[0], u = n[1], c = n[2], a = 0;
398 | if (i.some((function(t) {
399 | return 0 !== e[t]
400 | }
401 | ))) {
402 | for (r in u)
403 | f.o(u, r) && (f.m[r] = u[r]);
404 | if (c)
405 | var d = c(f)
406 | }
407 | for (t && t(n); a < i.length; a++)
408 | o = i[a],
409 | f.o(e, o) && e[o] && e[o][0](),
410 | e[o] = 0;
411 | return f.O(d)
412 | }
413 | , n = self.webpackChunkxhs_pc_web = self.webpackChunkxhs_pc_web || [];
414 | n.forEach(t.bind(null, 0)),
415 | n.push = t.bind(null, n.push.bind(n))
416 | }()
417 | zc666 = f;
418 | }();
419 | //# sourceMappingURL=https://picasso-private-1251524319.cos.ap-shanghai.myqcloud.com/data/formula-static/formula/xhs-pc-web/runtime-main.8718828.js.map
420 | try {
421 | require('./xhs_xray_pack1.js');
422 | } catch (e) {
423 | try {
424 | require('../static/xhs_xray_pack1.js');
425 | } catch (e) {
426 | require('./static/xhs_xray_pack1.js');
427 | }
428 | }
429 | try {
430 | require('./xhs_xray_pack2.js');
431 | } catch (e) {
432 | try {
433 | require('../static/xhs_xray_pack2.js');
434 | } catch (e) {
435 | require('./static/xhs_xray_pack2.js');
436 | }
437 | }
438 | var n = zc666(36497)
439 | , o = zc666(609)
440 | , i = zc666(2030);
441 | var a = zc666(81422)
442 | , u = zc666(49600);
443 |
444 | traceId = function() {
445 | var t, e, r, s = arguments.length > 0 && void 0 !== arguments[0] ? arguments[0] : i();
446 | return o(t = "".concat(n(e = u.fromNumber(s, !0).shiftLeft(23).or(a.Int.seq()).toString(16)).call(e, 16, "0"))).call(t, n(r = new u(a.Int.random(32),a.Int.random(32),!0).toString(16)).call(r, 16, "0"))
447 | }
448 |
--------------------------------------------------------------------------------
/xhs_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cv-cat/Spider_XHS/647a7add4b9d1eb3a9c7afa18a6a9205a190dde7/xhs_utils/__init__.py
--------------------------------------------------------------------------------
/xhs_utils/common_utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | from loguru import logger
3 | from dotenv import load_dotenv
4 |
5 | def load_env():
6 | load_dotenv()
7 | cookies_str = os.getenv('COOKIES')
8 | return cookies_str
9 |
10 | def init():
11 | media_base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../datas/media_datas'))
12 | excel_base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../datas/excel_datas'))
13 | for base_path in [media_base_path, excel_base_path]:
14 | if not os.path.exists(base_path):
15 | os.makedirs(base_path)
16 | logger.info(f'创建目录 {base_path}')
17 | cookies_str = load_env()
18 | base_path = {
19 | 'media': media_base_path,
20 | 'excel': excel_base_path,
21 | }
22 | return cookies_str, base_path
23 |
--------------------------------------------------------------------------------
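A minimal usage sketch for the helpers above, assuming the script is run from the project root and COOKIES is filled in the .env file:

```python
from xhs_utils.common_utils import init

# init() loads COOKIES from .env and creates datas/media_datas and datas/excel_datas if missing
cookies_str, base_path = init()
print(base_path['media'], base_path['excel'])
```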
/xhs_utils/cookie_util.py:
--------------------------------------------------------------------------------
1 | def trans_cookies(cookies_str):
2 | if '; ' in cookies_str:
3 | ck = {i.split('=')[0]: '='.join(i.split('=')[1:]) for i in cookies_str.split('; ')}
4 | else:
5 | ck = {i.split('=')[0]: '='.join(i.split('=')[1:]) for i in cookies_str.split(';')}
6 | return ck
7 |
--------------------------------------------------------------------------------
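A small sketch of trans_cookies; the cookie string below is a placeholder rather than a real login cookie:

```python
from xhs_utils.cookie_util import trans_cookies

cookies_str = 'a1=placeholder_a1; webId=placeholder_web_id; web_session=placeholder_session'
cookies = trans_cookies(cookies_str)
# only the first '=' splits key from value, so values containing '=' survive intact
print(cookies['a1'])  # -> placeholder_a1
```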
/xhs_utils/data_util.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import re
4 | import time
5 | import openpyxl
6 | import requests
7 | from loguru import logger
8 | from retry import retry
9 |
10 |
11 | def norm_str(s):
12 |     new_str = re.sub(r"[\\/:*?\"<>| ]+", "", s).replace('\n', '').replace('\r', '')
13 | return new_str
14 |
15 | def norm_text(text):
16 | ILLEGAL_CHARACTERS_RE = re.compile(r'[\000-\010]|[\013-\014]|[\016-\037]')
17 | text = ILLEGAL_CHARACTERS_RE.sub(r'', text)
18 | return text
19 |
20 |
21 | def timestamp_to_str(timestamp):
22 | time_local = time.localtime(timestamp / 1000)
23 | dt = time.strftime("%Y-%m-%d %H:%M:%S", time_local)
24 | return dt
25 |
26 | def handle_user_info(data, user_id):
27 | home_url = f'https://www.xiaohongshu.com/user/profile/{user_id}'
28 | nickname = data['basic_info']['nickname']
29 | avatar = data['basic_info']['imageb']
30 | red_id = data['basic_info']['red_id']
31 | gender = data['basic_info']['gender']
32 | if gender == 0:
33 | gender = '男'
34 | elif gender == 1:
35 | gender = '女'
36 | else:
37 | gender = '未知'
38 | ip_location = data['basic_info']['ip_location']
39 | desc = data['basic_info']['desc']
40 | follows = data['interactions'][0]['count']
41 | fans = data['interactions'][1]['count']
42 | interaction = data['interactions'][2]['count']
43 | tags_temp = data['tags']
44 | tags = []
45 | for tag in tags_temp:
46 | try:
47 | tags.append(tag['name'])
48 | except:
49 | pass
50 | return {
51 | 'user_id': user_id,
52 | 'home_url': home_url,
53 | 'nickname': nickname,
54 | 'avatar': avatar,
55 | 'red_id': red_id,
56 | 'gender': gender,
57 | 'ip_location': ip_location,
58 | 'desc': desc,
59 | 'follows': follows,
60 | 'fans': fans,
61 | 'interaction': interaction,
62 | 'tags': tags,
63 | }
64 |
65 | def handle_note_info(data):
66 | note_id = data['id']
67 | note_url = data['url']
68 | note_type = data['note_card']['type']
69 | if note_type == 'normal':
70 | note_type = '图集'
71 | else:
72 | note_type = '视频'
73 | user_id = data['note_card']['user']['user_id']
74 | home_url = f'https://www.xiaohongshu.com/user/profile/{user_id}'
75 | nickname = data['note_card']['user']['nickname']
76 | avatar = data['note_card']['user']['avatar']
77 | title = data['note_card']['title']
78 | if title.strip() == '':
79 |         title = '无标题'
80 | desc = data['note_card']['desc']
81 | liked_count = data['note_card']['interact_info']['liked_count']
82 | collected_count = data['note_card']['interact_info']['collected_count']
83 | comment_count = data['note_card']['interact_info']['comment_count']
84 | share_count = data['note_card']['interact_info']['share_count']
85 | image_list_temp = data['note_card']['image_list']
86 | image_list = []
87 | for image in image_list_temp:
88 | try:
89 | image_list.append(image['info_list'][1]['url'])
90 | # success, msg, img_url = XHS_Apis.get_note_no_water_img(image['info_list'][1]['url'])
91 | # image_list.append(img_url)
92 | except:
93 | pass
94 | if note_type == '视频':
95 | video_cover = image_list[0]
96 | video_addr = 'https://sns-video-bd.xhscdn.com/' + data['note_card']['video']['consumer']['origin_video_key']
97 | # success, msg, video_addr = XHS_Apis.get_note_no_water_video(note_id)
98 | else:
99 | video_cover = None
100 | video_addr = None
101 | tags_temp = data['note_card']['tag_list']
102 | tags = []
103 | for tag in tags_temp:
104 | try:
105 | tags.append(tag['name'])
106 | except:
107 | pass
108 | upload_time = timestamp_to_str(data['note_card']['time'])
109 | if 'ip_location' in data['note_card']:
110 | ip_location = data['note_card']['ip_location']
111 | else:
112 | ip_location = '未知'
113 | return {
114 | 'note_id': note_id,
115 | 'note_url': note_url,
116 | 'note_type': note_type,
117 | 'user_id': user_id,
118 | 'home_url': home_url,
119 | 'nickname': nickname,
120 | 'avatar': avatar,
121 | 'title': title,
122 | 'desc': desc,
123 | 'liked_count': liked_count,
124 | 'collected_count': collected_count,
125 | 'comment_count': comment_count,
126 | 'share_count': share_count,
127 | 'video_cover': video_cover,
128 | 'video_addr': video_addr,
129 | 'image_list': image_list,
130 | 'tags': tags,
131 | 'upload_time': upload_time,
132 | 'ip_location': ip_location,
133 | }
134 |
135 | def handle_comment_info(data):
136 | note_id = data['note_id']
137 | note_url = data['note_url']
138 | comment_id = data['id']
139 | user_id = data['user_info']['user_id']
140 | home_url = f'https://www.xiaohongshu.com/user/profile/{user_id}'
141 | nickname = data['user_info']['nickname']
142 | avatar = data['user_info']['image']
143 | content = data['content']
144 | show_tags = data['show_tags']
145 | like_count = data['like_count']
146 | upload_time = timestamp_to_str(data['create_time'])
147 | try:
148 | ip_location = data['ip_location']
149 | except:
150 | ip_location = '未知'
151 | pictures = []
152 | try:
153 | pictures_temp = data['pictures']
154 | for picture in pictures_temp:
155 | try:
156 | pictures.append(picture['info_list'][1]['url'])
157 | # success, msg, img_url = XHS_Apis.get_note_no_water_img(picture['info_list'][1]['url'])
158 | # pictures.append(img_url)
159 | except:
160 | pass
161 | except:
162 | pass
163 | return {
164 | 'note_id': note_id,
165 | 'note_url': note_url,
166 | 'comment_id': comment_id,
167 | 'user_id': user_id,
168 | 'home_url': home_url,
169 | 'nickname': nickname,
170 | 'avatar': avatar,
171 | 'content': content,
172 | 'show_tags': show_tags,
173 | 'like_count': like_count,
174 | 'upload_time': upload_time,
175 | 'ip_location': ip_location,
176 | 'pictures': pictures,
177 | }
178 | def save_to_xlsx(datas, file_path, type='note'):
179 | wb = openpyxl.Workbook()
180 | ws = wb.active
181 | if type == 'note':
182 | headers = ['笔记id', '笔记url', '笔记类型', '用户id', '用户主页url', '昵称', '头像url', '标题', '描述', '点赞数量', '收藏数量', '评论数量', '分享数量', '视频封面url', '视频地址url', '图片地址url列表', '标签', '上传时间', 'ip归属地']
183 | elif type == 'user':
184 | headers = ['用户id', '用户主页url', '用户名', '头像url', '小红书号', '性别', 'ip地址', '介绍', '关注数量', '粉丝数量', '作品被赞和收藏数量', '标签']
185 | else:
186 | headers = ['笔记id', '笔记url', '评论id', '用户id', '用户主页url', '昵称', '头像url', '评论内容', '评论标签', '点赞数量', '上传时间', 'ip归属地', '图片地址url列表']
187 | ws.append(headers)
188 | for data in datas:
189 | data = {k: norm_text(str(v)) for k, v in data.items()}
190 | ws.append(list(data.values()))
191 | wb.save(file_path)
192 | logger.info(f'数据保存至 {file_path}')
193 |
194 | def download_media(path, name, url, type):
195 | if type == 'image':
196 | content = requests.get(url).content
197 | with open(path + '/' + name + '.jpg', mode="wb") as f:
198 | f.write(content)
199 | elif type == 'video':
200 | res = requests.get(url, stream=True)
201 | size = 0
202 | chunk_size = 1024 * 1024
203 | with open(path + '/' + name + '.mp4', mode="wb") as f:
204 | for data in res.iter_content(chunk_size=chunk_size):
205 | f.write(data)
206 | size += len(data)
207 |
208 | def save_user_detail(user, path):
209 | with open(f'{path}/detail.txt', mode="w", encoding="utf-8") as f:
210 |         # write each field to detail.txt, one line per field
211 | f.write(f"用户id: {user['user_id']}\n")
212 | f.write(f"用户主页url: {user['home_url']}\n")
213 | f.write(f"用户名: {user['nickname']}\n")
214 | f.write(f"头像url: {user['avatar']}\n")
215 | f.write(f"小红书号: {user['red_id']}\n")
216 | f.write(f"性别: {user['gender']}\n")
217 | f.write(f"ip地址: {user['ip_location']}\n")
218 | f.write(f"介绍: {user['desc']}\n")
219 | f.write(f"关注数量: {user['follows']}\n")
220 | f.write(f"粉丝数量: {user['fans']}\n")
221 | f.write(f"作品被赞和收藏数量: {user['interaction']}\n")
222 | f.write(f"标签: {user['tags']}\n")
223 |
224 | def save_note_detail(note, path):
225 | with open(f'{path}/detail.txt', mode="w", encoding="utf-8") as f:
226 |         # write each field to detail.txt, one line per field
227 | f.write(f"笔记id: {note['note_id']}\n")
228 | f.write(f"笔记url: {note['note_url']}\n")
229 | f.write(f"笔记类型: {note['note_type']}\n")
230 | f.write(f"用户id: {note['user_id']}\n")
231 | f.write(f"用户主页url: {note['home_url']}\n")
232 | f.write(f"昵称: {note['nickname']}\n")
233 | f.write(f"头像url: {note['avatar']}\n")
234 | f.write(f"标题: {note['title']}\n")
235 | f.write(f"描述: {note['desc']}\n")
236 | f.write(f"点赞数量: {note['liked_count']}\n")
237 | f.write(f"收藏数量: {note['collected_count']}\n")
238 | f.write(f"评论数量: {note['comment_count']}\n")
239 | f.write(f"分享数量: {note['share_count']}\n")
240 | f.write(f"视频封面url: {note['video_cover']}\n")
241 | f.write(f"视频地址url: {note['video_addr']}\n")
242 | f.write(f"图片地址url列表: {note['image_list']}\n")
243 | f.write(f"标签: {note['tags']}\n")
244 | f.write(f"上传时间: {note['upload_time']}\n")
245 | f.write(f"ip归属地: {note['ip_location']}\n")
246 |
247 |
248 |
249 | @retry(tries=3, delay=1)
250 | def download_note(note_info, path):
251 | note_id = note_info['note_id']
252 | user_id = note_info['user_id']
253 | title = note_info['title']
254 | title = norm_str(title)
255 | nickname = note_info['nickname']
256 | nickname = norm_str(nickname)
257 | if title.strip() == '':
258 |         title = '无标题'
259 | save_path = f'{path}/{nickname}_{user_id}/{title}_{note_id}'
260 | check_and_create_path(save_path)
261 | with open(f'{save_path}/info.json', mode='w', encoding='utf-8') as f:
262 |         f.write(json.dumps(note_info, ensure_ascii=False) + '\n')
263 | note_type = note_info['note_type']
264 | save_note_detail(note_info, save_path)
265 | if note_type == '图集':
266 | for img_index, img_url in enumerate(note_info['image_list']):
267 | download_media(save_path, f'image_{img_index}', img_url, 'image')
268 | elif note_type == '视频':
269 | download_media(save_path, 'cover', note_info['video_cover'], 'image')
270 | download_media(save_path, 'video', note_info['video_addr'], 'video')
271 | return save_path
272 |
273 |
274 | def check_and_create_path(path):
275 | if not os.path.exists(path):
276 | os.makedirs(path)
277 |
--------------------------------------------------------------------------------
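A hedged example of composing the helpers above to persist a single record; the dict mirrors the shape returned by handle_user_info and every value is a placeholder:

```python
from xhs_utils.data_util import norm_str, check_and_create_path, save_to_xlsx

user = {
    'user_id': 'placeholder_id',
    'home_url': 'https://www.xiaohongshu.com/user/profile/placeholder_id',
    'nickname': norm_str('示例昵称*?'),  # strip characters that are illegal in file names
    'avatar': '', 'red_id': '', 'gender': '未知', 'ip_location': '未知',
    'desc': '', 'follows': 0, 'fans': 0, 'interaction': 0, 'tags': [],
}
check_and_create_path('datas/excel_datas')
save_to_xlsx([user], 'datas/excel_datas/users.xlsx', type='user')
```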
/xhs_utils/xhs_util.py:
--------------------------------------------------------------------------------
1 | import json
2 | import math
3 | import random
4 | import execjs
5 | from xhs_utils.cookie_util import trans_cookies
6 |
7 | try:
8 | js = execjs.compile(open(r'../static/xhs_xs_xsc_56.js', 'r', encoding='utf-8').read())
9 | except:
10 | js = execjs.compile(open(r'static/xhs_xs_xsc_56.js', 'r', encoding='utf-8').read())
11 |
12 | try:
13 | xray_js = execjs.compile(open(r'../static/xhs_xray.js', 'r', encoding='utf-8').read())
14 | except:
15 | xray_js = execjs.compile(open(r'static/xhs_xray.js', 'r', encoding='utf-8').read())
16 |
17 | def generate_x_b3_traceid(length=16):
18 |     x_b3_traceid = ""
19 |     for _ in range(length):
20 | x_b3_traceid += "abcdef0123456789"[math.floor(16 * random.random())]
21 | return x_b3_traceid
22 |
23 | def generate_xs_xs_common(a1, api, data=''):
24 | ret = js.call('get_request_headers_params', api, data, a1)
25 | xs, xt, xs_common = ret['xs'], ret['xt'], ret['xs_common']
26 | return xs, xt, xs_common
27 |
28 | def generate_xs(a1, api, data=''):
29 | ret = js.call('get_xs', api, data, a1)
30 | xs, xt = ret['X-s'], ret['X-t']
31 | return xs, xt
32 |
33 | def generate_xray_traceid():
34 | return xray_js.call('traceId')
35 | def get_common_headers():
36 | return {
37 | "authority": "www.xiaohongshu.com",
38 | "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
39 | "accept-language": "zh-CN,zh;q=0.9",
40 | "cache-control": "no-cache",
41 | "pragma": "no-cache",
42 | "referer": "https://www.xiaohongshu.com/",
43 | "sec-ch-ua": "\"Chromium\";v=\"122\", \"Not(A:Brand\";v=\"24\", \"Google Chrome\";v=\"122\"",
44 | "sec-ch-ua-mobile": "?0",
45 | "sec-ch-ua-platform": "\"Windows\"",
46 | "sec-fetch-dest": "document",
47 | "sec-fetch-mode": "navigate",
48 | "sec-fetch-site": "same-origin",
49 | "sec-fetch-user": "?1",
50 | "upgrade-insecure-requests": "1",
51 | "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
52 | }
53 | def get_request_headers_template():
54 | return {
55 | "authority": "edith.xiaohongshu.com",
56 | "accept": "application/json, text/plain, */*",
57 | "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
58 | "cache-control": "no-cache",
59 | "content-type": "application/json;charset=UTF-8",
60 | "origin": "https://www.xiaohongshu.com",
61 | "pragma": "no-cache",
62 | "referer": "https://www.xiaohongshu.com/",
63 | "sec-ch-ua": "\"Not A(Brand\";v=\"99\", \"Microsoft Edge\";v=\"121\", \"Chromium\";v=\"121\"",
64 | "sec-ch-ua-mobile": "?0",
65 | "sec-ch-ua-platform": "\"Windows\"",
66 | "sec-fetch-dest": "empty",
67 | "sec-fetch-mode": "cors",
68 | "sec-fetch-site": "same-site",
69 | "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0",
70 | "x-b3-traceid": "",
71 | "x-s": "",
72 | "x-s-common": "",
73 | "x-t": "",
74 | "x-xray-traceid": generate_xray_traceid()
75 | }
76 |
77 | def generate_headers(a1, api, data=''):
78 | xs, xt, xs_common = generate_xs_xs_common(a1, api, data)
79 | x_b3_traceid = generate_x_b3_traceid()
80 | headers = get_request_headers_template()
81 | headers['x-s'] = xs
82 | headers['x-t'] = str(xt)
83 | headers['x-s-common'] = xs_common
84 | headers['x-b3-traceid'] = x_b3_traceid
85 | if data:
86 | data = json.dumps(data, separators=(',', ':'), ensure_ascii=False)
87 | return headers, data
88 |
89 | def generate_request_params(cookies_str, api, data=''):
90 | cookies = trans_cookies(cookies_str)
91 | a1 = cookies['a1']
92 | headers, data = generate_headers(a1, api, data)
93 | return headers, cookies, data
94 |
95 | def splice_str(api, params):
96 | url = api + '?'
97 | for key, value in params.items():
98 | if value is None:
99 | value = ''
100 |         url += f'{key}={value}&'
101 | return url[:-1]
102 |
103 |
--------------------------------------------------------------------------------
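Finally, a hedged end-to-end sketch of the signing flow above; the endpoint path and params are purely illustrative (the real endpoints, and whether the query string is part of the signed payload, live in apis/pc_apis.py), and a valid cookie plus the Node.js runtime are assumed:

```python
import requests
from xhs_utils.common_utils import init
from xhs_utils.xhs_util import generate_request_params, splice_str

cookies_str, _ = init()

api = '/api/sns/web/v1/user_posted'            # illustrative path only
params = {'user_id': 'placeholder_id', 'cursor': '', 'num': '30'}
splice_api = splice_str(api, params)           # '/api/...?user_id=placeholder_id&cursor=&num=30'
headers, cookies, _ = generate_request_params(cookies_str, splice_api)
res = requests.get('https://edith.xiaohongshu.com' + splice_api, headers=headers, cookies=cookies)
print(res.status_code, res.text[:200])
```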