├── .env ├── .gitignore ├── Dockerfile ├── README.md ├── apis ├── __init__.py └── pc_apis.py ├── author ├── qq.jpg ├── wx.png ├── wx_pay.png └── zfb_pay.jpg ├── main.py ├── package-lock.json ├── package.json ├── requirements.txt ├── static ├── xhs_xray.js ├── xhs_xray_pack1.js ├── xhs_xray_pack2.js └── xhs_xs_xsc_56.js └── xhs_utils ├── __init__.py ├── common_utils.py ├── cookie_util.py ├── data_util.py └── xhs_util.py /.env: -------------------------------------------------------------------------------- 1 | COOKIES='' -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | node_modules/ 3 | *.so 4 | .Python 5 | build/ 6 | develop-eggs/ 7 | dist/ 8 | downloads/ 9 | eggs/ 10 | .eggs/ 11 | lib/ 12 | lib64/ 13 | parts/ 14 | sdist/ 15 | var/ 16 | wheels/ 17 | MANIFEST 18 | *.manifest 19 | *.spec 20 | .cache 21 | *.log 22 | local_settings.py 23 | db.sqlite3 24 | __pypackages__/ 25 | .venv 26 | env/ 27 | venv/ 28 | ENV/ 29 | env.bak/ 30 | venv.bak/ 31 | 32 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9 2 | 3 | WORKDIR /app 4 | 5 | COPY . . 6 | 7 | RUN npm install 8 | RUN pip install --no-cache-dir -r requirements.txt 9 | 10 | # docker build -t spider_xhs . 11 | # docker run -it spider_xhs bash -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 | 4 | Spider_XHS logo 5 | 6 | 7 |

8 | 9 | 10 |
11 | 12 | Python 3.7+ 13 | 14 | 15 | NodeJS 18+ 16 | 17 |
18 | 19 | 20 | 21 | # Spider_XHS 22 | 23 | **✨ 专业的小红书数据采集解决方案,支持笔记爬取,保存格式为excel或者media** 24 | 25 | **✨ 小红书全域运营解决方法,AI一键改写笔记(图文,视频)直接上传** 26 | 27 | ## ⭐功能列表 28 | 29 | **⚠️ 任何涉及数据注入的操作都是不被允许的,本项目仅供学习交流使用,如有违反,后果自负** 30 | 31 | | 模块 | 已实现 | 32 | |----------|---------------------------------------------------------------------------------| 33 | | 小红书创作者平台 | ✅ 二维码登录
✅ 手机验证码登录
✅ 上传(图集、视频)作品
✅ 查看自己上传的作品 | 34 | | 小红书PC | ✅ 二维码登录<br>
✅ 手机验证码登录
✅ 获取无水印图片
✅ 获取无水印视频
✅ 获取主页的所有频道
✅ 获取主页推荐笔记
✅ 获取某个用户的信息
✅ 用户自己的信息
✅ 获取某个用户上传的笔记
✅ 获取某个用户所有的喜欢笔记
✅ 获取某个用户所有的收藏笔记
✅ 获取某个笔记的详细内容
✅ 搜索笔记内容
✅ 搜索用户内容
✅ 获取某个笔记的评论
✅ 获取未读消息信息
✅ 获取收到的评论和@提醒信息
✅ 获取收到的点赞和收藏信息
✅ 获取新增关注信息| 35 | 36 | 37 | ## 🌟 功能特性 38 | 39 | - ✅ **多维度数据采集** 40 | - 用户主页信息 41 | - 笔记详细内容 42 | - 智能搜索结果抓取 43 | - 🚀 **高性能架构** 44 | - 自动重试机制 45 | - 🔒 **安全稳定** 46 | - 小红书最新API适配 47 | - 异常处理机制 48 | - proxy代理 49 | - 🎨 **便捷管理** 50 | - 结构化目录存储 51 | - 格式化输出(JSON/EXCEL/MEDIA) 52 | 53 | ## 🎨效果图 54 | ### 处理后的所有用户 55 | ![image](https://github.com/cv-cat/Spider_XHS/assets/94289429/00902dbd-4da1-45bc-90bb-19f5856a04ad) 56 | ### 某个用户所有的笔记 57 | ![image](https://github.com/cv-cat/Spider_XHS/assets/94289429/880884e8-4a1d-4dc1-a4dc-e168dd0e9896) 58 | ### 某个笔记具体的内容 59 | ![image](https://github.com/cv-cat/Spider_XHS/assets/94289429/d17f3f4e-cd44-4d3a-b9f6-d880da626cc8) 60 | ### 保存的excel 61 | ![image](https://github.com/user-attachments/assets/707f20ed-be27-4482-89b3-a5863bc360e7) 62 | 63 | ## 🛠️ 快速开始 64 | ### ⛳运行环境 65 | - Python 3.7+ 66 | - Node.js 18+ 67 | 68 | ### 🎯安装依赖 69 | ``` 70 | pip install -r requirements.txt 71 | npm install 72 | ``` 73 | 74 | ### 🎨配置文件 75 | 配置文件在项目根目录.env文件中,将下图自己的登录cookie放入其中,cookie获取➡️在浏览器f12打开控制台,点击网络,点击fetch,找一个接口点开 76 | ![image](https://github.com/user-attachments/assets/6a7e4ecb-0432-4581-890a-577e0eae463d) 77 | 78 | 复制cookie到.env文件中(注意!登录小红书后的cookie才是有效的,不登陆没有用) 79 | ![image](https://github.com/user-attachments/assets/5e62bc35-d758-463e-817c-7dcaacbee13c) 80 | 81 | ### 🚀运行项目 82 | ``` 83 | python main.py 84 | ``` 85 | 86 | ### 🗝️注意事项 87 | - main.py中的代码是爬虫的入口,可以根据自己的需求进行修改 88 | - apis/pc_apis.py中的代码包含了所有的api接口,可以根据自己的需求进行修改 89 | 90 | 91 | ## 🍥日志 92 | 93 | | 日期 | 说明 | 94 | |----------| --------------------------- | 95 | | 23/08/09 | - 首次提交 | 96 | | 23/09/13 | - api更改params增加两个字段,修复图片无法下载,有些页面无法访问导致报错 | 97 | | 23/09/16 | - 较大视频出现编码问题,修复视频编码问题,加入异常处理 | 98 | | 23/09/18 | - 代码重构,加入失败重试 | 99 | | 23/09/19 | - 新增下载搜索结果功能 | 100 | | 23/10/05 | - 新增跳过已下载功能,获取更详细的笔记和用户信息| 101 | | 23/10/08 | - 上传代码☞Pypi,可通过pip install安装本项目| 102 | | 23/10/17 | - 搜索下载新增排序方式选项(1、综合排序 2、热门排序 3、最新排序)| 103 | | 23/10/21 | - 新增图形化界面,上传至release v2.1.0| 104 | | 23/10/28 | - Fix Bug 
修复搜索功能出现的隐藏问题| 105 | | 25/03/18 | - 更新API,修复部分问题| 106 | 107 | 108 | 109 | ## 🧸额外说明 110 | 1. 感谢star⭐和follow📰!不时更新 111 | 2. 作者的联系方式在主页里,有问题可以随时联系我 112 | 3. 可以关注下作者的其他项目,欢迎 PR 和 issue 113 | 4. 感谢赞助!如果此项目对您有帮助,请作者喝一杯奶茶~~ (开心一整天😊😊) 114 | 5. thank you~~~ 115 | 116 |
117 | 微信赞赏码  118 | 支付宝收款码 119 |
120 | 121 | 122 | ## 📈 Star 趋势 123 | 124 | 125 | 126 | 127 | Star History Chart 128 | 129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /apis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cv-cat/Spider_XHS/647a7add4b9d1eb3a9c7afa18a6a9205a190dde7/apis/__init__.py -------------------------------------------------------------------------------- /apis/pc_apis.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | import json 3 | import re 4 | import urllib 5 | import requests 6 | from xhs_utils.xhs_util import splice_str, generate_request_params, generate_x_b3_traceid, get_common_headers 7 | from loguru import logger 8 | 9 | """ 10 | 获小红书的api 11 | :param cookies_str: 你的cookies 12 | """ 13 | class XHS_Apis(): 14 | def __init__(self): 15 | self.base_url = "https://edith.xiaohongshu.com" 16 | 17 | def get_homefeed_all_channel(self, cookies_str: str, proxies: dict = None): 18 | """ 19 | 获取主页的所有频道 20 | 返回主页的所有频道 21 | """ 22 | res_json = None 23 | try: 24 | api = "/api/sns/web/v1/homefeed/category" 25 | headers, cookies, data = generate_request_params(cookies_str, api) 26 | response = requests.get(self.base_url + api, headers=headers, cookies=cookies, proxies=proxies) 27 | res_json = response.json() 28 | success, msg = res_json["success"], res_json["msg"] 29 | except Exception as e: 30 | success = False 31 | msg = str(e) 32 | return success, msg, res_json 33 | 34 | def get_homefeed_recommend(self, category, cursor_score, refresh_type, note_index, cookies_str: str, proxies: dict = None): 35 | """ 36 | 获取主页推荐的笔记 37 | :param category: 你想要获取的频道 38 | :param cursor_score: 你想要获取的笔记的cursor 39 | :param refresh_type: 你想要获取的笔记的刷新类型 40 | :param note_index: 你想要获取的笔记的index 41 | :param cookies_str: 你的cookies 42 | 返回主页推荐的笔记 43 | """ 44 | res_json = None 45 | try: 46 | api = 
f"/api/sns/web/v1/homefeed" 47 | data = { 48 | "cursor_score": cursor_score, 49 | "num": 20, 50 | "refresh_type": refresh_type, 51 | "note_index": note_index, 52 | "unread_begin_note_id": "", 53 | "unread_end_note_id": "", 54 | "unread_note_count": 0, 55 | "category": category, 56 | "search_key": "", 57 | "need_num": 10, 58 | "image_formats": [ 59 | "jpg", 60 | "webp", 61 | "avif" 62 | ], 63 | "need_filter_image": False 64 | } 65 | headers, cookies, trans_data = generate_request_params(cookies_str, api, data) 66 | response = requests.post(self.base_url + api, headers=headers, data=trans_data, cookies=cookies, proxies=proxies) 67 | res_json = response.json() 68 | success, msg = res_json["success"], res_json["msg"] 69 | except Exception as e: 70 | success = False 71 | msg = str(e) 72 | return success, msg, res_json 73 | 74 | def get_homefeed_recommend_by_num(self, category, require_num, cookies_str: str, proxies: dict = None): 75 | """ 76 | 根据数量获取主页推荐的笔记 77 | :param category: 你想要获取的频道 78 | :param require_num: 你想要获取的笔记的数量 79 | :param cookies_str: 你的cookies 80 | 根据数量返回主页推荐的笔记 81 | """ 82 | cursor_score, refresh_type, note_index = "", 1, 0 83 | note_list = [] 84 | try: 85 | while True: 86 | success, msg, res_json = self.get_homefeed_recommend(category, cursor_score, refresh_type, note_index, cookies_str, proxies) 87 | if not success: 88 | raise Exception(msg) 89 | if "items" not in res_json["data"]: 90 | break 91 | notes = res_json["data"]["items"] 92 | note_list.extend(notes) 93 | cursor_score = res_json["data"]["cursor_score"] 94 | refresh_type = 3 95 | note_index += 20 96 | if len(note_list) > require_num: 97 | break 98 | except Exception as e: 99 | success = False 100 | msg = str(e) 101 | if len(note_list) > require_num: 102 | note_list = note_list[:require_num] 103 | return success, msg, note_list 104 | 105 | def get_user_info(self, user_id: str, cookies_str: str, proxies: dict = None): 106 | """ 107 | 获取用户的信息 108 | :param user_id: 你想要获取的用户的id 109 | :param 
cookies_str: 你的cookies 110 | 返回用户的信息 111 | """ 112 | res_json = None 113 | try: 114 | api = f"/api/sns/web/v1/user/otherinfo" 115 | params = { 116 | "target_user_id": user_id 117 | } 118 | splice_api = splice_str(api, params) 119 | headers, cookies, data = generate_request_params(cookies_str, splice_api) 120 | response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies) 121 | res_json = response.json() 122 | success, msg = res_json["success"], res_json["msg"] 123 | except Exception as e: 124 | success = False 125 | msg = str(e) 126 | return success, msg, res_json 127 | 128 | def get_user_self_info(self, cookies_str: str, proxies: dict = None): 129 | """ 130 | 获取用户自己的信息1 131 | :param cookies_str: 你的cookies 132 | 返回用户自己的信息1 133 | """ 134 | res_json = None 135 | try: 136 | api = f"/api/sns/web/v1/user/selfinfo" 137 | headers, cookies, data = generate_request_params(cookies_str, api) 138 | response = requests.get(self.base_url + api, headers=headers, cookies=cookies, proxies=proxies) 139 | res_json = response.json() 140 | success, msg = res_json["success"], res_json["msg"] 141 | except Exception as e: 142 | success = False 143 | msg = str(e) 144 | return success, msg, res_json 145 | 146 | 147 | def get_user_self_info2(self, cookies_str: str, proxies: dict = None): 148 | """ 149 | 获取用户自己的信息2 150 | :param cookies_str: 你的cookies 151 | 返回用户自己的信息2 152 | """ 153 | res_json = None 154 | try: 155 | api = f"/api/sns/web/v2/user/me" 156 | headers, cookies, data = generate_request_params(cookies_str, api) 157 | response = requests.get(self.base_url + api, headers=headers, cookies=cookies, proxies=proxies) 158 | res_json = response.json() 159 | success, msg = res_json["success"], res_json["msg"] 160 | except Exception as e: 161 | success = False 162 | msg = str(e) 163 | return success, msg, res_json 164 | 165 | def get_user_note_info(self, user_id: str, cursor: str, cookies_str: str, xsec_token='', xsec_source='', proxies: dict = None): 
166 | """ 167 | 获取用户指定位置的笔记 168 | :param user_id: 你想要获取的用户的id 169 | :param cursor: 你想要获取的笔记的cursor 170 | :param cookies_str: 你的cookies 171 | 返回用户指定位置的笔记 172 | """ 173 | res_json = None 174 | try: 175 | api = f"/api/sns/web/v1/user_posted" 176 | params = { 177 | "num": "30", 178 | "cursor": cursor, 179 | "user_id": user_id, 180 | "image_formats": "jpg,webp,avif", 181 | "xsec_token": xsec_token, 182 | "xsec_source": xsec_source, 183 | } 184 | splice_api = splice_str(api, params) 185 | headers, cookies, data = generate_request_params(cookies_str, splice_api) 186 | response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies) 187 | res_json = response.json() 188 | success, msg = res_json["success"], res_json["msg"] 189 | except Exception as e: 190 | success = False 191 | msg = str(e) 192 | return success, msg, res_json 193 | 194 | 195 | def get_user_all_notes(self, user_url: str, cookies_str: str, proxies: dict = None): 196 | """ 197 | 获取用户所有笔记 198 | :param user_id: 你想要获取的用户的id 199 | :param cookies_str: 你的cookies 200 | 返回用户的所有笔记 201 | """ 202 | cursor = '' 203 | note_list = [] 204 | try: 205 | urlParse = urllib.parse.urlparse(user_url) 206 | user_id = urlParse.path.split("/")[-1] 207 | kvs = urlParse.query.split('&') 208 | kvDist = {kv.split('=')[0]: kv.split('=')[1] for kv in kvs} 209 | xsec_token = kvDist['xsec_token'] if 'xsec_token' in kvDist else "" 210 | xsec_source = kvDist['xsec_source'] if 'xsec_source' in kvDist else "pc_search" 211 | while True: 212 | success, msg, res_json = self.get_user_note_info(user_id, cursor, cookies_str, xsec_token, xsec_source, proxies) 213 | if not success: 214 | raise Exception(msg) 215 | notes = res_json["data"]["notes"] 216 | if 'cursor' in res_json["data"]: 217 | cursor = str(res_json["data"]["cursor"]) 218 | else: 219 | break 220 | note_list.extend(notes) 221 | if len(notes) == 0 or not res_json["data"]["has_more"]: 222 | break 223 | except Exception as e: 224 | success = False 225 | 
msg = str(e) 226 | return success, msg, note_list 227 | 228 | def get_user_like_note_info(self, user_id: str, cursor: str, cookies_str: str, xsec_token='', xsec_source='', proxies: dict = None): 229 | """ 230 | 获取用户指定位置喜欢的笔记 231 | :param user_id: 你想要获取的用户的id 232 | :param cursor: 你想要获取的笔记的cursor 233 | :param cookies_str: 你的cookies 234 | 返回用户指定位置喜欢的笔记 235 | """ 236 | res_json = None 237 | try: 238 | api = f"/api/sns/web/v1/note/like/page" 239 | params = { 240 | "num": "30", 241 | "cursor": cursor, 242 | "user_id": user_id, 243 | "image_formats": "jpg,webp,avif", 244 | "xsec_token": xsec_token, 245 | "xsec_source": xsec_source, 246 | } 247 | splice_api = splice_str(api, params) 248 | headers, cookies, data = generate_request_params(cookies_str, splice_api) 249 | response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies) 250 | res_json = response.json() 251 | success, msg = res_json["success"], res_json["msg"] 252 | except Exception as e: 253 | success = False 254 | msg = str(e) 255 | return success, msg, res_json 256 | 257 | def get_user_all_like_note_info(self, user_url: str, cookies_str: str, proxies: dict = None): 258 | """ 259 | 获取用户所有喜欢笔记 260 | :param user_id: 你想要获取的用户的id 261 | :param cookies_str: 你的cookies 262 | 返回用户的所有喜欢笔记 263 | """ 264 | cursor = '' 265 | note_list = [] 266 | try: 267 | urlParse = urllib.parse.urlparse(user_url) 268 | user_id = urlParse.path.split("/")[-1] 269 | kvs = urlParse.query.split('&') 270 | kvDist = {kv.split('=')[0]: kv.split('=')[1] for kv in kvs} 271 | xsec_token = kvDist['xsec_token'] if 'xsec_token' in kvDist else "" 272 | xsec_source = kvDist['xsec_source'] if 'xsec_source' in kvDist else "pc_user" 273 | while True: 274 | success, msg, res_json = self.get_user_like_note_info(user_id, cursor, cookies_str, xsec_token, 275 | xsec_source, proxies) 276 | if not success: 277 | raise Exception(msg) 278 | notes = res_json["data"]["notes"] 279 | if 'cursor' in res_json["data"]: 280 | cursor = 
str(res_json["data"]["cursor"]) 281 | else: 282 | break 283 | note_list.extend(notes) 284 | if len(notes) == 0 or not res_json["data"]["has_more"]: 285 | break 286 | except Exception as e: 287 | success = False 288 | msg = str(e) 289 | return success, msg, note_list 290 | 291 | def get_user_collect_note_info(self, user_id: str, cursor: str, cookies_str: str, xsec_token='', xsec_source='', proxies: dict = None): 292 | """ 293 | 获取用户指定位置收藏的笔记 294 | :param user_id: 你想要获取的用户的id 295 | :param cursor: 你想要获取的笔记的cursor 296 | :param cookies_str: 你的cookies 297 | 返回用户指定位置收藏的笔记 298 | """ 299 | res_json = None 300 | try: 301 | api = f"/api/sns/web/v2/note/collect/page" 302 | params = { 303 | "num": "30", 304 | "cursor": cursor, 305 | "user_id": user_id, 306 | "image_formats": "jpg,webp,avif", 307 | "xsec_token": xsec_token, 308 | "xsec_source": xsec_source, 309 | } 310 | splice_api = splice_str(api, params) 311 | headers, cookies, data = generate_request_params(cookies_str, splice_api) 312 | response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies) 313 | res_json = response.json() 314 | success, msg = res_json["success"], res_json["msg"] 315 | except Exception as e: 316 | success = False 317 | msg = str(e) 318 | return success, msg, res_json 319 | 320 | def get_user_all_collect_note_info(self, user_url: str, cookies_str: str, proxies: dict = None): 321 | """ 322 | 获取用户所有收藏笔记 323 | :param user_id: 你想要获取的用户的id 324 | :param cookies_str: 你的cookies 325 | 返回用户的所有收藏笔记 326 | """ 327 | cursor = '' 328 | note_list = [] 329 | try: 330 | urlParse = urllib.parse.urlparse(user_url) 331 | user_id = urlParse.path.split("/")[-1] 332 | kvs = urlParse.query.split('&') 333 | kvDist = {kv.split('=')[0]: kv.split('=')[1] for kv in kvs} 334 | xsec_token = kvDist['xsec_token'] if 'xsec_token' in kvDist else "" 335 | xsec_source = kvDist['xsec_source'] if 'xsec_source' in kvDist else "pc_search" 336 | while True: 337 | success, msg, res_json = 
self.get_user_collect_note_info(user_id, cursor, cookies_str, xsec_token, 338 | xsec_source, proxies) 339 | if not success: 340 | raise Exception(msg) 341 | notes = res_json["data"]["notes"] 342 | if 'cursor' in res_json["data"]: 343 | cursor = str(res_json["data"]["cursor"]) 344 | else: 345 | break 346 | note_list.extend(notes) 347 | if len(notes) == 0 or not res_json["data"]["has_more"]: 348 | break 349 | except Exception as e: 350 | success = False 351 | msg = str(e) 352 | return success, msg, note_list 353 | 354 | def get_note_info(self, url: str, cookies_str: str, proxies: dict = None): 355 | """ 356 | 获取笔记的详细 357 | :param url: 你想要获取的笔记的url 358 | :param cookies_str: 你的cookies 359 | :param xsec_source: 你的xsec_source 默认为pc_search pc_user pc_feed 360 | 返回笔记的详细 361 | """ 362 | res_json = None 363 | try: 364 | urlParse = urllib.parse.urlparse(url) 365 | note_id = urlParse.path.split("/")[-1] 366 | kvs = urlParse.query.split('&') 367 | kvDist = {kv.split('=')[0]: kv.split('=')[1] for kv in kvs} 368 | api = f"/api/sns/web/v1/feed" 369 | data = { 370 | "source_note_id": note_id, 371 | "image_formats": [ 372 | "jpg", 373 | "webp", 374 | "avif" 375 | ], 376 | "extra": { 377 | "need_body_topic": "1" 378 | }, 379 | "xsec_source": kvDist['xsec_source'] if 'xsec_source' in kvDist else "pc_search", 380 | "xsec_token": kvDist['xsec_token'] 381 | } 382 | headers, cookies, data = generate_request_params(cookies_str, api, data) 383 | response = requests.post(self.base_url + api, headers=headers, data=data, cookies=cookies, proxies=proxies) 384 | res_json = response.json() 385 | success, msg = res_json["success"], res_json["msg"] 386 | except Exception as e: 387 | success = False 388 | msg = str(e) 389 | return success, msg, res_json 390 | 391 | 392 | def get_search_keyword(self, word: str, cookies_str: str, proxies: dict = None): 393 | """ 394 | 获取搜索关键词 395 | :param word: 你的关键词 396 | :param cookies_str: 你的cookies 397 | 返回搜索关键词 398 | """ 399 | res_json = None 400 | try: 401 | api 
= "/api/sns/web/v1/search/recommend" 402 | params = { 403 | "keyword": urllib.parse.quote(word) 404 | } 405 | splice_api = splice_str(api, params) 406 | headers, cookies, data = generate_request_params(cookies_str, splice_api) 407 | response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies) 408 | res_json = response.json() 409 | success, msg = res_json["success"], res_json["msg"] 410 | except Exception as e: 411 | success = False 412 | msg = str(e) 413 | return success, msg, res_json 414 | 415 | def search_note(self, query: str, cookies_str: str, page=1, sort="general", note_type=0, proxies: dict = None): 416 | """ 417 | 获取搜索笔记的结果 418 | :param query 搜索的关键词 419 | :param cookies_str 你的cookies 420 | :param page 搜索的页数 421 | :param sort 排序方式 general:综合排序, time_descending:时间排序, popularity_descending:热度排序 422 | :param note_type 笔记类型 0:全部, 1:视频, 2:图文 423 | 返回搜索的结果 424 | """ 425 | res_json = None 426 | try: 427 | api = "/api/sns/web/v1/search/notes" 428 | data = { 429 | "keyword": query, 430 | "page": page, 431 | "page_size": 20, 432 | "search_id": generate_x_b3_traceid(21), 433 | "sort": sort, 434 | "note_type": note_type, 435 | "ext_flags": [], 436 | "image_formats": [ 437 | "jpg", 438 | "webp", 439 | "avif" 440 | ] 441 | } 442 | headers, cookies, data = generate_request_params(cookies_str, api, data) 443 | response = requests.post(self.base_url + api, headers=headers, data=data.encode('utf-8'), cookies=cookies, proxies=proxies) 444 | res_json = response.json() 445 | success, msg = res_json["success"], res_json["msg"] 446 | except Exception as e: 447 | success = False 448 | msg = str(e) 449 | return success, msg, res_json 450 | 451 | def search_some_note(self, query: str, require_num: int, cookies_str: str, sort="general", note_type=0, proxies: dict = None): 452 | """ 453 | 指定数量搜索笔记,设置排序方式和笔记类型和笔记数量 454 | :param query 搜索的关键词 455 | :param require_num 搜索的数量 456 | :param cookies_str 你的cookies 457 | :param sort 排序方式 general:综合排序, 
time_descending:时间排序, popularity_descending:热度排序 458 | :param note_type 笔记类型 0:全部, 1:视频, 2:图文 459 | 返回搜索的结果 460 | """ 461 | page = 1 462 | note_list = [] 463 | try: 464 | while True: 465 | success, msg, res_json = self.search_note(query, cookies_str, page, sort, note_type, proxies) 466 | if not success: 467 | raise Exception(msg) 468 | if "items" not in res_json["data"]: 469 | break 470 | notes = res_json["data"]["items"] 471 | note_list.extend(notes) 472 | page += 1 473 | if len(note_list) >= require_num or not res_json["data"]["has_more"]: 474 | break 475 | except Exception as e: 476 | success = False 477 | msg = str(e) 478 | if len(note_list) > require_num: 479 | note_list = note_list[:require_num] 480 | return success, msg, note_list 481 | 482 | def search_user(self, query: str, cookies_str: str, page=1, proxies: dict = None): 483 | """ 484 | 获取搜索用户的结果 485 | :param query 搜索的关键词 486 | :param cookies_str 你的cookies 487 | :param page 搜索的页数 488 | 返回搜索的结果 489 | """ 490 | res_json = None 491 | try: 492 | api = "/api/sns/web/v1/search/usersearch" 493 | data = { 494 | "search_user_request": { 495 | "keyword": query, 496 | "search_id": "2dn9they1jbjxwawlo4xd", 497 | "page": page, 498 | "page_size": 15, 499 | "biz_type": "web_search_user", 500 | "request_id": "22471139-1723999898524" 501 | } 502 | } 503 | headers, cookies, data = generate_request_params(cookies_str, api, data) 504 | response = requests.post(self.base_url + api, headers=headers, data=data.encode('utf-8'), cookies=cookies, proxies=proxies) 505 | res_json = response.json() 506 | success, msg = res_json["success"], res_json["msg"] 507 | except Exception as e: 508 | success = False 509 | msg = str(e) 510 | return success, msg, res_json 511 | 512 | def search_some_user(self, query: str, require_num: int, cookies_str: str, proxies: dict = None): 513 | """ 514 | 指定数量搜索用户 515 | :param query 搜索的关键词 516 | :param require_num 搜索的数量 517 | :param cookies_str 你的cookies 518 | 返回搜索的结果 519 | """ 520 | page = 1 521 | 
user_list = [] 522 | try: 523 | while True: 524 | success, msg, res_json = self.search_user(query, cookies_str, page, proxies) 525 | if not success: 526 | raise Exception(msg) 527 | if "users" not in res_json["data"]: 528 | break 529 | users = res_json["data"]["users"] 530 | user_list.extend(users) 531 | page += 1 532 | if len(user_list) >= require_num or not res_json["data"]["has_more"]: 533 | break 534 | except Exception as e: 535 | success = False 536 | msg = str(e) 537 | if len(user_list) > require_num: 538 | user_list = user_list[:require_num] 539 | return success, msg, user_list 540 | 541 | def get_note_out_comment(self, note_id: str, cursor: str, xsec_token: str, cookies_str: str, proxies: dict = None): 542 | """ 543 | 获取指定位置的笔记一级评论 544 | :param note_id 笔记的id 545 | :param cursor 指定位置的评论的cursor 546 | :param cookies_str 你的cookies 547 | 返回指定位置的笔记一级评论 548 | """ 549 | res_json = None 550 | try: 551 | api = "/api/sns/web/v2/comment/page" 552 | params = { 553 | "note_id": note_id, 554 | "cursor": cursor, 555 | "top_comment_id": "", 556 | "image_formats": "jpg,webp,avif", 557 | "xsec_token": xsec_token 558 | } 559 | splice_api = splice_str(api, params) 560 | headers, cookies, data = generate_request_params(cookies_str, splice_api) 561 | response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies) 562 | res_json = response.json() 563 | success, msg = res_json["success"], res_json["msg"] 564 | except Exception as e: 565 | success = False 566 | msg = str(e) 567 | return success, msg, res_json 568 | 569 | def get_note_all_out_comment(self, note_id: str, xsec_token: str, cookies_str: str, proxies: dict = None): 570 | """ 571 | 获取笔记的全部一级评论 572 | :param note_id 笔记的id 573 | :param cookies_str 你的cookies 574 | 返回笔记的全部一级评论 575 | """ 576 | cursor = '' 577 | note_out_comment_list = [] 578 | try: 579 | while True: 580 | success, msg, res_json = self.get_note_out_comment(note_id, cursor, xsec_token, cookies_str, proxies) 581 | if not 
success: 582 | raise Exception(msg) 583 | comments = res_json["data"]["comments"] 584 | if 'cursor' in res_json["data"]: 585 | cursor = str(res_json["data"]["cursor"]) 586 | else: 587 | break 588 | note_out_comment_list.extend(comments) 589 | if len(note_out_comment_list) == 0 or not res_json["data"]["has_more"]: 590 | break 591 | except Exception as e: 592 | success = False 593 | msg = str(e) 594 | return success, msg, note_out_comment_list 595 | 596 | def get_note_inner_comment(self, comment: dict, cursor: str, xsec_token: str, cookies_str: str, proxies: dict = None): 597 | """ 598 | 获取指定位置的笔记二级评论 599 | :param comment 笔记的一级评论 600 | :param cursor 指定位置的评论的cursor 601 | :param cookies_str 你的cookies 602 | 返回指定位置的笔记二级评论 603 | """ 604 | res_json = None 605 | try: 606 | api = "/api/sns/web/v2/comment/sub/page" 607 | params = { 608 | "note_id": comment['note_id'], 609 | "root_comment_id": comment['id'], 610 | "num": "10", 611 | "cursor": cursor, 612 | "image_formats": "jpg,webp,avif", 613 | "top_comment_id": '', 614 | "xsec_token": xsec_token 615 | } 616 | splice_api = splice_str(api, params) 617 | headers, cookies, data = generate_request_params(cookies_str, splice_api) 618 | response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies) 619 | res_json = response.json() 620 | success, msg = res_json["success"], res_json["msg"] 621 | except Exception as e: 622 | success = False 623 | msg = str(e) 624 | return success, msg, res_json 625 | 626 | def get_note_all_inner_comment(self, comment: dict, xsec_token: str, cookies_str: str, proxies: dict = None): 627 | """ 628 | 获取笔记的全部二级评论 629 | :param comment 笔记的一级评论 630 | :param cookies_str 你的cookies 631 | 返回笔记的全部二级评论 632 | """ 633 | try: 634 | if not comment['sub_comment_has_more']: 635 | return True, 'success', comment 636 | cursor = comment['sub_comment_cursor'] 637 | inner_comment_list = [] 638 | while True: 639 | success, msg, res_json = self.get_note_inner_comment(comment, cursor, 
xsec_token, cookies_str, proxies) 640 | if not success: 641 | raise Exception(msg) 642 | comments = res_json["data"]["comments"] 643 | if 'cursor' in res_json["data"]: 644 | cursor = str(res_json["data"]["cursor"]) 645 | else: 646 | break 647 | inner_comment_list.extend(comments) 648 | if not res_json["data"]["has_more"]: 649 | break 650 | comment['sub_comments'].extend(inner_comment_list) 651 | except Exception as e: 652 | success = False 653 | msg = str(e) 654 | return success, msg, comment 655 | 656 | def get_note_all_comment(self, url: str, cookies_str: str, proxies: dict = None): 657 | """ 658 | 获取一篇文章的所有评论 659 | :param note_id: 你想要获取的笔记的id 660 | :param cookies_str: 你的cookies 661 | 返回一篇文章的所有评论 662 | """ 663 | out_comment_list = [] 664 | try: 665 | urlParse = urllib.parse.urlparse(url) 666 | note_id = urlParse.path.split("/")[-1] 667 | kvs = urlParse.query.split('&') 668 | kvDist = {kv.split('=')[0]: kv.split('=')[1] for kv in kvs} 669 | success, msg, out_comment_list = self.get_note_all_out_comment(note_id, kvDist['xsec_token'], cookies_str, proxies) 670 | if not success: 671 | raise Exception(msg) 672 | for comment in out_comment_list: 673 | success, msg, new_comment = self.get_note_all_inner_comment(comment, kvDist['xsec_token'], cookies_str, proxies) 674 | if not success: 675 | raise Exception(msg) 676 | except Exception as e: 677 | success = False 678 | msg = str(e) 679 | return success, msg, out_comment_list 680 | 681 | def get_unread_message(self, cookies_str: str, proxies: dict = None): 682 | """ 683 | 获取未读消息 684 | :param cookies_str: 你的cookies 685 | 返回未读消息 686 | """ 687 | res_json = None 688 | try: 689 | api = "/api/sns/web/unread_count" 690 | headers, cookies, data = generate_request_params(cookies_str, api) 691 | response = requests.get(self.base_url + api, headers=headers, cookies=cookies, proxies=proxies) 692 | res_json = response.json() 693 | success, msg = res_json["success"], res_json["msg"] 694 | except Exception as e: 695 | success = False 696 
# NOTE(review): the defs below are methods of XHS_Apis (apis/pc_apis.py); the
# class header lies outside this chunk, so they are shown at top level here.
# `requests`, `splice_str`, `generate_request_params` and `get_common_headers`
# come from the file prologue outside this chunk.

def get_metions(self, cursor: str, cookies_str: str, proxies: dict = None):
    """
    Fetch one page of comment / @-mention notifications.

    :param cursor: pagination cursor ('' requests the first page)
    :param cookies_str: the caller's cookie string
    :param proxies: optional requests-style proxy mapping
    :return: (success, msg, res_json) — res_json is None when the request failed
    """
    res_json = None
    try:
        api = "/api/sns/web/v1/you/mentions"
        params = {
            "num": "20",
            "cursor": cursor
        }
        splice_api = splice_str(api, params)
        headers, cookies, data = generate_request_params(cookies_str, splice_api)
        response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies)
        res_json = response.json()
        success, msg = res_json["success"], res_json["msg"]
    except Exception as e:
        success = False
        msg = str(e)
    return success, msg, res_json

def get_all_metions(self, cookies_str: str, proxies: dict = None):
    """
    Fetch every page of comment / @-mention notifications.

    :param cookies_str: the caller's cookie string
    :param proxies: optional requests-style proxy mapping
    :return: (success, msg, metions_list) — accumulated message_list entries
    """
    cursor = ''
    metions_list = []
    try:
        while True:
            success, msg, res_json = self.get_metions(cursor, cookies_str, proxies)
            if not success:
                raise Exception(msg)
            # BUGFIX: collect this page BEFORE checking the cursor; the old
            # code broke out first and silently dropped the final page's
            # messages when the response carried no 'cursor'.
            metions_list.extend(res_json["data"]["message_list"])
            if 'cursor' not in res_json["data"]:
                break
            cursor = str(res_json["data"]["cursor"])
            if not res_json["data"]["has_more"]:
                break
    except Exception as e:
        success = False
        msg = str(e)
    return success, msg, metions_list

def get_likesAndcollects(self, cursor: str, cookies_str: str, proxies: dict = None):
    """
    Fetch one page of like / collect notifications.

    :param cursor: pagination cursor ('' requests the first page)
    :param cookies_str: the caller's cookie string
    :param proxies: optional requests-style proxy mapping
    :return: (success, msg, res_json) — res_json is None when the request failed
    """
    res_json = None
    try:
        api = "/api/sns/web/v1/you/likes"
        params = {
            "num": "20",
            "cursor": cursor
        }
        splice_api = splice_str(api, params)
        headers, cookies, data = generate_request_params(cookies_str, splice_api)
        response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies)
        res_json = response.json()
        success, msg = res_json["success"], res_json["msg"]
    except Exception as e:
        success = False
        msg = str(e)
    return success, msg, res_json

def get_all_likesAndcollects(self, cookies_str: str, proxies: dict = None):
    """
    Fetch every page of like / collect notifications.

    :param cookies_str: the caller's cookie string
    :param proxies: optional requests-style proxy mapping
    :return: (success, msg, likesAndcollects_list)
    """
    cursor = ''
    likesAndcollects_list = []
    try:
        while True:
            success, msg, res_json = self.get_likesAndcollects(cursor, cookies_str, proxies)
            if not success:
                raise Exception(msg)
            # BUGFIX: extend before the cursor check so the last page is kept
            # (same defect as get_all_metions).
            likesAndcollects_list.extend(res_json["data"]["message_list"])
            if 'cursor' not in res_json["data"]:
                break
            cursor = str(res_json["data"]["cursor"])
            if not res_json["data"]["has_more"]:
                break
    except Exception as e:
        success = False
        msg = str(e)
    return success, msg, likesAndcollects_list

def get_new_connections(self, cursor: str, cookies_str: str, proxies: dict = None):
    """
    Fetch one page of new-follower notifications.

    :param cursor: pagination cursor ('' requests the first page)
    :param cookies_str: the caller's cookie string
    :param proxies: optional requests-style proxy mapping
    :return: (success, msg, res_json) — res_json is None when the request failed
    """
    res_json = None
    try:
        api = "/api/sns/web/v1/you/connections"
        params = {
            "num": "20",
            "cursor": cursor
        }
        splice_api = splice_str(api, params)
        headers, cookies, data = generate_request_params(cookies_str, splice_api)
        response = requests.get(self.base_url + splice_api, headers=headers, cookies=cookies, proxies=proxies)
        res_json = response.json()
        success, msg = res_json["success"], res_json["msg"]
    except Exception as e:
        success = False
        msg = str(e)
    return success, msg, res_json

def get_all_new_connections(self, cookies_str: str, proxies: dict = None):
    """
    Fetch every page of new-follower notifications.

    :param cookies_str: the caller's cookie string
    :param proxies: optional requests-style proxy mapping
    :return: (success, msg, connections_list)
    """
    cursor = ''
    connections_list = []
    try:
        while True:
            success, msg, res_json = self.get_new_connections(cursor, cookies_str, proxies)
            if not success:
                raise Exception(msg)
            # BUGFIX: extend before the cursor check so the last page is kept
            # (same defect as get_all_metions).
            connections_list.extend(res_json["data"]["message_list"])
            if 'cursor' not in res_json["data"]:
                break
            cursor = str(res_json["data"]["cursor"])
            if not res_json["data"]["has_more"]:
                break
    except Exception as e:
        success = False
        msg = str(e)
    return success, msg, connections_list

@staticmethod
def get_note_no_water_video(note_id):
    """
    Scrape a note's explore page for its watermark-free video address.

    :param note_id: id of the target note
    :return: (success, msg, video_addr) — video_addr is None on failure
    """
    success = True
    msg = '成功'
    video_addr = None
    try:
        headers = get_common_headers()
        url = f"https://www.xiaohongshu.com/explore/{note_id}"
        response = requests.get(url, headers=headers)
        res = response.text
        # NOTE(review): the original pattern was lost in this chunk (it read
        # r''), which would always yield ''. The og:video <meta> tag is the
        # natural target for this scrape — confirm against the live page.
        video_addr = re.findall(r'<meta name="og:video" content="(.*?)">', res)[0]
    except Exception as e:
        success = False
        msg = str(e)
    return success, msg, video_addr
@staticmethod
def get_note_no_water_img(img_url):
    """
    Derive a watermark-free image URL from a note image URL.

    Three CDN URL shapes are recognised: paths containing '.jpg', paths
    containing 'spectrum', and everything else; in each case the resource id
    is the path tail with the '!<style-suffix>' stripped.

    :param img_url: original (watermarked) image URL
    :return: (success, msg, new_url) — new_url is None on failure
    """
    success, msg, clean_url = True, '成功', None
    try:
        if '.jpg' in img_url:
            # e.g. .../<date>/<hash>/110/0/<id>.jpg!nd_dft_wlteh_webp_3
            resource_id = '/'.join(img_url.split('/')[-3:]).split('!')[0]
            clean_url = f'https://sns-img-qc.xhscdn.com/{resource_id}'
        elif 'spectrum' in img_url:
            # e.g. .../spectrum/<id>!nd_dft_wgth_webp_3
            resource_id = '/'.join(img_url.split('/')[-2:]).split('!')[0]
            clean_url = f'http://sns-webpic.xhscdn.com/{resource_id}?imageView2/2/w/format/jpg'
        else:
            # e.g. .../<date>/<hash>/<id>!nd_dft_wlteh_webp_3
            resource_id = img_url.split('/')[-1].split('!')[0]
            clean_url = f'https://sns-img-qc.xhscdn.com/{resource_id}'
    except Exception as e:
        success, msg = False, str(e)
    return success, msg, clean_url

if __name__ == '__main__':
    """
    Usage examples for the XHS apis.
    Every data-fetching api lives in this file; data-injection apis are
    against the rules — do not attempt them.
    """
    xhs_apis = XHS_Apis()
    cookies_str = r''
    # fetch a user's profile info
    user_url = 'https://www.xiaohongshu.com/user/profile/67a332a2000000000d008358?xsec_token=ABTf9yz4cLHhTycIlksF0jOi1yIZgfcaQ6IXNNGdKJ8xg=&xsec_source=pc_feed'
    success, msg, user_info = xhs_apis.get_user_info('67a332a2000000000d008358', cookies_str)
    logger.info(f'获取用户信息结果 {json.dumps(user_info, ensure_ascii=False)}: {success}, msg: {msg}')
    success, msg, note_list = xhs_apis.get_user_all_notes(user_url, cookies_str)
    logger.info(f'获取用户所有笔记结果 {json.dumps(note_list, ensure_ascii=False)}: {success}, msg: {msg}')
    # fetch a single note
    note_url = r'https://www.xiaohongshu.com/explore/67d7c713000000000900e391?xsec_token=AB1ACxbo5cevHxV_bWibTmK8R1DDz0NnAW1PbFZLABXtE=&xsec_source=pc_user'
    success, msg, note_info = xhs_apis.get_note_info(note_url, cookies_str)
    logger.info(f'获取笔记信息结果 {json.dumps(note_info, ensure_ascii=False)}: {success}, msg: {msg}')
    # fetch search-keyword suggestions
    query = "榴莲"
    success, msg, search_keyword = xhs_apis.get_search_keyword(query, cookies_str)
    logger.info(f'获取搜索关键词结果 {json.dumps(search_keyword, ensure_ascii=False)}: {success}, msg: {msg}')
    # search notes
    query = "榴莲"
    query_num = 10
    sort = "general"
    note_type = 0
    success, msg, notes = xhs_apis.search_some_note(query, query_num, cookies_str, sort, note_type)
    logger.info(f'搜索笔记结果 {json.dumps(notes, ensure_ascii=False)}: {success}, msg: {msg}')
    # fetch a note's comments
    note_url = r'https://www.xiaohongshu.com/explore/67d7c713000000000900e391?xsec_token=AB1ACxbo5cevHxV_bWibTmK8R1DDz0NnAW1PbFZLABXtE=&xsec_source=pc_user'
    success, msg, note_all_comment = xhs_apis.get_note_all_comment(note_url, cookies_str)
    logger.info(f'获取笔记评论结果 {json.dumps(note_all_comment, ensure_ascii=False)}: {success}, msg: {msg}')
import os

# NOTE(review): this chunk reconstructs main.py's Data_Spider. `logger`
# (loguru), `XHS_Apis`, `init`, `handle_note_info`, `download_note` and
# `save_to_xlsx` are imported in the file prologue, outside this chunk.


class Data_Spider():
    """Convenience crawler that drives XHS_Apis and persists the results."""

    def __init__(self):
        self.xhs_apis = XHS_Apis()

    def spider_note(self, note_url: str, cookies_str: str, proxies=None):
        """
        Crawl a single note.

        :param note_url: full note URL (including its xsec_token)
        :param cookies_str: cookie string
        :param proxies: optional requests-style proxy mapping
        :return: (success, msg, note_info) — note_info is None on failure
        """
        note_info = None
        try:
            success, msg, note_info = self.xhs_apis.get_note_info(note_url, cookies_str, proxies)
            if success:
                note_info = note_info['data']['items'][0]
                note_info['url'] = note_url
                note_info = handle_note_info(note_info)
        except Exception as e:
            success = False
            # BUGFIX: stringify the exception like the API layer does instead
            # of returning the raw exception object as msg.
            msg = str(e)
        logger.info(f'爬取笔记信息 {note_url}: {success}, msg: {msg}')
        return success, msg, note_info

    def spider_some_note(self, notes: list, cookies_str: str, base_path: dict, save_choice: str, excel_name: str = '', proxies=None):
        """
        Crawl a batch of notes and persist them.

        :param notes: list of note URLs
        :param base_path: dict with 'media' and 'excel' output directories
        :param save_choice: 'all' | 'media' | 'excel'
        :raises ValueError: when an excel save is requested without a name
        """
        if save_choice in ('all', 'excel') and excel_name == '':
            raise ValueError('excel_name 不能为空')
        note_list = []
        for note_url in notes:
            success, msg, note_info = self.spider_note(note_url, cookies_str, proxies)
            if note_info is not None and success:
                note_list.append(note_info)
        if save_choice in ('all', 'media'):
            for note_info in note_list:
                download_note(note_info, base_path['media'])
        if save_choice in ('all', 'excel'):
            # Save once, after collection — the dump's indentation is
            # ambiguous here, but saving inside the per-note loop would
            # rewrite the same workbook once per note.
            file_path = os.path.abspath(os.path.join(base_path['excel'], f'{excel_name}.xlsx'))
            save_to_xlsx(note_list, file_path)

    def spider_user_all_note(self, user_url: str, cookies_str: str, base_path: dict, save_choice: str, excel_name: str = '', proxies=None):
        """
        Crawl every note published by one user.

        :param user_url: user profile URL (including its xsec_token)
        :return: (note_list, success, msg) — note_list holds the note URLs
        """
        note_list = []
        try:
            success, msg, all_note_info = self.xhs_apis.get_user_all_notes(user_url, cookies_str, proxies)
            if success:
                logger.info(f'用户 {user_url} 作品数量: {len(all_note_info)}')
                for simple_note_info in all_note_info:
                    note_url = f"https://www.xiaohongshu.com/explore/{simple_note_info['note_id']}?xsec_token={simple_note_info['xsec_token']}"
                    note_list.append(note_url)
                if save_choice == 'all' or save_choice == 'excel':
                    # excel file is named after the user id from the URL
                    excel_name = user_url.split('/')[-1].split('?')[0]
                self.spider_some_note(note_list, cookies_str, base_path, save_choice, excel_name, proxies)
        except Exception as e:
            success = False
            msg = str(e)
        logger.info(f'爬取用户所有视频 {user_url}: {success}, msg: {msg}')
        return note_list, success, msg

    def spider_some_search_note(self, query: str, require_num: int, cookies_str: str, base_path: dict, save_choice: str, sort="general", note_type=0, excel_name: str = '', proxies=None):
        """
        Search notes by keyword and crawl up to require_num of them.

        :param query: search keyword
        :param require_num: number of results to fetch
        :param sort: general | time_descending | popularity_descending
        :param note_type: 0 all, 1 video, 2 image/text
        :return: (note_list, success, msg) — note_list holds the note URLs
        """
        note_list = []
        try:
            success, msg, notes = self.xhs_apis.search_some_note(query, require_num, cookies_str, sort, note_type, proxies)
            if success:
                # keep only real notes; search results may mix in other models
                notes = list(filter(lambda x: x['model_type'] == "note", notes))
                logger.info(f'搜索关键词 {query} 笔记数量: {len(notes)}')
                for note in notes:
                    note_url = f"https://www.xiaohongshu.com/explore/{note['id']}?xsec_token={note['xsec_token']}"
                    note_list.append(note_url)
                if save_choice == 'all' or save_choice == 'excel':
                    excel_name = query
                self.spider_some_note(note_list, cookies_str, base_path, save_choice, excel_name, proxies)
        except Exception as e:
            success = False
            msg = str(e)
        logger.info(f'搜索关键词 {query} 笔记: {success}, msg: {msg}')
        return note_list, success, msg

if __name__ == '__main__':
    """
    Entry point for the crawler; runnable directly.
    apis/pc_apis.py holds the full set of XHS data apis and can be wrapped
    further.
    """
    cookies_str, base_path = init()
    data_spider = Data_Spider()
    # save_choice: all = everything, media = videos/images, excel = spreadsheet
    # excel_name must be non-empty when save_choice is 'excel' or 'all'
    # 1
    notes = [
        r'https://www.xiaohongshu.com/explore/67d7c713000000000900e391?xsec_token=AB1ACxbo5cevHxV_bWibTmK8R1DDz0NnAW1PbFZLABXtE=&xsec_source=pc_user',
    ]
    data_spider.spider_some_note(notes, cookies_str, base_path, 'all', 'test')

    # 2
    user_url = 'https://www.xiaohongshu.com/user/profile/67a332a2000000000d008358?xsec_token=ABTf9yz4cLHhTycIlksF0jOi1yIZgfcaQ6IXNNGdKJ8xg=&xsec_source=pc_feed'
    data_spider.spider_user_all_note(user_url, cookies_str, base_path, 'all')

    # 3
    query = "榴莲"
    query_num = 10
    sort = "general"
    note_type = 0
    data_spider.spider_some_search_note(query, query_num, cookies_str, base_path, 'all', sort, note_type)
8 | "jsdom": "^26.0.0" 9 | } 10 | }, 11 | "node_modules/@asamuzakjp/css-color": { 12 | "version": "3.1.1", 13 | "resolved": "https://mirrors.cloud.tencent.com/npm/@asamuzakjp/css-color/-/css-color-3.1.1.tgz", 14 | "integrity": "sha512-hpRD68SV2OMcZCsrbdkccTw5FXjNDLo5OuqSHyHZfwweGsDWZwDJ2+gONyNAbazZclobMirACLw0lk8WVxIqxA==", 15 | "dependencies": { 16 | "@csstools/css-calc": "^2.1.2", 17 | "@csstools/css-color-parser": "^3.0.8", 18 | "@csstools/css-parser-algorithms": "^3.0.4", 19 | "@csstools/css-tokenizer": "^3.0.3", 20 | "lru-cache": "^10.4.3" 21 | } 22 | }, 23 | "node_modules/@csstools/color-helpers": { 24 | "version": "5.0.2", 25 | "resolved": "https://mirrors.cloud.tencent.com/npm/@csstools/color-helpers/-/color-helpers-5.0.2.tgz", 26 | "integrity": "sha512-JqWH1vsgdGcw2RR6VliXXdA0/59LttzlU8UlRT/iUUsEeWfYq8I+K0yhihEUTTHLRm1EXvpsCx3083EU15ecsA==", 27 | "funding": [ 28 | { 29 | "type": "github", 30 | "url": "https://github.com/sponsors/csstools" 31 | }, 32 | { 33 | "type": "opencollective", 34 | "url": "https://opencollective.com/csstools" 35 | } 36 | ], 37 | "engines": { 38 | "node": ">=18" 39 | } 40 | }, 41 | "node_modules/@csstools/css-calc": { 42 | "version": "2.1.2", 43 | "resolved": "https://mirrors.cloud.tencent.com/npm/@csstools/css-calc/-/css-calc-2.1.2.tgz", 44 | "integrity": "sha512-TklMyb3uBB28b5uQdxjReG4L80NxAqgrECqLZFQbyLekwwlcDDS8r3f07DKqeo8C4926Br0gf/ZDe17Zv4wIuw==", 45 | "funding": [ 46 | { 47 | "type": "github", 48 | "url": "https://github.com/sponsors/csstools" 49 | }, 50 | { 51 | "type": "opencollective", 52 | "url": "https://opencollective.com/csstools" 53 | } 54 | ], 55 | "engines": { 56 | "node": ">=18" 57 | }, 58 | "peerDependencies": { 59 | "@csstools/css-parser-algorithms": "^3.0.4", 60 | "@csstools/css-tokenizer": "^3.0.3" 61 | } 62 | }, 63 | "node_modules/@csstools/css-color-parser": { 64 | "version": "3.0.8", 65 | "resolved": "https://mirrors.cloud.tencent.com/npm/@csstools/css-color-parser/-/css-color-parser-3.0.8.tgz", 66 | 
"integrity": "sha512-pdwotQjCCnRPuNi06jFuP68cykU1f3ZWExLe/8MQ1LOs8Xq+fTkYgd+2V8mWUWMrOn9iS2HftPVaMZDaXzGbhQ==", 67 | "funding": [ 68 | { 69 | "type": "github", 70 | "url": "https://github.com/sponsors/csstools" 71 | }, 72 | { 73 | "type": "opencollective", 74 | "url": "https://opencollective.com/csstools" 75 | } 76 | ], 77 | "dependencies": { 78 | "@csstools/color-helpers": "^5.0.2", 79 | "@csstools/css-calc": "^2.1.2" 80 | }, 81 | "engines": { 82 | "node": ">=18" 83 | }, 84 | "peerDependencies": { 85 | "@csstools/css-parser-algorithms": "^3.0.4", 86 | "@csstools/css-tokenizer": "^3.0.3" 87 | } 88 | }, 89 | "node_modules/@csstools/css-parser-algorithms": { 90 | "version": "3.0.4", 91 | "resolved": "https://mirrors.cloud.tencent.com/npm/@csstools/css-parser-algorithms/-/css-parser-algorithms-3.0.4.tgz", 92 | "integrity": "sha512-Up7rBoV77rv29d3uKHUIVubz1BTcgyUK72IvCQAbfbMv584xHcGKCKbWh7i8hPrRJ7qU4Y8IO3IY9m+iTB7P3A==", 93 | "funding": [ 94 | { 95 | "type": "github", 96 | "url": "https://github.com/sponsors/csstools" 97 | }, 98 | { 99 | "type": "opencollective", 100 | "url": "https://opencollective.com/csstools" 101 | } 102 | ], 103 | "engines": { 104 | "node": ">=18" 105 | }, 106 | "peerDependencies": { 107 | "@csstools/css-tokenizer": "^3.0.3" 108 | } 109 | }, 110 | "node_modules/@csstools/css-tokenizer": { 111 | "version": "3.0.3", 112 | "resolved": "https://mirrors.cloud.tencent.com/npm/@csstools/css-tokenizer/-/css-tokenizer-3.0.3.tgz", 113 | "integrity": "sha512-UJnjoFsmxfKUdNYdWgOB0mWUypuLvAfQPH1+pyvRJs6euowbFkFC6P13w1l8mJyi3vxYMxc9kld5jZEGRQs6bw==", 114 | "funding": [ 115 | { 116 | "type": "github", 117 | "url": "https://github.com/sponsors/csstools" 118 | }, 119 | { 120 | "type": "opencollective", 121 | "url": "https://opencollective.com/csstools" 122 | } 123 | ], 124 | "engines": { 125 | "node": ">=18" 126 | } 127 | }, 128 | "node_modules/agent-base": { 129 | "version": "7.1.3", 130 | "resolved": 
"https://mirrors.cloud.tencent.com/npm/agent-base/-/agent-base-7.1.3.tgz", 131 | "integrity": "sha512-jRR5wdylq8CkOe6hei19GGZnxM6rBGwFl3Bg0YItGDimvjGtAvdZk4Pu6Cl4u4Igsws4a1fd1Vq3ezrhn4KmFw==", 132 | "engines": { 133 | "node": ">= 14" 134 | } 135 | }, 136 | "node_modules/asynckit": { 137 | "version": "0.4.0", 138 | "resolved": "https://mirrors.cloud.tencent.com/npm/asynckit/-/asynckit-0.4.0.tgz", 139 | "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" 140 | }, 141 | "node_modules/call-bind-apply-helpers": { 142 | "version": "1.0.2", 143 | "resolved": "https://mirrors.cloud.tencent.com/npm/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", 144 | "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", 145 | "dependencies": { 146 | "es-errors": "^1.3.0", 147 | "function-bind": "^1.1.2" 148 | }, 149 | "engines": { 150 | "node": ">= 0.4" 151 | } 152 | }, 153 | "node_modules/combined-stream": { 154 | "version": "1.0.8", 155 | "resolved": "https://mirrors.cloud.tencent.com/npm/combined-stream/-/combined-stream-1.0.8.tgz", 156 | "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", 157 | "dependencies": { 158 | "delayed-stream": "~1.0.0" 159 | }, 160 | "engines": { 161 | "node": ">= 0.8" 162 | } 163 | }, 164 | "node_modules/cssstyle": { 165 | "version": "4.3.0", 166 | "resolved": "https://mirrors.cloud.tencent.com/npm/cssstyle/-/cssstyle-4.3.0.tgz", 167 | "integrity": "sha512-6r0NiY0xizYqfBvWp1G7WXJ06/bZyrk7Dc6PHql82C/pKGUTKu4yAX4Y8JPamb1ob9nBKuxWzCGTRuGwU3yxJQ==", 168 | "dependencies": { 169 | "@asamuzakjp/css-color": "^3.1.1", 170 | "rrweb-cssom": "^0.8.0" 171 | }, 172 | "engines": { 173 | "node": ">=18" 174 | } 175 | }, 176 | "node_modules/data-urls": { 177 | "version": "5.0.0", 178 | "resolved": "https://mirrors.cloud.tencent.com/npm/data-urls/-/data-urls-5.0.0.tgz", 179 | "integrity": 
"sha512-ZYP5VBHshaDAiVZxjbRVcFJpc+4xGgT0bK3vzy1HLN8jTO975HEbuYzZJcHoQEY5K1a0z8YayJkyVETa08eNTg==", 180 | "dependencies": { 181 | "whatwg-mimetype": "^4.0.0", 182 | "whatwg-url": "^14.0.0" 183 | }, 184 | "engines": { 185 | "node": ">=18" 186 | } 187 | }, 188 | "node_modules/debug": { 189 | "version": "4.4.0", 190 | "resolved": "https://mirrors.cloud.tencent.com/npm/debug/-/debug-4.4.0.tgz", 191 | "integrity": "sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA==", 192 | "dependencies": { 193 | "ms": "^2.1.3" 194 | }, 195 | "engines": { 196 | "node": ">=6.0" 197 | }, 198 | "peerDependenciesMeta": { 199 | "supports-color": { 200 | "optional": true 201 | } 202 | } 203 | }, 204 | "node_modules/decimal.js": { 205 | "version": "10.5.0", 206 | "resolved": "https://mirrors.cloud.tencent.com/npm/decimal.js/-/decimal.js-10.5.0.tgz", 207 | "integrity": "sha512-8vDa8Qxvr/+d94hSh5P3IJwI5t8/c0KsMp+g8bNw9cY2icONa5aPfvKeieW1WlG0WQYwwhJ7mjui2xtiePQSXw==" 208 | }, 209 | "node_modules/delayed-stream": { 210 | "version": "1.0.0", 211 | "resolved": "https://mirrors.cloud.tencent.com/npm/delayed-stream/-/delayed-stream-1.0.0.tgz", 212 | "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", 213 | "engines": { 214 | "node": ">=0.4.0" 215 | } 216 | }, 217 | "node_modules/dunder-proto": { 218 | "version": "1.0.1", 219 | "resolved": "https://mirrors.cloud.tencent.com/npm/dunder-proto/-/dunder-proto-1.0.1.tgz", 220 | "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", 221 | "dependencies": { 222 | "call-bind-apply-helpers": "^1.0.1", 223 | "es-errors": "^1.3.0", 224 | "gopd": "^1.2.0" 225 | }, 226 | "engines": { 227 | "node": ">= 0.4" 228 | } 229 | }, 230 | "node_modules/entities": { 231 | "version": "4.5.0", 232 | "resolved": "https://mirrors.cloud.tencent.com/npm/entities/-/entities-4.5.0.tgz", 233 | "integrity": 
"sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==", 234 | "engines": { 235 | "node": ">=0.12" 236 | }, 237 | "funding": { 238 | "url": "https://github.com/fb55/entities?sponsor=1" 239 | } 240 | }, 241 | "node_modules/es-define-property": { 242 | "version": "1.0.1", 243 | "resolved": "https://mirrors.cloud.tencent.com/npm/es-define-property/-/es-define-property-1.0.1.tgz", 244 | "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", 245 | "engines": { 246 | "node": ">= 0.4" 247 | } 248 | }, 249 | "node_modules/es-errors": { 250 | "version": "1.3.0", 251 | "resolved": "https://mirrors.cloud.tencent.com/npm/es-errors/-/es-errors-1.3.0.tgz", 252 | "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", 253 | "engines": { 254 | "node": ">= 0.4" 255 | } 256 | }, 257 | "node_modules/es-object-atoms": { 258 | "version": "1.1.1", 259 | "resolved": "https://mirrors.cloud.tencent.com/npm/es-object-atoms/-/es-object-atoms-1.1.1.tgz", 260 | "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", 261 | "dependencies": { 262 | "es-errors": "^1.3.0" 263 | }, 264 | "engines": { 265 | "node": ">= 0.4" 266 | } 267 | }, 268 | "node_modules/es-set-tostringtag": { 269 | "version": "2.1.0", 270 | "resolved": "https://mirrors.cloud.tencent.com/npm/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", 271 | "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", 272 | "dependencies": { 273 | "es-errors": "^1.3.0", 274 | "get-intrinsic": "^1.2.6", 275 | "has-tostringtag": "^1.0.2", 276 | "hasown": "^2.0.2" 277 | }, 278 | "engines": { 279 | "node": ">= 0.4" 280 | } 281 | }, 282 | "node_modules/form-data": { 283 | "version": "4.0.2", 284 | "resolved": "https://mirrors.cloud.tencent.com/npm/form-data/-/form-data-4.0.2.tgz", 285 
| "integrity": "sha512-hGfm/slu0ZabnNt4oaRZ6uREyfCj6P4fT/n6A1rGV+Z0VdGXjfOhVUpkn6qVQONHGIFwmveGXyDs75+nr6FM8w==", 286 | "dependencies": { 287 | "asynckit": "^0.4.0", 288 | "combined-stream": "^1.0.8", 289 | "es-set-tostringtag": "^2.1.0", 290 | "mime-types": "^2.1.12" 291 | }, 292 | "engines": { 293 | "node": ">= 6" 294 | } 295 | }, 296 | "node_modules/function-bind": { 297 | "version": "1.1.2", 298 | "resolved": "https://mirrors.cloud.tencent.com/npm/function-bind/-/function-bind-1.1.2.tgz", 299 | "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", 300 | "funding": { 301 | "url": "https://github.com/sponsors/ljharb" 302 | } 303 | }, 304 | "node_modules/get-intrinsic": { 305 | "version": "1.3.0", 306 | "resolved": "https://mirrors.cloud.tencent.com/npm/get-intrinsic/-/get-intrinsic-1.3.0.tgz", 307 | "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", 308 | "dependencies": { 309 | "call-bind-apply-helpers": "^1.0.2", 310 | "es-define-property": "^1.0.1", 311 | "es-errors": "^1.3.0", 312 | "es-object-atoms": "^1.1.1", 313 | "function-bind": "^1.1.2", 314 | "get-proto": "^1.0.1", 315 | "gopd": "^1.2.0", 316 | "has-symbols": "^1.1.0", 317 | "hasown": "^2.0.2", 318 | "math-intrinsics": "^1.1.0" 319 | }, 320 | "engines": { 321 | "node": ">= 0.4" 322 | }, 323 | "funding": { 324 | "url": "https://github.com/sponsors/ljharb" 325 | } 326 | }, 327 | "node_modules/get-proto": { 328 | "version": "1.0.1", 329 | "resolved": "https://mirrors.cloud.tencent.com/npm/get-proto/-/get-proto-1.0.1.tgz", 330 | "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", 331 | "dependencies": { 332 | "dunder-proto": "^1.0.1", 333 | "es-object-atoms": "^1.0.0" 334 | }, 335 | "engines": { 336 | "node": ">= 0.4" 337 | } 338 | }, 339 | "node_modules/gopd": { 340 | "version": "1.2.0", 341 | "resolved": 
"https://mirrors.cloud.tencent.com/npm/gopd/-/gopd-1.2.0.tgz", 342 | "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", 343 | "engines": { 344 | "node": ">= 0.4" 345 | }, 346 | "funding": { 347 | "url": "https://github.com/sponsors/ljharb" 348 | } 349 | }, 350 | "node_modules/has-symbols": { 351 | "version": "1.1.0", 352 | "resolved": "https://mirrors.cloud.tencent.com/npm/has-symbols/-/has-symbols-1.1.0.tgz", 353 | "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", 354 | "engines": { 355 | "node": ">= 0.4" 356 | }, 357 | "funding": { 358 | "url": "https://github.com/sponsors/ljharb" 359 | } 360 | }, 361 | "node_modules/has-tostringtag": { 362 | "version": "1.0.2", 363 | "resolved": "https://mirrors.cloud.tencent.com/npm/has-tostringtag/-/has-tostringtag-1.0.2.tgz", 364 | "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", 365 | "dependencies": { 366 | "has-symbols": "^1.0.3" 367 | }, 368 | "engines": { 369 | "node": ">= 0.4" 370 | }, 371 | "funding": { 372 | "url": "https://github.com/sponsors/ljharb" 373 | } 374 | }, 375 | "node_modules/hasown": { 376 | "version": "2.0.2", 377 | "resolved": "https://mirrors.cloud.tencent.com/npm/hasown/-/hasown-2.0.2.tgz", 378 | "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", 379 | "dependencies": { 380 | "function-bind": "^1.1.2" 381 | }, 382 | "engines": { 383 | "node": ">= 0.4" 384 | } 385 | }, 386 | "node_modules/html-encoding-sniffer": { 387 | "version": "4.0.0", 388 | "resolved": "https://mirrors.cloud.tencent.com/npm/html-encoding-sniffer/-/html-encoding-sniffer-4.0.0.tgz", 389 | "integrity": "sha512-Y22oTqIU4uuPgEemfz7NDJz6OeKf12Lsu+QC+s3BVpda64lTiMYCyGwg5ki4vFxkMwQdeZDl2adZoqUgdFuTgQ==", 390 | "dependencies": { 391 | "whatwg-encoding": "^3.1.1" 392 | }, 393 | "engines": { 394 | 
"node": ">=18" 395 | } 396 | }, 397 | "node_modules/http-proxy-agent": { 398 | "version": "7.0.2", 399 | "resolved": "https://mirrors.cloud.tencent.com/npm/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz", 400 | "integrity": "sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==", 401 | "dependencies": { 402 | "agent-base": "^7.1.0", 403 | "debug": "^4.3.4" 404 | }, 405 | "engines": { 406 | "node": ">= 14" 407 | } 408 | }, 409 | "node_modules/https-proxy-agent": { 410 | "version": "7.0.6", 411 | "resolved": "https://mirrors.cloud.tencent.com/npm/https-proxy-agent/-/https-proxy-agent-7.0.6.tgz", 412 | "integrity": "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw==", 413 | "dependencies": { 414 | "agent-base": "^7.1.2", 415 | "debug": "4" 416 | }, 417 | "engines": { 418 | "node": ">= 14" 419 | } 420 | }, 421 | "node_modules/iconv-lite": { 422 | "version": "0.6.3", 423 | "resolved": "https://mirrors.cloud.tencent.com/npm/iconv-lite/-/iconv-lite-0.6.3.tgz", 424 | "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", 425 | "dependencies": { 426 | "safer-buffer": ">= 2.1.2 < 3.0.0" 427 | }, 428 | "engines": { 429 | "node": ">=0.10.0" 430 | } 431 | }, 432 | "node_modules/is-potential-custom-element-name": { 433 | "version": "1.0.1", 434 | "resolved": "https://mirrors.cloud.tencent.com/npm/is-potential-custom-element-name/-/is-potential-custom-element-name-1.0.1.tgz", 435 | "integrity": "sha512-bCYeRA2rVibKZd+s2625gGnGF/t7DSqDs4dP7CrLA1m7jKWz6pps0LpYLJN8Q64HtmPKJ1hrN3nzPNKFEKOUiQ==" 436 | }, 437 | "node_modules/jsdom": { 438 | "version": "26.0.0", 439 | "resolved": "https://mirrors.cloud.tencent.com/npm/jsdom/-/jsdom-26.0.0.tgz", 440 | "integrity": "sha512-BZYDGVAIriBWTpIxYzrXjv3E/4u8+/pSG5bQdIYCbNCGOvsPkDQfTVLAIXAf9ETdCpduCVTkDe2NNZ8NIwUVzw==", 441 | "dependencies": { 442 | "cssstyle": "^4.2.1", 443 | "data-urls": "^5.0.0", 444 | 
"decimal.js": "^10.4.3", 445 | "form-data": "^4.0.1", 446 | "html-encoding-sniffer": "^4.0.0", 447 | "http-proxy-agent": "^7.0.2", 448 | "https-proxy-agent": "^7.0.6", 449 | "is-potential-custom-element-name": "^1.0.1", 450 | "nwsapi": "^2.2.16", 451 | "parse5": "^7.2.1", 452 | "rrweb-cssom": "^0.8.0", 453 | "saxes": "^6.0.0", 454 | "symbol-tree": "^3.2.4", 455 | "tough-cookie": "^5.0.0", 456 | "w3c-xmlserializer": "^5.0.0", 457 | "webidl-conversions": "^7.0.0", 458 | "whatwg-encoding": "^3.1.1", 459 | "whatwg-mimetype": "^4.0.0", 460 | "whatwg-url": "^14.1.0", 461 | "ws": "^8.18.0", 462 | "xml-name-validator": "^5.0.0" 463 | }, 464 | "engines": { 465 | "node": ">=18" 466 | }, 467 | "peerDependencies": { 468 | "canvas": "^3.0.0" 469 | }, 470 | "peerDependenciesMeta": { 471 | "canvas": { 472 | "optional": true 473 | } 474 | } 475 | }, 476 | "node_modules/lru-cache": { 477 | "version": "10.4.3", 478 | "resolved": "https://mirrors.cloud.tencent.com/npm/lru-cache/-/lru-cache-10.4.3.tgz", 479 | "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==" 480 | }, 481 | "node_modules/math-intrinsics": { 482 | "version": "1.1.0", 483 | "resolved": "https://mirrors.cloud.tencent.com/npm/math-intrinsics/-/math-intrinsics-1.1.0.tgz", 484 | "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", 485 | "engines": { 486 | "node": ">= 0.4" 487 | } 488 | }, 489 | "node_modules/mime-db": { 490 | "version": "1.52.0", 491 | "resolved": "https://mirrors.cloud.tencent.com/npm/mime-db/-/mime-db-1.52.0.tgz", 492 | "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", 493 | "engines": { 494 | "node": ">= 0.6" 495 | } 496 | }, 497 | "node_modules/mime-types": { 498 | "version": "2.1.35", 499 | "resolved": "https://mirrors.cloud.tencent.com/npm/mime-types/-/mime-types-2.1.35.tgz", 500 | "integrity": 
"sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", 501 | "dependencies": { 502 | "mime-db": "1.52.0" 503 | }, 504 | "engines": { 505 | "node": ">= 0.6" 506 | } 507 | }, 508 | "node_modules/ms": { 509 | "version": "2.1.3", 510 | "resolved": "https://mirrors.cloud.tencent.com/npm/ms/-/ms-2.1.3.tgz", 511 | "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" 512 | }, 513 | "node_modules/nwsapi": { 514 | "version": "2.2.18", 515 | "resolved": "https://mirrors.cloud.tencent.com/npm/nwsapi/-/nwsapi-2.2.18.tgz", 516 | "integrity": "sha512-p1TRH/edngVEHVbwqWnxUViEmq5znDvyB+Sik5cmuLpGOIfDf/39zLiq3swPF8Vakqn+gvNiOQAZu8djYlQILA==" 517 | }, 518 | "node_modules/parse5": { 519 | "version": "7.2.1", 520 | "resolved": "https://mirrors.cloud.tencent.com/npm/parse5/-/parse5-7.2.1.tgz", 521 | "integrity": "sha512-BuBYQYlv1ckiPdQi/ohiivi9Sagc9JG+Ozs0r7b/0iK3sKmrb0b9FdWdBbOdx6hBCM/F9Ir82ofnBhtZOjCRPQ==", 522 | "dependencies": { 523 | "entities": "^4.5.0" 524 | }, 525 | "funding": { 526 | "url": "https://github.com/inikulin/parse5?sponsor=1" 527 | } 528 | }, 529 | "node_modules/punycode": { 530 | "version": "2.3.1", 531 | "resolved": "https://mirrors.cloud.tencent.com/npm/punycode/-/punycode-2.3.1.tgz", 532 | "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", 533 | "engines": { 534 | "node": ">=6" 535 | } 536 | }, 537 | "node_modules/rrweb-cssom": { 538 | "version": "0.8.0", 539 | "resolved": "https://mirrors.cloud.tencent.com/npm/rrweb-cssom/-/rrweb-cssom-0.8.0.tgz", 540 | "integrity": "sha512-guoltQEx+9aMf2gDZ0s62EcV8lsXR+0w8915TC3ITdn2YueuNjdAYh/levpU9nFaoChh9RUS5ZdQMrKfVEN9tw==" 541 | }, 542 | "node_modules/safer-buffer": { 543 | "version": "2.1.2", 544 | "resolved": "https://mirrors.cloud.tencent.com/npm/safer-buffer/-/safer-buffer-2.1.2.tgz", 545 | "integrity": 
"sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" 546 | }, 547 | "node_modules/saxes": { 548 | "version": "6.0.0", 549 | "resolved": "https://mirrors.cloud.tencent.com/npm/saxes/-/saxes-6.0.0.tgz", 550 | "integrity": "sha512-xAg7SOnEhrm5zI3puOOKyy1OMcMlIJZYNJY7xLBwSze0UjhPLnWfj2GF2EpT0jmzaJKIWKHLsaSSajf35bcYnA==", 551 | "dependencies": { 552 | "xmlchars": "^2.2.0" 553 | }, 554 | "engines": { 555 | "node": ">=v12.22.7" 556 | } 557 | }, 558 | "node_modules/symbol-tree": { 559 | "version": "3.2.4", 560 | "resolved": "https://mirrors.cloud.tencent.com/npm/symbol-tree/-/symbol-tree-3.2.4.tgz", 561 | "integrity": "sha512-9QNk5KwDF+Bvz+PyObkmSYjI5ksVUYtjW7AU22r2NKcfLJcXp96hkDWU3+XndOsUb+AQ9QhfzfCT2O+CNWT5Tw==" 562 | }, 563 | "node_modules/tldts": { 564 | "version": "6.1.84", 565 | "resolved": "https://mirrors.cloud.tencent.com/npm/tldts/-/tldts-6.1.84.tgz", 566 | "integrity": "sha512-aRGIbCIF3teodtUFAYSdQONVmDRy21REM3o6JnqWn5ZkQBJJ4gHxhw6OfwQ+WkSAi3ASamrS4N4nyazWx6uTYg==", 567 | "dependencies": { 568 | "tldts-core": "^6.1.84" 569 | }, 570 | "bin": { 571 | "tldts": "bin/cli.js" 572 | } 573 | }, 574 | "node_modules/tldts-core": { 575 | "version": "6.1.84", 576 | "resolved": "https://mirrors.cloud.tencent.com/npm/tldts-core/-/tldts-core-6.1.84.tgz", 577 | "integrity": "sha512-NaQa1W76W2aCGjXybvnMYzGSM4x8fvG2AN/pla7qxcg0ZHbooOPhA8kctmOZUDfZyhDL27OGNbwAeig8P4p1vg==" 578 | }, 579 | "node_modules/tough-cookie": { 580 | "version": "5.1.2", 581 | "resolved": "https://mirrors.cloud.tencent.com/npm/tough-cookie/-/tough-cookie-5.1.2.tgz", 582 | "integrity": "sha512-FVDYdxtnj0G6Qm/DhNPSb8Ju59ULcup3tuJxkFb5K8Bv2pUXILbf0xZWU8PX8Ov19OXljbUyveOFwRMwkXzO+A==", 583 | "dependencies": { 584 | "tldts": "^6.1.32" 585 | }, 586 | "engines": { 587 | "node": ">=16" 588 | } 589 | }, 590 | "node_modules/tr46": { 591 | "version": "5.1.0", 592 | "resolved": "https://mirrors.cloud.tencent.com/npm/tr46/-/tr46-5.1.0.tgz", 593 | "integrity": 
"sha512-IUWnUK7ADYR5Sl1fZlO1INDUhVhatWl7BtJWsIhwJ0UAK7ilzzIa8uIqOO/aYVWHZPJkKbEL+362wrzoeRF7bw==", 594 | "dependencies": { 595 | "punycode": "^2.3.1" 596 | }, 597 | "engines": { 598 | "node": ">=18" 599 | } 600 | }, 601 | "node_modules/w3c-xmlserializer": { 602 | "version": "5.0.0", 603 | "resolved": "https://mirrors.cloud.tencent.com/npm/w3c-xmlserializer/-/w3c-xmlserializer-5.0.0.tgz", 604 | "integrity": "sha512-o8qghlI8NZHU1lLPrpi2+Uq7abh4GGPpYANlalzWxyWteJOCsr/P+oPBA49TOLu5FTZO4d3F9MnWJfiMo4BkmA==", 605 | "dependencies": { 606 | "xml-name-validator": "^5.0.0" 607 | }, 608 | "engines": { 609 | "node": ">=18" 610 | } 611 | }, 612 | "node_modules/webidl-conversions": { 613 | "version": "7.0.0", 614 | "resolved": "https://mirrors.cloud.tencent.com/npm/webidl-conversions/-/webidl-conversions-7.0.0.tgz", 615 | "integrity": "sha512-VwddBukDzu71offAQR975unBIGqfKZpM+8ZX6ySk8nYhVoo5CYaZyzt3YBvYtRtO+aoGlqxPg/B87NGVZ/fu6g==", 616 | "engines": { 617 | "node": ">=12" 618 | } 619 | }, 620 | "node_modules/whatwg-encoding": { 621 | "version": "3.1.1", 622 | "resolved": "https://mirrors.cloud.tencent.com/npm/whatwg-encoding/-/whatwg-encoding-3.1.1.tgz", 623 | "integrity": "sha512-6qN4hJdMwfYBtE3YBTTHhoeuUrDBPZmbQaxWAqSALV/MeEnR5z1xd8UKud2RAkFoPkmB+hli1TZSnyi84xz1vQ==", 624 | "dependencies": { 625 | "iconv-lite": "0.6.3" 626 | }, 627 | "engines": { 628 | "node": ">=18" 629 | } 630 | }, 631 | "node_modules/whatwg-mimetype": { 632 | "version": "4.0.0", 633 | "resolved": "https://mirrors.cloud.tencent.com/npm/whatwg-mimetype/-/whatwg-mimetype-4.0.0.tgz", 634 | "integrity": "sha512-QaKxh0eNIi2mE9p2vEdzfagOKHCcj1pJ56EEHGQOVxp8r9/iszLUUV7v89x9O1p/T+NlTM5W7jW6+cz4Fq1YVg==", 635 | "engines": { 636 | "node": ">=18" 637 | } 638 | }, 639 | "node_modules/whatwg-url": { 640 | "version": "14.2.0", 641 | "resolved": "https://mirrors.cloud.tencent.com/npm/whatwg-url/-/whatwg-url-14.2.0.tgz", 642 | "integrity": 
"sha512-De72GdQZzNTUBBChsXueQUnPKDkg/5A5zp7pFDuQAj5UFoENpiACU0wlCvzpAGnTkj++ihpKwKyYewn/XNUbKw==", 643 | "dependencies": { 644 | "tr46": "^5.1.0", 645 | "webidl-conversions": "^7.0.0" 646 | }, 647 | "engines": { 648 | "node": ">=18" 649 | } 650 | }, 651 | "node_modules/ws": { 652 | "version": "8.18.1", 653 | "resolved": "https://mirrors.cloud.tencent.com/npm/ws/-/ws-8.18.1.tgz", 654 | "integrity": "sha512-RKW2aJZMXeMxVpnZ6bck+RswznaxmzdULiBr6KY7XkTnW8uvt0iT9H5DkHUChXrc+uurzwa0rVI16n/Xzjdz1w==", 655 | "engines": { 656 | "node": ">=10.0.0" 657 | }, 658 | "peerDependencies": { 659 | "bufferutil": "^4.0.1", 660 | "utf-8-validate": ">=5.0.2" 661 | }, 662 | "peerDependenciesMeta": { 663 | "bufferutil": { 664 | "optional": true 665 | }, 666 | "utf-8-validate": { 667 | "optional": true 668 | } 669 | } 670 | }, 671 | "node_modules/xml-name-validator": { 672 | "version": "5.0.0", 673 | "resolved": "https://mirrors.cloud.tencent.com/npm/xml-name-validator/-/xml-name-validator-5.0.0.tgz", 674 | "integrity": "sha512-EvGK8EJ3DhaHfbRlETOWAS5pO9MZITeauHKJyb8wyajUfQUenkIg2MvLDTZ4T/TgIcm3HU0TFBgWWboAZ30UHg==", 675 | "engines": { 676 | "node": ">=18" 677 | } 678 | }, 679 | "node_modules/xmlchars": { 680 | "version": "2.2.0", 681 | "resolved": "https://mirrors.cloud.tencent.com/npm/xmlchars/-/xmlchars-2.2.0.tgz", 682 | "integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==" 683 | } 684 | } 685 | } 686 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "jsdom": "^26.0.0" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | PyExecJS 2 | requests 3 | loguru 4 | python-dotenv 5 | retry 6 | openpyxl 
-------------------------------------------------------------------------------- /static/xhs_xray.js: -------------------------------------------------------------------------------- 1 | self = global; 2 | window = global; 3 | var zc666; 4 | !function() { 5 | "use strict"; 6 | var e, t, n, r, o, i = {}, u = {}; 7 | function f(e) { 8 | var t = u[e]; 9 | if (void 0 !== t) 10 | return t.exports; 11 | var n = u[e] = { 12 | id: e, 13 | loaded: !1, 14 | exports: {} 15 | }; 16 | console.log(e); 17 | return i[e].call(n.exports, n, n.exports, f), 18 | n.loaded = !0, 19 | n.exports 20 | } 21 | f.m = i, 22 | f.amdO = {}, 23 | e = [], 24 | f.O = function(t, n, r, o) { 25 | if (!n) { 26 | var i = 1 / 0; 27 | for (d = 0; d < e.length; d++) { 28 | n = e[d][0], 29 | r = e[d][1], 30 | o = e[d][2]; 31 | for (var u = !0, c = 0; c < n.length; c++) 32 | (!1 & o || i >= o) && Object.keys(f.O).every((function(e) { 33 | return f.O[e](n[c]) 34 | } 35 | )) ? n.splice(c--, 1) : (u = !1, 36 | o < i && (i = o)); 37 | if (u) { 38 | e.splice(d--, 1); 39 | var a = r(); 40 | void 0 !== a && (t = a) 41 | } 42 | } 43 | return t 44 | } 45 | o = o || 0; 46 | for (var d = e.length; d > 0 && e[d - 1][2] > o; d--) 47 | e[d] = e[d - 1]; 48 | e[d] = [n, r, o] 49 | } 50 | , 51 | f.n = function(e) { 52 | var t = e && e.__esModule ? function() { 53 | return e.default 54 | } 55 | : function() { 56 | return e 57 | } 58 | ; 59 | return f.d(t, { 60 | a: t 61 | }), 62 | t 63 | } 64 | , 65 | n = Object.getPrototypeOf ? 
function(e) { 66 | return Object.getPrototypeOf(e) 67 | } 68 | : function(e) { 69 | return e.__proto__ 70 | } 71 | , 72 | f.t = function(e, r) { 73 | if (1 & r && (e = this(e)), 74 | 8 & r) 75 | return e; 76 | if ("object" == typeof e && e) { 77 | if (4 & r && e.__esModule) 78 | return e; 79 | if (16 & r && "function" == typeof e.then) 80 | return e 81 | } 82 | var o = Object.create(null); 83 | f.r(o); 84 | var i = {}; 85 | t = t || [null, n({}), n([]), n(n)]; 86 | for (var u = 2 & r && e; "object" == typeof u && !~t.indexOf(u); u = n(u)) 87 | Object.getOwnPropertyNames(u).forEach((function(t) { 88 | i[t] = function() { 89 | return e[t] 90 | } 91 | } 92 | )); 93 | return i.default = function() { 94 | return e 95 | } 96 | , 97 | f.d(o, i), 98 | o 99 | } 100 | , 101 | f.d = function(e, t) { 102 | for (var n in t) 103 | f.o(t, n) && !f.o(e, n) && Object.defineProperty(e, n, { 104 | enumerable: !0, 105 | get: t[n] 106 | }) 107 | } 108 | , 109 | f.f = {}, 110 | f.e = function(e) { 111 | return Promise.all(Object.keys(f.f).reduce((function(t, n) { 112 | return f.f[n](e, t), 113 | t 114 | } 115 | ), [])) 116 | } 117 | , 118 | f.u = function(e) { 119 | return "js/" + ({ 120 | 41: "Board", 121 | 94: "Login", 122 | 256: "NPS", 123 | 290: "Notification", 124 | 406: "User", 125 | 464: "FeedToNote", 126 | 540: "Explore", 127 | 692: "Track", 128 | 763: "Search", 129 | 891: "xhs-web-player", 130 | 895: "Note", 131 | 898: "minor" 132 | }[e] || e) + "." 
+ { 133 | 13: "849e078", 134 | 18: "88c4016", 135 | 41: "a4fad25", 136 | 64: "de4ace7", 137 | 92: "1b9e4df", 138 | 94: "01eead2", 139 | 168: "256b43c", 140 | 256: "3c5b745", 141 | 281: "ade9f6a", 142 | 290: "d0e6310", 143 | 334: "afb0229", 144 | 337: "e738619", 145 | 398: "80ce566", 146 | 406: "0477db9", 147 | 426: "fd994fa", 148 | 464: "073bfcc", 149 | 469: "a49ea26", 150 | 474: "738cddb", 151 | 494: "c852c82", 152 | 513: "7ca0915", 153 | 540: "f44da86", 154 | 563: "5fc3402", 155 | 588: "67edf6f", 156 | 591: "ddde7d9", 157 | 692: "0c3ac5e", 158 | 699: "c290318", 159 | 737: "9268c58", 160 | 763: "01c6b25", 161 | 766: "f0a8354", 162 | 772: "50c8fcf", 163 | 787: "385b767", 164 | 871: "d5ef805", 165 | 891: "e811881", 166 | 895: "697ec77", 167 | 898: "868733b" 168 | }[e] + ".chunk.js" 169 | } 170 | , 171 | f.miniCssF = function(e) { 172 | return "css/" + ({ 173 | 41: "Board", 174 | 94: "Login", 175 | 256: "NPS", 176 | 290: "Notification", 177 | 406: "User", 178 | 464: "FeedToNote", 179 | 540: "Explore", 180 | 763: "Search", 181 | 895: "Note", 182 | 898: "minor" 183 | }[e] || e) + "." 
+ { 184 | 41: "b232e5e", 185 | 92: "95cabbe", 186 | 94: "b4971ae", 187 | 256: "5d4f927", 188 | 290: "efde4b1", 189 | 334: "0f69949", 190 | 337: "919c828", 191 | 398: "ffe8b37", 192 | 406: "e3c28d5", 193 | 426: "082db25", 194 | 464: "1bbfe82", 195 | 540: "d6040d3", 196 | 588: "3e8b57e", 197 | 763: "af3c4cd", 198 | 895: "98f4076", 199 | 898: "5a4e17f" 200 | }[e] + ".chunk.css" 201 | } 202 | , 203 | f.g = function() { 204 | if ("object" == typeof globalThis) 205 | return globalThis; 206 | try { 207 | return this || new Function("return this")() 208 | } catch (e) { 209 | if ("object" == typeof window) 210 | return window 211 | } 212 | }(), 213 | f.o = function(e, t) { 214 | return Object.prototype.hasOwnProperty.call(e, t) 215 | } 216 | , 217 | r = {}, 218 | o = "xhs-pc-web:", 219 | f.l = function(e, t, n, i) { 220 | if (r[e]) 221 | r[e].push(t); 222 | else { 223 | var u, c; 224 | if (void 0 !== n) 225 | for (var a = document.getElementsByTagName("script"), d = 0; d < a.length; d++) { 226 | var l = a[d]; 227 | if (l.getAttribute("src") == e || l.getAttribute("data-webpack") == o + n) { 228 | u = l; 229 | break 230 | } 231 | } 232 | u || (c = !0, 233 | (u = document.createElement("script")).charset = "utf-8", 234 | u.timeout = 120, 235 | f.nc && u.setAttribute("nonce", f.nc), 236 | u.setAttribute("data-webpack", o + n), 237 | u.src = e), 238 | r[e] = [t]; 239 | var s = function(t, n) { 240 | u.onerror = u.onload = null, 241 | clearTimeout(b); 242 | var o = r[e]; 243 | if (delete r[e], 244 | u.parentNode && u.parentNode.removeChild(u), 245 | o && o.forEach((function(e) { 246 | return e(n) 247 | } 248 | )), 249 | t) 250 | return t(n) 251 | } 252 | , b = setTimeout(s.bind(null, void 0, { 253 | type: "timeout", 254 | target: u 255 | }), 12e4); 256 | u.onerror = s.bind(null, u.onerror), 257 | u.onload = s.bind(null, u.onload), 258 | c && document.head.appendChild(u) 259 | } 260 | } 261 | , 262 | f.r = function(e) { 263 | "undefined" != typeof Symbol && Symbol.toStringTag && 
Object.defineProperty(e, Symbol.toStringTag, { 264 | value: "Module" 265 | }), 266 | Object.defineProperty(e, "__esModule", { 267 | value: !0 268 | }) 269 | } 270 | , 271 | f.nmd = function(e) { 272 | return e.paths = [], 273 | e.children || (e.children = []), 274 | e 275 | } 276 | , 277 | f.p = "//fe-static.xhscdn.com/formula-static/xhs-pc-web/public/", 278 | function() { 279 | if ("undefined" != typeof document) { 280 | var e = function(e) { 281 | return new Promise((function(t, n) { 282 | var r = f.miniCssF(e) 283 | , o = f.p + r; 284 | if (function(e, t) { 285 | for (var n = document.getElementsByTagName("link"), r = 0; r < n.length; r++) { 286 | var o = (u = n[r]).getAttribute("data-href") || u.getAttribute("href"); 287 | if ("stylesheet" === u.rel && (o === e || o === t)) 288 | return u 289 | } 290 | var i = document.getElementsByTagName("style"); 291 | for (r = 0; r < i.length; r++) { 292 | var u; 293 | if ((o = (u = i[r]).getAttribute("data-href")) === e || o === t) 294 | return u 295 | } 296 | }(r, o)) 297 | return t(); 298 | !function(e, t, n, r, o) { 299 | var i = document.createElement("link"); 300 | i.rel = "stylesheet", 301 | i.type = "text/css", 302 | f.nc && (i.nonce = f.nc), 303 | i.onerror = i.onload = function(n) { 304 | if (i.onerror = i.onload = null, 305 | "load" === n.type) 306 | r(); 307 | else { 308 | var u = n && n.type 309 | , f = n && n.target && n.target.href || t 310 | , c = new Error("Loading CSS chunk " + e + " failed.\n(" + u + ": " + f + ")"); 311 | c.name = "ChunkLoadError", 312 | c.code = "CSS_CHUNK_LOAD_FAILED", 313 | c.type = u, 314 | c.request = f, 315 | i.parentNode && i.parentNode.removeChild(i), 316 | o(c) 317 | } 318 | } 319 | , 320 | i.href = t, 321 | n ? n.parentNode.insertBefore(i, n.nextSibling) : document.head.appendChild(i) 322 | }(e, o, null, t, n) 323 | } 324 | )) 325 | } 326 | , t = { 327 | 577: 0 328 | }; 329 | f.f.miniCss = function(n, r) { 330 | t[n] ? 
r.push(t[n]) : 0 !== t[n] && { 331 | 41: 1, 332 | 92: 1, 333 | 94: 1, 334 | 256: 1, 335 | 290: 1, 336 | 334: 1, 337 | 337: 1, 338 | 398: 1, 339 | 406: 1, 340 | 426: 1, 341 | 464: 1, 342 | 540: 1, 343 | 588: 1, 344 | 763: 1, 345 | 895: 1, 346 | 898: 1 347 | }[n] && r.push(t[n] = e(n).then((function() { 348 | t[n] = 0 349 | } 350 | ), (function(e) { 351 | throw delete t[n], 352 | e 353 | } 354 | ))) 355 | } 356 | } 357 | }(), 358 | function() { 359 | var e = { 360 | 577: 0 361 | }; 362 | f.f.j = function(t, n) { 363 | var r = f.o(e, t) ? e[t] : void 0; 364 | if (0 !== r) 365 | if (r) 366 | n.push(r[2]); 367 | else if (577 != t) { 368 | var o = new Promise((function(n, o) { 369 | r = e[t] = [n, o] 370 | } 371 | )); 372 | n.push(r[2] = o); 373 | var i = f.p + f.u(t) 374 | , u = new Error; 375 | f.l(i, (function(n) { 376 | if (f.o(e, t) && (0 !== (r = e[t]) && (e[t] = void 0), 377 | r)) { 378 | var o = n && ("load" === n.type ? "missing" : n.type) 379 | , i = n && n.target && n.target.src; 380 | u.message = "Loading chunk " + t + " failed.\n(" + o + ": " + i + ")", 381 | u.name = "ChunkLoadError", 382 | u.type = o, 383 | u.request = i, 384 | r[1](u) 385 | } 386 | } 387 | ), "chunk-" + t, t) 388 | } else 389 | e[t] = 0 390 | } 391 | , 392 | f.O.j = function(t) { 393 | return 0 === e[t] 394 | } 395 | ; 396 | var t = function(t, n) { 397 | var r, o, i = n[0], u = n[1], c = n[2], a = 0; 398 | if (i.some((function(t) { 399 | return 0 !== e[t] 400 | } 401 | ))) { 402 | for (r in u) 403 | f.o(u, r) && (f.m[r] = u[r]); 404 | if (c) 405 | var d = c(f) 406 | } 407 | for (t && t(n); a < i.length; a++) 408 | o = i[a], 409 | f.o(e, o) && e[o] && e[o][0](), 410 | e[o] = 0; 411 | return f.O(d) 412 | } 413 | , n = self.webpackChunkxhs_pc_web = self.webpackChunkxhs_pc_web || []; 414 | n.forEach(t.bind(null, 0)), 415 | n.push = t.bind(null, n.push.bind(n)) 416 | }() 417 | zc666 = f; 418 | }(); 419 | //# 
sourceMappingURL=https://picasso-private-1251524319.cos.ap-shanghai.myqcloud.com/data/formula-static/formula/xhs-pc-web/runtime-main.8718828.js.map 420 | try { 421 | require('./xhs_xray_pack1.js'); 422 | } catch (e) { 423 | try { 424 | require('../static/xhs_xray_pack1.js'); 425 | } catch (e) { 426 | require('./static/xhs_xray_pack1.js'); 427 | } 428 | } 429 | try { 430 | require('./xhs_xray_pack2.js'); 431 | } catch (e) { 432 | try { 433 | require('../static/xhs_xray_pack2.js'); 434 | } catch (e) { 435 | require('./static/xhs_xray_pack2.js'); 436 | } 437 | } 438 | var n = zc666(36497) 439 | , o = zc666(609) 440 | , i = zc666(2030); 441 | var a = zc666(81422) 442 | , u = zc666(49600); 443 | 444 | traceId = function() { 445 | var t, e, r, s = arguments.length > 0 && void 0 !== arguments[0] ? arguments[0] : i(); 446 | return o(t = "".concat(n(e = u.fromNumber(s, !0).shiftLeft(23).or(a.Int.seq()).toString(16)).call(e, 16, "0"))).call(t, n(r = new u(a.Int.random(32),a.Int.random(32),!0).toString(16)).call(r, 16, "0")) 447 | } 448 | -------------------------------------------------------------------------------- /xhs_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cv-cat/Spider_XHS/647a7add4b9d1eb3a9c7afa18a6a9205a190dde7/xhs_utils/__init__.py -------------------------------------------------------------------------------- /xhs_utils/common_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from loguru import logger 3 | from dotenv import load_dotenv 4 | 5 | def load_env(): 6 | load_dotenv() 7 | cookies_str = os.getenv('COOKIES') 8 | return cookies_str 9 | 10 | def init(): 11 | media_base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../datas/media_datas')) 12 | excel_base_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../datas/excel_datas')) 13 | for base_path in [media_base_path, 
def trans_cookies(cookies_str):
    """Parse a raw ``Cookie`` header string into a name -> value dict.

    Splits on ``'; '`` when that separator is present, otherwise on ``';'``.
    Only the first ``'='`` in each pair separates name from value, so values
    containing ``'='`` are kept intact.
    """
    sep = '; ' if '; ' in cookies_str else ';'
    cookies = {}
    for item in cookies_str.split(sep):
        name, _, value = item.partition('=')
        cookies[name] = value
    return cookies


def norm_str(s):
    """Return *s* with characters that are illegal in file names removed.

    Bug fix: the original pattern was ``r"|[\\/:*?\"<>| ]+"`` — the leading
    ``|`` creates an empty first alternative that matches (zero-width) at
    every position, so the character class after it was never applied and
    illegal characters survived.  Dropping the stray ``|`` restores the
    intended stripping.  Newlines are removed separately, as before.
    """
    cleaned = re.sub(r"[\\/:*?\"<>| ]+", "", s)
    return cleaned.replace('\n', '').replace('\r', '')


# Control characters (C0 range except \t, \n, \r) that xlsx cells cannot
# store; compiled once at import time instead of per call.
_ILLEGAL_CHARACTERS_RE = re.compile(r'[\000-\010]|[\013-\014]|[\016-\037]')

def norm_text(text):
    """Strip control characters that cannot be written into an xlsx cell."""
    return _ILLEGAL_CHARACTERS_RE.sub('', text)


def timestamp_to_str(timestamp):
    """Convert a millisecond epoch timestamp to ``YYYY-MM-DD HH:MM:SS`` local time."""
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(timestamp / 1000))


# Numeric gender codes used by the profile API mapped to display labels;
# anything unexpected falls back to '未知'.
_GENDER_LABELS = {0: '男', 1: '女'}

def handle_user_info(data, user_id):
    """Flatten a raw user-profile payload into a plain dict.

    ``data`` is the deserialized profile JSON; assumes ``basic_info``,
    ``interactions`` (follows / fans / interaction, in that order) and
    ``tags`` keys are present — TODO confirm against the live API schema.
    ``user_id`` is echoed back and used to build the profile home URL.
    """
    basic = data['basic_info']
    # Tags missing a 'name' are skipped; the original swallowed all errors
    # per tag, this keeps only the well-formed entries.
    tags = [tag['name'] for tag in data['tags'] if isinstance(tag, dict) and 'name' in tag]
    return {
        'user_id': user_id,
        'home_url': f'https://www.xiaohongshu.com/user/profile/{user_id}',
        'nickname': basic['nickname'],
        'avatar': basic['imageb'],
        'red_id': basic['red_id'],
        'gender': _GENDER_LABELS.get(basic['gender'], '未知'),
        'ip_location': basic['ip_location'],
        'desc': basic['desc'],
        'follows': data['interactions'][0]['count'],
        'fans': data['interactions'][1]['count'],
        'interaction': data['interactions'][2]['count'],
        'tags': tags,
    }
data['note_card']['video']['consumer']['origin_video_key'] 97 | # success, msg, video_addr = XHS_Apis.get_note_no_water_video(note_id) 98 | else: 99 | video_cover = None 100 | video_addr = None 101 | tags_temp = data['note_card']['tag_list'] 102 | tags = [] 103 | for tag in tags_temp: 104 | try: 105 | tags.append(tag['name']) 106 | except: 107 | pass 108 | upload_time = timestamp_to_str(data['note_card']['time']) 109 | if 'ip_location' in data['note_card']: 110 | ip_location = data['note_card']['ip_location'] 111 | else: 112 | ip_location = '未知' 113 | return { 114 | 'note_id': note_id, 115 | 'note_url': note_url, 116 | 'note_type': note_type, 117 | 'user_id': user_id, 118 | 'home_url': home_url, 119 | 'nickname': nickname, 120 | 'avatar': avatar, 121 | 'title': title, 122 | 'desc': desc, 123 | 'liked_count': liked_count, 124 | 'collected_count': collected_count, 125 | 'comment_count': comment_count, 126 | 'share_count': share_count, 127 | 'video_cover': video_cover, 128 | 'video_addr': video_addr, 129 | 'image_list': image_list, 130 | 'tags': tags, 131 | 'upload_time': upload_time, 132 | 'ip_location': ip_location, 133 | } 134 | 135 | def handle_comment_info(data): 136 | note_id = data['note_id'] 137 | note_url = data['note_url'] 138 | comment_id = data['id'] 139 | user_id = data['user_info']['user_id'] 140 | home_url = f'https://www.xiaohongshu.com/user/profile/{user_id}' 141 | nickname = data['user_info']['nickname'] 142 | avatar = data['user_info']['image'] 143 | content = data['content'] 144 | show_tags = data['show_tags'] 145 | like_count = data['like_count'] 146 | upload_time = timestamp_to_str(data['create_time']) 147 | try: 148 | ip_location = data['ip_location'] 149 | except: 150 | ip_location = '未知' 151 | pictures = [] 152 | try: 153 | pictures_temp = data['pictures'] 154 | for picture in pictures_temp: 155 | try: 156 | pictures.append(picture['info_list'][1]['url']) 157 | # success, msg, img_url = 
def save_to_xlsx(datas, file_path, type='note'):
    """Write a list of flattened record dicts to an xlsx workbook.

    ``type`` selects the header row: ``'note'`` (default), ``'user'``, or
    anything else for comment records.  Each record's values are written in
    dict insertion order, which must line up with the chosen header list —
    assumes callers pass dicts built by the matching handle_* function;
    TODO confirm.
    """
    wb = openpyxl.Workbook()
    ws = wb.active
    if type == 'note':
        headers = ['笔记id', '笔记url', '笔记类型', '用户id', '用户主页url', '昵称', '头像url', '标题', '描述', '点赞数量', '收藏数量', '评论数量', '分享数量', '视频封面url', '视频地址url', '图片地址url列表', '标签', '上传时间', 'ip归属地']
    elif type == 'user':
        headers = ['用户id', '用户主页url', '用户名', '头像url', '小红书号', '性别', 'ip地址', '介绍', '关注数量', '粉丝数量', '作品被赞和收藏数量', '标签']
    else:
        headers = ['笔记id', '笔记url', '评论id', '用户id', '用户主页url', '昵称', '头像url', '评论内容', '评论标签', '点赞数量', '上传时间', 'ip归属地', '图片地址url列表']
    ws.append(headers)
    for data in datas:
        # Coerce every value to text and strip characters xlsx cannot store.
        # (The original built a throwaway dict just to call .values() on it.)
        ws.append([norm_text(str(v)) for v in data.values()])
    wb.save(file_path)
    logger.info(f'数据保存至 {file_path}')

def download_media(path, name, url, type):
    """Download one media file into directory ``path``.

    ``type`` ``'image'`` saves ``<name>.jpg`` in one shot; ``'video'``
    streams ``<name>.mp4`` in 1 MiB chunks.  Any other type is a no-op.
    No timeout or status check is performed — NOTE(review): consider adding
    both; failures currently surface only via the caller's retry decorator.
    """
    if type == 'image':
        content = requests.get(url).content
        with open(path + '/' + name + '.jpg', mode="wb") as f:
            f.write(content)
    elif type == 'video':
        chunk_size = 1024 * 1024
        # Use the response as a context manager so the streamed connection
        # is released — the original never closed it (connection leak).
        with requests.get(url, stream=True) as res:
            with open(path + '/' + name + '.mp4', mode="wb") as f:
                for chunk in res.iter_content(chunk_size=chunk_size):
                    f.write(chunk)
# Display label -> note-dict key, in the exact order the detail file lists them.
_NOTE_DETAIL_FIELDS = [
    ('笔记id', 'note_id'),
    ('笔记url', 'note_url'),
    ('笔记类型', 'note_type'),
    ('用户id', 'user_id'),
    ('用户主页url', 'home_url'),
    ('昵称', 'nickname'),
    ('头像url', 'avatar'),
    ('标题', 'title'),
    ('描述', 'desc'),
    ('点赞数量', 'liked_count'),
    ('收藏数量', 'collected_count'),
    ('评论数量', 'comment_count'),
    ('分享数量', 'share_count'),
    ('视频封面url', 'video_cover'),
    ('视频地址url', 'video_addr'),
    ('图片地址url列表', 'image_list'),
    ('标签', 'tags'),
    ('上传时间', 'upload_time'),
    ('ip归属地', 'ip_location'),
]

def save_note_detail(note, path):
    """Write a human-readable summary of one note to ``<path>/detail.txt``.

    One ``label: value`` line per field, in the fixed order above; the file
    is overwritten on each call.
    """
    with open(f'{path}/detail.txt', mode="w", encoding="utf-8") as f:
        for label, key in _NOTE_DETAIL_FIELDS:
            f.write(f"{label}: {note[key]}\n")
def check_and_create_path(path):
    """Create ``path`` (including parents) if it does not already exist."""
    # exist_ok avoids the check-then-create race of the original
    # os.path.exists() + os.makedirs() pair.
    os.makedirs(path, exist_ok=True)


# Alphabet used for x-b3-traceid values (lowercase hex, same order as the
# original hard-coded string).
_HEX_ALPHABET = "abcdef0123456789"

def generate_x_b3_traceid(len=16):
    """Return a random lowercase-hex trace id of ``len`` characters.

    The parameter keeps its original name for backward compatibility with
    keyword callers, even though it shadows the builtin ``len``.
    """
    # random.choices replaces the manual math.floor(16 * random.random()) loop.
    return "".join(random.choices(_HEX_ALPHABET, k=len))

def generate_xs_xs_common(a1, api, data=''):
    """Compute the x-s / x-t / x-s-common signing headers.

    Delegates to ``js`` (the execjs-compiled xhs_xs_xsc_56.js module loaded
    at import time); ``a1`` is the device cookie the signature is bound to.
    Returns the (xs, xt, xs_common) triple.
    """
    ret = js.call('get_request_headers_params', api, data, a1)
    return ret['xs'], ret['xt'], ret['xs_common']

def generate_xs(a1, api, data=''):
    """Compute only the x-s / x-t signing headers via the bundled JS."""
    ret = js.call('get_xs', api, data, a1)
    return ret['X-s'], ret['X-t']
# Static headers for plain page requests to www.xiaohongshu.com.
_COMMON_HEADERS = {
    "authority": "www.xiaohongshu.com",
    "accept": 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    "accept-language": "zh-CN,zh;q=0.9",
    "cache-control": "no-cache",
    "pragma": "no-cache",
    "referer": "https://www.xiaohongshu.com/",
    "sec-ch-ua": '"Chromium";v="122", "Not(A:Brand";v="24", "Google Chrome";v="122"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "same-origin",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
}

def get_common_headers():
    """Return a fresh copy of the headers used for plain page requests."""
    return dict(_COMMON_HEADERS)

# Static part of the headers for signed API requests to edith.xiaohongshu.com;
# the four x-* signing fields start empty and are filled in by generate_headers.
_REQUEST_HEADERS_BASE = {
    "authority": "edith.xiaohongshu.com",
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "cache-control": "no-cache",
    "content-type": "application/json;charset=UTF-8",
    "origin": "https://www.xiaohongshu.com",
    "pragma": "no-cache",
    "referer": "https://www.xiaohongshu.com/",
    "sec-ch-ua": '"Not A(Brand";v="99", "Microsoft Edge";v="121", "Chromium";v="121"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0",
    "x-b3-traceid": "",
    "x-s": "",
    "x-s-common": "",
    "x-t": "",
}

def get_request_headers_template():
    """Return the header template for signed API requests.

    A fresh dict per call, with x-xray-traceid generated anew each time
    (same as the original inline literal).
    """
    headers = dict(_REQUEST_HEADERS_BASE)
    headers["x-xray-traceid"] = generate_xray_traceid()
    return headers
def generate_request_params(cookies_str, api, data=''):
    """Build (headers, cookies, body) for one signed API request.

    Parses the raw cookie string, signs using its 'a1' device cookie, and
    returns the ready-to-send headers, the cookie dict, and the serialized
    request body.
    """
    cookie_dict = trans_cookies(cookies_str)
    headers, payload = generate_headers(cookie_dict['a1'], api, data)
    return headers, cookie_dict, payload

def splice_str(api, params):
    """Append ``params`` to ``api`` as a query string.

    Values are concatenated verbatim — no URL encoding — presumably because
    the signing JS must see the exact string sent on the wire (verify before
    changing).  ``None`` values become empty strings; an empty ``params``
    returns ``api`` unchanged.
    """
    if not params:
        return api
    pairs = []
    for key, value in params.items():
        pairs.append(key + '=' + ('' if value is None else value))
    return api + '?' + '&'.join(pairs)