├── .env ├── author ├── qq.jpg ├── wx.png ├── group.jpg ├── wx_pay.png └── zfb_pay.jpg ├── dy_apis ├── __init__.py └── douyin_api.py ├── dy_live ├── __init__.py └── server.py ├── utils ├── __init__.py ├── common_util.py ├── cookie_util.py ├── dy_util.py └── data_util.py ├── requirements.txt ├── package.json ├── .gitignore ├── Dockerfile ├── static ├── Response.proto ├── Request.proto ├── Response_pb2.py ├── Live.proto ├── Request_pb2.py └── Live_pb2.py ├── README.md └── main.py /.env: -------------------------------------------------------------------------------- 1 | DY_COOKIES='' 2 | DY_LIVE_COOKIES='' -------------------------------------------------------------------------------- /author/qq.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cv-cat/DouYin_Spider/HEAD/author/qq.jpg -------------------------------------------------------------------------------- /author/wx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cv-cat/DouYin_Spider/HEAD/author/wx.png -------------------------------------------------------------------------------- /author/group.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cv-cat/DouYin_Spider/HEAD/author/group.jpg -------------------------------------------------------------------------------- /author/wx_pay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cv-cat/DouYin_Spider/HEAD/author/wx_pay.png -------------------------------------------------------------------------------- /author/zfb_pay.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cv-cat/DouYin_Spider/HEAD/author/zfb_pay.jpg -------------------------------------------------------------------------------- /dy_apis/__init__.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2024/6/8 下午3:27 4 | # @Author : crush0 5 | # @Description : 6 | -------------------------------------------------------------------------------- /dy_live/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2024/6/8 下午10:38 4 | # @Author : crush0 5 | # @Description : 6 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # @Time : 2024/6/8 下午3:27 4 | # @Author : crush0 5 | # @Description : 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | asyncio 2 | aiofiles 3 | loguru 4 | python-dotenv 5 | retry 6 | openpyxl 7 | urllib3 8 | PyExecJS 9 | requests 10 | argparse 11 | websockets 12 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "canvas": "^2.11.2", 4 | "jsdom": "^25.0.1", 5 | "jsrsasign": "^11.1.0", 6 | "sdenv": "^0.2.2", 7 | "vm": "^0.1.0" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | node_modules/ 3 | *.so 4 | .Python 5 | build/ 6 | develop-eggs/ 7 | dist/ 8 | downloads/ 9 | eggs/ 10 | .eggs/ 11 | lib/ 12 | lib64/ 13 | parts/ 14 | sdist/ 15 | var/ 16 | wheels/ 17 | MANIFEST 18 | *.manifest 19 | *.spec 20 | .cache 21 | *.log 22 | local_settings.py 23 | db.sqlite3 24 | 
import os

# Auth objects shared at module level; both are populated by load_env().
dy_auth = None
dy_live_auth = None


def load_env():
    """Read the cookies from ``.env`` and build the two Douyin auth objects.

    ``DY_COOKIES`` feeds ``dy_auth`` and ``DY_LIVE_COOKIES`` feeds
    ``dy_live_auth``; both are stored in the module globals of the same name.

    :return: the auth object built from ``DY_COOKIES``.
    """
    global dy_auth, dy_live_auth
    # Imported locally (as the auth builder already was) so the directory
    # helpers below stay usable without these packages being importable.
    from dotenv import load_dotenv
    from builder.auth import DouyinAuth

    load_dotenv()
    # os.getenv returns None for an unset variable; fall back to the same
    # empty string the shipped .env uses so the auth builder always gets a str.
    cookies_dy = os.getenv('DY_COOKIES', '')
    cookies_live = os.getenv('DY_LIVE_COOKIES', '')

    dy_auth = DouyinAuth()
    dy_auth.perepare_auth(cookies_dy, "", "")  # method name spelled as in DouyinAuth
    dy_live_auth = DouyinAuth()
    dy_live_auth.perepare_auth(cookies_live, "", "")
    return dy_auth


def ensure_data_dirs(data_root):
    """Create the media/excel output directories below *data_root*.

    :param data_root: directory that will contain ``media_datas`` and
        ``excel_datas``.
    :return: dict with the keys ``'media'`` and ``'excel'`` mapped to the
        absolute directory paths.
    """
    paths = {
        'media': os.path.abspath(os.path.join(data_root, 'media_datas')),
        'excel': os.path.abspath(os.path.join(data_root, 'excel_datas')),
    }
    for path in paths.values():
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(path, exist_ok=True)
    return paths


def init():
    """Prepare the output directories and load authentication.

    :return: ``(auth, base_path)`` where ``auth`` is the object returned by
        ``load_env`` and ``base_path`` is the dict from ``ensure_data_dirs``
        rooted at ``../datas`` relative to this file.
    """
    data_root = os.path.join(os.path.dirname(__file__), '../datas')
    base_path = ensure_data_dirs(data_root)
    auth = load_env()
    return auth, base_path
def handle_request(request):
    """Record the ``webid`` query parameter of Douyin profile API requests.

    Registered as the page's ``request`` event handler. Requests whose URL does
    not start with the profile endpoint are ignored.

    :param request: object exposing the request URL as ``request.url``.
    """
    global webid
    url = request.url
    if not url.startswith('https://www.douyin.com/aweme/v1/web/user/profile/other/'):
        return
    values = parse_qs(urlparse(url).query).get('webid')
    # A matching request without a webid must not raise inside the event
    # callback; simply keep waiting for one that carries it.
    if values:
        webid = values[0]


# Check once per second whether the webid has been captured.
async def check_webid():
    """Wait, polling every second, until ``handle_request`` has stored a webid."""
    while webid is None:
        await asyncio.sleep(1)


async def get_ttwid_and_webid():
    """Open a Douyin profile page in Chrome and harvest webid, msToken and cookies.

    Results are stored in the module globals ``webid``, ``msToken`` and
    ``cookies``. The browser is launched with ``headless=False`` and the
    function waits until a webid has been captured.
    """
    global webid, msToken, cookies
    # Reset values from a previous run; otherwise check_webid() would return
    # immediately and a refresh would hand back the stale webid/msToken.
    webid = None
    msToken = None

    url = 'https://www.douyin.com/user/MS4wLjABAAAAEpmH344CkCw2M58T33Q8TuFpdvJsOyaZcbWxAMc6H03wOVFf1Ow4mPP94TDUS4Us'
    headless = False
    print('如出现验证过程,请手动验证')
    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=headless,
            args=[
                '--disable-blink-features=AutomationControlled',
            ],
            channel='chrome'
        )
        try:
            page = await browser.new_page()
            page.on("request", handle_request)
            await page.goto(url)
            await asyncio.sleep(3)
            await check_webid()
            page_cookies = await page.context.cookies()
        finally:
            # Close the browser even when navigation or polling fails.
            await browser.close()

    cookies = {cookie['name']: cookie['value'] for cookie in page_cookies}
    msToken = cookies.get('msToken')


def get_new_cookies():
    """Run the browser flow and return the captured values.

    :return: dict with the keys ``'webid'``, ``'msToken'`` and ``'cookies'``
        (the latter a name -> value dict of the page's cookies).
    """
    asyncio.run(get_ttwid_and_webid())
    return {
        'webid': webid,
        'msToken': msToken,
        'cookies': cookies,
    }


if __name__ == '__main__':
    print(get_new_cookies())
= 6; 15 | string build_number = 7; 16 | RequestBody body = 8; 17 | string device_id = 9; 18 | string channel = 10; 19 | string device_platform = 11; 20 | string device_type = 12; 21 | string os_version = 13; 22 | string version_code = 14; 23 | map<string, string> headers = 15; 24 | int32 config_id = 16; 25 | TokenInfo token_info = 17; 26 | int32 auth_type = 18; 27 | string biz = 21; 28 | string access = 22; 29 | string ts_sign = 23; 30 | string sdk_cert = 24; 31 | string reuqest_sign = 25; 32 | } 33 | 34 | message RequestBody { 35 | oneof body { 36 | SendMessageRequestBody send_message_body = 100; 37 | CreateConversationV2RequestBody create_conversation_v2_body = 609; 38 | GetConversationInfoListV2RequestBody get_conversation_info_list_v2_body = 610; 39 | } 40 | } 41 | 42 | message SendMessageRequestBody { 43 | string conversation_id = 1; 44 | int32 conversation_type = 2; 45 | int64 conversation_short_id = 3; 46 | string content = 4; 47 | repeated ExtValue ext = 5; 48 | int32 message_type = 6; 49 | string ticket = 7; 50 | string client_message_id = 8; 51 | repeated int64 mentioned_users = 9; 52 | bool ignore_badge_count = 10; 53 | ReferencedMessageInfo ref_msg_info = 11; 54 | } 55 | 56 | message ReferencedMessageInfo { 57 | int64 original_message_id = 1; 58 | string original_message_sender = 2; 59 | int64 original_message_timestamp = 3; 60 | } 61 | 62 | message TokenInfo { 63 | int32 mark_id = 1; 64 | int32 type = 2; 65 | int32 app_id = 3; 66 | int64 user_id = 4; 67 | int64 timestamp = 5; 68 | } 69 | 70 | message CreateConversationV2RequestBody { 71 | int32 conversation_type = 1; 72 | repeated int64 participants = 2; 73 | bool persistent = 3; 74 | string idempotent_id = 4; 75 | string name = 5; 76 | string avatar_url = 6; 77 | string description = 7; 78 | map<string, string> biz_ext = 8; 79 | } 80 | 81 | message GetConversationInfoListV2RequestBody { 82 | GetConversationInfoListV2ResponseBodyData data = 1; 83 | } 84 | 85 | message GetConversationInfoListV2ResponseBodyData { 86 | string
conversation_id = 1; 87 | int64 conversation_short_id = 2; 88 | int32 conversation_type = 3; 89 | } -------------------------------------------------------------------------------- /static/Response_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 3 | # NO CHECKED-IN PROTOBUF GENCODE 4 | # source: Response.proto 5 | # Protobuf Python Version: 5.27.1 6 | """Generated protocol buffer code.""" 7 | from google.protobuf import descriptor as _descriptor 8 | from google.protobuf import descriptor_pool as _descriptor_pool 9 | from google.protobuf import runtime_version as _runtime_version 10 | from google.protobuf import symbol_database as _symbol_database 11 | from google.protobuf.internal import builder as _builder 12 | _runtime_version.ValidateProtobufRuntimeVersion( 13 | _runtime_version.Domain.PUBLIC, 14 | 5, 15 | 27, 16 | 1, 17 | '', 18 | 'Response.proto' 19 | ) 20 | # @@protoc_insertion_point(imports) 21 | 22 | _sym_db = _symbol_database.Default() 23 | 24 | 25 | 26 | 27 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x0eResponse.proto\"\x82\x01\n\x08Response\x12\x0b\n\x03\x63md\x18\x01 \x01(\x05\x12\x13\n\x0bsequence_id\x18\x02 \x01(\x03\x12\x12\n\nerror_desc\x18\x03 \x01(\t\x12\x0f\n\x07message\x18\x04 \x01(\t\x12\x12\n\ninbox_type\x18\x05 \x01(\x03\x12\x1b\n\x04\x62ody\x18\x06 \x01(\x0b\x32\r.ResponseBody\"\xf8\x01\n\x0cResponseBody\x12\x30\n\x12new_message_notify\x18\xf4\x03 \x01(\x0b\x32\x11.NewMessageNotifyH\x00\x12N\n\x1b\x63reate_conversation_v2_body\x18\xe1\x04 \x01(\x0b\x32&.GetConversationInfoListV2ResponseBodyH\x00\x12^\n+get_conversation_info_list_v2_response_body\x18\xe2\x04 \x01(\x0b\x32&.GetConversationInfoListV2ResponseBodyH\x00\x42\x06\n\x04\x62ody\"z\n\x10NewMessageNotify\x12\x17\n\x0f\x63onversation_id\x18\x02 \x01(\t\x12\x19\n\x11\x63onversation_type\x18\x03 \x01(\x05\x12\x13\n\x0bnotify_type\x18\x04 
\x01(\x05\x12\x1d\n\x07message\x18\x05 \x01(\x0b\x32\x0c.MessageBody\"\xd1\x01\n\x0bMessageBody\x12\x17\n\x0f\x63onversation_id\x18\x01 \x01(\t\x12\x19\n\x11\x63onversation_type\x18\x02 \x01(\x05\x12\x19\n\x11server_message_id\x18\x03 \x01(\x03\x12\x1d\n\x15index_in_conversation\x18\x04 \x01(\x03\x12\x1d\n\x15\x63onversation_short_id\x18\x05 \x01(\x03\x12\x14\n\x0cmessage_type\x18\x06 \x01(\x05\x12\x0e\n\x06sender\x18\x07 \x01(\x03\x12\x0f\n\x07\x63ontent\x18\x08 \x01(\t\"g\n%GetConversationInfoListV2ResponseBody\x12>\n\x16\x63onversation_info_list\x18\x01 \x03(\x0b\x32\x1e.GetConversationInfoV2Response\"\x82\x01\n\x1dGetConversationInfoV2Response\x12\x17\n\x0f\x63onversation_id\x18\x01 \x01(\t\x12\x1d\n\x15\x63onversation_short_id\x18\x02 \x01(\x03\x12\x19\n\x11\x63onversation_type\x18\x03 \x01(\x05\x12\x0e\n\x06ticket\x18\x04 \x01(\tb\x06proto3') 28 | 29 | _globals = globals() 30 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) 31 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'Response_pb2', _globals) 32 | if not _descriptor._USE_C_DESCRIPTORS: 33 | DESCRIPTOR._loaded_options = None 34 | _globals['_RESPONSE']._serialized_start=19 35 | _globals['_RESPONSE']._serialized_end=149 36 | _globals['_RESPONSEBODY']._serialized_start=152 37 | _globals['_RESPONSEBODY']._serialized_end=400 38 | _globals['_NEWMESSAGENOTIFY']._serialized_start=402 39 | _globals['_NEWMESSAGENOTIFY']._serialized_end=524 40 | _globals['_MESSAGEBODY']._serialized_start=527 41 | _globals['_MESSAGEBODY']._serialized_end=736 42 | _globals['_GETCONVERSATIONINFOLISTV2RESPONSEBODY']._serialized_start=738 43 | _globals['_GETCONVERSATIONINFOLISTV2RESPONSEBODY']._serialized_end=841 44 | _globals['_GETCONVERSATIONINFOV2RESPONSE']._serialized_start=844 45 | _globals['_GETCONVERSATIONINFOV2RESPONSE']._serialized_end=974 46 | # @@protoc_insertion_point(module_scope) 47 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 |

2 | 3 | 4 | Spider_XHS logo 5 | 6 | 7 |

8 |
9 | 10 | Python 3.7+ 11 | 12 | 13 | NodeJS 18+ 14 | 15 |
16 | 17 | # 🎶DouYin_Spider 18 | 19 | **✨ 专业的抖音数据采集解决方案,支持作品爬取,保存格式为excel或者media** 20 | 21 | **⚠️ 任何涉及数据注入的操作都是不被允许的,本项目仅供学习交流使用,如有违反,后果自负** 22 | 23 | ## 🌟 功能特性 24 | 25 | - ✅ **多维度数据采集** 26 | - 用户主页信息 27 | - 作品详细内容 28 | - 智能搜索结果抓取 29 | - 🚀 **高性能架构** 30 | - 自动重试机制 31 | - 🔒 **安全稳定** 32 | - 抖音最新API适配 33 | - 异常处理机制 34 | - proxy代理 35 | - 🎨 **便捷管理** 36 | - 结构化目录存储 37 | - 格式化输出(JSON/EXCEL/MEDIA) 38 | 39 | ## 🎨效果图 40 | ### 处理后的所有用户 41 | ![image](https://github.com/cv-cat/DouYin_Spider/assets/94289429/3f3ff858-c443-4a68-bae6-1d16ef43011d) 42 | ### 某个用户所有的视频\图集 43 | ![image](https://github.com/cv-cat/DouYin_Spider/assets/94289429/fa6f5e65-7e3c-4abf-b140-cd20c33d3b43) 44 | ### 某个视频\图集具体的内容 45 | ![image](https://github.com/cv-cat/DouYin_Spider/assets/94289429/16cfc027-6186-4914-bca4-901f886a9b82) 46 | ### 某个直播时的具体弹幕发言和礼物数据 47 | ![image](https://github.com/cv-cat/DouYin_Spider/assets/94289429/e2cde1f1-6309-44fe-8aa3-bca2821bf30d) 48 | ### 保存的excel 49 | ![image](https://github.com/user-attachments/assets/5dfd8fb4-7597-4f54-af6a-9ab8ba766b7c) 50 | 51 | 52 | 53 | ## 🛠️ 快速开始 54 | ### ⛳运行环境 55 | - Python 3.7+ 56 | - Node.js 18+ 57 | 58 | ### 🎯安装依赖 59 | ``` 60 | pip install -r requirements.txt 61 | npm install 62 | ``` 63 | 64 | ### 🎨配置文件 65 | 这里以抖音的cookie获取为例 66 | 67 | 注意.env文件有两个变量,一个是打开www.douyin.com这个域名获取的,另一个是打开live.douyin.com这个域名获取的,第一个用于爬虫,第二个用于直播间监听 68 | 69 | 配置文件在项目根目录.env文件中,将下图自己的登录cookie放入其中,cookie获取➡️在浏览器f12打开控制台,点击网络,点击fetch,找一个接口点开 70 | ![image](https://github.com/user-attachments/assets/6a7e4ecb-0432-4581-890a-577e0eae463d) 71 | 72 | 复制cookie到.env文件中(注意!登录抖音后的cookie才是有效的,不登录没有用) 73 | ![image](https://github.com/user-attachments/assets/60291f3f-9b69-423f-8b11-167278d44639) 74 | 75 | 76 | 77 | ### 🚀运行项目 78 | ``` 79 | python main.py 80 | python dy_live/server.py 81 | ``` 82 | 83 | ### 🗝️注意事项 84 | - main.py中的代码是爬虫的入口,可以根据自己的需求进行修改 85 | - dy_apis/douyin_api.py 中的代码包含了所有的api接口,可以根据自己的需求进行修改 86 | - dy_live/server.py 中的代码包含了直播间监听的接口,可以根据自己的需求进行修改 87 | 88 | 89 | ## 🍥日志 90 | 
91 | | 日期 | 说明 | 92 | | -------- | ------------------------------------ | 93 | | 23/10/05 | - 项目完成。 | 94 | | 23/10/17 | - 首次提交。 | 95 | | 23/10/18 | - 监听直播间弹幕和礼物。 | 96 | | 23/10/21 | - 新增搜索智能排序和限制时间。 | 97 | | 23/10/21 | - 新增可视化界面到release v1.1.0。 | 98 | | 23/10/25 | - 新增issue提出的输出直播间消息时包括用户等级。 | 99 | | 23/10/28 | - 遇到验证码请手动点击!Fix Some Bugs。 | 100 | | 23/11/11 | - 修复了很多很多大家的bug~~,关于v.dy格式的url正在处理 | 101 | | 23/12/22 | - 修复了直播间监控 | 102 | | 25/06/07 | - 开放所有之前闭源的代码,包括数据爬取和直播间监听 | 103 | 104 | ## 🧸额外说明 105 | 1. 感谢star⭐和follow📰!不时更新 106 | 2. 作者的联系方式在主页里,有问题可以随时联系我 107 | 3. 可以关注下作者的其他项目,欢迎 PR 和 issue 108 | 4. 感谢赞助!如果此项目对您有帮助,请作者喝一杯奶茶~~ (开心一整天😊😊) 109 | 5. thank you~~~ 110 | 111 |
112 | 微信赞赏码  113 | 支付宝收款码 114 |
115 | 116 | 117 | ## 📈 Star 趋势 118 | 119 | 120 | 121 | 122 | Star History Chart 123 | 124 | 125 | 126 | 127 | ## 🍔 交流群 128 | 过期请加作者主页wx 129 | 130 | 5355a0f82398ee2052f2e659328d737b 131 | 132 | 133 | 134 | -------------------------------------------------------------------------------- /static/Live.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | message HeadersList { 4 | string key = 1; 5 | string value = 2; 6 | } 7 | 8 | message PushFrame { 9 | uint64 seqId = 1; 10 | uint64 logId = 2; 11 | uint64 service = 3; 12 | uint64 method = 4; 13 | repeated HeadersList headersList = 5; 14 | string payloadEncoding = 6; 15 | string payloadType = 7; 16 | bytes payload = 8; 17 | string logIdNew = 9; 18 | } 19 | 20 | message Message { 21 | string method = 1; 22 | bytes payload = 2; 23 | int64 msgId = 3; 24 | int32 msgType = 4; 25 | int64 offset = 5; 26 | bool needWrdsStore = 6; 27 | int64 wrdsVersion = 7; 28 | string wrdsSubKey = 8; 29 | } 30 | 31 | message LiveResponse { 32 | repeated Message messagesList = 1; 33 | string cursor = 2; 34 | uint64 fetchInterval = 3; 35 | uint64 now = 4; 36 | string internalExt = 5; 37 | uint32 fetchType = 6; 38 | map routeParams = 7; 39 | uint64 heartbeatDuration = 8; 40 | bool needAck = 9; 41 | string pushServer = 10; 42 | string liveCursor = 11; 43 | bool historyNoMore = 12; 44 | string proxyServer = 13; 45 | } 46 | message Image { 47 | Content content = 8; 48 | } 49 | message Content { 50 | string name = 1; 51 | string font_color = 2; 52 | int64 level = 3; 53 | string alternative_text = 4; 54 | } 55 | message User { 56 | int64 id = 1; 57 | int64 short_id = 2; 58 | string nickname = 3; 59 | int32 gender = 4; 60 | string signature = 5; 61 | int32 level = 6; 62 | int64 birthday = 7; 63 | string telephone = 8; 64 | bool verified = 12; 65 | int32 experience = 13; 66 | string city = 14; 67 | int32 status = 15; 68 | int64 create_time = 16; 69 | int64 modify_time = 17; 70 | int32 
secret = 18; 71 | string share_qrcode_uri = 19; 72 | int32 income_share_percent = 20; 73 | repeated Image badge_image_list = 21; 74 | string special_id = 26; 75 | int64 top_vip_no = 31; 76 | int64 pay_score = 34; 77 | int64 ticket_count = 35; 78 | int32 link_mic_stats = 37; 79 | string display_id = 38; 80 | bool with_commerce_permission = 39; 81 | bool with_fusion_shop_entry = 40; 82 | int64 total_recharge_diamond_count = 41; 83 | string verified_content = 43; 84 | repeated User top_fans = 45; 85 | string sec_uid = 46; 86 | int32 user_role = 47; 87 | int32 authorization_info = 54; 88 | int32 adversary_authorization_info = 55; 89 | int32 adversary_user_status = 58; 90 | repeated int64 commerce_webcast_config_ids = 60; 91 | string location_city = 63; 92 | string remark_name = 65; 93 | int32 mystery_man = 66; 94 | string web_rid = 67; 95 | string desensitized_nickname = 68; 96 | bool is_anonymous = 71; 97 | int32 consume_diamond_level = 72; 98 | string webcast_uid = 73; 99 | bool allow_be_located = 1001; 100 | bool allow_find_by_contacts = 1002; 101 | bool allow_others_download_video = 1003; 102 | bool allow_others_download_when_sharing_video = 1004; 103 | bool allow_share_show_profile = 1005; 104 | bool allow_show_in_gossip = 1006; 105 | bool allow_show_my_action = 1007; 106 | bool allow_strange_comment = 1008; 107 | bool allow_unfollower_comment = 1009; 108 | bool allow_use_linkmic = 1010; 109 | string bg_img_url = 1013; 110 | string birthday_description = 1014; 111 | bool birthday_valid = 1015; 112 | int32 block_status = 1016; 113 | int32 comment_restrict = 1017; 114 | string constellation = 1018; 115 | int32 disable_ichat = 1019; 116 | int64 enable_ichat_img = 1020; 117 | int32 exp = 1021; 118 | int64 fan_ticket_count = 1022; 119 | bool fold_stranger_chat = 1023; 120 | int64 follow_status = 1024; 121 | bool hotsoon_verified = 1025; 122 | string hotsoon_verified_reason = 1026; 123 | int32 ichat_restrict_type = 1027; 124 | string id_str = 1028; 125 | bool 
is_follower = 1029; 126 | bool is_following = 1030; 127 | bool need_profile_guide = 1031; 128 | int64 pay_scores = 1032; 129 | bool push_comment_status = 1033; 130 | bool push_digg = 1034; 131 | bool push_follow = 1035; 132 | bool push_friend_action = 1036; 133 | bool push_ichat = 1037; 134 | bool push_status = 1038; 135 | bool push_video_post = 1039; 136 | bool push_video_recommend = 1040; 137 | bool verified_mobile = 1042; 138 | string verified_reason = 1043; 139 | bool with_car_management_permission = 1044; 140 | int32 age_range = 1045; 141 | int64 watch_duration_month = 1046; 142 | } 143 | 144 | message ChatMessage { 145 | User user = 2; 146 | string content = 3; 147 | bool visibleToSender = 4; 148 | } 149 | 150 | message GiftStruct { 151 | int64 id = 5; 152 | string name = 16; 153 | } 154 | 155 | message GiftMessage { 156 | string giftId = 2; 157 | int64 comboCount = 6; 158 | User user = 7; 159 | User toUser = 8; 160 | GiftStruct gift = 15; 161 | } 162 | 163 | message MemberMessage { 164 | User user = 2; 165 | int64 memberCount = 3; 166 | } 167 | 168 | message LikeMessage { 169 | int64 count = 2; 170 | int64 total = 3; 171 | int64 color = 4; 172 | User user = 5; 173 | string icon = 6; 174 | } 175 | 176 | message RoomStatsMessage { 177 | string displayShort = 2; 178 | string displayMiddle = 3; 179 | string displayLong = 4; 180 | int64 displayValue = 5; 181 | int64 displayVersion = 6; 182 | bool incremental = 7; 183 | bool isHidden = 8; 184 | int64 total = 9; 185 | int64 displayType = 10; 186 | } 187 | 188 | message SocialMessage { 189 | User user = 2; 190 | int64 shareType = 3; 191 | int64 action = 4; 192 | string shareTarget = 5; 193 | uint64 followCount = 6; 194 | } -------------------------------------------------------------------------------- /static/Request_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
3 | # NO CHECKED-IN PROTOBUF GENCODE 4 | # source: Request.proto 5 | # Protobuf Python Version: 5.27.1 6 | """Generated protocol buffer code.""" 7 | from google.protobuf import descriptor as _descriptor 8 | from google.protobuf import descriptor_pool as _descriptor_pool 9 | from google.protobuf import runtime_version as _runtime_version 10 | from google.protobuf import symbol_database as _symbol_database 11 | from google.protobuf.internal import builder as _builder 12 | _runtime_version.ValidateProtobufRuntimeVersion( 13 | _runtime_version.Domain.PUBLIC, 14 | 5, 15 | 27, 16 | 1, 17 | '', 18 | 'Request.proto' 19 | ) 20 | # @@protoc_insertion_point(imports) 21 | 22 | _sym_db = _symbol_database.Default() 23 | 24 | 25 | 26 | 27 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rRequest.proto\"&\n\x08\x45xtValue\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t\"\x94\x04\n\x07Request\x12\x0b\n\x03\x63md\x18\x01 \x01(\x05\x12\x13\n\x0bsequence_id\x18\x02 \x01(\x03\x12\x13\n\x0bsdk_version\x18\x03 \x01(\t\x12\r\n\x05token\x18\x04 \x01(\t\x12\r\n\x05refer\x18\x05 \x01(\x05\x12\x12\n\ninbox_type\x18\x06 \x01(\x03\x12\x14\n\x0c\x62uild_number\x18\x07 \x01(\t\x12\x1a\n\x04\x62ody\x18\x08 \x01(\x0b\x32\x0c.RequestBody\x12\x11\n\tdevice_id\x18\t \x01(\t\x12\x0f\n\x07\x63hannel\x18\n \x01(\t\x12\x17\n\x0f\x64\x65vice_platform\x18\x0b \x01(\t\x12\x13\n\x0b\x64\x65vice_type\x18\x0c \x01(\t\x12\x12\n\nos_version\x18\r \x01(\t\x12\x14\n\x0cversion_code\x18\x0e \x01(\t\x12&\n\x07headers\x18\x0f \x03(\x0b\x32\x15.Request.HeadersEntry\x12\x11\n\tconfig_id\x18\x10 \x01(\x05\x12\x1e\n\ntoken_info\x18\x11 \x01(\x0b\x32\n.TokenInfo\x12\x11\n\tauth_type\x18\x12 \x01(\x05\x12\x0b\n\x03\x62iz\x18\x15 \x01(\t\x12\x0e\n\x06\x61\x63\x63\x65ss\x18\x16 \x01(\t\x12\x0f\n\x07ts_sign\x18\x17 \x01(\t\x12\x10\n\x08sdk_cert\x18\x18 \x01(\t\x12\x14\n\x0creuqest_sign\x18\x19 \x01(\t\x1a.\n\x0cHeadersEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 
\x01(\t:\x02\x38\x01\"\xeb\x01\n\x0bRequestBody\x12\x34\n\x11send_message_body\x18\x64 \x01(\x0b\x32\x17.SendMessageRequestBodyH\x00\x12H\n\x1b\x63reate_conversation_v2_body\x18\xe1\x04 \x01(\x0b\x32 .CreateConversationV2RequestBodyH\x00\x12T\n\"get_conversation_info_list_v2_body\x18\xe2\x04 \x01(\x0b\x32%.GetConversationInfoListV2RequestBodyH\x00\x42\x06\n\x04\x62ody\"\xb8\x02\n\x16SendMessageRequestBody\x12\x17\n\x0f\x63onversation_id\x18\x01 \x01(\t\x12\x19\n\x11\x63onversation_type\x18\x02 \x01(\x05\x12\x1d\n\x15\x63onversation_short_id\x18\x03 \x01(\x03\x12\x0f\n\x07\x63ontent\x18\x04 \x01(\t\x12\x16\n\x03\x65xt\x18\x05 \x03(\x0b\x32\t.ExtValue\x12\x14\n\x0cmessage_type\x18\x06 \x01(\x05\x12\x0e\n\x06ticket\x18\x07 \x01(\t\x12\x19\n\x11\x63lient_message_id\x18\x08 \x01(\t\x12\x17\n\x0fmentioned_users\x18\t \x03(\x03\x12\x1a\n\x12ignore_badge_count\x18\n \x01(\x08\x12,\n\x0cref_msg_info\x18\x0b \x01(\x0b\x32\x16.ReferencedMessageInfo\"y\n\x15ReferencedMessageInfo\x12\x1b\n\x13original_message_id\x18\x01 \x01(\x03\x12\x1f\n\x17original_message_sender\x18\x02 \x01(\t\x12\"\n\x1aoriginal_message_timestamp\x18\x03 \x01(\x03\"^\n\tTokenInfo\x12\x0f\n\x07mark_id\x18\x01 \x01(\x05\x12\x0c\n\x04type\x18\x02 \x01(\x05\x12\x0e\n\x06\x61pp_id\x18\x03 \x01(\x05\x12\x0f\n\x07user_id\x18\x04 \x01(\x03\x12\x11\n\ttimestamp\x18\x05 \x01(\x03\"\xa2\x02\n\x1f\x43reateConversationV2RequestBody\x12\x19\n\x11\x63onversation_type\x18\x01 \x01(\x05\x12\x14\n\x0cparticipants\x18\x02 \x03(\x03\x12\x12\n\npersistent\x18\x03 \x01(\x08\x12\x15\n\ridempotent_id\x18\x04 \x01(\t\x12\x0c\n\x04name\x18\x05 \x01(\t\x12\x12\n\navatar_url\x18\x06 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x07 \x01(\t\x12=\n\x07\x62iz_ext\x18\x08 \x03(\x0b\x32,.CreateConversationV2RequestBody.BizExtEntry\x1a-\n\x0b\x42izExtEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"`\n$GetConversationInfoListV2RequestBody\x12\x38\n\x04\x64\x61ta\x18\x01 
\x01(\x0b\x32*.GetConversationInfoListV2ResponseBodyData\"~\n)GetConversationInfoListV2ResponseBodyData\x12\x17\n\x0f\x63onversation_id\x18\x01 \x01(\t\x12\x1d\n\x15\x63onversation_short_id\x18\x02 \x01(\x03\x12\x19\n\x11\x63onversation_type\x18\x03 \x01(\x05\x62\x06proto3') 28 | 29 | _globals = globals() 30 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) 31 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'Request_pb2', _globals) 32 | if not _descriptor._USE_C_DESCRIPTORS: 33 | DESCRIPTOR._loaded_options = None 34 | _globals['_REQUEST_HEADERSENTRY']._loaded_options = None 35 | _globals['_REQUEST_HEADERSENTRY']._serialized_options = b'8\001' 36 | _globals['_CREATECONVERSATIONV2REQUESTBODY_BIZEXTENTRY']._loaded_options = None 37 | _globals['_CREATECONVERSATIONV2REQUESTBODY_BIZEXTENTRY']._serialized_options = b'8\001' 38 | _globals['_EXTVALUE']._serialized_start=17 39 | _globals['_EXTVALUE']._serialized_end=55 40 | _globals['_REQUEST']._serialized_start=58 41 | _globals['_REQUEST']._serialized_end=590 42 | _globals['_REQUEST_HEADERSENTRY']._serialized_start=544 43 | _globals['_REQUEST_HEADERSENTRY']._serialized_end=590 44 | _globals['_REQUESTBODY']._serialized_start=593 45 | _globals['_REQUESTBODY']._serialized_end=828 46 | _globals['_SENDMESSAGEREQUESTBODY']._serialized_start=831 47 | _globals['_SENDMESSAGEREQUESTBODY']._serialized_end=1143 48 | _globals['_REFERENCEDMESSAGEINFO']._serialized_start=1145 49 | _globals['_REFERENCEDMESSAGEINFO']._serialized_end=1266 50 | _globals['_TOKENINFO']._serialized_start=1268 51 | _globals['_TOKENINFO']._serialized_end=1362 52 | _globals['_CREATECONVERSATIONV2REQUESTBODY']._serialized_start=1365 53 | _globals['_CREATECONVERSATIONV2REQUESTBODY']._serialized_end=1655 54 | _globals['_CREATECONVERSATIONV2REQUESTBODY_BIZEXTENTRY']._serialized_start=1610 55 | _globals['_CREATECONVERSATIONV2REQUESTBODY_BIZEXTENTRY']._serialized_end=1655 56 | _globals['_GETCONVERSATIONINFOLISTV2REQUESTBODY']._serialized_start=1657 57 
class Data_Spider():
    """High-level crawler: fetch Douyin works and store them locally.

    Data is fetched through ``DouyinAPI``; media is written with
    ``download_work`` and spreadsheets with ``save_to_xlsx``.

    ``save_choice`` values understood by the ``spider_*`` methods:

    * ``'all'``   - download media and write the Excel sheet
    * ``'media'`` - media only (``'media-video'`` restricts the download to
      videos, ``'media-image'`` to images)
    * ``'excel'`` - Excel sheet only

    ``base_path`` is a dict with the keys ``'media'`` and ``'excel'`` naming
    the output directories.

    NOTE(review): every public method accepts ``proxies`` but none of them
    forwards it to the API layer; it is kept only for interface
    compatibility.
    """

    def __init__(self):
        self.douyin_apis = DouyinAPI()

    @staticmethod
    def _wants_media(save_choice):
        """Return True when *save_choice* asks for media downloads."""
        return save_choice == 'all' or 'media' in save_choice

    @staticmethod
    def _wants_excel(save_choice):
        """Return True when *save_choice* asks for the Excel sheet."""
        return save_choice in ('all', 'excel')

    def _save_excel(self, work_info_list, base_path, excel_name):
        """Write *work_info_list* to ``<base_path['excel']>/<excel_name>.xlsx``."""
        file_path = os.path.abspath(os.path.join(base_path['excel'], f'{excel_name}.xlsx'))
        save_to_xlsx(work_info_list, file_path)

    def spider_work(self, auth, work_url: str, proxies=None):
        """
        Fetch the information of a single work.
        :param auth: user authentication info
        :param work_url: URL of the work
        :param proxies: currently unused
        :return: the flattened work-info dict produced by ``handle_work_info``
        """
        res_json = self.douyin_apis.get_work_info(auth, work_url)
        work_info = handle_work_info(res_json['aweme_detail'])
        logger.info(f'爬取作品信息 {work_url}')
        return work_info

    def spider_some_work(self, auth, works: list, base_path: dict, save_choice: str, excel_name: str = '', proxies=None):
        """
        Fetch the information of several works.
        :param auth: user authentication info
        :param works: list of work URLs
        :param base_path: output directories (keys ``'media'`` / ``'excel'``)
        :param save_choice: what to store, see the class docstring
        :param excel_name: Excel file name without extension; required when
            ``save_choice`` is ``'all'`` or ``'excel'``
        :param proxies: currently unused
        :raises ValueError: when an Excel sheet is requested without a name
        :return: None
        """
        wants_excel = self._wants_excel(save_choice)
        if wants_excel and excel_name == '':
            raise ValueError('excel_name 不能为空')
        # Fetch everything first so a failing lookup aborts before any download.
        work_list = [self.spider_work(auth, work_url) for work_url in works]
        for work_info in work_list:
            if self._wants_media(save_choice):
                download_work(work_info, base_path['media'], save_choice)
        if wants_excel:
            self._save_excel(work_list, base_path, excel_name)

    def spider_user_all_work(self, auth, user_url: str, base_path: dict, save_choice: str, excel_name: str = '', proxies=None):
        """
        Fetch all works of one user.
        :param auth: user authentication info
        :param user_url: URL of the user's profile page
        :param base_path: output directories (keys ``'media'`` / ``'excel'``)
        :param save_choice: what to store, see the class docstring
        :param excel_name: Excel file name without extension; when empty the
            last path segment of ``user_url`` (query string removed) is used
        :param proxies: currently unused
        :return: None
        """
        user_info = self.douyin_apis.get_user_info(auth, user_url)
        work_list = self.douyin_apis.get_user_all_work_info(auth, user_url)
        logger.info(f'用户 {user_url} 作品数量: {len(work_list)}')
        wants_excel = self._wants_excel(save_choice)
        # Only derive a name when the caller did not supply one (previously a
        # caller-supplied name was silently overwritten).
        if wants_excel and not excel_name:
            excel_name = user_url.split('/')[-1].split('?')[0]

        work_info_list = []
        for work_info in work_list:
            # Enrich the per-work author record with the profile data.
            work_info['author'].update(user_info['user'])
            work_info = handle_work_info(work_info)
            work_info_list.append(work_info)
            logger.info(f'爬取作品信息 {work_info["work_url"]}')
            if self._wants_media(save_choice):
                download_work(work_info, base_path['media'], save_choice)
        if wants_excel:
            self._save_excel(work_info_list, base_path, excel_name)

    def spider_some_search_work(self, auth, query: str, require_num: int, base_path: dict, save_choice: str, sort_type: str, publish_time: str, filter_duration="", search_range="", content_type="", excel_name: str = '', proxies=None):
        """
        Search works by keyword and store the results.
        :param auth: DouyinAuth object.
        :param query: search keyword.
        :param require_num: number of results wanted.
        :param base_path: output directories (keys ``'media'`` / ``'excel'``).
        :param save_choice: what to store, see the class docstring.
        :param sort_type: 0 comprehensive, 1 most liked, 2 newest.
        :param publish_time: 0 any time, 1 last day, 7 last week, 180 last half year.
        :param filter_duration: '' any, '0-1' under 1 minute, '1-5' 1-5 minutes, '5-10000' over 5 minutes.
        :param search_range: 0 any, 1 recently watched, 2 not watched yet, 3 followed users.
        :param content_type: 0 any, 1 video, 2 image post.
        :param excel_name: Excel file name without extension; defaults to ``query``.
        :param proxies: currently unused
        """
        work_list = self.douyin_apis.search_some_general_work(auth, query, require_num, sort_type, publish_time, filter_duration, search_range, content_type)
        logger.info(f'搜索关键词 {query} 作品数量: {len(work_list)}')
        wants_excel = self._wants_excel(save_choice)
        # Only fall back to the query when the caller did not supply a name.
        if wants_excel and not excel_name:
            excel_name = query

        work_info_list = []
        for work_info in work_list:
            logger.info(json.dumps(work_info))
            logger.info(f'爬取作品信息 https://www.douyin.com/video/{work_info["aweme_info"]["aweme_id"]}')
            work_info = handle_work_info(work_info['aweme_info'])
            work_info_list.append(work_info)
            if self._wants_media(save_choice):
                download_work(work_info, base_path['media'], save_choice)
        if wants_excel:
            self._save_excel(work_info_list, base_path, excel_name)
128 | works = [ 129 | r'https://www.douyin.com/user/MS4wLjABAAAAv2Jr7Ngl7lQMjp4fw0AxtXkaHOgI_UL8aBJGGDSaU1g?from_tab_name=main&modal_id=7445533736877264178', 130 | ] 131 | data_spider.spider_some_work(auth, works, base_path, 'all', 'test') 132 | 133 | # 2 爬取用户的所有作品信息 用户链接 如下所示 注意此url会过期! 134 | user_url = 'https://www.douyin.com/user/MS4wLjABAAAAULqT-SrJDT7RqeoxeGg1hB14Ia5UI9Pm66kzKmI1ITD2Fo3bUhqYePBaztkzj7U5?from_tab_name=main&relation=0&vid=7227654252435361061' 135 | data_spider.spider_user_all_work(auth, user_url, base_path, 'all') 136 | 137 | # 3 搜索指定关键词的作品 138 | query = "榴莲" 139 | require_num = 20 # 搜索的数量 140 | sort_type = '0' # 排序方式 0 综合排序, 1 最多点赞, 2 最新发布 141 | publish_time = '0' # 发布时间 0 不限, 1 一天内, 7 一周内, 180 半年内 142 | filter_duration = "" # 视频时长 空字符串 不限, 0-1 一分钟内, 1-5 1-5分钟内, 5-10000 5分钟以上 143 | search_range = "0" # 搜索范围 0 不限, 1 最近看过, 2 还未看过, 3 关注的人 144 | content_type = "0" # 内容形式 0 不限, 1 视频, 2 图文 145 | 146 | data_spider.spider_some_search_work(auth, query, require_num, base_path, 'all', sort_type, publish_time, filter_duration, search_range, content_type) 147 | 148 | -------------------------------------------------------------------------------- /utils/dy_util.py: -------------------------------------------------------------------------------- 1 | import re 2 | import sys 3 | import time 4 | import json 5 | import random 6 | import base64 7 | import urllib 8 | from os import path 9 | 10 | import requests 11 | requests.packages.urllib3.disable_warnings() 12 | import subprocess 13 | from functools import partial 14 | 15 | subprocess.Popen = partial(subprocess.Popen, encoding="utf-8") 16 | import execjs 17 | 18 | if getattr(sys, 'frozen', None): 19 | basedir = sys._MEIPASS 20 | else: 21 | basedir = path.dirname(__file__) 22 | 23 | 24 | try: 25 | node_modules = path.join(basedir, 'node_modules') 26 | dy_path = path.join(basedir, 'static', 'dy_ab.js') 27 | dy_js = execjs.compile(open(dy_path, 'r', encoding='utf-8').read(), cwd=node_modules) 28 | sign_path = 
# Upper bound (seconds) for the best-effort HTTP probes below, so a stalled
# connection falls back instead of hanging the caller forever.
_REQUEST_TIMEOUT = 10


def trans_cookies(cookies_str):
    """Parse a ``"k1=v1; k2=v2"`` cookie header string into a dict.

    Only the first ``=`` separates name from value, so values may themselves
    contain ``=``. Empty segments (e.g. from an empty string) are skipped.
    """
    cookies = {}
    for item in cookies_str.split("; "):
        if not item:
            continue
        name, _, value = item.partition('=')
        cookies[name] = value
    return cookies


# For private messages pass an object; everything else is a joined string.
def generate_req_sign(e, priK):
    """Return the result of the JS function ``get_req_sign(e, priK)``."""
    return dy_js.call('get_req_sign', e, priK)


# query and data are both joined parameter strings.
def generate_a_bogus(query, data=""):
    """Return the result of the JS function ``get_ab(query, data)``."""
    return dy_js.call('get_ab', query, data)


def generate_signature(roomId, user_unique_id):
    """Return the result of the live-sign JS function ``sign(roomId, user_unique_id)``."""
    return sign_js.call('sign', roomId, user_unique_id)


# Takes the private key.
def generate_ree_key(prik):
    """Return the result of the JS function ``get_ree_key(prik)``."""
    return dy_js.call('get_ree_key', prik)


def generate_bd_ticket_client_data(api, ticket, ts_sign, priK):
    """Build the URL-safe base64 JSON blob carrying the signed ticket data.

    The signed content is ``ticket``, ``path`` and ``timestamp`` (matching
    the ``req_content`` field), joined as a query-style string.
    """
    timestamp = int(time.time())
    # The separator before "timestamp" had been corrupted into "×tamp"
    # ("&times" decoded as an HTML entity); restore the literal "&timestamp".
    res_sign = f"ticket={ticket}&path={api}&timestamp={timestamp}"
    payload = {
        'ts_sign': ts_sign,
        'req_content': 'ticket,path,timestamp',
        'req_sign': generate_req_sign(res_sign, priK),
        'timestamp': timestamp,
    }
    encoded = json.dumps(payload, ensure_ascii=False, separators=(',', ':'))
    return base64.urlsafe_b64encode(encoded.encode('utf-8')).decode('utf-8')


def generate_msToken(randomlength=107):
    """Return a random token of *randomlength* characters."""
    # NOTE(review): the alphabet has 'G'/'g' where 'J'/'j' would be expected
    # ("HIGK"); kept byte-for-byte since the intent cannot be confirmed here.
    base_str = 'ABCDEFGHIGKLMNOPQRSTUVWXYZabcdefghigklmnopqrstuvwxyz0123456789='
    return ''.join(random.choice(base_str) for _ in range(randomlength))


def generate_ttwid():
    """Fetch a ``ttwid`` cookie from douyin.com; return ``None`` on failure."""
    url = "https://www.douyin.com/discover?modal_id=7376449060384935209"
    headers = {
        'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8"
    }
    try:
        response = requests.get(url, headers=headers, verify=False, timeout=_REQUEST_TIMEOUT)
    except requests.RequestException:
        return None
    return response.cookies.get_dict().get('ttwid')


def generate_fake_webid(random_length=19):
    """Return a random string of *random_length* decimal digits."""
    return ''.join(random.choice('0123456789') for _ in range(random_length))


def generate_webid(auth=None, url=""):
    """Extract ``user_unique_id`` from a Douyin page, or fake one.

    :param auth: optional object whose ``cookie_str`` is sent as the cookie
    :param url: page to fetch; defaults to a fixed discover page
    :return: the id found in the page, else ``generate_fake_webid()``
    """
    if url == "":
        url = "https://www.douyin.com/discover?modal_id=7376449060384935209"
    try:
        from builder.header import HeaderBuilder, HeaderType
        headers = HeaderBuilder().build(HeaderType.DOC)
        headers.set_header('cookie', auth.cookie_str if auth else "")
        headers.set_header("upgrade-insecure-requests", "1")
        response = requests.get(url, headers=headers.get(), verify=False, timeout=_REQUEST_TIMEOUT)
        return re.findall(r'\\"user_unique_id\\":\\"(.*?)\\"', response.text)[0]
    except Exception:
        # Deliberately broad: any failure (import, network, id not present
        # in the page) falls back to a synthetic id.
        return generate_fake_webid()


def ws_accept_key(ws_key):
    """calc the Sec-WebSocket-Accept key by Sec-WebSocket-key
    come from client, the return value used for handshake

    :ws_key: Sec-WebSocket-Key come from client (``str`` or ``bytes``)
    :returns: Sec-WebSocket-Accept as ``str``, or ``None`` for unusable input

    """
    import hashlib
    import base64
    magic = '258EAFA5-E914-47DA-95CA-C5AB0DC85B11'
    try:
        if isinstance(ws_key, bytes):
            ws_key = ws_key.decode('ascii')
        # sha1 needs bytes; hashing the str directly raised TypeError on
        # Python 3, which made this function always return None.
        digest = hashlib.sha1((ws_key + magic).encode('ascii')).digest()
    except (TypeError, AttributeError, UnicodeError):
        return None
    return base64.b64encode(digest).decode('ascii')


def generate_csrf_token(cookies_str):
    """Request CSRF tokens from douyin.com with the given cookie string.

    :return: items 1 and 4 of the comma-separated ``X-Ware-Csrf-Token``
        response header, or ``(None, None)`` when the request fails or the
        header is missing / too short
    """
    headers = {
        'accept': '*/*',
        'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'cache-control': 'no-cache',
        'cookie': cookies_str,
        'pragma': 'no-cache',
        'priority': 'u=1, i',
        'referer': 'https://www.douyin.com/?recommend=1',
        'sec-ch-ua': '"Microsoft Edge";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'user-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
        'x-secsdk-csrf-request': '1',
        'x-secsdk-csrf-version': '1.2.22',
    }
    try:
        response = requests.head('https://www.douyin.com/service/2/abtest_config/', headers=headers, verify=False, timeout=_REQUEST_TIMEOUT)
        parts = response.headers['X-Ware-Csrf-Token'].split(',')
        return parts[1], parts[4]
    except (requests.RequestException, KeyError, IndexError):
        return None, None


def generate_millisecond():
    """Return the current Unix time in whole milliseconds."""
    return int(round(time.time() * 1000))


def splice_url(params):
    """Join *params* into ``k1=v1&k2=v2`` with percent-quoted values.

    ``None`` values become empty strings; keys are emitted unquoted.
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.parse`` is loaded.
    from urllib.parse import quote
    return '&'.join(
        f"{key}={quote(str('' if value is None else value))}"
        for key, value in params.items()
    )
class DouyinLive:
    """Listen to a Douyin live room over WebSocket and print its events.

    :param live_id: id of the live room, passed to ``DouyinAPI.get_live_info``
    :param auth_: authentication object, passed to ``DouyinAPI.get_live_info``
    """

    def __init__(self, live_id, auth_):
        self.auth_ = auth_
        self.live_id = live_id
        self.ws = None

    def ping(self, ws):
        """Send a ``"hb"`` frame every 5 seconds until sending fails."""
        while True:
            frame = Live_pb2.PushFrame()
            frame.payloadType = "hb"
            try:
                ws.send(frame.SerializeToString(), opcode=0x02)
                time.sleep(5)
            except Exception:
                # The socket is no longer usable: close it and stop the loop.
                ws.close()
                break

    def on_open(self, ws):
        """Start the heartbeat thread once the socket is open."""
        print("\033[32m### opened ###\033[m")
        # Daemon thread: the heartbeat must never keep the process alive on
        # its own after the main thread has finished.
        threading.Thread(target=self.ping, args=(ws,), daemon=True).start()

    def on_message(self, ws, message):
        """Decode one pushed frame, acknowledge it if asked, print its events.

        The frame payload is gzip-decompressed and parsed as a
        ``LiveResponse``; each contained item is dispatched on ``item.method``.
        Any error is printed and swallowed so one bad frame does not stop
        the listener.
        """
        try:
            frame = Live_pb2.PushFrame()
            frame.ParseFromString(message)
            origin_bytes = gzip.decompress(frame.payload)
            response = Live_pb2.LiveResponse()
            response.ParseFromString(origin_bytes)
            if response.needAck:
                ack = Live_pb2.PushFrame()
                ack.payloadType = "ack"
                ack.payload = response.internalExt.encode('utf-8')
                ack.logId = frame.logId
                ws.send(ack.SerializeToString(), opcode=0x02)
            for item in response.messagesList:
                if item.method == 'WebcastGiftMessage':
                    msg = Live_pb2.GiftMessage()
                    msg.ParseFromString(item.payload)
                    # Who sent which gift to whom.
                    print(f'\033[1;37;40m[礼物]SEC_UID = {msg.user.sec_uid} - {msg.user.nickname}\033[m 送给 \033[1;37;40m{msg.toUser.sec_uid} - {msg.toUser.nickname}\033[m \033[4;30;44m{msg.gift.name}\033[m x {msg.comboCount}')
                elif item.method == "WebcastChatMessage":
                    msg = Live_pb2.ChatMessage()
                    msg.ParseFromString(item.payload)
                    print(f'\033[1;37;40m[消息]SEC_UID = {msg.user.sec_uid} - {msg.user.nickname}\033[m : \033[4;30;44m{msg.content}\033[m')
                elif item.method == "WebcastMemberMessage":
                    msg = Live_pb2.MemberMessage()
                    msg.ParseFromString(item.payload)
                    print(f'\033[1;37;40m[进入]SEC_UID = {msg.user.sec_uid} - {msg.user.nickname}\033[m 进入直播间')
                elif item.method == "WebcastLikeMessage":
                    msg = Live_pb2.LikeMessage()
                    msg.ParseFromString(item.payload)
                    print(f'\033[1;37;40m[点赞]SEC_UID = {msg.user.sec_uid} - {msg.user.nickname}\033[m 点赞了 {msg.count} 次')
                    print(f'\033[1;37;40m[点赞]点赞总数 = {msg.total}\033[m')
                elif item.method == "WebcastSocialMessage":
                    msg = Live_pb2.SocialMessage()
                    msg.ParseFromString(item.payload)
                    if msg.action == 1:
                        print(f'\033[1;37;40m[关注]SEC_UID = {msg.user.sec_uid} - {msg.user.nickname}\033[m 关注主播')
                elif item.method == "WebcastRoomStatsMessage":
                    msg = Live_pb2.RoomStatsMessage()
                    msg.ParseFromString(item.payload)
                    print(f'\033[1;37;40m[房间信息] {msg.displayLong}')
        except Exception as e:
            print('error')
            print(str(e))

    def on_error(self, ws, error):
        """Print the error reported by the WebSocket client."""
        print("\033[31m### error ###")
        print(error)
        print("### ===error=== ###\033[m")

    def on_close(self, ws, close_status_code, close_msg):
        """Report the closed connection, then reconnect."""
        # Log first: start_ws() blocks in run_forever(), so reporting after it
        # would delay (or, if it raises, lose) the close information.
        print("\033[31m### closed ###")
        print(f"status_code: {close_status_code}, msg: {close_msg}")
        print("### ===closed=== ###\033[m")
        # TODO: decide here whether a reconnect is needed, e.g. whether the
        # live room itself has closed.
        # NOTE(review): the reconnect runs inside this callback and is
        # unconditional, so repeated closes nest start_ws() calls.
        self.start_ws()

    def start_ws(self):
        """Look up the room, build the signed push URL and run the socket.

        Blocks until the connection ends.
        """
        room_info = DouyinAPI.get_live_info(self.auth_, self.live_id)
        room_id = room_info['room_id']
        user_id = room_info['user_id']
        ttwid = room_info['ttwid']
        params = Params()
        (params
         .add_param('app_name', 'douyin_web')
         .add_param('version_code', '180800')
         .add_param('webcast_sdk_version', '1.0.14-beta.0')
         .add_param('update_version_code', '1.0.14-beta.0')
         .add_param('compress', 'gzip')
         .add_param('device_platform', 'web')
         .add_param('cookie_enabled', 'true')
         .add_param('screen_width', '1707')
         .add_param('screen_height', '960')
         .add_param('browser_language', 'zh-CN')
         .add_param('browser_platform', 'Win32')
         .add_param('browser_name', 'Mozilla')
         .add_param('browser_version',
                    HeaderBuilder.ua.split('Mozilla/')[-1])
         .add_param('browser_online', 'true')
         .add_param('tz_name', 'Etc/GMT-8')
         .add_param('host', 'https://live.douyin.com')
         .add_param('aid', '6383')
         .add_param('live_id', '1')
         .add_param('did_rule', '3')
         .add_param('endpoint', 'live_pc')
         .add_param('support_wrds', '1')
         .add_param('user_unique_id', str(user_id))
         .add_param('im_path', '/webcast/im/fetch/')
         .add_param('identity', 'audience')
         .add_param('need_persist_msg_count', '15')
         .add_param('insert_task_id', '')
         .add_param('live_reason', '')
         .add_param('room_id', room_id)
         .add_param('heartbeatDuration', '0')
         .add_param('signature', generate_signature(room_id, user_id))
         )
        wss_url = f"wss://webcast5-ws-web-lf.douyin.com/webcast/im/push/v2/?{urlencode(params.get())}"
        self.ws = WebSocketApp(
            url=wss_url,
            header={
                'Pragma': 'no-cache',
                'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
                'User-Agent': HeaderBuilder.ua,
                'Upgrade': 'websocket',
                'Cache-Control': 'no-cache',
                'Connection': 'Upgrade',
            },
            cookie=f"ttwid={ttwid};",
            on_message=self.on_message,
            on_error=self.on_error,
            on_close=self.on_close,
            on_open=self.on_open
        )
        try:
            self.ws.run_forever(origin='https://live.douyin.com')
        except Exception as e:
            print(str(e))
            self.ws.close()
# Control characters removed by norm_text(); compiled once at import time.
_ILLEGAL_CHARACTERS_RE = re.compile(r'[\000-\010]|[\013-\014]|[\016-\037]')


def norm_str(str):
    """Strip characters unsuitable for file names from *str*.

    Removes ``\\ / : * ? " < > |``, spaces, newlines and carriage returns.
    """
    new_str = re.sub(r'[\\/:*?"<>| ]+', "", str).replace('\n', '').replace('\r', '')
    return new_str


def norm_text(text):
    """Remove control characters (all below 0x20 except tab, LF, CR) from *text*."""
    return _ILLEGAL_CHARACTERS_RE.sub(r'', text)


def timestamp_to_str(timestamp):
    """Format a millisecond Unix timestamp as local ``YYYY-MM-DD HH:MM:SS``.

    NOTE(review): the value is divided by 1000, i.e. treated as
    milliseconds; confirm that callers do not pass seconds.
    """
    time_local = time.localtime(timestamp / 1000)
    return time.strftime("%Y-%m-%d %H:%M:%S", time_local)


def handle_work_info(data):
    """Flatten one raw work ("aweme") dict into the record used for export.

    Optional author fields default to ``'未知'``; ``admire_count`` defaults
    to 0. The key order of the returned dict matches the column order of
    ``save_to_xlsx`` and must not be changed.

    :param data: raw work dict containing at least ``author``, ``aweme_id``,
        ``video``, ``desc``, ``statistics`` and ``create_time``
    :raises KeyError: when one of the required fields is missing
    """
    author = data['author']
    statistics = data['statistics']
    video = data['video']
    aweme_id = data['aweme_id']
    unknown = '未知'

    gender = {1: '男', 0: '女'}.get(author.get('gender', unknown), unknown)

    # NOTE(review): ip_location is looked up under data['user'], not
    # data['author'] - confirm which one actually carries it.
    try:
        ip_location = data['user']['ip_location']
    except (KeyError, TypeError, IndexError):
        ip_location = unknown

    # A missing or non-list "images" field means "no images".
    images = data.get('images')
    if not isinstance(images, list):
        images = []

    topics = [
        item['hashtag_name']
        for item in (data.get('text_extra') or [])
        if 'hashtag_name' in item and item['hashtag_name']
    ]

    work_type = unknown
    if 'aweme_type' in data:
        if data['aweme_type'] == 68:
            work_type = '图集'
        elif data['aweme_type'] == 0:
            work_type = '视频'

    return {
        'work_id': aweme_id,
        'work_url': f'https://www.douyin.com/video/{aweme_id}',
        'work_type': work_type,
        'title': data['desc'],
        'desc': data['desc'],
        'admire_count': statistics.get('admire_count', 0),
        'digg_count': statistics['digg_count'],
        'comment_count': statistics['comment_count'],
        'collect_count': statistics['collect_count'],
        'share_count': statistics['share_count'],
        'video_addr': video['play_addr']['url_list'][0],
        'images': images,
        'topics': topics,
        'create_time': data['create_time'],
        'video_cover': video['cover']['url_list'][0],
        'user_url': f"https://www.douyin.com/user/{author['sec_uid']}",
        'user_id': author.get('unique_id', unknown),
        'nickname': author['nickname'],
        'author_avatar': author['avatar_thumb']['url_list'][0],
        'user_desc': author.get('signature', unknown),
        'following_count': author.get('following_count', unknown),
        'follower_count': author.get('follower_count', unknown),
        'total_favorited': author.get('total_favorited', unknown),
        'aweme_count': author.get('aweme_count', unknown),
        'user_age': author.get('user_age', unknown),
        'gender': gender,
        'ip_location': ip_location
    }


def save_to_xlsx(datas, file_path):
    """Write the work records in *datas* to a new workbook at *file_path*.

    Every value is stringified and cleaned with ``norm_text``; columns
    follow the key order of each record.
    """
    wb = openpyxl.Workbook()
    ws = wb.active
    headers = ['作品id', '作品url', '作品类型', '作品标题', '描述', 'admire数量', '点赞数量', '评论数量', '收藏数量', '分享数量', '视频地址url', '图片地址url列表', '标签', '上传时间', '视频封面url', '用户主页url', '用户id', '昵称', '头像url', '用户描述', '关注数量', '粉丝数量', '作品被赞和收藏数量', '作品数量', '用户年龄', '性别', 'ip归属地']
    ws.append(headers)
    for data in datas:
        ws.append([norm_text(str(value)) for value in data.values()])
    wb.save(file_path)
    logger.info(f'数据保存至 {file_path}')


def download_media(path, name, url, type):
    """Download *url* into ``<path>/<name>.jpg`` or ``<path>/<name>.mp4``.

    :param type: ``'image'`` (fetched in one piece) or ``'video'`` (streamed
        in 1 MiB chunks); any other value does nothing
    """
    if type == 'image':
        content = requests.get(url).content
        with open(path + '/' + name + '.jpg', mode="wb") as f:
            f.write(content)
    elif type == 'video':
        chunk_size = 1024 * 1024
        # The context manager releases the streamed connection when done.
        with requests.get(url, stream=True) as res:
            with open(path + '/' + name + '.mp4', mode="wb") as f:
                for chunk in res.iter_content(chunk_size=chunk_size):
                    f.write(chunk)


def save_wrok_detail(work, path):
    """Write a human-readable ``detail.txt`` for *work* into *path*.

    One ``label: value`` line per field, in a fixed order.
    """
    rows = [
        ('作品id', work['work_id']),
        ('作品url', work['work_url']),
        ('作品类型', work['work_type']),
        ('作品标题', work['title']),
        ('描述', work['desc']),
        ('admire数量', work['admire_count']),
        ('点赞数量', work['digg_count']),
        ('评论数量', work['comment_count']),
        ('收藏数量', work['collect_count']),
        ('分享数量', work['share_count']),
        ('视频地址url', work['video_addr']),
        # str() keeps the join from failing should an entry not be a string.
        ('图片地址url列表', ', '.join(str(image) for image in work['images'])),
        ('标签', ', '.join(str(topic) for topic in work['topics'])),
        ('上传时间', timestamp_to_str(work['create_time'])),
        ('视频封面url', work['video_cover']),
        ('用户主页url', work['user_url']),
        ('用户id', work['user_id']),
        ('昵称', work['nickname']),
        ('头像url', work['author_avatar']),
        ('用户描述', work['user_desc']),
        ('关注数量', work['following_count']),
        ('粉丝数量', work['follower_count']),
        ('作品被赞和收藏数量', work['total_favorited']),
        ('作品数量', work['aweme_count']),
        ('用户年龄', work['user_age']),
        ('用户性别', work['gender']),
        ('ip归属地', work['ip_location']),
    ]
    with open(f'{path}/detail.txt', mode="w", encoding="utf-8") as f:
        f.writelines(f"{label}: {value}\n" for label, value in rows)
@retry(tries=3, delay=1)
def download_work(work_info, path, save_choice):
    """Store one work under ``<path>/<nickname>_<user_id>/<title>_<work_id>``.

    Always writes ``info.json`` and ``detail.txt``. Media is downloaded
    according to the work type and *save_choice*: image sets for
    ``'media'``, ``'media-image'`` or ``'all'``; cover plus video for
    ``'media'``, ``'media-video'`` or ``'all'``. The call is retried up to
    three times, one second apart, by the ``retry`` decorator.

    :return: the directory the work was saved to
    """
    image_choices = ['media', 'media-image', 'all']
    video_choices = ['media', 'media-video', 'all']

    work_id = work_info['work_id']
    user_id = work_info['user_id']
    # Title and nickname become directory names: sanitise and truncate them.
    safe_title = norm_str(work_info['title'])[:40]
    safe_nickname = norm_str(work_info['nickname'])[:20]
    if safe_title.strip() == '':
        safe_title = '无标题'

    save_path = f'{path}/{safe_nickname}_{user_id}/{safe_title}_{work_id}'
    check_and_create_path(save_path)

    with open(f'{save_path}/info.json', mode='w', encoding='utf-8') as f:
        f.write(json.dumps(work_info) + '\n')
    work_type = work_info['work_type']
    save_wrok_detail(work_info, save_path)

    if work_type == '图集' and save_choice in image_choices:
        for img_index, img_url in enumerate(work_info['images']):
            download_media(save_path, f'image_{img_index}', img_url, 'image')
    elif work_type == '视频' and save_choice in video_choices:
        download_media(save_path, 'cover', work_info['video_cover'], 'image')
        download_media(save_path, 'video', work_info['video_addr'], 'video')

    logger.info(f'作品 {work_info["work_id"]} 下载完成,保存路径: {save_path}')
    return save_path
== '图集' and save_choice in ['media', 'media-image', 'all']: 187 | for img_index, img_url in enumerate(work_info['images']): 188 | download_media(save_path, f'image_{img_index}', img_url, 'image') 189 | elif work_type == '视频' and save_choice in ['media', 'media-video', 'all']: 190 | download_media(save_path, 'cover', work_info['video_cover'], 'image') 191 | download_media(save_path, 'video', work_info['video_addr'], 'video') 192 | logger.info(f'作品 {work_info["work_id"]} 下载完成,保存路径: {save_path}') 193 | return save_path 194 | 195 | 196 | 197 | def check_and_create_path(path): 198 | if not os.path.exists(path): 199 | os.makedirs(path) 200 | -------------------------------------------------------------------------------- /static/Live_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 3 | # source: Live.proto 4 | """Generated protocol buffer code.""" 5 | from google.protobuf import descriptor as _descriptor 6 | from google.protobuf import descriptor_pool as _descriptor_pool 7 | from google.protobuf import symbol_database as _symbol_database 8 | from google.protobuf.internal import builder as _builder 9 | # @@protoc_insertion_point(imports) 10 | 11 | _sym_db = _symbol_database.Default() 12 | 13 | 14 | 15 | 16 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\nLive.proto\")\n\x0bHeadersList\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t\"\xbe\x01\n\tPushFrame\x12\r\n\x05seqId\x18\x01 \x01(\x04\x12\r\n\x05logId\x18\x02 \x01(\x04\x12\x0f\n\x07service\x18\x03 \x01(\x04\x12\x0e\n\x06method\x18\x04 \x01(\x04\x12!\n\x0bheadersList\x18\x05 \x03(\x0b\x32\x0c.HeadersList\x12\x17\n\x0fpayloadEncoding\x18\x06 \x01(\t\x12\x13\n\x0bpayloadType\x18\x07 \x01(\t\x12\x0f\n\x07payload\x18\x08 \x01(\x0c\x12\x10\n\x08logIdNew\x18\t \x01(\t\"\x9a\x01\n\x07Message\x12\x0e\n\x06method\x18\x01 \x01(\t\x12\x0f\n\x07payload\x18\x02 
\x01(\x0c\x12\r\n\x05msgId\x18\x03 \x01(\x03\x12\x0f\n\x07msgType\x18\x04 \x01(\x05\x12\x0e\n\x06offset\x18\x05 \x01(\x03\x12\x15\n\rneedWrdsStore\x18\x06 \x01(\x08\x12\x13\n\x0bwrdsVersion\x18\x07 \x01(\x03\x12\x12\n\nwrdsSubKey\x18\x08 \x01(\t\"\xf3\x02\n\x0cLiveResponse\x12\x1e\n\x0cmessagesList\x18\x01 \x03(\x0b\x32\x08.Message\x12\x0e\n\x06\x63ursor\x18\x02 \x01(\t\x12\x15\n\rfetchInterval\x18\x03 \x01(\x04\x12\x0b\n\x03now\x18\x04 \x01(\x04\x12\x13\n\x0binternalExt\x18\x05 \x01(\t\x12\x11\n\tfetchType\x18\x06 \x01(\r\x12\x33\n\x0brouteParams\x18\x07 \x03(\x0b\x32\x1e.LiveResponse.RouteParamsEntry\x12\x19\n\x11heartbeatDuration\x18\x08 \x01(\x04\x12\x0f\n\x07needAck\x18\t \x01(\x08\x12\x12\n\npushServer\x18\n \x01(\t\x12\x12\n\nliveCursor\x18\x0b \x01(\t\x12\x15\n\rhistoryNoMore\x18\x0c \x01(\x08\x12\x13\n\x0bproxyServer\x18\r \x01(\t\x1a\x32\n\x10RouteParamsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"\"\n\x05Image\x12\x19\n\x07\x63ontent\x18\x08 \x01(\x0b\x32\x08.Content\"T\n\x07\x43ontent\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x12\n\nfont_color\x18\x02 \x01(\t\x12\r\n\x05level\x18\x03 \x01(\x03\x12\x18\n\x10\x61lternative_text\x18\x04 \x01(\t\"\x83\x11\n\x04User\x12\n\n\x02id\x18\x01 \x01(\x03\x12\x10\n\x08short_id\x18\x02 \x01(\x03\x12\x10\n\x08nickname\x18\x03 \x01(\t\x12\x0e\n\x06gender\x18\x04 \x01(\x05\x12\x11\n\tsignature\x18\x05 \x01(\t\x12\r\n\x05level\x18\x06 \x01(\x05\x12\x10\n\x08\x62irthday\x18\x07 \x01(\x03\x12\x11\n\ttelephone\x18\x08 \x01(\t\x12\x10\n\x08verified\x18\x0c \x01(\x08\x12\x12\n\nexperience\x18\r \x01(\x05\x12\x0c\n\x04\x63ity\x18\x0e \x01(\t\x12\x0e\n\x06status\x18\x0f \x01(\x05\x12\x13\n\x0b\x63reate_time\x18\x10 \x01(\x03\x12\x13\n\x0bmodify_time\x18\x11 \x01(\x03\x12\x0e\n\x06secret\x18\x12 \x01(\x05\x12\x18\n\x10share_qrcode_uri\x18\x13 \x01(\t\x12\x1c\n\x14income_share_percent\x18\x14 \x01(\x05\x12 \n\x10\x62\x61\x64ge_image_list\x18\x15 
\x03(\x0b\x32\x06.Image\x12\x12\n\nspecial_id\x18\x1a \x01(\t\x12\x12\n\ntop_vip_no\x18\x1f \x01(\x03\x12\x11\n\tpay_score\x18\" \x01(\x03\x12\x14\n\x0cticket_count\x18# \x01(\x03\x12\x16\n\x0elink_mic_stats\x18% \x01(\x05\x12\x12\n\ndisplay_id\x18& \x01(\t\x12 \n\x18with_commerce_permission\x18\' \x01(\x08\x12\x1e\n\x16with_fusion_shop_entry\x18( \x01(\x08\x12$\n\x1ctotal_recharge_diamond_count\x18) \x01(\x03\x12\x18\n\x10verified_content\x18+ \x01(\t\x12\x17\n\x08top_fans\x18- \x03(\x0b\x32\x05.User\x12\x0f\n\x07sec_uid\x18. \x01(\t\x12\x11\n\tuser_role\x18/ \x01(\x05\x12\x1a\n\x12\x61uthorization_info\x18\x36 \x01(\x05\x12$\n\x1c\x61\x64versary_authorization_info\x18\x37 \x01(\x05\x12\x1d\n\x15\x61\x64versary_user_status\x18: \x01(\x05\x12#\n\x1b\x63ommerce_webcast_config_ids\x18< \x03(\x03\x12\x15\n\rlocation_city\x18? \x01(\t\x12\x13\n\x0bremark_name\x18\x41 \x01(\t\x12\x13\n\x0bmystery_man\x18\x42 \x01(\x05\x12\x0f\n\x07web_rid\x18\x43 \x01(\t\x12\x1d\n\x15\x64\x65sensitized_nickname\x18\x44 \x01(\t\x12\x14\n\x0cis_anonymous\x18G \x01(\x08\x12\x1d\n\x15\x63onsume_diamond_level\x18H \x01(\x05\x12\x13\n\x0bwebcast_uid\x18I \x01(\t\x12\x19\n\x10\x61llow_be_located\x18\xe9\x07 \x01(\x08\x12\x1f\n\x16\x61llow_find_by_contacts\x18\xea\x07 \x01(\x08\x12$\n\x1b\x61llow_others_download_video\x18\xeb\x07 \x01(\x08\x12\x31\n(allow_others_download_when_sharing_video\x18\xec\x07 \x01(\x08\x12!\n\x18\x61llow_share_show_profile\x18\xed\x07 \x01(\x08\x12\x1d\n\x14\x61llow_show_in_gossip\x18\xee\x07 \x01(\x08\x12\x1d\n\x14\x61llow_show_my_action\x18\xef\x07 \x01(\x08\x12\x1e\n\x15\x61llow_strange_comment\x18\xf0\x07 \x01(\x08\x12!\n\x18\x61llow_unfollower_comment\x18\xf1\x07 \x01(\x08\x12\x1a\n\x11\x61llow_use_linkmic\x18\xf2\x07 \x01(\x08\x12\x13\n\nbg_img_url\x18\xf5\x07 \x01(\t\x12\x1d\n\x14\x62irthday_description\x18\xf6\x07 \x01(\t\x12\x17\n\x0e\x62irthday_valid\x18\xf7\x07 \x01(\x08\x12\x15\n\x0c\x62lock_status\x18\xf8\x07 
\x01(\x05\x12\x19\n\x10\x63omment_restrict\x18\xf9\x07 \x01(\x05\x12\x16\n\rconstellation\x18\xfa\x07 \x01(\t\x12\x16\n\rdisable_ichat\x18\xfb\x07 \x01(\x05\x12\x19\n\x10\x65nable_ichat_img\x18\xfc\x07 \x01(\x03\x12\x0c\n\x03\x65xp\x18\xfd\x07 \x01(\x05\x12\x19\n\x10\x66\x61n_ticket_count\x18\xfe\x07 \x01(\x03\x12\x1b\n\x12\x66old_stranger_chat\x18\xff\x07 \x01(\x08\x12\x16\n\rfollow_status\x18\x80\x08 \x01(\x03\x12\x19\n\x10hotsoon_verified\x18\x81\x08 \x01(\x08\x12 \n\x17hotsoon_verified_reason\x18\x82\x08 \x01(\t\x12\x1c\n\x13ichat_restrict_type\x18\x83\x08 \x01(\x05\x12\x0f\n\x06id_str\x18\x84\x08 \x01(\t\x12\x14\n\x0bis_follower\x18\x85\x08 \x01(\x08\x12\x15\n\x0cis_following\x18\x86\x08 \x01(\x08\x12\x1b\n\x12need_profile_guide\x18\x87\x08 \x01(\x08\x12\x13\n\npay_scores\x18\x88\x08 \x01(\x03\x12\x1c\n\x13push_comment_status\x18\x89\x08 \x01(\x08\x12\x12\n\tpush_digg\x18\x8a\x08 \x01(\x08\x12\x14\n\x0bpush_follow\x18\x8b\x08 \x01(\x08\x12\x1b\n\x12push_friend_action\x18\x8c\x08 \x01(\x08\x12\x13\n\npush_ichat\x18\x8d\x08 \x01(\x08\x12\x14\n\x0bpush_status\x18\x8e\x08 \x01(\x08\x12\x18\n\x0fpush_video_post\x18\x8f\x08 \x01(\x08\x12\x1d\n\x14push_video_recommend\x18\x90\x08 \x01(\x08\x12\x18\n\x0fverified_mobile\x18\x92\x08 \x01(\x08\x12\x18\n\x0fverified_reason\x18\x93\x08 \x01(\t\x12\'\n\x1ewith_car_management_permission\x18\x94\x08 \x01(\x08\x12\x12\n\tage_range\x18\x95\x08 \x01(\x05\x12\x1d\n\x14watch_duration_month\x18\x96\x08 \x01(\x03\"L\n\x0b\x43hatMessage\x12\x13\n\x04user\x18\x02 \x01(\x0b\x32\x05.User\x12\x0f\n\x07\x63ontent\x18\x03 \x01(\t\x12\x17\n\x0fvisibleToSender\x18\x04 \x01(\x08\"&\n\nGiftStruct\x12\n\n\x02id\x18\x05 \x01(\x03\x12\x0c\n\x04name\x18\x10 \x01(\t\"x\n\x0bGiftMessage\x12\x0e\n\x06giftId\x18\x02 \x01(\t\x12\x12\n\ncomboCount\x18\x06 \x01(\x03\x12\x13\n\x04user\x18\x07 \x01(\x0b\x32\x05.User\x12\x15\n\x06toUser\x18\x08 \x01(\x0b\x32\x05.User\x12\x19\n\x04gift\x18\x0f 
\x01(\x0b\x32\x0b.GiftStruct\"9\n\rMemberMessage\x12\x13\n\x04user\x18\x02 \x01(\x0b\x32\x05.User\x12\x13\n\x0bmemberCount\x18\x03 \x01(\x03\"]\n\x0bLikeMessage\x12\r\n\x05\x63ount\x18\x02 \x01(\x03\x12\r\n\x05total\x18\x03 \x01(\x03\x12\r\n\x05\x63olor\x18\x04 \x01(\x03\x12\x13\n\x04user\x18\x05 \x01(\x0b\x32\x05.User\x12\x0c\n\x04icon\x18\x06 \x01(\t\"\xcd\x01\n\x10RoomStatsMessage\x12\x14\n\x0c\x64isplayShort\x18\x02 \x01(\t\x12\x15\n\rdisplayMiddle\x18\x03 \x01(\t\x12\x13\n\x0b\x64isplayLong\x18\x04 \x01(\t\x12\x14\n\x0c\x64isplayValue\x18\x05 \x01(\x03\x12\x16\n\x0e\x64isplayVersion\x18\x06 \x01(\x03\x12\x13\n\x0bincremental\x18\x07 \x01(\x08\x12\x10\n\x08isHidden\x18\x08 \x01(\x08\x12\r\n\x05total\x18\t \x01(\x03\x12\x13\n\x0b\x64isplayType\x18\n \x01(\x03\"q\n\rSocialMessage\x12\x13\n\x04user\x18\x02 \x01(\x0b\x32\x05.User\x12\x11\n\tshareType\x18\x03 \x01(\x03\x12\x0e\n\x06\x61\x63tion\x18\x04 \x01(\x03\x12\x13\n\x0bshareTarget\x18\x05 \x01(\t\x12\x13\n\x0b\x66ollowCount\x18\x06 \x01(\x04\x62\x06proto3') 17 | 18 | _globals = globals() 19 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) 20 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'Live_pb2', _globals) 21 | if _descriptor._USE_C_DESCRIPTORS == False: 22 | 23 | DESCRIPTOR._options = None 24 | _LIVERESPONSE_ROUTEPARAMSENTRY._options = None 25 | _LIVERESPONSE_ROUTEPARAMSENTRY._serialized_options = b'8\001' 26 | _globals['_HEADERSLIST']._serialized_start=14 27 | _globals['_HEADERSLIST']._serialized_end=55 28 | _globals['_PUSHFRAME']._serialized_start=58 29 | _globals['_PUSHFRAME']._serialized_end=248 30 | _globals['_MESSAGE']._serialized_start=251 31 | _globals['_MESSAGE']._serialized_end=405 32 | _globals['_LIVERESPONSE']._serialized_start=408 33 | _globals['_LIVERESPONSE']._serialized_end=779 34 | _globals['_LIVERESPONSE_ROUTEPARAMSENTRY']._serialized_start=729 35 | _globals['_LIVERESPONSE_ROUTEPARAMSENTRY']._serialized_end=779 36 | _globals['_IMAGE']._serialized_start=781 37 | 
@staticmethod
def get_user_all_work_info(auth, user_url: str, **kwargs) -> list:
    """
    Collect all works of a user by requesting page after page.

    Pages are fetched through ``DouyinAPI.get_user_work_info`` starting at
    cursor ``"0"``; each response supplies the cursor for the next request.

    :param auth: DouyinAuth object.
    :param user_url: URL of the user's profile page.
    :param kwargs: Accepted for call compatibility; not used in this body.
    :return: List with the ``aweme_list`` entries of every fetched page.
    """
    max_cursor = "0"
    work_list = []
    while True:
        res_json = DouyinAPI.get_user_work_info(auth, user_url, max_cursor)
        # A response without the list key ends paging (e.g. an error payload).
        if "aweme_list" not in res_json:
            break
        works = res_json["aweme_list"]
        max_cursor = str(res_json["max_cursor"])
        # The key may be present but null; treat that as an empty page
        # instead of letting list.extend(None) raise TypeError.
        work_list.extend(works or [])
        if res_json["has_more"] != 1:
            break
    return work_list
'zh-CN') 84 | params.add_param("browser_platform", 'Win32') 85 | params.add_param("browser_name", 'Edge') 86 | params.add_param("browser_version", '125.0.0.0') 87 | params.add_param("browser_online", 'true') 88 | params.add_param("engine_name", 'Blink') 89 | params.add_param("engine_version", '125.0.0.0') 90 | params.add_param("os_name", 'Windows') 91 | params.add_param("os_version", '10') 92 | params.add_param("cpu_core_num", '32') 93 | params.add_param("device_memory", '8') 94 | params.add_param("platform", 'PC') 95 | params.add_param("downlink", '10') 96 | params.add_param("effective_type", '4g') 97 | params.add_param("round_trip_time", '100') 98 | params.with_web_id(auth, user_url) 99 | params.add_param("verifyFp", auth.cookie['s_v_web_id']) 100 | params.add_param("fp", auth.cookie['s_v_web_id']) 101 | params.add_param("msToken", 102 | auth.msToken) 103 | params.with_a_bogus() 104 | resp = requests.get(f'{DouyinAPI.douyin_url}{api}', headers=headers.get(), cookies=auth.cookie, 105 | params=params.get(), verify=False) 106 | return json.loads(resp.text) 107 | 108 | @staticmethod 109 | def get_work_info(auth, url: str) -> dict: 110 | """ 111 | 获取作品信息. 112 | :param auth: DouyinAuth object. 113 | :param url: 作品URL. 114 | :return: JSON. 
115 | """ 116 | api = f"/aweme/v1/web/aweme/detail/" 117 | if 'video' in url: 118 | aweme_id = url.split("/")[-1].split("?")[0] 119 | else: 120 | aweme_id = re.findall(r'modal_id=(\d+)', url)[0] 121 | url = f'https://www.douyin.com/video/{aweme_id}' 122 | headers = HeaderBuilder().build(HeaderType.GET) 123 | headers.set_referer(url) 124 | params = Params() 125 | params.add_param("device_platform", "webapp") 126 | params.add_param("aid", "6383") 127 | params.add_param("channel", "channel_pc_web") 128 | params.add_param("aweme_id", aweme_id) 129 | params.add_param("update_version_code", "170400") 130 | params.add_param("pc_client_type", "1") 131 | params.add_param("version_code", "190500") 132 | params.add_param("version_name", "19.5.0") 133 | params.add_param("cookie_enabled", "true") 134 | params.add_param("screen_width", "1707") 135 | params.add_param("screen_height", "960") 136 | params.add_param("browser_language", "zh-CN") 137 | params.add_param("browser_platform", "Win32") 138 | params.add_param("browser_name", "Edge") 139 | params.add_param("browser_version", "125.0.0.0") 140 | params.add_param("browser_online", "true") 141 | params.add_param("engine_name", "Blink") 142 | params.add_param("engine_version", "125.0.0.0") 143 | params.add_param("os_name", "Windows") 144 | params.add_param("os_version", "10") 145 | params.add_param("cpu_core_num", "32") 146 | params.add_param("device_memory", "8") 147 | params.add_param("platform", "PC") 148 | params.add_param("downlink", "4.75") 149 | params.add_param("effective_type", "4g") 150 | params.add_param("round_trip_time", "150") 151 | params.with_web_id(auth, url) 152 | params.add_param("msToken", auth.msToken) 153 | params.with_a_bogus() 154 | params.add_param("verifyFp", auth.cookie['s_v_web_id']) 155 | params.add_param("fp", auth.cookie['s_v_web_id']) 156 | resp = requests.get(f'{DouyinAPI.douyin_url}{api}', headers=headers.get(), cookies=auth.cookie, 157 | params=params.get(), verify=False) 158 | resp_json = 
json.loads(resp.text) 159 | return resp_json 160 | 161 | @staticmethod 162 | def get_work_out_comment(auth, url: str, cursor: str = '0', **kwargs) -> dict: 163 | """ 164 | 获取作品的全部一级评论. 165 | :param auth: DouyinAuth object. 166 | :param url: 作品URL. 167 | :param cursor: 评论游标. 168 | :return: JSON. 169 | """ 170 | api = f"/aweme/v1/web/comment/list/" 171 | if 'video' in url: 172 | aweme_id = url.split("/")[-1].split("?")[0] 173 | else: 174 | aweme_id = re.findall(r'modal_id=(\d+)', url)[0] 175 | url = f'https://www.douyin.com/video/{aweme_id}' 176 | headers = HeaderBuilder().build(HeaderType.GET) 177 | headers.set_referer(url) 178 | params = Params() 179 | params.add_param("device_platform", "webapp") 180 | params.add_param("aid", "6383") 181 | params.add_param("channel", "channel_pc_web") 182 | params.add_param("aweme_id", aweme_id) 183 | params.add_param("cursor", cursor) 184 | params.add_param("count", "5") 185 | params.add_param("item_type", "0") 186 | params.add_param("whale_cut_token", "") 187 | params.add_param("cut_version", "1") 188 | params.add_param("rcFT", "") 189 | params.add_param("update_version_code", "170400") 190 | params.add_param("pc_client_type", "1") 191 | params.add_param("version_code", "170400") 192 | params.add_param("version_name", "17.4.0") 193 | params.add_param("cookie_enabled", "true") 194 | params.add_param("screen_width", "1707") 195 | params.add_param("screen_height", "960") 196 | params.add_param("browser_language", "zh-CN") 197 | params.add_param("browser_platform", "Win32") 198 | params.add_param("browser_name", "Edge") 199 | params.add_param("browser_version", "125.0.0.0") 200 | params.add_param("browser_online", "true") 201 | params.add_param("engine_name", "Blink") 202 | params.add_param("engine_version", "125.0.0.0") 203 | params.add_param("os_name", "Windows") 204 | params.add_param("os_version", "10") 205 | params.add_param("cpu_core_num", "32") 206 | params.add_param("device_memory", "8") 207 | params.add_param("platform", 
@staticmethod
def get_work_all_out_comment(auth, url: str, **kwargs) -> list:
    """
    Gather every first-level comment of a work.

    Repeatedly calls ``DouyinAPI.get_work_out_comment`` with the cursor taken
    from the previous response, stopping on an empty/null page or when the
    response's ``has_more`` is not ``1``.

    :param auth: DouyinAuth object.
    :param url: URL of the work.
    :param kwargs: Accepted for call compatibility; not used in this body.
    :return: List of first-level comment entries.
    """
    collected = []
    next_cursor = "0"
    keep_going = True
    while keep_going:
        page = DouyinAPI.get_work_out_comment(auth, url, next_cursor)
        batch = page["comments"]
        next_cursor = str(page["cursor"])
        # Nothing on this page: stop without consulting has_more.
        if batch is None or len(batch) == 0:
            break
        collected.extend(batch)
        keep_going = page["has_more"] == 1
    return collected
250 | :return: 251 | """ 252 | api = f"/aweme/v1/web/comment/list/reply/" 253 | aweme_id = comment['aweme_id'] 254 | comment_id = comment['cid'] 255 | headers = HeaderBuilder().build(HeaderType.GET) 256 | refer = f'https://www.douyin.com/video/{aweme_id}' 257 | headers.set_referer(refer) 258 | params = Params() 259 | params.add_param("device_platform", "webapp") 260 | params.add_param("aid", "6383") 261 | params.add_param("channel", "channel_pc_web") 262 | params.add_param("item_id", aweme_id) 263 | params.add_param("comment_id", comment_id) 264 | params.add_param("cut_version", "1") 265 | params.add_param("cursor", cursor) 266 | params.add_param("count", count) 267 | params.add_param("item_type", "0") 268 | params.add_param("update_version_code", "170400") 269 | params.add_param("pc_client_type", "1") 270 | params.add_param("version_code", "170400") 271 | params.add_param("version_name", "17.4.0") 272 | params.add_param("cookie_enabled", "true") 273 | params.add_param("screen_width", "1707") 274 | params.add_param("screen_height", "960") 275 | params.add_param("browser_language", "zh-CN") 276 | params.add_param("browser_platform", "Win32") 277 | params.add_param("browser_name", "Edge") 278 | params.add_param("browser_version", "125.0.0.0") 279 | params.add_param("browser_online", "true") 280 | params.add_param("engine_name", "Blink") 281 | params.add_param("engine_version", "125.0.0.0") 282 | params.add_param("os_name", "Windows") 283 | params.add_param("os_version", "10") 284 | params.add_param("cpu_core_num", "32") 285 | params.add_param("device_memory", "8") 286 | params.add_param("platform", "PC") 287 | params.add_param("downlink", "10") 288 | params.add_param("effective_type", "4g") 289 | params.add_param("round_trip_time", "0") 290 | params.with_web_id(auth, refer) 291 | params.add_param("verifyFp", auth.cookie['s_v_web_id']) 292 | params.add_param("fp", auth.cookie['s_v_web_id']) 293 | params.add_param("msToken", auth.msToken) 294 | params.with_a_bogus() 
@staticmethod
def get_work_all_inner_comment(auth, comment: dict, **kwargs) -> list:
    """
    Gather every second-level (reply) comment under a first-level comment.

    Pages of five replies are requested through
    ``DouyinAPI.get_work_inner_comment``; the cursor of each response feeds
    the next request.

    :param auth: DouyinAuth object.
    :param comment: First-level comment entry (passed through unchanged).
    :param kwargs: Accepted for call compatibility; not used in this body.
    :return: List of reply comment entries.
    """
    cursor = "0"
    count = '5'
    comment_list = []
    while True:
        res_json = DouyinAPI.get_work_inner_comment(auth, comment, cursor, count)
        comments = res_json["comments"]
        cursor = str(res_json["cursor"])
        # Stop on a null/empty page, as the first-level pager does. Without
        # this, a response that keeps reporting has_more == 1 while carrying
        # no comments would loop forever.
        if not isinstance(comments, list) or not comments:
            break
        comment_list.extend(comments)
        if res_json["has_more"] != 1:
            break
    return comment_list
344 | """ 345 | api = f"/aweme/v1/web/user/profile/other/" 346 | user_id = user_url.split("/")[-1].split("?")[0] 347 | headers = HeaderBuilder().build(HeaderType.GET) 348 | headers.set_referer(user_url) 349 | params = Params() 350 | params.add_param("device_platform", 'webapp') 351 | params.add_param("aid", '6383') 352 | params.add_param("channel", 'channel_pc_web') 353 | params.add_param("publish_video_strategy_type", '2') 354 | params.add_param("source", 'channel_pc_web') 355 | params.add_param("sec_user_id", user_id) 356 | params.add_param("personal_center_strategy", '1') 357 | params.add_param("update_version_code", '170400') 358 | params.add_param("pc_client_type", '1') 359 | params.add_param("version_code", '170400') 360 | params.add_param("version_name", '17.4.0') 361 | params.add_param("cookie_enabled", 'true') 362 | params.add_param("screen_width", '1707') 363 | params.add_param("screen_height", '960') 364 | params.add_param("browser_language", 'zh-CN') 365 | params.add_param("browser_platform", 'Win32') 366 | params.add_param("browser_name", 'Edge') 367 | params.add_param("browser_version", '125.0.0.0') 368 | params.add_param("browser_online", 'true') 369 | params.add_param("engine_name", 'Blink') 370 | params.add_param("engine_version", '125.0.0.0') 371 | params.add_param("os_name", 'Windows') 372 | params.add_param("os_version", '10') 373 | params.add_param("cpu_core_num", '32') 374 | params.add_param("device_memory", '8') 375 | params.add_param("platform", 'PC') 376 | params.add_param("downlink", '10') 377 | params.add_param("effective_type", '4g') 378 | params.add_param("round_trip_time", '100') 379 | params.with_web_id(auth, user_url) 380 | params.add_param("msToken", auth.msToken) 381 | params.add_param('verifyFp', auth.cookie['s_v_web_id']) 382 | params.add_param('fp', auth.cookie['s_v_web_id']) 383 | params.with_a_bogus() 384 | resp = requests.get(f'{DouyinAPI.douyin_url}{api}', headers=headers.get(), cookies=auth.cookie, 385 | 
params=params.get(), verify=False) 386 | return json.loads(resp.text) 387 | 388 | @staticmethod 389 | def search_general_work(auth, query: str, sort_type: str = '0', publish_time: str = '0', offset: str = '0', 390 | filter_duration="", search_range="", content_type="", **kwargs): 391 | """ 392 | 搜索综合频道作品. 393 | :param auth: DouyinAuth object. 394 | :param query: 搜索关键字. 395 | :param sort_type: 排序方式 0 综合排序, 1 最多点赞, 2 最新发布. 396 | :param publish_time: 发布时间 0 不限, 1 一天内, 7 一周内, 180 半年内. 397 | :param offset: 搜索结果偏移量. 398 | :param filter_duration: 视频时长 空字符串 不限, 0-1 一分钟内, 1-5 1-5分钟内, 5-10000 5分钟以上 399 | :param search_range: 搜索范围 0 不限, 1 最近看过, 2 还未看过, 3 关注的人 400 | :param content_type: 内容形式 0 不限, 1 视频, 2 图文 401 | :return: JSON数据. 402 | """ 403 | api = f"/aweme/v1/web/general/search/single/" 404 | headers = HeaderBuilder().build(HeaderType.GET) 405 | refer = f'https://www.douyin.com/search/{urllib.parse.quote(query)}?aid={uuid.uuid4()}&type=general' 406 | headers.set_referer(refer) 407 | params = Params() 408 | params.add_param("device_platform", "webapp") 409 | params.add_param("aid", "6383") 410 | params.add_param("channel", "channel_pc_web") 411 | params.add_param("search_channel", "aweme_general") 412 | params.add_param("enable_history", "1") 413 | params.add_param("filter_selected", r'{"sort_type":"%s","publish_time":"%s","filter_duration":"%s",' 414 | r'"search_range":"%s","content_type":"%s"}' % (sort_type, publish_time, 415 | filter_duration, 416 | search_range, content_type)) 417 | params.add_param("keyword", query) 418 | params.add_param("search_source", "tab_search") 419 | params.add_param("query_correct_type", "1") 420 | params.add_param("is_filter_search", "1") 421 | params.add_param("from_group_id", "") 422 | params.add_param("offset", offset) 423 | params.add_param("count", '25') 424 | params.add_param("need_filter_settings", '1' if offset == '0' else '0') 425 | params.add_param("list_type", "single") 426 | params.add_param("update_version_code", "170400") 427 | 
@staticmethod
def search_some_general_work(auth, query: str, num: int, sort_type: str, publish_time: str, filter_duration="", search_range="", content_type="", **kwargs) -> list:
    """
    Search the general channel and return at most ``num`` works.

    Pages are requested through ``DouyinAPI.search_general_work``; the offset
    advances by the number of items actually received.

    :param auth: DouyinAuth object.
    :param query: Search keyword.
    :param num: Maximum number of results to return; ``num <= 0`` yields ``[]``
        without issuing a request.
    :param sort_type: Sort order: 0 comprehensive, 1 most liked, 2 newest.
    :param publish_time: Publish window: 0 any, 1 within a day, 7 within a week,
        180 within half a year.
    :param filter_duration: Video length: empty string any, 0-1 under a minute,
        1-5 one to five minutes, 5-10000 over five minutes.
    :param search_range: Search scope: 0 any, 1 recently watched, 2 not yet
        watched, 3 followed users.
    :param content_type: Content form: 0 any, 1 video, 2 image-text.
    :param kwargs: Accepted for call compatibility; not used in this body.
    :return: List of work entries, truncated to ``num`` items.
    """
    # A non-positive limit can never be satisfied; a negative one would
    # otherwise slice items off the *end* of the result.
    if num <= 0:
        return []
    offset = 0
    work_list = []
    while True:
        res_json = DouyinAPI.search_general_work(auth, query, sort_type, publish_time, str(offset),
                                                 filter_duration, search_range, content_type)
        works = res_json["data"]
        # An empty/null page cannot advance the offset, so continuing would
        # repeat the identical request forever; stop instead.
        if not works:
            break
        work_list.extend(works)
        if res_json["has_more"] != 1 or len(work_list) >= num:
            break
        offset += len(works)
    return work_list[:num]
518 | """ 519 | api = "/aweme/v1/web/discover/search" 520 | headers = HeaderBuilder().build(HeaderType.GET) 521 | refer = f'https://www.douyin.com/search/{urllib.parse.quote(query)}?aid={uuid.uuid4()}&type=general' 522 | headers.set_referer(refer) 523 | params = Params() 524 | params.add_param("device_platform", 'webapp') 525 | params.add_param("aid", '6383') 526 | params.add_param("channel", 'channel_pc_web') 527 | params.add_param("search_channel", 'aweme_user_web') 528 | params.add_param("search_filter_value", r'{"douyin_user_fans":["%s"],"douyin_user_type":["%s"]}' % ( 529 | douyin_user_fans, douyin_user_type)) 530 | params.add_param("keyword", query) 531 | params.add_param("search_source", 'switch_tab') 532 | params.add_param("query_correct_type", '1') 533 | params.add_param("is_filter_search", '1') 534 | # params.add_param("from_group_id", '7378456704385600820') 535 | params.add_param("offset", offset) 536 | params.add_param("count", num) 537 | params.add_param("need_filter_settings", '1' if offset == '0' else '0') 538 | params.add_param("list_type", 'single') 539 | params.add_param("update_version_code", '170400') 540 | params.add_param("pc_client_type", '1') 541 | params.add_param("version_code", '170400') 542 | params.add_param("version_name", '17.4.0') 543 | params.add_param("cookie_enabled", 'true') 544 | params.add_param("screen_width", '1707') 545 | params.add_param("screen_height", '960') 546 | params.add_param("browser_language", 'zh-CN') 547 | params.add_param("browser_platform", 'Win32') 548 | params.add_param("browser_name", 'Edge') 549 | params.add_param("browser_version", '125.0.0.0') 550 | params.add_param("browser_online", 'true') 551 | params.add_param("engine_name", 'Blink') 552 | params.add_param("engine_version", '125.0.0.0') 553 | params.add_param("os_name", 'Windows') 554 | params.add_param("os_version", '10') 555 | params.add_param("cpu_core_num", '32') 556 | params.add_param("device_memory", '8') 557 | params.add_param("platform", 
'PC') 558 | params.add_param("downlink", '10') 559 | params.add_param("effective_type", '4g') 560 | params.add_param("round_trip_time", '150') 561 | params.with_web_id(auth, refer) 562 | params.add_param("msToken", auth.msToken) 563 | params.with_a_bogus() 564 | resp = requests.get(f'{DouyinAPI.douyin_url}{api}', headers=headers.get(), cookies=auth.cookie, 565 | params=params.get(), verify=False) 566 | return resp.json() 567 | 568 | @staticmethod 569 | def search_live(auth, query: str, offset: str = '0', num: str = '25', **kwargs): 570 | """ 571 | 搜索直播. 572 | :param auth: DouyinAuth object. 573 | :param query: 搜索关键字. 574 | :param offset: 搜索结果偏移量. 575 | :param num: 搜索数量. 576 | :return: JSON数据. 577 | """ 578 | api = "/aweme/v1/web/live/search/" 579 | headers = HeaderBuilder().build(HeaderType.GET) 580 | refer = f'https://www.douyin.com/search/{urllib.parse.quote(query)}?aid={uuid.uuid4()}&type=live' 581 | headers.set_referer(refer) 582 | params = Params() 583 | params.add_param("device_platform", 'webapp') 584 | params.add_param("aid", '6383') 585 | params.add_param("channel", 'channel_pc_web') 586 | params.add_param("search_channel", 'aweme_live') 587 | params.add_param("keyword", query) 588 | params.add_param("search_source", 'normal_search') 589 | params.add_param("query_correct_type", '1') 590 | params.add_param("is_filter_search", '0') 591 | params.add_param("from_group_id", '') 592 | params.add_param("offset", offset) 593 | params.add_param("count", num) 594 | params.add_param("need_filter_settings", '1' if offset == '0' else '0') 595 | params.add_param("list_type", 'single') 596 | params.add_param("update_version_code", '170400') 597 | params.add_param("pc_client_type", '1') 598 | params.add_param("version_code", '170400') 599 | params.add_param("version_name", '17.4.0') 600 | params.add_param("cookie_enabled", 'true') 601 | params.add_param("screen_width", '1707') 602 | params.add_param("screen_height", '960') 603 | params.add_param("browser_language", 
@staticmethod
def get_user_favorite(auth, sec_id: str, max_cursor: str = '0', num: str = '18', **kwargs):
    """
    Fetch one page of a user's favorite list.

    Sends a GET to ``/aweme/v1/web/aweme/favorite/`` with the referer set to
    the user's profile page opened on ``showTab=like``.

    :param auth: DouyinAuth object; its ``cookie`` and ``msToken`` are used.
    :param sec_id: SECID of the user whose list is requested.
    :param max_cursor: Paging cursor.
    :param num: Number of entries to request.
    :param kwargs: Accepted for call compatibility; not used in this body.
    :return: Decoded JSON response.
    """
    # Build headers from an instance, as every other request in this class does.
    headers = HeaderBuilder().build(HeaderType.GET)
    refer = f"https://www.douyin.com/user/{sec_id}?showTab=like"
    headers.set_referer(refer)
    params = Params()
    # Order is kept exactly as before; sec_user_id now carries the caller's
    # sec_id instead of a fixed account id, so it matches the referer.
    query_fields = (
        ("device_platform", 'webapp'),
        ("aid", '6383'),
        ("channel", 'channel_pc_web'),
        ("sec_user_id", sec_id),
        ("max_cursor", max_cursor),
        ("min_cursor", '0'),
        ("whale_cut_token", ''),
        ("cut_version", '1'),
        ("count", num),
        ("publish_video_strategy_type", '2'),
        ("update_version_code", '170400'),
        ("pc_client_type", '1'),
        ("version_code", '170400'),
        ("version_name", '17.4.0'),
        ("cookie_enabled", 'true'),
        ("screen_width", '1707'),
        ("screen_height", '960'),
        ("browser_language", 'zh-CN'),
        ("browser_platform", 'Win32'),
        ("browser_name", 'Edge'),
        ("browser_version", '125.0.0.0'),
        ("browser_online", 'true'),
        ("engine_name", 'Blink'),
        ("engine_version", '125.0.0.0'),
        ("os_name", 'Windows'),
        ("os_version", '10'),
        ("cpu_core_num", '32'),
        ("device_memory", '8'),
        ("platform", 'PC'),
        ("downlink", '10'),
        ("effective_type", '4g'),
        ("round_trip_time", '100'),
    )
    for key, value in query_fields:
        params.add_param(key, value)
    params.with_web_id(auth=auth, url=refer)
    params.add_param("verifyFp", auth.cookie['s_v_web_id'])
    params.add_param("fp", auth.cookie['s_v_web_id'])
    params.add_param("msToken", auth.msToken)
    # Called last, after all other parameters have been added.
    params.with_a_bogus()
    response = requests.get('https://www.douyin.com/aweme/v1/web/aweme/favorite/', params=params.get(),
                            headers=headers.get(), cookies=auth.cookie,
                            verify=False)
    return response.json()
749 | :param live_id: 直播间ID 750 | :return: 直播间ID, 用户ID, ttwid 751 | """ 752 | url = "https://live.douyin.com/" + live_id 753 | headers = HeaderBuilder().build(HeaderType.GET) 754 | res = requests.get(url, headers=headers.get(), cookies=auth_.cookie, verify=False) 755 | ttwid = res.cookies.get_dict()['ttwid'] 756 | soup = BeautifulSoup(res.text, 'html.parser') 757 | scripts = soup.select('script[nonce]') 758 | for script in scripts: 759 | if script.string is not None and 'roomId' in script.string: 760 | try: 761 | room_id = re.findall(r'\\"roomId\\":\\"(\d+)\\"', script.string)[0] 762 | user_id = re.findall(r'\\"user_unique_id\\":\\"(\d+)\\"', script.string)[0] 763 | room_info = re.findall(r'\\"roomInfo\\":\{\\"room\\":\{\\"id_str\\":\\".*?\\",\\"status\\":(.*?),\\"status_str\\":\\".*?\\",\\"title\\":\\"(.*?)\\"', script.string)[0] 764 | room_status = room_info[0] 765 | room_title = room_info[1] 766 | return { 767 | "room_id": room_id, 768 | "user_id": user_id, 769 | "ttwid": ttwid, 770 | # 2 是直播中 4 是未开播 771 | "room_status": room_status, 772 | "room_title": room_title 773 | } 774 | except Exception as e: 775 | pass 776 | return None, None, None 777 | 778 | @staticmethod 779 | def get_live_production(auth, url: str, room_id: str, author_id: str, offset: str, **kwargs): 780 | """ 781 | 获取直播间的商品信息. 782 | :param auth: DouyinAuth object. 783 | :param url: 直播间链接. 784 | :param room_id: 直播间ID 785 | :param author_id: 主播ID 786 | :param offset: 翻页游标. 787 | :return: JSON 商品列表. 
788 | """ 789 | api = f"/live/promotions/page/" 790 | headers = HeaderBuilder().build(HeaderType.GET) 791 | headers.set_header("origin", DouyinAPI.live_url) 792 | headers.set_referer(url) 793 | params = Params() 794 | params.add_param("device_platform", "webapp") 795 | params.add_param("aid", "6383") 796 | params.add_param("channel", "channel_pc_web") 797 | params.add_param("room_id", room_id) 798 | params.add_param("author_id", author_id) 799 | params.add_param("offset", offset) 800 | params.add_param("limit", "20") 801 | params.add_param("pc_client_type", "1") 802 | params.add_param("version_code", "210800") 803 | params.add_param("version_name", "21.8.0") 804 | params.add_param("cookie_enabled", "true") 805 | params.add_param("screen_width", "2560") 806 | params.add_param("screen_height", "1440") 807 | params.add_param("browser_language", "zh-CN") 808 | params.add_param("browser_platform", "Win32") 809 | params.add_param("browser_name", "Edge") 810 | params.add_param("browser_version", "121.0.0.0") 811 | params.add_param("browser_online", "true") 812 | params.add_param("engine_name", "Blink") 813 | params.add_param("engine_version", "121.0.0.0") 814 | params.add_param("os_name", "Windows") 815 | params.add_param("os_version", "10") 816 | params.add_param("cpu_core_num", "20") 817 | params.add_param("device_memory", "8") 818 | params.add_param("platform", "PC") 819 | params.add_param("downlink", "10") 820 | params.add_param("effective_type", "4g") 821 | params.add_param("round_trip_time", "50") 822 | params.with_web_id(auth, url) 823 | params.add_param("msToken", auth.msToken) 824 | params.with_a_bogus() 825 | res = requests.post(f'{DouyinAPI.live_url}{api}', headers=headers.get(), cookies=auth.cookie, 826 | params=params.get(), verify=False) 827 | return res.json() 828 | 829 | @staticmethod 830 | def get_all_live_production(auth, url: str, **kwargs): 831 | """ 832 | 获取直播间的所有商品信息. 833 | :param auth: DouyinAuth object. 834 | :param url: 直播间链接. 
835 | :return: 836 | """ 837 | room_info = DouyinAPI.get_live_info(auth, url.split("/")[-1].split("?")[0]) 838 | room_id = room_info["room_id"] 839 | author_id = room_info["author_id"] 840 | offset = "0" 841 | production_list = [] 842 | while True: 843 | res_json = DouyinAPI.get_live_production(auth, url, room_id, author_id, offset) 844 | productions = res_json["promotions"] 845 | production_list.extend(productions) 846 | offset = str(res_json["next_offset"]) 847 | if offset == "-1": 848 | break 849 | return production_list 850 | 851 | @staticmethod 852 | def get_live_production_detail(auth, url, ec_promotion_id, sec_author_id, live_room_id, **kwargs): 853 | """ 854 | 获取直播间商品详情. 855 | :param auth: DouyinAuth object. 856 | :param url: 直播间链接. 857 | :param ec_promotion_id: 商品ID. 858 | :param sec_author_id: 主播ID 859 | :param live_room_id: 直播间ID 860 | :return: JSON 商品详情. 861 | """ 862 | api = f"/ecom/product/detail/saas/pc/" 863 | headers = HeaderBuilder().build(HeaderType.FORM) 864 | headers.set_header("origin", DouyinAPI.live_url) 865 | headers.set_referer(url) 866 | headers.with_csrf(auth.cookie_str) 867 | params = Params() 868 | params.add_param("is_h5", "1") 869 | params.add_param("origin_type", "638301") 870 | params.add_param("device_platform", "webapp") 871 | params.add_param("aid", "6383") 872 | params.add_param("channel", "channel_pc_web") 873 | params.add_param("pc_client_type", "1") 874 | params.add_param("update_version_code", "170400") 875 | params.add_param("version_code", "") 876 | params.add_param("version_name", "") 877 | params.add_param("cookie_enabled", "true") 878 | params.add_param("screen_width", "1707") 879 | params.add_param("screen_height", "960") 880 | params.add_param("browser_language", "zh-CN") 881 | params.add_param("browser_platform", "Win32") 882 | params.add_param("browser_name", "Edge") 883 | params.add_param("browser_version", "125.0.0.0") 884 | params.add_param("browser_online", "true") 885 | params.add_param("engine_name", "Blink") 
886 | params.add_param("engine_version", "125.0.0.0") 887 | params.add_param("os_name", "Windows") 888 | params.add_param("os_version", "10") 889 | params.add_param("cpu_core_num", "32") 890 | params.add_param("device_memory", "8") 891 | params.add_param("platform", "PC") 892 | params.add_param("downlink", "1.7") 893 | params.add_param("effective_type", "4g") 894 | params.add_param("round_trip_time", "200") 895 | params.with_web_id(auth, url) 896 | params.add_param("msToken", auth.msToken) 897 | data = { 898 | "bff_type": "2", 899 | "ec_promotion_id": ec_promotion_id, 900 | "is_h5": "1", 901 | "item_id": "0", 902 | "live_room_id": live_room_id, 903 | "origin_type": "638301", 904 | "promotion_ids": ec_promotion_id, 905 | "room_id": live_room_id, 906 | "sec_author_id": sec_author_id, 907 | "use_new_price": "1" 908 | } 909 | params.with_a_bogus(data) 910 | res = requests.post(f'{DouyinAPI.live_url}{api}', headers=headers.get(), params=params.get(), 911 | cookies=auth.cookie, data=data, verify=False) 912 | return res.json() 913 | 914 | @staticmethod 915 | def collect_aweme(auth, aweme_id: str, action: str = '1', **kwargs): 916 | """ 917 | 收藏或取消收藏视频. 918 | :param auth: DouyinAuth object. 919 | :param aweme_id: 视频ID. 920 | :param action: 1: 收藏, 0: 取消收藏. 921 | :return: 响应JSON. 
922 | """ 923 | api = '/aweme/v1/web/aweme/collect/' 924 | headers = HeaderBuilder().build(HeaderType.FORM) 925 | refer = "https://www.douyin.com/?recommend=1" 926 | headers.set_referer(refer) 927 | headers.with_bd(api, auth) 928 | headers.with_csrf(auth.cookie_str) 929 | headers.set_header("origin", DouyinAPI.douyin_url) 930 | params = Params() 931 | params.add_param("device_platform", "webapp") 932 | params.add_param("aid", "6383") 933 | params.add_param("channel", "channel_pc_web") 934 | params.add_param("pc_client_type", "1") 935 | params.add_param("update_version_code", "170400") 936 | params.add_param("version_code", "170400") 937 | params.add_param("version_name", "17.4.0") 938 | params.add_param("cookie_enabled", "true") 939 | params.add_param("screen_width", "1707") 940 | params.add_param("screen_height", "960") 941 | params.add_param("browser_language", "zh-CN") 942 | params.add_param("browser_platform", "Win32") 943 | params.add_param("browser_name", "Edge") 944 | params.add_param("browser_version", "125.0.0.0") 945 | params.add_param("browser_online", "true") 946 | params.add_param("engine_name", "Blink") 947 | params.add_param("engine_version", "125.0.0.0") 948 | params.add_param("os_name", "Windows") 949 | params.add_param("os_version", "10") 950 | params.add_param("cpu_core_num", "32") 951 | params.add_param("device_memory", "8") 952 | params.add_param("platform", "PC") 953 | params.add_param("downlink", "10") 954 | params.add_param("effective_type", "4g") 955 | params.add_param("round_trip_time", "50") 956 | params.with_web_id(auth, refer) 957 | params.add_param("verifyFp", auth.cookie['s_v_web_id']) 958 | params.add_param("fp", auth.cookie['s_v_web_id']) 959 | params.add_param("msToken", auth.msToken) 960 | data = { 961 | "action": action, 962 | "aweme_id": aweme_id, 963 | "aweme_type": "0", 964 | } 965 | params.with_a_bogus(data) 966 | res = requests.post(f'{DouyinAPI.douyin_url}{api}', headers=headers.get(), params=params.get(), 967 | 
cookies=auth.cookie, data=data, verify=False) 968 | return res.json() 969 | 970 | @staticmethod 971 | def move_collect_aweme(auth, aweme_id: str, collect_name: str, collect_id: str, **kwargs): 972 | """ 973 | 移动视频到指定收藏夹(需要先收藏视频) 974 | :param collect_name: 收藏夹名称 975 | :param collect_id: 收藏夹ID 976 | :param auth: DouyinAuth object. 977 | :param aweme_id: 视频ID. 978 | :return: 响应JSON. 979 | """ 980 | api = '/aweme/v1/web/collects/video/move/' 981 | headers = HeaderBuilder().build(HeaderType.FORM) 982 | refer = "https://www.douyin.com/?recommend=1" 983 | headers.set_referer(refer) 984 | headers.with_bd(api, auth) 985 | headers.with_csrf(auth.cookie_str) 986 | headers.set_header("origin", DouyinAPI.douyin_url) 987 | params = Params() 988 | params.add_param("aid", "6383") 989 | params.add_param("browser_language", "zh-CN") 990 | params.add_param("browser_name", "Edge") 991 | params.add_param("browser_online", "true") 992 | params.add_param("browser_platform", "Win32") 993 | params.add_param("browser_version", "125.0.0.0") 994 | params.add_param("channel", "channel_pc_web") 995 | params.add_param("collects_name", collect_name) 996 | params.add_param("cookie_enabled", "true") 997 | params.add_param("cpu_core_num", "32") 998 | params.add_param("device_memory", "8") 999 | params.add_param("device_platform", "webapp") 1000 | params.add_param("downlink", "10") 1001 | params.add_param("effective_type", "4g") 1002 | params.add_param("engine_name", "Blink") 1003 | params.add_param("engine_version", "125.0.0.0") 1004 | params.add_param("item_ids", aweme_id) 1005 | params.add_param("item_type", "2") 1006 | params.add_param("move_collects_list", collect_id) 1007 | params.add_param("os_name", "Windows") 1008 | params.add_param("os_version", "10") 1009 | params.add_param("pc_client_type", "1") 1010 | params.add_param("platform", "PC") 1011 | params.add_param("round_trip_time", "50") 1012 | params.add_param("screen_height", "960") 1013 | params.add_param("screen_width", "1707") 1014 | 
params.add_param("to_collects_id", collect_id) 1015 | params.add_param("update_collects_sort", "true") 1016 | params.add_param("update_version_code", "170400") 1017 | params.add_param("version_code", "170400") 1018 | params.add_param("version_name", "17.4.0") 1019 | params.with_web_id(auth, refer) 1020 | params.add_param("verifyFp", auth.cookie['s_v_web_id']) 1021 | params.add_param("fp", auth.cookie['s_v_web_id']) 1022 | params.add_param("msToken", auth.msToken) 1023 | params.with_a_bogus() 1024 | res = requests.post(f'{DouyinAPI.douyin_url}{api}', headers=headers.get(), params=params.get(), 1025 | cookies=auth.cookie, verify=False) 1026 | return res.json() 1027 | 1028 | @staticmethod 1029 | def remove_collect_aweme(auth, aweme_id: str, collect_name: str, collect_id: str, **kwargs): 1030 | """ 1031 | 从指定收藏夹中移除视频(需要先收藏视频) 1032 | :param collect_name: 收藏夹名称 1033 | :param collect_id: 收藏夹ID 1034 | :param auth: DouyinAuth object. 1035 | :param aweme_id: 视频ID. 1036 | :return: 响应JSON. 1037 | """ 1038 | api = '/aweme/v1/web/collects/video/move/' 1039 | headers = HeaderBuilder().build(HeaderType.FORM) 1040 | refer = "https://www.douyin.com/user/self?showTab=favorite_collection" 1041 | headers.set_referer(refer) 1042 | headers.with_bd(api, auth) 1043 | headers.with_csrf(auth.cookie_str) 1044 | headers.set_header("origin", DouyinAPI.douyin_url) 1045 | params = Params() 1046 | params.add_param("aid", "6383") 1047 | params.add_param("browser_language", "zh-CN") 1048 | params.add_param("browser_name", "Edge") 1049 | params.add_param("browser_online", "true") 1050 | params.add_param("browser_platform", "Win32") 1051 | params.add_param("browser_version", "125.0.0.0") 1052 | params.add_param("channel", "channel_pc_web") 1053 | params.add_param("collects_name", collect_name) 1054 | params.add_param("cookie_enabled", "true") 1055 | params.add_param("cpu_core_num", "32") 1056 | params.add_param("device_memory", "8") 1057 | params.add_param("device_platform", "webapp") 1058 | 
params.add_param("downlink", "10") 1059 | params.add_param("effective_type", "4g") 1060 | params.add_param("engine_name", "Blink") 1061 | params.add_param("engine_version", "125.0.0.0") 1062 | params.add_param("from_collects_id", collect_id) 1063 | params.add_param("item_ids", aweme_id) 1064 | params.add_param("item_type", "2") 1065 | params.add_param("os_name", "Windows") 1066 | params.add_param("os_version", "10") 1067 | params.add_param("pc_client_type", "1") 1068 | params.add_param("platform", "PC") 1069 | params.add_param("round_trip_time", "50") 1070 | params.add_param("screen_height", "960") 1071 | params.add_param("screen_width", "1707") 1072 | params.add_param("update_version_code", "170400") 1073 | params.add_param("version_code", "170400") 1074 | params.add_param("version_name", "17.4.0") 1075 | params.with_web_id(auth, refer) 1076 | params.add_param("verifyFp", auth.cookie['s_v_web_id']) 1077 | params.add_param("fp", auth.cookie['s_v_web_id']) 1078 | params.add_param("msToken", auth.msToken) 1079 | params.with_a_bogus() 1080 | res = requests.post(f'{DouyinAPI.douyin_url}{api}', headers=headers.get(), params=params.get(), 1081 | cookies=auth.cookie, verify=False) 1082 | return res.json() 1083 | 1084 | @staticmethod 1085 | def get_collect_list(auth, **kwargs): 1086 | """ 1087 | 获取我的收藏夹列表 1088 | :param auth: DouyinAuth object. 1089 | :return: JSON. 
1090 | """ 1091 | api = "/aweme/v1/web/collects/list/" 1092 | headers = HeaderBuilder().build(HeaderType.GET) 1093 | refer = "https://www.douyin.com/?recommend=1" 1094 | headers.set_referer(refer) 1095 | params = Params() 1096 | params.add_param("device_platform", "webapp") 1097 | params.add_param("aid", "6383") 1098 | params.add_param("channel", "channel_pc_web") 1099 | params.add_param("cursor", "0") 1100 | params.add_param("count", "20") 1101 | params.add_param("update_version_code", "170400") 1102 | params.add_param("pc_client_type", "1") 1103 | params.add_param("version_code", "170400") 1104 | params.add_param("version_name", "17.4.0") 1105 | params.add_param("cookie_enabled", "true") 1106 | params.add_param("screen_width", "1707") 1107 | params.add_param("screen_height", "960") 1108 | params.add_param("browser_language", "zh-CN") 1109 | params.add_param("browser_platform", "Win32") 1110 | params.add_param("browser_name", "Edge") 1111 | params.add_param("browser_version", "125.0.0.0") 1112 | params.add_param("browser_online", "true") 1113 | params.add_param("engine_name", "Blink") 1114 | params.add_param("engine_version", "125.0.0.0") 1115 | params.add_param("os_name", "Windows") 1116 | params.add_param("os_version", "10") 1117 | params.add_param("cpu_core_num", "32") 1118 | params.add_param("device_memory", "8") 1119 | params.add_param("platform", "PC") 1120 | params.add_param("downlink", "5.95") 1121 | params.add_param("effective_type", "4g") 1122 | params.add_param("round_trip_time", "200") 1123 | params.with_web_id(auth, refer) 1124 | params.add_param("msToken", auth.msToken) 1125 | params.with_a_bogus() 1126 | params.add_param("verifyFp", auth.cookie['s_v_web_id']) 1127 | params.add_param("fp", auth.cookie['s_v_web_id']) 1128 | res = requests.get(f'{DouyinAPI.douyin_url}{api}', headers=headers.get(), params=params.get(), 1129 | cookies=auth.cookie, verify=False) 1130 | return res.json() 1131 | 1132 | @staticmethod 1133 | def get_user_follower_list(auth, 
user_id: str, sec_id: str, max_time: str = '0', count: str = '20', **kwargs): 1134 | """ 1135 | 获取用户的粉丝列表 1136 | :param auth: DouyinAuth object. 1137 | :param user_id: 用户ID. 1138 | :param sec_id: 用户sec_id. 1139 | :param max_time: 最大时间戳. 1140 | :param count: 数量. 1141 | :return: JSON. 1142 | """ 1143 | api = "/aweme/v1/web/user/follower/list/" 1144 | headers = HeaderBuilder().build(HeaderType.GET) 1145 | refer = f"https://www.douyin.com/user/{sec_id}" 1146 | headers.set_referer(refer) 1147 | params = Params() 1148 | params.add_param("device_platform", 'webapp') 1149 | params.add_param("aid", '6383') 1150 | params.add_param("channel", 'channel_pc_web') 1151 | params.add_param("user_id", user_id) 1152 | params.add_param("sec_user_id", sec_id) 1153 | params.add_param("offset", '0') 1154 | params.add_param("min_time", '0') 1155 | params.add_param("max_time", max_time) 1156 | params.add_param("count", count) 1157 | params.add_param("source_type", '2' if max_time == '0' else '1') 1158 | params.add_param("gps_access", '0') 1159 | params.add_param("address_book_access", '0') 1160 | params.add_param("update_version_code", '170400') 1161 | params.add_param("pc_client_type", '1') 1162 | params.add_param("version_code", '170400') 1163 | params.add_param("version_name", '17.4.0') 1164 | params.add_param("cookie_enabled", 'true') 1165 | params.add_param("screen_width", '1707') 1166 | params.add_param("screen_height", '960') 1167 | params.add_param("browser_language", 'zh-CN') 1168 | params.add_param("browser_platform", 'Win32') 1169 | params.add_param("browser_name", 'Edge') 1170 | params.add_param("browser_version", '125.0.0.0') 1171 | params.add_param("browser_online", 'true') 1172 | params.add_param("engine_name", 'Blink') 1173 | params.add_param("engine_version", '125.0.0.0') 1174 | params.add_param("os_name", 'Windows') 1175 | params.add_param("os_version", '10') 1176 | params.add_param("cpu_core_num", '32') 1177 | params.add_param("device_memory", '8') 1178 | 
params.add_param("platform", 'PC') 1179 | params.add_param("downlink", '10') 1180 | params.add_param("effective_type", '4g') 1181 | params.add_param("round_trip_time", '150') 1182 | params.with_web_id(auth, refer) 1183 | params.add_param("msToken", auth.msToken) 1184 | params.with_a_bogus() 1185 | params.add_param("verifyFp", auth.cookie['s_v_web_id']) 1186 | params.add_param("fp", auth.cookie['s_v_web_id']) 1187 | res = requests.get(f'{DouyinAPI.douyin_url}{api}', headers=headers.get(), params=params.get(), 1188 | cookies=auth.cookie, verify=False) 1189 | return res.json() 1190 | 1191 | @staticmethod 1192 | def get_some_user_follower_list(auth, user_id: str, sec_id: str, num: int, **kwargs) -> list: 1193 | """ 1194 | 获取用户的前num个粉丝列表 1195 | :param auth: DouyinAuth object. 1196 | :param user_id: 用户ID. 1197 | :param sec_id: 用户sec_id. 1198 | :param num: 要获取的数量 1199 | :return: 粉丝列表. 1200 | """ 1201 | max_time = "0" 1202 | count = "20" 1203 | follower_list = [] 1204 | while True: 1205 | res_json = DouyinAPI.get_user_follower_list(auth, user_id, sec_id, max_time, count) 1206 | followers = res_json["followers"] 1207 | follower_list.extend(followers) 1208 | if res_json["has_more"] != 1 or len(follower_list) >= num: 1209 | break 1210 | max_time = res_json["min_time"] 1211 | if len(follower_list) > num: 1212 | follower_list = follower_list[:num] 1213 | return follower_list 1214 | 1215 | @staticmethod 1216 | def get_user_following_list(auth, user_id: str, sec_id: str, max_time: str = '0', count: str = '20', **kwargs): 1217 | """ 1218 | 获取用户的关注列表 1219 | :param auth: DouyinAuth object. 1220 | :param user_id: 用户ID. 1221 | :param sec_id: 用户sec_id. 1222 | :param max_time: 最大时间戳. 1223 | :param count: 数量. 
1224 | :return: 1225 | """ 1226 | api = "/aweme/v1/web/user/following/list/" 1227 | headers = HeaderBuilder().build(HeaderType.GET) 1228 | refer = f"https://www.douyin.com/user/{sec_id}" 1229 | headers.set_referer(refer) 1230 | params = Params() 1231 | params.add_param("device_platform", 'webapp') 1232 | params.add_param("aid", '6383') 1233 | params.add_param("channel", 'channel_pc_web') 1234 | params.add_param("user_id", user_id) 1235 | params.add_param("sec_user_id", sec_id) 1236 | params.add_param("offset", '0') 1237 | params.add_param("min_time", '0') 1238 | params.add_param("max_time", max_time) 1239 | params.add_param("count", count) 1240 | params.add_param("source_type", '2' if max_time == '0' else '1') 1241 | params.add_param("gps_access", '0') 1242 | params.add_param("address_book_access", '0') 1243 | params.add_param("is_top", '1') 1244 | params.add_param("update_version_code", '170400') 1245 | params.add_param("pc_client_type", '1') 1246 | params.add_param("version_code", '170400') 1247 | params.add_param("version_name", '17.4.0') 1248 | params.add_param("cookie_enabled", 'true') 1249 | params.add_param("screen_width", '1707') 1250 | params.add_param("screen_height", '960') 1251 | params.add_param("browser_language", 'zh-CN') 1252 | params.add_param("browser_platform", 'Win32') 1253 | params.add_param("browser_name", 'Edge') 1254 | params.add_param("browser_version", '125.0.0.0') 1255 | params.add_param("browser_online", 'true') 1256 | params.add_param("engine_name", 'Blink') 1257 | params.add_param("engine_version", '125.0.0.0') 1258 | params.add_param("os_name", 'Windows') 1259 | params.add_param("os_version", '10') 1260 | params.add_param("cpu_core_num", '32') 1261 | params.add_param("device_memory", '8') 1262 | params.add_param("platform", 'PC') 1263 | params.add_param("downlink", '10') 1264 | params.add_param("effective_type", '4g') 1265 | params.add_param("round_trip_time", '150') 1266 | params.with_web_id(auth, refer) 1267 | 
params.add_param("msToken", auth.msToken) 1268 | params.with_a_bogus() 1269 | params.add_param("verifyFp", auth.cookie['s_v_web_id']) 1270 | params.add_param("fp", auth.cookie['s_v_web_id']) 1271 | res = requests.get(f'{DouyinAPI.douyin_url}{api}', headers=headers.get(), params=params.get(), 1272 | cookies=auth.cookie, verify=False) 1273 | return res.json() 1274 | 1275 | @staticmethod 1276 | def get_some_user_following_list(auth, user_id: str, sec_id: str, num: int, **kwargs) -> list: 1277 | """ 1278 | 获取用户的前num个关注列表 1279 | :param auth: DouyinAuth object. 1280 | :param user_id: 用户ID. 1281 | :param sec_id: 用户sec_id. 1282 | :param num: 要获取的数量 1283 | :return: 关注列表. 1284 | """ 1285 | max_time = "0" 1286 | count = "20" 1287 | following_list = [] 1288 | while True: 1289 | res_json = DouyinAPI.get_user_following_list(auth, user_id, sec_id, max_time, count) 1290 | followings = res_json["followings"] 1291 | following_list.extend(followings) 1292 | if res_json["has_more"] != 1 or len(following_list) >= num: 1293 | break 1294 | max_time = res_json["min_time"] 1295 | if len(following_list) > num: 1296 | following_list = following_list[:num] 1297 | return following_list 1298 | 1299 | @staticmethod 1300 | def get_notice_list(auth, min_time='0', max_time='0', count='10', notice_group='700', **kwargs): 1301 | """ 1302 | 获得通知 1303 | :param auth: DouyinAuth object. 1304 | :param min_time: 最小时间戳. 1305 | :param max_time: 最大时间戳. 1306 | :param count: 数量. 1307 | :param notice_group: 消息类型 700 全部消息 401 粉丝 601 @我的 2 评论 3 点赞 520 弹幕 1308 | :return: JSON. 
1309 | """ 1310 | api = "/aweme/v1/web/notice/" 1311 | headers = HeaderBuilder().build(HeaderType.GET) 1312 | refer = "https://www.douyin.com/?recommend=1" 1313 | headers.set_referer(refer) 1314 | params = Params() 1315 | params.add_param("device_platform", 'webapp') 1316 | params.add_param("aid", '6383') 1317 | params.add_param("channel", 'channel_pc_web') 1318 | params.add_param("is_new_notice", '1') 1319 | params.add_param("is_mark_read", '1') 1320 | params.add_param("notice_group", notice_group) 1321 | params.add_param("count", count) 1322 | params.add_param("min_time", min_time) 1323 | params.add_param("max_time", max_time) 1324 | params.add_param("update_version_code", '170400') 1325 | params.add_param("pc_client_type", '1') 1326 | params.add_param("version_code", '170400') 1327 | params.add_param("version_name", '17.4.0') 1328 | params.add_param("cookie_enabled", 'true') 1329 | params.add_param("screen_width", '1707') 1330 | params.add_param("screen_height", '960') 1331 | params.add_param("browser_language", 'zh-CN') 1332 | params.add_param("browser_platform", 'Win32') 1333 | params.add_param("browser_name", 'Edge') 1334 | params.add_param("browser_version", '125.0.0.0') 1335 | params.add_param("browser_online", 'true') 1336 | params.add_param("engine_name", 'Blink') 1337 | params.add_param("engine_version", '125.0.0.0') 1338 | params.add_param("os_name", 'Windows') 1339 | params.add_param("os_version", '10') 1340 | params.add_param("cpu_core_num", '32') 1341 | params.add_param("device_memory", '8') 1342 | params.add_param("platform", 'PC') 1343 | params.add_param("downlink", '10') 1344 | params.add_param("effective_type", '4g') 1345 | params.add_param("round_trip_time", '50') 1346 | params.with_web_id(auth, refer) 1347 | params.add_param("msToken", auth.msToken) 1348 | params.with_a_bogus() 1349 | params.add_param("verifyFp", auth.cookie['s_v_web_id']) 1350 | params.add_param("fp", auth.cookie['s_v_web_id']) 1351 | res = 
requests.get(f'{DouyinAPI.douyin_url}{api}', headers=headers.get(), params=params.get(), 1352 | cookies=auth.cookie, verify=False) 1353 | return res.json() 1354 | 1355 | @staticmethod 1356 | def get_some_notice_list(auth, num: int = 20, notice_group='700', **kwargs) -> list: 1357 | """ 1358 | 获得前num条通知 1359 | :param auth: DouyinAuth object. 1360 | :param num: 数量. 1361 | :param notice_group: 消息类型 | 700 全部消息 401 粉丝 601 @我的 2 评论 3 点赞 520 弹幕 1362 | :return: 1363 | """ 1364 | min_time = "0" 1365 | max_time = "0" 1366 | count = "10" 1367 | notice_list = [] 1368 | while True: 1369 | res_json = DouyinAPI.get_notice_list(auth, min_time, max_time, count, notice_group) 1370 | notices = res_json["notice_list_v2"] 1371 | notice_list.extend(notices) 1372 | if res_json["has_more"] != 1 or len(notice_list) >= num: 1373 | break 1374 | min_time = res_json["min_time"] 1375 | max_time = res_json["max_time"] 1376 | if len(notice_list) > num: 1377 | notice_list = notice_list[:num] 1378 | return notice_list 1379 | 1380 | @staticmethod 1381 | def get_feed(auth, count='20', refresh_index='2', **kwargs): 1382 | """ 1383 | 获取首页推荐视频 1384 | :param auth: DouyinAuth object. 1385 | :param count: 数量. 1386 | :param refresh_index: 刷新索引. 1387 | :return: JSON. 
1388 | """ 1389 | api = "/aweme/v1/web/module/feed/" 1390 | headers = HeaderBuilder().build(HeaderType.GET) 1391 | refer = "https://www.douyin.com/" 1392 | headers.set_referer(refer) 1393 | params = Params() 1394 | params.add_param("device_platform", 'webapp') 1395 | params.add_param("aid", '6383') 1396 | params.add_param("channel", 'channel_pc_web') 1397 | params.add_param("module_id", '3003101') 1398 | params.add_param("count", count) 1399 | params.add_param("filterGids", '') 1400 | params.add_param("presented_ids", '') 1401 | params.add_param("refresh_index", refresh_index) 1402 | params.add_param("refer_id", '') 1403 | params.add_param("refer_type", '10') 1404 | params.add_param("awemePcRecRawData", '{"is_client":false}') 1405 | params.add_param("Seo-Flag", '0') 1406 | params.add_param("install_time", '1715480185') 1407 | params.add_param("pc_client_type", '1') 1408 | params.add_param("update_version_code", '170400') 1409 | params.add_param("version_code", '170400') 1410 | params.add_param("version_name", '17.4.0') 1411 | params.add_param("cookie_enabled", 'true') 1412 | params.add_param("screen_width", '1707') 1413 | params.add_param("screen_height", '960') 1414 | params.add_param("browser_language", 'zh-CN') 1415 | params.add_param("browser_platform", 'Win32') 1416 | params.add_param("browser_name", 'Edge') 1417 | params.add_param("browser_version", '125.0.0.0') 1418 | params.add_param("browser_online", 'true') 1419 | params.add_param("engine_name", 'Blink') 1420 | params.add_param("engine_version", '125.0.0.0') 1421 | params.add_param("os_name", 'Windows') 1422 | params.add_param("os_version", '10') 1423 | params.add_param("cpu_core_num", '32') 1424 | params.add_param("device_memory", '8') 1425 | params.add_param("platform", 'PC') 1426 | params.add_param("downlink", '10') 1427 | params.add_param("effective_type", '4g') 1428 | params.add_param("round_trip_time", '100') 1429 | params.with_web_id(auth, refer) 1430 | params.add_param("msToken", auth.msToken) 
1431 | params.with_a_bogus() 1432 | params.add_param("verifyFp", auth.cookie['s_v_web_id']) 1433 | params.add_param("fp", auth.cookie['s_v_web_id']) 1434 | 1435 | res = requests.get(f'{DouyinAPI.douyin_url}{api}', headers=headers.get(), params=params.get(), 1436 | cookies=auth.cookie, verify=False) 1437 | return res.json() 1438 | 1439 | 1440 | 1441 | if __name__ == '__main__': 1442 | web_protect_str = r'' 1443 | keys_str = r'' 1444 | cookies_str = '' 1445 | 1446 | 1447 | 1448 | from builder.auth import DouyinAuth 1449 | auth_ = DouyinAuth() 1450 | auth_.perepare_auth(cookies_str, web_protect_str, keys_str) 1451 | 1452 | res = DouyinAPI.search_live(auth_, "三角洲") 1453 | # print(res) 1454 | for i in res['data']: 1455 | print(i['lives']['author']['nickname']) 1456 | live_id = re.findall(r'"web_rid":"(.*?)",', str(i['lives']))[0] 1457 | live_url = f'https://live.douyin.com/{live_id}' 1458 | print(live_url) 1459 | 1460 | # my_uid = DouyinAPI.get_my_uid(auth_) 1461 | # print(my_uid) 1462 | # my_sec_uid = DouyinAPI.get_my_sec_uid(auth_) 1463 | # print(my_sec_uid) 1464 | # work_url = r'https://www.douyin.com/video/7433523124836060416' 1465 | # print(DouyinAPI.get_user_info(auth_, "https://www.douyin.com/user/MS4wLjABAAAA7BDbZk0LjnEMcDDsLag5mDrMc157hD3x0SMhH1HaCM8")) 1466 | # print(DouyinAPI.digg(auth_, "7433523124836060416", "1")) 1467 | # print(DouyinAPI.digg(auth_, "7212619184386182435", "1")) 1468 | # user_info = DouyinAPI.get_user_info(auth_, "https://www.douyin.com/user/MS4wLjABAAAAHXtdycTLMSe5Ld_468-9HKR1HUUrk4ywq-xMCM-E9w_cDIrhmynrQUalv061ZSpn?from_tab_name=main") 1469 | # to_user_id = user_info['user']['uid'] 1470 | # conversation_id, conversation_short_id, ticket = DouyinAPI.create_conversation(auth_, to_user_id) 1471 | # content = r'有份长期通告寻求合作,你通过了前期筛选,我是项目负责人,期待你与我联系:ncyj12' 1472 | # DouyinAPI.send_msg(auth_, conversation_id, conversation_short_id, ticket, content) 1473 | # 
print(DouyinAPI.get_user_all_work_info(auth_,"https://www.douyin.com/user/MS4wLjABAAAA8nC7nKxMrRtBwEqFzRgRBSxhBcw89VL0ysN-IXvhlKU?vid=7378825215213718818")) 1474 | # print(DouyinAPI.get_work_info(auth_, "https://www.douyin.com/video/7212619184386182435")) 1475 | # print(DouyinAPI.get_work_all_out_comment(auth_, "https://www.douyin.com/video/7212619184386182435")) 1476 | # print(DouyinAPI.get_work_inner_comment(auth_, { 1477 | # "aweme_id": "7212619184386182435", 1478 | # "cid": "7327990109411902208" 1479 | # }, "0")) 1480 | # print(DouyinAPI.get_work_all_inner_comment(auth_, { 1481 | # "aweme_id": "7212619184386182435", 1482 | # "cid": "7327990109411902208" 1483 | # })) 1484 | # print(DouyinAPI.get_work_all_comment(auth_, "https://www.douyin.com/video/7212619184386182435")) 1485 | # print(DouyinAPI.search_general_work(auth_, "美女", sort_type='2')) 1486 | # print(DouyinAPI.search_some_general_work(auth_, "美女", sort_type='2', publish_time='0', num=30)) 1487 | # print(DouyinAPI.get_all_live_production(auth_, "https://live.douyin.com/84255891276")) 1488 | # 60503986163 289606013148 91819894158 1489 | # room_info = DouyinAPI.get_live_info(auth_, '60503986163') 1490 | # print(room_info) 1491 | # print(DouyinAPI.get_live_production(auth_, "https://live.douyin.com/84255891276", room_id, author_id, '0')) 1492 | # print(DouyinAPI.collect_aweme(auth_, "7377676120549772554", '1')) 1493 | # print(DouyinAPI.move_collect_aweme(auth_, "7207861673711930656", "tt", "7379252593215919891")) 1494 | # print(DouyinAPI.remove_collect_aweme(auth_, "7376244589235113250", "tt", "7379252593215919891")) 1495 | # print(DouyinAPI.get_live_production_detail(auth_, "https://live.douyin.com/552370739330", "3622058069401408240", "MS4wLjABAAAATfhR-kvE-AWqZaNaomCLFqgDKzvBwMS87FUGVjS_u7Y", "7379220637308504843")) 1496 | # print(DouyinAPI.get_collect_list(auth_)) 1497 | # print(DouyinAPI.search_user(auth_, "巴旦木公主")) 1498 | # print(DouyinAPI.search_some_user(auth_, "巴旦木公主", 30)) 1499 | # 
print(DouyinAPI.search_live(auth_, "馨馨baby😐ᵇᵃᵇʸ")) 1500 | # print(DouyinAPI.get_user_favorite(auth_, "MS4wLjABAAAA99bTJ_GOw3odYmsXOe7i7xuEv0iQf2X_Kg_VUyVP0U8")) 1501 | # print(DouyinAPI.get_some_user_follower_list(auth_, "3074704605975950", "MS4wLjABAAAA0L4jpkJDeuFO9AM-dQK1B649tmr7GIw-sQtyPasP_Z45QnUjIQgUOLIs8Kw8Gp-u", 40)) 1502 | # print(DouyinAPI.get_some_user_following_list(auth_, "3074704605975950", "MS4wLjABAAAA0L4jpkJDeuFO9AM-dQK1B649tmr7GIw-sQtyPasP_Z45QnUjIQgUOLIs8Kw8Gp-u", 40)) 1503 | # print(DouyinAPI.search_some_video_work(auth_, "巴旦木公主", 32)) 1504 | # print(DouyinAPI.get_feed(auth_)) 1505 | # print(DouyinAPI.publish_comment(auth_, "7356193166732709139")) 1506 | # print(DouyinAPI.get_upload_auth_key(auth_)) 1507 | 1508 | # while True: 1509 | # print(DouyinAPI.sendMsgInRoom(auth_, room_id, "666")) 1510 | # time.sleep(3) 1511 | # # 1512 | # while True: 1513 | # print(DouyinAPI.diggLiveRoom(auth_, room_id, '10')) 1514 | # time.sleep(1) --------------------------------------------------------------------------------