├── douyin ├── __init__.py ├── douyin.db ├── sql.sql ├── dbutil.py ├── cons.py └── util.py ├── requirements.txt ├── lib ├── 0.0.1.crx ├── pics │ ├── 1.png │ ├── 2.png │ ├── 3.jpg │ └── 4.jpg ├── chromedriver.exe ├── video │ └── DouyinDownloadPreview.mp4 ├── 0.0.1 │ ├── js │ │ └── app.js │ ├── manifest.json │ └── xmlhttp.js └── 0.0.1.pem ├── .gitignore ├── favorite.md ├── .vscode ├── launch.json └── .ropeproject │ └── config.py ├── config.txt ├── readme.md └── Douyin.py /douyin/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests == 2.19.1 2 | selenium == 3.12.0 -------------------------------------------------------------------------------- /lib/0.0.1.crx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scriptwang/douyin-downloader/HEAD/lib/0.0.1.crx -------------------------------------------------------------------------------- /lib/pics/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scriptwang/douyin-downloader/HEAD/lib/pics/1.png -------------------------------------------------------------------------------- /lib/pics/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scriptwang/douyin-downloader/HEAD/lib/pics/2.png -------------------------------------------------------------------------------- /lib/pics/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scriptwang/douyin-downloader/HEAD/lib/pics/3.jpg -------------------------------------------------------------------------------- /lib/pics/4.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/scriptwang/douyin-downloader/HEAD/lib/pics/4.jpg -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | douyin/__pycache__/ 2 | debug.log 3 | douyin/__pycache__/ 4 | douyin/__pycache__/ 5 | -------------------------------------------------------------------------------- /douyin/douyin.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scriptwang/douyin-downloader/HEAD/douyin/douyin.db -------------------------------------------------------------------------------- /lib/chromedriver.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/scriptwang/douyin-downloader/HEAD/lib/chromedriver.exe -------------------------------------------------------------------------------- /favorite.md: -------------------------------------------------------------------------------- 1 | ## 收藏的小姐姐/小哥哥抖音id 2 | 欢迎补充... 
// Content script (declared in manifest.json to run at document_start in all
// frames): load the extension's xmlhttp.js into the page through a <script>
// element, then drop the element again once the script has been loaded.
var injected = document.createElement("script");
var parent = document.head || document.documentElement;
injected.onload = function () {
    // The tag is only a loading vehicle; remove it after it has done its job.
    injected.remove();
};
injected.src = chrome.extension.getURL("xmlhttp.js");
parent.appendChild(injected);
// Page-context XMLHttpRequest hook.
//
// IMPORTANT: the collected data has to hang off `window` (or another object
// the browser already provides); a plainly declared variable cannot be read
// back by Selenium at the end.
//
// Every response that looks like a paged result (it carries both `has_more`
// and `status_code`) is appended to `window.finalRes` as `{url, res}`.
// `url` is only present when the request URL contains TARGET_PATH.
window.finalRes = [];
(function (send, open) {
    var TARGET_PATH = 'aweme/v1/aweme/post';

    // Intercept before the request is sent: remember the URL on the request
    // object itself, so a response is always paired with its own URL even
    // when a later request finishes first.
    XMLHttpRequest.prototype.open = function (method, url) {
        var target = String(url);
        this.__hookedUrl = target.indexOf(TARGET_PATH) != -1 ? target : undefined;
        // Forward the caller's exact argument list. Passing `async` explicitly
        // would hand `undefined` to the native open() for two-argument calls,
        // which turns an asynchronous request into a synchronous one.
        return open.apply(this, arguments);
    };

    // Intercept after the response has arrived.
    XMLHttpRequest.prototype.send = function (data) {
        this.addEventListener("readystatechange", function () {
            if (this.readyState != 4 /* DONE */) return;
            var parsed;
            try {
                // JSON.parse instead of eval: response text is data, not code.
                parsed = JSON.parse(this.responseText);
            } catch (e) {
                // Not JSON, or responseText is not readable for this
                // responseType - nothing to record.
                return;
            }
            if (parsed === null || typeof parsed !== 'object') return;
            if ('has_more' in parsed && 'status_code' in parsed) {
                var record = {};
                if (this.__hookedUrl !== undefined) record.url = this.__hookedUrl;
                record.res = parsed;
                window.finalRes.push(record);
            }
        }, false);
        return send.apply(this, arguments);
    };
})(XMLHttpRequest.prototype.send, XMLHttpRequest.prototype.open);
#是否下载喜欢的视频,只能为True(下载)或者False(不下载) 11 | down_like_video = True 12 | 13 | #[常改动] 是否启用debug模式,只能是True或者False,默认为False 14 | debug = False 15 | 16 | #[常改动] 是否用无头模式启动浏览器,只能是True或者False,默认为True 17 | headless = True 18 | 19 | 20 | #=====================[不常改动]===================== 21 | 22 | #[不常改动] 下载路径,默认为defalut,即当前程序所在路径,可以是其他,比如D:/douyin/media,注意:只能是defalut或者其他路径 23 | download_path = defalut 24 | #download_path = D:/Media/抖音/ 25 | 26 | #[不常改动] 请求等待时间因子 27 | timeout = 2 28 | 29 | #[不常改动] 单次请求喜欢视频的大小,默认21 30 | single_like_requests_value = 21 31 | 32 | #[不常改动] 请求发表视频最小等待时间(单位/秒) 33 | min_post_wait_time = 1 34 | 35 | #[不常改动] 请求发表视频最大等待时间(单位/秒) 36 | max_post_wait_time = 2 37 | 38 | #[不常改动] 请求喜欢视频最小等待时间(单位/秒) 39 | min_like_wait_time = 0.4 40 | 41 | #[不常改动] 请求喜欢视频最大等待时间(单位/秒) 42 | max_like_wait_time = 3.4 43 | 44 | #[不常改动] 请求下载视频最小等待时间(单位/秒) 45 | min_down_wait_time = 0.4 46 | 47 | #[不常改动] 请求下载视频最大等待时间(单位/秒) 48 | max_down_wait_time = 2.4 -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | 2 | # 抖音下载小助手 3 | - 输入抖音id,下载用户发表/喜欢的视频 4 | - 所需环境:Python 3.5+ Chrome 67+ 5 | - 所需依赖: 6 | - requests 2.19.1 7 | - selenium 3.12.0 8 | 9 | # 更新 2018-07-14 10 | - 新增数据库功能,能自动记录上一次下载的位置,接着下载 11 | - 修复编码问题 12 | - 获取喜欢视频请求方式修复 13 | - 新增对抖音用户的收藏,详情见favorite.md 14 | 15 | 16 | ## Setup 17 | ``` 18 | # 安装Python3.5;安装Chrome67 19 | # Python3.5安装包:https://www.python.org/downloads/windows/ 20 | # Chrome67(需翻墙):https://www.google.com/chrome/index.html 21 | 22 | # 切换到项目所在目录 23 | cd ../你的目录/douyin-downloader 24 | 25 | # 安装依赖 26 | pip install -r requirements.txt 27 | 28 | # 配置抖音id(两种方式):1:修改config.txt下面的user_id; 2:命令行敲入id. 
29 | # 注:命令行敲入id会覆盖掉配置文件里的id 30 | 31 | # 第一种方式:修改config.txt后执行 32 | python Douyin.py 33 | 34 | # 第二种方式:比如1234是一个抖音id 35 | python Douyin.py 1234 36 | 37 | #Enjoy it :) 38 | 39 | ``` 40 | 41 | ## 获取抖音id的方法 42 | 进入用户主页,以链接形式分享用户到微信文件助手,链接里面的数字就是抖音id 43 | - 进入用户首页 44 | ![17](./lib/pics/1.png) 45 | 46 | - 分享用户 47 | ![17](./lib/pics/2.png) 48 | 49 | - 以链接形式分享 50 | ![17](./lib/pics/3.jpg) 51 | 52 | - 分享到微信文件助手,打开链接,里面user后面问号前面的数字就是抖音id 53 | ![17](./lib/pics/4.jpg) 54 | -------------------------------------------------------------------------------- /douyin/sql.sql: -------------------------------------------------------------------------------- 1 | -- 建表语句 2 | create table douyin( 3 | rid INTEGER PRIMARY KEY AUTOINCREMENT, 4 | aweme_id VARCHAR(255) , -- '抖音视频id,唯一' 5 | douyin_id VARCHAR(255) , -- '抖音作者id' 6 | douyin_name VARCHAR(512) , -- '抖音作者名字' 7 | video_url VARCHAR(255), -- '视频下载的地址' 8 | video_type VARCHAR(16) , -- '视频类型post or like' 9 | video_name VARCHAR(255), -- '视频名字' 10 | video_count INT, -- '视频计数' 11 | digg_count VARCHAR(255) , -- '点赞的数量' 12 | download_path VARCHAR(255) , -- '视频文件的路径' 13 | download_time DATETIME -- '下载视频的时间' 14 | ); 15 | 16 | 17 | --常用语句 18 | select DATETIME('now'); 19 | 20 | drop table douyin; 21 | select aweme_id from douyin; 22 | SELECT * from douyin; 23 | delete from douyin; 24 | 25 | select 'post' as nm,case when max(video_count) is null then 1 else max(video_count) + 1 end as cnt, 26 | case when max(download_time) is null then '还没下载过此小姐姐/小哥哥发表的视频呢><' else max(download_time) end as time 27 | from douyin where douyin_id = '65413595875' AND video_type = 'post' 28 | UNION ALL 29 | select 'like' as nm,case when max(video_count) is null then 1 else max(video_count) + 1 end as cnt, 30 | case when max(download_time) is null then '还没下载过此小姐姐/小哥哥喜欢的视频呢><' else max(download_time) end as time 31 | from douyin where douyin_id = '65413595875' AND video_type = 'like'; 32 | 33 | select aweme_id,video_type from douyin where douyin_id = '65413595875'; 34 | 
# Module-level state used by __run__(). Every name is assigned in the
# ``__main__`` guard below before __run__() is called:
#   _browser_    selenium driver returned by util._init_browser
#   _config_     settings dict returned by util._read_config
#   _result_     dict the collectors fill with the scraped data
#   _base_path_  directory that contains this script
#   _c_          database connection
#   _d_pool_     pool of ids of videos that were already downloaded
#   _debug_      debug flag, mirrored into util and dbutil


def __run__():
    """Collect the user's video data in the browser, then download the videos.

    The posted / liked collectors only run when the matching ``dpv`` / ``dlv``
    switch in the configuration is on. The browser is closed as soon as the
    collection phase ends - also when it ends with an exception, so that no
    driver process is left behind.

    Returns:
        Whatever ``util._download_video`` returns; the caller reads the keys
        ``'p_len'`` and ``'l_len'`` from it.
    """
    _p_len = 0
    _l_len = 0
    try:
        util._get_basic_info(_browser_, _config_, _result_)
        if _config_['dpv']:
            _p_len = util._get_post_request_data(_browser_, _config_, _result_)
        if _config_['dlv']:
            _l_len = util._get_like_request_data(_browser_, _config_, _result_)
    finally:
        _browser_.quit()
        print('浏览器退出成功...')
    print('数据采集完毕,发表的视频[' + str(_p_len) + ']条,喜欢的视频[' + str(_l_len) + ']条!')
    return util._download_video(_config_, _result_, _d_pool_, _c_)


if __name__ == '__main__':
    print('>>> 脚本初始化中... ...')
    _base_path_ = path.dirname(path.realpath(__file__))
    # sys.argv is passed along so that an id given on the command line can
    # override the one in config.txt.
    _config_ = util._read_config(_base_path_ + '/config.txt', _base_path_, sys.argv)
    print('读取配置文件完毕...')
    _debug_ = util._debug_ = db._debug_ = _config_['debug']
    _c_ = db.get_conn(_base_path_ + '/douyin/douyin.db')
    print('数据库连接创建完毕!')
    _d_pool_ = db.ini_d_pool(_c_, _config_)
    print('初始化已下载池完毕!')
    _browser_ = util._init_browser({
        'driver_path': _base_path_ + '/lib/chromedriver.exe'
    }, _config_['headless'])
    print('初始化浏览器成功...')
    _result_ = {}
    print('>>> Donyin下载任务开始... ...')
    r = __run__()
    print('Donyin下载任务结束... ...')
    # 'p_len' belongs to the posted videos and 'l_len' to the liked ones (same
    # naming as _p_len / _l_len in __run__); the two were printed swapped.
    # NOTE(review): the "- 1" presumably turns a next-index counter into a
    # count - confirm against util._download_video.
    print('视频下载完毕,发表的视频[' + str(r['p_len'] - 1) + ']条,喜欢的视频[' + str(r['l_len'] - 1) + ']条!')
# Debug switch: when true, every SQL statement is printed before it runs.
# Douyin.py overwrites it with the ``debug`` value from config.txt; it
# defaults to off so the helpers also work when the module is used alone.
_debug_ = False


def get_conn(db_file, check_same_thread=False):
    """Open the SQLite database *db_file*, creating its folder when missing.

    Args:
        db_file: path of the database file (``':memory:'`` works as well).
        check_same_thread: forwarded unchanged to ``sqlite3.connect``.

    Returns:
        The ``sqlite3`` connection.
    """
    folder = os.path.dirname(db_file)
    # dirname is '' for a bare file name or ':memory:' - nothing to create.
    if folder:
        os.makedirs(folder, exist_ok=True)
    return db.connect(db_file, check_same_thread=check_same_thread)


def exe_qry(conn, sql, params=()):
    """Run a query and return all rows.

    Args:
        conn: sqlite3 connection.
        sql: statement text; use ``?`` placeholders for values.
        params: values bound to the placeholders (optional).

    Returns:
        List of row tuples from ``fetchall()``.
    """
    if _debug_:
        print(' ------->> exe_qry:' + sql)
    return conn.execute(sql, params).fetchall()


def exe_dml(conn, sql, params=()):
    """Run an insert / update / delete statement and commit it.

    Args:
        conn: sqlite3 connection.
        sql: statement text; use ``?`` placeholders for values.
        params: values bound to the placeholders (optional).
    """
    if _debug_:
        print(' ------->> exe_dml:' + sql)
    conn.execute(sql, params)
    conn.commit()


def ini_d_pool(conn, _config_):
    """Build the "already downloaded" pool for the configured user.

    Args:
        conn: sqlite3 connection to a database with the ``douyin`` table.
        _config_: settings dict; only ``_config_['user_id']`` is read.

    Returns:
        A dict that maps every stored ``aweme_id`` of the user to its
        ``video_type``, plus the two extra keys ``'post'`` and ``'like'``
        holding the next video counter of that type (highest stored
        ``video_count`` + 1, or 1 when nothing is stored yet).
    """
    user_id = _config_['user_id']
    pool = {}
    # The id may come straight from the command line, so it is always bound
    # as a parameter instead of being pasted into the SQL text.
    rows = exe_qry(
        conn,
        "select aweme_id,video_type from douyin where douyin_id = ?",
        (user_id,),
    )
    for aweme_id, video_type in rows:
        pool[aweme_id] = video_type
    sql = '''
    select 'post' as nm,case when max(video_count) is null then 1 else max(video_count) + 1 end as cnt,
    case when max(download_time) is null then '还没下载过此小姐姐/小哥哥发表的视频呢><' else max(download_time) end as time
    from douyin where douyin_id = ? AND video_type = 'post'
    union all
    select 'like' as nm,case when max(video_count) is null then 1 else max(video_count) + 1 end as cnt,
    case when max(download_time) is null then '还没下载过此小姐姐/小哥哥喜欢的视频呢><' else max(download_time) end as time
    from douyin where douyin_id = ? AND video_type = 'like';
    '''
    for name, next_count, last_time in exe_qry(conn, sql, (user_id, user_id)):
        print('记录上一次下载计数/时间:' + str(name) + ' 最后下载计数:' + str(next_count - 1) + ' 最后下载时间:' + str(last_time))
        pool[name] = next_count
    return pool
# JavaScript: helper functions registered on ``window`` before scraping.
# The script defines
#   * window.getRequestsUrl()   - URLs of recorded XHR performance entries
#                                 whose name contains 'aweme/v1/aweme/post';
#   * window.getElementByXpath() - all nodes matched by an XPath expression;
#   * an XMLHttpRequest open/send hook that appends every response carrying
#     both 'has_more' and 'status_code' to window.finalRes as {url, res};
#   * window.resCnt()           - total length of the aweme_list arrays
#                                 collected in window.finalRes.
# NOTE(review): the hook runs eval() on the response text and keeps one
# shared ``tmp`` record for all requests - worth revisiting, but the order of
# statements matters here, so the script is left exactly as it is.
rigister_function = '''
    //拿到请求后记录的请求
    window.getRequestsUrl = function(){
        var r = [];
        var n = window.performance.getEntries();
        for(i in n){
            if (n[i].initiatorType == 'xmlhttprequest' && n[i].name.indexOf('aweme/v1/aweme/post') != -1){
                r.push(n[i].name)
            }
        }
        return r;
    }

    //Xpath 寻找元素
    window.getElementByXpath = function (STR_XPATH) {
        var xresult = document.evaluate(STR_XPATH, document, null, XPathResult.ANY_TYPE, null);
        var xnodes = [];
        var xres;
        while (xres = xresult.iterateNext()) {
            xnodes.push(xres);
        }
        return xnodes;
    }

    //Hook Ajax 注入ajax对象代码
    //!important 一定要把数据绑定在window或别的浏览器本来就有的对象上面,直接定义变量在最后selenium拿不到
    window.finalRes = [];
    (function(send,open){
        var self = this;
        self.tmp = {};
        XMLHttpRequest.prototype.open = function(method, url, async, user, pass) {
            if (url.indexOf('aweme/v1/aweme/') != -1) self.tmp.url = url;
            open.call(this, method, url, async, user, pass);
        };
        XMLHttpRequest.prototype.send = function (data) {
            this.addEventListener("readystatechange", function(){
                if( this.readyState == 4 /* complete */) {/*发请求之后拦截*/
                    var j = eval('(' + this.responseText + ')');
                    //有可能后发出去的请求会先返回结果,所以对返回数据进行判断
                    if ('has_more' in j && 'status_code' in j){
                        self.tmp.res = j;
                        window.finalRes.push(self.tmp);
                        self.tmp = {};
                    }
                }
            }, false);
            send.call(this, data);
        }
    })(XMLHttpRequest.prototype.send,XMLHttpRequest.prototype.open);

    //计算请求到数据的长度
    window.resCnt = function(){
        var cnt = 0;
        for (i in window.finalRes){
            cnt = cnt + window.finalRes[i].res.aweme_list.length
        }
        return cnt;
    }
'''

# JavaScript: scroll the page down in steps of 10px until the "like" tab
# button (looked up by XPath) has moved far enough into the viewport, as a
# preparation for clicking it. Requires window.getElementByXpath from
# ``rigister_function`` to be registered first.
show_like = '''
    //找到喜欢按钮元素
    likeBtn = window.getElementByXpath('//body/div/div[1]/div[3]/div/div[2]')[0]
    //向下滚动页面直至喜欢按钮出现在页面上,为后面点击喜欢按钮做准备
    var is_show = false;
    var clientHeight = window.document.documentElement.clientHeight;
    var p = 0;
    while (!is_show){
        window.scrollTo(0,p);
        p += 10;//每次步进10
        if (likeBtn.getBoundingClientRect().top - clientHeight < -(likeBtn.offsetHeight + 100)) {
            is_show = true
        }
    }
'''

# JavaScript: despite its name this script does not scroll; it only returns
# whether the page is scrolled to the bottom (scroll offset + viewport height
# equals the document height).
scroll_down = '''
    //document文档的总高度
    var docHeight = document.documentElement.scrollHeight;
    //文档上部超出浏览器的高度
    var scrollTop = (document.documentElement && document.documentElement.scrollTop) || document.body.scrollTop;
    //浏览器窗口的高度
    var clientHeight = window.document.documentElement.clientHeight;
    //判断是否到了文档底部
    return scrollTop + clientHeight == docHeight;
'''

# JavaScript: finally request the first recorded ajax URL once more.
# The request is opened synchronously (third open() argument is false) and
# the onreadystatechange handler is assigned only after send().
# NOTE(review): the handler appends to the end of window.finalRes; putting
# the entry first, as the original comment intended, is commented out.
final_ajax = '''
    requests = window.getRequestsUrl();
    var ajax = new XMLHttpRequest();
    console.log(requests[0])
    ajax.open('get',requests[0],false);
    ajax.send();
    ajax.onreadystatechange = function () {
       if (ajax.readyState==4 &&ajax.status==200) {
            //步骤五 如果能够进到这个判断 说明 数据 完美的回来了,并且请求的页面是存在的
            var r = {};
            r.url = requests[0];
            r.res = eval('(' + ajax.responseText + ')');
            window.finalRes.push(r)
            //??
            //var tmp = [r];
            //window.finalRes = tmp.concat(window.finalRes);
      }
    }
'''
9 | # Changes to ignored resources are not added to the history and 10 | # VCSs. Also they are not returned in `Project.get_files()`. 11 | # Note that ``?`` and ``*`` match all characters but slashes. 12 | # '*.pyc': matches 'test.pyc' and 'pkg/test.pyc' 13 | # 'mod*.pyc': matches 'test/mod1.pyc' but not 'mod/1.pyc' 14 | # '.svn': matches 'pkg/.svn' and all of its children 15 | # 'build/*.o': matches 'build/lib.o' but not 'build/sub/lib.o' 16 | # 'build//*.o': matches 'build/lib.o' and 'build/sub/lib.o' 17 | prefs['ignored_resources'] = ['*.pyc', '*~', '.ropeproject', 18 | '.hg', '.svn', '_svn', '.git', '.tox'] 19 | 20 | # Specifies which files should be considered python files. It is 21 | # useful when you have scripts inside your project. Only files 22 | # ending with ``.py`` are considered to be python files by 23 | # default. 24 | #prefs['python_files'] = ['*.py'] 25 | 26 | # Custom source folders: By default rope searches the project 27 | # for finding source folders (folders that should be searched 28 | # for finding modules). You can add paths to that list. Note 29 | # that rope guesses project source folders correctly most of the 30 | # time; use this if you have any problems. 31 | # The folders should be relative to project root and use '/' for 32 | # separating folders regardless of the platform rope is running on. 33 | # 'src/my_source_folder' for instance. 34 | #prefs.add('source_folders', 'src') 35 | 36 | # You can extend python path for looking up modules 37 | #prefs.add('python_path', '~/python/') 38 | 39 | # Should rope save object information or not. 40 | prefs['save_objectdb'] = True 41 | prefs['compress_objectdb'] = False 42 | 43 | # If `True`, rope analyzes each module when it is being saved. 44 | prefs['automatic_soa'] = True 45 | # The depth of calls to follow in static object analysis 46 | prefs['soa_followed_calls'] = 0 47 | 48 | # If `False` when running modules or unit tests "dynamic object 49 | # analysis" is turned off. 
This makes them much faster. 50 | prefs['perform_doa'] = True 51 | 52 | # Rope can check the validity of its object DB when running. 53 | prefs['validate_objectdb'] = True 54 | 55 | # How many undos to hold? 56 | prefs['max_history_items'] = 32 57 | 58 | # Shows whether to save history across sessions. 59 | prefs['save_history'] = True 60 | prefs['compress_history'] = False 61 | 62 | # Set the number spaces used for indenting. According to 63 | # :PEP:`8`, it is best to use 4 spaces. Since most of rope's 64 | # unit-tests use 4 spaces it is more reliable, too. 65 | prefs['indent_size'] = 4 66 | 67 | # Builtin and c-extension modules that are allowed to be imported 68 | # and inspected by rope. 69 | prefs['extension_modules'] = [] 70 | 71 | # Add all standard c-extensions to extension_modules list. 72 | prefs['import_dynload_stdmods'] = True 73 | 74 | # If `True` modules with syntax errors are considered to be empty. 75 | # The default value is `False`; When `False` syntax errors raise 76 | # `rope.base.exceptions.ModuleSyntaxError` exception. 77 | prefs['ignore_syntax_errors'] = False 78 | 79 | # If `True`, rope ignores unresolvable imports. Otherwise, they 80 | # appear in the importing namespace. 81 | prefs['ignore_bad_imports'] = False 82 | 83 | # If `True`, rope will insert new module imports as 84 | # `from import ` by default. 85 | prefs['prefer_module_from_imports'] = False 86 | 87 | # If `True`, rope will transform a comma list of imports into 88 | # multiple separate import statements when organizing 89 | # imports. 90 | prefs['split_imports'] = False 91 | 92 | # If `True`, rope will remove all top-level import statements and 93 | # reinsert them at the top of the module when making changes. 94 | prefs['pull_imports_to_top'] = True 95 | 96 | # If `True`, rope will sort imports alphabetically by module name instead of 97 | # alphabetically by import statement, with from imports after normal 98 | # imports. 
# Debug switch. Douyin.py overwrites it with the ``debug`` value from
# config.txt right after the configuration is read; it defaults to off so the
# functions below also work when nobody has set it.
_debug_ = False


def _read_config(config_path, _base_path_, args):
    """Read the INI file at *config_path* and return the settings as a dict.

    Args:
        config_path: path of the UTF-8 encoded file; all options are read
            from its ``[base_config]`` section.
        _base_path_: used as download path when the file says ``defalut``
            (the spelling the configuration file uses).
        args: ``sys.argv``-style list; ``args[1]``, when present, replaces
            the ``user_id`` from the file.

    Returns:
        Dict with the keys ``user_id``, ``download_path``, ``timeout``
        (float), ``headless``, ``debug``, ``dpv``, ``dlv`` (bools) and
        ``slrv``, ``mipt``, ``mapt``, ``milt``, ``malt``, ``midt``, ``madt``
        (raw strings exactly as written in the file).

    Raises:
        configparser.Error: when the section or an option is missing.
        ValueError: when ``timeout`` is not a number.
    """
    def _get_user_id(user_id, args):
        if len(args) >= 2:
            print('从命令行读取抖音id[' + str(args[1]) + ']...')
            return args[1]
        print('从配置文件读取抖音id[' + str(user_id) + ']...')
        return user_id

    def _option(name):
        return config.get("base_config", name)

    def _flag(name):
        # Only the exact text 'True' switches a flag on; anything else is off.
        return _option(name) == 'True'

    config = configparser.ConfigParser()
    with open(config_path, 'r', encoding='utf-8') as cfgfile:
        # read_file() replaces readfp(), which was deprecated in Python 3.2
        # and removed in 3.12.
        config.read_file(cfgfile)

    user_id = _get_user_id(str(_option("user_id")), args)
    download_path = str(_option("download_path"))
    if download_path == 'defalut':
        download_path = _base_path_
    return {
        'user_id': user_id,
        'download_path': download_path,
        'timeout': float(_option("timeout")),
        'headless': _flag("headless"),
        'debug': _flag("debug"),
        'dpv': _flag("down_post_video"),
        'dlv': _flag("down_like_video"),
        'slrv': _option("single_like_requests_value"),
        'mipt': _option("min_post_wait_time"),
        'mapt': _option("max_post_wait_time"),
        'milt': _option("min_like_wait_time"),
        'malt': _option("max_like_wait_time"),
        'midt': _option("min_down_wait_time"),
        'madt': _option("max_down_wait_time"),
    }


def _init_browser(args, headless=True):
    """Create the Chrome driver.

    Args:
        args: dict with ``'driver_path'`` (chromedriver executable) and an
            optional ``'extension_path'`` of an extension to load.
        headless: start Chrome with ``--headless`` / ``--disable-gpu``.

    Returns:
        The ``webdriver.Chrome`` instance.
    """
    chrome_options = Options()
    if headless:
        chrome_options.add_argument('--headless')
        chrome_options.add_argument('--disable-gpu')
    if 'extension_path' in args:
        chrome_options.add_extension(args['extension_path'])
    # Keep selenium's remote-connection logger quiet below ERROR.
    LOGGER.setLevel(logging.ERROR)
    return webdriver.Chrome(executable_path=args['driver_path'], chrome_options=chrome_options)


# Fetch the basic information of the user
def _get_basic_info(browser, _config_, _result_):
    """Open the user's share page and store title / description in *_result_*.

    ``_result_['title']`` is always set; ``_result_['desc']`` only when the
    description element could be read from the page.
    """
    share_link = 'https://www.douyin.com/share/user/' + _config_['user_id'] + '?share_type=link'
    browser.get(share_link)
    time.sleep(float(_config_['timeout']) / 2)
    title = browser.find_element_by_xpath('//body/div/div[1]/div[2]/div/p').text
    desc = None
    try:
        desc = browser.find_element_by_xpath('//body/div/div[1]/div[2]/div[2]/div/span').text
    except Exception:
        # Best effort: a page without a readable description is not an error.
        pass
    _result_['title'] = title
    print('获取小姐姐/小哥哥"' + title + '(' + _config_['user_id'] + ')"基本信息成功!')
    if desc is not None:
        # Store the description itself (the title used to be stored here).
        _result_['desc'] = desc
        print(title + ':' + desc)


def _collect_tab_data(browser, _config_, _result_, kind, label, tab_xpath, min_key, max_key):
    """Scroll one tab of the share page to the bottom and collect its data.

    Shared implementation of _get_post_request_data and
    _get_like_request_data.

    Args:
        browser: selenium driver.
        _config_: settings dict (``user_id``, ``timeout`` and the two wait
            keys are read).
        _result_: dict that receives ``window.finalRes`` under *kind*.
        kind: result key, ``'post'`` or ``'like'``.
        label: word used in the progress messages.
        tab_xpath: XPath of the tab button to click.
        min_key / max_key: config keys holding the lower / upper bound (in
            seconds) for the wait after every scroll.

    Returns:
        The count reported by ``window.resCnt()`` after the final request.
    """
    started = datetime.datetime.now()
    timeout = float(_config_['timeout'])
    min_wait = float(_config_[min_key])
    max_wait = float(_config_[max_key])
    print('>>> 请求' + label + '视频数据中(可能会比较慢).....')
    print('最小请求等待时间为:' + str(_config_[min_key]) + 's 最大请求等待时间为:' + str(_config_[max_key]) + 's')
    share_link = 'https://www.douyin.com/share/user/' + _config_['user_id'] + '?share_type=link'
    if _debug_:
        print('请求连接:' + share_link)
    browser.get(share_link)
    time.sleep(timeout / 2)
    # Register the helper functions and the ajax hook, bring the tab buttons
    # into view, then switch to the wanted tab.
    browser.execute_script(rigister_function)
    browser.execute_script(show_like)
    browser.find_element_by_xpath(tab_xpath).click()
    time.sleep(timeout / 1.2)

    # Keep scrolling until the bottom of the document is reached.
    is_bottom = False
    while not is_bottom:
        browser.execute_script('window.scrollTo(0,document.body.scrollHeight);')
        # Random 1.5 - 2.0s pause, clamped to the configured bounds.
        wait = random.randint(15, 20) / 10
        if wait < min_wait:
            wait = min_wait
        if wait > max_wait:
            wait = max_wait
        time.sleep(wait)
        count = int(browser.execute_script(' return window.resCnt()'))
        print(str(count) + '条' + label + '视频数据已添加!(随机等待请求时间:' + str(wait) + 's)')
        is_bottom = browser.execute_script(scroll_down)

    browser.execute_script(final_ajax)
    time.sleep(timeout / 2)
    _result_[kind] = browser.execute_script(' return window.finalRes')
    count = int(browser.execute_script(' return window.resCnt()'))
    print(str(count) + '条' + label + '视频数据已添加!(随机等待请求时间:' + str(wait) + 's)')
    finished = datetime.datetime.now()
    print('请求用时:' + str((finished - started).seconds) + 's')
    return count


# Collect the posted videos. Douyin limits requests, so the data is taken by
# hooking the page's own ajax requests.
def _get_post_request_data(browser, _config_, _result_):
    """Collect the posted videos into ``_result_['post']``; return the count.

    Waits between scrolls are bounded by ``mipt`` / ``mapt``.
    """
    return _collect_tab_data(
        browser, _config_, _result_,
        'post', '发表', '//body/div/div[1]/div[3]/div/div[1]', 'mipt', 'mapt')


# Collect the liked videos. Douyin limits requests, so the data is taken by
# hooking the page's own ajax requests.
def _get_like_request_data(browser, _config_, _result_):
    """Collect the liked videos into ``_result_['like']``; return the count.

    Waits between scrolls are bounded by ``milt`` / ``malt`` - the bounds
    this function announces (the post bounds used to be applied instead).
    """
    return _collect_tab_data(
        browser, _config_, _result_,
        'like', '喜欢', '//body/div/div[1]/div[3]/div/div[2]', 'milt', 'malt')
'https://www.douyin.com/share/user/'+user_id+'?share_type=link' 163 | browser.get(share_link) 164 | time.sleep(timeout/2) 165 | #browser.execute_script(show_like) 166 | #post_btn = browser.find_element_by_xpath('//body/div/div[1]/div[3]/div/div[1]') 167 | #post_btn.click() 168 | # browser.execute_script(hook_ajax) 169 | # 滚动请求直到文档的底部 170 | is_bottom = False 171 | while (not is_bottom): 172 | browser.execute_script('window.scrollTo(0,document.body.scrollHeight);') 173 | time.sleep(random.randint(7,12)/10) 174 | is_bottom = browser.execute_script(scroll_down) 175 | # 循环获取完成后拿到请求的api 176 | # r = browser.execute_script(get_requests_urls) 177 | _r = browser.execute_script("return window.finalRes") 178 | _result_['post'] = _r 179 | 180 | 181 | # 拿到喜欢的视频 182 | def _get_like_request_data_abandon(_config_,_result_): 183 | def get_api(user_id,count,max_cursor): 184 | return 'https://www.douyin.com/aweme/v1/aweme/favorite/?user_id='+user_id+'&count='+count+'&max_cursor='+max_cursor 185 | def _replace(r): 186 | return r.replace('false','False') 187 | st = datetime.datetime.now() 188 | res = [] 189 | tmp_res = {} 190 | max_cursor = '0' 191 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36'} 192 | # 第一次请求初始化一些数据 193 | print('>>> 请求喜欢的视频中.....') 194 | print('最小请求等待时间为:' + str(_config_['milt']) +'s 最大请求等待时间为:' + str(_config_['malt']) + 's') 195 | _r = requests.get(get_api(_config_['user_id'],_config_['slrv'],max_cursor),headers = headers) 196 | tmp_res['url'] = get_api(_config_['user_id'],_config_['slrv'],max_cursor) 197 | r = eval(_replace(_r.text)) 198 | tmp_res['res'] = r 199 | res.append(tmp_res) 200 | tmp_res = {} 201 | cnt = len(r['aweme_list']) 202 | time.sleep(float(_config_['timeout'])/(random.randint(4,10))) 203 | # 循环请求 204 | while r['has_more'] == 1: 205 | t = float(_config_['timeout'])/(abs(math.sin(cnt)) * 10) 206 | if t < float(_config_['milt']):t = float(_config_['milt']) 207 | if 
t > float(_config_['malt']):t = float(_config_['malt']) 208 | time.sleep(t) 209 | max_cursor = str(r['max_cursor']) 210 | api = get_api(_config_['user_id'],_config_['slrv'],max_cursor) 211 | tmp_res['url'] = api 212 | _r = requests.get(api,headers = headers) 213 | r = eval(_replace(_r.text)) 214 | tmp_res['res'] = r 215 | res.append(tmp_res) 216 | tmp_res = {} 217 | cnt = cnt + len(r['aweme_list']) 218 | print(str(cnt) + '条喜欢视频数据已添加!(随机等待时间' + str(round(t,2)) + 's)' ) 219 | print('喜欢视频数据添加完毕,一共' + str(cnt) + '条!') 220 | _result_['like'] = res 221 | et = datetime.datetime.now() 222 | print('请求用时:'+ str((et - st).seconds) + 's') 223 | return cnt 224 | 225 | 226 | 227 | 228 | def _download_video(_config_,_result_,_d_pool_,_c_): 229 | def replace_filename(nm,n): 230 | str = '?*:"<>\/|\\' 231 | for i in str:nm = nm.replace(i,n) 232 | nm = re.compile(u'[\uD800-\uDBFF][\uDC00-\uDFFF]').sub(n,nm) 233 | return nm 234 | def _download(flag,base_path): #flag is post or like 235 | def _sub_sownload(j,cnt,flag): 236 | def show_tip(cnt,flag,t,video_name,aweme_id,type): 237 | try: 238 | if type == 0: 239 | v = exe_qry(_c_,"select video_name,download_time from douyin where aweme_id = '%s' and douyin_id= '%s' " %(aweme_id,_config_['user_id'])) 240 | print('视频[' + str(v[0][0])+ ']已经下载过!下载时间:' + str(v[0][1])) 241 | elif type == 1:print('第' + str(cnt) + '个' + flag + '视频已经下载!随机等待('+str(t)+'s) 文件为[' + replace_filename(video_name,'_').encode('utf-8').decode('utf-8') + ']') 242 | except Exception as x: 243 | # 一些标题含有特殊字符时候video_name会解码失败 244 | if type == 0:print('视频[' + str(x) + ']已经下载过!下载时间:' + str(exe_qry(_c_,"select download_time from douyin where aweme_id = '" + str(aweme_id) + "'"))) 245 | elif type == 1:print('第' + str(cnt) + '个' + flag + '视频已经下载!随机等待('+str(t)+'s) 文件为['+ str(x) +']') 246 | def down_insert(j,cnt,video_name): 247 | exe_dml(_c_,"insert into douyin values (%s,'%s','%s','%s','%s','%s','%s',%s,'%s','%s',%s)" %( 248 | 'NULL', 249 | j['statistics']['aweme_id'], 250 | 
_config_['user_id'], 251 | _result_['title'], 252 | j['video']['play_addr']['url_list'][0], 253 | flag, 254 | video_name, 255 | cnt, 256 | j['statistics']['digg_count'], 257 | '../' + replace_filename(_result_['title'],'-') + '/' + video_name, 258 | "datetime('now')" 259 | )) 260 | 261 | 262 | if j['statistics']['aweme_id'] in _d_pool_: 263 | show_tip('','','',j['share_info']['share_desc'],j['statistics']['aweme_id'],0) 264 | return cnt 265 | t = round(float(_config_['timeout'])/(abs(math.sin(cnt)) * 6),2) 266 | if t < float(_config_['midt']):t = float(_config_['midt']) 267 | if t > float(_config_['madt']):t = float(_config_['madt']) 268 | time.sleep(t) 269 | video_url = j['video']['play_addr']['url_list'][0] 270 | video_name = j['share_info']['share_desc'] 271 | video_name = flag + '-' + str(cnt) + '-' + replace_filename(video_name,'_') + '.mp4' 272 | with open(base_path + '/' + video_name,"wb") as file: 273 | r = requests.get(video_url,headers = headers) 274 | file.write(r.content) 275 | show_tip(cnt,flag,t,video_name,'',1) 276 | down_insert(j,cnt,video_name) 277 | cnt = cnt + 1 278 | return cnt 279 | 280 | 281 | headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36'} 282 | cnt = _d_pool_[flag] 283 | if flag not in _result_:return 284 | for i in _result_[flag]: 285 | if 'res' not in i or 'aweme_list' not in i['res']:continue 286 | for j in i['res']['aweme_list']: 287 | try: 288 | cnt = _sub_sownload(j,cnt,flag) 289 | except Exception as e: 290 | print(e) 291 | print('当前视频下载失败:[' + replace_filename(j['share_info']['share_desc'],'_') + ']') 292 | return cnt 293 | 294 | 295 | print('>>> 下载视频中... 
...') 296 | print('最小请求等待时间为:' + str(_config_['midt']) +'s 最大请求等待时间为:' + str(_config_['madt']) + 's') 297 | base_path = _config_['download_path'] + '/' + replace_filename(_result_['title'],'-') 298 | if not path.exists(base_path):os.makedirs(base_path) 299 | print('下载路径:'+str(base_path)) 300 | p_len = 0 301 | l_len = 0 302 | if 'post' not in _result_: 303 | print('没有发表的视频可供下载...') 304 | else:p_len = _download('post',base_path) 305 | if 'like' not in _result_: 306 | print('没有喜欢的视频可供下载...') 307 | else:l_len = _download('like',base_path) 308 | return {'p_len':p_len,'l_len':l_len} 309 | 310 | 311 | def _count_func(_result_,flag): 312 | # flag maybe post or like 313 | cnt = 0 314 | for i in _result_[flag]: 315 | cnt = cnt + len(i['res']['aweme_list']) 316 | print(cnt) --------------------------------------------------------------------------------