├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md └── pornhub_crawler ├── .idea ├── .gitignore ├── inspectionProfiles │ ├── Project_Default.xml │ └── profiles_settings.xml ├── misc.xml ├── modules.xml └── pornhub_crawler.iml ├── configure.txt ├── main.py ├── pornhub_video.sql ├── requirements.txt ├── tool ├── __init__.py ├── mysql.py └── video_download.py └── 安装库.txt /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # celery beat schedule file 95 | celerybeat-schedule 96 | 97 | # SageMath parsed files 98 | *.sage.py 99 | 100 | # Environments 101 | .env 102 | .venv 103 | env/ 104 | venv/ 105 | ENV/ 106 | env.bak/ 107 | venv.bak/ 108 | 109 | # Spyder project settings 110 | .spyderproject 111 | .spyproject 112 | 113 | # Rope project settings 114 | .ropeproject 115 | 116 | # mkdocs documentation 117 | /site 118 | 119 | # mypy 120 | .mypy_cache/ 121 | .dmypy.json 122 | dmypy.json 123 | 124 | # Pyre type checker 125 | .pyre/ 126 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 lzkgbld 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ## 使用方法: 5 | 6 | 1.部署环境 7 | pip install -r requirements.txt 8 | --2. 9 | 手动安装环境:(以下库默认请安装最新版) 10 | 1.request 11 | 2.PyMySQL 12 | 3.youtube-dl 13 | 4.lxml 14 | 5.requests-toolbelt 15 | 16 | 2.修改配置文件 17 | 请打开configure.txt文件进行修改. 18 | 19 | 3.创建数据库环境(可选) 20 | 此步不一定需要,仅是为了防止第二次运行导致获取重复视频,如果需要请按照下面数据库结构进行创建. 21 | 数据库相关配置信息请自行前往configure.txt填写。 22 | 去重原理是数据库只会记录抓取视频地址,由于名称存储使用pymysql存储会出现一些奇怪的错误,所以选择根据url地址进行去重。 23 | 数据库构建有两种方案: 24 | 1.自行导入pornhub_video.sql文件 25 | 2.自行创建数据库,并且在configure.txt填写好数据库名称,确定里面有张表名称为video,如果需要更换请自行修改源码目录下的tool/mysql.py。 26 | 3.如果使用自行创建数据库造成的数据库执行出错,请自行研究源码修复,本人不做过多的指导! 27 | 28 | 3--1 29 | 如果不需要去重,请自行打开源码里面tool/video_download.py下面留下注释的执行语句,但是请确保configure.txt的数据还是有填写的,请勿留空. 30 | 31 | 32 | 4.运行爬虫,进入目录看到main.py文件,直接运行Python main.py 33 | 34 | 35 | 36 | ## 如果需要下载pornhub会员收费视频,请执行下列步骤 37 | 38 | 39 | 40 | 1.请使用对应cookie获取插件,获取cookie值,比如谷歌浏览器请使用cookies.txt(https://chrome.google.com/webstore/detail/cookiestxt/njabckikapfpffapmjgojcnbfjonfjfg) 41 | 由于本人只有谷歌,获取cookie浏览器插件仅介绍谷歌能用的,其他浏览器请自行寻找其他插件,将下载的cookie文本里面的内容全部复制粘贴,替换源码中的cookies.txt里面内容. 42 | 43 | 2.请在configure.txt填好账号密码,请确定能够访问付费视频权限的账号! 44 | 45 | 3.请在下载配置文件主动填写对应的配置信息.(1:免费 2:收费视频) 46 | 47 | 4.运行爬虫,进入目录看到main.py文件,直接运行Python main.py 48 | 49 | 50 | ## 备注: 51 | 如果有任何问题请留言,尽可能修复. 52 | 预计后面会配置一个页面控制端,运行在页面直接配置好信息后直接调度爬虫...(开发时间未定) 53 | 54 | 画质自动下载1080p如果有需要请video_download.py文件里面41行,为收藏夹画质,分类下载请修改84行和135行,注释已经写清楚是下载免费和收费视频的画质,可改正720p和480p,请确定P站有的画质! 55 | 请Linux运行的哥们修改一下pipelines.py文件,把最后的%(title)s.%(ext)s修改为%\(title\)s.%\(ext\)s (原因是Linux不识别%加括号的命令....) 56 | -------------------------------------------------------------------------------- /pornhub_crawler/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Datasource local storage ignored files 5 | /dataSources/ 6 | /dataSources.local.xml 7 | # Editor-based HTTP Client requests 8 | /httpRequests/ 9 | -------------------------------------------------------------------------------- /pornhub_crawler/.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 48 | -------------------------------------------------------------------------------- /pornhub_crawler/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /pornhub_crawler/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /pornhub_crawler/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /pornhub_crawler/.idea/pornhub_crawler.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /pornhub_crawler/configure.txt: -------------------------------------------------------------------------------- 1 | 爬取类型(1:分类 2:收藏)==2 2 | 存储文件夹==D:/Demo 3 | 起始页==1 4 | 结束页==1 5 | 完整地址==32972131 6 | 是否收费(1:免费 2:收费视频)==1 7 | 数据库地址==127.0.0.1 8 | 数据库账号==root 9 | 数据库密码==1cb1a931db23a8f3 10 | 数据库名称==ecchi_video 11 | 账号==请填写开通高级服务的账号 12 | 密码==请填写开通高级账号的密码 -------------------------------------------------------------------------------- /pornhub_crawler/main.py: -------------------------------------------------------------------------------- 1 | # Author": "lzkgbld", 2 | # "Date": 2020/9/20 12:56 3 | # "LastEditors": "lzkgbld" 4 | # "LastEditTime": 2020/9/20 12:56 5 | from tool.video_download import por_favorites, por_type 6 | 7 | 8 | if __name__ == "__main__": 9 | # 调用方法将txt数据读取到list中 10 | txt_data = [] 11 | for line in open('configure.txt', "r", encoding='UTF-8'): 12 | txt_data.append(line.strip('\n')) 13 | cl_video_type =None 14 | cl_path = None 15 | cl_num = None 16 | cl_end = None 17 | cl_url = None 18 | cl_type = None 19 | cl_host = None 20 | cl_user = None 21 | cl_pwd = None 22 | cl_name = None 23 | user_name = None 24 | user_pwd = None 25 | # 进行数据分类 26 | # 如果只抓收藏,在完整地址输入抓的ID即可,默认抓收藏夹全部内容 27 | # 选择收藏类型只需要填存储文件夹和完整地址里面输入ID即可,其他不会受到任何影响 28 | for t in txt_data: 29 | if t.split("==")[0] in "爬取类型(1:分类 2:收藏)": 30 | cl_video_type = int(t.split("==")[1]) 31 | elif t.split("==")[0] in "存储文件夹": 32 | cl_path = t.split("==")[1] 33 | elif t.split("==")[0] in "起始页": 34 | cl_num = t.split("==")[1] 35 | elif t.split("==")[0] in "结束页": 36 | cl_end = t.split("==")[1] 37 | elif t.split("==")[0] in "完整地址": 38 | cl_url = t.split("==")[1] 39 | elif t.split("==")[0] in "是否收费(1:免费 2:收费视频)": 40 | cl_type = t.split("==")[1] 41 | elif t.split("==")[0] in "数据库地址": 42 | cl_host = t.split("==")[1] 43 | elif t.split("==")[0] in "数据库账号": 44 | cl_user = t.split("==")[1] 45 | elif t.split("==")[0] in "数据库密码": 46 | cl_pwd = t.split("==")[1] 47 | elif t.split("==")[0] in "数据库名称": 48 | cl_name = t.split("==")[1] 49 | elif t.split("==")[0] in "账号": 50 | user_name = t.split("==")[1] 51 | elif t.split("==")[0] in "密码": 52 | user_pwd = t.split("==")[1] 53 | if cl_video_type == 1: 54 | por_type(cl_num, cl_end, cl_url, cl_type, cl_path, cl_host, cl_user, cl_pwd, cl_name, user_name, user_pwd) 55 | elif cl_video_type == 2: 56 | por_favorites(cl_url, cl_path, cl_host, cl_user, cl_pwd, cl_name) 57 | 58 | # D:/Demo/video 59 | # 101749201 -------------------------------------------------------------------------------- /pornhub_crawler/pornhub_video.sql: -------------------------------------------------------------------------------- 1 | /* 2 | Navicat Premium Data Transfer 3 | 4 | Source Server : mysql 5 | Source Server Type : MySQL 6 | Source Server Version : 50728 7 | Source Host : localhost:3306 8 | Source Schema : pornhub_video 9 | 10 | Target Server Type : MySQL 11 | Target Server Version : 50728 12 | File Encoding : 65001 13 | 14 | Date: 26/09/2020 17:43:02 15 | */ 16 | 17 | SET NAMES utf8mb4; 18 | SET FOREIGN_KEY_CHECKS = 0; 19 | 20 | -- ---------------------------- 21 | -- Table structure for video 22 | -- ---------------------------- 23 | DROP TABLE IF EXISTS `video`; 24 | CREATE TABLE `video` ( 25 | `video_url` varchar(255) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL 26 | ) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic; 27 | 28 | SET FOREIGN_KEY_CHECKS = 1; 29 | -------------------------------------------------------------------------------- /pornhub_crawler/requirements.txt: -------------------------------------------------------------------------------- 1 | PyMySQL==0.9.3 2 | youtube-dl==2020.6.16.1 3 | lxml==4.6.2 4 | requests==2.23.0 5 | requests-toolbelt==0.9.1 6 | -------------------------------------------------------------------------------- /pornhub_crawler/tool/__init__.py: -------------------------------------------------------------------------------- 1 | # Author": "lzkgbld", 2 | # "Date": 2020/9/20 12:56 3 | # "LastEditors": "lzkgbld" 4 | # "LastEditTime": 2020/9/20 12:56 -------------------------------------------------------------------------------- /pornhub_crawler/tool/mysql.py: -------------------------------------------------------------------------------- 1 | # Author": "lzkgbld", 2 | # "Date": 2020/9/20 13:26 3 | # "LastEditors": "lzkgbld" 4 | # "LastEditTime": 2020/9/20 13:26 5 | import pymysql, time, os 6 | 7 | 8 | class ManagementMysql(object): 9 | def __init__(self, host, user, password, database): 10 | self.host = host 11 | self.user = user 12 | self.password = password 13 | self.database = database 14 | 15 | # 检查视频是否存在 16 | def check_video(self, video_url): 17 | try: 18 | conn = pymysql.connect(host=self.host, user=self.user, password=self.password, database=self.database, charset="utf8") 19 | cursor = conn.cursor() 20 | # 搜索数据库是否已有该内容 21 | cursor.execute("SELECT video_url from video WHERE video_url=" + "'" + str(video_url) + "'" + ";") 22 | # 关闭数据库连接 23 | cursor.close() 24 | conn.close() 25 | if cursor.fetchone(): 26 | return "NO" 27 | else: 28 | return "OK" 29 | except Exception as e: 30 | # 如果文件存在则删除 31 | # if os.path.exists("checklog.txt"): 32 | # os.remove("dest.txt") 33 | # 写出错误log日志 34 | with open("check_log.txt", "a+",encoding='utf-8') as f: 35 | f.writelines(str(e)) 36 | f.writelines("\n") 37 | f.writelines("错误视频URL地址:"+str(video_url)) 38 | f.writelines("\n") 39 | f.writelines("错误发生时间:" + str(time.strftime("%Y-%m-%d %H:%M:%S"))) 40 | f.writelines("\n") 41 | 42 | def add_video(self, video_url): 43 | try: 44 | conn = pymysql.connect(host=self.host, user=self.user, password=self.password, database=self.database, charset="utf8") 45 | cursor = conn.cursor() 46 | # 搜索数据库是否已有该内容 47 | cursor.execute("SELECT video_url from video WHERE video_url=" + "'" + str(video_url) + "'" + ";") 48 | # 关闭数据库连接 49 | cursor.close() 50 | conn.close() 51 | if cursor.fetchone(): 52 | return "视频已存在" 53 | else: 54 | cursor = conn.cursor() 55 | # 数据库插入语句 56 | cursor.execute( 57 | "INSERT INTO video(video_url) values ('%s')" % (str(video_url))) 58 | # 执行插入语句 59 | conn.commit() 60 | # 关闭数据库连接 61 | cursor.close() 62 | conn.close() 63 | 64 | except Exception as e: 65 | # 如果文件存在则删除 66 | # if os.path.exists("checklog.txt"): 67 | # os.remove("dest.txt") 68 | # 写出错误log日志 69 | with open("add_log.txt", "a+",encoding='utf-8') as f: 70 | f.writelines(str(e)) 71 | f.writelines("\n") 72 | f.writelines("添加错误URL地址:"+str(video_url)) 73 | f.writelines("\n") 74 | f.writelines("错误发生时间:" + str(time.strftime("%Y-%m-%d %H:%M:%S"))) 75 | f.writelines("\n") -------------------------------------------------------------------------------- /pornhub_crawler/tool/video_download.py: -------------------------------------------------------------------------------- 1 | # Author": "lzkgbld", 2 | # "Date": 2020/9/20 12:57 3 | # "LastEditors": "lzkgbld" 4 | # "LastEditTime": 2020/9/20 12:57 5 | import requests, os 6 | from lxml import etree 7 | from tool.mysql import ManagementMysql 8 | 9 | 10 | def por_favorites(video_id, video_file, host, user, password, name): 11 | # 创建mysql工具类 请注释 12 | mysql = ManagementMysql(host, user, password, name) 13 | # 原地址 14 | video_url = 'https://cn.pornhub.com' 15 | # 获取视频数量 16 | num_url = 'https://cn.pornhub.com/playlist/'+video_id 17 | # 获取视频列表 18 | html_num = requests.get(num_url) 19 | html_num = html_num.text 20 | html_num = etree.HTML(html_num) 21 | data_num = html_num.xpath('//*[@id="aboutPlaylist"]/div[1]/text()') 22 | video_num = data_num[1].strip() 23 | video_num = int(video_num.split('个')[0].split('-')[-1].strip()) 24 | video_num = int(video_num/50)+1 25 | num = 1 26 | page = 0 27 | while num <= video_num: 28 | # 获取视频列表 29 | url = 'https://cn.pornhub.com/playlist/viewChunked?id='+video_id+'&offset='+str(num)+'&itemsPerPage=50' 30 | # 获取视频链接 31 | html = requests.get(url) 32 | html = html.text 33 | html = etree.HTML(html) 34 | data = html.xpath("//li/div/div/a/@href") 35 | for d in data: 36 | dd = d.split('&')[0] 37 | video_u = video_url+dd 38 | # 如果不需要去重请注释这句话 39 | if mysql.check_video(video_u) == "OK": 40 | # 添加下载任务 Linux="youtube-dl " + video_u + " -o " +video_file + "/" + "%\(title\)s.%\(ext\)s" 41 | cmd = "youtube-dl -f 1080p " + video_u + " -o " + video_file + "/" + "%\(title\)s.%\(ext\)s" 42 | print(cmd) 43 | results = os.system(cmd) 44 | print("下载状态" + str(results)) 45 | if results == 0: 46 | pass 47 | else: 48 | cmd1 = "youtube-dl " + video_u + " -o " +video_file + "/" + "%\(title\)s.%\(ext\)s" 49 | print(cmd1) 50 | # 添加下载任务 51 | results = os.system(cmd1) 52 | print("画质下载错误,使用备用方案,选择已有最好画质下载") 53 | print("修正下载状态码" + str(results)) 54 | # 请注释这条添加语句 55 | mysql.add_video(url) 56 | else: 57 | print("视频重复:"+video_u) 58 | page += 50 59 | num += 1 60 | 61 | 62 | def por_type(start_num, end_num, video_url, charge_type, video_file, host, user, password, name, user_name, user_pwd): 63 | start_num = int(start_num) 64 | end_num = int(end_num) 65 | # 创建mysql工具类 请注释 66 | mysql = ManagementMysql(host, user, password, name) 67 | urls = 'https://cn.pornhub.com' 68 | headers = { 69 | 'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.52 Safari/536.5' 70 | } 71 | # 不收费类型 72 | if charge_type == 1: 73 | while start_num <= end_num: 74 | urls = video_url + '&page=' + str(start_num) 75 | html = requests.get(urls, headers=headers).text 76 | html = etree.HTML(html) 77 | video_list = html.xpath('//*[@data-segment="straight"]/div/div[1]/a/@href') 78 | for vl in video_list: 79 | # 拼接地址下载 80 | url = urls+vl 81 | if mysql.check_video(url) == "OK": 82 | # 添加下载任务 Linux="youtube-dl " + url + " -o " +video_file + "/" + "%\(title\)s.%\(ext\)s" 83 | cmd = "youtube-dl -f 1080p " + url + " -o " + video_file + "/" + "%\(title\)s.%\(ext\)s" 84 | results = os.system(cmd) 85 | print("下载状态" + str(results)) 86 | if results == 0: 87 | pass 88 | else: 89 | cmd1 = "youtube-dl " + url + " -o " +video_file + "/" + "%\(title\)s.%\(ext\)s" 90 | # 添加下载任务 91 | results = os.system(cmd1) 92 | print("画质下载错误,使用备用方案,选择已有最好画质下载") 93 | print("修正下载状态码" + str(results)) 94 | # 请注释这条添加语句 95 | mysql.add_video(url) 96 | else: 97 | print("视频重复:"+url) 98 | 99 | start_num += 1 100 | # 收费类型 101 | elif charge_type == 2: 102 | urls = 'https://cn.pornhubpremium.com' 103 | headers = { 104 | "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36", 105 | 'Referer': 'https://cn.pornhubpremium.com/premium/login' 106 | } 107 | 108 | html_login = requests.get('https://cn.pornhubpremium.com/premium/login') 109 | h_cookie = html_login.cookies 110 | h_cookie = requests.utils.dict_from_cookiejar(h_cookie) 111 | html_login = html_login.text 112 | html = etree.HTML(html_login) 113 | token = html.xpath('//*[@id="token"]/@value') 114 | token = token[0] 115 | redirect = {'from': 'pc_premium_login', 'segment': 'straight'} 116 | # username=用户名 password=密码 117 | data = {'username': user_name, 'password': user_pwd, 'token': token, 'redirect': redirect} 118 | r = requests.post("https://cn.pornhubpremium.com/front/authenticate", headers=headers, data=data, 119 | cookies=h_cookie) 120 | # print(r.text) 121 | cookice = r.cookies 122 | cookice = requests.utils.dict_from_cookiejar(cookice) 123 | 124 | while start_num <= end_num: 125 | urls = video_url + '&page=' + str(start_num) 126 | html = requests.get(urls, headers=headers, cookice=cookice).text 127 | html = etree.HTML(html) 128 | video_list = html.xpath('//*[@data-segment="straight"]/div/div[1]/a/@href') 129 | for vl in video_list: 130 | # 拼接地址下载 131 | url = urls+vl 132 | # 如果需要进行去重验证请注释这条IF语句 133 | if mysql.check_video(url) == "OK": 134 | # 添加下载任务 Linux="youtube-dl " + url + " -o " +video_file + "/" + "%\(title\)s.%\(ext\)s" 135 | cmd = "youtube-dl -f 1080p " + url +' --cookies cookies.txt -o ' + video_file + "/" + "%\(title\)s.%\(ext\)s" 136 | results = os.system(cmd) 137 | print("下载状态" + str(results)) 138 | if results == 0: 139 | pass 140 | else: 141 | cmd1 = "youtube-dl " + url +' --cookies cookies.txt -o ' + video_file + "/" + "%\(title\)s.%\(ext\)s" 142 | # 添加下载任务 143 | results = os.system(cmd1) 144 | print("画质下载错误,使用备用方案,选择已有最好画质下载") 145 | print("修正下载状态码" + str(results)) 146 | # 请注释这条添加语句 147 | mysql.add_video(url) 148 | else: 149 | print("视频重复:"+url) 150 | start_num += 1 -------------------------------------------------------------------------------- /pornhub_crawler/安装库.txt: -------------------------------------------------------------------------------- 1 | requests=2.23.0 2 | lxml=1.3.0 3 | pymysql=0.9.3 4 | YouTube-dl=2020.6.16.1 --------------------------------------------------------------------------------