├── .gitignore ├── README.md ├── config.json.example ├── license ├── pyph.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | env/ 3 | __pycache__/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pornhub下载器 2 | ## 开发灵感 3 | * 海外服务器下载速度较慢,使用体验很差(还是我天朝的东西好) 4 | * 一直觉得这个网站中的人讲英语讲得非常native,学习一下 5 | 6 | ## 功能要点 7 | * 支持视频播放页面视频下载 8 | * 支持视频列表页面视频下载 9 | * 支持多线程下载 10 | 11 | ## 目前不足 12 | * 仅支持linux平台使用 13 | * 下载进度不能实时展示 14 | * ... 15 | 16 | ## 参考项目 17 | https://github.com/blackmatch/pornhub-downloader 18 | https://github.com/formateddd/pornhub 19 | 20 | ## 配置过程 21 | * 下载此项目到你的主机中`git clone https://github.com/RyanSu98/pyph.git && cd pyph` 22 | * 安装依赖`pip3 install -r requirements.txt` 23 | * 安装多线程下载程序axel `sudo apt install axel` 24 | * 若使用代理,仍需手动设置环境变量 25 | ``` 26 | export http_proxy=socks5h://127.0.0.1:1080 27 | export https_proxy=socks5h://127.0.0.1:1080 28 | ``` 29 | * 程序配置 30 | `cp config.json.example config.json` 31 | 32 | num_connections 线程数 我在digitalocean上面开40个线程下载速度可以达到十几兆每秒,请以实际坏境为准 33 | 34 | ## 下载测试 35 | * 视频播放页面下载 `python3 pyph.py "https://www.pornhub.com/view_video.php?viewkey=ph5c15d3fda4987"` 36 | ```bash 37 | 检测到【视频播放页面】链接,即将开始处理... 38 | Twerking in Masturbating 开始下载... 下载过程无进度提示,请耐心等待:) 39 | Twerking in Masturbating 下载成功 40 | 41 | ubuntu@seoul:~/pyph$ ls ph-video/ 42 | 'Twerking in Masturbating.mp4' 43 | ``` 44 | * 视频列表页面下载 `python3 pyph.py "https://www.pornhub.com/video?o=ht&cc=us"` 45 | ``` 46 | ubuntu@seoul:~/pyph$ python3 pyph.py "https://www.pornhub.com/video?o=ht&cc=us" 47 | 检测到【视频列表页面】链接,即将开始处理... 48 | Sister in Law Begs for Cock FULL SERIES 开始下载... 下载过程无进度提示,请耐心等待:) 49 | Sister in Law Begs for Cock FULL SERIES 下载成功 50 | Bratty Sis - Step Brother and Sister Share A Bed and Fuck S8:E1 开始下载... 
下载过程无进度提示,请耐心等待:) 51 | Bratty Sis - Step Brother and Sister Share A Bed and Fuck S8:E1 下载成功 52 | The best Squirter ever 开始下载... 下载过程无进度提示,请耐心等待:) 53 | ``` 54 | 55 | ## 开源协议 56 | Do What The F*ck You Want To Public License 57 | 58 | ## 写在最后 59 | 欢迎pr代码 60 | -------------------------------------------------------------------------------- /config.json.example: -------------------------------------------------------------------------------- 1 | { 2 | "proxy":"socks5h://127.0.0.1:1080", 3 | "num_connections":20, 4 | "output_dir":"" 5 | } 6 | -------------------------------------------------------------------------------- /license: -------------------------------------------------------------------------------- 1 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 2 | Version 2, December 2004 3 | 4 | Copyright (C) 2004 Sam Hocevar 5 | 6 | Everyone is permitted to copy and distribute verbatim or modified 7 | copies of this license document, and changing it is allowed as long 8 | as the name is changed. 9 | 10 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 11 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 12 | 13 | 0. You just DO WHAT THE FUCK YOU WANT TO. 
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# --------------------------------------------------------------------------------
# /pyph.py:
# --------------------------------------------------------------------------------
# @Author : RyanSu
# @Filename : pyph.py
# @Mailto : i@suruifu.com
# @Website : https://www.suruifu.com/
"""Command-line downloader for a single video page or a video list page.

Usage::

    python3 pyph.py <pasted-uri>

Settings (proxy, number of connections, output directory) are read from
``config.json`` in the current working directory.
"""
import json
import os
import re
import sys
from urllib.parse import urljoin

import requests
from axel import axel
from bs4 import BeautifulSoup

# Settings file, resolved relative to the current working directory.
CONFIG_PATH = 'config.json'
# Base used to turn the hrefs found on a list page into absolute URLs.
SITE_ROOT = 'https://www.pornhub.com'
# Seconds to wait for the server before giving up on a page request.
REQUEST_TIMEOUT = 30


def _load_config():
    """Return the parsed content of ``config.json`` as a dict."""
    # A context manager guarantees the file handle is closed after parsing.
    with open(CONFIG_PATH, encoding='utf-8') as config_file:
        return json.load(config_file)


def _safe_file_name(name):
    """Return *name* with path separators replaced so it is a single path component."""
    cleaned = re.sub(r'[\\/]', '_', name).strip()
    # Never return an empty name: the caller appends an extension to it.
    return cleaned or 'video'


def _pick_best_video_uri(media_definitions):
    """Return the URL of the highest-quality entry that has a URL, or None.

    Entries without a ``videoUrl`` or whose ``quality`` cannot be converted
    to an integer are skipped.
    """
    best_quality = -1
    best_uri = None
    for definition in media_definitions:
        candidate_uri = definition.get('videoUrl')
        if not candidate_uri:
            continue
        try:
            quality = int(definition.get('quality'))
        except (TypeError, ValueError):
            # Quality is missing or not a plain number: cannot be ranked.
            continue
        if quality > best_quality:
            best_quality = quality
            best_uri = candidate_uri
    return best_uri


def pyph(pasted_uri):
    """Dispatch on the kind of the pasted link.

    * A link containing ``view_video.php?viewkey`` is treated as a video
      page and handed to :func:`download_video`.
    * Anything else is treated as a video list page and handed to
      :func:`download_list`.
    """
    pasted_uri = pasted_uri.strip()
    if 'view_video.php?viewkey' in pasted_uri:
        print('检测到【视频播放页面】链接,即将开始处理...')
        download_video(pasted_uri)
    else:
        print('检测到【视频列表页面】链接,即将开始处理...')
        download_list(pasted_uri)


def download_video(video_uri):
    """Download the video that belongs to a video page.

    Raises:
        ValueError: if the title, the embedded video information, or a
            usable direct video URL cannot be found in the page.
    """
    # 1. Fetch the HTML of the video page.
    page_text = get_page_content(video_uri).text

    # 2. Extract the video title.
    title_match = re.search(r'(.*)? - Pornhub.com', page_text)
    if title_match is None:
        raise ValueError('video title not found in {}'.format(video_uri))
    video_title = (title_match.group(1) or '').strip()

    # 3. Extract the direct video URL.
    # 3.1 The page embeds the video information as a JSON object literal.
    info_match = re.search(r'var flashvars_\d*? = (.*)?;', page_text)
    if info_match is None:
        raise ValueError('video information not found in {}'.format(video_uri))
    video_info = json.loads(info_match.group(1))

    # 3.2 Choose the entry with the highest quality.
    video_direct_uri = _pick_best_video_uri(
        video_info.get('mediaDefinitions') or [])
    if video_direct_uri is None:
        raise ValueError('no downloadable video URL in {}'.format(video_uri))

    # 4. Download the video.
    axel_download(video_title, video_direct_uri)


def download_list(list_uri):
    """Download every video linked from a video list page.

    A failure on one video is reported and does not stop the remaining
    downloads.
    """
    # 1. Fetch the page and build the parse tree.
    list_page = BeautifulSoup(get_page_content(list_uri).content, 'html.parser')

    # 2. Collect the video page links.
    container = list_page.find(id='videoCategory')
    if list_page.find(class_='noVideosNotice') is not None or container is None:
        print('未发现视频链接')
        return
    video_uris = []
    for item in container.find_all('li'):
        link = item.a
        # Skip list items that carry no usable link.
        if link is None or not link.get('href'):
            continue
        video_uris.append(urljoin(SITE_ROOT, link['href']))

    # 3. Download the videos one after another.
    for video_uri in video_uris:
        try:
            download_video(video_uri)
        except Exception as error:  # per-item boundary: report and move on
            print('{}\t下载出现异常: {!r}'.format(video_uri, error))


def get_page_content(uri):
    """Fetch *uri* and return the ``requests`` response object.

    The proxy from ``config.json`` is used for both http and https when it
    is set; otherwise no explicit proxy mapping is passed to ``requests``.

    Raises:
        requests.RequestException: on connection problems, on timeout, or
            when the server answers with an error status.
    """
    # 1. Read the proxy setting.
    proxy = (_load_config().get('proxy') or '').strip()
    proxies = {'http': proxy, 'https': proxy} if proxy else None
    # 2. Fetch the page; the timeout keeps a stalled connection from hanging forever.
    response = requests.get(uri, proxies=proxies, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def axel_download(file_name, file_uri):
    """Download *file_uri* with axel and store it as ``<file_name>.mp4``.

    The number of connections and the output directory come from
    ``config.json``; an empty output directory means ``ph-video`` under the
    current working directory.
    """
    print("{}\t开始下载... 下载过程无进度提示,请耐心等待:)".format(file_name))
    # 1. Read the download settings: connection count and output directory.
    config = _load_config()
    num_connections = config['num_connections']
    output_dir = (config.get('output_dir') or '').strip()

    # 2. Prepare the output directory (default when unset, created when missing).
    if not output_dir:
        output_dir = os.path.join(os.getcwd(), 'ph-video')
    os.makedirs(output_dir, exist_ok=True)

    # 3. Remove leftovers of an earlier attempt (e.g. an incomplete file
    #    after a failed download); axel reports an error otherwise.
    output_path = os.path.join(output_dir, _safe_file_name(file_name) + '.mp4')
    if os.path.exists(output_path):
        os.remove(output_path)

    # 4. Download the video.
    axel(file_uri, output_path, num_connections)
    print('{}\t下载成功'.format(file_name))


def main(argv=None):
    """Run the downloader and return a process exit code.

    *argv* defaults to ``sys.argv``; the pasted link is expected at index 1.
    """
    args = sys.argv if argv is None else argv
    # sys.argv always holds the script name, so a missing link means len < 2.
    if len(args) < 2:
        print('python3 pyph.py https://copied.uri/')
        return 1
    try:
        pyph(args[1])
    except Exception as error:  # top-level boundary: report the cause
        print('下载出现异常: {!r}'.format(error))
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())

# --------------------------------------------------------------------------------
# /requirements.txt:
# --------------------------------------------------------------------------------
# axel-wrapper==0.0.1
# beautifulsoup4==4.7.1
# certifi==2018.11.29
# chardet==3.0.4
# idna==2.8
# PySocks==1.6.8
# requests==2.21.0
# soupsieve==1.7.3
# urllib3==1.24.2
# --------------------------------------------------------------------------------