├── .gitignore ├── LICENSE ├── README.md └── src ├── downloader.py ├── porn.py ├── pornhub-dl.py └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | #Ipython Notebook 62 | .ipynb_checkpoints 63 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. 
We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pornhub-dl 2 | ====================== 3 | ###Description### 4 | pornhub.com crawler to crawl and download videos those are publicly present in the website for viewing and downloading 5 | It’s my first repository at github one of my friend in college asked me to make something that can download porn at a bulk, so I make it 6 | It’s very poorly written because I am very new to Python it’s been couple of months only since I am using Python for programming 7 | 8 | ###Prerequisite### 9 | - [Python 2.7](https://www.python.org/downloads/release/python-2710/) 10 | - [Beautiful Soup / BS4](https://pypi.python.org/pypi/beautifulsoup4) (Beautiful Soup 4 Install using PIP : `$pip install bs4` ) 11 | 12 | 13 | ###First Run### 14 | `$pornhub-dl -u "http://www.pornhub.com/~url"` 15 | 16 | -------------------------------------------------------------------------------- /src/downloader.py: -------------------------------------------------------------------------------- 1 | import urllib2, os, time 2 | class 
import os
import time
import urllib.request


class Download:
    """Stream a URL into a local file, with resume support and a console progress bar."""

    def __init__(self, url, file_name):
        self.url = url
        self.file_name = file_name
        # Append mode: an existing partial file is resumed, never clobbered.
        self.file = open(self.file_name, 'ab')
        self.current_file_size = os.stat(self.file_name).st_size
        self.download_block_size = 8192

    def now(self):
        """Download self.url into self.file_name.

        Returns:
            True when the file on disk is complete (it already was, or this
            call finished it); False on any error or short/aborted transfer.
        """
        # Pre-assign both counters so the completeness check below cannot hit
        # a NameError when an exception fires before the headers arrive
        # (the original code crashed in that path).
        already_downloaded_size = self.current_file_size
        actual_file_size = -1  # sentinel: unknown until a response is seen
        try:
            req = urllib.request.Request(self.url)
            if self.current_file_size > 0:
                # Probe the remote size first; skip the download when the
                # local file is already complete.
                with urllib.request.urlopen(self.url) as probe:
                    actual_file_size = int(probe.headers.get("Content-Length", 0))
                if self.current_file_size >= actual_file_size:
                    return True
                # Ask the server for the missing suffix only.
                req.add_header('Range', 'bytes=%d-' % self.current_file_size)
                print("[Resuming -- %3.2f MB] : %s" % (self.current_file_size / 1048576., self.file_name))
            else:
                print("[Saving] : %s" % (self.file_name))
            req.add_header('User-Agent', 'fluid v1.0.5')
            with urllib.request.urlopen(req) as resp:
                # A ranged response reports only the remaining bytes, so add
                # what is already on disk to get the full size.
                actual_file_size = int(resp.headers.get("Content-Length", 0))
                actual_file_size += self.current_file_size
                print("[SIZE] : %3.2f" % (actual_file_size / 1048576.))
                # time.clock() was removed in Python 3.8; perf_counter is the replacement.
                clk_start = time.perf_counter()
                while True:
                    buffer = resp.read(self.download_block_size)
                    if not buffer:
                        break
                    already_downloaded_size += len(buffer)
                    self.file.write(buffer)
                    total = max(actual_file_size, 1)  # guard division for empty/unsized bodies
                    done = int(50 * already_downloaded_size / total)
                    dl_mb = already_downloaded_size / 1048576.
                    elapsed = max(time.perf_counter() - clk_start, 1e-9)
                    # NOTE(review): label says Kbps but the figure is KB/s — kept for output compatibility.
                    print("\r[PROGRESS] : [%3.2f%%] [%3.2f MB] [%s%s] [%3.2fKbps]"
                          % (already_downloaded_size * 100. / total, dl_mb,
                             '#' * done, '.' * (50 - done), dl_mb * 1024 / elapsed),
                          end='')
        except Exception as e:
            print(e)
        finally:
            self.file.close()
        return already_downloaded_size == actual_file_size
import os
import urllib.request

from bs4 import BeautifulSoup

from downloader import Download
from util import Tools


class PornHub:
    'this module is only for Porn-Hub '

    def __init__(self):
        self.helper = Tools()
        # NOTE(review): literal "\" paths assume Windows (%APPDATA%); kept
        # byte-identical so they match the files Tools() creates.
        self.MAIN_FILE = "%s\MAIN_PH.list" % (os.getenv('APPDATA'))
        self.TBD_FILE = "%s\TBD_PH.list" % (os.getenv('APPDATA'))
        self.ARCHIVE_FILE = "%s\ARCHIVE_PH.list" % (os.getenv('APPDATA'))

    def PH_extractor_(self, resp):
        """Scrape every video key (`_vkey`) from a listing page and queue unseen ones.

        New keys are appended to both MAIN (seen-ever) and TBD (to download).
        """
        try:
            parse_tree = BeautifulSoup(resp, "html.parser")
            for each_tag in parse_tree.findAll("li", {"class": "videoblock"}):
                link = str(each_tag['_vkey'])
                if not self.helper.find_link(self.MAIN_FILE, link):
                    self.helper.append_link(self.MAIN_FILE, link)
                    self.helper.append_link(self.TBD_FILE, link)
        except Exception:
            # best effort: a parse/connection hiccup just means we try again later
            pass

    def _fetch_CDN_(self, resp):
        """Find the best available stream URL in a video page and download it.

        Returns True when the link may be dropped from the queue (downloaded,
        premium-only, or no stream present), False when the download failed,
        None when the page contains no player markup at all.
        """
        if 'alt="Upgrade to Pornhub Premium to enjoy this video."' in resp:
            # premium-only video: nothing to fetch, caller removes the link
            return True
        if 'var player_quality_' in resp:
            # Prefer 720p, then 480p, then 240p. Each marker string is 27
            # chars long, so pos + 27 is the first character of the URL.
            p720 = resp.find("var player_quality_720p = '")
            if p720 == -1:
                p480 = resp.find("var player_quality_480p = '")
                if p480 == -1:
                    p240 = resp.find("var player_quality_240p = '")
                    if p240 == -1:
                        print("\n[None] No Video Format could be found -- Removing the Link")
                        return True
                    print("[FETCHED -- 240px]")
                    start = p240 + 27
                else:
                    # (was mislabelled "420px" — this is the 480p stream)
                    print("[FETCHED -- 480px]")
                    start = p480 + 27
            else:
                print("[FETCHED -- 720px]")
                start = p720 + 27
            end = resp.find("'", start)
            # Page title becomes the file name, stripped of characters that
            # are illegal in file names.
            file_name = str(BeautifulSoup(resp, "html.parser").title.string)
            file_name = file_name.translate(str.maketrans('', '', "'*:\"/?<>|"))
            return Download(resp[start:end], "%s.mp4" % (file_name)).now()
        # no player variables at all: caller reports an error and retries
        return None

    def __prepare__(self):
        """Drain the TBD list: fetch each queued page, download, then archive the key."""
        while os.stat(self.TBD_FILE).st_size > 0:
            link = self.helper.get_me_link(self.TBD_FILE)
            print("\n[Downloading] : http://www.pornhub.com/view_video.php?viewkey=%s" % (link))
            req = urllib.request.Request("http://www.pornhub.com/view_video.php?viewkey=%s" % (link))
            # NOTE(review): hard-coded RNKEY cookie presumably dodges a bot check — may be stale.
            req.add_header('Cookie', "RNKEY=1043543*1527941:2834309375:3318880964:1;")
            try:
                # decode: page is compared/parsed as text downstream
                resp = urllib.request.urlopen(req).read().decode('utf-8', 'replace')
                self.PH_extractor_(resp)
                if self._fetch_CDN_(resp):
                    self.helper.remove_link(self.TBD_FILE, link)
                    self.helper.append_link(self.ARCHIVE_FILE, link)
                    print("\n[WIN] : File Download Complete!")
                else:
                    print("\n[ERROR] : Something went wrong!")
            except Exception as e:
                print(e)
import argparse
import os
import urllib.request

from porn import PornHub


def main():
    """CLI entry point: crawl/download a given URL, or resume the saved queue."""
    parser = argparse.ArgumentParser()
    # '-u' short / '--url' long (the original single option string '-u/-url'
    # only accepted '-u' via argparse prefix abbreviation).
    parser.add_argument('-u', '--url', action='store', dest='url', help='Porn-Hub URL')
    parser.add_argument('--version', action='version', version='version 1.0.1 an open book project (C) 2016')
    results = parser.parse_args()

    if not results.url:
        if not os.path.exists("%s\TBD_PH.list" % (os.getenv('APPDATA'))):
            # first run with no queue and no URL: nothing we can do
            print("please Provide an URL to Fetch !!! ")
        else:
            # no URL given but a queue exists: just resume downloads
            new_porn = PornHub()
            new_porn.__prepare__()
    else:
        # crawl the provided page, queue what it links to, then drain the queue
        new_porn = PornHub()
        resp = urllib.request.urlopen(str(results.url)).read().decode('utf-8', 'replace')
        new_porn._fetch_CDN_(resp)
        new_porn.PH_extractor_(resp)
        new_porn.__prepare__()


if __name__ == '__main__':
    main()
import os


class Tools:
    """File-backed helpers for the crawler's link lists.

    Each list file is a flat sequence of whitespace-separated video keys.
    """

    def __init__(self):
        # First run: make sure all three list files exist.
        # NOTE(review): literal "\" paths assume Windows (%APPDATA%); kept as-is.
        if not os.path.exists("%s\MAIN_PH.list" % (os.getenv('APPDATA'))):
            print("[WELCOME] First Run! ")
            open("%s\MAIN_PH.list" % (os.getenv('APPDATA')), "w").close()
        if not os.path.exists("%s\TBD_PH.list" % (os.getenv('APPDATA'))):
            print("[PREPARING] Files")
            open("%s\TBD_PH.list" % (os.getenv('APPDATA')), "w").close()
        if not os.path.exists("%s\ARCHIVE_PH.list" % (os.getenv('APPDATA'))):
            print("[DONE] okay Jack we are done ! here we go !")
            open("%s\ARCHIVE_PH.list" % (os.getenv('APPDATA')), "w").close()

    def find_link(self, file_path, link):
        """Return True when *link* is one of the tokens stored in the file."""
        with open(file_path, 'r') as file_object:
            return str(link) in file_object.read().split()

    def append_link(self, file_path, link):
        """Append *link* plus a separating space; return True on success."""
        try:
            # equivalent to the old read-all/rewrite-all, but a plain append
            with open(file_path, 'a') as file_object:
                file_object.write("%s " % (str(link)))
            return True
        except OSError:
            return False

    def remove_link(self, file_path, link):
        """Remove one occurrence of *link*; False when absent or on I/O error."""
        try:
            with open(file_path, 'r') as file_object:
                links = file_object.read().split()
            links.remove(link)  # ValueError when the link is not present
            with open(file_path, 'w') as file_object:
                # Write nothing when the list is now empty: the old code left
                # a lone space behind, so the drain loop's st_size > 0 check
                # never terminated and get_me_link crashed on the blank file.
                file_object.write(' '.join(links) + ' ' if links else '')
            return True
        except (OSError, ValueError):
            return False

    def get_me_link(self, file_path):
        """Return the first queued link in the file, or '0' when there is none."""
        with open(file_path, 'r') as file_object:
            tokens = file_object.read().split()
        # split() handles a whitespace-only file safely (old code indexed
        # into it and raised IndexError)
        return tokens[0] if tokens else '0'