├── .gitignore ├── LICENSE ├── README.md └── src ├── downloader.py ├── porn.py ├── pornhub-dl.py └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | 61 | #Ipython Notebook 62 | .ipynb_checkpoints 63 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. 
We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pornhub-dl 2 | ====================== 3 | ###Description### 4 | pornhub.com crawler to crawl and download videos those are publicly present in the website for viewing and downloading 5 | It’s my first repository at github one of my friend in college asked me to make something that can download porn at a bulk, so I make it 6 | It’s very poorly written because I am very new to Python it’s been couple of months only since I am using Python for programming 7 | 8 | ###Prerequisite### 9 | - [Python 2.7](https://www.python.org/downloads/release/python-2710/) 10 | - [Beautiful Soup / BS4](https://pypi.python.org/pypi/beautifulsoup4) (Beautiful Soup 4 Install using PIP : `$pip install bs4` ) 11 | 12 | 13 | ###First Run### 14 | `$pornhub-dl -u "http://www.pornhub.com/~url"` 15 | 16 | -------------------------------------------------------------------------------- /src/downloader.py: -------------------------------------------------------------------------------- 1 | import urllib2, os, time 2 | class 
import os
import time
import urllib.request


class Download:
    """Stream a URL into a local file, with resume support and a console progress bar."""

    def __init__(self, url, file_name):
        self.url = url
        self.file_name = file_name
        # Append mode: an existing partial file is resumed, never clobbered.
        self.file = open(self.file_name, 'ab')
        self.current_file_size = os.stat(self.file_name).st_size
        self.download_block_size = 8192

    def now(self):
        """Download self.url into self.file_name.

        Returns:
            True when the file on disk is complete (it already was, or this
            call finished it); False on any error or short/aborted transfer.
        """
        # Pre-assign both counters so the completeness check below cannot hit
        # a NameError when an exception fires before the headers arrive
        # (the original code crashed in that path).
        already_downloaded_size = self.current_file_size
        actual_file_size = -1  # sentinel: unknown until a response is seen
        try:
            req = urllib.request.Request(self.url)
            if self.current_file_size > 0:
                # Probe the remote size first; skip the download when the
                # local file is already complete.
                with urllib.request.urlopen(self.url) as probe:
                    actual_file_size = int(probe.headers.get("Content-Length", 0))
                if self.current_file_size >= actual_file_size:
                    return True
                # Ask the server for the missing suffix only.
                req.add_header('Range', 'bytes=%d-' % self.current_file_size)
                print("[Resuming -- %3.2f MB] : %s" % (self.current_file_size / 1048576., self.file_name))
            else:
                print("[Saving] : %s" % (self.file_name))
            req.add_header('User-Agent', 'fluid v1.0.5')
            with urllib.request.urlopen(req) as resp:
                # A ranged response reports only the remaining bytes, so add
                # what is already on disk to get the full size.
                actual_file_size = int(resp.headers.get("Content-Length", 0))
                actual_file_size += self.current_file_size
                print("[SIZE] : %3.2f" % (actual_file_size / 1048576.))
                # time.clock() was removed in Python 3.8; perf_counter is the replacement.
                clk_start = time.perf_counter()
                while True:
                    buffer = resp.read(self.download_block_size)
                    if not buffer:
                        break
                    already_downloaded_size += len(buffer)
                    self.file.write(buffer)
                    total = max(actual_file_size, 1)  # guard division for empty/unsized bodies
                    done = int(50 * already_downloaded_size / total)
                    dl_mb = already_downloaded_size / 1048576.
                    elapsed = max(time.perf_counter() - clk_start, 1e-9)
                    # NOTE(review): label says Kbps but the figure is KB/s — kept for output compatibility.
                    print("\r[PROGRESS] : [%3.2f%%] [%3.2f MB] [%s%s] [%3.2fKbps]"
                          % (already_downloaded_size * 100. / total, dl_mb,
                             '#' * done, '.' * (50 - done), dl_mb * 1024 / elapsed),
                          end='')
        except Exception as e:
            print(e)
        finally:
            self.file.close()
        return already_downloaded_size == actual_file_size
import os
import urllib.request

from bs4 import BeautifulSoup

from downloader import Download
from util import Tools


class PornHub:
    'this module is only for Porn-Hub '

    def __init__(self):
        self.helper = Tools()
        # NOTE(review): literal "\" paths assume Windows (%APPDATA%); kept
        # byte-identical so they match the files Tools() creates.
        self.MAIN_FILE = "%s\MAIN_PH.list" % (os.getenv('APPDATA'))
        self.TBD_FILE = "%s\TBD_PH.list" % (os.getenv('APPDATA'))
        self.ARCHIVE_FILE = "%s\ARCHIVE_PH.list" % (os.getenv('APPDATA'))

    def PH_extractor_(self, resp):
        """Scrape every video key (`_vkey`) from a listing page and queue unseen ones.

        New keys are appended to both MAIN (seen-ever) and TBD (to download).
        """
        try:
            parse_tree = BeautifulSoup(resp, "html.parser")
            for each_tag in parse_tree.findAll("li", {"class": "videoblock"}):
                link = str(each_tag['_vkey'])
                if not self.helper.find_link(self.MAIN_FILE, link):
                    self.helper.append_link(self.MAIN_FILE, link)
                    self.helper.append_link(self.TBD_FILE, link)
        except Exception:
            # best effort: a parse/connection hiccup just means we try again later
            pass

    def _fetch_CDN_(self, resp):
        """Find the best available stream URL in a video page and download it.

        Returns True when the link may be dropped from the queue (downloaded,
        premium-only, or no stream present), False when the download failed,
        None when the page contains no player markup at all.
        """
        if 'alt="Upgrade to Pornhub Premium to enjoy this video."' in resp:
            # premium-only video: nothing to fetch, caller removes the link
            return True
        if 'var player_quality_' in resp:
            # Prefer 720p, then 480p, then 240p. Each marker string is 27
            # chars long, so pos + 27 is the first character of the URL.
            p720 = resp.find("var player_quality_720p = '")
            if p720 == -1:
                p480 = resp.find("var player_quality_480p = '")
                if p480 == -1:
                    p240 = resp.find("var player_quality_240p = '")
                    if p240 == -1:
                        print("\n[None] No Video Format could be found -- Removing the Link")
                        return True
                    print("[FETCHED -- 240px]")
                    start = p240 + 27
                else:
                    # (was mislabelled "420px" — this is the 480p stream)
                    print("[FETCHED -- 480px]")
                    start = p480 + 27
            else:
                print("[FETCHED -- 720px]")
                start = p720 + 27
            end = resp.find("'", start)
            # Page title becomes the file name, stripped of characters that
            # are illegal in file names.
            file_name = str(BeautifulSoup(resp, "html.parser").title.string)
            file_name = file_name.translate(str.maketrans('', '', "'*:\"/?<>|"))
            return Download(resp[start:end], "%s.mp4" % (file_name)).now()
        # no player variables at all: caller reports an error and retries
        return None

    def __prepare__(self):
        """Drain the TBD list: fetch each queued page, download, then archive the key."""
        while os.stat(self.TBD_FILE).st_size > 0:
            link = self.helper.get_me_link(self.TBD_FILE)
            print("\n[Downloading] : http://www.pornhub.com/view_video.php?viewkey=%s" % (link))
            req = urllib.request.Request("http://www.pornhub.com/view_video.php?viewkey=%s" % (link))
            # NOTE(review): hard-coded RNKEY cookie presumably dodges a bot check — may be stale.
            req.add_header('Cookie', "RNKEY=1043543*1527941:2834309375:3318880964:1;")
            try:
                # decode: page is compared/parsed as text downstream
                resp = urllib.request.urlopen(req).read().decode('utf-8', 'replace')
                self.PH_extractor_(resp)
                if self._fetch_CDN_(resp):
                    self.helper.remove_link(self.TBD_FILE, link)
                    self.helper.append_link(self.ARCHIVE_FILE, link)
                    print("\n[WIN] : File Download Complete!")
                else:
                    print("\n[ERROR] : Something went wrong!")
            except Exception as e:
                print(e)
import argparse
import os
import urllib.request

from porn import PornHub


def main():
    """CLI entry point: crawl/download a given URL, or resume the saved queue."""
    parser = argparse.ArgumentParser()
    # '-u' short / '--url' long (the original single option string '-u/-url'
    # only accepted '-u' via argparse prefix abbreviation).
    parser.add_argument('-u', '--url', action='store', dest='url', help='Porn-Hub URL')
    parser.add_argument('--version', action='version', version='version 1.0.1 an open book project (C) 2016')
    results = parser.parse_args()

    if not results.url:
        if not os.path.exists("%s\TBD_PH.list" % (os.getenv('APPDATA'))):
            # first run with no queue and no URL: nothing we can do
            print("please Provide an URL to Fetch !!! ")
        else:
            # no URL given but a queue exists: just resume downloads
            new_porn = PornHub()
            new_porn.__prepare__()
    else:
        # crawl the provided page, queue what it links to, then drain the queue
        new_porn = PornHub()
        resp = urllib.request.urlopen(str(results.url)).read().decode('utf-8', 'replace')
        new_porn._fetch_CDN_(resp)
        new_porn.PH_extractor_(resp)
        new_porn.__prepare__()


if __name__ == '__main__':
    main()
import os


class Tools:
    """File-backed helpers for the crawler's link lists.

    Each list file is a flat sequence of whitespace-separated video keys.
    """

    def __init__(self):
        # First run: make sure all three list files exist.
        # NOTE(review): literal "\" paths assume Windows (%APPDATA%); kept as-is.
        if not os.path.exists("%s\MAIN_PH.list" % (os.getenv('APPDATA'))):
            print("[WELCOME] First Run! ")
            open("%s\MAIN_PH.list" % (os.getenv('APPDATA')), "w").close()
        if not os.path.exists("%s\TBD_PH.list" % (os.getenv('APPDATA'))):
            print("[PREPARING] Files")
            open("%s\TBD_PH.list" % (os.getenv('APPDATA')), "w").close()
        if not os.path.exists("%s\ARCHIVE_PH.list" % (os.getenv('APPDATA'))):
            print("[DONE] okay Jack we are done ! here we go !")
            open("%s\ARCHIVE_PH.list" % (os.getenv('APPDATA')), "w").close()

    def find_link(self, file_path, link):
        """Return True when *link* is one of the tokens stored in the file."""
        with open(file_path, 'r') as file_object:
            return str(link) in file_object.read().split()

    def append_link(self, file_path, link):
        """Append *link* plus a separating space; return True on success."""
        try:
            # equivalent to the old read-all/rewrite-all, but a plain append
            with open(file_path, 'a') as file_object:
                file_object.write("%s " % (str(link)))
            return True
        except OSError:
            return False

    def remove_link(self, file_path, link):
        """Remove one occurrence of *link*; False when absent or on I/O error."""
        try:
            with open(file_path, 'r') as file_object:
                links = file_object.read().split()
            links.remove(link)  # ValueError when the link is not present
            with open(file_path, 'w') as file_object:
                # Write nothing when the list is now empty: the old code left
                # a lone space behind, so the drain loop's st_size > 0 check
                # never terminated and get_me_link crashed on the blank file.
                file_object.write(' '.join(links) + ' ' if links else '')
            return True
        except (OSError, ValueError):
            return False

    def get_me_link(self, file_path):
        """Return the first queued link in the file, or '0' when there is none."""
        with open(file_path, 'r') as file_object:
            tokens = file_object.read().split()
        # split() handles a whitespace-only file safely (old code indexed
        # into it and raised IndexError)
        return tokens[0] if tokens else '0'