├── README.md ├── p_im_dl_lt.py ├── p_pl_dl_common.py ├── p_pl_dl_main.py ├── p_pl_dl_ph.py ├── p_pl_dl_pornve.py ├── p_pl_dl_pt.py ├── p_pl_dl_sb.py ├── p_pl_dl_xh.py ├── p_pl_dl_xv.py ├── p_pl_dl_youporn.py └── requirements.txt /README.md: -------------------------------------------------------------------------------- 1 | # p_pl_dl - Porn Playlist Downloader 2 | 3 | A porn playlist downloader using `yt-dlp` and `BeautifulSoup`, along with some limited support for image albums. 4 | 5 | Currently supports: 6 | 7 | - lewdthots (albums only) 8 | - pornhub 9 | - porntrex 10 | - spankbang 11 | - xhamster 12 | - xvideos 13 | 14 | *** 15 | *** 16 | 17 | ## Overview 18 | 19 | #### Basic Usage 20 | 21 | Call `p_pl_dl_main.py` from a command prompt. Pass in a text file with URLs using `-i`. Optionally, provide cookies with `-c` and specify the download destination with `-d`. 22 | 23 | For cookies, you may pass in either a single text file or a folder path containing multiple cookie text files. 24 | 25 | Videos from each site will be downloaded to a `\sites\` subfolder within the download destination (or the current working directory if `-d` is not given). 26 | 27 | Using a single cookie text file: 28 | ``` 29 | python p_pl_dl_main.py -i "C:\MyFolder\urls.txt" -c "C:\MyCookieFolder\cookies.txt" -d "F:\DownloadDestination" 30 | ``` 31 | 32 | Using multiple cookie text files stored in a folder: 33 | 34 | ``` 35 | python p_pl_dl_main.py -i "C:\MyFolder\urls.txt" -c "C:\MyCookieFolder\" -d "F:\DownloadDestination" 36 | ``` 37 | 38 | You may also restrict downloads to a specific site using `-o`. This may be useful if your `urls.txt` has lots of playlists/videos across many sites, but you only want to scrape one of them. Pass in the full name of the site as given in the list of supported sites above. 39 | 40 | ``` 41 | python p_pl_dl_main.py -i "C:\MyFolder\urls.txt" -c "C:\MyCookieFolder\" -d "F:\DownloadDestination" -o "xhamster" 42 | python p_pl_dl_main.py -i "C:\MyFolder\urls.txt" -c "C:\MyCookieFolder\" -d "F:\DownloadDestination" -o "spankbang" 43 | ``` 44 | 45 | *** 46 | 47 | #### Input TXT w/ URLs 48 | 49 | The URL text file should have one URL per line. The URLs may be for individual videos or entire playlists. 50 | 51 | Example: 52 | 53 | ``` 54 | https://www.xvideos.com/video35247781/ 55 | https://www.xhamster.com/videos/busty-blonde-girl-get-fucked-with-nice-lingerie-14429903 56 | ``` 57 | 58 | #### Cookies 59 | 60 | Each cookie text file must have `# Netscape HTTP Cookie File` as its first line. If that line is not found, the file will not be recognized as a cookie file and will be ignored.
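For reference, here is a minimal sketch of the layout the parser expects. The Netscape format uses seven tab-separated fields per cookie (domain, include-subdomains flag, path, secure flag, expiry, name, value); the domain and values below are placeholders rather than real cookies:

```
# Netscape HTTP Cookie File
.example.com	TRUE	/	TRUE	1999999999	session_id	abc123
```

Only the last two fields of each line (cookie name and value) are actually read by `parseCookieFile` in `p_pl_dl_common.py`.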
61 | -------------------------------------------------------------------------------- /p_im_dl_lt.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import p_pl_dl_common as dl_common 4 | 5 | sExtractor = 'lewdthots' 6 | 7 | sTestUrl = r"https://lewdthots.com/meg-turney-lord-raiden-topless-onlyfans-set-leaked/" 8 | 9 | 10 | def run(sUrl, *args, **kwargs): 11 | print(f"Running {sExtractor} extractor for {sUrl}\r\n") 12 | 13 | html = dl_common.session.get(sUrl, headers=dl_common.dHeaders) 14 | 15 | soup = dl_common.BeautifulSoup(html.text, 'html.parser') 16 | eGallery = soup.find(attrs={"class": 'mace-gallery-teaser'}) # Get gallery element into soup 17 | lGallery = eval(eGallery.attrs['data-g1-gallery']) # Should eval to a list of dicts 18 | 19 | lImageUrls = [] 20 | for dImage in lGallery: 21 | sImageUrl = dImage['full'] 22 | sImageUrl = sImageUrl.replace("\\", "") 23 | lImageUrls += [sImageUrl] 24 | print(f"Found {len(lImageUrls)} images") 25 | 26 | sArchive = rf".\\sites\\{sExtractor}\\dl_hist_{sExtractor}.txt" 27 | 28 | # Parse out album name, then check whether this album has already been downloaded 29 | sAlbumName = sUrl.split("/")[-2] if sUrl[-1] == '/' else sUrl.split("/")[-1] 30 | sAlbumName = sAlbumName.replace("-", "_") # str.replace() returns a new string, so keep the result 31 | 32 | bRun = True 33 | try: 34 | with open(sArchive) as archive: 35 | if sAlbumName in archive.read(): 36 | print(f"Archive already has an entry for {sAlbumName}") 37 | print("Skipping...") 38 | bRun = False 39 | except: 40 | pass 41 | 42 | if bRun: 43 | # Create subdirectory for the album - there has to be a better (more Pythonic) way... 44 | lPathComponents = ['sites', sExtractor, sAlbumName] 45 | sPath = '' 46 | for idx, sPathComponent in enumerate(lPathComponents): 47 | sPath += sPathComponent 48 | try: 49 | os.mkdir(sPath) 50 | except Exception: 51 | pass 52 | sPath += '/' 53 | 54 | nImageNum = 1 55 | for sImageUrl in lImageUrls: 56 | sImageName = sImageUrl.split('/')[-1] 57 | print(f"Processing image {nImageNum:>03} : {sImageName}") 58 | nFileName = f"{nImageNum:>03}_{sImageName}" 59 | with open(os.path.join('sites', sExtractor, sAlbumName, nFileName), 'wb') as handler: 60 | response = dl_common.requests.get(sImageUrl, stream=True) 61 | handler.write(response.content) 62 | nImageNum += 1 63 | 64 | with open(sArchive, 'a') as archive: 65 | archive.write(sAlbumName + "\r\n") 66 | 67 | -------------------------------------------------------------------------------- /p_pl_dl_common.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import re 3 | import requests 4 | import yt_dlp.utils as ytdl_utils 5 | from bs4 import BeautifulSoup 6 | from time import sleep 7 | from requests.adapters import HTTPAdapter 8 | from requests.packages.urllib3.util.ssl_ import create_urllib3_context 9 | 10 | CIPHERS = 'DEFAULT:@SECLEVEL=2' 11 | 12 | dYdlOptions = {'continuedl' : True, 13 | 'nooverwrites' : True, 14 | 'ignoreerrors' : True, 15 | 'restrictfilenames' : True, 16 | 'writeinfojson' : True, 17 | 'writeannotations' : True, 18 | 'nopostoverwrites' : True, 19 | 'download_archive' : 'dl_hist_{}.txt', 20 | 'outtmpl' : None, 21 | 'retries' : 3, 22 | 'fragment_retries' : 3 23 | } 24 | 25 | dHeaders = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:90.0) Gecko/20100101 Firefox/90.0"} 26 | 27 | session = requests.Session() 28 | dCookiesParsed = {} 29 | 30 | 31 | def randomizeHeader(): 32 | dHeaders['User-Agent'] = randomizeUserAgent() 33 | 34 | 35 | def 
randomizeUserAgent(): 36 | return ytdl_utils.random_user_agent() 37 | 38 | 39 | def parseCookieFile(sCookiesTxt): 40 | """ 41 | Parse a cookies text file and return a dictionary of key-value pairs 42 | compatible with requests. 43 | """ 44 | dCookies = {} 45 | with open(sCookiesTxt, 'r') as fp: 46 | for line in fp: 47 | # Need to keep "HTML Only" items for xhamster cookies 48 | if "#HttpOnly_xhamster.com" in line: 49 | line = line.replace("#HttpOnly_xhamster.com", ".xhamster.com") 50 | elif '#HttpOnly_.xhamster.com' in line: 51 | line = line.replace("#HttpOnly_.xhamster.com", ".xhamster.com") 52 | elif '#' in line or 'href' in line or len(line) == 1: 53 | continue 54 | 55 | if not re.match(r'^\#', line): 56 | lineFields = line.strip().split('\t') 57 | dCookies[lineFields[5]] = lineFields[6] 58 | 59 | global dCookiesParsed 60 | dCookiesParsed.update(dCookies) 61 | 62 | 63 | def parseCookies(sDirectory): 64 | """ 65 | Scans a directory for cookie text files. 66 | 67 | The cookie file must begin with: 68 | 69 | # Netscape HTTP Cookie File 70 | 71 | If that header line is not seen, the text file will be ignored. 72 | """ 73 | sRe = '# Netscape HTTP Cookie File' 74 | lTextFiles = glob.glob(rf"{sDirectory}\*.txt") 75 | 76 | for sTxt in lTextFiles: 77 | with open(sTxt, 'r') as fp: 78 | sFirstLine = fp.readline().rstrip() 79 | if sFirstLine == sRe: 80 | print(f"Parsing {sTxt} for cookies...") 81 | parseCookieFile(sTxt) 82 | else: 83 | print(f"Skipping {sTxt}...") 84 | sleep(0.250) 85 | sleep(1) 86 | 87 | 88 | def cookieHeaderStringGet(dCookies=None): 89 | if dCookies is None: 90 | dCookies = dCookiesParsed 91 | 92 | cookieString = '' 93 | for key, value in dCookies.items(): 94 | if cookieString != '': 95 | cookieString += '; ' 96 | cookieString += f"{key}={value}" 97 | 98 | return cookieString 99 | 100 | 101 | def addCipher(sPrefix): 102 | session.mount(sPrefix, CipherAdapter()) 103 | 104 | 105 | def runYtdl(): 106 | pass 107 | 108 | 109 | class Page: 110 | 111 | def __init__(self, url, headers=None): 112 | if headers is None: 113 | headers = dHeaders 114 | 115 | self.url = url 116 | self.content = session.get(url, headers=headers, cookies=dCookiesParsed) 117 | self.soup = BeautifulSoup(self.content.text, 'html.parser') 118 | self.videos = [] 119 | 120 | 121 | def _extract_video_urls(self, sFilter=None): 122 | """ 123 | Extract video URLs from a single playlist page. 
124 | """ 125 | for a in self.soup.find_all('a', href=True): 126 | href = a['href'] 127 | if 'http' not in href: 128 | continue 129 | if sFilter is not None and sFilter not in href: 130 | continue 131 | if href not in self.videos: 132 | self.videos.append(a['href']) 133 | 134 | 135 | def _html_to_text(self, sFileName=None): 136 | if sFileName is None: 137 | sFileName = "html_content.txt" 138 | text_file = open(sFileName, "w", encoding='utf-8') 139 | text_file.write(self.content.text) 140 | text_file.close() 141 | 142 | 143 | class CipherAdapter(HTTPAdapter): 144 | # Sourced from https://stackoverflow.com/questions/64967706/python-requests-https-code-403-without-but-code-200-when-using-burpsuite 145 | 146 | def init_poolmanager(self, *args, **kwargs): 147 | context = create_urllib3_context(ciphers=CIPHERS) 148 | kwargs['ssl_context'] = context 149 | return super(CipherAdapter, self).init_poolmanager(*args, **kwargs) 150 | 151 | def proxy_manager_for(self, *args, **kwargs): 152 | context = create_urllib3_context(ciphers=CIPHERS) 153 | kwargs['ssl_context'] = context 154 | return super(CipherAdapter, self).proxy_manager_for(*args, **kwargs) 155 | -------------------------------------------------------------------------------- /p_pl_dl_main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import json 4 | import traceback 5 | from time import sleep 6 | 7 | import p_pl_dl_common as dl_common 8 | import p_pl_dl_ph as dl_ph 9 | import p_pl_dl_pt as dl_pt 10 | import p_pl_dl_pornve as dl_pornve 11 | import p_pl_dl_sb as dl_sb 12 | import p_pl_dl_xh as dl_xh 13 | import p_pl_dl_xv as dl_xv 14 | import p_im_dl_lt as dl_lt 15 | 16 | 17 | def main(argv): 18 | print() 19 | 20 | if argv.dest is not None: 21 | os.chdir(argv.dest) 22 | print(f"Working download directory: {os.getcwd()}") 23 | sleep(2) 24 | 25 | print() 26 | sSourceCookies = argv.cookies 27 | if sSourceCookies is not None: 28 | print(f"Cookies source: {sSourceCookies}") 29 | if ".txt" in sSourceCookies: # A single cookie text file was passed in 30 | dl_common.parseCookieFile(sSourceCookies) 31 | else: 32 | dl_common.parseCookies(sSourceCookies) 33 | else: 34 | print(f"No cookies provided!") 35 | sleep(0.5) 36 | 37 | print() 38 | sSourceUrls = argv.input 39 | print(f"Using the following input source: {sSourceUrls}") 40 | print() 41 | sleep(0.5) 42 | 43 | dSites = {'lewdthots' : False, 44 | 'pornhub' : False, 45 | 'porntrex' : False, 46 | 'pornve' : False, 47 | 'spankbang' : False, 48 | 'xhamster' : False, 49 | 'xvideos' : False, 50 | 'youporn' : False, 51 | } 52 | 53 | dExtractors = {'lewdthots' : dl_lt, 54 | 'pornhub' : dl_ph, 55 | 'porntrex' : dl_pt, 56 | 'pornve' : dl_pornve, 57 | 'spankbang' : dl_sb, 58 | 'xhamster' : dl_xh, 59 | 'xvideos' : dl_xv, 60 | } 61 | 62 | nVideoLimit = int(argv.limit) if argv.limit is not None else None 63 | print(f"Video limit per URL = {nVideoLimit}") 64 | 65 | # Get each URL into a dict 66 | dUrlDefs = {} 67 | with open(sSourceUrls) as fSourceUrls: 68 | sLines = fSourceUrls.readlines() 69 | for sLine in sLines: 70 | sUrl = sLine.strip() 71 | print(f"URL: {sUrl}") 72 | for sSite in dSites.keys(): 73 | if sSite in sLine: 74 | dSites[sSite] = True 75 | dUrlDefs[sUrl] = sSite 76 | print() 77 | print("Detected websites:") 78 | print(json.dumps(dSites, indent=4)) 79 | print() 80 | sleep(2) 81 | 82 | if argv.only is not None: 83 | argv.only = argv.only.lower() 84 | if argv.only in dSites.keys(): 85 | for key, value in dSites.items(): 86 | if argv.only == key: 87 | dSites[key] = True
88 | else: 89 | dSites[key] = False 90 | 91 | for sUrl, sSite in dUrlDefs.items(): 92 | if sSite in dExtractors.keys() and dSites[sSite]: 93 | try: 94 | dExtractors[sSite].run(sUrl, sCookieSource=None, nVideoLimit=nVideoLimit) # Cookies should already be parsed and available when going through main 95 | except: 96 | print("\r\n\r\n") 97 | traceback.print_exc() 98 | print("\r\n\r\n") 99 | continue 100 | else: 101 | print(f"No extractor available for {sSite} - {sUrl}") 102 | sleep(0.5) 103 | print() 104 | 105 | 106 | if __name__ == '__main__': 107 | argparser = argparse.ArgumentParser() 108 | argparser.add_argument('-i', '--input', help='Input TXT file with URLs to process', required=True) 109 | argparser.add_argument('-c', '--cookies', help='Input TXT file with cookies') 110 | argparser.add_argument('-d', '--dest', help='Download destination path') 111 | argparser.add_argument('-o', '--only', help='Only run a specific site') 112 | argparser.add_argument('-l', '--limit', help='Limit the number of videos') 113 | args = argparser.parse_args() 114 | main(args) 115 | -------------------------------------------------------------------------------- /p_pl_dl_ph.py: -------------------------------------------------------------------------------- 1 | from time import sleep 2 | from time import time 3 | import yt_dlp as youtube_dl 4 | 5 | import p_pl_dl_common as dl_common 6 | 7 | sExtractor = 'pornhub' 8 | 9 | 10 | def run(sUrl, sCookieSource=None, nVideoLimit=None, bDebug=False): 11 | print(f"Running {sExtractor} extractor for {sUrl}\r\n") 12 | 13 | if sCookieSource is not None: 14 | dl_common.parseCookieFile(sCookieSource) 15 | 16 | if dl_common.dCookiesParsed is None: 17 | print("WARNING :: No cookies were provided! Private videos/playlists will fail to download!\r\n") 18 | 19 | # Attempt initial connection 20 | html = dl_common.session.get(sUrl, headers=dl_common.dHeaders, cookies=dl_common.dCookiesParsed) 21 | print(f"Initial connection status: {html.status_code}") 22 | if html.status_code == 403: 23 | raise ConnectionError(f"403 Forbidden! Please check if cookies are required! 
Private videos/playlists cannot be accessed without cookies!") 24 | elif html.status_code != 200: 25 | raise ConnectionError(f"Initial connection failed : Status {html.status_code}") 26 | print() 27 | 28 | if bDebug: 29 | # Save HTML content to a text file for debug 30 | text_file = open("html_content.txt", "w", encoding='utf-8') 31 | text_file.write(html.text) 32 | text_file.close() 33 | 34 | page = Page_Pornhub(sUrl) 35 | 36 | dYdlOptions = dict(dl_common.dYdlOptions) 37 | dYdlOptions['download_archive'] = rf".\\sites\\{sExtractor}\\{dYdlOptions['download_archive'].format(sExtractor)}" 38 | 39 | # Set options helpful for pornhub 40 | # dYdlOptions['retries'] = 10 41 | # dYdlOptions['fragment_retries'] = 10 42 | # dYdlOptions['keep_fragments'] = True 43 | # dYdlOptions['skip_unavailable_fragments'] = False 44 | # dYdlOptions['external_downloader_args'] = ["-m3u8_hold_counters", "3", "-max_reload", "3"] 45 | 46 | lFailedUrls = [] 47 | 48 | def ytdlLoop(lUrls, bLogFailures): 49 | nonlocal lFailedUrls 50 | 51 | for nIdx, sVideoUrl in enumerate(lUrls): 52 | print(f"Processing video {nIdx + 1} of {len(lUrls)} :: {sVideoUrl}") 53 | print() 54 | 55 | sVideoId = sVideoUrl.split('view_video.php?viewkey=')[-1] 56 | dYdlOptions['outtmpl'] = rf'.\\sites\\{sExtractor}\\{sVideoId}_%(title).125s.mp4' 57 | 58 | nStart = time() 59 | try: 60 | with youtube_dl.YoutubeDL(dYdlOptions) as ydl: 61 | ydl.download([sVideoUrl]) 62 | except: 63 | if bLogFailures: 64 | print(f"\r\nEncountered some error for URL = {sVideoUrl}") 65 | print(f"Adding it to the retry list...") 66 | lFailedUrls += [sVideoUrl] 67 | continue 68 | nStop = time() 69 | print(f"\r\nElapsed time for URL = {sVideoUrl}: {round((nStop - nStart) / 60, 2)} minutes\r\n") 70 | 71 | if nVideoLimit is not None and (nIdx + 1) >= nVideoLimit: 72 | print(f"Hit the specified maximum limit of {nVideoLimit}. Stopping...") 73 | break 74 | print() 75 | 76 | ytdlLoop(page.videos, bLogFailures=True) 77 | 78 | if lFailedUrls: 79 | print("Retrying URLs that failed...") 80 | for sUrl in lFailedUrls: 81 | print(sUrl) 82 | ytdlLoop(lFailedUrls, bLogFailures=False) 83 | 84 | 85 | class Page_Pornhub(dl_common.Page): 86 | 87 | def __init__(self, url): 88 | super().__init__(url) 89 | 90 | nPageStatus = self.content.status_code 91 | if nPageStatus != 200: 92 | if nPageStatus == 403: 93 | raise ConnectionError(f"403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!") 94 | 95 | self.sUrlType = self._get_url_type() 96 | self._playlistId = self.url.split('.com/')[1].split('/')[0] if self.sUrlType == 'playlist' else None 97 | 98 | if self.sUrlType == 'video': 99 | self.videos.append(self.url) 100 | self._nVideos = 1 101 | elif self.sUrlType == 'playlist': 102 | print("Playlist detected. Getting videos...") 103 | self._sUrlBaseFormat = self.urlStandardize(self.url) 104 | self._extract_video_urls() 105 | self._nVideos = len(self.videos) 106 | print(f"Found {self._nVideos} video URLs in the playlist") 107 | 108 | 109 | def _get_url_type(self): 110 | # Video URLs are in the form of /.../view_video.php?viewkey=ph602a75a6151e9 111 | # Favorites are in the form of /.../videos/favorites?page=2 112 | # Playlists take the form https://www.pornhub.com/playlist/123465789 ... 
not sure how to handle pages for these yet 113 | if '/view_video.php' in self.url: 114 | sUrlType = 'video' 115 | elif 'videos/favorites' in self.url: 116 | sUrlType = 'playlist' 117 | elif '/playlist/' in self.url: 118 | raise ValueError("Regular pornhub playlists are unsupported...") 119 | else: 120 | raise ValueError(f"Unable to determine {sExtractor} URL type for {self.url}! Please submit a bug report!") 121 | return sUrlType 122 | 123 | 124 | def _extract_video_urls(self, sFilter=None): 125 | """ 126 | Extract video URLs from all playlist pages. 127 | """ 128 | lUrlVideos = [] 129 | nPage = 0 130 | while True: 131 | nPage += 1 132 | 133 | lPageVideos = self._extract_page_urls(nPage) 134 | if lPageVideos: 135 | lUrlVideos += lPageVideos 136 | print(f"Found {len(lPageVideos)} videos on page {nPage:02}...") 137 | else: 138 | print(f"No videos found on page {nPage}. Stopping...") 139 | break 140 | self.videos += lUrlVideos 141 | 142 | 143 | def _extract_page_urls(self, nPage, sFilter=None): 144 | """ 145 | Extract video URLs from a single page of the playlist. 146 | """ 147 | sUrlBase = "https://www.pornhub.com{}" 148 | 149 | for nAttempts in range(3): 150 | sUrlPage = self._sUrlBaseFormat.format(nPage) 151 | content = dl_common.session.get(sUrlPage, cookies=dl_common.dCookiesParsed) 152 | if "503 Service Temporarily Unavailable" in content.text: 153 | sleep(3) 154 | continue 155 | soup = dl_common.BeautifulSoup(content.text, 'html.parser') 156 | break 157 | 158 | lVideos = [] 159 | lTags = soup.find_all(attrs={"class": 'pcVideoListItem js-pop videoblock videoBox'}) 160 | for tag in lTags: 161 | if 'id' in tag.attrs.keys() and 'vfavouriteVideo' in tag.attrs['id']: 162 | for a in tag.find_all('a', href=True): 163 | href = a['href'] 164 | if 'view_video.php?' not in href: 165 | continue 166 | if '&pkey=' in href: 167 | continue 168 | if href not in self.videos: 169 | sUrlFull = sUrlBase.format(href) 170 | if sUrlFull not in lVideos: 171 | lVideos.append(sUrlFull) 172 | return lVideos 173 | 174 | 175 | def urlStandardize(self, sUrl): 176 | """ 177 | Make sure URL ends with '/' and tack on f-string brackets for iterating through pages. 178 | """ 179 | if sUrl.endswith('favorites'): 180 | sUrl += '?page={}' 181 | return sUrl 182 | -------------------------------------------------------------------------------- /p_pl_dl_pornve.py: -------------------------------------------------------------------------------- 1 | from time import sleep 2 | import jsbeautifier 3 | import random 4 | import re 5 | import yt_dlp as youtube_dl 6 | 7 | import p_pl_dl_common as dl_common 8 | 9 | DEBUG = False 10 | 11 | sExtractor = 'pornve' 12 | sArchive = rf".\\sites\\{sExtractor}\\dl_hist_{sExtractor}.txt" 13 | 14 | 15 | def run(sUrl, sCookieSource=None, nVideoLimit=None, bDebug=False): 16 | print(f"Running {sExtractor} extractor for {sUrl}\r\n") 17 | 18 | if sCookieSource is not None: 19 | dl_common.parseCookieFile(sCookieSource) 20 | 21 | if dl_common.dCookiesParsed is None: 22 | print("WARNING :: No cookies were provided! Private videos/playlists will fail to download!\r\n") 23 | 24 | # Attempt initial connection 25 | dl_common.randomizeHeader() 26 | html = dl_common.session.get(sUrl, headers=dl_common.dHeaders, cookies=dl_common.dCookiesParsed) 27 | print(f"Initial connection status: {html.status_code}") 28 | if html.status_code == 403: 29 | raise ConnectionError(f"403 Forbidden! Please check if cookies are required! 
Private videos/playlists cannot be accessed without cookies!") 30 | elif html.status_code != 200: 31 | raise ConnectionError(f"Initial connection failed : Status {html.status_code}") 32 | print() 33 | sleepRandom(1, 3) 34 | 35 | if bDebug: 36 | # Save HTML content to a text file for debug 37 | text_file = open("html_content.txt", "w", encoding='utf-8') 38 | text_file.write(html.text) 39 | text_file.close() 40 | 41 | page = Page_Pornve(sUrl) 42 | sleepRandom(3, 5) 43 | 44 | dYdlOptions = dict(dl_common.dYdlOptions) 45 | dYdlOptions['download_archive'] = None 46 | 47 | for nIdx, sVideoUrl in enumerate(page.videos): 48 | if page.sUrlType == 'playlist': 49 | print(f"Processing playlist video {nIdx + 1} of {page._nVideos} :: {sVideoUrl}") 50 | print() 51 | 52 | # Get the actual video stream info for a video link from a playlist 53 | if page.sUrlType == 'playlist': 54 | pageVideo = Page_Pornve(sVideoUrl) 55 | sVideoName = pageVideo._sVideoName 56 | sVideoStreamUrl = pageVideo.videos[0] 57 | sPageUrl = pageVideo.url 58 | else: 59 | sVideoName = page._sVideoName 60 | sVideoStreamUrl = page.videos[0] 61 | sPageUrl = page.url 62 | 63 | bRun = True 64 | try: 65 | with open(sArchive) as archive: 66 | if sPageUrl in archive.read(): 67 | print(f"Archive already has an entry for {sPageUrl}") 68 | print("Skipping...") 69 | bRun = False 70 | except: 71 | pass 72 | 73 | if bRun: 74 | dYdlOptions['outtmpl'] = rf'.\\sites\\{sExtractor}\\{sVideoName}.%(ext)s' 75 | 76 | with youtube_dl.YoutubeDL(dYdlOptions) as ydl: 77 | ydl.cache.remove() 78 | ret = ydl.download([sVideoStreamUrl]) 79 | 80 | # Need to do our own archiving since YTDL will treat everything with the name "index-v1-a1" because 81 | # of how the video is extracted in _extract_video_stream 82 | # YTDL ret 0 is good, 1 is bad 83 | if not ret: 84 | with open(sArchive, 'a') as archive: 85 | archive.write(sPageUrl + "\r\n") 86 | 87 | if nVideoLimit is not None and (nIdx + 1) >= nVideoLimit: 88 | print(f"Hit the specified maximum limit of {nVideoLimit}. Stopping...") 89 | break 90 | print() 91 | sleepRandom(3, 5) 92 | 93 | 94 | class Page_Pornve(dl_common.Page): 95 | 96 | def __init__(self, url): 97 | super().__init__(url) 98 | 99 | nPageStatus = self.content.status_code 100 | if nPageStatus != 200: 101 | if nPageStatus == 403: 102 | raise ConnectionError(f"403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!") 103 | 104 | self.sUrlType = self._get_url_type() 105 | 106 | if self.sUrlType == 'video': 107 | sVideoStreamUrl = self._extract_video_stream() 108 | self.videos.append(sVideoStreamUrl) 109 | 110 | sVideoNameComponents = self.url.split('.html')[0].split('/')[-2:] 111 | self._sVideoName = '_'.join(reversed(sVideoNameComponents)) 112 | 113 | self._nVideos = 1 114 | elif self.sUrlType == 'playlist': 115 | print("Playlist detected. Getting videos...") 116 | 117 | lUrlComponents = self.url.split('/') 118 | self._playlistId = lUrlComponents[-2] if not lUrlComponents[-1] else lUrlComponents[-1] 119 | 120 | self._extract_video_urls() 121 | self._nVideos = len(self.videos) 122 | print(f"Found {self._nVideos} video URLs in the playlist\r\n") 123 | 124 | 125 | def _get_url_type(self): 126 | if '/playlist/' in self.url: 127 | sUrlType = 'playlist' 128 | else: 129 | sUrlType = 'video' 130 | return sUrlType 131 | 132 | 133 | def _extract_video_urls(self, sFilter=None): 134 | """ 135 | Extract video URLs from all playlist pages. 
136 | """ 137 | self._sUrlBaseFormat = f"https://pornve.com/?hide_search=1&op=search&playlist={self._playlistId}&sort_field=file_created&sort_order=down&page={{}}" 138 | 139 | lUrlVideos = [] 140 | nPage = 0 141 | while True: 142 | nPage += 1 143 | 144 | lPageVideos = self._extract_page_urls(nPage) 145 | if lPageVideos: 146 | lUrlVideos += lPageVideos 147 | print(f"Found {len(lPageVideos)} videos on page {nPage:02}...") 148 | else: 149 | print(f"No videos found on page {nPage}. Stopping...") 150 | break 151 | self.videos += lUrlVideos 152 | 153 | 154 | def _extract_page_urls(self, nPage, sFilter=None): 155 | """ 156 | Extract video URLs from a single page of the playlist. 157 | """ 158 | dl_common.randomizeHeader() 159 | for nAttempts in range(3): 160 | sUrlPage = self._sUrlBaseFormat.format(nPage) 161 | content = dl_common.session.get(sUrlPage, headers=dl_common.dHeaders, cookies=dl_common.dCookiesParsed) 162 | if "503 Service Temporarily Unavailable" in content.text: 163 | if DEBUG: 164 | print("503 encountered! Sleeping...") 165 | sleepRandom() 166 | continue 167 | soup = dl_common.BeautifulSoup(content.text, 'html.parser') 168 | sleepRandom(1, 3) 169 | break 170 | 171 | lVideos = [] 172 | lProcessed = [] 173 | for a in soup.find_all('a', href=True): 174 | href = a['href'] 175 | if href in lProcessed: 176 | continue 177 | if f'?list={self._playlistId}' not in href: 178 | continue 179 | if sFilter is not None and sFilter not in href: 180 | continue 181 | if href not in self.videos: 182 | sCleanedUrl = self._clean_video_url(href) 183 | if sCleanedUrl is not None and sCleanedUrl not in lVideos: 184 | lVideos.append(sCleanedUrl) 185 | lProcessed += [href] 186 | return lVideos 187 | 188 | 189 | def _extract_video_stream(self): 190 | for nAttempts in range(3): 191 | content = dl_common.session.get(self.url, headers=dl_common.dHeaders, cookies=dl_common.dCookiesParsed) 192 | if "503 Service Temporarily Unavailable" in content.text: 193 | if DEBUG: 194 | print("503 encountered! Sleeping...") 195 | sleepRandom() 196 | continue 197 | sleepRandom(1, 3) 198 | break 199 | 200 | sPackedCode = self._js_find_packed_code(content.text) 201 | sVideoStreamUrl = self._js_unpack_and_get_stream(sPackedCode) 202 | 203 | return sVideoStreamUrl 204 | 205 | 206 | def _clean_video_url(self, sUrlMasked, nAttempts=3): 207 | """ 208 | Unmask playlist videos. 209 | """ 210 | return sUrlMasked.split("?list=")[0] 211 | 212 | 213 | def _js_find_packed_code(self, htmlContent): 214 | lHtmlLines = htmlContent.split("\r\n") 215 | sPackedCode = None 216 | for row in lHtmlLines: 217 | if r"""eval(function(p,a,c,k,e,d)""" in row: 218 | sPackedCode = row 219 | if sPackedCode is None: 220 | raise ValueError("Did not find any packed JS code...") 221 | 222 | nIdxStart = len(sPackedCode) - len(sPackedCode.lstrip()) 223 | sPackedCode = sPackedCode[nIdxStart:] 224 | 225 | if sPackedCode[-1:] == '\n': 226 | sPackedCode = sPackedCode[:-1] 227 | 228 | return sPackedCode 229 | 230 | 231 | def _js_unpack_and_get_stream(self, packedData): 232 | """ 233 | Pass in obfuscated "eval(function(p,a,c,k,e,d)..." string 234 | """ 235 | url = None 236 | unpacked_data = jsbeautifier.beautify(packedData).split('"') 237 | for sData in unpacked_data: 238 | if ".m3u8" in sData: 239 | url = sData 240 | if url is None: 241 | raise ValueError("Could not find a video stream URL!") 242 | 243 | # unpacked_data_split = unpacked_data.split('>= nVideoLimit: 119 | print(f"Hit the specified maximum limit of {nVideoLimit}. 
Stopping...") 120 | break 121 | print() 122 | 123 | nTimeEnd = time() 124 | print(f"Run time: {round((nTimeEnd - nTimeStart) / 60, 2)} minutes") 125 | 126 | 127 | def urlBaseFormatGet(sUrl): 128 | """ 129 | Create the base f-string URL that be used to iteratively go through pages. 130 | 131 | Playlists and favorites use an AJAX format. They do not have simple page numbers. 132 | Search results use simple page numbers. 133 | """ 134 | sUrlBase = None 135 | if 'playlists' in sUrl: 136 | print("Using 'playlists' format...") 137 | nType = 10 138 | nPlaylistId = sUrl.split('/')[-2] 139 | sUrlBase = f'https://www.porntrex.com/my/playlists/{nPlaylistId}/?mode=async&function=get_block&block_id=list_videos_my_favourite_videos&fav_type={nType}&playlist_id={nPlaylistId}&sort_by=&from_my_fav_videos={{}}' 140 | elif 'favourites' in sUrl: 141 | print("Using 'favourites' format...") 142 | nType = 0 143 | nPlaylistId = 0 144 | sUrlBase = f'https://www.porntrex.com/my/favourites/videos/?mode=async&function=get_block&block_id=list_videos_my_favourite_videos&fav_type={nType}&playlist_id={nPlaylistId}&sort_by=&from_my_fav_videos={{}}' 145 | elif 'search' in sUrl: 146 | if not sUrl.endswith('/'): 147 | sUrl += '/' 148 | sUrl += '{}' 149 | sUrlBase = sUrl 150 | return sUrlBase 151 | 152 | 153 | class Video(dl_common.Page): 154 | 155 | def __init__(self, url): 156 | super().__init__(url) 157 | self.downloadUrl = None 158 | 159 | lUrlComponents = self.url.split('/') 160 | if lUrlComponents[-1] == '': 161 | lUrlComponents.pop(-1) 162 | self._lUrlComponents = lUrlComponents 163 | 164 | self.sVideoId = self._lUrlComponents[-2] 165 | self.sVideoName = self._lUrlComponents[-1] 166 | self.sFullName = '_'.join([self.sVideoId, self.sVideoName]) + '.mp4' 167 | 168 | self._extract_video_urls(sFilter='get_file') 169 | self._extract_video_largest() 170 | 171 | 172 | def _extract_video_largest(self): 173 | """ 174 | Get the file that has the largest file size, which should be the highest quality. 175 | """ 176 | nIdxBiggest = 0 177 | nBiggestSize = 0 178 | for index in range(len(self.videos)): 179 | nSize = int(dl_common.session.get(self.videos[index], cookies=dl_common.dCookiesParsed, stream=True).headers['Content-Length']) 180 | if nSize > nBiggestSize: 181 | nIdxBiggest = index 182 | nBiggestSize = nSize 183 | self.downloadUrl = self.videos[nIdxBiggest] 184 | -------------------------------------------------------------------------------- /p_pl_dl_sb.py: -------------------------------------------------------------------------------- 1 | from time import sleep 2 | from time import time 3 | import yt_dlp as youtube_dl 4 | import random 5 | 6 | import p_pl_dl_common as dl_common 7 | 8 | DEBUG = False 9 | 10 | sExtractor = 'spankbang' 11 | 12 | 13 | def run(sUrl, sCookieSource=None, nVideoLimit=None, bDebug=False): 14 | print(f"Running {sExtractor} extractor for {sUrl}\r\n") 15 | 16 | if sCookieSource is not None: 17 | dl_common.parseCookieFile(sCookieSource) 18 | 19 | if dl_common.dCookiesParsed is None: 20 | print("WARNING :: No cookies were provided! 
Private videos/playlists will fail to download!\r\n") 21 | 22 | # 20210619 :: Workaround for https://github.com/ppldl/p_pl_dl/issues/1 23 | # 20220710 :: Wrapping this in lazy try-except since I'm not sure this is needed anymore since I use ytdlp instead of ytdl 24 | try: 25 | dl_common.addCipher("https://spankbang.com") 26 | except: 27 | pass 28 | 29 | # Attempt initial connection 30 | dl_common.randomizeHeader() 31 | html = dl_common.session.get(sUrl, headers=dl_common.dHeaders, cookies=dl_common.dCookiesParsed) 32 | print(f"Initial connection status: {html.status_code}") 33 | if html.status_code == 403: 34 | raise ConnectionError(f"403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!") 35 | elif html.status_code != 200: 36 | raise ConnectionError(f"Initial connection failed : Status {html.status_code}") 37 | print() 38 | sleepRandom(1, 3) 39 | 40 | if bDebug: 41 | # Save HTML content to a text file for debug 42 | text_file = open("html_content.txt", "w", encoding='utf-8') 43 | text_file.write(html.text) 44 | text_file.close() 45 | 46 | page = Page_Spankbang(sUrl) 47 | sleepRandom(3, 5) 48 | 49 | dYdlOptions = dict(dl_common.dYdlOptions) 50 | dYdlOptions['download_archive'] = rf".\\sites\\{sExtractor}\\{dYdlOptions['download_archive'].format(sExtractor)}" 51 | # dYdlOptions['referer'] = 'https://spankbang.com' 52 | # dYdlOptions['user_agent'] = dl_common.dHeaders['User-Agent'] # Not needed - YTDL already has a UA randomizer 53 | 54 | # Store info on videos that have already been downloaded 55 | sArchive = rf".\\sites\\{sExtractor}\\dl_hist_{sExtractor}.txt" 56 | with open(sArchive) as file: 57 | lines = file.readlines() 58 | lVidHistory = [line.rstrip().split(' ')[1] for line in lines] 59 | print(lVidHistory) 60 | 61 | for nIdx, sVideoUrl in enumerate(page.videos): 62 | if page.sUrlType == 'playlist': 63 | print(f"Processing playlist video {nIdx + 1} of {page._nVideos} :: {sVideoUrl}") 64 | print() 65 | 66 | sVidId = sVideoUrl.split('/')[3] 67 | print(sVidId) 68 | if sVidId in lVidHistory: 69 | print(f"{sVidId} has already been downloaded. Moving on...") 70 | continue 71 | 72 | dYdlOptions['outtmpl'] = rf'.\\sites\\{sExtractor}\\%(title).125s.%(ext)s' 73 | with youtube_dl.YoutubeDL(dYdlOptions) as ydl: 74 | ydl.cache.remove() 75 | ydl.download([sVideoUrl]) 76 | 77 | if nVideoLimit is not None and (nIdx + 1) >= nVideoLimit: 78 | print(f"Hit the specified maximum limit of {nVideoLimit}. Stopping...") 79 | break 80 | print() 81 | sleepRandom() 82 | 83 | 84 | class Page_Spankbang(dl_common.Page): 85 | 86 | def __init__(self, url): 87 | super().__init__(url) 88 | 89 | nPageStatus = self.content.status_code 90 | if nPageStatus != 200: 91 | if nPageStatus == 403: 92 | raise ConnectionError(f"403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!") 93 | 94 | self.sUrlType = self._get_url_type() 95 | self._playlistId = self.url.split('.com/')[1].split('/')[0] if self.sUrlType == 'playlist' else None 96 | self._sUrlBaseFormat = urlStandardize(self.url) 97 | 98 | if self.sUrlType == 'video': 99 | self.videos.append(self.url) 100 | self._nVideos = 1 101 | elif self.sUrlType == 'playlist': 102 | print("Playlist detected. 
Getting videos...") 103 | self._extract_video_urls() 104 | self._nVideos = len(self.videos) 105 | print(f"Found {self._nVideos} video URLs in the playlist") 106 | 107 | 108 | def _get_url_type(self): 109 | # Video URLs are in the form of spankbang.com/vwxyz/video/full-content-name 110 | # Playlists are in the form of spankbang.com/ijklm/playlist/name-of-playlist 111 | # Within a playlist, its videos are "masked" as spankbang.com/ijklm-abc123/playlist/name-of-playlist 112 | if '/video/' in self.url: 113 | sUrlType = 'video' 114 | elif '/playlist/' in self.url: 115 | if '-' in self.url: 116 | sUrlType = 'video_masked' 117 | else: 118 | sUrlType = 'playlist' 119 | else: 120 | raise ValueError(f"Unable to determine {sExtractor} URL type for {self.url}! Please submit a bug report!") 121 | return sUrlType 122 | 123 | 124 | def _extract_video_urls(self, sFilter=None): 125 | """ 126 | Extract video URLs from all playlist pages. 127 | """ 128 | lUrlVideos = [] 129 | nPage = 0 130 | timeStart = time() 131 | while True: 132 | nPage += 1 133 | 134 | lPageVideos = self._extract_page_urls(nPage) 135 | if lPageVideos: 136 | lUrlVideos += lPageVideos 137 | print(f"Found {len(lPageVideos)} videos on page {nPage:02}...") 138 | else: 139 | print(f"No videos found on page {nPage}. Stopping...") 140 | break 141 | timeStop = time() 142 | timeElapsed = round((timeStop - timeStart) / 60, 1) 143 | print(f"Time elapsed: {timeElapsed} minutes") 144 | self.videos += lUrlVideos 145 | 146 | 147 | def _extract_page_urls(self, nPage, sFilter=None): 148 | """ 149 | Extract video URLs from a single page of the playlist. 150 | """ 151 | dl_common.randomizeHeader() 152 | for nAttempts in range(3): 153 | sUrlPage = self._sUrlBaseFormat.format(nPage) 154 | content = dl_common.session.get(sUrlPage, headers=dl_common.dHeaders, cookies=dl_common.dCookiesParsed) 155 | if "503 Service Temporarily Unavailable" in content.text: 156 | if DEBUG: 157 | print("503 encountered! Sleeping...") 158 | sleepRandom() 159 | continue 160 | soup = dl_common.BeautifulSoup(content.text, 'html.parser') 161 | sleepRandom(3, 5) 162 | break 163 | 164 | lVideos = [] 165 | lProcessed = [] 166 | for a in soup.find_all('a', href=True): 167 | href = a['href'] 168 | if href in lProcessed: 169 | continue 170 | if 'playlist' not in href: 171 | continue 172 | if '/lang/' in href: 173 | continue 174 | if f'{self._playlistId}-' not in href: 175 | continue 176 | if sFilter is not None and sFilter not in href: 177 | continue 178 | if href not in self.videos: 179 | sUnmaskedUrl = self._unmask_video_url(href) 180 | if sUnmaskedUrl is not None and sUnmaskedUrl not in lVideos: 181 | lVideos.append(sUnmaskedUrl) 182 | lProcessed += [href] 183 | return lVideos 184 | 185 | 186 | def _unmask_video_url(self, sUrlMasked, nAttempts=3): 187 | """ 188 | Unmask playlist videos. 
189 | """ 190 | sUrlFull = rf"https://spankbang.com{sUrlMasked}" 191 | if DEBUG: 192 | print(sUrlFull) 193 | 194 | # Load up the page using the masked URL from the playlist, then search its content for the real URL 195 | for nAttempt in range(nAttempts): 196 | content = dl_common.session.get(sUrlFull, headers=dl_common.dHeaders, cookies=dl_common.dCookiesParsed) 197 | soup = dl_common.BeautifulSoup(content.text, 'html.parser') 198 | 199 | try: 200 | sCanonicalUrl = soup.find(attrs={'rel': 'canonical'}).attrs['href'] 201 | except: 202 | sCanonicalUrl = None 203 | 204 | if sCanonicalUrl is not None: 205 | break 206 | else: 207 | sleepRandom(1, 5) 208 | 209 | if sCanonicalUrl is None: 210 | print(f"Failed to unmask a URL for {sUrlMasked}") 211 | # sleepRandom(1, 3) 212 | return sCanonicalUrl 213 | 214 | 215 | def urlStandardize(sUrl): 216 | """ 217 | Make sure URL ends with '/' and tack on f-string brackets for iterating through pages. 218 | """ 219 | if sUrl[-1] != '/': 220 | sUrl += '/' 221 | sUrl += '{}' 222 | return sUrl 223 | 224 | 225 | def sleepRandom(nMin=5, nMax=10): 226 | """ 227 | Sleep for some random interval to help avoid tripping Cloudflare's anti-bot protection. 228 | """ 229 | nSleep = round(random.uniform(min(nMin, nMax), max(nMin, nMax)), 2) 230 | if DEBUG: 231 | print(nSleep) 232 | sleep(nSleep) 233 | -------------------------------------------------------------------------------- /p_pl_dl_xh.py: -------------------------------------------------------------------------------- 1 | import yt_dlp as youtube_dl 2 | 3 | import p_pl_dl_common as dl_common 4 | 5 | sExtractor = 'xhamster' 6 | 7 | # Something changed with xhamster where these headers are now required 8 | def _xhamsterHeaderGet(): 9 | dHeaders_xh = {'Host' : 'xhamster.com', 10 | 'User-Agent' : dl_common.randomizeUserAgent(), 11 | 'DNT' : '1', 12 | 'Connection' : 'keep-alive', 13 | 'Sec-Fetch-Dest' : 'document', 14 | 'Sec-Fetch-Mode' : 'navigate', 15 | 'Sec-Fetch-Site' : 'none', 16 | 'Sec-Fetch-User' : '?1', 17 | 'Cache-Control' : 'max-age=0', 18 | 'Cookie' : dl_common.cookieHeaderStringGet(), 19 | 'TE' : 'trailers' 20 | } 21 | return dHeaders_xh 22 | 23 | 24 | def run(sUrl, sCookieSource=None, nVideoLimit=None, bDebug=False): 25 | print(f"Running {sExtractor} extractor for {sUrl}\r\n") 26 | 27 | if sCookieSource is not None: 28 | dl_common.parseCookieFile(sCookieSource) 29 | 30 | if dl_common.dCookiesParsed is None: 31 | print("WARNING :: No cookies were provided! Private videos/playlists will fail to download!\r\n") 32 | 33 | if f'{sExtractor}.com/videos' in sUrl: 34 | sUrlType = 'video' 35 | elif f'{sExtractor}.com/my' in sUrl: 36 | sUrlType = 'playlist' 37 | else: 38 | raise ValueError(f"Unable to determine {sExtractor} URL type for {sUrl}! Please submit a bug report!") 39 | 40 | dXhamsterHeader = _xhamsterHeaderGet() 41 | 42 | # Attempt initial connection 43 | html = dl_common.session.get(sUrl, headers=dl_common.dHeaders, cookies=dl_common.dCookiesParsed) 44 | print(f"Initial connection status: {html.status_code}") 45 | if html.status_code == 403: 46 | raise ConnectionError(f"403 Forbidden! Please check if cookies are required! 
Private videos/playlists cannot be accessed without cookies!") 47 | elif html.status_code != 200: 48 | raise ConnectionError(f"Initial connection failed : Status {html.status_code}") 49 | print() 50 | 51 | if bDebug: 52 | # Save HTML content to a text file for debug 53 | text_file = open("html_content.txt", "w", encoding='utf-8') 54 | text_file.write(html.text) 55 | text_file.close() 56 | 57 | lUrlVideos = [] 58 | if sUrlType == 'playlist': 59 | print("Playlist detected. Getting videos...") 60 | sUrlBaseFormat = urlStandardize(sUrl) 61 | nPage = 0 62 | while True: 63 | nPage += 1 64 | print(f"Attempting page {nPage:02}") 65 | sUrlPage = sUrlBaseFormat.format(f'{nPage:02}') 66 | page = dl_common.Page(sUrlPage, headers=dXhamsterHeader) 67 | nPageStatus = page.content.status_code 68 | if nPageStatus != 200: 69 | if nPageStatus == 403: 70 | raise ConnectionError(f"403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!") 71 | elif nPageStatus == 404: 72 | print(f"Page {nPage} returned 404!") 73 | print(f"Assuming page {nPage - 1} was the last page of the playlist") 74 | break 75 | 76 | if "Page not found" in page.content.text: 77 | break 78 | 79 | page._extract_video_urls() 80 | if page.videos: 81 | lUrlVideos += page.videos 82 | else: 83 | break 84 | 85 | # Remove non-video URLs that may have been picked up 86 | lTemp = [] 87 | for sUrl in lUrlVideos: 88 | if 'com/videos/recommended' in sUrl: 89 | continue 90 | if 'com/videos' in sUrl: 91 | lTemp += [sUrl] 92 | lUrlVideos = lTemp 93 | 94 | nNumVideos = len(lUrlVideos) 95 | print(f"Found {nNumVideos} video URLs in the playlist") 96 | if bDebug: 97 | for sUrl in lUrlVideos: 98 | print(sUrl) 99 | 100 | elif sUrlType == 'video': 101 | lUrlVideos = [sUrl] 102 | 103 | dYdlOptions = dict(dl_common.dYdlOptions) 104 | dYdlOptions['download_archive'] = rf".\\sites\\{sExtractor}\\{dYdlOptions['download_archive'].format(sExtractor)}" 105 | 106 | for nIdx, sVideoUrl in enumerate(lUrlVideos): 107 | if sUrlType == 'playlist': 108 | print(f"Processing playlist video {nIdx + 1} of {nNumVideos} :: {sVideoUrl}") 109 | print() 110 | 111 | dYdlOptions['outtmpl'] = rf'.\\sites\\{sExtractor}\\%(title)s.%(ext)s' 112 | 113 | with youtube_dl.YoutubeDL(dYdlOptions) as ydl: 114 | ydl.download([sVideoUrl]) 115 | 116 | if nVideoLimit is not None and (nIdx + 1) >= nVideoLimit: 117 | print(f"Hit the specified maximum limit of {nVideoLimit}. Stopping...") 118 | break 119 | print() 120 | 121 | 122 | def urlStandardize(sUrl): 123 | """ 124 | Make sure URL ends with '/' and tack on f-string brackets for iterating through pages. 125 | """ 126 | if sUrl[-1] != '/': 127 | sUrl += '/' 128 | sUrl += '{}' 129 | return sUrl 130 | -------------------------------------------------------------------------------- /p_pl_dl_xv.py: -------------------------------------------------------------------------------- 1 | import yt_dlp as youtube_dl 2 | 3 | import p_pl_dl_common as dl_common 4 | 5 | sExtractor = 'xvideos' 6 | 7 | 8 | def run(sUrl, sCookieSource=None, nVideoLimit=None, bDebug=False): 9 | print(f"Running {sExtractor} extractor for {sUrl}\r\n") 10 | 11 | if sCookieSource is not None: 12 | dl_common.parseCookieFile(sCookieSource) 13 | 14 | if dl_common.dCookiesParsed is None: 15 | print("WARNING :: No cookies were provided! 
Private videos/playlists will fail to download!\r\n") 16 | 17 | # Attempt initial connection 18 | html = dl_common.session.get(sUrl, headers=dl_common.dHeaders, cookies=dl_common.dCookiesParsed) 19 | print(f"Initial connection status: {html.status_code}") 20 | if html.status_code == 403: 21 | raise ConnectionError(f"403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!") 22 | elif html.status_code != 200: 23 | raise ConnectionError(f"Initial connection failed : Status {html.status_code}") 24 | print() 25 | 26 | if bDebug: 27 | # Save HTML content to a text file for debug 28 | text_file = open("html_content.txt", "w", encoding='utf-8') 29 | text_file.write(html.text) 30 | text_file.close() 31 | 32 | page = Page_Xvideos(sUrl) 33 | nPageStatus = page.content.status_code 34 | if nPageStatus != 200: 35 | if nPageStatus == 403: 36 | raise ConnectionError(f"403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!") 37 | 38 | dYdlOptions = dict(dl_common.dYdlOptions) 39 | dYdlOptions['download_archive'] = rf".\\sites\\{sExtractor}\\{dYdlOptions['download_archive'].format(sExtractor)}" 40 | 41 | print() 42 | for nIdx, sVideoUrl in enumerate(page.videos): 43 | if page.sUrlType == 'playlist': 44 | print(f"Processing playlist video {nIdx + 1} of {len(page.videos)} :: {sVideoUrl}") 45 | print() 46 | 47 | dYdlOptions['outtmpl'] = rf'.\\sites\\{sExtractor}\\%(title).125s.%(ext)s' 48 | 49 | with youtube_dl.YoutubeDL(dYdlOptions) as ydl: 50 | ydl.download([sVideoUrl]) 51 | 52 | if nVideoLimit is not None and (nIdx + 1) >= nVideoLimit: 53 | print(f"Hit the specified maximum limit of {nVideoLimit}. Stopping...") 54 | break 55 | print() 56 | 57 | 58 | class Page_Xvideos(dl_common.Page): 59 | 60 | def __init__(self, url): 61 | super().__init__(url) 62 | if f'{sExtractor}.com/video' in self.url: 63 | sUrlType = 'video' 64 | elif f'{sExtractor}.com/favorite' in self.url: 65 | sUrlType = 'playlist' 66 | else: 67 | raise ValueError(f"Unable to determine {sExtractor} URL type for {self.url}! Please submit a bug report!") 68 | self.sUrlType = sUrlType 69 | 70 | self._sUrlBaseFormat = urlStandardize(self.url) 71 | 72 | if self.sUrlType == 'video': 73 | self.videos.append(self.url) 74 | elif self.sUrlType == 'playlist': 75 | print("Playlist detected. Getting videos...") 76 | self._extract_video_urls() 77 | 78 | 79 | def _extract_video_urls(self, sFilter=None): 80 | """ 81 | Extract video URLs from all playlist pages. 82 | """ 83 | nNumPages = self._NumPagesGet() 84 | print(f"Found {nNumPages} pages in the playlist...") 85 | 86 | lUrlVideos = [] 87 | for nPage in range(0, nNumPages): 88 | lPageVideos = self._extract_page_urls(nPage) 89 | if lPageVideos: 90 | lUrlVideos += lPageVideos 91 | print(f"Found {len(lPageVideos)} on page {nPage + 1}") 92 | else: 93 | print(f"Failed to load page {nPage + 1}!") 94 | break 95 | 96 | # Remove non-video URLs that may have been picked up 97 | lTemp = [] 98 | for sUrl in lUrlVideos: 99 | if 'com/videos/recommended' in sUrl: 100 | continue 101 | if 'com/video' in sUrl: 102 | lTemp += [sUrl] 103 | else: 104 | raise ValueError(f"Not sure about this one: {sUrl}") 105 | lUrlVideos = lTemp 106 | 107 | nNumVideos = len(lUrlVideos) 108 | print(f"\r\nFound {nNumVideos} video URLs in the playlist") 109 | self.videos += lUrlVideos 110 | 111 | 112 | def _extract_page_urls(self, nPage, sFilter=None): 113 | """ 114 | Extract video URLs from a single playlist page. 
115 | """ 116 | sUrlPage = self._sUrlBaseFormat.format(nPage) 117 | content = dl_common.session.get(sUrlPage, headers=dl_common.dHeaders, cookies=dl_common.dCookiesParsed) 118 | soup = dl_common.BeautifulSoup(content.text, 'html.parser') 119 | 120 | lVideos = [] 121 | for a in soup.find_all('a', href=True): 122 | href = a['href'] 123 | if '/video' != href[:6]: 124 | continue 125 | if '/videos-i-like' == href: 126 | continue 127 | 128 | if sFilter is not None and sFilter not in href: 129 | continue 130 | 131 | sVideoUrlFull = 'https://www.xvideos.com' + a['href'] 132 | sVideoUrlSplit = sVideoUrlFull.split('?pl=')[0] 133 | 134 | if sVideoUrlSplit not in lVideos: 135 | lVideos.append(sVideoUrlSplit) 136 | return lVideos 137 | 138 | 139 | def _NumPagesGet(self): 140 | """ 141 | Return the number of pages in the playlist. 142 | """ 143 | pagination_block = self.soup.find(attrs={"class": 'pagination'}) 144 | 145 | # Check for playlist with many pages first (i.e. pagination of pages) 146 | try: 147 | nPages = int(pagination_block.find(attrs={"class": "last-page"}).string) 148 | except: 149 | # Then for a multi-page playlist (no pagination of pages) 150 | try: 151 | nPages = len(pagination_block.find_all('li')) - 1 152 | # If no pagination, assume only one page 153 | except AttributeError: 154 | nPages = 1 155 | return nPages 156 | 157 | 158 | def urlStandardize(sUrl): 159 | """ 160 | Make sure URL ends with '/' and tack on f-string brackets for iterating through pages. 161 | """ 162 | if sUrl[-1] != '/': 163 | sUrl += '/' 164 | sUrl += '{}' 165 | return sUrl 166 | -------------------------------------------------------------------------------- /p_pl_dl_youporn.py: -------------------------------------------------------------------------------- 1 | from time import sleep 2 | from time import time 3 | import yt_dlp as youtube_dl 4 | 5 | import p_pl_dl_common as dl_common 6 | 7 | sExtractor = 'youporn' 8 | 9 | 10 | def run(sUrl, sCookieSource=None, nVideoLimit=None, bDebug=False): 11 | print(f"Running {sExtractor} extractor for {sUrl}\r\n") 12 | 13 | if sCookieSource is not None: 14 | dl_common.parseCookieFile(sCookieSource) 15 | 16 | if dl_common.dCookiesParsed is None: 17 | print("WARNING :: No cookies were provided! Private videos/playlists will fail to download!\r\n") 18 | 19 | # Attempt initial connection 20 | html = dl_common.session.get(sUrl, headers=dl_common.dHeaders, cookies=dl_common.dCookiesParsed) 21 | print(f"Initial connection status: {html.status_code}") 22 | if html.status_code == 403: 23 | raise ConnectionError(f"403 Forbidden! Please check if cookies are required! 
Private videos/playlists cannot be accessed without cookies!") 24 | elif html.status_code != 200: 25 | raise ConnectionError(f"Initial connection failed : Status {html.status_code}") 26 | print() 27 | 28 | if bDebug: 29 | # Save HTML content to a text file for debug 30 | text_file = open("html_content.txt", "w", encoding='utf-8') 31 | text_file.write(html.text) 32 | text_file.close() 33 | 34 | page = Page_Youporn(sUrl) 35 | 36 | dYdlOptions = dict(dl_common.dYdlOptions) 37 | dYdlOptions['download_archive'] = rf".\\sites\\{sExtractor}\\{dYdlOptions['download_archive'].format(sExtractor)}" 38 | 39 | lFailedUrls = [] 40 | 41 | def ytdlLoop(lUrls, bLogFailures): 42 | nonlocal lFailedUrls 43 | 44 | for nIdx, sVideoUrl in enumerate(lUrls): 45 | print(f"Processing video {nIdx + 1} of {len(lUrls)} :: {sVideoUrl}") 46 | print() 47 | 48 | sVideoId = sVideoUrl.split('view_video.php?viewkey=')[-1] 49 | dYdlOptions['outtmpl'] = rf'.\\sites\\{sExtractor}\\{sVideoId}_%(title).125s.mp4' 50 | 51 | nStart = time() 52 | try: 53 | with youtube_dl.YoutubeDL(dYdlOptions) as ydl: 54 | ydl.download([sVideoUrl]) 55 | except: 56 | if bLogFailures: 57 | print(f"\r\nEncountered some error for URL = {sVideoUrl}") 58 | print(f"Adding it to the retry list...") 59 | lFailedUrls += [sVideoUrl] 60 | continue 61 | nStop = time() 62 | print(f"\r\nElapsed time for URL = {sVideoUrl}: {round((nStop - nStart) / 60, 2)} minutes\r\n") 63 | 64 | if nVideoLimit is not None and (nIdx + 1) >= nVideoLimit: 65 | print(f"Hit the specified maximum limit of {nVideoLimit}. Stopping...") 66 | break 67 | print() 68 | 69 | ytdlLoop(page.videos, bLogFailures=True) 70 | 71 | if lFailedUrls: 72 | print("Retrying URLs that failed...") 73 | for sUrl in lFailedUrls: 74 | print(sUrl) 75 | ytdlLoop(lFailedUrls, bLogFailures=False) 76 | 77 | 78 | class Page_Youporn(dl_common.Page): 79 | 80 | def __init__(self, url): 81 | super().__init__(url) 82 | 83 | nPageStatus = self.content.status_code 84 | if nPageStatus != 200: 85 | if nPageStatus == 403: 86 | raise ConnectionError(f"403 Forbidden! Please check if cookies are required! Private videos/playlists cannot be accessed without cookies!") 87 | 88 | self.sUrlType = self._get_url_type() 89 | self._playlistId = self.url.split('.com/')[1].split('/')[0] if self.sUrlType == 'playlist' else None 90 | 91 | if self.sUrlType == 'video': 92 | self.videos.append(self.url) 93 | self._nVideos = 1 94 | elif self.sUrlType == 'playlist': 95 | print("Playlist detected. Getting videos...") 96 | self._sUrlBaseFormat = self.urlStandardize(self.url) 97 | self._extract_video_urls() 98 | self._nVideos = len(self.videos) 99 | print(f"Found {self._nVideos} video URLs in the playlist") 100 | 101 | 102 | def _get_url_type(self): 103 | if 'youporn.com/watch/' in self.url: 104 | sUrlType = 'video' 105 | elif 'youporn.com/favorites/' in self.url or 'youporn.com/collections/' in self.url: 106 | sUrlType = 'playlist' 107 | else: 108 | raise ValueError(f"Unable to determine {sExtractor} URL type for {self.url}! Please submit a bug report!") 109 | return sUrlType 110 | 111 | 112 | def _extract_video_urls(self, sFilter=None): 113 | """ 114 | Extract video URLs from all playlist pages. 115 | """ 116 | lUrlVideos = [] 117 | nPage = 0 118 | while True: 119 | nPage += 1 120 | 121 | lPageVideos = self._extract_page_urls(nPage) 122 | if lPageVideos: 123 | lUrlVideos += lPageVideos 124 | print(f"Found {len(lPageVideos)} videos on page {nPage:02}...") 125 | else: 126 | print(f"No videos found on page {nPage}. 
Stopping...") 127 | break 128 | self.videos += lUrlVideos 129 | 130 | 131 | def _extract_page_urls(self, nPage, sFilter=None): 132 | """ 133 | Extract video URLs from a single page of the playlist. 134 | """ 135 | sUrlBase = "https://www.pornhub.com{}" 136 | 137 | for nAttempts in range(3): 138 | sUrlPage = self._sUrlBaseFormat.format(nPage) 139 | content = dl_common.session.get(sUrlPage, cookies=dl_common.dCookiesParsed) 140 | if "503 Service Temporarily Unavailable" in content.text: 141 | sleep(3) 142 | continue 143 | soup = dl_common.BeautifulSoup(content.text, 'html.parser') 144 | break 145 | 146 | lVideos = [] 147 | lTags = soup.find_all(attrs={"class": 'pcVideoListItem js-pop videoblock videoBox'}) 148 | for tag in lTags: 149 | if 'id' in tag.attrs.keys() and 'vfavouriteVideo' in tag.attrs['id']: 150 | for a in tag.find_all('a', href=True): 151 | href = a['href'] 152 | if 'view_video.php?' not in href: 153 | continue 154 | if '&pkey=' in href: 155 | continue 156 | if href not in self.videos: 157 | sUrlFull = sUrlBase.format(href) 158 | if sUrlFull not in lVideos: 159 | lVideos.append(sUrlFull) 160 | return lVideos 161 | 162 | 163 | def urlStandardize(self, sUrl): 164 | """ 165 | Make sure URL ends with '/' and tack on f-string brackets for iterating through pages. 166 | """ 167 | if sUrl.endswith('favorites'): 168 | sUrl += '?page={}' 169 | return sUrl 170 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4 2 | requests 3 | yt-dlp>=2022.10.4 4 | jsbeautifier --------------------------------------------------------------------------------