├── __init__.py ├── scenes ├── __init__.py ├── networkProjectOneService_BangBros.py ├── networkProjectOneService_Brazzers.py ├── networkProjectOneService_PartTwo.py ├── networkProjectOneService_BrazzersVR.py ├── networkProjectOneService_HentaiPros.py ├── networkProjectOneService_MyPervyFamily.py ├── networkProjectOneService_DigitalPlayground.py ├── generic.py ├── siteAmateurBoxxx.py ├── siteDirtyWrestlingPit.py ├── siteMagmaFilm.py ├── siteMaverickMen.py ├── siteIsiahMaxwell.py ├── siteBabeArchives.py ├── siteDannyOceansAdventures.py ├── siteMyBoobs.py ├── siteSinsVR.py ├── siteMom4k.py ├── siteOldjeThreesome.py ├── siteClassLesbians.py ├── siteHotTS.py ├── siteMySlavegirl.py ├── siteSubmissiveX.py ├── siteDeepLush.py ├── siteBlackPayback.py ├── siteTerrorXXX.py ├── siteErotiqueTVLive.py ├── siteEricJohnsSexAdventures.py ├── siteJimSlip.py ├── siteTheHabibShow.py ├── siteSwallowbay.py ├── siteDefeatedSexFight.py ├── siteVlogXXX.py ├── siteFutanarica.py ├── sitePlayboyTV.py ├── sitePeeOnHer.py ├── sitePorkVendors.py ├── siteUnlimitedMILFs.py ├── siteVrAllure.py ├── siteRadicalJizzlam.py ├── siteWeAreHairy.py ├── siteBennryGreen.py ├── siteInkaporn.py ├── siteAffect3dStore.py ├── siteS3xus.py ├── siteAngelaSommers.py ├── siteTheNudie.py ├── siteWhoaBoyz.py ├── siteLilMissy.py ├── siteAngeloGodshackOfficial.py ├── siteDominicPacifico.py ├── siteNylonUp.py ├── siteTheFemaleOrgasm.py ├── siteBondageCafe.py ├── siteJoymii.py ├── siteDeviantAss.py ├── siteArtOfBlowjob.py ├── siteLinaMila.py ├── siteLukesPOV.py ├── siteFragileSlave.py ├── siteHotAndTatted.py ├── siteGenuineSin.py ├── siteGoonMuse.py ├── siteMenOfMontreal.py ├── siteBrattyMILF.py ├── siteFitErotic.py ├── siteJaporn.py ├── siteJoshStoneXXX.py ├── siteRiggsFilms.py ├── siteBrandNewAmateurs.py ├── siteLezCrush.py ├── siteDrDaddyPOV.py ├── siteMatureFetish.py ├── siteBourneChallenge.py ├── siteCocksureMen.py ├── siteJulieGinger.py ├── siteMyPervMom.py ├── sitePenthouse.py ├── siteSuperbeModels.py ├── siteRealJamVR.py ├── siteCumflation.py ├── siteMaverickMenDirects.py ├── siteOnlyBBC.py ├── siteBrasileirinhas.py ├── sitePinkyXXX.py ├── siteLostBetsGames.py ├── siteOldje.py ├── siteXX-Cel.py ├── siteAVIdolz.py ├── siteDanni.py ├── siteGothGirlfriend.py ├── siteRandyBlue.py ├── siteTooDiva.py ├── siteTrans4TheFans.py ├── networkBrokeStraightboys.py ├── siteFetishPros.py ├── siteTatsAndTits.py ├── siteTwinz.py ├── siteStrippers4k.py ├── siteFreeze.py ├── siteVampired.py ├── siteJizzOnTeens.py ├── siteSubspaceland.py ├── siteVoodooed.py ├── siteDirtyTony.py ├── siteFuckerMate.py ├── siteSmokingHawt.py ├── siteCupidsEden.py ├── siteSpankingStraightBoys.py ├── siteTgirlsHookup.py ├── siteBoyfun.py ├── siteHotCollegeFucks.py ├── siteRestrictedSenses.py ├── siteEyeOnTheguy.py ├── siteGlowingDesire.py ├── siteHitzefrei.py ├── siteMyBestSexLife.py ├── siteAntonioSuleiman.py ├── siteLuxePlayhouse.py ├── siteBaitBuddies.py ├── siteMasqulin.py ├── siteZishy.py ├── siteSapphix.py ├── siteBiCollegeFucks.py ├── siteExploitedSecretaries.py ├── siteMILFVR.py ├── siteSketchySex.py ├── siteTagTeamPOV.py ├── siteBravoFucker.py ├── siteNextDoorSins.py └── sitePublicHandjobs.py ├── performers ├── __init__.py ├── siteSlaveToBondagePerformer.py ├── networkLegalPornoPornworldPerformer.py ├── siteDownblouseWowPerformer.py ├── networkTeenMegaWorldPerformer.py ├── siteWatch4FetishPerformer.py ├── siteTransVRPerformer.py ├── siteGirlsOutWestPerformer.py ├── siteClubDomPerformer.py ├── siteOnlyGrandpaPerformer.py ├── siteSpunkWorthyPerformer.py ├── siteClassLesbiansPerformer.py ├── siteLucasEntertainmentPerformer.py ├── siteDrDaddyPOVPerformer.py ├── siteFuckerMatePerformer.py └── siteHDSex18Performer.py ├── .github └── workflows │ ├── code-standards.yml │ └── submodules.yml └── .mergify.yml /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scenes/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /performers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/workflows/code-standards.yml: -------------------------------------------------------------------------------- 1 | name: 'Code Standards' 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | jobs: 8 | flake8-lint: 9 | runs-on: ubuntu-latest 10 | name: Lint 11 | steps: 12 | - name: Check out source repository 13 | uses: actions/checkout@v2 14 | 15 | - name: Set up Python environment 16 | uses: actions/setup-python@v2 17 | with: 18 | python-version: "3.8" 19 | 20 | - name: flake8 Lint 21 | uses: reviewdog/action-flake8@v3 22 | with: 23 | github_token: ${{ secrets.GITHUB_TOKEN }} 24 | filter_mode: file 25 | fail_level: any 26 | flake8_args: "--ignore E305,E501,E722" 27 | -------------------------------------------------------------------------------- /scenes/networkProjectOneService_BangBros.py: -------------------------------------------------------------------------------- 1 | ## Stub scraper, this has been moved to NetworkAylo.py 2 | 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | from tpdb.items import SceneItem 5 | 6 | 7 | class ProjectOneServiceBangBrosSpider(BaseSceneScraper): 8 | name = 'ProjectOneServiceBangBros' 9 | 10 | start_urls = [ 11 | '', 12 | ] 13 | 14 | selector_map = { 15 | 'external_id': r'', 16 | 'pagination': '', 17 | 'type': 'Scene', 18 | } 19 | 20 | def get_scenes(self, response): 21 | meta = response.meta 22 | scenes = response.xpath('').getall() 23 | for scene in scenes: 24 | if re.search(self.get_selector_map('external_id'), scene): 25 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) -------------------------------------------------------------------------------- /scenes/networkProjectOneService_Brazzers.py: -------------------------------------------------------------------------------- 1 | ## Stub scraper, this has been moved to NetworkAylo.py 2 | 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | from tpdb.items import SceneItem 5 | 6 | 7 | class ProjectOneServiceBrazzersSpider(BaseSceneScraper): 8 | name = 'ProjectOneServiceBrazzers' 9 | 10 | start_urls = [ 11 | '', 12 | ] 13 | 14 | selector_map = { 15 | 'external_id': r'', 16 | 'pagination': '', 17 | 'type': 'Scene', 18 | } 19 | 20 | def get_scenes(self, response): 21 | meta = response.meta 22 | scenes = response.xpath('').getall() 23 | for scene in scenes: 24 | if re.search(self.get_selector_map('external_id'), scene): 25 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) -------------------------------------------------------------------------------- /scenes/networkProjectOneService_PartTwo.py: -------------------------------------------------------------------------------- 1 | ## Stub scraper, this has been moved to NetworkAylo.py 2 | 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | from tpdb.items import SceneItem 5 | 6 | 7 | class ProjectOneServicePartTwoSpider(BaseSceneScraper): 8 | name = 'ProjectOneService_PartTwo' 9 | 10 | start_urls = [ 11 | '', 12 | ] 13 | 14 | selector_map = { 15 | 'external_id': r'', 16 | 'pagination': '', 17 | 'type': 'Scene', 18 | } 19 | 20 | def get_scenes(self, response): 21 | meta = response.meta 22 | scenes = response.xpath('').getall() 23 | for scene in scenes: 24 | if re.search(self.get_selector_map('external_id'), scene): 25 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) -------------------------------------------------------------------------------- /scenes/networkProjectOneService_BrazzersVR.py: -------------------------------------------------------------------------------- 1 | ## Stub scraper, this has been moved to NetworkAylo.py 2 | 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | from tpdb.items import SceneItem 5 | 6 | 7 | class ProjectOneServiceBrazzersVRSpider(BaseSceneScraper): 8 | name = 'ProjectOneServiceBrazzersVR' 9 | 10 | start_urls = [ 11 | '', 12 | ] 13 | 14 | selector_map = { 15 | 'external_id': r'', 16 | 'pagination': '', 17 | 'type': 'Scene', 18 | } 19 | 20 | def get_scenes(self, response): 21 | meta = response.meta 22 | scenes = response.xpath('').getall() 23 | for scene in scenes: 24 | if re.search(self.get_selector_map('external_id'), scene): 25 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) -------------------------------------------------------------------------------- /scenes/networkProjectOneService_HentaiPros.py: -------------------------------------------------------------------------------- 1 | ## Stub scraper, this has been moved to NetworkAylo.py 2 | 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | from tpdb.items import SceneItem 5 | 6 | 7 | class ProjectOneServiceHentaiProsSpider(BaseSceneScraper): 8 | name = 'ProjectOneServiceHentaiPros' 9 | 10 | start_urls = [ 11 | '', 12 | ] 13 | 14 | selector_map = { 15 | 'external_id': r'', 16 | 'pagination': '', 17 | 'type': 'Scene', 18 | } 19 | 20 | def get_scenes(self, response): 21 | meta = response.meta 22 | scenes = response.xpath('').getall() 23 | for scene in scenes: 24 | if re.search(self.get_selector_map('external_id'), scene): 25 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) -------------------------------------------------------------------------------- /scenes/networkProjectOneService_MyPervyFamily.py: -------------------------------------------------------------------------------- 1 | ## Stub scraper, this has been moved to NetworkAylo.py 2 | 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | from tpdb.items import SceneItem 5 | 6 | 7 | class ProjectOneServiceMyPervyFamilySpider(BaseSceneScraper): 8 | name = 'ProjectOneServiceMyPervyFamily' 9 | 10 | start_urls = [ 11 | '', 12 | ] 13 | 14 | selector_map = { 15 | 'external_id': r'', 16 | 'pagination': '', 17 | 'type': 'Scene', 18 | } 19 | 20 | def get_scenes(self, response): 21 | meta = response.meta 22 | scenes = response.xpath('').getall() 23 | for scene in scenes: 24 | if re.search(self.get_selector_map('external_id'), scene): 25 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) -------------------------------------------------------------------------------- /scenes/networkProjectOneService_DigitalPlayground.py: -------------------------------------------------------------------------------- 1 | ## Stub scraper, this has been moved to NetworkAylo.py 2 | 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | from tpdb.items import SceneItem 5 | 6 | 7 | class ProjectOneServiceDigitalPlaygroundSpider(BaseSceneScraper): 8 | name = 'ProjectOneServiceDigitalPlayground' 9 | 10 | start_urls = [ 11 | '', 12 | ] 13 | 14 | selector_map = { 15 | 'external_id': r'', 16 | 'pagination': '', 17 | 'type': 'Scene', 18 | } 19 | 20 | def get_scenes(self, response): 21 | meta = response.meta 22 | scenes = response.xpath('').getall() 23 | for scene in scenes: 24 | if re.search(self.get_selector_map('external_id'), scene): 25 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) -------------------------------------------------------------------------------- /scenes/generic.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | import scrapy 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class Spider(BaseSceneScraper): 8 | name = '' 9 | network = '' 10 | parent = '' 11 | site = '' 12 | 13 | start_urls = [ 14 | '', 15 | ] 16 | 17 | selector_map = { 18 | 'title': '', 19 | 'description': '', 20 | 'date': '', 21 | 'image': '', 22 | 'performers': '', 23 | 'tags': '', 24 | 'duration': '', 25 | 'trailer': '', 26 | 'external_id': r'', 27 | 'pagination': '', 28 | 'type': 'Scene', 29 | } 30 | 31 | def get_scenes(self, response): 32 | meta = response.meta 33 | scenes = response.xpath('').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 37 | -------------------------------------------------------------------------------- /.github/workflows/submodules.yml: -------------------------------------------------------------------------------- 1 | name: 'Submodule Notify Parent' 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | # Allows you to run this workflow manually from the Actions tab 9 | workflow_dispatch: 10 | 11 | jobs: 12 | notify: 13 | name: 'Submodule Notify Parent' 14 | runs-on: ubuntu-latest 15 | 16 | # Use the Bash shell regardless whether the GitHub Actions runner is ubuntu-latest, macos-latest, or windows-latest 17 | defaults: 18 | run: 19 | shell: bash 20 | 21 | steps: 22 | - name: Github REST API Call 23 | env: 24 | CI_TOKEN: ${{ secrets.CI_TOKEN }} 25 | PARENT_REPO: ThePornDatabase/scrapy 26 | PARENT_BRANCH: main 27 | WORKFLOW_ID: 8680311 28 | run: | 29 | curl -fL --retry 3 -X POST -H "Accept: application/vnd.github.v3+json" -H "Authorization: token ${{ env.CI_TOKEN }}" https://api.github.com/repos/${{ env.PARENT_REPO }}/actions/workflows/${{ env.WORKFLOW_ID }}/dispatches -d '{"ref":"${{ env.PARENT_BRANCH }}"}' -------------------------------------------------------------------------------- /scenes/siteAmateurBoxxx.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class AmatuerBoxxxSpider(BaseSceneScraper): 7 | name = 'AmateurBoxxx' 8 | network = 'Amateur Boxxx' 9 | parent = 'Amateur Boxxx' 10 | 11 | start_urls = [ 12 | 'https://tour.amateurboxxx.com' 13 | ] 14 | 15 | selector_map = { 16 | 'title': 'span.update_title::text', 17 | 'description': 'span.latest_update_description::text', 18 | 'performers': 'span.tour_update_models a::text', 19 | 'date': 'span.availdate::text', 20 | 'image': 'img.large_update_thumb::attr(src)', 21 | 'tags': '', 22 | 'external_id': 'updates/(.+).html', 23 | 'trailer': '', 24 | 'pagination': '/categories/updates_%s_d.html' 25 | } 26 | 27 | def get_scenes(self, response): 28 | scenes = response.css('.updateItem h4 a::attr(href)').getall() 29 | for scene in scenes: 30 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 31 | -------------------------------------------------------------------------------- /.mergify.yml: -------------------------------------------------------------------------------- 1 | pull_request_rules: 2 | - name: automatic update for PR 3 | conditions: 4 | - base=main 5 | - -merged 6 | - -closed 7 | - check-success=Code Standards 8 | actions: 9 | update: 10 | 11 | - name: update pr and assign 12 | conditions: 13 | - base=main 14 | - -merged 15 | - -closed 16 | - check-success=Code Standards 17 | - label!=conflict 18 | actions: 19 | assign: 20 | add_users: 21 | - chalupabatman69 22 | update: 23 | 24 | - name: warn on conflicts 25 | conditions: 26 | - conflict 27 | - -merged 28 | - -closed 29 | actions: 30 | comment: 31 | message: "@{{author}} this pull request is now in conflict 😩" 32 | label: 33 | add: 34 | - conflict 35 | assign: 36 | remove_users: 37 | - chalupabatman69 38 | 39 | - name: remove conflict label if not needed 40 | conditions: 41 | - -conflict 42 | actions: 43 | label: 44 | remove: 45 | - conflict 46 | -------------------------------------------------------------------------------- /scenes/siteDirtyWrestlingPit.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | import scrapy 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteDirtyWrestlingPitSpider(BaseSceneScraper): 8 | name = 'DirtyWrestlingPitDoNotUse' 9 | network = 'DirtyWrestlingPit' 10 | parent = 'DirtyWrestlingPit' 11 | site = 'DirtyWrestlingPit' 12 | 13 | start_urls = [ 14 | '', 15 | ] 16 | 17 | selector_map = { 18 | 'title': '', 19 | 'description': '', 20 | 'date': '', 21 | 'image': '', 22 | 'performers': '', 23 | 'tags': '', 24 | 'duration': '', 25 | 'trailer': '', 26 | 'external_id': r'', 27 | 'pagination': '', 28 | 'type': 'Scene', 29 | } 30 | 31 | def get_scenes(self, response): 32 | meta = response.meta 33 | scenes = response.xpath('').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 37 | -------------------------------------------------------------------------------- /performers/siteSlaveToBondagePerformer.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | 4 | from tpdb.BasePerformerScraper import BasePerformerScraper 5 | 6 | 7 | class siteSlaveToBondagePerformerSpider(BasePerformerScraper): 8 | selector_map = { 9 | 'name': '//div[@class="profileDetails"]//h2[@class="title"]/text()', 10 | 'image': '//div[@class="profilePic"]//img/@src0_3x', 11 | 'image_blob': True, 12 | 'bio': '//div[@class="profileContent"]/p//text()', 13 | 'pagination': '/tour/models/%s/latest/', 14 | 'external_id': r'model/(.*)/' 15 | } 16 | 17 | name = 'siteSlaveToBondagePerformer' 18 | network = 'SlaveToBondage' 19 | 20 | start_urls = [ 21 | 'https://www.slavetobondage.com', 22 | ] 23 | 24 | def get_gender(self, response): 25 | return 'Female' 26 | 27 | def get_performers(self, response): 28 | performers = response.xpath('//div[@class="slave"]/a/@href').getall() 29 | for performer in performers: 30 | yield scrapy.Request(url=self.format_link(response, performer), callback=self.parse_performer, cookies=self.cookies, headers=self.headers) 31 | -------------------------------------------------------------------------------- /performers/networkLegalPornoPornworldPerformer.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | from tpdb.BasePerformerScraper import BasePerformerScraper 3 | 4 | 5 | class NetworkLegalPornoPornworldPerformerSpider(BasePerformerScraper): 6 | name = 'PornworldPerformer' 7 | network = 'Legal Porno' 8 | 9 | selector_map = { 10 | 'name': '//h1[@class="model__title"]/text()', 11 | 'image': '//div[contains(@class, "model__left--photo")]/img/@src', 12 | 'image_blob': True, 13 | 'nationality': '//td[contains(text(), "Nationality")]/following-sibling::td[1]/div/a/text()', 14 | 15 | 'pagination': '/models/sex/female/page/%s/', 16 | 'external_id': r'model/(.*)/' 17 | } 18 | 19 | start_urls = [ 20 | 'https://pornworld.com', 21 | ] 22 | 23 | def get_gender(self, response): 24 | return 'Female' 25 | 26 | def get_performers(self, response): 27 | performers = response.xpath('//div[@class="model-top"]/a[1]/@href').getall() 28 | for performer in performers: 29 | yield scrapy.Request(url=self.format_link(response, performer), callback=self.parse_performer, cookies=self.cookies, headers=self.headers) 30 | -------------------------------------------------------------------------------- /scenes/siteMagmaFilm.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class MagmaFilmNetworkSpider(BaseSceneScraper): 7 | name = 'MagmaFilm' 8 | 9 | start_urls = [ 10 | 'http://www.magmafilm.tv', 11 | ] 12 | 13 | cookies = { 14 | '_culture': 'en', 15 | } 16 | 17 | selector_map = { 18 | 'title': 'h2:first-of-type::text', 19 | 'description': '//div[@class="infobox"]/div/p/text()', 20 | 'date': '', 21 | 'image': '//div[contains(@class, "imgbox") and contains(@class, "full")]/@style', 22 | 're_image': r'url\(\'(.*)\'\)', 23 | 'performers': '//div[@class="infobox"]/div/table//td/div/text()', 24 | 'tags': '//div[@class="infobox"]/div/table//td/a/span/text()', 25 | 'external_id': r'/([a-zA-Z0-9-]+?)/?$', 26 | 'trailer': '', 27 | 'pagination': '/en/List/Neu?page=%s' 28 | } 29 | 30 | def get_scenes(self, response): 31 | scenes = response.xpath('//div[contains(@class, "clipbox")]/a[1]/@href').getall() 32 | for scene in scenes: 33 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 34 | -------------------------------------------------------------------------------- /performers/siteDownblouseWowPerformer.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | 3 | from tpdb.BasePerformerScraper import BasePerformerScraper 4 | 5 | 6 | class SiteDownblouseWowPerformerSpider(BasePerformerScraper): 7 | selector_map = { 8 | 'name': '//div[contains(@class,"modelinfo")]/p/strong[contains(text(), "Name")]/following-sibling::text()[1]', 9 | 'image': '//div[contains(@class,"modelpic")]/img/@src', 10 | 'image_blob': True, 11 | 'cupsize': '//div[contains(@class,"modelinfo")]/p/strong[contains(text(), "Bra")]/following-sibling::text()[1]', 12 | 13 | 'pagination': '/show.php?a=147_%s', 14 | 'external_id': r'model/(.*)/' 15 | } 16 | 17 | name = 'DownblouseWowPerformer' 18 | network = 'Downblouse Wow' 19 | 20 | start_urls = [ 21 | 'https://downblousewow.com', 22 | ] 23 | 24 | def get_gender(self, response): 25 | return 'Female' 26 | 27 | def get_performers(self, response): 28 | performers = response.xpath('//div[@class="itemminfo"]/p/a/@href').getall() 29 | for performer in performers: 30 | yield scrapy.Request(url=self.format_link(response, performer), callback=self.parse_performer, cookies=self.cookies, headers=self.headers) 31 | -------------------------------------------------------------------------------- /scenes/siteMaverickMen.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteMaverickMenSpider(BaseSceneScraper): 7 | name = 'MaverickMen' 8 | network = 'Maverick Men' 9 | parent = 'Maverick Men' 10 | site = 'Maverick Men' 11 | 12 | start_urls = [ 13 | 'https://vod.maverickmen.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1[@id="view_title"]/text()', 18 | 'description': '//span[@id="view_description"]//text()', 19 | 'date': '//strong[contains(text(), "Released")]/following-sibling::text()', 20 | 'date_formats': ['%m/%d/%Y'], 21 | 'image': '//div[@class="main_vid"]//img/@src', 22 | 'external_id': r'.*=(.*?)$', 23 | 'pagination': '/?page=videos&p=%s', 24 | 'type': 'Scene', 25 | } 26 | 27 | def get_scenes(self, response): 28 | meta = response.meta 29 | scenes = response.xpath('//div[@class="vid-list-thumb"]/a[1]/@href').getall() 30 | for scene in scenes: 31 | if re.search(self.get_selector_map('external_id'), scene): 32 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 33 | -------------------------------------------------------------------------------- /scenes/siteIsiahMaxwell.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteIsiahMaxwellSpider(BaseSceneScraper): 7 | name = 'IsiahMaxwell' 8 | network = 'Isiah Maxwell' 9 | parent = 'Isiah Maxwell' 10 | site = 'Isiah Maxwell' 11 | 12 | start_urls = [ 13 | 'https://tour.isiahmaxwellxxx.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//span[@class="medium-blue"]/text()', 18 | 'description': '', 19 | 'date': '', 20 | 'image': '//div[@id="endscreen"]/@style', 21 | 're_image': r'(http.*\.jpg)', 22 | 'performers': '//p[@class="featuring"]/a/text()', 23 | 'tags': '//p[@class="category"]/a/text()', 24 | 'trailer': '//video/source/@src', 25 | 'external_id': r'.*/(.*)\.htm', 26 | 'pagination': '/?&spage=%s' 27 | } 28 | 29 | def get_scenes(self, response): 30 | meta = response.meta 31 | scenes = response.xpath('//h2/a/@href').getall() 32 | for scene in scenes: 33 | if re.search(self.get_selector_map('external_id'), scene): 34 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 35 | -------------------------------------------------------------------------------- /scenes/siteBabeArchives.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | from tpdb.BaseSceneScraper import BaseSceneScraper 3 | 4 | 5 | class BabeArchivesWhoresSpider(BaseSceneScraper): 6 | name = 'BabeArchives' 7 | network = 'Babe Archives' 8 | parent = 'Babe Archives' 9 | 10 | start_urls = [ 11 | 'https://babearchives.com' 12 | ] 13 | 14 | selector_map = { 15 | 'title': '//div[contains(@class,"videoDetails")]/h3/text()', 16 | 'description': '//div[contains(@class,"videoDetails")]/h3/text()', # No description on site, just using title for filler 17 | 'date': '//span[contains(text(),"Added:")]/following-sibling::text()', 18 | 'image': '//div[@class="player-thumb"]/img/@src0_1x', 19 | 'image_blob': True, 20 | 'performers': '//li[@class="update_models"]/a/text()', 21 | 'tags': '', 22 | 'external_id': r'\/trailers\/(.+)\.html', 23 | 'trailer': '', 24 | 'pagination': '/categories/movies/%s/latest/' 25 | } 26 | 27 | def get_scenes(self, response): 28 | scenes = response.xpath('//div[@class="item-info"]/h4/a/@href').getall() 29 | for scene in scenes: 30 | yield scrapy.Request(url=scene, callback=self.parse_scene, meta={'site': 'Babe Archives'}) 31 | -------------------------------------------------------------------------------- /scenes/siteDannyOceansAdventures.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class Spider(BaseSceneScraper): 7 | name = 'DannyOceansAdventures' 8 | network = 'Danny Oceans Adventures' 9 | parent = 'Danny Oceans Adventures' 10 | site = 'Danny Oceans Adventures' 11 | 12 | start_urls = [ 13 | 'https://dannyoceansadventures.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h2[@itemprop="headline"]/text()', 18 | 'description': '//span[@itemprop="about"]//text()', 19 | 'date': '', 20 | 'image': '//video/@poster', 21 | 'performers': '//span[@itemprop="actors"]/a/text()', 22 | 'tags': '//span[@itemprop="keywords"]/a/text()', 23 | 'trailer': '//video/source/@src', 24 | 'external_id': r'.*/(.*?)/', 25 | 'pagination': '/scenes/page/%s/' 26 | } 27 | 28 | def get_scenes(self, response): 29 | meta = response.meta 30 | scenes = response.xpath('//h3/a/@href').getall() 31 | for scene in scenes: 32 | if re.search(self.get_selector_map('external_id'), scene): 33 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 34 | -------------------------------------------------------------------------------- /scenes/siteMyBoobs.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class MyBoobsSpider(BaseSceneScraper): 8 | name = 'MyBoobs' 9 | network = "Radical Entertainment" 10 | parent = "MyBoobs" 11 | 12 | start_urls = [ 13 | 'https://tour.myboobs.eu' 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h2[@class="sec-tit"]/span/text()', 18 | 'description': '', 19 | 'date': '', 20 | 'image': '//img[@id="preview"]/@src', 21 | 'performers': '', 22 | 'tags': '', 23 | 'trailer': '', 24 | 'external_id': 'view\\/(\\d+)\\/', 25 | 'pagination': '/videos?page=%s' 26 | } 27 | 28 | def get_scenes(self, response): 29 | 30 | scenes = response.xpath('//div[contains(@class,"set-thumb")]') 31 | for scene in scenes: 32 | date = scene.xpath('./div/div/div/span/span[1]/text()').get() 33 | date = self.parse_date(date.strip()).isoformat() 34 | scene = scene.xpath('./a/@href').get() 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta={'date': date}) 37 | -------------------------------------------------------------------------------- /scenes/siteSinsVR.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteSinsVRSpider(BaseSceneScraper): 7 | name = 'SinsVR' 8 | network = 'SinsVR' 9 | parent = 'SinsVR' 10 | site = 'SinsVR' 11 | 12 | start_urls = [ 13 | 'https://xsinsvr.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1/text()', 18 | 'description': '//li[contains(@class, "desc")]/div/p//text()', 19 | 'date': '//time/text()', 20 | 'date_formats': ['%b %d, %Y'], 21 | 'image': '//dl8-video/@poster', 22 | 'performers': '//strong[contains(text(), "Starring")]/following-sibling::span//a/text()', 23 | 'tags': '//div[@class="tags"]//a/text()', 24 | 'trailer': '//dl8-video/source[1]/@src', 25 | 'external_id': r'/video/(.*)', 26 | 'pagination': '/videos/%s' 27 | } 28 | 29 | def get_scenes(self, response): 30 | scenes = response.xpath('//div[@class="tn-video"]/a[contains(@href, "/video/")]/@href').getall() 31 | for scene in scenes: 32 | if re.search(self.get_selector_map('external_id'), scene): 33 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 34 | -------------------------------------------------------------------------------- /scenes/siteMom4k.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteMom4kSpider(BaseSceneScraper): 7 | name = 'Mom4k' 8 | network = 'Mom4k' 9 | parent = 'Mom4k' 10 | site = 'Mom4k' 11 | 12 | start_urls = [ 13 | 'https://mom4k.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[contains(@id, "side")]/h1/text()', 18 | 'description': '//div[contains(@id, "description")]/text()', 19 | 'date': '//div[contains(text(), "RELEASED")]/span/text()', 20 | 'date_formats': ['%B %d, %Y'], 21 | 'image': '//video/@poster', 22 | 'performers': '//div[contains(@id, "models")]/a/text()', 23 | 'tags': '', 24 | 'external_id': r'.*/(.*?)$', 25 | 'trailer': '', 26 | 'pagination': '/?page=%s' 27 | } 28 | 29 | def get_scenes(self, response): 30 | scenes = response.xpath('//div[@class="btn-group"]/a/@href').getall() 31 | for scene in scenes: 32 | if re.search(self.get_selector_map('external_id'), scene): 33 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 34 | 35 | def get_tags(self, response): 36 | return ['Interracial'] 37 | -------------------------------------------------------------------------------- /scenes/siteOldjeThreesome.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteOldje3someSpider(BaseSceneScraper): 7 | name = 'Oldje3some' 8 | network = 'Oldje' 9 | parent = 'Oldje' 10 | site = 'Oldje 3some' 11 | 12 | start_urls = [ 13 | 'https://www.oldje-3some.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[contains(@class, "updates")]/div/div/h1/text()', 18 | 'description': '//div[contains(@class, "description")]//p/text()', 19 | 'image': '//div[contains(@class, "teaser-img")]/a/img/@src', 20 | 'performers': '//div[contains(@class, "updates")]//a[contains(@href, "/models/")]/text()', 21 | 'tags': '//div[@class="tags"]/a//text()', 22 | 'trailer': '', 23 | 'external_id': r'.*/(.*?)$', 24 | 'pagination': '/page/%s' 25 | } 26 | 27 | def get_scenes(self, response): 28 | meta = response.meta 29 | scenes = response.xpath('//div[contains(@class, "read-more")]/a/@href').getall() 30 | for scene in scenes: 31 | if re.search(self.get_selector_map('external_id'), scene): 32 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 33 | -------------------------------------------------------------------------------- /scenes/siteClassLesbians.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteClassMediaSpider(BaseSceneScraper): 7 | name = 'ClassLesbians' 8 | network = 'Class Media' 9 | 10 | start_urls = [ 11 | 'https://www.class-lesbians.com/', 12 | ] 13 | 14 | selector_map = { 15 | 'title': "//h1/text()", 16 | 'description': "//div[@class='expand opened']/p[1]/text()", 17 | 'date': '//span[@class="period"]/text()', 18 | 'date_formats': ['%d.%m.%Y'], 19 | 'performers': '//div[@class="main-info"]/p[@class="cast"]/a/text()', 20 | 'image': '//div[@class="banner-video"]/img/@src', 21 | 'tags': '', 22 | 'external_id': r'.*\/(.*?)$', 23 | 'trailer': '', 24 | 'pagination': '/videos/%s' 25 | } 26 | 27 | def get_scenes(self, response): 28 | scenes = response.xpath('//div[@class="box new-videos-box"]/a[contains(@href,"/videos/")]/@href').getall() 29 | for scene in scenes: 30 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 31 | 32 | def get_site(self, response): 33 | return "Class Lesbians" 34 | 35 | def get_parent(self, response): 36 | return "Class Lesbians" 37 | -------------------------------------------------------------------------------- /scenes/siteHotTS.py: -------------------------------------------------------------------------------- 1 | # Part of AdultPrime 2 | import re 3 | import scrapy 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteHotTSSpider(BaseSceneScraper): 8 | name = 'HotTS' 9 | 10 | start_urls = [ 11 | # ~ 'https://hotts.com/', 12 | ] 13 | 14 | selector_map = { 15 | 'title': '//div[@class="videoDetails clear"]/h3/text()', 16 | 'description': '//div[@class="videoDetails clear"]/p/text()', 17 | 'date': '', 18 | 'duration': '//div[@class="player-time"]/text()', 19 | 'image': '//meta[@property="og:image"]/@content|//meta[@name="twitter:image"]/@content', 20 | 'performers': '//li[@class="update_models"]/a/text()', 21 | 'tags': '//li[@class="label"]/following-sibling::li/a[contains(@href, "categories")]/text()', 22 | 'external_id': r'.*/(.*?).html', 23 | 'trailer': '', 24 | 'pagination': '/categories/movies/%s/latest/' 25 | } 26 | 27 | def get_scenes(self, response): 28 | scenes = response.xpath('//div[@class="item-thumb"]/a/@href').getall() 29 | for scene in scenes: 30 | if re.search(self.get_selector_map('external_id'), scene): 31 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 32 | -------------------------------------------------------------------------------- /scenes/siteMySlavegirl.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteMySlavegirlSpider(BaseSceneScraper): 7 | name = 'MySlavegirl' 8 | network = 'My Slavegirl' 9 | parent = 'My Slavegirl' 10 | site = 'My Slavegirl' 11 | 12 | start_urls = [ 13 | 'https://www.my-slavegirl.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1[@class="h2"]/text()', 18 | 'description': '//div[@class="custom_text"]/p/text()', 19 | 'date': '', 20 | 'image': '//meta[@name="twitter:image"]/@content', 21 | 'performers': '//a[contains(@href, "/models/")]/text()', 22 | 'tags': '//div[@class="tags"]/a/text()', 23 | 'external_id': r'collections/(.*)', 24 | 'trailer': '//meta[@name="twitter:player:stream"]/@content', 25 | 're_trailer': r'(.*\.mp4)', 26 | 'pagination': '/collections/page/%s' 27 | } 28 | 29 | def get_scenes(self, response): 30 | scenes = response.xpath('//div[contains(@class, "my-2")]/div/a/@href').getall() 31 | for scene in scenes: 32 | if re.search(self.get_selector_map('external_id'), scene): 33 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 34 | -------------------------------------------------------------------------------- /scenes/siteSubmissiveX.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteSubmissiveXSpider(BaseSceneScraper): 7 | name = 'SubmissiveX' 8 | network = 'Submissive X' 9 | parent = 'Submissive X' 10 | site = 'Submissive X' 11 | 12 | start_urls = [ 13 | 'https://submissivex.com/', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[@class="title clear"]/h2/text()', 18 | 'description': '//span[contains(@class,"description")]/text()', 19 | 'date': '//span[contains(@class,"update_date")]/text()', 20 | 'image': '//span[@class="model_update_thumb"]/img/@src', 21 | 'performers': '//span[@class="tour_update_models"]/a/text()', 22 | 'tags': '//span[@class="update_tags"]/a/text()', 23 | 'external_id': r'updates/(.*).html', 24 | 'trailer': '', 25 | 'pagination': '/categories/movies_%s_d.html#' 26 | } 27 | 28 | def get_scenes(self, response): 29 | scenes = response.xpath('//div[@class="updateItem"]/div/a/@href').getall() 30 | for scene in scenes: 31 | if re.search(self.get_selector_map('external_id'), scene): 32 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 33 | -------------------------------------------------------------------------------- /scenes/siteDeepLush.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | from tpdb.BaseSceneScraper import BaseSceneScraper 3 | 4 | 5 | class DeepLushSpider(BaseSceneScraper): 6 | name = 'DeepLush' 7 | network = 'Deep Lush' 8 | parent = 'Deep Lush' 9 | 10 | start_urls = [ 11 | # ~ "https://deeplush.com" 12 | ] 13 | 14 | selector_map = { 15 | 'title': '//h2/text()', 16 | 'description': '//div[@class="collapse "]//text()', 17 | 'tags': '//a[contains(@href,"/video/category/")]//text()', 18 | 'performers': '//a[contains(@class,"performer")]//text()', 19 | 'image': '//video/@poster', 20 | 'trailer': '//video/source[last()]/@src', 21 | 'date': '//span[@class="date"]//text()', 22 | 'date_formats': ['%d %b %Y'], 23 | 'external_id': '[0-9]+/(.+)', 24 | 'pagination': 'video/gallery/%s', 25 | } 26 | 27 | def get_next_page_url(self, base, page): 28 | return self.format_url(base, self.get_selector_map('pagination') % ((page - 1) * 12)) 29 | 30 | def get_scenes(self, response): 31 | for scene in response.xpath('//a[contains(@href, "/video/watch/")]/@href').getall(): 32 | yield scrapy.Request( 33 | url=self.format_link(response, scene), 34 | callback=self.parse_scene) 35 | -------------------------------------------------------------------------------- /scenes/siteBlackPayback.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteBlackPaybackSpider(BaseSceneScraper): 7 | name = 'BlackPayback' 8 | network = 'Black Payback' 9 | parent = 'Black Payback' 10 | site = 'Black Payback' 11 | max_pages = 15 12 | 13 | start_urls = [ 14 | 'https://blackpayback.com', 15 | ] 16 | 17 | selector_map = { 18 | 'title': '//h1/text()', 19 | 'description': '//div[contains(@class,"videoDetails")]/p/text()', 20 | 'date': '', 21 | 'image': '//div[@class="player-thumb"]/img/@src0_2x', 22 | # ~ 're_image': r'poster=\"(.*?\.jpg)', 23 | 'performers': '', 24 | 'tags': '//div[contains(@class,"featuring")]/ul/li/a/text()', 25 | 'external_id': r'.*/(.*?).html', 26 | 'trailer': '', 27 | 'pagination': '/tour/updates/page_%s.html' 28 | } 29 | 30 | def get_scenes(self, response): 31 | scenes = response.xpath('//div[@class="item-thumb"]/a/@href').getall() 32 | for scene in scenes: 33 | if re.search(self.get_selector_map('external_id'), scene) and response.meta['page'] < self.max_pages: 34 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 35 | -------------------------------------------------------------------------------- /scenes/siteTerrorXXX.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteTerrorXXXSpider(BaseSceneScraper): 8 | name = 'TerrorXXX' 9 | network = 'Terror XXX' 10 | parent = 'Terror XXX' 11 | site = 'Terror XXX' 12 | 13 | start_urls = [ 14 | 'https://terrorxxx.com', 15 | ] 16 | 17 | selector_map = { 18 | 'title': '//div[contains(@class,"title")]/span[@class="update_title"]/text()', 19 | 'description': '//span[contains(@class,"description")]/text()', 20 | 'date': '', 21 | 'image': '//meta[@property="og:image"]/@content', 22 | 'performers': '//span[@class="tour_update_models"]/a/text()', 23 | 'tags': '', 24 | 'external_id': r'.*/(.*?).html', 25 | 'trailer': '//script[contains(text(),"/trailers/")]', 26 | 're_trailer': r'\"(/trailers.*?\.mp4)\"', 27 | 'pagination': '/categories/Movies_%s_d.html' 28 | } 29 | 30 | def get_scenes(self, response): 31 | scenes = response.xpath('//div[@class="updateItem"]/a/@href').getall() 32 | for scene in scenes: 33 | if re.search(self.get_selector_map('external_id'), scene): 34 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 35 | -------------------------------------------------------------------------------- /scenes/siteErotiqueTVLive.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteErotiqueTVLiveSpider(BaseSceneScraper): 7 | name = 'ErotiqueTVLive' 8 | network = 'ErotiqueTVLive' 9 | parent = 'ErotiqueTVLive' 10 | site = 'ErotiqueTVLive' 11 | 12 | start_urls = [ 13 | 'https://erotiquetvlive.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[@class="video-player"]/div/h2/text()', 18 | 'description': '//h3[contains(text(), "Description")]/following-sibling::text()', 19 | 'date': '', 20 | 'image': '//div[@class="player-thumb"]//img/@src0_1x', 21 | 'performers': '//div[contains(@class,"models-list-thumbs")]/ul/li/a/span/text()', 22 | 'tags': '//div[@class="update-info-block"]/ul/li/a/text()', 23 | 'external_id': r'.*/(.*?).html', 24 | 'trailer': '', 25 | 'pagination': '/tour/categories/movies_%s_d.html' 26 | } 27 | 28 | def get_scenes(self, response): 29 | scenes = response.xpath('//div[@class="content-div"]/h4/a/@href').getall() 30 | for scene in scenes: 31 | if re.search(self.get_selector_map('external_id'), scene): 32 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 33 | -------------------------------------------------------------------------------- /performers/networkTeenMegaWorldPerformer.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | 3 | from tpdb.BasePerformerScraper import BasePerformerScraper 4 | 5 | 6 | class NetworkTeenMegaWorldPerformerSpider(BasePerformerScraper): 7 | selector_map = { 8 | 'name': '//div[@class="model"]/h1/text()', 9 | 'image': '//div[@class="photo"]/img/@data-src', 10 | 'image_blob': True, 11 | 'bio': '//div[@class="bio"]/div/p/text()', 12 | 'eyecolor': '//div[@class="title" and contains(text(), "Eyes")]/following-sibling::div/text()', 13 | 'haircolor': '//div[@class="title" and contains(text(), "Hair")]/following-sibling::div/text()', 14 | 15 | 'pagination': '/models/models_%s.html', 16 | 'external_id': r'model/(.*)/' 17 | } 18 | 19 | name = 'TeenMegaWorldPerformer' 20 | network = 'TeenMegaWorld' 21 | 22 | start_urls = [ 23 | 'https://teenmegaworld.net', 24 | ] 25 | 26 | def get_gender(self, response): 27 | return 'Female' 28 | 29 | def get_performers(self, response): 30 | performers = response.xpath('//li[contains(@class,"model_list")]/div/a/@href').getall() 31 | for performer in performers: 32 | yield scrapy.Request(url=self.format_link(response, performer), callback=self.parse_performer, cookies=self.cookies, headers=self.headers) 33 | -------------------------------------------------------------------------------- /performers/siteWatch4FetishPerformer.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | from tpdb.BasePerformerScraper import BasePerformerScraper 3 | 4 | 5 | class SiteWatch4FetishPerformerSpider(BasePerformerScraper): 6 | selector_map = { 7 | 'name': '//div[@class="channal-details-info"]/h3/text()', 8 | 'image': '//div[@class="channal-image"]/img/@src0_1x', 9 | 'image_blob': True, 10 | 'height': '//td[contains(text(), "Height:")]/following-sibling::td/text()', 11 | 'nationality': '//td[contains(text(), "Country:")]/following-sibling::td/text()', 12 | 'bio': '//strong[contains(text(), "About me")]/following-sibling::text()', 13 | 'pagination': '/models/models_%s.html', 14 | 'external_id': r'models/(.*)/' 15 | } 16 | 17 | name = 'Watch4FetishPerformer' 18 | network = "Watch4Fetish" 19 | 20 | start_urls = [ 21 | 'https://www.watch4fetish.com', 22 | ] 23 | 24 | def get_performers(self, response): 25 | performers = response.xpath('//a[@class="model_thumb"]/@href').getall() 26 | for performer in performers: 27 | yield scrapy.Request( 28 | url=self.format_link(response, performer), 29 | callback=self.parse_performer 30 | ) 31 | 32 | def get_gender(self, response): 33 | return "Female" 34 | -------------------------------------------------------------------------------- /scenes/siteEricJohnsSexAdventures.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteEricJohnsSexAdventuresSpider(BaseSceneScraper): 8 | name = 'EricJohnsSexAdventures' 9 | network = 'Eric Johns Sex Adventures' 10 | parent = 'Eric Johns Sex Adventures' 11 | site = 'Eric Johns Sex Adventures' 12 | 13 | start_urls = [ 14 | 'https://ericjohnssexadventures.com', 15 | ] 16 | 17 | selector_map = { 18 | 'title': '//h2[@class="section-title"]/text()', 19 | 'description': '//h3[contains(text(),"Description:")]/following-sibling::text()', 20 | 'date': '', 21 | 'image': '//img[@class="update_thumb thumbs stdimage"]/@src0_1x', 22 | 'performers': '//li/a/span/text()', 23 | 'tags': '//ul[@class="tags"]/li/a/text()', 24 | 'external_id': r'.*/(.*?).html', 25 | 'trailer': '', 26 | 'pagination': '/categories/movies_%s_d.html' 27 | } 28 | 29 | def get_scenes(self, response): 30 | scenes = response.xpath('//div[@class="content-div"]/h4/a/@href').getall() 31 | for scene in scenes: 32 | if re.search(self.get_selector_map('external_id'), scene): 33 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 34 | -------------------------------------------------------------------------------- /scenes/siteJimSlip.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | from tpdb.BaseSceneScraper import BaseSceneScraper 3 | 4 | 5 | class JimSlipSpider(BaseSceneScraper): 6 | name = 'JimSlip' 7 | network = 'JimSlip' 8 | 9 | start_urls = ['https://www.jimslip.com/updates.php'] 10 | 11 | selector_map = { 12 | 'title': "b font::text", 13 | 'description': "div.textarea::text", 14 | 'performers': ".has-text-white-ter a.is-dark::text", 15 | 'external_id': 'slug=(.+)', 16 | 'trailer': '', 17 | 'tags': '', 18 | 'image': 'img[hspace]::attr(src)', 19 | 'pagination': '/updates.php?page=%s' 20 | } 21 | 22 | max_pages = 200 23 | 24 | def get_scenes(self, response): 25 | scenes = response.css( 26 | "[width] > tbody > tr > td > table > tbody > tr > td") 27 | for scene in scenes: 28 | link = scene.css('a::attr(href)').get() 29 | meta = {} 30 | if scene.css('td.gray::text').get(): 31 | text = scene.css('td.gray::text').get().strip().replace( 32 | 'added ', '').replace('.', '-') 33 | meta['date'] = self.parse_date(text.strip(), date_formats=['%d.%m.%Y']).isoformat() 34 | yield scrapy.Request(url=self.format_link(response, link), callback=self.parse_scene, meta=meta) 35 | -------------------------------------------------------------------------------- /scenes/siteTheHabibShow.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteTheHabibShowSpider(BaseSceneScraper): 7 | name = 'TheHabibShow' 8 | network = 'The Habib Show' 9 | parent = 'The Habib Show' 10 | site = 'The Habib Show' 11 | 12 | start_urls = [ 13 | 'https://thehabibshow.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//header/h1/text()', 18 | 'description': '//article[@class="article"]/p//text()', 19 | 'date': '', 20 | 'image': '//div[@class="player"]/@data-poster', 21 | 'performers': '', 22 | 'tags': '', 23 | 'duration': '', 24 | 'trailer': '//div[@class="player"]/@data-video-hd', 25 | 'external_id': r'.*-(\d+)\.htm', 26 | 'pagination': '/tour/browse/most-recent/page%s.html', 27 | 'type': 'Scene', 28 | } 29 | 30 | def get_scenes(self, response): 31 | meta = response.meta 32 | scenes = response.xpath('//div[contains(@class,"padding half")]/a[contains(@class,"font-color-orange")]/@href').getall() 33 | for scene in scenes: 34 | if re.search(self.get_selector_map('external_id'), scene): 35 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 36 | -------------------------------------------------------------------------------- /scenes/siteSwallowbay.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteSwallowbaySpider(BaseSceneScraper): 7 | name = 'Swallowbay' 8 | network = 'Swallowbay' 9 | parent = 'Swallowbay' 10 | site = 'Swallowbay' 11 | 12 | start_urls = [ 13 | 'https://swallowbay.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1/text()', 18 | 'description': '//div[@class="content-desc more-desc"]//text()', 19 | 'date': '//div[@class="content-date"]', 20 | 're_date': r'(\d{1,2}\w{1,2}? \w+ \d{4})', 21 | 'date_formats': ['%d %b %Y'], 22 | 'image': '//meta[@property="og:image"]/@content', 23 | 'performers': '//div[@class="content-models"]/a//text()', 24 | 'tags': '//div[@class="content-tags"]//a/text()', 25 | 'trailer': '//dl8-video/source[1]/@src', 26 | 'external_id': r'video/(.*)\.html', 27 | 'pagination': '/videos/page%s.html' 28 | } 29 | 30 | def get_scenes(self, response): 31 | scenes = response.xpath('//div[@class="item-name"]/a/@href').getall() 32 | for scene in scenes: 33 | if re.search(self.get_selector_map('external_id'), scene): 34 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 35 | -------------------------------------------------------------------------------- /scenes/siteDefeatedSexFight.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class Spider(BaseSceneScraper): 7 | name = 'DefeatedSexFight' 8 | network = 'Hentaied' 9 | parent = 'Defeated Sex Fight' 10 | site = 'Defeated Sex Fight' 11 | 12 | start_urls = [ 13 | 'https://defeatedsexfight.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1/text()', 18 | 'description': '//div[contains(@class,"cont")]/p/text()', 19 | 'date': '//meta[@property="article:published_time"]/@content', 20 | 'image': '//meta[@property="og:image"]/@content', 21 | 'performers': '//div[contains(@class,"tagsmodels")]/a/text()', 22 | 'tags': '//ul[@class="post-categories"]/li/a/text()', 23 | 'duration': '//div[@class="duration"]/text()', 24 | 'trailer': '', 25 | 'external_id': r'com/(.*)/', 26 | 'pagination': '/all-videos/page/%s/', 27 | 'type': 'Scene', 28 | } 29 | 30 | def get_scenes(self, response): 31 | scenes = response.xpath('//center[@class="vidcont"]/a/@href').getall() 32 | for scene in scenes: 33 | if re.search(self.get_selector_map('external_id'), scene): 34 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 35 | -------------------------------------------------------------------------------- /scenes/siteVlogXXX.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteVlogXXXSpider(BaseSceneScraper): 7 | name = 'VlogXXX' 8 | network = 'VlogXXX' 9 | parent = 'VlogXXX' 10 | 11 | start_urls = [ 12 | 'https://vlogxxx.com', 13 | ] 14 | 15 | selector_map = { 16 | 'title': '//h1/text()', 17 | 'description': '//div[@id="trailer-data"]/div/p/text()', 18 | 'date': '//p[@class="date"]/text()', 19 | 'image': '//div[@id="noMore"]/img/@src', 20 | 'performers': '//h3[contains(text(),"pornstars")]/following-sibling::a/text()', 21 | 'tags': '//h3[contains(text(),"Categories")]/following-sibling::a/text()', 22 | 'external_id': r'updates/(.*).html', 23 | 'trailer': '', 24 | 'pagination': '/categories/movies_%s_d.html' 25 | } 26 | 27 | def get_scenes(self, response): 28 | scenes = response.xpath('//div[contains(@class,"thumb-pic")]/a[1]/@href').getall() 29 | for scene in scenes: 30 | if re.search(self.get_selector_map('external_id'), scene): 31 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 32 | 33 | def get_id(self, response): 34 | externid = super().get_id(response) 35 | return externid.lower() 36 | -------------------------------------------------------------------------------- /scenes/siteFutanarica.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteFutanaricaSpider(BaseSceneScraper): 7 | name = 'Futanarica' 8 | network = 'Futanarica' 9 | parent = 'Futanarica' 10 | site = 'Futanarica' 11 | 12 | start_urls = [ 13 | 'https://futanarica.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h3[@class="post_title entry-title"]/text()', 18 | 'description': '//div[contains(@class, "post_content")]/p[1]/text()', 19 | 'date': '//meta[@property="article:published_time"]/@content', 20 | 'image': '//div[contains(@class, "post_content")]/a/img/@src', 21 | 'performers': '', 22 | 'tags': '', 23 | 'external_id': r'.*/(.*?)/$', 24 | 'trailer': '//div[contains(@class, "post_content")]/a/@href', 25 | 'pagination': '/releases/page/%s/' 26 | } 27 | 28 | def get_scenes(self, response): 29 | scenes = response.xpath('//article/div/a/@href').getall() 30 | for scene in scenes: 31 | if re.search(self.get_selector_map('external_id'), scene): 32 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 33 | 34 | def get_tags(self, response): 35 | return ['3D CG', 'Animation', 'Hentai', 'Futanari'] 36 | -------------------------------------------------------------------------------- /scenes/sitePlayboyTV.py: -------------------------------------------------------------------------------- 1 | import re 2 | from tpdb.BaseSceneScraper import BaseSceneScraper 3 | 4 | 5 | class SitePlayboyTVSpider(BaseSceneScraper): 6 | name = 'PlayboyTV' 7 | site = 'Playboy TV' 8 | parent = 'Playboy TV' 9 | network = 'Playboy' 10 | 11 | start_urls = [ 12 | 'https://www.playboytv.com' 13 | ] 14 | 15 | selector_map = { 16 | 'external_id': r'', 17 | 'pagination': '/episodes?page=%s&selected=episodes', 18 | } 19 | 20 | def get_scenes(self, response): 21 | scenes = response.xpath('//li[@class="item"]') 22 | for scene in scenes: 23 | item = self.init_scene() 24 | 25 | show = scene.xpath('.//h3/text()').get() 26 | episode = scene.xpath('.//p[@class="subtitle"]/text()').get() 27 | item['title'] = f"{show} - {episode}" 28 | 29 | item['image'] = scene.xpath('.//img/@data-src').get() 30 | item['image_blob'] = self.get_image_blob_from_link(item['image']) 31 | item['id'] = re.search(r'.*/(\d+)/', item['image']).group(1) 32 | item['site'] = 'Playboy TV' 33 | item['parent'] = 'Playboy TV' 34 | item['network'] = 'Playboy' 35 | 36 | item['url'] = self.format_link(response, scene.xpath('.//a[@class="cardLink"]/@href').get()) 37 | 38 | yield item 39 | -------------------------------------------------------------------------------- /performers/siteTransVRPerformer.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | 3 | from tpdb.BasePerformerScraper import BasePerformerScraper 4 | 5 | 6 | class SiteTransVRPerformerSpider(BasePerformerScraper): 7 | selector_map = { 8 | 'name': '//h3[@class="modelname"]/text()', 9 | 'image': '//div[@class="model_photo"]/img/@src', 10 | 'image_blob': True, 11 | 'bio': '//div[@class="model_details clear"]/div[@id="bio"]/ul[1]/li[1]/text()', 12 | 'ethnicity': '//div[@class="model_details clear"]//b[contains(text(), "Ethnicity")]/following-sibling::text()', 13 | 'nationality': '//div[@class="model_details clear"]//b[contains(text(), "Nationality")]/following-sibling::text()', 14 | 15 | 'pagination': '/tour/models/%s/latest/?g=', 16 | 'external_id': r'model/(.*)/' 17 | } 18 | 19 | name = 'TransVRPerformer' 20 | network = 'Grooby Network' 21 | 22 | start_urls = [ 23 | 'https://www.transvr.com', 24 | ] 25 | 26 | def get_gender(self, response): 27 | return 'Trans Female' 28 | 29 | def get_performers(self, response): 30 | performers = response.xpath('//div[@class="modelphoto"]/a/@href').getall() 31 | for performer in performers: 32 | yield scrapy.Request(url=self.format_link(response, performer), callback=self.parse_performer, cookies=self.cookies, headers=self.headers) 33 | -------------------------------------------------------------------------------- /performers/siteGirlsOutWestPerformer.py: -------------------------------------------------------------------------------- 1 | from tpdb.BasePerformerScraper import BasePerformerScraper 2 | 3 | 4 | class GirlsOutWestPerformerSpider(BasePerformerScraper): 5 | selector_map = { 6 | 'pagination': '/models//models_%s_d.html', 7 | 'external_id': r'model/(.*)/' 8 | } 9 | 10 | name = 'GirlsOutWestPerformer' 11 | network = 'Girls Out West' 12 | 13 | start_urls = [ 14 | 'https://tour.girlsoutwest.com', 15 | ] 16 | 17 | def get_performers(self, response): 18 | performers = response.xpath('//div[@class="modelPic"]') 19 | for performer in performers: 20 | item = self.init_performer() 21 | 22 | perf_name = performer.xpath('.//h5/a/text()').get() 23 | item['name'] = self.cleanup_title(perf_name.strip()) 24 | image = performer.xpath('.//img/@src0_1x') 25 | if image: 26 | item['image'] = self.format_link(response, image.get()) 27 | item['image_blob'] = self.get_image_blob_from_link(item['image']) 28 | else: 29 | item['image'] = "" 30 | item['image_blob'] = "" 31 | item['gender'] = 'Female' 32 | item['network'] = 'Girls Out West' 33 | item['url'] = self.format_link(response, performer.xpath('.//h5/a/@href').get()) 34 | 35 | yield item 36 | -------------------------------------------------------------------------------- /scenes/sitePeeOnHer.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SitePeeOnHerSpider(BaseSceneScraper): 7 | name = 'PeeOnHer' 8 | network = "VIPissy Cash" 9 | parent = "Pee On Her" 10 | site = "Pee On Her" 11 | 12 | start_urls = [ 13 | 'https://www.peeonher.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1[@class="page_title"]/text()', 18 | 'description': '//strong[@class="title"]/following-sibling::p/text()', 19 | 'date': '//strong[@class="title" and contains(text(),"Published")]/following-sibling::text()', 20 | 'image': '//div[@class="update_box"]/img/@src', 21 | 'performers': '//strong[contains(text(),"Starring")]/following-sibling::a/text()', 22 | 'tags': '//strong[contains(text(),"Tags")]/following-sibling::a/text()', 23 | 'external_id': r'.*/(.*?)/$', 24 | 'trailer': '//div[@id="videoplayer"]//video/source/@src', 25 | 'pagination': '/updates/page-%s/' 26 | } 27 | 28 | def get_scenes(self, response): 29 | scenes = response.xpath('//div[contains(@class,"item")]/a/@href').getall() 30 | for scene in scenes: 31 | if re.search(self.get_selector_map('external_id'), scene): 32 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 33 | -------------------------------------------------------------------------------- /scenes/sitePorkVendors.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SitePorkVendorsSpider(BaseSceneScraper): 7 | name = 'PorkVendors' 8 | network = 'Pork Vendors' 9 | parent = 'Pork Vendors' 10 | site = 'Pork Vendors' 11 | 12 | start_urls = [ 13 | 'https://porkvendors.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1[contains(@class,"title_bar")]/text()', 18 | 'description': '//p[@class="description-text"]/text()', 19 | 'date': '//label[contains(text(), "Date")]/following-sibling::p[1]/text()', 20 | 'date_formats': ['%Y-%m-%d'], 21 | 'image': '//video/@poster', 22 | 'performers': '//div[contains(@class,"videobg")]//span[@class="update_models"]/a/text()', 23 | 'tags': '//a[contains(@href, "/categories/")]/text()', 24 | 'trailer': '', 25 | 'external_id': r'.*/(.*?)\.htm', 26 | 'pagination': '/categories/movies_%s_d.html' 27 | } 28 | 29 | def get_scenes(self, response): 30 | meta = response.meta 31 | scenes = response.xpath('//a[@class="updateimg"]/@href').getall() 32 | for scene in scenes: 33 | if re.search(self.get_selector_map('external_id'), scene): 34 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 35 | -------------------------------------------------------------------------------- /scenes/siteUnlimitedMILFs.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteUnlimitedMILFsSpider(BaseSceneScraper): 8 | name = 'UnlimitedMILFs' 9 | network = 'New Sensations' 10 | parent = 'Unlimited MILFs' 11 | site = 'Unlimited MILFs' 12 | 13 | start_urls = [ 14 | 'https://network.newsensations.com', 15 | ] 16 | 17 | selector_map = { 18 | 'title': '//div[@class="update_title"]/text()', 19 | 'description': '//span[@class="update_description"]/text()', 20 | 'date': '//div[@class="cell update_date"]/text()', 21 | 're_date': r'Released: (.*)', 22 | 'date_formats': ['%m/%d/%Y'], 23 | 'image': '//video/@poster|//div[@id="hpromo"]/a/img/@src', 24 | 'performers': '//span[@class="update_models"]/a/text()', 25 | 'tags': '//span[@class="update_tags"]/a/text()', 26 | 'external_id': r'scenes/(.*).html', 27 | 'trailer': '', 28 | 'pagination': '/tour_um/updates/page_%s.html' 29 | } 30 | 31 | def get_scenes(self, response): 32 | scenes = response.xpath('//div[@class="update_details"]/a/@href').getall() 33 | for scene in scenes: 34 | if re.search(self.get_selector_map('external_id'), scene): 35 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 36 | -------------------------------------------------------------------------------- /scenes/siteVrAllure.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class VrAllureSpider(BaseSceneScraper): 8 | name = 'VrAllure' 9 | network = "Radical Entertainment" 10 | parent = "VrAllure" 11 | 12 | start_urls = [ 13 | 'https://vrallure.com/' 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//meta[@property="og:title"]/@content', 18 | 're_title': r'(.*) - ', 19 | 'description': '//p[@class="desc"]/span/text()', 20 | 'date': '//p[@class="publish-date"]/img/following-sibling::text()', 21 | 'image': '//meta[@property="og:image"]/@content', 22 | 'performers': '//p[@class="model-name"]/a/text()', 23 | 'tags': '//p[@class="tag-container"]/a/text()', 24 | 'trailer': '', 25 | 'external_id': '\\/scenes\\/(vr.*?)_', 26 | 'pagination': '/?page=%s' 27 | } 28 | 29 | def get_scenes(self, response): 30 | 31 | scenes = response.xpath( 32 | '//h4[@class="latest-scene-title"]/a/@href').getall() 33 | for scene in scenes: 34 | if '?nats' in scene: 35 | scene = re.search('(.*)\\?nats', scene).group(1) 36 | 37 | if re.search(self.get_selector_map('external_id'), scene): 38 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 39 | -------------------------------------------------------------------------------- /scenes/siteRadicalJizzlam.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteRadicalJizzlamSpider(BaseSceneScraper): 7 | name = 'RadicalJizzlam' 8 | site = 'Radical Jizzlam' 9 | parent = 'Radical Jizzlam' 10 | network = 'Radical Jizzlam' 11 | 12 | start_urls = [ 13 | 'https://www.radicaljizzlam.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1/text()', 18 | 'description': '//h1/following-sibling::p[1]//text()', 19 | 'date': '', 20 | 'image': '//script[contains(text(), "video_content")]/text()', 21 | 're_image': 'src0_3x.*?(http.*?)[\'\"]', 22 | 'performers': '', 23 | 'tags': '//li[@class="label"]/following-sibling::li/a/text()', 24 | 'duration': '', 25 | 'trailer': '', 26 | 'external_id': r'.*/(.*)?\.htm', 27 | 'pagination': '/tour/updates/page_%s.html', 28 | 'type': 'Scene', 29 | } 30 | 31 | def get_scenes(self, response): 32 | meta = response.meta 33 | scenes = response.xpath('//div[contains(@class, "item-video")]/div[@class="item-thumb"]/a/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 37 | -------------------------------------------------------------------------------- /scenes/siteWeAreHairy.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteWeAreHairySpider(BaseSceneScraper): 7 | name = 'WeAreHairy' 8 | network = 'We Are Hairy' 9 | parent = 'We Are Hairy' 10 | site = 'We Are Hairy' 11 | 12 | start_urls = [ 13 | 'https://www.wearehairy.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1/text()', 18 | 'description': '//div[@class="desc"]//h3[contains(text(), "Description")]/following-sibling::p/text()', 19 | 'date': '//time/@datetime', 20 | 'date_formats': ['%Y-%m-%d'], 21 | 'image': '//div[@id="video-wrapper"]//video/@poster', 22 | 'performers': '//div[@class="meet"]/div//a[contains(@href, "/models/")]/text()', 23 | 'tags': '//div[@class="tagline"]//a[contains(@href, "/categories/")]/text()', 24 | 'external_id': r'.*/(.*?)/', 25 | 'trailer': '//div[@id="video-wrapper"]//video/source/@src', 26 | 'pagination': '/categories/Movies/page%s.shtml' 27 | } 28 | 29 | def get_scenes(self, response): 30 | scenes = response.xpath('//div[@class="dvdtitle"]/a/@href').getall() 31 | for scene in scenes: 32 | if re.search(self.get_selector_map('external_id'), scene): 33 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 34 | -------------------------------------------------------------------------------- /scenes/siteBennryGreen.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class Spider(BaseSceneScraper): 7 | name = 'BennyGreen' 8 | network = 'Benny Green' 9 | parent = 'Benny Green' 10 | site = 'Benny Green' 11 | 12 | start_urls = [ 13 | 'https://www.bennygreen.it', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[@class="titolo-video"]/h2[1]/text()', 18 | 'description': '//div[@class="titolo-video"]/div[contains(@class,"captionvideo")]/text()', 19 | 'date': '', 20 | 'image': '//meta[@property="og:image"]/@content', 21 | 'performers': '//h4/a[contains(@href, "/pornostar/")]/text()', 22 | 'tags': '', 23 | 'duration': '', 24 | 'trailer': '//script[contains(text(), "qualityselector")]/text()', 25 | 're_trailer': r'hd1280.*?(http.*?)\'', 26 | 'external_id': r'/(\d+)-', 27 | 'pagination': '/new-video.php?next=%s&term=&categoria=&pornostar=&durata=&risoluzione=&shorting=', 28 | 'type': 'Scene', 29 | } 30 | 31 | def get_scenes(self, response): 32 | scenes = response.xpath('//a[@class="link-photo-home"]/@href').getall() 33 | for scene in scenes: 34 | if re.search(self.get_selector_map('external_id'), scene): 35 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 36 | -------------------------------------------------------------------------------- /scenes/siteInkaporn.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class siteInkaPornSpider(BaseSceneScraper): 7 | name = 'InkaPorn' 8 | network = 'InkaPorn' 9 | 10 | start_urls = [ 11 | 'https://www.inkaporn.com', 12 | 'https://www.inkasex.com', 13 | 'https://www.xekeko.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1/text()', 18 | 'description': '//p[contains(@itemprop, "description")]/text()', 19 | 'date': '//script[contains(text(), "uploadDate")]/text()', 20 | 're_date': r'(\d{4}-\d{2}-\d{2})', 21 | 'image': '//meta[@property="og:image"]/@content', 22 | 'performers': '', 23 | 'tags': '', 24 | 'duration': '', 25 | 'trailer': '', 26 | 'external_id': r'.*/(.*?)\.htm', 27 | 'pagination': '/videos/latest?page_id=%s', 28 | 'type': 'Scene', 29 | } 30 | 31 | def get_scenes(self, response): 32 | meta = response.meta 33 | scenes = response.xpath('//div[@class="video-title"]/a[1]/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 37 | 38 | def get_id(self, response): 39 | return super().get_id(response).lower() 40 | -------------------------------------------------------------------------------- /scenes/siteAffect3dStore.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | import scrapy 4 | # ~ from helpers.scrapy_flare.request import FlareRequest 5 | from tpdb.BaseSceneScraper import BaseSceneScraper 6 | 7 | 8 | class SiteAffect3dStoreSpider(BaseSceneScraper): 9 | name = 'Affect3dStore' 10 | network = 'Affect3dStore' 11 | parent = '' 12 | site = '' 13 | 14 | custom_settings = {'DOWNLOADER_MIDDLEWARES': {'tpdb.helpers.scrapy_flare.middleware.FlareMiddleware': 543}} 15 | 16 | start_urls = [ 17 | 'https://affect3dstore.com', 18 | ] 19 | 20 | selector_map = { 21 | 'title': '//h1[@class="page-title"]/span/text()', 22 | 'description': '//div[@itemprop="description"]/p/text()', 23 | 'date': '', 24 | 'image': '', 25 | 'performers': '', 26 | 'tags': '', 27 | 'external_id': r'.*/(.*?).html', 28 | 'trailer': '', 29 | 'pagination': '/animation/short-clips.html?p=%s' 30 | } 31 | 32 | def get_scenes(self, response): 33 | print(response.text) 34 | scenes = response.xpath('//div[@class="product-info"]/div/h3/a/@href').getall() 35 | for scene in scenes: 36 | if re.search(self.get_selector_map('external_id'), scene): 37 | yield scrapy.FlareRequest(url=self.format_link(response, scene), callback=self.parse_scene) 38 | 39 | def parse_scene(self, response): 40 | print(response.text) 41 | -------------------------------------------------------------------------------- /scenes/siteS3xus.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteS3xusSpider(BaseSceneScraper): 8 | name = 'S3xus' 9 | site = 'S3xus' 10 | parent = 'S3xus' 11 | network = 'S3xus' 12 | 13 | start_urls = [ 14 | 'https://s3xus.com/', 15 | ] 16 | 17 | selector_map = { 18 | 'title': '//h1/text()', 19 | 'description': '//meta[@property="og:description"]/@content', 20 | 'date': '//ul[@class="info-wrapper"]/li[3]/span/text()', 21 | 'date_formats': ["%b %d, %Y"], 22 | 'image': '//meta[@property="og:image"]/@content', 23 | 'image_blob': '//meta[@property="og:image"]/@content', 24 | 'performers': '//div[@class="model-thumb"]/a/img/@alt', 25 | 'tags': '//div[@class="tag-name"]/a/text()|//p[@class="tags"]/a/text()', 26 | 'duration': '//ul[@class="info-wrapper"]/li[1]/span/text()', 27 | 'external_id': r'scenes/(.+)', 28 | 'trailer': '', 29 | 'pagination': '/scenes?page=%s&order_by=publish_date&sort_by=desc' 30 | } 31 | 32 | def get_scenes(self, response): 33 | scenes = response.xpath('//div[@class="card"]/a/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 37 | -------------------------------------------------------------------------------- /scenes/siteAngelaSommers.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteAngelaSommersSpider(BaseSceneScraper): 7 | name = 'AngelaSommers' 8 | network = 'Angela Sommers' 9 | parent = 'Angela Sommers' 10 | site = 'Angela Sommers' 11 | 12 | start_urls = [ 13 | 'https://angelasommers.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//span[@class="update_title"]/text()', 18 | 'description': '//span[@class="latest_update_description"]/text()', 19 | 'date': '//div[@class="update_block_info"]//text()[contains(., "/")]', 20 | 're_date': r'(\d{1,2}/\d{1,2}/\d{4})', 21 | 'date_formats': ['%m/%d/%Y'], 22 | 'image': '//meta[@property="og:image"]/@content', 23 | 'performers': '//span[contains(@class, "tour_update_models")]/a/text()', 24 | 'tags': '//span[contains(@class, "update_tags")]/a/text()', 25 | 'external_id': r'updates/(.*?).html', 26 | 'trailer': '', 27 | 'pagination': '/categories/Movies_%s.html' 28 | } 29 | 30 | def get_scenes(self, response): 31 | scenes = response.xpath('//div[contains(@class, "updateItem")]/a/@href').getall() 32 | for scene in scenes: 33 | if re.search(self.get_selector_map('external_id'), scene): 34 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 35 | -------------------------------------------------------------------------------- /scenes/siteTheNudie.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteTheNudieSpider(BaseSceneScraper): 8 | name = 'TheNudie' 9 | network = 'The Nudie' 10 | 11 | start_urls = [ 12 | 'https://www.thenudie.com', 13 | ] 14 | 15 | selector_map = { 16 | 'title': '//h1/text()', 17 | 'description': '//div[contains(text(), "Description")]/following-sibling::div/text()', 18 | 'date': '', 19 | 'image': '//div[contains(@class,"w-full")]//@poster-url', 20 | 'performers': '//div[contains(text(), "Starring")]/following-sibling::div/span/div/text()', 21 | 'tags': '//div[contains(text(), "Categories")]/following-sibling::div/span/a/text()', 22 | 'external_id': r'.*/(.*?)$', 23 | 'trailer': '', 24 | 'pagination': '/scenes?page=%s' 25 | } 26 | 27 | def get_scenes(self, response): 28 | scenes = response.xpath('//div[contains(@class,"w-full")]/a/@href').getall() 29 | for scene in scenes: 30 | scene = scene.strip() 31 | if re.search(self.get_selector_map('external_id'), scene) and "signup" not in scene: 32 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 33 | 34 | def get_site(self, response): 35 | return "The Nudie" 36 | 37 | def get_parent(self, response): 38 | return "The Nudie" 39 | -------------------------------------------------------------------------------- /scenes/siteWhoaBoyz.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteWhoaBoyzSpider(BaseSceneScraper): 8 | name = 'WhoaBoyz' 9 | network = 'Whoa Boyz' 10 | 11 | start_urls = [ 12 | 'https://www.whoaboyz.com', 13 | ] 14 | 15 | selector_map = { 16 | 'title': '//div[@class="trailer"]/h2/text()', 17 | 'description': '//div[@class="trailer"]/p[1]/text()', 18 | 'date': '//p[@class="date-trailer"]/span/text()', 19 | 'date_formats': ['%m/%d/%Y'], 20 | 'image': '//meta[@property="og:image"]/@content', 21 | 'performers': '//div[@class="trailer"]//span[@class="tour_update_models"]/a/text()', 22 | 'tags': '//p[@class="tags"]/a/text()', 23 | 'external_id': r'.*/(.*?).html', 24 | 'trailer': '//div[@class="trailervid"]/div/video/source/@src', 25 | 'pagination': '/tour/categories/Movies_%s_d.html' 26 | } 27 | 28 | def get_scenes(self, response): 29 | scenes = response.xpath('//div[@class="item-title"]/h3/a/@href').getall() 30 | for scene in scenes: 31 | if re.search(self.get_selector_map('external_id'), scene): 32 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 33 | 34 | def get_site(self, response): 35 | return "Whoa Boyz" 36 | 37 | def get_parent(self, response): 38 | return "Whoa Boyz" 39 | -------------------------------------------------------------------------------- /scenes/siteLilMissy.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteLilMissySpider(BaseSceneScraper): 7 | name = 'LilMissy' 8 | network = 'LilMissy' 9 | parent = 'LilMissy' 10 | site = 'LilMissy' 11 | 12 | start_urls = [ 13 | 'https://lilmissy.uk', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[contains(@class, "contentD")]/h1/text()', 18 | 'description': '//div[contains(@class, "contentD")]//div[contains(@class, "Description")]/p/text()', 19 | 'date': '', 20 | 'image': '//meta[@name="twitter:image"]/@content', 21 | 'performers': '//div[@class="models"]/ul/li/a/text()', 22 | 'tags': '//div[@class="tags"]/ul/li/a/text()', 23 | 'duration': '//div[contains(@class, "contentD")]//i[contains(@class, "clock")]/following-sibling::text()', 24 | 'trailer': '', 25 | 'external_id': r'.*/(.*?)$', 26 | 'pagination': '/updates?page=%s', 27 | 'type': 'Scene', 28 | } 29 | 30 | def get_scenes(self, response): 31 | meta = response.meta 32 | scenes = response.xpath('//div[contains(@class, "videoPic")]/a/@href').getall() 33 | for scene in scenes: 34 | if re.search(self.get_selector_map('external_id'), scene) and "photo-set" not in scene: 35 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 36 | -------------------------------------------------------------------------------- /performers/siteClubDomPerformer.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | 3 | from tpdb.BasePerformerScraper import BasePerformerScraper 4 | 5 | 6 | class SiteClubDomPerformerSpider(BasePerformerScraper): 7 | selector_map = { 8 | 'name': '//div[@class="title_bar"]/span/text()', 9 | 'image': '//div[@class="table"]//img[contains(@class, "model_bio_thumb")]/@src0|//div[@class="table"]//img[contains(@class, "model_bio_thumb")]/@src0_3x|//div[@class="table"]//img[contains(@class, "model_bio_thumb")]/@src0_2x|//div[@class="table"]//img[contains(@class, "model_bio_thumb")]/@src', 10 | 'image_blob': True, 11 | 12 | 'pagination': '/vod/models/models_%s_d.html', 13 | 'external_id': r'model/(.*)/' 14 | } 15 | 16 | name = 'ClubDomPerformer' 17 | network = 'Club Dom' 18 | 19 | start_urls = [ 20 | 'https://www.clubdom.com', 21 | ] 22 | 23 | def get_gender(self, response): 24 | return 'Female' 25 | 26 | def get_performers(self, response): 27 | performers = response.xpath('//div[@class="update_details"]/a[1]/@href').getall() 28 | for performer in performers: 29 | yield scrapy.Request(url=self.format_link(response, performer), callback=self.parse_performer, cookies=self.cookies, headers=self.headers) 30 | 31 | def get_image(self, response): 32 | image = super().get_image(response) 33 | if "/content/" not in image: 34 | image = "" 35 | return image 36 | -------------------------------------------------------------------------------- /scenes/siteAngeloGodshackOfficial.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteAngeloGodshackOfficialSpider(BaseSceneScraper): 7 | name = 'AngeloGodshackOfficial' 8 | network = 'Angelo Godshack' 9 | parent = 'Angelo Godshack Official' 10 | site = 'Angelo Godshack Official' 11 | 12 | start_urls = [ 13 | 'https://angelogodshackxxx.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[@class="video-detail"]//div[contains(@class, "header")]/h1/text()', 18 | 'description': '//div/strong[contains(text(), "Description")]/../following-sibling::p/text()', 19 | 'date': '', 20 | 'image': '//video-js/@data-poster', 21 | 'performers': '//div[contains(@class,"video-detail__description")]//div[@class="title"]/text()', 22 | 'tags': '', 23 | 'duration': '', 24 | 'trailer': '//video/source/@src', 25 | 'external_id': r'.*/(.*?)$', 26 | 'pagination': '/newest?page=%s', 27 | 'type': 'Scene', 28 | } 29 | 30 | def get_scenes(self, response): 31 | meta = response.meta 32 | scenes = response.xpath('//div[contains(@class, "library-item")]/a/@href').getall() 33 | for scene in scenes: 34 | if re.search(self.get_selector_map('external_id'), scene): 35 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 36 | -------------------------------------------------------------------------------- /scenes/siteDominicPacifico.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteDominicPacificoSpider(BaseSceneScraper): 7 | name = 'DominicPacifico' 8 | site = 'Dominic Pacifico' 9 | parent = 'Dominic Pacifico' 10 | network = 'Dominic Pacifico' 11 | 12 | start_urls = [ 13 | 'https://dominicpacifico.com' 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[contains(@class, "video-block")]/div[1]/h2/text()', 18 | 'description': '//div[contains(@class, "video-block")]//h4/text()', 19 | 'image': '//meta[@property="og:image"]/@content', 20 | 'type': 'Scene', 21 | 'external_id': r'videos/(\d+)', 22 | 'pagination': '/scenes.html?start=%s', 23 | } 24 | 25 | def get_next_page_url(self, base, page): 26 | page = str((int(page) - 1) * 18) 27 | return self.format_url(base, self.get_selector_map('pagination') % page) 28 | 29 | def get_scenes(self, response): 30 | meta = response.meta 31 | scenes = response.xpath('//div[contains(@class, "img-wrapper")]/a/@href').getall() 32 | for scene in scenes: 33 | if "?nats" in scene: 34 | scene = re.search(r'(.*?)\?nat', scene).group(1) 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 37 | -------------------------------------------------------------------------------- /performers/siteOnlyGrandpaPerformer.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | 4 | from tpdb.BasePerformerScraper import BasePerformerScraper 5 | 6 | 7 | class PerformerSpider(BasePerformerScraper): 8 | selector_map = { 9 | 'name': '//div[contains(@class, "model-profile")]//ancestor::div[contains(@class, "container")]//h2/text()', 10 | 'image': '//div[contains(@class, "model-profile-thumb")]/img/@src', 11 | 'image_blob': True, 12 | 'bio': '//div[contains(@class, "model-profile")]//ancestor::div[contains(@class, "container")]//div[contains(@class, "section-text-content")]/p//text()', 13 | 'haircolor': '//strong[contains(text(), "Hair Color:")]/following-sibling::text()', 14 | 'nationality': '//strong[contains(text(), "Nationality:")]/following-sibling::text()', 15 | 'pagination': '/models/models_%s.html', 16 | 'external_id': r'model/(.*)/' 17 | } 18 | 19 | name = 'OnlyGrandpaPerformer' 20 | network = 'Only Grandpa' 21 | 22 | start_urls = [ 23 | 'https://onlygrandpa.com', 24 | ] 25 | 26 | def get_gender(self, response): 27 | return 'Female' 28 | 29 | def get_performers(self, response): 30 | performers = response.xpath('//div[@class="item-model-thumb"]/a/@href').getall() 31 | for performer in performers: 32 | yield scrapy.Request(url=self.format_link(response, performer), callback=self.parse_performer, cookies=self.cookies, headers=self.headers) 33 | -------------------------------------------------------------------------------- /scenes/siteNylonUp.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteNylonUpSpider(BaseSceneScraper): 7 | name = 'NylonUp' 8 | network = 'Nylon Up' 9 | parent = 'Nylon Up' 10 | site = 'Nylon Up' 11 | 12 | start_urls = [ 13 | 'https://www.nylonup.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[contains(@class, "videoDetails")]/h3/text()', 18 | 'description': '//div[contains(@class, "videoDetails")]/p/text()', 19 | 'date': '//div[contains(@class, "videoInfo")]/p/span[contains(text(), "Date")]/following-sibling::text()', 20 | 'date_formats': ['%B %d, %Y'], 21 | 'image': '//script[contains(text(), "poster=")]/text()', 22 | 're_image': r'poster=\s?\"(.*\.jpg)', 23 | 'performers': '//li[@class="update_models"]/a/text()', 24 | 'tags': '//li[@class="label"]/following-sibling::li/a[contains(@href, "/categories/")]/text()', 25 | 'external_id': r'trailers/(.*?)\.html', 26 | 'trailer': '', 27 | 'pagination': '/tour/categories/movies/%s/latest/' 28 | } 29 | 30 | def get_scenes(self, response): 31 | scenes = response.xpath('//div[@class="item-thumb"]/a/@href').getall() 32 | for scene in scenes: 33 | if re.search(self.get_selector_map('external_id'), scene): 34 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 35 | -------------------------------------------------------------------------------- /scenes/siteTheFemaleOrgasm.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteTheFemaleOrgasmSpider(BaseSceneScraper): 7 | name = 'TheFemaleOrgasm' 8 | network = 'The Female Orgasm' 9 | parent = 'The Female Orgasm' 10 | site = 'The Female Orgasm' 11 | 12 | start_urls = [ 13 | 'https://www.the-female-orgasm.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[contains(@class, "videoDetails")]/h3/text()', 18 | 'description': '//span[@class="latest_update_description"]/text()', 19 | 'date': '//span[@class="update_date"]/text()', 20 | 'date_formats': ['%m/%d/%Y'], 21 | 'image': '//div[@class="player-window-play"]/following-sibling::img/@src0_2x', 22 | 'performers': '//li[@class="update_models"]/a/text()', 23 | 'tags': '//li[contains(text(), "Tags:")]/following-sibling::li/a/text()', 24 | 'trailer': '', 25 | 'external_id': r'.*/(.*)\.htm', 26 | 'pagination': '/explore/categories/Movies/%s/latest/' 27 | } 28 | 29 | def get_scenes(self, response): 30 | meta = response.meta 31 | scenes = response.xpath('//div[contains(@class, "videothumb")]/a/@href').getall() 32 | for scene in scenes: 33 | if re.search(self.get_selector_map('external_id'), scene): 34 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 35 | -------------------------------------------------------------------------------- /scenes/siteBondageCafe.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteBondageCafeSpider(BaseSceneScraper): 8 | name = 'BondageCafe' 9 | network = 'Bondage Cafe' 10 | 11 | start_urls = [ 12 | 'https://www.bondagecafe.com/', 13 | ] 14 | 15 | selector_map = { 16 | 'title': '//span[@class="update_title"]/text()', 17 | 'description': '//span[@class="latest_update_description"]/text()', 18 | 'date': '//span[@class="availdate"]/text()', 19 | 'image': '//img[contains(@class,"large_update_thumb")]/@src', 20 | 'performers': '//span[@class="tour_update_models"]/a/text()', 21 | 'tags': '//span[@class="update_tags"]/a/text()', 22 | 'external_id': r'.*/(w.*?-\d{2,5})-', 23 | 'trailer': '//div[@class="update_image"]/a[1]/@onclick', 24 | 're_trailer': r'\'(.*\.mp4)\'', 25 | 'pagination': '/categories/movies_%s_d.html#' 26 | } 27 | 28 | def get_scenes(self, response): 29 | scenes = response.xpath('//div[@class="updateItem"]/a/@href').getall() 30 | for scene in scenes: 31 | if re.search(self.get_selector_map('external_id'), scene): 32 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 33 | 34 | def get_site(self, response): 35 | return "Bondage Cafe" 36 | 37 | def get_parent(self, response): 38 | return "Bondage Cafe" 39 | -------------------------------------------------------------------------------- /scenes/siteJoymii.py: -------------------------------------------------------------------------------- 1 | # This has been moved to AdultTime API Scraper 2 | 3 | import scrapy 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class JoyMiiSpider(BaseSceneScraper): 8 | name = 'JoyMii' 9 | network = 'JoyMii' 10 | parent = 'JoyMii' 11 | 12 | start_urls = [ 13 | # ~ 'https://joymii.com' 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1[@class="title"]/text()', 18 | 'description': '//p[@class="text"]/text()', 19 | 'date': "//span[@class='post-date']/text()", 20 | 'image': '//div[@id="video-set-details"]//video[@id="video-playback"]/@poster', 21 | 'performers': '//h2[@class="starring-models"]/a/text()', 22 | 'tags': "", 23 | 'external_id': 'code\\/(.+)', 24 | 'trailer': '', 25 | 'pagination': '/get-content-list?blockName=latest&sortType=release_date&limit=36&onlyPhotos=&onlyVideos=1&sorting=date&tags=&actors=&page=%s' 26 | } 27 | 28 | def get_scenes(self, response): 29 | scenes = response.xpath( 30 | "//div[contains(@class, 'box-results')]//div[contains(@class, 'set')]") 31 | for scene in scenes: 32 | meta = { 33 | 'date': self.parse_date(scene.css('.release_date::text').get()).isoformat() 34 | } 35 | 36 | link = self.format_link(response, scene.css('a::attr(href)').get()) 37 | 38 | yield scrapy.Request(url=link, callback=self.parse_scene, meta=meta) 39 | -------------------------------------------------------------------------------- /scenes/siteDeviantAss.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteDeviantAssSpider(BaseSceneScraper): 7 | name = 'DeviantAss' 8 | network = 'Deviant Ass' 9 | parent = 'Deviant Ass' 10 | site = 'Deviant Ass' 11 | 12 | start_urls = [ 13 | 'https://deviantass.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[contains(@class, "updatesBlock")]/div[contains(@class, "section-heading")]/h3/text()', 18 | 'description': '//div[@class="wrapper"]//span[contains(@class,"tour_update_models")]/../following-sibling::div/text()', 19 | 'date': '//div[@class="updateDetails"]/div[1]/div[1]/p[1]/text()', 20 | 'image': '//meta[@property="og:image"]/@content|//meta[@property="twitter:image"]/@content', 21 | 'performers': '//div[@class="wrapper"]//span[contains(@class,"tour_update_models")]/a/text()', 22 | 'tags': '', 23 | 'trailer': '', 24 | 'external_id': r'.*/(.*)?\.html', 25 | 'pagination': '/categories/movies_%s_d.html?lang=0' 26 | } 27 | 28 | def get_scenes(self, response): 29 | meta = response.meta 30 | scenes = response.xpath('//a[contains(@href, "/updates/")]/@href').getall() 31 | for scene in scenes: 32 | if re.search(self.get_selector_map('external_id'), scene): 33 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 34 | -------------------------------------------------------------------------------- /scenes/siteArtOfBlowjob.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteArtOfBlowjobSpider(BaseSceneScraper): 7 | name = 'ArtOfBlowjob' 8 | network = 'Art of Blowjob' 9 | parent = 'Art of Blowjob' 10 | site = 'Art of Blowjob' 11 | 12 | start_urls = [ 13 | 'https://theartofblowjob.com/', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[@class="section_title"]/text()', 18 | 'description': '//div[contains(@class,"preserve-newlines")]/text()', 19 | 'date': '', 20 | 'image': '//section[@id="about"]/img/@src', 21 | 'performers': '', 22 | 'tags': '', 23 | 'external_id': r'videos/(\d+)/', 24 | 'trailer': '', 25 | 'pagination': '/display/updatelist/' 26 | } 27 | 28 | def start_requests(self): 29 | url = "https://www.theartofblowjob.com/display/updatelist/" 30 | yield scrapy.Request(url, callback=self.get_scenes, meta={'page': self.page}, headers=self.headers, cookies=self.cookies) 31 | 32 | def get_scenes(self, response): 33 | scenes = response.xpath('//div[@class="video-thumbnail"]/../../span/a/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 37 | 38 | def get_tags(self, response): 39 | return ['Blowjob'] 40 | -------------------------------------------------------------------------------- /scenes/siteLinaMila.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | import scrapy 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteLinaMilaSpider(BaseSceneScraper): 8 | name = 'LinaMila' 9 | network = 'LinaMila' 10 | parent = 'LinaMila' 11 | site = 'LinaMila' 12 | 13 | start_urls = [ 14 | 'https://www.linamila.tv', 15 | ] 16 | 17 | selector_map = { 18 | 'title': '//h1[contains(@class, "title")]/text()', 19 | 'description': '//div[@class="inner"]/div[contains(@class, "custom_text")]/p/text()', 20 | 'date': '//i[contains(@class, "calendar")]/following-sibling::span[1]/text()', 21 | 'image': '//meta[@property="og:image"]/@content', 22 | 'performers': '', 23 | 'tags': '', 24 | 'duration': '//div[@class="view-card" and contains(text(), "minutes")]/span/text()', 25 | 'trailer': '', 26 | 'external_id': r'.*/(.*?)$', 27 | 'pagination': '/collections/page/%s?media=video', 28 | 'type': 'Scene', 29 | } 30 | 31 | def get_scenes(self, response): 32 | meta = response.meta 33 | scenes = response.xpath('//div[@class="card"]/a/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 37 | 38 | def get_performers(self, response): 39 | return ['Lina Mila'] 40 | -------------------------------------------------------------------------------- /performers/siteSpunkWorthyPerformer.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | 4 | from tpdb.BasePerformerScraper import BasePerformerScraper 5 | 6 | 7 | class PerformerSpider(BasePerformerScraper): 8 | selector_map = { 9 | 'name': '//div[@class="head"]/p[1]/span/text()', 10 | 'image': '//div[@class="content"]/div[1]/img/@src', 11 | 'image_blob': True, 12 | 'bio': '', 13 | 'gender': '', 14 | 'astrology': '', 15 | 'birthday': '', 16 | 'birthplace': '', 17 | 'cupsize': '', 18 | 'ethnicity': '', 19 | 'eyecolor': '', 20 | 'fakeboobs': '', 21 | 'haircolor': '', 22 | 'height': '', 23 | 'measurements': '', 24 | 'nationality': '', 25 | 'piercings': '', 26 | 'tattoos': '', 27 | 'weight': '', 28 | 29 | 'pagination': '/preview/guys?page=%s', 30 | 'external_id': r'model/(.*)/' 31 | } 32 | 33 | name = 'SpunkWorthyPerformer' 34 | network = 'SpunkWorthy' 35 | 36 | start_urls = [ 37 | 'https://spunkworthy.com', 38 | ] 39 | 40 | def get_gender(self, response): 41 | return 'Male' 42 | 43 | def get_performers(self, response): 44 | performers = response.xpath('//div[@class="hs"]/p/a/@href').getall() 45 | for performer in performers: 46 | yield scrapy.Request(url=self.format_link(response, performer), callback=self.parse_performer, cookies=self.cookies, headers=self.headers) 47 | 48 | 49 | -------------------------------------------------------------------------------- /scenes/siteLukesPOV.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteLukesPOVSpider(BaseSceneScraper): 8 | name = 'LukesPOV' 9 | network = 'Lukes POV' 10 | parent = 'Lukes POV' 11 | site = 'Lukes POV' 12 | 13 | start_urls = [ 14 | 'https://lukespov.com/', 15 | ] 16 | 17 | selector_map = { 18 | 'title': '//h1/text()|//h1/strong/text()', 19 | 'description': '//h1/following-sibling::p[not(contains(.//a/@data-wpel-link, "internal"))]/text()', 20 | 'date': '//meta[@property="ya:ovs:upload_date"]/@content', 21 | 're_date': r'(\d{4}-\d{2}-\d{2})', 22 | 'date_formats': ['%Y-%m-%d'], 23 | 'image': '//meta[@property="og:image"][1]/@content', 24 | 'performers': '//strong[contains(text(), "Starring")]/a/text()', 25 | 'tags': '', 26 | 'duration': '//strong[contains(text(), "Duration")]/following-sibling::text()[1]', 27 | 'trailer': '', 28 | 'external_id': r'.*/(.*?)/', 29 | 'type': 'Scene', 30 | 'pagination': '/pov-blowjob-videos/page/%s/' 31 | } 32 | 33 | def get_scenes(self, response): 34 | meta = response.meta 35 | scenes = response.xpath('//h2/a/@href').getall() 36 | for scene in scenes: 37 | if re.search(self.get_selector_map('external_id'), scene): 38 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 39 | -------------------------------------------------------------------------------- /scenes/siteFragileSlave.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteFragileSlaveSpider(BaseSceneScraper): 7 | name = 'FragileSlave' 8 | network = 'Fragile Slave' 9 | parent = 'Fragile Slave' 10 | site = 'Fragile Slave' 11 | 12 | start_urls = [ 13 | 'https://www.fragileslave.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1/text()', 18 | 'description': '//div[@class="videocontent"]/p/text()', 19 | 'date': '//p[@class="date"]/text()', 20 | 're_date': r'(\d{1,2}/\d{1,2}/\d{4})', 21 | 'date_formats': ['%m/%d/%Y'], 22 | 'image': '//script[contains(text(), "video_content")]/text()', 23 | 're_image': r'poster=\"(.*?)\".*', 24 | 'performers': '//span[@class="tour_update_models"]/a/text()', 25 | 'tags': '//div[@class="videodetails"]/p/a/text()', 26 | 'external_id': r'.*/(.*?).html', 27 | 'trailer': '//script[contains(text(), "video_content")]/text()', 28 | 're_trailer': r'video src=\"(.*?)\".*', 29 | 'pagination': '/updates?page=%s' 30 | } 31 | 32 | def get_scenes(self, response): 33 | scenes = response.xpath('//div[@class="modelimg"]/a[contains(@href, "/trailers")]/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 37 | -------------------------------------------------------------------------------- /scenes/siteHotAndTatted.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteHotAndTattedSpider(BaseSceneScraper): 7 | name = 'HotAndTatted' 8 | network = 'Hot and Tatted' 9 | parent = 'Hot and Tatted' 10 | site = 'Hot and Tatted' 11 | 12 | start_urls = [ 13 | 'https://www.hotandtatted.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[contains(@class,"videoDetails")]/h3/text()', 18 | 'description': '//div[contains(@class,"videoDetails")]/p//text()', 19 | 'date': '//span[contains(text(), "Added:")]/following-sibling::text()', 20 | 'date_formats': ['%Y-%m-%d'], 21 | 'image': '//meta[@property="og:image"]/@content', 22 | 'performers': '//li[@class="update_models"]/a/text()', 23 | 'tags': '//div[contains(@class,"featuring")]/ul/li/a[contains(@href, "categories")]/text()', 24 | 'trailer': '//script[contains(text(), "var video_content")]/text()', 25 | 're_trailer': r'video src=\"(.*?\.mp4)', 26 | 'external_id': r'trailers(.*)\.htm', 27 | 'pagination': '/categories/Movies/%s/latest/' 28 | } 29 | 30 | def get_scenes(self, response): 31 | scenes = response.xpath('//div[@class="item-thumb"]/a/@href').getall() 32 | for scene in scenes: 33 | if re.search(self.get_selector_map('external_id'), scene): 34 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 35 | -------------------------------------------------------------------------------- /scenes/siteGenuineSin.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | from tpdb.BaseSceneScraper import BaseSceneScraper 3 | 4 | 5 | class SiteGenuineSinSpider(BaseSceneScraper): 6 | name = 'GenuineSin' 7 | network = 'Genuine Sin' 8 | parent = 'Genuine Sin' 9 | site = 'Genuine Sin' 10 | 11 | start_urls = [ 12 | 'https://genuinesin.com', 13 | ] 14 | 15 | selector_map = { 16 | 'title': '//div[contains(@class, "videoDetails")]//h3/text()', 17 | 'description': '//div[contains(@class, "videoDetails")]//p/text()', 18 | 'performers': '//div[contains(@class, "featuring") and contains(., "Featuring")]//following-sibling::li/a/text()', 19 | 'date': '', 20 | 'image': '//meta[@property="og:image"]/@content', 21 | 'tags': '//div[contains(@class,"featuring")]/ul/li/a[contains(@href,"categories")]/text()', 22 | 'external_id': r'/trailers/(.*).html', 23 | 'trailer': '//script[contains(text(),"video_content")]/text()', 24 | 're_trailer': r'video src=\"(.*?\.mp4)', 25 | 'pagination': '/categories/movies/%s/latest/', 26 | } 27 | 28 | def get_scenes(self, response): 29 | scenes = self.process_xpath(response, '//div[@class="item-thumb"]/a/@href').getall() 30 | for scene in scenes: 31 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 32 | 33 | def get_image(self, response): 34 | image = super().get_image(response) 35 | return image.replace("-1x.jpg", "-2x.jpg") 36 | -------------------------------------------------------------------------------- /scenes/siteGoonMuse.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteGoonMuseSpider(BaseSceneScraper): 7 | name = 'GoonMuse' 8 | site = 'GoonMuse' 9 | parent = 'GoonMuse' 10 | network = 'GoonMuse' 11 | 12 | start_urls = [ 13 | 'https://www.goonmuse.com', 14 | ] 15 | 16 | cookies = [{"name": "warn", "value": "true"}] 17 | 18 | selector_map = { 19 | 'title': '//h4/text()', 20 | 'description': '//div[contains(@class,"vidImgContent")]/p/text()', 21 | 'image': '//meta[@property="og:image"]/@content', 22 | 'performers': '//div[contains(@class,"latestUpdateBinfo gallery_info")]/p[@class="link_light"]/a/text()', 23 | 'tags': '//div[@class="blogTags"]/ul/li/a/text()', 24 | 'trailer': '', 25 | 'external_id': r'.*/(.*?)\.htm', 26 | 'pagination': '/categories/movies_%s.html', 27 | 'type': 'Scene', 28 | } 29 | 30 | def get_scenes(self, response): 31 | meta = response.meta 32 | scenes = response.xpath('//div[@class="videoPic"]/a/@href').getall() 33 | for scene in scenes: 34 | if re.search(self.get_selector_map('external_id'), scene): 35 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 36 | 37 | def get_id(self, response): 38 | sceneid = super().get_id(response) 39 | sceneid = sceneid.lower().replace("_vids", "") 40 | return sceneid 41 | -------------------------------------------------------------------------------- /scenes/siteMenOfMontreal.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteMenOfMontrealSpider(BaseSceneScraper): 7 | name = 'MenOfMontreal' 8 | network = 'Men Of Montreal' 9 | parent = 'Men Of Montreal' 10 | site = 'Men Of Montreal' 11 | 12 | start_urls = [ 13 | # ~ 'https://menofmontreal.com', Moved to BroNetwork Scraper 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[contains(@class,"gallery_info")]/h1/text()', 18 | 'description': '//p[@class="update_description"]/text()', 19 | 'date': '', 20 | 'image': '//div[@class="fullscreenTour"]/video-js/@poster', 21 | 'performers': '', 22 | 'tags': '', 23 | 'duration': '//span[@class="availdate"]/text()', 24 | 're_duration': r'((?:\d{1,2}\:)?\d{2}\:\d{2})', 25 | 'trailer': '', 26 | 'external_id': r'.*/(.*?)\.htm', 27 | 'pagination': '/categories/videos_%s_d.html', 28 | 'type': 'Scene', 29 | } 30 | 31 | def get_scenes(self, response): 32 | meta = response.meta 33 | scenes = response.xpath('//div[contains(@class,"category_listing_wrapper")]/div/a/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 37 | 38 | def get_tags(self, response): 39 | return ['Gay Porn'] 40 | -------------------------------------------------------------------------------- /scenes/siteBrattyMILF.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteBrattyMILFSpider(BaseSceneScraper): 8 | name = 'BrattyMILF' 9 | site = 'Bratty MILF' 10 | parent = 'Bratty MILF' 11 | network = 'Nubiles' 12 | 13 | start_urls = [ 14 | 'https://brattymilf.com/', 15 | ] 16 | 17 | selector_map = { 18 | 'title': '//h2/text()', 19 | 'description': '//div[contains(@class, "collapse")]/p/text()', 20 | 'date': '//div[@class="container"]/div/div/div[@class="clearfix"]/span[@class="date"]/text()', 21 | 'image': '//meta[@property="og:image"]/@content', 22 | 'image_blob': '//meta[@property="og:image"]/@content', 23 | 'performers': '//div[@class="content-pane-performers"]/a/text()', 24 | 'tags': '//div[@class="categories"]/a/text()', 25 | 'external_id': r'watch/(\d+)/', 26 | 'trailer': '', 27 | 'pagination': '/video/gallery/%s' 28 | } 29 | 30 | def get_scenes(self, response): 31 | scenes = response.xpath('//figcaption/div/span/a/@href').getall() 32 | for scene in scenes: 33 | if re.search(self.get_selector_map('external_id'), scene): 34 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 35 | 36 | def get_next_page_url(self, base, page): 37 | page = str((int(page) - 1) * 12) 38 | return self.format_url(base, self.get_selector_map('pagination') % page) 39 | -------------------------------------------------------------------------------- /scenes/siteFitErotic.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteFitEroticSpider(BaseSceneScraper): 7 | name = 'FitErotic' 8 | network = 'FitErotic' 9 | parent = 'FitErotic' 10 | site = 'FitErotic' 11 | 12 | start_urls = [ 13 | 'https://fiterotic.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1[@class="entry-title"]/text()', 18 | 'description': '//figcaption[contains(@class,"wp-element-caption")]/text()', 19 | 'date': '//time[contains(@class,"entry-date") and contains(@class, "published")]/@datetime', 20 | 're_date': r'(\d{4}-\d{2}-\d{2})', 21 | 'image': '//header[@class="entry-header"]/following-sibling::div[1]/div[@class="wp-block-image"]/figure[1]/a[1]/img[1]/@src[1]|//div[@class="entry-content"]//a/img/@src', 22 | 'performers': '', 23 | 'tags': '//footer[@class="entry-footer"]//a[contains(@href, "/category/")]/text()', 24 | 'duration': '', 25 | 'trailer': '', 26 | 'external_id': r'.*/(.*?)/', 27 | 'pagination': '/tour/page/%s/', 28 | 'type': 'Scene', 29 | } 30 | 31 | def get_scenes(self, response): 32 | meta = response.meta 33 | scenes = response.xpath('//article/a/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 37 | -------------------------------------------------------------------------------- /scenes/siteJaporn.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteJapornSpider(BaseSceneScraper): 7 | name = 'Japorn' 8 | network = 'Japorn' 9 | parent = 'Japorn' 10 | site = 'Japorn' 11 | 12 | start_urls = [ 13 | 'https://www.japornxxx.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[@class="block"]/h2[1]/text()', 18 | 'description': '//div[@class="description"]/p//text()', 19 | 'date': '//strong[contains(text(), "Date:")]/following-sibling::text()[1]', 20 | 'date_formats': ['%d %B %Y'], 21 | 'image': '//meta[@property="og:image"]/@content', 22 | 'performers': '//div[contains(@class,"actorRelated block")]/div[@class="related"]/a/text()', 23 | 'tags': '//div[@class="tags"]/ul/li/a/text()', 24 | 'duration': '//strong[contains(text(), "Length:")]/following-sibling::text()[1]', 25 | 'trailer': '//script[contains(text(), ".mp4")]/text()', 26 | 're_trailer': r'url\: \"(http.*?\.mp4)', 27 | 'external_id': r'.*_(\d+)', 28 | 'pagination': '/scene?page=%s', 29 | 'type': 'Scene', 30 | } 31 | 32 | def get_scenes(self, response): 33 | scenes = response.xpath('//a[contains(@class, "scene item")]/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 37 | -------------------------------------------------------------------------------- /scenes/siteJoshStoneXXX.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteJoshStoneXXXSpider(BaseSceneScraper): 7 | name = 'JoshStoneXXX' 8 | network = 'Josh Stone Productions' 9 | parent = 'Josh Stone XXX' 10 | site = 'Josh Stone XXX' 11 | 12 | start_urls = [ 13 | 'https://www.joshstonexxx.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//span[@class="update_title"]/text()', 18 | 'description': '//span[contains(@class,"description")]/text()', 19 | 'date': '//span[@class="availdate"]/text()', 20 | 'date_formats': ['%m/%d/%Y'], 21 | 'image': '//meta[@property="og:image"]/@content', 22 | 'performers': '//div[@class="update_block_info"]/span[@class="tour_update_models"]/a/text()', 23 | 'tags': '//span[@class="update_tags"]/a/text()', 24 | 'trailer': '//div[@class="update_image"]/a[contains(@onclick, "trailer")][1]/@onclick', 25 | 're_trailer': r'(trailer.*\.mp4)', 26 | 'external_id': r'.*/(.*?)\.html', 27 | 'pagination': '/tour/categories/movies_%s_d.html' 28 | } 29 | 30 | def get_scenes(self, response): 31 | meta = response.meta 32 | scenes = response.xpath('//div[@class="updateItem"]/a/@href').getall() 33 | for scene in scenes: 34 | if re.search(self.get_selector_map('external_id'), scene): 35 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 36 | -------------------------------------------------------------------------------- /scenes/siteRiggsFilms.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | from scrapy.utils.project import get_project_settings 5 | 6 | 7 | class SiteRiggsFilmsSpider(BaseSceneScraper): 8 | name = 'RiggsFilms' 9 | network = 'Riggs Films' 10 | parent = 'Riggs Films' 11 | site = 'Riggs Films' 12 | 13 | start_urls = [ 14 | 'https://riggsfilms.com', 15 | ] 16 | 17 | selector_map = { 18 | 'title': '//h1/text()', 19 | 'description': '//h1/following-sibling::p[not(contains(.//a/@data-wpel-link, "internal"))]/text()', 20 | 'date': '//meta[@property="ya:ovs:upload_date"]/@content', 21 | 're_date': r'(\d{4}-\d{2}-\d{2})', 22 | 'date_formats': ['%Y-%m-%d'], 23 | 'image': '//meta[@property="og:image"][1]/@content', 24 | 'performers': '//strong[contains(text(), "Starring")]/a/text()', 25 | 'tags': '', 26 | 'duration': '//strong[contains(text(), "Duration")]/following-sibling::text()[1]', 27 | 'trailer': '', 28 | 'external_id': r'.*/(.*?)/', 29 | 'type': 'Scene', 30 | 'pagination': '/scenes/page/%s/' 31 | } 32 | 33 | def get_scenes(self, response): 34 | meta = response.meta 35 | scenes = response.xpath('//h5/a/@href').getall() 36 | for scene in scenes: 37 | if re.search(self.get_selector_map('external_id'), scene): 38 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 39 | -------------------------------------------------------------------------------- /scenes/siteBrandNewAmateurs.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteBrandNewAmateursSpider(BaseSceneScraper): 7 | name = 'BrandNewAmateurs' 8 | network = 'Brand New Amateurs' 9 | parent = 'Brand New Amateurs' 10 | site = 'Brand New Amateurs' 11 | 12 | start_urls = [ 13 | 'https://www.brandnewamateurs.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//meta[@property="og:title"]/@content', 18 | 'description': '//div[contains(@class,"videoDetails")]/p/text()', 19 | 'date': '', 20 | 'image': '//meta[@property="og:image"]/@content', 21 | 'performers': '', 22 | 'tags': '//div[contains(@class,"featuring")]/ul/li/a[contains(@href,"/categories/")]/text()', 23 | 'external_id': r'.*/(.*?).html', 24 | 'trailer': '//script[contains(text(),"video_content")]/text()', 25 | 're_trailer': r'video src=\"(.*\.mp4)\"', 26 | 'pagination': '/categories/movies/%s/latest/' 27 | } 28 | 29 | def get_scenes(self, response): 30 | scenes = response.xpath('//div[@class="item-thumb"]//a[not(contains(@href,"signup"))]/@href').getall() 31 | for scene in scenes: 32 | if re.search(self.get_selector_map('external_id'), scene): 33 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 34 | 35 | def get_id(self, response): 36 | externid = super().get_id(response) 37 | return externid.lower() 38 | -------------------------------------------------------------------------------- /scenes/siteLezCrush.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteLezCrushSpider(BaseSceneScraper): 7 | name = 'LezCrush' 8 | network = 'Lez Crush' 9 | parent = 'Lez Crush' 10 | site = 'Lez Crush' 11 | 12 | start_urls = [ 13 | 'https://lezcrush.com', 14 | ] 15 | 16 | cookies = {'accepted': '1', 'ex_referrer': 'https%3A%2F%2Flezcrush.com%2Ftour%2Fpages.php%3Fid%3Denter'} 17 | 18 | selector_map = { 19 | 'title': '//span[@class="updateTitle"]/text()', 20 | 'description': '', 21 | 'date': '//span[@class="updateDate"]/text()', 22 | 'date_formats': ['%m/%d/%Y'], 23 | 'image': '//div[@class="firstPic"]/a/img/@src0_2x', 24 | 'performers': '//object/a[contains(@href, "/models/")]/text()', 25 | 'tags': '//span[@class="updateTags"]/a/text()', 26 | 'trailer': '//div[@class="firstPic"]/a/@onclick', 27 | 're_trailer': r'tload\(\'(.*\.mp4)', 28 | 'external_id': r'.*/(.*?)\.htm', 29 | 'pagination': '/tour/categories/movies_%s_d.html' 30 | } 31 | 32 | def get_scenes(self, response): 33 | meta = response.meta 34 | scenes = response.xpath('//div[@class="updateItem"]/a/@href').getall() 35 | for scene in scenes: 36 | if re.search(self.get_selector_map('external_id'), scene): 37 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta, headers=self.headers, cookies=self.cookies) 38 | -------------------------------------------------------------------------------- /scenes/siteDrDaddyPOV.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteDrDaddyPOVSpider(BaseSceneScraper): 7 | name = 'DrDaddyPOV' 8 | network = 'DrDaddyPOV' 9 | parent = 'DrDaddyPOV' 10 | site = 'DrDaddyPOV' 11 | 12 | start_urls = [ 13 | 'https://drdaddypov.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[@class="title"]/h1/text()', 18 | 'description': '//p[@class="description"]/text()', 19 | 'date': '//h2[contains(text(), "Release Date")]/following-sibling::p/text()', 20 | 'image': '//div[@id="hpromo"]//video/@poster', 21 | 'performers': '//span[@class="update_models"]/a/text()', 22 | 'tags': '//div[@class="categories-holder"]/a/text()', 23 | 'duration': '//h2[contains(text(), "Length")]/following-sibling::p/text()', 24 | 'trailer': '', 25 | 'external_id': r'updates/(.*)\.htm', 26 | 'pagination': '/categories/movies_%s_d.html', 27 | 'type': 'Scene', 28 | } 29 | 30 | def get_scenes(self, response): 31 | meta = response.meta 32 | scenes = response.xpath('//div[@class="thumb-pic"]/a/@href').getall() 33 | for scene in scenes: 34 | if re.search(self.get_selector_map('external_id'), scene): 35 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 36 | 37 | def get_id(self, response): 38 | id = super().get_id(response) 39 | return id.lower() 40 | -------------------------------------------------------------------------------- /scenes/siteMatureFetish.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteMatureFetishSpider(BaseSceneScraper): 7 | name = 'MatureFetish' 8 | network = 'Mature NL' 9 | parent = 'Mature Fetish' 10 | site = 'Mature Fetish' 11 | 12 | start_urls = [ 13 | 'https://maturefetish.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1/text()', 18 | 'description': '//h3[contains(text(), "Synopsis")]/following-sibling::text()', 19 | 'date': '//h1/following-sibling::div[1]/div[@class="stats-list"]/div[2]/text()', 20 | 'date_formats': ['%d-%m-%Y'], 21 | 'image': '//video/@poster', 22 | 'performers': '//div[@class="grid-tile-model"]//a[contains(@href, "/model/")]/text()', 23 | 'tags': '//div[contains(@class, "tag-list")]/a/text()', 24 | 'duration': '//div[contains(@style, "max-width")]/following-sibling::div[contains(@class, "stats-list")]/div[1]/text()', 25 | 'trailer': '', 26 | 'external_id': r'.*/(.*?)$', 27 | 'pagination': '/en/content/%s', 28 | 'type': 'Scene', 29 | } 30 | 31 | def get_scenes(self, response): 32 | meta = response.meta 33 | scenes = response.xpath('//div[@class="grid-tile-content"]/div[1]/a/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 37 | -------------------------------------------------------------------------------- /performers/siteClassLesbiansPerformer.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | import re 3 | from datetime import datetime 4 | from dateutil.relativedelta import relativedelta 5 | import dateparser 6 | from urllib.parse import urlparse 7 | 8 | from tpdb.BasePerformerScraper import BasePerformerScraper 9 | 10 | 11 | class siteClassLesbiansPerformerSpider(BasePerformerScraper): 12 | selector_map = { 13 | 'name': '//h1/text()', 14 | 'image': '//div[@class="banner-model"]/img/@src', 15 | 'nationality': '//p[@class="country"]/text()', 16 | 'pagination': '/models/%s', 17 | 'external_id': 'models/(.+).html$' 18 | } 19 | 20 | name = 'ClassLesbiansPerformer' 21 | network = 'Class Media' 22 | 23 | start_urls = [ 24 | 'https://www.class-lesbians.com', 25 | ] 26 | 27 | def get_gender(self, response): 28 | return 'Female' 29 | 30 | def get_performers(self, response): 31 | performers = response.xpath('//div[@class="box"]/a[contains(@href,"/models")]/@href').getall() 32 | for performer in performers: 33 | yield scrapy.Request( 34 | url=self.format_link(response, performer), 35 | callback=self.parse_performer 36 | ) 37 | 38 | 39 | def get_image(self, response): 40 | if 'image' in self.selector_map: 41 | image = self.process_xpath(response, self.get_selector_map('image')).get() 42 | if image: 43 | return "https://www.class-lesbians.com" + image.strip() 44 | return '' 45 | 46 | -------------------------------------------------------------------------------- /scenes/siteBourneChallenge.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteBourneChallengeSpider(BaseSceneScraper): 7 | name = 'BourneChallenge' 8 | network = 'Bourne Challenge' 9 | parent = 'Bourne Challenge' 10 | site = 'Bourne Challenge' 11 | 12 | start_urls = [ 13 | 'https://bournechallenge.com/movies/page/3/', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1[contains(@class, "title")]/text()', 18 | 'description': '//div[contains(@class, "post__content")]/p/text()', 19 | 'date': '//time/@datetime', 20 | 're_date': r'(\d{4}-\d{2}-\d{2})', 21 | 'image': '//meta[@property="og:image"]/@content', 22 | 'performers': '//ul[contains(@class, "talent__list")]/li/a/text()', 23 | 'tags': '//ul[contains(@class, "tags__list")]/li/a/text()', 24 | 'duration': '', 25 | 'trailer': '', 26 | 'external_id': r'', 27 | 'pagination': '/movies/page/%s/', 28 | 'type': 'Scene', 29 | } 30 | 31 | def get_scenes(self, response): 32 | meta = response.meta 33 | scenes = response.xpath('//div[@class="card-title"]/a/@href').getall() 34 | for scene in scenes: 35 | sceneid = re.search(r'(\d{4})/(\d{2})/(.*)/', scene) 36 | meta['id'] = f"{sceneid.group(1)}-{sceneid.group(2)}-{sceneid.group(3)}" 37 | if meta['id']: 38 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 39 | -------------------------------------------------------------------------------- /scenes/siteCocksureMen.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteCocksureMenSpider(BaseSceneScraper): 7 | name = 'CocksureMen' 8 | network = 'Jake Cruise Media' 9 | parent = 'Cocksure Men' 10 | site = 'Cocksure Men' 11 | 12 | start_urls = [ 13 | 'https://www.cocksuremen.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[@class="videoleft"]/h3/text()', 18 | 'description': '//div[@class="videoleft"]//div[@class="aboutvideo"]/p/text()', 19 | 'date': '', 20 | 'image': '//script[contains(text(), video_content)]/text()', 21 | 're_image': r'poster=[\'\"](.*?)[\'\"]', 22 | 'performers': '//ul[@class="featuredModels"]/li/a//span/text()', 23 | 'tags': '', 24 | 'duration': '//div[@class="videoleft"]/h4[1]/text()', 25 | 'trailer': '//script[contains(text(), video_content)]/text()', 26 | 're_trailer': r'video src=[\'\"](.*?)[\'\"]', 27 | 'external_id': r'.*/(.*?)\.htm', 28 | 'pagination': '/tour/categories/movies/%s/latest/', 29 | 'type': 'Scene', 30 | } 31 | 32 | def get_scenes(self, response): 33 | meta = response.meta 34 | scenes = response.xpath('//div[contains(@class,"sexycock_img")]/a/@href').getall() 35 | for scene in scenes: 36 | if re.search(self.get_selector_map('external_id'), scene): 37 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 38 | -------------------------------------------------------------------------------- /scenes/siteJulieGinger.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteJulieGingerSpider(BaseSceneScraper): 7 | name = 'JulieGinger' 8 | site = 'Julie Ginger' 9 | parent = 'Julie Ginger' 10 | network = 'Julie Ginger' 11 | 12 | start_urls = [ 13 | 'https://julieginger.com' 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//span[@class="update_title"]/text()', 18 | 'description': '//span[@class="latest_update_description"]/text()', 19 | 'date': '//span[@class="availdate"]/text()', 20 | 're_date': r'(\d{1,2}/\d{1,2}/\d{4})', 21 | 'date_formats': ['%m/%d/%Y'], 22 | 'image': '//img[contains(@class, "large_update_thumb")]/@src0_3x', 23 | 'performers': '//span[@class="tour_update_models"]/a/text()', 24 | 'tags': '//span[@class="update_tags"]/a/text()', 25 | 'duration': '', 26 | 'trailer': '', 27 | 'type': 'Scene', 28 | 'external_id': r'', 29 | 'pagination': '/categories/movies_%s.html', 30 | } 31 | 32 | def get_scenes(self, response): 33 | meta = response.meta 34 | scenes = response.xpath('//div[@class="updateItem"]') 35 | for scene in scenes: 36 | meta['id'] = re.search(r'-(\d+)', scene.xpath('./a[1]/img/@id').get()).group(1) 37 | scene = scene.xpath('./a[1]/@href').get() 38 | if meta['id']: 39 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 40 | -------------------------------------------------------------------------------- /scenes/siteMyPervMom.py: -------------------------------------------------------------------------------- 1 | # This is a filler script since it's also pulled from TeamSkeet. There's actually more information 2 | # on this page, but I'm not sure if everything is here 3 | import re 4 | import scrapy 5 | from tpdb.BaseSceneScraper import BaseSceneScraper 6 | 7 | 8 | class SiteMyPervMomSpider(BaseSceneScraper): 9 | name = 'MyPervMom' 10 | network = 'Team Skeet' 11 | parent = 'Perv Mom' 12 | site = 'Perv Mom' 13 | 14 | start_urls = [ 15 | 'https://mypervmom.com', 16 | ] 17 | 18 | selector_map = { 19 | 'title': '//h2/text()', 20 | 'description': '//strong[contains(text(), "Description")]/following-sibling::text()', 21 | 'date': '//div[@id="title-single"]/span/img[@id="time-single"]/following-sibling::text()', 22 | 'date_formats': ['%B %d, %Y'], 23 | 'image': '//div[@class="entry"]//video/@poster', 24 | 'performers': '//strong[contains(text(), "Starring")]/following-sibling::a/text()', 25 | 'tags': '//span[@class="update_tags"]/a/text()', 26 | 'external_id': r'.*/(.*?)/', 27 | 'trailer': '//div[@class="entry"]//video/source/@src', 28 | 'pagination': '/page/%s/' 29 | } 30 | 31 | def get_scenes(self, response): 32 | scenes = response.xpath('//div[contains(@id,"post")]//h2/a/@href').getall() 33 | for scene in scenes: 34 | if re.search(self.get_selector_map('external_id'), scene) and "/join/" not in scene: 35 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 36 | -------------------------------------------------------------------------------- /scenes/sitePenthouse.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | from tpdb.BaseSceneScraper import BaseSceneScraper 3 | 4 | 5 | class PenthouseSpider(BaseSceneScraper): 6 | name = 'Penthouse' 7 | network = 'Penthouse' 8 | parent = 'Penthouse' 9 | 10 | start_urls = [ 11 | 'https://penthousegold.com' 12 | ] 13 | 14 | selector_map = { 15 | 'title': '//meta[@itemprop="name"]/@content', 16 | 'description': '//meta[@itemprop="name"]/@content', # No description on site, just using title for filler 17 | 'date': '//meta[@itemprop="uploadDate"]/@content', 18 | 'image': '//meta[@itemprop="thumbnailUrl"]/@content', 19 | 'image_blob': True, 20 | 'performers': '//a[@data-track="PORNSTAR_NAME"]/text()', 21 | 'tags': '//ul[@class="scene-tags"]/li/a/text()', 22 | 'external_id': r'\/scenes\/(.+)\.html', 23 | 'trailer': '', # A trailer is available, but is tokenized and expires 24 | 'pagination': '/categories/videos_%s_d.html' 25 | } 26 | 27 | def get_scenes(self, response): 28 | scenes = response.xpath('//a[@data-track="SCENE_LINK"]/@href').getall() 29 | for scene in scenes: 30 | yield scrapy.Request(url=scene, callback=self.parse_scene, meta={'site': 'Penthouse Gold'}) 31 | 32 | def get_title(self, response): 33 | title = self.process_xpath( 34 | response, self.get_selector_map('title')).get() 35 | title = title.replace("Video - ", "") 36 | if title: 37 | return title.strip().title() 38 | return '' 39 | -------------------------------------------------------------------------------- /scenes/siteSuperbeModels.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteSuperbeModelsSpider(BaseSceneScraper): 8 | name = 'SuperbeModels' 9 | network = 'Superbe Models' 10 | parent = 'Superbe Models' 11 | site = 'Superbe Models' 12 | 13 | start_urls = [ 14 | 'https://www.superbemodels.com', 15 | ] 16 | 17 | selector_map = { 18 | 'title': '//h1/text()', 19 | 'description': '//meta[@itemprop="description"]/@content', 20 | 'date': '//div[@class="-mvd-grid-stats"]/text()', 21 | 're_date': r'(\w+ \d{1,2}, \d{4})', 22 | 'date_formats': ['%b %d, %Y'], 23 | 'image': '//meta[@itemprop="thumbnailUrl"]/@content|//picture[@class="-vcc-picture"]//img/@src', 24 | 'performers': '//div[contains(@class, "-mvd-grid-actors")]/span/a/text()', 25 | 'tags': '//div[@class="-mvd-list"]/span/a/text()', 26 | 'duration': '//meta[@itemprop="duration"]/@content', 27 | 'external_id': r'watch/(\d+)/', 28 | 'trailer': '//meta[@itemprop="contentURL"]/@content', 29 | 'pagination': '/films.en.html?page=%s' 30 | } 31 | 32 | def get_scenes(self, response): 33 | scenes = response.xpath('//div[contains(@class, "global-multi-card")]//a[contains(@href, "/watch/")]/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 37 | -------------------------------------------------------------------------------- /scenes/siteRealJamVR.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteRealJamVRSpider(BaseSceneScraper): 7 | name = 'RealJamVR' 8 | network = 'realjamvr' 9 | parent = 'realjamvr' 10 | site = 'realjamvr' 11 | 12 | start_urls = [ 13 | 'https://realjamvr.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1/text()', 18 | 'description': '//div[contains(@class,"opacity-75")]/text()', 19 | 'date': '//i[contains(@class, "bi-calendar")]/../../strong/text()', 20 | 'date_formats': ['%B %d, %Y'], 21 | 'duration': '//i[contains(@class, "bi-clock-history")]/../../strong/text()', 22 | 'image': '//dl8-video/@poster', 23 | 'performers': '//div[contains(text(), "Starring")]/a/text()', 24 | 'tags': '//div[contains(text(), "Tags")]/a/text()', 25 | 'trailer': '', 26 | 'external_id': r'/scene/(.*)', 27 | 'pagination': '/scenes/?page=%s' 28 | } 29 | 30 | def get_scenes(self, response): 31 | meta = response.meta 32 | scenes = response.xpath('//div[@class="panel"]') 33 | for scene in scenes: 34 | trailer = scene.xpath('.//video/source/@src') 35 | if trailer: 36 | meta['trailer'] = trailer.get() 37 | scene = scene.xpath('./a/@href').get() 38 | if re.search(self.get_selector_map('external_id'), scene): 39 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 40 | -------------------------------------------------------------------------------- /scenes/siteCumflation.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteCumflationSpider(BaseSceneScraper): 7 | name = 'Cumflation' 8 | network = 'Hentaied' 9 | parent = 'Cumflation' 10 | site = 'Cumflation' 11 | 12 | start_urls = [ 13 | 'https://cumflation.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1/text()', 18 | 'description': '//div[@id="fullstory"]/p/text()', 19 | 'date': '//meta[@property="article:published_time"]/@content', 20 | 'image': '//meta[@property="og:image"]/@content', 21 | 'duration': '//div[contains(@class,"duration")]/img/following-sibling::text()', 22 | 'performers': '//div[@class="taglist"]/a[@rel="tag"]/text()', 23 | 'tags': '//ul[@class="post-categories"]/li/a/text()', 24 | 'director': '//div[contains(@class,"director")]/span/a/text()', 25 | 'external_id': '.*\/(.*?)\/$', 26 | 'trailer': '//video/source/@src', 27 | 'pagination': '/all-videos/page/%s/' 28 | } 29 | 30 | def get_scenes(self, response): 31 | scenes = response.xpath('//center[@class="vidcont"]/a/@href').getall() 32 | for scene in scenes: 33 | if re.search(self.get_selector_map('external_id'), scene): 34 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 35 | 36 | def get_site(self, response): 37 | return "Cumflation" 38 | 39 | def get_parent(self, response): 40 | return "Cumflation" 41 | -------------------------------------------------------------------------------- /scenes/siteMaverickMenDirects.py: -------------------------------------------------------------------------------- 1 | import re 2 | import string 3 | import scrapy 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteMaverickMenDirectsSpider(BaseSceneScraper): 8 | name = 'MaverickMenDirects' 9 | network = 'Maverick Men' 10 | parent = 'Maverick Men Directs' 11 | site = 'Maverick Men Directs' 12 | 13 | start_urls = [ 14 | 'https://vod.maverickmen.com', 15 | ] 16 | 17 | selector_map = { 18 | 'title': '//div[@class="custom-container"]/div/div/h2/text()', 19 | 'description': '//h5/following-sibling::p//text()', 20 | 'date': '//i[contains(@class, "fa-clock")]/following-sibling::small/text()', 21 | 'date_formats': ['%m/%d/%Y'], 22 | 'image': '//comment()[contains(., "img-responsive")]', 23 | 're_image': r'(http.*?)[\'\"]', 24 | 'external_id': r'.*/(.*?)$', 25 | 'pagination': '/m/r/site/Maverick_Directs/ms/trailers?p=%s', 26 | 'type': 'Scene', 27 | } 28 | 29 | def get_scenes(self, response): 30 | meta = response.meta 31 | scenes = response.xpath('//div[@class="videobox2"]/figure/a/@href').getall() 32 | for scene in scenes: 33 | if re.search(self.get_selector_map('external_id'), scene): 34 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 35 | 36 | def get_title(self, response): 37 | title = super().get_title(response) 38 | title = title.lower().replace("teaser", "").strip() 39 | return string.capwords(title) 40 | -------------------------------------------------------------------------------- /scenes/siteOnlyBBC.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteOnlyBBCSpider(BaseSceneScraper): 7 | name = 'OnlyBBC' 8 | site = 'Only BBC' 9 | parent = 'Only BBC' 10 | network = 'Only BBC' 11 | 12 | start_urls = [ 13 | 'https://www.onlybbc.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[@class="update_block_info"]/span[contains(@class, "update_title")]/text()', 18 | 'description': '//div[@class="update_block_info"]/span[contains(@class, "update_description")]/text()', 19 | 'date': '//div[@class="update_block_info"]/span[contains(@class, "availdate")]/text()', 20 | 'date_formats': ['%m/%d/%Y'], 21 | 'image': '//meta[@property="og:image"]/@content|//meta[@name="twitter:image"]/@content', 22 | 'performers': '//div[@class="update_block_info"]/span[contains(@class, "update_models")]/a/text()', 23 | 'tags': '//div[@class="update_block_info"]/span[contains(@class, "update_tags")]/a/text()', 24 | 'external_id': r'.*/(.*?)\.htm', 25 | 'pagination': '/tour/categories/movies_%s_d.html', 26 | 'type': 'Scene', 27 | } 28 | 29 | def get_scenes(self, response): 30 | meta = response.meta 31 | scenes = response.xpath('//div[@class="updateItem"]/a/@href').getall() 32 | for scene in scenes: 33 | if re.search(self.get_selector_map('external_id'), scene): 34 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 35 | -------------------------------------------------------------------------------- /scenes/siteBrasileirinhas.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteBrasileirinhasSpider(BaseSceneScraper): 7 | name = 'Brasileirinhas' 8 | network = 'Brasileirinhas' 9 | parent = 'Brasileirinhas' 10 | site = 'Brasileirinhas' 11 | 12 | start_urls = [ 13 | 'https://www.brasileirinhas.com.br', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1[@class="titleVideo"]/text()', 18 | 'description': '//h1[@class="titleVideo"]/following-sibling::p/text()', 19 | 'date': '', 20 | 'image': '', 21 | 'performers': '', 22 | 'tags': '', 23 | 'duration': '//span[@class="tempoCena"]/text()', 24 | 'trailer': '', 25 | 'external_id': r'.*-(\d+)\.htm', 26 | 'pagination': '/videos-porno/pagina-%s.html', 27 | 'type': 'Scene', 28 | } 29 | 30 | def get_scenes(self, response): 31 | scenes = response.xpath('//div[contains(@class, "conteudoVideos")]/div/a[contains(@class, "filmeLink")]/@href').getall() 32 | for scene in scenes: 33 | if re.search(self.get_selector_map('external_id'), scene): 34 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 35 | 36 | def get_image(self, response): 37 | sceneid = super().get_id(response) 38 | if sceneid: 39 | image = f"https://static1.brasileirinhas.com.br/Brasileirinhas/images/conteudo/cenas/player/{sceneid}.jpg" 40 | return image 41 | return None 42 | -------------------------------------------------------------------------------- /scenes/sitePinkyXXX.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | from tpdb.items import SceneItem 5 | 6 | 7 | class PinkyxxxSpider(BaseSceneScraper): 8 | name = 'PinkyXXX' 9 | 10 | start_urls = [ 11 | 'https://pinkyxxx.com' 12 | ] 13 | 14 | selector_map = { 15 | 'external_id': r'preview/(.+)', 16 | 'pagination': '/wp-admin/admin-ajax.php?action=vls&vid_type=promo&list_type=views&limit=100&offset=%s' 17 | } 18 | 19 | def get_scenes(self, response): 20 | jsondata = json.loads(response.text) 21 | for scene in jsondata['listings']: 22 | item = SceneItem() 23 | item['title'] = self.cleanup_title(scene['title']) 24 | item['date'] = scene['info']['post_date'] 25 | item['image'] = scene['poster'][0] 26 | item['image_blob'] = self.get_image_blob_from_link(item['image']) 27 | item['description'] = scene['description'] 28 | item['id'] = scene['ID'] 29 | item['trailer'] = '' 30 | item['tags'] = [] 31 | item['performers'] = [] 32 | item['url'] = scene['permalink'] 33 | item['type'] = 'Scene' 34 | item['site'] = 'PinkyXXX' 35 | item['parent'] = 'PinkyXXX' 36 | item['network'] = 'PinkyXXX' 37 | yield self.check_item(item, self.days) 38 | 39 | def get_next_page_url(self, base, page): 40 | page = str((int(page) - 1) * 100) 41 | return self.format_url(base, self.get_selector_map('pagination') % page) 42 | -------------------------------------------------------------------------------- /scenes/siteLostBetsGames.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteLostBetsGamesSpider(BaseSceneScraper): 7 | name = 'LostBetsGames' 8 | network = 'Lost Bets Games' 9 | parent = 'Lost Bets Games' 10 | site = 'Lost Bets Games' 11 | 12 | start_urls = [ 13 | 'https://lostbetsgames.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[contains(@class, "descriptions")]/h1/text()', 18 | 'description': '//div[contains(@class, "descriptions")]//p//text()', 19 | 'image': '//video/@poster', 20 | 'performers': '', 21 | 'tags': '', 22 | 'trailer': '//video/source/@src', 23 | 'external_id': r'id/(\d+)/', 24 | 'pagination': '/site/index/p/%s', 25 | 'type': 'Scene', 26 | } 27 | 28 | def get_scenes(self, response): 29 | meta = response.meta 30 | scenes = response.xpath('//figure') 31 | for scene in scenes: 32 | scenedate = scene.xpath('.//em[@class="added"]/time/@datetime') 33 | if scenedate: 34 | meta['date'] = scenedate.get() 35 | duration = scene.xpath('.//span[@class="time"]//time/text()') 36 | if duration: 37 | meta['duration'] = self.duration_to_seconds(duration.get()) 38 | scene = scene.xpath('./a/@href').get() 39 | if re.search(self.get_selector_map('external_id'), scene): 40 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 41 | -------------------------------------------------------------------------------- /scenes/siteOldje.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteOldjeSpider(BaseSceneScraper): 7 | name = 'Oldje' 8 | network = 'Oldje' 9 | parent = 'Oldje' 10 | site = 'Oldje' 11 | 12 | start_urls = [ 13 | 'https://www.oldje.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1[contains(@class, "title")]/text()|//h1[@class="preview_header"]/span[1]/text()', 18 | 'description': '//div[@id="content"]//p[@class="text"]/text()|//div[@class="preview_desc"]/text()', 19 | 'date': '//p[contains(text(), "Published")]/span/text()', 20 | 'date_formats': ['%Y-%m-%d'], 21 | 'image': '//div[@id="content"]/a[1]/img/@src|//div[@class="content left"]/div/div[1]/a/img[contains(@src, "sets")]/@src', 22 | 'performers': '//span[contains(@class,"act_name")]/a/text()', 23 | 'tags': '//p[@class="tags"]/span/a/text()|//p[contains(@id, "tags")]/span/a/text()', 24 | 'trailer': '', 25 | 'external_id': r'.*/(.*?)$', 26 | 'pagination': '/movies/%s' 27 | } 28 | 29 | def get_scenes(self, response): 30 | meta = response.meta 31 | scenes = response.xpath('//div[@class="left mini_cover"]/h2/a/@href').getall() 32 | for scene in scenes: 33 | if re.search(self.get_selector_map('external_id'), scene): 34 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 35 | 36 | def get_title(self, response): 37 | return self.get_date(response) 38 | -------------------------------------------------------------------------------- /scenes/siteXX-Cel.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteXXCelSpider(BaseSceneScraper): 7 | name = 'XX-Cel' 8 | network = 'XX-Cel' 9 | parent = 'XX-Cel' 10 | site = 'XX-Cel' 11 | 12 | start_urls = [ 13 | 'https://xx-cel.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[contains(@class, "vid-details")]//h2/text()', 18 | 'description': '', 19 | 'date': '//div[contains(@class, "vid-details")]//span[contains(text(), "eleased")]/strong/text()', 20 | 'date_formats': ['%b %d, %Y'], 21 | 'image': '//div[@id="videoPlayer"]//video/@poster|//div[@id="videoPlayer"]/a/img/@src', 22 | 'performers': '//div[contains(@class, "vid-details")]//span[contains(text(), "tarring")]/a/text()', 23 | 'tags': '', 24 | 'duration': '//div[contains(@class, "vid-details")]//span[contains(text(), "uration")]/strong/text()', 25 | 'trailer': '//div[@id="videoPlayer"]//video/source/@src', 26 | 'external_id': r'.*/(.*?)$', 27 | 'pagination': '/movies/page-%s/?tag=&q=&model=&sort=recent', 28 | 'type': 'Scene', 29 | } 30 | 31 | def get_scenes(self, response): 32 | meta = response.meta 33 | scenes = response.xpath('//div[contains(@class, "star col-xxl-3")]/a/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 37 | -------------------------------------------------------------------------------- /performers/siteLucasEntertainmentPerformer.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | 3 | from tpdb.BasePerformerScraper import BasePerformerScraper 4 | 5 | 6 | class SiteLucasEntertainmentPerformerSpider(BasePerformerScraper): 7 | selector_map = { 8 | 'name': '//h2[@class="visible-xs"]/text()', 9 | 'image': '//img[contains(@class,"lazy main-photo")]/@data-original', 10 | 'image_blob': True, 11 | 'bio': '//p[@class="plain-link"]/following-sibling::p[1]/text()', 12 | 'gender': '', 13 | 'astrology': '', 14 | 'birthday': '', 15 | 'birthplace': '', 16 | 'cupsize': '', 17 | 'ethnicity': '', 18 | 'eyecolor': '', 19 | 'fakeboobs': '', 20 | 'haircolor': '', 21 | 'height': '', 22 | 'measurements': '', 23 | 'nationality': '', 24 | 'piercings': '', 25 | 'tattoos': '', 26 | 'weight': '', 27 | 28 | 'pagination': '/models/page/%s/', 29 | 'external_id': r'model/(.*)/' 30 | } 31 | 32 | name = 'LucasEntertainmentPerformer' 33 | network = 'Lucas Entertainment' 34 | 35 | start_urls = [ 36 | 'https://www.lucasentertainment.com', 37 | ] 38 | 39 | def get_gender(self, response): 40 | return 'Male' 41 | 42 | def get_performers(self, response): 43 | performers = response.xpath('//div[@class="scene-item"]/a[1]/@href').getall() 44 | for performer in performers: 45 | yield scrapy.Request(url=self.format_link(response, performer), callback=self.parse_performer, cookies=self.cookies, headers=self.headers) 46 | -------------------------------------------------------------------------------- /scenes/siteAVIdolz.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteAVIdolzSpider(BaseSceneScraper): 7 | name = 'AVIdolz' 8 | network = 'AVIdolz' 9 | parent = 'AVIdolz' 10 | site = 'AVIdolz' 11 | 12 | start_urls = [ 13 | # ~ 'https://avidolz.com', # Moved into AVRevenue scraper 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1[@itemprop="name"]/text()', 18 | 'description': '//div[@itemprop="description"]//text()', 19 | 'date': '//h1[@itemprop="name"]/../../../../meta[@itemprop="datePublished"]/@content', 20 | 'image': '//h1[@itemprop="name"]/../../../../meta[@itemprop="thumbnailUrl"]/@content', 21 | 'performers': '//p/strong[contains(text(), "JAV Model")]/following-sibling::span//text()', 22 | 'tags': '//p/strong[contains(text(), "Categories")]/following-sibling::a/text()', 23 | 'trailer': '', 24 | 'external_id': r'.*/(.*?)/$', 25 | 'pagination': '/japan-porn/page/%s/' 26 | } 27 | 28 | def get_scenes(self, response): 29 | scenes = response.xpath('//li[contains(@class, "pure")]/div/div//div/a/@href').getall() 30 | for scene in scenes: 31 | if re.search(self.get_selector_map('external_id'), scene): 32 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 33 | 34 | def get_tags(self, response): 35 | tags = super().get_tags(response) 36 | if "Asian" not in tags: 37 | tags.append("Asian") 38 | return tags 39 | -------------------------------------------------------------------------------- /scenes/siteDanni.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteDanniSpider(BaseSceneScraper): 7 | name = 'Danni' 8 | network = 'Sexual Prime' 9 | parent = 'Danni' 10 | site = 'Danni' 11 | 12 | start_urls = [ 13 | 'https://www.danni.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[@class="scene-title"]/text()', 18 | 'description': '', 19 | 'date': '', 20 | 'image': '//script[contains(text(), "vJSPlayer")]/text()', 21 | 're_image': r'poster.*?(http.*?)[\'\"]', 22 | 'performers': '//div[@class="scene-title"]/following-sibling::div[contains(@class, "model-list")]/a/text()', 23 | 'tags': '//div[@class="scene-title"]/following-sibling::div[contains(@class, "scene-tags")]/a/text()', 24 | 'duration': '//div[contains(@class, "danni-clock")]/following-sibling::span/text()', 25 | 'trailer': '', 26 | 'external_id': r'.*/(.*?)_vid', 27 | 'pagination': '/categories/videos_%s_d', 28 | 'type': 'Scene', 29 | } 30 | 31 | def get_scenes(self, response): 32 | meta = response.meta 33 | scenes = response.xpath('//div[@class="danni-card-name-wrapper"]/div/a/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 37 | 38 | def get_id(self, response): 39 | return super().get_id(response).lower() 40 | -------------------------------------------------------------------------------- /scenes/siteGothGirlfriend.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteGothGirlfriendsSpider(BaseSceneScraper): 7 | name = 'GothGirlfriends' 8 | site = 'Goth Girlfriends' 9 | parent = 'Goth Girlfriends' 10 | network = 'Goth Girlfriends' 11 | 12 | start_urls = [ 13 | 'https://www.gothgirlfriends.com' 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[@class="title"]/h1/text()', 18 | 'description': '//p[@class="description"]/text()', 19 | 'date': '//h2[contains(text(), "Release")]/following-sibling::p/text()', 20 | 'image': '//meta[@property="og:image"]/@content', 21 | 'performers': '//span[@class="update_models"]/a/text()', 22 | 'tags': '//div[@class="categories-holder"]/a/@title', 23 | 'duration': '//h2[contains(text(), "Length")]/following-sibling::p/text()', 24 | 'trailer': '', 25 | 'type': 'Scene', 26 | 'external_id': r'.*/(.*)?\.htm', 27 | 'pagination': '/categories/videos_%s_d.html', 28 | } 29 | 30 | def get_scenes(self, response): 31 | meta = response.meta 32 | scenes = response.xpath('//div[@class="thumb-pic"]/a/@href').getall() 33 | for scene in scenes: 34 | if re.search(self.get_selector_map('external_id'), scene): 35 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 36 | 37 | def get_id(self, response): 38 | sceneid = super().get_id(response) 39 | return sceneid.lower() 40 | -------------------------------------------------------------------------------- /scenes/siteRandyBlue.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class siteRandyBlueSpider(BaseSceneScraper): 7 | name = 'RandyBlue' 8 | network = 'RandyBlue' 9 | parent = 'RandyBlue' 10 | site = 'RandyBlue' 11 | 12 | start_urls = [ 13 | 'https://www.randyblue.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[contains(@class, "title-zone")]//h1/text()', 18 | 'description': '//div[@class="panel-body"]/text()', 19 | 'date': '//div[contains(@class, "title-zone")]//span[contains(@class, "calendar")]/following-sibling::text()', 20 | 'date_formats': ['%m/%d/%Y'], 21 | 'image': '//div[@id="trailer_player_finished"]//img[contains(@src, "/content/")]/@src', 22 | 'performers': '//div[contains(@class, "title-zone")]//ul[@class="scene-models-list"]/li/a/text()', 23 | 'tags': '//div[contains(@class, "title-zone")]//ul[@class="scene-tags"]/li/a/text()', 24 | 'duration': '', 25 | 'trailer': '', 26 | 'external_id': r'scenes/(.*)\.htm', 27 | 'pagination': '/categories/videos_%s_d.html', 28 | 'type': 'Scene', 29 | } 30 | 31 | def get_scenes(self, response): 32 | meta = response.meta 33 | scenes = response.xpath('//li[contains(@class, "scene-video")]/div/a/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 37 | -------------------------------------------------------------------------------- /scenes/siteTooDiva.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | import scrapy 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteTooDivaSpider(BaseSceneScraper): 8 | name = 'TooDiva' 9 | network = 'TooDiva' 10 | parent = 'TooDiva' 11 | site = 'TooDiva' 12 | 13 | start_urls = [ 14 | 'https://toodiva.com', 15 | ] 16 | 17 | selector_map = { 18 | 'title': '//h1[contains(@itemprop, "headline")]/text()', 19 | 'description': '//div[@itemprop="articleBody"]/p/text()', 20 | 'date': '//head/meta[@property="article:published_time"][1]/@content', 21 | 're_date': r'(\d{4}-\d{2}-\d{2})', 22 | 'date_formats': ['%Y-%m-%d'], 23 | 'image': '//head/meta[@property="og:image"][1]/@content', 24 | 'performers': '//h2[contains(@class, "author-title")]/a/span/text()', 25 | 'tags': '', 26 | 'duration': '//article[contains(@id, "post")]/div[1]//span[contains(@class, "duration")]/text()', 27 | 'trailer': '', 28 | 'external_id': r'.*/(.*?)/', 29 | 'pagination': '/members/?page=%s', 30 | 'type': 'Scene', 31 | } 32 | 33 | def get_scenes(self, response): 34 | meta = response.meta 35 | scenes = response.xpath('//header/following-sibling::div[1]/div/div[@id="primary"]/div[1]/div[1]/ul[1]/li/article/div[1]/a/@href').getall() 36 | for scene in scenes: 37 | if re.search(self.get_selector_map('external_id'), scene): 38 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 39 | -------------------------------------------------------------------------------- /scenes/siteTrans4TheFans.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteTrans4TheFansSpider(BaseSceneScraper): 7 | name = 'Trans4TheFans' 8 | site = 'Trans4TheFans' 9 | parent = 'Trans4TheFans' 10 | network = 'Trans4TheFans' 11 | 12 | start_urls = [ 13 | 'https://trans4thefans.com' 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[@class="breadcrumb"]//span/text()', 18 | 'description': '//h4[contains(text(), "About")]/following-sibling::p/text()', 19 | 'image': '//div[@class="vid-play"]/video/@poster', 20 | 'performers': '//div[@class="pscat"]/a[contains(@href, "pornstars")]/text()', 21 | 'tags': '//div[@class="pscat"]/span/a/text()', 22 | 'duration': '', 23 | 'trailer': '', 24 | 'type': 'Scene', 25 | 'external_id': r'.*/(.*?)/$', 26 | 'pagination': '/videos/page/%s/', 27 | } 28 | 29 | def get_scenes(self, response): 30 | meta = response.meta 31 | scenes = response.xpath('//ul[@class="vid-listing"]/li') 32 | for scene in scenes: 33 | scenedate = scene.xpath('./div[@class="date"]/text()') 34 | if scenedate: 35 | meta['date'] = self.parse_date(scenedate.get(), date_formats=["%B %d, %Y"]).strftime('%Y-%m-%d') 36 | scene = scene.xpath('./a/@href').get() 37 | if re.search(self.get_selector_map('external_id'), scene): 38 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 39 | -------------------------------------------------------------------------------- /scenes/networkBrokeStraightboys.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class NetworkBrokeStraightBoysSpider(BaseSceneScraper): 7 | name = 'BrokeStraightBoys' 8 | network = 'Broke Straight Boys' 9 | 10 | start_urls = [ 11 | 'https://www.boygusher.com', 12 | 'https://www.brokestraightboys.com', 13 | 'https://www.collegeboyphysicals.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[@class="inner"]/div[@class="dettl-bar"]/div[1]/text()|//div[@class="deTlt"]/h1/text()', 18 | 'description': '//div[@class="desc"]//text()|//div[@class="dtlp"]/p//text()', 19 | 'date': '', 20 | 'image': '//img[contains(@src, "/thumbs") and contains(@src, "video")]/@src', 21 | 'performers': '//div[@class="model-desc"]/div//a/text()|//div[@class="dtlp"]/span//a/text()', 22 | 'tags': '', 23 | 'duration': '', 24 | 'trailer': '', 25 | 'external_id': r'.*/(.*?)$', 26 | 'pagination': '/episodes.php?page=%s&s=1&t=&nats=', 27 | 'type': 'Scene', 28 | } 29 | 30 | def get_scenes(self, response): 31 | meta = response.meta 32 | scenes = response.xpath('//ul[@class="listingC"]/li//a/@href|//ul[@id="vids"]/li/div/a/@href').getall() 33 | for scene in scenes: 34 | if re.search(self.get_selector_map('external_id'), scene): 35 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 36 | 37 | def get_tags(self, response): 38 | return ['Gay'] 39 | -------------------------------------------------------------------------------- /scenes/siteFetishPros.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteFetishProsSpider(BaseSceneScraper): 7 | name = 'FetishPros' 8 | network = 'Fetish Pros' 9 | parent = 'Fetish Pros' 10 | site = 'Fetish Pros' 11 | 12 | start_urls = [ 13 | 'https://www.fetishpros.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1/text()', 18 | 'description': '//div[@class="contentD"]//div[contains(@class, "videoDescription")]//span/text()', 19 | 'date': '//div[@class="contentD"]//i[contains(@class, "calendar")]/following-sibling::text()', 20 | 'date_formats': ['%b %d, %Y'], 21 | 'image': '//div[@class="contentD"]//div[contains(@class, "videoPreLeft")]//iframe/@src', 22 | 're_image': r'poster=(http.*)', 23 | 'performers': '//div[@class="contentD"]//div[contains(@class, "models")]/ul/li/a/text()', 24 | 'tags': '//div[@class="contentD"]//div[contains(@class, "tags")]/ul/li/a/text()', 25 | 'duration': '//div[@class="contentD"]//i[contains(@class, "clock")]/following-sibling::text()', 26 | 'external_id': r'.*/(.*?)$', 27 | 'trailer': '', 28 | 'pagination': '/updates?page=%s' 29 | } 30 | 31 | def get_scenes(self, response): 32 | scenes = response.xpath('//div[@class="videoPic"]/a[1]/@href').getall() 33 | for scene in scenes: 34 | if re.search(self.get_selector_map('external_id'), scene): 35 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 36 | -------------------------------------------------------------------------------- /scenes/siteTatsAndTits.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteTatsAndTitspider(BaseSceneScraper): 7 | name = 'TatsandTits' 8 | network = 'Tats and Tits' 9 | parent = 'Tats and Tits' 10 | site = 'Tats and Tits' 11 | 12 | start_urls = [ 13 | 'https://tatsandtits.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[contains(@class, "vid-box")]/h1/text()', 18 | 'description': '//div[contains(@class, "vid-box")]/p/text()', 19 | 'date': '', 20 | 'image': '//video/@poster', 21 | 'performers': '//div[contains(@class, "pscat")]/a/text()', 22 | 'tags': '//div[contains(@class, "pscat")]/span/a/text()', 23 | 'external_id': r'([A-Za-z0-9]+(-[A-Za-z0-9]+)+)', 24 | 'trailer': '', 25 | 'pagination': '/videos/page/%s/' 26 | } 27 | 28 | def get_scenes(self, response): 29 | meta = response.meta 30 | scenes = response.xpath('//ul[@class="vid-listing"]/li') 31 | for scene in scenes: 32 | scenedate = scene.xpath('./div[@class="date"]/text()') 33 | if scenedate: 34 | scenedate = scenedate.get() 35 | meta['date'] = self.parse_date(scenedate, date_formats=['%B %d, %Y']).strftime('%Y-%m-%d') 36 | scene = self.format_link(response, scene.xpath('./a/@href').get()) 37 | if re.search(self.get_selector_map('external_id'), scene): 38 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 39 | -------------------------------------------------------------------------------- /scenes/siteTwinz.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteTwinzSpider(BaseSceneScraper): 7 | name = 'Twinz' 8 | network = 'Hentaied' 9 | parent = 'Twinz' 10 | site = 'Twinz' 11 | 12 | start_urls = [ 13 | 'https://hentaied.pro', 14 | ] 15 | 16 | cookies = {"name": "age_gate", "value": "18"} 17 | 18 | selector_map = { 19 | 'title': '//h1/text()', 20 | 'description': '//div[@id="fullstory"]/p/text()', 21 | 'date': '//meta[@property="article:published_time"]/@content', 22 | 'image': '//div[@class="gallery-inner"]/a[1]/@href', 23 | 'duration': '//div[contains(@class,"duration")]/img/following-sibling::text()', 24 | 'performers': '//div[@class="taglist"]/a[@rel="tag"]/text()', 25 | 'tags': '//ul[@class="post-categories"]/li/a/text()', 26 | 'director': '//div[contains(@class,"director")]/span/a/text()', 27 | 'external_id': '.*\/(.*?)\/$', 28 | 'trailer': '//video/source/@src', 29 | 'pagination': '/projects/twinz/page/%s/' 30 | } 31 | 32 | def get_scenes(self, response): 33 | scenes = response.xpath('//center[@class="vidcont"]/a/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 37 | 38 | def get_site(self, response): 39 | return "Twinz" 40 | 41 | def get_parent(self, response): 42 | return "Twinz" 43 | -------------------------------------------------------------------------------- /scenes/siteStrippers4k.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteStrippers4kSpider(BaseSceneScraper): 7 | name = 'Strippers4k' 8 | network = 'PornPros' 9 | parent = 'Strippers4k' 10 | site = 'Strippers4k' 11 | 12 | start_urls = [ 13 | 'https://strippers4k.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1/text()', 18 | 'description': '//div[contains(@class, "scene-info")]/div[contains(@class, "items-start")]/span/text()', 19 | 'image': '//div[@data-controller="player"]//video/@poster', 20 | 'performers': '//div[contains(@class, "scene-info")]//a[contains(@href, "/models/")]/text()', 21 | 'external_id': r'', 22 | 'pagination': '/?page=%s', 23 | 'type': 'Scene', 24 | } 25 | 26 | def get_scenes(self, response): 27 | meta = response.meta 28 | scenes = response.xpath('//div[contains(@class, "video-thumbnail")]') 29 | for scene in scenes: 30 | scenedate = scene.xpath('.//div[contains(@class, "-footer")]//span[contains(@class, "text-xs")]/text()') 31 | if scenedate: 32 | meta['date'] = self.parse_date(scenedate.get(), date_formats=['%m/%d/%Y']).strftime('%Y-%m-%d') 33 | 34 | meta['id'] = scene.xpath('./@data-vid').get() 35 | scene = scene.xpath('./div[1]/div[1]/a[contains(@href, "/video/")]/@href').get() 36 | 37 | if meta['id']: 38 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 39 | -------------------------------------------------------------------------------- /scenes/siteFreeze.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteFreezeSpider(BaseSceneScraper): 7 | name = 'Freeze' 8 | network = 'Freeze' 9 | parent = 'Freeze' 10 | site = 'Freeze' 11 | 12 | start_urls = [ 13 | 'https://freeze.xxx', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1/text()', 18 | 'description': '//div[@id="fullstory"]/p/text()', 19 | 'date': '//meta[@property="article:published_time"]/@content', 20 | 're_date': r'(\d{4}-\d{2}-\d{2})', 21 | 'image': '//meta[@property="og:image"]/@content', 22 | 'duration': '//div[contains(@class,"duration")]/img/following-sibling::text()', 23 | 'performers': '//div[contains(@class,"tagsmodels")]/div[contains(@class, "taglist")]/a/text()', 24 | 'tags': '//ul[@class="post-categories"]/li/a/text()', 25 | 'director': '//div[contains(@class,"director")]/span/a/text()', 26 | 'external_id': '.*\/(.*?)\/$', 27 | 'trailer': '//video/@src', 28 | 'pagination': '/all-videos/page/%s/' 29 | } 30 | 31 | def get_scenes(self, response): 32 | scenes = response.xpath('//center[@class="vidcont"]/a/@href').getall() 33 | for scene in scenes: 34 | if re.search(self.get_selector_map('external_id'), scene): 35 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 36 | 37 | def get_site(self, response): 38 | return "Freeze" 39 | 40 | def get_parent(self, response): 41 | return "Freeze" 42 | -------------------------------------------------------------------------------- /scenes/siteVampired.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteVampiredSpider(BaseSceneScraper): 7 | name = 'Vampired' 8 | network = 'Hentaied' 9 | parent = 'Vampired' 10 | site = 'Vampired' 11 | 12 | start_urls = [ 13 | 'https://vampired.com', 14 | ] 15 | 16 | cookies = {"name": "age_gate", "value": "18"} 17 | 18 | selector_map = { 19 | 'title': '//h1/text()', 20 | 'description': '//div[@id="fullstory"]/p/text()', 21 | 'date': '//meta[@property="article:published_time"]/@content', 22 | 'image': '//meta[@property="og:image"]/@content', 23 | 'duration': '//div[contains(@class,"duration")]/img/following-sibling::text()', 24 | 'performers': '//div[@class="taglist"]/a[@rel="tag"]/text()', 25 | 'tags': '//ul[@class="post-categories"]/li/a/text()', 26 | 'director': '//div[contains(@class,"director")]/span/a/text()', 27 | 'external_id': r'.*/(.*?)/$', 28 | 'trailer': '//video/source/@src', 29 | 'pagination': '/all-videos/page/%s/' 30 | } 31 | 32 | def get_scenes(self, response): 33 | scenes = response.xpath('//center[@class="vidcont"]/a/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 37 | 38 | def get_site(self, response): 39 | return "Vampired" 40 | 41 | def get_parent(self, response): 42 | return "Vampired" 43 | -------------------------------------------------------------------------------- /scenes/siteJizzOnTeens.py: -------------------------------------------------------------------------------- 1 | import re 2 | import string 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteJizzOnTeensSpider(BaseSceneScraper): 7 | name = 'JizzOnTeens' 8 | network = 'Jizz On Teens' 9 | parent = 'Jizz On Teens' 10 | site = 'Jizz On Teens' 11 | 12 | start_urls = [ 13 | 'http://www.jizzonteens.com', 14 | ] 15 | 16 | selector_map = { 17 | 'external_id': r'', 18 | 'pagination': '/page/%s', 19 | 'type': 'Scene', 20 | } 21 | 22 | def get_scenes(self, response): 23 | scenes = response.xpath('//div[@class="box-outer"][.//h2]') 24 | for scene in scenes: 25 | item = self.init_scene() 26 | 27 | title = scene.xpath('.//h2/text()').get() 28 | item['title'] = string.capwords(self.cleanup_title(title)) 29 | 30 | description = scene.xpath('.//p/textarea/text()') 31 | if description: 32 | item['description'] = description.get() 33 | 34 | image = scene.xpath('.//video/@poster') 35 | if image: 36 | image = self.format_link(response, image.get()) 37 | item['image'] = image 38 | item['image_blob'] = self.get_image_blob_from_link(image) 39 | item['id'] = re.search(r'content/(.*?)/', image).group(1) 40 | item['url'] = response.url 41 | item['site'] = "Jizz On Teens" 42 | item['parent'] = "Jizz On Teens" 43 | item['network'] = "Jizz On Teens" 44 | 45 | if item['id']: 46 | yield item 47 | -------------------------------------------------------------------------------- /scenes/siteSubspaceland.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteSubspacelandSpider(BaseSceneScraper): 8 | name = 'Subspaceland' 9 | network = 'Subspaceland' 10 | 11 | start_urls = [ 12 | 'https://www.subspaceland.com', 13 | ] 14 | 15 | selector_map = { 16 | 'title': '//h1/text()', 17 | 'description': '', 18 | 'date': '//p[@class="date"]/text()', 19 | 're_date': r'(\d{2} \w{3} \d{4})', 20 | 'date_formats': ['%d %b %Y'], 21 | 'image': '//div[@class="setEntertaiment"]/a/img/@src', 22 | 'performers': '//h2/a[contains(@href,"/model/")]/text()', 23 | 'tags': '//div[@id="tagsInColums"]/ul/li/a/text()', 24 | 'external_id': r'video\/(.*)', 25 | 'trailer': '', 26 | 'pagination': '/movies/%s' 27 | } 28 | 29 | def get_scenes(self, response): 30 | scenes = response.xpath('//div[@class="MovieAsItem"]/a/@href').getall() 31 | for scene in scenes: 32 | if re.search(self.get_selector_map('external_id'), scene): 33 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 34 | 35 | def get_id(self, response): 36 | externid = super().get_id(response) 37 | externid = externid.replace("/", "") 38 | return externid 39 | 40 | def get_description(self, response): 41 | return '' 42 | 43 | def get_site(self, response): 44 | return "Subspaceland" 45 | 46 | def get_parent(self, response): 47 | return "Subspaceland" 48 | -------------------------------------------------------------------------------- /scenes/siteVoodooed.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteVoodooedSpider(BaseSceneScraper): 7 | name = 'Voodooed' 8 | network = 'Hentaied' 9 | parent = 'Voodooed' 10 | site = 'Voodooed' 11 | 12 | start_urls = [ 13 | 'https://voodooed.com', 14 | ] 15 | 16 | cookies = {"name": "age_gate", "value": "18"} 17 | 18 | selector_map = { 19 | 'title': '//h1/text()', 20 | 'description': '//div[@id="fullstory"]/p/text()', 21 | 'date': '//meta[@property="article:published_time"]/@content', 22 | 'image': '//meta[@property="og:image"]/@content', 23 | 'duration': '//div[contains(@class,"duration")]/img/following-sibling::text()', 24 | 'performers': '//div[@class="taglist"]/a[@rel="tag"]/text()', 25 | 'tags': '//ul[@class="post-categories"]/li/a/text()', 26 | 'director': '//div[contains(@class,"director")]/span/a/text()', 27 | 'external_id': '.*\/(.*?)\/$', 28 | 'trailer': '//video/source/@src', 29 | 'pagination': '/all-videos/page/%s/' 30 | } 31 | 32 | def get_scenes(self, response): 33 | scenes = response.xpath('//center[@class="vidcont"]/a/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 37 | 38 | def get_site(self, response): 39 | return "Voodooed" 40 | 41 | def get_parent(self, response): 42 | return "Voodooed" 43 | -------------------------------------------------------------------------------- /scenes/siteDirtyTony.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteDirtyTonySpider(BaseSceneScraper): 7 | name = 'DirtyTony' 8 | network = 'Dirty Tony' 9 | parent = 'Dirty Tony' 10 | site = 'Dirty Tony' 11 | 12 | start_urls = [ 13 | 'http://dirtytony.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[contains(@id, "content-wrap")]/div[1]/div[1]/div[1]/h1/text()', 18 | 'description': '//h5/text()', 19 | 'date': '', 20 | 'image': '', 21 | 'performers': '', 22 | 'tags': '', 23 | 'duration': '', 24 | 'trailer': '//video/source/@src', 25 | 'external_id': r'', 26 | 'pagination': '/tour/?paged=%s', 27 | 'type': 'Scene', 28 | } 29 | 30 | def get_scenes(self, response): 31 | meta = response.meta 32 | scenes = response.xpath('//h1/@id').getall() 33 | for scene in scenes: 34 | meta['id'] = re.search(r'-(\d+)$', scene).group(1) 35 | scene = f"http://dirtytony.com/tour/?p={meta['id']}" 36 | if meta['id']: 37 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 38 | 39 | def get_image(self, response): 40 | image = response.xpath('//video/@poster') 41 | if image: 42 | image = image.get() 43 | image = image.replace("..", "") 44 | image = "http://dirtytony.com" + image 45 | return image 46 | 47 | def get_tags(self, response): 48 | return ['Gay'] 49 | -------------------------------------------------------------------------------- /performers/siteDrDaddyPOVPerformer.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | from tpdb.BasePerformerScraper import BasePerformerScraper 3 | 4 | 5 | class SiteDrDaddyPOVPerformerSpider(BasePerformerScraper): 6 | selector_map = { 7 | 'name': '//h1[@class="model-name"]/text()', 8 | 'image': '//div[@class="model-thumbnail"]/img/@src', 9 | 'image_blob': True, 10 | 'bio': '//div[@class="model-bio-item"]/text()', 11 | 'astrology': '//b[contains(text(), "Astrological")]/following-sibling::text()', 12 | 'birthday': '//b[contains(text(), "Age")]/following-sibling::text()', 13 | 're_birthday': r'(\w+ \d{1,2}, \d{4})', 14 | 'date_formats': ['%B %d, %Y'], 15 | 'height': '//b[contains(text(), "Height")]/following-sibling::text()', 16 | 're_height': r'(\d+cm)', 17 | 'measurements': '//b[contains(text(), "Measurements")]/following-sibling::text()', 18 | 're_measurements': r'(\d2\w{1,4}-\d{2}-\d{2})', 19 | 20 | 'pagination': '/models/models_%s_d.html', 21 | 'external_id': r'model/(.*)/' 22 | } 23 | 24 | name = 'DrDaddyPOVPerformer' 25 | network = 'DrDaddyPOV' 26 | 27 | start_urls = [ 28 | 'https://drdaddypov.com', 29 | ] 30 | 31 | def get_gender(self, response): 32 | return 'Female' 33 | 34 | def get_performers(self, response): 35 | performers = response.xpath('//div[@class="pornstar-pic"]/a/@href').getall() 36 | for performer in performers: 37 | yield scrapy.Request(url=self.format_link(response, performer), callback=self.parse_performer, cookies=self.cookies, headers=self.headers) 38 | -------------------------------------------------------------------------------- /scenes/siteFuckerMate.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteFuckerMateSpider(BaseSceneScraper): 7 | name = 'FuckerMate' 8 | network = 'Fucker Mate' 9 | parent = 'Fucker Mate' 10 | site = 'Fucker Mate' 11 | 12 | start_urls = [ 13 | 'https://www.fuckermate.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//section[@id="video-section"]//div[contains(@class, "post-header")]/h1[1]/text()', 18 | 'description': '//section[@id="video-section"]//div[contains(@class, "post-entry")]/div/div/p[1]/text()', 19 | 'date': '//section[@id="video-section"]//div[contains(@class, "post-meta")]/text()[1]', 20 | 're_date': r'(\d{4}-\d{2}-\d{2})', 21 | 'image': '//section[@id="video-section"]//div[@id="video-frame"]/videoplayer/@poster', 22 | 'performers': '//div[@class="team-item"]/following-sibling::div[1]//h1/a/text()', 23 | 'tags': '//section[@id="video-section"]//div[contains(@class, "post-meta")]/a/text()', 24 | 'duration': '', 25 | 'trailer': '', 26 | 'external_id': r'video/(.*)', 27 | 'pagination': '/video?page=%s', 28 | 'type': 'Scene', 29 | } 30 | 31 | def get_scenes(self, response): 32 | meta = response.meta 33 | scenes = response.xpath('//div[contains(@class, "post-thumbnail")]/a/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 37 | -------------------------------------------------------------------------------- /scenes/siteSmokingHawt.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteSmokingHawtSpider(BaseSceneScraper): 7 | name = 'SmokingHawt' 8 | network = 'Hentaied' 9 | parent = 'SmokingHawt' 10 | site = 'SmokingHawt' 11 | 12 | start_urls = [ 13 | 'https://smokinghawt.com', 14 | ] 15 | 16 | cookies = {"name": "age_gate", "value": "18"} 17 | 18 | selector_map = { 19 | 'title': '//h1/text()', 20 | 'description': '//div[@id="fullstory"]/p/text()', 21 | 'date': '//meta[@property="article:published_time"]/@content', 22 | 'image': '//meta[@property="og:image"]/@content', 23 | 'duration': '//div[contains(@class,"duration")]/img/following-sibling::text()', 24 | 'performers': '//div[@class="taglist"]/a[@rel="tag"]/text()', 25 | 'tags': '//ul[@class="post-categories"]/li/a/text()', 26 | 'director': '//div[contains(@class,"director")]/span/a/text()', 27 | 'external_id': '.*\/(.*?)\/$', 28 | 'trailer': '//video/source/@src', 29 | 'pagination': '/all-videos/page/%s/' 30 | } 31 | 32 | def get_scenes(self, response): 33 | scenes = response.xpath('//center[@class="vidcont"]/a/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 37 | 38 | def get_site(self, response): 39 | return "SmokingHawt" 40 | 41 | def get_parent(self, response): 42 | return "SmokingHawt" 43 | -------------------------------------------------------------------------------- /scenes/siteCupidsEden.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteCupidsEdenSpider(BaseSceneScraper): 7 | name = 'CupidsEden' 8 | network = 'CupidsEden' 9 | parent = 'CupidsEden' 10 | site = 'CupidsEden' 11 | 12 | start_urls = [ 13 | 'https://cupidseden.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1/text()', 18 | 'description': '//div[@class="video-text"]/div/p/text()', 19 | 'date': '', 20 | 'image': '//meta[@property="og:image"]/@content', 21 | 'performers': '//div[@class="video-text"]//span[contains(text(), "Model:")]/following-sibling::text()', 22 | 'tags': '//div[@class="video-text"]//span[contains(text(), "Tags:")]/following-sibling::a/text()', 23 | 'duration': '', 24 | 'trailer': '', 25 | 'external_id': r'.*/(.*?)/', 26 | 'pagination': '/page%s/', 27 | 'type': 'Scene', 28 | } 29 | 30 | def get_scenes(self, response): 31 | meta = response.meta 32 | scenes = response.xpath('//span[contains(@class, "title-info")]/..') 33 | for scene in scenes: 34 | image = scene.xpath('.//img/@src') 35 | if image: 36 | image = image.get() 37 | image = self.format_link(response, image) 38 | meta['image'] = image 39 | meta['image_blob'] = self.get_image_blob_from_link(image) 40 | 41 | scene = scene.xpath('./@href').get() 42 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 43 | -------------------------------------------------------------------------------- /scenes/siteSpankingStraightBoys.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteSpankingStraightBoysSpider(BaseSceneScraper): 7 | name = 'SpankingStraightBoys' 8 | site = 'Spanking Straight Boys' 9 | parent = 'Spanking Straight Boys' 10 | network = 'Spanking Straight Boys' 11 | 12 | start_urls = [ 13 | 'https://spankingstraightboys.com' 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//span[@class="update_title"]/text()', 18 | 'description': '//span[@class="latest_update_description"]/text()', 19 | 'date': '//span[@class="update_date"]/text()', 20 | 'date_formats': ['%m/%d/%Y'], 21 | 'image': '//div[@class="update_image"]/a/img/@src0_1x', 22 | 'performers': '//span[@class="tour_update_models"]/a/text()', 23 | 'tags': '//span[@class="tour_update_tags"]/a/text()', 24 | 'duration': '', 25 | 'trailer': '//div[@class="update_image"]/a[1]/@onclick', 26 | 're_trailer': r'[\'\"](.*\.mp4)', 27 | 'type': 'Scene', 28 | 'external_id': r'', 29 | 'pagination': '/tour/categories/updates_%s_d.html', 30 | } 31 | 32 | def get_scenes(self, response): 33 | meta = response.meta 34 | scenes = response.xpath('//div[@class="update_details"]') 35 | for scene in scenes: 36 | meta['id'] = scene.xpath('./@data-setid').get() 37 | scene = scene.xpath('./a[1]/@href').get() 38 | if meta['id']: 39 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 40 | -------------------------------------------------------------------------------- /scenes/siteTgirlsHookup.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteTgirlsHookupSpider(BaseSceneScraper): 7 | name = 'TgirlsHookup' 8 | network = 'Tgirls Hookup' 9 | parent = 'Tgirls Hookup' 10 | site = 'Tgirls Hookup' 11 | 12 | start_urls = [ 13 | 'https://www.tgirlshookup.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[contains(@class, "toptitle_left")]/text()', 18 | 'description': '//comment()[contains(., "setdesc")]/following-sibling::p/text()', 19 | 'date': '//b[contains(text(), "Added")]/following-sibling::text()', 20 | 'date_formats': ['%B %d, %Y'], 21 | 'image': '//div[contains(@class, "player-thumb")]/img[contains(@id, "set-target")]/@src0_1x|//meta[@property="og:image"]/@content', 22 | 'performers': '//div[@class="setdesc"]/a[contains(@href, "/models/")]/text()', 23 | 'tags': '', 24 | 'duration': '//div[contains(@class,"trailermovieruntime")]/text()', 25 | 'trailer': '', 26 | 'external_id': r'trailers/(.*)\.htm', 27 | 'pagination': '/tour/categories/movies/%s/latest/', 28 | 'type': 'Scene', 29 | } 30 | 31 | def get_scenes(self, response): 32 | scenes = response.xpath('//div[contains(@class, "videoblock")]/div/a/@href').getall() 33 | for scene in scenes: 34 | if re.search(self.get_selector_map('external_id'), scene): 35 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 36 | 37 | def get_tags(self, response): 38 | return ['Trans'] 39 | -------------------------------------------------------------------------------- /scenes/siteBoyfun.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | true = True 5 | false = False 6 | 7 | 8 | class SiteBoyfunSpider(BaseSceneScraper): 9 | name = 'Boyfun' 10 | site = 'Boyfun' 11 | parent = 'Boyfun' 12 | network = 'Boyfun' 13 | 14 | start_urls = [ 15 | 'https://www.boyfun.com' 16 | ] 17 | 18 | cookies = [{"domain":"www.boyfun.com","hostOnly":true,"httpOnly":false,"name":"warningHidden","path":"/","sameSite":"unspecified","secure":false,"session":true,"storeId":"0","value":"hide"}] 19 | 20 | selector_map = { 21 | 'title': '//span[@class="title"]/text()', 22 | 'description': '//div[@class="heading"]/following-sibling::p/text()', 23 | 'date': '//span[@class="date"]/span[@class="content"]/text()', 24 | 'date_formats': ['%b %d, %Y'], 25 | 'image': '//video/@poster', 26 | 'performers': '//span[@class="models"]/span[@class="content"]/a/text()', 27 | 'tags': '', 28 | 'duration': '', 29 | 'trailer': '', 30 | 'type': 'Scene', 31 | 'external_id': r'.*-(\d+)', 32 | 'pagination': '/videos/page%s.html', 33 | } 34 | 35 | def get_scenes(self, response): 36 | meta = response.meta 37 | scenes = response.xpath('//div[@class="item-inside"]/a[1]/@href').getall() 38 | for scene in scenes: 39 | if re.search(self.get_selector_map('external_id'), scene): 40 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 41 | 42 | def get_tags(self, response): 43 | return ['Gay'] 44 | -------------------------------------------------------------------------------- /scenes/siteHotCollegeFucks.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteHotCollegeFucksSpider(BaseSceneScraper): 7 | name = 'HotCollegeFucks' 8 | network = 'Hot College Fucks' 9 | parent = 'Hot College Fucks' 10 | site = 'Hot College Fucks' 11 | 12 | start_urls = [ 13 | 'https://hotcollegefucks.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1/text()', 18 | 'description': '//h4[contains(text(), "description")]/following-sibling::p//text()', 19 | 'date': '//span[contains(text(), "Added")]/following-sibling::text()[1]', 20 | 're_date': r'(\w+ \d{1,2}, \d{4})', 21 | 'image': '//div[@class="player-thumb"]//img/@src0_1x', 22 | 'performers': '//h5/following-sibling::ul/li/a/text()', 23 | 'duration': '//span[contains(text(), "Added")]/following-sibling::text()[contains(., "in")]', 24 | 're_duration': r'((?:\d{1,2}\:)?\d{2}\:\d{2})', 25 | 'external_id': r'', 26 | 'pagination': '/tour/categories/movies/%s/latest/', 27 | 'type': 'Scene', 28 | } 29 | 30 | def get_scenes(self, response): 31 | meta = response.meta 32 | scenes = response.xpath('//div[contains(@class, "videothumb")]') 33 | for scene in scenes: 34 | sceneid = scene.xpath('./@class').get() 35 | meta['id'] = re.search(r'b(\d+)_', sceneid).group(1) 36 | scene = scene.xpath('./a/@href').get() 37 | if meta['id']: 38 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 39 | -------------------------------------------------------------------------------- /scenes/siteRestrictedSenses.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteRestrictedSensesSpider(BaseSceneScraper): 8 | name = 'RestrictedSenses' 9 | network = 'Restricted Senses' 10 | 11 | start_urls = [ 12 | 'http://restrictedsenses.com', 13 | ] 14 | 15 | selector_map = { 16 | 'title': '//article/h1/a/text()', 17 | 'description': '//article/p[1]/text()', 18 | 'date': '//span[@class="entry-date"]/text()', 19 | 'image': '//div[@class="pin-container"]/img/@src', 20 | 'performers': '', 21 | 'tags': '', 22 | 'external_id': r'.*/(.*?)/', 23 | 'trailer': '', 24 | 'pagination': '/main/updates/page/%s/' 25 | } 26 | 27 | def get_scenes(self, response): 28 | scenes = response.xpath('//article/h4/../h1/a/@href').getall() 29 | for scene in scenes: 30 | if re.search(self.get_selector_map('external_id'), scene): 31 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 32 | 33 | def get_site(self, response): 34 | return "Restricted Senses" 35 | 36 | def get_parent(self, response): 37 | return "Restricted Senses" 38 | 39 | def get_performers(self, response): 40 | description = response.xpath('//article/p[1]/text()') 41 | if description: 42 | description = description.get() 43 | if "Mina" in description: 44 | return ['Mina'] 45 | return [] 46 | 47 | def get_tags(self, response): 48 | return ['Bondage', 'Fetish'] 49 | -------------------------------------------------------------------------------- /scenes/siteEyeOnTheguy.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteEyeOnTheGuySpider(BaseSceneScraper): 7 | name = 'EyeOnTheGuy' 8 | network = 'Eye on the Guy' 9 | parent = 'Eye on the Guy' 10 | site = 'Eye on the Guy' 11 | 12 | start_urls = [ 13 | 'https://www.eyeontheguy.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[not(@class="container")]/div[@class="title-block"]/h2[@class="section-title"]/text()', 18 | 'description': '//h3[contains(text(), "Description")]/following-sibling::text()', 19 | 'date': '//strong[contains(text(), "Released")]/following-sibling::text()', 20 | 'date_formats': ['%B %d, %Y'], 21 | 'image': '//script[contains(text(), "video_content")]/text()', 22 | 're_image': r'poster=\"(.*\.jpg)', 23 | 'performers': '//div[contains(@class, "models-list-thumbs")]/ul/li/a/span/text()', 24 | 'tags': '//ul[@class="tags"]/li/a/text()', 25 | 'trailer': '//script[contains(text(), "video_content")]/text()', 26 | 're_trailer': r'src=\"(.*\.mp4)', 27 | 'external_id': r'.*/(.*?)\.htm', 28 | 'pagination': '/t1/categories/movies_%s_d.html' 29 | } 30 | 31 | def get_scenes(self, response): 32 | meta = response.meta 33 | scenes = response.xpath('//h4/a[contains(@href, "/t1/trailers")]/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 37 | -------------------------------------------------------------------------------- /scenes/siteGlowingDesire.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | import scrapy 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteGlowingDesireSpider(BaseSceneScraper): 8 | name = 'GlowingDesire' 9 | network = 'Glowing Desire' 10 | parent = 'Glowing Desire' 11 | site = 'Glowing Desire' 12 | start_urls = [ 13 | 'https://glowingdesire.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h2/text()', 18 | 'description': '//div[@class="content"]/p//text()', 19 | 'date': '//span[@class="date"]/text()', 20 | 'date_formats': ['%b %d, %Y'], 21 | 'image': '//video/@poster', 22 | 'performers': '//div[contains(@class, "content-pane-performers")]/a/text()', 23 | 'tags': '//div[contains(@class, "categories")]/a/text()', 24 | 'external_id': r'stream/(\d+)/', 25 | 'pagination': '/video/gallery/%s', 26 | 'type': 'Scene', 27 | } 28 | 29 | def get_next_page_url(self, base, page): 30 | if int(page) == 1: 31 | return self.format_url(base, '/video/gallery/') 32 | else: 33 | page = str((int(page) - 1) * 12) 34 | return self.format_url(base, f'/video/gallery/{page}/') 35 | 36 | def get_scenes(self, response): 37 | meta = response.meta 38 | scenes = response.xpath('//div[contains(@class, "overlay-item")]/a/@href').getall() 39 | for scene in scenes: 40 | if re.search(self.get_selector_map('external_id'), scene): 41 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 42 | -------------------------------------------------------------------------------- /scenes/siteHitzefrei.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | import dateparser 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class HitzefreiSpider(BaseSceneScraper): 8 | name = 'Hitzefrei' 9 | network = "Radical Entertainment" 10 | parent = "Hitzefrei" 11 | 12 | start_urls = [ 13 | 'https://tour.hitzefrei.com/' 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[contains(@class,"row-content-details")]/h1[@class="content-title"]/text()', 18 | 'description': '//div[@class="content-description"]/p/text()', 19 | 'date': '//p[@class="content-metas"]/span[@class="meta-value"][2]/text()', 20 | 'image': '//div[@id="trailer-player"]/@data-screencap', 21 | 'performers': '//div[@class="model-name"]/text()', 22 | 'tags': '', 23 | 'trailer': '//div[@id="trailer-player"]/@data-trailer', 24 | 'external_id': r'/view/(\d*)/', 25 | 'pagination': '/videos?page=%s' 26 | } 27 | 28 | def get_scenes(self, response): 29 | 30 | scenes = response.xpath('//h1[@class="content-title"]/a/@href').getall() 31 | for scene in scenes: 32 | if re.search(self.get_selector_map('external_id'), scene): 33 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 34 | 35 | def get_date(self, response): 36 | date = self.process_xpath( 37 | response, self.get_selector_map('date')).get() 38 | date.replace('Released:', '').replace('Added:', '').strip() 39 | return dateparser.parse(date.strip(), settings={'DATE_ORDER': 'DMY', 'TIMEZONE': 'UTC'}).isoformat() 40 | -------------------------------------------------------------------------------- /scenes/siteMyBestSexLife.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteMyBestSexLifeSpider(BaseSceneScraper): 7 | name = 'MyBestSexLife' 8 | network = 'My Best Sex Life' 9 | parent = 'My Best Sex Life' 10 | site = 'My Best Sex Life' 11 | 12 | start_urls = [ 13 | 'https://mybestsexlife.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[contains(@class, "video-player")]//h2[@class="section-title"]/text()', 18 | 'description': '//h3[contains(text(), "escription")]/following-sibling::text()', 19 | 'date': '//strong[contains(text(), "eleased")]/following-sibling::text()', 20 | 'date_formats': ['%B %d, %Y'], 21 | 'image': '//div[@class="player-thumb"]/img/@src0_1x', 22 | 'performers': '//div[contains(@class, "models-list-thumb")]//img/following-sibling::span/text()', 23 | 'tags': '//ul[@class="tags"]/li/a/text()', 24 | 'duration': '//strong[contains(text(), "untime")]/following-sibling::text()', 25 | 're_duration': r'(\d{1,2}:\d{1,2})', 26 | 'trailer': '', 27 | 'external_id': r'.*/(.*?)\.htm', 28 | 'pagination': '/categories/movies_%s_d.html', 29 | 'type': 'Scene', 30 | } 31 | 32 | def get_scenes(self, response): 33 | meta = response.meta 34 | scenes = response.xpath('//div[@class="img-div"]/a/@href').getall() 35 | for scene in scenes: 36 | if re.search(self.get_selector_map('external_id'), scene): 37 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 38 | -------------------------------------------------------------------------------- /scenes/siteAntonioSuleiman.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteAntonioSuleimanSpider(BaseSceneScraper): 7 | name = 'AntonioSuleiman' 8 | network = 'Antonio Suleiman' 9 | parent = 'Antonio Suleiman' 10 | site = 'Antonio Suleiman' 11 | 12 | start_urls = [ 13 | 'https://antoniosuleiman.com', 14 | ] 15 | 16 | cookies = [{"name": "lang", "value": "0"}, {"name": "warn", "value": "true"}] 17 | 18 | selector_map = { 19 | 'title': '//div[contains(@class, "updatesBlock")]/div[contains(@class, "section-heading")]/h3/text()', 20 | 'description': '//div[contains(@class,"updatesBlock")]/div[@class="wrapper"]/p/span[contains(@class,"tour_update_models")]/../following-sibling::div[1]/text()', 21 | 'date': '//div[contains(@class, "updateDetails")]//p/text()', 22 | 'image': '//meta[@property="og:image"]/@content', 23 | 'performers': '//div[contains(@class,"updatesBlock")]/div[@class="wrapper"]/p/span[contains(@class,"tour_update_models")]/a/text()', 24 | 'tags': '', 25 | 'duration': '', 26 | 'trailer': '', 27 | 'external_id': r'updates/(.*)\.htm', 28 | 'pagination': '/categories/updates_%s_d.html?lang=0', 29 | 'type': 'Scene', 30 | } 31 | 32 | def get_scenes(self, response): 33 | scenes = response.xpath('//a[contains(@href, "/updates/")]/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 37 | -------------------------------------------------------------------------------- /scenes/siteLuxePlayhouse.py: -------------------------------------------------------------------------------- 1 | import re 2 | import requests 3 | import scrapy 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteLuxePlayhouseSpider(BaseSceneScraper): 8 | name = 'LuxePlayhouse' 9 | network = 'LuxePlayhouse' 10 | parent = 'LuxePlayhouse' 11 | site = 'LuxePlayhouse' 12 | 13 | start_urls = [ 14 | 'https://luxeplayhouse.com', 15 | ] 16 | 17 | selector_map = { 18 | 'title': '//div[contains(@class, "update-info")]/h1/text()', 19 | 'description': '', 20 | 'date': '//span[contains(text(), "ADDED")]/following-sibling::span[1]/text()', 21 | 'date_formats': ['%B %d, %Y'], 22 | 'image': '//script[contains(text(), "poster")]/text()', 23 | 're_image': r'poster=[\'\"](http.*?)[\'\"]', 24 | 'performers': '//ul[contains(@class, "luxe-list")]/li/a[contains(@href, "/models/")]/text()', 25 | 'tags': '//ul[contains(@class, "luxe-list")]/li/a[contains(@href, "/categories/")]/text()', 26 | 'duration': '//span[contains(text(), "RUNTIME")]/following-sibling::span[1]/text()', 27 | 'trailer': '', 28 | 'external_id': r'.*/(.*?)\.htm', 29 | 'pagination': '/categories/movies_%s.html', 30 | 'type': 'Scene', 31 | } 32 | 33 | def get_scenes(self, response): 34 | meta = response.meta 35 | scenes = response.xpath('//span[contains(@class, "item-title")]/h3/a/@href').getall() 36 | for scene in scenes: 37 | if re.search(self.get_selector_map('external_id'), scene): 38 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 39 | -------------------------------------------------------------------------------- /performers/siteFuckerMatePerformer.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | 4 | from tpdb.BasePerformerScraper import BasePerformerScraper 5 | 6 | 7 | class SiteFuckerMatePerformerSpider(BasePerformerScraper): 8 | selector_map = { 9 | 'name': '//h1[@class="post-title"]/text()', 10 | 'image': '//div[@id="post-thumbnail"]/img/@src', 11 | 'image_blob': True, 12 | 'bio': '//div[@class="widget"]/p[1]/text()', 13 | 'gender': '', 14 | 'astrology': '', 15 | 'birthday': '', 16 | 'birthplace': '', 17 | 'cupsize': '', 18 | 'ethnicity': '//div[@class="widget"]//li[contains(text(), "Ethnicity")]/a[1]/text()', 19 | 'eyecolor': '', 20 | 'fakeboobs': '', 21 | 'haircolor': '', 22 | 'height': '', 23 | 'measurements': '', 24 | 'nationality': '//div[@class="widget"]//li[contains(text(), "Ethnicity")]/a[2]/text()', 25 | 'piercings': '', 26 | 'tattoos': '', 27 | 'weight': '', 28 | 29 | 'pagination': '/actor?page=%s', 30 | 'external_id': r'model/(.*)/' 31 | } 32 | 33 | name = 'FuckerMatePerformer' 34 | network = 'Fucker Mate' 35 | 36 | start_urls = [ 37 | 'https://www.fuckermate.com', 38 | ] 39 | 40 | def get_gender(self, response): 41 | return 'Male' 42 | 43 | def get_performers(self, response): 44 | performers = response.xpath('//div[@class="post-thumbnail"]/a/@href').getall() 45 | for performer in performers: 46 | yield scrapy.Request(url=self.format_link(response, performer), callback=self.parse_performer, cookies=self.cookies, headers=self.headers) 47 | -------------------------------------------------------------------------------- /scenes/siteBaitBuddies.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteBaitBuddiesSpider(BaseSceneScraper): 7 | name = 'BaitBuddies' 8 | network = 'Bait Buddies' 9 | parent = 'Bait Buddies' 10 | site = 'Bait Buddies' 11 | 12 | start_urls = [ 13 | 'https://www.baitbuddies.com', 14 | ] 15 | 16 | selector_map = { 17 | 'description': '//div[@class="TabbedPanelsContentWrap"]//text()', 18 | 'image': '//div[@class="main_video"]/a[1]/img/@src', 19 | 'performers': '//div[@class="header_txt"]/strong/following-sibling::a/text()', 20 | 'tags': '//div[@id="tags"]/a/text()', 21 | 'external_id': r'contentId=(.*?)_', 22 | 'pagination': '/?page=preview&p=%s', 23 | 'type': 'Scene', 24 | } 25 | 26 | def get_scenes(self, response): 27 | meta = response.meta 28 | scenes = response.xpath('//div[@class="videos-thumb"]') 29 | for scene in scenes: 30 | scenedate = scene.xpath('.//strong[contains(text(), "Release")]/following-sibling::text()') 31 | if scenedate: 32 | meta['date'] = self.parse_date(scenedate.get(), date_formats=['%m/%d/%Y']).strftime('%Y-%m-%d') 33 | scene = scene.xpath('./a/@href').get() 34 | if re.search(self.get_selector_map('external_id'), scene): 35 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 36 | 37 | def get_title(self, response): 38 | performers = self.get_performers(response) 39 | return " and ".join(performers) 40 | -------------------------------------------------------------------------------- /scenes/siteMasqulin.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteMasqulinSpider(BaseSceneScraper): 7 | name = 'Masqulin' 8 | network = 'Masqulin' 9 | parent = 'Masqulin' 10 | site = 'Masqulin' 11 | 12 | start_urls = [ 13 | 'https://www.masqulin.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[contains(@class,"gallery_info")]/h1/text()', 18 | 'description': '//p[@class="update_description"]/text()', 19 | 'date': '//span[@class="availdate" and not(contains(@style, "right"))]/text()', 20 | 'date_formats': ['%b %d, %Y'], 21 | 'image': '//div[@class="fullscreenTour"]/video-js/@poster', 22 | 'performers': '//div[contains(@class, "gallery_info")]/p/span/a[contains(@href, "models")]/text()', 23 | 'tags': '//a[@class="tagsVideoPage"]/text()', 24 | 'duration': '//span[@class="availdate" and contains(@style, "right")]/text()', 25 | 're_duration': r'(\d{1,2}\:\d{2})', 26 | 'trailer': '', 27 | 'external_id': r'.*/(.*?)\.htm', 28 | 'pagination': '/categories/movies_%s_d.html', 29 | 'type': 'Scene', 30 | } 31 | 32 | def get_scenes(self, response): 33 | meta = response.meta 34 | scenes = response.xpath('//div[@class="updateDetails"]/@onclick').getall() 35 | for scene in scenes: 36 | scene = re.search(r'(http.*\.\w{3,4})', scene).group(1) 37 | if re.search(self.get_selector_map('external_id'), scene): 38 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 39 | -------------------------------------------------------------------------------- /scenes/siteZishy.py: -------------------------------------------------------------------------------- 1 | import re 2 | import string 3 | import scrapy 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteZishySpider(BaseSceneScraper): 8 | name = 'Zishy' 9 | network = 'Zishy' 10 | parent = 'Zishy' 11 | site = 'Zishy' 12 | 13 | start_urls = [ 14 | 'https://www.zishy.com', 15 | ] 16 | 17 | selector_map = { 18 | 'title': '//div[@id="albumhead"]/div/span[1]/text()', 19 | 'description': '//div[@id="descrip"]/text()', 20 | 'date': '//div[@id="albumhead"]/div/span[2]/text()', 21 | 're_date': r'(\w+ \d{2}, \d{4})', 22 | 'date_formats': ['%b %d, %Y'], 23 | 'image': '//div[contains(@id, "media-player")]/a/img/@style', 24 | 're_image': r'url\((.*?)\)', 25 | 'performers': '//span[@class="moreof"]/a/text()', 26 | 'tags': '', 27 | 'duration': '', 28 | 'trailer': '', 29 | 'external_id': r'.*/(\d+)', 30 | 'pagination': '/?q=with_videos&page=%s', 31 | 'type': 'Scene', 32 | } 33 | 34 | def get_scenes(self, response): 35 | meta = response.meta 36 | scenes = response.xpath('//div[@class="albumcover"]/a/@href').getall() 37 | for scene in scenes: 38 | if re.search(self.get_selector_map('external_id'), scene): 39 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 40 | 41 | def get_performers(self, response): 42 | performers = super().get_performers(response) 43 | performers = list(map(lambda x: string.capwords(x.replace("#", "").strip()), performers)) 44 | return performers 45 | -------------------------------------------------------------------------------- /scenes/siteSapphix.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | 4 | from tpdb.BaseSceneScraper import BaseSceneScraper 5 | 6 | 7 | class SiteSapphixSpider(BaseSceneScraper): 8 | name = 'Sapphix' 9 | network = 'Sapphix' 10 | parent = 'Sapphix' 11 | site = 'Sapphix' 12 | 13 | date_trash = ['Released:', 'Added:', 'Published:', 'Added'] 14 | 15 | start_urls = [ 16 | 'https://www.sapphix.com', 17 | ] 18 | 19 | selector_map = { 20 | 'title': '//h2/text()', 21 | 'description': '//p[@class="mg-md"]/text()', 22 | 'date': '//div[@class="row"]/div[contains(@class,"text-right")]/span/text()', 23 | 'date_formats': ['%B %d, %Y'], 24 | 'image': '//div[@id="videoPlayer"]//video/@poster', 25 | 'performers': '//h4[contains(text(), "Featured")]/following-sibling::p/a/text()', 26 | 'tags': '//h4[contains(text(), "Tags")]/following-sibling::a/text()', 27 | 'external_id': r'movies/(.*)/', 28 | 'trailer': '//div[@id="videoPlayer"]//video/source/@src', 29 | 'pagination': '/movies/page-%s/?tag=&q=&model=&sort=recent' 30 | } 31 | 32 | def get_scenes(self, response): 33 | scenes = response.xpath('//div[@class="itemm"]/a/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 37 | 38 | def get_url(self, response): 39 | url = re.search(r'(.*)\?nats', response.url) 40 | if url: 41 | url = url.group(1) 42 | return url.strip() 43 | return response.url 44 | -------------------------------------------------------------------------------- /scenes/siteBiCollegeFucks.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | true = True 5 | false = False 6 | 7 | 8 | class SiteBiCollegeFucksSpider(BaseSceneScraper): 9 | name = 'BiCollegeFucks' 10 | network = 'BiCollegeFucks' 11 | parent = 'BiCollegeFucks' 12 | site = 'BiCollegeFucks' 13 | 14 | cookies = [{"name": "warn", "value": "true"}] 15 | 16 | start_urls = [ 17 | 'https://bicollegefucks.com', 18 | ] 19 | 20 | selector_map = { 21 | 'title': '//h1/text()', 22 | 'description': '//div[@class="description"]/p//text()', 23 | 'date': '//span[contains(text(), "Added:")]/following-sibling::text()[contains(., ",")]', 24 | 're_date': r'(\w+ \d{1,2}, \d{4})', 25 | 'image': '//img[contains(@id, "set-target")]/@src0_1x', 26 | 'performers': '//div[contains(@class, "modelFeaturing")]/ul/li/a/text()', 27 | 'duration': '//div[@class="player-time"]/text()', 28 | 'external_id': r'', 29 | 'pagination': '/tour/categories/movies/%s/latest/', 30 | 'type': 'Scene', 31 | } 32 | 33 | def get_scenes(self, response): 34 | meta = response.meta 35 | scenes = response.xpath('//div[contains(@class, "videothumb")]') 36 | for scene in scenes: 37 | sceneid = scene.xpath('./@class').get() 38 | meta['id'] = re.search(r'b(\d+)_', sceneid).group(1) 39 | scene = scene.xpath('./a[1]/@href').get() 40 | if re.search(self.get_selector_map('external_id'), scene): 41 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 42 | -------------------------------------------------------------------------------- /scenes/siteExploitedSecretaries.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteExploitedSecretariesSpider(BaseSceneScraper): 7 | name = 'ExploitedSecretaries' 8 | network = 'Exploited Secretaries' 9 | parent = 'Exploited Secretaries' 10 | site = 'Exploited Secretaries' 11 | 12 | start_urls = [ 13 | 'https://exploitedsecretaries.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h2/text()', 18 | 'description': '//div[@class="wpb_wrapper"]/p[1]/text()', 19 | 'date': '//script[contains(text(), "datePublished")]/text()', 20 | 're_date': r'datePublished.*?(\d{4}-\d{2}-\d{2})', 21 | 'image': '//meta[@property="og:image"]/@content|//meta[@name="twitter:image"]/@content', 22 | 'performers': '//h3[contains(text(), "Model Details")]/following-sibling::div[1]/ul/div/li/strong[contains(text(), "Name")]/following-sibling::text()', 23 | 'tags': '', 24 | 'duration': '', 25 | 'trailer': '//script[contains(text(), "videoSource")]/text()', 26 | 're_trailer': r'videoSource.*?(http.*?\.mp4)', 27 | 'external_id': r'.*/(.*?)/$', 28 | 'pagination': '/videos/?page=%s', 29 | 'type': 'Scene', 30 | } 31 | 32 | def get_scenes(self, response): 33 | meta = response.meta 34 | scenes = response.xpath('//div[contains(@class,"animated-block")]/div/a/@href').getall() 35 | for scene in scenes: 36 | if re.search(self.get_selector_map('external_id'), scene): 37 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 38 | -------------------------------------------------------------------------------- /scenes/siteMILFVR.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteMILFVRSpider(BaseSceneScraper): 7 | name = 'MILFVR' 8 | network = 'MILFVR' 9 | parent = 'MILFVR' 10 | site = 'MILFVR' 11 | 12 | start_urls = [ 13 | 'https://www.milfvr.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[contains(@class, "header-lg")]/h1[@class="detail__title"]/text()', 18 | 'description': '//script[contains(@type,"ld+json")]/text()', 19 | 're_description': r'description[\'\"].*?[\'\"](.*?)[\'\"],', 20 | 'date': '//script[contains(@type,"ld+json")]/text()', 21 | 're_date': r'uploadDate[\'\"].*?[\'\"](.*?)[\'\"],', 22 | 'date_formats': ['%Y-%m-%d'], 23 | 'image': '//meta[@property="og:image"]/@content', 24 | 'performers': '//div[@class="detail__models" and contains(text(), "Starring")]/a/text()', 25 | 'tags': '//div[contains(@class,"tag-list__body")]//a/text()', 26 | 'duration': '//script[contains(@type,"ld+json")]/text()', 27 | 're_duration': r'duration[\'\"].*?[\'\"](.*?)[\'\"],', 28 | 'trailer': '', 29 | 'external_id': r'.*-(\d+)$', 30 | 'pagination': '/?o=d&p=%s', 31 | 'type': 'Scene', 32 | } 33 | 34 | def get_scenes(self, response): 35 | meta = response.meta 36 | scenes = response.xpath('//div[@class="card__body"]/a/@href').getall() 37 | for scene in scenes: 38 | if re.search(self.get_selector_map('external_id'), scene): 39 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 40 | -------------------------------------------------------------------------------- /scenes/siteSketchySex.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteSketchySexSpider(BaseSceneScraper): 7 | name = 'SketchySex' 8 | network = 'Sketchy Sex' 9 | parent = 'Sketchy Sex' 10 | site = 'Sketchy Sex' 11 | 12 | start_urls = [ 13 | 'https://www.sketchysex.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[@class="info"]/div[@class="name"]/span/text()', 18 | 'description': '//div[@class="VideoDescription"]/text()', 19 | 'image': '//video/@poster', 20 | 'performers': '//ul[@class="ModelNames"]/li/a/text()', 21 | 'tags': '//div[@class="VideoTagsWrap"]/a/span/text()', 22 | 'external_id': r'id=(\d+)', 23 | 'pagination': '/index.php?page=%s', 24 | 'type': 'Scene', 25 | } 26 | 27 | def get_scenes(self, response): 28 | meta = response.meta 29 | scenes = response.xpath('//div[@class="video-item"]') 30 | for scene in scenes: 31 | scenedate = scene.xpath('.//span[contains(@class, "video-date")]/text()') 32 | if scenedate: 33 | meta['date'] = self.parse_date(scenedate.get().strip(), date_formats=['%b %d, %Y']).strftime('%Y-%m-%d') 34 | 35 | scene = scene.xpath('./div[contains(@class,"video-thumb")]/a/@href').get() 36 | if re.search(self.get_selector_map('external_id'), scene): 37 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 38 | 39 | def get_tags(self, response): 40 | tags = super().get_tags(response) 41 | tags.extend(['Gay']) 42 | return tags -------------------------------------------------------------------------------- /scenes/siteTagTeamPOV.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteTagTeamPOVSpider(BaseSceneScraper): 7 | name = 'TagTeamPOV' 8 | network = 'Spizoo' 9 | parent = 'TagTeamPOV' 10 | site = 'TagTeamPOV' 11 | 12 | start_urls = [ 13 | 'https://www.tagteampov.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//div[@class="title"]/h1/text()', 18 | 'description': '//div[contains(@class, "description")]/p/text()', 19 | 'date': '//h2[contains(text(), "Release")]/following-sibling::p/text()', 20 | 'date_formats': ['%Y-%m-%d'], 21 | 'image': '//div[@id="block-content"]/img/@src', 22 | 'performers': '//h2[contains(text(), "Pornstars")]/following-sibling::span[1]/a/@title', 23 | 'tags': '//div[contains(@class, "categories-holder")]/a/@title', 24 | 'duration': '//h4[contains(text(), "Length")]/following-sibling::p/text()|//h2[contains(text(), "Length")]/following-sibling::p/text()', 25 | 'trailer': '', 26 | 'external_id': r'.*/(.*?)\.htm', 27 | 'pagination': '/categories/videos_%s_d.html', 28 | 'type': 'Scene', 29 | } 30 | 31 | def get_scenes(self, response): 32 | meta = response.meta 33 | scenes = response.xpath('//div[contains(@class, "title-label")]/a/@href').getall() 34 | for scene in scenes: 35 | if re.search(self.get_selector_map('external_id'), scene): 36 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 37 | 38 | def get_id(self, response): 39 | return super().get_id(response).lower() 40 | -------------------------------------------------------------------------------- /performers/siteHDSex18Performer.py: -------------------------------------------------------------------------------- 1 | import scrapy 2 | from tpdb.BasePerformerScraper import BasePerformerScraper 3 | 4 | 5 | class SiteHDSex18PerformerSpider(BasePerformerScraper): 6 | selector_map = { 7 | 'name': '//div[@class="main-content"]//div[@class="model-right"]//h2[@class="title"]/text()', 8 | 'image': '//div[@class="main-content"]//div[@class="model-left"]//img/@src', 9 | 'bio': '', 10 | 'gender': '', 11 | 'astrology': '', 12 | 'birthday': '', 13 | 'birthplace': '', 14 | 'cupsize': '', 15 | 'ethnicity': '', 16 | 'eyecolor': '//span[@class="label fix-w" and contains(text(), "Eyes")]/following-sibling::a/text()', 17 | 'fakeboobs': '', 18 | 'haircolor': '//span[@class="label fix-w" and contains(text(), "Hair")]/following-sibling::a/text()', 19 | 'height': '', 20 | 'measurements': '', 21 | 'nationality': '', 22 | 'piercings': '', 23 | 'tattoos': '', 24 | 'weight': '', 25 | 26 | 'pagination': '/models/%s/?sort_by=model_id&gender_id=0', 27 | 'external_id': r'model/(.*)/' 28 | } 29 | 30 | name = 'HDSex18Performer' 31 | network = 'HDSex18' 32 | 33 | start_urls = [ 34 | 'https://hdsex18.com', 35 | ] 36 | 37 | def get_gender(self, response): 38 | return 'Female' 39 | 40 | def get_performers(self, response): 41 | performers = response.xpath('//div[@class="thumb-model"]/div/a/@href').getall() 42 | for performer in performers: 43 | yield scrapy.Request(url=self.format_link(response, performer), callback=self.parse_performer, cookies=self.cookies, headers=self.headers) 44 | -------------------------------------------------------------------------------- /scenes/siteBravoFucker.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | from tpdb.items import SceneItem 5 | 6 | 7 | class SiteBravoFuckerSpider(BaseSceneScraper): 8 | name = 'BravoFucker' 9 | network = 'Bravo Fucker' 10 | parent = 'Bravo Fucker' 11 | site = 'Bravo Fucker' 12 | 13 | start_urls = [ 14 | 'https://www.bravofucker.com', 15 | ] 16 | 17 | selector_map = { 18 | 'external_id': '', 19 | 'pagination': '/en/videos?page=%s', 20 | 'type': 'Scene', 21 | } 22 | 23 | def get_scenes(self, response): 24 | jsondata = response.xpath('//script[contains(@type,"ld+json")]/text()').get() 25 | jsondata = json.loads(jsondata, strict=False) 26 | for scene in jsondata['itemListElement']: 27 | scene = scene['item'] 28 | item = SceneItem() 29 | item['title'] = self.cleanup_title(scene['name']) 30 | item['description'] = self.cleanup_description(scene['description']) 31 | item['date'] = scene['datePublished'] 32 | item['image'] = scene['thumbnailUrl'] 33 | item['image_blob'] = self.get_image_blob_from_link(item['image']) 34 | item['url'] = scene['url'] 35 | item['id'] = re.search(r'detail/(\d+)', item['url']).group(1) 36 | item['trailer'] = '' 37 | item['tags'] = ['Gay Porn'] 38 | item['performers'] = [] 39 | item['type'] = "Scene" 40 | item['site'] = "Bravo Fucker" 41 | item['parent'] = "Bravo Fucker" 42 | item['network'] = "Bravo Fucker" 43 | yield self.check_item(item, self.days) 44 | -------------------------------------------------------------------------------- /scenes/siteNextDoorSins.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SiteNextdoorSinsSpider(BaseSceneScraper): 7 | name = 'NextdoorSins' 8 | network = 'Nextdoor Sins' 9 | parent = 'Nextdoor Sins' 10 | site = 'Nextdoor Sins' 11 | 12 | start_urls = [ 13 | 'https://www.nextdoorsins.com', 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//span[contains(@class, "title")]/text()', 18 | 'description': '//span[contains(@class, "description")]/text()', 19 | 'date': '//span[@class="availdate"]/text()', 20 | 'date_formats': ['%m/%d/%Y'], 21 | 'image': '', 22 | 'performers': '', 23 | 'tags': '//span[@class="update-tags"]/a/text()', 24 | 'external_id': r'updates/(.*).html', 25 | 'trailer': '', 26 | 'pagination': '/tour/categories/movies_%s_d.html' 27 | } 28 | 29 | def get_scenes(self, response): 30 | scenes = response.xpath('//div[@class="movie-holder"]/a/@href|//div[@class="video"]/a/@href').getall() 31 | for scene in scenes: 32 | if re.search(self.get_selector_map('external_id'), scene): 33 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene) 34 | 35 | def get_image(self, response): 36 | image = response.xpath('//div[@class="large-image-holder"]/a/img/@src0_2x') 37 | if not image: 38 | image = response.xpath('//div[@class="large-image-holder"]/a/img/@src0_1x') 39 | 40 | if image: 41 | image = "https://www.nextdoorsins.com/tour/" + image.get() 42 | return image 43 | 44 | return '' 45 | -------------------------------------------------------------------------------- /scenes/sitePublicHandjobs.py: -------------------------------------------------------------------------------- 1 | import re 2 | import scrapy 3 | from tpdb.BaseSceneScraper import BaseSceneScraper 4 | 5 | 6 | class SitePublicHandjobsSpider(BaseSceneScraper): 7 | name = 'PublicHandjobs' 8 | site = 'Public Handjobs' 9 | parent = 'Public Handjobs' 10 | network = 'Public Handjobs' 11 | 12 | start_urls = [ 13 | 'https://publichandjobs.com' 14 | ] 15 | 16 | selector_map = { 17 | 'title': '//h1/text()', 18 | 'description': '//h4[contains(text(), "Tags")]/following-sibling::p//text()', 19 | 'image': '//video/@poster', 20 | 'performers': '', 21 | 'tags': '//h4[contains(text(), "Tags")]/a/text()', 22 | 'type': 'Scene', 23 | 'external_id': r'', 24 | 'pagination': '/page%s/', 25 | } 26 | 27 | def get_scenes(self, response): 28 | meta = response.meta 29 | scenes = response.xpath('//div[@class="video-card"]') 30 | for scene in scenes: 31 | sceneid = scene.xpath('./div/a/img/@src').get() 32 | meta['id'] = re.search(r'/(\d+)-', sceneid).group(1) 33 | scene = scene.xpath('./div/a/@href').get() 34 | if re.search(self.get_selector_map('external_id'), scene): 35 | yield scrapy.Request(url=self.format_link(response, scene), callback=self.parse_scene, meta=meta) 36 | 37 | def get_performers(self, response): 38 | performers = response.xpath('//h4[contains(text(), "Model:")]/text()').get() 39 | performers = re.search(r':(.*)', performers).group(1) 40 | performers = performers.strip().replace(" ", " ") 41 | performers = performers.split(" and ") 42 | return performers 43 | --------------------------------------------------------------------------------