├── .editorconfig ├── .env.example ├── .gitattributes ├── .gitignore ├── .styleci.yml ├── README.md ├── app ├── Console │ └── Kernel.php ├── Contracts │ ├── ScrapeItemInterface.php │ ├── ScraperFactoryInterface.php │ └── ScraperInterface.php ├── DTOs │ └── ScrapeItemDTO.php ├── Events │ └── ProcessingStarted.php ├── Exceptions │ ├── Handler.php │ └── ScraperDriverNotFoundException.php ├── Factories │ └── ScraperFactory.php ├── Http │ ├── Controllers │ │ ├── Controller.php │ │ ├── PageScrapeController.php │ │ └── ProgressController.php │ ├── Kernel.php │ └── Middleware │ │ ├── Authenticate.php │ │ ├── CheckForMaintenanceMode.php │ │ ├── EncryptCookies.php │ │ ├── RedirectIfAuthenticated.php │ │ ├── TrimStrings.php │ │ ├── TrustHosts.php │ │ ├── TrustProxies.php │ │ └── VerifyCsrfToken.php ├── Jobs │ ├── ProcessPhotoGallery.php │ └── ProcessVideo.php ├── PhotoGallery.php ├── Providers │ ├── AppServiceProvider.php │ ├── AuthServiceProvider.php │ ├── BroadcastServiceProvider.php │ ├── EventServiceProvider.php │ └── RouteServiceProvider.php ├── ScrapeItem.php ├── Scrapers │ ├── FullPornerScraper.php │ ├── GoodPornScraper.php │ ├── HQPornerScraper.php │ ├── PicHunterScraper.php │ ├── PornHubScraper.php │ ├── PornKTubeScraper.php │ ├── PornPicsScraper.php │ ├── PorntrexScraper.php │ ├── PornwildScraper.php │ ├── WhoresHubScraper.php │ └── YouJizzScraper.php ├── Traits │ └── ScrapeItemTrait.php ├── User.php └── Video.php ├── artisan ├── bootstrap ├── app.php └── cache │ └── .gitignore ├── composer.json ├── composer.lock ├── config ├── app.php ├── auth.php ├── broadcasting.php ├── cache.php ├── cors.php ├── database.php ├── filesystems.php ├── hashing.php ├── logging.php ├── mail.php ├── queue.php ├── scrapers.php ├── services.php ├── session.php └── view.php ├── database ├── .gitignore ├── factories │ └── UserFactory.php ├── migrations │ ├── 2014_10_12_000000_create_users_table.php │ ├── 2014_10_12_100000_create_password_resets_table.php │ ├── 
2019_08_19_000000_create_failed_jobs_table.php │ ├── 2021_07_05_044011_create_jobs_table.php │ ├── 2021_07_05_205158_create_videos_table.php │ ├── 2022_10_23_165740_add_is_stream_column_to_videos_table.php │ ├── 2022_10_30_152342_create_photo_galleries_table.php │ ├── 2022_10_30_152624_create_scrape_items_table.php │ ├── 2022_10_31_051225_remove_columns_from_videos_table.php │ ├── 2022_11_05_180539_add_soft_deletes_to_videos_table.php │ ├── 2022_11_05_180550_add_soft_deletes_to_photo_galleries_table.php │ └── 2022_11_05_180601_add_soft_deletes_to_scrape_items_table.php └── seeds │ └── DatabaseSeeder.php ├── package-lock.json ├── package.json ├── phpunit.xml ├── public ├── .htaccess ├── favicon.ico ├── index.php ├── robots.txt └── web.config ├── resources ├── js │ ├── app.js │ ├── bootstrap.js │ └── components │ │ ├── ContentScraper.vue │ │ ├── CurrentlyScrapingItem.vue │ │ ├── ProgressBar.vue │ │ ├── ScrapeListTable.vue │ │ ├── ScrapeListTableRow.vue │ │ ├── ScrapeStatusBadge.vue │ │ ├── SiteLogo.vue │ │ └── icons │ │ ├── Checkmark.vue │ │ ├── Exclamation.vue │ │ └── Spinner.vue ├── lang │ └── en │ │ ├── auth.php │ │ ├── pagination.php │ │ ├── passwords.php │ │ └── validation.php ├── sass │ └── app.scss └── views │ └── welcome.blade.php ├── routes ├── api.php ├── channels.php ├── console.php └── web.php ├── server.php ├── storage ├── app │ └── public │ │ └── logos │ │ ├── fullporner.png │ │ ├── goodporn.png │ │ ├── hqporner.png │ │ ├── pichunter.png │ │ ├── pornhub.png │ │ ├── pornktube.png │ │ ├── pornpics.png │ │ ├── porntrex.png │ │ ├── pornwild.png │ │ ├── whoreshub.png │ │ └── youjizz.png ├── framework │ ├── cache │ │ └── .gitignore │ ├── sessions │ │ └── .gitignore │ └── views │ │ └── .gitignore └── logs │ └── .gitignore ├── tests ├── Browser │ ├── ExampleTest.php │ ├── Pages │ │ ├── HomePage.php │ │ └── Page.php │ ├── console │ │ └── .gitignore │ └── screenshots │ │ └── .gitignore ├── CreatesApplication.php ├── DuskTestCase.php ├── Feature │ └── 
ExampleTest.php ├── TestCase.php └── Unit │ └── ExampleTest.php └── webpack.mix.js /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | insert_final_newline = true 7 | indent_style = space 8 | indent_size = 4 9 | trim_trailing_whitespace = true 10 | 11 | [*.md] 12 | trim_trailing_whitespace = false 13 | 14 | [*.{yml,yaml}] 15 | indent_size = 2 16 | 17 | [*.{js,vue,json}] 18 | indent_size = 2 19 | 20 | [*.{html,blade.php}] 21 | indent_size = 2 22 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | APP_NAME=Scraper 2 | APP_ENV=local 3 | APP_KEY= 4 | APP_DEBUG=true 5 | APP_URL=http://localhost 6 | 7 | LOG_CHANNEL=stack 8 | 9 | DB_CONNECTION=sqlite 10 | 11 | BROADCAST_DRIVER=log 12 | CACHE_DRIVER=file 13 | QUEUE_CONNECTION=database 14 | SESSION_DRIVER=file 15 | SESSION_LIFETIME=120 16 | 17 | # Only needs configured if using Redis as QUEUE_CONNECTION 18 | REDIS_HOST=127.0.0.1 19 | REDIS_PASSWORD=null 20 | REDIS_PORT=6379 21 | 22 | FFMPEG_OUTPUT_PATH= # full system path where scraped videos are stored 23 | FFMPEG_LOG_PATH= # full system path where ffmpeg processing log files are stored 24 | 25 | PHOTO_OUTPUT_PATH= # full system path where scraped photo galleries are stored 26 | PHOTO_LOG_PATH= # full system path where scraped photo gallery processing log files are stored 27 | 28 | # (optional) Authentication credentials for accessing videos behind required login 29 | PORNTREX_USERNAME= 30 | PORNTREX_PASSWORD= 31 | PORNTREX_LOGIN_URL= 32 | 33 | PORNWILD_USERNAME= 34 | PORNWILD_PASSWORD= 35 | PORNWILD_LOGIN_URL= 36 | 37 | PORNKTUBE_USERNAME= 38 | PORNKTUBE_PASSWORD= 39 | PORNKTUBE_LOGIN_URL= 40 | 41 | HQPORNER_USERNAME= 42 | HQPORNER_PASSWORD= 43 | HQPORNER_LOGIN_URL= 44 | 45 | PORNHUB_USERNAME= 46 | PORNHUB_PASSWORD= 47 | 
PORNHUB_LOGIN_URL= 48 | 49 | PORNPICS_USERNAME= 50 | PORNPICS_PASSWORD= 51 | PORNPICS_LOGIN_URL= 52 | 53 | PICHUNTER_USERNAME= 54 | PICHUNTER_PASSWORD= 55 | PICHUNTER_LOGIN_URL= 56 | 57 | WHORESHUB_USERNAME= 58 | WHORESHUB_PASSWORD= 59 | WHORESHUB_LOGIN_URL= 60 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | *.css linguist-vendored 3 | *.scss linguist-vendored 4 | *.js linguist-vendored 5 | CHANGELOG.md export-ignore 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /node_modules 2 | /public/hot 3 | /public/storage 4 | /storage/*.key 5 | /vendor 6 | .env 7 | .env.backup 8 | .phpunit.result.cache 9 | Homestead.json 10 | Homestead.yaml 11 | npm-debug.log 12 | yarn-error.log 13 | .idea/* 14 | /public/js 15 | /public/css 16 | /public/mix-manifest.json 17 | -------------------------------------------------------------------------------- /.styleci.yml: -------------------------------------------------------------------------------- 1 | php: 2 | preset: laravel 3 | disabled: 4 | - unused_use 5 | finder: 6 | not-name: 7 | - index.php 8 | - server.php 9 | js: 10 | finder: 11 | not-name: 12 | - webpack.mix.js 13 | css: true 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Porn Site Scraper 2 | A web based scraper and UI monitoring tool for downloading videos and full photo galleries from various porn sites. Launches 3 | a web driver for scraping and can be configured to handle authentication 4 | for scraping media hidden behind logins. 5 | 6 | ### Currently supported sites 7 | 8 | **Videos** 9 | 1. PornHub (pornhub.com) 10 | 2. Porntrex (porntrex.com) 11 | 2. 
FullPorner (fullporner.com) 12 | 3. PornKTube (pornktube.tv) 13 | 4. HQPorner (hqporner.com) 14 | 5. WhoresHub (whoreshub.com) 15 | 6. YouJizz (youjizz.com) 16 | 7. Goodporn (goodporn.to) 17 | 18 | **Photo Galleries** 19 | 1. PornPics (pornpics.com) 20 | 2. PicHunter (pichunter.com) 21 | 22 | ## Usage 23 | 1. Visit one of the supported sites and navigate to a desired video or photo gallery. 24 | 2. Copy the URL from the site and paste it into the web UI input and add a name to be used for the saved file. 25 | 3. Click the scrape button to launch the job and monitor progress in the UI. 26 | 27 | ## Installation 28 | ### Pre-reqs 29 | 1. \>=PHP 7.2 30 | 2. [Chrome](https://www.google.com/chrome/) browser installed on host system 31 | 3. [FFmpeg](https://ffmpeg.org/) installed on host system 32 | 33 | ### Installation steps 34 | 1. Clone the repository 35 |
`git clone https://github.com/ed36080666/site_scraper.git` 36 | 2. Install PHP dependencies 37 |
`composer install` 38 | 3. Install Laravel Dusk chrome driver 39 |
`php artisan dusk:chrome-driver` 40 | 4. Install frontend dependencies 41 |
`npm install` 42 | 5. Copy and configure `.env` 43 |
`cp .env.example .env` 44 | 1. Set full system path for `FFMPEG_OUTPUT_PATH` variable in `.env`. This determines where saved videos are stored. 45 | 2. Set full system path for `FFMPEG_LOG_PATH` variable in `.env`. This determines where FFmpeg will store log files. 46 | 6. Generate Laravel application key 47 |
`php artisan key:generate` 48 | 7. Create the base SQLite database 49 |
`touch ./database/database.sqlite` 50 | 8. Run database migrations 51 |
`php artisan migrate` 52 | 9. Build frontend assets 53 |
`npm run dev` 54 | 10. Start a queue worker (handles scraping jobs in background) 55 |
`php artisan queue:work` 56 | 11. Start the application 57 |
`php artisan serve` 58 | 59 | ### Running queue workers 60 | To get the most out of this application, you should leverage the Laravel worker queue. The best way to do this is by running queue workers in the background using [Supervisor](http://supervisord.org/installing.html). Supervisor will launch a given number of worker processes and keep them running. 61 | 62 | 1. Install supervisor 63 |
`sudo apt update && sudo apt install supervisor` 64 | 2. Create a new config file for our workers: 65 |
`sudo vim /etc/supervisor/conf.d/site_scraper_worker.conf` 66 | 67 | ``` 68 | [program:site_scraper_worker] 69 | process_name=%(program_name)s_%(process_num)02d 70 | # customize system path to root of the site_scraper directory 71 | command=php /var/www/vhosts/site_scraper/artisan queue:work --tries=1 --timeout=7000 72 | autostart=true 73 | autorestart=true 74 | stopasgroup=true 75 | killasgroup=true 76 | redirect_stderr=true 77 | stopwaitsecs=7201 78 | user=# set appropriate system user 79 | numprocs=8 # Can add more or fewer workers based on your hardware, network etc. 80 | stdout_logfile=# Customize this to wherever you want to place your queue worker logs. 81 | ``` 82 | 3. Reread the config files and update supervisor 83 |
`sudo supervisorctl reread` 84 |
`sudo supervisorctl update` 85 | 4. Check the workers are running 86 |
`sudo supervisorctl status` 87 |

You should see something along the lines of the following: 88 | ``` 89 | site_scraper_worker:site_scraper_worker_00 RUNNING pid 20567, uptime 0:02:55 90 | ... 1 entry for each worker 91 | ``` 92 | 93 | ### Troubleshooting 94 | 1. Chrome Web Driver exceptions 95 | 1. Ensure Chrome is installed on the host system 96 | 2. Ensure Laravel Dusk Chrome driver binary is installed 97 | 1. Visit [Laravel Dusk](https://laravel.com/docs/dusk) docs for more info 98 | 2. Out of date errors. Sometimes Laravel Dusk will install a version of the Chrome driver that requires a higher version of the Chrome binary than what is installed on the system. If you see errors about unsupported versions during scraping, try updating the Chrome binary to a higher version (i.e. re-install/update the Chrome browser). 99 | 2. Permission errors 100 | 1. Ensure ffmpeg binary has execute permissions allowing server to launch processes 101 | 2. Ensure server has write permissions to the video output directory 102 | 3. Ensure server has write permissions to all the log directories 103 | -------------------------------------------------------------------------------- /app/Console/Kernel.php: -------------------------------------------------------------------------------- 1 | command('inspire')->hourly(); 28 | } 29 | 30 | /** 31 | * Register the commands for the application. 
32 | * 33 | * @return void 34 | */ 35 | protected function commands() 36 | { 37 | $this->load(__DIR__.'/Commands'); 38 | 39 | require base_path('routes/console.php'); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /app/Contracts/ScrapeItemInterface.php: -------------------------------------------------------------------------------- 1 | id = $item->id(); 24 | $this->name = $item->name(); 25 | $this->progress = $item->progress(); 26 | $this->height = $item->height(); 27 | $this->width = $item->width(); 28 | $this->status = $item->status(); 29 | $this->is_stream = $item->isStream(); 30 | $this->started_at = $item->startedAt(); 31 | $this->type = $item->type(); 32 | $this->file_exists = $item->fileExists(); 33 | $this->log_exists = $item->logExists(); 34 | } 35 | 36 | public function toArray(): array 37 | { 38 | return [ 39 | 'id' => $this->id, 40 | 'name' => $this->name, 41 | 'progress' => $this->progress, 42 | 'height' => $this->height, 43 | 'width' => $this->width, 44 | 'status' => $this->status, 45 | 'is_stream' => $this->is_stream, 46 | 'started_at' => $this->started_at, 47 | 'type' => $this->type, 48 | 'file_exists' => $this->file_exists, 49 | 'log_exists' => $this->log_exists, 50 | ]; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /app/Events/ProcessingStarted.php: -------------------------------------------------------------------------------- 1 | video = $video; 27 | } 28 | 29 | /** 30 | * Get the channels the event should broadcast on. 31 | * 32 | * @return \Illuminate\Broadcasting\Channel|array 33 | */ 34 | public function broadcastOn() 35 | { 36 | return new Channel('Queue'); 37 | } 38 | 39 | /** 40 | * Get the name of the event to listen for. 
41 | * 42 | * @return string 43 | */ 44 | public function broadcastAs() 45 | { 46 | return 'processing.started'; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /app/Exceptions/Handler.php: -------------------------------------------------------------------------------- 1 | prepare(); 28 | return $scraper; 29 | } 30 | 31 | /** 32 | * Resolve an instance of a scraper by matching against URL (e.g. video url). 33 | * 34 | * @param string $url 35 | * @return ScraperInterface 36 | * @throws ScraperDriverNotFoundException 37 | */ 38 | public static function resolveFromUrl(string $url): ScraperInterface 39 | { 40 | $drivers = config('scrapers.drivers'); 41 | foreach ($drivers as $driver => $config) { 42 | if (str_contains($url, $config['base_url'])) { 43 | return self::make($driver); 44 | } 45 | } 46 | 47 | throw new ScraperDriverNotFoundException("Can't resolve matching driver from URL: `$url`"); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /app/Http/Controllers/Controller.php: -------------------------------------------------------------------------------- 1 | get(); 17 | 18 | $logo_map = array_values(array_map(function ($driver_config) { 19 | return [ 20 | 'src' => asset("storage/logos/{$driver_config['logo_filename']}"), 21 | 'base_url' => $driver_config['base_url'] 22 | ]; 23 | }, config('scrapers.drivers'))); 24 | 25 | return view('welcome', [ 26 | 'logo_map' => $logo_map, 27 | 'scrape_items' => $scrape_items->map(function (ScrapeItem $item) { 28 | return (new ScrapeItemDTO($item->scrapable))->toArray(); 29 | }) 30 | ]); 31 | } 32 | 33 | public function log(ScrapeItem $scrape_item): \Illuminate\Http\Response 34 | { 35 | $log_contents = file_get_contents($scrape_item->log_path); 36 | $response = Response::make($log_contents); 37 | $response->header('Content-Type', 'text/plain'); 38 | 39 | return $response; 40 | } 41 | 42 | /** 43 | * Store a newly created 
resource in storage. 44 | * 45 | * @param Request $request 46 | * @return JsonResponse 47 | */ 48 | public function store(Request $request): JsonResponse 49 | { 50 | try { 51 | $scraper = ScraperFactory::resolveFromUrl($request->video_url); 52 | $filename = str_replace("'", '', $request->filename); 53 | 54 | $scraper->scrape($request->video_url, $filename); 55 | 56 | return response()->json([ 57 | 'success' => true 58 | ]); 59 | } catch (\Exception $e) { 60 | return response()->json([ 61 | 'success' => false, 62 | 'message' => $e->getMessage() 63 | ], 500); 64 | } 65 | } 66 | 67 | public function destroy($id): JsonResponse 68 | { 69 | $item = ScrapeItem::findOrFail($id); 70 | 71 | $scrapable = $item->scrapable; 72 | 73 | if ($item->log_path && file_exists($item->log_path)) { 74 | unlink($item->log_path); 75 | } 76 | 77 | $scrapable->removeFiles(); 78 | 79 | $scrapable->delete(); 80 | $item->delete(); 81 | 82 | return response()->json([ 83 | 'success' => true, 84 | 'message' => 'Item: '.$item->id.' - '.$scrapable->name().' deleted!' 
85 | ]); 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /app/Http/Controllers/ProgressController.php: -------------------------------------------------------------------------------- 1 | get() 24 | ->transform(function (ScrapeItem $item) { 25 | return (new ScrapeItemDTO($item->scrapable))->toArray(); 26 | }); 27 | 28 | return response()->json($in_progress); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /app/Http/Kernel.php: -------------------------------------------------------------------------------- 1 | [ 33 | \App\Http\Middleware\EncryptCookies::class, 34 | \Illuminate\Cookie\Middleware\AddQueuedCookiesToResponse::class, 35 | \Illuminate\Session\Middleware\StartSession::class, 36 | // \Illuminate\Session\Middleware\AuthenticateSession::class, 37 | \Illuminate\View\Middleware\ShareErrorsFromSession::class, 38 | \App\Http\Middleware\VerifyCsrfToken::class, 39 | \Illuminate\Routing\Middleware\SubstituteBindings::class, 40 | ], 41 | 42 | 'api' => [ 43 | 'throttle:60,1', 44 | \Illuminate\Routing\Middleware\SubstituteBindings::class, 45 | ], 46 | ]; 47 | 48 | /** 49 | * The application's route middleware. 50 | * 51 | * These middleware may be assigned to groups or used individually. 
52 | * 53 | * @var array 54 | */ 55 | protected $routeMiddleware = [ 56 | 'auth' => \App\Http\Middleware\Authenticate::class, 57 | 'auth.basic' => \Illuminate\Auth\Middleware\AuthenticateWithBasicAuth::class, 58 | 'bindings' => \Illuminate\Routing\Middleware\SubstituteBindings::class, 59 | 'cache.headers' => \Illuminate\Http\Middleware\SetCacheHeaders::class, 60 | 'can' => \Illuminate\Auth\Middleware\Authorize::class, 61 | 'guest' => \App\Http\Middleware\RedirectIfAuthenticated::class, 62 | 'password.confirm' => \Illuminate\Auth\Middleware\RequirePassword::class, 63 | 'signed' => \Illuminate\Routing\Middleware\ValidateSignature::class, 64 | 'throttle' => \Illuminate\Routing\Middleware\ThrottleRequests::class, 65 | 'verified' => \Illuminate\Auth\Middleware\EnsureEmailIsVerified::class, 66 | ]; 67 | } 68 | -------------------------------------------------------------------------------- /app/Http/Middleware/Authenticate.php: -------------------------------------------------------------------------------- 1 | expectsJson()) { 18 | return route('login'); 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /app/Http/Middleware/CheckForMaintenanceMode.php: -------------------------------------------------------------------------------- 1 | check()) { 22 | return redirect(RouteServiceProvider::HOME); 23 | } 24 | 25 | return $next($request); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /app/Http/Middleware/TrimStrings.php: -------------------------------------------------------------------------------- 1 | allSubdomainsOfApplicationUrl(), 18 | ]; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /app/Http/Middleware/TrustProxies.php: -------------------------------------------------------------------------------- 1 | gallery_url = $gallery_url; 47 | $this->photo_urls = $photo_urls; 48 | $this->directory_name = 
$directory_name; 49 | $this->output_path = config('scrapers.photo_gallery.output_path'); 50 | $this->log_path = config('scrapers.photo_gallery.log_path'); 51 | 52 | $this->photo_gallery = PhotoGallery::create([ 53 | 'name' => $directory_name, 54 | 'number_photos' => count($this->photo_urls) 55 | ]); 56 | 57 | $this->scrape_item = ScrapeItem::create([ 58 | 'status' => ScrapeItem::STATUS_QUEUED, 59 | 'url' => $gallery_url, 60 | 'scrapable_id' => $this->photo_gallery->id, 61 | 'scrapable_type' => $this->photo_gallery->getMorphClass() 62 | ]); 63 | } 64 | 65 | /** 66 | * Execute the job. 67 | * 68 | * @return void 69 | */ 70 | public function handle() 71 | { 72 | $output_path = "{$this->output_path}/{$this->directory_name}"; 73 | $log_path = $this->log_path . '/' . str_replace(' ', '_', $this->directory_name) . '[' . now()->timestamp . '].txt'; 74 | 75 | $photo_count = count($this->photo_urls); 76 | file_put_contents($log_path, "Scraping {$photo_count} photos into {$output_path}...\n0"); 77 | 78 | $this->photo_gallery->update([ 79 | 'number_photos' => count($this->photo_urls), 80 | ]); 81 | 82 | $this->scrape_item->update([ 83 | 'status' => ScrapeItem::STATUS_PROCESSING, 84 | 'started_at' => now(), 85 | 'path' => $output_path, 86 | 'log_path' => $log_path 87 | ]); 88 | 89 | try { 90 | if (!is_dir($output_path)) { 91 | mkdir($output_path); 92 | } 93 | 94 | $i = 1; 95 | foreach($this->photo_urls as $photo_url) { 96 | $size = getimagesize($photo_url); 97 | $extension = image_type_to_extension($size[2]); 98 | $filename = str_pad($i, 4, "0", STR_PAD_LEFT); 99 | $filename .= $extension; 100 | 101 | $img_data = file_get_contents($photo_url); 102 | file_put_contents("{$output_path}/{$filename}", $img_data); 103 | 104 | $log_string = file_get_contents($log_path); 105 | $log_string .= "\n{$i}"; 106 | file_put_contents($log_path, $log_string); 107 | 108 | $i += 1; 109 | } 110 | } catch (\Exception $e) { 111 | $this->scrape_item->update([ 112 | 'status' => ScrapeItem::STATUS_ERROR 
113 | ]); 114 | } 115 | 116 | $this->scrape_item->update([ 117 | 'status' => ScrapeItem::STATUS_DONE, 118 | 'finished_at' => now() 119 | ]); 120 | } 121 | 122 | public function failed(Throwable $exception) 123 | { 124 | $this->scrape_item->update([ 125 | 'status' => ScrapeItem::STATUS_ERROR 126 | ]); 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /app/Jobs/ProcessVideo.php: -------------------------------------------------------------------------------- 1 | url = $url; 57 | $this->filename = $filename; 58 | $this->is_stream = $is_stream; 59 | $this->output_path = config('scrapers.ffmpeg.output_path'); 60 | $this->log_path = config('scrapers.ffmpeg.log_path'); 61 | $this->ffmpeg_args = $ffmpeg_args; 62 | $this->output_file_args = $output_file_args; 63 | 64 | $this->video = Video::create(['name' => $filename]); 65 | 66 | $this->scrape_item = ScrapeItem::create([ 67 | 'status' => ScrapeItem::STATUS_QUEUED, 68 | 'url' => $url, 69 | 'is_stream' => $is_stream, 70 | 'scrapable_id' => $this->video->id, 71 | 'scrapable_type' => $this->video->getMorphClass(), 72 | ]); 73 | } 74 | 75 | /** 76 | * Execute the job. 77 | * 78 | * @return void 79 | */ 80 | public function handle() 81 | { 82 | $output_path = $this->output_path; 83 | $log_path = $this->log_path . '/' . str_replace(' ', '_', $this->filename) . '[' . now()->timestamp . '].txt'; 84 | 85 | // if we are scraping a stream (m3u8) we cannot get metadata the same with ffprobe. 86 | // we can still scrape without this metadata but we don't currently have a way 87 | // to reliably get progress information. 88 | $meta = $this->is_stream 89 | ? null // todo look for a solution to allow us to get at least some metadata 90 | : json_decode(shell_exec("ffprobe -v quiet -print_format json -show_format -show_streams {$this->url}")); 91 | 92 | $this->video->update([ 93 | 'codec' => $meta->streams[0]->codec_name ?? null, 94 | 'width' => $meta->streams[0]->width ?? 
null, 95 | 'height' => $meta->streams[0]->height ?? null, 96 | 'duration' => $meta->format->duration ?? null, 97 | 'size' => $meta->format->size ?? null, 98 | 'bitrate' => $meta->format->bit_rate ?? null, 99 | ]); 100 | 101 | $this->scrape_item->update([ 102 | 'status' => ScrapeItem::STATUS_PROCESSING, 103 | 'started_at' => now(), 104 | 'path' => $output_path, 105 | 'log_path' => $log_path, 106 | ]); 107 | 108 | event(new ProcessingStarted($this->video)); 109 | 110 | try { 111 | shell_exec("ffmpeg -nostdin {$this->ffmpeg_args} -i \"{$this->url}\" {$this->output_file_args} \"$output_path/{$this->filename}\" 1>$log_path 2>&1"); 112 | // shell_exec("ffmpeg -i \"{$this->url}\" -c copy \"/home/eric/Downloads/ffmpeg_test/{$this->filename}\" > /dev/null 2>&1 &"); 113 | 114 | } catch (\Exception $e) { 115 | $this->scrape_item->update([ 116 | 'status' => ScrapeItem::STATUS_ERROR 117 | ]); 118 | } 119 | 120 | $this->scrape_item->update([ 121 | 'status' => ScrapeItem::STATUS_DONE, 122 | 'finished_at' => now() 123 | ]); 124 | } 125 | 126 | public function failed(Throwable $exception) 127 | { 128 | $this->scrape_item->update([ 129 | 'status' => ScrapeItem::STATUS_ERROR 130 | ]); 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /app/PhotoGallery.php: -------------------------------------------------------------------------------- 1 | morphOne(ScrapeItem::class, 'scrapable'); 23 | } 24 | 25 | public function height(): ?int 26 | { 27 | return null; 28 | } 29 | 30 | public function width(): ?int 31 | { 32 | return null; 33 | } 34 | 35 | public function name(): string 36 | { 37 | return $this->name; 38 | } 39 | 40 | public function progress(): float 41 | { 42 | if (!$this->scrapeItem->log_path || !file_exists($this->scrapeItem->log_path)) { 43 | return 0; 44 | } 45 | 46 | $file = escapeshellarg($this->scrapeItem->log_path); 47 | $line = `tail -n 1 $file`; 48 | 49 | $progress = (int) $line; 50 | return (float) ($progress / 
$this->number_photos) * 100; 51 | } 52 | 53 | public function type(): string 54 | { 55 | return 'gallery'; 56 | } 57 | 58 | public function fileExists(): bool 59 | { 60 | return $this->path() && is_dir($this->path()); 61 | } 62 | 63 | public function removeFiles(): void 64 | { 65 | if ($this->fileExists()) { 66 | // delete all files in directory first 67 | array_map( 'unlink', array_filter((array) glob("{$this->path()}/*"))); 68 | rmdir($this->path()); 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /app/Providers/AppServiceProvider.php: -------------------------------------------------------------------------------- 1 | job, 'failed')) { 30 | // $event->job->failed(); 31 | // } 32 | // }); 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /app/Providers/AuthServiceProvider.php: -------------------------------------------------------------------------------- 1 | 'App\Policies\ModelPolicy', 17 | ]; 18 | 19 | /** 20 | * Register any authentication / authorization services. 21 | * 22 | * @return void 23 | */ 24 | public function boot() 25 | { 26 | $this->registerPolicies(); 27 | 28 | // 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /app/Providers/BroadcastServiceProvider.php: -------------------------------------------------------------------------------- 1 | [ 19 | SendEmailVerificationNotification::class, 20 | ], 21 | ]; 22 | 23 | /** 24 | * Register any events for your application. 
25 | * 26 | * @return void 27 | */ 28 | public function boot() 29 | { 30 | parent::boot(); 31 | 32 | // 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /app/Providers/RouteServiceProvider.php: -------------------------------------------------------------------------------- 1 | mapApiRoutes(); 46 | 47 | $this->mapWebRoutes(); 48 | 49 | // 50 | } 51 | 52 | /** 53 | * Define the "web" routes for the application. 54 | * 55 | * These routes all receive session state, CSRF protection, etc. 56 | * 57 | * @return void 58 | */ 59 | protected function mapWebRoutes() 60 | { 61 | Route::middleware('web') 62 | ->namespace($this->namespace) 63 | ->group(base_path('routes/web.php')); 64 | } 65 | 66 | /** 67 | * Define the "api" routes for the application. 68 | * 69 | * These routes are typically stateless. 70 | * 71 | * @return void 72 | */ 73 | protected function mapApiRoutes() 74 | { 75 | Route::prefix('api') 76 | ->middleware('api') 77 | ->namespace($this->namespace) 78 | ->group(base_path('routes/api.php')); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /app/ScrapeItem.php: -------------------------------------------------------------------------------- 1 | 'boolean', 31 | ]; 32 | 33 | public function scrapable() 34 | { 35 | return $this->morphTo(); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /app/Scrapers/FullPornerScraper.php: -------------------------------------------------------------------------------- 1 | browse(function (Browser $browser) use ($url, $filename) { 22 | $browser->visit($url); 23 | 24 | $video_node = $browser->element('.single-video iframe'); 25 | $video_url = $video_node->getAttribute('src'); 26 | 27 | // the first 2 characters of scraped url are '//'. we need to append 28 | // https: for a valid path to give FFmpeg. 
29 | $cdn_url = "https:$video_url"; 30 | 31 | $browser->visit($cdn_url); 32 | 33 | $browser->waitFor('#flvv', 5); 34 | $source_nodes = $browser->elements('#flvv source'); 35 | $highest_resolution_node = $this->findHighestResolution($source_nodes); 36 | 37 | $cdn_video_url = 'https:' . $highest_resolution_node->getAttribute('src'); 38 | 39 | ProcessVideo::dispatch($cdn_video_url, "$filename.mp4"); 40 | 41 | $browser->quit(); 42 | }); 43 | } 44 | 45 | private function findHighestResolution(array $dom_nodes) 46 | { 47 | // iterate all resolutions starting at highest. each video has multiple DOM nodes containing 48 | // resolutions stored in the title attribute. look for highest resolution and return associated node. 49 | foreach (self::RESOLUTIONS as $resolution) { 50 | foreach ($dom_nodes as $node) { 51 | if (str_contains($node->getAttribute('title'), $resolution)) { 52 | return $node; 53 | } 54 | } 55 | } 56 | 57 | throw new \Exception('Unable to find DOM node for supported resolutions.'); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /app/Scrapers/GoodPornScraper.php: -------------------------------------------------------------------------------- 1 | '-vf delogo=x=2:y=4:w=630:h=160', 28 | '1080' => '-vf delogo=x=3:y=6:w=320:h=70', 29 | ]; 30 | 31 | $this->browse(function (Browser $browser) use ($resolutions, $url, $filename, $watermark_stripper_args) { 32 | $browser->visit($url); 33 | 34 | $flashvars = $browser->script("return window.flashvars"); 35 | 36 | // filters flashvars to only the items that are related to video urls 37 | $urls = collect(array_filter($flashvars[0], function($key) { 38 | return strpos($key, 'video_alt_url') === 0; 39 | }, ARRAY_FILTER_USE_KEY)); 40 | 41 | foreach($resolutions as $resolution) { 42 | // starting with largest resolution, see if we have any video urls that 43 | // contain the resolution. 
44 | $key = $urls->search(function ($item) use ($resolution) { 45 | return str_contains($item, $resolution); 46 | }); 47 | 48 | if ($key) { 49 | break; 50 | } 51 | } 52 | 53 | if (!isset($key)) { 54 | throw new \Exception('Could not find video URL for any supported resolutions.'); 55 | } 56 | 57 | $url = $flashvars[0][$key]; 58 | 59 | // attempt to find a watermark removal args 60 | $output_args = '-c copy'; 61 | foreach($watermark_stripper_args as $resolution => $arg) { 62 | if (str_contains($url, $resolution)) { 63 | $output_args = $arg; 64 | break; 65 | } 66 | } 67 | 68 | ProcessVideo::dispatch($url, "$filename.mp4", false, '', $output_args); 69 | }); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /app/Scrapers/HQPornerScraper.php: -------------------------------------------------------------------------------- 1 | browse(function (Browser $browser) use ($url, $filename) { 22 | // hqporner does some funky stuff with referrers and some of the videos cannot 23 | // be accessed directly from their url in the browser. this only applies on some 24 | // and I cannot find a pattern. to bypass this, we will first visit the base 25 | // url and then call a window.location to navigate with valid referrers. 26 | $browser->visit('https://' . 
config('scrapers.drivers.hqporner.base_url'));
27 |             $browser->script("return window.location.href = '$url'");
28 |
29 |             $browser->withinFrame('#playerWrapper iframe', function (Browser $iframe) use ($filename) {
30 |                 $source_nodes = $iframe->elements('source');
31 |
32 |                 $node = $this->findHighestResolution($source_nodes);
33 |
34 |                 $video_src = $node->getAttribute('src');
35 |
36 |                 // video src is already prefixed with 2 forward slashes so we just need to add "https:"
37 |                 $cdn_url = "https:$video_src";
38 |                 ProcessVideo::dispatch($cdn_url, "$filename.mp4");
39 |             });
40 |
41 |             $browser->quit();
42 |         });
43 |     }
44 |
45 |     /**
46 |      * Find the source node for the best supported resolution.
47 |      *
48 |      * Walks self::RESOLUTIONS in its declared order (expected to run from highest to lowest)
49 |      * and returns the first node whose title attribute contains the resolution being tried.
50 |      *
51 |      * @param array $dom_nodes elements exposing getAttribute(), e.g. the result of $iframe->elements()
52 |      *
53 |      * @throws \Exception when no node title contains any of the supported resolutions
54 |      */
55 |     private function findHighestResolution(array $dom_nodes)
56 |     {
57 |         foreach (self::RESOLUTIONS as $resolution) {
58 |             foreach ($dom_nodes as $node) {
59 |                 // getAttribute() can return null for a missing attribute (php-webdriver); skip
60 |                 // such nodes instead of handing null to str_contains().
61 |                 $title = $node->getAttribute('title');
62 |
63 |                 if (is_string($title) && str_contains($title, $resolution)) {
64 |                     return $node;
65 |                 }
66 |             }
67 |         }
68 |
69 |         throw new \Exception('Unable to find DOM node for supported resolutions.');
70 |     }
71 | }
72 |
--------------------------------------------------------------------------------
/app/Scrapers/PicHunterScraper.php:
--------------------------------------------------------------------------------
1 | browse(function (Browser $browser) use ($url, $filename) {
29 |             $browser->visit($url);
30 |
31 |             $image_nodes = $browser->elements('#gallery figure img');
32 |             $photo_urls = collect([]);
33 |             foreach($image_nodes as $node) {
34 |                 $photo_urls->push($node->getAttribute('xof'));
35 |             }
36 |
37 |             ProcessPhotoGallery::dispatch($url, $photo_urls->toArray(), $filename);
38 |
39 |             $browser->quit();
40 |         });
41 |     }
42 | }
43 |
--------------------------------------------------------------------------------
/app/Scrapers/PornHubScraper.php:
--------------------------------------------------------------------------------
1 | browse(function (Browser $browser) use ($resolutions, $url, $filename) {
25 |             $browser->visit($url);
26 |
27 |             // the player's data-video-id names the window global ("flashvars_<id>") read below
28 |             $video_id = $browser->attribute('#player', 'data-video-id');
29 |             $data_key = "flashvars_$video_id";
30 |             $video_data = $browser->script("return window['$data_key']");
31 |             $media_data = collect($video_data[0]['mediaDefinitions']);
32 |
33 |             // from here on $url holds the media url rather than the page url; it stays null
34 |             // until a media definition matches one of the supported resolutions.
35 |             $url = null;
36 |             foreach ($resolutions as $resolution) {
37 |                 // take the first media definition whose quality is exactly this resolution
38 |                 $video_definition = $media_data->first(function ($item) use ($resolution) {
39 |                     return $item['quality'] === $resolution;
40 |                 });
41 |
42 |                 if ($video_definition) {
43 |                     $url = $video_definition['videoUrl'];
44 |                     break;
45 |                 }
46 |             }
47 |
48 |             if (is_null($url)) {
49 |                 throw new \Exception('Could not find valid video URL for the supported resolutions.');
50 |             }
51 |
52 |             ProcessVideo::dispatch($url, "$filename.mp4", true);
53 |
54 |             $browser->quit();
55 |         });
56 |     }
57 | }
58 |
--------------------------------------------------------------------------------
/app/Scrapers/PornKTubeScraper.php:
--------------------------------------------------------------------------------
1 | browse(function (Browser $browser) use ($url, $filename) {
31 |             $browser->visit($url);
32 |
33 |             // there are hidden divs that act as buttons for each resolution. these contain
34 |             // all the url parts in a semi-colon delimited string in the `data-c` attribute.
35 |             // the first child here will always be the highest available resolution.
36 | $data_url = $browser->attribute('.listlinks > div', 'data-c'); 37 | $url_parts = explode(';', $data_url); 38 | $url = $this->buildUrlFromParts($url_parts); 39 | 40 | ProcessVideo::dispatch($url, "$filename.mp4"); 41 | 42 | $browser->quit(); 43 | }); 44 | } 45 | 46 | private function buildUrlFromParts(array $url_parts) 47 | { 48 | // example url: https://s2.stormedia.info/whpvid/1657432818/Ntw9EdVYIRsyxZJAOuPL4A/18000/18248/18248_480p.mp4 49 | // reconstruct the direct CDN url using parts scraped from the DOM. first we 50 | // have to pull index 7. this will be an integer value typically `2` or `3` to 51 | // map to the `s2` or `s3` before the base url. 52 | $url = "https://s$url_parts[7]." . self::BASE_CDN_URL; 53 | 54 | // next we add an integer stored in part 5 55 | $url .= $url_parts[5] . '/'; 56 | 57 | // then we add a hash stored in part 6 58 | $url .= $url_parts[6] . '/'; 59 | 60 | // then we take the video id stored in part 4 and strip the first 61 | // 2 values and pad with 3x 0s (e.g. 18248 = 18000) 62 | $url .= substr($url_parts[4], 0, 2) . '000' . '/'; 63 | 64 | // then we add the video id 65 | $url .= $url_parts[4] . '/'; 66 | 67 | // then we create the filename which is always: