├── .gitignore ├── .travis.yml ├── changelog.md ├── composer.json ├── examples ├── bootstrap.php ├── demo-async-using-builder.php ├── demo-async.php ├── demo-identity.php ├── demo-sync.php └── demo.php ├── license ├── readme.md ├── src ├── Builder │ ├── Build.php │ └── ProxyRotatorBuildOrderInterface.php ├── Events │ ├── UseOwnIpEvent.php │ └── WaitingEvent.php ├── Exceptions │ ├── NoProxiesLeftException.php │ └── RotatingProxySubscriberException.php ├── Interval │ ├── BaseRandomInterval.php │ ├── NullRandomCounter.php │ ├── NullTimeInterval.php │ ├── RandomCounterInterval.php │ ├── RandomCounterIntervalInterface.php │ ├── RandomIntervalInterface.php │ ├── RandomTimeInterval.php │ ├── SystemTimeProvider.php │ ├── TimeIntervalInterface.php │ └── TimeProviderInterface.php ├── Proxy │ ├── Identity.php │ ├── IdentityInterface.php │ ├── NullProxy.php │ ├── RotatingIdentityProxy.php │ ├── RotatingProxy.php │ └── RotatingProxyInterface.php ├── ProxyRotator.php ├── ProxyRotatorInterface.php ├── Random │ ├── RandomizerInterface.php │ └── SystemRandomizer.php └── RotatingProxySubscriber.php └── tests ├── RandomAndTimeHelper.php ├── integration └── RotatingProxySubscriberTest.php └── unit ├── ProxyRotatorTest.php ├── RandomCounterIntervalTest.php ├── RandomTimeIntervalTest.php ├── RotatingIdentityProxyTest.php └── RotatingProxyTest.php /.gitignore: -------------------------------------------------------------------------------- 1 | _demo-local*.php -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: php 2 | php: 3 | - 5.5 4 | - 5.6 5 | - 7 6 | 7 | before_script: 8 | - composer install 9 | 10 | script: vendor/bin/phpunit --bootstrap vendor/autoload.php tests -------------------------------------------------------------------------------- /changelog.md: -------------------------------------------------------------------------------- 1 | #todo 
2 | 3 | - explain `generateIdentitiesForProxies` method in readme 4 | - doc comments 5 | 6 | #dev-master 7 | 8 | ##0.7.0 9 | 10 | - updated dependencies (PHP ~7) 11 | 12 | ##0.2.0 13 | 14 | - updated dependencies (Guzzle ^5.3.0; PHPUnit ~4) 15 | 16 | ###0.1.2 17 | 18 | - proxy requests are now evaluated with their own event priority (see `RotatingProxySubscriber::PROXY_*` constants) in the 'complete' and 'error' events 19 | - added `RotatingProxyInterface::GUZZLE_CONFIG_*` constants for proxy evaluation 20 | - provided property to let redirect-requests be performed by the same proxy, see `ProxyRotator::setReuseProxyOnRedirect()` - this is default behavior now 21 | - added `RotatingIdentityProxy` (with tests) 22 | - updated `Build` to include `RotatingIdentityProxy` 23 | - added `Identity` class which holds Cookies, headers values, the last referer and a user agent for now, so that proxy seem more "real" 24 | - added test for cached responses (requests that are intercepted in the before event) 25 | - moved demos to example folder and added mocks to make the demos executable even without real proxies 26 | 27 | ###0.1.1 28 | 29 | - updated repositories to local satis installation 30 | 31 | ##0.1.0 32 | 33 | - changed package name from GuzzleRotatingProxySubscriber to guzzle-rotating-proxy-subscriber 34 | 35 | ###0.0.2 36 | 37 | - fixed changelog 38 | - made `RotationProxySubscriberException` inherit from Guzzles `RequestException` to make it usable by the `MockSubscriber` 39 | 40 | ###0.0.1 41 | 42 | - initial commit -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "paslandau/guzzle-rotating-proxy-subscriber", 3 | "license": "MIT", 4 | "description" : "Guzzle plugin resp. 
Guzzle subscriber to automatically pick a proxy from a predefined set of proxies for every request to avoid IP based blocking.", 5 | "authors": [ 6 | { 7 | "name": "Pascal Landau", 8 | "email": "kontakt@myseosolution.de", 9 | "homepage": "http://www.myseosolution.de", 10 | "role": "Developer" 11 | } 12 | ], 13 | "repositories": [ { "type": "composer", "url": "http://packages.myseosolution.de/"} ], 14 | "autoload": { 15 | "psr-4": { 16 | "paslandau\\GuzzleRotatingProxySubscriber\\": "src/" 17 | } 18 | }, 19 | "require" : { 20 | "php": ">=5.5", 21 | "guzzlehttp/guzzle": "^5.3.0" 22 | }, 23 | "require-dev" : { 24 | "phpunit/phpunit": "~4", 25 | "paslandau/guzzle-application-cache-subscriber": "dev-master" 26 | }, 27 | "config": { 28 | "secure-http": false 29 | } 30 | } -------------------------------------------------------------------------------- /examples/bootstrap.php: -------------------------------------------------------------------------------- 1 | failsIfNoProxiesAreLeft() 26 | ->withProxiesFromString($s, "\n") 27 | ->evaluatesProxyResultsByDefault() 28 | ->eachProxyMayFailInfinitlyInTotal() 29 | ->eachProxyMayFailConsecutively(3) 30 | ->eachProxyNeedsToWaitSecondsBetweenRequests(1, 3) 31 | ->build(); 32 | 33 | $getWaitingTime = function (WaitingEvent $e) { 34 | echo "Need to wait " . $e->getProxy()->getWaitingTime() . 
"s\n"; 35 | }; 36 | $rotator->getEmitter()->on(ProxyRotator::EVENT_ON_WAIT, $getWaitingTime); 37 | 38 | $sub = new RotatingProxySubscriber($rotator); 39 | $client = new Client(["defaults" => ["headers" => ["User-Agent" => null]]]); 40 | $client->getEmitter()->attach($sub); 41 | 42 | // lets prepare 10 responses 43 | $num = 10; 44 | $responses = []; 45 | for($i = 0; $i < $num; $i++){ 46 | $responses[] = new Response(200); 47 | } 48 | $mock = new Mock($responses); 49 | $client->getEmitter()->attach($mock); 50 | 51 | // lets execute 10 requests 52 | $requests = []; 53 | $url = "http://localhost/"; 54 | for ($i = 0; $i < $num; $i++) { 55 | $req = $client->createRequest("GET", $url); 56 | $req->getConfig()->set("id", $i); 57 | $requests[] = $req; 58 | } 59 | 60 | $completeFn = function (Pool $pool, RequestInterface $request, ResponseInterface $response) { 61 | echo "Success with " . $request->getConfig()->get("proxy") . " on {$request->getConfig()->get("id")}. request\n"; 62 | }; 63 | $errorFn = function (Pool $pool, RequestInterface $request, ResponseInterface $response = null, Exception $exception) { 64 | if ($exception instanceof NoProxiesLeftException) { 65 | echo "All proxies are blocked, terminating...\n"; 66 | $pool->cancel(); 67 | } else { 68 | echo "Failed with " . $request->getConfig()->get("proxy") . " on {$request->getConfig()->get("id")}. request: " . $exception->getMessage() . 
"\n"; 69 | } 70 | }; 71 | 72 | $pool = new Pool($client, $requests, [ 73 | "pool_size" => 3, 74 | "end" => function (EndEvent $event) use (&$pool, $completeFn, $errorFn) { 75 | $request = $event->getRequest(); 76 | $response = $event->getResponse(); 77 | $exception = $event->getException(); 78 | if ($exception === null) { 79 | $completeFn($pool, $request, $response); 80 | } else { 81 | $errorFn($pool, $request, $response, $exception); 82 | } 83 | } 84 | ]); 85 | $pool->wait(); 86 | 87 | /** @var \paslandau\GuzzleRotatingProxySubscriber\Proxy\RotatingProxy $proxy */ 88 | $proxies = $rotator->getProxies(); 89 | echo "\nProxy usage:\n"; 90 | foreach($proxies as $proxy){ 91 | echo $proxy->getProxyString()."\t made ".$proxy->getTotalRequests()." requests in total\n"; 92 | } -------------------------------------------------------------------------------- /examples/demo-async.php: -------------------------------------------------------------------------------- 1 | ["headers" => ["User-Agent" => null]]]); 24 | $client->getEmitter()->attach($sub); 25 | 26 | // lets prepare 20 responses 27 | $num = 20; 28 | $responses = []; 29 | for ($i = 0; $i < $num; $i++) { 30 | $responses[] = new Response(200); 31 | } 32 | $mock = new Mock($responses); 33 | $client->getEmitter()->attach($mock); 34 | 35 | // lets execute 20 requests 36 | $requests = []; 37 | $url = "http://localhost/"; 38 | for ($i = 0; $i < $num; $i++) { 39 | $req = $client->createRequest("GET", $url); 40 | $req->getConfig()->set("id", $i); 41 | $requests[] = $req; 42 | } 43 | 44 | $completeFn = function (Pool $pool, RequestInterface $request, ResponseInterface $response) { 45 | echo "Success with " . $request->getConfig()->get("proxy") . " on {$request->getConfig()->get("id")}. 
request\n"; 46 | }; 47 | $errorFn = function (Pool $pool, RequestInterface $request, ResponseInterface $response = null, Exception $exception) { 48 | if ($exception instanceof NoProxiesLeftException) { 49 | echo "All proxies are blocked, terminating...\n"; 50 | $pool->cancel(); 51 | } else { 52 | echo "Failed with " . $request->getConfig()->get("proxy") . " on {$request->getConfig()->get("id")}. request: " . $exception->getMessage() . "\n"; 53 | } 54 | }; 55 | 56 | $pool = new Pool($client, $requests, [ 57 | "pool_size" => 3, 58 | "end" => function (EndEvent $event) use (&$pool, $completeFn, $errorFn) { 59 | $request = $event->getRequest(); 60 | $response = $event->getResponse(); 61 | $exception = $event->getException(); 62 | if ($exception === null) { 63 | $completeFn($pool, $request, $response); 64 | } else { 65 | $errorFn($pool, $request, $response, $exception); 66 | } 67 | } 68 | ]); 69 | $pool->wait(); 70 | 71 | /** @var \paslandau\GuzzleRotatingProxySubscriber\Proxy\RotatingProxy $proxy */ 72 | $proxies = $rotator->getProxies(); 73 | echo "\nProxy usage:\n"; 74 | foreach($proxies as $proxy){ 75 | echo $proxy->getProxyString()."\t made ".$proxy->getTotalRequests()." 
requests in total\n"; 76 | } -------------------------------------------------------------------------------- /examples/demo-identity.php: -------------------------------------------------------------------------------- 1 | "de,en"]; // add a preferred language to each of our requests 21 | $cookieSession = new CookieJar(); // enable cookies for this identity 22 | 23 | $identity = new Identity($userAgent,$defaultRequestHeaders,$cookieSession); 24 | $identities = [$identity]; 25 | $proxy1 = new RotatingIdentityProxy($identities, "[PROXY 1]"); 26 | 27 | $userAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"; // common user agent string for chrome 28 | $defaultRequestHeaders = ["Accept-Language" => "de"]; // add a preferred language to each of our requests 29 | $cookieSession = null; // disable cookies for this identity 30 | 31 | $identity1 = new Identity($userAgent,$defaultRequestHeaders,$cookieSession); 32 | 33 | $userAgent = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"; // common user agent string for Internet Explorer 34 | $defaultRequestHeaders = ["Pragma" => "no-cache"]; // add a no-cache directive to each request 35 | $cookieSession = new CookieJar(); // enable cookies for this identity 36 | 37 | $identity2 = new Identity($userAgent,$defaultRequestHeaders,$cookieSession); 38 | 39 | $identities = [$identity1,$identity2]; 40 | $systemRandomizer = new SystemRandomizer(); 41 | 42 | // switch identities randomly after 2 to 5 requests 43 | $minRequests = 2; 44 | $maxRequests = 5; 45 | $counter = new RandomCounterInterval($minRequests,$maxRequests); 46 | $proxy2 = new RotatingIdentityProxy($identities, "[PROXY 2]",$systemRandomizer,$counter); 47 | 48 | $proxies = [$proxy1,$proxy2]; 49 | $rotator = new ProxyRotator($proxies); 50 | $sub = new RotatingProxySubscriber($rotator); 51 | $client = new Client(); 52 | $client->getEmitter()->attach($sub); 53 | // lets prepare 20 responses 54 
| $num = 20; 55 | $responses = []; 56 | for($i = 0; $i < $num; $i++){ 57 | $responses[] = new Response(200); 58 | } 59 | $mock = new Mock($responses); 60 | $client->getEmitter()->attach($mock); 61 | 62 | // lets execute 20 requests 63 | $requests = []; 64 | $url = "http://localhost/"; 65 | for($i = 0; $i < $num; $i++){ 66 | $requests[] = $client->createRequest("GET",$url); 67 | } 68 | 69 | for ($i = 0; $i < $num; $i++) { 70 | $request = $client->createRequest("GET",$url); 71 | try { 72 | $response = $client->send($request); 73 | echo "Success with " . $request->getConfig()->get("proxy") . " using user agent " . $request->getHeader("user-agent"). " on $i. request\n"; 74 | } catch (Exception $e) { 75 | if ($e->getPrevious() instanceof NoProxiesLeftException) { 76 | echo "All proxies are blocked, terminating...\n"; 77 | break; 78 | } 79 | echo "Failed with " . $request->getConfig()->get("proxy") . " on $i. request: " . $e->getMessage() . "\n"; 80 | } 81 | } 82 | 83 | /** @var \paslandau\GuzzleRotatingProxySubscriber\Proxy\RotatingProxy $proxy */ 84 | echo "\nProxy usage:\n"; 85 | foreach($proxies as $proxy){ 86 | echo $proxy->getProxyString()."\t made ".$proxy->getTotalRequests()." requests in total\n"; 87 | } 88 | 89 | /** Example ouput 90 | Success with [PROXY 2] using user agent Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) on 0. request 91 | Success with [PROXY 2] using user agent Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) on 1. request 92 | Success with [PROXY 2] using user agent Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) on 2. request 93 | Success with [PROXY 1] using user agent Mozilla/5.0 (Windows NT 6.3; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0 on 3. request 94 | Success with [PROXY 1] using user agent Mozilla/5.0 (Windows NT 6.3; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0 on 4. 
request 95 | Success with [PROXY 2] using user agent Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) on 5. request 96 | Success with [PROXY 2] using user agent Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0) on 6. request 97 | Success with [PROXY 2] using user agent Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 on 7. request 98 | Success with [PROXY 2] using user agent Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 on 8. request 99 | Success with [PROXY 1] using user agent Mozilla/5.0 (Windows NT 6.3; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0 on 9. request 100 | Success with [PROXY 1] using user agent Mozilla/5.0 (Windows NT 6.3; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0 on 10. request 101 | Success with [PROXY 2] using user agent Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 on 11. request 102 | Success with [PROXY 1] using user agent Mozilla/5.0 (Windows NT 6.3; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0 on 12. request 103 | Success with [PROXY 1] using user agent Mozilla/5.0 (Windows NT 6.3; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0 on 13. request 104 | Success with [PROXY 1] using user agent Mozilla/5.0 (Windows NT 6.3; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0 on 14. request 105 | Success with [PROXY 1] using user agent Mozilla/5.0 (Windows NT 6.3; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0 on 15. request 106 | Success with [PROXY 2] using user agent Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 on 16. request 107 | Success with [PROXY 1] using user agent Mozilla/5.0 (Windows NT 6.3; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0 on 17. request 108 | Success with [PROXY 1] using user agent Mozilla/5.0 (Windows NT 6.3; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0 on 18. 
request 109 | Success with [PROXY 2] using user agent Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36 on 19. request 110 | [PROXY 1] made 10 requests in total 111 | [PROXY 2] made 10 requests in total 112 | */ -------------------------------------------------------------------------------- /examples/demo-sync.php: -------------------------------------------------------------------------------- 1 | ["headers" => ["User-Agent" => null]]]); // remove User-Agent info from request 20 | $client->getEmitter()->attach($sub); 21 | 22 | // lets prepare 20 responses 23 | $num = 20; 24 | $responses = []; 25 | for($i = 0; $i < $num; $i++){ 26 | $responses[] = new Response(200); 27 | } 28 | $mock = new Mock($responses); 29 | $client->getEmitter()->attach($mock); 30 | 31 | // lets execute 20 requests 32 | $requests = []; 33 | $url = "http://localhost/"; 34 | for ($i = 0; $i < $num; $i++) { 35 | $request = $client->createRequest("GET",$url); 36 | try { 37 | $response = $client->send($request); 38 | echo "Success with " . $request->getConfig()->get("proxy") . " on $i. request\n"; 39 | } catch (Exception $e) { 40 | if ($e->getPrevious() instanceof NoProxiesLeftException) { 41 | echo "All proxies are blocked, terminating...\n"; 42 | break; 43 | } 44 | echo "Failed with " . $request->getConfig()->get("proxy") . " on $i. request: " . $e->getMessage() . "\n"; 45 | } 46 | } 47 | 48 | /** @var \paslandau\GuzzleRotatingProxySubscriber\Proxy\RotatingProxy $proxy */ 49 | echo "\nProxy usage:\n"; 50 | foreach($proxies as $proxy){ 51 | echo $proxy->getProxyString()."\t made ".$proxy->getTotalRequests()." 
requests in total\n"; 52 | } 53 | 54 | -------------------------------------------------------------------------------- /examples/demo.php: -------------------------------------------------------------------------------- 1 | getEmitter()->attach($sub); 21 | 22 | // lets prepare 10 responses 23 | $num = 10; 24 | $responses = []; 25 | for($i = 0; $i < $num; $i++){ 26 | $responses[] = new Response(200); 27 | } 28 | $mock = new Mock($responses); 29 | $client->getEmitter()->attach($mock); 30 | 31 | // lets execute 10 requests 32 | $requests = []; 33 | $url = "http://localhost/"; 34 | for ($i = 0; $i < $num; $i++) { 35 | $request = $client->createRequest("GET", $url); 36 | try { 37 | $response = $client->send($request); 38 | echo "Success with " . $request->getConfig()->get("proxy") . " on $i. request\n"; 39 | } catch (Exception $e) { 40 | echo "Failed with " . $request->getConfig()->get("proxy") . " on $i. request: " . $e->getMessage() . "\n"; 41 | } 42 | } 43 | 44 | /** @var \paslandau\GuzzleRotatingProxySubscriber\Proxy\RotatingProxy $proxy */ 45 | $proxies = $rotator->getProxies(); 46 | echo "\nProxy usage:\n"; 47 | foreach($proxies as $proxy){ 48 | echo $proxy->getProxyString()."\t made ".$proxy->getTotalRequests()." 
requests in total\n"; 49 | } -------------------------------------------------------------------------------- /license: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Pascal Landau, http://www.myseosolution.de/ 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # DEPRECATED ⛔ 2 | 3 | This repository has been deprecated as of 2019-01-27. That code was written a long time ago and has been unmaintained for several years. Thus, repository will now be [archived](https://github.blog/2017-11-08-archiving-repositories/).If you are interested in taking over ownership, feel free to [contact me](https://www.pascallandau.com/about/). 
4 | 5 | --- 6 | 7 | # guzzle-rotating-proxy-subscriber 8 | [![Build Status](https://travis-ci.org/paslandau/guzzle-rotating-proxy-subscriber.svg?branch=master)](https://travis-ci.org/paslandau/guzzle-rotating-proxy-subscriber) 9 | 10 | Plugin for [Guzzle 5](https://github.com/scripts/guzzle) to automatically choose a random element from a set of proxies on each request. 11 | 12 | ## Description 13 | 14 | This plugin takes a set of proxies and uses them randomly on every request, which might come in handy if you need to avoid getting 15 | IP-blocked due to (too) strict limitations. 16 | 17 | ### Key features 18 | 19 | - switches proxies randomly on each request 20 | - each proxy can get a random timeout after each request 21 | - each proxy can have a list of attached "identities" (an entity including cookies, a user agent and default request headers) 22 | - a request can be evaluated via user-defined closure 23 | - builder class for easy usage 24 | - unit tests 25 | 26 | ### Basic Usage 27 | ```php 28 | 29 | // define proxies 30 | $proxy1 = new RotatingProxy("username:password@111.111.111.111:4711"); 31 | $proxy2 = new RotatingProxy("username:password@112.112.112.112:4711"); 32 | 33 | // setup and attach subscriber 34 | $rotator = new ProxyRotator([$proxy1,$proxy2]); 35 | $sub = new RotatingProxySubscriber($rotator); 36 | $client = new Client(); 37 | $client->getEmitter()->attach($sub); 38 | 39 | // perform the requests 40 | $num = 10; 41 | $url = "http://www.myseosolution.de/scripts/myip.php"; 42 | for ($i = 0; $i < $num; $i++) { 43 | $request = $client->createRequest("GET",$url); 44 | try { 45 | $response = $client->send($request); 46 | echo "Success with " . $request->getConfig()->get("proxy") . " on $i. request\n"; 47 | } catch (Exception $e) { 48 | echo "Failed with " . $request->getConfig()->get("proxy") . " on $i. request: " . $e->getMessage() . "\n"; 49 | } 50 | } 51 | ``` 52 | 53 | ### Examples 54 | 55 | See `examples/demo*.php` files. 
56 | 57 | ## Requirements 58 | 59 | - PHP >= 5.5 60 | - Guzzle >= 5.3.0 61 | 62 | ## Installation 63 | 64 | The recommended way to install guzzle-rotating-proxy-subscriber is through [Composer](http://getcomposer.org/). 65 | 66 | curl -sS https://getcomposer.org/installer | php 67 | 68 | Next, update your project's composer.json file to include GuzzleRotatingProxySubscriber: 69 | 70 | { 71 | "repositories": [ { "type": "composer", "url": "http://packages.myseosolution.de/"} ], 72 | "minimum-stability": "dev", 73 | "require": { 74 | "paslandau/guzzle-rotating-proxy-subscriber": "dev-master" 75 | }, 76 | "config": { 77 | "secure-http": false 78 | } 79 | } 80 | 81 | _**Caution:** You need to explicitly set `"secure-http": false` in order to access http://packages.myseosolution.de/ as a repository. 82 | This change is required because composer changed the default setting for `secure-http` to true at [the end of February 2016](https://github.com/composer/composer/commit/cb59cf0c85e5b4a4a4d5c6e00f827ac830b54c70#diff-c26d84d5bc3eed1fec6a015a8fc0e0a7L55)._ 83 | 84 | 85 | After installing, you need to require Composer's autoloader: 86 | ```php 87 | 88 | require 'vendor/autoload.php'; 89 | ``` 90 | 91 | ## General workflow and customization options 92 | The guzzle-rotating-proxy-subscriber uses the `RotatingProxy` class to represent a single proxy. A set of proxies is managed by a `ProxyRotator`, that takes care 93 | of the rotation on every request by hooking into the [before](http://guzzle.readthedocs.org/en/latest/events.html#before) event and changing the 94 | ['proxy' request option](http://guzzle.readthedocs.org/en/latest/clients.html#proxy) of a request. You might choose to further customize the request by 95 | adding a specific user agent, a cookie session or some other request headers. In that case you'll need to use the `RotatingIdentityProxy` class. 
96 | 97 | The response of the request is evaluated either in the [complete](http://guzzle.readthedocs.org/en/latest/events.html#complete) event 98 | or in the [error](http://guzzle.readthedocs.org/en/latest/events.html#error) event of the guzzle event lifecycle. The evaluation is done by 99 | using a closure that might be defined for each `RotatingProxy` individually. The closure gets the corresponding event (`CompleteEvent` or `ErrorEvent`) 100 | and needs to return either `true` or `false` in order to decide whether the request was successful or not. 101 | 102 | An unsuccessful request will increase the number of failed requests for a proxy. A distinction is made between the _total number of failed requests_ 103 | and the _number of requests that failed consecutively_, because you usually want to mark a proxy as "unusable" after it failed like 5 times in a row. 104 | The _number of requests that failed consecutively_ is reset to zero after each successful request. 105 | 106 | You might define a random timeout that the proxy must wait after each request before it can be used again. 107 | 108 | If all provided proxies become unusable, you might either choose to continue without using any proxies (= making direct requests, thus revealing your own IP) or to let the process 109 | terminate by throwing a `NoProxiesLeftException` instead of making the remaining requests. 110 | 111 | ### Mark a proxy as blocked 112 | A system might block a proxy / IP due to a too aggressive request behaviour. Depending on the system, you might receive a corresponding response, 113 | e.g. a certain status code ([Twitter uses 429](https://dev.twitter.com/rest/public/rate-limiting)) or 114 | maybe just a text message saying something like "Sorry, you're blocked". 115 | 116 | In that case, you don't want to use the proxy in question any longer and should call its `block()` method. See next section for an example. 
117 | 118 | ### Use a custom evaluation function for requests 119 | 120 | ```php 121 | 122 | $evaluation = function(RotatingProxyInterface $proxy, AbstractTransferEvent $event){ 123 | if($event instanceof CompleteEvent){ 124 | $content = $event->getResponse()->getBody(); 125 | // example of a custom message returned by a target system 126 | // for a blocked IP 127 | $pattern = "#Sorry! You made too many requests, your IP is blocked#"; 128 | if(preg_match($pattern,$content)){ 129 | // The current proxy seems to be blocked 130 | // so let's mark it as blocked 131 | $proxy->block(); 132 | return false; 133 | }else{ 134 | // nothing went wrong, the request was successful 135 | return true; 136 | } 137 | }else{ 138 | // We didn't get a CompleteEvent maybe 139 | // due to some connection issues at the proxy 140 | // so let's mark the request as failed 141 | return false; 142 | } 143 | }; 144 | 145 | $proxy = new RotatingProxy("username:password@111.111.111.111:4711", $evaluation); 146 | // or 147 | $proxy->setEvaluationFunction($evaluation); 148 | ``` 149 | 150 | Since the "evaluation" is usually very domain-specific, chances are high that you have something already in place to determine success/failure/blocked states in your application. 151 | In that case you shouldn't duplicate that code/method but instead use the `GUZZLE_CONFIG_*` constants defined in the `RotatingProxyInterface` to store the result of 152 | that method in the config of the guzzle request and just evaluate that config value. See the following example for clarification: 153 | 154 | ```php 155 | 156 | // function specific to your domain model that performs the evaluation 157 | function domain_specific_evaluation(AbstractTransferEvent $event){ 158 | if($event instanceof CompleteEvent){ 159 | $content = $event->getResponse()->getBody(); 160 | // example of a custom message returned by a target system 161 | // for a blocked IP 162 | $pattern = "#Sorry! 
You made too many requests, your IP is blocked#"; 163 | if(preg_match($pattern,$content)){ 164 | // The current proxy seems to be blocked 165 | // so let's mark it as blocked 166 | $event->getRequest()->getConfig()->set(RotatingProxyInterface::GUZZLE_CONFIG_KEY_REQUEST_RESULT, RotatingProxyInterface::GUZZLE_CONFIG_VALUE_REQUEST_RESULT_BLOCKED); 167 | return false; 168 | }else{ 169 | // nothing went wrong, the request was successful 170 | $event->getRequest()->getConfig()->set(RotatingProxyInterface::GUZZLE_CONFIG_KEY_REQUEST_RESULT, RotatingProxyInterface::GUZZLE_CONFIG_VALUE_REQUEST_RESULT_SUCCESS); 171 | return true; 172 | } 173 | }else{ 174 | // We didn't get a CompleteEvent maybe 175 | // due to some connection issues at the proxy 176 | // so let's mark the request as failed 177 | $event->getRequest()->getConfig()->set(RotatingProxyInterface::GUZZLE_CONFIG_KEY_REQUEST_RESULT, RotatingProxyInterface::GUZZLE_CONFIG_VALUE_REQUEST_RESULT_FAILURE); 178 | return false; 179 | } 180 | } 181 | 182 | $evaluation = function(RotatingProxyInterface $proxy, AbstractTransferEvent $event){ 183 | $result = $event->getRequest()->getConfig()->get(RotatingProxyInterface::GUZZLE_CONFIG_KEY_REQUEST_RESULT); 184 | switch($result){ 185 | case RotatingProxyInterface::GUZZLE_CONFIG_VALUE_REQUEST_RESULT_SUCCESS:{ 186 | return true; 187 | } 188 | case RotatingProxyInterface::GUZZLE_CONFIG_VALUE_REQUEST_RESULT_FAILURE:{ 189 | return false; 190 | } 191 | case RotatingProxyInterface::GUZZLE_CONFIG_VALUE_REQUEST_RESULT_BLOCKED:{ 192 | $proxy->block(); 193 | return false; 194 | } 195 | default: throw new RuntimeException("Unknown value '{$result}' for config key ".RotatingProxyInterface::GUZZLE_CONFIG_KEY_REQUEST_RESULT); 196 | } 197 | }; 198 | 199 | $proxy = new RotatingProxy("username:password@111.111.111.111:4711", $evaluation); 200 | // or 201 | $proxy->setEvaluationFunction($evaluation); 202 | ``` 203 | 204 | ### Set a maximum number of fails (total/consecutive) 205 | 206 | ```php 207 | 
208 | $maximumFails = 100; 209 | $consecutiveFails = 5; 210 | 211 | $proxy = new RotatingProxy("username:password@111.111.111.111:4711", null,$consecutiveFails,$maximumFails); 212 | // or 213 | $proxy->setMaxTotalFails($maximumFails); 214 | $proxy->setMaxConsecutiveFails($consecutiveFails); 215 | ``` 216 | 217 | ### Set a random timeout for each proxy before reuse 218 | 219 | ```php 220 | 221 | $from = 1; 222 | $to = 5; 223 | $wait = new RandomTimeInterval($from,$to); 224 | 225 | $proxy = new RotatingProxy("username:password@111.111.111.111:4711", null,null,null,$wait); 226 | // or 227 | $proxy->setWaitInterval($wait); 228 | ``` 229 | 230 | The first request using this proxy will be made without delay. Before the second request can be made with this proxy, a random time between 1 and 5 seconds 231 | is chosen that must pass. This time changes after each request, so the first waiting time might be 2 seconds, the second one might be 5 seconds, etc. 232 | The `ProxyRotator` will try to find another proxy that does not have a time restriction. If none can be found, 233 | a `WaitingEvent` is emitted that contains the proxy with the lowest timeout. You might choose to either skip the waiting time or to let the process sleep until 234 | the waiting time is over and a proxy will be available: 235 | 236 | ```php 237 | 238 | $rotator = new ProxyRotator($proxies); 239 | 240 | $waitFn = function (WaitingEvent $event){ 241 | $proxy = $event->getProxy(); 242 | echo "All proxies have a timeout restriction, the lowest is {$proxy->getWaitingTime()}s!\n"; 243 | // nah, we don't wanna wait 244 | $event->skipWaiting(); 245 | }; 246 | 247 | $rotator->getEmitter()->on(ProxyRotator::EVENT_ON_WAIT, $waitFn); 248 | ``` 249 | 250 | ### Define if the requests should be stopped if all proxies are unusable 251 | 252 | ```php 253 | 254 | $proxies = [/* ... 
*/]; 255 | $useOwnIp = true; 256 | $rotator = new ProxyRotator($proxies,$useOwnIp); 257 | // or 258 | $rotator->setUseOwnIp($useOwnIp); 259 | ``` 260 | 261 | If set to true, the `ProxyRotator` will _not_ throw a `NoProxiesLeftException` if all proxies are unusable but instead make the remaining 262 | requests without using any proxies. In that case, a `UseOwnIpEvent` is emitted every time before a request takes place: 263 | 264 | ```php 265 | 266 | $infoFn = function (UseOwnIpEvent $event){ 267 | echo "No proxies are left, making a direct request!\n"; 268 | }; 269 | 270 | $rotator->getEmitter()->on(ProxyRotator::EVENT_ON_USE_OWN_IP,$infoFn); 271 | ``` 272 | 273 | ### Use the builder class 274 | The majority of the time it is not necessary to set individual options for every proxy, because you're usually sending requests to the same system 275 | (maybe even the same URL), so the evaluation function should be the same for every `RotatingProxy`, for instance. In that case, the `Build` class might come 276 | in handy, as it guides you through the process by using a fluent interface in combination with a 277 | [variant](http://blog.crisp.se/2013/10/09/perlundholm/another-builder-pattern-for-java) of the builder pattern. 
278 | 279 | ```php 280 | 281 | $s = " 282 | username:password@111.111.111.111:4711 283 | username:password@112.112.112.112:4711 284 | username:password@113.113.113.113:4711 285 | "; 286 | 287 | $rotator = Build::rotator() 288 | ->failsIfNoProxiesAreLeft() // throw exception if no proxies are left 289 | ->withProxiesFromString($s, "\n") // build proxies from a string of proxies 290 | // where each proxy is separated by a new line 291 | ->evaluatesProxyResultsByDefault() // use the default evaluation function 292 | ->eachProxyMayFailInfinitlyInTotal() // don't care about total number of fails for a proxy 293 | ->eachProxyMayFailConsecutively(5) // but block a proxy if it fails 5 times in a row 294 | ->eachProxyNeedsToWaitSecondsBetweenRequests(1, 3) // and let it wait between 1 and 3 seconds before making another request 295 | ->build(); 296 | ``` 297 | 298 | This would be equivalent to: 299 | 300 | ```php 301 | 302 | $s = " 303 | username:password@111.111.111.111:4711 304 | username:password@112.112.112.112:4711 305 | username:password@113.113.113.113:4711 306 | "; 307 | 308 | $lines = explode("\n",$s); 309 | $proxies = []; 310 | foreach($lines as $line){ 311 | $trimmed = trim($line); 312 | if($trimmed != ""){ 313 | $wait = new RandomTimeInterval(1,3); 314 | $proxies[$trimmed] = new RotatingProxy($trimmed,null,5,-1,$wait); 315 | } 316 | } 317 | $rotator = new ProxyRotator($proxies,false); 318 | ``` 319 | 320 | ### Use different "identities" to add customization to the requests 321 | There are more advanced systems that do not only check the IP address but also take other "patterns" into account when identifying unusual request behaviour 322 | (that usually ends in blocking that "pattern"). To prevent being caught by such a system, the `RotatingIdentityProxy` was introduced. Think of it as a 323 | `RotatingProxy` with some customization flavour to diversify your request footprint. 
324 | 325 | The customization options are handled via the `Identity` class and - for now - include: 326 | - user agent 327 | - default request headers 328 | - cookie session 329 | - use of the "referer" header 330 | 331 | ```php 332 | $userAgent = "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:37.0) Gecko/20100101 Firefox/37.0"; // common user agent string for firefox 333 | $defaultRequestHeaders = ["Accept-Language" => "de,en"]; // add a preferred language to each of our requests 334 | $cookieSession = new CookieJar(); // enable cookies for this identity 335 | 336 | $identity = new Identity($userAgent,$defaultRequestHeaders,$cookieSession); 337 | $identities = [$identity]; 338 | $proxy1 = new RotatingIdentityProxy($identities, "[PROXY 1]"); 339 | ``` 340 | 341 | *Note:* Since `RotatingIdentityProxy` inherits from `RotatingProxy` it has the same capabilities in terms of random waiting times. 342 | 343 | #### Randomly rotate through multiple identities 344 | The `RotatingIdentityProxy` expects not only one identity but an array of identities. You can further provide a `RandomCounterInterval` that will randomly 345 | switch the identity after a certain number of requests. From the outside (= the server receiving the requests) this looks like a genuine network of different 346 | people sharing the same IP address. 
347 | 348 | ```php 349 | 350 | $userAgent = "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36"; // common user agent string for chrome 351 | $defaultRequestHeaders = ["Accept-Language" => "de"]; // add a preferred language to each of our requests 352 | $cookieSession = null; // disable cookies for this identity 353 | 354 | $identity1 = new Identity($userAgent,$defaultRequestHeaders,$cookieSession); 355 | 356 | $userAgent = "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)"; // common user agent string for Internet Explorer 357 | $defaultRequestHeaders = ["Pragma" => "no-cache"]; // add a no-cache directive to each request 358 | $cookieSession = new CookieJar(); // enable cookies for this identity 359 | 360 | $identity2 = new Identity($userAgent,$defaultRequestHeaders,$cookieSession); 361 | 362 | $identities = [$identity1,$identity2]; 363 | $systemRandomizer = new SystemRandomizer(); 364 | 365 | // switch identities randomly after 2 to 5 requests 366 | $minRequests = 2; 367 | $maxRequests = 5; 368 | $counter = new RandomCounterInterval($minRequests,$maxRequests); 369 | $proxy2 = new RotatingIdentityProxy($identities, "[PROXY 2]",$systemRandomizer,$counter); 370 | ``` 371 | 372 | #### Use builder with identities 373 | There are two options that can be used via the builder interface: 374 | 375 | - `distributeIdentitiesAmongProxies($identities)` 376 | - `eachProxySwitchesIdentityAfterRequests($min,$max)` 377 | 378 | ```php 379 | 380 | $s = " 381 | username:password@111.111.111.111:4711 382 | username:password@112.112.112.112:4711 383 | username:password@113.113.113.113:4711 384 | "; 385 | 386 | $identities = [ 387 | new Identity(/*...*/), 388 | new Identity(/*...*/), 389 | new Identity(/*...*/), 390 | new Identity(/*...*/), 391 | new Identity(/*...*/), 392 | /*..*/ 393 | ]; 394 | 395 | $rotator = Build::rotator() 396 | ->failsIfNoProxiesAreLeft() // throw exception if no proxies are left 397 | 
->withProxiesFromString($s, "\n") // build proxies from a string of proxies 398 | // where each proxy is seperated by a new line 399 | ->evaluatesProxyResultsByDefault() // use the default evaluation function 400 | ->eachProxyMayFailInfinitlyInTotal() // don't care about total number of fails for a proxy 401 | ->eachProxyMayFailConsecutively(5) // but block a proxy if it fails 5 times in a row 402 | ->eachProxyNeedsToWaitSecondsBetweenRequests(1, 3) // and let it wait between 1 and 3 seconds before making another request 403 | // identity options 404 | ->distributeIdentitiesAmongProxies($identities) // setup each proxy with a subset of $identities - no identity is assigne twice! 405 | ->eachProxySwitchesIdentityAfterRequests(3,7) // switch to another identity after between 3 and 7 requests 406 | ->build(); 407 | ``` 408 | 409 | ## Frequently searched questions 410 | 411 | - How can I randomly choose a proxy for each request in Guzzle? 412 | - How can I avoid getting IP blocked? 413 | -------------------------------------------------------------------------------- /src/Builder/Build.php: -------------------------------------------------------------------------------- 1 | proxyClass = RotatingProxy::class; 78 | } 79 | 80 | /** 81 | * @return ProxyRotatorBuildOrderInterface_UseOwnIp 82 | */ 83 | public static function rotator(){ 84 | return new self(); 85 | } 86 | 87 | /** 88 | * Expects an array of proxy strings as input, e.g. 89 | * ["217.0.0.8:8080", "foo@bar:125.12.2.1:7777", "28.3.6.1"] 90 | * Each proxy string is used to create a new RotatingProxy 91 | * @param string[] $stringProxies 92 | * @return \paslandau\GuzzleRotatingProxySubscriber\Builder\Build 93 | */ 94 | public function withProxiesFromStringArray(array $stringProxies){ 95 | $this->stringProxies = $stringProxies; 96 | return $this; 97 | } 98 | 99 | /** 100 | * Expects a seperated string of proxies as input, e.g. 
101 | * "217.0.0.8:8080, foo@bar:125.12.2.1:7777, 28.3.6.1" 102 | * The seperator can be defined by the $seperator argument, it defaults to "\n". 103 | * the string is split on the $seperator and each element is trimmed to get the plain proxy string. 104 | * @param string $proxyString 105 | * @param string $seperator [optional]. Default: "\n"; 106 | * @return \paslandau\GuzzleRotatingProxySubscriber\Builder\Build 107 | */ 108 | public function withProxiesFromString($proxyString, $seperator = null){ 109 | if($seperator === null){ 110 | $seperator = "\n"; 111 | } 112 | $ps = mb_split($seperator, $proxyString); 113 | $proxies = []; 114 | foreach($ps as $p){ 115 | $proxy = trim($p); 116 | if($proxy != ""){ 117 | $proxies[] = $proxy; 118 | } 119 | } 120 | return $this->withProxiesFromStringArray($proxies); 121 | } 122 | 123 | public function evaluatesProxyResultsBy(callable $evaluationFunction){ 124 | $this->evaluationFunction = $evaluationFunction; 125 | return $this; 126 | } 127 | 128 | public function evaluatesProxyResultsByDefault(){ 129 | $this->evaluationFunction = null; 130 | return $this; 131 | } 132 | 133 | public function eachProxyMayFailInTotal($maxTotalFails){ 134 | $this->maxTotalFails = $maxTotalFails; 135 | return $this; 136 | } 137 | 138 | public function eachProxyMayFailInfinitlyInTotal(){ 139 | $this->maxTotalFails = -1; 140 | return $this; 141 | } 142 | 143 | public function eachProxyMayFailConsecutively($maxConsecutiveFails){ 144 | $this->maxConsecutiveFails = $maxConsecutiveFails; 145 | return $this; 146 | } 147 | 148 | public function eachProxyMayFailInfinitlyConsecutively(){ 149 | $this->maxConsecutiveFails = -1; 150 | return $this; 151 | } 152 | 153 | public function eachProxyNeedsToWaitSecondsBetweenRequests($from, $to){ 154 | $this->from = $from; 155 | $this->to = $to; 156 | return $this; 157 | } 158 | 159 | public function proxiesDontNeedToWait(){ 160 | $this->from = null; 161 | $this->to = null; 162 | return $this; 163 | } 164 | 165 | public 
function failsIfNoProxiesAreLeft(){ 166 | $this->useOwnIp = false; 167 | return $this; 168 | } 169 | 170 | public function usesOwnIpIfNoProxiesAreLeft(){ 171 | $this->useOwnIp = true; 172 | return $this; 173 | } 174 | 175 | public function build(){ // creates one proxy object per proxy string and wraps them in a ProxyRotator; throws \InvalidArgumentException if fewer identities than proxies were supplied 176 | $proxies = []; 177 | $class = $this->proxyClass; 178 | if($this->identities !== null && count($this->identities) < count($this->stringProxies)){ 179 | throw new \InvalidArgumentException("Number of identities ".count($this->identities)." must be greater or equal number of proxies ".count($this->stringProxies)); 180 | } 181 | $identitySlice = floor(count((array)$this->identities)/count($this->stringProxies)); // (array) cast: identities may be null (see the null check above) and count(null) throws a TypeError on PHP 8 182 | $rest = count((array)$this->identities)%count($this->stringProxies); // leftover identities, handed out one per proxy below 183 | foreach($this->stringProxies as $proxyString){ 184 | $time = null; 185 | if($this->from !== null && $this->to !== null){ 186 | $time = new RandomTimeInterval($this->from, $this->to); 187 | } 188 | if($class == RotatingProxy::class) { 189 | $proxies[$proxyString] = new $class($proxyString, $this->evaluationFunction, $this->maxConsecutiveFails, $this->maxTotalFails, $time); 190 | }elseif($class == RotatingIdentityProxy::class) { 191 | $counter = null; 192 | if($this->fromRequest !== null && $this->toRequest !== null){ 193 | $counter = new RandomCounterInterval($this->fromRequest, $this->toRequest); // request-count range for identity switching, not the wait-time range ($from/$to) 194 | } 195 | $slice = $identitySlice; 196 | if($rest > 0){ // if we still got a rest from the division, we can add an additional identity 197 | $rest--; 198 | $slice++; 199 | } 200 | $identities = array_splice($this->identities,0,$slice); 201 | $proxies[$proxyString] = new $class($identities, $proxyString, null, $counter, $this->evaluationFunction, $this->maxConsecutiveFails, $this->maxTotalFails, $time); 202 | } 203 | } 204 | $rotator = new ProxyRotator($proxies, $this->useOwnIp); 205 | return $rotator; 206 | } 207 | 208 | /** 209 | * @param Identity[] $identities 210 | * @return ProxyRotatorBuildOrderInterface_SwitchIdentities 211 | */ 212 | public function 
distributeIdentitiesAmongProxies(array $identities) 213 | { 214 | $this->proxyClass = RotatingIdentityProxy::class; 215 | $this->identities = $identities; 216 | return $this; 217 | } 218 | 219 | /** 220 | * @param int $nrOfIdentitiesPerProxy 221 | * @param string[] $userAgentSeed 222 | * @param string[][] $requestHeaderSeed 223 | * @return ProxyRotatorBuildOrderInterface_SwitchIdentities 224 | * @internal param \paslandau\GuzzleRotatingProxySubscriber\Proxy\Identity[] $identities 225 | */ 226 | public function generateIdentitiesForProxies($nrOfIdentitiesPerProxy, array $userAgentSeed, array $requestHeaderSeed) 227 | { 228 | $this->proxyClass = RotatingIdentityProxy::class; 229 | $proxies = count($this->stringProxies); 230 | $targetIdentityCount = $nrOfIdentitiesPerProxy*$proxies; 231 | $identities = []; 232 | for($i=0; $i < $targetIdentityCount; $i++){ 233 | $uaKey = array_rand($userAgentSeed); 234 | $ua = $userAgentSeed[$uaKey]; 235 | $headersKey = array_rand($requestHeaderSeed); 236 | $headers = $requestHeaderSeed[$headersKey]; 237 | $cookieJar = new CookieJar(); 238 | $identities[] = new Identity($ua,$headers,$cookieJar); 239 | } 240 | $this->identities = $identities; 241 | return $this; 242 | } 243 | 244 | /** 245 | * @return ProxyRotatorBuildOrderInterface_Build 246 | */ 247 | public function eachProxySwitchesIdentityAfterEachRequest() 248 | { 249 | $this->fromRequest = null; 250 | $this->toRequest = null; 251 | return $this; 252 | } 253 | 254 | /** 255 | * @param int $from 256 | * @param int $to 257 | * @return ProxyRotatorBuildOrderInterface_Build 258 | */ 259 | public function eachProxySwitchesIdentityAfterRequests($from, $to) 260 | { 261 | $this->fromRequest = $from; 262 | $this->toRequest = $to; 263 | return $this; 264 | } 265 | 266 | } -------------------------------------------------------------------------------- /src/Builder/ProxyRotatorBuildOrderInterface.php: -------------------------------------------------------------------------------- 1 | proxy 
= $proxy; 20 | } 21 | 22 | /** 23 | * @return RotatingProxyInterface 24 | */ 25 | public function getProxy() 26 | { 27 | return $this->proxy; 28 | } 29 | 30 | /** 31 | * Skips the waiting time 32 | */ 33 | public function skipWaiting(){ 34 | $this->proxy->skipWaitingTime(); 35 | } 36 | } -------------------------------------------------------------------------------- /src/Exceptions/NoProxiesLeftException.php: -------------------------------------------------------------------------------- 1 | proxyRotator = $proxyRotator; 24 | 25 | parent::__construct($message, $request, null, $previous); 26 | } 27 | 28 | /** 29 | * @return ProxyRotatorInterface 30 | */ 31 | public function getProxyRotator() 32 | { 33 | return $this->proxyRotator; 34 | } 35 | 36 | /** 37 | * @param ProxyRotatorInterface $proxyRotator 38 | */ 39 | public function setProxyRotator($proxyRotator) 40 | { 41 | $this->proxyRotator = $proxyRotator; 42 | } 43 | 44 | 45 | } -------------------------------------------------------------------------------- /src/Exceptions/RotatingProxySubscriberException.php: -------------------------------------------------------------------------------- 1 | to = $to; 36 | $this->from = $from; 37 | if($randomizer === null){ 38 | $randomizer = new SystemRandomizer(); 39 | } 40 | $this->randomizer = $randomizer; 41 | } 42 | 43 | /** 44 | * Returns the current interval. If none is set, a new one is randomly created. 
45 | * @return int 46 | */ 47 | protected function getCurrentInterval(){ 48 | if($this->currentInterval === null){ 49 | $this->currentInterval = $this->randomizer->randNum($this->from,$this->to); 50 | } 51 | return $this->currentInterval; 52 | } 53 | } -------------------------------------------------------------------------------- /src/Interval/NullRandomCounter.php: -------------------------------------------------------------------------------- 1 | counter = 0; 26 | } 27 | 28 | /** 29 | * @return int 30 | */ 31 | public function getCounter() 32 | { 33 | return $this->counter; 34 | } 35 | 36 | /** 37 | * Returns true if the current random interval is lower or equal the counter value 38 | * @return bool 39 | */ 40 | public function isReady(){ 41 | $cur = $this->getCurrentInterval(); 42 | if($this->counter >= $cur){ 43 | return true; 44 | } 45 | return false; 46 | } 47 | 48 | /** 49 | * Increments the counter by 1 and returns the current counter value (after incrementing) 50 | * @return int 51 | */ 52 | public function incrementCounter(){ 53 | $this->counter++; 54 | return $this->counter; 55 | } 56 | 57 | /** 58 | * Sets the counter to 0 and the interval to null (so a new intervall will be choosen upon next call to $this->isReady()) 59 | */ 60 | public function restart(){ 61 | $this->counter = 0; 62 | $this->currentInterval = null; 63 | } 64 | } -------------------------------------------------------------------------------- /src/Interval/RandomCounterIntervalInterface.php: -------------------------------------------------------------------------------- 1 | isReady()) 19 | */ 20 | public function restart(); 21 | 22 | /** 23 | * @return int 24 | */ 25 | public function getCounter(); 26 | } -------------------------------------------------------------------------------- /src/Interval/RandomIntervalInterface.php: -------------------------------------------------------------------------------- 1 | timeProvider = $timeProvider; 34 | } 35 | 36 | /** 37 | * Checks if 
sufficient time has passed to satisfy the current time interval. 38 | * @return bool 39 | */ 40 | public function isReady() 41 | { 42 | if ($this->lastActionTime === null) { 43 | return true; 44 | } 45 | $t = $this->getWaitingTime(); 46 | return ($t <= 0); 47 | } 48 | 49 | /** 50 | * Gets the time in seconds the need to pass until $this->isReady becomes true. 51 | * @return int 52 | */ 53 | public function getWaitingTime() 54 | { 55 | if ($this->lastActionTime === null) { 56 | return 0; 57 | } 58 | $diff = $this->lastActionTime - ($this->timeProvider->getTime() - $this->getCurrentInterval()); 59 | return $diff; 60 | } 61 | 62 | /** 63 | * Resets the current time interval and set the time of the last action to now 64 | */ 65 | public function restart() 66 | { 67 | $this->lastActionTime = $this->timeProvider->getTime(); 68 | $this->currentInterval = null; 69 | } 70 | 71 | /** 72 | * Resets the current time interval and set the time of the last action to null. 73 | * This means that isReady will return true and getWaitingTime will return 0 until 74 | * $this->restart is called the next time. 75 | */ 76 | public function reset() 77 | { 78 | $this->lastActionTime = null; 79 | $this->currentInterval = null; 80 | } 81 | 82 | } -------------------------------------------------------------------------------- /src/Interval/SystemTimeProvider.php: -------------------------------------------------------------------------------- 1 | isReady becomes true. 13 | * @return int 14 | */ 15 | public function getWaitingTime(); 16 | 17 | /** 18 | * Restarts the current time interval and set the time of the last action to now 19 | */ 20 | public function restart(); 21 | 22 | /** 23 | * Resets the current time interval and set the time of the last action to null. 24 | * This means that isReady will return true and getWaitingTime will return 0 until 25 | * $this->restart is called the next time. 
26 | */ 27 | public function reset(); 28 | } -------------------------------------------------------------------------------- /src/Interval/TimeProviderInterface.php: -------------------------------------------------------------------------------- 1 | userAgent = $userAgent; 38 | $this->defaultRequestHeaders = $defaultRequestHeaders; 39 | $this->cookieJar = $cookieJar; 40 | $this->referer = null; 41 | } 42 | 43 | /** 44 | * @return string 45 | */ 46 | public function getUserAgent() 47 | { 48 | return $this->userAgent; 49 | } 50 | 51 | /** 52 | * @param string $userAgent 53 | */ 54 | public function setUserAgent($userAgent) 55 | { 56 | $this->userAgent = $userAgent; 57 | } 58 | 59 | /** 60 | * @return \string[] 61 | */ 62 | public function getDefaultRequestHeaders() 63 | { 64 | return $this->defaultRequestHeaders; 65 | } 66 | 67 | /** 68 | * @param \string[] $defaultRequestHeaders 69 | */ 70 | public function setDefaultRequestHeaders($defaultRequestHeaders) 71 | { 72 | $this->defaultRequestHeaders = $defaultRequestHeaders; 73 | } 74 | 75 | /** 76 | * @return CookieJarInterface 77 | */ 78 | public function getCookieJar() 79 | { 80 | return $this->cookieJar; 81 | } 82 | 83 | /** 84 | * @param CookieJarInterface $cookieJar 85 | */ 86 | public function setCookieJar($cookieJar) 87 | { 88 | $this->cookieJar = $cookieJar; 89 | } 90 | 91 | /** 92 | * @return string|null 93 | */ 94 | public function getReferer() 95 | { 96 | return $this->referer; 97 | } 98 | 99 | /** 100 | * @param string $referer 101 | */ 102 | public function setReferer($referer) 103 | { 104 | $this->referer = $referer; 105 | } 106 | } -------------------------------------------------------------------------------- /src/Proxy/IdentityInterface.php: -------------------------------------------------------------------------------- 1 | identities = $identities; 54 | if ($randomizer === null) { 55 | $randomizer = new SystemRandomizer(); 56 | } 57 | $this->randomizer = $randomizer; 58 | if ($randomCounter === 
null) { 59 | $randomCounter = new NullRandomCounter(); 60 | } 61 | $this->randomCounter = $randomCounter; 62 | parent::__construct($proxyString, $evaluationFunction, $maxConsecutiveFails, $maxTotalFails, $randomWaitInterval); 63 | } 64 | 65 | /** 66 | * Save referer to identity before passing the event on to the evaluation function 67 | * @param AbstractTransferEvent $event 68 | */ 69 | public function evaluate(AbstractTransferEvent $event) 70 | { 71 | $resp = $event->getResponse(); 72 | if ($resp != null) { 73 | $url = $resp->getEffectiveUrl(); 74 | if ($url !== "") { 75 | $identity = $this->getCurrentIdentity(); 76 | $identity->setReferer($url); 77 | } 78 | } 79 | return parent::evaluate($event); 80 | } 81 | 82 | /** 83 | * Check if the identity should be switched after each request 84 | */ 85 | public function requested() 86 | { 87 | parent::requested(); 88 | $this->randomCounter->incrementCounter(); 89 | if ($this->randomCounter->isReady()) { 90 | $this->switchIdentity(); 91 | } 92 | } 93 | 94 | /** 95 | * @return IdentityInterface[] 96 | */ 97 | public function getIdentities() 98 | { 99 | return $this->identities; 100 | } 101 | 102 | /** 103 | * @return IdentityInterface 104 | */ 105 | public function getCurrentIdentity() 106 | { 107 | if ($this->currentIdentity === null) { 108 | $this->switchIdentity(); 109 | } 110 | return $this->currentIdentity; 111 | } 112 | 113 | /** 114 | * Switches the current identity to a randomly chosen one. 115 | */ 116 | public function switchIdentity() 117 | { 118 | $key = $this->randomizer->randKey($this->identities); 119 | $this->currentIdentity = $this->identities[$key]; 120 | $this->randomCounter->restart(); 121 | } 122 | 123 | /** 124 | * @param RequestInterface $request 125 | * @return RequestInterface 126 | */ 127 | public function setupRequest(RequestInterface $request) 128 | { 129 | $identitiy = $this->getCurrentIdentity(); 130 | if ($identitiy->getCookieJar() != null) { 131 | //todo 132 | // this seems pretty hacky... 
is there a better way to replace the cookie container of a request? 133 | // > Currently not @see https://github.com/guzzle/guzzle/issues/1028#issuecomment-96253542 - maybe with Guzzle 6 134 | 135 | // remove current cookie subscribers 136 | $emitter = $request->getEmitter(); 137 | foreach ($emitter->listeners("complete") as $listener) { 138 | if (is_array($listener) && $listener[0] instanceof Cookie) { 139 | $emitter->detach($listener[0]); 140 | } 141 | } 142 | // set new Cookie subscriber 143 | $cookie = new Cookie($identitiy->getCookieJar()); 144 | $emitter->attach($cookie); 145 | } 146 | if ($identitiy->getUserAgent() != null) { 147 | $request->setHeader("user-agent", $identitiy->getUserAgent()); 148 | } 149 | $headers = $identitiy->getDefaultRequestHeaders(); 150 | if ($headers != null) { 151 | foreach ($headers as $key => $val) { 152 | $request->setHeader($key, $val); 153 | } 154 | } 155 | if ($identitiy->getReferer() != null && trim($identitiy->getReferer()) != "") { 156 | $request->setHeader("referer", $identitiy->getReferer()); 157 | } 158 | $request = parent::setupRequest($request); 159 | return $request; 160 | } 161 | } -------------------------------------------------------------------------------- /src/Proxy/RotatingProxy.php: -------------------------------------------------------------------------------- 1 | proxyString = $proxyString; 71 | if($evaluationFunction === null){ 72 | $evaluationFunction = function(RotatingProxyInterface $proxy, AbstractTransferEvent $event){ 73 | return $event instanceof CompleteEvent; 74 | }; 75 | } 76 | $this->evaluationFunction = $evaluationFunction; 77 | 78 | if($maxConsecutiveFails === null){ 79 | $maxConsecutiveFails = 5; 80 | } 81 | $this->maxConsecutiveFails = $maxConsecutiveFails; 82 | $this->currentConsecutiveFails = 0; 83 | if($maxTotalFails === null){ 84 | $maxTotalFails = -1; 85 | } 86 | $this->maxTotalFails = $maxTotalFails; 87 | $this->currentTotalFails = 0; 88 | 89 | if($randomWaitInterval === null){ 90 | 
$randomWaitInterval = new NullTimeInterval(); 91 | } 92 | $this->waitInterval = $randomWaitInterval; 93 | 94 | $this->totalRequests = 0; 95 | $this->blocked = false; 96 | } 97 | 98 | /** 99 | * @param AbstractTransferEvent $event 100 | */ 101 | public function evaluate(AbstractTransferEvent $event){ 102 | $f = $this->evaluationFunction; 103 | return $f($this, $event); 104 | } 105 | 106 | /** 107 | * @return bool 108 | */ 109 | public function hasToWait(){ 110 | $res = $this->waitInterval->isReady(); 111 | return ! $res; 112 | } 113 | 114 | /** 115 | * @return int 116 | */ 117 | public function getWaitingTime(){ 118 | return $this->waitInterval->getWaitingTime(); 119 | } 120 | 121 | /** 122 | * 123 | */ 124 | public function restartWaitingTime(){ 125 | $this->waitInterval->restart(); 126 | } 127 | 128 | /** 129 | * Sets the waiting time to 0 130 | */ 131 | public function skipWaitingTime() 132 | { 133 | $this->waitInterval->reset(); 134 | } 135 | 136 | /** 137 | * @return bool 138 | */ 139 | public function isUsable(){ 140 | return ( ! $this->isBlocked() && ! 
$this->hasTooManyFails()); 141 | } 142 | 143 | /** 144 | * Call after any request 145 | * @return void 146 | */ 147 | public function requested(){ 148 | $this->totalRequests++; 149 | } 150 | 151 | /** 152 | * Call after a request failed 153 | * @return void 154 | */ 155 | public function failed(){ 156 | $this->currentTotalFails++; 157 | $this->currentConsecutiveFails++; 158 | } 159 | 160 | /** 161 | * Call afer a request was successful 162 | * @return void 163 | */ 164 | public function succeeded(){ 165 | $this->currentConsecutiveFails = 0; 166 | } 167 | 168 | /** 169 | * @return bool 170 | */ 171 | public function hasTooManyFails(){ 172 | return ($this->hasTooManyConsecutiveFails() || $this->hasTooManyTotalFails()); 173 | } 174 | 175 | /** 176 | * @return bool 177 | */ 178 | public function hasTooManyConsecutiveFails(){ 179 | return $this->maxConsecutiveFails > -1 && $this->currentConsecutiveFails >= $this->maxConsecutiveFails; 180 | } 181 | 182 | /** 183 | * @return bool 184 | */ 185 | public function hasTooManyTotalFails(){ 186 | return $this->maxTotalFails > -1 && $this->currentTotalFails >= $this->maxTotalFails; 187 | } 188 | 189 | /** 190 | * @return callable 191 | */ 192 | public function getEvaluationFunction() 193 | { 194 | return $this->evaluationFunction; 195 | } 196 | 197 | /** 198 | * @param callable $evaluationFunction 199 | */ 200 | public function setEvaluationFunction(callable $evaluationFunction) 201 | { 202 | $this->evaluationFunction = $evaluationFunction; 203 | } 204 | 205 | /** 206 | * @return boolean 207 | */ 208 | public function isBlocked() 209 | { 210 | return $this->blocked; 211 | } 212 | 213 | /** 214 | */ 215 | public function block() 216 | { 217 | $this->blocked = true; 218 | } 219 | 220 | /** 221 | */ 222 | public function unblock() 223 | { 224 | $this->blocked = false; 225 | } 226 | 227 | /** 228 | * @return mixed 229 | */ 230 | public function getCurrentConsecutiveFails() 231 | { 232 | return $this->currentConsecutiveFails; 233 | } 
234 | 235 | /** 236 | * @param mixed $currentConsecutiveFails 237 | */ 238 | public function setCurrentConsecutiveFails($currentConsecutiveFails) 239 | { 240 | $this->currentConsecutiveFails = $currentConsecutiveFails; 241 | } 242 | 243 | /** 244 | * @return mixed 245 | */ 246 | public function getCurrentTotalFails() 247 | { 248 | return $this->currentTotalFails; 249 | } 250 | 251 | /** 252 | * @param mixed $currentTotalFails 253 | */ 254 | public function setCurrentTotalFails($currentTotalFails) 255 | { 256 | $this->currentTotalFails = $currentTotalFails; 257 | } 258 | 259 | /** 260 | * @return int|null 261 | */ 262 | public function getMaxConsecutiveFails() 263 | { 264 | return $this->maxConsecutiveFails; 265 | } 266 | 267 | /** 268 | * @param int|null $maxConsecutiveFails 269 | */ 270 | public function setMaxConsecutiveFails($maxConsecutiveFails) 271 | { 272 | $this->maxConsecutiveFails = $maxConsecutiveFails; 273 | } 274 | 275 | /** 276 | * @return int|null 277 | */ 278 | public function getMaxTotalFails() 279 | { 280 | return $this->maxTotalFails; 281 | } 282 | 283 | /** 284 | * @param int|null $maxTotalFails 285 | */ 286 | public function setMaxTotalFails($maxTotalFails) 287 | { 288 | $this->maxTotalFails = $maxTotalFails; 289 | } 290 | 291 | /** 292 | * @return string 293 | */ 294 | public function getProxyString() 295 | { 296 | return $this->proxyString; 297 | } 298 | 299 | /** 300 | * @param string $proxyString 301 | */ 302 | public function setProxyString($proxyString) 303 | { 304 | $this->proxyString = $proxyString; 305 | } 306 | 307 | /** 308 | * @return int 309 | */ 310 | public function getTotalRequests() 311 | { 312 | return $this->totalRequests; 313 | } 314 | 315 | /** 316 | * @param int $totalRequests 317 | */ 318 | public function setTotalRequests($totalRequests) 319 | { 320 | $this->totalRequests = $totalRequests; 321 | } 322 | 323 | /** 324 | * @return TimeIntervalInterface 325 | */ 326 | public function getWaitInterval() 327 | { 328 | return 
$this->waitInterval; 329 | } 330 | 331 | /** 332 | * @param TimeIntervalInterface $waitInterval 333 | */ 334 | public function setWaitInterval($waitInterval) 335 | { 336 | $this->waitInterval = $waitInterval; 337 | } 338 | 339 | /** 340 | * @param RequestInterface $request 341 | * @return RequestInterface 342 | */ 343 | public function setupRequest(RequestInterface $request){ 344 | $request->getConfig()->set("proxy", $this->getProxyString()); 345 | return $request; 346 | } 347 | } -------------------------------------------------------------------------------- /src/Proxy/RotatingProxyInterface.php: -------------------------------------------------------------------------------- 1 | randomizer = $randomizer; 75 | if($proxies === null){ 76 | $proxies = []; 77 | } 78 | $this->setProxies($proxies); 79 | if($useOwnIp === null){ 80 | $useOwnIp = false; 81 | } 82 | $this->useOwnIp = $useOwnIp; 83 | $this->requestId2ProxyMap = []; 84 | $this->reuseSameProxyOnRedirect = true; 85 | } 86 | 87 | 88 | /** 89 | * @return RotatingProxyInterface[] 90 | */ 91 | public function getProxies() 92 | { 93 | return $this->proxies; 94 | } 95 | 96 | /** 97 | * @param RotatingProxyInterface[] $proxies 98 | */ 99 | public function setProxies(array $proxies) 100 | { 101 | foreach($proxies as $proxy) { 102 | $this->proxies[$proxy->getProxyString()] = $proxy; 103 | $this->workingProxies[$proxy->getProxyString()] = $proxy; 104 | } 105 | } 106 | 107 | 108 | /** 109 | * @return RotatingProxyInterface[] 110 | */ 111 | public function getWorkingProxies() 112 | { 113 | return $this->workingProxies; 114 | } 115 | 116 | /** 117 | * @return boolean 118 | */ 119 | public function isUseOwnIp() 120 | { 121 | return $this->useOwnIp; 122 | } 123 | 124 | /** 125 | * @param boolean $useOwnIp 126 | */ 127 | public function setUseOwnIp($useOwnIp) 128 | { 129 | $this->useOwnIp = $useOwnIp; 130 | } 131 | 132 | /** 133 | * @return boolean 134 | */ 135 | public function isReuseSameProxyOnRedirect() 136 | { 137 | 
return $this->reuseSameProxyOnRedirect; 138 | } 139 | 140 | /** 141 | * @param boolean $reuseSameProxyOnRedirect 142 | */ 143 | public function setReuseSameProxyOnRedirect($reuseSameProxyOnRedirect) 144 | { 145 | $this->reuseSameProxyOnRedirect = $reuseSameProxyOnRedirect; 146 | } 147 | 148 | /** 149 | * @param RequestInterface $request 150 | * @return bool - returns false if no proxy could be used (no working proxies left but $this->useOwnIp is true), otherwise true. 151 | */ 152 | public function setupRequest(RequestInterface $request){ 153 | if($this->reuseSameProxyOnRedirect && $this->isRedirectRequest($request)){ // do not change proxy on redirect 154 | return true; 155 | } 156 | $proxy = $this->getWorkingProxy($request); 157 | $this->requestId2ProxyMap[] = $proxy; 158 | $keys = array_keys($this->requestId2ProxyMap); 159 | $requestId = end($keys); // get newly inserted key 160 | $request->getConfig()->set(self::$REQUEST_CONFIG_KEY, $requestId); 161 | $proxy->restartWaitingTime(); 162 | $request = $proxy->setupRequest($request); 163 | if(!$proxy instanceof NullProxy){ 164 | return true; 165 | } 166 | return false; 167 | } 168 | 169 | /** 170 | * Checks wether $request is a redirect by inspecting the "redirect_count" property of the request config 171 | * used by to define the number of redirects of a request GuzzleHttp\Subscriber\Redirect 172 | * @param RequestInterface $request 173 | * @return bool 174 | */ 175 | private function isRedirectRequest(RequestInterface $request){ 176 | $isRedirect = $request->getConfig()->get("redirect_count"); 177 | return ($isRedirect !== null && $isRedirect > 0); 178 | } 179 | 180 | /** 181 | * @param AbstractTransferEvent $event 182 | * @throws RotatingProxySubscriberException 183 | * @return void 184 | */ 185 | public function evaluateResult(AbstractTransferEvent $event){ 186 | $request = $event->getRequest(); 187 | if($event instanceof ErrorEvent) { 188 | $exception = $event->getException(); 189 | if($exception !== null) { 
190 | do { 191 | if ($exception instanceof NoProxiesLeftException) { 192 | throw $exception; 193 | } 194 | $exception = $exception->getPrevious(); 195 | } while ($exception !== null); 196 | } 197 | } 198 | $requestId = $request->getConfig()->get(self::$REQUEST_CONFIG_KEY); 199 | if($requestId === null){ 200 | return; 201 | // Question: What about caches? A cached response might be served so that no proxy was used. 202 | // SOLUTION: simply return without exception. 203 | // throw new RotatingProxySubscriberException("Config key '".self::$REQUEST_CONFIG_KEY."' not found in request config - this shouldn't happen..."); 204 | } 205 | if(!array_key_exists($requestId, $this->requestId2ProxyMap)){ 206 | // This method really should only be called once, because it determines the result of the proxy request 207 | // if it's called multiple times, something is probably wrong. A possible scenario: 208 | // Client has a RotatingProxySubscriber and an additional function attached to the complete event that checks wether the 209 | // response was logically correct (e.g. contained the right json format) and throws an exception if not. 210 | // In that case, this method (evaluateResult) would be called twice: one time in the complete event and the next time 211 | // after the exception was thrown (which results in an error event being emitted). 212 | // SOLUTION: This can be solved by giving this RotatingProxySubscriber a lower priority so that is called last 213 | // Question: Does this introduce problems with ->retry() calls, e.g. is only the last call counted? 214 | // Answer: No - see RotatingProxySubscriberTest::test_integration_RetryingRequestsShouldIncreaseFailesAccordingly() 215 | $msg = "Request with id '{$requestId}' not found - it was probably already processed. Make sure not to pass on multiple events for the same request. 
/**
 * Picks a proxy that may be used for $request.
 *
 * Proxies are drawn via the randomizer from the working proxies. Proxies that are not usable are removed
 * from the working proxies. Proxies that currently have to wait are set aside and are put back before this
 * method returns a proxy or retries.
 * If only waiting proxies are left, a WaitingEvent (self::EVENT_ON_WAIT) is emitted for the proxy with the
 * shortest waiting time, the method sleeps for that time and then tries again.
 * If nothing is left at all, a UseOwnIpEvent (self::EVENT_ON_USE_OWN_IP) is emitted and a NullProxy is
 * returned - provided that $this->useOwnIp is true.
 *
 * @param RequestInterface $request
 * @return RotatingProxyInterface
 * @throws NoProxiesLeftException - if no proxy is left and the usage of the own IP is forbidden
 */
protected function getWorkingProxy(RequestInterface $request)
{
    $waitingProxies = [];
    $waitingProxyTimes = [];
    while ($this->hasEnoughWorkingProxies()) {
        $randKey = $this->randomizer->randKey($this->workingProxies);
        $proxy = $this->workingProxies[$randKey];
        if (!$proxy->isUsable()) {
            // unusable proxies are dropped for good
            unset($this->workingProxies[$randKey]);
            continue;
        }
        if ($proxy->hasToWait()) {
            // set the proxy aside so that it is not drawn again in this round
            $waitingProxyTimes[$randKey] = $proxy->getWaitingTime();
            $waitingProxies[$randKey] = $proxy;
            unset($this->workingProxies[$randKey]);
            continue;
        }
        $this->workingProxies += $waitingProxies;
        return $proxy;
    }

    if (count($waitingProxies) > 0) {
        // sort ascending by waiting time (keys are preserved) and take the key of the smallest entry.
        // The key has to be read from the sorted $waitingProxyTimes - $waitingProxies is still in
        // insertion order, so its first key is not necessarily the proxy with the shortest waiting time.
        asort($waitingProxyTimes);
        reset($waitingProxyTimes);
        $minKey = key($waitingProxyTimes);
        /** @var RotatingProxyInterface $minWaitingProxy */
        $minWaitingProxy = $waitingProxies[$minKey];
        $event = new WaitingEvent($minWaitingProxy);
        $this->getEmitter()->emit(self::EVENT_ON_WAIT, $event);
        // the WaitingTime might have been changed by a listener to the WaitingEvent
        $minimumWaitingTime = $minWaitingProxy->getWaitingTime();
        if ($minimumWaitingTime > 0) {
            // sleep() works on whole seconds; round up so that a fractional waiting time is not cut short
            sleep((int) ceil($minimumWaitingTime));
        }
        $this->workingProxies += $waitingProxies;
        return $this->getWorkingProxy($request);
    }

    if ($this->useOwnIp) {
        $event = new UseOwnIpEvent();
        $this->getEmitter()->emit(self::EVENT_ON_USE_OWN_IP, $event);
        return new NullProxy();
    }
    $msg = "No proxies left and usage of own IP is forbidden";
    throw new NoProxiesLeftException($this, $request, $msg);
}
/**
 * Listener for the "before" event: lets the proxy rotator prepare the request of $event.
 *
 * @param BeforeEvent $event
 */
public function setup(BeforeEvent $event)
{
    $this->proxyRotator->setupRequest($event->getRequest());
}
$takenTimes = []; 65 | $this->getLastTimeFn = function () use (&$takenTimes) { 66 | return end($takenTimes); 67 | }; 68 | $this->getRandomTimeFn = function () use (&$times, &$takenTimes) { 69 | $el = array_shift($times); 70 | $takenTimes[] = $el; 71 | return $el; 72 | }; 73 | } 74 | 75 | /** 76 | * @return callable 77 | */ 78 | public function getGetLastNumberFn() 79 | { 80 | return $this->getLastNumberFn; 81 | } 82 | 83 | /** 84 | * @return callable 85 | */ 86 | public function getGetLastTimeFn() 87 | { 88 | return $this->getLastTimeFn; 89 | } 90 | 91 | /** 92 | * @return callable 93 | */ 94 | public function getGetRandomNumberFn() 95 | { 96 | return $this->getRandomNumberFn; 97 | } 98 | 99 | /** 100 | * @return callable 101 | */ 102 | public function getGetRandomTimeFn() 103 | { 104 | return $this->getRandomTimeFn; 105 | } 106 | 107 | /** 108 | * @return RandomizerInterface 109 | */ 110 | public function getRandomMock() 111 | { 112 | return $this->randomMock; 113 | } 114 | 115 | /** 116 | * @return \paslandau\GuzzleRotatingProxySubscriber\Interval\TimeProviderInterface 117 | */ 118 | public function getTimeMock() 119 | { 120 | return $this->timeMock; 121 | } 122 | 123 | /** 124 | * @return callable 125 | */ 126 | public function getGetLastKeyFn() 127 | { 128 | return $this->getLastKeyFn; 129 | } 130 | 131 | /** 132 | * @return callable 133 | */ 134 | public function getGetRandomKeyFn() 135 | { 136 | return $this->getRandomKeyFn; 137 | } 138 | } -------------------------------------------------------------------------------- /tests/integration/RotatingProxySubscriberTest.php: -------------------------------------------------------------------------------- 1 | $proxy0, 56 | 1 => $proxy1 57 | ]; 58 | 59 | $success = true; 60 | $fail = false; 61 | $responses2Proxy = [ 62 | [$success, $proxy0], 63 | [$success, $proxy0], 64 | [$success, $proxy1], 65 | [$fail, $proxy1], 66 | [$fail, $proxy0], 67 | [$fail, $proxy1], 68 | [$fail, $proxy1], 69 | [$success, $proxy0], 70 | 
[$success, $proxy0], 71 | [$success, $proxy0], 72 | ]; 73 | $randKeys = []; 74 | $responses = []; 75 | foreach ($responses2Proxy as $key => $val) { 76 | $randKeys[$key] = array_search($val[1], $proxies); 77 | $responses[$key] = ($val[0]) ? new Response(200) : new Response(403); 78 | } 79 | 80 | $h = $this->getHelper(null, null, $randKeys); 81 | $useOwnIp = false; 82 | 83 | $rotator = new ProxyRotator($proxies, $useOwnIp, $h->getRandomMock()); 84 | $sub = new RotatingProxySubscriber($rotator); 85 | 86 | $mock = new Mock($responses); 87 | 88 | // Add the mock subscriber to the client. 89 | $client->getEmitter()->attach($mock); 90 | $client->getEmitter()->attach($sub); 91 | 92 | // build requests - we need to do this _after_ the $mock hast been attached to the client, 93 | // otherwise a real request is sent. 94 | $requests = []; 95 | foreach ($responses as $key => $val) { 96 | $req = $client->createRequest("GET"); 97 | $req->getConfig()->set("request_id", $key); 98 | $requests[$key] = $req; 99 | } 100 | 101 | // $sucFn = function(RequestInterface $request){ 102 | // echo "Success at request ".$request->getConfig()->get("request_id")." using proxy ".$request->getConfig()->get("proxy")."\n"; 103 | // }; 104 | // $errFn = function(RequestInterface $request, Exception $e){ 105 | // echo "Error at request ".$request->getConfig()->get("request_id")." 
using proxy ".$request->getConfig()->get("proxy").": ".$e->getMessage()."\n"; 106 | // }; 107 | // foreach($requests as $key => $request){ 108 | // try { 109 | // $client->send($request); 110 | // $sucFn($request); 111 | // }catch(Exception $e){ 112 | // $errFn($request, $e); 113 | // } 114 | // } 115 | 116 | $options = [ 117 | // "complete" => function (CompleteEvent $ev) use ($sucFn) { $sucFn($ev->getRequest());}, 118 | // "error" => function (ErrorEvent $ev) use ($errFn) { $errFn($ev->getRequest(),$ev->getException());}, 119 | ]; 120 | $pool = new Pool($client, $requests, $options); 121 | $pool->wait(); 122 | 123 | $this->assertEquals($total_0, $proxy0->getTotalRequests()); 124 | $this->assertEquals($total_error_0, $proxy0->getCurrentTotalFails()); 125 | $this->assertEquals($consecutive_error_0, $proxy0->getCurrentConsecutiveFails()); 126 | $this->assertEquals($consecutive_error_0 < $max_consecutive_error_0, $proxy0->isUsable()); 127 | $this->assertEquals($total_1, $proxy1->getTotalRequests()); 128 | $this->assertEquals($total_error_1, $proxy1->getCurrentTotalFails()); 129 | $this->assertEquals($consecutive_error_1, $proxy1->getCurrentConsecutiveFails()); 130 | $this->assertEquals($consecutive_error_1 < $max_consecutive_error_1, $proxy1->isUsable()); 131 | } 132 | 133 | /** 134 | * Test if an exception emitted in the complete event of a client is received in the error event. 135 | */ 136 | public function test_integration_HonorEventOrder() 137 | { 138 | $client = new Client(); 139 | 140 | $proxy0 = new RotatingProxy("0", null, 5, 10, null); 141 | $proxies = [ 142 | 0 => $proxy0, 143 | ]; 144 | 145 | $success = true; 146 | $fail = false; 147 | $responses2Proxy = [ 148 | [$success, $proxy0], 149 | ]; 150 | $randKeys = []; 151 | $responses = []; 152 | foreach ($responses2Proxy as $key => $val) { 153 | $randKeys[$key] = array_search($val[1], $proxies); 154 | $responses[$key] = ($val[0]) ? 
new Response(200) : new Response(403); 155 | } 156 | 157 | $h = $this->getHelper(null, null, $randKeys); 158 | $useOwnIp = false; 159 | 160 | $rotator = new ProxyRotator($proxies, $useOwnIp, $h->getRandomMock()); 161 | $sub = new RotatingProxySubscriber($rotator); 162 | 163 | $mock = new Mock($responses); 164 | 165 | // Add the mock subscriber to the client. 166 | $client->getEmitter()->attach($mock); 167 | $client->getEmitter()->attach($sub); 168 | 169 | // build requests - we need to do this _after_ the $mock hast been attached to the client, 170 | // otherwise a real request is sent. 171 | $requests = []; 172 | foreach ($responses as $key => $val) { 173 | $req = $client->createRequest("GET"); 174 | $req->getConfig()->set("request_id", $key); 175 | $requests[$key] = $req; 176 | } 177 | 178 | $exceptionThrownInComplete = null; 179 | /** @var Exception $exceptionReceivedInError */ 180 | $exceptionReceivedInError = null; 181 | $options = [ 182 | "complete" => function (CompleteEvent $ev) use (&$exceptionThrownInComplete) { 183 | $exceptionThrownInComplete = new Exception("foo"); 184 | throw $exceptionThrownInComplete; 185 | }, 186 | "error" => [ 187 | "fn" => function (ErrorEvent $ev) use (&$exceptionReceivedInError) { 188 | $exceptionReceivedInError = $ev->getException(); 189 | }, 190 | // "priority" => -1000000 191 | ] 192 | ]; 193 | $pool = new Pool($client, $requests, $options); 194 | $pool->wait(); 195 | $this->assertNotNull($exceptionThrownInComplete, "The complete event did not throw an exception"); 196 | $this->assertNotNull($exceptionReceivedInError, "The error event did not receive an exception"); 197 | do { 198 | $exceptionReceivedInError = $exceptionReceivedInError->getPrevious(); 199 | $identical = $exceptionThrownInComplete === $exceptionReceivedInError; 200 | } while ($exceptionReceivedInError !== null && !$identical); 201 | $this->assertTrue($identical, "The exception thrown in the complete event is not identical with the exception received in the 
error event"); 202 | $this->assertEquals(1, $proxy0->getCurrentTotalFails()); 203 | } 204 | 205 | /** 206 | * Test if the RotatingProxySubscriber goes well together with the paslandau\ApplicationCacheSubscriber 207 | * This might be problematic because the ApplicationCacheSubscriber intercepts requests in the before event 208 | * and prevents that a proxy is set. The RotatingProxySubscriber must not chocke on such a cached request/response 209 | */ 210 | public function test_integration_CachedRequestsShouldNotFail() 211 | { 212 | $client = new Client(); 213 | 214 | $proxy0 = new RotatingProxy("0", null, 5, 10, null); 215 | $proxies = [ 216 | 0 => $proxy0, 217 | ]; 218 | 219 | $success = true; 220 | $responses2Proxy = [ 221 | [$success, $proxy0], 222 | [$success, $proxy0], 223 | [$success, $proxy0], 224 | ]; 225 | $randKeys = []; 226 | $responses = []; 227 | foreach ($responses2Proxy as $key => $val) { 228 | $randKeys[$key] = array_search($val[1], $proxies); 229 | $responses[$key] = ($val[0]) ? new Response(200, [], Stream::factory("test")) : new Response(403); 230 | } 231 | 232 | $h = $this->getHelper(null, null, $randKeys); 233 | $useOwnIp = false; 234 | 235 | $rotator = new ProxyRotator($proxies, $useOwnIp, $h->getRandomMock()); 236 | $sub = new RotatingProxySubscriber($rotator); 237 | 238 | $mock = new Mock($responses); 239 | 240 | 241 | $cacheDriver = new ArrayCache(); 242 | $cache = new CacheStorage($cacheDriver); 243 | $cacheSub = new ApplicationCacheSubscriber($cache); 244 | 245 | // Add the mock subscriber to the client. 246 | $client->getEmitter()->attach($cacheSub); 247 | $client->getEmitter()->attach($mock); 248 | $client->getEmitter()->attach($sub); 249 | 250 | // build requests - we need to do this _after_ the $mock hast been attached to the client, 251 | // otherwise a real request is sent. 
/**
 * Test if every failed request during a retry-session increases the fails of a proxy accordingly.
 * NOTE: This can only work if the evaluation function of a proxy is called first
 * ==> retry MUST be called after the complete/error event of the RotatingProxySubscriber was executed!
 *
 * Scenario: a single request is answered with three 403 responses followed by one 200 response.
 * The error listener retries the request each time, so the proxy is expected to end up with 3 total fails.
 */
public function test_integration_RetryingRequestsShouldIncreaseFailesAccordingly()
{
    $client = new Client();

    $proxy0 = new RotatingProxy("0", null, 5, 10, null);
    $proxies = [
        0 => $proxy0,
    ];

    $success = true;
    $fail = false;
    $responses2Proxy = [
        [$fail, $proxy0],
        [$fail, $proxy0],
        [$fail, $proxy0],
        [$success, $proxy0],
    ];
    $randKeys = [];
    $responses = [];
    foreach ($responses2Proxy as $key => $val) {
        // strict search: find the very same proxy instance instead of one that merely compares equal
        $randKeys[$key] = array_search($val[1], $proxies, true);
        $responses[$key] = ($val[0]) ? new Response(200) : new Response(403);
    }

    $h = $this->getHelper(null, null, $randKeys);
    $useOwnIp = false;

    $rotator = new ProxyRotator($proxies, $useOwnIp, $h->getRandomMock());
    $sub = new RotatingProxySubscriber($rotator);

    $mock = new Mock($responses);

    // Add the mock subscriber to the client.
    $client->getEmitter()->attach($mock);
    $client->getEmitter()->attach($sub);

    // build requests - we need to do this _after_ the $mock has been attached to the client,
    // otherwise a real request is sent.
    $req = $client->createRequest("GET");
    $requests = [$req];

    $options = [
        "error" => [
            "fn" => function (ErrorEvent $ev) {
                $ev->retry();
            },
            // make sure to call retry AFTER the evaluation
            "priority" => RotatingProxySubscriber::PROXY_COMPLETE_EVENT - 5
        ]
    ];
    $pool = new Pool($client, $requests, $options);
    $pool->wait();
    $this->assertEquals(3, $proxy0->getCurrentTotalFails());
}
new Response(200) : new Response(403); 362 | } 363 | 364 | $h = $this->getHelper(null, null, $randKeys); 365 | $useOwnIp = false; 366 | 367 | $rotator = new ProxyRotator($proxies, $useOwnIp, $h->getRandomMock()); 368 | $sub = new RotatingProxySubscriber($rotator); 369 | 370 | $mock = new Mock($responses); 371 | 372 | // Add the mock subscriber to the client. 373 | $client->getEmitter()->attach($mock); 374 | $client->getEmitter()->attach($sub); 375 | 376 | // build requests - we need to do this _after_ the $mock hast been attached to the client, 377 | // otherwise a real request is sent. 378 | $req = $client->createRequest("GET"); 379 | $requests = [$req]; 380 | 381 | $exception = null; 382 | $options = [ 383 | "error" => [ 384 | "fn" => function (ErrorEvent $ev) { 385 | $ev->retry(); 386 | }, 387 | "priority" => RotatingProxySubscriber::PROXY_COMPLETE_EVENT - 5 388 | ], // make sure to call retry AFTER the evaluation 389 | "end" => function (EndEvent $ev) use (&$exception) { 390 | $ex = $ev->getException(); 391 | if ($ex !== null) { 392 | $exception = get_class($ex); 393 | } 394 | } 395 | ]; 396 | $pool = new Pool($client, $requests, $options); 397 | $pool->wait(); 398 | $expected = NoProxiesLeftException::class; 399 | $this->assertEquals($expected, $exception); 400 | } 401 | 402 | /** 403 | * Test if redirect requests use the same proxy 404 | */ 405 | public function test_integration_RedirectingRequestsShouldUseTheSameProxy() 406 | { 407 | $client = new Client(); 408 | 409 | $proxy0 = new RotatingProxy("0", null, null, null, null); 410 | $proxy1 = new RotatingProxy("1", null, null, null, null); 411 | $proxies = [ 412 | 0 => $proxy0, 413 | 1 => $proxy1, 414 | ]; 415 | 416 | $success = true; 417 | $redirect = false; 418 | $responses2Proxy = [ 419 | [$success, $proxy1], 420 | [$redirect, $proxy1], 421 | [$success, null], // proxy1 will be reused 422 | [$success, $proxy0], 423 | ]; 424 | $randKeys = []; 425 | $responses = []; 426 | foreach ($responses2Proxy as $key => 
$val) { 427 | if ($val[1] !== null) { 428 | $randKeys[$key] = array_search($val[1], $proxies); 429 | } 430 | $responses[$key] = ($val[0]) ? new Response(200) : new Response(301, ["Location" => "http://localhost/"]); 431 | } 432 | 433 | $h = $this->getHelper(null, null, $randKeys); 434 | $useOwnIp = false; 435 | 436 | $rotator = new ProxyRotator($proxies, $useOwnIp, $h->getRandomMock()); 437 | $rotator->setReuseSameProxyOnRedirect(true); 438 | 439 | $sub = new RotatingProxySubscriber($rotator); 440 | 441 | $mock = new Mock($responses); 442 | 443 | // Add the mock subscriber to the client. 444 | $client->getEmitter()->attach($mock); 445 | $client->getEmitter()->attach($sub); 446 | 447 | // build requests - we need to do this _after_ the $mock hast been attached to the client, 448 | // otherwise a real request is sent. 449 | $req1 = $client->createRequest("GET"); 450 | $req2 = $client->createRequest("GET"); 451 | $req3 = $client->createRequest("GET"); 452 | $requests = [$req1, $req2, $req3]; // making only 3 requests but will receive all 4 responses (verified because $proxy0 will have 1 total request) 453 | 454 | // $options = [ 455 | // "before" => function(BeforeEvent $ev){ 456 | // echo "Before: Proxy ".$ev->getRequest()->getConfig()->get("proxy")."\n"; 457 | // }, 458 | // "complete" => function(CompleteEvent $ev){ 459 | // echo "Complete: Proxy ".$ev->getRequest()->getConfig()->get("proxy")."\n"; 460 | // }, 461 | // "error" => function(ErrorEvent $ev){ 462 | // echo "Error: Proxy ".$ev->getRequest()->getConfig()->get("proxy")."\n"; 463 | // }, 464 | // "end" => function(EndEvent $ev){ 465 | // echo "End: Proxy ".$ev->getRequest()->getConfig()->get("proxy")."\n"; 466 | // }, 467 | // ]; 468 | $pool = new Pool($client, $requests); 469 | $pool->wait(); 470 | $this->assertEquals(2, $proxy1->getTotalRequests(), "Proxy {$proxy1->getProxyString()} should have 2 total request"); 471 | $this->assertEquals(0, $proxy1->getCurrentTotalFails(), "Proxy 
{$proxy1->getProxyString()} should have 0 failed requests"); 472 | $this->assertEquals(1, $proxy0->getTotalRequests(), "Proxy {$proxy0->getProxyString()} should have 1 total request"); 473 | } 474 | 475 | public function test_integration_ShouldHonorWaitingTimes() 476 | { 477 | /* 478 | * Scenario: 479 | * 7 requests, 3 proxies 480 | * 1 - proxy_0 => successful request 481 | * 2 - proxy_1 => successful request 482 | * 3 - proxy_2 => successful request 483 | * 4 - proxy_0 => has to wait 5 seconds 484 | * - proxy_2 => successful request 485 | * 5 - proxy_1 => has to wait 24 seconds 486 | * - proxy_2 => has to wait 1 seconds 487 | * - proxy_0 => failed request 488 | * 6 - proxy_2 => successful request 489 | * 7 - proxy_2 => has to wait 5 seconds 490 | * - proxy_1 => has to wait 4 seconds 491 | * - proxy_0 => has to wait 3 seconds 492 | * // sleep for 3 seconds - test by event 493 | * - proxy_1 => has to wait 1 seconds 494 | * - proxy_2 => has to wait 2 seconds 495 | * - proxy_0 => successful request 496 | */ 497 | 498 | $client = new Client(); 499 | 500 | $numbers = [ 501 | 10, // request 4, picked at hasToWait (should return true) 502 | 0, // request 5, picked at hasToWait (should return false) 503 | 10, // request 7, picked at hasToWait (should return true) 504 | ]; 505 | $times = [ 506 | 5, // after request 1, picked at restartWaitingTime -- 5 = lastActionTime 507 | 10, // request 4, picked at hasToWait (true) 508 | 10, // request 4, picked at getWaitingTime (should return 5 - (10 - 10) => 5) 509 | 20, // request 5, picked at hasToWait (should return false) 510 | 23, // after request 5, picked at restartWaitingTime -- 23 = lastActionTime 511 | 30, // request 7, picked at hasToWait (should return true) 512 | 30, // request 7, picked at getWaitingTime (should return 23 - (30 - 10) => 3) 513 | 30, // request 7, picked at getWaitingTime in sleep-loop (should return 23 - (30 - 10) => 3) 514 | 35, // request 7, picked at hasToWait (should return false) 515 | ]; 516 | $h = 
$this->getHelper($numbers, $times); 517 | $interval = new RandomTimeInterval(0, 15, $h->getRandomMock(), $h->getTimeMock()); 518 | $proxy0 = new RotatingProxy("0", null, 5, 10, $interval); 519 | 520 | 521 | $numbers = [ 522 | 29, // request 5, picked at hasToWait (should return true) 523 | ]; 524 | $times = [ 525 | 5, // after request 2, picked at restartWaitingTime -- 5 = lastActionTime 526 | 10, // request 5, picked at hasToWait (true) 527 | 10, // request 5, picked at getWaitingTime (should return 5 - (10 - 29) => 24) 528 | 30, // request 7, picked at hasToWait (should return true) 529 | 30, // request 7, picked at getWaitingTime (should return 5 - (30 - 29) => 4) 530 | 33, // request 7, picked at hasToWait (should return true) 531 | 33, // request 7, picked at getWaitingTime (should return 5 - (33 - 29) => 1) 532 | ]; 533 | $h = $this->getHelper($numbers, $times); 534 | $interval = new RandomTimeInterval(0, 15, $h->getRandomMock(), $h->getTimeMock()); 535 | $proxy1 = new RotatingProxy("1", null, 5, 10, $interval); 536 | 537 | $numbers = [ 538 | 5, // request 4, picked at hasToWait (should return false) 539 | 8, // request 5, picked at hasToWait (should return true) 540 | 10, // request 7, picked at hasToWait (should return false) 541 | ]; 542 | $times = [ 543 | 5, // after request 3, picked at restartWaitingTime -- 5 = lastActionTime 544 | 10, // request 4, picked at hasToWait (false) 545 | 13, // after request 4, picked at restartWaitingTime -- 13 = lastActionTime 546 | 20, // request 5, picked at hasToWait (should return true) 547 | 20, // request 5, picked at getWaitingTime (should return 13 - (20 - 8) => 1) 548 | 25, // request 6, picked at hasToWait (should return true) 549 | 25, // after request 6, picked at restartWaitingTime -- 25 = lastActionTime 550 | 30, // request 7, picked at hasToWait (should return true) 551 | 30, // request 7, picked at getWaitingTime (should return 25 - (30 - 10) => 5) 552 | 33, // request 7, picked at hasToWait (should return 
true) 553 | 33, // request 7, picked at getWaitingTime (should return 25 - (33 - 10) => 2) 554 | ]; 555 | $h = $this->getHelper($numbers, $times); 556 | $interval = new RandomTimeInterval(0, 15, $h->getRandomMock(), $h->getTimeMock()); 557 | $proxy2 = new RotatingProxy("2", null, 5, 10, $interval); 558 | 559 | $proxies = [ 560 | $proxy0->getProxyString() => $proxy0, 561 | $proxy1->getProxyString() => $proxy1, 562 | $proxy2->getProxyString() => $proxy2, 563 | ]; 564 | 565 | $success = true; 566 | $fail = false; 567 | 568 | $responses2Proxy = [ 569 | [$success, $proxy0], //1 570 | [$success, $proxy1], //2 571 | [$success, $proxy2], //3 572 | [null, $proxy0], //4 573 | [$success, $proxy2], 574 | [null, $proxy1], //5 575 | [null, $proxy2], //5 576 | [$fail, $proxy0], //5 577 | [$success, $proxy2],//6 578 | [null, $proxy2], //7 579 | [null, $proxy1], 580 | [null, $proxy0], 581 | [null, $proxy1], 582 | [null, $proxy2], 583 | [$success, $proxy0], 584 | ]; 585 | $randKeys = []; 586 | $responses = []; 587 | foreach ($responses2Proxy as $key => $val) { 588 | $randKeys[] = array_search($val[1], $proxies); 589 | if ($val[0] !== null) { 590 | $responses[] = ($val[0]) ? new Response(200) : new Response(403); 591 | } 592 | } 593 | 594 | $h = $this->getHelper(null, null, $randKeys); 595 | $useOwnIp = false; 596 | 597 | $rotator = new ProxyRotator($proxies, $useOwnIp, $h->getRandomMock()); 598 | $sub = new RotatingProxySubscriber($rotator); 599 | 600 | $mock = new Mock($responses); 601 | 602 | // Add the mock subscriber to the client. 603 | $client->getEmitter()->attach($mock); 604 | $client->getEmitter()->attach($sub); 605 | 606 | // build requests - we need to do this _after_ the $mock hast been attached to the client, 607 | // otherwise a real request is sent. 
608 | $requests = []; 609 | foreach ($responses as $key => $val) { 610 | $req = $client->createRequest("GET"); 611 | $req->getConfig()->set("request_id", $key); 612 | $requests[$key] = $req; 613 | } 614 | 615 | 616 | $checkState = function ($curProxy) use (&$responses2Proxy) { 617 | while (count($responses2Proxy) > 0) { 618 | $el = array_shift($responses2Proxy); 619 | if ($el === null) { 620 | break; 621 | } 622 | if ($el[0] !== null) { 623 | $this->assertEquals($curProxy, $el[1]->getProxyString()); 624 | break; 625 | } 626 | }; 627 | }; 628 | 629 | $sucFn = function (RequestInterface $request) use ($checkState) { 630 | $proxy = $request->getConfig()->get("proxy"); 631 | $checkState($proxy); 632 | // echo "Success at request ".($request->getConfig()->get("request_id")+1)." using proxy ".$proxy."\n"; 633 | }; 634 | $errFn = function (RequestInterface $request, Exception $e) use ($checkState) { 635 | $proxy = $request->getConfig()->get("proxy"); 636 | $checkState($proxy); 637 | // echo "Error at request ".($request->getConfig()->get("request_id")+1)." 
using proxy ".$proxy.": ".$e->getMessage()."\n"; 638 | }; 639 | 640 | $options = [ 641 | "complete" => function (CompleteEvent $ev) use ($sucFn) { 642 | $sucFn($ev->getRequest()); 643 | }, 644 | "error" => function (ErrorEvent $ev) use ($errFn) { 645 | $errFn($ev->getRequest(), $ev->getException()); 646 | }, 647 | ]; 648 | $pool = new Pool($client, $requests, $options); 649 | $pool->wait(); 650 | } 651 | 652 | public function test_integration_SwitchIdentities() 653 | { 654 | /* 655 | * Scenario: 656 | * 4 requests, 1 proxy with 2 identities 657 | * Identies are switched after 2 requests 658 | * In the end, 659 | * both identies should have made 2 requests 660 | * No retries take place 661 | */ 662 | $client = new Client(); 663 | 664 | $identity0 = new Identity("0"); 665 | $identity1 = new Identity("1"); 666 | $identities = [ 667 | 0 => $identity0, 668 | 1 => $identity1, 669 | ]; 670 | 671 | $identityOrder = [ 672 | 0, 673 | 1 674 | ]; 675 | 676 | $expectedIdentities = [ 677 | 0, 0, 1, 1 678 | ]; 679 | /** @var RandomizerInterface|PHPUnit_Framework_MockObject_MockObject $randomizer */ 680 | $randomizer = $this->getMock(RandomizerInterface::class); 681 | $getKeysFn = function ($arr) use (&$identityOrder) { 682 | return array_shift($identityOrder); 683 | }; 684 | $randomizer->expects($this->any())->method("randKey")->willReturnCallback($getKeysFn); 685 | 686 | $counter = new RandomCounterInterval(2, 2); // will always return 2; 687 | $proxy0 = new RotatingIdentityProxy($identities, "0", $randomizer, $counter); 688 | $proxies = [ 689 | 0 => $proxy0, 690 | ]; 691 | 692 | $success = true; 693 | $responses2Proxy = [ 694 | [$success, $proxy0], 695 | [$success, $proxy0], 696 | [$success, $proxy0], 697 | [$success, $proxy0], 698 | ]; 699 | $randKeys = []; 700 | $responses = []; 701 | foreach ($responses2Proxy as $key => $val) { 702 | $randKeys[$key] = array_search($val[1], $proxies); 703 | $responses[$key] = ($val[0]) ? 
new Response(200) : new Response(403); 704 | } 705 | 706 | $h = $this->getHelper(null, null, $randKeys); 707 | $useOwnIp = false; 708 | 709 | $rotator = new ProxyRotator($proxies, $useOwnIp, $h->getRandomMock()); 710 | $sub = new RotatingProxySubscriber($rotator); 711 | 712 | $mock = new Mock($responses); 713 | 714 | // Add the mock subscriber to the client. 715 | $client->getEmitter()->attach($mock); 716 | $client->getEmitter()->attach($sub); 717 | 718 | // build requests - we need to do this _after_ the $mock hast been attached to the client, 719 | // otherwise a real request is sent. 720 | $requests = []; 721 | foreach ($responses as $key => $val) { 722 | $req = $client->createRequest("GET"); 723 | $req->getConfig()->set("request_id", $key); 724 | $requests[$key] = $req; 725 | } 726 | 727 | $actualIdentities = []; 728 | $options = [ 729 | "pool_size" => 1, 730 | "end" => function (EndEvent $ev) use (&$actualIdentities) { 731 | $actual = $ev->getRequest()->getHeader("user-agent"); 732 | $actualIdentities[] = $actual; 733 | }, 734 | ]; 735 | $pool = new Pool($client, $requests, $options); 736 | $pool->wait(); 737 | $this->assertEquals($expectedIdentities, $actualIdentities); 738 | } 739 | 740 | private function getHelper(array $numbers = null, array $times = null, array $randKeys = null) 741 | { 742 | $randomMock = $this->getMock(RandomizerInterface::class); 743 | $timeMock = $this->getMock(TimeProviderInterface::class); 744 | $h = new RandomAndTimeHelper($numbers, $times, $randKeys, $randomMock, $timeMock); 745 | $randomMock->expects($this->any())->method("randNum")->will($this->returnCallback($h->getGetRandomNumberFn())); 746 | $randomMock->expects($this->any())->method("randKey")->will($this->returnCallback($h->getGetRandomKeyFn())); 747 | $timeMock->expects($this->any())->method("getTime")->will($this->returnCallback($h->getGetRandomTimeFn())); 748 | 749 | return $h; 750 | } 751 | } 752 | 
-------------------------------------------------------------------------------- /tests/unit/ProxyRotatorTest.php: -------------------------------------------------------------------------------- 1 | getMock(RandomizerInterface::class); 15 | $h = new RandomAndTimeHelper(null, null, $keys, $randomMock); 16 | $randomMock->expects($this->any())->method("randKey")->will($this->returnCallback($h->getGetRandomKeyFn())); 17 | return $h; 18 | } 19 | 20 | /** 21 | * @param $proxyString 22 | * @param $isUsable 23 | * @param int $waitingTime [optional]. Default: 0. 24 | * @return RotatingProxyInterface|PHPUnit_Framework_MockObject_MockObject 25 | */ 26 | private function getRotatingProxyMock($proxyString, $isUsable, $waitingTime 27 | = 0){ 28 | $proxyMock = $this->getMock(RotatingProxyInterface::class); 29 | $proxyMock->expects($this->any())->method("getProxyString")->will($this->returnValue($proxyString)); 30 | $proxyMock->expects($this->any())->method("isUsable")->will($this->returnValue($isUsable)); 31 | 32 | $curWaitingTime = $waitingTime; 33 | $getWaitingTime = function ()use(&$curWaitingTime){ 34 | return $curWaitingTime; 35 | }; 36 | $calcWaitingTime = function () use(&$curWaitingTime){ 37 | return $curWaitingTime > 0; 38 | }; 39 | $resetWaitingTime = function () use(&$curWaitingTime){ 40 | $curWaitingTime = 0; 41 | }; 42 | $proxyMock->expects($this->any())->method("hasToWait")->will($this->returnCallback($calcWaitingTime)); 43 | $proxyMock->expects($this->any())->method("getWaitingTime")->will($this->returnCallback($getWaitingTime)); 44 | $proxyMock->expects($this->any())->method("skipWaitingTime")->will($this->returnCallback($resetWaitingTime)); 45 | return $proxyMock; 46 | } 47 | 48 | public function test_ShouldSetupProxyOnRequestOnlyIfProxyIsUsable() 49 | { 50 | $failingMock = $this->getRotatingProxyMock("foo",false); 51 | $failingMock->expects($this->never())->method("setupRequest")->willReturnArgument(0); 52 | $proxyMock = 
$this->getRotatingProxyMock("test",true); 53 | $proxyMock->expects($this->once())->method("setupRequest")->willReturnArgument(0); 54 | $proxies = [ 55 | "foo"=> $failingMock, 56 | "test" => $proxyMock 57 | ]; 58 | $useOwnIp = false; 59 | //make sure to pic the failing proxy first 60 | $keys = ["foo","test"]; 61 | $helper = $this->getHelper($keys); 62 | $rotator = new ProxyRotator($proxies, $useOwnIp,$helper->getRandomMock()); 63 | 64 | $client = new Client(); 65 | $request = $client->createRequest("GET", "/"); 66 | 67 | $isWorkingProxyLeft = $rotator->setupRequest($request); 68 | $this->assertTrue($isWorkingProxyLeft,"setupRequest should have returned true when no NullProxy is used"); 69 | } 70 | 71 | public function test_ShouldSetupNullProxyOnRequestIfNoProxyIsReady() 72 | { 73 | $proxyMock = $this->getRotatingProxyMock("test",false); 74 | $proxies = [$proxyMock]; 75 | $useOwnIp = true; 76 | $rotator = new ProxyRotator($proxies, $useOwnIp); 77 | 78 | $client = new Client(); 79 | $request = $client->createRequest("GET", "/"); 80 | 81 | $isWorkingProxyLeft = $rotator->setupRequest($request); 82 | $proxyString = $request->getConfig()->get("proxy"); 83 | $this->assertEquals(null,$proxyString,"Proxy should be not set (null)"); 84 | $this->assertFalse($isWorkingProxyLeft,"setupRequest should have returned false when the NullProxy is used"); 85 | 86 | } 87 | 88 | public function test_ShouldThrowExceptionWhileSetupRequestIfNoProxyIsReady() 89 | { 90 | $this->setExpectedException(NoProxiesLeftException::class); 91 | $proxyMock = $this->getRotatingProxyMock("test",false); 92 | $proxies = [$proxyMock]; 93 | $useOwnIp = false; 94 | $rotator = new ProxyRotator($proxies, $useOwnIp); 95 | 96 | $client = new Client(); 97 | $request = $client->createRequest("GET", "/"); 98 | 99 | $rotator->setupRequest($request); 100 | } 101 | 102 | public function test_ShouldGetWaitEventWithCorrectProxy(){ 103 | $waitingTime = 5; 104 | $proxyMock = $this->getRotatingProxyMock("test",true, 
$waitingTime); 105 | $proxies = [$proxyMock]; 106 | $useOwnIp = true; 107 | $rotator = new ProxyRotator($proxies, $useOwnIp); 108 | 109 | // prepare the event listener 110 | $eventProxy = null; 111 | $checkWaitingTime = function(WaitingEvent $event) use (&$eventProxy){ 112 | $proxy = $event->getProxy(); 113 | $eventProxy = $proxy; 114 | $event->skipWaiting(); 115 | }; 116 | $rotator->getEmitter()->on(ProxyRotator::EVENT_ON_WAIT, $checkWaitingTime); 117 | 118 | $client = new Client(); 119 | $request = $client->createRequest("GET", "/"); 120 | 121 | $rotator->setupRequest($request); 122 | 123 | $this->assertEquals($proxyMock,$eventProxy,"Did not get the correct proxy from the WaitingEvent"); 124 | } 125 | 126 | public function test_ShouldGetNoWaitEventOnNonWaitingProxy(){ 127 | $waitingTime = 0; 128 | $proxyMock = $this->getRotatingProxyMock("test",true, $waitingTime); 129 | $proxies = [$proxyMock]; 130 | $useOwnIp = true; 131 | $rotator = new ProxyRotator($proxies, $useOwnIp); 132 | 133 | // prepare the event listener 134 | $eventProxy = null; 135 | $checkWaitingTime = function(WaitingEvent $event) use (&$eventProxy){ 136 | $proxy = $event->getProxy(); 137 | $eventProxy = $proxy; 138 | $event->skipWaiting(); 139 | }; 140 | $rotator->getEmitter()->on(ProxyRotator::EVENT_ON_WAIT, $checkWaitingTime); 141 | 142 | $client = new Client(); 143 | $request = $client->createRequest("GET", "/"); 144 | 145 | $rotator->setupRequest($request); 146 | 147 | $this->assertEquals(null,$eventProxy,"Did not get the correct proxy from the WaitingEvent"); 148 | } 149 | 150 | public function test_ShouldReuseSameProxyOnRedirect(){ 151 | $proxyMock = $this->getRotatingProxyMock("test",true); 152 | $proxyMock->expects($this->once())->method("setupRequest")->willReturnArgument(0); 153 | $proxies = [$proxyMock]; 154 | $useOwnIp = false; 155 | $rotator = new ProxyRotator($proxies, $useOwnIp); 156 | $rotator->setReuseSameProxyOnRedirect(true); 157 | $client = new Client(); 158 | $request = 
$client->createRequest("GET", "/"); 159 | 160 | $request->getConfig()->set("redirect_count",1); 161 | $rotator->setupRequest($request); // setupRequest will not be called 162 | 163 | $request->getConfig()->remove("redirect_count"); 164 | $rotator->setupRequest($request); // setupRequest will be called 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /tests/unit/RandomCounterIntervalTest.php: -------------------------------------------------------------------------------- 1 | getMock(RandomizerInterface::class); 13 | $h = new RandomAndTimeHelper($numbers, null, null, $randomMock); 14 | $randomMock->expects($this->any())->method("randNum")->will($this->returnCallback($h->getGetRandomNumberFn())); 15 | return $h; 16 | } 17 | 18 | public function test_ShouldBeReadyWhenCounterExceedsMaximumAndSetCounterToZeroOnRestart() 19 | { 20 | $numbers = [4, 6]; 21 | 22 | $h = $this->getHelper($numbers); 23 | $getLastNumber = $h->getGetLastNumberFn(); 24 | 25 | $interval = new RandomCounterInterval(0, 15, $h->getRandomMock()); 26 | //counter is 0 27 | $counter = $interval->getCounter(); 28 | $this->assertEquals(0, $counter, "Counter should be 0 but is $counter"); 29 | $actual = $interval->isReady(); // takes first number '4' from $numbers, counter is 0 at this time 30 | $expected = false; 31 | $lastNum = $getLastNumber(); 32 | $this->assertEquals($expected, $actual, "Should be false since counter is {$counter} and current random number is {$lastNum}"); 33 | for($i = 1; $i < $lastNum;$i++){ 34 | $counter = $interval->incrementCounter(); 35 | $this->assertEquals($i, $counter, "Counter should be $i but is $counter"); 36 | $actual = $interval->isReady(); 37 | $this->assertEquals($expected, $actual, "Should be false since counter is {$counter} and current random number is {$lastNum}"); 38 | } 39 | $counter = $interval->incrementCounter(); 40 | $actual = $interval->isReady(); 41 | $expected = true; 42 | $this->assertEquals($expected, 
$actual, "Should be true since counter is {$counter} and current random number is {$lastNum}"); 43 | 44 | $interval->restart(); 45 | $counter = $interval->getCounter(); 46 | $this->assertEquals(0, $counter, "Counter should be 0 after restart but is $counter"); 47 | $actual = $interval->isReady(); // takes second number '6' from $numbers, counter is 0 at this time 48 | $expected = false; 49 | $lastNum = $getLastNumber(); 50 | $this->assertEquals($expected, $actual, "Should be false since counter is {$counter} and current random number is {$lastNum}"); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /tests/unit/RandomTimeIntervalTest.php: -------------------------------------------------------------------------------- 1 | getMock(RandomizerInterface::class); 12 | $timeMock = $this->getMock(TimeProviderInterface::class); 13 | $h = new RandomAndTimeHelper($numbers, $times, null, $randomMock,$timeMock); 14 | $randomMock->expects($this->any())->method("randNum")->will($this->returnCallback($h->getGetRandomNumberFn())); 15 | $timeMock->expects($this->any())->method("getTime")->will($this->returnCallback($h->getGetRandomTimeFn())); 16 | 17 | return $h; 18 | } 19 | 20 | public function test_ShouldGenerateRandomCurrentInterval() 21 | { 22 | $numbers = [4, 6, 3, 7, 1, 9, 12]; 23 | $times = [5, 10, 15, 20, 25, 30]; 24 | 25 | $h = $this->getHelper($numbers,$times); 26 | $getLastTime = $h->getGetLastTimeFn(); 27 | $getLastNumber = $h->getGetLastNumberFn(); 28 | 29 | $interval = new RandomTimeInterval(0, 15, $h->getRandomMock(), $h->getTimeMock()); 30 | 31 | $time = $interval->getWaitingTime(); 32 | $this->assertEquals(0, $time, "First call, should be 0 since lastActionTime is null"); 33 | 34 | $interval->restart(); // takes first number '5' from $times 35 | $firstTime = $getLastTime(); 36 | 37 | $time = $interval->getWaitingTime(); // takes second number '10' from $times and first number '4' from randomNumbers 38 | $rand = 
$getLastNumber(); 39 | $secondTime = $getLastTime(); 40 | $expected = $firstTime - ($secondTime - $rand); 41 | $this->assertEquals($expected, $time, "Should be $expected using start $firstTime, current $secondTime and random $rand"); 42 | 43 | $time = $interval->getWaitingTime(); // takes third number '15' from $times and no number from randomNumbers 44 | $thirdTime = $getLastTime(); 45 | $expected = $firstTime - ($thirdTime - $rand); 46 | $this->assertEquals($expected, $time, "Should be $expected using start $firstTime, current $thirdTime and random $rand"); 47 | 48 | $interval->restart(); // takes fourth number '20' from $times 49 | $fourthTime = $getLastTime(); 50 | 51 | $time = $interval->getWaitingTime(); // takes fifth number '25' from $times and second number '6' from randomNumbers 52 | $secondRand = $getLastNumber(); 53 | $fifthTime = $getLastTime(); 54 | $expected = $fourthTime - ($fifthTime - $secondRand); 55 | $this->assertEquals($expected, $time, "Should be $expected using start $fourthTime, current $fifthTime and random $secondRand"); 56 | 57 | $interval->reset(); 58 | $time = $interval->getWaitingTime(); 59 | $this->assertEquals(0, $time, "First call after reset, should be 0 since lastActionTime is null"); 60 | 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /tests/unit/RotatingIdentityProxyTest.php: -------------------------------------------------------------------------------- 1 | setExpectedException(\InvalidArgumentException::class); 24 | new RotatingIdentityProxy([], ""); 25 | } 26 | 27 | public function test_ShouldIncrementCounterAfterEachRequest(){ 28 | /** @var RandomizerInterface|PHPUnit_Framework_MockObject_MockObject $randomizer */ 29 | $randomizer = $this->getMock(RandomizerInterface::class); 30 | 31 | /** @var RandomCounterIntervalInterface|PHPUnit_Framework_MockObject_MockObject $counter */ 32 | $counter = $this->getMock(RandomCounterIntervalInterface::class); 33 | 
$counter->expects($this->once())->method("incrementCounter"); 34 | 35 | /** @var IdentityInterface|PHPUnit_Framework_MockObject_MockObject $identity */ 36 | $identity = $this->getMock(IdentityInterface::class); 37 | $rip = new RotatingIdentityProxy([$identity], "", $randomizer, $counter); 38 | $rip->requested(); 39 | } 40 | 41 | public function test_ShouldChooseIdentityAfterInstantiationAndSetupRequestAccordingly() 42 | { 43 | $identityKey = 0; 44 | /** @var RandomizerInterface|PHPUnit_Framework_MockObject_MockObject $randomizer */ 45 | $randomizer = $this->getMock(RandomizerInterface::class); 46 | $randomizer->expects($this->any())->method("randKey")->willReturn($identityKey); 47 | 48 | $randomCounter = new NullRandomCounter(); 49 | 50 | $proxyString = "proxy"; 51 | 52 | /** @var CookieJarInterface|PHPUnit_Framework_MockObject_MockObject $jarMock */ 53 | $jarMock = $this->getMock(CookieJarInterface::class); 54 | 55 | $userAgent = "foo"; 56 | $headers = ["test" => "baz"]; 57 | $identity = new Identity($userAgent, $headers, $jarMock); 58 | $identities = [ 59 | $identityKey => $identity 60 | ]; 61 | $rip = new RotatingIdentityProxy($identities, $proxyString, $randomizer, $randomCounter); 62 | $actual = $rip->getCurrentIdentity(); 63 | $this->assertSame($identity, $actual, "Got wrong identity after object instantiation"); 64 | 65 | //setup request 66 | $client = new Client(); 67 | $request = $client->createRequest("GET", "/"); 68 | $request = $rip->setupRequest($request); 69 | 70 | $actualUserAgent = $request->getHeader("user-agent"); 71 | $this->assertEquals($userAgent,$actualUserAgent,"Expected header 'user-agent' to be {$userAgent} - it was {$actualUserAgent} instead"); 72 | foreach($headers as $key => $val){ 73 | $this->assertTrue($request->hasHeader($key),"Expected header '{$key}' was not present"); 74 | $acutalHeader = $request->getHeader($key); 75 | $this->assertEquals($val,$acutalHeader,"Expected header '{$key}' to be {$val} - it was {$acutalHeader} instead"); 
76 | } 77 | $emitter = $request->getEmitter(); 78 | $actualCookieJar = null; 79 | foreach($emitter->listeners("complete") as $listener){ 80 | /** @var Cookie[] $listener */ 81 | if(is_array($listener) && $listener[0] instanceof Cookie) { 82 | $actualCookieJar = $listener[0]->getCookieJar(); 83 | } 84 | } 85 | $this->assertSame($jarMock, $actualCookieJar, "Got wrong cookie jar after request setup"); 86 | } 87 | /** Expects each switchIdentity() call to activate the identity for the next key handed out by the mocked randomizer (1, 2, 0). */ 88 | public function test_ShouldSwitchIdentities() 89 | { 90 | /** @var RandomizerInterface|PHPUnit_Framework_MockObject_MockObject $randomizer */ 91 | $randomizer = $this->getMock(RandomizerInterface::class); 92 | $keys = [1,2,0]; // order identity keys 93 | $checkKeys = $keys; 94 | $getKeysFn = function($arr) use (&$keys){ 95 | return array_shift($keys); 96 | }; 97 | $randomizer->expects($this->any())->method("randKey")->willReturnCallback($getKeysFn); 98 | 99 | $randomCounter = new NullRandomCounter(); 100 | 101 | $proxyString = "proxy"; 102 | 103 | 104 | 105 | 106 | $identities = [ 107 | 0 => new Identity("0"), 108 | 1 => new Identity("1"), 109 | 2 => new Identity("2"), 110 | ]; 111 | $rip = new RotatingIdentityProxy($identities, $proxyString, $randomizer, $randomCounter); 112 | foreach($checkKeys as $key) { 113 | $rip->switchIdentity(); 114 | $expected = $identities[$key]; 115 | $actual = $rip->getCurrentIdentity(); 116 | $msg = "Got wrong identity ({$actual->getUserAgent()}) after switching identity, expected $key"; 117 | $this->assertSame($expected, $actual, $msg); 118 | } 119 | } 120 | 121 | public function test_ShouldSetReferrerOnIdentity() 122 | { 123 | $identityKey = 0; 124 | $referer = "foo"; 125 | /** @var RandomizerInterface|PHPUnit_Framework_MockObject_MockObject $randomizer */ 126 | $randomizer = $this->getMock(RandomizerInterface::class); 127 | 
$randomizer->expects($this->any())->method("randKey")->willReturn($identityKey); 128 | 129 | $randomCounter = new NullRandomCounter(); 130 | 131 | $proxyString = "proxy"; 132 | 133 | /** @var RequestInterface|PHPUnit_Framework_MockObject_MockObject $requestMock */ 134 | $requestMock = $this->getMock(RequestInterface::class); 135 | $collection = new Collection(); 136 | $requestMock->expects($this->any())->method("getConfig")->willReturn($collection); 137 | 138 | /** @var ResponseInterface|PHPUnit_Framework_MockObject_MockObject $responseMock */ 139 | $responseMock = $this->getMock(ResponseInterface::class); 140 | $responseMock->expects($this->once())->method("getEffectiveUrl")->willReturn($referer); 141 | 142 | 143 | /** @var AbstractTransferEvent|PHPUnit_Framework_MockObject_MockObject $eventMock */ 144 | $eventMock = $this->getMock(AbstractTransferEvent::class,[],[],"",false); 145 | $eventMock->expects($this->once())->method("getResponse")->willReturn($responseMock); 146 | 147 | /** @var IdentityInterface|PHPUnit_Framework_MockObject_MockObject $identityMock */ 148 | $identityMock = $this->getMock(IdentityInterface::class); 149 | $identityMock->expects($this->once())->method("setReferer"); 150 | $identityMock->expects($this->atLeast(1))->method("getReferer"); 151 | 152 | 153 | $identities = [ 154 | $identityKey => $identityMock 155 | ]; 156 | $rip = new RotatingIdentityProxy($identities, $proxyString, $randomizer, $randomCounter); 157 | $rip->evaluate($eventMock); // call setReferer 158 | $rip->setupRequest($requestMock); // call getReferer at least once 159 | } 160 | 161 | } -------------------------------------------------------------------------------- /tests/unit/RotatingProxyTest.php: -------------------------------------------------------------------------------- 1 | failed(); 14 | } 15 | $this->assertFalse($rp->hasTooManyTotalFails(), "Expected NOT to have enough total fails"); 16 | $rp->failed(); 17 | $this->assertTrue($rp->hasTooManyTotalFails(), 
"Expected to have enough total fails"); 18 | 19 | $rp->setMaxTotalFails(-1); 20 | $this->assertFalse($rp->hasTooManyTotalFails(), "Expected NOT to have enough total fails since it inifite fails should be allowed"); 21 | } 22 | /** Checks that the consecutive-fail limit trips exactly at $maxFails, is lifted by -1, and starts over after a reset or a success. */ 23 | public function test_ShouldFailOnTooManyConsecutiveFailsAndNotBefore() 24 | { 25 | $maxFails = 10; 26 | $rp = new RotatingProxy("test", null, $maxFails, -1, null); 27 | for ($i = 0; $i < $maxFails - 1; $i++) { 28 | $rp->failed(); 29 | } 30 | $this->assertFalse($rp->hasTooManyConsecutiveFails(), "Expected NOT to have enough consecutive fails"); 31 | $rp->failed(); 32 | $this->assertTrue($rp->hasTooManyConsecutiveFails(), "Expected to have enough consecutive fails"); 33 | 34 | $rp->setMaxConsecutiveFails(-1); // -1 is expected to allow infinite consecutive fails 35 | $this->assertFalse($rp->hasTooManyConsecutiveFails(), "Expected NOT to have enough consecutive fails since it inifite fails should be allowed"); 36 | 37 | $rp->setMaxConsecutiveFails($maxFails); 38 | $rp->setCurrentConsecutiveFails(0); 39 | for ($i = 0; $i < $maxFails - 1; $i++) { 40 | $rp->failed(); 41 | } 42 | $this->assertFalse($rp->hasTooManyConsecutiveFails(), "Expected NOT to have enough consecutive fails after resetting"); 43 | $rp->succeeded(); // a success is expected to end the current fail streak, so the next fail must not reach the limit 44 | $rp->failed(); 45 | $this->assertFalse($rp->hasTooManyConsecutiveFails(), "Expected NOT to have enough consecutive fails after succeeding"); 46 | } 47 | 48 | public function test_ShouldBeUnsuableOnTooManyFailsOrIfBlocked() 49 | { 50 | $maxFails = 10; 51 | $rp = new RotatingProxy("test", null, $maxFails, -1, null); 52 | $this->assertTrue($rp->isUsable(), "Expected NOT to have enough consecutive fails"); 53 | $rp->setCurrentConsecutiveFails($maxFails); 54 | $this->assertFalse($rp->isUsable(), "Expected to have enough consecutive fails"); 55 | $rp->setCurrentConsecutiveFails(0); 56 | $rp->block(); 57 | $this->assertFalse($rp->isUsable(), "Expected to be blocked"); 58 | $rp->unblock(); 59 | $this->assertTrue($rp->isUsable(), "Expected NOT to be blocked"); 60 | } 61 | 62 | public function
test_ShouldNotWaitIfReady() 63 | { 64 | $timeMock = $this->getMock(TimeIntervalInterface::class); 65 | 66 | $waitTime = 0; 67 | $timeMock->expects($this->any())->method("isReady")->will($this->returnValue(true)); 68 | $timeMock->expects($this->any())->method("getWaitingTime")->will($this->returnValue($waitTime)); 69 | 70 | /** @var TimeIntervalInterface $timeMock */ 71 | $rp = new RotatingProxy("test", null, -1, -1, $timeMock); 72 | $res = $rp->hasToWait(); 73 | $this->assertFalse($res, "Expected proxy does NOT need to wait"); 74 | $this->assertEquals($waitTime, $rp->getWaitingTime(), "Expected $waitTime seconds to wait"); 75 | } 76 | 77 | public function test_ShouldWaitIfNotReady() 78 | { 79 | $waitTime = 5; 80 | $timeMock2 = $this->getMock(TimeIntervalInterface::class); 81 | 82 | $timeMock2->expects($this->any())->method("isReady")->will($this->returnValue(false)); 83 | $timeMock2->expects($this->any())->method("getWaitingTime")->will($this->returnValue($waitTime)); 84 | 85 | /** @var \paslandau\GuzzleRotatingProxySubscriber\Interval\TimeIntervalInterface $timeMock2 */ 86 | $rp = new RotatingProxy("test", null, -1, -1, $timeMock2); 87 | $this->assertTrue($rp->hasToWait(), "Expected proxy needs to wait"); 88 | $this->assertEquals($waitTime, $rp->getWaitingTime(), "Expected $waitTime seconds to wait"); 89 | } 90 | 91 | public function test_ShouldCallReset() 92 | { 93 | $timeMock = $this->getMock(TimeIntervalInterface::class); 94 | $timeMock->expects($this->once())->method("reset"); 95 | /** @var TimeIntervalInterface $timeMock */ 96 | $rp = new RotatingProxy("test", null, -1, -1, $timeMock); 97 | $rp->skipWaitingTime(); 98 | } 99 | 100 | public function test_ShouldCallRestart() 101 | { 102 | $timeMock = $this->getMock(TimeIntervalInterface::class); 103 | $timeMock->expects($this->once())->method("restart"); 104 | /** @var TimeIntervalInterface $timeMock */ 105 | $rp = new RotatingProxy("test", null, -1, -1, $timeMock); 106 | $rp->restartWaitingTime(); 107 | } 108 | 
109 | } --------------------------------------------------------------------------------