├── .gitignore ├── Goutte ├── Client.php ├── Resources │ └── phar-stub.php └── Tests │ └── ClientTest.php ├── LICENSE ├── README.rst ├── box.json ├── composer.json └── phpunit.xml.dist /.gitignore: -------------------------------------------------------------------------------- 1 | composer.lock 2 | phpunit.xml 3 | vendor 4 | -------------------------------------------------------------------------------- /Goutte/Client.php: -------------------------------------------------------------------------------- 1 | <?php 2 | 3 | /* 4 | * This file is part of the Goutte package. 5 | * 6 | * (c) Fabien Potencier <fabien@symfony.com> 7 | * 8 | * For the full copyright and license information, please view the LICENSE 9 | * file that was distributed with this source code. 10 | */ 11 | 12 | namespace Goutte; 13 | 14 | use Symfony\Component\BrowserKit\CookieJar; 15 | use Symfony\Component\BrowserKit\History; 16 | use Symfony\Component\BrowserKit\HttpBrowser; 17 | use Symfony\Contracts\HttpClient\HttpClientInterface; 18 | 19 | /** 20 | * Thin proxy to Symfony's HttpBrowser; it adds nothing except a deprecation notice. 21 | * 22 | * @author Fabien Potencier <fabien@symfony.com> 23 | * 24 | * @deprecated Use Symfony\Component\BrowserKit\HttpBrowser directly instead 25 | */ 26 | class Client extends HttpBrowser 27 | { 28 | /** 29 | * Triggers the class deprecation notice, then forwards all three arguments to HttpBrowser unchanged. 30 | * 31 | * The parameters are declared explicitly nullable: relying on a "= null" default to make a 32 | * typed parameter implicitly nullable is deprecated as of PHP 8.4. 33 | */ 34 | public function __construct(?HttpClientInterface $client = null, ?History $history = null, ?CookieJar $cookieJar = null) 35 | { 36 | trigger_deprecation('fabpot/goutte', '4.0', 'The "%s" class is deprecated, use "%s" instead.', __CLASS__, HttpBrowser::class); 37 | 38 | parent::__construct($client, $history, $cookieJar); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /Goutte/Resources/phar-stub.php: -------------------------------------------------------------------------------- 1 | <?php 2 | 3 | /* 4 | * This file is part of the Goutte utility. 5 | * 6 | * (c) Fabien Potencier <fabien@symfony.com> 7 | * 8 | * This source file is subject to the MIT license that is bundled 9 | * with this source code in the file LICENSE.
10 | */ 11 | 12 | require_once 'phar://'.__FILE__.'/vendor/autoload.php'; 13 | 14 | __HALT_COMPILER(); 15 | -------------------------------------------------------------------------------- /Goutte/Tests/ClientTest.php: -------------------------------------------------------------------------------- 1 | <?php 2 | 3 | /* 4 | * This file is part of the Goutte package. 5 | * 6 | * (c) Fabien Potencier <fabien@symfony.com> 7 | * 8 | * For the full copyright and license information, please view the LICENSE 9 | * file that was distributed with this source code. 10 | */ 11 | 12 | namespace Goutte\Tests; 13 | 14 | use Goutte\Client; 15 | use PHPUnit\Framework\TestCase; 16 | use Symfony\Component\BrowserKit\HttpBrowser; 17 | 18 | /** 19 | * Smoke test for the Goutte\Client class. 20 | */ 21 | class ClientTest extends TestCase 22 | { 23 | /** 24 | * A Client constructed without arguments must be an HttpBrowser instance. 25 | */ 26 | public function testNew() 27 | { 28 | self::assertInstanceOf(HttpBrowser::class, new Client()); 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010-present Fabien Potencier 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is furnished 8 | to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Goutte, a simple PHP Web Scraper 2 | ================================ 3 | 4 | Goutte is a screen scraping and web crawling library for PHP. 5 | 6 | Goutte provides a nice API to crawl websites and extract data from the HTML/XML 7 | responses. 8 | 9 | **WARNING**: This library is deprecated. As of v4, Goutte became a simple proxy 10 | to the `HttpBrowser class 11 | <https://symfony.com/doc/current/components/browser_kit.html#making-external-http-requests>`_ 12 | from the `Symfony BrowserKit <https://symfony.com/components/BrowserKit>`_ component. To 13 | migrate, replace ``Goutte\Client`` with 14 | ``Symfony\Component\BrowserKit\HttpBrowser`` in your code. 15 | 16 | Requirements 17 | ------------ 18 | 19 | Goutte depends on PHP 7.1.3+. 20 | 21 | Installation 22 | ------------ 23 | 24 | Add ``fabpot/goutte`` as a require dependency in your ``composer.json`` file: 25 | 26 | .. code-block:: bash 27 | 28 | composer require fabpot/goutte 29 | 30 | Usage 31 | ----- 32 | 33 | Create a Goutte Client instance (which extends 34 | ``Symfony\Component\BrowserKit\HttpBrowser``): 35 | 36 | .. code-block:: php 37 | 38 | use Goutte\Client; 39 | 40 | $client = new Client(); 41 | 42 | Make requests with the ``request()`` method: 43 | 44 | .. code-block:: php 45 | 46 | // Go to the symfony.com website 47 | $crawler = $client->request('GET', 'https://www.symfony.com/blog/'); 48 | 49 | The method returns a ``Crawler`` object 50 | (``Symfony\Component\DomCrawler\Crawler``). 51 | 52 | To use your own HTTP settings, you may create and pass an HttpClient 53 | instance to Goutte. For example, to add a 60 second request timeout: 54 | 55 | .. 
code-block:: php 56 | 57 | use Goutte\Client; 58 | use Symfony\Component\HttpClient\HttpClient; 59 | 60 | $client = new Client(HttpClient::create(['timeout' => 60])); 61 | 62 | Click on links: 63 | 64 | .. code-block:: php 65 | 66 | // Click on the "Security Advisories" link 67 | $link = $crawler->selectLink('Security Advisories')->link(); 68 | $crawler = $client->click($link); 69 | 70 | Extract data: 71 | 72 | .. code-block:: php 73 | 74 | // Get the latest post in this category and display the titles 75 | $crawler->filter('h2 > a')->each(function ($node) { 76 | print $node->text()."\n"; 77 | }); 78 | 79 | Submit forms: 80 | 81 | .. code-block:: php 82 | 83 | $crawler = $client->request('GET', 'https://github.com/'); 84 | $crawler = $client->click($crawler->selectLink('Sign in')->link()); 85 | $form = $crawler->selectButton('Sign in')->form(); 86 | $crawler = $client->submit($form, ['login' => 'fabpot', 'password' => 'xxxxxx']); 87 | $crawler->filter('.flash-error')->each(function ($node) { 88 | print $node->text()."\n"; 89 | }); 90 | 91 | More Information 92 | ---------------- 93 | 94 | Read the documentation of the `BrowserKit`_, `DomCrawler`_, and `HttpClient`_ 95 | Symfony Components for more information about what you can do with Goutte. 96 | 97 | Pronunciation 98 | ------------- 99 | 100 | Goutte is pronounced ``goot`` i.e. it rhymes with ``boot`` and not ``out``. 101 | 102 | Technical Information 103 | --------------------- 104 | 105 | Goutte is a thin wrapper around the following Symfony Components: 106 | `BrowserKit`_, `CssSelector`_, `DomCrawler`_, and `HttpClient`_. 107 | 108 | License 109 | ------- 110 | 111 | Goutte is licensed under the MIT license. 112 | 113 | .. _`Composer`: https://getcomposer.org 114 | .. _`BrowserKit`: https://symfony.com/components/BrowserKit 115 | .. _`DomCrawler`: https://symfony.com/doc/current/components/dom_crawler.html 116 | .. _`CssSelector`: https://symfony.com/doc/current/components/css_selector.html 117 | .. 
_`HttpClient`: https://symfony.com/doc/current/components/http_client.html 118 | -------------------------------------------------------------------------------- /box.json: -------------------------------------------------------------------------------- 1 | { 2 | "output": "goutte.phar", 3 | "chmod": "0755", 4 | "compactors": [ 5 | "Herrera\\Box\\Compactor\\Php" 6 | ], 7 | "extract": false, 8 | "files": [ 9 | "LICENSE", 10 | "Goutte/Client.php" 11 | ], 12 | "finder": [ 13 | { 14 | "name": ["*.php", "*.pem*"], 15 | "exclude": ["Tests", "tests"], 16 | "in": "vendor" 17 | } 18 | ], 19 | "stub": "Goutte/Resources/phar-stub.php", 20 | "web": false 21 | } 22 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "fabpot/goutte", 3 | "type": "application", 4 | "description": "A simple PHP Web Scraper", 5 | "keywords": ["scraper"], 6 | "homepage": "https://github.com/FriendsOfPHP/Goutte", 7 | "license": "MIT", 8 | "authors": [ 9 | { 10 | "name": "Fabien Potencier", 11 | "email": "fabien@symfony.com" 12 | } 13 | ], 14 | "require": { 15 | "php": ">=7.1.3", 16 | "symfony/deprecation-contracts": "^2.1|^3", 17 | "symfony/browser-kit": "^4.4|^5.0|^6.0", 18 | "symfony/css-selector": "^4.4|^5.0|^6.0", 19 | "symfony/dom-crawler": "^4.4|^5.0|^6.0", 20 | "symfony/http-client": "^4.4|^5.0|^6.0", 21 | "symfony/mime": "^4.4|^5.0|^6.0" 22 | }, 23 | "require-dev": { 24 | "symfony/phpunit-bridge": "^6.0" 25 | }, 26 | "autoload": { 27 | "psr-4": { "Goutte\\": "Goutte" }, 28 | "exclude-from-classmap": ["Goutte/Tests"] 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /phpunit.xml.dist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 12 | 13 | 14 | ./Goutte/Tests 15 | 16 | 17 | 18 | 
--------------------------------------------------------------------------------