├── .php_cs ├── .scrutinizer.yml ├── LICENSE ├── README.md ├── composer.json └── src ├── Command └── HttpCacheWarmupCommand.php ├── DependencyInjection ├── Compiler │ └── UrlProviderCompilerPass.php ├── Configuration.php └── ZenstruckCacheExtension.php ├── Resources └── config │ ├── services.xml │ └── sitemap_provider.xml ├── Url ├── Crawler.php ├── SitemapUrlProvider.php └── UrlProvider.php └── ZenstruckCacheBundle.php /.php_cs: -------------------------------------------------------------------------------- 1 | getUrls()); 99 | } 100 | } 101 | ``` 102 | 103 | 2. Register the class as a service tagged with `zenstruck_cache.url_provider`: 104 | 105 | ```yaml 106 | my_url_provider: 107 | class: Acme\MyUrlProvider 108 | tags: 109 | - { name: zenstruck_cache.url_provider } 110 | ``` 111 | 112 | ## Full Default Config 113 | 114 | ```yaml 115 | zenstruck_cache: 116 | # Either a class or a service that implements Http\Client\HttpClient. 117 | http_client: ~ # Required 118 | 119 | # Either a class or a service that implements Http\Message\MessageFactory. 
120 | message_factory: ~ # Required 121 | 122 | sitemap_provider: 123 | enabled: false 124 | sitemaps: [] 125 | ``` 126 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "zenstruck/cache-bundle", 3 | "description": "Provides a httpcache warmup command for Symfony2", 4 | "keywords": ["cache", "caching", "httpcache"], 5 | "homepage": "http://zenstruck.com/project/ZenstruckCacheBundle", 6 | "type": "symfony-bundle", 7 | "license": "MIT", 8 | "authors": [ 9 | { 10 | "name": "Kevin Bond", 11 | "email": "kevinbond@gmail.com" 12 | } 13 | ], 14 | "require": { 15 | "symfony/framework-bundle": "^2.5|^3.0", 16 | "symfony/console": "^2.5|^3.0", 17 | "php-http/httplug": "^1.0", 18 | "php-http/message-factory": "^1.0", 19 | "php-http/client-implementation": "^1.0" 20 | }, 21 | "require-dev": { 22 | "symfony/dom-crawler": "^2.5|^3.0", 23 | "symfony/css-selector": "^2.5|^3.0", 24 | "matthiasnoback/symfony-dependency-injection-test": "^0.7.4", 25 | "php-http/guzzle5-adapter": "^0.4@dev", 26 | "guzzlehttp/psr7": "^1.0", 27 | "sllh/php-cs-fixer-styleci-bridge": "^1.4" 28 | }, 29 | "autoload": { 30 | "psr-4": { "Zenstruck\\CacheBundle\\": "src/" } 31 | }, 32 | "autoload-dev": { 33 | "psr-4": { "Zenstruck\\CacheBundle\\Tests\\": "tests/" } 34 | }, 35 | "suggest": { 36 | "dpn/xml-sitemap-bundle": "Create a sitemap with Symfony", 37 | "guzzlehttp/psr7": "To use and auto discover GuzzleMessageFactory", 38 | "php-http/guzzle5-adapter": "To use and auto discover Guzzle5HttpAdapter", 39 | "php-http/guzzle6-adapter": "To use and auto discover Guzzle6HttpAdapter" 40 | }, 41 | "extra": { 42 | "branch-alias": { 43 | "dev-master": "3.x-dev" 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/Command/HttpCacheWarmupCommand.php: 
--------------------------------------------------------------------------------
<?php

// NOTE(review): the opening tag, namespace and use statements of this file were
// stripped from the dump; they are reconstructed from the PSR-4 mapping in
// composer.json and from the names used below - confirm against the repository.

namespace Zenstruck\CacheBundle\Command;

use Psr\Http\Message\ResponseInterface;
use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
use Symfony\Component\Console\Helper\ProgressBar;
use Symfony\Component\Console\Helper\Table;
use Symfony\Component\Console\Helper\TableSeparator;
use Symfony\Component\Console\Input\InputInterface;
use Symfony\Component\Console\Output\OutputInterface;
use Symfony\Component\HttpFoundation\Response;
use Zenstruck\CacheBundle\Url\Crawler;

/**
 * Console command that requests every URL known to the crawler service and
 * prints a per-status-code summary of the responses.
 *
 * @author Kevin Bond <kevinbond@gmail.com>
 */
class HttpCacheWarmupCommand extends ContainerAwareCommand
{
    /**
     * {@inheritdoc}
     */
    protected function configure()
    {
        $this
            ->setName('zenstruck:http-cache:warmup')
            ->setDescription('Warms up an http cache');
    }

    /**
     * Crawls all URLs exposed by the "zenstruck_cache.crawler" service while
     * advancing a progress bar, then renders a table with the number of
     * responses received per HTTP status code.
     *
     * {@inheritdoc}
     *
     * @throws \RuntimeException When the crawler reports zero URLs
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        /** @var Crawler $crawler */
        $crawler = $this->getContainer()->get('zenstruck_cache.crawler');
        $total = count($crawler);

        // The count is the number of URLs, so this also triggers when providers
        // are registered but return no URLs.
        if (0 === $total) {
            throw new \RuntimeException('No URL providers registered.');
        }

        // status code => number of responses seen with that code
        $summary = [];
        $progress = new ProgressBar($output, $total);

        $output->writeln("\nBeginning http cache warmup.");
        $progress->start();

        $callback = function (ResponseInterface $response) use (&$summary, $progress) {
            $status = $response->getStatusCode();

            $progress->advance();

            if (!array_key_exists($status, $summary)) {
                $summary[$status] = 1;

                return;
            }

            ++$summary[$status];
        };

        $crawler->crawl($callback);

        $progress->finish();
        $output->writeln("\n");

        ksort($summary);

        $output->writeln('Summary:');

        $table = new Table($output);

        $table->setHeaders(['Code', 'Reason', 'Count']);

        foreach ($summary as $code => $count) {
            // Servers may answer with codes that have no registered reason
            // phrase; fall back instead of raising an undefined index notice.
            $reason = isset(Response::$statusTexts[$code]) ? Response::$statusTexts[$code] : 'Unknown';

            $table->addRow([$code, $reason, $count]);
        }

        $table->addRow(new TableSeparator());
        $table->addRow(['', 'Total', $total]);

        $table->render();
        $output->writeln('');
    }
}

--------------------------------------------------------------------------------
/src/DependencyInjection/Compiler/UrlProviderCompilerPass.php:
--------------------------------------------------------------------------------
<?php

// NOTE(review): the opening tag, namespace and use statements of this file were
// stripped from the dump; they are reconstructed from the PSR-4 mapping in
// composer.json and from the names used below - confirm against the repository.

namespace Zenstruck\CacheBundle\DependencyInjection\Compiler;

use Symfony\Component\DependencyInjection\Compiler\CompilerPassInterface;
use Symfony\Component\DependencyInjection\ContainerBuilder;
use Symfony\Component\DependencyInjection\Reference;

/**
 * Registers every service tagged "zenstruck_cache.url_provider" with the
 * crawler definition.
 *
 * @author Kevin Bond <kevinbond@gmail.com>
 */
class UrlProviderCompilerPass implements CompilerPassInterface
{
    /**
     * Adds an "addUrlProvider" method call to the "zenstruck_cache.crawler"
     * definition for each tagged service. Does nothing when the crawler
     * definition is not present.
     *
     * {@inheritdoc}
     */
    public function process(ContainerBuilder $container)
    {
        if (!$container->hasDefinition('zenstruck_cache.crawler')) {
            return;
        }

        $definition = $container->getDefinition('zenstruck_cache.crawler');
        $taggedServices = $container->findTaggedServiceIds('zenstruck_cache.url_provider');

        // Only the service ids are needed; tag attributes are not used.
        foreach (array_keys($taggedServices) as $id) {
            $definition->addMethodCall('addUrlProvider', [new Reference($id)]);
        }
    }
}

--------------------------------------------------------------------------------
/src/DependencyInjection/Configuration.php:
--------------------------------------------------------------------------------
<?php

// NOTE(review): the opening tag, namespace and use statements of this file were
// stripped from the dump; they are reconstructed from the PSR-4 mapping in
// composer.json and from the names used below - confirm against the repository.

namespace Zenstruck\CacheBundle\DependencyInjection;

use Symfony\Component\Config\Definition\Builder\TreeBuilder;
use Symfony\Component\Config\Definition\ConfigurationInterface;

/**
 * Defines the "zenstruck_cache" configuration tree.
 *
 * @author Kevin Bond <kevinbond@gmail.com>
 */
class Configuration implements ConfigurationInterface
{
    /**
     * Builds the tree: required "http_client" and "message_factory" scalars and
     * an optional "sitemap_provider" section holding a list of sitemaps.
     *
     * @return TreeBuilder
     */
    public function getConfigTreeBuilder()
    {
        $treeBuilder = new TreeBuilder();
        $rootNode = $treeBuilder->root('zenstruck_cache');

        $rootNode
            ->children()
                ->scalarNode('http_client')
                    ->info('Either a class or a service that implements Http\Client\HttpClient.')
                    ->isRequired()
                ->end()
                ->scalarNode('message_factory')
                    ->info('Either a class or a service that implements Http\Message\MessageFactory.')
                    ->isRequired()
                ->end()
                ->arrayNode('sitemap_provider')
                    ->canBeEnabled()
                    ->children()
                        ->arrayNode('sitemaps')
                            ->requiresAtLeastOneElement()
                            ->prototype('scalar')->end()
                        ->end()
                    ->end()
                ->end()
            ->end()
        ;

        return $treeBuilder;
    }
}

--------------------------------------------------------------------------------
/src/DependencyInjection/ZenstruckCacheExtension.php:
--------------------------------------------------------------------------------
<?php

// NOTE(review): the opening tag, namespace and use statements of this file were
// stripped from the dump; they are reconstructed from the PSR-4 mapping in
// composer.json and from the names used below - confirm against the repository.

namespace Zenstruck\CacheBundle\DependencyInjection;

use Symfony\Component\Config\Definition\Exception\InvalidConfigurationException;
use Symfony\Component\Config\FileLocator;
use Symfony\Component\DependencyInjection\ContainerBuilder;
use Symfony\Component\DependencyInjection\Definition;
use Symfony\Component\DependencyInjection\Loader;
use Symfony\Component\HttpKernel\DependencyInjection\ConfigurableExtension;

/**
 * Loads the bundle's service definitions and wires the configured HTTP client
 * and message factory into the container.
 *
 * @author Kevin Bond <kevinbond@gmail.com>
 */
class ZenstruckCacheExtension extends ConfigurableExtension
{
    /**
     * {@inheritdoc}
     */
    protected function loadInternal(array $mergedConfig, ContainerBuilder $container)
    {
        $loader = new Loader\XmlFileLoader($container, new FileLocator(__DIR__.'/../Resources/config'));
        $loader->load('services.xml');

        $this->configureHttpClient($mergedConfig['http_client'], $container);
        $this->configureMessageFactory($mergedConfig['message_factory'], $container);

        if ($mergedConfig['sitemap_provider']['enabled']) {
            // The parameter is set before the file is loaded.
            $container->setParameter('zenstruck_cache.sitemap_provider.sitemaps', $mergedConfig['sitemap_provider']['sitemaps']);
            $loader->load('sitemap_provider.xml');
        }
    }

    /**
     * Registers "zenstruck_cache.http_client" from a class name or service id.
     *
     * @param string           $httpClient
     * @param ContainerBuilder $container
     *
     * @throws InvalidConfigurationException When a class is given that cannot be used as a client
     */
    private function configureHttpClient($httpClient, ContainerBuilder $container)
    {
        $this->configureClassOrService(
            'zenstruck_cache.http_client',
            $httpClient,
            'Http\Client\HttpClient',
            'HttpClient',
            $container
        );
    }

    /**
     * Registers "zenstruck_cache.message_factory" from a class name or service id.
     *
     * @param string           $messageFactory
     * @param ContainerBuilder $container
     *
     * @throws InvalidConfigurationException When a class is given that cannot be used as a factory
     */
    private function configureMessageFactory($messageFactory, ContainerBuilder $container)
    {
        $this->configureClassOrService(
            'zenstruck_cache.message_factory',
            $messageFactory,
            'Http\Message\MessageFactory',
            'MessageFactory',
            $container
        );
    }

    /**
     * Registers $serviceId either as an alias of an existing service or as a
     * private definition of the given class.
     *
     * A value that is not an existing class is treated as a service id. A class
     * must implement $interface, be concrete and be constructible without
     * arguments.
     *
     * @param string           $serviceId Id to register in the container
     * @param string           $value     Configured class name or service id
     * @param string           $interface Interface a configured class must implement
     * @param string           $label     Name used in exception messages
     * @param ContainerBuilder $container
     *
     * @throws InvalidConfigurationException When the class fails one of the checks
     */
    private function configureClassOrService($serviceId, $value, $interface, $label, ContainerBuilder $container)
    {
        if (!class_exists($value)) {
            // is a service
            $container->setAlias($serviceId, $value);

            return;
        }

        $r = new \ReflectionClass($value);

        if (!$r->implementsInterface($interface)) {
            throw new InvalidConfigurationException(sprintf('%s class must implement "%s".', $label, $interface));
        }

        if ($r->isAbstract()) {
            throw new InvalidConfigurationException(sprintf('%s class must not be abstract.', $label));
        }

        $constructor = $r->getConstructor();

        if (null !== $constructor && 0 !== $constructor->getNumberOfRequiredParameters()) {
            throw new InvalidConfigurationException(sprintf('%s class must not have required constructor arguments.', $label));
        }

        $definition = new Definition($value);
        $definition->setPublic(false);
        $container->setDefinition($serviceId, $definition);
    }
}

--------------------------------------------------------------------------------
/src/Resources/config/services.xml:
--------------------------------------------------------------------------------
1 | 2 | 3 | 6 | 7 | 8 | Zenstruck\CacheBundle\Url\Crawler 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 |
--------------------------------------------------------------------------------
/src/Resources/config/sitemap_provider.xml:
--------------------------------------------------------------------------------
1 | 2 | 3 | 6 | 7 | 8 | Zenstruck\CacheBundle\Url\SitemapUrlProvider 9 | 10 | 11 | 12 | 13 | %zenstruck_cache.sitemap_provider.sitemaps% 14 | 15 | 16 | 17 | 18 | 19 | 20 |
--------------------------------------------------------------------------------
/src/Url/Crawler.php:
--------------------------------------------------------------------------------
<?php

// NOTE(review): the opening tag, namespace and use statements of this file were
// stripped from the dump; they are reconstructed from the PSR-4 mapping in
// composer.json and from the names used below - confirm against the repository.

namespace Zenstruck\CacheBundle\Url;

use Http\Client\HttpClient;
use Http\Message\MessageFactory;
use Psr\Http\Message\ResponseInterface;
use Psr\Log\LoggerInterface;
use Psr\Log\LogLevel;

/**
 * Sends a GET request to every URL supplied by its registered URL providers.
 *
 * @author Kevin Bond <kevinbond@gmail.com>
 */
class Crawler implements \Countable
{
    private $httpClient;
    private $messageFactory;
    private $logger;
    private $urlProviders;

    /**
     * @param HttpClient           $httpClient
     * @param MessageFactory       $messageFactory
     * @param LoggerInterface|null $logger         Responses are not logged when null
     * @param UrlProvider[]        $urlProviders
     */
    public function __construct(HttpClient $httpClient, MessageFactory $messageFactory, LoggerInterface $logger = null, array $urlProviders = [])
    {
        $this->httpClient = $httpClient;
        $this->messageFactory = $messageFactory;
        $this->logger = $logger;
        $this->urlProviders = $urlProviders;
    }

    /**
     * @param UrlProvider $provider
     */
    public function addUrlProvider(UrlProvider $provider)
    {
        $this->urlProviders[] = $provider;
    }

    /**
     * Sums the counts of all registered providers.
     *
     * {@inheritdoc}
     */
    public function count()
    {
        $count = 0;

        foreach ($this->urlProviders as $provider) {
            $count += count($provider);
        }

        return $count;
    }

    /**
     * Requests each provider URL in turn, logs the response and hands it to
     * the callback when one is given.
     *
     * @param callable|null $callback Response as first argument, calling URL as second
     */
    public function crawl(callable $callback = null)
    {
        foreach ($this->getUrls() as $url) {
            $response = $this->httpClient->sendRequest($this->messageFactory->createRequest('GET', $url));

            $this->log($response, $url);

            if ($callback) {
                $callback($response, $url);
            }
        }
    }

    /**
     * Logs "[status] url" at DEBUG level for a 200 response and at NOTICE
     * level for any other status. Does nothing without a logger.
     *
     * @param ResponseInterface $response
     * @param string            $url
     */
    private function log(ResponseInterface $response, $url)
    {
        if (null === $this->logger) {
            return;
        }

        $status = $response->getStatusCode();

        $this->logger->log(200 === $status ? LogLevel::DEBUG : LogLevel::NOTICE, sprintf('[%s] %s', $status, $url));
    }

    /**
     * Concatenates the URLs of all providers in registration order.
     *
     * @return array
     */
    private function getUrls()
    {
        $urls = [];

        foreach ($this->urlProviders as $provider) {
            $urls = array_merge($urls, $provider->getUrls());
        }

        return $urls;
    }
}

--------------------------------------------------------------------------------
/src/Url/SitemapUrlProvider.php:
--------------------------------------------------------------------------------
<?php

// NOTE(review): the opening tag, namespace and use statements of this file were
// stripped from the dump; they are reconstructed from the PSR-4 mapping in
// composer.json and from the names used below - confirm against the repository.

namespace Zenstruck\CacheBundle\Url;

use Http\Client\HttpClient;
use Http\Message\MessageFactory;
use Symfony\Component\DomCrawler\Crawler as DomCrawler;

/**
 * Provides the URLs listed in one or more XML sitemaps fetched over HTTP.
 *
 * @author Kevin Bond <kevinbond@gmail.com>
 */
class SitemapUrlProvider implements UrlProvider
{
    private $sitemaps;
    private $httpClient;
    private $messageFactory;
    private $urls;

    /**
     * @param array          $sitemaps       Sitemap URLs, or bare hosts whose default sitemap locations are tried
     * @param HttpClient     $httpClient
     * @param MessageFactory $messageFactory
     *
     * @throws \RuntimeException When the DomCrawler component is not available
     */
    public function __construct(array $sitemaps, HttpClient $httpClient, MessageFactory $messageFactory)
    {
        if (!class_exists('Symfony\\Component\\DomCrawler\\Crawler')) {
            throw new \RuntimeException('symfony/dom-crawler and symfony/css-selector must be installed to use SitemapUrlProvider.');
        }

        $this->sitemaps = $sitemaps;
        $this->httpClient = $httpClient;
        $this->messageFactory = $messageFactory;
    }

    /**
     * {@inheritdoc}
     */
    public function count()
    {
        return count($this->getUrls());
    }

    /**
     * Returns the URLs of all configured sitemaps. The result is computed on
     * the first call and reused afterwards.
     *
     * {@inheritdoc}
     */
    public function getUrls()
    {
        if (null !== $this->urls) {
            return $this->urls;
        }

        $urls = [];

        foreach ($this->sitemaps as $sitemap) {
            $urls = array_merge($urls, $this->getUrlsForSitemapUrl($sitemap));
        }

        return $this->urls = $urls;
    }

    /**
     * Resolves one configured entry: an entry without a path (or with only
     * "/") is treated as a host, anything else as a sitemap URL.
     *
     * @param string $sitemap
     *
     * @return array
     */
    private function getUrlsForSitemapUrl($sitemap)
    {
        $path = parse_url($sitemap, PHP_URL_PATH);

        if (null === $path || '/' === trim($path)) {
            return $this->tryDefaultSitemapUrls($sitemap);
        }

        return $this->parseUrl($sitemap);
    }

    /**
     * Tries "sitemap_index.xml" and, if that yields nothing, "sitemap.xml"
     * on the given host.
     *
     * @param string $host
     *
     * @return array
     */
    private function tryDefaultSitemapUrls($host)
    {
        // try default sitemap_index.xml
        $urls = $this->parseUrl($this->addPathToHost('sitemap_index.xml', $host));

        if (empty($urls)) {
            // try default sitemap.xml
            $urls = $this->parseUrl($this->addPathToHost('sitemap.xml', $host));
        }

        return $urls;
    }

    /**
     * Fetches a sitemap and returns the URLs it lists. Any response other
     * than 200 yields an empty array.
     *
     * @param string $url
     *
     * @return array
     */
    private function parseUrl($url)
    {
        $response = $this->httpClient->sendRequest($this->messageFactory->createRequest('GET', $url));

        if (200 !== $response->getStatusCode()) {
            return [];
        }

        $body = (string) $response->getBody();

        // NOTE(review): the needle and the start of the return statement were
        // stripped from the dump; '<sitemapindex' is reconstructed from the
        // method being called - confirm against the repository.
        if (false !== strpos($body, '<sitemapindex')) {
            return $this->parseSitemapIndex($body);
        }

        return $this->getLocEntries($body);
    }

    /**
     * Resolves every location listed in a sitemap index and merges the URLs
     * found in each.
     *
     * @param string $body
     *
     * @return array
     */
    private function parseSitemapIndex($body)
    {
        $urls = [];

        foreach ($this->getLocEntries($body) as $entry) {
            $urls = array_merge($urls, $this->getUrlsForSitemapUrl($entry));
        }

        return $urls;
    }

    /**
     * Extracts the text of every "loc" element from an XML body.
     *
     * @param string $body
     *
     * @return array
     */
    private function getLocEntries($body)
    {
        $crawler = new DomCrawler($body);
        $entries = [];
        $filter = 'loc';

        // check for namespaces
        // NOTE(review): presumably mirrors how DomCrawler treats documents that
        // declare prefixed namespaces - confirm against the DomCrawler docs.
        if (preg_match('/xmlns:/', $body)) {
            $filter = 'default|loc';
        }

        foreach ($crawler->filter($filter) as $node) {
            $entries[] = $node->nodeValue;
        }

        return $entries;
    }

    /**
     * Joins a host and a path with exactly one slash between them.
     *
     * @param string $path
     * @param string $host
     *
     * @return string
     */
    private function addPathToHost($path, $host)
    {
        // Only trailing slashes are removed from the host so that a leading
        // "//" (or any other prefix) is left untouched.
        return sprintf('%s/%s', rtrim($host, '/'), ltrim($path, '/'));
    }
}

--------------------------------------------------------------------------------
/src/Url/UrlProvider.php:
--------------------------------------------------------------------------------
<?php

// NOTE(review): the opening tag and namespace of this file were stripped from
// the dump; they are reconstructed from the PSR-4 mapping in composer.json -
// confirm against the repository.

namespace Zenstruck\CacheBundle\Url;

/**
 * A countable source of URLs for the crawler.
 *
 * @author Kevin Bond <kevinbond@gmail.com>
 */
interface UrlProvider extends \Countable
{
    /**
     * @return array The URLs to request
     */
    public function getUrls();
}

--------------------------------------------------------------------------------
/src/ZenstruckCacheBundle.php:
--------------------------------------------------------------------------------
<?php

// NOTE(review): the opening tag, namespace and use statements of this file were
// stripped from the dump; they are reconstructed from the PSR-4 mapping in
// composer.json and from the names used below - confirm against the repository.

namespace Zenstruck\CacheBundle;

use Symfony\Component\DependencyInjection\ContainerBuilder;
use Symfony\Component\HttpKernel\Bundle\Bundle;
use Zenstruck\CacheBundle\DependencyInjection\Compiler\UrlProviderCompilerPass;

/**
 * Bundle entry point; registers the URL provider compiler pass.
 *
 * @author Kevin Bond <kevinbond@gmail.com>
 */
class ZenstruckCacheBundle extends Bundle
{
    /**
     * {@inheritdoc}
     */
    public function build(ContainerBuilder $container)
    {
        parent::build($container);

        $container->addCompilerPass(new UrlProviderCompilerPass());
    }
}

--------------------------------------------------------------------------------