├── .gitignore
├── Api
├── CrawlerInterface.php
├── Data
│ ├── PageInterface.php
│ └── PageSearchResultInterface.php
└── PageRepositoryInterface.php
├── CHANGELOG.md
├── Console
└── Command
│ ├── Crawler.php
│ └── Flush.php
├── Cron
└── Crawler.php
├── Helper
└── Config.php
├── LICENSE.txt
├── Model
├── Cache
│ └── TypePlugin.php
├── Config
│ └── Source
│ │ └── OnComplete.php
├── Crawler.php
├── Page.php
├── PageLogger.php
├── PageRepository.php
└── ResourceModel
│ ├── Page.php
│ └── Page
│ └── Collection.php
├── Observer
├── FlushCrawler.php
└── PageLoadObserver.php
├── README.md
├── Test
└── Unit
│ └── Model
│ ├── CrawlerTest.php
│ ├── PageLoggerTest.php
│ ├── PageRepositoryTest.php
│ └── PageTest.php
├── composer.json
├── etc
├── adminhtml
│ └── system.xml
├── config.xml
├── cron_groups.xml
├── crontab.xml
├── db_schema.xml
├── db_schema_whitelist.json
├── di.xml
├── events.xml
├── frontend
│ ├── events.xml
│ └── routes.xml
└── module.xml
├── example.gif
└── registration.php
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 |
--------------------------------------------------------------------------------
/Api/CrawlerInterface.php:
--------------------------------------------------------------------------------
1 | crawler = $crawler;
38 |
39 | parent::__construct();
40 | }
41 |
/**
 * Console config: registers the command name, its description and the
 * options that can override crawler settings for a single run.
 */
protected function configure()
{
    $this->setName('primer:crawler:run');

    // option name => [shortcut, help text]; every option takes an optional value
    $options = [
        'batch-size' => [null, 'Max number of pages to crawl per batch'],
        'max-run-time' => [null, 'Max time in seconds for the crawler to run before exiting'],
        'sleep-between-batch' => [null, 'Time in seconds to wait between batches'],
        'sleep-when-empty' => [null, 'Time in seconds to wait before trying again when no pages were crawled'],
        'crawl-threshold' => [null, 'Minimum priority for logged page to reach before being crawled'],
        'when-complete' => [null, 'What to do when all pages have been logged (sleep or stop)'],
        'dump-config' => ['d', 'dump run config before running crawler'],
    ];

    foreach ($options as $name => $definition) {
        $this->addOption($name, $definition[0], InputOption::VALUE_OPTIONAL, $definition[1]);
    }

    $this->setDescription('Initiate primer crawler');
}
58 |
/**
 * Main execute function - apply command line overrides, print the header
 * and trigger the crawler.
 *
 * Options that were not passed are left untouched on the crawler. Sleep and
 * threshold options are compared against null/'' instead of being tested for
 * truthiness, so an explicit 0 on the command line is honoured.
 *
 * @param InputInterface $input
 * @param OutputInterface $output
 * @return int exit status, 0 once the crawler run has returned
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    $this->output = $output;
    $this->input = $input;
    $this->crawler->setOutput($output);

    // an option counts as "given" when it carries any value, including "0"
    $given = static function ($value) {
        return $value !== null && $value !== '';
    };

    // a batch size / run time of 0 carries no useful meaning, keep the truthiness test
    $batchSize = $input->getOption('batch-size');
    if ($batchSize) {
        $this->crawler->setBatchSize((int) $batchSize);
    }

    $maxRunTime = $input->getOption('max-run-time');
    if ($maxRunTime) {
        $this->crawler->setMaxRunTime((int) $maxRunTime);
    }

    $sleepBetweenBatch = $input->getOption('sleep-between-batch');
    if ($given($sleepBetweenBatch)) {
        $this->crawler->setSleepBetweenBatch((int) $sleepBetweenBatch);
    }

    $sleepWhenEmpty = $input->getOption('sleep-when-empty');
    if ($given($sleepWhenEmpty)) {
        $this->crawler->setSleepWhenEmpty((int) $sleepWhenEmpty);
    }

    $crawlThreshold = $input->getOption('crawl-threshold');
    if ($given($crawlThreshold)) {
        $this->crawler->setCrawlThreshold((int) $crawlThreshold);
    }

    $whenComplete = $input->getOption('when-complete');
    if ($whenComplete) {
        $this->crawler->setWhenComplete($whenComplete);
    }

    $this->showHeader();

    $this->crawler->run();

    // Symfony Console expects an integer exit code from execute()
    return 0;
}
109 |
110 |
/**
 * Write the static header and, when --dump-config / -d was passed on the
 * command line, the settings the crawler is going to run with.
 */
protected function showHeader()
{
    $this->output->writeln(self::$header);

    $dumpRequested = $this->input->hasParameterOption('--dump-config')
        || $this->input->hasParameterOption('-d');
    if (!$dumpRequested) {
        return;
    }

    // label => current crawler value, printed in this order
    $settings = [
        'Crawl Threshold: ' => $this->crawler->getCrawlThreshold(),
        'Batch Size: ' => $this->crawler->getBatchSize(),
        'Sleep Between Batch: ' => $this->crawler->getSleepBetweenBatch(),
        'Sleep when Empty: ' => $this->crawler->getSleepWhenEmpty(),
        'Max Run Time: ' => $this->crawler->getMaxRunTime(),
    ];
    foreach ($settings as $label => $value) {
        $this->output->writeln($label . $value);
    }

    $this->output->writeln('');
}
126 | }
127 |
--------------------------------------------------------------------------------
/Console/Command/Flush.php:
--------------------------------------------------------------------------------
1 | pageRepository = $pageRepository;
25 |
26 | parent::__construct();
27 | }
28 |
/**
 * Console config: command name and one-line description.
 */
protected function configure()
{
    $this->setName('primer:flush');
    $this->setDescription('Invalidate all urls so that crawler will reprime everything');
}
37 |
/**
 * Main execute function - invalidate all urls in primer table to force recrawl
 *
 * @param InputInterface $input
 * @param OutputInterface $output
 * @return int exit status, 0 once the page repository has been flushed
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    $this->pageRepository->flush();
    $output->writeln('crawler pages flushed - all urls will be crawled');

    // Symfony Console expects an integer exit code from execute()
    return 0;
}
50 | }
51 |
--------------------------------------------------------------------------------
/Cron/Crawler.php:
--------------------------------------------------------------------------------
1 | crawler = $crawler;
25 | $this->configHelper = $configHelper;
26 | }
27 |
/**
 * Cron entry point: runs the crawler when cron execution is enabled in config.
 */
public function execute()
{
    if (!$this->enableOnCron()) {
        return;
    }

    // run() is invoked on whatever setWhenComplete() hands back (fluent setter)
    $crawler = $this->crawler->setWhenComplete($this->getOnComplete());
    $crawler->run();
}
37 |
/**
 * Whether the crawler should run when triggered from cron.
 *
 * @return mixed value of the cron-enabled setting as returned by the config helper
 */
protected function enableOnCron()
{
    $cronEnabled = $this->configHelper->getCronEnabled();

    return $cronEnabled;
}
46 |
/**
 * What cron should do when all pages are crawled.
 *
 * Always answers 'stop', so a cron-triggered run ends once the queue is empty.
 *
 * NOTE(review): the configured value ($this->configHelper->getCronOnComplete())
 * is not consulted here. Before honouring it, confirm that a cron run in
 * "sleep" mode is bounded by a max run time, otherwise the job would never end.
 *
 * @return string
 */
protected function getOnComplete()
{
    return 'stop';
}
56 | }
57 |
--------------------------------------------------------------------------------
/Helper/Config.php:
--------------------------------------------------------------------------------
1 | scopeConfig = $scopeConfig;
12 | }
13 |
14 |
/**
 * Read the `system/cache_primer/sleep_between_batch` setting at store scope.
 *
 * @return mixed raw value as returned by the scope config
 */
public function getSleepBetweenBatch()
{
    $path = 'system/cache_primer/sleep_between_batch';

    return $this->scopeConfig->getValue($path, \Magento\Store\Model\ScopeInterface::SCOPE_STORE);
}
22 |
/**
 * Read the `system/cache_primer/sleep_when_empty` setting at store scope.
 *
 * @return mixed raw value as returned by the scope config
 */
public function getSleepWhenEmpty()
{
    $path = 'system/cache_primer/sleep_when_empty';

    return $this->scopeConfig->getValue($path, \Magento\Store\Model\ScopeInterface::SCOPE_STORE);
}
30 |
/**
 * Read the `system/cache_primer/batch_size` setting at store scope.
 *
 * @return mixed raw value as returned by the scope config
 */
public function getBatchSize()
{
    $path = 'system/cache_primer/batch_size';

    return $this->scopeConfig->getValue($path, \Magento\Store\Model\ScopeInterface::SCOPE_STORE);
}
38 |
/**
 * Read the `system/cache_primer/crawl_threshold` setting at store scope.
 *
 * @return mixed raw value as returned by the scope config
 */
public function getCrawlThreshold()
{
    $path = 'system/cache_primer/crawl_threshold';

    return $this->scopeConfig->getValue($path, \Magento\Store\Model\ScopeInterface::SCOPE_STORE);
}
46 |
47 |
/**
 * Read the `system/cache_primer/cron_enabled` setting at store scope.
 *
 * @return mixed raw value as returned by the scope config
 */
public function getCronEnabled()
{
    $path = 'system/cache_primer/cron_enabled';

    return $this->scopeConfig->getValue($path, \Magento\Store\Model\ScopeInterface::SCOPE_STORE);
}
55 |
/**
 * Read the `system/cache_primer/cron_on_complete` setting at store scope.
 *
 * @return mixed raw value as returned by the scope config
 */
public function getCronOnComplete()
{
    $path = 'system/cache_primer/cron_on_complete';

    return $this->scopeConfig->getValue($path, \Magento\Store\Model\ScopeInterface::SCOPE_STORE);
}
63 |
/**
 * Read the `system/cache_primer/cron_max_run_time` setting at store scope.
 *
 * @return mixed raw value as returned by the scope config
 */
public function getCronMaxRuntime()
{
    $path = 'system/cache_primer/cron_max_run_time';

    return $this->scopeConfig->getValue($path, \Magento\Store\Model\ScopeInterface::SCOPE_STORE);
}
71 |
/**
 * Should we log page requests to our page log?
 *
 * Reads `system/cache_primer/logging_enabled` at store scope and converts it
 * to a real boolean, so the return value matches the documented type
 * (an unset value or "0" yields false).
 *
 * @return bool
 */
public function loggingEnabled()
{
    return (bool) $this->scopeConfig->getValue(
        'system/cache_primer/logging_enabled',
        \Magento\Store\Model\ScopeInterface::SCOPE_STORE
    );
}
84 |
/**
 * Read the `system/cache_primer/logging_sample_number` setting at store scope.
 *
 * NOTE(review): presumably controls how page requests are sampled for logging -
 * confirm against the code that consumes this value.
 *
 * @return mixed raw value as returned by the scope config
 */
public function getLoggingSampleNumber()
{
    $path = 'system/cache_primer/logging_sample_number';

    return $this->scopeConfig->getValue($path, \Magento\Store\Model\ScopeInterface::SCOPE_STORE);
}
97 |
98 | }
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Open Software License ("OSL") v. 3.0
2 |
3 | This Open Software License (the "License") applies to any original work of authorship (the "Original Work") whose owner (the "Licensor") has placed the following licensing notice adjacent to the copyright notice for the Original Work:
4 |
5 | Licensed under the Open Software License version 3.0
6 |
7 | 1. Grant of Copyright License. Licensor grants You a worldwide, royalty-free, non-exclusive, sublicensable license, for the duration of the copyright, to do the following:
8 |
9 | 1. to reproduce the Original Work in copies, either alone or as part of a collective work;
10 |
11 | 2. to translate, adapt, alter, transform, modify, or arrange the Original Work, thereby creating derivative works ("Derivative Works") based upon the Original Work;
12 |
13 | 3. to distribute or communicate copies of the Original Work and Derivative Works to the public, with the proviso that copies of Original Work or Derivative Works that You distribute or communicate shall be licensed under this Open Software License;
14 |
15 | 4. to perform the Original Work publicly; and
16 |
17 | 5. to display the Original Work publicly.
18 |
19 | 2. Grant of Patent License. Licensor grants You a worldwide, royalty-free, non-exclusive, sublicensable license, under patent claims owned or controlled by the Licensor that are embodied in the Original Work as furnished by the Licensor, for the duration of the patents, to make, use, sell, offer for sale, have made, and import the Original Work and Derivative Works.
20 |
21 | 3. Grant of Source Code License. The term "Source Code" means the preferred form of the Original Work for making modifications to it and all available documentation describing how to modify the Original Work. Licensor agrees to provide a machine-readable copy of the Source Code of the Original Work along with each copy of the Original Work that Licensor distributes. Licensor reserves the right to satisfy this obligation by placing a machine-readable copy of the Source Code in an information repository reasonably calculated to permit inexpensive and convenient access by You for as long as Licensor continues to distribute the Original Work.
22 |
23 | 4. Exclusions From License Grant. Neither the names of Licensor, nor the names of any contributors to the Original Work, nor any of their trademarks or service marks, may be used to endorse or promote products derived from this Original Work without express prior permission of the Licensor. Except as expressly stated herein, nothing in this License grants any license to Licensor's trademarks, copyrights, patents, trade secrets or any other intellectual property. No patent license is granted to make, use, sell, offer for sale, have made, or import embodiments of any patent claims other than the licensed claims defined in Section 2. No license is granted to the trademarks of Licensor even if such marks are included in the Original Work. Nothing in this License shall be interpreted to prohibit Licensor from licensing under terms different from this License any Original Work that Licensor otherwise would have a right to license.
24 |
25 | 5. External Deployment. The term "External Deployment" means the use, distribution, or communication of the Original Work or Derivative Works in any way such that the Original Work or Derivative Works may be used by anyone other than You, whether those works are distributed or communicated to those persons or made available as an application intended for use over a network. As an express condition for the grants of license hereunder, You must treat any External Deployment by You of the Original Work or a Derivative Work as a distribution under section 1(c).
26 |
27 | 6. Attribution Rights. You must retain, in the Source Code of any Derivative Works that You create, all copyright, patent, or trademark notices from the Source Code of the Original Work, as well as any notices of licensing and any descriptive text identified therein as an "Attribution Notice." You must cause the Source Code for any Derivative Works that You create to carry a prominent Attribution Notice reasonably calculated to inform recipients that You have modified the Original Work.
28 |
29 | 7. Warranty of Provenance and Disclaimer of Warranty. Licensor warrants that the copyright in and to the Original Work and the patent rights granted herein by Licensor are owned by the Licensor or are sublicensed to You under the terms of this License with the permission of the contributor(s) of those copyrights and patent rights. Except as expressly stated in the immediately preceding sentence, the Original Work is provided under this License on an "AS IS" BASIS and WITHOUT WARRANTY, either express or implied, including, without limitation, the warranties of non-infringement, merchantability or fitness for a particular purpose. THE ENTIRE RISK AS TO THE QUALITY OF THE ORIGINAL WORK IS WITH YOU. This DISCLAIMER OF WARRANTY constitutes an essential part of this License. No license to the Original Work is granted by this License except under this disclaimer.
30 |
31 | 8. Limitation of Liability. Under no circumstances and under no legal theory, whether in tort (including negligence), contract, or otherwise, shall the Licensor be liable to anyone for any indirect, special, incidental, or consequential damages of any character arising as a result of this License or the use of the Original Work including, without limitation, damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses. This limitation of liability shall not apply to the extent applicable law prohibits such limitation.
32 |
33 | 9. Acceptance and Termination. If, at any time, You expressly assented to this License, that assent indicates your clear and irrevocable acceptance of this License and all of its terms and conditions. If You distribute or communicate copies of the Original Work or a Derivative Work, You must make a reasonable effort under the circumstances to obtain the express assent of recipients to the terms of this License. This License conditions your rights to undertake the activities listed in Section 1, including your right to create Derivative Works based upon the Original Work, and doing so without honoring these terms and conditions is prohibited by copyright law and international treaty. Nothing in this License is intended to affect copyright exceptions and limitations (including 'fair use' or 'fair dealing'). This License shall terminate immediately and You may no longer exercise any of the rights granted to You by this License upon your failure to honor the conditions in Section 1(c).
34 |
35 | 10. Termination for Patent Action. This License shall terminate automatically and You may no longer exercise any of the rights granted to You by this License as of the date You commence an action, including a cross-claim or counterclaim, against Licensor or any licensee alleging that the Original Work infringes a patent. This termination provision shall not apply for an action alleging patent infringement by combinations of the Original Work with other software or hardware.
36 |
37 | 11. Jurisdiction, Venue and Governing Law. Any action or suit relating to this License may be brought only in the courts of a jurisdiction wherein the Licensor resides or in which Licensor conducts its primary business, and under the laws of that jurisdiction excluding its conflict-of-law provisions. The application of the United Nations Convention on Contracts for the International Sale of Goods is expressly excluded. Any use of the Original Work outside the scope of this License or after its termination shall be subject to the requirements and penalties of copyright or patent law in the appropriate jurisdiction. This section shall survive the termination of this License.
38 |
39 | 12. Attorneys' Fees. In any action to enforce the terms of this License or seeking damages relating thereto, the prevailing party shall be entitled to recover its costs and expenses, including, without limitation, reasonable attorneys' fees and costs incurred in connection with such action, including any appeal of such action. This section shall survive the termination of this License.
40 |
41 | 13. Miscellaneous. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable.
42 |
43 | 14. Definition of "You" in This License. "You" throughout this License, whether in upper or lower case, means an individual or a legal entity exercising rights under, and complying with all of the terms of, this License. For legal entities, "You" includes any entity that controls, is controlled by, or is under common control with you. For purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
44 |
45 | 15. Right to Use. You may use the Original Work in all ways not otherwise restricted or conditioned by this License or by law, and Licensor promises not to interfere with or be responsible for such uses by You.
46 |
47 | 16. Modification of This License. This License is Copyright (C) 2005 Lawrence Rosen. Permission is granted to copy, distribute, or communicate this License without modification. Nothing in this License permits You to modify this License as applied to the Original Work or to Derivative Works. However, You may modify the text of this License and copy, distribute or communicate your modified version (the "Modified License") and apply it to other original works of authorship subject to the following conditions: (i) You may not indicate in any way that your Modified License is the "Open Software License" or "OSL" and you may not use those names in the name of your Modified License; (ii) You must replace the notice specified in the first paragraph above with the notice "Licensed under <insert your license name here>" or with a notice of your own that is not confusingly similar to the notice in this License; and (iii) You may not claim that your original works are open source software unless your Modified License has been approved by Open Source Initiative (OSI) and You comply with its license review and certification process.
48 |
--------------------------------------------------------------------------------
/Model/Cache/TypePlugin.php:
--------------------------------------------------------------------------------
1 | pageRepository = $pageRepository;
23 | $this->logger = $logger;
24 | }
25 |
/**
 * Flush the primer page log whenever the page cache type is cleaned in
 * CLEANING_MODE_ALL.
 *
 * The wrapped clean() is invoked exactly once, with the caller's arguments,
 * and its result is handed back unchanged. A failure while flushing the
 * primer urls is logged and does not affect the cache clean itself.
 *
 * @todo cache tags are required on the page log before tag based cleans can
 *       invalidate individual urls
 *
 * @param \Magento\PageCache\Model\Cache\Type $subject
 * @param callable $proceed
 * @param string $mode
 * @param array $tags
 * @return mixed result of the wrapped clean() call
 */
public function aroundClean(
    \Magento\PageCache\Model\Cache\Type $subject,
    callable $proceed,
    $mode = \Zend_Cache::CLEANING_MODE_ALL,
    array $tags = []
) {
    // Calling $proceed() a second time without arguments would rerun the
    // clean with its default arguments, so the first result is kept instead.
    $result = $proceed($mode, $tags);

    if ($mode === \Zend_Cache::CLEANING_MODE_ALL) {
        try {
            $this->pageRepository->flush();
        } catch (\Exception $e) {
            $this->logger->error('cannot flush primer urls with error: ' . $e->getMessage());
        }
    }

    return $result;
}
58 | }
59 |
--------------------------------------------------------------------------------
/Model/Config/Source/OnComplete.php:
--------------------------------------------------------------------------------
1 | CrawlerInterface::WHEN_COMPLETE_SLEEP, 'label' => __('Sleep')],
17 | ['value' => CrawlerInterface::WHEN_COMPLETE_STOP, 'label' => __('Stop')],
18 | ];
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
/Model/Crawler.php:
--------------------------------------------------------------------------------
1 | pageRepository = $pageRepository;
54 | $this->storeManager = $storeManager;
55 | $this->objectManager = $objectManager;
56 | $this->scopeConfig = $scopeConfig;
57 | $this->cacheManager = $cacheManager;
58 | $this->configHelper = $configHelper;
59 | }
60 |
/**
 * Main run method.
 *
 * Repeatedly loads the next batch of pages and primes them until one of the
 * following happens:
 *  - the full page cache is disabled (nothing is done at all),
 *  - the queue is empty and the "when complete" action is not sleep,
 *  - a positive max run time is set and has been exceeded.
 *
 * The max run time is checked both after a batch and while idling on an empty
 * queue, so a crawler in sleep mode also stops once its time is up.
 */
public function run()
{
    if (!$this->cacheEnabled()) {
        $this->writeln('Not running crawler as full page cache is disabled');
        return;
    }

    $startTime = time();

    // true once a positive max run time is set and more seconds than that have passed
    $maxRunTimeElapsed = function () use ($startTime) {
        $maxRunTime = (int) $this->getMaxRunTime();

        return $maxRunTime > 0 && (time() - $startTime) > $maxRunTime;
    };

    while (true) {
        $this->getNextBatch();

        // if we have no items in queue either stop process or sleep depending on config
        if (count($this->queue) < 1) {
            if ($this->getWhenComplete() != self::WHEN_COMPLETE_SLEEP) {
                $this->writeln('No pages in queue - exiting');
                return;
            }

            // without this check an idle crawler would ignore its max run time
            if ($maxRunTimeElapsed()) {
                $this->writeln('Max runtime elapsed - exiting');
                return;
            }

            $this->writeln('No pages in queue - waiting '.$this->getSleepWhenEmpty().' seconds');
            // config values arrive as strings/null; sleep() takes an int
            sleep((int) $this->getSleepWhenEmpty()); // @codingStandardsIgnoreLine
            continue;
        }

        $this->writeln('Crawling '.count($this->queue).' pages');

        if ($this->shouldPurge()) {
            $this->purge();
        }

        $this->prime();

        // stop crawler after max run time elapsed
        if ($maxRunTimeElapsed()) {
            $this->writeln('Max runtime elapsed - exiting');
            return;
        }

        sleep((int) $this->getSleepBetweenBatch()); // @codingStandardsIgnoreLine
    }
}
106 |
/**
 * Send a PURGE request for every url in the queue and wait for all of them.
 *
 * Requests are sent asynchronously; each one reports its outcome through
 * writeln(). The failure callback accepts any \Exception (as prime() does),
 * so a rejection reason that is not a RequestException is reported instead
 * of raising a TypeError inside the callback.
 */
private function purge()
{
    $promises = [];

    foreach ($this->queue as $page) {
        $url = $page->getStoreUrl();

        $sendtime = microtime(true);

        $request = new Request('PURGE', $url);

        $promises[] = $this->getClient()->sendAsync($request)->then(
            function (Response $response) use ($page, $sendtime) {
                $elapsed = microtime(true) - $sendtime;
                $this->writeln(
                    'PURGE ' . $page->getPath() . $response->getStatusCode()
                    . ', ' . number_format($elapsed, 2) . 's'
                );
            },
            function (\Exception $e) use ($page) {
                $this->writeln('PURGE ' . $page->getPath() . " FAILED\n" . $e->getMessage());
            }
        );
    }

    \GuzzleHttp\Promise\all($promises)->wait();
}
135 |
/**
 * Send a GET request for every url in the queue and wait for all of them.
 *
 * When a page carries a Magento vary value it is sent as the X-Magento-Vary
 * cookie. A successful response marks the page with status 1; a failure
 * additionally lowers its priority by one. Either way the page is saved.
 */
private function prime()
{
    $pending = [];

    foreach ($this->queue as $page) {
        $url = $page->getStoreUrl();

        $requestOptions = [];
        if ($page->getMagentoVary() != null) {
            $varyCookie = ['X-Magento-Vary' => $page->getMagentoVary()];
            $requestOptions['cookies'] = CookieJar::fromArray($varyCookie, $page->getCookieDomain());
        }

        $sendtime = microtime(true);
        $request = new Request('GET', $url);

        // fulfilled: report status code and timing, mark the page as done
        $onSuccess = function (Response $response) use ($page, $sendtime, $request) {
            $responsetime = microtime(true);
            $elapsed = number_format(($responsetime - $sendtime), 2);

            $this->writeln(
                'GET ' . $page->getPath() . ' ' . $page->getMagentoVary()
                . $response->getStatusCode() . ', ' . $elapsed . 's'
            );

            $page->setStatus(1);
            $this->pageRepository->save($page);
        };

        // rejected: report the error, demote the page and still mark it as done
        $onFailure = function (\Exception $e) use ($page, $sendtime, $request) {
            $this->writeln($e->getMessage());

            $page->setPriority($page->getPriority() - 1);
            $page->setStatus(1);
            $this->pageRepository->save($page);
        };

        $pending[] = $this->getClient()
            ->sendAsync($request, $requestOptions)
            ->then($onSuccess)
            ->otherwise($onFailure);
    }

    \GuzzleHttp\Promise\all($pending)->wait();
}
184 |
/**
 * Update queue with next batch of urls to process.
 *
 * Builds search criteria for pages with status 0 and a priority of at least
 * the crawl threshold, ordered by priority descending and limited to the
 * first page of getBatchSize() entries, then stores the repository result in
 * $this->queue.
 *
 * NOTE(review): the status and priority filters sit in separate filter
 * groups; presumably the repository combines groups with AND - confirm
 * against PageRepository::getList.
 *
 * @throws \Magento\Framework\Exception\LocalizedException
 */
private function getNextBatch()
{
    $statusFilter = $this->objectManager->create(\Magento\Framework\Api\Filter::class);
    $statusFilter->setData('field', 'status');
    $statusFilter->setData('value', 0);
    $statusFilter->setData('condition_type', 'eq');

    $statusFilterGroup = $this->objectManager->create(\Magento\Framework\Api\Search\FilterGroup::class);
    $statusFilterGroup->setData('filters', [$statusFilter]);

    $priorityFilter = $this->objectManager->create(\Magento\Framework\Api\Filter::class);
    $priorityFilter->setData('field', 'priority');
    $priorityFilter->setData('value', $this->getCrawlThreshold());
    $priorityFilter->setData('condition_type', 'gteq');

    $priorityFilterGroup = $this->objectManager->create(\Magento\Framework\Api\Search\FilterGroup::class);
    $priorityFilterGroup->setData('filters', [$priorityFilter]);

    $sortOrder = $this->objectManager->create(\Magento\Framework\Api\SortOrder::class);
    $sortOrders = [
        $sortOrder->setField('priority')->setDirection(\Magento\Framework\Api\SortOrder::SORT_DESC),
    ];

    /** @var \Magento\Framework\Api\SearchCriteriaInterface $searchCriteria */
    $searchCriteria = $this->objectManager->create(\Magento\Framework\Api\SearchCriteriaInterface::class);
    // sort orders are set once, with the real list (no empty setSortOrders() call first)
    $searchCriteria->setFilterGroups([$statusFilterGroup, $priorityFilterGroup])
        ->setPageSize($this->getBatchSize())
        ->setCurrentPage(1)
        ->setSortOrders($sortOrders);

    $this->queue = $this->pageRepository->getList($searchCriteria);
}
227 |
/**
 * Write a line to the console output, if one has been set via setOutput().
 *
 * Without an output (e.g. when run from cron) the message is dropped.
 *
 * @param string|iterable $message
 * @param int|null $options output options bitmask; null uses the output's default
 */
private function writeln($message, $options = null)
{
    if (!$this->output) {
        return;
    }

    // OutputInterface::writeln() takes an int for $options, so null must not be forwarded
    if ($options === null) {
        $this->output->writeln($message);
        return;
    }

    $this->output->writeln($message, $options);
}
234 |
/**
 * Should we send a purge request?
 *
 * Purging is switched off for now, as it needs to either purge by tag or an
 * updated vcl, so this always answers false.
 *
 * @todo add config; once re-enabled a purge is only useful when varnish is
 *       the caching application, i.e. when the store scoped setting
 *       system/full_page_cache/caching_application equals 2
 *
 * @return bool
 */
private function shouldPurge()
{
    return false;
}
252 |
/**
 * Is the full page cache enabled
 *
 * Looks up the 'full_page' entry in the cache manager's status list and
 * returns its status; false when no such entry exists.
 *
 * @return bool
 */
private function cacheEnabled()
{
    foreach ($this->cacheManager->getStatus() as $cacheType => $enabled) {
        if ($cacheType != 'full_page') {
            continue;
        }

        return $enabled;
    }

    return false;
}
267 |
/**
 * Attach the console output that writeln() reports progress to.
 *
 * @param \Symfony\Component\Console\Output\OutputInterface $output
 * @return $this
 */
public function setOutput(\Symfony\Component\Console\Output\OutputInterface $output)
{
    $this->output = $output;

    return $this;
}
277 |
/**
 * Action to take once the queue is empty; defaults to WHEN_COMPLETE_SLEEP
 * when nothing has been set yet.
 *
 * @return mixed
 * @throws \Exception
 */
public function getWhenComplete()
{
    if ($this->whenComplete !== null) {
        return $this->whenComplete;
    }

    // the default goes through the setter like any other value
    $this->setWhenComplete(self::WHEN_COMPLETE_SLEEP);

    return $this->whenComplete;
}
289 |
/**
 * Set the action to take once the queue is empty.
 *
 * @param mixed $whenComplete one of WHEN_COMPLETE_SLEEP or WHEN_COMPLETE_STOP
 * @return $this
 * @throws \Exception when the value is neither of the two known actions
 */
public function setWhenComplete($whenComplete)
{
    $knownActions = [self::WHEN_COMPLETE_SLEEP, self::WHEN_COMPLETE_STOP];

    if (in_array($whenComplete, $knownActions)) {
        $this->whenComplete = $whenComplete;

        return $this;
    }

    throw new \Exception('Invalid Action');
}
304 |
/**
 * Pause between two batches; taken from the config helper the first time it
 * is asked for when no value has been set.
 *
 * @return mixed
 */
public function getSleepBetweenBatch()
{
    if ($this->sleepBetweenBatch !== null) {
        return $this->sleepBetweenBatch;
    }

    $this->setSleepBetweenBatch($this->configHelper->getSleepBetweenBatch());

    return $this->sleepBetweenBatch;
}
316 |
/**
 * Override the pause between two batches.
 *
 * @param mixed $sleepBetweenBatch
 * @return $this
 */
public function setSleepBetweenBatch($sleepBetweenBatch)
{
    $this->sleepBetweenBatch = $sleepBetweenBatch;

    return $this;
}
326 |
/**
 * Pause used when the queue is empty; taken from the config helper the first
 * time it is asked for when no value has been set.
 *
 * @return mixed
 */
public function getSleepWhenEmpty()
{
    if ($this->sleepWhenEmpty !== null) {
        return $this->sleepWhenEmpty;
    }

    $this->setSleepWhenEmpty($this->configHelper->getSleepWhenEmpty());

    return $this->sleepWhenEmpty;
}
338 |
/**
 * Override the pause used when the queue is empty.
 *
 * @param mixed $sleepWhenEmpty
 * @return $this
 */
public function setSleepWhenEmpty($sleepWhenEmpty)
{
    $this->sleepWhenEmpty = $sleepWhenEmpty;

    return $this;
}
348 |
/**
 * Number of pages fetched per batch; taken from the config helper the first
 * time it is asked for when no value has been set.
 *
 * @return mixed
 */
public function getBatchSize()
{
    if ($this->batchSize !== null) {
        return $this->batchSize;
    }

    $this->setBatchSize($this->configHelper->getBatchSize());

    return $this->batchSize;
}
360 |
/**
 * Override the number of pages fetched per batch.
 *
 * @param mixed $batchSize
 * @return $this
 */
public function setBatchSize($batchSize)
{
    $this->batchSize = $batchSize;

    return $this;
}
370 |
/**
 * Max run time as last set through setMaxRunTime(); no config fallback is
 * applied here.
 *
 * @return mixed
 */
public function getMaxRunTime()
{
    $maxRunTime = $this->maxRunTime;

    return $maxRunTime;
}
378 |
/**
 * Set the max run time that run() compares the elapsed seconds against.
 *
 * @param mixed $maxRunTime
 * @return $this
 */
public function setMaxRunTime($maxRunTime)
{
    $this->maxRunTime = $maxRunTime;

    return $this;
}
388 |
/**
 * Minimum priority a page needs before it is queued; taken from the config
 * helper the first time it is asked for when no value has been set.
 *
 * @return mixed
 */
public function getCrawlThreshold()
{
    if ($this->crawlThreshold !== null) {
        return $this->crawlThreshold;
    }

    $this->setCrawlThreshold($this->configHelper->getCrawlThreshold());

    return $this->crawlThreshold;
}
400 |
/**
 * Override the minimum priority a page needs before it is queued.
 *
 * @param mixed $crawlThreshold
 * @return $this
 */
public function setCrawlThreshold($crawlThreshold)
{
    $this->crawlThreshold = $crawlThreshold;

    return $this;
}
410 |
/**
 * Lazily create and reuse the HTTP client used for all crawler requests.
 *
 * The client is created with the 'verify' option set to false and identifies
 * itself with the 'Magento Primer Crawler' user agent.
 *
 * @return Client
 */
protected function getClient()
{
    if ($this->client) {
        return $this->client;
    }

    $clientConfig = [
        'verify' => false,
        'headers' => [
            'User-Agent' => 'Magento Primer Crawler',
        ],
    ];
    $this->client = new Client($clientConfig);

    return $this->client;
}
423 | }
424 |
--------------------------------------------------------------------------------
/Model/Page.php:
--------------------------------------------------------------------------------
1 | storeManager = $storeManager;
24 | $this->scopeConfig = $scopeConfig;
25 |
26 | parent::__construct($context, $registry, $resource, $resourceCollection, $data);
27 | }
28 |
/**
 * Bind this model to its resource model.
 */
protected function _construct()
{
    $this->_init(\EightWire\Primer\Model\ResourceModel\Page::class);
}
33 |
/**
 * @return mixed value stored under self::PAGE_ID
 */
public function getPageId()
{
    $pageId = $this->getData(self::PAGE_ID);

    return $pageId;
}
38 |
/**
 * Store the page id under self::PAGE_ID.
 *
 * @param mixed $pageId
 * @return $this
 */
public function setPageId($pageId)
{
    // setData() is fluent on Magento data objects, so its result is this model
    return $this->setData(self::PAGE_ID, $pageId);
}
44 |
/**
 * @return mixed value stored under self::PATH
 */
public function getPath()
{
    $path = $this->getData(self::PATH);

    return $path;
}
49 |
/**
 * Store the path under self::PATH.
 *
 * @param mixed $path
 * @return $this
 */
public function setPath($path)
{
    // setData() is fluent on Magento data objects, so its result is this model
    return $this->setData(self::PATH, $path);
}
55 |
/**
 * @return mixed value stored under self::STATUS
 */
public function getStatus()
{
    $status = $this->getData(self::STATUS);

    return $status;
}
60 |
/**
 * Store the status under self::STATUS.
 *
 * @param mixed $status
 * @return $this
 */
public function setStatus($status)
{
    // setData() is fluent on Magento data objects, so its result is this model
    return $this->setData(self::STATUS, $status);
}
66 |
/**
 * @return mixed value stored under self::PRIORITY
 */
public function getPriority()
{
    $priority = $this->getData(self::PRIORITY);

    return $priority;
}
71 |
/**
 * @param mixed $priority
 * @return $this
 */
public function setPriority($priority)
{
    // setData() hands back the model itself, so the fluent return is preserved
    return $this->setData(self::PRIORITY, $priority);
}
77 |
/**
 * @return mixed value stored under self::CREATED_AT
 */
public function getCreatedAt()
{
    $createdAt = $this->getData(self::CREATED_AT);

    return $createdAt;
}
82 |
/**
 * @param mixed $createdAt
 * @return $this
 */
public function setCreatedAt($createdAt)
{
    // setData() hands back the model itself, so the fluent return is preserved
    return $this->setData(self::CREATED_AT, $createdAt);
}
88 |
/**
 * @return mixed value stored under self::UPDATED_AT
 */
public function getUpdatedAt()
{
    $updatedAt = $this->getData(self::UPDATED_AT);

    return $updatedAt;
}
93 |
/**
 * @param mixed $updatedAt
 * @return $this
 */
public function setUpdatedAt($updatedAt)
{
    // setData() hands back the model itself, so the fluent return is preserved
    return $this->setData(self::UPDATED_AT, $updatedAt);
}
99 |
/**
 * @return mixed value stored under self::STORE_ID
 */
public function getStoreId()
{
    $storeId = $this->getData(self::STORE_ID);

    return $storeId;
}
104 |
/**
 * @param mixed $storeId
 * @return $this
 */
public function setStoreId($storeId)
{
    // setData() hands back the model itself, so the fluent return is preserved
    return $this->setData(self::STORE_ID, $storeId);
}
110 |
/**
 * Bump the stored priority by one.
 *
 * Uses the increment operator on purpose so the result for a null or
 * string priority stays exactly what PHP's `++` produces.
 *
 * @return void
 */
public function incrementPriority()
{
    $next = $this->getPriority();
    ++$next;

    $this->setPriority($next);
}
117 |
/**
 * Build the URL of this page: the store's base URL with trailing slashes
 * removed, followed by the stored path.
 *
 * @return string
 */
public function getStoreUrl()
{
    $store = $this->storeManager->getStore($this->getStoreId());
    $baseUrl = $store->getBaseUrl();

    return rtrim($baseUrl, "/") . $this->getPath();
}
123 |
/**
 * Look up the configured cookie domain ('web/cookie/cookie_domain') in the
 * scope of the store this page belongs to.
 *
 * @return mixed whatever the scope config returns for that path
 */
public function getCookieDomain()
{
    $store = $this->storeManager->getStore($this->getStoreId());
    $storeCode = $store->getCode();
    $scopeType = \Magento\Store\Model\ScopeInterface::SCOPE_STORE;

    return $this->scopeConfig->getValue('web/cookie/cookie_domain', $scopeType, $storeCode);
}
134 |
/**
 * @return mixed value stored under self::MAGENTO_VARY
 */
public function getMagentoVary()
{
    $vary = $this->getData(self::MAGENTO_VARY);

    return $vary;
}
139 |
/**
 * @param mixed $value
 * @return $this
 */
public function setMagentoVary($value)
{
    // setData() hands back the model itself, so the fluent return is preserved
    return $this->setData(self::MAGENTO_VARY, $value);
}
145 | }
146 |
--------------------------------------------------------------------------------
/Model/PageLogger.php:
--------------------------------------------------------------------------------
1 | pageRepository = $pageRepository;
22 | $this->storeManager = $storeManager;
23 | $this->objectManager = $objectManager;
24 | $this->config = $configHelper;
25 | }
26 |
/**
 * Record a page view: bump the priority of an already-known page or create
 * a new page entry, then persist it.
 *
 * Nothing is logged unless all of these hold, checked in this order:
 *  - logging is enabled in configuration
 *  - the request is eligible (see shouldLogRequest: GET only, no blacklisted
 *    user agent or tracking parameter, whitelisted action)
 *  - the response is eligible (see shouldLogResponse: HTTP 200 only)
 *  - the request falls inside the configured sample (e.g. 1 in 10)
 *
 * @param \Magento\Framework\App\Request\Http $request
 * @param \Magento\Framework\App\Response\Http $response
 * @return void
 */
public function log(\Magento\Framework\App\Request\Http $request, \Magento\Framework\App\Response\Http $response)
{
    if (!$this->config->loggingEnabled()) {
        return;
    }
    if (!$this->shouldLogRequest($request)) {
        return;
    }
    if (!$this->shouldLogResponse($response)) {
        return;
    }
    if (!$this->inSample()) {
        return;
    }

    $matches = $this->matchRequest($request);
    $storeId = $this->storeManager->getStore()->getId();

    if (!$matches->getTotalCount()) {
        // first sighting of this path/vary combination: start a fresh entry
        $page = $this->pageRepository->create();
        $page->setPath($this->getPath($request));
        $page->setMagentoVary($request->getCookie('X-Magento-Vary'));
        $page->setStoreId($storeId);
        $page->setStatus(1);
        $page->setPriority(1);
        $page->setUpdatedAt(time());

        // Storing cache tags here would allow invalidating by tag, but
        // X-Magento-Tags is unset on the header in
        // Magento\Framework\App\PageCache\Kernel::process
    } else {
        // already known: one more view raises its priority
        $page = $matches->getFirstItem();
        $page->incrementPriority();
        $page->setUpdatedAt(time());
    }

    $this->pageRepository->save($page);
}
69 |
/**
 * Check a request object to see if we should log the page.
 *
 * Rejects, in order: requests without a full action name, non-GET requests,
 * blacklisted user agents, requests carrying a blacklisted query parameter,
 * and actions that are not whitelisted.
 *
 * @param \Magento\Framework\App\Request\Http $request
 * @return bool
 */
private function shouldLogRequest(\Magento\Framework\App\Request\Http $request)
{
    // this happens when returning a cached page
    if ($request->getFullActionName() == null) {
        return false;
    }

    // only log GET requests as that is all we crawl
    if ($request->getMethod() != "GET") {
        return false;
    }

    // @todo get these from configuration xml and further db config
    $blacklistAgents = [
        '/^Magento Primer Crawler$/',
        '/Googlebot/',
        '/UptimeRobot/'
    ];

    // the header may be absent; cast so preg_match always receives a string
    $userAgent = (string) $request->getHeader('User-Agent');

    foreach ($blacklistAgents as $regex) {
        if (preg_match($regex, $userAgent)) {
            return false;
        }
    }

    // @todo get these from configuration xml and further db config
    $blacklistParams = [
        'mc_id',
        'mc_eid',
        'SID',
        'utm_source',
        'utm_campaign',
        'utm_medium',
        'utm_term',
        'fbclid',
        'gclid',
        'emailoffers' //advintage only - to be removed and added through configuration
    ];

    foreach (array_keys($request->getParams()) as $parameterName) {
        // Strict comparison on the string form: numeric query keys arrive as
        // integers, and a loose in_array(0, [...]) matches every name on PHP < 8,
        // which wrongly excluded such requests from logging.
        if (in_array((string) $parameterName, $blacklistParams, true)) {
            return false;
        }
    }

    // Cached pages have no action name (handled by the first check), so only
    // uncached, explicitly whitelisted actions get this far.
    return $this->actionIsWhitelisted($request->getFullActionName());
}
132 |
133 |
/**
 * Decide whether a response is eligible for logging.
 *
 * Only responses with HTTP status 200 qualify, so redirects and error pages
 * never end up in the crawl queue.
 *
 * @param \Magento\Framework\App\Response\Http $response
 * @return bool
 */
private function shouldLogResponse(\Magento\Framework\App\Response\Http $response)
{
    $statusCode = $response->getHttpResponseCode();

    return $statusCode == 200;
}
148 |
/**
 * Randomly decide whether the current page view is part of the logging sample.
 *
 * Popularity can be measured without recording every view. With a configured
 * sample number N greater than 1, roughly one view in N is logged; with N of
 * 1 or less every view is logged.
 *
 * @return bool
 */
private function inSample()
{
    $sampleSize = (int) $this->config->getLoggingSampleNumber();

    // short-circuit keeps rand() from being called when sampling is off
    return $sampleSize <= 1 || rand(1, $sampleSize) === 1;
}
167 |
/**
 * Path used as the log key: the request string, or "/" when that is empty.
 *
 * @param $request
 * @return string
 */
private function getPath($request)
{
    $requestString = $request->getRequestString();

    if ($requestString) {
        return $requestString;
    }

    return '/';
}
177 |
178 |
/**
 * Check for existing logs matching the current request.
 *
 * A page matches when its path, its store id and its X-Magento-Vary value
 * all match the request. A request without the vary cookie matches only
 * pages whose magento_vary is NULL. Each filter goes in its own filter
 * group so that all three conditions must hold.
 *
 * Previously the store filter group was overwritten by the vary filter group
 * before the search criteria were built, so the store id was never part of
 * the query and pages logged for other stores could be matched.
 *
 * @param $request
 * @return \EightWire\Primer\Api\Data\PageSearchResultsInterface
 * @throws \Magento\Framework\Exception\LocalizedException
 */
public function matchRequest($request)
{
    $storeId = $this->storeManager->getStore()->getId();
    $path = $this->getPath($request);
    $vary = $request->getCookie('X-Magento-Vary');

    // builds a filter group holding one filter; with a condition type the
    // value is left unset (used for the IS NULL check on magento_vary)
    $buildGroup = function ($field, $value, $conditionType = null) {
        $filter = $this->objectManager->create('Magento\Framework\Api\Filter');
        $filter->setData('field', $field);

        if ($conditionType !== null) {
            $filter->setData('condition_type', $conditionType);
        } else {
            $filter->setData('value', $value);
        }

        $group = $this->objectManager->create('Magento\Framework\Api\Search\FilterGroup');
        $group->setData('filters', [$filter]);

        return $group;
    };

    $pathFilterGroup = $buildGroup('path', $path);
    $storeFilterGroup = $buildGroup('store_id', $storeId);
    $varyFilterGroup = $vary
        ? $buildGroup('magento_vary', $vary)
        : $buildGroup('magento_vary', null, 'null');

    $searchCriteria = $this->objectManager->create('Magento\Framework\Api\SearchCriteriaInterface');
    $searchCriteria->setFilterGroups([$pathFilterGroup, $storeFilterGroup, $varyFilterGroup]);

    return $this->pageRepository->getList($searchCriteria);
}
225 |
/**
 * Tell whether a controller's full action name is explicitly whitelisted
 * for logging.
 *
 * @todo move the list to xml config so other modules can provide more actions
 *
 * @param $action
 * @return bool
 */
private function actionIsWhitelisted($action)
{
    $loggableActions = [
        'cms_index_index',
        'cms_page_view',
        'catalog_product_view',
        'catalog_category_view'
    ];

    $isWhitelisted = in_array($action, $loggableActions);

    return $isWhitelisted;
}
248 | }
249 |
--------------------------------------------------------------------------------
/Model/PageRepository.php:
--------------------------------------------------------------------------------
1 | pageFactory = $pageFactory;
45 | $this->searchResultFactory = $searchResultFactory;
46 | $this->collectionProcessor = $collectionProcessor;
47 | $this->searchCriteriaInterfaceFactory = $searchCriteriaInterfaceFactory;
48 | }
49 |
/**
 * Loads a specified page, caching it in the in-memory registry by id.
 *
 * Existence is checked with getId(), which reads the model's configured id
 * field. The resource model declares 'page_id' as that field, whereas
 * getEntityId() reads an 'entity_id' value this model is never given, so the
 * previous check reported every loaded page as missing.
 *
 * @param int $pageId The page ID.
 * @return \EightWire\Primer\Api\Data\PageInterface Page Interface
 * @throws InputException when no id is given
 * @throws NoSuchEntityException when no page with that id exists
 */
public function get($pageId)
{
    if (!$pageId) {
        throw new InputException(__('Id required'));
    }

    if (!isset($this->registry[$pageId])) {
        /** @var \EightWire\Primer\Api\Data\PageInterface $entity */
        $entity = $this->pageFactory->create()->load($pageId);

        if (!$entity->getId()) {
            throw new NoSuchEntityException(__('Requested entity doesn\'t exist'));
        }

        $this->registry[$pageId] = $entity;
    }

    return $this->registry[$pageId];
}
75 |
/**
 * Build a new, unsaved page instance through the page factory.
 *
 * @return Page
 */
public function create()
{
    $page = $this->pageFactory->create();

    return $page;
}
85 |
/**
 * Lists pages that match the given search criteria.
 *
 * The criteria are applied to a fresh collection by the collection
 * processor and then attached to the collection, which is returned as the
 * search result.
 *
 * @param \Magento\Framework\Api\SearchCriteriaInterface $searchCriteria
 * @return \EightWire\Primer\Api\Data\PageSearchResultsInterface
 */
public function getList(\Magento\Framework\Api\SearchCriteriaInterface $searchCriteria)
{
    /** @var \EightWire\Primer\Model\ResourceModel\Page\Collection $collection */
    $collection = $this->searchResultFactory->create();

    $this->collectionProcessor->process($searchCriteria, $collection);
    $collection->setSearchCriteria($searchCriteria);

    return $collection;
}
100 |
/**
 * Deletes a specified page and evicts it from the in-memory registry.
 *
 * The id is read with getId() before deleting so the registry entry removed
 * is the one get() stored under the page id. getEntityId() reads an
 * 'entity_id' value this model is never given, so the cached entry was
 * previously left behind.
 *
 * @param \EightWire\Primer\Api\Data\PageInterface $entity
 * @return bool always true on success
 * @throws CouldNotDeleteException when the underlying delete fails
 */
public function delete(\EightWire\Primer\Api\Data\PageInterface $entity)
{
    $pageId = $entity->getId();

    try {
        $entity->delete();
    } catch (\Exception $e) {
        throw new CouldNotDeleteException(__('Could not delete page'), $e);
    }

    if ($pageId) {
        unset($this->registry[$pageId]);
    }

    return true;
}
118 |
119 |
/**
 * Deletes a specified page by ID.
 *
 * The entity is taken from the registry when cached, otherwise loaded.
 * A freshly loaded entity is no longer written into the registry: doing so
 * left a deleted (or never-existing, empty) entity cached under $pageId,
 * which a later get($pageId) would return instead of throwing.
 *
 * @param int $pageId
 * @return bool true on success
 * @throws CouldNotDeleteException when the underlying delete fails
 * @throws InputException when no id is given
 */
public function deleteById($pageId)
{
    if (!$pageId) {
        throw new InputException(__('Id required'));
    }

    /** @var \EightWire\Primer\Api\Data\PageInterface $entity */
    $entity = isset($this->registry[$pageId])
        ? $this->registry[$pageId]
        : $this->pageFactory->create()->load($pageId);

    $deleted = $this->delete($entity);

    // make sure nothing stale stays cached under the requested id
    unset($this->registry[$pageId]);

    return $deleted;
}
142 |
143 |
/**
 * Persists a page and caches it in the in-memory registry.
 *
 * The registry is keyed by getId(), the same page id that get() uses.
 * getEntityId() reads an 'entity_id' value this model is never given, so
 * every saved page used to be cached under the same empty key.
 *
 * @param \EightWire\Primer\Api\Data\PageInterface $entity the page.
 * @return \EightWire\Primer\Api\Data\PageInterface the saved page (same instance).
 * @throws CouldNotSaveException when the underlying save fails
 */
public function save(\EightWire\Primer\Api\Data\PageInterface $entity)
{
    try {
        $entity->save();
    } catch (\Exception $e) {
        throw new CouldNotSaveException(__('Could not save page'), $e);
    }

    $pageId = $entity->getId();
    if ($pageId) {
        $this->registry[$pageId] = $entity;
    }

    return $entity;
}
161 |
162 |
/**
 * Flush all pages within a collection so they can be crawled again.
 *
 * Without a collection, every page is flushed: an unfiltered search
 * criteria object is used to list them all.
 *
 * @param null $collection collection to flush, or null for all pages
 */
public function flush($collection = null)
{
    $pages = $collection;

    if (null == $pages) {
        $everything = $this->searchCriteriaInterfaceFactory->create();
        $pages = $this->getList($everything);
    }

    $pages->flushStatus();
}
177 | }
178 |
--------------------------------------------------------------------------------
/Model/ResourceModel/Page.php:
--------------------------------------------------------------------------------
1 | _init('eightwire_primer_page', 'page_id');
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/Model/ResourceModel/Page/Collection.php:
--------------------------------------------------------------------------------
1 | _init('EightWire\Primer\Model\Page', 'EightWire\Primer\Model\ResourceModel\Page');
23 | }
24 |
/**
 * Set items list: add each given item to this collection.
 *
 * A null or empty list leaves the collection untouched.
 *
 * @param array|null $items
 * @return $this
 * @throws \Exception
 */
public function setItems(array $items = null)
{
    foreach ($items ?: [] as $entry) {
        $this->addItem($entry);
    }

    return $this;
}
47 |
/**
 * Get the search criteria previously attached with setSearchCriteria().
 *
 * @return \Magento\Framework\Api\SearchCriteriaInterface|null
 */
public function getSearchCriteria()
{
    $criteria = $this->searchCriteria;

    return $criteria;
}
57 |
/**
 * Attach the search criteria this collection was built from.
 *
 * The criteria are only stored; they are not applied to the query here.
 *
 * @param \Magento\Framework\Api\SearchCriteriaInterface $searchCriteria
 * @return $this
 * @SuppressWarnings(PHPMD.UnusedFormalParameter)
 */
public function setSearchCriteria(\Magento\Framework\Api\SearchCriteriaInterface $searchCriteria = null)
{
    $collection = $this;
    $collection->searchCriteria = $searchCriteria;

    return $collection;
}
70 |
/**
 * Get total count, delegating to the collection's getSize().
 *
 * @return int
 */
public function getTotalCount()
{
    $size = $this->getSize();

    return $size;
}
80 |
/**
 * Set total count.
 *
 * Deliberately a no-op: the given value is ignored, because
 * getTotalCount() reports the collection size instead.
 *
 * @param int $totalCount ignored
 * @return $this
 * @SuppressWarnings(PHPMD.UnusedFormalParameter)
 */
public function setTotalCount($totalCount)
{
    $collection = $this;

    return $collection;
}
92 |
/**
 * Bulk reset the status column to 0 for every row in this collection.
 *
 * Does nothing when the collection matches no rows: an empty id list used to
 * produce the invalid SQL "... IN()". The id list is now bound through the
 * adapter's placeholder quoting instead of being concatenated into the
 * WHERE string.
 *
 * @return void
 * @throws \Magento\Framework\Exception\LocalizedException
 */
public function flushStatus()
{
    $ids = $this->getAllIds();

    if (!$ids) {
        return;
    }

    $resource = $this->getResource();

    $this->getConnection()->update(
        $resource->getMainTable(),
        ['status' => 0],
        [$resource->getIdFieldName() . ' IN (?)' => $ids]
    );
}
106 | }
107 |
--------------------------------------------------------------------------------
/Observer/FlushCrawler.php:
--------------------------------------------------------------------------------
1 | pageRepository = $pageRepository;
30 | $this->logger = $loggerInterface;
31 | }
32 |
/**
 * Flush crawler page urls.
 *
 * Currently a deliberate no-op: the flush call is disabled, so observing
 * the event does not reset any page status.
 *
 * @param \Magento\Framework\Event\Observer $observer
 * @return void
 */
public function execute(\Magento\Framework\Event\Observer $observer)
{
    // Disabled. To re-enable flushing on this event:
    // $this->logger->info('flushing primer cache - '.$observer->getEventName());
    // $this->pageRepository->flush();
}
45 | }
46 |
--------------------------------------------------------------------------------
/Observer/PageLoadObserver.php:
--------------------------------------------------------------------------------
1 | pageLogger = $pageLogger;
20 | }
21 |
/**
 * Hand the observed request/response pair to the page logger.
 *
 * Logging is best-effort: any exception is swallowed on purpose so a
 * logging failure can never break the page load.
 *
 * @param Observer $observer
 */
public function execute(Observer $observer)
{
    try {
        $event = $observer->getEvent();
        $this->pageLogger->log($event->getRequest(), $event->getResponse());
    } catch (\Exception $e) {
        // intentionally ignored, see docblock
    }
}
36 | }
37 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://packagist.org/packages/eightwire/magento2-module-primer)
2 | [](https://www.codacy.com/app/andrewkett/magento2-module-primer?utm_source=github.com&utm_medium=referral&utm_content=8WireDigital/magento2-module-primer&utm_campaign=Badge_Grade)
3 | [](https://packagist.org/packages/eightwire/magento2-module-primer)
4 |
5 |
6 | # Magento 2 Cache Primer
7 |
8 | A full page cache priming tool for Magento 2
9 |
10 | Requests to whitelisted actions are logged to a history table, with higher priority given to the pages that are viewed most frequently.
11 | A console command and a cron task are provided to run the crawler, which primes the pages in the queue from highest to lowest priority.
12 | Supports multiple store views and X-Magento-Vary cookies.
13 |
14 |
15 | 
16 |
17 |
18 | ## Usage
19 |
20 | ```
21 | php bin/magento primer:crawler:run # Run crawler task
22 | php bin/magento primer:flush # Flush urls to force a recrawl
23 | ```
24 |
25 | Provided by [8 Wire Digital](https://www.8wiredigital.co.nz/)
26 |
--------------------------------------------------------------------------------
/Test/Unit/Model/CrawlerTest.php:
--------------------------------------------------------------------------------
1 | crawler = $objectManager->getObject('EightWire\Primer\Model\Crawler');
18 | }
19 |
/**
 * setWhenComplete() must reject a value that is not a known option.
 */
public function testInvalidSetWhenComplete()
{
    $unsupportedValue = 'invalid string';

    $this->expectException('\Exception');
    $this->crawler->setWhenComplete($unsupportedValue);
}
25 |
/**
 * setWhenComplete() accepts a known option and returns a Crawler.
 */
public function testValidSetWhenComplete()
{
    $returned = $this->crawler->setWhenComplete(Crawler::WHEN_COMPLETE_STOP);

    $this->assertInstanceOf(Crawler::class, $returned);
}
30 |
31 |
32 |
33 |
34 | }
--------------------------------------------------------------------------------
/Test/Unit/Model/PageLoggerTest.php:
--------------------------------------------------------------------------------
1 | pageLogger = $objectManager->getObject('EightWire\Primer\Model\PageLogger');
18 | }
19 |
/**
 * Placeholder: asserts nothing about PageLogger itself; it only passes if
 * setUp() could build the object.
 */
public function test()
{
    $alwaysTrue = true;

    $this->assertTrue($alwaysTrue);
}
24 | }
--------------------------------------------------------------------------------
/Test/Unit/Model/PageRepositoryTest.php:
--------------------------------------------------------------------------------
1 | pageRepository = $objectManager->getObject('EightWire\Primer\Model\PageRepository');
18 | }
19 |
/**
 * Placeholder: asserts nothing about PageRepository itself; it only passes
 * if setUp() could build the object.
 */
public function test()
{
    $alwaysTrue = true;

    $this->assertTrue($alwaysTrue);
}
24 | }
--------------------------------------------------------------------------------
/Test/Unit/Model/PageTest.php:
--------------------------------------------------------------------------------
1 | page = $objectManager->getObject('EightWire\Primer\Model\Page');
18 | }
19 |
/**
 * incrementPriority() raises a priority of 1 to 2.
 */
public function testIncrementPriority()
{
    $page = $this->page;
    $page->setPriority(1);

    $page->incrementPriority();

    $expected = 2;
    $this->assertEquals($expected, $page->getPriority());
}
27 | }
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "eightwire/magento2-module-primer",
3 | "description": "A cache primer extension for Magento 2",
4 | "type": "magento2-module",
5 | "homepage": "https://github.com/8WireDigital/magento2-module-primer",
6 | "license": [
7 | "OSL-3.0",
8 | "AFL-3.0"
9 | ],
10 | "authors":[
11 | {
12 | "name":"Andrew Kett",
13 | "email":"andrew@8wiredigital.co.nz"
14 | }
15 | ],
16 | "require": {
17 | "guzzlehttp/guzzle": "^6.0"
18 | },
19 | "autoload": {
20 | "files": [
21 | "registration.php"
22 | ],
23 | "psr-4": {
24 | "EightWire\\Primer\\": ""
25 | }
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/etc/adminhtml/system.xml:
--------------------------------------------------------------------------------
1 |
2 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 | Number of urls to crawl in a batch
17 |
18 |
19 |
20 |
21 | After batch of urls are crawled wait x seconds
22 |
23 |
24 |
25 |
26 | When there are no urls left to crawl wait for x seconds
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 | A page must be logged this many times before it is crawled
36 |
37 |
38 |
39 |
40 |
41 | Magento\Config\Model\Config\Source\Enabledisable
42 | Should the crawler run through Magento's cron system?
43 |
44 |
45 |
46 | Cron will terminate after running longer than this time (seconds)
47 |
48 | 1
49 |
50 |
51 |
52 |
53 | EightWire\Primer\Model\Config\Source\OnComplete
54 | When all urls have been crawled either "stop" or "sleep" and wait for more urls
55 |
56 | 1
57 |
58 |
59 |
60 |
61 |
62 | Magento\Config\Model\Config\Source\Enabledisable
63 | Should page requests be logged to primer page database
64 |
65 |
66 |
67 |
68 | If > 1 pages will only be logged on every x requests, e.g if 10 1 in every 10 page requests will be logged
69 |
70 | 1
71 |
72 |
73 |
74 |
75 |
76 |
77 |
--------------------------------------------------------------------------------
/etc/config.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | 2
7 | 10
8 | 5
9 | 5
10 | 0
11 | stop
12 | 240
13 | 1
14 | 1
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/etc/cron_groups.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | 1
5 | 10
6 | 2
7 | 10
8 | 60
9 | 600
10 | 1
11 |
12 |
--------------------------------------------------------------------------------
/etc/crontab.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | */5 * * * *
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/etc/db_schema.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |