├── .gitignore
├── CHANGELOG.md
├── src
│   ├── registration.php
│   ├── etc
│   │   ├── module.xml
│   │   ├── frontend
│   │   │   └── di.xml
│   │   ├── acl.xml
│   │   └── adminhtml
│   │       └── system.xml
│   ├── Model
│   │   ├── Service
│   │   │   ├── WhitelistService.php
│   │   │   ├── LogService.php
│   │   │   └── BlacklistService.php
│   │   └── Logger.php
│   ├── Plugin
│   │   └── Magento
│   │       └── Framework
│   │           └── Session
│   │               └── SessionManagerPlugin.php
│   └── Helper
│       └── Config.php
├── LICENSE.md
├── composer.json
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | /.idea
2 | /vendor
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | All notable changes to `friends-of-hyva/magento2-crawler-session` will be documented in this file.
4 |
5 | ## 1.0.0 - 2024-03-15
6 |
7 | - initial release
8 |
--------------------------------------------------------------------------------
/src/registration.php:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/src/etc/frontend/di.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
6 |
9 |
10 |
11 |
12 |
--------------------------------------------------------------------------------
/src/Model/Service/WhitelistService.php:
--------------------------------------------------------------------------------
1 | config->getWhitelist())) {
19 | return true;
20 | }
21 |
22 | return false;
23 | }
24 | }
--------------------------------------------------------------------------------
/src/Model/Service/LogService.php:
--------------------------------------------------------------------------------
1 | alreadyLogged) {
21 | $this->logger->debug($userAgent);
22 | $this->alreadyLogged = true;
23 | }
24 | }
25 | }
--------------------------------------------------------------------------------
/src/etc/acl.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/src/Model/Logger.php:
--------------------------------------------------------------------------------
1 | initHandlers());
15 | }
16 |
/**
 * Build the handler list for this logger.
 *
 * Creates a StreamHandler for BP . '/var/log/crawler-session.log' and
 * attaches a LineFormatter that emits "<datetime>: <message>" lines with a
 * "Y-m-d H:i:s" date format.
 *
 * @return array one-element array holding the value returned by setFormatter()
 */
private function initHandlers(): array
{
    $streamHandler = new StreamHandler(BP . '/var/log/crawler-session.log');
    $lineFormatter = new LineFormatter("%datetime%: %message%\n", "Y-m-d H:i:s");

    return [$streamHandler->setFormatter($lineFormatter)];
}
24 | }
--------------------------------------------------------------------------------
/src/Model/Service/BlacklistService.php:
--------------------------------------------------------------------------------
1 | config->getBlacklist())
21 | || $this->crawlerDetect->isCrawler($userAgent)
22 | ) {
23 | return true;
24 | }
25 |
26 | return false;
27 | }
28 | }
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Marcus Venghaus
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "friends-of-hyva/magento2-crawler-session",
3 | "description": "Prevent crawlers from creating a session",
4 | "type": "magento2-module",
5 | "keywords": [
6 | "magento",
7 | "magento2",
8 | "crawler",
9 | "bot",
10 | "session",
11 | "prevent",
12 | "block",
13 | "disable"
14 | ],
15 | "homepage": "https://github.com/friends-of-hyva/magento2-crawler-session",
16 | "support": {
17 | "issues": "https://github.com/friends-of-hyva/magento2-crawler-session/issues",
18 | "source": "https://github.com/friends-of-hyva/magento2-crawler-session"
19 | },
20 | "license": "MIT",
21 | "authors": [
22 | {
23 | "name": "Marcus Venghaus",
24 | "email": "marcus.venghaus@inklammern.de",
25 | "role": "Developer"
26 | }
27 | ],
28 | "require": {
29 | "php": "^8.1",
30 | "magento/framework": "*",
31 | "jaybizzle/crawler-detect": "^1.2"
32 | },
33 | "autoload": {
34 | "files": [
35 | "src/registration.php"
36 | ],
37 | "psr-4": {
38 | "FriendsOfHyva\\CrawlerSession\\": "src"
39 | }
40 | },
41 | "config": {
42 | "sort-packages": true
43 | },
44 | "minimum-stability": "stable",
45 | "prefer-stable": true
46 | }
47 |
--------------------------------------------------------------------------------
/src/Plugin/Magento/Framework/Session/SessionManagerPlugin.php:
--------------------------------------------------------------------------------
1 | httpRequest->getServer('HTTP_USER_AGENT', '');
28 |
29 | if ($this->config->isEnabled() &&
30 | !$this->whitelistService->isWhitelisted($userAgent) &&
31 | $this->blacklistService->isBlacklisted($userAgent)
32 | ) {
33 | if ($this->config->isLogEnabled()) {
34 | $this->logService->log($userAgent);
35 | }
36 |
37 | return $subject;
38 | }
39 |
40 | return $proceed();
41 | }
42 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Magento 2 - Crawler Session
2 |
3 | ## Prevent crawlers from creating a session!
4 |
5 | It's insane how many requests are coming from crawlers & bots nowadays. Every request creates an unnecessary session.
6 | If you block the SEO tools via robots.txt there are still plenty of search engines left. 100 requests per minute is not unusual.
7 | This leads to a lot of sessions which could affect some limits like Redis "max_concurrency".
8 |
9 | This module prevents the initiation of a session if a crawler is detected. The detection is based on:
10 |
11 | https://github.com/JayBizzle/Crawler-Detect
12 |
13 | ## Installation
14 |
15 | Install the package via composer:
16 |
17 | ```bash
18 | composer require friends-of-hyva/magento2-crawler-session
19 |
20 | php bin/magento setup:upgrade
21 | ```
22 |
23 | # Usage
24 |
25 | After installation, you need to enable the module:
26 |
27 | ```
28 | Stores > Configuration > General > Web > Prevent Crawler Session
29 | ```
30 |
31 | ## Configuration
32 |
33 | ### Enabled
34 |
35 | If enabled, the detected crawler will no longer create a session.
36 |
37 | ### Additional Blacklist
38 |
39 | If the provided list from crawlerdetect.io is not enough, you can define your own custom user agents here.
40 |
41 | ### Whitelist
42 |
43 | For situations where you need to allow a user agent that is blacklisted, you can do so here.
44 |
45 | ### Log
46 |
47 | If enabled, all blocked user agents are logged to `var/log/crawler-session.log`.
48 | This is intended only for temporary debugging, because the log file can grow very quickly!
--------------------------------------------------------------------------------
/src/Helper/Config.php:
--------------------------------------------------------------------------------
1 | scopeConfig->isSetFlag(self::XML_PATH_ENABLED, ScopeInterface::SCOPE_STORE, $storeId);
20 | }
21 |
/**
 * Report whether the XML_PATH_LOG_DEBUG flag is set in store scope.
 *
 * @param int|null $storeId store identifier handed to the scope config unchanged
 */
public function isLogEnabled(?int $storeId = null): bool
{
    $logFlag = $this->scopeConfig->isSetFlag(
        self::XML_PATH_LOG_DEBUG,
        ScopeInterface::SCOPE_STORE,
        $storeId
    );

    return $logFlag;
}
26 |
/**
 * Return the entries stored under XML_PATH_BLACKLIST, as parsed by getList().
 *
 * @param int|null $storeId store identifier forwarded to getList()
 */
public function getBlacklist(?int $storeId = null): array
{
    $blacklistEntries = $this->getList(self::XML_PATH_BLACKLIST, $storeId);

    return $blacklistEntries;
}
31 |
/**
 * Return the entries stored under XML_PATH_WHITELIST, as parsed by getList().
 *
 * @param int|null $storeId store identifier forwarded to getList()
 */
public function getWhitelist(?int $storeId = null): array
{
    $whitelistEntries = $this->getList(self::XML_PATH_WHITELIST, $storeId);

    return $whitelistEntries;
}
36 |
/**
 * Read a store-scoped config value and split it into its non-blank lines.
 *
 * The raw value is cast to string, split on "\n", and every line is trimmed
 * (which also strips a trailing "\r"). Lines that are empty after trimming
 * are dropped; all other lines are kept in their original order.
 *
 * @param string   $path    config path to read
 * @param int|null $storeId store identifier handed to the scope config unchanged
 *
 * @return list<string> trimmed, non-empty lines
 */
private function getList(string $path, ?int $storeId): array
{
    // The cast turns an unset (null) value into '' so explode() always receives a string.
    $value = (string)$this->scopeConfig->getValue($path, ScopeInterface::SCOPE_STORE, $storeId);

    $list = [];
    foreach (explode("\n", $value) as $line) {
        $line = trim($line);
        // Strict comparison instead of empty(): empty('0') is true, which
        // would silently discard a configured entry consisting of "0".
        if ($line === '') {
            continue;
        }

        $list[] = $line;
    }

    return $list;
}
53 | }
--------------------------------------------------------------------------------
/src/etc/adminhtml/system.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | Magento\Config\Model\Config\Source\Yesno
10 |
11 |
12 |
13 | The default User-Agents are coming from crawlerdetect.io package. Here you can define your own custom ones. One User-Agent per line.
14 |
15 |
16 |
17 | One User-Agent per line.
18 |
19 |
20 |
21 | Magento\Config\Model\Config\Source\Yesno
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------