├── .gitignore
├── .scrutinizer.yml
├── .travis.yml
├── LICENSE
├── README.md
├── composer.json
├── phpcs.xml
├── phpdoc.dist.xml
├── phpunit.xml.dist
├── src
├── BotInterface.php
├── Bots
│ ├── AbstractBot.php
│ ├── AbstractTaggedBot.php
│ ├── Alexa.php
│ ├── Baidu.php
│ ├── BaseBot.php
│ ├── BeeBot.php
│ ├── Bing.php
│ ├── DiscoBot.php
│ ├── EmptyBot.php
│ ├── ExaBot.php
│ ├── Facebook.php
│ ├── Google.php
│ ├── MailRU.php
│ ├── Seznam.php
│ ├── Sogou.php
│ ├── Soso.php
│ ├── Visitor.php
│ ├── WordPress.php
│ ├── Yahoo.php
│ └── Yandex.php
├── Detector.php
└── NotAKnownBotException.php
└── test
└── units
├── Bots
├── AbstractBotTest.php
├── AbstractTaggedBotTest.php
├── AlexaTest.php
├── BaiduTest.php
├── BaseBotTest.php
├── BeeBotTest.php
├── BingTest.php
├── DiscoBotTest.php
├── EmptyBotTest.php
├── ExaBotTest.php
├── FacebookTest.php
├── GoogleTest.php
├── MailRUTest.php
├── SeznamTest.php
├── SogouTest.php
├── SosoTest.php
├── VisitorTest.php
├── WordPressTest.php
├── YahooTest.php
└── YandexTest.php
├── DetectorTest.php
└── bootstrap.php
/.gitignore:
--------------------------------------------------------------------------------
1 | /vendor/
2 | /report/
3 | /bin/
4 | /composer.lock
5 |
--------------------------------------------------------------------------------
/.scrutinizer.yml:
--------------------------------------------------------------------------------
1 | ## Scrutinizer quality assurance configuration
2 | tools:
3 | external_code_coverage: true
4 | php_mess_detector:
5 | config:
6 | unused_code_rules:
7 | unused_local_variable: true
8 | unused_private_method: true
9 | unused_formal_parameter: true
10 | controversial_rules:
11 | camel_case_class_name: true
12 | camel_case_property_name: true
13 | camel_case_method_name: true
14 | php_cs_fixer: true
15 | php_pdepend: true
16 | php_sim: true
17 | php_changetracking: true
18 | php_analyzer: true
19 | sensiolabs_security_checker: true
20 |
21 | filter:
22 | excluded_paths:
23 | - vendor/*
24 | - test/public/*
25 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: false
2 | language: php
3 | php:
4 | - 7.0
5 | - 5.6
6 | - 5.5
7 | - 5.4
8 | install:
9 | - composer install --no-interaction --prefer-source
10 | - composer install:phpunit
11 | script: bin/phpunit --coverage-clover=coverage.clover
12 | after_script:
13 | - wget https://scrutinizer-ci.com/ocular.phar
14 | - php ocular.phar code-coverage:upload --format=php-clover coverage.clover
15 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | bee4/useragent-classifier
2 | ======================
3 |
4 | [](https://travis-ci.org/bee4/useragent-classifier)
5 | [](https://scrutinizer-ci.com/g/bee4/useragent-classifier/?branch=develop)
6 | [](https://scrutinizer-ci.com/g/bee4/useragent-classifier/)
7 | [](https://insight.sensiolabs.com/projects/3f165beb-2425-4669-a3da-c1794c6f7337)
8 |
9 | [](https://packagist.org/packages/bee4/useragent-classifier)
10 |
11 | This library allows you to track and detect who is behind useragents :)
12 |
13 |
14 | Installing
15 | ----------
16 | [](https://packagist.org/packages/bee4/useragent-classifier)
17 | [](https://packagist.org/packages/bee4/useragent-classifier)
18 |
19 | This project can be installed using Composer. Add the following to your composer.json:
20 |
21 | ```JSON
22 | {
23 | "require": {
24 | "bee4/useragent-classifier": "~1.0"
25 | }
26 | }
27 | ```
28 |
29 | or run this command:
30 |
31 | ```Shell
32 | composer require bee4/useragent-classifier:~1.0
33 | ```
34 |
35 | Usage
36 | -----
37 |
38 | This library is composed of a `Detector` object and several `Bots` implementations.
39 |
40 | ```php
41 | use Bee4\UserAgent\Classifier\Detector;
42 |
43 | $ua = 'Mozilla/5.0 (compatible; Mail.RU/2.0)';
44 | $bot = Detector::whoIs($ua); //$bot is a Bots\MailRU instance
45 |
46 | $bot->getBot(); //Here we get `mailru`
47 | $bot->getName(); //Here we get `mailru-bot`
48 |
49 | $ua = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)';
50 | $bot = Detector::whoIs($ua); //$bot is a Bots\Google instance
51 |
52 | $bot->getBot(); //Here we get `google`
53 | $bot->getName(); //Here we get `google-bot`
54 | ```
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "bee4/useragent-classifier",
3 | "description": "A library to work with UserAgent detection and classification",
4 | "license": "Apache-2.0",
5 | "authors": [
6 | {
7 | "name": "Stephane HULARD",
8 | "email": "s.hulard@chstudio.fr"
9 | }
10 | ],
11 | "require": {
12 | "php": ">=5.4",
13 | "ext-pcre": "*"
14 | },
15 | "autoload": {
16 | "psr-4": {
17 | "Bee4\\UserAgent\\Classifier\\": "src"
18 | }
19 | },
20 | "scripts": {
21 | "install:phpunit": [
22 | "mkdir -p bin && php -r \"file_put_contents('bin/phpunit', file_get_contents('https://phar.phpunit.de/phpunit-4.7.7.phar'));\"",
23 | "chmod 755 bin/phpunit"
24 | ],
25 | "install:phpcodesniffer": [
26 | "mkdir -p bin && php -r \"file_put_contents('bin/phpcs', file_get_contents('https://squizlabs.github.io/PHP_CodeSniffer/phpcs.phar'));\"",
27 | "php -r \"file_put_contents('bin/phpcbf', file_get_contents('https://squizlabs.github.io/PHP_CodeSniffer/phpcbf.phar'));\"",
28 | "chmod 755 bin/phpcs bin/phpcbf"
29 | ]
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/phpcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | Specific BeeBOT standard
4 |
5 | */vendor/*
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/phpdoc.dist.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 | bee4/useragent-classifier
4 |
5 | doc/phpdoc
6 |
7 | php
8 |
9 |
10 |
11 | - TODO
12 | - FIXME
13 | - IDEA
14 |
15 |
16 |
17 | doc/phpdoc
18 |
19 |
20 |
21 |
22 |
23 | src
24 |
25 |
26 | crit
27 |
28 | doc/phpdoc/default.log
29 | doc/phpdoc/errors.log
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/phpunit.xml.dist:
--------------------------------------------------------------------------------
1 |
2 |
12 |
13 |
14 | test/units
15 |
16 |
17 |
18 |
19 | src
20 |
21 |
22 |
23 |
24 |
25 |
26 |
--------------------------------------------------------------------------------
/src/BotInterface.php:
--------------------------------------------------------------------------------
1 | name = $name;
34 | }
35 |
36 | /**
37 | * Retrieve the name currently stored in the name property of this bot
38 | * @return String The value held by $this->name
39 | */
40 | public function getName()
41 | {
42 | return $this->name;
43 | }
44 |
45 | /**
46 | * Derive the global bot name from the class on which the call is made
47 | * @return String Lowercased class name, without its namespace
48 | */
49 | public static function getBot()
50 | {
51 | $segments = explode('\\', get_called_class());
52 | return strtolower(end($segments));
53 | }
54 |
55 | /**
56 | * Build the array representation of the bot, used as its JSON form
57 | * @return Array Map with the `bot` and `name` entries, in that order
58 | */
59 | public function jsonSerialize()
60 | {
61 | $payload = [];
62 | $payload['bot'] = $this->getBot();
63 | $payload['name'] = $this->getName();
64 | return $payload;
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/src/Bots/AbstractTaggedBot.php:
--------------------------------------------------------------------------------
1 | tags = array_values(array_unique(array_merge($this->tags, $tags)));
25 | }
26 |
27 | /**
28 | * Give access to the tags attached to the bot
29 | * @return array|null The tag list, or null when it contains no entry
30 | */
31 | public function getTags()
32 | {
33 | if (count($this->tags) === 0) {
34 | return null;
35 | }
36 |
37 | return $this->tags;
38 | }
39 |
40 | /**
41 | * Extend the parent JSON representation with the tag list
42 | * @return Array The parent entries plus a `tags` entry (null when there is no tag)
43 | */
44 | public function jsonSerialize()
45 | {
46 | $serialized = parent::jsonSerialize();
47 | $tags = $this->getTags();
48 | $serialized['tags'] = $tags;
49 | return $serialized;
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/src/Bots/Alexa.php:
--------------------------------------------------------------------------------
1 | setName('alexa-crawler');
31 | $this->addTags(['search']);
32 | } elseif (strpos($useragent, 'AlexaToolbar') !== false || strpos($useragent, 'Alexa Toolbar') !== false) {
33 | $this->setName('alexa-toolbar');
34 | $this->addTags(['tool','collect']);
35 | } else {
36 | throw new InvalidArgumentException('UserAgent given is not a valid Alexa one: ' . $useragent);
37 | }
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/src/Bots/Baidu.php:
--------------------------------------------------------------------------------
1 | setName('baidu-image');
37 | $this->addTags(['search','image']);
38 | } elseif (strpos($useragent, 'BaiduGame') !== false) {
39 | $this->setName('baidu-game');
40 | $this->addTags(['search','game']);
41 | } elseif (strpos($useragent, 'Baiduspider') !== false || strpos($useragent, 'baidu') !== false) {
42 | $this->setName('baidu-spider');
43 | $this->addTags(['search']);
44 | } else {
45 | throw new InvalidArgumentException('UserAgent given is not a valid Baidu one: ' . $useragent);
46 | }
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/src/Bots/BaseBot.php:
--------------------------------------------------------------------------------
1 | setName('crawler4j');
24 | } elseif (preg_match('/^scrapy/', $useragent) === 1) {
25 | $this->setName('scrapy');
26 | } elseif (strpos($lower, 'bot') !== false) {
27 | $this->setName('base-bot');
28 | } elseif (strpos($lower, 'crawler') !== false) {
29 | $this->setName('base-crawler');
30 | } elseif (strpos($lower, 'spider') !== false) {
31 | $this->setName('base-spider');
32 | } else {
33 | throw new NotAKnownBotException('UserAgent given is not a valid Bot one: ' . $useragent);
34 | }
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/src/Bots/BeeBot.php:
--------------------------------------------------------------------------------
1 | setName('beebot-crawler');
26 | } else {
27 | throw new InvalidArgumentException('UserAgent given is not a valid BeeBot one: ' . $useragent);
28 | }
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/src/Bots/Bing.php:
--------------------------------------------------------------------------------
1 | setName('bing-msn-media');
38 | $this->addTags(['search']);
39 | } elseif (strpos($useragent, 'adidxbot') !== false) {
40 | $this->setName('bing-adcenter');
41 | $this->addTags(['search','publicity']);
42 | } elseif (strpos($useragent, 'msnbot') !== false) {
43 | $this->setName('bing-msn');
44 | $this->addTags(['search']);
45 | } elseif (strpos(strtolower($useragent), 'bing') !== false) {
46 | $this->setName('bing-bot');
47 | $this->addTags(['search']);
48 | } else {
49 | throw new InvalidArgumentException(
50 | 'UserAgent given is not a valid Bing one: ' . $useragent
51 | );
52 | }
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/src/Bots/DiscoBot.php:
--------------------------------------------------------------------------------
1 | setName('discobot-news');
29 | $this->addTags(['search','news']);
30 | } elseif (strpos($useragent, 'discobot') !== false) {
31 | $this->setName('discobot-bot');
32 | $this->addTags(['search']);
33 | } else {
34 | throw new InvalidArgumentException('UserAgent given is not a valid DiscoBot one: ' . $useragent);
35 | }
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/src/Bots/EmptyBot.php:
--------------------------------------------------------------------------------
1 | setName('empty');
17 | }
18 | }
19 |
--------------------------------------------------------------------------------
/src/Bots/ExaBot.php:
--------------------------------------------------------------------------------
1 | setName('exabot-image');
28 | $this->addTags(['search','image']);
29 | } elseif (strpos($useragent, 'Exabot') !== false) {
30 | $this->setName('exabot-bot');
31 | $this->addTags(['search']);
32 | } else {
33 | throw new InvalidArgumentException('UserAgent given is not a valid Exabot one: ' . $useragent);
34 | }
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/src/Bots/Facebook.php:
--------------------------------------------------------------------------------
1 | setName('facebook-externalhit');
28 | $this->addTags(['social','agent']);
29 | } else {
30 | throw new InvalidArgumentException('UserAgent given is not a valid Facebook one: ' . $useragent);
31 | }
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/src/Bots/Google.php:
--------------------------------------------------------------------------------
1 | setName('google-image');
97 | $this->addTags(['search','image']);
98 | } elseif (strpos($useragent, 'Googlebot-Mobile') !== false) {
99 | $this->setName('google-mobile');
100 | $this->addTags(['search','mobile']);
101 | } elseif (strpos($useragent, 'Googlebot-News') !== false) {
102 | $this->setName('google-news');
103 | $this->addTags(['search','news']);
104 | } elseif (strpos($useragent, 'Googlebot-Video') !== false) {
105 | $this->setName('google-video');
106 | $this->addTags(['search','video']);
107 | } elseif (strpos($useragent, 'compatible; Mediapartners-Google') !== false) {
108 | $this->setName('google-adsense-mobile');
109 | $this->addTags(['search','publicity','mobile']);
110 | } elseif (strpos($useragent, 'Mediapartners-Google') !== false) {
111 | $this->setName('google-adsense');
112 | $this->addTags(['search','publicity']);
113 | } elseif (strpos($useragent, 'AdsBot-Google') !== false) {
114 | $this->setName('google-adsbot');
115 | $this->addTags(['tool','publicity','quality']);
116 | } elseif (strpos($useragent, 'GoogleProducer') !== false) {
117 | $this->setName('google-producer');
118 | } elseif (strpos($useragent, 'Google-Site-Verification') !== false) {
119 | $this->setName('google-site-verification');
120 | $this->addTags(['tool']);
121 | } elseif (strpos($useragent, 'Google-Test') !== false || strpos($useragent, 'Googlebot-Test') !== false) {
122 | $this->setName('google-test');
123 | } elseif (strpos($useragent, 'Feedfetcher-Google') !== false) {
124 | $this->setName('google-feedfetcher');
125 | $this->addTags(['feed']);
126 | } elseif (strpos($useragent, 'Google Desktop') !== false) {
127 | $this->setName('google-desktop');
128 | $this->addTags(['tool']);
129 | } elseif (strpos($useragent, 'translate.google.com') !== false) {
130 | $this->setName('google-translate');
131 | $this->addTags(['translate']);
132 | } elseif (strpos($useragent, 'GoogleToolbar') !== false) {
133 | $this->setName('google-toolbar');
134 | $this->addTags(['tool','collect']);
135 | } elseif (strpos($useragent, 'AppEngine-Google') !== false) {
136 | $this->setName('google-appengine');
137 | $this->addTags(['cloud']);
138 | } elseif (strpos($useragent, 'Googlebot') !== false) {
139 | $this->setName('google-bot');
140 | $this->addTags(['search']);
141 | } elseif (strpos($useragent, 'Google') !== false) {
142 | $this->setName('google-unknown');
143 | } else {
144 | throw new InvalidArgumentException('UserAgent given is not a valid google one: ' . $useragent);
145 | }
146 | }
147 | }
148 |
--------------------------------------------------------------------------------
/src/Bots/MailRU.php:
--------------------------------------------------------------------------------
1 | setName('mailru-bot');
27 | $this->addTags(['search']);
28 | } else {
29 | throw new InvalidArgumentException('UserAgent given is not a valid Mail.RU one: ' . $useragent);
30 | }
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/Bots/Seznam.php:
--------------------------------------------------------------------------------
1 | setName('seznam-bot');
27 | $this->addTags(['search']);
28 | } else {
29 | throw new InvalidArgumentException('UserAgent given is not a valid Seznam one: ' . $useragent);
30 | }
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/Bots/Sogou.php:
--------------------------------------------------------------------------------
1 | setName('sogou-image');
31 | $this->addTags(['search','image']);
32 | } elseif (strpos($useragent, 'Sogou inst') !== false) {
33 | $this->setName('sogou-instant');
34 | $this->addTags(['search']);
35 | } elseif (strpos(strtolower($useragent), 'sogou') !== false) {
36 | $this->setName('sogou-spider');
37 | $this->addTags(['search']);
38 | } else {
39 | throw new InvalidArgumentException('UserAgent given is not a valid Sogou one: ' . $useragent);
40 | }
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/src/Bots/Soso.php:
--------------------------------------------------------------------------------
1 | setName('soso-image');
27 | $this->addTags(['search','image']);
28 | } elseif (strpos($useragent, 'Sosospider') !== false) {
29 | $this->setName('soso-spider');
30 | $this->addTags(['search']);
31 | } else {
32 | throw new InvalidArgumentException('UserAgent given is not a valid Soso one: ' . $useragent);
33 | }
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/src/Bots/Visitor.php:
--------------------------------------------------------------------------------
1 | setName('visitor');
30 | }
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/src/Bots/WordPress.php:
--------------------------------------------------------------------------------
1 |
9 | * @package Bee4\UserAgent\Classifier\Bots
10 | */
11 |
12 | namespace Bee4\UserAgent\Classifier\Bots;
13 |
14 | use InvalidArgumentException;
15 |
16 | /**
17 | * Class WordPress
18 | * Detect useragents sent by WordPress, named `wordpress-bot` and tagged `agent`
19 | * @package Bee4\UserAgent\Classifier\Bots
20 | */
21 | class WordPress extends AbstractTaggedBot
22 | {
23 | /**
24 | * WordPress bot constructor
25 | *
26 | * @param String $useragent The useragent used for detection
27 | * @throws InvalidArgumentException When the useragent does not match the WordPress/... pattern
28 | */
29 | public function __construct($useragent)
30 | {
31 | /**
32 | * Expected format: WordPress/X.X.X (no capture is needed, only the match result)
33 | */
34 |
35 | if (preg_match('/^WordPress\/.*$/', $useragent) === 1) {
36 | $this->setName('wordpress-bot');
37 | $this->addTags(['agent']);
38 | } else {
39 | throw new InvalidArgumentException('UserAgent given is not a valid WordPress one: ' . $useragent);
40 | }
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/src/Bots/Yahoo.php:
--------------------------------------------------------------------------------
1 | ; )
45 | * YahooMobileMessenger/1.0 (xyz-mobile messenger; 1.0.0.0) (1234; Apple; iPhone; iPhone OS/3.0)
46 | */
47 | if (strpos($useragent, 'Yahoo!-AdCrawler') !== false) {
48 | $this->setName('yahoo-ads');
49 | $this->addTags(['search','publicity']);
50 | } elseif (strpos($useragent, 'YahooYSMcm') !== false) {
51 | $this->setName('yahoo-search-marketing');
52 | $this->addTags(['search','publicity']);
53 | } elseif (strpos($useragent, 'Yahoo! Slurp') !== false) {
54 | $this->setName('yahoo-slurp');
55 | $this->addTags(['search']);
56 | } elseif (strpos($useragent, 'Yahoo Pipes') !== false) {
57 | $this->setName('yahoo-pipes');
58 | $this->addTags(['feed']);
59 | } elseif (
60 | strpos($useragent, 'Y!J') !== false ||
61 | $useragent == 'Mozilla/4.0 (compatible; Yahoo Japan; for robot study; kasugiya)'
62 | ) {
63 | $this->setName('yahoo-japan');
64 | $this->addTags(['search']);
65 | } elseif ($useragent == 'YahooCacheSystem' || $useragent == 'YahooExternalCache') {
66 | $this->setName('yahoo-cache');
67 | $this->addTags(['search']);
68 | } elseif (
69 | substr($useragent, 0, 14) == 'YahooMessenger' ||
70 | substr($useragent, 0, 20) == 'YahooMobileMessenger'
71 | ) {
72 | $this->setName('yahoo-messenger');
73 | $this->addTags(['browser']);
74 | } elseif ($useragent == 'Yahoo:LinkExpander:Slingstone') {
75 | $this->setName('yahoo-tools');
76 | $this->addTags(['tool']);
77 | } else {
78 | throw new InvalidArgumentException('UserAgent given is not a valid Yahoo one: ' . $useragent);
79 | }
80 | }
81 | }
82 |
--------------------------------------------------------------------------------
/src/Bots/Yandex.php:
--------------------------------------------------------------------------------
1 | setName('yandex-'.strtolower($matches[1]));
42 | switch (strtolower($matches[1])) {
43 | case 'blogs':
44 | case 'metrika':
45 | case 'bot':
46 | $this->addTags(['search']);
47 | break;
48 | case 'antivirus':
49 | $this->addTags(['search','antivirus']);
50 | break;
51 | case 'direct':
52 | $this->addTags(['search','publicity']);
53 | break;
54 | case 'images':
55 | $this->addTags(['search','image']);
56 | break;
57 | case 'imageresizer':
58 | $this->addTags(['search','image','tool']);
59 | break;
60 | }
61 | } elseif (strpos($useragent, 'Yandex.Translate') !== false) {
62 | $this->setName('yandex-translate');
63 | $this->addTags(['translate']);
64 | } elseif (strpos($useragent, 'Yandex Browser') !== false || strpos($useragent, 'Edition Yandex') !== false) {
65 | throw new NotAKnownBotException($useragent);
66 | } else {
67 | throw new InvalidArgumentException('UserAgent given is not a valid Yandex one: ' . $useragent);
68 | }
69 | }
70 | }
71 |
--------------------------------------------------------------------------------
/src/Detector.php:
--------------------------------------------------------------------------------
1 | object = $this
25 | ->getMockBuilder('\Bee4\UserAgent\Classifier\Bots\AbstractBot')
26 | ->setMethods(null)
27 | ->setMockClassName('FakeBot')
28 | ->getMock();
29 | }
30 |
31 |     /**
32 |      * @expectedException \InvalidArgumentException
33 |      */
34 |     public function testInvalidName()
35 |     {
36 |         // setName is made callable through reflection; an integer name is expected to be refused.
37 |         $setName = new \ReflectionMethod('FakeBot', 'setName');
38 |         $setName->setAccessible(true);
39 |         $setName->invoke($this->object, 0);
40 |     }
41 |
42 |     public function testName()
43 |     {
44 |         $setName = new \ReflectionMethod('FakeBot', 'setName');
45 |         $setName->setAccessible(true); // setName is made callable through reflection
46 |         $setName->invoke($this->object, 'fakebot-spider');
47 |         $this->assertEquals('fakebot-spider', $this->object->getName()); // name is read back unchanged
48 |     }
49 |
50 |     public function testGetBot()
51 |     {
52 |         // The mock class is named FakeBot; getBot() is expected to report "fakebot".
53 |         $botName = $this->object->getBot();
54 |         $this->assertEquals('fakebot', $botName);
55 |     }
56 |
57 |     public function testJsonSerialize()
58 |     {
59 |         // Name the bot through the reflected setter before serializing.
60 |         $setName = new \ReflectionMethod('FakeBot', 'setName');
61 |         $setName->setAccessible(true);
62 |         $setName->invoke($this->object, 'fakebot-spider');
63 |         // jsonSerialize() is expected to expose both the bot and its name.
64 |         $expected = ['bot' => 'fakebot', 'name' => 'fakebot-spider'];
65 |         $serialized = $this->object->jsonSerialize();
66 |         $this->assertEquals($expected, $serialized);
67 |     }
68 | }
69 |
--------------------------------------------------------------------------------
/test/units/Bots/AbstractTaggedBotTest.php:
--------------------------------------------------------------------------------
1 | object = $this
25 | ->getMockBuilder('\Bee4\UserAgent\Classifier\Bots\AbstractTaggedBot')
26 | ->setMethods(null)
27 | ->setMockClassName('FakeTaggedBot')
28 | ->getMock();
29 | }
30 |
31 |     public function testTags()
32 |     {
33 |         // Before any addTags() call the tag list is expected to be null.
34 |         $this->assertNull($this->object->getTags());
35 |         $single = ['tag1'];
36 |         $this->object->addTags($single);
37 |         $this->assertEquals($single, $this->object->getTags());
38 |         // The second batch repeats tag1; the result is expected to list each tag once.
39 |         $several = ['tag1', 'tag2', 'tag3'];
40 |         $this->object->addTags($several);
41 |         $this->assertEquals($several, $this->object->getTags());
42 |     }
43 | }
44 |
--------------------------------------------------------------------------------
/test/units/Bots/AlexaTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('alexa-crawler', $bot->getName());
25 | $this->assertEquals(['search'], $bot->getTags());
26 |
27 | $bot = new Alexa('Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; BTRS102908; GTB7.5; SIMBAR={21DEE6A8-BD2A-11E1-BFEE-F021C79C3CCF}; SLCC1; .NET CLR 2.0.50727; Media Center PC 5.0; .NET CLR 3.0.30618; .NET CLR 3.5.30729; InfoPath.1; Alexa Toolbar)');
28 | $this->assertEquals('alexa-toolbar', $bot->getName());
29 | $this->assertEquals(['tool','collect'], $bot->getTags());
30 |
31 | $bot = new Alexa('Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; AlexaToolbar/amzni-3.0; GTB7.5; AskTbORJ/5.15.23.36191; .NET4.0C; .NET4.0E; AlexaToolbar/amzni-3.0; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; MSIECrawler)');
32 | $this->assertEquals('alexa-toolbar', $bot->getName());
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/test/units/Bots/BaiduTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('baidu-spider', $bot->getName());
25 | $bot = new Baidu('Mozilla/5.0 (Linux;u;Android 2.3.7;zh-cn;) AppleWebKit/533.1 (KHTML,like Gecko) Version/4.0 Mobile Safari/533.1 (compatible; +http://www.baidu.com/search/spider.html)');
26 | $this->assertEquals('baidu-spider', $bot->getName());
27 | $this->assertEquals(['search'], $bot->getTags());
28 |
29 | $bot = new Baidu('Baiduspider-image+(+http://www.baidu.com/search/spider.htm)');
30 | $this->assertEquals('baidu-image', $bot->getName());
31 | $this->assertEquals(['search','image'], $bot->getTags());
32 |
33 | $bot = new Baidu('Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; BaiduGame)');
34 | $this->assertEquals('baidu-game', $bot->getName());
35 | $this->assertEquals(['search','game'], $bot->getTags());
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/test/units/Bots/BaseBotTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('base-bot', $bot->getName());
30 | $bot = new BaseBot('magpie-crawler\/1.1 (U; Linux amd64; en-GB; +http:\/\/www.brandwatch.net)');
31 | $this->assertEquals('base-crawler', $bot->getName());
32 | $bot = new BaseBot('Mozilla\/5.0 (compatible; YYSpider; +http:\/\/www.yunyun.com\/spider.html)');
33 | $this->assertEquals('base-spider', $bot->getName());
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/test/units/Bots/BeeBotTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('beebot-crawler', $bot->getName());
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/test/units/Bots/BingTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('bing-bot', $bot->getName());
25 | $bot = new Bing('Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)');
26 | $this->assertEquals('bing-bot', $bot->getName());
27 | $this->assertEquals(['search'], $bot->getTags());
28 |
29 | $bot = new Bing('msnbot-UDiscovery/2.0b (+http://search.msn.com/msnbot.htm)');
30 | $this->assertEquals('bing-msn', $bot->getName());
31 | $bot = new Bing('msnbot/2.0b (+http://search.msn.com/msnbot.htm)');
32 | $this->assertEquals('bing-msn', $bot->getName());
33 | $this->assertEquals(['search'], $bot->getTags());
34 |
35 | $bot = new Bing('msnbot-media/1.1 (+http://search.msn.com/msnbot.htm)');
36 | $this->assertEquals('bing-msn-media', $bot->getName());
37 | $this->assertEquals(['search'], $bot->getTags());
38 |
39 | $bot = new Bing('adidxbot/1.1 (+http://search.msn.com/msnbot.htm)');
40 | $this->assertEquals('bing-adcenter', $bot->getName());
41 | $bot = new Bing('adidxbot/2.0 (+http://search.msn.com/msnbot.htm)');
42 | $this->assertEquals('bing-adcenter', $bot->getName());
43 | $this->assertEquals(['search','publicity'], $bot->getTags());
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/test/units/Bots/DiscoBotTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('discobot-news', $bot->getName());
25 | $this->assertEquals(['search','news'], $bot->getTags());
26 |
27 | $bot = new DiscoBot('Mozilla/5.0 (compatible; discobot/2.0; +http://discoveryengine.com/discobot.html)');
28 | $this->assertEquals('discobot-bot', $bot->getName());
29 | $this->assertEquals(['search'], $bot->getTags());
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/test/units/Bots/EmptyBotTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('empty', $bot->getName());
17 | $this->assertEquals('emptybot', $bot->getBot());
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/test/units/Bots/ExaBotTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('exabot-image', $bot->getName());
25 | $this->assertEquals(['search','image'], $bot->getTags());
26 |
27 | $bot = new ExaBot('Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)');
28 | $this->assertEquals('exabot-bot', $bot->getName());
29 | $this->assertEquals(['search'], $bot->getTags());
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/test/units/Bots/FacebookTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('facebook-externalhit', $bot->getName());
25 | $bot = new Facebook('facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)');
26 | $this->assertEquals('facebook-externalhit', $bot->getName());
27 | $this->assertEquals(['social','agent'], $bot->getTags());
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/test/units/Bots/GoogleTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('google-bot', $bot->getName());
25 | $this->assertEquals(['search'], $bot->getTags());
26 |
27 | $bot = new Google('Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; GoogleToolbar; User-agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; http://bsalsa.com) ; .NET CLR 2.0.50727)');
28 | $this->assertEquals('google-toolbar', $bot->getName());
29 | $this->assertEquals(['tool','collect'], $bot->getTags());
30 |
31 | $bot = new Google('Googlebot-Image/1.0');
32 | $this->assertEquals('google-image', $bot->getName());
33 | $this->assertEquals(['search','image'], $bot->getTags());
34 |
35 | $bot = new Google('(compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)');
36 | $this->assertEquals('google-mobile', $bot->getName());
37 | $this->assertEquals(['search','mobile'], $bot->getTags());
38 |
39 | $bot = new Google('Googlebot-News');
40 | $this->assertEquals('google-news', $bot->getName());
41 | $this->assertEquals(['search','news'], $bot->getTags());
42 |
43 | $bot = new Google('Googlebot-Video/1.0');
44 | $this->assertEquals('google-video', $bot->getName());
45 | $this->assertEquals(['search','video'], $bot->getTags());
46 |
47 | $bot = new Google('AppEngine-Google; (+http://application-name.appspot.com)');
48 | $this->assertEquals('google-appengine', $bot->getName());
49 | $this->assertEquals(['cloud'], $bot->getTags());
50 |
51 | $bot = new Google('(compatible; Mediapartners-Google/2.1; +http://www.google.com/bot.html)');
52 | $this->assertEquals('google-adsense-mobile', $bot->getName());
53 | $this->assertEquals(['search','publicity','mobile'], $bot->getTags());
54 |
55 | $bot = new Google('Mediapartners-Google');
56 | $this->assertEquals('google-adsense', $bot->getName());
57 | $this->assertEquals(['search','publicity'], $bot->getTags());
58 |
59 | $bot = new Google('AdsBot-Google (+http://www.google.com/adsbot.html)');
60 | $this->assertEquals('google-adsbot', $bot->getName());
61 | $this->assertEquals(['tool','publicity','quality'], $bot->getTags());
62 |
63 | $bot = new Google('GoogleProducer; (+http://goo.gl/7y4SX)');
64 | $this->assertEquals('google-producer', $bot->getName());
65 | $this->assertEquals(null, $bot->getTags());
66 |
67 | $bot = new Google('Google-Site-Verification/1.0');
68 | $this->assertEquals('google-site-verification', $bot->getName());
69 | $this->assertEquals(['tool'], $bot->getTags());
70 |
71 | $bot = new Google('Feedfetcher-Google; (+http://www.google.com/feedfetcher.html; feed-id=)');
72 | $this->assertEquals('google-feedfetcher', $bot->getName());
73 | $this->assertEquals(['feed'], $bot->getTags());
74 |
75 | $bot = new Google('Mozilla/5.0 (compatible; Google Desktop/5.9.1005.12335; http://desktop.google.com/)');
76 | $this->assertEquals('google-desktop', $bot->getName());
77 | $this->assertEquals(['tool'], $bot->getTags());
78 |
79 | $bot = new Google('Mozilla/5.0 (iPad; CPU OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3,gzip(gfe) (via translate.google.com)');
80 | $this->assertEquals('google-translate', $bot->getName());
81 | $this->assertEquals(['translate'], $bot->getTags());
82 |
83 | $bot = new Google('Googlebot-Test ( http://www.googlebot.com/bot.html)');
84 | $this->assertEquals('google-test', $bot->getName());
85 |
86 | $bot = new Google('Mozilla/5.0 (compatible; Google New-Beebot-testing-unknown; http://some.url.com/)');
87 | $this->assertEquals('google-unknown', $bot->getName());
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/test/units/Bots/MailRUTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('mailru-bot', $bot->getName());
25 | $this->assertEquals(['search'], $bot->getTags());
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/test/units/Bots/SeznamTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('seznam-bot', $bot->getName());
25 | $this->assertEquals(['search'], $bot->getTags());
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/test/units/Bots/SogouTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('sogou-image', $bot->getName());
25 | $this->assertEquals(['search','image'], $bot->getTags());
26 |
27 | $bot = new Sogou('Sogou inst spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07"');
28 | $this->assertEquals('sogou-instant', $bot->getName());
29 | $this->assertEquals(['search'], $bot->getTags());
30 |
31 | $bot = new Sogou('Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)');
32 | $this->assertEquals('sogou-spider', $bot->getName());
33 | $this->assertEquals(['search'], $bot->getTags());
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/test/units/Bots/SosoTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('soso-spider', $bot->getName());
25 | $this->assertEquals(['search'], $bot->getTags());
26 |
27 | $bot = new Soso('Mozilla/5.0 (compatible; Sosoimagespider/2.0; +http://help.soso.com/soso-image-spider.htm)');
28 | $this->assertEquals('soso-image', $bot->getName());
29 | $this->assertEquals(['search','image'], $bot->getTags());
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/test/units/Bots/VisitorTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('visitor', $bot->getName());
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/test/units/Bots/WordPressTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('wordpress-bot', $bot->getName());
25 | $this->assertEquals(['agent'], $bot->getTags());
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/test/units/Bots/YahooTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('yahoo-ads', $bot->getName());
28 | $this->assertEquals(['search','publicity'], $bot->getTags());
29 |
30 | $bot = new Yahoo('Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)');
31 | $this->assertEquals('yahoo-slurp', $bot->getName());
32 | $bot = new Yahoo('Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)');
33 | $this->assertEquals('yahoo-slurp', $bot->getName());
34 | $this->assertEquals(['search'], $bot->getTags());
35 |
36 | $bot = new Yahoo('Mozilla/5.0 (compatible; Yahoo Pipes 2.0; +http://developer.yahoo.com/yql/provider) Gecko/20090729 Firefox/3.5.2');
37 | $this->assertEquals('yahoo-pipes', $bot->getName());
38 | $this->assertEquals(['feed'], $bot->getTags());
39 |
40 | $bot = new Yahoo('Mozilla/5.0 (YahooYSMcm/3.0.0; http://help.yahoo.com)');
41 | $this->assertEquals('yahoo-search-marketing', $bot->getName());
42 | $this->assertEquals(['search','publicity'], $bot->getTags());
43 |
44 | $bot = new Yahoo('DoCoMo/2.0 SH902i (compatible; Y!J-SRD/1.0; http://help.yahoo.co.jp/help/jp/search/indexing/indexing-27.html)');
45 | $this->assertEquals('yahoo-japan', $bot->getName());
46 | $bot = new Yahoo('DoCoMo/2.0/SO502i (compatible; Y!J-SRD/1.0; http://help.yahoo.co.jp/help/jp/search/indexing/indexing-27.html)');
47 | $this->assertEquals('yahoo-japan', $bot->getName());
48 | $bot = new Yahoo('Mozilla/4.0 (compatible; Yahoo Japan; for robot study; kasugiya)');
49 | $this->assertEquals('yahoo-japan', $bot->getName());
50 | $bot = new Yahoo('Mozilla/4.0 (compatible; Y!J; for robot study; keyoshid)');
51 | $this->assertEquals('yahoo-japan', $bot->getName());
52 | $bot = new Yahoo('Y!J/1.0 (http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html)');
53 | $this->assertEquals('yahoo-japan', $bot->getName());
54 | $this->assertEquals(['search'], $bot->getTags());
55 |
56 | //Check Yahoo messenger useragent
57 | $bot = new Yahoo('YahooMobileMessenger/1.0 (xyz-mobile messenger; 1.0.0.0) (1234; Apple; iPhone; iPhone OS/3.0)');
58 | $this->assertEquals('yahoo-messenger', $bot->getName());
59 | //Check third party apps useragent that are using YahooMessenger API
60 | $bot = new Yahoo('YahooMessenger/1.0 ( xyz-mobile messenger; 1.0.0.0 )');
61 | $this->assertEquals('yahoo-messenger', $bot->getName());
62 | $this->assertEquals(['browser'], $bot->getTags());
63 |
64 |
65 | $bot = new Yahoo('Yahoo:LinkExpander:Slingstone');
66 | $this->assertEquals('yahoo-tools', $bot->getName());
67 | $this->assertEquals(['tool'], $bot->getTags());
68 |
69 | $bot = new Yahoo('YahooCacheSystem');
70 | $this->assertEquals('yahoo-cache', $bot->getName());
71 | $bot = new Yahoo('YahooExternalCache');
72 | $this->assertEquals('yahoo-cache', $bot->getName());
73 | $this->assertEquals(['search'], $bot->getTags());
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/test/units/Bots/YandexTest.php:
--------------------------------------------------------------------------------
1 | assertEquals('yandex-antivirus', $bot->getName());
35 | $this->assertEquals(['search','antivirus'], $bot->getTags());
36 |
37 | $bot = new Yandex('Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)');
38 | $this->assertEquals('yandex-images', $bot->getName());
39 | $this->assertEquals(['search','image'], $bot->getTags());
40 |
41 | $bot = new Yandex('Mozilla/5.0 (compatible; YandexDirect/3.0; +http://yandex.com/bots)');
42 | $this->assertEquals('yandex-direct', $bot->getName());
43 | $this->assertEquals(['search','publicity'], $bot->getTags());
44 |
45 | $bot = new Yandex('Mozilla/5.0 (compatible; YandexBlogs/0.99; robot; B; +http://yandex.com/bots)');
46 | $this->assertEquals('yandex-blogs', $bot->getName());
47 | $this->assertEquals(['search'], $bot->getTags());
48 |
49 | $bot = new Yandex('Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots)');
50 | $this->assertEquals('yandex-metrika', $bot->getName());
51 | $this->assertEquals(['search'], $bot->getTags());
52 |
53 | $bot = new Yandex('Mozilla/5.0 (compatible; YandexImageResizer/2.0; +http://yandex.com/bots)');
54 | $this->assertEquals('yandex-imageresizer', $bot->getName());
55 | $this->assertEquals(['search','image','tool'], $bot->getTags());
56 |
57 | $bot = new Yandex('Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0) Yandex.Translate');
58 | $this->assertEquals('yandex-translate', $bot->getName());
59 | $this->assertEquals(['translate'], $bot->getTags());
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/test/units/DetectorTest.php:
--------------------------------------------------------------------------------
1 | ia_archiver et pas simplement alexa.com selon les donnees officielles ALEXA :
30 | http://www.alexa.com/help/webmasters
31 | */
32 | $this->robotsUA['alexa']['useragent'] = "ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)";
33 | $this->robotsUA['alexa']['expectedValue'] = "alexa-crawler";
34 | //Baidu's Bots UA
35 | $this->robotsUA['baidu-image']['useragent'] = "Baiduspider-image+(+http://www.baidu.com/search/spider.htm)";
36 | $this->robotsUA['baidu-spider']['useragent'] = "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)";
37 | $this->robotsUA['baidu-image']['expectedValue'] = "baidu-image";
38 | $this->robotsUA['baidu-spider']['expectedValue'] = "baidu-spider";
39 | //BaseBot's Bots UA
40 | $this->robotsUA['base-bot']['useragent'] = "EasyDL/3.xx http://keywen.com/Encyclopedia/Bot";
41 | $this->robotsUA['base-spider']['useragent'] = "Aleksika Spider/1.0 (+http://www.aleksika.com/)";
42 | $this->robotsUA['base-crawler']['useragent'] = "aardvark-crawler";
43 | $this->robotsUA['base-bot']['expectedValue'] = "base-bot";
44 | $this->robotsUA['base-spider']['expectedValue'] = "base-spider";
45 | $this->robotsUA['base-crawler']['expectedValue'] = "base-crawler";
46 | //Bing's Bots UA
47 | $this->robotsUA['msnbot-media']['useragent'] = "msnbot-media/1.0 (+http://search.msn.com/msnbot.htm)";
48 | //UA for original msnbot is Mozilla/4.0 (compatible; MSIE 6.0; Windows NT; MS Search 4.0 Robot)
49 | $this->robotsUA['msnbot']['useragent'] = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT; msnbot 4.0 Robot)";
50 | //The test for adidxbot returns actually an bing-msn bot first
51 | $this->robotsUA['adixbot']['useragent'] = "adidxbot/1.1 (+http://search.msn.com/msnbot.htm)";
52 | $this->robotsUA['bing']['useragent'] = "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)";
53 | $this->robotsUA['msnbot-media']['expectedValue'] = "bing-msn-media";
54 | $this->robotsUA['msnbot']['expectedValue'] = "bing-msn";
55 | $this->robotsUA['adixbot']['expectedValue'] = "bing-adcenter";
56 | $this->robotsUA['bing']['expectedValue'] = "bing-bot";
57 | //Google's Bots UA
58 | $this->robotsUA['google-image']['useragent'] = "Googlebot-Image/1.0";
59 | $this->robotsUA['google-mobile']['useragent'] = "(compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)";
60 | $this->robotsUA['google-news']['useragent'] = "Googlebot-News";
61 | $this->robotsUA['google-video']['useragent'] = "Googlebot-Video/1.0";
62 | $this->robotsUA['google-adsense-mobile']['useragent'] = "(compatible; Mediapartners-Google/2.1; +http://www.google.com/bot.html)";
63 | $this->robotsUA['google-adsense']['useragent'] = "Mediapartners-Google";
64 | $this->robotsUA['google-adsbot']['useragent'] = "AdsBot-Google (+http://www.google.com/adsbot.html)";
65 | $this->robotsUA['google-producer']['useragent'] = "GoogleProducer; (+http://goo.gl/7y4SX)";
66 | $this->robotsUA['google-site-verification']['useragent'] = "Google-Site-Verification/1.0";
67 | //Google test returns first a google-bot normal response...
68 | $this->robotsUA['google-test']['useragent'] = "Googlebot-Test ( http://www.googlebot.com/bot.html)";
69 | $this->robotsUA['google-feedfetcher']['useragent'] = "Feedfetcher-Google; (+http://www.google.com/feedfetcher.html; feed-id=)";
70 | $this->robotsUA['google-desktop']['useragent'] = "Mozilla/5.0 (compatible; Google Desktop/5.9.1005.12335; http://desktop.google.com/)";
71 | $this->robotsUA['google-translate']['useragent'] = "Mozilla/5.0 (iPad; CPU OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3,gzip(gfe) (via translate.google.com)";
72 | //Original UA : Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; GTB5; User-agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; http://bsalsa.com) ; .NET CLR 2.0.50727)
73 | // GTB5 = GoogleToolbar v5 for IE
74 | $this->robotsUA['google-toolbar']['useragent'] = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; GoogleToolbar; User-agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; http://bsalsa.com) ; .NET CLR 2.0.50727)";
75 | $this->robotsUA['google-appengine']['useragent'] = "AppEngine-Google; (+http://application-name.appspot.com)";
76 | $this->robotsUA['google-bot']['useragent'] = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)";
77 | $this->robotsUA['google-unknown']['useragent'] = "Mozilla/5.0 (compatible; Google New-testing-unknown; http://some.url.com/)";
78 | $this->robotsUA['google-image']['expectedValue'] = "google-image";
79 | $this->robotsUA['google-mobile']['expectedValue'] = "google-mobile";
80 | $this->robotsUA['google-news']['expectedValue'] = "google-news";
81 | $this->robotsUA['google-video']['expectedValue'] = "google-video";
82 | $this->robotsUA['google-adsense-mobile']['expectedValue'] = "google-adsense-mobile";
83 | $this->robotsUA['google-adsense']['expectedValue'] = "google-adsense";
84 | $this->robotsUA['google-adsbot']['expectedValue'] = "google-adsbot";
85 | $this->robotsUA['google-producer']['expectedValue'] = "google-producer";
86 | $this->robotsUA['google-site-verification']['expectedValue'] = "google-site-verification";
87 | $this->robotsUA['google-test']['expectedValue'] = "google-test";
88 | $this->robotsUA['google-feedfetcher']['expectedValue'] = "google-feedfetcher";
89 | $this->robotsUA['google-desktop']['expectedValue'] = "google-desktop";
90 | $this->robotsUA['google-translate']['expectedValue'] = "google-translate";
91 | $this->robotsUA['google-toolbar']['expectedValue'] = "google-toolbar";
92 | $this->robotsUA['google-appengine']['expectedValue'] = "google-appengine";
93 | $this->robotsUA['google-bot']['expectedValue'] = "google-bot";
94 | $this->robotsUA['google-unknown']['expectedValue'] = "google-unknown";
95 | //Discobot's Bots UA
96 | $this->robotsUA['discobot-news']['useragent'] = "Mozilla/5.0 (compatible; discobot-news; +http://discoveryengine.com/discobot.html)";
97 | $this->robotsUA['discobot-bot']['useragent'] = "Mozilla/5.0 (compatible; discobot/2.0; +http://discoveryengine.com/discobot.html)";
98 | $this->robotsUA['discobot-news']['expectedValue'] = "discobot-news";
99 | $this->robotsUA['discobot-bot']['expectedValue'] = "discobot-bot";
100 | //Exabot's Bots UA
101 | $this->robotsUA['exabot-thumbnails']['useragent'] = "Mozilla/5.0 (compatible; Konqueror/3.5; Linux) KHTML/3.5.5 (like Gecko) (Exabot-Thumbnails)";
102 | $this->robotsUA['exabot']['useragent'] = "Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)";
103 | $this->robotsUA['exabot-thumbnails']['expectedValue'] = "exabot-image";
104 | $this->robotsUA['exabot']['expectedValue'] = "exabot-bot";
105 | //Facebook's Bots UA
106 | $this->robotsUA['facebookexternalhit']['useragent'] = "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)";
107 | $this->robotsUA['facebookexternalhit']['expectedValue'] = "facebook-externalhit";
108 | //Mail.ru's Bots UA
109 | $this->robotsUA['mail.ru']['useragent'] = "Mozilla/5.0 (compatible; Mail.RU/2.0)";
110 | $this->robotsUA['mail.ru']['expectedValue'] = "mailru-bot";
111 | //Seznam's Bots UA
112 | $this->robotsUA['SeznamBot']['useragent'] = "SeznamBot/3.0 (+http://fulltext.sblog.cz/)";
113 | $this->robotsUA['SeznamBot']['expectedValue'] = "seznam-bot";
114 | //Sogou's Bots UA
115 | $this->robotsUA['sogou-pic']['useragent'] = "Sogou Pic Spider/3.0(+http://www.sogou.com/docs/help/webmasters.htm#07)";
116 | $this->robotsUA['sogou-inst']['useragent'] = "Sogou inst spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)";
117 | $this->robotsUA['sogou']['useragent'] = "Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)";
118 | $this->robotsUA['sogou-pic']['expectedValue'] = "sogou-image";
119 | $this->robotsUA['sogou-inst']['expectedValue'] = "sogou-instant";
120 | $this->robotsUA['sogou']['expectedValue'] = "sogou-spider";
121 | //Soso's Bots UA
122 | $this->robotsUA['sosoimagespider']['useragent'] = "Mozilla/5.0 (compatible; Sosoimagespider/2.0; +http://help.soso.com/soso-image-spider.htm)";
123 | $this->robotsUA['sosospider']['useragent'] = "Sosospider+(+http://help.soso.com/webspider.htm)";
124 | $this->robotsUA['sosoimagespider']['expectedValue'] = "soso-image";
125 | $this->robotsUA['sosospider']['expectedValue'] = "soso-spider";
126 | //Yahoo's Bots UA
127 | $this->robotsUA['yahoo-adcrawler']['useragent'] = "Mozilla/5.0 (compatible; Yahoo!-AdCrawler; http://help.yahoo.com/yahoo_adcrawler)";
128 | $this->robotsUA['yahooYSMcm']['useragent'] = "Mozilla/5.0 (YahooYSMcm/3.0.0; http://help.yahoo.com)";
129 | $this->robotsUA['yahoo-slurp']['useragent'] = "Mozilla/5.0 (compatible; Yahoo! Slurp/3.0; http://help.yahoo.com/help/us/ysearch/slurp)";
130 | $this->robotsUA['yahoo-pipes']['useragent'] = "Mozilla/5.0 (compatible; Yahoo Pipes 2.0; +http://developer.yahoo.com/yql/provider) Gecko/20090729 Firefox/3.5.2";
131 | ///////////////////////////////////////////////////////////////////////////////////////////////////////// @TODO STEPH
132 | $this->robotsUA['yahoo-adcrawler']['expectedValue'] = "yahoo-ads";
133 | $this->robotsUA['yahooYSMcm']['expectedValue'] = "yahoo-search-marketing";
134 | $this->robotsUA['yahoo-slurp']['expectedValue'] = "yahoo-slurp";
135 | $this->robotsUA['yahoo-pipes']['expectedValue'] = "yahoo-pipes";
136 | //Yandex's Bots UA
137 | $this->robotsUA['yandex-antivirus']['useragent'] = "Mozilla/5.0 (compatible; YandexAntivirus/2.0; +http://yandex.com/bots)";
138 | $this->robotsUA['yandex-direct']['useragent'] = "Mozilla/5.0 (compatible; YandexDirect/3.0; +http://yandex.com/bots)";
139 | $this->robotsUA['yandex-blogs']['useragent'] = "Mozilla/5.0 (compatible; YandexBlogs/0.99; robot; B; +http://yandex.com/bots)";
140 | $this->robotsUA['yandex-metrika']['useragent'] = "Mozilla/5.0 (compatible; YandexMetrika/2.0; +http://yandex.com/bots)";
141 | $this->robotsUA['yandex-image-resizer']['useragent'] = "Mozilla/5.0 (compatible; YandexImageResizer/2.0; +http://yandex.com/bots)";
142 | $this->robotsUA['yandex-images']['useragent'] = "Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)";
143 | $this->robotsUA['yandex-bot']['useragent'] = "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)";
144 | $this->robotsUA['yandex-antivirus']['expectedValue'] = "yandex-antivirus";
145 | $this->robotsUA['yandex-direct']['expectedValue'] = "yandex-direct";
146 | $this->robotsUA['yandex-blogs']['expectedValue'] = "yandex-blogs";
147 | $this->robotsUA['yandex-metrika']['expectedValue'] = "yandex-metrika";
148 | $this->robotsUA['yandex-image-resizer']['expectedValue'] = "yandex-imageresizer";
149 | $this->robotsUA['yandex-images']['expectedValue'] = "yandex-images";
150 | $this->robotsUA['yandex-bot']['expectedValue'] = "yandex-bot";
151 |
152 | $this->robotsUA['beebot-crawler']['useragent'] = "Bee4 - BeeBot/1.0";
153 | $this->robotsUA['beebot-crawler']['expectedValue'] = "beebot-crawler";
154 |
155 | $this->robotsUA['visitor']['useragent'] = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.71 Safari/537.36";
156 | $this->robotsUA['visitor']['expectedValue'] = "visitor";
157 |
158 | $this->robotsUA['emptybot']['useragent'] = "-";
159 | $this->robotsUA['emptybot']['expectedValue'] = "empty";
160 |
161 | $this->robotsUA['wordpress']['useragent'] = "WordPress/4.1.0";
162 | $this->robotsUA['wordpress']['expectedValue'] = "wordpress-bot";
163 |
164 | //TODO: Use a dataProvider fed from sample.log
165 | }
166 |
167 |     public function testWhoIs()
168 |     {
169 |         // Every sample's key is passed as the assertion message so a failure names its sample.
170 |         foreach ($this->robotsUA as $key => $robot) {
171 |             if ($key === 'not-a-bot-UA') {
172 |                 continue; // this sample key is excluded from the name check
173 |             }
174 |             $detected = Detector::whoIs($robot['useragent'])->getName();
175 |             $this->assertEquals($robot['expectedValue'], $detected, 'Sample: ' . $key);
176 |         }
177 |     }
178 |
179 |     public function testBotButNotKnown()
180 |     {
181 |         $unknownCrawler = Detector::whoIs('crawler4j (http://code.google.com/p/crawler4j/)');
182 |         $this->assertInstanceOf('\Bee4\UserAgent\Classifier\Bots\BaseBot', $unknownCrawler);
183 |         // A useragent carrying the "Edition Yandex" marker is expected to come back as an EmptyBot.
184 |         $yandexEdition = Detector::whoIs('Opera\/9.80 (Windows NT 6.1; U; Edition Yandex; ru) Presto\/2.8.131 Version\/11.10');
185 |         $this->assertInstanceOf('\Bee4\UserAgent\Classifier\Bots\EmptyBot', $yandexEdition);
186 |     }
187 | }
188 |
--------------------------------------------------------------------------------
/test/units/bootstrap.php:
--------------------------------------------------------------------------------
1 |