├── .github
│   └── workflows
│       ├── php-cs-fixer.yml
│       └── test.yml
├── .php_cs.dist
├── LICENSE
├── README.md
├── composer.json
├── export.php
├── raw
│   ├── Crawlers.json
│   ├── Crawlers.txt
│   ├── Exclusions.json
│   ├── Exclusions.txt
│   ├── Headers.json
│   └── Headers.txt
└── src
    ├── CrawlerDetect.php
    └── Fixtures
        ├── AbstractProvider.php
        ├── Crawlers.php
        ├── Exclusions.php
        └── Headers.php
/.github/workflows/php-cs-fixer.yml:
--------------------------------------------------------------------------------
1 | name: Check & fix styling
2 |
3 | on: [ push ]
4 |
5 | jobs:
6 | php-cs-fixer:
7 | runs-on: ubuntu-24.04
8 |
9 | steps:
10 | - name: Checkout code
11 | uses: actions/checkout@v3
12 | with:
13 | ref: ${{ github.head_ref }}
14 |
15 | - name: Run PHP CS Fixer
16 | uses: docker://oskarstark/php-cs-fixer-ga:2.18.6
17 | with:
18 | args: --config=.php_cs.dist --allow-risky=yes
19 |
20 | - name: Commit changes
21 | uses: stefanzweifel/git-auto-commit-action@v4
22 | with:
23 | commit_message: Fix styling
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: Test
2 |
3 | on:
4 | push:
5 | branches:
6 | - "master"
7 | pull_request:
8 |
9 | jobs:
10 | build:
11 | runs-on: ubuntu-24.04
12 | strategy:
13 | fail-fast: true
14 | matrix:
15 | php: [7.1, 7.2, 7.3, 7.4, 8.0, 8.1, 8.2, 8.3, 8.4]
16 |
17 | name: PHP:${{ matrix.php }}
18 |
19 | steps:
20 | - name: Checkout
21 | uses: actions/checkout@v4
22 |
23 | - name: Setup PHP, with composer
24 | uses: shivammathur/setup-php@v2
25 | with:
26 | php-version: ${{ matrix.php }}
27 | tools: composer:v2
28 | coverage: xdebug
29 |
30 | - name: Get composer cache directory
31 | id: composer-cache
32 | run: |
33 | echo "dir=$(composer config cache-files-dir)" >> $GITHUB_OUTPUT
34 | shell: bash
35 |
36 | - name: Cache composer dependencies
37 | uses: actions/cache@v4
38 | with:
39 | path: ${{ steps.composer-cache.outputs.dir }}
40 | key: dependencies-php-${{ matrix.php }}-composer-${{ hashFiles('composer.json') }}
41 | restore-keys: dependencies-php-${{ matrix.php }}-composer-
42 |
43 | - name: Install Composer dependencies
44 | run: |
45 | composer install --prefer-dist --no-interaction --no-suggest
46 |
47 | - name: Run Unit tests
48 | run: |
49 | vendor/bin/phpunit --coverage-clover=tests/logs/clover.xml
50 |
51 | - name: Upload coverage results to Coveralls
52 | env:
53 | COVERALLS_REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }}
54 | run: |
55 | composer global require php-coveralls/php-coveralls "^1.0"
56 | coveralls --coverage_clover=tests/logs/clover.xml -v
57 |
--------------------------------------------------------------------------------
/.php_cs.dist:
--------------------------------------------------------------------------------
1 | in([
5 | __DIR__.'/src',
6 | __DIR__.'/tests',
7 | ])
8 | ->name('*.php')
9 | ->ignoreDotFiles(true)
10 | ->ignoreVCS(true);
11 |
12 | return PhpCsFixer\Config::create()
13 | ->setRules([
14 | '@PSR2' => true,
15 | 'ordered_imports' => ['sortAlgorithm' => 'alpha'],
16 | 'no_unused_imports' => true,
17 | 'not_operator_with_successor_space' => true,
18 | 'trailing_comma_in_multiline_array' => true,
19 | 'phpdoc_scalar' => true,
20 | 'unary_operator_spaces' => true,
21 | 'binary_operator_spaces' => true,
22 | 'blank_line_before_statement' => [
23 | 'statements' => ['break', 'continue', 'declare', 'return', 'throw', 'try'],
24 | ],
25 | 'phpdoc_single_line_var_spacing' => true,
26 | 'phpdoc_var_without_name' => true,
27 | 'method_argument_space' => [
28 | 'on_multiline' => 'ensure_fully_multiline',
29 | 'keep_multiple_spaces_after_comma' => true,
30 | ],
31 | ])
32 | ->setFinder($finder);
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015-2020 Mark Beech
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |

2 | crawlerdetect.io
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 | ## About CrawlerDetect
14 |
15 | CrawlerDetect is a PHP class for detecting bots/crawlers/spiders via the `user agent` and `http_from` headers. It can currently detect thousands of bots/spiders/crawlers.
16 |
17 | ### Installation
18 | ```
19 | composer require jaybizzle/crawler-detect
20 | ```
21 |
22 | ### Usage
23 | ```PHP
24 | use Jaybizzle\CrawlerDetect\CrawlerDetect;
25 |
26 | $CrawlerDetect = new CrawlerDetect;
27 |
28 | // Check the user agent of the current 'visitor'
29 | if($CrawlerDetect->isCrawler()) {
30 | // true if crawler user agent detected
31 | }
32 |
33 | // Pass a user agent as a string
34 | if($CrawlerDetect->isCrawler('Mozilla/5.0 (compatible; Sosospider/2.0; +http://help.soso.com/webspider.htm)')) {
35 | // true if crawler user agent detected
36 | }
37 |
38 | // Output the name of the bot that matched (if any)
39 | echo $CrawlerDetect->getMatches();
40 | ```
41 |
42 | ### Contributing
43 | If you find a bot/spider/crawler user agent that CrawlerDetect fails to detect, please submit a pull request with the regex pattern added to the `$data` array in `Fixtures/Crawlers.php` and add the failing user agent to `tests/crawlers.txt`.
44 |
45 | Failing that, just create an issue with the user agent you have found, and we'll take it from there :)
46 |
47 | ### Laravel Package
48 | If you would like to use this with Laravel, please see [Laravel-Crawler-Detect](https://github.com/JayBizzle/Laravel-Crawler-Detect)
49 |
50 | ### Symfony Bundle
51 | To use this library with Symfony 2/3/4, check out the [CrawlerDetectBundle](https://github.com/nicolasmure/CrawlerDetectBundle).
52 |
53 | ### YII2 Extension
54 | To use this library with the YII2 framework, check out [yii2-crawler-detect](https://github.com/AlikDex/yii2-crawler-detect).
55 |
56 | ### ES6 Library
57 | To use this library with Node.js or any ES6-based application, check out [es6-crawler-detect](https://github.com/JefferyHus/es6-crawler-detect).
58 |
59 | ### Python Library
60 | To use this library in a Python project, check out [crawlerdetect](https://github.com/moskrc/CrawlerDetect).
61 |
62 | ### JVM Library (written in Java)
63 | To use this library in a JVM project (including Java, Scala, Kotlin, etc.), check out [CrawlerDetect](https://github.com/nekosoftllc/crawler-detect).
64 |
65 | ### .NET Library
66 | To use this library in a .NET Standard (including .NET Core) based project, check out [NetCrawlerDetect](https://github.com/gplumb/NetCrawlerDetect).
67 |
68 | ### Ruby Gem
69 | To use this library with Ruby on Rails or any Ruby-based application, check out [crawler_detect](https://github.com/loadkpi/crawler_detect) gem.
70 |
71 | ### Go Module
72 | To use this library with Go, check out the [crawlerdetect](https://github.com/x-way/crawlerdetect) module.
73 |
74 | _Parts of this class are based on the brilliant [MobileDetect](https://github.com/serbanghita/Mobile-Detect)_
75 |
76 | [](https://github.com/JayBizzle/Crawler-Detect)
77 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "jaybizzle/crawler-detect",
3 | "type": "library",
4 | "description": "CrawlerDetect is a PHP class for detecting bots/crawlers/spiders via the user agent",
5 | "keywords": ["crawler", "crawler detect", "crawler detector", "crawlerdetect", "php crawler detect"],
6 | "homepage": "https://github.com/JayBizzle/Crawler-Detect/",
7 | "license": "MIT",
8 | "authors": [
9 | {
10 | "name": "Mark Beech",
11 | "email": "m@rkbee.ch",
12 | "role": "Developer"
13 | }
14 | ],
15 | "require": {
16 | "php": ">=7.1.0"
17 | },
18 | "require-dev": {
19 | "phpunit/phpunit": "^4.8|^5.5|^6.5|^9.4"
20 | },
21 | "autoload": {
22 | "psr-4": {
23 | "Jaybizzle\\CrawlerDetect\\": "src/"
24 | }
25 | },
26 | "scripts": {
27 | "test": "vendor/bin/phpunit"
28 | }
29 | }
30 |
--------------------------------------------------------------------------------
/export.php:
--------------------------------------------------------------------------------
1 |
7 | *
8 | * This source file is subject to the MIT license that is bundled
9 | * with this source code in the file LICENSE.
10 | */
11 |
12 | require 'src/Fixtures/AbstractProvider.php';
13 | require 'src/Fixtures/Crawlers.php';
14 | require 'src/Fixtures/Exclusions.php';
15 | require 'src/Fixtures/Headers.php';
16 |
// Short names of the fixture classes to export; each one is resolved inside
// the Jaybizzle\CrawlerDetect\Fixtures namespace below.
$fixtureNames = ['Crawlers', 'Exclusions', 'Headers'];

foreach ($fixtureNames as $shortName) {
    $fixtureClass = "Jaybizzle\\CrawlerDetect\\Fixtures\\$shortName";
    $fixture = new $fixtureClass();

    // Emit both raw representations (JSON and plain text) for this fixture.
    outputJson($fixture);
    outputTxt($fixture);
}
30 |
/**
 * Write the data returned by a fixture's getAll() to raw/<ShortClassName>.json.
 *
 * The target path is anchored to this script's directory so the export does
 * not depend on the working directory the script is started from.
 *
 * @param object $object Fixture instance exposing a getAll() method.
 *
 * @throws RuntimeException If the data cannot be JSON-encoded or the file cannot be written.
 */
function outputJson($object)
{
    $className = (new ReflectionClass($object))->getShortName();

    $json = json_encode($object->getAll());

    // json_encode() returns false on failure; stop here instead of passing
    // that result on to file_put_contents() and losing the error.
    if ($json === false) {
        throw new RuntimeException("Unable to encode $className as JSON: ".json_last_error_msg());
    }

    $path = __DIR__."/raw/$className.json";

    // file_put_contents() returns false when the write fails.
    if (file_put_contents($path, $json) === false) {
        throw new RuntimeException("Unable to write $path");
    }
}
36 |
/**
 * Write the data returned by a fixture's getAll() to raw/<ShortClassName>.txt,
 * one entry per line (entries joined with PHP_EOL).
 *
 * The target path is anchored to this script's directory so the export does
 * not depend on the working directory the script is started from.
 *
 * @param object $object Fixture instance exposing a getAll() method.
 *
 * @throws RuntimeException If the file cannot be written.
 */
function outputTxt($object)
{
    $className = (new ReflectionClass($object))->getShortName();
    $path = __DIR__."/raw/$className.txt";

    // file_put_contents() returns false when the write fails; surface that
    // instead of continuing with a missing or stale export file.
    if (file_put_contents($path, implode(PHP_EOL, $object->getAll())) === false) {
        throw new RuntimeException("Unable to write $path");
    }
}
42 |
--------------------------------------------------------------------------------
/raw/Crawlers.json:
--------------------------------------------------------------------------------
1 | [" YLT","^Aether","^Amazon Simple Notification Service Agent$","^Amazon-Route53-Health-Check-Service","^Amazon CloudFront","^b0t$","^bluefish ","^Calypso v\\\/","^COMODO DCV","^Corax","^DangDang","^DavClnt","^DHSH","^docker\\\/[0-9]","^Expanse","^FDM ","^git\\\/","^Goose\\\/","^Grabber","^Gradle\\\/","^HTTPClient\\\/","^HTTPing","^Java\\\/","^Jeode\\\/","^Jetty\\\/","^Mail\\\/","^Mget","^Microsoft URL Control","^Mikrotik\\\/","^Netlab360","^NG\\\/[0-9\\.]","^NING\\\/","^npm\\\/","^Nuclei","^PHP-AYMAPI\\\/","^PHP\\\/","^pip\\\/","^pnpm\\\/","^RMA\\\/","^Ruby|Ruby\\\/[0-9]","^symbolicator\\\/","^Swurl ","^TLS tester ","^twine\\\/","^ureq","^VSE\\\/[0-9]","^WordPress\\.com","^XRL\\\/[0-9]","^ZmEu","008\\\/","13TABS","192\\.comAgent","2GDPR\\\/","2ip\\.ru","404enemy","7Siters","80legs","a3logics\\.in","A6-Indexer","Abonti","Aboundex","aboutthedomain","Accoona-AI-Agent","acebookexternalhit\\\/","acoon","acrylicapps\\.com\\\/pulp","Acunetix","AdAuth\\\/","adbeat","AddThis","ADmantX","AdminLabs","adressendeutschland","adreview\\\/","adscanner","adstxt-worker","Adstxtaggregator","adstxt\\.com","Adyen HttpClient","AffiliateLabz\\\/","affilimate-puppeteer","agentslug","AHC","aihit","aiohttp\\\/","Airmail","akka-http\\\/","akula\\\/","alertra","alexa site audit","Alibaba\\.Security\\.Heimdall","Alligator","allloadin","AllSubmitter","alyze\\.info","amagit","Anarchie","AndroidDownloadManager","Anemone","AngleSharp","annotate_google","Anthill","Anturis Agent","Ant\\.com","AnyEvent-HTTP\\\/","Apache Ant\\\/","Apache Droid","Apache OpenOffice","Apache-HttpAsyncClient","Apache-HttpClient","ApacheBench","Apexoo","apimon\\.de","APIs-Google","AportWorm\\\/","AppBeat\\\/","AppEngine-Google","AppleSyndication","Aprc\\\/[0-9]","Arachmo","arachnode","Arachnophilia","aria2","Arukereso","asafaweb","Asana\\\/","Ask Jeeves","AskQuickly","ASPSeek","Asterias","Astute","asynchttp","Attach","attohttpc","autocite","AutomaticWPTester","Autonomy","awin\\.com","AWS Security 
Scanner","axios\\\/","a\\.pr-cy\\.ru","B-l-i-t-z-B-O-T","Backlink-Ceck","BacklinkHttpStatus","BackStreet","BackupLand","BackWeb","Bad-Neighborhood","Badass","baidu\\.com","Bandit","Barracuda Sentinel \\(EE\\)","basicstate","BatchFTP","Battleztar Bazinga","baypup\\\/","BazQux","BBBike","BCKLINKS","BDFetch","BegunAdvertising","Bewica-security-scan","Bidtellect","BigBozz","Bigfoot","biglotron","BingLocalSearch","BingPreview","binlar","biNu image cacher","Bitacle","Bitrix link preview","biz_Directory","BKCTwitterUnshortener\\\/","Black Hole","Blackboard Safeassign","BlackWidow","BlockNote\\.Net","BlogBridge","Bloglines","Bloglovin","BlogPulseLive","BlogSearch","Blogtrottr","BlowFish","boitho\\.com-dc","Boost\\.Beast","BPImageWalker","Braintree-Webhooks","Branch Metrics API","Branch-Passthrough","Brandprotect","Brandwatch","Brodie\\\/","Browsershots","BUbiNG","Buck\\\/","Buddy","BuiltWith","Bullseye","BunnySlippers","Burf Search","Butterfly\\\/","BuzzSumo","CAAM\\\/[0-9]","caam dot crwlr at gmail dot com","CakePHP","Calculon","Canary%20Mail","CaretNail","catexplorador","CC Metadata Scaper","Cegbfeieh","censys","centuryb.o.t9[at]gmail.com","Cerberian Drtrs","CERT\\.at-Statistics-Survey","cf-facebook","cg-eye","changedetection","ChangesMeter","Charlotte","chatterino-api-cache","CheckHost","checkprivacy","CherryPicker","ChinaClaw","Chirp\\\/","chkme\\.com","Chlooe","Chromaxa","CirrusExplorer","CISPA Vulnerability Notification","CISPA Web Analyser","Citoid","CJNetworkQuality","Clarsentia","clips\\.ua\\.ac\\.be","Cloud mapping","CloudEndure","CloudFlare-AlwaysOnline","Cloudflare-Healthchecks","Cloudinary","cmcm\\.com","coccoc","cognitiveseo","ColdFusion","colly -","CommaFeed","Commons-HttpClient","commonscan","contactbigdatafr","contentkingapp","Contextual Code Sites Explorer","convera","CookieReports","copyright 
sheriff","CopyRightCheck","Copyscape","cortex\\\/","Cosmos4j\\.feedback","Covario-IDS","Craw\\\/","Crescent","Criteo","Crowsnest","CSHttp","CSSCheck","Cula\\\/","curb","Curious George","curl","cuwhois\\\/","cybo\\.com","DAP\\\/NetHTTP","DareBoost","DatabaseDriverMysqli","DataCha0s","DatadogSynthetics","Datafeedwatch","Datanyze","DataparkSearch","dataprovider","DataXu","Daum(oa)?[ \\\/][0-9]","dBpoweramp","ddline","deeris","delve\\.ai","Demon","DeuSu","developers\\.google\\.com\\\/\\+\\\/web\\\/snippet\\\/","Devil","Digg","Digincore","DigitalPebble","Dirbuster","Discourse Forum Onebox","Dispatch\\\/","Disqus\\\/","DittoSpyder","dlvr","DMBrowser","DNSPod-reporting","docoloc","Dolphin http client","DomainAppender","DomainLabz","Domains Project\\\/","Donuts Content Explorer","dotMailer content retrieval","dotSemantic","downforeveryoneorjustme","Download Wonder","downnotifier","DowntimeDetector","Drip","drupact","Drupal \\(\\+http:\\\/\\\/drupal\\.org\\\/\\)","DTS Agent","dubaiindex","DuplexWeb-Google","DynatraceSynthetic","EARTHCOM","Easy-Thumb","EasyDL","Ebingbong","ec2linkfinder","eCairn-Grabber","eCatch","ECCP","eContext\\\/","Ecxi","EirGrabber","ElectricMonk","elefent","EMail Exractor","EMail Wolf","EmailWolf","Embarcadero","Embed PHP Library","Embedly","endo\\\/","europarchive\\.org","evc-batch","EventMachine HttpClient","Everwall Link Expander","Evidon","Evrinid","ExactSearch","ExaleadCloudview","Excel\\\/","exif","ExoRank","Exploratodo","Express WebPictures","Extreme Picture Finder","EyeNetIE","ezooms","facebookcatalog","facebookexternalhit","facebookexternalua","facebookplatform","fairshare","Faraday v","fasthttp","Faveeo","Favicon downloader","faviconarchive","faviconkit","FavOrg","Feed Wrangler","Feedable\\\/","Feedbin","FeedBooster","FeedBucket","FeedBunch\\\/","FeedBurner","feeder","Feedly","FeedshowOnline","Feedshow\\\/","Feedspot","FeedViewer\\\/","Feedwind\\\/","FeedZcollector","feeltiptop","Fetch 
API","Fetch\\\/[0-9]","Fever\\\/[0-9]","FHscan","Fiery%20Feeds","Filestack","Fimap","findlink","findthatfile","FlashGet","FlipboardBrowserProxy","FlipboardProxy","FlipboardRSS","Flock\\\/","Florienzh\\\/","fluffy","Flunky","flynxapp","forensiq","ForusP","FoundSeoTool","fragFINN\\.de","free thumbnails","Freeuploader","FreshRSS","frontman","Funnelback","Fuzz Faster U Fool","G-i-g-a-b-o-t","g00g1e\\.net","ganarvisitas","gdnplus\\.com","GeedoProductSearch","geek-tools","Genieo","GentleSource","GetCode","Getintent","GetLinkInfo","getprismatic","GetRight","getroot","GetURLInfo\\\/","GetWeb","Geziyor","Ghost Inspector","GigablastOpenSource","GIS-LABS","github-camo","GitHub-Hookshot","github\\.com","Go http package","Go [\\d\\.]* package http","Go!Zilla","Go-Ahead-Got-It","Go-http-client","go-mtasts\\\/","gobuster","gobyus","Gofeed","gofetch","Goldfire Server","GomezAgent","gooblog","Goodzer\\\/","Google AppsViewer","Google Desktop","Google favicon","Google Keyword Suggestion","Google Keyword Tool","Google Page Speed Insights","Google PP Default","Google Search Console","Google Web Preview","Google-Ads","Google-Adwords","Google-Apps-Script","Google-Calendar-Importer","Google-HotelAdsVerifier","Google-HTTP-Java-Client","Google-InspectionTool","Google-Podcast","Google-Publisher-Plugin","Google-Read-Aloud","Google-SearchByImage","Google-Site-Verification","Google-SMTP-STS","Google-speakr","Google-Structured-Data-Testing-Tool","Google-Transparency-Report","google-xrawler","Google-Youtube-Links","GoogleDocs","GoogleHC\\\/","GoogleOther","GoogleProber","GoogleProducer","GoogleSites","Gookey","GoSpotCheck","gosquared-thumbnailer","Gotit","GoZilla","grabify","GrabNet","Grafula","Grammarly","GrapeFX","GreatNews","Gregarius","GRequests","grokkit","grouphigh","grub-client","gSOAP\\\/","GT::WWW","GTmetrix","GuzzleHttp","gvfs\\\/","HAA(A)?RTLAND http client","Haansoft","hackney\\\/","Hadi 
Agent","HappyApps-WebCheck","Hardenize","Hatena","Havij","HaxerMen","HEADMasterSEO","HeartRails_Capture","help@dataminr\\.com","heritrix","Hexometer","historious","hkedcity","hledejLevne\\.cz","Hloader","HMView","Holmes","HonesoSearchEngine","HootSuite Image proxy","Hootsuite-WebFeed","hosterstats","HostTracker","ht:\\\/\\\/check","htdig","HTMLparser","htmlyse","HTTP Banner Detection","http-get","HTTP-Header-Abfrage","http-kit","http-request\\\/","HTTP-Tiny","HTTP::Lite","http:\\\/\\\/www.neomo.de\\\/","HttpComponents","httphr","HTTPie","HTTPMon","httpRequest","httpscheck","httpssites_power","httpunit","HttpUrlConnection","http\\.rb\\\/","HTTP_Compression_Test","http_get","http_request2","http_requester","httrack","huaweisymantec","HubSpot ","HubSpot-Link-Resolver","Humanlinks","i2kconnect\\\/","Iblog","ichiro","Id-search","IdeelaborPlagiaat","IDG Twitter Links Resolver","IDwhois\\\/","Iframely","igdeSpyder","iGooglePortal","IlTrovatore","Image Fetch","Image Sucker","ImageEngine\\\/","ImageVisu\\\/","Imagga","imagineeasy","imgsizer","InAGist","inbound\\.li parser","InDesign%20CC","Indy Library","InetURL","infegy","infohelfer","InfoTekies","InfoWizards Reciprocal Link","inpwrd\\.com","instabid","Instapaper","Integrity","integromedb","Intelliseek","InterGET","Internet Ninja","InternetSeer","internetVista monitor","internetwache","internet_archive","intraVnews","IODC","IOI","Inboxb0t","iplabel","ips-agent","IPS\\\/[0-9]","IPWorks HTTP\\\/S Component","iqdb\\\/","Iria","Irokez","isitup\\.org","iskanie","isUp\\.li","iThemes Sync\\\/","IZaBEE","iZSearch","JAHHO","janforman","Jaunt\\\/","Java.*outbrain","javelin\\.io","Jbrofuzz","Jersey\\\/","JetCar","Jigsaw","Jobboerse","JobFeed discovery","Jobg8 URL Monitor","jobo","Jobrapido","Jobsearch1\\.5","JoinVision Generic","JolokiaPwn","Joomla","Jorgee","JS-Kit","JungleKeyThumbnail","JustView","Kaspersky Lab CFR link resolver","Kelny\\\/","Kerrigan\\\/","KeyCDN","Keyword Density","Keywords 
Research","khttp\\\/","KickFire","KimonoLabs\\\/","Kml-Google","knows\\.is","KOCMOHABT","kouio","krawler\\.dk","kube-probe","kubectl","kulturarw3","KumKie","Larbin","Lavf\\\/","leakix\\.net","LeechFTP","LeechGet","letsencrypt","Lftp","LibVLC","LibWeb","Libwhisker","libwww","Licorne","Liferea\\\/","Lighthouse","Lightspeedsystems","Likse","limber\\.io","Link Valet","LinkAlarm\\\/","LinkAnalyser","link-check","linkCheck","linkdex","LinkExaminer","linkfluence","linkpeek","LinkPreview","LinkScan","LinksManager","LinkTiger","LinkWalker","link_thumbnailer","Lipperhey","Litemage_walker","livedoor ScreenShot","LoadImpactRload","localsearch-web","LongURL API","longurl-r-package","looid\\.com","looksystems\\.net","lscache_runner","ltx71","lua-resty-http","Lucee \\(CFML Engine\\)","Lush Http Client","lwp-request","lwp-trivial","LWP::Simple","lycos","LYT\\.SR","L\\.webis","mabontland","MacOutlook\\\/","MagentaNews\\\/","Mag-Net","MagpieRSS","Mail::STS","MailChimp","Mail\\.Ru","Majestic12","makecontact\\\/","Mandrill","MapperCmd","marketinggrader","MarkMonitor","MarkWatch","Mass Downloader","masscan\\\/","Mata Hari","mattermost","MatchorySearch\\\/","Mediametric","Mediapartners-Google","mediawords","MegaIndex\\.ru","MeltwaterNews","Melvil Rawi","MemGator","Metaspinner","MetaURI","MFC_Tear_Sample","Microsearch","Microsoft Data Access","Microsoft Office","Microsoft Outlook","Microsoft Windows Network Diagnostics","Microsoft-WebDAV-MiniRedir","Microsoft\\.Data\\.Mashup","MicrosoftPreview","MIDown tool","MIIxpc","Mindjet","Miniature\\.io","Miniflux","mio_httpc","Miro-HttpClient","Mister PiX","mixdata dot com","mixed-content-scan","mixnode","Mnogosearch","mogimogi","Mojeek","Mojolicious \\(Perl\\)","Mollie","monitis","Monitority\\\/","Monit\\\/","montastic","MonSpark","MonTools","Moreover","Morfeus Fucking Scanner","Morning Paper","MovableType","mowser","Mrcgiguy","Mr\\.4x3 Powered","MS Web Services Client 
Protocol","MSFrontPage","mShots","MuckRack\\\/","muhstik-scan","MVAClient","MxToolbox\\\/","myseosnapshot","nagios","Najdi\\.si","Name Intelligence","NameFo\\.com","Nameprotect","nationalarchives","Navroad","nbertaupete95","NearSite","Needle","Nessus","Net Vampire","NetAnts","NETCRAFT","NetLyzer","NetMechanic","NetNewsWire","Netpursual","netresearch","NetShelter ContentScan","Netsparker","NetSystemsResearch","nettle","NetTrack","Netvibes","NetZIP","Neustar WPM","NeutrinoAPI","NewRelicPinger","NewsBlur .*Finder","NewsGator","newsme","newspaper\\\/","Nexgate Ruby Client","NG-Search","nghttp2","Nibbler","NICErsPRO","NihilScio","Nikto","nineconnections","NLNZ_IAHarvester","Nmap Scripting Engine","node-fetch","node-superagent","node-urllib","Nodemeter","NodePing","node\\.io","nominet\\.org\\.uk","nominet\\.uk","Norton-Safeweb","Notifixious","notifyninja","NotionEmbedder","nuhk","nutch","Nuzzel","nWormFeedFinder","nyawc\\\/","Nymesis","NYU","Observatory\\\/","Ocelli\\\/","Octopus","oegp","Offline Explorer","Offline Navigator","OgScrper","okhttp","omgili","OMSC","Online Domain Tools","Open Source RSS","OpenCalaisSemanticProxy","Openfind","OpenLinkProfiler","Openstat\\\/","OpenVAS","OPPO A33","Optimizer","Orbiter","OrgProbe\\\/","orion-semantics","Outlook-Express","Outlook-iOS","Owler","Owlin","ownCloud News","ow\\.ly","OxfordCloudService","page scorer","Page Valet","page2rss","PageFreezer","PageGrabber","PagePeeker","PageScorer","Pagespeed\\\/","PageThing","page_verifier","Panopta","panscient","Papa Foto","parsijoo","Pavuk","PayPal IPN","pcBrowser","Pcore-HTTP","PDF24 URL To PDF","Pearltrees","PECL::HTTP","peerindex","Peew","PeoplePal","Perlu -","PhantomJS Screenshoter","PhantomJS\\\/","Photon\\\/","php-requests","phpservermon","Pi-Monster","Picscout","Picsearch","PictureFinder","Pimonster","Pingability","PingAdmin\\.Ru","Pingdom","Pingoscope","PingSpot","ping\\.blo\\.gs","pinterest\\.com","Pixray","Pizilla","Plagger\\\/","Pleroma ","Ploetz \\+ 
Zeller","Plukkie","plumanalytics","PocketImageCache","PocketParser","Pockey","PodcastAddict\\\/","POE-Component-Client-HTTP","Polymail\\\/","Pompos","Porkbun","Port Monitor","postano","postfix-mta-sts-resolver","PostmanRuntime","postplanner\\.com","PostPost","postrank","PowerPoint\\\/","Prebid","Prerender","Priceonomics Analysis Engine","PrintFriendly","PritTorrent","Prlog","probely\\.com","probethenet","Project ?25499","Project-Resonance","prospectb2b","Protopage","ProWebWalker","proximic","PRTG Network Monitor","pshtt, https scanning","PTST ","PTST\\\/[0-9]+","pulsetic\\.com","Pump","Python-httplib2","python-httpx","python-requests","Python-urllib","Qirina Hurdler","QQDownload","QrafterPro","Qseero","Qualidator","QueryN Metasearch","queuedriver","quic-go-HTTP\\\/","QuiteRSS","Quora Link Preview","Qwantify","Radian6","RadioPublicImageResizer","Railgun\\\/","RankActive","RankFlex","RankSonicSiteAuditor","RapidLoad\\\/","Re-re Studio","ReactorNetty","Readability","RealDownload","RealPlayer%20Downloader","RebelMouse","Recorder","RecurPost\\\/","redback\\\/","ReederForMac","Reeder\\\/","ReGet","RepoMonkey","request\\.js","reqwest\\\/","ResponseCodeTest","RestSharp","Riddler","Rival IQ","Robosourcer","Robozilla","ROI Hunter","RPT-HTTPClient","RSSMix\\\/","RSSOwl","RuxitSynthetic","RyowlEngine","safe-agent-scanner","SalesIntelligent","Saleslift","SAP NetWeaver Application Server","SauceNAO","SBIder","sc-downloader","scalaj-http","Scamadviser-Frontend","ScanAlert","scan\\.lol","Scoop","scooter","ScopeContentAG-HTTP-Client","ScoutJet","ScoutURLMonitor","ScrapeBox Page Scanner","Scrapy","Screaming","ScreenShotService","Scrubby","Scrutiny\\\/","Search37","searchenginepromotionhelp","Searchestate","SearchExpress","SearchSight","SearchWP","search\\.thunderstone","Seeker","semanticdiscovery","semanticjuice","Semiocast HTTP client","Semrush","Sendsay\\.Ru","sentry\\\/","SEO Browser","Seo 
Servis","seo-nastroj\\.cz","seo4ajax","Seobility","SEOCentro","SeoCheck","seocompany","SEOkicks","SEOlizer","Seomoz","SEOprofiler","seoscanners","SEOsearch","seositecheckup","SEOstats","servernfo","sexsearcher","Seznam","Shelob","Shodan","Shoppimon","ShopWiki","ShortLinkTranslate","shortURL lengthener","shrinktheweb","Sideqik","Siege","SimplePie","SimplyFast","Siphon","SISTRIX","Site Sucker","Site-Shot\\\/","Site24x7","SiteBar","Sitebeam","Sitebulb\\\/","SiteCondor","SiteExplorer","SiteGuardian","Siteimprove","SiteIndexed","Sitemap(s)? Generator","SitemapGenerator","SiteMonitor","Siteshooter B0t","SiteSnagger","SiteSucker","SiteTruth","Sitevigil","sitexy\\.com","SkypeUriPreview","Slack\\\/","sli-systems\\.com","slider\\.com","slurp","SlySearch","SmartDownload","SMRF URL Expander","SMUrlExpander","Snake","Snappy","SnapSearch","Snarfer\\\/","SniffRSS","sniptracker","Snoopy","SnowHaze Search","sogou web","SortSite","Sottopop","sovereign\\.ai","SpaceBison","SpamExperts","Spammen","Spanner","Spawning-AI","spaziodati","SPDYCheck","Specificfeeds","SpeedKit","speedy","SPEng","Spinn3r","spray-can","Sprinklr ","spyonweb","sqlmap","Sqlworm","Sqworm","SSL Labs","ssl-tools","StackRambler","Statastico\\\/","Statically-","StatusCake","Steeler","Stratagems Kumo","Stripe\\\/","Stroke\\.cz","StudioFACA","StumbleUpon","suchen","Sucuri","summify","SuperHTTP","Surphace Scout","Suzuran","swcd ","Symfony BrowserKit","Symfony2 BrowserKit","Synapse\\\/","Syndirella\\\/","SynHttpClient-Built","Sysomos","sysscan","Szukacz","T0PHackTeam","tAkeOut","Tarantula\\\/","Taringa UGC","TarmotGezgin","tchelebi\\.io","techiaith\\.cymru","Teleport","Telesoft","Telesphoreo","Telesphorep","Tenon\\.io","teoma","terrainformatica","Test Certificate Info","testuri","Tetrahedron","TextRazor Downloader","The Drop Reaper","The Expert HTML Source Viewer","The Intraformant","The Knowledge AI","theinternetrules","TheNomad","Thinklab","Thumbor","Thumbshots","ThumbSniper","timewe\\.net","TinEye","Tiny Tiny 
RSS","TLSProbe\\\/","Toata","topster","touche\\.com","Traackr\\.com","tracemyfile","Trackuity","TrapitAgent","Trendiction","Trendsmap","trendspottr","truwoGPS","TryJsoup","TulipChain","Turingos","Turnitin","tweetedtimes","Tweetminster","Tweezler\\\/","twibble","Twice","Twikle","Twingly","Twisted PageGetter","Typhoeus","ubermetrics-technologies","uclassify","UdmSearch","ultimate_sitemap_parser","unchaos","unirest-java","UniversalFeedParser","unshortenit","Unshorten\\.It","Untiny","UnwindFetchor","updated","updown\\.io daemon","Upflow","Uptimia","URL Verifier","Urlcheckr","URLitor","urlresolver","Urlstat","URLTester","UrlTrends Ranking Updater","URLy Warning","URLy\\.Warning","URL\\\/Emacs","Vacuum","Vagabondo","VB Project","vBSEO","VCI","Verity","via ggpht\\.com GoogleImageProxy","Virusdie","visionutils","Visual Rights Group","vkShare","VoidEYE","Voil","voltron","voyager\\\/","VSAgent\\\/","VSB-TUO\\\/","Vulnbusters Meter","VYU2","w3af\\.org","W3C-checklink","W3C-mobileOK","W3C_Unicorn","WAC-OFU","WakeletLinkExpander","WallpapersHD","Wallpapers\\\/[0-9]+","wangling","Wappalyzer","WatchMouse","WbSrch\\\/","WDT\\.io","Web Auto","Web Collage","Web Enhancer","Web Fetch","Web Fuck","Web Pix","Web Sauger","Web spyder","Web Sucker","web-capture\\.net","Web-sniffer","Webalta","Webauskunft","WebAuto","WebCapture","WebClient\\\/","webcollage","WebCookies","WebCopier","WebCorp","WebDataStats","WebDoc","WebEnhancer","WebFetch","WebFuck","WebGazer","WebGo IS","WebImageCollector","WebImages","WebIndex","webkit2png","WebLeacher","webmastercoffee","webmon ","WebPix","WebReaper","WebSauger","webscreenie","Webshag","Webshot","Website Quester","websitepulse agent","WebsiteQuester","Websnapr","WebSniffer","Webster","WebStripper","WebSucker","webtech\\\/","WebThumbnail","Webthumb\\\/","WebWhacker","WebZIP","WeLikeLinks","WEPA","WeSEE","wf84","Wfuzz\\\/","wget","WhatCMS","WhatsApp","WhatsMyIP","WhatWeb","WhereGoes\\?","Whibse","WhoAPI\\\/","WhoRunsCoinHive","Whynder 
Magnet","Windows-RSS-Platform","WinHttp-Autoproxy-Service","WinHTTP\\\/","WinPodder","wkhtmlto","wmtips","Woko","Wolfram HTTPClient","woorankreview","WordPress\\\/","WordupinfoSearch","Word\\\/","worldping-api","wotbox","WP Engine Install Performance API","WP Rocket","wpif","wprecon\\.com survey","WPScan","wscheck","Wtrace","WWW-Collector-E","WWW-Mechanize","WWW::Document","WWW::Mechanize","WWWOFFLE","www\\.monitor\\.us","x09Mozilla","x22Mozilla","XaxisSemanticsClassifier","XenForo\\\/","Xenu Link Sleuth","XING-contenttabreceiver","xpymep([0-9]?)\\.exe","Y!J-[A-Z][A-Z][A-Z]","Yaanb","yacy","Yahoo Link Preview","YahooCacheSystem","YahooMailProxy","YahooYSMcm","YandeG","Yandex(?!Search)","yanga","yeti","Yo-yo","Yoleo Consumer","yomins\\.com","yoogliFetchAgent","YottaaMonitor","Your-Website-Sucks","yourls\\.org","YoYs\\.net","YP\\.PL","Zabbix","Zade","Zao","Zapier","Zauba","Zemanta Aggregator","Zend\\\\Http\\\\Client","Zend_Http_Client","Zermelo","Zeus ","zgrab","ZnajdzFoto","ZnHTTP","Zombie\\.js","Zoom\\.Mac","ZoteroTranslationServer","ZyBorg","[a-z0-9\\-_]*(bot|crawl|headless|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer|scraper)"]
--------------------------------------------------------------------------------
/raw/Crawlers.txt:
--------------------------------------------------------------------------------
1 | YLT
2 | ^Aether
3 | ^Amazon Simple Notification Service Agent$
4 | ^Amazon-Route53-Health-Check-Service
5 | ^Amazon CloudFront
6 | ^b0t$
7 | ^bluefish
8 | ^Calypso v\/
9 | ^COMODO DCV
10 | ^Corax
11 | ^DangDang
12 | ^DavClnt
13 | ^DHSH
14 | ^docker\/[0-9]
15 | ^Expanse
16 | ^FDM
17 | ^git\/
18 | ^Goose\/
19 | ^Grabber
20 | ^Gradle\/
21 | ^HTTPClient\/
22 | ^HTTPing
23 | ^Java\/
24 | ^Jeode\/
25 | ^Jetty\/
26 | ^Mail\/
27 | ^Mget
28 | ^Microsoft URL Control
29 | ^Mikrotik\/
30 | ^Netlab360
31 | ^NG\/[0-9\.]
32 | ^NING\/
33 | ^npm\/
34 | ^Nuclei
35 | ^PHP-AYMAPI\/
36 | ^PHP\/
37 | ^pip\/
38 | ^pnpm\/
39 | ^RMA\/
40 | ^Ruby|Ruby\/[0-9]
41 | ^symbolicator\/
42 | ^Swurl
43 | ^TLS tester
44 | ^twine\/
45 | ^ureq
46 | ^VSE\/[0-9]
47 | ^WordPress\.com
48 | ^XRL\/[0-9]
49 | ^ZmEu
50 | 008\/
51 | 13TABS
52 | 192\.comAgent
53 | 2GDPR\/
54 | 2ip\.ru
55 | 404enemy
56 | 7Siters
57 | 80legs
58 | a3logics\.in
59 | A6-Indexer
60 | Abonti
61 | Aboundex
62 | aboutthedomain
63 | Accoona-AI-Agent
64 | acebookexternalhit\/
65 | acoon
66 | acrylicapps\.com\/pulp
67 | Acunetix
68 | AdAuth\/
69 | adbeat
70 | AddThis
71 | ADmantX
72 | AdminLabs
73 | adressendeutschland
74 | adreview\/
75 | adscanner
76 | adstxt-worker
77 | Adstxtaggregator
78 | adstxt\.com
79 | Adyen HttpClient
80 | AffiliateLabz\/
81 | affilimate-puppeteer
82 | agentslug
83 | AHC
84 | aihit
85 | aiohttp\/
86 | Airmail
87 | akka-http\/
88 | akula\/
89 | alertra
90 | alexa site audit
91 | Alibaba\.Security\.Heimdall
92 | Alligator
93 | allloadin
94 | AllSubmitter
95 | alyze\.info
96 | amagit
97 | Anarchie
98 | AndroidDownloadManager
99 | Anemone
100 | AngleSharp
101 | annotate_google
102 | Anthill
103 | Anturis Agent
104 | Ant\.com
105 | AnyEvent-HTTP\/
106 | Apache Ant\/
107 | Apache Droid
108 | Apache OpenOffice
109 | Apache-HttpAsyncClient
110 | Apache-HttpClient
111 | ApacheBench
112 | Apexoo
113 | apimon\.de
114 | APIs-Google
115 | AportWorm\/
116 | AppBeat\/
117 | AppEngine-Google
118 | AppleSyndication
119 | Aprc\/[0-9]
120 | Arachmo
121 | arachnode
122 | Arachnophilia
123 | aria2
124 | Arukereso
125 | asafaweb
126 | Asana\/
127 | Ask Jeeves
128 | AskQuickly
129 | ASPSeek
130 | Asterias
131 | Astute
132 | asynchttp
133 | Attach
134 | attohttpc
135 | autocite
136 | AutomaticWPTester
137 | Autonomy
138 | awin\.com
139 | AWS Security Scanner
140 | axios\/
141 | a\.pr-cy\.ru
142 | B-l-i-t-z-B-O-T
143 | Backlink-Ceck
144 | BacklinkHttpStatus
145 | BackStreet
146 | BackupLand
147 | BackWeb
148 | Bad-Neighborhood
149 | Badass
150 | baidu\.com
151 | Bandit
152 | Barracuda Sentinel \(EE\)
153 | basicstate
154 | BatchFTP
155 | Battleztar Bazinga
156 | baypup\/
157 | BazQux
158 | BBBike
159 | BCKLINKS
160 | BDFetch
161 | BegunAdvertising
162 | Bewica-security-scan
163 | Bidtellect
164 | BigBozz
165 | Bigfoot
166 | biglotron
167 | BingLocalSearch
168 | BingPreview
169 | binlar
170 | biNu image cacher
171 | Bitacle
172 | Bitrix link preview
173 | biz_Directory
174 | BKCTwitterUnshortener\/
175 | Black Hole
176 | Blackboard Safeassign
177 | BlackWidow
178 | BlockNote\.Net
179 | BlogBridge
180 | Bloglines
181 | Bloglovin
182 | BlogPulseLive
183 | BlogSearch
184 | Blogtrottr
185 | BlowFish
186 | boitho\.com-dc
187 | Boost\.Beast
188 | BPImageWalker
189 | Braintree-Webhooks
190 | Branch Metrics API
191 | Branch-Passthrough
192 | Brandprotect
193 | Brandwatch
194 | Brodie\/
195 | Browsershots
196 | BUbiNG
197 | Buck\/
198 | Buddy
199 | BuiltWith
200 | Bullseye
201 | BunnySlippers
202 | Burf Search
203 | Butterfly\/
204 | BuzzSumo
205 | CAAM\/[0-9]
206 | caam dot crwlr at gmail dot com
207 | CakePHP
208 | Calculon
209 | Canary%20Mail
210 | CaretNail
211 | catexplorador
212 | CC Metadata Scaper
213 | Cegbfeieh
214 | censys
215 | centuryb.o.t9[at]gmail.com
216 | Cerberian Drtrs
217 | CERT\.at-Statistics-Survey
218 | cf-facebook
219 | cg-eye
220 | changedetection
221 | ChangesMeter
222 | Charlotte
223 | chatterino-api-cache
224 | CheckHost
225 | checkprivacy
226 | CherryPicker
227 | ChinaClaw
228 | Chirp\/
229 | chkme\.com
230 | Chlooe
231 | Chromaxa
232 | CirrusExplorer
233 | CISPA Vulnerability Notification
234 | CISPA Web Analyser
235 | Citoid
236 | CJNetworkQuality
237 | Clarsentia
238 | clips\.ua\.ac\.be
239 | Cloud mapping
240 | CloudEndure
241 | CloudFlare-AlwaysOnline
242 | Cloudflare-Healthchecks
243 | Cloudinary
244 | cmcm\.com
245 | coccoc
246 | cognitiveseo
247 | ColdFusion
248 | colly -
249 | CommaFeed
250 | Commons-HttpClient
251 | commonscan
252 | contactbigdatafr
253 | contentkingapp
254 | Contextual Code Sites Explorer
255 | convera
256 | CookieReports
257 | copyright sheriff
258 | CopyRightCheck
259 | Copyscape
260 | cortex\/
261 | Cosmos4j\.feedback
262 | Covario-IDS
263 | Craw\/
264 | Crescent
265 | Criteo
266 | Crowsnest
267 | CSHttp
268 | CSSCheck
269 | Cula\/
270 | curb
271 | Curious George
272 | curl
273 | cuwhois\/
274 | cybo\.com
275 | DAP\/NetHTTP
276 | DareBoost
277 | DatabaseDriverMysqli
278 | DataCha0s
279 | DatadogSynthetics
280 | Datafeedwatch
281 | Datanyze
282 | DataparkSearch
283 | dataprovider
284 | DataXu
285 | Daum(oa)?[ \/][0-9]
286 | dBpoweramp
287 | ddline
288 | deeris
289 | delve\.ai
290 | Demon
291 | DeuSu
292 | developers\.google\.com\/\+\/web\/snippet\/
293 | Devil
294 | Digg
295 | Digincore
296 | DigitalPebble
297 | Dirbuster
298 | Discourse Forum Onebox
299 | Dispatch\/
300 | Disqus\/
301 | DittoSpyder
302 | dlvr
303 | DMBrowser
304 | DNSPod-reporting
305 | docoloc
306 | Dolphin http client
307 | DomainAppender
308 | DomainLabz
309 | Domains Project\/
310 | Donuts Content Explorer
311 | dotMailer content retrieval
312 | dotSemantic
313 | downforeveryoneorjustme
314 | Download Wonder
315 | downnotifier
316 | DowntimeDetector
317 | Drip
318 | drupact
319 | Drupal \(\+http:\/\/drupal\.org\/\)
320 | DTS Agent
321 | dubaiindex
322 | DuplexWeb-Google
323 | DynatraceSynthetic
324 | EARTHCOM
325 | Easy-Thumb
326 | EasyDL
327 | Ebingbong
328 | ec2linkfinder
329 | eCairn-Grabber
330 | eCatch
331 | ECCP
332 | eContext\/
333 | Ecxi
334 | EirGrabber
335 | ElectricMonk
336 | elefent
337 | EMail Exractor
338 | EMail Wolf
339 | EmailWolf
340 | Embarcadero
341 | Embed PHP Library
342 | Embedly
343 | endo\/
344 | europarchive\.org
345 | evc-batch
346 | EventMachine HttpClient
347 | Everwall Link Expander
348 | Evidon
349 | Evrinid
350 | ExactSearch
351 | ExaleadCloudview
352 | Excel\/
353 | exif
354 | ExoRank
355 | Exploratodo
356 | Express WebPictures
357 | Extreme Picture Finder
358 | EyeNetIE
359 | ezooms
360 | facebookcatalog
361 | facebookexternalhit
362 | facebookexternalua
363 | facebookplatform
364 | fairshare
365 | Faraday v
366 | fasthttp
367 | Faveeo
368 | Favicon downloader
369 | faviconarchive
370 | faviconkit
371 | FavOrg
372 | Feed Wrangler
373 | Feedable\/
374 | Feedbin
375 | FeedBooster
376 | FeedBucket
377 | FeedBunch\/
378 | FeedBurner
379 | feeder
380 | Feedly
381 | FeedshowOnline
382 | Feedshow\/
383 | Feedspot
384 | FeedViewer\/
385 | Feedwind\/
386 | FeedZcollector
387 | feeltiptop
388 | Fetch API
389 | Fetch\/[0-9]
390 | Fever\/[0-9]
391 | FHscan
392 | Fiery%20Feeds
393 | Filestack
394 | Fimap
395 | findlink
396 | findthatfile
397 | FlashGet
398 | FlipboardBrowserProxy
399 | FlipboardProxy
400 | FlipboardRSS
401 | Flock\/
402 | Florienzh\/
403 | fluffy
404 | Flunky
405 | flynxapp
406 | forensiq
407 | ForusP
408 | FoundSeoTool
409 | fragFINN\.de
410 | free thumbnails
411 | Freeuploader
412 | FreshRSS
413 | frontman
414 | Funnelback
415 | Fuzz Faster U Fool
416 | G-i-g-a-b-o-t
417 | g00g1e\.net
418 | ganarvisitas
419 | gdnplus\.com
420 | GeedoProductSearch
421 | geek-tools
422 | Genieo
423 | GentleSource
424 | GetCode
425 | Getintent
426 | GetLinkInfo
427 | getprismatic
428 | GetRight
429 | getroot
430 | GetURLInfo\/
431 | GetWeb
432 | Geziyor
433 | Ghost Inspector
434 | GigablastOpenSource
435 | GIS-LABS
436 | github-camo
437 | GitHub-Hookshot
438 | github\.com
439 | Go http package
440 | Go [\d\.]* package http
441 | Go!Zilla
442 | Go-Ahead-Got-It
443 | Go-http-client
444 | go-mtasts\/
445 | gobuster
446 | gobyus
447 | Gofeed
448 | gofetch
449 | Goldfire Server
450 | GomezAgent
451 | gooblog
452 | Goodzer\/
453 | Google AppsViewer
454 | Google Desktop
455 | Google favicon
456 | Google Keyword Suggestion
457 | Google Keyword Tool
458 | Google Page Speed Insights
459 | Google PP Default
460 | Google Search Console
461 | Google Web Preview
462 | Google-Ads
463 | Google-Adwords
464 | Google-Apps-Script
465 | Google-Calendar-Importer
466 | Google-HotelAdsVerifier
467 | Google-HTTP-Java-Client
468 | Google-InspectionTool
469 | Google-Podcast
470 | Google-Publisher-Plugin
471 | Google-Read-Aloud
472 | Google-SearchByImage
473 | Google-Site-Verification
474 | Google-SMTP-STS
475 | Google-speakr
476 | Google-Structured-Data-Testing-Tool
477 | Google-Transparency-Report
478 | google-xrawler
479 | Google-Youtube-Links
480 | GoogleDocs
481 | GoogleHC\/
482 | GoogleOther
483 | GoogleProber
484 | GoogleProducer
485 | GoogleSites
486 | Gookey
487 | GoSpotCheck
488 | gosquared-thumbnailer
489 | Gotit
490 | GoZilla
491 | grabify
492 | GrabNet
493 | Grafula
494 | Grammarly
495 | GrapeFX
496 | GreatNews
497 | Gregarius
498 | GRequests
499 | grokkit
500 | grouphigh
501 | grub-client
502 | gSOAP\/
503 | GT::WWW
504 | GTmetrix
505 | GuzzleHttp
506 | gvfs\/
507 | HAA(A)?RTLAND http client
508 | Haansoft
509 | hackney\/
510 | Hadi Agent
511 | HappyApps-WebCheck
512 | Hardenize
513 | Hatena
514 | Havij
515 | HaxerMen
516 | HEADMasterSEO
517 | HeartRails_Capture
518 | help@dataminr\.com
519 | heritrix
520 | Hexometer
521 | historious
522 | hkedcity
523 | hledejLevne\.cz
524 | Hloader
525 | HMView
526 | Holmes
527 | HonesoSearchEngine
528 | HootSuite Image proxy
529 | Hootsuite-WebFeed
530 | hosterstats
531 | HostTracker
532 | ht:\/\/check
533 | htdig
534 | HTMLparser
535 | htmlyse
536 | HTTP Banner Detection
537 | http-get
538 | HTTP-Header-Abfrage
539 | http-kit
540 | http-request\/
541 | HTTP-Tiny
542 | HTTP::Lite
543 | http:\/\/www.neomo.de\/
544 | HttpComponents
545 | httphr
546 | HTTPie
547 | HTTPMon
548 | httpRequest
549 | httpscheck
550 | httpssites_power
551 | httpunit
552 | HttpUrlConnection
553 | http\.rb\/
554 | HTTP_Compression_Test
555 | http_get
556 | http_request2
557 | http_requester
558 | httrack
559 | huaweisymantec
560 | HubSpot
561 | HubSpot-Link-Resolver
562 | Humanlinks
563 | i2kconnect\/
564 | Iblog
565 | ichiro
566 | Id-search
567 | IdeelaborPlagiaat
568 | IDG Twitter Links Resolver
569 | IDwhois\/
570 | Iframely
571 | igdeSpyder
572 | iGooglePortal
573 | IlTrovatore
574 | Image Fetch
575 | Image Sucker
576 | ImageEngine\/
577 | ImageVisu\/
578 | Imagga
579 | imagineeasy
580 | imgsizer
581 | InAGist
582 | inbound\.li parser
583 | InDesign%20CC
584 | Indy Library
585 | InetURL
586 | infegy
587 | infohelfer
588 | InfoTekies
589 | InfoWizards Reciprocal Link
590 | inpwrd\.com
591 | instabid
592 | Instapaper
593 | Integrity
594 | integromedb
595 | Intelliseek
596 | InterGET
597 | Internet Ninja
598 | InternetSeer
599 | internetVista monitor
600 | internetwache
601 | internet_archive
602 | intraVnews
603 | IODC
604 | IOI
605 | Inboxb0t
606 | iplabel
607 | ips-agent
608 | IPS\/[0-9]
609 | IPWorks HTTP\/S Component
610 | iqdb\/
611 | Iria
612 | Irokez
613 | isitup\.org
614 | iskanie
615 | isUp\.li
616 | iThemes Sync\/
617 | IZaBEE
618 | iZSearch
619 | JAHHO
620 | janforman
621 | Jaunt\/
622 | Java.*outbrain
623 | javelin\.io
624 | Jbrofuzz
625 | Jersey\/
626 | JetCar
627 | Jigsaw
628 | Jobboerse
629 | JobFeed discovery
630 | Jobg8 URL Monitor
631 | jobo
632 | Jobrapido
633 | Jobsearch1\.5
634 | JoinVision Generic
635 | JolokiaPwn
636 | Joomla
637 | Jorgee
638 | JS-Kit
639 | JungleKeyThumbnail
640 | JustView
641 | Kaspersky Lab CFR link resolver
642 | Kelny\/
643 | Kerrigan\/
644 | KeyCDN
645 | Keyword Density
646 | Keywords Research
647 | khttp\/
648 | KickFire
649 | KimonoLabs\/
650 | Kml-Google
651 | knows\.is
652 | KOCMOHABT
653 | kouio
654 | krawler\.dk
655 | kube-probe
656 | kubectl
657 | kulturarw3
658 | KumKie
659 | Larbin
660 | Lavf\/
661 | leakix\.net
662 | LeechFTP
663 | LeechGet
664 | letsencrypt
665 | Lftp
666 | LibVLC
667 | LibWeb
668 | Libwhisker
669 | libwww
670 | Licorne
671 | Liferea\/
672 | Lighthouse
673 | Lightspeedsystems
674 | Likse
675 | limber\.io
676 | Link Valet
677 | LinkAlarm\/
678 | LinkAnalyser
679 | link-check
680 | linkCheck
681 | linkdex
682 | LinkExaminer
683 | linkfluence
684 | linkpeek
685 | LinkPreview
686 | LinkScan
687 | LinksManager
688 | LinkTiger
689 | LinkWalker
690 | link_thumbnailer
691 | Lipperhey
692 | Litemage_walker
693 | livedoor ScreenShot
694 | LoadImpactRload
695 | localsearch-web
696 | LongURL API
697 | longurl-r-package
698 | looid\.com
699 | looksystems\.net
700 | lscache_runner
701 | ltx71
702 | lua-resty-http
703 | Lucee \(CFML Engine\)
704 | Lush Http Client
705 | lwp-request
706 | lwp-trivial
707 | LWP::Simple
708 | lycos
709 | LYT\.SR
710 | L\.webis
711 | mabontland
712 | MacOutlook\/
713 | MagentaNews\/
714 | Mag-Net
715 | MagpieRSS
716 | Mail::STS
717 | MailChimp
718 | Mail\.Ru
719 | Majestic12
720 | makecontact\/
721 | Mandrill
722 | MapperCmd
723 | marketinggrader
724 | MarkMonitor
725 | MarkWatch
726 | Mass Downloader
727 | masscan\/
728 | Mata Hari
729 | mattermost
730 | MatchorySearch\/
731 | Mediametric
732 | Mediapartners-Google
733 | mediawords
734 | MegaIndex\.ru
735 | MeltwaterNews
736 | Melvil Rawi
737 | MemGator
738 | Metaspinner
739 | MetaURI
740 | MFC_Tear_Sample
741 | Microsearch
742 | Microsoft Data Access
743 | Microsoft Office
744 | Microsoft Outlook
745 | Microsoft Windows Network Diagnostics
746 | Microsoft-WebDAV-MiniRedir
747 | Microsoft\.Data\.Mashup
748 | MicrosoftPreview
749 | MIDown tool
750 | MIIxpc
751 | Mindjet
752 | Miniature\.io
753 | Miniflux
754 | mio_httpc
755 | Miro-HttpClient
756 | Mister PiX
757 | mixdata dot com
758 | mixed-content-scan
759 | mixnode
760 | Mnogosearch
761 | mogimogi
762 | Mojeek
763 | Mojolicious \(Perl\)
764 | Mollie
765 | monitis
766 | Monitority\/
767 | Monit\/
768 | montastic
769 | MonSpark
770 | MonTools
771 | Moreover
772 | Morfeus Fucking Scanner
773 | Morning Paper
774 | MovableType
775 | mowser
776 | Mrcgiguy
777 | Mr\.4x3 Powered
778 | MS Web Services Client Protocol
779 | MSFrontPage
780 | mShots
781 | MuckRack\/
782 | muhstik-scan
783 | MVAClient
784 | MxToolbox\/
785 | myseosnapshot
786 | nagios
787 | Najdi\.si
788 | Name Intelligence
789 | NameFo\.com
790 | Nameprotect
791 | nationalarchives
792 | Navroad
793 | nbertaupete95
794 | NearSite
795 | Needle
796 | Nessus
797 | Net Vampire
798 | NetAnts
799 | NETCRAFT
800 | NetLyzer
801 | NetMechanic
802 | NetNewsWire
803 | Netpursual
804 | netresearch
805 | NetShelter ContentScan
806 | Netsparker
807 | NetSystemsResearch
808 | nettle
809 | NetTrack
810 | Netvibes
811 | NetZIP
812 | Neustar WPM
813 | NeutrinoAPI
814 | NewRelicPinger
815 | NewsBlur .*Finder
816 | NewsGator
817 | newsme
818 | newspaper\/
819 | Nexgate Ruby Client
820 | NG-Search
821 | nghttp2
822 | Nibbler
823 | NICErsPRO
824 | NihilScio
825 | Nikto
826 | nineconnections
827 | NLNZ_IAHarvester
828 | Nmap Scripting Engine
829 | node-fetch
830 | node-superagent
831 | node-urllib
832 | Nodemeter
833 | NodePing
834 | node\.io
835 | nominet\.org\.uk
836 | nominet\.uk
837 | Norton-Safeweb
838 | Notifixious
839 | notifyninja
840 | NotionEmbedder
841 | nuhk
842 | nutch
843 | Nuzzel
844 | nWormFeedFinder
845 | nyawc\/
846 | Nymesis
847 | NYU
848 | Observatory\/
849 | Ocelli\/
850 | Octopus
851 | oegp
852 | Offline Explorer
853 | Offline Navigator
854 | OgScrper
855 | okhttp
856 | omgili
857 | OMSC
858 | Online Domain Tools
859 | Open Source RSS
860 | OpenCalaisSemanticProxy
861 | Openfind
862 | OpenLinkProfiler
863 | Openstat\/
864 | OpenVAS
865 | OPPO A33
866 | Optimizer
867 | Orbiter
868 | OrgProbe\/
869 | orion-semantics
870 | Outlook-Express
871 | Outlook-iOS
872 | Owler
873 | Owlin
874 | ownCloud News
875 | ow\.ly
876 | OxfordCloudService
877 | page scorer
878 | Page Valet
879 | page2rss
880 | PageFreezer
881 | PageGrabber
882 | PagePeeker
883 | PageScorer
884 | Pagespeed\/
885 | PageThing
886 | page_verifier
887 | Panopta
888 | panscient
889 | Papa Foto
890 | parsijoo
891 | Pavuk
892 | PayPal IPN
893 | pcBrowser
894 | Pcore-HTTP
895 | PDF24 URL To PDF
896 | Pearltrees
897 | PECL::HTTP
898 | peerindex
899 | Peew
900 | PeoplePal
901 | Perlu -
902 | PhantomJS Screenshoter
903 | PhantomJS\/
904 | Photon\/
905 | php-requests
906 | phpservermon
907 | Pi-Monster
908 | Picscout
909 | Picsearch
910 | PictureFinder
911 | Pimonster
912 | Pingability
913 | PingAdmin\.Ru
914 | Pingdom
915 | Pingoscope
916 | PingSpot
917 | ping\.blo\.gs
918 | pinterest\.com
919 | Pixray
920 | Pizilla
921 | Plagger\/
922 | Pleroma
923 | Ploetz \+ Zeller
924 | Plukkie
925 | plumanalytics
926 | PocketImageCache
927 | PocketParser
928 | Pockey
929 | PodcastAddict\/
930 | POE-Component-Client-HTTP
931 | Polymail\/
932 | Pompos
933 | Porkbun
934 | Port Monitor
935 | postano
936 | postfix-mta-sts-resolver
937 | PostmanRuntime
938 | postplanner\.com
939 | PostPost
940 | postrank
941 | PowerPoint\/
942 | Prebid
943 | Prerender
944 | Priceonomics Analysis Engine
945 | PrintFriendly
946 | PritTorrent
947 | Prlog
948 | probely\.com
949 | probethenet
950 | Project ?25499
951 | Project-Resonance
952 | prospectb2b
953 | Protopage
954 | ProWebWalker
955 | proximic
956 | PRTG Network Monitor
957 | pshtt, https scanning
958 | PTST
959 | PTST\/[0-9]+
960 | pulsetic\.com
961 | Pump
962 | Python-httplib2
963 | python-httpx
964 | python-requests
965 | Python-urllib
966 | Qirina Hurdler
967 | QQDownload
968 | QrafterPro
969 | Qseero
970 | Qualidator
971 | QueryN Metasearch
972 | queuedriver
973 | quic-go-HTTP\/
974 | QuiteRSS
975 | Quora Link Preview
976 | Qwantify
977 | Radian6
978 | RadioPublicImageResizer
979 | Railgun\/
980 | RankActive
981 | RankFlex
982 | RankSonicSiteAuditor
983 | RapidLoad\/
984 | Re-re Studio
985 | ReactorNetty
986 | Readability
987 | RealDownload
988 | RealPlayer%20Downloader
989 | RebelMouse
990 | Recorder
991 | RecurPost\/
992 | redback\/
993 | ReederForMac
994 | Reeder\/
995 | ReGet
996 | RepoMonkey
997 | request\.js
998 | reqwest\/
999 | ResponseCodeTest
1000 | RestSharp
1001 | Riddler
1002 | Rival IQ
1003 | Robosourcer
1004 | Robozilla
1005 | ROI Hunter
1006 | RPT-HTTPClient
1007 | RSSMix\/
1008 | RSSOwl
1009 | RuxitSynthetic
1010 | RyowlEngine
1011 | safe-agent-scanner
1012 | SalesIntelligent
1013 | Saleslift
1014 | SAP NetWeaver Application Server
1015 | SauceNAO
1016 | SBIder
1017 | sc-downloader
1018 | scalaj-http
1019 | Scamadviser-Frontend
1020 | ScanAlert
1021 | scan\.lol
1022 | Scoop
1023 | scooter
1024 | ScopeContentAG-HTTP-Client
1025 | ScoutJet
1026 | ScoutURLMonitor
1027 | ScrapeBox Page Scanner
1028 | Scrapy
1029 | Screaming
1030 | ScreenShotService
1031 | Scrubby
1032 | Scrutiny\/
1033 | Search37
1034 | searchenginepromotionhelp
1035 | Searchestate
1036 | SearchExpress
1037 | SearchSight
1038 | SearchWP
1039 | search\.thunderstone
1040 | Seeker
1041 | semanticdiscovery
1042 | semanticjuice
1043 | Semiocast HTTP client
1044 | Semrush
1045 | Sendsay\.Ru
1046 | sentry\/
1047 | SEO Browser
1048 | Seo Servis
1049 | seo-nastroj\.cz
1050 | seo4ajax
1051 | Seobility
1052 | SEOCentro
1053 | SeoCheck
1054 | seocompany
1055 | SEOkicks
1056 | SEOlizer
1057 | Seomoz
1058 | SEOprofiler
1059 | seoscanners
1060 | SEOsearch
1061 | seositecheckup
1062 | SEOstats
1063 | servernfo
1064 | sexsearcher
1065 | Seznam
1066 | Shelob
1067 | Shodan
1068 | Shoppimon
1069 | ShopWiki
1070 | ShortLinkTranslate
1071 | shortURL lengthener
1072 | shrinktheweb
1073 | Sideqik
1074 | Siege
1075 | SimplePie
1076 | SimplyFast
1077 | Siphon
1078 | SISTRIX
1079 | Site Sucker
1080 | Site-Shot\/
1081 | Site24x7
1082 | SiteBar
1083 | Sitebeam
1084 | Sitebulb\/
1085 | SiteCondor
1086 | SiteExplorer
1087 | SiteGuardian
1088 | Siteimprove
1089 | SiteIndexed
1090 | Sitemap(s)? Generator
1091 | SitemapGenerator
1092 | SiteMonitor
1093 | Siteshooter B0t
1094 | SiteSnagger
1095 | SiteSucker
1096 | SiteTruth
1097 | Sitevigil
1098 | sitexy\.com
1099 | SkypeUriPreview
1100 | Slack\/
1101 | sli-systems\.com
1102 | slider\.com
1103 | slurp
1104 | SlySearch
1105 | SmartDownload
1106 | SMRF URL Expander
1107 | SMUrlExpander
1108 | Snake
1109 | Snappy
1110 | SnapSearch
1111 | Snarfer\/
1112 | SniffRSS
1113 | sniptracker
1114 | Snoopy
1115 | SnowHaze Search
1116 | sogou web
1117 | SortSite
1118 | Sottopop
1119 | sovereign\.ai
1120 | SpaceBison
1121 | SpamExperts
1122 | Spammen
1123 | Spanner
1124 | Spawning-AI
1125 | spaziodati
1126 | SPDYCheck
1127 | Specificfeeds
1128 | SpeedKit
1129 | speedy
1130 | SPEng
1131 | Spinn3r
1132 | spray-can
1133 | Sprinklr
1134 | spyonweb
1135 | sqlmap
1136 | Sqlworm
1137 | Sqworm
1138 | SSL Labs
1139 | ssl-tools
1140 | StackRambler
1141 | Statastico\/
1142 | Statically-
1143 | StatusCake
1144 | Steeler
1145 | Stratagems Kumo
1146 | Stripe\/
1147 | Stroke\.cz
1148 | StudioFACA
1149 | StumbleUpon
1150 | suchen
1151 | Sucuri
1152 | summify
1153 | SuperHTTP
1154 | Surphace Scout
1155 | Suzuran
1156 | swcd
1157 | Symfony BrowserKit
1158 | Symfony2 BrowserKit
1159 | Synapse\/
1160 | Syndirella\/
1161 | SynHttpClient-Built
1162 | Sysomos
1163 | sysscan
1164 | Szukacz
1165 | T0PHackTeam
1166 | tAkeOut
1167 | Tarantula\/
1168 | Taringa UGC
1169 | TarmotGezgin
1170 | tchelebi\.io
1171 | techiaith\.cymru
1172 | Teleport
1173 | Telesoft
1174 | Telesphoreo
1175 | Telesphorep
1176 | Tenon\.io
1177 | teoma
1178 | terrainformatica
1179 | Test Certificate Info
1180 | testuri
1181 | Tetrahedron
1182 | TextRazor Downloader
1183 | The Drop Reaper
1184 | The Expert HTML Source Viewer
1185 | The Intraformant
1186 | The Knowledge AI
1187 | theinternetrules
1188 | TheNomad
1189 | Thinklab
1190 | Thumbor
1191 | Thumbshots
1192 | ThumbSniper
1193 | timewe\.net
1194 | TinEye
1195 | Tiny Tiny RSS
1196 | TLSProbe\/
1197 | Toata
1198 | topster
1199 | touche\.com
1200 | Traackr\.com
1201 | tracemyfile
1202 | Trackuity
1203 | TrapitAgent
1204 | Trendiction
1205 | Trendsmap
1206 | trendspottr
1207 | truwoGPS
1208 | TryJsoup
1209 | TulipChain
1210 | Turingos
1211 | Turnitin
1212 | tweetedtimes
1213 | Tweetminster
1214 | Tweezler\/
1215 | twibble
1216 | Twice
1217 | Twikle
1218 | Twingly
1219 | Twisted PageGetter
1220 | Typhoeus
1221 | ubermetrics-technologies
1222 | uclassify
1223 | UdmSearch
1224 | ultimate_sitemap_parser
1225 | unchaos
1226 | unirest-java
1227 | UniversalFeedParser
1228 | unshortenit
1229 | Unshorten\.It
1230 | Untiny
1231 | UnwindFetchor
1232 | updated
1233 | updown\.io daemon
1234 | Upflow
1235 | Uptimia
1236 | URL Verifier
1237 | Urlcheckr
1238 | URLitor
1239 | urlresolver
1240 | Urlstat
1241 | URLTester
1242 | UrlTrends Ranking Updater
1243 | URLy Warning
1244 | URLy\.Warning
1245 | URL\/Emacs
1246 | Vacuum
1247 | Vagabondo
1248 | VB Project
1249 | vBSEO
1250 | VCI
1251 | Verity
1252 | via ggpht\.com GoogleImageProxy
1253 | Virusdie
1254 | visionutils
1255 | Visual Rights Group
1256 | vkShare
1257 | VoidEYE
1258 | Voil
1259 | voltron
1260 | voyager\/
1261 | VSAgent\/
1262 | VSB-TUO\/
1263 | Vulnbusters Meter
1264 | VYU2
1265 | w3af\.org
1266 | W3C-checklink
1267 | W3C-mobileOK
1268 | W3C_Unicorn
1269 | WAC-OFU
1270 | WakeletLinkExpander
1271 | WallpapersHD
1272 | Wallpapers\/[0-9]+
1273 | wangling
1274 | Wappalyzer
1275 | WatchMouse
1276 | WbSrch\/
1277 | WDT\.io
1278 | Web Auto
1279 | Web Collage
1280 | Web Enhancer
1281 | Web Fetch
1282 | Web Fuck
1283 | Web Pix
1284 | Web Sauger
1285 | Web spyder
1286 | Web Sucker
1287 | web-capture\.net
1288 | Web-sniffer
1289 | Webalta
1290 | Webauskunft
1291 | WebAuto
1292 | WebCapture
1293 | WebClient\/
1294 | webcollage
1295 | WebCookies
1296 | WebCopier
1297 | WebCorp
1298 | WebDataStats
1299 | WebDoc
1300 | WebEnhancer
1301 | WebFetch
1302 | WebFuck
1303 | WebGazer
1304 | WebGo IS
1305 | WebImageCollector
1306 | WebImages
1307 | WebIndex
1308 | webkit2png
1309 | WebLeacher
1310 | webmastercoffee
1311 | webmon
1312 | WebPix
1313 | WebReaper
1314 | WebSauger
1315 | webscreenie
1316 | Webshag
1317 | Webshot
1318 | Website Quester
1319 | websitepulse agent
1320 | WebsiteQuester
1321 | Websnapr
1322 | WebSniffer
1323 | Webster
1324 | WebStripper
1325 | WebSucker
1326 | webtech\/
1327 | WebThumbnail
1328 | Webthumb\/
1329 | WebWhacker
1330 | WebZIP
1331 | WeLikeLinks
1332 | WEPA
1333 | WeSEE
1334 | wf84
1335 | Wfuzz\/
1336 | wget
1337 | WhatCMS
1338 | WhatsApp
1339 | WhatsMyIP
1340 | WhatWeb
1341 | WhereGoes\?
1342 | Whibse
1343 | WhoAPI\/
1344 | WhoRunsCoinHive
1345 | Whynder Magnet
1346 | Windows-RSS-Platform
1347 | WinHttp-Autoproxy-Service
1348 | WinHTTP\/
1349 | WinPodder
1350 | wkhtmlto
1351 | wmtips
1352 | Woko
1353 | Wolfram HTTPClient
1354 | woorankreview
1355 | WordPress\/
1356 | WordupinfoSearch
1357 | Word\/
1358 | worldping-api
1359 | wotbox
1360 | WP Engine Install Performance API
1361 | WP Rocket
1362 | wpif
1363 | wprecon\.com survey
1364 | WPScan
1365 | wscheck
1366 | Wtrace
1367 | WWW-Collector-E
1368 | WWW-Mechanize
1369 | WWW::Document
1370 | WWW::Mechanize
1371 | WWWOFFLE
1372 | www\.monitor\.us
1373 | x09Mozilla
1374 | x22Mozilla
1375 | XaxisSemanticsClassifier
1376 | XenForo\/
1377 | Xenu Link Sleuth
1378 | XING-contenttabreceiver
1379 | xpymep([0-9]?)\.exe
1380 | Y!J-[A-Z][A-Z][A-Z]
1381 | Yaanb
1382 | yacy
1383 | Yahoo Link Preview
1384 | YahooCacheSystem
1385 | YahooMailProxy
1386 | YahooYSMcm
1387 | YandeG
1388 | Yandex(?!Search)
1389 | yanga
1390 | yeti
1391 | Yo-yo
1392 | Yoleo Consumer
1393 | yomins\.com
1394 | yoogliFetchAgent
1395 | YottaaMonitor
1396 | Your-Website-Sucks
1397 | yourls\.org
1398 | YoYs\.net
1399 | YP\.PL
1400 | Zabbix
1401 | Zade
1402 | Zao
1403 | Zapier
1404 | Zauba
1405 | Zemanta Aggregator
1406 | Zend\\Http\\Client
1407 | Zend_Http_Client
1408 | Zermelo
1409 | Zeus
1410 | zgrab
1411 | ZnajdzFoto
1412 | ZnHTTP
1413 | Zombie\.js
1414 | Zoom\.Mac
1415 | ZoteroTranslationServer
1416 | ZyBorg
1417 | [a-z0-9\-_]*(bot|crawl|headless|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer|scraper)
--------------------------------------------------------------------------------
/raw/Exclusions.json:
--------------------------------------------------------------------------------
1 | ["Safari.[\\d\\.]*","Firefox.[\\d\\.]*"," Chrome.[\\d\\.]*","Chromium.[\\d\\.]*","MSIE.[\\d\\.]","Opera\\\/[\\d\\.]*","Mozilla.[\\d\\.]*","AppleWebKit.[\\d\\.]*","Trident.[\\d\\.]*","Windows NT.[\\d\\.]*","Android [\\d\\.]*","Macintosh.","Ubuntu","Linux","[ ]Intel","Mac OS X [\\d_]*","(like )?Gecko(.[\\d\\.]*)?","KHTML,","CriOS.[\\d\\.]*","CPU iPhone OS ([0-9_])* like Mac OS X","CPU OS ([0-9_])* like Mac OS X","iPod","compatible","x86_..","i686","x64","X11","rv:[\\d\\.]*","Version.[\\d\\.]*","WOW64","Win64","Dalvik.[\\d\\.]*"," \\.NET CLR [\\d\\.]*","Presto.[\\d\\.]*","Media Center PC","BlackBerry","Build","Opera Mini\\\/\\d{1,2}\\.\\d{1,2}\\.[\\d\\.]*\\\/\\d{1,2}\\.","Opera"," \\.NET[\\d\\.]*","cubot","; M bot","; CRONO","; B bot","; IDbot","; ID bot","; POWER BOT","OCTOPUS-CORE","htc_botdugls","super\\\/\\d+\\\/Android\\\/\\d+","\"Yandex\"","YandexModule2"]
--------------------------------------------------------------------------------
/raw/Exclusions.txt:
--------------------------------------------------------------------------------
1 | Safari.[\d\.]*
2 | Firefox.[\d\.]*
3 | Chrome.[\d\.]*
4 | Chromium.[\d\.]*
5 | MSIE.[\d\.]
6 | Opera\/[\d\.]*
7 | Mozilla.[\d\.]*
8 | AppleWebKit.[\d\.]*
9 | Trident.[\d\.]*
10 | Windows NT.[\d\.]*
11 | Android [\d\.]*
12 | Macintosh.
13 | Ubuntu
14 | Linux
15 | [ ]Intel
16 | Mac OS X [\d_]*
17 | (like )?Gecko(.[\d\.]*)?
18 | KHTML,
19 | CriOS.[\d\.]*
20 | CPU iPhone OS ([0-9_])* like Mac OS X
21 | CPU OS ([0-9_])* like Mac OS X
22 | iPod
23 | compatible
24 | x86_..
25 | i686
26 | x64
27 | X11
28 | rv:[\d\.]*
29 | Version.[\d\.]*
30 | WOW64
31 | Win64
32 | Dalvik.[\d\.]*
33 | \.NET CLR [\d\.]*
34 | Presto.[\d\.]*
35 | Media Center PC
36 | BlackBerry
37 | Build
38 | Opera Mini\/\d{1,2}\.\d{1,2}\.[\d\.]*\/\d{1,2}\.
39 | Opera
40 | \.NET[\d\.]*
41 | cubot
42 | ; M bot
43 | ; CRONO
44 | ; B bot
45 | ; IDbot
46 | ; ID bot
47 | ; POWER BOT
48 | OCTOPUS-CORE
49 | htc_botdugls
50 | super\/\d+\/Android\/\d+
51 | "Yandex"
52 | YandexModule2
--------------------------------------------------------------------------------
/raw/Headers.json:
--------------------------------------------------------------------------------
1 | ["HTTP_USER_AGENT","HTTP_X_OPERAMINI_PHONE_UA","HTTP_X_DEVICE_USER_AGENT","HTTP_X_ORIGINAL_USER_AGENT","HTTP_X_SKYFIRE_PHONE","HTTP_X_BOLT_PHONE_UA","HTTP_DEVICE_STOCK_UA","HTTP_X_UCBROWSER_DEVICE_UA","HTTP_FROM","HTTP_X_SCANNER","HTTP_SEC_CH_UA"]
--------------------------------------------------------------------------------
/raw/Headers.txt:
--------------------------------------------------------------------------------
1 | HTTP_USER_AGENT
2 | HTTP_X_OPERAMINI_PHONE_UA
3 | HTTP_X_DEVICE_USER_AGENT
4 | HTTP_X_ORIGINAL_USER_AGENT
5 | HTTP_X_SKYFIRE_PHONE
6 | HTTP_X_BOLT_PHONE_UA
7 | HTTP_DEVICE_STOCK_UA
8 | HTTP_X_UCBROWSER_DEVICE_UA
9 | HTTP_FROM
10 | HTTP_X_SCANNER
11 | HTTP_SEC_CH_UA
--------------------------------------------------------------------------------
/src/CrawlerDetect.php:
--------------------------------------------------------------------------------
1 |
7 | *
8 | * This source file is subject to the MIT license that is bundled
9 | * with this source code in the file LICENSE.
10 | */
11 |
12 | namespace Jaybizzle\CrawlerDetect;
13 |
14 | use Jaybizzle\CrawlerDetect\Fixtures\Crawlers;
15 | use Jaybizzle\CrawlerDetect\Fixtures\Exclusions;
16 | use Jaybizzle\CrawlerDetect\Fixtures\Headers;
17 |
18 | class CrawlerDetect
19 | {
20 | /**
21 | * The user agent.
22 | *
23 | * @var string|null
24 | */
25 | protected $userAgent;
26 |
27 | /**
28 | * Headers that contain a user agent.
29 | *
30 | * @var array
31 | */
32 | protected $httpHeaders = [];
33 |
34 | /**
35 | * Store regex matches.
36 | *
37 | * @var array
38 | */
39 | protected $matches = [];
40 |
41 | /**
42 | * Crawlers object.
43 | *
44 | * @var \Jaybizzle\CrawlerDetect\Fixtures\Crawlers
45 | */
46 | protected $crawlers;
47 |
48 | /**
49 | * Exclusions object.
50 | *
51 | * @var \Jaybizzle\CrawlerDetect\Fixtures\Exclusions
52 | */
53 | protected $exclusions;
54 |
55 | /**
56 | * Headers object.
57 | *
58 | * @var \Jaybizzle\CrawlerDetect\Fixtures\Headers
59 | */
60 | protected $uaHttpHeaders;
61 |
62 | /**
63 | * The compiled regex string.
64 | *
65 | * @var string
66 | */
67 | protected $compiledRegex;
68 |
69 | /**
70 | * The compiled exclusions regex string.
71 | *
72 | * @var string
73 | */
74 | protected $compiledExclusions;
75 |
76 | /**
77 | * Class constructor.
78 | */
79 | public function __construct(?array $headers = null, $userAgent = null)
80 | {
81 | $this->crawlers = new Crawlers;
82 | $this->exclusions = new Exclusions;
83 | $this->uaHttpHeaders = new Headers;
84 |
85 | $this->compiledRegex = $this->compileRegex($this->crawlers->getAll());
86 | $this->compiledExclusions = $this->compileRegex($this->exclusions->getAll());
87 |
88 | $this->setHttpHeaders($headers);
89 | $this->setUserAgent($userAgent);
90 | }
91 |
92 | /**
93 | * Compile the regex patterns into one regex string.
94 | *
95 | * @param array
96 | * @return string
97 | */
98 | public function compileRegex($patterns)
99 | {
100 | return '('.implode('|', $patterns).')';
101 | }
102 |
103 | /**
104 | * Set HTTP headers.
105 | *
106 | * @param array|null $httpHeaders
107 | */
108 | public function setHttpHeaders($httpHeaders)
109 | {
110 | // Use global _SERVER if $httpHeaders aren't defined.
111 | if (! is_array($httpHeaders) || ! count($httpHeaders)) {
112 | $httpHeaders = $_SERVER;
113 | }
114 |
115 | // Clear existing headers.
116 | $this->httpHeaders = [];
117 |
118 | // Only save HTTP headers. In PHP land, that means
119 | // only _SERVER vars that start with HTTP_.
120 | foreach ($httpHeaders as $key => $value) {
121 | if (strpos($key, 'HTTP_') === 0) {
122 | $this->httpHeaders[$key] = $value;
123 | }
124 | }
125 | }
126 |
127 | /**
128 | * Return user agent headers.
129 | *
130 | * @return array
131 | */
132 | public function getUaHttpHeaders()
133 | {
134 | return $this->uaHttpHeaders->getAll();
135 | }
136 |
/**
 * Store the user agent.
 *
 * A non-null value is stored as given. For null, the value of every
 * user-agent header present in the stored HTTP headers is appended, each
 * followed by a single space; if none is present the stored value is null.
 *
 * @param  string|null  $userAgent
 * @return string|null The value that was stored.
 */
public function setUserAgent($userAgent)
{
    if ($userAgent !== null) {
        return $this->userAgent = $userAgent;
    }

    $combined = null;

    foreach ($this->getUaHttpHeaders() as $headerName) {
        if (! isset($this->httpHeaders[$headerName])) {
            continue;
        }

        $combined .= $this->httpHeaders[$headerName].' ';
    }

    return $this->userAgent = $combined;
}
154 |
/**
 * Decide whether a user agent belongs to a crawler.
 *
 * The exclusion patterns are first stripped from the agent string
 * (case-insensitively); what is left is trimmed and tested against the
 * crawler patterns. The match details are stored on the instance, and are
 * reset to an empty array when nothing is left to test.
 *
 * @param  string|null  $userAgent  Agent to test; any falsy value falls back
 *                                  to the stored user agent.
 * @return bool True when the stripped agent matches a crawler pattern.
 */
public function isCrawler($userAgent = null)
{
    $candidate = $userAgent ?: $this->userAgent ?: '';

    // preg_replace() returns null when PCRE fails at runtime (for example a
    // backtrack-limit hit on a pathological agent). Cast so that trim() never
    // receives null, which is deprecated since PHP 8.1; such an agent is
    // still treated as empty, i.e. "not a crawler".
    $agent = trim((string) preg_replace(
        "/{$this->compiledExclusions}/i",
        '',
        $candidate
    ));

    if ($agent === '') {
        $this->matches = [];

        return false;
    }

    return (bool) preg_match("/{$this->compiledRegex}/i", $agent, $this->matches);
}
177 |
/**
 * Return the text captured by the most recent crawler check.
 *
 * @return string|null Index 0 of the stored matches (the full match), or
 *                     null when no such entry is set.
 */
public function getMatches()
{
    return $this->matches[0] ?? null;
}
187 |
/**
 * Expose the user agent currently held by this instance.
 *
 * @return string|null
 */
public function getUserAgent()
{
    $current = $this->userAgent;

    return $current;
}
195 | }
196 |
--------------------------------------------------------------------------------
/src/Fixtures/AbstractProvider.php:
--------------------------------------------------------------------------------
1 |
7 | *
8 | * This source file is subject to the MIT license that is bundled
9 | * with this source code in the file LICENSE.
10 | */
11 |
12 | namespace Jaybizzle\CrawlerDetect\Fixtures;
13 |
/**
 * Base class for fixture providers: holds a data set and hands it out unchanged.
 */
abstract class AbstractProvider
{
    /**
     * Data set exposed by getAll(); declared here without a value.
     *
     * @var array
     */
    protected $data;

    /**
     * Fetch the provider's complete data set.
     *
     * @return array
     */
    public function getAll()
    {
        $dataSet = $this->data;

        return $dataSet;
    }
}
33 |
--------------------------------------------------------------------------------
/src/Fixtures/Crawlers.php:
--------------------------------------------------------------------------------
1 |
7 | *
8 | * This source file is subject to the MIT license that is bundled
9 | * with this source code in the file LICENSE.
10 | */
11 |
12 | namespace Jaybizzle\CrawlerDetect\Fixtures;
13 |
/**
 * Provider of the crawler user-agent patterns.
 *
 * Every entry is a regular-expression fragment, not a plain string: the
 * detector joins all entries with "|" and runs the result between "/"
 * delimiters with the "i" flag. Consequently "/" must be written as "\/",
 * and regex metacharacters that are meant literally (".", "[", "(", "+",
 * "?") must be escaped.
 */
class Crawlers extends AbstractProvider
{
    /**
     * Array of regular expressions to match against the user agent.
     *
     * @var array
     */
    protected $data = [
        ' YLT',
        '^Aether',
        '^Amazon Simple Notification Service Agent$',
        '^Amazon-Route53-Health-Check-Service',
        '^Amazon CloudFront',
        '^b0t$',
        '^bluefish ',
        '^Calypso v\/',
        '^COMODO DCV',
        '^Corax',
        '^DangDang',
        '^DavClnt',
        '^DHSH',
        '^docker\/[0-9]',
        '^Expanse',
        '^FDM ',
        '^git\/',
        '^Goose\/',
        '^Grabber',
        '^Gradle\/',
        '^HTTPClient\/',
        '^HTTPing',
        '^Java\/',
        '^Jeode\/',
        '^Jetty\/',
        '^Mail\/',
        '^Mget',
        '^Microsoft URL Control',
        '^Mikrotik\/',
        '^Netlab360',
        '^NG\/[0-9\.]',
        '^NING\/',
        '^npm\/',
        '^Nuclei',
        '^PHP-AYMAPI\/',
        '^PHP\/',
        '^pip\/',
        '^pnpm\/',
        '^RMA\/',
        '^Ruby|Ruby\/[0-9]',
        '^symbolicator\/',
        '^Swurl ',
        '^TLS tester ',
        '^twine\/',
        '^ureq',
        '^VSE\/[0-9]',
        '^WordPress\.com',
        '^XRL\/[0-9]',
        '^ZmEu',
        '008\/',
        '13TABS',
        '192\.comAgent',
        '2GDPR\/',
        '2ip\.ru',
        '404enemy',
        '7Siters',
        '80legs',
        'a3logics\.in',
        'A6-Indexer',
        'Abonti',
        'Aboundex',
        'aboutthedomain',
        'Accoona-AI-Agent',
        'acebookexternalhit\/',
        'acoon',
        'acrylicapps\.com\/pulp',
        'Acunetix',
        'AdAuth\/',
        'adbeat',
        'AddThis',
        'ADmantX',
        'AdminLabs',
        'adressendeutschland',
        'adreview\/',
        'adscanner',
        'adstxt-worker',
        'Adstxtaggregator',
        'adstxt\.com',
        'Adyen HttpClient',
        'AffiliateLabz\/',
        'affilimate-puppeteer',
        'agentslug',
        'AHC',
        'aihit',
        'aiohttp\/',
        'Airmail',
        'akka-http\/',
        'akula\/',
        'alertra',
        'alexa site audit',
        'Alibaba\.Security\.Heimdall',
        'Alligator',
        'allloadin',
        'AllSubmitter',
        'alyze\.info',
        'amagit',
        'Anarchie',
        'AndroidDownloadManager',
        'Anemone',
        'AngleSharp',
        'annotate_google',
        'Anthill',
        'Anturis Agent',
        'Ant\.com',
        'AnyEvent-HTTP\/',
        'Apache Ant\/',
        'Apache Droid',
        'Apache OpenOffice',
        'Apache-HttpAsyncClient',
        'Apache-HttpClient',
        'ApacheBench',
        'Apexoo',
        'apimon\.de',
        'APIs-Google',
        'AportWorm\/',
        'AppBeat\/',
        'AppEngine-Google',
        'AppleSyndication',
        'Aprc\/[0-9]',
        'Arachmo',
        'arachnode',
        'Arachnophilia',
        'aria2',
        'Arukereso',
        'asafaweb',
        'Asana\/',
        'Ask Jeeves',
        'AskQuickly',
        'ASPSeek',
        'Asterias',
        'Astute',
        'asynchttp',
        'Attach',
        'attohttpc',
        'autocite',
        'AutomaticWPTester',
        'Autonomy',
        'awin\.com',
        'AWS Security Scanner',
        'axios\/',
        'a\.pr-cy\.ru',
        'B-l-i-t-z-B-O-T',
        'Backlink-Ceck',
        'BacklinkHttpStatus',
        'BackStreet',
        'BackupLand',
        'BackWeb',
        'Bad-Neighborhood',
        'Badass',
        'baidu\.com',
        'Bandit',
        'Barracuda Sentinel \(EE\)',
        'basicstate',
        'BatchFTP',
        'Battleztar Bazinga',
        'baypup\/',
        'BazQux',
        'BBBike',
        'BCKLINKS',
        'BDFetch',
        'BegunAdvertising',
        'Bewica-security-scan',
        'Bidtellect',
        'BigBozz',
        'Bigfoot',
        'biglotron',
        'BingLocalSearch',
        'BingPreview',
        'binlar',
        'biNu image cacher',
        'Bitacle',
        'Bitrix link preview',
        'biz_Directory',
        'BKCTwitterUnshortener\/',
        'Black Hole',
        'Blackboard Safeassign',
        'BlackWidow',
        'BlockNote\.Net',
        'BlogBridge',
        'Bloglines',
        'Bloglovin',
        'BlogPulseLive',
        'BlogSearch',
        'Blogtrottr',
        'BlowFish',
        'boitho\.com-dc',
        'Boost\.Beast',
        'BPImageWalker',
        'Braintree-Webhooks',
        'Branch Metrics API',
        'Branch-Passthrough',
        'Brandprotect',
        'Brandwatch',
        'Brodie\/',
        'Browsershots',
        'BUbiNG',
        'Buck\/',
        'Buddy',
        'BuiltWith',
        'Bullseye',
        'BunnySlippers',
        'Burf Search',
        'Butterfly\/',
        'BuzzSumo',
        'CAAM\/[0-9]',
        'caam dot crwlr at gmail dot com',
        'CakePHP',
        'Calculon',
        'Canary%20Mail',
        'CaretNail',
        'catexplorador',
        'CC Metadata Scaper',
        'Cegbfeieh',
        'censys',
        'centuryb\.o\.t9\[at\]gmail\.com',
        'Cerberian Drtrs',
        'CERT\.at-Statistics-Survey',
        'cf-facebook',
        'cg-eye',
        'changedetection',
        'ChangesMeter',
        'Charlotte',
        'chatterino-api-cache',
        'CheckHost',
        'checkprivacy',
        'CherryPicker',
        'ChinaClaw',
        'Chirp\/',
        'chkme\.com',
        'Chlooe',
        'Chromaxa',
        'CirrusExplorer',
        'CISPA Vulnerability Notification',
        'CISPA Web Analyser',
        'Citoid',
        'CJNetworkQuality',
        'Clarsentia',
        'clips\.ua\.ac\.be',
        'Cloud mapping',
        'CloudEndure',
        'CloudFlare-AlwaysOnline',
        'Cloudflare-Healthchecks',
        'Cloudinary',
        'cmcm\.com',
        'coccoc',
        'cognitiveseo',
        'ColdFusion',
        'colly -',
        'CommaFeed',
        'Commons-HttpClient',
        'commonscan',
        'contactbigdatafr',
        'contentkingapp',
        'Contextual Code Sites Explorer',
        'convera',
        'CookieReports',
        'copyright sheriff',
        'CopyRightCheck',
        'Copyscape',
        'cortex\/',
        'Cosmos4j\.feedback',
        'Covario-IDS',
        'Craw\/',
        'Crescent',
        'Criteo',
        'Crowsnest',
        'CSHttp',
        'CSSCheck',
        'Cula\/',
        'curb',
        'Curious George',
        'curl',
        'cuwhois\/',
        'cybo\.com',
        'DAP\/NetHTTP',
        'DareBoost',
        'DatabaseDriverMysqli',
        'DataCha0s',
        'DatadogSynthetics',
        'Datafeedwatch',
        'Datanyze',
        'DataparkSearch',
        'dataprovider',
        'DataXu',
        'Daum(oa)?[ \/][0-9]',
        'dBpoweramp',
        'ddline',
        'deeris',
        'delve\.ai',
        'Demon',
        'DeuSu',
        'developers\.google\.com\/\+\/web\/snippet\/',
        'Devil',
        'Digg',
        'Digincore',
        'DigitalPebble',
        'Dirbuster',
        'Discourse Forum Onebox',
        'Dispatch\/',
        'Disqus\/',
        'DittoSpyder',
        'dlvr',
        'DMBrowser',
        'DNSPod-reporting',
        'docoloc',
        'Dolphin http client',
        'DomainAppender',
        'DomainLabz',
        'Domains Project\/',
        'Donuts Content Explorer',
        'dotMailer content retrieval',
        'dotSemantic',
        'downforeveryoneorjustme',
        'Download Wonder',
        'downnotifier',
        'DowntimeDetector',
        'Drip',
        'drupact',
        'Drupal \(\+http:\/\/drupal\.org\/\)',
        'DTS Agent',
        'dubaiindex',
        'DuplexWeb-Google',
        'DynatraceSynthetic',
        'EARTHCOM',
        'Easy-Thumb',
        'EasyDL',
        'Ebingbong',
        'ec2linkfinder',
        'eCairn-Grabber',
        'eCatch',
        'ECCP',
        'eContext\/',
        'Ecxi',
        'EirGrabber',
        'ElectricMonk',
        'elefent',
        'EMail Exractor',
        'EMail Wolf',
        'EmailWolf',
        'Embarcadero',
        'Embed PHP Library',
        'Embedly',
        'endo\/',
        'europarchive\.org',
        'evc-batch',
        'EventMachine HttpClient',
        'Everwall Link Expander',
        'Evidon',
        'Evrinid',
        'ExactSearch',
        'ExaleadCloudview',
        'Excel\/',
        'exif',
        'ExoRank',
        'Exploratodo',
        'Express WebPictures',
        'Extreme Picture Finder',
        'EyeNetIE',
        'ezooms',
        'facebookcatalog',
        'facebookexternalhit',
        'facebookexternalua',
        'facebookplatform',
        'fairshare',
        'Faraday v',
        'fasthttp',
        'Faveeo',
        'Favicon downloader',
        'faviconarchive',
        'faviconkit',
        'FavOrg',
        'Feed Wrangler',
        'Feedable\/',
        'Feedbin',
        'FeedBooster',
        'FeedBucket',
        'FeedBunch\/',
        'FeedBurner',
        'feeder',
        'Feedly',
        'FeedshowOnline',
        'Feedshow\/',
        'Feedspot',
        'FeedViewer\/',
        'Feedwind\/',
        'FeedZcollector',
        'feeltiptop',
        'Fetch API',
        'Fetch\/[0-9]',
        'Fever\/[0-9]',
        'FHscan',
        'Fiery%20Feeds',
        'Filestack',
        'Fimap',
        'findlink',
        'findthatfile',
        'FlashGet',
        'FlipboardBrowserProxy',
        'FlipboardProxy',
        'FlipboardRSS',
        'Flock\/',
        'Florienzh\/',
        'fluffy',
        'Flunky',
        'flynxapp',
        'forensiq',
        'ForusP',
        'FoundSeoTool',
        'fragFINN\.de',
        'free thumbnails',
        'Freeuploader',
        'FreshRSS',
        'frontman',
        'Funnelback',
        'Fuzz Faster U Fool',
        'G-i-g-a-b-o-t',
        'g00g1e\.net',
        'ganarvisitas',
        'gdnplus\.com',
        'GeedoProductSearch',
        'geek-tools',
        'Genieo',
        'GentleSource',
        'GetCode',
        'Getintent',
        'GetLinkInfo',
        'getprismatic',
        'GetRight',
        'getroot',
        'GetURLInfo\/',
        'GetWeb',
        'Geziyor',
        'Ghost Inspector',
        'GigablastOpenSource',
        'GIS-LABS',
        'github-camo',
        'GitHub-Hookshot',
        'github\.com',
        'Go http package',
        'Go [\d\.]* package http',
        'Go!Zilla',
        'Go-Ahead-Got-It',
        'Go-http-client',
        'go-mtasts\/',
        'gobuster',
        'gobyus',
        'Gofeed',
        'gofetch',
        'Goldfire Server',
        'GomezAgent',
        'gooblog',
        'Goodzer\/',
        'Google AppsViewer',
        'Google Desktop',
        'Google favicon',
        'Google Keyword Suggestion',
        'Google Keyword Tool',
        'Google Page Speed Insights',
        'Google PP Default',
        'Google Search Console',
        'Google Web Preview',
        'Google-Ads',
        'Google-Adwords',
        'Google-Apps-Script',
        'Google-Calendar-Importer',
        'Google-HotelAdsVerifier',
        'Google-HTTP-Java-Client',
        'Google-InspectionTool',
        'Google-Podcast',
        'Google-Publisher-Plugin',
        'Google-Read-Aloud',
        'Google-SearchByImage',
        'Google-Site-Verification',
        'Google-SMTP-STS',
        'Google-speakr',
        'Google-Structured-Data-Testing-Tool',
        'Google-Transparency-Report',
        'google-xrawler',
        'Google-Youtube-Links',
        'GoogleDocs',
        'GoogleHC\/',
        'GoogleOther',
        'GoogleProber',
        'GoogleProducer',
        'GoogleSites',
        'Gookey',
        'GoSpotCheck',
        'gosquared-thumbnailer',
        'Gotit',
        'GoZilla',
        'grabify',
        'GrabNet',
        'Grafula',
        'Grammarly',
        'GrapeFX',
        'GreatNews',
        'Gregarius',
        'GRequests',
        'grokkit',
        'grouphigh',
        'grub-client',
        'gSOAP\/',
        'GT::WWW',
        'GTmetrix',
        'GuzzleHttp',
        'gvfs\/',
        'HAA(A)?RTLAND http client',
        'Haansoft',
        'hackney\/',
        'Hadi Agent',
        'HappyApps-WebCheck',
        'Hardenize',
        'Hatena',
        'Havij',
        'HaxerMen',
        'HEADMasterSEO',
        'HeartRails_Capture',
        'help@dataminr\.com',
        'heritrix',
        'Hexometer',
        'historious',
        'hkedcity',
        'hledejLevne\.cz',
        'Hloader',
        'HMView',
        'Holmes',
        'HonesoSearchEngine',
        'HootSuite Image proxy',
        'Hootsuite-WebFeed',
        'hosterstats',
        'HostTracker',
        'ht:\/\/check',
        'htdig',
        'HTMLparser',
        'htmlyse',
        'HTTP Banner Detection',
        'http-get',
        'HTTP-Header-Abfrage',
        'http-kit',
        'http-request\/',
        'HTTP-Tiny',
        'HTTP::Lite',
        'http:\/\/www\.neomo\.de\/', // 'Francis [Bot]'
        'HttpComponents',
        'httphr',
        'HTTPie',
        'HTTPMon',
        'httpRequest',
        'httpscheck',
        'httpssites_power',
        'httpunit',
        'HttpUrlConnection',
        'http\.rb\/',
        'HTTP_Compression_Test',
        'http_get',
        'http_request2',
        'http_requester',
        'httrack',
        'huaweisymantec',
        'HubSpot ',
        'HubSpot-Link-Resolver',
        'Humanlinks',
        'i2kconnect\/',
        'Iblog',
        'ichiro',
        'Id-search',
        'IdeelaborPlagiaat',
        'IDG Twitter Links Resolver',
        'IDwhois\/',
        'Iframely',
        'igdeSpyder',
        'iGooglePortal',
        'IlTrovatore',
        'Image Fetch',
        'Image Sucker',
        'ImageEngine\/',
        'ImageVisu\/',
        'Imagga',
        'imagineeasy',
        'imgsizer',
        'InAGist',
        'inbound\.li parser',
        'InDesign%20CC',
        'Indy Library',
        'InetURL',
        'infegy',
        'infohelfer',
        'InfoTekies',
        'InfoWizards Reciprocal Link',
        'inpwrd\.com',
        'instabid',
        'Instapaper',
        'Integrity',
        'integromedb',
        'Intelliseek',
        'InterGET',
        'Internet Ninja',
        'InternetSeer',
        'internetVista monitor',
        'internetwache',
        'internet_archive',
        'intraVnews',
        'IODC',
        'IOI',
        'Inboxb0t',
        'iplabel',
        'ips-agent',
        'IPS\/[0-9]',
        'IPWorks HTTP\/S Component',
        'iqdb\/',
        'Iria',
        'Irokez',
        'isitup\.org',
        'iskanie',
        'isUp\.li',
        'iThemes Sync\/',
        'IZaBEE',
        'iZSearch',
        'JAHHO',
        'janforman',
        'Jaunt\/',
        'Java.*outbrain',
        'javelin\.io',
        'Jbrofuzz',
        'Jersey\/',
        'JetCar',
        'Jigsaw',
        'Jobboerse',
        'JobFeed discovery',
        'Jobg8 URL Monitor',
        'jobo',
        'Jobrapido',
        'Jobsearch1\.5',
        'JoinVision Generic',
        'JolokiaPwn',
        'Joomla',
        'Jorgee',
        'JS-Kit',
        'JungleKeyThumbnail',
        'JustView',
        'Kaspersky Lab CFR link resolver',
        'Kelny\/',
        'Kerrigan\/',
        'KeyCDN',
        'Keyword Density',
        'Keywords Research',
        'khttp\/',
        'KickFire',
        'KimonoLabs\/',
        'Kml-Google',
        'knows\.is',
        'KOCMOHABT',
        'kouio',
        'krawler\.dk',
        'kube-probe',
        'kubectl',
        'kulturarw3',
        'KumKie',
        'Larbin',
        'Lavf\/',
        'leakix\.net',
        'LeechFTP',
        'LeechGet',
        'letsencrypt',
        'Lftp',
        'LibVLC',
        'LibWeb',
        'Libwhisker',
        'libwww',
        'Licorne',
        'Liferea\/',
        'Lighthouse',
        'Lightspeedsystems',
        'Likse',
        'limber\.io',
        'Link Valet',
        'LinkAlarm\/',
        'LinkAnalyser',
        'link-check',
        'linkCheck',
        'linkdex',
        'LinkExaminer',
        'linkfluence',
        'linkpeek',
        'LinkPreview',
        'LinkScan',
        'LinksManager',
        'LinkTiger',
        'LinkWalker',
        'link_thumbnailer',
        'Lipperhey',
        'Litemage_walker',
        'livedoor ScreenShot',
        'LoadImpactRload',
        'localsearch-web',
        'LongURL API',
        'longurl-r-package',
        'looid\.com',
        'looksystems\.net',
        'lscache_runner',
        'ltx71',
        'lua-resty-http',
        'Lucee \(CFML Engine\)',
        'Lush Http Client',
        'lwp-request',
        'lwp-trivial',
        'LWP::Simple',
        'lycos',
        'LYT\.SR',
        'L\.webis',
        'mabontland',
        'MacOutlook\/',
        'MagentaNews\/',
        'Mag-Net',
        'MagpieRSS',
        'Mail::STS',
        'MailChimp',
        'Mail\.Ru',
        'Majestic12',
        'makecontact\/',
        'Mandrill',
        'MapperCmd',
        'marketinggrader',
        'MarkMonitor',
        'MarkWatch',
        'Mass Downloader',
        'masscan\/',
        'Mata Hari',
        'mattermost',
        'MatchorySearch\/',
        'Mediametric',
        'Mediapartners-Google',
        'mediawords',
        'MegaIndex\.ru',
        'MeltwaterNews',
        'Melvil Rawi',
        'MemGator',
        'Metaspinner',
        'MetaURI',
        'MFC_Tear_Sample',
        'Microsearch',
        'Microsoft Data Access',
        'Microsoft Office',
        'Microsoft Outlook',
        'Microsoft Windows Network Diagnostics',
        'Microsoft-WebDAV-MiniRedir',
        'Microsoft\.Data\.Mashup',
        'MicrosoftPreview',
        'MIDown tool',
        'MIIxpc',
        'Mindjet',
        'Miniature\.io',
        'Miniflux',
        'mio_httpc',
        'Miro-HttpClient',
        'Mister PiX',
        'mixdata dot com',
        'mixed-content-scan',
        'mixnode',
        'Mnogosearch',
        'mogimogi',
        'Mojeek',
        'Mojolicious \(Perl\)',
        'Mollie',
        'monitis',
        'Monitority\/',
        'Monit\/',
        'montastic',
        'MonSpark',
        'MonTools',
        'Moreover',
        'Morfeus Fucking Scanner',
        'Morning Paper',
        'MovableType',
        'mowser',
        'Mrcgiguy',
        'Mr\.4x3 Powered',
        'MS Web Services Client Protocol',
        'MSFrontPage',
        'mShots',
        'MuckRack\/',
        'muhstik-scan',
        'MVAClient',
        'MxToolbox\/',
        'myseosnapshot',
        'nagios',
        'Najdi\.si',
        'Name Intelligence',
        'NameFo\.com',
        'Nameprotect',
        'nationalarchives',
        'Navroad',
        'nbertaupete95',
        'NearSite',
        'Needle',
        'Nessus',
        'Net Vampire',
        'NetAnts',
        'NETCRAFT',
        'NetLyzer',
        'NetMechanic',
        'NetNewsWire',
        'Netpursual',
        'netresearch',
        'NetShelter ContentScan',
        'Netsparker',
        'NetSystemsResearch',
        'nettle',
        'NetTrack',
        'Netvibes',
        'NetZIP',
        'Neustar WPM',
        'NeutrinoAPI',
        'NewRelicPinger',
        'NewsBlur .*Finder',
        'NewsGator',
        'newsme',
        'newspaper\/',
        'Nexgate Ruby Client',
        'NG-Search',
        'nghttp2',
        'Nibbler',
        'NICErsPRO',
        'NihilScio',
        'Nikto',
        'nineconnections',
        'NLNZ_IAHarvester',
        'Nmap Scripting Engine',
        'node-fetch',
        'node-superagent',
        'node-urllib',
        'Nodemeter',
        'NodePing',
        'node\.io',
        'nominet\.org\.uk',
        'nominet\.uk',
        'Norton-Safeweb',
        'Notifixious',
        'notifyninja',
        'NotionEmbedder',
        'nuhk',
        'nutch',
        'Nuzzel',
        'nWormFeedFinder',
        'nyawc\/',
        'Nymesis',
        'NYU',
        'Observatory\/',
        'Ocelli\/',
        'Octopus',
        'oegp',
        'Offline Explorer',
        'Offline Navigator',
        'OgScrper',
        'okhttp',
        'omgili',
        'OMSC',
        'Online Domain Tools',
        'Open Source RSS',
        'OpenCalaisSemanticProxy',
        'Openfind',
        'OpenLinkProfiler',
        'Openstat\/',
        'OpenVAS',
        'OPPO A33',
        'Optimizer',
        'Orbiter',
        'OrgProbe\/',
        'orion-semantics',
        'Outlook-Express',
        'Outlook-iOS',
        'Owler',
        'Owlin',
        'ownCloud News',
        'ow\.ly',
        'OxfordCloudService',
        'page scorer',
        'Page Valet',
        'page2rss',
        'PageFreezer',
        'PageGrabber',
        'PagePeeker',
        'PageScorer',
        'Pagespeed\/',
        'PageThing',
        'page_verifier',
        'Panopta',
        'panscient',
        'Papa Foto',
        'parsijoo',
        'Pavuk',
        'PayPal IPN',
        'pcBrowser',
        'Pcore-HTTP',
        'PDF24 URL To PDF',
        'Pearltrees',
        'PECL::HTTP',
        'peerindex',
        'Peew',
        'PeoplePal',
        'Perlu -',
        'PhantomJS Screenshoter',
        'PhantomJS\/',
        'Photon\/',
        'php-requests',
        'phpservermon',
        'Pi-Monster',
        'Picscout',
        'Picsearch',
        'PictureFinder',
        'Pimonster',
        'Pingability',
        'PingAdmin\.Ru',
        'Pingdom',
        'Pingoscope',
        'PingSpot',
        'ping\.blo\.gs',
        'pinterest\.com',
        'Pixray',
        'Pizilla',
        'Plagger\/',
        'Pleroma ',
        'Ploetz \+ Zeller',
        'Plukkie',
        'plumanalytics',
        'PocketImageCache',
        'PocketParser',
        'Pockey',
        'PodcastAddict\/',
        'POE-Component-Client-HTTP',
        'Polymail\/',
        'Pompos',
        'Porkbun',
        'Port Monitor',
        'postano',
        'postfix-mta-sts-resolver',
        'PostmanRuntime',
        'postplanner\.com',
        'PostPost',
        'postrank',
        'PowerPoint\/',
        'Prebid',
        'Prerender',
        'Priceonomics Analysis Engine',
        'PrintFriendly',
        'PritTorrent',
        'Prlog',
        'probely\.com',
        'probethenet',
        'Project ?25499',
        'Project-Resonance',
        'prospectb2b',
        'Protopage',
        'ProWebWalker',
        'proximic',
        'PRTG Network Monitor',
        'pshtt, https scanning',
        'PTST ',
        'PTST\/[0-9]+',
        'pulsetic\.com',
        'Pump',
        'Python-httplib2',
        'python-httpx',
        'python-requests',
        'Python-urllib',
        'Qirina Hurdler',
        'QQDownload',
        'QrafterPro',
        'Qseero',
        'Qualidator',
        'QueryN Metasearch',
        'queuedriver',
        'quic-go-HTTP\/',
        'QuiteRSS',
        'Quora Link Preview',
        'Qwantify',
        'Radian6',
        'RadioPublicImageResizer',
        'Railgun\/',
        'RankActive',
        'RankFlex',
        'RankSonicSiteAuditor',
        'RapidLoad\/',
        'Re-re Studio',
        'ReactorNetty',
        'Readability',
        'RealDownload',
        'RealPlayer%20Downloader',
        'RebelMouse',
        'Recorder',
        'RecurPost\/',
        'redback\/',
        'ReederForMac',
        'Reeder\/',
        'ReGet',
        'RepoMonkey',
        'request\.js',
        'reqwest\/',
        'ResponseCodeTest',
        'RestSharp',
        'Riddler',
        'Rival IQ',
        'Robosourcer',
        'Robozilla',
        'ROI Hunter',
        'RPT-HTTPClient',
        'RSSMix\/',
        'RSSOwl',
        'RuxitSynthetic',
        'RyowlEngine',
        'safe-agent-scanner',
        'SalesIntelligent',
        'Saleslift',
        'SAP NetWeaver Application Server',
        'SauceNAO',
        'SBIder',
        'sc-downloader',
        'scalaj-http',
        'Scamadviser-Frontend',
        'ScanAlert',
        'scan\.lol',
        'Scoop',
        'scooter',
        'ScopeContentAG-HTTP-Client',
        'ScoutJet',
        'ScoutURLMonitor',
        'ScrapeBox Page Scanner',
        'Scrapy',
        'Screaming',
        'ScreenShotService',
        'Scrubby',
        'Scrutiny\/',
        'Search37',
        'searchenginepromotionhelp',
        'Searchestate',
        'SearchExpress',
        'SearchSight',
        'SearchWP',
        'search\.thunderstone',
        'Seeker',
        'semanticdiscovery',
        'semanticjuice',
        'Semiocast HTTP client',
        'Semrush',
        'Sendsay\.Ru',
        'sentry\/',
        'SEO Browser',
        'Seo Servis',
        'seo-nastroj\.cz',
        'seo4ajax',
        'Seobility',
        'SEOCentro',
        'SeoCheck',
        'seocompany',
        'SEOkicks',
        'SEOlizer',
        'Seomoz',
        'SEOprofiler',
        'seoscanners',
        'SEOsearch',
        'seositecheckup',
        'SEOstats',
        'servernfo',
        'sexsearcher',
        'Seznam',
        'Shelob',
        'Shodan',
        'Shoppimon',
        'ShopWiki',
        'ShortLinkTranslate',
        'shortURL lengthener',
        'shrinktheweb',
        'Sideqik',
        'Siege',
        'SimplePie',
        'SimplyFast',
        'Siphon',
        'SISTRIX',
        'Site Sucker',
        'Site-Shot\/',
        'Site24x7',
        'SiteBar',
        'Sitebeam',
        'Sitebulb\/',
        'SiteCondor',
        'SiteExplorer',
        'SiteGuardian',
        'Siteimprove',
        'SiteIndexed',
        'Sitemap(s)? Generator',
        'SitemapGenerator',
        'SiteMonitor',
        'Siteshooter B0t',
        'SiteSnagger',
        'SiteSucker',
        'SiteTruth',
        'Sitevigil',
        'sitexy\.com',
        'SkypeUriPreview',
        'Slack\/',
        'sli-systems\.com',
        'slider\.com',
        'slurp',
        'SlySearch',
        'SmartDownload',
        'SMRF URL Expander',
        'SMUrlExpander',
        'Snake',
        'Snappy',
        'SnapSearch',
        'Snarfer\/',
        'SniffRSS',
        'sniptracker',
        'Snoopy',
        'SnowHaze Search',
        'sogou web',
        'SortSite',
        'Sottopop',
        'sovereign\.ai',
        'SpaceBison',
        'SpamExperts',
        'Spammen',
        'Spanner',
        'Spawning-AI',
        'spaziodati',
        'SPDYCheck',
        'Specificfeeds',
        'SpeedKit',
        'speedy',
        'SPEng',
        'Spinn3r',
        'spray-can',
        'Sprinklr ',
        'spyonweb',
        'sqlmap',
        'Sqlworm',
        'Sqworm',
        'SSL Labs',
        'ssl-tools',
        'StackRambler',
        'Statastico\/',
        'Statically-',
        'StatusCake',
        'Steeler',
        'Stratagems Kumo',
        'Stripe\/',
        'Stroke\.cz',
        'StudioFACA',
        'StumbleUpon',
        'suchen',
        'Sucuri',
        'summify',
        'SuperHTTP',
        'Surphace Scout',
        'Suzuran',
        'swcd ',
        'Symfony BrowserKit',
        'Symfony2 BrowserKit',
        'Synapse\/',
        'Syndirella\/',
        'SynHttpClient-Built',
        'Sysomos',
        'sysscan',
        'Szukacz',
        'T0PHackTeam',
        'tAkeOut',
        'Tarantula\/',
        'Taringa UGC',
        'TarmotGezgin',
        'tchelebi\.io',
        'techiaith\.cymru',
        'Teleport',
        'Telesoft',
        'Telesphoreo',
        'Telesphorep',
        'Tenon\.io',
        'teoma',
        'terrainformatica',
        'Test Certificate Info',
        'testuri',
        'Tetrahedron',
        'TextRazor Downloader',
        'The Drop Reaper',
        'The Expert HTML Source Viewer',
        'The Intraformant',
        'The Knowledge AI',
        'theinternetrules',
        'TheNomad',
        'Thinklab',
        'Thumbor',
        'Thumbshots',
        'ThumbSniper',
        'timewe\.net',
        'TinEye',
        'Tiny Tiny RSS',
        'TLSProbe\/',
        'Toata',
        'topster',
        'touche\.com',
        'Traackr\.com',
        'tracemyfile',
        'Trackuity',
        'TrapitAgent',
        'Trendiction',
        'Trendsmap',
        'trendspottr',
        'truwoGPS',
        'TryJsoup',
        'TulipChain',
        'Turingos',
        'Turnitin',
        'tweetedtimes',
        'Tweetminster',
        'Tweezler\/',
        'twibble',
        'Twice',
        'Twikle',
        'Twingly',
        'Twisted PageGetter',
        'Typhoeus',
        'ubermetrics-technologies',
        'uclassify',
        'UdmSearch',
        'ultimate_sitemap_parser',
        'unchaos',
        'unirest-java',
        'UniversalFeedParser',
        'unshortenit',
        'Unshorten\.It',
        'Untiny',
        'UnwindFetchor',
        'updated',
        'updown\.io daemon',
        'Upflow',
        'Uptimia',
        'URL Verifier',
        'Urlcheckr',
        'URLitor',
        'urlresolver',
        'Urlstat',
        'URLTester',
        'UrlTrends Ranking Updater',
        'URLy Warning',
        'URLy\.Warning',
        'URL\/Emacs',
        'Vacuum',
        'Vagabondo',
        'VB Project',
        'vBSEO',
        'VCI',
        'Verity',
        'via ggpht\.com GoogleImageProxy',
        'Virusdie',
        'visionutils',
        'Visual Rights Group',
        'vkShare',
        'VoidEYE',
        'Voil',
        'voltron',
        'voyager\/',
        'VSAgent\/',
        'VSB-TUO\/',
        'Vulnbusters Meter',
        'VYU2',
        'w3af\.org',
        'W3C-checklink',
        'W3C-mobileOK',
        'W3C_Unicorn',
        'WAC-OFU',
        'WakeletLinkExpander',
        'WallpapersHD',
        'Wallpapers\/[0-9]+',
        'wangling',
        'Wappalyzer',
        'WatchMouse',
        'WbSrch\/',
        'WDT\.io',
        'Web Auto',
        'Web Collage',
        'Web Enhancer',
        'Web Fetch',
        'Web Fuck',
        'Web Pix',
        'Web Sauger',
        'Web spyder',
        'Web Sucker',
        'web-capture\.net',
        'Web-sniffer',
        'Webalta',
        'Webauskunft',
        'WebAuto',
        'WebCapture',
        'WebClient\/',
        'webcollage',
        'WebCookies',
        'WebCopier',
        'WebCorp',
        'WebDataStats',
        'WebDoc',
        'WebEnhancer',
        'WebFetch',
        'WebFuck',
        'WebGazer',
        'WebGo IS',
        'WebImageCollector',
        'WebImages',
        'WebIndex',
        'webkit2png',
        'WebLeacher',
        'webmastercoffee',
        'webmon ',
        'WebPix',
        'WebReaper',
        'WebSauger',
        'webscreenie',
        'Webshag',
        'Webshot',
        'Website Quester',
        'websitepulse agent',
        'WebsiteQuester',
        'Websnapr',
        'WebSniffer',
        'Webster',
        'WebStripper',
        'WebSucker',
        'webtech\/',
        'WebThumbnail',
        'Webthumb\/',
        'WebWhacker',
        'WebZIP',
        'WeLikeLinks',
        'WEPA',
        'WeSEE',
        'wf84',
        'Wfuzz\/',
        'wget',
        'WhatCMS',
        'WhatsApp',
        'WhatsMyIP',
        'WhatWeb',
        'WhereGoes\?',
        'Whibse',
        'WhoAPI\/',
        'WhoRunsCoinHive',
        'Whynder Magnet',
        'Windows-RSS-Platform',
        'WinHttp-Autoproxy-Service',
        'WinHTTP\/',
        'WinPodder',
        'wkhtmlto',
        'wmtips',
        'Woko',
        'Wolfram HTTPClient',
        'woorankreview',
        'WordPress\/',
        'WordupinfoSearch',
        'Word\/',
        'worldping-api',
        'wotbox',
        'WP Engine Install Performance API',
        'WP Rocket',
        'wpif',
        'wprecon\.com survey',
        'WPScan',
        'wscheck',
        'Wtrace',
        'WWW-Collector-E',
        'WWW-Mechanize',
        'WWW::Document',
        'WWW::Mechanize',
        'WWWOFFLE',
        'www\.monitor\.us',
        'x09Mozilla',
        'x22Mozilla',
        'XaxisSemanticsClassifier',
        'XenForo\/',
        'Xenu Link Sleuth',
        'XING-contenttabreceiver',
        'xpymep([0-9]?)\.exe',
        'Y!J-[A-Z][A-Z][A-Z]',
        'Yaanb',
        'yacy',
        'Yahoo Link Preview',
        'YahooCacheSystem',
        'YahooMailProxy',
        'YahooYSMcm',
        'YandeG',
        'Yandex(?!Search)',
        'yanga',
        'yeti',
        'Yo-yo',
        'Yoleo Consumer',
        'yomins\.com',
        'yoogliFetchAgent',
        'YottaaMonitor',
        'Your-Website-Sucks',
        'yourls\.org',
        'YoYs\.net',
        'YP\.PL',
        'Zabbix',
        'Zade',
        'Zao',
        'Zapier',
        'Zauba',
        'Zemanta Aggregator',
        'Zend\\\\Http\\\\Client',
        'Zend_Http_Client',
        'Zermelo',
        'Zeus ',
        'zgrab',
        'ZnajdzFoto',
        'ZnHTTP',
        'Zombie\.js',
        'Zoom\.Mac',
        'ZoteroTranslationServer',
        'ZyBorg',
        // Generic catch-all: any token ending in one of these crawler-ish words.
        '[a-z0-9\-_]*(bot|crawl|headless|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer|scraper)',
    ];
}
1441 |
--------------------------------------------------------------------------------
/src/Fixtures/Exclusions.php:
--------------------------------------------------------------------------------
1 |
7 | *
8 | * This source file is subject to the MIT license that is bundled
9 | * with this source code in the file LICENSE.
10 | */
11 |
12 | namespace Jaybizzle\CrawlerDetect\Fixtures;
13 |
14 | class Exclusions extends AbstractProvider
15 | {
16 | /**
17 | * Patterns to remove from the user agent before running the crawler regex. Entries are regex fragments (an unescaped `.` matches any single character).
18 | * Over a large list of user agents, this gives us about a 55% speed increase! Device names containing "bot" (e.g. 'cubot') are presumably listed to avoid false positives - verify.
19 | *
20 | * @var string[]
21 | */
22 | protected $data = [
23 | 'Safari.[\d\.]*',
24 | 'Firefox.[\d\.]*',
25 | ' Chrome.[\d\.]*',
26 | 'Chromium.[\d\.]*',
27 | 'MSIE.[\d\.]', // NOTE(review): no trailing `*` unlike the sibling version patterns, so only one version character is matched - confirm this is intended.
28 | 'Opera\/[\d\.]*',
29 | 'Mozilla.[\d\.]*',
30 | 'AppleWebKit.[\d\.]*',
31 | 'Trident.[\d\.]*',
32 | 'Windows NT.[\d\.]*',
33 | 'Android [\d\.]*',
34 | 'Macintosh.',
35 | 'Ubuntu',
36 | 'Linux',
37 | '[ ]Intel',
38 | 'Mac OS X [\d_]*',
39 | '(like )?Gecko(.[\d\.]*)?',
40 | 'KHTML,',
41 | 'CriOS.[\d\.]*',
42 | 'CPU iPhone OS ([0-9_])* like Mac OS X',
43 | 'CPU OS ([0-9_])* like Mac OS X',
44 | 'iPod',
45 | 'compatible',
46 | 'x86_..',
47 | 'i686',
48 | 'x64',
49 | 'X11',
50 | 'rv:[\d\.]*',
51 | 'Version.[\d\.]*',
52 | 'WOW64',
53 | 'Win64',
54 | 'Dalvik.[\d\.]*',
55 | ' \.NET CLR [\d\.]*',
56 | 'Presto.[\d\.]*',
57 | 'Media Center PC',
58 | 'BlackBerry',
59 | 'Build',
60 | 'Opera Mini\/\d{1,2}\.\d{1,2}\.[\d\.]*\/\d{1,2}\.',
61 | 'Opera',
62 | ' \.NET[\d\.]*',
63 | 'cubot',
64 | '; M bot',
65 | '; CRONO',
66 | '; B bot',
67 | '; IDbot',
68 | '; ID bot',
69 | '; POWER BOT',
70 | 'OCTOPUS-CORE',
71 | 'htc_botdugls',
72 | 'super\/\d+\/Android\/\d+',
73 | '"Yandex"',
74 | 'YandexModule2',
75 | ];
76 | }
77 |
--------------------------------------------------------------------------------
/src/Fixtures/Headers.php:
--------------------------------------------------------------------------------
1 |
7 | *
8 | * This source file is subject to the MIT license that is bundled
9 | * with this source code in the file LICENSE.
10 | */
11 |
12 | namespace Jaybizzle\CrawlerDetect\Fixtures;
13 |
14 | class Headers extends AbstractProvider
15 | {
16 | /**
17 | * All possible HTTP headers that may carry the user agent string or another client-identifying value.
18 | * Names are written in the upper-case `HTTP_*` form (presumably `$_SERVER` keys - verify against the consumer).
19 | * @var string[]
20 | */
21 | protected $data = [
22 | // The default User-Agent header.
23 | 'HTTP_USER_AGENT',
24 | // Header that can occur on devices using Opera Mini.
25 | 'HTTP_X_OPERAMINI_PHONE_UA',
26 | // Vodafone-specific header: http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/
27 | 'HTTP_X_DEVICE_USER_AGENT',
28 | 'HTTP_X_ORIGINAL_USER_AGENT', // NOTE(review): origin of this and the next four headers is not documented here.
29 | 'HTTP_X_SKYFIRE_PHONE',
30 | 'HTTP_X_BOLT_PHONE_UA',
31 | 'HTTP_DEVICE_STOCK_UA',
32 | 'HTTP_X_UCBROWSER_DEVICE_UA',
33 | // Sometimes bots (especially Google) send a genuine user agent but fill this header in with their email address.
34 | 'HTTP_FROM',
35 | 'HTTP_X_SCANNER', // Seen in use by Netsparker.
36 | // Facebook has been observed to omit identifying itself in the User-Agent header while still sending HeadlessChrome in this header for mobile requests.
37 | 'HTTP_SEC_CH_UA',
38 | ];
39 | }
40 |
--------------------------------------------------------------------------------