├── .github └── workflows │ ├── php-cs-fixer.yml │ └── test.yml ├── .php_cs.dist ├── LICENSE ├── README.md ├── composer.json ├── export.php ├── raw ├── Crawlers.json ├── Crawlers.txt ├── Exclusions.json ├── Exclusions.txt ├── Headers.json └── Headers.txt └── src ├── CrawlerDetect.php └── Fixtures ├── AbstractProvider.php ├── Crawlers.php ├── Exclusions.php └── Headers.php /.github/workflows/php-cs-fixer.yml: -------------------------------------------------------------------------------- 1 | name: Check & fix styling 2 | 3 | on: [ push ] 4 | 5 | jobs: 6 | php-cs-fixer: 7 | runs-on: ubuntu-24.04 8 | 9 | steps: 10 | - name: Checkout code 11 | uses: actions/checkout@v3 12 | with: 13 | ref: ${{ github.head_ref }} 14 | 15 | - name: Run PHP CS Fixer 16 | uses: docker://oskarstark/php-cs-fixer-ga:2.18.6 17 | with: 18 | args: --config=.php_cs.dist --allow-risky=yes 19 | 20 | - name: Commit changes 21 | uses: stefanzweifel/git-auto-commit-action@v4 22 | with: 23 | commit_message: Fix styling -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: 6 | - "master" 7 | pull_request: 8 | 9 | jobs: 10 | build: 11 | runs-on: ubuntu-24.04 12 | strategy: 13 | fail-fast: true 14 | matrix: 15 | php: [7.1, 7.2, 7.3, 7.4, 8.0, 8.1, 8.2, 8.3, 8.4] 16 | 17 | name: PHP:${{ matrix.php }} 18 | 19 | steps: 20 | - name: Checkout 21 | uses: actions/checkout@v4 22 | 23 | - name: Setup PHP, with composer 24 | uses: shivammathur/setup-php@v2 25 | with: 26 | php-version: ${{ matrix.php }} 27 | tools: composer:v2 28 | coverage: xdebug 29 | 30 | - name: Get composer cache directory 31 | id: composer-cache 32 | run: | 33 | echo "dir=$(composer config cache-files-dir)" >> $GITHUB_OUTPUT 34 | shell: bash 35 | 36 | - name: Cache composer dependencies 37 | uses: actions/cache@v4 38 | with: 39 | path: ${{ 
steps.composer-cache.outputs.dir }} 40 | key: dependencies-php-${{ matrix.php }}-composer-${{ hashFiles('composer.json') }} 41 | restore-keys: dependencies-php-${{ matrix.php }}-composer- 42 | 43 | - name: Install Composer dependencies 44 | run: | 45 | composer install --prefer-dist --no-interaction --no-suggest 46 | 47 | - name: Run Unit tests 48 | run: | 49 | vendor/bin/phpunit --coverage-clover=tests/logs/clover.xml 50 | 51 | - name: Upload coverage results to Coveralls 52 | env: 53 | COVERALLS_REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} 54 | run: | 55 | composer global require php-coveralls/php-coveralls "^1.0" 56 | coveralls --coverage_clover=tests/logs/clover.xml -v 57 | -------------------------------------------------------------------------------- /.php_cs.dist: -------------------------------------------------------------------------------- 1 | in([ 5 | __DIR__.'/src', 6 | __DIR__.'/tests', 7 | ]) 8 | ->name('*.php') 9 | ->ignoreDotFiles(true) 10 | ->ignoreVCS(true); 11 | 12 | return PhpCsFixer\Config::create() 13 | ->setRules([ 14 | '@PSR2' => true, 15 | 'ordered_imports' => ['sortAlgorithm' => 'alpha'], 16 | 'no_unused_imports' => true, 17 | 'not_operator_with_successor_space' => true, 18 | 'trailing_comma_in_multiline_array' => true, 19 | 'phpdoc_scalar' => true, 20 | 'unary_operator_spaces' => true, 21 | 'binary_operator_spaces' => true, 22 | 'blank_line_before_statement' => [ 23 | 'statements' => ['break', 'continue', 'declare', 'return', 'throw', 'try'], 24 | ], 25 | 'phpdoc_single_line_var_spacing' => true, 26 | 'phpdoc_var_without_name' => true, 27 | 'method_argument_space' => [ 28 | 'on_multiline' => 'ensure_fully_multiline', 29 | 'keep_multiple_spaces_after_comma' => true, 30 | ], 31 | ]) 32 | ->setFinder($finder); -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015-2020 Mark 
Beech 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |



2 | crawlerdetect.io 3 |

4 |

5 |

6 | GitHub Workflow Status 7 | 8 | 9 | 10 | 11 |

12 | 13 | ## About CrawlerDetect 14 | 15 | CrawlerDetect is a PHP class for detecting bots/crawlers/spiders via the `user agent` and `http_from` headers. It can currently detect thousands of bots/spiders/crawlers. 16 | 17 | ### Installation 18 | ``` 19 | composer require jaybizzle/crawler-detect 20 | ``` 21 | 22 | ### Usage 23 | ```PHP 24 | use Jaybizzle\CrawlerDetect\CrawlerDetect; 25 | 26 | $CrawlerDetect = new CrawlerDetect; 27 | 28 | // Check the user agent of the current 'visitor' 29 | if($CrawlerDetect->isCrawler()) { 30 | // true if crawler user agent detected 31 | } 32 | 33 | // Pass a user agent as a string 34 | if($CrawlerDetect->isCrawler('Mozilla/5.0 (compatible; Sosospider/2.0; +http://help.soso.com/webspider.htm)')) { 35 | // true if crawler user agent detected 36 | } 37 | 38 | // Output the name of the bot that matched (if any) 39 | echo $CrawlerDetect->getMatches(); 40 | ``` 41 | 42 | ### Contributing 43 | If you find a bot/spider/crawler user agent that CrawlerDetect fails to detect, please submit a pull request with the regex pattern added to the `$data` array in `src/Fixtures/Crawlers.php` and add the failing user agent to `tests/crawlers.txt`. 44 | 45 | Failing that, just create an issue with the user agent you have found, and we'll take it from there :) 46 | 47 | ### Laravel Package 48 | If you would like to use this with Laravel, please see [Laravel-Crawler-Detect](https://github.com/JayBizzle/Laravel-Crawler-Detect). 49 | 50 | ### Symfony Bundle 51 | To use this library with Symfony 2/3/4, check out the [CrawlerDetectBundle](https://github.com/nicolasmure/CrawlerDetectBundle). 52 | 53 | ### YII2 Extension 54 | To use this library with the YII2 framework, check out [yii2-crawler-detect](https://github.com/AlikDex/yii2-crawler-detect). 55 | 56 | ### ES6 Library 57 | To use this library with Node.js or any ES6-based application, check out [es6-crawler-detect](https://github.com/JefferyHus/es6-crawler-detect). 
58 | 59 | ### Python Library 60 | To use this library in a Python project, check out [crawlerdetect](https://github.com/moskrc/CrawlerDetect). 61 | 62 | ### JVM Library (written in Java) 63 | To use this library in a JVM project (including Java, Scala, Kotlin, etc.), check out [CrawlerDetect](https://github.com/nekosoftllc/crawler-detect). 64 | 65 | ### .NET Library 66 | To use this library in a .NET Standard (including .NET Core) based project, check out [NetCrawlerDetect](https://github.com/gplumb/NetCrawlerDetect). 67 | 68 | ### Ruby Gem 69 | To use this library with Ruby on Rails or any Ruby-based application, check out the [crawler_detect](https://github.com/loadkpi/crawler_detect) gem. 70 | 71 | ### Go Module 72 | To use this library with Go, check out the [crawlerdetect](https://github.com/x-way/crawlerdetect) module. 73 | 74 | _Parts of this class are based on the brilliant [MobileDetect](https://github.com/serbanghita/Mobile-Detect)._ 75 | 76 | [![Analytics](https://ga-beacon.appspot.com/UA-72430465-1/Crawler-Detect/readme?pixel)](https://github.com/JayBizzle/Crawler-Detect) 77 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "jaybizzle/crawler-detect", 3 | "type": "library", 4 | "description": "CrawlerDetect is a PHP class for detecting bots/crawlers/spiders via the user agent", 5 | "keywords": ["crawler", "crawler detect", "crawler detector", "crawlerdetect", "php crawler detect"], 6 | "homepage": "https://github.com/JayBizzle/Crawler-Detect/", 7 | "license": "MIT", 8 | "authors": [ 9 | { 10 | "name": "Mark Beech", 11 | "email": "m@rkbee.ch", 12 | "role": "Developer" 13 | } 14 | ], 15 | "require": { 16 | "php": ">=7.1.0" 17 | }, 18 | "require-dev": { 19 | "phpunit/phpunit": "^4.8|^5.5|^6.5|^9.4" 20 | }, 21 | "autoload": { 22 | "psr-4": { 23 | "Jaybizzle\\CrawlerDetect\\": "src/" 24 | } 25 | }, 26 | "scripts": { 
27 | "test": "vendor/bin/phpunit" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /export.php: -------------------------------------------------------------------------------- 1 | 7 | * 8 | * This source file is subject to the MIT license that is bundled 9 | * with this source code in the file LICENSE. 10 | */ 11 | 12 | require 'src/Fixtures/AbstractProvider.php'; 13 | require 'src/Fixtures/Crawlers.php'; 14 | require 'src/Fixtures/Exclusions.php'; 15 | require 'src/Fixtures/Headers.php'; 16 | 17 | /* Short names of the fixture classes to export; each is resolved below inside the Jaybizzle\CrawlerDetect\Fixtures namespace. */ $src = [ 18 | 'Crawlers', 19 | 'Exclusions', 20 | 'Headers', 21 | ]; 22 | 23 | /* Instantiate every fixture class by its fully-qualified name and write both export formats for it. */ foreach ($src as $class) { 24 | $class = "Jaybizzle\\CrawlerDetect\\Fixtures\\$class"; 25 | $object = new $class; 26 | 27 | outputJson($object); 28 | outputTxt($object); 29 | } 30 | 31 | /** Write the JSON encoding of $object->getAll() to raw/<short class name>.json. The short class name is obtained through ReflectionClass::getShortName(). NOTE(review): the results of json_encode() and file_put_contents() are not checked, so a failed export is silent. @param object $object Fixture instance; must expose getAll(). */ function outputJson($object) 32 | { 33 | $className = (new ReflectionClass($object))->getShortName(); 34 | file_put_contents("raw/$className.json", json_encode($object->getAll())); 35 | } 36 | 37 | /** Write the entries returned by $object->getAll(), joined with PHP_EOL, to raw/<short class name>.txt. NOTE(review): the result of file_put_contents() is not checked, so a failed export is silent. @param object $object Fixture instance; must expose getAll(). */ function outputTxt($object) 38 | { 39 | $className = (new ReflectionClass($object))->getShortName(); 40 | file_put_contents("raw/$className.txt", implode(PHP_EOL, $object->getAll())); 41 | } 42 | -------------------------------------------------------------------------------- /raw/Crawlers.json: -------------------------------------------------------------------------------- 1 | [" YLT","^Aether","^Amazon Simple Notification Service Agent$","^Amazon-Route53-Health-Check-Service","^Amazon CloudFront","^b0t$","^bluefish ","^Calypso v\\\/","^COMODO DCV","^Corax","^DangDang","^DavClnt","^DHSH","^docker\\\/[0-9]","^Expanse","^FDM ","^git\\\/","^Goose\\\/","^Grabber","^Gradle\\\/","^HTTPClient\\\/","^HTTPing","^Java\\\/","^Jeode\\\/","^Jetty\\\/","^Mail\\\/","^Mget","^Microsoft URL 
Control","^Mikrotik\\\/","^Netlab360","^NG\\\/[0-9\\.]","^NING\\\/","^npm\\\/","^Nuclei","^PHP-AYMAPI\\\/","^PHP\\\/","^pip\\\/","^pnpm\\\/","^RMA\\\/","^Ruby|Ruby\\\/[0-9]","^symbolicator\\\/","^Swurl ","^TLS tester ","^twine\\\/","^ureq","^VSE\\\/[0-9]","^WordPress\\.com","^XRL\\\/[0-9]","^ZmEu","008\\\/","13TABS","192\\.comAgent","2GDPR\\\/","2ip\\.ru","404enemy","7Siters","80legs","a3logics\\.in","A6-Indexer","Abonti","Aboundex","aboutthedomain","Accoona-AI-Agent","acebookexternalhit\\\/","acoon","acrylicapps\\.com\\\/pulp","Acunetix","AdAuth\\\/","adbeat","AddThis","ADmantX","AdminLabs","adressendeutschland","adreview\\\/","adscanner","adstxt-worker","Adstxtaggregator","adstxt\\.com","Adyen HttpClient","AffiliateLabz\\\/","affilimate-puppeteer","agentslug","AHC","aihit","aiohttp\\\/","Airmail","akka-http\\\/","akula\\\/","alertra","alexa site audit","Alibaba\\.Security\\.Heimdall","Alligator","allloadin","AllSubmitter","alyze\\.info","amagit","Anarchie","AndroidDownloadManager","Anemone","AngleSharp","annotate_google","Anthill","Anturis Agent","Ant\\.com","AnyEvent-HTTP\\\/","Apache Ant\\\/","Apache Droid","Apache OpenOffice","Apache-HttpAsyncClient","Apache-HttpClient","ApacheBench","Apexoo","apimon\\.de","APIs-Google","AportWorm\\\/","AppBeat\\\/","AppEngine-Google","AppleSyndication","Aprc\\\/[0-9]","Arachmo","arachnode","Arachnophilia","aria2","Arukereso","asafaweb","Asana\\\/","Ask Jeeves","AskQuickly","ASPSeek","Asterias","Astute","asynchttp","Attach","attohttpc","autocite","AutomaticWPTester","Autonomy","awin\\.com","AWS Security Scanner","axios\\\/","a\\.pr-cy\\.ru","B-l-i-t-z-B-O-T","Backlink-Ceck","BacklinkHttpStatus","BackStreet","BackupLand","BackWeb","Bad-Neighborhood","Badass","baidu\\.com","Bandit","Barracuda Sentinel \\(EE\\)","basicstate","BatchFTP","Battleztar 
Bazinga","baypup\\\/","BazQux","BBBike","BCKLINKS","BDFetch","BegunAdvertising","Bewica-security-scan","Bidtellect","BigBozz","Bigfoot","biglotron","BingLocalSearch","BingPreview","binlar","biNu image cacher","Bitacle","Bitrix link preview","biz_Directory","BKCTwitterUnshortener\\\/","Black Hole","Blackboard Safeassign","BlackWidow","BlockNote\\.Net","BlogBridge","Bloglines","Bloglovin","BlogPulseLive","BlogSearch","Blogtrottr","BlowFish","boitho\\.com-dc","Boost\\.Beast","BPImageWalker","Braintree-Webhooks","Branch Metrics API","Branch-Passthrough","Brandprotect","Brandwatch","Brodie\\\/","Browsershots","BUbiNG","Buck\\\/","Buddy","BuiltWith","Bullseye","BunnySlippers","Burf Search","Butterfly\\\/","BuzzSumo","CAAM\\\/[0-9]","caam dot crwlr at gmail dot com","CakePHP","Calculon","Canary%20Mail","CaretNail","catexplorador","CC Metadata Scaper","Cegbfeieh","censys","centuryb.o.t9[at]gmail.com","Cerberian Drtrs","CERT\\.at-Statistics-Survey","cf-facebook","cg-eye","changedetection","ChangesMeter","Charlotte","chatterino-api-cache","CheckHost","checkprivacy","CherryPicker","ChinaClaw","Chirp\\\/","chkme\\.com","Chlooe","Chromaxa","CirrusExplorer","CISPA Vulnerability Notification","CISPA Web Analyser","Citoid","CJNetworkQuality","Clarsentia","clips\\.ua\\.ac\\.be","Cloud mapping","CloudEndure","CloudFlare-AlwaysOnline","Cloudflare-Healthchecks","Cloudinary","cmcm\\.com","coccoc","cognitiveseo","ColdFusion","colly -","CommaFeed","Commons-HttpClient","commonscan","contactbigdatafr","contentkingapp","Contextual Code Sites Explorer","convera","CookieReports","copyright sheriff","CopyRightCheck","Copyscape","cortex\\\/","Cosmos4j\\.feedback","Covario-IDS","Craw\\\/","Crescent","Criteo","Crowsnest","CSHttp","CSSCheck","Cula\\\/","curb","Curious George","curl","cuwhois\\\/","cybo\\.com","DAP\\\/NetHTTP","DareBoost","DatabaseDriverMysqli","DataCha0s","DatadogSynthetics","Datafeedwatch","Datanyze","DataparkSearch","dataprovider","DataXu","Daum(oa)?[ 
\\\/][0-9]","dBpoweramp","ddline","deeris","delve\\.ai","Demon","DeuSu","developers\\.google\\.com\\\/\\+\\\/web\\\/snippet\\\/","Devil","Digg","Digincore","DigitalPebble","Dirbuster","Discourse Forum Onebox","Dispatch\\\/","Disqus\\\/","DittoSpyder","dlvr","DMBrowser","DNSPod-reporting","docoloc","Dolphin http client","DomainAppender","DomainLabz","Domains Project\\\/","Donuts Content Explorer","dotMailer content retrieval","dotSemantic","downforeveryoneorjustme","Download Wonder","downnotifier","DowntimeDetector","Drip","drupact","Drupal \\(\\+http:\\\/\\\/drupal\\.org\\\/\\)","DTS Agent","dubaiindex","DuplexWeb-Google","DynatraceSynthetic","EARTHCOM","Easy-Thumb","EasyDL","Ebingbong","ec2linkfinder","eCairn-Grabber","eCatch","ECCP","eContext\\\/","Ecxi","EirGrabber","ElectricMonk","elefent","EMail Exractor","EMail Wolf","EmailWolf","Embarcadero","Embed PHP Library","Embedly","endo\\\/","europarchive\\.org","evc-batch","EventMachine HttpClient","Everwall Link Expander","Evidon","Evrinid","ExactSearch","ExaleadCloudview","Excel\\\/","exif","ExoRank","Exploratodo","Express WebPictures","Extreme Picture Finder","EyeNetIE","ezooms","facebookcatalog","facebookexternalhit","facebookexternalua","facebookplatform","fairshare","Faraday v","fasthttp","Faveeo","Favicon downloader","faviconarchive","faviconkit","FavOrg","Feed Wrangler","Feedable\\\/","Feedbin","FeedBooster","FeedBucket","FeedBunch\\\/","FeedBurner","feeder","Feedly","FeedshowOnline","Feedshow\\\/","Feedspot","FeedViewer\\\/","Feedwind\\\/","FeedZcollector","feeltiptop","Fetch API","Fetch\\\/[0-9]","Fever\\\/[0-9]","FHscan","Fiery%20Feeds","Filestack","Fimap","findlink","findthatfile","FlashGet","FlipboardBrowserProxy","FlipboardProxy","FlipboardRSS","Flock\\\/","Florienzh\\\/","fluffy","Flunky","flynxapp","forensiq","ForusP","FoundSeoTool","fragFINN\\.de","free thumbnails","Freeuploader","FreshRSS","frontman","Funnelback","Fuzz Faster U 
Fool","G-i-g-a-b-o-t","g00g1e\\.net","ganarvisitas","gdnplus\\.com","GeedoProductSearch","geek-tools","Genieo","GentleSource","GetCode","Getintent","GetLinkInfo","getprismatic","GetRight","getroot","GetURLInfo\\\/","GetWeb","Geziyor","Ghost Inspector","GigablastOpenSource","GIS-LABS","github-camo","GitHub-Hookshot","github\\.com","Go http package","Go [\\d\\.]* package http","Go!Zilla","Go-Ahead-Got-It","Go-http-client","go-mtasts\\\/","gobuster","gobyus","Gofeed","gofetch","Goldfire Server","GomezAgent","gooblog","Goodzer\\\/","Google AppsViewer","Google Desktop","Google favicon","Google Keyword Suggestion","Google Keyword Tool","Google Page Speed Insights","Google PP Default","Google Search Console","Google Web Preview","Google-Ads","Google-Adwords","Google-Apps-Script","Google-Calendar-Importer","Google-HotelAdsVerifier","Google-HTTP-Java-Client","Google-InspectionTool","Google-Podcast","Google-Publisher-Plugin","Google-Read-Aloud","Google-SearchByImage","Google-Site-Verification","Google-SMTP-STS","Google-speakr","Google-Structured-Data-Testing-Tool","Google-Transparency-Report","google-xrawler","Google-Youtube-Links","GoogleDocs","GoogleHC\\\/","GoogleOther","GoogleProber","GoogleProducer","GoogleSites","Gookey","GoSpotCheck","gosquared-thumbnailer","Gotit","GoZilla","grabify","GrabNet","Grafula","Grammarly","GrapeFX","GreatNews","Gregarius","GRequests","grokkit","grouphigh","grub-client","gSOAP\\\/","GT::WWW","GTmetrix","GuzzleHttp","gvfs\\\/","HAA(A)?RTLAND http client","Haansoft","hackney\\\/","Hadi Agent","HappyApps-WebCheck","Hardenize","Hatena","Havij","HaxerMen","HEADMasterSEO","HeartRails_Capture","help@dataminr\\.com","heritrix","Hexometer","historious","hkedcity","hledejLevne\\.cz","Hloader","HMView","Holmes","HonesoSearchEngine","HootSuite Image proxy","Hootsuite-WebFeed","hosterstats","HostTracker","ht:\\\/\\\/check","htdig","HTMLparser","htmlyse","HTTP Banner 
Detection","http-get","HTTP-Header-Abfrage","http-kit","http-request\\\/","HTTP-Tiny","HTTP::Lite","http:\\\/\\\/www.neomo.de\\\/","HttpComponents","httphr","HTTPie","HTTPMon","httpRequest","httpscheck","httpssites_power","httpunit","HttpUrlConnection","http\\.rb\\\/","HTTP_Compression_Test","http_get","http_request2","http_requester","httrack","huaweisymantec","HubSpot ","HubSpot-Link-Resolver","Humanlinks","i2kconnect\\\/","Iblog","ichiro","Id-search","IdeelaborPlagiaat","IDG Twitter Links Resolver","IDwhois\\\/","Iframely","igdeSpyder","iGooglePortal","IlTrovatore","Image Fetch","Image Sucker","ImageEngine\\\/","ImageVisu\\\/","Imagga","imagineeasy","imgsizer","InAGist","inbound\\.li parser","InDesign%20CC","Indy Library","InetURL","infegy","infohelfer","InfoTekies","InfoWizards Reciprocal Link","inpwrd\\.com","instabid","Instapaper","Integrity","integromedb","Intelliseek","InterGET","Internet Ninja","InternetSeer","internetVista monitor","internetwache","internet_archive","intraVnews","IODC","IOI","Inboxb0t","iplabel","ips-agent","IPS\\\/[0-9]","IPWorks HTTP\\\/S Component","iqdb\\\/","Iria","Irokez","isitup\\.org","iskanie","isUp\\.li","iThemes Sync\\\/","IZaBEE","iZSearch","JAHHO","janforman","Jaunt\\\/","Java.*outbrain","javelin\\.io","Jbrofuzz","Jersey\\\/","JetCar","Jigsaw","Jobboerse","JobFeed discovery","Jobg8 URL Monitor","jobo","Jobrapido","Jobsearch1\\.5","JoinVision Generic","JolokiaPwn","Joomla","Jorgee","JS-Kit","JungleKeyThumbnail","JustView","Kaspersky Lab CFR link resolver","Kelny\\\/","Kerrigan\\\/","KeyCDN","Keyword Density","Keywords Research","khttp\\\/","KickFire","KimonoLabs\\\/","Kml-Google","knows\\.is","KOCMOHABT","kouio","krawler\\.dk","kube-probe","kubectl","kulturarw3","KumKie","Larbin","Lavf\\\/","leakix\\.net","LeechFTP","LeechGet","letsencrypt","Lftp","LibVLC","LibWeb","Libwhisker","libwww","Licorne","Liferea\\\/","Lighthouse","Lightspeedsystems","Likse","limber\\.io","Link 
Valet","LinkAlarm\\\/","LinkAnalyser","link-check","linkCheck","linkdex","LinkExaminer","linkfluence","linkpeek","LinkPreview","LinkScan","LinksManager","LinkTiger","LinkWalker","link_thumbnailer","Lipperhey","Litemage_walker","livedoor ScreenShot","LoadImpactRload","localsearch-web","LongURL API","longurl-r-package","looid\\.com","looksystems\\.net","lscache_runner","ltx71","lua-resty-http","Lucee \\(CFML Engine\\)","Lush Http Client","lwp-request","lwp-trivial","LWP::Simple","lycos","LYT\\.SR","L\\.webis","mabontland","MacOutlook\\\/","MagentaNews\\\/","Mag-Net","MagpieRSS","Mail::STS","MailChimp","Mail\\.Ru","Majestic12","makecontact\\\/","Mandrill","MapperCmd","marketinggrader","MarkMonitor","MarkWatch","Mass Downloader","masscan\\\/","Mata Hari","mattermost","MatchorySearch\\\/","Mediametric","Mediapartners-Google","mediawords","MegaIndex\\.ru","MeltwaterNews","Melvil Rawi","MemGator","Metaspinner","MetaURI","MFC_Tear_Sample","Microsearch","Microsoft Data Access","Microsoft Office","Microsoft Outlook","Microsoft Windows Network Diagnostics","Microsoft-WebDAV-MiniRedir","Microsoft\\.Data\\.Mashup","MicrosoftPreview","MIDown tool","MIIxpc","Mindjet","Miniature\\.io","Miniflux","mio_httpc","Miro-HttpClient","Mister PiX","mixdata dot com","mixed-content-scan","mixnode","Mnogosearch","mogimogi","Mojeek","Mojolicious \\(Perl\\)","Mollie","monitis","Monitority\\\/","Monit\\\/","montastic","MonSpark","MonTools","Moreover","Morfeus Fucking Scanner","Morning Paper","MovableType","mowser","Mrcgiguy","Mr\\.4x3 Powered","MS Web Services Client Protocol","MSFrontPage","mShots","MuckRack\\\/","muhstik-scan","MVAClient","MxToolbox\\\/","myseosnapshot","nagios","Najdi\\.si","Name Intelligence","NameFo\\.com","Nameprotect","nationalarchives","Navroad","nbertaupete95","NearSite","Needle","Nessus","Net Vampire","NetAnts","NETCRAFT","NetLyzer","NetMechanic","NetNewsWire","Netpursual","netresearch","NetShelter 
ContentScan","Netsparker","NetSystemsResearch","nettle","NetTrack","Netvibes","NetZIP","Neustar WPM","NeutrinoAPI","NewRelicPinger","NewsBlur .*Finder","NewsGator","newsme","newspaper\\\/","Nexgate Ruby Client","NG-Search","nghttp2","Nibbler","NICErsPRO","NihilScio","Nikto","nineconnections","NLNZ_IAHarvester","Nmap Scripting Engine","node-fetch","node-superagent","node-urllib","Nodemeter","NodePing","node\\.io","nominet\\.org\\.uk","nominet\\.uk","Norton-Safeweb","Notifixious","notifyninja","NotionEmbedder","nuhk","nutch","Nuzzel","nWormFeedFinder","nyawc\\\/","Nymesis","NYU","Observatory\\\/","Ocelli\\\/","Octopus","oegp","Offline Explorer","Offline Navigator","OgScrper","okhttp","omgili","OMSC","Online Domain Tools","Open Source RSS","OpenCalaisSemanticProxy","Openfind","OpenLinkProfiler","Openstat\\\/","OpenVAS","OPPO A33","Optimizer","Orbiter","OrgProbe\\\/","orion-semantics","Outlook-Express","Outlook-iOS","Owler","Owlin","ownCloud News","ow\\.ly","OxfordCloudService","page scorer","Page Valet","page2rss","PageFreezer","PageGrabber","PagePeeker","PageScorer","Pagespeed\\\/","PageThing","page_verifier","Panopta","panscient","Papa Foto","parsijoo","Pavuk","PayPal IPN","pcBrowser","Pcore-HTTP","PDF24 URL To PDF","Pearltrees","PECL::HTTP","peerindex","Peew","PeoplePal","Perlu -","PhantomJS Screenshoter","PhantomJS\\\/","Photon\\\/","php-requests","phpservermon","Pi-Monster","Picscout","Picsearch","PictureFinder","Pimonster","Pingability","PingAdmin\\.Ru","Pingdom","Pingoscope","PingSpot","ping\\.blo\\.gs","pinterest\\.com","Pixray","Pizilla","Plagger\\\/","Pleroma ","Ploetz \\+ Zeller","Plukkie","plumanalytics","PocketImageCache","PocketParser","Pockey","PodcastAddict\\\/","POE-Component-Client-HTTP","Polymail\\\/","Pompos","Porkbun","Port Monitor","postano","postfix-mta-sts-resolver","PostmanRuntime","postplanner\\.com","PostPost","postrank","PowerPoint\\\/","Prebid","Prerender","Priceonomics Analysis 
Engine","PrintFriendly","PritTorrent","Prlog","probely\\.com","probethenet","Project ?25499","Project-Resonance","prospectb2b","Protopage","ProWebWalker","proximic","PRTG Network Monitor","pshtt, https scanning","PTST ","PTST\\\/[0-9]+","pulsetic\\.com","Pump","Python-httplib2","python-httpx","python-requests","Python-urllib","Qirina Hurdler","QQDownload","QrafterPro","Qseero","Qualidator","QueryN Metasearch","queuedriver","quic-go-HTTP\\\/","QuiteRSS","Quora Link Preview","Qwantify","Radian6","RadioPublicImageResizer","Railgun\\\/","RankActive","RankFlex","RankSonicSiteAuditor","RapidLoad\\\/","Re-re Studio","ReactorNetty","Readability","RealDownload","RealPlayer%20Downloader","RebelMouse","Recorder","RecurPost\\\/","redback\\\/","ReederForMac","Reeder\\\/","ReGet","RepoMonkey","request\\.js","reqwest\\\/","ResponseCodeTest","RestSharp","Riddler","Rival IQ","Robosourcer","Robozilla","ROI Hunter","RPT-HTTPClient","RSSMix\\\/","RSSOwl","RuxitSynthetic","RyowlEngine","safe-agent-scanner","SalesIntelligent","Saleslift","SAP NetWeaver Application Server","SauceNAO","SBIder","sc-downloader","scalaj-http","Scamadviser-Frontend","ScanAlert","scan\\.lol","Scoop","scooter","ScopeContentAG-HTTP-Client","ScoutJet","ScoutURLMonitor","ScrapeBox Page Scanner","Scrapy","Screaming","ScreenShotService","Scrubby","Scrutiny\\\/","Search37","searchenginepromotionhelp","Searchestate","SearchExpress","SearchSight","SearchWP","search\\.thunderstone","Seeker","semanticdiscovery","semanticjuice","Semiocast HTTP client","Semrush","Sendsay\\.Ru","sentry\\\/","SEO Browser","Seo Servis","seo-nastroj\\.cz","seo4ajax","Seobility","SEOCentro","SeoCheck","seocompany","SEOkicks","SEOlizer","Seomoz","SEOprofiler","seoscanners","SEOsearch","seositecheckup","SEOstats","servernfo","sexsearcher","Seznam","Shelob","Shodan","Shoppimon","ShopWiki","ShortLinkTranslate","shortURL lengthener","shrinktheweb","Sideqik","Siege","SimplePie","SimplyFast","Siphon","SISTRIX","Site 
Sucker","Site-Shot\\\/","Site24x7","SiteBar","Sitebeam","Sitebulb\\\/","SiteCondor","SiteExplorer","SiteGuardian","Siteimprove","SiteIndexed","Sitemap(s)? Generator","SitemapGenerator","SiteMonitor","Siteshooter B0t","SiteSnagger","SiteSucker","SiteTruth","Sitevigil","sitexy\\.com","SkypeUriPreview","Slack\\\/","sli-systems\\.com","slider\\.com","slurp","SlySearch","SmartDownload","SMRF URL Expander","SMUrlExpander","Snake","Snappy","SnapSearch","Snarfer\\\/","SniffRSS","sniptracker","Snoopy","SnowHaze Search","sogou web","SortSite","Sottopop","sovereign\\.ai","SpaceBison","SpamExperts","Spammen","Spanner","Spawning-AI","spaziodati","SPDYCheck","Specificfeeds","SpeedKit","speedy","SPEng","Spinn3r","spray-can","Sprinklr ","spyonweb","sqlmap","Sqlworm","Sqworm","SSL Labs","ssl-tools","StackRambler","Statastico\\\/","Statically-","StatusCake","Steeler","Stratagems Kumo","Stripe\\\/","Stroke\\.cz","StudioFACA","StumbleUpon","suchen","Sucuri","summify","SuperHTTP","Surphace Scout","Suzuran","swcd ","Symfony BrowserKit","Symfony2 BrowserKit","Synapse\\\/","Syndirella\\\/","SynHttpClient-Built","Sysomos","sysscan","Szukacz","T0PHackTeam","tAkeOut","Tarantula\\\/","Taringa UGC","TarmotGezgin","tchelebi\\.io","techiaith\\.cymru","Teleport","Telesoft","Telesphoreo","Telesphorep","Tenon\\.io","teoma","terrainformatica","Test Certificate Info","testuri","Tetrahedron","TextRazor Downloader","The Drop Reaper","The Expert HTML Source Viewer","The Intraformant","The Knowledge AI","theinternetrules","TheNomad","Thinklab","Thumbor","Thumbshots","ThumbSniper","timewe\\.net","TinEye","Tiny Tiny RSS","TLSProbe\\\/","Toata","topster","touche\\.com","Traackr\\.com","tracemyfile","Trackuity","TrapitAgent","Trendiction","Trendsmap","trendspottr","truwoGPS","TryJsoup","TulipChain","Turingos","Turnitin","tweetedtimes","Tweetminster","Tweezler\\\/","twibble","Twice","Twikle","Twingly","Twisted 
PageGetter","Typhoeus","ubermetrics-technologies","uclassify","UdmSearch","ultimate_sitemap_parser","unchaos","unirest-java","UniversalFeedParser","unshortenit","Unshorten\\.It","Untiny","UnwindFetchor","updated","updown\\.io daemon","Upflow","Uptimia","URL Verifier","Urlcheckr","URLitor","urlresolver","Urlstat","URLTester","UrlTrends Ranking Updater","URLy Warning","URLy\\.Warning","URL\\\/Emacs","Vacuum","Vagabondo","VB Project","vBSEO","VCI","Verity","via ggpht\\.com GoogleImageProxy","Virusdie","visionutils","Visual Rights Group","vkShare","VoidEYE","Voil","voltron","voyager\\\/","VSAgent\\\/","VSB-TUO\\\/","Vulnbusters Meter","VYU2","w3af\\.org","W3C-checklink","W3C-mobileOK","W3C_Unicorn","WAC-OFU","WakeletLinkExpander","WallpapersHD","Wallpapers\\\/[0-9]+","wangling","Wappalyzer","WatchMouse","WbSrch\\\/","WDT\\.io","Web Auto","Web Collage","Web Enhancer","Web Fetch","Web Fuck","Web Pix","Web Sauger","Web spyder","Web Sucker","web-capture\\.net","Web-sniffer","Webalta","Webauskunft","WebAuto","WebCapture","WebClient\\\/","webcollage","WebCookies","WebCopier","WebCorp","WebDataStats","WebDoc","WebEnhancer","WebFetch","WebFuck","WebGazer","WebGo IS","WebImageCollector","WebImages","WebIndex","webkit2png","WebLeacher","webmastercoffee","webmon ","WebPix","WebReaper","WebSauger","webscreenie","Webshag","Webshot","Website Quester","websitepulse agent","WebsiteQuester","Websnapr","WebSniffer","Webster","WebStripper","WebSucker","webtech\\\/","WebThumbnail","Webthumb\\\/","WebWhacker","WebZIP","WeLikeLinks","WEPA","WeSEE","wf84","Wfuzz\\\/","wget","WhatCMS","WhatsApp","WhatsMyIP","WhatWeb","WhereGoes\\?","Whibse","WhoAPI\\\/","WhoRunsCoinHive","Whynder Magnet","Windows-RSS-Platform","WinHttp-Autoproxy-Service","WinHTTP\\\/","WinPodder","wkhtmlto","wmtips","Woko","Wolfram HTTPClient","woorankreview","WordPress\\\/","WordupinfoSearch","Word\\\/","worldping-api","wotbox","WP Engine Install Performance API","WP Rocket","wpif","wprecon\\.com 
survey","WPScan","wscheck","Wtrace","WWW-Collector-E","WWW-Mechanize","WWW::Document","WWW::Mechanize","WWWOFFLE","www\\.monitor\\.us","x09Mozilla","x22Mozilla","XaxisSemanticsClassifier","XenForo\\\/","Xenu Link Sleuth","XING-contenttabreceiver","xpymep([0-9]?)\\.exe","Y!J-[A-Z][A-Z][A-Z]","Yaanb","yacy","Yahoo Link Preview","YahooCacheSystem","YahooMailProxy","YahooYSMcm","YandeG","Yandex(?!Search)","yanga","yeti","Yo-yo","Yoleo Consumer","yomins\\.com","yoogliFetchAgent","YottaaMonitor","Your-Website-Sucks","yourls\\.org","YoYs\\.net","YP\\.PL","Zabbix","Zade","Zao","Zapier","Zauba","Zemanta Aggregator","Zend\\\\Http\\\\Client","Zend_Http_Client","Zermelo","Zeus ","zgrab","ZnajdzFoto","ZnHTTP","Zombie\\.js","Zoom\\.Mac","ZoteroTranslationServer","ZyBorg","[a-z0-9\\-_]*(bot|crawl|headless|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer|scraper)"] -------------------------------------------------------------------------------- /raw/Crawlers.txt: -------------------------------------------------------------------------------- 1 | YLT 2 | ^Aether 3 | ^Amazon Simple Notification Service Agent$ 4 | ^Amazon-Route53-Health-Check-Service 5 | ^Amazon CloudFront 6 | ^b0t$ 7 | ^bluefish 8 | ^Calypso v\/ 9 | ^COMODO DCV 10 | ^Corax 11 | ^DangDang 12 | ^DavClnt 13 | ^DHSH 14 | ^docker\/[0-9] 15 | ^Expanse 16 | ^FDM 17 | ^git\/ 18 | ^Goose\/ 19 | ^Grabber 20 | ^Gradle\/ 21 | ^HTTPClient\/ 22 | ^HTTPing 23 | ^Java\/ 24 | ^Jeode\/ 25 | ^Jetty\/ 26 | ^Mail\/ 27 | ^Mget 28 | ^Microsoft URL Control 29 | ^Mikrotik\/ 30 | ^Netlab360 31 | ^NG\/[0-9\.] 
32 | ^NING\/ 33 | ^npm\/ 34 | ^Nuclei 35 | ^PHP-AYMAPI\/ 36 | ^PHP\/ 37 | ^pip\/ 38 | ^pnpm\/ 39 | ^RMA\/ 40 | ^Ruby|Ruby\/[0-9] 41 | ^symbolicator\/ 42 | ^Swurl 43 | ^TLS tester 44 | ^twine\/ 45 | ^ureq 46 | ^VSE\/[0-9] 47 | ^WordPress\.com 48 | ^XRL\/[0-9] 49 | ^ZmEu 50 | 008\/ 51 | 13TABS 52 | 192\.comAgent 53 | 2GDPR\/ 54 | 2ip\.ru 55 | 404enemy 56 | 7Siters 57 | 80legs 58 | a3logics\.in 59 | A6-Indexer 60 | Abonti 61 | Aboundex 62 | aboutthedomain 63 | Accoona-AI-Agent 64 | acebookexternalhit\/ 65 | acoon 66 | acrylicapps\.com\/pulp 67 | Acunetix 68 | AdAuth\/ 69 | adbeat 70 | AddThis 71 | ADmantX 72 | AdminLabs 73 | adressendeutschland 74 | adreview\/ 75 | adscanner 76 | adstxt-worker 77 | Adstxtaggregator 78 | adstxt\.com 79 | Adyen HttpClient 80 | AffiliateLabz\/ 81 | affilimate-puppeteer 82 | agentslug 83 | AHC 84 | aihit 85 | aiohttp\/ 86 | Airmail 87 | akka-http\/ 88 | akula\/ 89 | alertra 90 | alexa site audit 91 | Alibaba\.Security\.Heimdall 92 | Alligator 93 | allloadin 94 | AllSubmitter 95 | alyze\.info 96 | amagit 97 | Anarchie 98 | AndroidDownloadManager 99 | Anemone 100 | AngleSharp 101 | annotate_google 102 | Anthill 103 | Anturis Agent 104 | Ant\.com 105 | AnyEvent-HTTP\/ 106 | Apache Ant\/ 107 | Apache Droid 108 | Apache OpenOffice 109 | Apache-HttpAsyncClient 110 | Apache-HttpClient 111 | ApacheBench 112 | Apexoo 113 | apimon\.de 114 | APIs-Google 115 | AportWorm\/ 116 | AppBeat\/ 117 | AppEngine-Google 118 | AppleSyndication 119 | Aprc\/[0-9] 120 | Arachmo 121 | arachnode 122 | Arachnophilia 123 | aria2 124 | Arukereso 125 | asafaweb 126 | Asana\/ 127 | Ask Jeeves 128 | AskQuickly 129 | ASPSeek 130 | Asterias 131 | Astute 132 | asynchttp 133 | Attach 134 | attohttpc 135 | autocite 136 | AutomaticWPTester 137 | Autonomy 138 | awin\.com 139 | AWS Security Scanner 140 | axios\/ 141 | a\.pr-cy\.ru 142 | B-l-i-t-z-B-O-T 143 | Backlink-Ceck 144 | BacklinkHttpStatus 145 | BackStreet 146 | BackupLand 147 | BackWeb 148 | Bad-Neighborhood 149 | Badass 
150 | baidu\.com 151 | Bandit 152 | Barracuda Sentinel \(EE\) 153 | basicstate 154 | BatchFTP 155 | Battleztar Bazinga 156 | baypup\/ 157 | BazQux 158 | BBBike 159 | BCKLINKS 160 | BDFetch 161 | BegunAdvertising 162 | Bewica-security-scan 163 | Bidtellect 164 | BigBozz 165 | Bigfoot 166 | biglotron 167 | BingLocalSearch 168 | BingPreview 169 | binlar 170 | biNu image cacher 171 | Bitacle 172 | Bitrix link preview 173 | biz_Directory 174 | BKCTwitterUnshortener\/ 175 | Black Hole 176 | Blackboard Safeassign 177 | BlackWidow 178 | BlockNote\.Net 179 | BlogBridge 180 | Bloglines 181 | Bloglovin 182 | BlogPulseLive 183 | BlogSearch 184 | Blogtrottr 185 | BlowFish 186 | boitho\.com-dc 187 | Boost\.Beast 188 | BPImageWalker 189 | Braintree-Webhooks 190 | Branch Metrics API 191 | Branch-Passthrough 192 | Brandprotect 193 | Brandwatch 194 | Brodie\/ 195 | Browsershots 196 | BUbiNG 197 | Buck\/ 198 | Buddy 199 | BuiltWith 200 | Bullseye 201 | BunnySlippers 202 | Burf Search 203 | Butterfly\/ 204 | BuzzSumo 205 | CAAM\/[0-9] 206 | caam dot crwlr at gmail dot com 207 | CakePHP 208 | Calculon 209 | Canary%20Mail 210 | CaretNail 211 | catexplorador 212 | CC Metadata Scaper 213 | Cegbfeieh 214 | censys 215 | centuryb.o.t9[at]gmail.com 216 | Cerberian Drtrs 217 | CERT\.at-Statistics-Survey 218 | cf-facebook 219 | cg-eye 220 | changedetection 221 | ChangesMeter 222 | Charlotte 223 | chatterino-api-cache 224 | CheckHost 225 | checkprivacy 226 | CherryPicker 227 | ChinaClaw 228 | Chirp\/ 229 | chkme\.com 230 | Chlooe 231 | Chromaxa 232 | CirrusExplorer 233 | CISPA Vulnerability Notification 234 | CISPA Web Analyser 235 | Citoid 236 | CJNetworkQuality 237 | Clarsentia 238 | clips\.ua\.ac\.be 239 | Cloud mapping 240 | CloudEndure 241 | CloudFlare-AlwaysOnline 242 | Cloudflare-Healthchecks 243 | Cloudinary 244 | cmcm\.com 245 | coccoc 246 | cognitiveseo 247 | ColdFusion 248 | colly - 249 | CommaFeed 250 | Commons-HttpClient 251 | commonscan 252 | contactbigdatafr 253 | contentkingapp 
254 | Contextual Code Sites Explorer 255 | convera 256 | CookieReports 257 | copyright sheriff 258 | CopyRightCheck 259 | Copyscape 260 | cortex\/ 261 | Cosmos4j\.feedback 262 | Covario-IDS 263 | Craw\/ 264 | Crescent 265 | Criteo 266 | Crowsnest 267 | CSHttp 268 | CSSCheck 269 | Cula\/ 270 | curb 271 | Curious George 272 | curl 273 | cuwhois\/ 274 | cybo\.com 275 | DAP\/NetHTTP 276 | DareBoost 277 | DatabaseDriverMysqli 278 | DataCha0s 279 | DatadogSynthetics 280 | Datafeedwatch 281 | Datanyze 282 | DataparkSearch 283 | dataprovider 284 | DataXu 285 | Daum(oa)?[ \/][0-9] 286 | dBpoweramp 287 | ddline 288 | deeris 289 | delve\.ai 290 | Demon 291 | DeuSu 292 | developers\.google\.com\/\+\/web\/snippet\/ 293 | Devil 294 | Digg 295 | Digincore 296 | DigitalPebble 297 | Dirbuster 298 | Discourse Forum Onebox 299 | Dispatch\/ 300 | Disqus\/ 301 | DittoSpyder 302 | dlvr 303 | DMBrowser 304 | DNSPod-reporting 305 | docoloc 306 | Dolphin http client 307 | DomainAppender 308 | DomainLabz 309 | Domains Project\/ 310 | Donuts Content Explorer 311 | dotMailer content retrieval 312 | dotSemantic 313 | downforeveryoneorjustme 314 | Download Wonder 315 | downnotifier 316 | DowntimeDetector 317 | Drip 318 | drupact 319 | Drupal \(\+http:\/\/drupal\.org\/\) 320 | DTS Agent 321 | dubaiindex 322 | DuplexWeb-Google 323 | DynatraceSynthetic 324 | EARTHCOM 325 | Easy-Thumb 326 | EasyDL 327 | Ebingbong 328 | ec2linkfinder 329 | eCairn-Grabber 330 | eCatch 331 | ECCP 332 | eContext\/ 333 | Ecxi 334 | EirGrabber 335 | ElectricMonk 336 | elefent 337 | EMail Exractor 338 | EMail Wolf 339 | EmailWolf 340 | Embarcadero 341 | Embed PHP Library 342 | Embedly 343 | endo\/ 344 | europarchive\.org 345 | evc-batch 346 | EventMachine HttpClient 347 | Everwall Link Expander 348 | Evidon 349 | Evrinid 350 | ExactSearch 351 | ExaleadCloudview 352 | Excel\/ 353 | exif 354 | ExoRank 355 | Exploratodo 356 | Express WebPictures 357 | Extreme Picture Finder 358 | EyeNetIE 359 | ezooms 360 | facebookcatalog 
361 | facebookexternalhit 362 | facebookexternalua 363 | facebookplatform 364 | fairshare 365 | Faraday v 366 | fasthttp 367 | Faveeo 368 | Favicon downloader 369 | faviconarchive 370 | faviconkit 371 | FavOrg 372 | Feed Wrangler 373 | Feedable\/ 374 | Feedbin 375 | FeedBooster 376 | FeedBucket 377 | FeedBunch\/ 378 | FeedBurner 379 | feeder 380 | Feedly 381 | FeedshowOnline 382 | Feedshow\/ 383 | Feedspot 384 | FeedViewer\/ 385 | Feedwind\/ 386 | FeedZcollector 387 | feeltiptop 388 | Fetch API 389 | Fetch\/[0-9] 390 | Fever\/[0-9] 391 | FHscan 392 | Fiery%20Feeds 393 | Filestack 394 | Fimap 395 | findlink 396 | findthatfile 397 | FlashGet 398 | FlipboardBrowserProxy 399 | FlipboardProxy 400 | FlipboardRSS 401 | Flock\/ 402 | Florienzh\/ 403 | fluffy 404 | Flunky 405 | flynxapp 406 | forensiq 407 | ForusP 408 | FoundSeoTool 409 | fragFINN\.de 410 | free thumbnails 411 | Freeuploader 412 | FreshRSS 413 | frontman 414 | Funnelback 415 | Fuzz Faster U Fool 416 | G-i-g-a-b-o-t 417 | g00g1e\.net 418 | ganarvisitas 419 | gdnplus\.com 420 | GeedoProductSearch 421 | geek-tools 422 | Genieo 423 | GentleSource 424 | GetCode 425 | Getintent 426 | GetLinkInfo 427 | getprismatic 428 | GetRight 429 | getroot 430 | GetURLInfo\/ 431 | GetWeb 432 | Geziyor 433 | Ghost Inspector 434 | GigablastOpenSource 435 | GIS-LABS 436 | github-camo 437 | GitHub-Hookshot 438 | github\.com 439 | Go http package 440 | Go [\d\.]* package http 441 | Go!Zilla 442 | Go-Ahead-Got-It 443 | Go-http-client 444 | go-mtasts\/ 445 | gobuster 446 | gobyus 447 | Gofeed 448 | gofetch 449 | Goldfire Server 450 | GomezAgent 451 | gooblog 452 | Goodzer\/ 453 | Google AppsViewer 454 | Google Desktop 455 | Google favicon 456 | Google Keyword Suggestion 457 | Google Keyword Tool 458 | Google Page Speed Insights 459 | Google PP Default 460 | Google Search Console 461 | Google Web Preview 462 | Google-Ads 463 | Google-Adwords 464 | Google-Apps-Script 465 | Google-Calendar-Importer 466 | Google-HotelAdsVerifier 467 | 
Google-HTTP-Java-Client 468 | Google-InspectionTool 469 | Google-Podcast 470 | Google-Publisher-Plugin 471 | Google-Read-Aloud 472 | Google-SearchByImage 473 | Google-Site-Verification 474 | Google-SMTP-STS 475 | Google-speakr 476 | Google-Structured-Data-Testing-Tool 477 | Google-Transparency-Report 478 | google-xrawler 479 | Google-Youtube-Links 480 | GoogleDocs 481 | GoogleHC\/ 482 | GoogleOther 483 | GoogleProber 484 | GoogleProducer 485 | GoogleSites 486 | Gookey 487 | GoSpotCheck 488 | gosquared-thumbnailer 489 | Gotit 490 | GoZilla 491 | grabify 492 | GrabNet 493 | Grafula 494 | Grammarly 495 | GrapeFX 496 | GreatNews 497 | Gregarius 498 | GRequests 499 | grokkit 500 | grouphigh 501 | grub-client 502 | gSOAP\/ 503 | GT::WWW 504 | GTmetrix 505 | GuzzleHttp 506 | gvfs\/ 507 | HAA(A)?RTLAND http client 508 | Haansoft 509 | hackney\/ 510 | Hadi Agent 511 | HappyApps-WebCheck 512 | Hardenize 513 | Hatena 514 | Havij 515 | HaxerMen 516 | HEADMasterSEO 517 | HeartRails_Capture 518 | help@dataminr\.com 519 | heritrix 520 | Hexometer 521 | historious 522 | hkedcity 523 | hledejLevne\.cz 524 | Hloader 525 | HMView 526 | Holmes 527 | HonesoSearchEngine 528 | HootSuite Image proxy 529 | Hootsuite-WebFeed 530 | hosterstats 531 | HostTracker 532 | ht:\/\/check 533 | htdig 534 | HTMLparser 535 | htmlyse 536 | HTTP Banner Detection 537 | http-get 538 | HTTP-Header-Abfrage 539 | http-kit 540 | http-request\/ 541 | HTTP-Tiny 542 | HTTP::Lite 543 | http:\/\/www.neomo.de\/ 544 | HttpComponents 545 | httphr 546 | HTTPie 547 | HTTPMon 548 | httpRequest 549 | httpscheck 550 | httpssites_power 551 | httpunit 552 | HttpUrlConnection 553 | http\.rb\/ 554 | HTTP_Compression_Test 555 | http_get 556 | http_request2 557 | http_requester 558 | httrack 559 | huaweisymantec 560 | HubSpot 561 | HubSpot-Link-Resolver 562 | Humanlinks 563 | i2kconnect\/ 564 | Iblog 565 | ichiro 566 | Id-search 567 | IdeelaborPlagiaat 568 | IDG Twitter Links Resolver 569 | IDwhois\/ 570 | Iframely 571 | 
igdeSpyder 572 | iGooglePortal 573 | IlTrovatore 574 | Image Fetch 575 | Image Sucker 576 | ImageEngine\/ 577 | ImageVisu\/ 578 | Imagga 579 | imagineeasy 580 | imgsizer 581 | InAGist 582 | inbound\.li parser 583 | InDesign%20CC 584 | Indy Library 585 | InetURL 586 | infegy 587 | infohelfer 588 | InfoTekies 589 | InfoWizards Reciprocal Link 590 | inpwrd\.com 591 | instabid 592 | Instapaper 593 | Integrity 594 | integromedb 595 | Intelliseek 596 | InterGET 597 | Internet Ninja 598 | InternetSeer 599 | internetVista monitor 600 | internetwache 601 | internet_archive 602 | intraVnews 603 | IODC 604 | IOI 605 | Inboxb0t 606 | iplabel 607 | ips-agent 608 | IPS\/[0-9] 609 | IPWorks HTTP\/S Component 610 | iqdb\/ 611 | Iria 612 | Irokez 613 | isitup\.org 614 | iskanie 615 | isUp\.li 616 | iThemes Sync\/ 617 | IZaBEE 618 | iZSearch 619 | JAHHO 620 | janforman 621 | Jaunt\/ 622 | Java.*outbrain 623 | javelin\.io 624 | Jbrofuzz 625 | Jersey\/ 626 | JetCar 627 | Jigsaw 628 | Jobboerse 629 | JobFeed discovery 630 | Jobg8 URL Monitor 631 | jobo 632 | Jobrapido 633 | Jobsearch1\.5 634 | JoinVision Generic 635 | JolokiaPwn 636 | Joomla 637 | Jorgee 638 | JS-Kit 639 | JungleKeyThumbnail 640 | JustView 641 | Kaspersky Lab CFR link resolver 642 | Kelny\/ 643 | Kerrigan\/ 644 | KeyCDN 645 | Keyword Density 646 | Keywords Research 647 | khttp\/ 648 | KickFire 649 | KimonoLabs\/ 650 | Kml-Google 651 | knows\.is 652 | KOCMOHABT 653 | kouio 654 | krawler\.dk 655 | kube-probe 656 | kubectl 657 | kulturarw3 658 | KumKie 659 | Larbin 660 | Lavf\/ 661 | leakix\.net 662 | LeechFTP 663 | LeechGet 664 | letsencrypt 665 | Lftp 666 | LibVLC 667 | LibWeb 668 | Libwhisker 669 | libwww 670 | Licorne 671 | Liferea\/ 672 | Lighthouse 673 | Lightspeedsystems 674 | Likse 675 | limber\.io 676 | Link Valet 677 | LinkAlarm\/ 678 | LinkAnalyser 679 | link-check 680 | linkCheck 681 | linkdex 682 | LinkExaminer 683 | linkfluence 684 | linkpeek 685 | LinkPreview 686 | LinkScan 687 | LinksManager 688 | 
LinkTiger 689 | LinkWalker 690 | link_thumbnailer 691 | Lipperhey 692 | Litemage_walker 693 | livedoor ScreenShot 694 | LoadImpactRload 695 | localsearch-web 696 | LongURL API 697 | longurl-r-package 698 | looid\.com 699 | looksystems\.net 700 | lscache_runner 701 | ltx71 702 | lua-resty-http 703 | Lucee \(CFML Engine\) 704 | Lush Http Client 705 | lwp-request 706 | lwp-trivial 707 | LWP::Simple 708 | lycos 709 | LYT\.SR 710 | L\.webis 711 | mabontland 712 | MacOutlook\/ 713 | MagentaNews\/ 714 | Mag-Net 715 | MagpieRSS 716 | Mail::STS 717 | MailChimp 718 | Mail\.Ru 719 | Majestic12 720 | makecontact\/ 721 | Mandrill 722 | MapperCmd 723 | marketinggrader 724 | MarkMonitor 725 | MarkWatch 726 | Mass Downloader 727 | masscan\/ 728 | Mata Hari 729 | mattermost 730 | MatchorySearch\/ 731 | Mediametric 732 | Mediapartners-Google 733 | mediawords 734 | MegaIndex\.ru 735 | MeltwaterNews 736 | Melvil Rawi 737 | MemGator 738 | Metaspinner 739 | MetaURI 740 | MFC_Tear_Sample 741 | Microsearch 742 | Microsoft Data Access 743 | Microsoft Office 744 | Microsoft Outlook 745 | Microsoft Windows Network Diagnostics 746 | Microsoft-WebDAV-MiniRedir 747 | Microsoft\.Data\.Mashup 748 | MicrosoftPreview 749 | MIDown tool 750 | MIIxpc 751 | Mindjet 752 | Miniature\.io 753 | Miniflux 754 | mio_httpc 755 | Miro-HttpClient 756 | Mister PiX 757 | mixdata dot com 758 | mixed-content-scan 759 | mixnode 760 | Mnogosearch 761 | mogimogi 762 | Mojeek 763 | Mojolicious \(Perl\) 764 | Mollie 765 | monitis 766 | Monitority\/ 767 | Monit\/ 768 | montastic 769 | MonSpark 770 | MonTools 771 | Moreover 772 | Morfeus Fucking Scanner 773 | Morning Paper 774 | MovableType 775 | mowser 776 | Mrcgiguy 777 | Mr\.4x3 Powered 778 | MS Web Services Client Protocol 779 | MSFrontPage 780 | mShots 781 | MuckRack\/ 782 | muhstik-scan 783 | MVAClient 784 | MxToolbox\/ 785 | myseosnapshot 786 | nagios 787 | Najdi\.si 788 | Name Intelligence 789 | NameFo\.com 790 | Nameprotect 791 | nationalarchives 792 | Navroad 793 
| nbertaupete95 794 | NearSite 795 | Needle 796 | Nessus 797 | Net Vampire 798 | NetAnts 799 | NETCRAFT 800 | NetLyzer 801 | NetMechanic 802 | NetNewsWire 803 | Netpursual 804 | netresearch 805 | NetShelter ContentScan 806 | Netsparker 807 | NetSystemsResearch 808 | nettle 809 | NetTrack 810 | Netvibes 811 | NetZIP 812 | Neustar WPM 813 | NeutrinoAPI 814 | NewRelicPinger 815 | NewsBlur .*Finder 816 | NewsGator 817 | newsme 818 | newspaper\/ 819 | Nexgate Ruby Client 820 | NG-Search 821 | nghttp2 822 | Nibbler 823 | NICErsPRO 824 | NihilScio 825 | Nikto 826 | nineconnections 827 | NLNZ_IAHarvester 828 | Nmap Scripting Engine 829 | node-fetch 830 | node-superagent 831 | node-urllib 832 | Nodemeter 833 | NodePing 834 | node\.io 835 | nominet\.org\.uk 836 | nominet\.uk 837 | Norton-Safeweb 838 | Notifixious 839 | notifyninja 840 | NotionEmbedder 841 | nuhk 842 | nutch 843 | Nuzzel 844 | nWormFeedFinder 845 | nyawc\/ 846 | Nymesis 847 | NYU 848 | Observatory\/ 849 | Ocelli\/ 850 | Octopus 851 | oegp 852 | Offline Explorer 853 | Offline Navigator 854 | OgScrper 855 | okhttp 856 | omgili 857 | OMSC 858 | Online Domain Tools 859 | Open Source RSS 860 | OpenCalaisSemanticProxy 861 | Openfind 862 | OpenLinkProfiler 863 | Openstat\/ 864 | OpenVAS 865 | OPPO A33 866 | Optimizer 867 | Orbiter 868 | OrgProbe\/ 869 | orion-semantics 870 | Outlook-Express 871 | Outlook-iOS 872 | Owler 873 | Owlin 874 | ownCloud News 875 | ow\.ly 876 | OxfordCloudService 877 | page scorer 878 | Page Valet 879 | page2rss 880 | PageFreezer 881 | PageGrabber 882 | PagePeeker 883 | PageScorer 884 | Pagespeed\/ 885 | PageThing 886 | page_verifier 887 | Panopta 888 | panscient 889 | Papa Foto 890 | parsijoo 891 | Pavuk 892 | PayPal IPN 893 | pcBrowser 894 | Pcore-HTTP 895 | PDF24 URL To PDF 896 | Pearltrees 897 | PECL::HTTP 898 | peerindex 899 | Peew 900 | PeoplePal 901 | Perlu - 902 | PhantomJS Screenshoter 903 | PhantomJS\/ 904 | Photon\/ 905 | php-requests 906 | phpservermon 907 | Pi-Monster 908 | 
Picscout 909 | Picsearch 910 | PictureFinder 911 | Pimonster 912 | Pingability 913 | PingAdmin\.Ru 914 | Pingdom 915 | Pingoscope 916 | PingSpot 917 | ping\.blo\.gs 918 | pinterest\.com 919 | Pixray 920 | Pizilla 921 | Plagger\/ 922 | Pleroma 923 | Ploetz \+ Zeller 924 | Plukkie 925 | plumanalytics 926 | PocketImageCache 927 | PocketParser 928 | Pockey 929 | PodcastAddict\/ 930 | POE-Component-Client-HTTP 931 | Polymail\/ 932 | Pompos 933 | Porkbun 934 | Port Monitor 935 | postano 936 | postfix-mta-sts-resolver 937 | PostmanRuntime 938 | postplanner\.com 939 | PostPost 940 | postrank 941 | PowerPoint\/ 942 | Prebid 943 | Prerender 944 | Priceonomics Analysis Engine 945 | PrintFriendly 946 | PritTorrent 947 | Prlog 948 | probely\.com 949 | probethenet 950 | Project ?25499 951 | Project-Resonance 952 | prospectb2b 953 | Protopage 954 | ProWebWalker 955 | proximic 956 | PRTG Network Monitor 957 | pshtt, https scanning 958 | PTST 959 | PTST\/[0-9]+ 960 | pulsetic\.com 961 | Pump 962 | Python-httplib2 963 | python-httpx 964 | python-requests 965 | Python-urllib 966 | Qirina Hurdler 967 | QQDownload 968 | QrafterPro 969 | Qseero 970 | Qualidator 971 | QueryN Metasearch 972 | queuedriver 973 | quic-go-HTTP\/ 974 | QuiteRSS 975 | Quora Link Preview 976 | Qwantify 977 | Radian6 978 | RadioPublicImageResizer 979 | Railgun\/ 980 | RankActive 981 | RankFlex 982 | RankSonicSiteAuditor 983 | RapidLoad\/ 984 | Re-re Studio 985 | ReactorNetty 986 | Readability 987 | RealDownload 988 | RealPlayer%20Downloader 989 | RebelMouse 990 | Recorder 991 | RecurPost\/ 992 | redback\/ 993 | ReederForMac 994 | Reeder\/ 995 | ReGet 996 | RepoMonkey 997 | request\.js 998 | reqwest\/ 999 | ResponseCodeTest 1000 | RestSharp 1001 | Riddler 1002 | Rival IQ 1003 | Robosourcer 1004 | Robozilla 1005 | ROI Hunter 1006 | RPT-HTTPClient 1007 | RSSMix\/ 1008 | RSSOwl 1009 | RuxitSynthetic 1010 | RyowlEngine 1011 | safe-agent-scanner 1012 | SalesIntelligent 1013 | Saleslift 1014 | SAP NetWeaver Application 
Server 1015 | SauceNAO 1016 | SBIder 1017 | sc-downloader 1018 | scalaj-http 1019 | Scamadviser-Frontend 1020 | ScanAlert 1021 | scan\.lol 1022 | Scoop 1023 | scooter 1024 | ScopeContentAG-HTTP-Client 1025 | ScoutJet 1026 | ScoutURLMonitor 1027 | ScrapeBox Page Scanner 1028 | Scrapy 1029 | Screaming 1030 | ScreenShotService 1031 | Scrubby 1032 | Scrutiny\/ 1033 | Search37 1034 | searchenginepromotionhelp 1035 | Searchestate 1036 | SearchExpress 1037 | SearchSight 1038 | SearchWP 1039 | search\.thunderstone 1040 | Seeker 1041 | semanticdiscovery 1042 | semanticjuice 1043 | Semiocast HTTP client 1044 | Semrush 1045 | Sendsay\.Ru 1046 | sentry\/ 1047 | SEO Browser 1048 | Seo Servis 1049 | seo-nastroj\.cz 1050 | seo4ajax 1051 | Seobility 1052 | SEOCentro 1053 | SeoCheck 1054 | seocompany 1055 | SEOkicks 1056 | SEOlizer 1057 | Seomoz 1058 | SEOprofiler 1059 | seoscanners 1060 | SEOsearch 1061 | seositecheckup 1062 | SEOstats 1063 | servernfo 1064 | sexsearcher 1065 | Seznam 1066 | Shelob 1067 | Shodan 1068 | Shoppimon 1069 | ShopWiki 1070 | ShortLinkTranslate 1071 | shortURL lengthener 1072 | shrinktheweb 1073 | Sideqik 1074 | Siege 1075 | SimplePie 1076 | SimplyFast 1077 | Siphon 1078 | SISTRIX 1079 | Site Sucker 1080 | Site-Shot\/ 1081 | Site24x7 1082 | SiteBar 1083 | Sitebeam 1084 | Sitebulb\/ 1085 | SiteCondor 1086 | SiteExplorer 1087 | SiteGuardian 1088 | Siteimprove 1089 | SiteIndexed 1090 | Sitemap(s)? 
Generator 1091 | SitemapGenerator 1092 | SiteMonitor 1093 | Siteshooter B0t 1094 | SiteSnagger 1095 | SiteSucker 1096 | SiteTruth 1097 | Sitevigil 1098 | sitexy\.com 1099 | SkypeUriPreview 1100 | Slack\/ 1101 | sli-systems\.com 1102 | slider\.com 1103 | slurp 1104 | SlySearch 1105 | SmartDownload 1106 | SMRF URL Expander 1107 | SMUrlExpander 1108 | Snake 1109 | Snappy 1110 | SnapSearch 1111 | Snarfer\/ 1112 | SniffRSS 1113 | sniptracker 1114 | Snoopy 1115 | SnowHaze Search 1116 | sogou web 1117 | SortSite 1118 | Sottopop 1119 | sovereign\.ai 1120 | SpaceBison 1121 | SpamExperts 1122 | Spammen 1123 | Spanner 1124 | Spawning-AI 1125 | spaziodati 1126 | SPDYCheck 1127 | Specificfeeds 1128 | SpeedKit 1129 | speedy 1130 | SPEng 1131 | Spinn3r 1132 | spray-can 1133 | Sprinklr 1134 | spyonweb 1135 | sqlmap 1136 | Sqlworm 1137 | Sqworm 1138 | SSL Labs 1139 | ssl-tools 1140 | StackRambler 1141 | Statastico\/ 1142 | Statically- 1143 | StatusCake 1144 | Steeler 1145 | Stratagems Kumo 1146 | Stripe\/ 1147 | Stroke\.cz 1148 | StudioFACA 1149 | StumbleUpon 1150 | suchen 1151 | Sucuri 1152 | summify 1153 | SuperHTTP 1154 | Surphace Scout 1155 | Suzuran 1156 | swcd 1157 | Symfony BrowserKit 1158 | Symfony2 BrowserKit 1159 | Synapse\/ 1160 | Syndirella\/ 1161 | SynHttpClient-Built 1162 | Sysomos 1163 | sysscan 1164 | Szukacz 1165 | T0PHackTeam 1166 | tAkeOut 1167 | Tarantula\/ 1168 | Taringa UGC 1169 | TarmotGezgin 1170 | tchelebi\.io 1171 | techiaith\.cymru 1172 | Teleport 1173 | Telesoft 1174 | Telesphoreo 1175 | Telesphorep 1176 | Tenon\.io 1177 | teoma 1178 | terrainformatica 1179 | Test Certificate Info 1180 | testuri 1181 | Tetrahedron 1182 | TextRazor Downloader 1183 | The Drop Reaper 1184 | The Expert HTML Source Viewer 1185 | The Intraformant 1186 | The Knowledge AI 1187 | theinternetrules 1188 | TheNomad 1189 | Thinklab 1190 | Thumbor 1191 | Thumbshots 1192 | ThumbSniper 1193 | timewe\.net 1194 | TinEye 1195 | Tiny Tiny RSS 1196 | TLSProbe\/ 1197 | Toata 1198 | topster 
1199 | touche\.com 1200 | Traackr\.com 1201 | tracemyfile 1202 | Trackuity 1203 | TrapitAgent 1204 | Trendiction 1205 | Trendsmap 1206 | trendspottr 1207 | truwoGPS 1208 | TryJsoup 1209 | TulipChain 1210 | Turingos 1211 | Turnitin 1212 | tweetedtimes 1213 | Tweetminster 1214 | Tweezler\/ 1215 | twibble 1216 | Twice 1217 | Twikle 1218 | Twingly 1219 | Twisted PageGetter 1220 | Typhoeus 1221 | ubermetrics-technologies 1222 | uclassify 1223 | UdmSearch 1224 | ultimate_sitemap_parser 1225 | unchaos 1226 | unirest-java 1227 | UniversalFeedParser 1228 | unshortenit 1229 | Unshorten\.It 1230 | Untiny 1231 | UnwindFetchor 1232 | updated 1233 | updown\.io daemon 1234 | Upflow 1235 | Uptimia 1236 | URL Verifier 1237 | Urlcheckr 1238 | URLitor 1239 | urlresolver 1240 | Urlstat 1241 | URLTester 1242 | UrlTrends Ranking Updater 1243 | URLy Warning 1244 | URLy\.Warning 1245 | URL\/Emacs 1246 | Vacuum 1247 | Vagabondo 1248 | VB Project 1249 | vBSEO 1250 | VCI 1251 | Verity 1252 | via ggpht\.com GoogleImageProxy 1253 | Virusdie 1254 | visionutils 1255 | Visual Rights Group 1256 | vkShare 1257 | VoidEYE 1258 | Voil 1259 | voltron 1260 | voyager\/ 1261 | VSAgent\/ 1262 | VSB-TUO\/ 1263 | Vulnbusters Meter 1264 | VYU2 1265 | w3af\.org 1266 | W3C-checklink 1267 | W3C-mobileOK 1268 | W3C_Unicorn 1269 | WAC-OFU 1270 | WakeletLinkExpander 1271 | WallpapersHD 1272 | Wallpapers\/[0-9]+ 1273 | wangling 1274 | Wappalyzer 1275 | WatchMouse 1276 | WbSrch\/ 1277 | WDT\.io 1278 | Web Auto 1279 | Web Collage 1280 | Web Enhancer 1281 | Web Fetch 1282 | Web Fuck 1283 | Web Pix 1284 | Web Sauger 1285 | Web spyder 1286 | Web Sucker 1287 | web-capture\.net 1288 | Web-sniffer 1289 | Webalta 1290 | Webauskunft 1291 | WebAuto 1292 | WebCapture 1293 | WebClient\/ 1294 | webcollage 1295 | WebCookies 1296 | WebCopier 1297 | WebCorp 1298 | WebDataStats 1299 | WebDoc 1300 | WebEnhancer 1301 | WebFetch 1302 | WebFuck 1303 | WebGazer 1304 | WebGo IS 1305 | WebImageCollector 1306 | WebImages 1307 | WebIndex 1308 
| webkit2png 1309 | WebLeacher 1310 | webmastercoffee 1311 | webmon 1312 | WebPix 1313 | WebReaper 1314 | WebSauger 1315 | webscreenie 1316 | Webshag 1317 | Webshot 1318 | Website Quester 1319 | websitepulse agent 1320 | WebsiteQuester 1321 | Websnapr 1322 | WebSniffer 1323 | Webster 1324 | WebStripper 1325 | WebSucker 1326 | webtech\/ 1327 | WebThumbnail 1328 | Webthumb\/ 1329 | WebWhacker 1330 | WebZIP 1331 | WeLikeLinks 1332 | WEPA 1333 | WeSEE 1334 | wf84 1335 | Wfuzz\/ 1336 | wget 1337 | WhatCMS 1338 | WhatsApp 1339 | WhatsMyIP 1340 | WhatWeb 1341 | WhereGoes\? 1342 | Whibse 1343 | WhoAPI\/ 1344 | WhoRunsCoinHive 1345 | Whynder Magnet 1346 | Windows-RSS-Platform 1347 | WinHttp-Autoproxy-Service 1348 | WinHTTP\/ 1349 | WinPodder 1350 | wkhtmlto 1351 | wmtips 1352 | Woko 1353 | Wolfram HTTPClient 1354 | woorankreview 1355 | WordPress\/ 1356 | WordupinfoSearch 1357 | Word\/ 1358 | worldping-api 1359 | wotbox 1360 | WP Engine Install Performance API 1361 | WP Rocket 1362 | wpif 1363 | wprecon\.com survey 1364 | WPScan 1365 | wscheck 1366 | Wtrace 1367 | WWW-Collector-E 1368 | WWW-Mechanize 1369 | WWW::Document 1370 | WWW::Mechanize 1371 | WWWOFFLE 1372 | www\.monitor\.us 1373 | x09Mozilla 1374 | x22Mozilla 1375 | XaxisSemanticsClassifier 1376 | XenForo\/ 1377 | Xenu Link Sleuth 1378 | XING-contenttabreceiver 1379 | xpymep([0-9]?)\.exe 1380 | Y!J-[A-Z][A-Z][A-Z] 1381 | Yaanb 1382 | yacy 1383 | Yahoo Link Preview 1384 | YahooCacheSystem 1385 | YahooMailProxy 1386 | YahooYSMcm 1387 | YandeG 1388 | Yandex(?!Search) 1389 | yanga 1390 | yeti 1391 | Yo-yo 1392 | Yoleo Consumer 1393 | yomins\.com 1394 | yoogliFetchAgent 1395 | YottaaMonitor 1396 | Your-Website-Sucks 1397 | yourls\.org 1398 | YoYs\.net 1399 | YP\.PL 1400 | Zabbix 1401 | Zade 1402 | Zao 1403 | Zapier 1404 | Zauba 1405 | Zemanta Aggregator 1406 | Zend\\Http\\Client 1407 | Zend_Http_Client 1408 | Zermelo 1409 | Zeus 1410 | zgrab 1411 | ZnajdzFoto 1412 | ZnHTTP 1413 | Zombie\.js 1414 | Zoom\.Mac 1415 | 
ZoteroTranslationServer 1416 | ZyBorg 1417 | [a-z0-9\-_]*(bot|crawl|headless|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer|scraper) -------------------------------------------------------------------------------- /raw/Exclusions.json: -------------------------------------------------------------------------------- 1 | ["Safari.[\\d\\.]*","Firefox.[\\d\\.]*"," Chrome.[\\d\\.]*","Chromium.[\\d\\.]*","MSIE.[\\d\\.]","Opera\\\/[\\d\\.]*","Mozilla.[\\d\\.]*","AppleWebKit.[\\d\\.]*","Trident.[\\d\\.]*","Windows NT.[\\d\\.]*","Android [\\d\\.]*","Macintosh.","Ubuntu","Linux","[ ]Intel","Mac OS X [\\d_]*","(like )?Gecko(.[\\d\\.]*)?","KHTML,","CriOS.[\\d\\.]*","CPU iPhone OS ([0-9_])* like Mac OS X","CPU OS ([0-9_])* like Mac OS X","iPod","compatible","x86_..","i686","x64","X11","rv:[\\d\\.]*","Version.[\\d\\.]*","WOW64","Win64","Dalvik.[\\d\\.]*"," \\.NET CLR [\\d\\.]*","Presto.[\\d\\.]*","Media Center PC","BlackBerry","Build","Opera Mini\\\/\\d{1,2}\\.\\d{1,2}\\.[\\d\\.]*\\\/\\d{1,2}\\.","Opera"," \\.NET[\\d\\.]*","cubot","; M bot","; CRONO","; B bot","; IDbot","; ID bot","; POWER BOT","OCTOPUS-CORE","htc_botdugls","super\\\/\\d+\\\/Android\\\/\\d+","\"Yandex\"","YandexModule2"] -------------------------------------------------------------------------------- /raw/Exclusions.txt: -------------------------------------------------------------------------------- 1 | Safari.[\d\.]* 2 | Firefox.[\d\.]* 3 | Chrome.[\d\.]* 4 | Chromium.[\d\.]* 5 | MSIE.[\d\.] 6 | Opera\/[\d\.]* 7 | Mozilla.[\d\.]* 8 | AppleWebKit.[\d\.]* 9 | Trident.[\d\.]* 10 | Windows NT.[\d\.]* 11 | Android [\d\.]* 12 | Macintosh. 13 | Ubuntu 14 | Linux 15 | [ ]Intel 16 | Mac OS X [\d_]* 17 | (like )?Gecko(.[\d\.]*)? 18 | KHTML, 19 | CriOS.[\d\.]* 20 | CPU iPhone OS ([0-9_])* like Mac OS X 21 | CPU OS ([0-9_])* like Mac OS X 22 | iPod 23 | compatible 24 | x86_.. 
// File: src/CrawlerDetect.php

/*
 * This source file is subject to the MIT license that is bundled
 * with this source code in the file LICENSE.
 */

namespace Jaybizzle\CrawlerDetect;

use Jaybizzle\CrawlerDetect\Fixtures\Crawlers;
use Jaybizzle\CrawlerDetect\Fixtures\Exclusions;
use Jaybizzle\CrawlerDetect\Fixtures\Headers;

/**
 * Decides whether a user agent string belongs to a crawler by matching it
 * against a compiled list of regular expressions, after first stripping
 * the fragments listed by the exclusions provider.
 */
class CrawlerDetect
{
    /**
     * The user agent.
     *
     * @var string|null
     */
    protected $userAgent;

    /**
     * Headers that contain a user agent.
     *
     * @var array
     */
    protected $httpHeaders = [];

    /**
     * Store regex matches (as filled in by preg_match()).
     *
     * @var array
     */
    protected $matches = [];

    /**
     * Crawlers object.
     *
     * @var \Jaybizzle\CrawlerDetect\Fixtures\Crawlers
     */
    protected $crawlers;

    /**
     * Exclusions object.
     *
     * @var \Jaybizzle\CrawlerDetect\Fixtures\Exclusions
     */
    protected $exclusions;

    /**
     * Headers object.
     *
     * @var \Jaybizzle\CrawlerDetect\Fixtures\Headers
     */
    protected $uaHttpHeaders;

    /**
     * The compiled regex string.
     *
     * @var string
     */
    protected $compiledRegex;

    /**
     * The compiled exclusions regex string.
     *
     * @var string
     */
    protected $compiledExclusions;

    /**
     * Class constructor.
     *
     * Builds both regexes once, then records the headers and user agent.
     *
     * @param array|null  $headers   Header map; when null or empty, $_SERVER is used.
     * @param string|null $userAgent Explicit user agent; when null it is assembled
     *                               from the known user agent headers.
     */
    public function __construct(?array $headers = null, $userAgent = null)
    {
        $this->crawlers = new Crawlers;
        $this->exclusions = new Exclusions;
        $this->uaHttpHeaders = new Headers;

        $this->compiledRegex = $this->compileRegex($this->crawlers->getAll());
        $this->compiledExclusions = $this->compileRegex($this->exclusions->getAll());

        $this->setHttpHeaders($headers);
        $this->setUserAgent($userAgent);
    }

    /**
     * Compile the regex patterns into one regex string.
     *
     * The patterns are joined as alternatives inside a single group. No
     * delimiters or escaping are added here; callers wrap the result in
     * "/.../i", so each pattern must already escape "/".
     *
     * @param array $patterns
     * @return string
     */
    public function compileRegex($patterns)
    {
        return '('.implode('|', $patterns).')';
    }

    /**
     * Set HTTP headers.
     *
     * @param array|null $httpHeaders
     */
    public function setHttpHeaders($httpHeaders)
    {
        // Use global _SERVER if $httpHeaders aren't defined.
        if (! is_array($httpHeaders) || ! count($httpHeaders)) {
            $httpHeaders = $_SERVER;
        }

        // Clear existing headers.
        $this->httpHeaders = [];

        // Only save HTTP headers. In PHP land, that means
        // only _SERVER vars that start with HTTP_.
        foreach ($httpHeaders as $key => $value) {
            // Keys of a list-style array are integers; cast so strpos()
            // always receives a string.
            if (strpos((string) $key, 'HTTP_') === 0) {
                $this->httpHeaders[$key] = $value;
            }
        }
    }

    /**
     * Return user agent headers.
     *
     * @return array
     */
    public function getUaHttpHeaders()
    {
        return $this->uaHttpHeaders->getAll();
    }

    /**
     * Set the user agent.
     *
     * When null is given, the values of every known user agent header that
     * is present are concatenated, each followed by a single space. If none
     * of them is present the user agent stays null.
     *
     * @param string|null $userAgent
     * @return string|null The user agent that was stored.
     */
    public function setUserAgent($userAgent)
    {
        if (is_null($userAgent)) {
            foreach ($this->getUaHttpHeaders() as $altHeader) {
                if (isset($this->httpHeaders[$altHeader])) {
                    $userAgent .= $this->httpHeaders[$altHeader].' ';
                }
            }
        }

        return $this->userAgent = $userAgent;
    }

    /**
     * Check user agent string against the regex.
     *
     * A falsy argument (null, '' or '0') falls back to the stored user agent.
     *
     * @param string|null $userAgent
     * @return bool
     */
    public function isCrawler($userAgent = null)
    {
        $candidate = $userAgent ?: $this->userAgent ?: '';

        // preg_replace() returns null when PCRE fails. Cast so trim() never
        // receives null (deprecated since PHP 8.1); such a failure is then
        // handled like an empty agent, exactly as before.
        $agent = trim((string) preg_replace(
            "/{$this->compiledExclusions}/i",
            '',
            $candidate
        ));

        if ($agent === '') {
            $this->matches = [];

            return false;
        }

        return (bool) preg_match("/{$this->compiledRegex}/i", $agent, $this->matches);
    }

    /**
     * Return the text matched by the last isCrawler() call.
     *
     * Only the full match (index 0) is returned, not the capture groups.
     *
     * @return string|null Null when nothing has matched.
     */
    public function getMatches()
    {
        return isset($this->matches[0]) ? $this->matches[0] : null;
    }

    /**
     * Return the stored user agent.
     *
     * @return string|null
     */
    public function getUserAgent()
    {
        return $this->userAgent;
    }
}

// File: src/Fixtures/AbstractProvider.php

/*
 * This source file is subject to the MIT license that is bundled
 * with this source code in the file LICENSE.
 */

namespace Jaybizzle\CrawlerDetect\Fixtures;

/**
 * Base class for the fixture providers: subclasses supply the list by
 * overriding $data.
 */
abstract class AbstractProvider
{
    /**
     * The data set.
     *
     * Defaults to an empty list so getAll() returns an array even when a
     * subclass does not override it.
     *
     * @var array
     */
    protected $data = [];

    /**
     * Return the data set.
     *
     * @return array
     */
    public function getAll()
    {
        return $this->data;
    }
}
*
     * Every entry is a PCRE fragment without delimiters. CrawlerDetect joins
     * the entries with "|" and matches the result between "/" delimiters with
     * the "i" flag, so "/" and any regex meta-character meant literally must
     * be backslash-escaped.
     *
     * @var array
     */
    protected $data = [
        ' YLT',
        '^Aether',
        '^Amazon Simple Notification Service Agent$',
        '^Amazon-Route53-Health-Check-Service',
        '^Amazon CloudFront',
        '^b0t$',
        '^bluefish ',
        '^Calypso v\/',
        '^COMODO DCV',
        '^Corax',
        '^DangDang',
        '^DavClnt',
        '^DHSH',
        '^docker\/[0-9]',
        '^Expanse',
        '^FDM ',
        '^git\/',
        '^Goose\/',
        '^Grabber',
        '^Gradle\/',
        '^HTTPClient\/',
        '^HTTPing',
        '^Java\/',
        '^Jeode\/',
        '^Jetty\/',
        '^Mail\/',
        '^Mget',
        '^Microsoft URL Control',
        '^Mikrotik\/',
        '^Netlab360',
        '^NG\/[0-9\.]',
        '^NING\/',
        '^npm\/',
        '^Nuclei',
        '^PHP-AYMAPI\/',
        '^PHP\/',
        '^pip\/',
        '^pnpm\/',
        '^RMA\/',
        '^Ruby|Ruby\/[0-9]',
        '^symbolicator\/',
        '^Swurl ',
        '^TLS tester ',
        '^twine\/',
        '^ureq',
        '^VSE\/[0-9]',
        '^WordPress\.com',
        '^XRL\/[0-9]',
        '^ZmEu',
        '008\/',
        '13TABS',
        '192\.comAgent',
        '2GDPR\/',
        '2ip\.ru',
        '404enemy',
        '7Siters',
        '80legs',
        'a3logics\.in',
        'A6-Indexer',
        'Abonti',
        'Aboundex',
        'aboutthedomain',
        'Accoona-AI-Agent',
        'acebookexternalhit\/',
        'acoon',
        'acrylicapps\.com\/pulp',
        'Acunetix',
        'AdAuth\/',
        'adbeat',
        'AddThis',
        'ADmantX',
        'AdminLabs',
        'adressendeutschland',
        'adreview\/',
        'adscanner',
        'adstxt-worker',
        'Adstxtaggregator',
        'adstxt\.com',
        'Adyen HttpClient',
        'AffiliateLabz\/',
        'affilimate-puppeteer',
        'agentslug',
        'AHC',
        'aihit',
        'aiohttp\/',
        'Airmail',
        'akka-http\/',
        'akula\/',
        'alertra',
        'alexa site audit',
        'Alibaba\.Security\.Heimdall',
        'Alligator',
        'allloadin',
        'AllSubmitter',
        'alyze\.info',
        'amagit',
        'Anarchie',
        'AndroidDownloadManager',
        'Anemone',
        'AngleSharp',
        'annotate_google',
        'Anthill',
        'Anturis Agent',
        'Ant\.com',
        'AnyEvent-HTTP\/',
        'Apache Ant\/',
        'Apache Droid',
        'Apache OpenOffice',
        'Apache-HttpAsyncClient',
        'Apache-HttpClient',
        'ApacheBench',
        'Apexoo',
        'apimon\.de',
        'APIs-Google',
        'AportWorm\/',
        'AppBeat\/',
        'AppEngine-Google',
        'AppleSyndication',
        'Aprc\/[0-9]',
        'Arachmo',
        'arachnode',
        'Arachnophilia',
        'aria2',
        'Arukereso',
        'asafaweb',
        'Asana\/',
        'Ask Jeeves',
        'AskQuickly',
        'ASPSeek',
        'Asterias',
        'Astute',
        'asynchttp',
        'Attach',
        'attohttpc',
        'autocite',
        'AutomaticWPTester',
        'Autonomy',
        'awin\.com',
        'AWS Security Scanner',
        'axios\/',
        'a\.pr-cy\.ru',
        'B-l-i-t-z-B-O-T',
        'Backlink-Ceck',
        'BacklinkHttpStatus',
        'BackStreet',
        'BackupLand',
        'BackWeb',
        'Bad-Neighborhood',
        'Badass',
        'baidu\.com',
        'Bandit',
        'Barracuda Sentinel \(EE\)',
        'basicstate',
        'BatchFTP',
        'Battleztar Bazinga',
        'baypup\/',
        'BazQux',
        'BBBike',
        'BCKLINKS',
        'BDFetch',
        'BegunAdvertising',
        'Bewica-security-scan',
        'Bidtellect',
        'BigBozz',
        'Bigfoot',
        'biglotron',
        'BingLocalSearch',
        'BingPreview',
        'binlar',
        'biNu image cacher',
        'Bitacle',
        'Bitrix link preview',
        'biz_Directory',
        'BKCTwitterUnshortener\/',
        'Black Hole',
        'Blackboard Safeassign',
        'BlackWidow',
        'BlockNote\.Net',
        'BlogBridge',
        'Bloglines',
        'Bloglovin',
        'BlogPulseLive',
        'BlogSearch',
        'Blogtrottr',
        'BlowFish',
        'boitho\.com-dc',
        'Boost\.Beast',
        'BPImageWalker',
        'Braintree-Webhooks',
        'Branch Metrics API',
        'Branch-Passthrough',
        'Brandprotect',
        'Brandwatch',
        'Brodie\/',
        'Browsershots',
        'BUbiNG',
        'Buck\/',
        'Buddy',
        'BuiltWith',
        'Bullseye',
        'BunnySlippers',
        'Burf Search',
        'Butterfly\/',
        'BuzzSumo',
        'CAAM\/[0-9]',
        'caam dot crwlr at gmail dot com',
        'CakePHP',
        'Calculon',
        'Canary%20Mail',
        'CaretNail',
        'catexplorador',
        'CC Metadata Scaper',
        'Cegbfeieh',
        'censys',
        // Brackets and dots are escaped: an unescaped "[at]" is a character
        // class and would never match the literal text "[at]".
        'centuryb\.o\.t9\[at\]gmail\.com',
        'Cerberian Drtrs',
        'CERT\.at-Statistics-Survey',
        'cf-facebook',
        'cg-eye',
        'changedetection',
        'ChangesMeter',
        'Charlotte',
        'chatterino-api-cache',
        'CheckHost',
        'checkprivacy',
        'CherryPicker',
        'ChinaClaw',
        'Chirp\/',
        'chkme\.com',
        'Chlooe',
        'Chromaxa',
        'CirrusExplorer',
        'CISPA Vulnerability Notification',
        'CISPA Web Analyser',
        'Citoid',
        'CJNetworkQuality',
        'Clarsentia',
        'clips\.ua\.ac\.be',
        'Cloud mapping',
        'CloudEndure',
        'CloudFlare-AlwaysOnline',
        'Cloudflare-Healthchecks',
        'Cloudinary',
        'cmcm\.com',
        'coccoc',
        'cognitiveseo',
        'ColdFusion',
        'colly -',
        'CommaFeed',
        'Commons-HttpClient',
        'commonscan',
        'contactbigdatafr',
        'contentkingapp',
        'Contextual Code Sites Explorer',
        'convera',
        'CookieReports',
        'copyright sheriff',
        'CopyRightCheck',
        'Copyscape',
        'cortex\/',
        'Cosmos4j\.feedback',
        'Covario-IDS',
        'Craw\/',
        'Crescent',
        'Criteo',
        'Crowsnest',
        'CSHttp',
        'CSSCheck',
        'Cula\/',
        'curb',
        'Curious George',
        'curl',
        'cuwhois\/',
        'cybo\.com',
        'DAP\/NetHTTP',
        'DareBoost',
        'DatabaseDriverMysqli',
        'DataCha0s',
        'DatadogSynthetics',
        'Datafeedwatch',
        'Datanyze',
        'DataparkSearch',
        'dataprovider',
        'DataXu',
        'Daum(oa)?[ \/][0-9]',
        'dBpoweramp',
        'ddline',
        'deeris',
        'delve\.ai',
        'Demon',
        'DeuSu',
        'developers\.google\.com\/\+\/web\/snippet\/',
        'Devil',
        'Digg',
        'Digincore',
        'DigitalPebble',
        'Dirbuster',
        'Discourse Forum Onebox',
        'Dispatch\/',
        'Disqus\/',
        'DittoSpyder',
        'dlvr',
        'DMBrowser',
        'DNSPod-reporting',
        'docoloc',
        'Dolphin http client',
        'DomainAppender',
        'DomainLabz',
        'Domains Project\/',
        'Donuts Content Explorer',
        'dotMailer content retrieval',
        'dotSemantic',
        'downforeveryoneorjustme',
        'Download Wonder',
        'downnotifier',
        'DowntimeDetector',
        'Drip',
        'drupact',
        'Drupal \(\+http:\/\/drupal\.org\/\)',
        'DTS Agent',
        'dubaiindex',
        'DuplexWeb-Google',
        'DynatraceSynthetic',
        'EARTHCOM',
        'Easy-Thumb',
        'EasyDL',
        'Ebingbong',
        'ec2linkfinder',
        'eCairn-Grabber',
        'eCatch',
        'ECCP',
        'eContext\/',
        'Ecxi',
        'EirGrabber',
        'ElectricMonk',
        'elefent',
        'EMail Exractor',
        'EMail Wolf',
        'EmailWolf',
        'Embarcadero',
        'Embed PHP Library',
        'Embedly',
        'endo\/',
        'europarchive\.org',
        'evc-batch',
        'EventMachine HttpClient',
        'Everwall Link Expander',
        'Evidon',
        'Evrinid',
        'ExactSearch',
        'ExaleadCloudview',
        'Excel\/',
        'exif',
        'ExoRank',
        'Exploratodo',
        'Express WebPictures',
        'Extreme Picture Finder',
        'EyeNetIE',
        'ezooms',
        'facebookcatalog',
        'facebookexternalhit',
        'facebookexternalua',
        'facebookplatform',
        'fairshare',
        'Faraday v',
        'fasthttp',
        'Faveeo',
        'Favicon downloader',
        'faviconarchive',
        'faviconkit',
        'FavOrg',
        'Feed Wrangler',
        'Feedable\/',
        'Feedbin',
        'FeedBooster',
        'FeedBucket',
        'FeedBunch\/',
        'FeedBurner',
        'feeder',
        'Feedly',
        'FeedshowOnline',
        'Feedshow\/',
        'Feedspot',
        'FeedViewer\/',
        'Feedwind\/',
        'FeedZcollector',
        'feeltiptop',
        'Fetch API',
        'Fetch\/[0-9]',
        'Fever\/[0-9]',
        'FHscan',
        'Fiery%20Feeds',
        'Filestack',
        'Fimap',
        'findlink',
        'findthatfile',
        'FlashGet',
        'FlipboardBrowserProxy',
        'FlipboardProxy',
        'FlipboardRSS',
        'Flock\/',
        'Florienzh\/',
        'fluffy',
        'Flunky',
        'flynxapp',
        'forensiq',
        'ForusP',
        'FoundSeoTool',
        'fragFINN\.de',
        'free thumbnails',
        'Freeuploader',
        'FreshRSS',
        'frontman',
        'Funnelback',
        'Fuzz Faster U Fool',
        'G-i-g-a-b-o-t',
        'g00g1e\.net',
        'ganarvisitas',
        'gdnplus\.com',
        'GeedoProductSearch',
        'geek-tools',
        'Genieo',
        'GentleSource',
        'GetCode',
        'Getintent',
        'GetLinkInfo',
        'getprismatic',
        'GetRight',
        'getroot',
        'GetURLInfo\/',
        'GetWeb',
        'Geziyor',
        'Ghost Inspector',
        'GigablastOpenSource',
        'GIS-LABS',
        'github-camo',
        'GitHub-Hookshot',
        'github\.com',
        'Go http package',
        'Go [\d\.]* package http',
        'Go!Zilla',
        'Go-Ahead-Got-It',
        'Go-http-client',
        'go-mtasts\/',
        'gobuster',
        'gobyus',
        'Gofeed',
        'gofetch',
        'Goldfire Server',
        'GomezAgent',
        'gooblog',
        'Goodzer\/',
        'Google AppsViewer',
        'Google Desktop',
        'Google favicon',
        'Google Keyword Suggestion',
        'Google Keyword Tool',
        'Google Page Speed Insights',
        'Google PP Default',
        'Google Search Console',
        'Google Web Preview',
        'Google-Ads',
        'Google-Adwords',
        'Google-Apps-Script',
        'Google-Calendar-Importer',
        'Google-HotelAdsVerifier',
        'Google-HTTP-Java-Client',
        'Google-InspectionTool',
        'Google-Podcast',
        'Google-Publisher-Plugin',
        'Google-Read-Aloud',
        'Google-SearchByImage',
        'Google-Site-Verification',
        'Google-SMTP-STS',
        'Google-speakr',
        'Google-Structured-Data-Testing-Tool',
        'Google-Transparency-Report',
        'google-xrawler',
        'Google-Youtube-Links',
        'GoogleDocs',
        'GoogleHC\/',
        'GoogleOther',
        'GoogleProber',
        'GoogleProducer',
        'GoogleSites',
        'Gookey',
        'GoSpotCheck',
        'gosquared-thumbnailer',
        'Gotit',
        'GoZilla',
        'grabify',
        'GrabNet',
        'Grafula',
        'Grammarly',
        'GrapeFX',
        'GreatNews',
        'Gregarius',
        'GRequests',
        'grokkit',
        'grouphigh',
        'grub-client',
        'gSOAP\/',
        'GT::WWW',
        'GTmetrix',
        'GuzzleHttp',
        'gvfs\/',
        'HAA(A)?RTLAND http client',
        'Haansoft',
        'hackney\/',
        'Hadi Agent',
        'HappyApps-WebCheck',
        'Hardenize',
        'Hatena',
        'Havij',
        'HaxerMen',
        'HEADMasterSEO',
        'HeartRails_Capture',
        'help@dataminr\.com',
        'heritrix',
        'Hexometer',
        'historious',
        'hkedcity',
        'hledejLevne\.cz',
        'Hloader',
        'HMView',
        'Holmes',
        'HonesoSearchEngine',
        'HootSuite Image proxy',
        'Hootsuite-WebFeed',
        'hosterstats',
        'HostTracker',
        'ht:\/\/check',
        'htdig',
        'HTMLparser',
        'htmlyse',
        'HTTP Banner Detection',
        'http-get',
        'HTTP-Header-Abfrage',
        'http-kit',
        'http-request\/',
        'HTTP-Tiny',
        'HTTP::Lite',
        'http:\/\/www\.neomo\.de\/', // 'Francis [Bot]'
        'HttpComponents',
        'httphr',
        'HTTPie',
        'HTTPMon',
        'httpRequest',
        'httpscheck',
        'httpssites_power',
        'httpunit',
        'HttpUrlConnection',
        'http\.rb\/',
        'HTTP_Compression_Test',
        'http_get',
        'http_request2',
        'http_requester',
        'httrack',
        'huaweisymantec',
        'HubSpot ',
        'HubSpot-Link-Resolver',
        'Humanlinks',
        'i2kconnect\/',
        'Iblog',
        'ichiro',
        'Id-search',
        'IdeelaborPlagiaat',
        'IDG Twitter Links Resolver',
        'IDwhois\/',
        'Iframely',
        'igdeSpyder',
        'iGooglePortal',
        'IlTrovatore',
        'Image Fetch',
        'Image Sucker',
        'ImageEngine\/',
        'ImageVisu\/',
        'Imagga',
        'imagineeasy',
        'imgsizer',
        'InAGist',
        'inbound\.li parser',
        'InDesign%20CC',
        'Indy Library',
        'InetURL',
        'infegy',
        'infohelfer',
        'InfoTekies',
        'InfoWizards Reciprocal Link',
        'inpwrd\.com',
        'instabid',
        'Instapaper',
        'Integrity',
        'integromedb',
        'Intelliseek',
        'InterGET',
        'Internet Ninja',
        'InternetSeer',
        'internetVista monitor',
        'internetwache',
        'internet_archive',
        'intraVnews',
        'IODC',
        'IOI',
        'Inboxb0t',
        'iplabel',
        'ips-agent',
        'IPS\/[0-9]',
        'IPWorks HTTP\/S Component',
        'iqdb\/',
        'Iria',
        'Irokez',
        'isitup\.org',
        'iskanie',
        'isUp\.li',
        'iThemes Sync\/',
        'IZaBEE',
        'iZSearch',
        'JAHHO',
        'janforman',
        'Jaunt\/',
        'Java.*outbrain',
        'javelin\.io',
        'Jbrofuzz',
        'Jersey\/',
        'JetCar',
        'Jigsaw',
        'Jobboerse',
        'JobFeed discovery',
        'Jobg8 URL Monitor',
        'jobo',
        'Jobrapido',
        'Jobsearch1\.5',
        'JoinVision Generic',
        'JolokiaPwn',
        'Joomla',
        'Jorgee',
        'JS-Kit',
        'JungleKeyThumbnail',
        'JustView',
        'Kaspersky Lab CFR link resolver',
        'Kelny\/',
        'Kerrigan\/',
        'KeyCDN',
        'Keyword Density',
        'Keywords Research',
        'khttp\/',
        'KickFire',
        'KimonoLabs\/',
        'Kml-Google',
        'knows\.is',
        'KOCMOHABT',
        'kouio',
        'krawler\.dk',
        'kube-probe',
        'kubectl',
        'kulturarw3',
        'KumKie',
        'Larbin',
        'Lavf\/',
        'leakix\.net',
        'LeechFTP',
        'LeechGet',
        'letsencrypt',
        'Lftp',
        'LibVLC',
        'LibWeb',
        'Libwhisker',
        'libwww',
        'Licorne',
        'Liferea\/',
        'Lighthouse',
        'Lightspeedsystems',
        'Likse',
        'limber\.io',
        'Link Valet',
        'LinkAlarm\/',
        'LinkAnalyser',
        'link-check',
        'linkCheck',
        'linkdex',
        'LinkExaminer',
        'linkfluence',
        'linkpeek',
        'LinkPreview',
        'LinkScan',
        'LinksManager',
        'LinkTiger',
        'LinkWalker',
        'link_thumbnailer',
        'Lipperhey',
        'Litemage_walker',
        'livedoor ScreenShot',
        'LoadImpactRload',
        'localsearch-web',
        'LongURL API',
        'longurl-r-package',
        'looid\.com',
        'looksystems\.net',
        'lscache_runner',
        'ltx71',
        'lua-resty-http',
        'Lucee \(CFML Engine\)',
        'Lush Http Client',
        'lwp-request',
        'lwp-trivial',
        'LWP::Simple',
        'lycos',
        'LYT\.SR',
        'L\.webis',
        'mabontland',
        'MacOutlook\/',
        'MagentaNews\/',
        'Mag-Net',
        'MagpieRSS',
        'Mail::STS',
        'MailChimp',
        'Mail\.Ru',
        'Majestic12',
        'makecontact\/',
        'Mandrill',
        'MapperCmd',
        'marketinggrader',
        'MarkMonitor',
        'MarkWatch',
        'Mass Downloader',
        'masscan\/',
        'Mata Hari',
        'mattermost',
        'MatchorySearch\/',
        'Mediametric',
        'Mediapartners-Google',
        'mediawords',
        'MegaIndex\.ru',
        'MeltwaterNews',
        'Melvil Rawi',
        'MemGator',
        'Metaspinner',
        'MetaURI',
        'MFC_Tear_Sample',
        'Microsearch',
        'Microsoft Data Access',
        'Microsoft Office',
        'Microsoft Outlook',
        'Microsoft Windows Network Diagnostics',
        'Microsoft-WebDAV-MiniRedir',
        'Microsoft\.Data\.Mashup',
        'MicrosoftPreview',
        'MIDown tool',
        'MIIxpc',
        'Mindjet',
        'Miniature\.io',
        'Miniflux',
        'mio_httpc',
        'Miro-HttpClient',
        'Mister PiX',
        'mixdata dot com',
        'mixed-content-scan',
        'mixnode',
        'Mnogosearch',
        'mogimogi',
        'Mojeek',
        'Mojolicious \(Perl\)',
        'Mollie',
        'monitis',
        'Monitority\/',
        'Monit\/',
        'montastic',
        'MonSpark',
        'MonTools',
        'Moreover',
        'Morfeus Fucking Scanner',
        'Morning Paper',
        'MovableType',
        'mowser',
        'Mrcgiguy',
        'Mr\.4x3 Powered',
        'MS Web Services Client Protocol',
        'MSFrontPage',
        'mShots',
        'MuckRack\/',
        'muhstik-scan',
        'MVAClient',
        'MxToolbox\/',
        'myseosnapshot',
        'nagios',
        'Najdi\.si',
        'Name Intelligence',
        'NameFo\.com',
        'Nameprotect',
        'nationalarchives',
        'Navroad',
        'nbertaupete95',
        'NearSite',
        'Needle',
        'Nessus',
        'Net Vampire',
        'NetAnts',
        'NETCRAFT',
        'NetLyzer',
        'NetMechanic',
        'NetNewsWire',
        'Netpursual',
        'netresearch',
        'NetShelter ContentScan',
        'Netsparker',
        'NetSystemsResearch',
        'nettle',
        'NetTrack',
        'Netvibes',
        'NetZIP',
        'Neustar WPM',
        'NeutrinoAPI',
        'NewRelicPinger',
        'NewsBlur .*Finder',
        'NewsGator',
        'newsme',
        'newspaper\/',
        'Nexgate Ruby Client',
        'NG-Search',
        'nghttp2',
        'Nibbler',
        'NICErsPRO',
        'NihilScio',
        'Nikto',
        'nineconnections',
        'NLNZ_IAHarvester',
        'Nmap Scripting Engine',
        'node-fetch',
        'node-superagent',
        'node-urllib',
        'Nodemeter',
        'NodePing',
        'node\.io',
        'nominet\.org\.uk',
        'nominet\.uk',
        'Norton-Safeweb',
        'Notifixious',
        'notifyninja',
        'NotionEmbedder',
        'nuhk',
        'nutch',
        'Nuzzel',
        'nWormFeedFinder',
        'nyawc\/',
        'Nymesis',
        'NYU',
        'Observatory\/',
        'Ocelli\/',
        'Octopus',
        'oegp',
        'Offline Explorer',
        'Offline Navigator',
        'OgScrper',
        'okhttp',
        'omgili',
        'OMSC',
        'Online Domain Tools',
        'Open Source RSS',
        'OpenCalaisSemanticProxy',
        'Openfind',
        'OpenLinkProfiler',
        'Openstat\/',
        'OpenVAS',
        'OPPO A33',
        'Optimizer',
        'Orbiter',
        'OrgProbe\/',
        'orion-semantics',
        'Outlook-Express',
        'Outlook-iOS',
        'Owler',
        'Owlin',
        'ownCloud News',
        'ow\.ly',
        'OxfordCloudService',
        'page scorer',
        'Page Valet',
        'page2rss',
        'PageFreezer',
        'PageGrabber',
        'PagePeeker',
        'PageScorer',
        'Pagespeed\/',
        'PageThing',
        'page_verifier',
        'Panopta',
        'panscient',
        'Papa Foto',
        'parsijoo',
        'Pavuk',
        'PayPal IPN',
        'pcBrowser',
        'Pcore-HTTP',
        'PDF24 URL To PDF',
        'Pearltrees',
        'PECL::HTTP',
        'peerindex',
        'Peew',
        'PeoplePal',
        'Perlu -',
        'PhantomJS Screenshoter',
        'PhantomJS\/',
        'Photon\/',
        'php-requests',
        'phpservermon',
        'Pi-Monster',
        'Picscout',
        'Picsearch',
        'PictureFinder',
        'Pimonster',
        'Pingability',
        'PingAdmin\.Ru',
        'Pingdom',
        'Pingoscope',
        'PingSpot',
        'ping\.blo\.gs',
        'pinterest\.com',
        'Pixray',
        'Pizilla',
        'Plagger\/',
        'Pleroma ',
        'Ploetz \+ Zeller',
        'Plukkie',
        'plumanalytics',
        'PocketImageCache',
        'PocketParser',
        'Pockey',
        'PodcastAddict\/',
        'POE-Component-Client-HTTP',
        'Polymail\/',
        'Pompos',
        'Porkbun',
        'Port Monitor',
        'postano',
        'postfix-mta-sts-resolver',
        'PostmanRuntime',
        'postplanner\.com',
        'PostPost',
        'postrank',
        'PowerPoint\/',
        'Prebid',
        'Prerender',
        'Priceonomics Analysis Engine',
        'PrintFriendly',
        'PritTorrent',
        'Prlog',
        'probely\.com',
        'probethenet',
        'Project ?25499',
        'Project-Resonance',
        'prospectb2b',
        'Protopage',
        'ProWebWalker',
        'proximic',
        'PRTG Network Monitor',
        'pshtt, https scanning',
        'PTST ',
        'PTST\/[0-9]+',
        'pulsetic\.com',
        'Pump',
        'Python-httplib2',
        'python-httpx',
        'python-requests',
        'Python-urllib',
        'Qirina Hurdler',
        'QQDownload',
        'QrafterPro',
        'Qseero',
        'Qualidator',
        'QueryN Metasearch',
        'queuedriver',
        'quic-go-HTTP\/',
        'QuiteRSS',
        'Quora Link Preview',
        'Qwantify',
        'Radian6',
        'RadioPublicImageResizer',
        'Railgun\/',
        'RankActive',
        'RankFlex',
        'RankSonicSiteAuditor',
        'RapidLoad\/',
        'Re-re Studio',
        'ReactorNetty',
        'Readability',
        'RealDownload',
        'RealPlayer%20Downloader',
        'RebelMouse',
        'Recorder',
        'RecurPost\/',
        'redback\/',
        'ReederForMac',
        'Reeder\/',
        'ReGet',
        'RepoMonkey',
        'request\.js',
        'reqwest\/',
        'ResponseCodeTest',
        'RestSharp',
        'Riddler',
        'Rival IQ',
        'Robosourcer',
        'Robozilla',
        'ROI Hunter',
        'RPT-HTTPClient',
        'RSSMix\/',
        'RSSOwl',
        'RuxitSynthetic',
        'RyowlEngine',
        'safe-agent-scanner',
        'SalesIntelligent',
        'Saleslift',
        'SAP NetWeaver Application Server',
        'SauceNAO',
        'SBIder',
        'sc-downloader',
        'scalaj-http',
        'Scamadviser-Frontend',
        'ScanAlert',
        'scan\.lol',
        'Scoop',
        'scooter',
        'ScopeContentAG-HTTP-Client',
        'ScoutJet',
        'ScoutURLMonitor',
        'ScrapeBox Page Scanner',
        'Scrapy',
        'Screaming',
        'ScreenShotService',
        'Scrubby',
        'Scrutiny\/',
        'Search37',
        'searchenginepromotionhelp',
        'Searchestate',
        'SearchExpress',
        'SearchSight',
        'SearchWP',
        'search\.thunderstone',
        'Seeker',
        'semanticdiscovery',
        'semanticjuice',
        'Semiocast HTTP client',
        'Semrush',
        'Sendsay\.Ru',
        'sentry\/',
        'SEO Browser',
        'Seo Servis',
        'seo-nastroj\.cz',
        'seo4ajax',
        'Seobility',
        'SEOCentro',
        'SeoCheck',
        'seocompany',
        'SEOkicks',
        'SEOlizer',
        'Seomoz',
        'SEOprofiler',
        'seoscanners',
        'SEOsearch',
        'seositecheckup',
        'SEOstats',
        'servernfo',
        'sexsearcher',
        'Seznam',
        'Shelob',
        'Shodan',
        'Shoppimon',
        'ShopWiki',
        'ShortLinkTranslate',
        'shortURL lengthener',
        'shrinktheweb',
        'Sideqik',
        'Siege',
        'SimplePie',
        'SimplyFast',
        'Siphon',
        'SISTRIX',
        'Site Sucker',
        'Site-Shot\/',
        'Site24x7',
        'SiteBar',
        'Sitebeam',
        'Sitebulb\/',
        'SiteCondor',
        'SiteExplorer',
        'SiteGuardian',
        'Siteimprove',
        'SiteIndexed',
        'Sitemap(s)? Generator',
        'SitemapGenerator',
        'SiteMonitor',
        'Siteshooter B0t',
        'SiteSnagger',
        'SiteSucker',
        'SiteTruth',
        'Sitevigil',
        'sitexy\.com',
        'SkypeUriPreview',
        'Slack\/',
        'sli-systems\.com',
        'slider\.com',
        'slurp',
        'SlySearch',
        'SmartDownload',
        'SMRF URL Expander',
        'SMUrlExpander',
        'Snake',
        'Snappy',
        'SnapSearch',
        'Snarfer\/',
        'SniffRSS',
        'sniptracker',
        'Snoopy',
        'SnowHaze Search',
        'sogou web',
        'SortSite',
        'Sottopop',
        'sovereign\.ai',
        'SpaceBison',
        'SpamExperts',
        'Spammen',
        'Spanner',
        'Spawning-AI',
        'spaziodati',
        'SPDYCheck',
        'Specificfeeds',
        'SpeedKit',
        'speedy',
        'SPEng',
        'Spinn3r',
        'spray-can',
        'Sprinklr ',
        'spyonweb',
        'sqlmap',
        'Sqlworm',
        'Sqworm',
        'SSL Labs',
        'ssl-tools',
        'StackRambler',
        'Statastico\/',
        'Statically-',
        'StatusCake',
        'Steeler',
        'Stratagems Kumo',
        'Stripe\/',
        'Stroke\.cz',
        'StudioFACA',
        'StumbleUpon',
        'suchen',
        'Sucuri',
        'summify',
        'SuperHTTP',
        'Surphace Scout',
        'Suzuran',
        'swcd ',
        'Symfony BrowserKit',
        'Symfony2 BrowserKit',
        'Synapse\/',
        'Syndirella\/',
        'SynHttpClient-Built',
        'Sysomos',
        'sysscan',
        'Szukacz',
        'T0PHackTeam',
        'tAkeOut',
        'Tarantula\/',
        'Taringa UGC',
        'TarmotGezgin',
        'tchelebi\.io',
        'techiaith\.cymru',
        'Teleport',
        'Telesoft',
        'Telesphoreo',
        'Telesphorep',
        'Tenon\.io',
        'teoma',
        'terrainformatica',
        'Test Certificate Info',
        'testuri',
        'Tetrahedron',
        'TextRazor Downloader',
        'The Drop Reaper',
        'The Expert HTML Source Viewer',
        'The Intraformant',
        'The Knowledge AI',
        'theinternetrules',
        'TheNomad',
        'Thinklab',
        'Thumbor',
        'Thumbshots',
        'ThumbSniper',
        'timewe\.net',
        'TinEye',
        'Tiny Tiny RSS',
        'TLSProbe\/',
        'Toata',
        'topster',
        'touche\.com',
        'Traackr\.com',
        'tracemyfile',
        'Trackuity',
        'TrapitAgent',
        'Trendiction',
        'Trendsmap',
        'trendspottr',
        'truwoGPS',
        'TryJsoup',
        'TulipChain',
        'Turingos',
        'Turnitin',
        'tweetedtimes',
        'Tweetminster',
        'Tweezler\/',
        'twibble',
        'Twice',
        'Twikle',
        'Twingly',
        'Twisted PageGetter',
        'Typhoeus',
        'ubermetrics-technologies',
        'uclassify',
        'UdmSearch',
        'ultimate_sitemap_parser',
        'unchaos',
        'unirest-java',
        'UniversalFeedParser',
        'unshortenit',
        'Unshorten\.It',
        'Untiny',
        'UnwindFetchor',
        'updated',
        'updown\.io daemon',
        'Upflow',
        'Uptimia',
        'URL Verifier',
        'Urlcheckr',
        'URLitor',
        'urlresolver',
        'Urlstat',
        'URLTester',
        'UrlTrends Ranking Updater',
        'URLy Warning',
        'URLy\.Warning',
        'URL\/Emacs',
        'Vacuum',
        'Vagabondo',
        'VB Project',
        'vBSEO',
        'VCI',
        'Verity',
        'via ggpht\.com GoogleImageProxy',
        'Virusdie',
        'visionutils',
        'Visual Rights Group',
        'vkShare',
        'VoidEYE',
        'Voil',
        'voltron',
        'voyager\/',
        'VSAgent\/',
        'VSB-TUO\/',
        'Vulnbusters Meter',
        'VYU2',
        'w3af\.org',
        'W3C-checklink',
        'W3C-mobileOK',
        'W3C_Unicorn',
        'WAC-OFU',
        'WakeletLinkExpander',
        'WallpapersHD',
        'Wallpapers\/[0-9]+',
        'wangling',
        'Wappalyzer',
        'WatchMouse',
        'WbSrch\/',
        'WDT\.io',
        'Web Auto',
        'Web Collage',
        'Web Enhancer',
        'Web Fetch',
        'Web Fuck',
        'Web Pix',
        'Web Sauger',
        'Web spyder',
        'Web Sucker',
        'web-capture\.net',
        'Web-sniffer',
        'Webalta',
        'Webauskunft',
        'WebAuto',
        'WebCapture',
        'WebClient\/',
        'webcollage',
        'WebCookies',
        'WebCopier',
        'WebCorp',
        'WebDataStats',
        'WebDoc',
        'WebEnhancer',
        'WebFetch',
        'WebFuck',
        'WebGazer',
        'WebGo IS',
        'WebImageCollector',
        'WebImages',
        'WebIndex',
        'webkit2png',
        'WebLeacher',
        'webmastercoffee',
        'webmon ',
        'WebPix',
        'WebReaper',
        'WebSauger',
        'webscreenie',
        'Webshag',
        'Webshot',
        'Website Quester',
        'websitepulse agent',
        'WebsiteQuester',
        'Websnapr',
        'WebSniffer',
        'Webster',
        'WebStripper',
        'WebSucker',
        'webtech\/',
        'WebThumbnail',
        'Webthumb\/',
        'WebWhacker',
        'WebZIP',
        'WeLikeLinks',
        'WEPA',
        'WeSEE',
        'wf84',
        'Wfuzz\/',
        'wget',
        'WhatCMS',
        'WhatsApp',
        'WhatsMyIP',
        'WhatWeb',
        'WhereGoes\?',
        'Whibse',
        'WhoAPI\/',
        'WhoRunsCoinHive',
        'Whynder Magnet',
        'Windows-RSS-Platform',
        'WinHttp-Autoproxy-Service',
        'WinHTTP\/',
        'WinPodder',
        'wkhtmlto',
        'wmtips',
        'Woko',
        'Wolfram HTTPClient',
        'woorankreview',
        'WordPress\/',
        'WordupinfoSearch',
        'Word\/',
        'worldping-api',
        'wotbox',
        'WP Engine Install Performance API',
        'WP Rocket',
        'wpif',
        'wprecon\.com survey',
        'WPScan',
        'wscheck',
        'Wtrace',
        'WWW-Collector-E',
        'WWW-Mechanize',
        'WWW::Document',
        'WWW::Mechanize',
        'WWWOFFLE',
        'www\.monitor\.us',
        'x09Mozilla',
        'x22Mozilla',
        'XaxisSemanticsClassifier',
        'XenForo\/',
        'Xenu Link Sleuth',
        'XING-contenttabreceiver',
        'xpymep([0-9]?)\.exe',
        'Y!J-[A-Z][A-Z][A-Z]',
        'Yaanb',
        'yacy',
        'Yahoo Link Preview',
        'YahooCacheSystem',
        'YahooMailProxy',
        'YahooYSMcm',
        'YandeG',
        'Yandex(?!Search)',
        'yanga',
        'yeti',
        'Yo-yo',
        'Yoleo Consumer',
        'yomins\.com',
        'yoogliFetchAgent',
        'YottaaMonitor',
        'Your-Website-Sucks',
        'yourls\.org',
        'YoYs\.net',
        'YP\.PL',
        'Zabbix',
        'Zade',
        'Zao',
        'Zapier',
        'Zauba',
        'Zemanta Aggregator',
        'Zend\\\\Http\\\\Client',
        'Zend_Http_Client',
        'Zermelo',
        'Zeus ',
        'zgrab',
        'ZnajdzFoto',
        'ZnHTTP',
        'Zombie\.js',
        'Zoom\.Mac',
        'ZoteroTranslationServer',
        'ZyBorg',
        // Generic catch-all: any token that contains one of these keywords.
        '[a-z0-9\-_]*(bot|crawl|headless|archiver|transcoder|spider|uptime|validator|fetcher|cron|checker|reader|extractor|monitoring|analyzer|scraper)',
    ];
}
--------------------------------------------------------------------------------
/src/Fixtures/Exclusions.php:
--------------------------------------------------------------------------------
<?php

/*
 * This source file is subject to the MIT license that is bundled
 * with this source code in the file LICENSE.
 */

namespace Jaybizzle\CrawlerDetect\Fixtures;

class Exclusions extends AbstractProvider
{
    /**
     * List of strings to remove from the user agent before running the crawler regex.
     * Over a large list of user agents, this gives us about a 55% speed increase!
19 | * 20 | * @var array 21 | */ 22 | protected $data = [ 23 | 'Safari.[\d\.]*', 24 | 'Firefox.[\d\.]*', 25 | ' Chrome.[\d\.]*', 26 | 'Chromium.[\d\.]*', 27 | 'MSIE.[\d\.]', 28 | 'Opera\/[\d\.]*', 29 | 'Mozilla.[\d\.]*', 30 | 'AppleWebKit.[\d\.]*', 31 | 'Trident.[\d\.]*', 32 | 'Windows NT.[\d\.]*', 33 | 'Android [\d\.]*', 34 | 'Macintosh.', 35 | 'Ubuntu', 36 | 'Linux', 37 | '[ ]Intel', 38 | 'Mac OS X [\d_]*', 39 | '(like )?Gecko(.[\d\.]*)?', 40 | 'KHTML,', 41 | 'CriOS.[\d\.]*', 42 | 'CPU iPhone OS ([0-9_])* like Mac OS X', 43 | 'CPU OS ([0-9_])* like Mac OS X', 44 | 'iPod', 45 | 'compatible', 46 | 'x86_..', 47 | 'i686', 48 | 'x64', 49 | 'X11', 50 | 'rv:[\d\.]*', 51 | 'Version.[\d\.]*', 52 | 'WOW64', 53 | 'Win64', 54 | 'Dalvik.[\d\.]*', 55 | ' \.NET CLR [\d\.]*', 56 | 'Presto.[\d\.]*', 57 | 'Media Center PC', 58 | 'BlackBerry', 59 | 'Build', 60 | 'Opera Mini\/\d{1,2}\.\d{1,2}\.[\d\.]*\/\d{1,2}\.', 61 | 'Opera', 62 | ' \.NET[\d\.]*', 63 | 'cubot', 64 | '; M bot', 65 | '; CRONO', 66 | '; B bot', 67 | '; IDbot', 68 | '; ID bot', 69 | '; POWER BOT', 70 | 'OCTOPUS-CORE', 71 | 'htc_botdugls', 72 | 'super\/\d+\/Android\/\d+', 73 | '"Yandex"', 74 | 'YandexModule2', 75 | ]; 76 | } 77 | -------------------------------------------------------------------------------- /src/Fixtures/Headers.php: -------------------------------------------------------------------------------- 1 | 7 | * 8 | * This source file is subject to the MIT license that is bundled 9 | * with this source code in the file LICENSE. 10 | */ 11 | 12 | namespace Jaybizzle\CrawlerDetect\Fixtures; 13 | 14 | class Headers extends AbstractProvider 15 | { 16 | /** 17 | * All possible HTTP headers that represent the user agent string. 18 | * 19 | * @var array 20 | */ 21 | protected $data = [ 22 | // The default User-Agent string. 23 | 'HTTP_USER_AGENT', 24 | // Header can occur on devices using Opera Mini. 
25 | 'HTTP_X_OPERAMINI_PHONE_UA', 26 | // Vodafone specific header: http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/ 27 | 'HTTP_X_DEVICE_USER_AGENT', 28 | 'HTTP_X_ORIGINAL_USER_AGENT', 29 | 'HTTP_X_SKYFIRE_PHONE', 30 | 'HTTP_X_BOLT_PHONE_UA', 31 | 'HTTP_DEVICE_STOCK_UA', 32 | 'HTTP_X_UCBROWSER_DEVICE_UA', 33 | // Sometimes, bots (especially Google) use a genuine user agent, but fill this header in with their email address 34 | 'HTTP_FROM', 35 | 'HTTP_X_SCANNER', // Seen in use by Netsparker 36 | // Observed that Facebook will omit identifying itself in User Agent headers but will persist HeadlessChrome in this header for mobile requests 37 | 'HTTP_SEC_CH_UA', 38 | ]; 39 | } 40 | --------------------------------------------------------------------------------