├── .gitignore ├── README.md ├── build.xml ├── module-info.java ├── pom.xml ├── release └── user-agent-detector.jar ├── src └── main │ └── java │ └── net │ └── pieroxy │ └── ua │ ├── detection │ ├── Bot.java │ ├── BotFamily.java │ ├── BotsHelper.java │ ├── Brand.java │ ├── Browser.java │ ├── BrowserFamily.java │ ├── Country.java │ ├── Device.java │ ├── DeviceType.java │ ├── Extension.java │ ├── GeckoSpinoff.java │ ├── GenericBot.java │ ├── IUserAgentDetector.java │ ├── Language.java │ ├── Locale.java │ ├── LocaleHelper.java │ ├── Matcher.java │ ├── MatchingRegion.java │ ├── MatchingType.java │ ├── OS.java │ ├── OSFamily.java │ ├── RenderingEngine.java │ ├── RenderingEngineFamily.java │ ├── StringUtils.java │ ├── UserAgentContext.java │ ├── UserAgentDetectionHelper.java │ ├── UserAgentDetectionResult.java │ ├── UserAgentDetector.java │ └── VersionedObject.java │ └── tooling │ └── UserAgentTester.java ├── test-data └── database.gz └── test.sh /.gitignore: -------------------------------------------------------------------------------- 1 | build/* 2 | release/tmp 3 | release/doc 4 | target/* 5 | .idea 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | java-user-agent-detection 2 | ========================= 3 | 4 | Some code to deduce an OS/Platform/Browser out of a user-agent string, compatible with Java 1.6 or newer. 5 | 6 | ## Useful links 7 | 8 | * You will find all you need [on the homepage](http://pieroxy.net/user-agent/lib.html). 9 | * You can [browse the database](http://pieroxy.net/user-agent/db.html) - although it is a little rough right now. 10 | * You can [test any user-agent](http://pieroxy.net/user-agent/). 11 | * You can [browse the javadoc API](http://pieroxy.net/user-agent/doc/). 12 | * [Leave a comment on the blog](http://pieroxy.net/blog/2014/11/18/user_agent_detection_in_java.html). 
13 | 14 | ## How to contribute 15 | 16 | I will not merge pull requests, as most of this code and tests are generated. What you can do is [open issues](https://github.com/pieroxy/java-user-agent-detection/issues) if you find problems or would like to suggest improvements. You can also leave a message [on the blog](http://pieroxy.net/blog/2014/11/18/user_agent_detection_in_java.html) if you just want to ask something. 17 | 18 | ## How to build with ant 19 | 20 | Building with ant is as simple as installing ant and typing ```ant```. 21 | 22 | Your jar can then be found in the ```build/``` directory and the javadoc in ```build/doc/index.html```. 23 | 24 | 25 | ## How to build with Maven 26 | 27 | Building with Maven is as simple as installing Maven and typing ```mvn release``` 28 | 29 | Your jar can then be found in the ```target/``` directory. 30 | -------------------------------------------------------------------------------- /build.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /module-info.java: -------------------------------------------------------------------------------- 1 | module net.pieroxy.ua.detection { 2 | exports net.pieroxy.ua.detection; 3 | } 4 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | net.pieroxy.ua 5 | user_agent_detector 6 | jar 7 | 2015-10-31-SNAPSHOT 8 | User Agent Detection Library 9 | A library to extract bot, browser, device and OS from a User-Agent in Java 6 or newer. 
10 | https://github.com/pieroxy/java-user-agent-detection/ 11 | 12 | 13 | 14 | WTFPL 15 | http://www.wtfpl.net/ 16 | 17 | 18 | 19 | 20 | 21 | Pierre Grimaud 22 | pieroxy most likely at pieroxy dot net 23 | 24 | 25 | 26 | 27 | scm:git:git://github.com/pieroxy/java-user-agent-detection.git 28 | scm:git:ssh://github.com/pieroxy/java-user-agent-detection.git 29 | https://github.com/pieroxy/java-user-agent-detection/tree/master 30 | 31 | 32 | 33 | https://github.com/pieroxy/java-user-agent-detection/issues 34 | GitHub Issues 35 | 36 | 37 | 38 | 39 | 40 | UTF-8 41 | 42 | 43 | 44 | 45 | org.apache.maven.plugins 46 | maven-compiler-plugin 47 | 3.1 48 | 49 | 1.6 50 | 1.6 51 | 52 | 53 | 54 | org.apache.maven.plugins 55 | maven-source-plugin 56 | 2.1.2 57 | 58 | 59 | attach-sources 60 | jar 61 | 62 | 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /release/user-agent-detector.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pieroxy/java-user-agent-detection/110625e86fa68650fdd3881bcacb934d20fc40b8/release/user-agent-detector.jar -------------------------------------------------------------------------------- /src/main/java/net/pieroxy/ua/detection/Bot.java: -------------------------------------------------------------------------------- 1 | package net.pieroxy.ua.detection; 2 | import java.io.*; 3 | import java.util.*; 4 | /** 5 | * Describes a bot, which is a program that accesses sites automatically, as opposed to a human browsing the web. 6 | */ 7 | public class Bot { 8 | private Brand vendor; 9 | private BotFamily family; 10 | private String description; 11 | private String version; 12 | private String url; 13 | 14 | /** 15 | * This is the most detailed constructor of the Bot object. You can specify all of its fields. 16 | * @param _brand The vendor of this bot. 17 | * @param _family The family of this bot. 
18 | * @param _description The text description of this bot. 19 | * @param _version The version of this bot. 20 | * @param _url The url describing this bot. 21 | */ 22 | public Bot(Brand _brand, BotFamily _family, String _description, String _version, String _url) { 23 | this(_brand,_family,_description,_version); 24 | url = _url==null ? "" : _url; 25 | } 26 | 27 | /** 28 | * This constructor of the Bot object does not specify the url of the bot, initializing it to an empty string. 29 | * @param _brand The vendor of this bot. 30 | * @param _family The family of this bot. 31 | * @param _description The text description of this bot. 32 | * @param _version The version of this bot. 33 | */ 34 | public Bot(Brand _brand, BotFamily _family, String _description, String _version) { 35 | family = _family; 36 | description = _description; 37 | version = _version; 38 | vendor = _brand; 39 | url = ""; 40 | } 41 | public boolean equals(Object o) { 42 | if (o == null) return false; 43 | if (! (o instanceof Bot)) return false; 44 | Bot d = (Bot) o; 45 | if (d.vendor==null && vendor!=d.vendor) return false; 46 | if (d.getFamily()==null && family!=d.getFamily()) return false; 47 | if (d.description==null && description!=d.description) return false; 48 | if (d.version==null && version!=d.version) return false; 49 | if (d.url==null && url!=d.url) return false; 50 | return 51 | ( (d.getFamily()==null && family==null) || d.getFamily().equals(family) ) && 52 | ( (d.description==null && description==null) || d.description.equals(description) ) && 53 | ( (d.vendor==null && vendor==null) || d.vendor.equals(vendor) ) && 54 | ( (d.url==null && url==null) || d.url.equals(url) ) && 55 | ( (d.version==null && version==null) || d.version.equals(version) ); 56 | } 57 | public int hashCode() { 58 | int res = 0; 59 | if (family != null) { 60 | res *= 3; 61 | res += family.hashCode(); 62 | } 63 | if (version != null) { 64 | res *= 3; 65 | res += version.hashCode(); 66 | } 67 | if (vendor!= null) { 68 
| res *= 3; 69 | res += vendor.hashCode(); 70 | } 71 | if (url!= null) { 72 | res *= 3; 73 | res += url.hashCode(); 74 | } 75 | if (description != null) { 76 | res *= 3; 77 | res += description.hashCode(); 78 | } 79 | return res; 80 | } 81 | 82 | /** 83 | * @return The vendor, usually the company that operates the bot, if known. 84 | */ 85 | public Brand getVendor() { 86 | return vendor; 87 | } 88 | /** @return The family of the bot. SPAMBOT, CRAWLER, ... */ 89 | public BotFamily getFamily() { 90 | return family; 91 | } 92 | /** @return The description of the bot. For example "Google Bot" or "Flipboard Proxy". */ 93 | public String getDescription() { 94 | return description; 95 | } 96 | /** @return The version number */ 97 | public String getVersion() { 98 | return version; 99 | } 100 | /** @return The URL the bot points at or a url describing the bot. */ 101 | public String getUrl() { 102 | return url; 103 | } 104 | /** @param u The URL the bot points at or a url describing the bot. */ 105 | public void setUrl(String u) { 106 | url = u; 107 | } 108 | 109 | } -------------------------------------------------------------------------------- /src/main/java/net/pieroxy/ua/detection/BotFamily.java: -------------------------------------------------------------------------------- 1 | package net.pieroxy.ua.detection; 2 | import java.io.*; 3 | import java.util.*; 4 | /** 5 | * Describes the family of the bot, its nature. 6 | */ 7 | public enum BotFamily { 8 | 9 | /** 10 | * A generic robot. 11 | */ 12 | ROBOT("Robot",false), 13 | /** 14 | * A spam bot. 15 | */ 16 | SPAMBOT("Spam bot",true ), 17 | /** 18 | * A robot that hides as a regular browser. These are considered to be nefarious (because they hide). 19 | */ 20 | HIDDEN_BOT("Hidden bot",true ), 21 | /** 22 | * A robot used to crawl the web (Google's crawler, Bing's crawler, etc.) 23 | */ 24 | CRAWLER("Web Crawler",false ), 25 | /** 26 | * Feed aggregator (Feedly, ...) 
27 | */ 28 | FEED_CRAWLER("Feed aggregator",false ), 29 | /** 30 | * Unknown type of bot 31 | */ 32 | UNKNOWN("",false), 33 | /** 34 | * This UserAgentDetectionResult does not depict a bot: 35 | */ 36 | NOT_A_BOT("Not a bot",false); 37 | 38 | private boolean nefarious; 39 | private String label; 40 | BotFamily(String _label, boolean _bad) { 41 | nefarious=_bad; 42 | label = _label; 43 | } 44 | 45 | /** 46 | * @return true if the bot is of the nefarious type. 47 | */ 48 | public boolean isNefarious() { 49 | return nefarious; 50 | } 51 | /** 52 | * @return the text-based description of this bot. 53 | */ 54 | public String getLabel() { 55 | return label; 56 | } 57 | } -------------------------------------------------------------------------------- /src/main/java/net/pieroxy/ua/detection/BotsHelper.java: -------------------------------------------------------------------------------- 1 | package net.pieroxy.ua.detection; 2 | import java.io.*; 3 | import java.util.*; 4 | class BotsHelper { 5 | 6 | public static String[] getGroups(String regexp, String ua, int ... groups) { 7 | java.util.regex.Pattern pattern = java.util.regex.Pattern.compile(regexp); 8 | java.util.regex.Matcher m = pattern.matcher(ua); 9 | 10 | if (m.matches()) { 11 | String[]res = new String[groups.length]; 12 | for (int i=0 ; i hiddenBots; 22 | static private Map genericBotsBrandAndType; 23 | static private Map genericBotsLiteral; 24 | static private Bot genericBotBase = new Bot(Brand.OTHER, BotFamily.ROBOT, "", ""); 25 | static private GenericBot[]genericBotsPatterns = new GenericBot[] { 26 | new GenericBot("Mozilla/5\\.0 \\(compatible; ?([^\\);/]+)/([0-9\\.]+[a-z]?); ?(MirrorDetector; )?(\\+? ?https?://[^\\)]+)\\)( AppleWebKit/[0-9\\.]+)?(/[0-9\\.]+[a-z]?)?(/\\*)?", new int[]{1,2,4}, true), 27 | new GenericBot("Mozilla/5\\.0 \\(compatible; ([^\\);/]+)\\-([0-9\\.]+); (\\+? ?https?://[^\\)]+)\\)", new int[]{1,2,3}, true), 28 | new GenericBot("Mozilla/5\\.0 \\(compatible; ([^\\);/]+);? (\\+? 
?https?://[^\\)]+)\\)", new int[]{1,0,2}, true), 29 | new GenericBot("([^\\(\\);/]+)/([0-9RC\\.]+) \\((\\+?https?://[^\\);]+)\\)( .*)?", new int[]{1,2,3}, true), 30 | new GenericBot("([^\\(\\);]+) \\((\\+?https?://[^\\);]+)\\)( .*)?", new int[]{1,0,2}, true), 31 | new GenericBot("([^\\(\\);/]+)/([0-9RC\\.]+) \\(([A-Za-z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,})\\)( .*)?", new int[]{1,2,0}, true), 32 | new GenericBot("([^<>\\(\\);]+) \\(([A-Za-z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,})\\)", new int[]{1,0,0}, true), 33 | }; 34 | private static Map mapCfNetworkOS; 35 | private static Map mapCfNetworkArchitecture; 36 | 37 | static { 38 | hiddenBots = new HashSet(); 39 | hiddenBots.add("Mozilla/4.0 (compatible; MSIE8.0; Windows NT 6.0) .NET CLR 2.0.50727)"); 40 | hiddenBots.add("Mozilla/0.6 Beta (Windows)"); 41 | hiddenBots.add("Mozilla/0.91 Beta (Windows)"); 42 | 43 | genericBotsLiteral = new HashMap(); 44 | genericBotsLiteral.put("AdnormCrawler www.adnorm.com/crawler", new Bot(Brand.OTHER, BotFamily.ROBOT, "AdnormCrawler", "")); 45 | 46 | genericBotsBrandAndType = new HashMap(); 47 | // Complicated 48 | genericBotsBrandAndType.put("YodaoBot", new Bot(Brand.NETEASE, BotFamily.CRAWLER, "Yodao Bot", "")); 49 | genericBotsBrandAndType.put("Exabot", new Bot(Brand.EXALEAD, BotFamily.CRAWLER, "Exalead crawler", "")); 50 | genericBotsBrandAndType.put("Baiduspider", new Bot(Brand.BAIDU, BotFamily.CRAWLER, "Baidu Web search", "")); 51 | 52 | // Other form 53 | genericBotsBrandAndType.put("bingbot", new Bot(Brand.MICROSOFT, BotFamily.CRAWLER, "Bing Bot", "")); 54 | 55 | // Cleaned up: 56 | genericBotsBrandAndType.put("CloudFlare-AlwaysOnline", new Bot(Brand.CLOUDFLARE, BotFamily.CRAWLER, "Always Online", "")); 57 | genericBotsBrandAndType.put("Cloudflare-AMP", new Bot(Brand.CLOUDFLARE, BotFamily.CRAWLER, "AMP Discovery Fetcher", "")); 58 | genericBotsBrandAndType.put("YodaoBot-Image", new Bot(Brand.NETEASE, BotFamily.CRAWLER, "Yodao Image Bot", "")); 59 | 
genericBotsBrandAndType.put("Googlebot", new Bot(Brand.GOOGLE, BotFamily.CRAWLER, "Google Bot", "")); 60 | genericBotsBrandAndType.put("Yahoo! Slurp", new Bot(Brand.YAHOO, BotFamily.CRAWLER, "Yahoo! Slurp", "")); 61 | genericBotsBrandAndType.put("YandexAntivirus", new Bot(Brand.YANDEX, BotFamily.CRAWLER, "Yandex Crawler", "")); 62 | genericBotsBrandAndType.put("YandexFavicons", new Bot(Brand.YANDEX, BotFamily.CRAWLER, "Yandex Crawler", "")); 63 | genericBotsBrandAndType.put("YandexMedia", new Bot(Brand.YANDEX, BotFamily.CRAWLER, "Yandex Crawler", "")); 64 | genericBotsBrandAndType.put("YandexImages", new Bot(Brand.YANDEX, BotFamily.CRAWLER, "Yandex Crawler", "")); 65 | genericBotsBrandAndType.put("YandexImageResizer", new Bot(Brand.YANDEX, BotFamily.CRAWLER, "Yandex Crawler", "")); 66 | genericBotsBrandAndType.put("YandexBot", new Bot(Brand.YANDEX, BotFamily.CRAWLER, "Yandex Crawler", "")); 67 | genericBotsBrandAndType.put("proximic", new Bot(Brand.OTHER, BotFamily.CRAWLER, "Proximic Crawler", "")); 68 | genericBotsBrandAndType.put("Speedy Spider", new Bot(Brand.ENTIREWEB, BotFamily.CRAWLER, "Speedy Spider", "")); 69 | genericBotsBrandAndType.put("yoozBot", new Bot(Brand.OTHER, BotFamily.CRAWLER, "Yooz Bot", "")); 70 | genericBotsBrandAndType.put("Lipperhey Link Explorer", new Bot(Brand.OTHER, BotFamily.ROBOT, "Lipperhey", "")); 71 | genericBotsBrandAndType.put("Lipperhey Site Explorer", new Bot(Brand.OTHER, BotFamily.ROBOT, "Lipperhey", "")); 72 | genericBotsBrandAndType.put("Lipperhey SEO Service", new Bot(Brand.OTHER, BotFamily.ROBOT, "Lipperhey", "")); 73 | genericBotsBrandAndType.put("Lipperhey-Kaus-Australis", new Bot(Brand.OTHER, BotFamily.ROBOT, "Lipperhey", "")); 74 | genericBotsBrandAndType.put("Exabot-Images", new Bot(Brand.EXALEAD, BotFamily.CRAWLER, "Exalead crawler", "")); 75 | genericBotsBrandAndType.put("MegaIndex.ru", new Bot(Brand.MEGAINDEX, BotFamily.ROBOT, "MegaIndex.ru crawler", "")); 76 | genericBotsBrandAndType.put("spbot", new 
Bot(Brand.ENTIREWEB, BotFamily.CRAWLER, "SEO Profiler", "")); 77 | genericBotsBrandAndType.put("WBSearchBot", new Bot(Brand.OTHER, BotFamily.CRAWLER, "Ware Bay Search Crawler", "")); 78 | genericBotsBrandAndType.put("BLEXBot", new Bot(Brand.OTHER, BotFamily.ROBOT, "BLEX Bot", "")); 79 | genericBotsBrandAndType.put("meanpathbot", new Bot(Brand.MEANPATH, BotFamily.ROBOT, "meanpath", "")); 80 | genericBotsBrandAndType.put("DuckDuckGo-Favicons-Bot", new Bot(Brand.DUCKDUCKGO, BotFamily.ROBOT, "Favicons bot", "")); 81 | genericBotsBrandAndType.put("DomainTunoCrawler", new Bot(Brand.OTHER, BotFamily.CRAWLER, "Domain Tuno Crawler", "")); 82 | genericBotsBrandAndType.put("SeznamBot", new Bot(Brand.SEZNAM, BotFamily.CRAWLER, "SeznamBot crawler", "")); 83 | genericBotsBrandAndType.put("AhrefsBot", new Bot(Brand.OTHER, BotFamily.CRAWLER, "AhrefsBot", "")); 84 | genericBotsBrandAndType.put("oBot", new Bot(Brand.IBM, BotFamily.ROBOT, "oBot", "")); 85 | genericBotsBrandAndType.put("Google Desktop", new Bot(Brand.GOOGLE, BotFamily.CRAWLER, "Google Desktop Bot", "")); 86 | genericBotsBrandAndType.put("Google-Adwords-Instant-Mobile", new Bot(Brand.GOOGLE, BotFamily.ROBOT, "Google Landing page inspection bot", "")); 87 | genericBotsBrandAndType.put("Google-Structured-Data-Testing-Tool", new Bot(Brand.GOOGLE, BotFamily.ROBOT, "Google Structured Data Testing Tool", "")); 88 | 89 | genericBotsBrandAndType.put("ltx71 -", new Bot(Brand.OTHER,BotFamily.ROBOT,"ltx71","")); 90 | genericBotsBrandAndType.put("masscan", new Bot(Brand.UNKNOWN,BotFamily.CRAWLER,"Mass IP port scanner","")); 91 | genericBotsBrandAndType.put("Baiduspider+", new Bot(Brand.BAIDU,BotFamily.CRAWLER,"Baidu Web search","")); 92 | genericBotsBrandAndType.put("FeedlyBot", new Bot(Brand.OTHER,BotFamily.FEED_CRAWLER,"Feedly","")); 93 | genericBotsBrandAndType.put("Y!J-ASR/0.1 crawler", new Bot(Brand.YAHOO,BotFamily.CRAWLER,"Yahoo Japan","")); 94 | genericBotsBrandAndType.put("CCBot", new 
Bot(Brand.OTHER,BotFamily.CRAWLER,"Common Crawl","")); 95 | 96 | mapCfNetworkOS = new HashMap(); 97 | mapCfNetworkOS.put("1.1/", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.2")); 98 | mapCfNetworkOS.put("1.2.1/", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.3.2")); 99 | mapCfNetworkOS.put("1.2.2/", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.3.9")); 100 | mapCfNetworkOS.put("1.2.6/", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.3.9")); 101 | mapCfNetworkOS.put("128/8.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.0")); 102 | mapCfNetworkOS.put("128/8.1.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.1")); 103 | mapCfNetworkOS.put("128.2/8.2.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.2")); 104 | mapCfNetworkOS.put("129.5/8.3.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.3")); 105 | mapCfNetworkOS.put("129.9/8.4.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.4")); 106 | mapCfNetworkOS.put("129.9/8.5.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.5")); 107 | mapCfNetworkOS.put("129.10/8.4.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.4")); 108 | mapCfNetworkOS.put("129.10/8.5.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.5")); 109 | mapCfNetworkOS.put("129.13/8.6.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.6")); 110 | mapCfNetworkOS.put("129.16/8.7.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.7")); 111 | mapCfNetworkOS.put("129.18/8.8.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.8")); 112 | mapCfNetworkOS.put("129.20/8.9.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.9")); 113 | mapCfNetworkOS.put("129.21/8.10.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.10")); 114 | mapCfNetworkOS.put("129.22/8.11.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.11")); 115 | mapCfNetworkOS.put("217/9.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.0")); 116 | mapCfNetworkOS.put("220/9.1.0", new 
OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.1")); 117 | mapCfNetworkOS.put("221.2/9.2.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.2 dev")); 118 | mapCfNetworkOS.put("221.5/9.2.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.2")); 119 | mapCfNetworkOS.put("330/9.3.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.3")); 120 | mapCfNetworkOS.put("330.4/9.4.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.4")); 121 | mapCfNetworkOS.put("339.5/9.5.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.5")); 122 | mapCfNetworkOS.put("422.11/9.6.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.6")); 123 | mapCfNetworkOS.put("438.12/9.7.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.7")); 124 | mapCfNetworkOS.put("438.14/9.8.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.8")); 125 | mapCfNetworkOS.put("454.4/10.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.6.0")); 126 | mapCfNetworkOS.put("454.5/10.2.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.6.2")); 127 | mapCfNetworkOS.put("454.9.4/10.3.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.6.3")); 128 | mapCfNetworkOS.put("454.9.7/10.4.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.6.4")); 129 | mapCfNetworkOS.put("454.11.5/10.5.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.6.5")); 130 | mapCfNetworkOS.put("454.11.5/10.6.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.6.6")); 131 | mapCfNetworkOS.put("454.11.12/10.7.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.6.7")); 132 | mapCfNetworkOS.put("454.12.4/10.8.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.6.8")); 133 | mapCfNetworkOS.put("459/10.0.0d3", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "3.1.3")); 134 | mapCfNetworkOS.put("485.2/10.3.1", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "4")); 135 | mapCfNetworkOS.put("485.10.2/10.3.1", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "4.1")); 136 | mapCfNetworkOS.put("485.12.7/10.4.0", new 
OS(Brand.APPLE, OSFamily.IOS, "iOS", "4.2.1")); 137 | mapCfNetworkOS.put("485.12.30/10.4.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "4.2.8")); 138 | mapCfNetworkOS.put("485.13.9/11.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "4.3.*")); 139 | mapCfNetworkOS.put("520.0.13/11.1.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.7.1")); 140 | mapCfNetworkOS.put("520.2.5/11.2.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.7.2")); 141 | mapCfNetworkOS.put("520.3.2/11.3.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.7.3")); 142 | mapCfNetworkOS.put("520.4.3/11.4.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.7.4")); 143 | mapCfNetworkOS.put("520.5.1/11.4.2", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.7.5")); 144 | mapCfNetworkOS.put("548.0.3/11.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "5")); 145 | mapCfNetworkOS.put("548.0.4/11.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "5.0.1")); 146 | mapCfNetworkOS.put("548.1.4/11.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "5.1")); 147 | mapCfNetworkOS.put("596.0.1/12.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.8.0")); 148 | mapCfNetworkOS.put("596.1/12.1.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.8.1")); 149 | mapCfNetworkOS.put("596.2.3/12.2.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.8.2")); 150 | mapCfNetworkOS.put("596.3.3/12.3.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.8.3")); 151 | mapCfNetworkOS.put("596.4.3/12.4.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.8.4")); 152 | mapCfNetworkOS.put("596.5/12.5.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.8.5")); 153 | mapCfNetworkOS.put("596.6.2/12.5.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.8.5")); 154 | mapCfNetworkOS.put("602/13.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "6.0-b3")); 155 | mapCfNetworkOS.put("609/13.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "6.0.*")); 156 | mapCfNetworkOS.put("609.1.4/13.0.0", new OS(Brand.APPLE, 
OSFamily.IOS, "iOS", "6.1.*")); 157 | mapCfNetworkOS.put("672.0.2/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "7.0.0-2")); 158 | mapCfNetworkOS.put("672.0.8/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "7.0.3-6")); 159 | mapCfNetworkOS.put("672.1.12/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "7.1-b5")); 160 | mapCfNetworkOS.put("672.1.13/13.3.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "7.1")); 161 | mapCfNetworkOS.put("672.1.13/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "7.1")); 162 | mapCfNetworkOS.put("672.1.14/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "7.1.1")); 163 | mapCfNetworkOS.put("672.1.15/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "7.1.2")); 164 | mapCfNetworkOS.put("673.0.3/13.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.0")); 165 | mapCfNetworkOS.put("673.0.3/13.0.2", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.1")); 166 | mapCfNetworkOS.put("673.2.1/13.1.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.2")); 167 | mapCfNetworkOS.put("673.3/13.2.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.3 beta")); 168 | mapCfNetworkOS.put("673.3/13.3.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.3 beta")); 169 | mapCfNetworkOS.put("673.3/13.4.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.3 beta")); 170 | mapCfNetworkOS.put("673.4/13.2.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.3")); 171 | mapCfNetworkOS.put("673.4/13.3.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.4")); 172 | mapCfNetworkOS.put("673.4/13.4.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.5")); 173 | mapCfNetworkOS.put("673.5/13.4.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.*")); 174 | mapCfNetworkOS.put("696.0.2/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0")); 175 | mapCfNetworkOS.put("699/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0")); 176 | mapCfNetworkOS.put("703.1/14.0.0", new OS(Brand.APPLE, 
OSFamily.IOS, "iOS", "7.0")); 177 | mapCfNetworkOS.put("703.1.6/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "8.0")); 178 | mapCfNetworkOS.put("708.1/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0")); 179 | mapCfNetworkOS.put("709.1/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0")); 180 | mapCfNetworkOS.put("707/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0")); 181 | mapCfNetworkOS.put("709/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0")); 182 | mapCfNetworkOS.put("711.0.6/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "8.0.0-2")); 183 | mapCfNetworkOS.put("711.1.12/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "8.1.0")); 184 | mapCfNetworkOS.put("711.1.16/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "8.1.1-3")); 185 | mapCfNetworkOS.put("714/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0")); 186 | mapCfNetworkOS.put("718/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0")); 187 | mapCfNetworkOS.put("720.0.4/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0")); 188 | mapCfNetworkOS.put("720.0.7/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0")); 189 | mapCfNetworkOS.put("720.0.8/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0")); 190 | mapCfNetworkOS.put("720.0.9/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0")); 191 | mapCfNetworkOS.put("720.1.1/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.1")); 192 | mapCfNetworkOS.put("720.2.2/14.1.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.2")); 193 | mapCfNetworkOS.put("720.2.3/14.1.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.1")); 194 | mapCfNetworkOS.put("720.2.4/14.1.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.2")); 195 | mapCfNetworkOS.put("720.3.6/14.3.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.3")); 196 | mapCfNetworkOS.put("720.3.9/14.3.0", new 
OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.3")); 197 | 198 | mapCfNetworkArchitecture = new HashMap(); 199 | mapCfNetworkArchitecture.put("128/8.0.0", "PowerPC"); 200 | mapCfNetworkArchitecture.put("128/8.1.0", "PowerPC"); 201 | mapCfNetworkArchitecture.put("128.2/8.2.0", "PowerPC"); 202 | mapCfNetworkArchitecture.put("129.5/8.3.0", "PowerPC"); 203 | mapCfNetworkArchitecture.put("129.9/8.4.0", "PowerPC"); 204 | mapCfNetworkArchitecture.put("129.9/8.5.0", "PowerPC"); 205 | mapCfNetworkArchitecture.put("129.10/8.4.0", "Intel"); 206 | mapCfNetworkArchitecture.put("129.10/8.5.0", "Intel"); 207 | } 208 | 209 | static String getAndConsumeUrl(UserAgentContext context, MatchingRegion region, String pattern) { 210 | String url = sanitizeUrl(context.getcToken(pattern, MatchingType.CONTAINS, region)); 211 | return url; 212 | } 213 | 214 | static String sanitizeUrl(String url) { 215 | if (url==null) url=""; 216 | if (url.startsWith("+http")) url = url.substring(1); 217 | if (url.startsWith("+ http")) url = url.substring(2); 218 | if (url.endsWith(";")) url = url.substring(0, url.length()-1); 219 | if (url.contains("; ")) url = url.substring(0, url.indexOf("; ")); 220 | if (url.contains(", ")) url = url.substring(0, url.indexOf(", ")); 221 | return url.trim(); 222 | } 223 | 224 | static String consumeUrlAndMozilla(UserAgentContext context, String url) { 225 | UserAgentDetectionHelper.consumeMozilla(context); 226 | return getAndConsumeUrl(context, MatchingRegion.PARENTHESIS, url); 227 | } 228 | 229 | static Bot getGenericBots(String userAgent, UserAgentContext context) { 230 | for (Map.Entry e : genericBotsLiteral.entrySet()) { 231 | if (userAgent.equals(e.getKey())) { 232 | context.consumeAllTokens(); 233 | return e.getValue(); 234 | } 235 | } 236 | 237 | for (GenericBot gb : genericBotsPatterns) { 238 | Bot b = getGenericBot(gb, userAgent); 239 | if (b!=null) { 240 | if (gb.discardAll) context.consumeAllTokens(); 241 | return b; 242 | } 243 | } 244 | return null; 245 | 
} 246 | static Bot getGenericBot(GenericBot gb, String userAgent) { 247 | java.util.regex.Matcher m = gb.pattern.matcher(userAgent); 248 | 249 | if (m.matches() && !userAgent.startsWith("Curl/PHP")) { 250 | String botName = m.group(gb.groups[0]); 251 | Bot baseBot = genericBotsBrandAndType.get(botName); 252 | String description = baseBot == null ? botName : baseBot.getDescription(); 253 | if (baseBot == null) baseBot = genericBotBase; 254 | String version = gb.groups[1] == 0 ? "" : m.group(gb.groups[1]); 255 | String url = gb.groups[2] == 0 ? "" : m.group(gb.groups[2]); 256 | return new Bot(baseBot.getVendor(), baseBot.getFamily(), description, version, sanitizeUrl(url)); 257 | } 258 | return null; 259 | } 260 | 261 | public static Bot getBot(UserAgentContext context) { 262 | int pos=0; 263 | String ver; 264 | String[]multi; 265 | 266 | Bot b = getGenericBots(context.getUA(), context) ; 267 | if (b != null) { 268 | return b; 269 | } 270 | 271 | if (hiddenBots.contains(context.getUA())) { 272 | context.consumeAllTokens(); 273 | return new Bot(Brand.UNKNOWN,BotFamily.HIDDEN_BOT,"",""); 274 | } else if (context.consume("commoncrawl.org/research//", MatchingType.BEGINS, MatchingRegion.REGULAR)) { 275 | ver = context.getcVersionAfterPattern("CCResearchBot/", MatchingType.BEGINS,MatchingRegion.BOTH); 276 | if (ver == null) ver=""; 277 | 278 | return new Bot(Brand.OTHER,BotFamily.CRAWLER,"Common Crawl",ver , "http://commoncrawl.org/faqs/"); 279 | } else if (context.getUA().equals("Qwantify/1.0")) { 280 | context.consumeAllTokens(); 281 | return new Bot(Brand.QWANT,BotFamily.CRAWLER,"Qwant crawler","1.0"); 282 | } else if (context.consume("via ggpht.com GoogleImageProxy", MatchingType.EQUALS, MatchingRegion.PARENTHESIS)) { // http://serverfault.com/questions/581857/apache-logs-flooded-with-connections-via-ggpht-com-googleimageproxy. 
283 | return new Bot(Brand.GOOGLE,BotFamily.ROBOT,"Gmail image downloader proxy",""); 284 | } else if (context.consume("Google-StructuredDataTestingTool", MatchingType.EQUALS, MatchingRegion.PARENTHESIS)) { 285 | return new Bot(Brand.GOOGLE,BotFamily.ROBOT,"Google Structured Data Testing Tool",consumeUrlAndMozilla(context, "+http://")); 286 | } else if (context.consume("ONDOWN3.2", MatchingType.EQUALS, MatchingRegion.PARENTHESIS)) { // Looks like a bot to me. 287 | return new Bot(Brand.UNKNOWN,BotFamily.ROBOT,"ONDOWN","3.2"); 288 | } else if (context.consume("Google Web Preview", MatchingType.EQUALS, MatchingRegion.PARENTHESIS)) { 289 | context.consume("generic", MatchingType.EQUALS, MatchingRegion.PARENTHESIS); 290 | context.consume("iPhone", MatchingType.EQUALS, MatchingRegion.PARENTHESIS); 291 | return new Bot(Brand.GOOGLE, BotFamily.ROBOT,"Web Preview",""); 292 | } else if ((multi = context.getcNextTokens(new Matcher[] {new Matcher("BusinessBot:", MatchingType.EQUALS), 293 | new Matcher("^[A-Za-z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$", MatchingType.REGEXP) 294 | }, 295 | MatchingRegion.REGULAR)) != null) { 296 | return new Bot(Brand.OTHER, BotFamily.ROBOT,"BusinessBot","", ""); 297 | } 298 | else if (context.consume("Contact: backend@getprismatic.com", MatchingType.EQUALS, MatchingRegion.PARENTHESIS) || 299 | (multi = context.getcNextTokens(new Matcher[] {new Matcher("Contact:", MatchingType.EQUALS), 300 | new Matcher("feedback@getprismatic.com", MatchingType.EQUALS) 301 | }, 302 | MatchingRegion.REGULAR)) != null) { 303 | return new Bot(Brand.OTHER, BotFamily.ROBOT,"Get Prismatic Bot","", "http://getprismatic.com/"); 304 | } 305 | else if ((ver=context.getcVersionAfterPattern("Diffbot/", MatchingType.BEGINS,MatchingRegion.BOTH))!=null || 306 | (ver=context.getcVersionAfterPattern("diffbot/", MatchingType.BEGINS,MatchingRegion.REGULAR))!=null || 307 | context.contains("+http://www.diffbot.com", MatchingType.BEGINS,MatchingRegion.PARENTHESIS)) { 308 | return 
new Bot(Brand.OTHER, BotFamily.ROBOT,"Diffbot ", ver==null?"":ver, consumeUrlAndMozilla(context, "http://")); 309 | } else if ((ver=context.getcVersionAfterPattern("GWPImages/", MatchingType.BEGINS,MatchingRegion.PARENTHESIS))!=null) { 310 | return new Bot(Brand.OTHER, BotFamily.ROBOT,"GWPImages ", ver, consumeUrlAndMozilla(context, "http://")); 311 | } else if ((ver=context.getcVersionAfterPattern("LSSRocketCrawler/", MatchingType.BEGINS,MatchingRegion.REGULAR))!=null) { 312 | context.consume("LightspeedSystems", MatchingType.EQUALS, MatchingRegion.REGULAR); 313 | return new Bot(Brand.OTHER, BotFamily.ROBOT,"LSSRocketCrawler ", ver); 314 | } else if ((ver=context.getcVersionAfterPattern("OrangeBot/", MatchingType.BEGINS,MatchingRegion.PARENTHESIS))!=null) { 315 | context.consume("[0-9a-zA-Z\\.]+@[0-9a-zA-Z\\.]+", MatchingType.REGEXP, MatchingRegion.PARENTHESIS); 316 | return new Bot(Brand.ORANGE, BotFamily.CRAWLER,"Orange Bot ", ver, consumeUrlAndMozilla(context, "http://")); 317 | } else if ((ver=context.getcVersionAfterPattern("del.icio.us-thumbnails/", MatchingType.BEGINS,MatchingRegion.BOTH))!=null) { 318 | return new Bot(Brand.DELICIOUS, BotFamily.ROBOT,"Thumbnails crawler ", ver); 319 | } else if ((ver=context.getcVersionAfterPattern("EvoHtmlToPdf/", MatchingType.BEGINS,MatchingRegion.REGULAR))!=null) { 320 | return new Bot(Brand.OTHER,BotFamily.ROBOT,"EvoHtmlToPdf",ver); 321 | } else if ((ver=context.getcVersionAfterPattern("PhantomJS/", MatchingType.BEGINS,MatchingRegion.REGULAR,2))!=null) { 322 | if (context.consume("development", MatchingType.EQUALS,MatchingRegion.PARENTHESIS)) { 323 | ver += " dev"; 324 | } 325 | context.consume("Unknown", MatchingType.EQUALS,MatchingRegion.PARENTHESIS); 326 | return new Bot(Brand.OPENSOURCE_COMMUNITY,BotFamily.ROBOT,"PhantomJS", ver); 327 | } else if (context.consume("theoldreader.com", MatchingType.EQUALS, MatchingRegion.PARENTHESIS)) { 328 | context.consume("feed-id=", MatchingType.BEGINS, 
MatchingRegion.PARENTHESIS); 329 | context.consume("[0-9]+ subscribers", MatchingType.REGEXP, MatchingRegion.PARENTHESIS); 330 | consumeUrlAndMozilla(context, "http://"); 331 | return new Bot(Brand.GOOGLE,BotFamily.FEED_CRAWLER,"RSS Feed Fetcher","","http://theoldreader.com/"); 332 | } else if (context.consume("Feedfetcher-Google;", MatchingType.EQUALS, MatchingRegion.REGULAR)) { 333 | context.consume("feed-id=", MatchingType.BEGINS, MatchingRegion.PARENTHESIS); 334 | context.consume("[0-9]+ subscribers", MatchingType.REGEXP, MatchingRegion.PARENTHESIS); 335 | return new Bot(Brand.GOOGLE,BotFamily.FEED_CRAWLER,"RSS Feed Fetcher","", getAndConsumeUrl(context, MatchingRegion.PARENTHESIS, "+http://www.google")); 336 | } else if (context.consume("Porkbun/Mustache", MatchingType.EQUALS, MatchingRegion.REGULAR)) { 337 | context.consume(".*@porkbun.com", MatchingType.REGEXP, MatchingRegion.PARENTHESIS); 338 | context.consume("Website Analysis", MatchingType.EQUALS, MatchingRegion.PARENTHESIS); 339 | return new Bot(Brand.OTHER,BotFamily.ROBOT,"Porkbun Website Analysis","", getAndConsumeUrl(context, MatchingRegion.PARENTHESIS, "http://")); 340 | } else if (context.consume("yacybot", MatchingType.EQUALS, MatchingRegion.REGULAR)) { 341 | context.consume("freeworld/global", MatchingType.EQUALS, MatchingRegion.PARENTHESIS); 342 | context.consume("yacy.net", MatchingType.EQUALS, MatchingRegion.REGULAR); 343 | return new Bot(Brand.YACI,BotFamily.CRAWLER,"Yacy bot","", getAndConsumeUrl(context, MatchingRegion.REGULAR, "http://")); 344 | } else if (context.consume("125LA", MatchingType.EQUALS, MatchingRegion.PARENTHESIS)) { // Will look for login forms and upload forms 345 | context.consume("Mozilla/4.0", MatchingType.EQUALS, MatchingRegion.REGULAR); 346 | context.consume("compatible", MatchingType.EQUALS, MatchingRegion.PARENTHESIS); 347 | context.consume("MSIE 9.0", MatchingType.EQUALS, MatchingRegion.PARENTHESIS); 348 | return new Bot(Brand.OTHER,BotFamily.SPAMBOT,"Unknown 
bot",""); 349 | } else if ((ver = context.getcVersionAfterPattern("AvantGo ", MatchingType.BEGINS, MatchingRegion.PARENTHESIS))!=null) { 350 | context.consume("Mozilla/", MatchingType.BEGINS, MatchingRegion.REGULAR); 351 | return new Bot(Brand.OTHER,BotFamily.ROBOT,"AvantGo", ver); 352 | } else if ((ver = context.getcVersionAfterPattern("InfegyAtlas/", MatchingType.BEGINS, MatchingRegion.PARENTHESIS))!=null) { 353 | context.consume("Linux", MatchingType.EQUALS, MatchingRegion.PARENTHESIS); 354 | consumeUrlAndMozilla(context, "@"); 355 | return new Bot(Brand.OTHER,BotFamily.ROBOT,"InfegyAtlas", ver, "http://infegy.com"); 356 | } else if ((ver = context.getcVersionAfterPattern("Twitterbot/", MatchingType.BEGINS, MatchingRegion.REGULAR))!=null) { 357 | return new Bot(Brand.UNKNOWN,BotFamily.ROBOT,"Twitterbot", ver); 358 | } else if ((ver = context.getcVersionAfterPattern("BingPreview/", MatchingType.BEGINS, MatchingRegion.REGULAR))!=null) { 359 | return new Bot(Brand.MICROSOFT,BotFamily.ROBOT,"Bing Web Preview", ver); 360 | } else if ((ver = context.getcVersionAfterPattern("LinkScan/", MatchingType.BEGINS, MatchingRegion.REGULAR))!=null) { 361 | return new Bot(Brand.ELSOP,BotFamily.ROBOT,"LinkScan", ver); 362 | } else if ((ver = context.getcVersionAfterPattern("Fever/", MatchingType.BEGINS, MatchingRegion.REGULAR))!=null) { 363 | context.consume("Allow like Gecko", MatchingType.EQUALS, MatchingRegion.PARENTHESIS); 364 | context.consume("Feed Parser", MatchingType.EQUALS, MatchingRegion.PARENTHESIS); 365 | return new Bot(Brand.UNKNOWN,BotFamily.FEED_CRAWLER,"Feed A Fever", ver, getAndConsumeUrl(context, MatchingRegion.PARENTHESIS, "http://")); 366 | } else if (context.consume("NetShelter ContentScan(, contact [a-zA-Z0-9\\.]+@[a-zA-Z0-9\\.]+ for information)?", MatchingType.REGEXP, MatchingRegion.PARENTHESIS)) { 367 | return new Bot(Brand.UNKNOWN,BotFamily.ROBOT,"NetShelter ContentScan", ""); 368 | } else if ((ver = context.getcVersionAfterPattern("SimplePie/", 
MatchingType.BEGINS, MatchingRegion.REGULAR))!=null) { 369 | context.consume("Build/", MatchingType.BEGINS, MatchingRegion.REGULAR); 370 | context.consume("Allow like Gecko", MatchingType.EQUALS, MatchingRegion.PARENTHESIS); 371 | context.consume("Feed Parser", MatchingType.EQUALS, MatchingRegion.PARENTHESIS); 372 | return new Bot(Brand.UNKNOWN,BotFamily.FEED_CRAWLER,"SimplePie", ver, getAndConsumeUrl(context, MatchingRegion.PARENTHESIS, "http://")); 373 | } else if ((ver = context.getcVersionAfterPattern("Qwantify/", MatchingType.BEGINS, MatchingRegion.PARENTHESIS))!=null) { 374 | return new Bot(Brand.OTHER,BotFamily.ROBOT,"Qwantify Crawler", ver, consumeUrlAndMozilla(context, "https://")); 375 | } else if ((ver = context.getcVersionAfterPattern("PageAnalyzer/", MatchingType.BEGINS, MatchingRegion.PARENTHESIS))!=null) { 376 | return new Bot(Brand.UNKNOWN,BotFamily.CRAWLER,"PageAnalyzer", ver, consumeUrlAndMozilla(context, "http://")); 377 | } else if ((ver = context.getcVersionAfterPattern("Pagespeed/", MatchingType.BEGINS, MatchingRegion.PARENTHESIS))!=null) { 378 | return new Bot(Brand.UNKNOWN,BotFamily.ROBOT,"Pagespeed feed fetcher", ver, consumeUrlAndMozilla(context, "http://")); 379 | } else if ((ver = context.getcVersionAfterPattern("ClearBot/", MatchingType.BEGINS, MatchingRegion.REGULAR))!=null) { 380 | return new Bot(Brand.CLEARSWIFT,BotFamily.ROBOT,"ClearBot crawler", ver, getAndConsumeUrl(context, MatchingRegion.PARENTHESIS, "http://")); 381 | } else if ((ver = context.getcVersionAfterPattern("Mail.RU_Bot/", MatchingType.BEGINS, MatchingRegion.PARENTHESIS))!=null) { 382 | return new Bot(Brand.MAILRU,BotFamily.CRAWLER,"Mail.ru crawler", ver, consumeUrlAndMozilla(context, "http://go.mail.ru")); 383 | } else if ((ver = context.getcVersionAfterPattern("MJ12bot/", MatchingType.BEGINS, MatchingRegion.PARENTHESIS))!=null) { 384 | return new Bot(Brand.MAJESTIC12,BotFamily.CRAWLER,"Majestic 12", ver, consumeUrlAndMozilla(context, "http://")); 385 | } else if 
((ver = context.getcVersionAfterPattern("GigablastOpenSource/", MatchingType.BEGINS, MatchingRegion.REGULAR))!=null) { 386 | return new Bot(Brand.OTHER,BotFamily.CRAWLER,"GigaBlast Crawler", ver); 387 | } else if (context.getUA().equals("NetLyzer FastProbe")) { 388 | context.consumeAllTokens(); 389 | return new Bot(Brand.OTHER,BotFamily.ROBOT,"NetLyzer FastProbe", ""); 390 | } else if (context.getUA().equals("NerdyBot")) { 391 | context.consumeAllTokens(); 392 | return new Bot(Brand.OTHER,BotFamily.ROBOT,"Nerdy Bot", "", "http://nerdybot.com"); 393 | } else if (context.getUA().equals("PHPCrawl")) { 394 | context.consumeAllTokens(); 395 | return new Bot(Brand.OPENSOURCE_COMMUNITY,BotFamily.ROBOT,"PHP Crawl", "", "http://phpcrawl.cuab.de"); 396 | } else if (context.getUA().equals("updown_tester")) { 397 | context.consume("updown_tester", MatchingType.EQUALS, MatchingRegion.REGULAR); 398 | return new Bot(Brand.UNKNOWN,BotFamily.ROBOT,"Unknown (updown_tester)", ""); 399 | } else if (context.getUA().equals("YisouSpider")) { 400 | context.consume("YisouSpider", MatchingType.EQUALS, MatchingRegion.REGULAR); 401 | return new Bot(Brand.UNKNOWN,BotFamily.ROBOT,"YisouSpider", ""); 402 | } else if (context.getUA().equals("RSSGraffiti")) { 403 | context.consume("RSSGraffiti", MatchingType.EQUALS, MatchingRegion.REGULAR); 404 | return new Bot(Brand.SCRIBBLE,BotFamily.ROBOT,"RSS Graffiti", ""); 405 | } else if (context.getUA().startsWith("WordPress/")) { 406 | ver = context.getcVersionAfterPattern("WordPress/", MatchingType.BEGINS, MatchingRegion.REGULAR); 407 | return new Bot(Brand.OTHER, BotFamily.ROBOT, "WordPress", ver, getAndConsumeUrl(context, MatchingRegion.REGULAR, "http://")); 408 | } else if (context.getUA().contains("TuringOS; Turing Machine")) { 409 | // No idea. This thing only hit a few URLs and doesn't render them (no JS/CSS/IMGs)... 
410 | context.consumeAllTokens(); 411 | return new Bot(Brand.OTHER, BotFamily.ROBOT, "Turing", ""); 412 | } else if (context.getUA().indexOf("-1 && context.getUA().endsWith(" (Windows NT 5.1; U; en) Presto/2.10.229 Version/11.60")) { 413 | context.consumeAllTokens(); 414 | return new Bot(Brand.UNKNOWN, BotFamily.SPAMBOT, "Link reference bombing", ""); 415 | } else if (context.getLCUA().matches(".*