├── .gitignore
├── README.md
├── build.xml
├── module-info.java
├── pom.xml
├── release
└── user-agent-detector.jar
├── src
└── main
│ └── java
│ └── net
│ └── pieroxy
│ └── ua
│ ├── detection
│ ├── Bot.java
│ ├── BotFamily.java
│ ├── BotsHelper.java
│ ├── Brand.java
│ ├── Browser.java
│ ├── BrowserFamily.java
│ ├── Country.java
│ ├── Device.java
│ ├── DeviceType.java
│ ├── Extension.java
│ ├── GeckoSpinoff.java
│ ├── GenericBot.java
│ ├── IUserAgentDetector.java
│ ├── Language.java
│ ├── Locale.java
│ ├── LocaleHelper.java
│ ├── Matcher.java
│ ├── MatchingRegion.java
│ ├── MatchingType.java
│ ├── OS.java
│ ├── OSFamily.java
│ ├── RenderingEngine.java
│ ├── RenderingEngineFamily.java
│ ├── StringUtils.java
│ ├── UserAgentContext.java
│ ├── UserAgentDetectionHelper.java
│ ├── UserAgentDetectionResult.java
│ ├── UserAgentDetector.java
│ └── VersionedObject.java
│ └── tooling
│ └── UserAgentTester.java
├── test-data
└── database.gz
└── test.sh
/.gitignore:
--------------------------------------------------------------------------------
1 | build/*
2 | release/tmp
3 | release/doc
4 | target/*
5 | .idea
6 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | java-user-agent-detection
2 | =========================
3 |
4 | Some code to deduce an OS/Platform/Browser out of a user-agent string, compatible with Java 1.6 or newer.
5 |
6 | ## Useful links
7 |
8 | * You will find all you need [on the homepage](http://pieroxy.net/user-agent/lib.html).
9 | * You can [browse the database](http://pieroxy.net/user-agent/db.html) - although it is a little rough right now.
10 | * You can [test any user-agent](http://pieroxy.net/user-agent/).
11 | * You can [browse the javadoc API](http://pieroxy.net/user-agent/doc/).
12 | * [Leave a comment on the blog](http://pieroxy.net/blog/2014/11/18/user_agent_detection_in_java.html).
13 |
14 | ## How to contribute
15 |
16 | I will not merge pull requests, as most of this code and tests are generated. What you can do is [open issues](https://github.com/pieroxy/java-user-agent-detection/issues) if you find problems or would like to suggest improvements. You can also leave a message [on the blog](http://pieroxy.net/blog/2014/11/18/user_agent_detection_in_java.html) if you just want to ask something.
17 |
18 | ## How to build with ant
19 |
20 | Building with ant is as simple as installing ant and typing ```ant```.
21 |
22 | Your jar can then be found in the ```build/``` directory and the javadoc in ```build/doc/index.html```.
23 |
24 |
25 | ## How to build with Maven
26 |
Building with Maven is as simple as installing Maven and typing ```mvn package```.
28 |
29 | Your jar can then be found in the ```target/``` directory.
30 |
--------------------------------------------------------------------------------
/build.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
--------------------------------------------------------------------------------
/module-info.java:
--------------------------------------------------------------------------------
// Module descriptor for the user-agent detection library.
module net.pieroxy.ua.detection {
// Only the detection package is exported; the source tree also contains a
// net.pieroxy.ua.tooling package, which is not exported here.
exports net.pieroxy.ua.detection;
}
4 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 | net.pieroxy.ua
5 | user_agent_detector
6 | jar
7 | 2015-10-31-SNAPSHOT
8 | User Agent Detection Library
9 | A library to extract bot, browser, device and OS from a User-Agent in Java 6 or newer.
10 | https://github.com/pieroxy/java-user-agent-detection/
11 |
12 |
13 |
14 | WTFPL
15 | http://www.wtfpl.net/
16 |
17 |
18 |
19 |
20 |
21 | Pierre Grimaud
22 | pieroxy most likely at pieroxy dot net
23 |
24 |
25 |
26 |
27 | scm:git:git://github.com/pieroxy/java-user-agent-detection.git
28 | scm:git:ssh://github.com/pieroxy/java-user-agent-detection.git
29 | https://github.com/pieroxy/java-user-agent-detection/tree/master
30 |
31 |
32 |
33 | https://github.com/pieroxy/java-user-agent-detection/issues
34 | GitHub Issues
35 |
36 |
37 |
38 |
39 |
40 | UTF-8
41 |
42 |
43 |
44 |
45 | org.apache.maven.plugins
46 | maven-compiler-plugin
47 | 3.1
48 |
49 | 1.6
50 | 1.6
51 |
52 |
53 |
54 | org.apache.maven.plugins
55 | maven-source-plugin
56 | 2.1.2
57 |
58 |
59 | attach-sources
60 | jar
61 |
62 |
63 |
64 |
65 |
66 |
67 |
--------------------------------------------------------------------------------
/release/user-agent-detector.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pieroxy/java-user-agent-detection/110625e86fa68650fdd3881bcacb934d20fc40b8/release/user-agent-detector.jar
--------------------------------------------------------------------------------
/src/main/java/net/pieroxy/ua/detection/Bot.java:
--------------------------------------------------------------------------------
1 | package net.pieroxy.ua.detection;
2 | import java.io.*;
3 | import java.util.*;
4 | /**
5 | * Describe a bot, which is a program that access sites automatically rather than a human browsing the web.
6 | */
7 | public class Bot {
8 | private Brand vendor;
9 | private BotFamily family;
10 | private String description;
11 | private String version;
12 | private String url;
13 |
14 | /**
15 | * This is the most detailed constructor of the Bot object. You can specifiy all of its fields.
16 | * @param _brand The vendor of this bot.
17 | * @param _family The family of this bot.
18 | * @param _description The text description of this bot.
19 | * @param _version The version of this bot.
20 | * @param _url The url describing this bot.
21 | */
22 | public Bot(Brand _brand, BotFamily _family, String _description, String _version, String _url) {
23 | this(_brand,_family,_description,_version);
24 | url = _url==null ? "" : _url;
25 | }
26 |
27 | /**
28 | * This constructor of the Bot object does not specify the url of the bot, initializing it to an empty string.
29 | * @param _brand The vendor of this bot.
30 | * @param _family The family of this bot.
31 | * @param _description The text description of this bot.
32 | * @param _version The version of this bot.
33 | */
34 | public Bot(Brand _brand, BotFamily _family, String _description, String _version) {
35 | family = _family;
36 | description = _description;
37 | version = _version;
38 | vendor = _brand;
39 | url = "";
40 | }
41 | public boolean equals(Object o) {
42 | if (o == null) return false;
43 | if (! (o instanceof Bot)) return false;
44 | Bot d = (Bot) o;
45 | if (d.vendor==null && vendor!=d.vendor) return false;
46 | if (d.getFamily()==null && family!=d.getFamily()) return false;
47 | if (d.description==null && description!=d.description) return false;
48 | if (d.version==null && version!=d.version) return false;
49 | if (d.url==null && url!=d.url) return false;
50 | return
51 | ( (d.getFamily()==null && family==null) || d.getFamily().equals(family) ) &&
52 | ( (d.description==null && description==null) || d.description.equals(description) ) &&
53 | ( (d.vendor==null && vendor==null) || d.vendor.equals(vendor) ) &&
54 | ( (d.url==null && url==null) || d.url.equals(url) ) &&
55 | ( (d.version==null && version==null) || d.version.equals(version) );
56 | }
57 | public int hashCode() {
58 | int res = 0;
59 | if (family != null) {
60 | res *= 3;
61 | res += family.hashCode();
62 | }
63 | if (version != null) {
64 | res *= 3;
65 | res += version.hashCode();
66 | }
67 | if (vendor!= null) {
68 | res *= 3;
69 | res += vendor.hashCode();
70 | }
71 | if (url!= null) {
72 | res *= 3;
73 | res += url.hashCode();
74 | }
75 | if (description != null) {
76 | res *= 3;
77 | res += description.hashCode();
78 | }
79 | return res;
80 | }
81 |
82 | /**
83 | * @return The vendor, usually the company that operates the bot, if known.
84 | */
85 | public Brand getVendor() {
86 | return vendor;
87 | }
88 | /** @return The family of the bot. SPAMBOT, WEB_CRAWLER, ... */
89 | public BotFamily getFamily() {
90 | return family;
91 | }
92 | /** @return The description of the bot. For example "Google Bot" or "Flipboard Proxy". */
93 | public String getDescription() {
94 | return description;
95 | }
96 | /** @return The version number */
97 | public String getVersion() {
98 | return version;
99 | }
100 | /** @return The URL the bot points at or a url describing the bot. */
101 | public String getUrl() {
102 | return url;
103 | }
104 | /** @param u The URL the bot points at or a url describing the bot. */
105 | public void setUrl(String u) {
106 | url = u;
107 | }
108 |
109 | }
--------------------------------------------------------------------------------
/src/main/java/net/pieroxy/ua/detection/BotFamily.java:
--------------------------------------------------------------------------------
1 | package net.pieroxy.ua.detection;
2 | import java.io.*;
3 | import java.util.*;
/**
 * The nature of a bot: what kind of automated client it is, and whether that kind is
 * considered nefarious.
 */
public enum BotFamily {

    /** A generic robot. */
    ROBOT("Robot", false),

    /** A spam bot. */
    SPAMBOT("Spam bot", true),

    /** A robot that hides as a regular browser. These are considered to be nefarious (because they hide.) */
    HIDDEN_BOT("Hidden bot", true),

    /** A robot used to crawl the web (Google's crawler, Bing's crawler, etc.) */
    CRAWLER("Web Crawler", false),

    /** Feed aggregator (Feedly, ...) */
    FEED_CRAWLER("Feed aggregator", false),

    /** Unknown type of bot. */
    UNKNOWN("", false),

    /** This UserAgentDetectionResult does not depict a bot. */
    NOT_A_BOT("Not a bot", false);

    private final String label;
    private final boolean nefarious;

    BotFamily(String displayLabel, boolean isBad) {
        this.label = displayLabel;
        this.nefarious = isBad;
    }

    /**
     * @return true if the bot is of the nefarious type.
     */
    public boolean isNefarious() {
        return this.nefarious;
    }

    /**
     * @return the text-based description of this bot family.
     */
    public String getLabel() {
        return this.label;
    }
}
--------------------------------------------------------------------------------
/src/main/java/net/pieroxy/ua/detection/BotsHelper.java:
--------------------------------------------------------------------------------
1 | package net.pieroxy.ua.detection;
2 | import java.io.*;
3 | import java.util.*;
4 | class BotsHelper {
5 |
6 | public static String[] getGroups(String regexp, String ua, int ... groups) {
7 | java.util.regex.Pattern pattern = java.util.regex.Pattern.compile(regexp);
8 | java.util.regex.Matcher m = pattern.matcher(ua);
9 |
10 | if (m.matches()) {
11 | String[]res = new String[groups.length];
12 | for (int i=0 ; i hiddenBots;
// Bot name (as captured by one of the generic patterns below) -> template Bot.
// getGenericBot() copies the template's vendor, family and description.
static private Map genericBotsBrandAndType;
// Complete user-agent string -> Bot, for agents recognised by exact equality.
static private Map genericBotsLiteral;
// Fallback template used by getGenericBot() when the captured bot name is not
// a key of genericBotsBrandAndType.
static private Bot genericBotBase = new Bot(Brand.OTHER, BotFamily.ROBOT, "", "");
// Generic user-agent shapes, tried in array order by getGenericBots(); each
// must match the whole user-agent string. The int[] holds the regex group
// indices of {bot name, version, url}; getGenericBot() treats 0 in the version
// or url slot as "no such group" and uses an empty string instead.
// NOTE(review): the trailing boolean is presumably GenericBot.discardAll, which
// makes getGenericBots() consume all tokens on a match - GenericBot is declared
// elsewhere, confirm there.
static private GenericBot[]genericBotsPatterns = new GenericBot[] {
// Mozilla/5.0 (compatible; Name/1.2; +http://url) with optional trailers
new GenericBot("Mozilla/5\\.0 \\(compatible; ?([^\\);/]+)/([0-9\\.]+[a-z]?); ?(MirrorDetector; )?(\\+? ?https?://[^\\)]+)\\)( AppleWebKit/[0-9\\.]+)?(/[0-9\\.]+[a-z]?)?(/\\*)?", new int[]{1,2,4}, true),
// Mozilla/5.0 (compatible; Name-1.2; +http://url)
new GenericBot("Mozilla/5\\.0 \\(compatible; ([^\\);/]+)\\-([0-9\\.]+); (\\+? ?https?://[^\\)]+)\\)", new int[]{1,2,3}, true),
// Mozilla/5.0 (compatible; Name; +http://url) - no version
new GenericBot("Mozilla/5\\.0 \\(compatible; ([^\\);/]+);? (\\+? ?https?://[^\\)]+)\\)", new int[]{1,0,2}, true),
// Name/1.2 (+http://url) optionally followed by more text
new GenericBot("([^\\(\\);/]+)/([0-9RC\\.]+) \\((\\+?https?://[^\\);]+)\\)( .*)?", new int[]{1,2,3}, true),
// Name (+http://url) optionally followed by more text - no version
new GenericBot("([^\\(\\);]+) \\((\\+?https?://[^\\);]+)\\)( .*)?", new int[]{1,0,2}, true),
// Name/1.2 (mail@host.tld) - the e-mail address is matched but not captured as a url
new GenericBot("([^\\(\\);/]+)/([0-9RC\\.]+) \\(([A-Za-z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,})\\)( .*)?", new int[]{1,2,0}, true),
// Name (mail@host.tld) - neither version nor url
new GenericBot("([^<>\\(\\);]+) \\(([A-Za-z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,})\\)", new int[]{1,0,0}, true),
};
// Version key -> OS, filled in by the static initializer.
// NOTE(review): keys look like "<CFNetwork version>/<Darwin version>"; the code
// that reads this map is outside this chunk - confirm.
private static Map mapCfNetworkOS;
// Same kind of key as mapCfNetworkOS -> architecture name ("PowerPC" or "Intel").
private static Map mapCfNetworkArchitecture;
36 |
// One-time population of the static lookup tables of this class.
static {
// Complete user-agent strings that getBot() reports as BotFamily.HIDDEN_BOT.
hiddenBots = new HashSet();
hiddenBots.add("Mozilla/4.0 (compatible; MSIE8.0; Windows NT 6.0) .NET CLR 2.0.50727)");
hiddenBots.add("Mozilla/0.6 Beta (Windows)");
hiddenBots.add("Mozilla/0.91 Beta (Windows)");

// Complete user-agent strings recognised by exact equality in getGenericBots().
genericBotsLiteral = new HashMap();
genericBotsLiteral.put("AdnormCrawler www.adnorm.com/crawler", new Bot(Brand.OTHER, BotFamily.ROBOT, "AdnormCrawler", ""));

// Bot name captured by a generic pattern -> template Bot giving vendor, family
// and description. The version is always "" here: getGenericBot() takes the
// version from the user-agent itself.
genericBotsBrandAndType = new HashMap();
// Complicated
genericBotsBrandAndType.put("YodaoBot", new Bot(Brand.NETEASE, BotFamily.CRAWLER, "Yodao Bot", ""));
genericBotsBrandAndType.put("Exabot", new Bot(Brand.EXALEAD, BotFamily.CRAWLER, "Exalead crawler", ""));
genericBotsBrandAndType.put("Baiduspider", new Bot(Brand.BAIDU, BotFamily.CRAWLER, "Baidu Web search", ""));

// Other form
genericBotsBrandAndType.put("bingbot", new Bot(Brand.MICROSOFT, BotFamily.CRAWLER, "Bing Bot", ""));

// Cleaned up:
genericBotsBrandAndType.put("CloudFlare-AlwaysOnline", new Bot(Brand.CLOUDFLARE, BotFamily.CRAWLER, "Always Online", ""));
genericBotsBrandAndType.put("Cloudflare-AMP", new Bot(Brand.CLOUDFLARE, BotFamily.CRAWLER, "AMP Discovery Fetcher", ""));
genericBotsBrandAndType.put("YodaoBot-Image", new Bot(Brand.NETEASE, BotFamily.CRAWLER, "Yodao Image Bot", ""));
genericBotsBrandAndType.put("Googlebot", new Bot(Brand.GOOGLE, BotFamily.CRAWLER, "Google Bot", ""));
genericBotsBrandAndType.put("Yahoo! Slurp", new Bot(Brand.YAHOO, BotFamily.CRAWLER, "Yahoo! Slurp", ""));
genericBotsBrandAndType.put("YandexAntivirus", new Bot(Brand.YANDEX, BotFamily.CRAWLER, "Yandex Crawler", ""));
genericBotsBrandAndType.put("YandexFavicons", new Bot(Brand.YANDEX, BotFamily.CRAWLER, "Yandex Crawler", ""));
genericBotsBrandAndType.put("YandexMedia", new Bot(Brand.YANDEX, BotFamily.CRAWLER, "Yandex Crawler", ""));
genericBotsBrandAndType.put("YandexImages", new Bot(Brand.YANDEX, BotFamily.CRAWLER, "Yandex Crawler", ""));
genericBotsBrandAndType.put("YandexImageResizer", new Bot(Brand.YANDEX, BotFamily.CRAWLER, "Yandex Crawler", ""));
genericBotsBrandAndType.put("YandexBot", new Bot(Brand.YANDEX, BotFamily.CRAWLER, "Yandex Crawler", ""));
genericBotsBrandAndType.put("proximic", new Bot(Brand.OTHER, BotFamily.CRAWLER, "Proximic Crawler", ""));
genericBotsBrandAndType.put("Speedy Spider", new Bot(Brand.ENTIREWEB, BotFamily.CRAWLER, "Speedy Spider", ""));
genericBotsBrandAndType.put("yoozBot", new Bot(Brand.OTHER, BotFamily.CRAWLER, "Yooz Bot", ""));
genericBotsBrandAndType.put("Lipperhey Link Explorer", new Bot(Brand.OTHER, BotFamily.ROBOT, "Lipperhey", ""));
genericBotsBrandAndType.put("Lipperhey Site Explorer", new Bot(Brand.OTHER, BotFamily.ROBOT, "Lipperhey", ""));
genericBotsBrandAndType.put("Lipperhey SEO Service", new Bot(Brand.OTHER, BotFamily.ROBOT, "Lipperhey", ""));
genericBotsBrandAndType.put("Lipperhey-Kaus-Australis", new Bot(Brand.OTHER, BotFamily.ROBOT, "Lipperhey", ""));
genericBotsBrandAndType.put("Exabot-Images", new Bot(Brand.EXALEAD, BotFamily.CRAWLER, "Exalead crawler", ""));
genericBotsBrandAndType.put("MegaIndex.ru", new Bot(Brand.MEGAINDEX, BotFamily.ROBOT, "MegaIndex.ru crawler", ""));
genericBotsBrandAndType.put("spbot", new Bot(Brand.ENTIREWEB, BotFamily.CRAWLER, "SEO Profiler", ""));
genericBotsBrandAndType.put("WBSearchBot", new Bot(Brand.OTHER, BotFamily.CRAWLER, "Ware Bay Search Crawler", ""));
genericBotsBrandAndType.put("BLEXBot", new Bot(Brand.OTHER, BotFamily.ROBOT, "BLEX Bot", ""));
genericBotsBrandAndType.put("meanpathbot", new Bot(Brand.MEANPATH, BotFamily.ROBOT, "meanpath", ""));
genericBotsBrandAndType.put("DuckDuckGo-Favicons-Bot", new Bot(Brand.DUCKDUCKGO, BotFamily.ROBOT, "Favicons bot", ""));
genericBotsBrandAndType.put("DomainTunoCrawler", new Bot(Brand.OTHER, BotFamily.CRAWLER, "Domain Tuno Crawler", ""));
genericBotsBrandAndType.put("SeznamBot", new Bot(Brand.SEZNAM, BotFamily.CRAWLER, "SeznamBot crawler", ""));
genericBotsBrandAndType.put("AhrefsBot", new Bot(Brand.OTHER, BotFamily.CRAWLER, "AhrefsBot", ""));
genericBotsBrandAndType.put("oBot", new Bot(Brand.IBM, BotFamily.ROBOT, "oBot", ""));
genericBotsBrandAndType.put("Google Desktop", new Bot(Brand.GOOGLE, BotFamily.CRAWLER, "Google Desktop Bot", ""));
genericBotsBrandAndType.put("Google-Adwords-Instant-Mobile", new Bot(Brand.GOOGLE, BotFamily.ROBOT, "Google Landing page inspection bot", ""));
genericBotsBrandAndType.put("Google-Structured-Data-Testing-Tool", new Bot(Brand.GOOGLE, BotFamily.ROBOT, "Google Structured Data Testing Tool", ""));

genericBotsBrandAndType.put("ltx71 -", new Bot(Brand.OTHER,BotFamily.ROBOT,"ltx71",""));
genericBotsBrandAndType.put("masscan", new Bot(Brand.UNKNOWN,BotFamily.CRAWLER,"Mass IP port scanner",""));
genericBotsBrandAndType.put("Baiduspider+", new Bot(Brand.BAIDU,BotFamily.CRAWLER,"Baidu Web search",""));
genericBotsBrandAndType.put("FeedlyBot", new Bot(Brand.OTHER,BotFamily.FEED_CRAWLER,"Feedly",""));
genericBotsBrandAndType.put("Y!J-ASR/0.1 crawler", new Bot(Brand.YAHOO,BotFamily.CRAWLER,"Yahoo Japan",""));
genericBotsBrandAndType.put("CCBot", new Bot(Brand.OTHER,BotFamily.CRAWLER,"Common Crawl",""));

// Version key -> Apple OS (Mac OS X or iOS) and its version.
// NOTE(review): keys look like "<CFNetwork version>/<Darwin version>"; the
// lookup code is outside this chunk - confirm the exact key format there.
mapCfNetworkOS = new HashMap();
mapCfNetworkOS.put("1.1/", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.2"));
mapCfNetworkOS.put("1.2.1/", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.3.2"));
mapCfNetworkOS.put("1.2.2/", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.3.9"));
mapCfNetworkOS.put("1.2.6/", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.3.9"));
mapCfNetworkOS.put("128/8.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.0"));
mapCfNetworkOS.put("128/8.1.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.1"));
mapCfNetworkOS.put("128.2/8.2.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.2"));
mapCfNetworkOS.put("129.5/8.3.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.3"));
mapCfNetworkOS.put("129.9/8.4.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.4"));
mapCfNetworkOS.put("129.9/8.5.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.5"));
mapCfNetworkOS.put("129.10/8.4.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.4"));
mapCfNetworkOS.put("129.10/8.5.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.5"));
mapCfNetworkOS.put("129.13/8.6.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.6"));
mapCfNetworkOS.put("129.16/8.7.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.7"));
mapCfNetworkOS.put("129.18/8.8.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.8"));
mapCfNetworkOS.put("129.20/8.9.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.9"));
mapCfNetworkOS.put("129.21/8.10.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.10"));
mapCfNetworkOS.put("129.22/8.11.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.4.11"));
mapCfNetworkOS.put("217/9.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.0"));
mapCfNetworkOS.put("220/9.1.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.1"));
mapCfNetworkOS.put("221.2/9.2.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.2 dev"));
mapCfNetworkOS.put("221.5/9.2.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.2"));
mapCfNetworkOS.put("330/9.3.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.3"));
mapCfNetworkOS.put("330.4/9.4.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.4"));
mapCfNetworkOS.put("339.5/9.5.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.5"));
mapCfNetworkOS.put("422.11/9.6.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.6"));
mapCfNetworkOS.put("438.12/9.7.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.7"));
mapCfNetworkOS.put("438.14/9.8.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.5.8"));
mapCfNetworkOS.put("454.4/10.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.6.0"));
mapCfNetworkOS.put("454.5/10.2.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.6.2"));
mapCfNetworkOS.put("454.9.4/10.3.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.6.3"));
mapCfNetworkOS.put("454.9.7/10.4.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.6.4"));
mapCfNetworkOS.put("454.11.5/10.5.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.6.5"));
mapCfNetworkOS.put("454.11.5/10.6.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.6.6"));
mapCfNetworkOS.put("454.11.12/10.7.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.6.7"));
mapCfNetworkOS.put("454.12.4/10.8.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.6.8"));
mapCfNetworkOS.put("459/10.0.0d3", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "3.1.3"));
mapCfNetworkOS.put("485.2/10.3.1", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "4"));
mapCfNetworkOS.put("485.10.2/10.3.1", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "4.1"));
mapCfNetworkOS.put("485.12.7/10.4.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "4.2.1"));
mapCfNetworkOS.put("485.12.30/10.4.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "4.2.8"));
mapCfNetworkOS.put("485.13.9/11.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "4.3.*"));
mapCfNetworkOS.put("520.0.13/11.1.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.7.1"));
mapCfNetworkOS.put("520.2.5/11.2.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.7.2"));
mapCfNetworkOS.put("520.3.2/11.3.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.7.3"));
mapCfNetworkOS.put("520.4.3/11.4.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.7.4"));
mapCfNetworkOS.put("520.5.1/11.4.2", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.7.5"));
mapCfNetworkOS.put("548.0.3/11.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "5"));
mapCfNetworkOS.put("548.0.4/11.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "5.0.1"));
mapCfNetworkOS.put("548.1.4/11.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "5.1"));
mapCfNetworkOS.put("596.0.1/12.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.8.0"));
mapCfNetworkOS.put("596.1/12.1.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.8.1"));
mapCfNetworkOS.put("596.2.3/12.2.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.8.2"));
mapCfNetworkOS.put("596.3.3/12.3.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.8.3"));
mapCfNetworkOS.put("596.4.3/12.4.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.8.4"));
mapCfNetworkOS.put("596.5/12.5.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.8.5"));
mapCfNetworkOS.put("596.6.2/12.5.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.8.5"));
mapCfNetworkOS.put("602/13.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "6.0-b3"));
mapCfNetworkOS.put("609/13.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "6.0.*"));
mapCfNetworkOS.put("609.1.4/13.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "6.1.*"));
mapCfNetworkOS.put("672.0.2/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "7.0.0-2"));
mapCfNetworkOS.put("672.0.8/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "7.0.3-6"));
mapCfNetworkOS.put("672.1.12/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "7.1-b5"));
mapCfNetworkOS.put("672.1.13/13.3.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "7.1"));
mapCfNetworkOS.put("672.1.13/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "7.1"));
mapCfNetworkOS.put("672.1.14/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "7.1.1"));
mapCfNetworkOS.put("672.1.15/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "7.1.2"));
mapCfNetworkOS.put("673.0.3/13.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.0"));
mapCfNetworkOS.put("673.0.3/13.0.2", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.1"));
mapCfNetworkOS.put("673.2.1/13.1.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.2"));
mapCfNetworkOS.put("673.3/13.2.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.3 beta"));
mapCfNetworkOS.put("673.3/13.3.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.3 beta"));
mapCfNetworkOS.put("673.3/13.4.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.3 beta"));
mapCfNetworkOS.put("673.4/13.2.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.3"));
mapCfNetworkOS.put("673.4/13.3.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.4"));
mapCfNetworkOS.put("673.4/13.4.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.5"));
mapCfNetworkOS.put("673.5/13.4.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.9.*"));
mapCfNetworkOS.put("696.0.2/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0"));
mapCfNetworkOS.put("699/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0"));
// NOTE(review): 703.1 maps to iOS 7.0 while 703.1.6 just below maps to 8.0 - verify.
mapCfNetworkOS.put("703.1/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "7.0"));
mapCfNetworkOS.put("703.1.6/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "8.0"));
mapCfNetworkOS.put("708.1/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0"));
mapCfNetworkOS.put("709.1/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0"));
mapCfNetworkOS.put("707/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0"));
mapCfNetworkOS.put("709/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0"));
mapCfNetworkOS.put("711.0.6/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "8.0.0-2"));
mapCfNetworkOS.put("711.1.12/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "8.1.0"));
mapCfNetworkOS.put("711.1.16/14.0.0", new OS(Brand.APPLE, OSFamily.IOS, "iOS", "8.1.1-3"));
mapCfNetworkOS.put("714/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0"));
mapCfNetworkOS.put("718/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0"));
mapCfNetworkOS.put("720.0.4/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0"));
mapCfNetworkOS.put("720.0.7/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0"));
mapCfNetworkOS.put("720.0.8/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0"));
mapCfNetworkOS.put("720.0.9/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.0"));
mapCfNetworkOS.put("720.1.1/14.0.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.1"));
mapCfNetworkOS.put("720.2.2/14.1.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.2"));
// NOTE(review): 720.2.3 maps to 10.10.1 although 720.2.2 and 720.2.4 map to 10.10.2 - verify.
mapCfNetworkOS.put("720.2.3/14.1.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.1"));
mapCfNetworkOS.put("720.2.4/14.1.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.2"));
mapCfNetworkOS.put("720.3.6/14.3.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.3"));
mapCfNetworkOS.put("720.3.9/14.3.0", new OS(Brand.APPLE, OSFamily.MACOSX, "MacOSX", "10.10.3"));

// Architecture for the keys where it is known; only the 128.x and 129.x
// (Mac OS X 10.4.0 - 10.4.5) entries are listed.
mapCfNetworkArchitecture = new HashMap();
mapCfNetworkArchitecture.put("128/8.0.0", "PowerPC");
mapCfNetworkArchitecture.put("128/8.1.0", "PowerPC");
mapCfNetworkArchitecture.put("128.2/8.2.0", "PowerPC");
mapCfNetworkArchitecture.put("129.5/8.3.0", "PowerPC");
mapCfNetworkArchitecture.put("129.9/8.4.0", "PowerPC");
mapCfNetworkArchitecture.put("129.9/8.5.0", "PowerPC");
mapCfNetworkArchitecture.put("129.10/8.4.0", "Intel");
mapCfNetworkArchitecture.put("129.10/8.5.0", "Intel");
}
208 |
209 | static String getAndConsumeUrl(UserAgentContext context, MatchingRegion region, String pattern) {
210 | String url = sanitizeUrl(context.getcToken(pattern, MatchingType.CONTAINS, region));
211 | return url;
212 | }
213 |
214 | static String sanitizeUrl(String url) {
215 | if (url==null) url="";
216 | if (url.startsWith("+http")) url = url.substring(1);
217 | if (url.startsWith("+ http")) url = url.substring(2);
218 | if (url.endsWith(";")) url = url.substring(0, url.length()-1);
219 | if (url.contains("; ")) url = url.substring(0, url.indexOf("; "));
220 | if (url.contains(", ")) url = url.substring(0, url.indexOf(", "));
221 | return url.trim();
222 | }
223 |
224 | static String consumeUrlAndMozilla(UserAgentContext context, String url) {
225 | UserAgentDetectionHelper.consumeMozilla(context);
226 | return getAndConsumeUrl(context, MatchingRegion.PARENTHESIS, url);
227 | }
228 |
229 | static Bot getGenericBots(String userAgent, UserAgentContext context) {
230 | for (Map.Entry e : genericBotsLiteral.entrySet()) {
231 | if (userAgent.equals(e.getKey())) {
232 | context.consumeAllTokens();
233 | return e.getValue();
234 | }
235 | }
236 |
237 | for (GenericBot gb : genericBotsPatterns) {
238 | Bot b = getGenericBot(gb, userAgent);
239 | if (b!=null) {
240 | if (gb.discardAll) context.consumeAllTokens();
241 | return b;
242 | }
243 | }
244 | return null;
245 | }
246 | static Bot getGenericBot(GenericBot gb, String userAgent) {
247 | java.util.regex.Matcher m = gb.pattern.matcher(userAgent);
248 |
249 | if (m.matches() && !userAgent.startsWith("Curl/PHP")) {
250 | String botName = m.group(gb.groups[0]);
251 | Bot baseBot = genericBotsBrandAndType.get(botName);
252 | String description = baseBot == null ? botName : baseBot.getDescription();
253 | if (baseBot == null) baseBot = genericBotBase;
254 | String version = gb.groups[1] == 0 ? "" : m.group(gb.groups[1]);
255 | String url = gb.groups[2] == 0 ? "" : m.group(gb.groups[2]);
256 | return new Bot(baseBot.getVendor(), baseBot.getFamily(), description, version, sanitizeUrl(url));
257 | }
258 | return null;
259 | }
260 |
261 | public static Bot getBot(UserAgentContext context) {
262 | int pos=0;
263 | String ver;
264 | String[]multi;
265 |
266 | Bot b = getGenericBots(context.getUA(), context) ;
267 | if (b != null) {
268 | return b;
269 | }
270 |
271 | if (hiddenBots.contains(context.getUA())) {
272 | context.consumeAllTokens();
273 | return new Bot(Brand.UNKNOWN,BotFamily.HIDDEN_BOT,"","");
274 | } else if (context.consume("commoncrawl.org/research//", MatchingType.BEGINS, MatchingRegion.REGULAR)) {
275 | ver = context.getcVersionAfterPattern("CCResearchBot/", MatchingType.BEGINS,MatchingRegion.BOTH);
276 | if (ver == null) ver="";
277 |
278 | return new Bot(Brand.OTHER,BotFamily.CRAWLER,"Common Crawl",ver , "http://commoncrawl.org/faqs/");
279 | } else if (context.getUA().equals("Qwantify/1.0")) {
280 | context.consumeAllTokens();
281 | return new Bot(Brand.QWANT,BotFamily.CRAWLER,"Qwant crawler","1.0");
282 | } else if (context.consume("via ggpht.com GoogleImageProxy", MatchingType.EQUALS, MatchingRegion.PARENTHESIS)) { // http://serverfault.com/questions/581857/apache-logs-flooded-with-connections-via-ggpht-com-googleimageproxy.
283 | return new Bot(Brand.GOOGLE,BotFamily.ROBOT,"Gmail image downloader proxy","");
284 | } else if (context.consume("Google-StructuredDataTestingTool", MatchingType.EQUALS, MatchingRegion.PARENTHESIS)) {
285 | return new Bot(Brand.GOOGLE,BotFamily.ROBOT,"Google Structured Data Testing Tool",consumeUrlAndMozilla(context, "+http://"));
286 | } else if (context.consume("ONDOWN3.2", MatchingType.EQUALS, MatchingRegion.PARENTHESIS)) { // Looks like a bot to me.
287 | return new Bot(Brand.UNKNOWN,BotFamily.ROBOT,"ONDOWN","3.2");
288 | } else if (context.consume("Google Web Preview", MatchingType.EQUALS, MatchingRegion.PARENTHESIS)) {
289 | context.consume("generic", MatchingType.EQUALS, MatchingRegion.PARENTHESIS);
290 | context.consume("iPhone", MatchingType.EQUALS, MatchingRegion.PARENTHESIS);
291 | return new Bot(Brand.GOOGLE, BotFamily.ROBOT,"Web Preview","");
292 | } else if ((multi = context.getcNextTokens(new Matcher[] {new Matcher("BusinessBot:", MatchingType.EQUALS),
293 | new Matcher("^[A-Za-z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$", MatchingType.REGEXP)
294 | },
295 | MatchingRegion.REGULAR)) != null) {
296 | return new Bot(Brand.OTHER, BotFamily.ROBOT,"BusinessBot","", "");
297 | }
298 | else if (context.consume("Contact: backend@getprismatic.com", MatchingType.EQUALS, MatchingRegion.PARENTHESIS) ||
299 | (multi = context.getcNextTokens(new Matcher[] {new Matcher("Contact:", MatchingType.EQUALS),
300 | new Matcher("feedback@getprismatic.com", MatchingType.EQUALS)
301 | },
302 | MatchingRegion.REGULAR)) != null) {
303 | return new Bot(Brand.OTHER, BotFamily.ROBOT,"Get Prismatic Bot","", "http://getprismatic.com/");
304 | }
305 | else if ((ver=context.getcVersionAfterPattern("Diffbot/", MatchingType.BEGINS,MatchingRegion.BOTH))!=null ||
306 | (ver=context.getcVersionAfterPattern("diffbot/", MatchingType.BEGINS,MatchingRegion.REGULAR))!=null ||
307 | context.contains("+http://www.diffbot.com", MatchingType.BEGINS,MatchingRegion.PARENTHESIS)) {
308 | return new Bot(Brand.OTHER, BotFamily.ROBOT,"Diffbot ", ver==null?"":ver, consumeUrlAndMozilla(context, "http://"));
309 | } else if ((ver=context.getcVersionAfterPattern("GWPImages/", MatchingType.BEGINS,MatchingRegion.PARENTHESIS))!=null) {
310 | return new Bot(Brand.OTHER, BotFamily.ROBOT,"GWPImages ", ver, consumeUrlAndMozilla(context, "http://"));
311 | } else if ((ver=context.getcVersionAfterPattern("LSSRocketCrawler/", MatchingType.BEGINS,MatchingRegion.REGULAR))!=null) {
312 | context.consume("LightspeedSystems", MatchingType.EQUALS, MatchingRegion.REGULAR);
313 | return new Bot(Brand.OTHER, BotFamily.ROBOT,"LSSRocketCrawler ", ver);
314 | } else if ((ver=context.getcVersionAfterPattern("OrangeBot/", MatchingType.BEGINS,MatchingRegion.PARENTHESIS))!=null) {
315 | context.consume("[0-9a-zA-Z\\.]+@[0-9a-zA-Z\\.]+", MatchingType.REGEXP, MatchingRegion.PARENTHESIS);
316 | return new Bot(Brand.ORANGE, BotFamily.CRAWLER,"Orange Bot ", ver, consumeUrlAndMozilla(context, "http://"));
317 | } else if ((ver=context.getcVersionAfterPattern("del.icio.us-thumbnails/", MatchingType.BEGINS,MatchingRegion.BOTH))!=null) {
318 | return new Bot(Brand.DELICIOUS, BotFamily.ROBOT,"Thumbnails crawler ", ver);
319 | } else if ((ver=context.getcVersionAfterPattern("EvoHtmlToPdf/", MatchingType.BEGINS,MatchingRegion.REGULAR))!=null) {
320 | return new Bot(Brand.OTHER,BotFamily.ROBOT,"EvoHtmlToPdf",ver);
321 | } else if ((ver=context.getcVersionAfterPattern("PhantomJS/", MatchingType.BEGINS,MatchingRegion.REGULAR,2))!=null) {
322 | if (context.consume("development", MatchingType.EQUALS,MatchingRegion.PARENTHESIS)) {
323 | ver += " dev";
324 | }
325 | context.consume("Unknown", MatchingType.EQUALS,MatchingRegion.PARENTHESIS);
326 | return new Bot(Brand.OPENSOURCE_COMMUNITY,BotFamily.ROBOT,"PhantomJS", ver);
327 | } else if (context.consume("theoldreader.com", MatchingType.EQUALS, MatchingRegion.PARENTHESIS)) {
328 | context.consume("feed-id=", MatchingType.BEGINS, MatchingRegion.PARENTHESIS);
329 | context.consume("[0-9]+ subscribers", MatchingType.REGEXP, MatchingRegion.PARENTHESIS);
330 | consumeUrlAndMozilla(context, "http://");
331 | return new Bot(Brand.GOOGLE,BotFamily.FEED_CRAWLER,"RSS Feed Fetcher","","http://theoldreader.com/");
332 | } else if (context.consume("Feedfetcher-Google;", MatchingType.EQUALS, MatchingRegion.REGULAR)) {
333 | context.consume("feed-id=", MatchingType.BEGINS, MatchingRegion.PARENTHESIS);
334 | context.consume("[0-9]+ subscribers", MatchingType.REGEXP, MatchingRegion.PARENTHESIS);
335 | return new Bot(Brand.GOOGLE,BotFamily.FEED_CRAWLER,"RSS Feed Fetcher","", getAndConsumeUrl(context, MatchingRegion.PARENTHESIS, "+http://www.google"));
336 | } else if (context.consume("Porkbun/Mustache", MatchingType.EQUALS, MatchingRegion.REGULAR)) {
337 | context.consume(".*@porkbun.com", MatchingType.REGEXP, MatchingRegion.PARENTHESIS);
338 | context.consume("Website Analysis", MatchingType.EQUALS, MatchingRegion.PARENTHESIS);
339 | return new Bot(Brand.OTHER,BotFamily.ROBOT,"Porkbun Website Analysis","", getAndConsumeUrl(context, MatchingRegion.PARENTHESIS, "http://"));
340 | } else if (context.consume("yacybot", MatchingType.EQUALS, MatchingRegion.REGULAR)) {
341 | context.consume("freeworld/global", MatchingType.EQUALS, MatchingRegion.PARENTHESIS);
342 | context.consume("yacy.net", MatchingType.EQUALS, MatchingRegion.REGULAR);
343 | return new Bot(Brand.YACI,BotFamily.CRAWLER,"Yacy bot","", getAndConsumeUrl(context, MatchingRegion.REGULAR, "http://"));
344 | } else if (context.consume("125LA", MatchingType.EQUALS, MatchingRegion.PARENTHESIS)) { // Will look for login forms and upload forms
345 | context.consume("Mozilla/4.0", MatchingType.EQUALS, MatchingRegion.REGULAR);
346 | context.consume("compatible", MatchingType.EQUALS, MatchingRegion.PARENTHESIS);
347 | context.consume("MSIE 9.0", MatchingType.EQUALS, MatchingRegion.PARENTHESIS);
348 | return new Bot(Brand.OTHER,BotFamily.SPAMBOT,"Unknown bot","");
349 | } else if ((ver = context.getcVersionAfterPattern("AvantGo ", MatchingType.BEGINS, MatchingRegion.PARENTHESIS))!=null) {
350 | context.consume("Mozilla/", MatchingType.BEGINS, MatchingRegion.REGULAR);
351 | return new Bot(Brand.OTHER,BotFamily.ROBOT,"AvantGo", ver);
352 | } else if ((ver = context.getcVersionAfterPattern("InfegyAtlas/", MatchingType.BEGINS, MatchingRegion.PARENTHESIS))!=null) {
353 | context.consume("Linux", MatchingType.EQUALS, MatchingRegion.PARENTHESIS);
354 | consumeUrlAndMozilla(context, "@");
355 | return new Bot(Brand.OTHER,BotFamily.ROBOT,"InfegyAtlas", ver, "http://infegy.com");
356 | } else if ((ver = context.getcVersionAfterPattern("Twitterbot/", MatchingType.BEGINS, MatchingRegion.REGULAR))!=null) {
357 | return new Bot(Brand.UNKNOWN,BotFamily.ROBOT,"Twitterbot", ver);
358 | } else if ((ver = context.getcVersionAfterPattern("BingPreview/", MatchingType.BEGINS, MatchingRegion.REGULAR))!=null) {
359 | return new Bot(Brand.MICROSOFT,BotFamily.ROBOT,"Bing Web Preview", ver);
360 | } else if ((ver = context.getcVersionAfterPattern("LinkScan/", MatchingType.BEGINS, MatchingRegion.REGULAR))!=null) {
361 | return new Bot(Brand.ELSOP,BotFamily.ROBOT,"LinkScan", ver);
362 | } else if ((ver = context.getcVersionAfterPattern("Fever/", MatchingType.BEGINS, MatchingRegion.REGULAR))!=null) {
363 | context.consume("Allow like Gecko", MatchingType.EQUALS, MatchingRegion.PARENTHESIS);
364 | context.consume("Feed Parser", MatchingType.EQUALS, MatchingRegion.PARENTHESIS);
365 | return new Bot(Brand.UNKNOWN,BotFamily.FEED_CRAWLER,"Feed A Fever", ver, getAndConsumeUrl(context, MatchingRegion.PARENTHESIS, "http://"));
366 | } else if (context.consume("NetShelter ContentScan(, contact [a-zA-Z0-9\\.]+@[a-zA-Z0-9\\.]+ for information)?", MatchingType.REGEXP, MatchingRegion.PARENTHESIS)) {
367 | return new Bot(Brand.UNKNOWN,BotFamily.ROBOT,"NetShelter ContentScan", "");
368 | } else if ((ver = context.getcVersionAfterPattern("SimplePie/", MatchingType.BEGINS, MatchingRegion.REGULAR))!=null) {
369 | context.consume("Build/", MatchingType.BEGINS, MatchingRegion.REGULAR);
370 | context.consume("Allow like Gecko", MatchingType.EQUALS, MatchingRegion.PARENTHESIS);
371 | context.consume("Feed Parser", MatchingType.EQUALS, MatchingRegion.PARENTHESIS);
372 | return new Bot(Brand.UNKNOWN,BotFamily.FEED_CRAWLER,"SimplePie", ver, getAndConsumeUrl(context, MatchingRegion.PARENTHESIS, "http://"));
373 | } else if ((ver = context.getcVersionAfterPattern("Qwantify/", MatchingType.BEGINS, MatchingRegion.PARENTHESIS))!=null) {
374 | return new Bot(Brand.OTHER,BotFamily.ROBOT,"Qwantify Crawler", ver, consumeUrlAndMozilla(context, "https://"));
375 | } else if ((ver = context.getcVersionAfterPattern("PageAnalyzer/", MatchingType.BEGINS, MatchingRegion.PARENTHESIS))!=null) {
376 | return new Bot(Brand.UNKNOWN,BotFamily.CRAWLER,"PageAnalyzer", ver, consumeUrlAndMozilla(context, "http://"));
377 | } else if ((ver = context.getcVersionAfterPattern("Pagespeed/", MatchingType.BEGINS, MatchingRegion.PARENTHESIS))!=null) {
378 | return new Bot(Brand.UNKNOWN,BotFamily.ROBOT,"Pagespeed feed fetcher", ver, consumeUrlAndMozilla(context, "http://"));
379 | } else if ((ver = context.getcVersionAfterPattern("ClearBot/", MatchingType.BEGINS, MatchingRegion.REGULAR))!=null) {
380 | return new Bot(Brand.CLEARSWIFT,BotFamily.ROBOT,"ClearBot crawler", ver, getAndConsumeUrl(context, MatchingRegion.PARENTHESIS, "http://"));
381 | } else if ((ver = context.getcVersionAfterPattern("Mail.RU_Bot/", MatchingType.BEGINS, MatchingRegion.PARENTHESIS))!=null) {
382 | return new Bot(Brand.MAILRU,BotFamily.CRAWLER,"Mail.ru crawler", ver, consumeUrlAndMozilla(context, "http://go.mail.ru"));
383 | } else if ((ver = context.getcVersionAfterPattern("MJ12bot/", MatchingType.BEGINS, MatchingRegion.PARENTHESIS))!=null) {
384 | return new Bot(Brand.MAJESTIC12,BotFamily.CRAWLER,"Majestic 12", ver, consumeUrlAndMozilla(context, "http://"));
385 | } else if ((ver = context.getcVersionAfterPattern("GigablastOpenSource/", MatchingType.BEGINS, MatchingRegion.REGULAR))!=null) {
386 | return new Bot(Brand.OTHER,BotFamily.CRAWLER,"GigaBlast Crawler", ver);
387 | } else if (context.getUA().equals("NetLyzer FastProbe")) {
388 | context.consumeAllTokens();
389 | return new Bot(Brand.OTHER,BotFamily.ROBOT,"NetLyzer FastProbe", "");
390 | } else if (context.getUA().equals("NerdyBot")) {
391 | context.consumeAllTokens();
392 | return new Bot(Brand.OTHER,BotFamily.ROBOT,"Nerdy Bot", "", "http://nerdybot.com");
393 | } else if (context.getUA().equals("PHPCrawl")) {
394 | context.consumeAllTokens();
395 | return new Bot(Brand.OPENSOURCE_COMMUNITY,BotFamily.ROBOT,"PHP Crawl", "", "http://phpcrawl.cuab.de");
396 | } else if (context.getUA().equals("updown_tester")) {
397 | context.consume("updown_tester", MatchingType.EQUALS, MatchingRegion.REGULAR);
398 | return new Bot(Brand.UNKNOWN,BotFamily.ROBOT,"Unknown (updown_tester)", "");
399 | } else if (context.getUA().equals("YisouSpider")) {
400 | context.consume("YisouSpider", MatchingType.EQUALS, MatchingRegion.REGULAR);
401 | return new Bot(Brand.UNKNOWN,BotFamily.ROBOT,"YisouSpider", "");
402 | } else if (context.getUA().equals("RSSGraffiti")) {
403 | context.consume("RSSGraffiti", MatchingType.EQUALS, MatchingRegion.REGULAR);
404 | return new Bot(Brand.SCRIBBLE,BotFamily.ROBOT,"RSS Graffiti", "");
405 | } else if (context.getUA().startsWith("WordPress/")) {
406 | ver = context.getcVersionAfterPattern("WordPress/", MatchingType.BEGINS, MatchingRegion.REGULAR);
407 | return new Bot(Brand.OTHER, BotFamily.ROBOT, "WordPress", ver, getAndConsumeUrl(context, MatchingRegion.REGULAR, "http://"));
408 | } else if (context.getUA().contains("TuringOS; Turing Machine")) {
409 | // No idea. This thing only hit a few URLs and doesn't render them (no JS/CSS/IMGs)...
410 | context.consumeAllTokens();
411 | return new Bot(Brand.OTHER, BotFamily.ROBOT, "Turing", "");
412 | } else if (context.getUA().indexOf("-1 && context.getUA().endsWith(" (Windows NT 5.1; U; en) Presto/2.10.229 Version/11.60")) {
413 | context.consumeAllTokens();
414 | return new Bot(Brand.UNKNOWN, BotFamily.SPAMBOT, "Link reference bombing", "");
415 | } else if (context.getLCUA().matches(".*