├── MANIFEST.in ├── notes ├── README.rst ├── setup.py └── httpagentparser └── __init__.py /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | -------------------------------------------------------------------------------- /notes: -------------------------------------------------------------------------------- 1 | python setup.py sdist upload 2 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ----- 3 | 4 | :: 5 | 6 | >>> import httpagentparser 7 | >>> s = "Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/532.9 (KHTML, like Gecko) Chrome/5.0.307.11 Safari/532.9" 8 | >>> print httpagentparser.simple_detect(s) 9 | ('Linux', 'Chrome 5.0.307.11') 10 | >>> print httpagentparser.detect(s) 11 | {'os': {'name': 'Linux'}, 12 | 'browser': {'version': '5.0.307.11', 'name': 'Chrome'}} 13 | 14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name='httpagentparser', 5 | version='1.0.2', 6 | description='Extracts OS Browser etc information from http user agent string', 7 | long_description=file("README.rst").read(), 8 | packages=find_packages(), 9 | author='Shekhar Tiwatne', 10 | author_email='pythonic@gmail.com', 11 | url="http://flavors.me/shon", 12 | license="http://www.opensource.org/licenses/mit-license.php", 13 | ) 14 | 15 | -------------------------------------------------------------------------------- /httpagentparser/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Extract client information from http user agent 3 | The module does not try to detect all capabilities of browser in current form (it can easily be 
"""
Extract client information from http user agent
The module does not try to detect all capabilities of browser in current form (it can easily be extended though).
Aim is
    * fast
    * very easy to extend
    * reliable enough for practical purposes
    * and assist python web apps to detect clients.
"""
import sys


def _version_after_mac_os(agent):
    """Return the text following the last 'Mac OS' marker, up to ';' or ')'.

    Underscores are converted to dots ("10_6_6" -> "10.6.6").  The
    terminators are tried in the order ';' then ')' (not by position in the
    string).  Returns None when neither terminator follows the marker.
    """
    part = agent.split('Mac OS')[-1].strip()
    for end_char in (';', ')'):
        if end_char in part:
            return part.split(end_char)[0].replace('_', '.')
    return None


class DetectorsHub(dict):
    """Registry mapping an info type ('os', 'dist', ...) to its detectors.

    Iterating the hub yields the info types in detection order rather than
    the dict keys in arbitrary order.
    """
    _known_types = ['os', 'dist', 'flavor', 'browser']

    def __init__(self, *args, **kw):
        dict.__init__(self, *args, **kw)
        # Work on a private copy: register() inserts new types into this
        # list and must not leak them into the class attribute shared by
        # every hub instance.
        self._known_types = list(self._known_types)
        for typ in self._known_types:
            self.setdefault(typ, [])
        self.registerDetectors()

    def register(self, detector):
        """Add *detector* under its info_type, creating the type if new."""
        if detector.info_type not in self._known_types:
            self[detector.info_type] = [detector]
            self._known_types.insert(detector.order, detector.info_type)
        else:
            self[detector.info_type].append(detector)

    def reorderByPrefs(self, detectors, prefs):
        """Return *detectors* ordered so that preferred names come first.

        prefs is None   -> [] (no detector of this type should be tried)
        prefs is empty  -> *detectors* unchanged
        otherwise       -> detectors named in *prefs* first, in *prefs*
                           order; the rest keep their relative order.

        *prefs* is never modified (it is usually a shared class attribute).
        """
        if prefs is None:
            return []
        if not prefs:
            return detectors
        rank = {}
        for position, name in enumerate(prefs):
            rank.setdefault(name, position)  # first occurrence wins
        return sorted(detectors, key=lambda d: rank.get(d.name, sys.maxsize))

    def __iter__(self):
        return iter(self._known_types)

    def registerDetectors(self):
        """Instantiate and register every DetectorBase subclass in this module."""
        # Snapshot the values so the module namespace is not iterated while
        # detector classes are being instantiated.
        candidates = list(globals().values())
        detectors = [v() for v in candidates
                     if isinstance(v, type) and issubclass(v, DetectorBase)]
        for d in detectors:
            if d.can_register:
                self.register(d)


class DetectorBase(object):
    """Base class for all detectors.

    A concrete detector normally only sets `look_for` (and optionally
    `name`, `skip_if_found`, `version_splitters`) or overrides getVersion().
    """
    name = ""  # "to perform match in DetectorsHub object"
    info_type = "override me"
    result_key = "override me"
    order = 10  # 0 is highest
    look_for = "string to look for"
    skip_if_found = []  # strings if present stop processing
    can_register = False
    prefs = dict()  # dict(info_type = [name1, name2], ..)
    version_splitters = ["/", " "]
    _suggested_detectors = None

    def __init__(self):
        if not self.name:
            self.name = self.__class__.__name__
        # Only a class that does not itself declare can_register is
        # registrable; the abstract intermediate classes declare it False.
        self.can_register = self.__class__.__dict__.get('can_register', True)

    def detect(self, agent, result):
        """Record name (and version, if any) in *result* when *agent* matches.

        Returns True on a match, False otherwise.
        """
        if not self.checkWords(agent):
            return False
        result[self.info_type] = dict(name=self.name)
        version = self.getVersion(agent)
        if version:
            result[self.info_type]['version'] = version
        return True

    def checkWords(self, agent):
        """Return True when `look_for` is in *agent* and no skip word is."""
        for w in self.skip_if_found:
            if w in agent:
                return False
        return self.look_for in agent

    def getVersion(self, agent):
        """Return the text between look_for+splitters[0] and splitters[1]."""
        start_marker = self.look_for + self.version_splitters[0]
        tail = agent.split(start_marker)[-1]
        return tail.split(self.version_splitters[1])[0].strip()


class OS(DetectorBase):
    info_type = "os"
    can_register = False
    version_splitters = [";", " "]


class Dist(DetectorBase):
    info_type = "dist"
    can_register = False


class Flavor(DetectorBase):
    info_type = "flavor"
    can_register = False


class Browser(DetectorBase):
    info_type = "browser"
    can_register = False


class Macintosh(OS):
    # The original file defined this class twice; only the later definition
    # (the one carrying the flavor preference) was ever in effect.
    look_for = 'Macintosh'
    prefs = dict(dist=None, flavor=['MacOS'])

    def getVersion(self, agent):
        return None


class Firefox(Browser):
    look_for = "Firefox"


class Konqueror(Browser):
    look_for = "Konqueror"
    version_splitters = ["/", ";"]


class Opera(Browser):
    look_for = "Opera"

    def getVersion(self, agent):
        """Prefer the 'Version/x' token (Opera >= 10), else 'Opera x'."""
        for marker in ("Version", "Opera"):
            pieces = agent.split(marker)
            if len(pieces) > 1:
                # [1:] drops the separator ('/' or ' ') after the marker.
                return pieces[1][1:].split(' ')[0]
        return None


class Netscape(Browser):
    look_for = "Netscape"


class MSIE(Browser):
    look_for = "MSIE"
    skip_if_found = ["Opera"]
    name = "Microsoft Internet Explorer"
    version_splitters = [" ", ";"]


class Galeon(Browser):
    look_for = "Galeon"


class WOSBrowser(Browser):
    look_for = "wOSBrowser"

    def getVersion(self, agent):
        return None


class Safari(Browser):
    look_for = "Safari"

    def checkWords(self, agent):
        """Match 'Safari' unless the agent is really another WebKit browser."""
        unless_list = ["Chrome", "OmniWeb", "wOSBrowser"]
        if self.look_for not in agent:
            return False
        for word in unless_list:
            if word in agent:
                return False
        return True

    def getVersion(self, agent):
        if "Version/" in agent:
            return agent.split('Version/')[-1].split(' ')[0].strip()
        # Mobile Safari
        return agent.split('Safari ')[-1].split(' ')[0].strip()


class Linux(OS):
    look_for = 'Linux'
    prefs = dict(browser=["Firefox"], dist=["Ubuntu", "Android"], flavor=None)

    def getVersion(self, agent):
        return None


class Blackberry(OS):
    look_for = 'BlackBerry'
    prefs = dict(dist=["BlackberryPlaybook"], flavor=None)

    def getVersion(self, agent):
        return None


class BlackberryPlaybook(Dist):
    look_for = 'PlayBook'

    def getVersion(self, agent):
        return None


class MacOS(Flavor):
    look_for = 'Mac OS'
    prefs = dict(browser=['Firefox', 'Opera', "Microsoft Internet Explorer"])

    def getVersion(self, agent):
        # Always a string for this detector ('' when nothing was found).
        return _version_after_mac_os(agent) or ''


class Windows(OS):
    look_for = 'Windows'
    # 'dist' was misspelled as 'dict' in the original preference table.
    prefs = dict(browser=["Microsoft Internet Explorer", 'Firefox'], dist=None, flavor=None)
    win_versions = {
        "NT 6.1": "7",
        "NT 6.0": "Vista",
        "NT 5.2": "Server 2003 / XP x64",
        "NT 5.1": "XP",
        "NT 5.01": "2000 SP1",
        "NT 5.0": "2000",
        "98; Win 9x 4.90": "Me"
    }

    def getVersion(self, agent):
        """Return the marketing name for the Windows token, else the raw token."""
        tail = agent.split('Windows')[-1].strip()
        # Keys that themselves contain ';' can never equal a ';'-delimited
        # token, so they are matched against the untruncated tail first.
        for key in self.win_versions:
            if ';' in key and tail.startswith(key):
                return self.win_versions[key]
        v = tail.split(';')[0].strip()
        if ')' in v:
            v = v.split(')')[0]
        return self.win_versions.get(v, v)


class Ubuntu(Dist):
    look_for = 'Ubuntu'
    version_splitters = ["/", " "]
    prefs = dict(browser=['Firefox'])


class Debian(Dist):
    look_for = 'Debian'
    version_splitters = ["/", " "]
    prefs = dict(browser=['Firefox'])


class Chrome(Browser):
    look_for = "Chrome"
    version_splitters = ["/", " "]


class ChromeOS(OS):
    look_for = "CrOS"
    version_splitters = [" ", " "]
    prefs = dict(browser=['Chrome'])

    def getVersion(self, agent):
        """Return the build number from 'CrOS <arch> <version>)'.

        Returns None instead of raising when the agent carries no version
        token after the architecture.
        """
        marker = self.look_for + self.version_splitters[0]
        if marker not in agent:
            return None
        tokens = agent.split(marker)[-1].split(self.version_splitters[1])
        if len(tokens) < 2:
            return None
        # tokens[0] is the architecture; the last character of the version
        # token is the closing ')' of the platform section.
        return tokens[1].strip()[:-1]


class Android(Dist):
    look_for = 'Android'

    def getVersion(self, agent):
        if "Mobile Safari" in agent:
            deviceType = "Phone"
        else:
            deviceType = "Tablet"
        aVersion = agent.split('Android')[-1].split(';')[0].strip()
        return deviceType + " " + aVersion


class WebOS(Dist):
    look_for = 'hpwOS'

    def getVersion(self, agent):
        return agent.split('hpwOS/')[-1].split(';')[0].strip()


class IPhone(Dist):
    look_for = 'iPhone'

    def getVersion(self, agent):
        return _version_after_mac_os(agent)


class IPad(Dist):
    look_for = 'iPad'

    def getVersion(self, agent):
        return _version_after_mac_os(agent)


detectorshub = DetectorsHub()


class Result(dict):
    """dict whose missing keys read as '' instead of raising KeyError."""

    def __missing__(self, k):
        return ""


def detect(agent):
    """Return a Result mapping info type -> dict(name=..., [version=...]).

    Only info types that matched appear in the result.  An empty or None
    *agent* yields an empty Result.
    """
    result = Result()
    if not agent:
        return result
    _suggested_detectors = []
    for info_type in detectorshub:
        detectors = _suggested_detectors or detectorshub[info_type]
        for detector in detectors:
            if detector.detect(agent, result):
                # NOTE(review): prefs are looked up under the *current*
                # info_type, and no bundled detector lists its own type, so
                # reorderByPrefs() always receives None here and the
                # suggestion list stays empty.  The effective behaviour is:
                # stop at the first matching detector that declares prefs,
                # otherwise keep scanning (a later match overwrites).
                if detector.prefs and not detector._suggested_detectors:
                    _suggested_detectors = detectorshub.reorderByPrefs(
                        detectors, detector.prefs.get(info_type))
                    detector._suggested_detectors = _suggested_detectors
                    break
    return result


def simple_detect(agent):
    """
    -> (os, browser) # tuple of strings
    """
    result = detect(agent)

    # Name parts are joined flavor, dist, os; the version comes from the
    # first of those (same order) that reports one.
    os_parts = []
    os_version = ""
    for key in ('flavor', 'dist', 'os'):
        if key not in result:
            continue
        info = result[key]
        os_parts.append(info['name'])
        if not os_version:
            os_version = info.get('version') or ""
    os_name = " ".join(os_parts) if os_parts else "Unknown OS"
    if os_version:
        os_name = " ".join((os_name, os_version))

    browser_info = result.get('browser') or {}
    browser = browser_info.get('name') or 'Unknown Browser'
    browser_version = browser_info.get('version') or ""
    if browser_version:
        browser = " ".join((browser, browser_version))

    return os_name, browser


if __name__ == '__main__':
    import time
    import unittest

    data = (
        ("Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-GB; rv:1.9.0.10) Gecko/2009042315 Firefox/3.0.10",
         ('MacOS Macintosh X 10.5', 'Firefox 3.0.10'),
         {'flavor': {'version': 'X 10.5', 'name': 'MacOS'}, 'os': {'name': 'Macintosh'}, 'browser': {'version': '3.0.10', 'name': 'Firefox'}},),
        ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_6) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.3 Safari/534.24,gzip(gfe)",
         ('MacOS Macintosh X 10.6.6', 'Chrome 11.0.696.3'),
         {'flavor': {'version': 'X 10.6.6', 'name': 'MacOS'}, 'os': {'name': 'Macintosh'}, 'browser': {'version': '11.0.696.3', 'name': 'Chrome'}},),
        ("Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2) Gecko/20100308 Ubuntu/10.04 (lucid) Firefox/3.6 GTB7.1",
         ('Ubuntu Linux 10.04', 'Firefox 3.6'),
         {'dist': {'version': '10.04', 'name': 'Ubuntu'}, 'os': {'name': 'Linux'}, 'browser': {'version': '3.6', 'name': 'Firefox'}},),
        ("Mozilla/5.0 (Linux; U; Android 2.2.1; fr-ch; A43 Build/FROYO) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
         ('Android Linux Phone 2.2.1', 'Safari 4.0'),
         {'dist': {'version': 'Phone 2.2.1', 'name': 'Android'}, 'os': {'name': 'Linux'}, 'browser': {'version': '4.0', 'name': 'Safari'}},),
        ("Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1A543a Safari/419.3",
         ('MacOS IPhone X', 'Safari 3.0'),
         {'flavor': {'version': 'X', 'name': 'MacOS'}, 'dist': {'version': 'X', 'name': 'IPhone'}, 'browser': {'version': '3.0', 'name': 'Safari'}},),
        ("Mozilla/5.0 (X11; CrOS i686 0.0.0) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.27 Safari/534.24,gzip(gfe)",
         ('ChromeOS 0.0.0', 'Chrome 11.0.696.27'),
         {'os': {'name': 'ChromeOS', 'version': '0.0.0'}, 'browser': {'name': 'Chrome', 'version': '11.0.696.27'}},),
        ("Mozilla/4.0 (compatible; MSIE 6.0; MSIE 5.5; Windows NT 5.1) Opera 7.02 [en]",
         ('Windows XP', 'Opera 7.02'),
         {'os': {'name': 'Windows', 'version': 'XP'}, 'browser': {'name': 'Opera', 'version': '7.02'}},),
        ("Opera/9.80 (X11; Linux i686; U; en) Presto/2.9.168 Version/11.50",
         ("Linux", "Opera 11.50"),
         {"os": {"name": "Linux"}, "browser": {"name": "Opera", "version": "11.50"}},),
        ("Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.7.5) Gecko/20060127 Netscape/8.1",
         ("Windows XP", "Netscape 8.1"),
         {'os': {'name': 'Windows', 'version': 'XP'}, 'browser': {'name': 'Netscape', 'version': '8.1'}},),
        ("Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.2; U; en-US) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/234.40.1 Safari/534.6 TouchPad/1.0",
         ("WebOS Linux 3.0.2", "WOSBrowser"),
         {'dist': {'name': 'WebOS', 'version': '3.0.2'}, 'os': {'name': 'Linux'}, 'browser': {'name': 'WOSBrowser'}},),
        ("Mozilla/5.0 (iPad; CPU OS 5_0_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9A405 Safari/7534.48.3",
         ('MacOS IPad X', 'Safari 5.1'),
         {'flavor': {'version': 'X', 'name': 'MacOS'}, 'dist': {'version': 'X', 'name': 'IPad'}, 'browser': {'version': '5.1', 'name': 'Safari'}},),
        ("Mozilla/5.0 (Linux; U; Android 3.2.1; en-gb; Transformer TF101 Build/HTK75) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
         ('Android Linux Tablet 3.2.1', 'Safari 4.0'),
         {'dist': {'version': 'Tablet 3.2.1', 'name': 'Android'}, 'os': {'name': 'Linux'}, 'browser': {'version': '4.0', 'name': 'Safari'}},),
        ("Mozilla/5.0 (BlackBerry; U; BlackBerry 9700; en-US) AppleWebKit/534.8+ (KHTML, like Gecko) Version/6.0.0.448 Mobile Safari/534.8+",
         ('Blackberry', 'Safari 6.0.0.448'),
         {'os': {'name': 'Blackberry'}, 'browser': {'version': '6.0.0.448', 'name': 'Safari'}},),
        ("Mozilla/5.0 (PlayBook; U; RIM Tablet OS 1.0.0; en-US) AppleWebKit/534.11+ (KHTML, like Gecko) Version/7.1.0.7 Safari/534.11+",
         ('BlackberryPlaybook', 'Safari 7.1.0.7'),
         {'dist': {'name': 'BlackberryPlaybook'}, 'browser': {'version': '7.1.0.7', 'name': 'Safari'}},),
    )

    class TestHAP(unittest.TestCase):
        def setUp(self):
            self.harass_repeat = 1000
            self.data = data

        def test_simple_detect(self):
            for agent, simple_res, res in self.data:
                self.assertEqual(simple_detect(agent), simple_res)

        def test_detect(self):
            for agent, simple_res, res in self.data:
                self.assertEqual(detect(agent), res)

        def test_harass(self):
            then = time.time()
            for agent, simple_res, res in self.data * self.harass_repeat:
                detect(agent)
            time_taken = time.time() - then
            no_of_tests = len(self.data) * self.harass_repeat
            # Single-argument print() behaves the same on Python 2 and 3.
            print("\nTime taken for %s detecttions: %s" % (no_of_tests, time_taken))
            print("Time taken for single detecttion:  %s" % (time_taken / no_of_tests))

    unittest.main()