├── README.md
├── run.py
├── scrapy.cfg
└── sina
    ├── __init__.py
    ├── config.py
    ├── cookies.py
    ├── items.py
    ├── middlewares.py
    ├── pipelines.py
    ├── settings.py
    ├── spiders
    │   ├── __init__.py
    │   └── weibo_spider.py
    └── user_agents.py

/README.md:
--------------------------------------------------------------------------------
1 | # WeiboSpider
2 | This is a Sina Weibo spider built with Scrapy.
3 | 
4 | ## Update 2018/7/28
5 | 
6 | **Click here: [Weibo spider: tens of millions of records per day on a single machine && a hard-won summary of Weibo crawling](https://blog.csdn.net/nghuyong/article/details/81251948)**
7 | 
8 | ## Update 2018/7/27
9 | This spider first requires logging in to obtain Weibo cookies; only then can the crawler be run.
10 | 
11 | If your accounts are purchased ones, Weibo may judge them to be abnormal and present a sliding-grid CAPTCHA, in which case the cookie-fetching approach in this project no longer applies;
12 | see [this article](https://juejin.im/post/5acf0ffcf265da23826e5e20) for details.
13 | 
14 | To build a large-scale Weibo crawling system on top of this project, the **only** additional work is to **buy a large number of Weibo accounts and maintain an account pool**.
15 | 
16 | Weibo accounts can be bought [here](http://www.xiaohao.shop/); accessing that site requires getting around the Great Firewall.
17 | 
18 | ![](http://wx3.sinaimg.cn/mw690/006Ueclxly1ftoh9t49z3j31jw0ie77z.jpg)
19 | 
20 | I currently maintain a pool of 200+ accounts and distribute the crawl with Redis; the throughput is shown above: **roughly 8,000 records per minute, about 11 million records collected per day**.
21 | 
22 | I paid for this account pool myself, so it will not be shared.
23 | 
24 | If you genuinely need the data, contact me by email: nghuyong@163.com
25 | 
26 | ## Using this project
27 | Python version: Python 3.6
28 | ```bash
29 | git clone https://github.com/SimpleBrightMan/WeiboSpider.git
30 | # first fetch cookies and store them in the database
31 | python cookies.py
32 | # then run the spider
33 | python run.py
34 | ```
35 | 
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | from scrapy import cmdline
4 | 
5 | cmdline.execute("scrapy crawl SinaSpider".split(" "))
--------------------------------------------------------------------------------
/scrapy.cfg:
--------------------------------------------------------------------------------
1 | # Automatically created by: scrapy startproject
2 | #
3 | # For more information about the [deploy] section see:
4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html
5 | 
6 | [settings]
7 | default = sina.settings
8 | 
9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = sina
--------------------------------------------------------------------------------
/sina/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyqlr/weibo_spider/e44b0da039f92b346bdc7ac13d64364cf1bbc401/sina/__init__.py
--------------------------------------------------------------------------------
/sina/config.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | """ Initial queue of user IDs to crawl """
4 | weiboID = [
5 |     "5303798085"
6 | ]
7 | 
8 | PROXYPOOL = []
--------------------------------------------------------------------------------
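Note that config.py also declares a PROXYPOOL list that nothing else in the project currently reads. If it were populated, a minimal downloader middleware along the lines of the sketch below (hypothetical, not part of this repo; the "host:port" entry format is an assumption) could rotate proxies the same way the existing middlewares rotate User-Agents and cookies. It would also need to be registered in DOWNLOADER_MIDDLEWARES in settings.py.

```python
# Hypothetical sketch, not part of this repo: rotate proxies from PROXYPOOL.
import random

from sina.config import PROXYPOOL


class ProxyMiddleware(object):
    """ Assign a random proxy from PROXYPOOL to every outgoing request. """

    def process_request(self, request, spider):
        if PROXYPOOL:
            # Assumes entries look like "1.2.3.4:8080"; Scrapy's built-in
            # HttpProxyMiddleware honours request.meta['proxy'].
            request.meta['proxy'] = "http://%s" % random.choice(PROXYPOOL)
```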
/sina/cookies.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | import datetime
4 | import json
5 | import base64
6 | from time import sleep
7 | 
8 | import pymongo
9 | from selenium import webdriver
10 | from selenium.webdriver import ActionChains
11 | from selenium.webdriver.common.by import By
12 | from selenium.webdriver.support.ui import WebDriverWait
13 | from selenium.webdriver.support import expected_conditions as EC
14 | 
15 | """
16 | Fill in your Weibo accounts and passwords; they can be bought on Taobao for about one yuan per seven accounts.
17 | Buying a few dozen is recommended: Weibo's anti-crawling is aggressive, and requesting too frequently triggers 302 redirects.
18 | Alternatively, you can increase the request interval.
19 | """
20 | WeiBoAccounts = [
21 |     {'username': 'liujuan86088@163.com', 'password': '*****'},
22 | ]
23 | 
24 | cookies = []
25 | client = pymongo.MongoClient("localhost", 27017)
26 | db = client["Sina"]
27 | userAccount = db["userAccount"]
28 | 
29 | 
30 | def get_cookie_from_weibo(username, password):
31 |     driver = webdriver.Chrome()
32 |     driver.get('https://weibo.cn')
33 |     assert "微博" in driver.title
34 |     login_link = driver.find_element_by_link_text('登录')
35 |     ActionChains(driver).move_to_element(login_link).click().perform()
36 |     login_name = WebDriverWait(driver, 10).until(
37 |         EC.visibility_of_element_located((By.ID, "loginName"))
38 |     )
39 |     login_password = driver.find_element_by_id("loginPassword")
40 |     login_name.send_keys(username)
41 |     login_password.send_keys(password)
42 |     login_button = driver.find_element_by_id("loginAction")
43 |     login_button.click()
44 |     # Pause for 10 seconds to check whether the launched Chrome session logged in successfully; if it did not, log in manually.
45 |     sleep(10)
46 |     cookie = driver.get_cookies()
47 |     driver.close()
48 |     return cookie
49 | 
50 | 
51 | def init_cookies():
52 |     for cookie in userAccount.find():
53 |         cookies.append(cookie['cookie'])
54 | 
55 | 
56 | if __name__ == "__main__":
57 |     try:
58 |         userAccount.drop()
59 |     except Exception as e:
60 |         pass
61 |     for account in WeiBoAccounts:
62 |         cookie = get_cookie_from_weibo(account["username"], account["password"])
63 |         userAccount.insert_one({"_id": account["username"], "cookie": cookie})
64 | 
--------------------------------------------------------------------------------
/sina/items.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Define here the models for your scraped items
4 | #
5 | # See documentation in:
6 | # http://doc.scrapy.org/en/latest/topics/items.html
7 | 
8 | from scrapy import Item, Field
9 | 
10 | class InformationItem(Item):
11 |     """ User profile information """
12 |     _id = Field()  # user ID
13 |     NickName = Field()  # nickname
14 |     Gender = Field()  # gender
15 |     Province = Field()  # province
16 |     City = Field()  # city
17 |     BriefIntroduction = Field()  # bio
18 |     Birthday = Field()  # birthday
19 |     Num_Tweets = Field()  # number of tweets
20 |     Num_Follows = Field()  # number of accounts followed
21 |     Num_Fans = Field()  # number of followers
22 |     SexOrientation = Field()  # sexual orientation
23 |     Sentiment = Field()  # relationship status
24 |     VIPlevel = Field()  # VIP membership level
25 |     Authentication = Field()  # verification status
26 |     URL = Field()  # profile homepage URL
27 | 
28 | 
29 | class TweetsItem(Item):
30 |     """ Tweet (Weibo post) information """
31 |     _id = Field()  # user ID - tweet ID
32 |     ID = Field()  # user ID
33 |     Content = Field()  # tweet content
34 |     PubTime = Field()  # publish time
35 |     Co_oridinates = Field()  # geo coordinates
36 |     Tools = Field()  # client/platform used to post
37 |     Like = Field()  # number of likes
38 |     Comment = Field()  # number of comments
39 |     Transfer = Field()  # number of reposts
40 | 
41 | 
42 | class RelationshipsItem(Item):
43 |     """ User relationships; only the follow relation is kept """
44 |     fan_id = Field()  # ID of the follower
45 |     followed_id = Field()  # ID of the followed user
46 | 
--------------------------------------------------------------------------------
/sina/middlewares.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | import random
3 | from sina.cookies import cookies, init_cookies
4 | from sina.user_agents import agents
5 | 
6 | 
7 | class UserAgentMiddleware(object):
8 |     """ Rotate the User-Agent header """
9 | 
10 |     def process_request(self, request, spider):
11 |         agent = random.choice(agents)
12 |         request.headers["User-Agent"] = agent
13 | 
14 | 
15 | class CookiesMiddleware(object):
16 |     """ Rotate cookies """
17 | 
18 |     def __init__(self):
19 |         init_cookies()
20 | 
21 |     def process_request(self, request, spider):
22 |         cookie = random.choice(cookies)
23 |         request.cookies = cookie
24 | 
25 | 
26 | 
--------------------------------------------------------------------------------
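The cookie entries stored by cookies.py are the raw list of dicts returned by Selenium's driver.get_cookies(), and CookiesMiddleware above attaches that list directly to request.cookies. Before a long crawl it can be worth checking that the stored cookies still work; the helper below is a hypothetical sketch (not part of the repo) that converts a stored cookie list to a plain name -> value dict, the same way parse_information() in weibo_spider.py does, and probes weibo.cn. The "退出" (log out) marker used to detect a logged-in page is an assumption.

```python
# Hypothetical sketch, not part of this repo: probe whether stored cookies still work.
import pymongo
import requests

userAccount = pymongo.MongoClient("localhost", 27017)["Sina"]["userAccount"]


def cookie_seems_valid(cookie_list):
    """ Convert Selenium's cookie list to a name -> value dict and test weibo.cn. """
    jar = {c['name']: c['value'] for c in cookie_list}
    r = requests.get("https://weibo.cn", cookies=jar, timeout=5)
    # Assumption: a logged-in weibo.cn page contains a "退出" (log out) link.
    return r.status_code == 200 and "退出" in r.text


if __name__ == "__main__":
    for account in userAccount.find():
        print(account["_id"], cookie_seems_valid(account["cookie"]))
```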
/sina/pipelines.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Define your item pipelines here
4 | #
5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
7 | import pymongo
8 | 
9 | from sina.items import RelationshipsItem, TweetsItem, InformationItem
10 | 
11 | 
12 | class MongoDBPipeline(object):
13 |     def __init__(self):
14 |         client = pymongo.MongoClient("localhost", 27017)
15 |         db = client["Sina"]
16 |         self.Information = db["Information"]
17 |         self.Tweets = db["Tweets"]
18 |         self.Relationships = db["Relationships"]
19 | 
20 |     def process_item(self, item, spider):
21 |         """ Check the item type, handle it accordingly, and write it to the database """
22 |         if isinstance(item, RelationshipsItem):
23 |             try:
24 |                 self.Relationships.insert_one(dict(item))
25 |             except Exception:
26 |                 pass
27 |         elif isinstance(item, TweetsItem):
28 |             try:
29 |                 self.Tweets.insert_one(dict(item))
30 |             except Exception:
31 |                 pass
32 |         elif isinstance(item, InformationItem):
33 |             try:
34 |                 self.Information.insert_one(dict(item))
35 |             except Exception:
36 |                 pass
37 |         return item
38 | 
--------------------------------------------------------------------------------
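MongoDBPipeline writes everything into the three collections Information, Tweets and Relationships of the local Sina database. Once a crawl has run, the results can be inspected directly with pymongo; the snippet below is illustrative only (not part of the repo, assumes pymongo 3.7+ for count_documents) and uses the seed uid from config.py.

```python
# Illustrative only, not part of this repo: inspect what the pipeline has stored.
import pymongo

db = pymongo.MongoClient("localhost", 27017)["Sina"]

print(db["Information"].count_documents({}))                               # profiles scraped so far
print(db["Tweets"].count_documents({"ID": "5303798085"}))                  # tweets of the seed user
print(db["Relationships"].count_documents({"followed_id": "5303798085"}))  # follow edges pointing at the seed user

# Ten most-liked tweets captured so far
for doc in db["Tweets"].find({"Like": {"$exists": True}}).sort("Like", -1).limit(10):
    print(doc["Like"], doc["_id"])
```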
/sina/settings.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Scrapy settings for sina project
4 | #
5 | # For simplicity, this file contains only settings considered important or
6 | # commonly used. You can find more settings consulting the documentation:
7 | #
8 | # http://doc.scrapy.org/en/latest/topics/settings.html
9 | # http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
10 | # http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
11 | 
12 | BOT_NAME = 'sina'
13 | 
14 | SPIDER_MODULES = ['sina.spiders']
15 | NEWSPIDER_MODULE = 'sina.spiders'
16 | 
17 | # Configure maximum concurrent requests performed by Scrapy (default: 16)
18 | CONCURRENT_REQUESTS = 32
19 | 
20 | # Configure a delay for requests for the same website (default: 0)
21 | # See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
22 | # See also autothrottle settings and docs
23 | DOWNLOAD_DELAY = 0.5
24 | # The download delay setting will honor only one of:
25 | # CONCURRENT_REQUESTS_PER_DOMAIN = 16
26 | # CONCURRENT_REQUESTS_PER_IP = 16
27 | 
28 | # Disable cookies (enabled by default)
29 | # COOKIES_ENABLED = False
30 | 
31 | # Disable Telnet Console (enabled by default)
32 | # TELNETCONSOLE_ENABLED = False
33 | 
34 | # Override the default request headers:
35 | # DEFAULT_REQUEST_HEADERS = {
36 | #     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
37 | #     'Accept-Language': 'en',
38 | # }
39 | 
40 | # Enable or disable spider middlewares
41 | # See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
42 | # SPIDER_MIDDLEWARES = {
43 | #     'sina.middlewares.SinaSpiderMiddleware': 543,
44 | # }
45 | 
46 | # Enable or disable downloader middlewares
47 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
48 | DOWNLOADER_MIDDLEWARES = {
49 |     'sina.middlewares.UserAgentMiddleware': 401,
50 |     'sina.middlewares.CookiesMiddleware': 402,
51 | }
52 | 
53 | # Enable or disable extensions
54 | # See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
55 | # EXTENSIONS = {
56 | #     'scrapy.extensions.telnet.TelnetConsole': None,
57 | # }
58 | 
59 | # Configure item pipelines
60 | # See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
61 | ITEM_PIPELINES = {
62 |     'sina.pipelines.MongoDBPipeline': 300,
63 | }
64 | 
65 | # Enable and configure the AutoThrottle extension (disabled by default)
66 | # See http://doc.scrapy.org/en/latest/topics/autothrottle.html
67 | # AUTOTHROTTLE_ENABLED = True
68 | # The initial download delay
69 | # AUTOTHROTTLE_START_DELAY = 5
70 | # The maximum download delay to be set in case of high latencies
71 | # AUTOTHROTTLE_MAX_DELAY = 60
72 | # The average number of requests Scrapy should be sending in parallel to
73 | # each remote server
74 | # AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
75 | # Enable showing throttling stats for every response received:
76 | # AUTOTHROTTLE_DEBUG = False
77 | 
78 | # Enable and configure HTTP caching (disabled by default)
79 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
80 | # HTTPCACHE_ENABLED = True
81 | # HTTPCACHE_EXPIRATION_SECS = 0
82 | # HTTPCACHE_DIR = 'httpcache'
83 | # HTTPCACHE_IGNORE_HTTP_CODES = []
84 | # HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
85 | 
86 | # LOG_FILE = "/mnt/mongodb/data/weibo.log"
--------------------------------------------------------------------------------
/sina/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 | 
--------------------------------------------------------------------------------
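weibo_spider.py below holds the entire crawl logic: it seeds from the weiboID list in config.py, requests https://weibo.cn/<uid>/info for each user, and then follows that user's tweets, follow list and fan list. Profile fields are extracted by joining all text nodes of the page with ";" and matching labelled fields with regexes. A standalone illustration of that extraction step is shown here; the sample string is fabricated for demonstration and is not real page output.

```python
# Standalone illustration of the extraction style used in parse_information()
# below: all text nodes are joined with ";" and labelled fields are pulled out
# with regexes. The sample string is fabricated.
import re

text1 = "昵称:张三;性别:男;地区:北京 海淀区;生日:1990-01-01;"

nickname = re.findall('昵称;?[::]?(.*?);', text1)  # -> ['张三']
gender = re.findall('性别;?[::]?(.*?);', text1)    # -> ['男']
place = re.findall('地区;?[::]?(.*?);', text1)     # -> ['北京 海淀区']
print(nickname, gender, place)
```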
/sina/spiders/weibo_spider.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | 
4 | import datetime
5 | import requests
6 | import re
7 | from lxml import etree
8 | from scrapy import Spider
9 | from scrapy.selector import Selector
10 | from scrapy.http import Request
11 | 
12 | from sina.config import weiboID
13 | from sina.items import TweetsItem, InformationItem, RelationshipsItem
14 | 
15 | 
16 | class SinaSpider(Spider):
17 |     name = "SinaSpider"
18 |     host = "https://weibo.cn"
19 |     start_urls = list(set(weiboID))
20 | 
21 |     def start_requests(self):
22 |         for uid in self.start_urls:
23 |             yield Request(url="https://weibo.cn/%s/info" % uid, callback=self.parse_information)
24 | 
25 |     def parse_information(self, response):
26 |         """ Scrape a user's profile information """
27 |         informationItem = InformationItem()
28 |         selector = Selector(response)
29 |         ID = re.findall('(\d+)/info', response.url)[0]
30 |         try:
31 |             text1 = ";".join(selector.xpath('body/div[@class="c"]//text()').extract())  # join all text() nodes inside the tags
32 |             nickname = re.findall('昵称;?[::]?(.*?);', text1)
33 |             gender = re.findall('性别;?[::]?(.*?);', text1)
34 |             place = re.findall('地区;?[::]?(.*?);', text1)
35 |             briefIntroduction = re.findall('简介;?[::]?(.*?);', text1)
36 |             birthday = re.findall('生日;?[::]?(.*?);', text1)
37 |             sexOrientation = re.findall('性取向;?[::]?(.*?);', text1)
38 |             sentiment = re.findall('感情状况;?[::]?(.*?);', text1)
39 |             vipLevel = re.findall('会员等级;?[::]?(.*?);', text1)
40 |             authentication = re.findall('认证;?[::]?(.*?);', text1)
41 |             url = re.findall('互联网;?[::]?(.*?);', text1)
42 | 
43 |             informationItem["_id"] = ID
44 |             if nickname and nickname[0]:
45 |                 informationItem["NickName"] = nickname[0].replace(u"\xa0", "")
46 |             if gender and gender[0]:
47 |                 informationItem["Gender"] = gender[0].replace(u"\xa0", "")
48 |             if place and place[0]:
49 |                 place = place[0].replace(u"\xa0", "").split(" ")
50 |                 informationItem["Province"] = place[0]
51 |                 if len(place) > 1:
52 |                     informationItem["City"] = place[1]
53 |             if briefIntroduction and briefIntroduction[0]:
54 |                 informationItem["BriefIntroduction"] = briefIntroduction[0].replace(u"\xa0", "")
55 |             if birthday and birthday[0]:
56 |                 try:
57 |                     birthday = datetime.datetime.strptime(birthday[0], "%Y-%m-%d")
58 |                     informationItem["Birthday"] = birthday - datetime.timedelta(hours=8)
59 |                 except Exception:
60 |                     informationItem['Birthday'] = birthday[0]  # may be a zodiac sign rather than a date
61 |             if sexOrientation and sexOrientation[0]:
62 |                 if sexOrientation[0].replace(u"\xa0", "") == gender[0]:
63 |                     informationItem["SexOrientation"] = "同性恋"
64 |                 else:
65 |                     informationItem["SexOrientation"] = "异性恋"
66 |             if sentiment and sentiment[0]:
67 |                 informationItem["Sentiment"] = sentiment[0].replace(u"\xa0", "")
68 |             if vipLevel and vipLevel[0]:
69 |                 informationItem["VIPlevel"] = vipLevel[0].replace(u"\xa0", "")
70 |             if authentication and authentication[0]:
71 |                 informationItem["Authentication"] = authentication[0].replace(u"\xa0", "")
72 |             if url:
73 |                 informationItem["URL"] = url[0]
74 | 
75 |             try:
76 |                 urlothers = "https://weibo.cn/attgroup/opening?uid=%s" % ID
77 |                 new_ck = {}
78 |                 for ck in response.request.cookies:
79 |                     new_ck[ck['name']] = ck['value']
80 |                 r = requests.get(urlothers, cookies=new_ck, timeout=5)
81 |                 if r.status_code == 200:
82 |                     selector = etree.HTML(r.content)
83 |                     texts = ";".join(selector.xpath('//body//div[@class="tip2"]/a//text()'))
84 |                     if texts:
85 |                         num_tweets = re.findall('微博\[(\d+)\]', texts)
86 |                         num_follows = re.findall('关注\[(\d+)\]', texts)
87 |                         num_fans = re.findall('粉丝\[(\d+)\]', texts)
88 |                         if num_tweets:
89 |                             informationItem["Num_Tweets"] = int(num_tweets[0])
90 |                         if num_follows:
91 |                             informationItem["Num_Follows"] = int(num_follows[0])
92 |                         if num_fans:
93 |                             informationItem["Num_Fans"] = int(num_fans[0])
94 |             except Exception as e:
95 |                 pass
96 |         except Exception as e:
97 |             pass
98 |         else:
99 |             yield informationItem
100 |             if informationItem.get("Num_Tweets") and informationItem["Num_Tweets"] < 5000:  # .get() avoids a KeyError when the count is missing
101 |                 yield Request(url="https://weibo.cn/%s/profile?filter=1&page=1" % ID, callback=self.parse_tweets,
102 |                               dont_filter=True)
103 |             if informationItem.get("Num_Follows") and informationItem["Num_Follows"] < 500:
104 |                 yield Request(url="https://weibo.cn/%s/follow" % ID, callback=self.parse_relationship, dont_filter=True)
105 |             if informationItem.get("Num_Fans") and informationItem["Num_Fans"] < 500:
106 |                 yield Request(url="https://weibo.cn/%s/fans" % ID, callback=self.parse_relationship, dont_filter=True)
107 | 
108 |     def parse_tweets(self, response):
109 |         """ Scrape the user's tweets """
110 |         selector = Selector(response)
111 |         ID = re.findall('(\d+)/profile', response.url)[0]
112 |         divs = selector.xpath('body/div[@class="c" and @id]')
113 |         for div in divs:
114 |             try:
115 |                 tweetsItems = TweetsItem()
116 |                 id = div.xpath('@id').extract_first()  # tweet ID
117 |                 content = div.xpath('div/span[@class="ctt"]//text()').extract()  # tweet content
118 |                 cooridinates = div.xpath('div/a/@href').extract()  # geo coordinates
119 |                 like = re.findall('赞\[(\d+)\]', div.extract())  # number of likes
120 |                 transfer = re.findall('转发\[(\d+)\]', div.extract())  # number of reposts
121 |                 comment = re.findall('评论\[(\d+)\]', div.extract())  # number of comments
122 |                 others = div.xpath('div/span[@class="ct"]/text()').extract()  # publish time and client used (phone or platform)
123 | 
124 |                 tweetsItems["_id"] = ID + "-" + id
125 |                 tweetsItems["ID"] = ID
126 |                 if content:
127 |                     tweetsItems["Content"] = " ".join(content).strip('[位置]')  # strip the trailing "[位置]" (location) marker
128 |                 if cooridinates:
129 |                     cooridinates = re.findall('center=([\d.,]+)', cooridinates[0])
130 |                     if cooridinates:
131 |                         tweetsItems["Co_oridinates"] = cooridinates[0]
132 |                 if like:
133 |                     tweetsItems["Like"] = int(like[0])
134 |                 if transfer:
135 |                     tweetsItems["Transfer"] = int(transfer[0])
136 |                 if comment:
137 |                     tweetsItems["Comment"] = int(comment[0])
138 |                 if others:
139 |                     others = others[0].split('来自')  # split publish time from the "来自" (posted via) client string
140 |                     tweetsItems["PubTime"] = others[0].replace(u"\xa0", "")
141 |                     if len(others) == 2:
142 |                         tweetsItems["Tools"] = others[1].replace(u"\xa0", "")
143 |                 yield tweetsItems
144 |             except Exception as e:
145 |                 self.logger.info(e)
146 |                 pass
147 | 
148 |         url_next = selector.xpath('body/div[@class="pa" and @id="pagelist"]/form/div/a[text()="下页"]/@href').extract()
149 |         if url_next:
150 |             yield Request(url=self.host + url_next[0], callback=self.parse_tweets, dont_filter=True)
151 | 
152 |     def parse_relationship(self, response):
153 |         """ Open a follow/fans page and collect the user IDs listed on it """
154 |         selector = Selector(response)
155 |         if "/follow" in response.url:
156 |             ID = re.findall('(\d+)/follow', response.url)[0]
157 |             flag = True
158 |         else:
159 |             ID = re.findall('(\d+)/fans', response.url)[0]
160 |             flag = False
161 |         urls = selector.xpath('//a[text()="关注他" or text()="关注她"]/@href').extract()
162 |         uids = re.findall('uid=(\d+)', ";".join(urls), re.S)
163 |         for uid in uids:
164 |             relationshipsItem = RelationshipsItem()
165 |             relationshipsItem["fan_id"] = ID if flag else uid
166 |             relationshipsItem["followed_id"] = uid if flag else ID
167 |             yield relationshipsItem
168 |             yield Request(url="https://weibo.cn/%s/info" % uid, callback=self.parse_information)
169 | 
170 |         next_url = selector.xpath('//a[text()="下页"]/@href').extract()
171 |         if next_url:
172 |             yield Request(url=self.host + next_url[0], callback=self.parse_relationship, dont_filter=True)
173 | 
--------------------------------------------------------------------------------
/sina/user_agents.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | 
4 | """ User-Agents """
5 | agents = [
6 |     "Mozilla/5.0 (Linux; U; Android 2.3.6; en-us; Nexus S Build/GRK39F) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
7 |     "Avant Browser/1.2.789rel1 (http://www.avantbrowser.com)",
8 |     "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.0 Safari/532.5",
9 |     "Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.9 (KHTML, like Gecko) Chrome/5.0.310.0 Safari/532.9",
10 |     "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.514.0 Safari/534.7",
11 |     "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/9.0.601.0 Safari/534.14",
12 |     "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/10.0.601.0 Safari/534.14",
13 |     "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20",
14 |     "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.27 (KHTML, like Gecko) Chrome/12.0.712.0 Safari/534.27",
15 |     "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.24 Safari/535.1",
16 |     "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2",
17 |     "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7",
18 |     "Mozilla/5.0 (Windows; U; 
Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421 Minefield/3.0.2pre", 19 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10", 20 | "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11 (.NET CLR 3.5.30729)", 21 | "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 GTB5", 22 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; tr; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8 ( .NET CLR 3.5.30729; .NET4.0E)", 23 | "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1", 24 | "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1", 25 | "Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0", 26 | "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0a2) Gecko/20110622 Firefox/6.0a2", 27 | "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1", 28 | "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0b4pre) Gecko/20100815 Minefield/4.0b4pre", 29 | "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0 )", 30 | "Mozilla/4.0 (compatible; MSIE 5.5; Windows 98; Win 9x 4.90)", 31 | "Mozilla/5.0 (Windows; U; Windows XP) Gecko MultiZilla/1.6.1.0a", 32 | "Mozilla/2.02E (Win95; U)", 33 | "Mozilla/3.01Gold (Win95; I)", 34 | "Mozilla/4.8 [en] (Windows NT 5.1; U)", 35 | "Mozilla/5.0 (Windows; U; Win98; en-US; rv:1.4) Gecko Netscape/7.1 (ax)", 36 | "HTC_Dream Mozilla/5.0 (Linux; U; Android 1.5; en-ca; Build/CUPCAKE) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1", 37 | "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.2; U; de-DE) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/234.40.1 Safari/534.6 TouchPad/1.0", 38 | "Mozilla/5.0 (Linux; U; Android 1.5; en-us; sdk Build/CUPCAKE) AppleWebkit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1", 39 | "Mozilla/5.0 (Linux; U; Android 2.1; en-us; Nexus One Build/ERD62) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17", 40 | "Mozilla/5.0 (Linux; U; Android 2.2; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", 41 | "Mozilla/5.0 (Linux; U; Android 1.5; en-us; htc_bahamas Build/CRB17) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1", 42 | "Mozilla/5.0 (Linux; U; Android 2.1-update1; de-de; HTC Desire 1.19.161.5 Build/ERE27) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17", 43 | "Mozilla/5.0 (Linux; U; Android 2.2; en-us; Sprint APA9292KT Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", 44 | "Mozilla/5.0 (Linux; U; Android 1.5; de-ch; HTC Hero Build/CUPCAKE) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1", 45 | "Mozilla/5.0 (Linux; U; Android 2.2; en-us; ADR6300 Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", 46 | "Mozilla/5.0 (Linux; U; Android 2.1; en-us; HTC Legend Build/cupcake) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17", 47 | "Mozilla/5.0 (Linux; U; Android 1.5; de-de; HTC Magic Build/PLAT-RC33) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1 FirePHP/0.3", 48 | "Mozilla/5.0 (Linux; U; Android 1.6; en-us; HTC_TATTOO_A3288 Build/DRC79) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1", 49 | "Mozilla/5.0 (Linux; U; Android 1.0; en-us; dream) AppleWebKit/525.10 (KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2", 50 | "Mozilla/5.0 (Linux; U; Android 1.5; 
en-us; T-Mobile G1 Build/CRB43) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari 525.20.1", 51 | "Mozilla/5.0 (Linux; U; Android 1.5; en-gb; T-Mobile_G2_Touch Build/CUPCAKE) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1", 52 | "Mozilla/5.0 (Linux; U; Android 2.0; en-us; Droid Build/ESD20) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17", 53 | "Mozilla/5.0 (Linux; U; Android 2.2; en-us; Droid Build/FRG22D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", 54 | "Mozilla/5.0 (Linux; U; Android 2.0; en-us; Milestone Build/ SHOLS_U2_01.03.1) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17", 55 | "Mozilla/5.0 (Linux; U; Android 2.0.1; de-de; Milestone Build/SHOLS_U2_01.14.0) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17", 56 | "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/525.10 (KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2", 57 | "Mozilla/5.0 (Linux; U; Android 0.5; en-us) AppleWebKit/522 (KHTML, like Gecko) Safari/419.3", 58 | "Mozilla/5.0 (Linux; U; Android 1.1; en-gb; dream) AppleWebKit/525.10 (KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2", 59 | "Mozilla/5.0 (Linux; U; Android 2.0; en-us; Droid Build/ESD20) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17", 60 | "Mozilla/5.0 (Linux; U; Android 2.1; en-us; Nexus One Build/ERD62) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17", 61 | "Mozilla/5.0 (Linux; U; Android 2.2; en-us; Sprint APA9292KT Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", 62 | "Mozilla/5.0 (Linux; U; Android 2.2; en-us; ADR6300 Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", 63 | "Mozilla/5.0 (Linux; U; Android 2.2; en-ca; GT-P1000M Build/FROYO) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", 64 | "Mozilla/5.0 (Linux; U; Android 3.0.1; fr-fr; A500 Build/HRI66) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13", 65 | "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/525.10 (KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2", 66 | "Mozilla/5.0 (Linux; U; Android 1.6; es-es; SonyEricssonX10i Build/R1FA016) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1", 67 | "Mozilla/5.0 (Linux; U; Android 1.6; en-us; SonyEricssonX10i Build/R1AA056) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1", 68 | ] --------------------------------------------------------------------------------
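A closing practical note: run.py near the top of this dump launches the crawl by shelling out to Scrapy's command line (`scrapy crawl SinaSpider`). An equivalent programmatic entry point, sketched here with Scrapy's CrawlerProcess API (not part of this repo), can be handier when embedding the crawl in a larger script:

```python
# Sketch only, not part of this repo: start the SinaSpider crawl programmatically.
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

process = CrawlerProcess(get_project_settings())  # loads sina/settings.py via scrapy.cfg
process.crawl("SinaSpider")                       # spider name declared in weibo_spider.py
process.start()                                   # blocks until the crawl finishes
```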