├── README.md
├── run.py
├── scrapy.cfg
└── sina
    ├── __init__.py
    ├── config.py
    ├── cookies.py
    ├── items.py
    ├── middlewares.py
    ├── pipelines.py
    ├── settings.py
    ├── spiders
    │   ├── __init__.py
    │   └── weibo_spider.py
    └── user_agents.py

/README.md:
--------------------------------------------------------------------------------
1 | # WeiboSpider
2 | This is a Sina Weibo spider built with Scrapy.
3 | 
4 | ## Update 2018/7/28
5 | 
6 | **Click here: [Weibo spider: tens of millions of records per day on a single machine && a hard-won summary of Weibo crawling](https://blog.csdn.net/nghuyong/article/details/81251948)**
7 | 
8 | ## Update 2018/7/27
9 | This spider first requires logging in to obtain Weibo cookies; only then can the crawler be run.
10 | 
11 | If your accounts are purchased ones, Weibo may judge them to be abnormal and present a sliding-grid CAPTCHA, in which case the cookie-fetching approach in this project no longer applies;
12 | see [this article](https://juejin.im/post/5acf0ffcf265da23826e5e20) for details.
13 | 
14 | To build a large-scale Weibo crawling system on top of this project, the **only** additional work is to **buy a large number of Weibo accounts and maintain an account pool**.
15 | 
16 | Weibo accounts can be bought [here](http://www.xiaohao.shop/); accessing that site requires getting around the Great Firewall.
17 | 
18 | ![](http://wx3.sinaimg.cn/mw690/006Ueclxly1ftoh9t49z3j31jw0ie77z.jpg)
19 | 
20 | I currently maintain a pool of 200+ accounts and distribute the crawl with Redis; the throughput is shown above: **roughly 8,000 records per minute, about 11 million records collected per day**.
21 | 
22 | I paid for this account pool myself, so it will not be shared.
23 | 
24 | If you genuinely need the data, contact me by email: nghuyong@163.com
25 | 
26 | ## Using this project
27 | Python version: Python 3.6
28 | ```bash
29 | git clone https://github.com/SimpleBrightMan/WeiboSpider.git
30 | # first fetch cookies and store them in the database
31 | python cookies.py
32 | # then run the spider
33 | python run.py
34 | ```
35 | 
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | from scrapy import cmdline
4 | 
5 | cmdline.execute("scrapy crawl SinaSpider".split(" "))
--------------------------------------------------------------------------------
/scrapy.cfg:
--------------------------------------------------------------------------------
1 | # Automatically created by: scrapy startproject
2 | #
3 | # For more information about the [deploy] section see:
4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html
5 | 
6 | [settings]
7 | default = sina.settings
8 | 
9 | [deploy]
10 | #url = http://localhost:6800/
11 | project = sina
--------------------------------------------------------------------------------
/sina/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gyqlr/weibo_spider/e44b0da039f92b346bdc7ac13d64364cf1bbc401/sina/__init__.py
--------------------------------------------------------------------------------
/sina/config.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | """ Initial queue of user IDs to crawl """
4 | weiboID = [
5 |     "5303798085"
6 | ]
7 | 
8 | PROXYPOOL = []
--------------------------------------------------------------------------------
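Note that config.py also declares a PROXYPOOL list that nothing else in the project currently reads. If it were populated, a minimal downloader middleware along the lines of the sketch below (hypothetical, not part of this repo; the "host:port" entry format is an assumption) could rotate proxies the same way the existing middlewares rotate User-Agents and cookies. It would also need to be registered in DOWNLOADER_MIDDLEWARES in settings.py.

```python
# Hypothetical sketch, not part of this repo: rotate proxies from PROXYPOOL.
import random

from sina.config import PROXYPOOL


class ProxyMiddleware(object):
    """ Assign a random proxy from PROXYPOOL to every outgoing request. """

    def process_request(self, request, spider):
        if PROXYPOOL:
            # Assumes entries look like "1.2.3.4:8080"; Scrapy's built-in
            # HttpProxyMiddleware honours request.meta['proxy'].
            request.meta['proxy'] = "http://%s" % random.choice(PROXYPOOL)
```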
/sina/cookies.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | import datetime
4 | import json
5 | import base64
6 | from time import sleep
7 | 
8 | import pymongo
9 | from selenium import webdriver
10 | from selenium.webdriver import ActionChains
11 | from selenium.webdriver.common.by import By
12 | from selenium.webdriver.support.ui import WebDriverWait
13 | from selenium.webdriver.support import expected_conditions as EC
14 | 
15 | """
16 | Fill in your Weibo accounts and passwords; they can be bought on Taobao for about one yuan per seven accounts.
17 | Buying a few dozen is recommended: Weibo's anti-crawling is aggressive, and requesting too frequently triggers 302 redirects.
18 | Alternatively, you can increase the request interval.
19 | """
20 | WeiBoAccounts = [
21 |     {'username': 'liujuan86088@163.com', 'password': '*****'},
22 | ]
23 | 
24 | cookies = []
25 | client = pymongo.MongoClient("localhost", 27017)
26 | db = client["Sina"]
27 | userAccount = db["userAccount"]
28 | 
29 | 
30 | def get_cookie_from_weibo(username, password):
31 |     driver = webdriver.Chrome()
32 |     driver.get('https://weibo.cn')
33 |     assert "微博" in driver.title
34 |     login_link = driver.find_element_by_link_text('登录')
35 |     ActionChains(driver).move_to_element(login_link).click().perform()
36 |     login_name = WebDriverWait(driver, 10).until(
37 |         EC.visibility_of_element_located((By.ID, "loginName"))
38 |     )
39 |     login_password = driver.find_element_by_id("loginPassword")
40 |     login_name.send_keys(username)
41 |     login_password.send_keys(password)
42 |     login_button = driver.find_element_by_id("loginAction")
43 |     login_button.click()
44 |     # Pause for 10 seconds to check whether the launched Chrome session logged in successfully; if it did not, log in manually.
45 |     sleep(10)
46 |     cookie = driver.get_cookies()
47 |     driver.close()
48 |     return cookie
49 | 
50 | 
51 | def init_cookies():
52 |     for cookie in userAccount.find():
53 |         cookies.append(cookie['cookie'])
54 | 
55 | 
56 | if __name__ == "__main__":
57 |     try:
58 |         userAccount.drop()
59 |     except Exception as e:
60 |         pass
61 |     for account in WeiBoAccounts:
62 |         cookie = get_cookie_from_weibo(account["username"], account["password"])
63 |         userAccount.insert_one({"_id": account["username"], "cookie": cookie})
64 | 
--------------------------------------------------------------------------------
/sina/items.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Define here the models for your scraped items
4 | #
5 | # See documentation in:
6 | # http://doc.scrapy.org/en/latest/topics/items.html
7 | 
8 | from scrapy import Item, Field
9 | 
10 | class InformationItem(Item):
11 |     """ User profile information """
12 |     _id = Field()  # user ID
13 |     NickName = Field()  # nickname
14 |     Gender = Field()  # gender
15 |     Province = Field()  # province
16 |     City = Field()  # city
17 |     BriefIntroduction = Field()  # bio
18 |     Birthday = Field()  # birthday
19 |     Num_Tweets = Field()  # number of tweets
20 |     Num_Follows = Field()  # number of accounts followed
21 |     Num_Fans = Field()  # number of followers
22 |     SexOrientation = Field()  # sexual orientation
23 |     Sentiment = Field()  # relationship status
24 |     VIPlevel = Field()  # VIP membership level
25 |     Authentication = Field()  # verification status
26 |     URL = Field()  # profile homepage URL
27 | 
28 | 
29 | class TweetsItem(Item):
30 |     """ Tweet (Weibo post) information """
31 |     _id = Field()  # user ID - tweet ID
32 |     ID = Field()  # user ID
33 |     Content = Field()  # tweet content
34 |     PubTime = Field()  # publish time
35 |     Co_oridinates = Field()  # geo coordinates
36 |     Tools = Field()  # client/platform used to post
37 |     Like = Field()  # number of likes
38 |     Comment = Field()  # number of comments
39 |     Transfer = Field()  # number of reposts
40 | 
41 | 
42 | class RelationshipsItem(Item):
43 |     """ User relationships; only the follow relation is kept """
44 |     fan_id = Field()  # ID of the follower
45 |     followed_id = Field()  # ID of the followed user
46 | 
--------------------------------------------------------------------------------
/sina/middlewares.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | import random
3 | from sina.cookies import cookies, init_cookies
4 | from sina.user_agents import agents
5 | 
6 | 
7 | class UserAgentMiddleware(object):
8 |     """ Rotate the User-Agent header """
9 | 
10 |     def process_request(self, request, spider):
11 |         agent = random.choice(agents)
12 |         request.headers["User-Agent"] = agent
13 | 
14 | 
15 | class CookiesMiddleware(object):
16 |     """ Rotate cookies """
17 | 
18 |     def __init__(self):
19 |         init_cookies()
20 | 
21 |     def process_request(self, request, spider):
22 |         cookie = random.choice(cookies)
23 |         request.cookies = cookie
24 | 
25 | 
26 | 
--------------------------------------------------------------------------------
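The cookie entries stored by cookies.py are the raw list of dicts returned by Selenium's driver.get_cookies(), and CookiesMiddleware above attaches that list directly to request.cookies. Before a long crawl it can be worth checking that the stored cookies still work; the helper below is a hypothetical sketch (not part of the repo) that converts a stored cookie list to a plain name -> value dict, the same way parse_information() in weibo_spider.py does, and probes weibo.cn. The "退出" (log out) marker used to detect a logged-in page is an assumption.

```python
# Hypothetical sketch, not part of this repo: probe whether stored cookies still work.
import pymongo
import requests

userAccount = pymongo.MongoClient("localhost", 27017)["Sina"]["userAccount"]


def cookie_seems_valid(cookie_list):
    """ Convert Selenium's cookie list to a name -> value dict and test weibo.cn. """
    jar = {c['name']: c['value'] for c in cookie_list}
    r = requests.get("https://weibo.cn", cookies=jar, timeout=5)
    # Assumption: a logged-in weibo.cn page contains a "退出" (log out) link.
    return r.status_code == 200 and "退出" in r.text


if __name__ == "__main__":
    for account in userAccount.find():
        print(account["_id"], cookie_seems_valid(account["cookie"]))
```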
/sina/pipelines.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Define your item pipelines here
4 | #
5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting
6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
7 | import pymongo
8 | 
9 | from sina.items import RelationshipsItem, TweetsItem, InformationItem
10 | 
11 | 
12 | class MongoDBPipeline(object):
13 |     def __init__(self):
14 |         client = pymongo.MongoClient("localhost", 27017)
15 |         db = client["Sina"]
16 |         self.Information = db["Information"]
17 |         self.Tweets = db["Tweets"]
18 |         self.Relationships = db["Relationships"]
19 | 
20 |     def process_item(self, item, spider):
21 |         """ Check the item type, handle it accordingly, and write it to the database """
22 |         if isinstance(item, RelationshipsItem):
23 |             try:
24 |                 self.Relationships.insert_one(dict(item))
25 |             except Exception:
26 |                 pass
27 |         elif isinstance(item, TweetsItem):
28 |             try:
29 |                 self.Tweets.insert_one(dict(item))
30 |             except Exception:
31 |                 pass
32 |         elif isinstance(item, InformationItem):
33 |             try:
34 |                 self.Information.insert_one(dict(item))
35 |             except Exception:
36 |                 pass
37 |         return item
38 | 
--------------------------------------------------------------------------------
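MongoDBPipeline writes everything into the three collections Information, Tweets and Relationships of the local Sina database. Once a crawl has run, the results can be inspected directly with pymongo; the snippet below is illustrative only (not part of the repo, assumes pymongo 3.7+ for count_documents) and uses the seed uid from config.py.

```python
# Illustrative only, not part of this repo: inspect what the pipeline has stored.
import pymongo

db = pymongo.MongoClient("localhost", 27017)["Sina"]

print(db["Information"].count_documents({}))                               # profiles scraped so far
print(db["Tweets"].count_documents({"ID": "5303798085"}))                  # tweets of the seed user
print(db["Relationships"].count_documents({"followed_id": "5303798085"}))  # follow edges pointing at the seed user

# Ten most-liked tweets captured so far
for doc in db["Tweets"].find({"Like": {"$exists": True}}).sort("Like", -1).limit(10):
    print(doc["Like"], doc["_id"])
```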
/sina/settings.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | # Scrapy settings for sina project
4 | #
5 | # For simplicity, this file contains only settings considered important or
6 | # commonly used. You can find more settings consulting the documentation:
7 | #
8 | # http://doc.scrapy.org/en/latest/topics/settings.html
9 | # http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
10 | # http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
11 | 
12 | BOT_NAME = 'sina'
13 | 
14 | SPIDER_MODULES = ['sina.spiders']
15 | NEWSPIDER_MODULE = 'sina.spiders'
16 | 
17 | # Configure maximum concurrent requests performed by Scrapy (default: 16)
18 | CONCURRENT_REQUESTS = 32
19 | 
20 | # Configure a delay for requests for the same website (default: 0)
21 | # See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
22 | # See also autothrottle settings and docs
23 | DOWNLOAD_DELAY = 0.5
24 | # The download delay setting will honor only one of:
25 | # CONCURRENT_REQUESTS_PER_DOMAIN = 16
26 | # CONCURRENT_REQUESTS_PER_IP = 16
27 | 
28 | # Disable cookies (enabled by default)
29 | # COOKIES_ENABLED = False
30 | 
31 | # Disable Telnet Console (enabled by default)
32 | # TELNETCONSOLE_ENABLED = False
33 | 
34 | # Override the default request headers:
35 | # DEFAULT_REQUEST_HEADERS = {
36 | #     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
37 | #     'Accept-Language': 'en',
38 | # }
39 | 
40 | # Enable or disable spider middlewares
41 | # See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
42 | # SPIDER_MIDDLEWARES = {
43 | #     'sina.middlewares.SinaSpiderMiddleware': 543,
44 | # }
45 | 
46 | # Enable or disable downloader middlewares
47 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
48 | DOWNLOADER_MIDDLEWARES = {
49 |     'sina.middlewares.UserAgentMiddleware': 401,
50 |     'sina.middlewares.CookiesMiddleware': 402,
51 | }
52 | 
53 | # Enable or disable extensions
54 | # See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
55 | # EXTENSIONS = {
56 | #     'scrapy.extensions.telnet.TelnetConsole': None,
57 | # }
58 | 
59 | # Configure item pipelines
60 | # See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
61 | ITEM_PIPELINES = {
62 |     'sina.pipelines.MongoDBPipeline': 300,
63 | }
64 | 
65 | # Enable and configure the AutoThrottle extension (disabled by default)
66 | # See http://doc.scrapy.org/en/latest/topics/autothrottle.html
67 | # AUTOTHROTTLE_ENABLED = True
68 | # The initial download delay
69 | # AUTOTHROTTLE_START_DELAY = 5
70 | # The maximum download delay to be set in case of high latencies
71 | # AUTOTHROTTLE_MAX_DELAY = 60
72 | # The average number of requests Scrapy should be sending in parallel to
73 | # each remote server
74 | # AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
75 | # Enable showing throttling stats for every response received:
76 | # AUTOTHROTTLE_DEBUG = False
77 | 
78 | # Enable and configure HTTP caching (disabled by default)
79 | # See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
80 | # HTTPCACHE_ENABLED = True
81 | # HTTPCACHE_EXPIRATION_SECS = 0
82 | # HTTPCACHE_DIR = 'httpcache'
83 | # HTTPCACHE_IGNORE_HTTP_CODES = []
84 | # HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
85 | 
86 | # LOG_FILE = "/mnt/mongodb/data/weibo.log"
--------------------------------------------------------------------------------
/sina/spiders/__init__.py:
--------------------------------------------------------------------------------
1 | # This package will contain the spiders of your Scrapy project
2 | #
3 | # Please refer to the documentation for information on how to create and manage
4 | # your spiders.
5 | 
--------------------------------------------------------------------------------
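weibo_spider.py below holds the entire crawl logic: it seeds from the weiboID list in config.py, requests https://weibo.cn/<uid>/info for each user, and then follows that user's tweets, follow list and fan list. Profile fields are extracted by joining all text nodes of the page with ";" and matching labelled fields with regexes. A standalone illustration of that extraction step is shown here; the sample string is fabricated for demonstration and is not real page output.

```python
# Standalone illustration of the extraction style used in parse_information()
# below: all text nodes are joined with ";" and labelled fields are pulled out
# with regexes. The sample string is fabricated.
import re

text1 = "昵称:张三;性别:男;地区:北京 海淀区;生日:1990-01-01;"

nickname = re.findall('昵称;?[::]?(.*?);', text1)  # -> ['张三']
gender = re.findall('性别;?[::]?(.*?);', text1)    # -> ['男']
place = re.findall('地区;?[::]?(.*?);', text1)     # -> ['北京 海淀区']
print(nickname, gender, place)
```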
/sina/spiders/weibo_spider.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | 
4 | import datetime
5 | import requests
6 | import re
7 | from lxml import etree
8 | from scrapy import Spider
9 | from scrapy.selector import Selector
10 | from scrapy.http import Request
11 | 
12 | from sina.config import weiboID
13 | from sina.items import TweetsItem, InformationItem, RelationshipsItem
14 | 
15 | 
16 | class SinaSpider(Spider):
17 |     name = "SinaSpider"
18 |     host = "https://weibo.cn"
19 |     start_urls = list(set(weiboID))
20 | 
21 |     def start_requests(self):
22 |         for uid in self.start_urls:
23 |             yield Request(url="https://weibo.cn/%s/info" % uid, callback=self.parse_information)
24 | 
25 |     def parse_information(self, response):
26 |         """ Scrape a user's profile information """
27 |         informationItem = InformationItem()
28 |         selector = Selector(response)
29 |         ID = re.findall('(\d+)/info', response.url)[0]
30 |         try:
31 |             text1 = ";".join(selector.xpath('body/div[@class="c"]//text()').extract())  # join all text() nodes inside the tags
32 |             nickname = re.findall('昵称;?[::]?(.*?);', text1)
33 |             gender = re.findall('性别;?[::]?(.*?);', text1)
34 |             place = re.findall('地区;?[::]?(.*?);', text1)
35 |             briefIntroduction = re.findall('简介;?[::]?(.*?);', text1)
36 |             birthday = re.findall('生日;?[::]?(.*?);', text1)
37 |             sexOrientation = re.findall('性取向;?[::]?(.*?);', text1)
38 |             sentiment = re.findall('感情状况;?[::]?(.*?);', text1)
39 |             vipLevel = re.findall('会员等级;?[::]?(.*?);', text1)
40 |             authentication = re.findall('认证;?[::]?(.*?);', text1)
41 |             url = re.findall('互联网;?[::]?(.*?);', text1)
42 | 
43 |             informationItem["_id"] = ID
44 |             if nickname and nickname[0]:
45 |                 informationItem["NickName"] = nickname[0].replace(u"\xa0", "")
46 |             if gender and gender[0]:
47 |                 informationItem["Gender"] = gender[0].replace(u"\xa0", "")
48 |             if place and place[0]:
49 |                 place = place[0].replace(u"\xa0", "").split(" ")
50 |                 informationItem["Province"] = place[0]
51 |                 if len(place) > 1:
52 |                     informationItem["City"] = place[1]
53 |             if briefIntroduction and briefIntroduction[0]:
54 |                 informationItem["BriefIntroduction"] = briefIntroduction[0].replace(u"\xa0", "")
55 |             if birthday and birthday[0]:
56 |                 try:
57 |                     birthday = datetime.datetime.strptime(birthday[0], "%Y-%m-%d")
58 |                     informationItem["Birthday"] = birthday - datetime.timedelta(hours=8)
59 |                 except Exception:
60 |                     informationItem['Birthday'] = birthday[0]  # may be a zodiac sign rather than a date
61 |             if sexOrientation and sexOrientation[0]:
62 |                 if sexOrientation[0].replace(u"\xa0", "") == gender[0]:
63 |                     informationItem["SexOrientation"] = "同性恋"
64 |                 else:
65 |                     informationItem["SexOrientation"] = "异性恋"
66 |             if sentiment and sentiment[0]:
67 |                 informationItem["Sentiment"] = sentiment[0].replace(u"\xa0", "")
68 |             if vipLevel and vipLevel[0]:
69 |                 informationItem["VIPlevel"] = vipLevel[0].replace(u"\xa0", "")
70 |             if authentication and authentication[0]:
71 |                 informationItem["Authentication"] = authentication[0].replace(u"\xa0", "")
72 |             if url:
73 |                 informationItem["URL"] = url[0]
74 | 
75 |             try:
76 |                 urlothers = "https://weibo.cn/attgroup/opening?uid=%s" % ID
77 |                 new_ck = {}
78 |                 for ck in response.request.cookies:
79 |                     new_ck[ck['name']] = ck['value']
80 |                 r = requests.get(urlothers, cookies=new_ck, timeout=5)
81 |                 if r.status_code == 200:
82 |                     selector = etree.HTML(r.content)
83 |                     texts = ";".join(selector.xpath('//body//div[@class="tip2"]/a//text()'))
84 |                     if texts:
85 |                         num_tweets = re.findall('微博\[(\d+)\]', texts)
86 |                         num_follows = re.findall('关注\[(\d+)\]', texts)
87 |                         num_fans = re.findall('粉丝\[(\d+)\]', texts)
88 |                         if num_tweets:
89 |                             informationItem["Num_Tweets"] = int(num_tweets[0])
90 |                         if num_follows:
91 |                             informationItem["Num_Follows"] = int(num_follows[0])
92 |                         if num_fans:
93 |                             informationItem["Num_Fans"] = int(num_fans[0])
94 |             except Exception as e:
95 |                 pass
96 |         except Exception as e:
97 |             pass
98 |         else:
99 |             yield informationItem
100 |             if informationItem.get("Num_Tweets") and informationItem["Num_Tweets"] < 5000:  # .get() avoids a KeyError when the count is missing
101 |                 yield Request(url="https://weibo.cn/%s/profile?filter=1&page=1" % ID, callback=self.parse_tweets,
102 |                               dont_filter=True)
103 |             if informationItem.get("Num_Follows") and informationItem["Num_Follows"] < 500:
104 |                 yield Request(url="https://weibo.cn/%s/follow" % ID, callback=self.parse_relationship, dont_filter=True)
105 |             if informationItem.get("Num_Fans") and informationItem["Num_Fans"] < 500:
106 |                 yield Request(url="https://weibo.cn/%s/fans" % ID, callback=self.parse_relationship, dont_filter=True)
107 | 
108 |     def parse_tweets(self, response):
109 |         """ Scrape the user's tweets """
110 |         selector = Selector(response)
111 |         ID = re.findall('(\d+)/profile', response.url)[0]
112 |         divs = selector.xpath('body/div[@class="c" and @id]')
113 |         for div in divs:
114 |             try:
115 |                 tweetsItems = TweetsItem()
116 |                 id = div.xpath('@id').extract_first()  # tweet ID
117 |                 content = div.xpath('div/span[@class="ctt"]//text()').extract()  # tweet content
118 |                 cooridinates = div.xpath('div/a/@href').extract()  # geo coordinates
119 |                 like = re.findall('赞\[(\d+)\]', div.extract())  # number of likes
120 |                 transfer = re.findall('转发\[(\d+)\]', div.extract())  # number of reposts
121 |                 comment = re.findall('评论\[(\d+)\]', div.extract())  # number of comments
122 |                 others = div.xpath('div/span[@class="ct"]/text()').extract()  # publish time and client used (phone or platform)
123 | 
124 |                 tweetsItems["_id"] = ID + "-" + id
125 |                 tweetsItems["ID"] = ID
126 |                 if content:
127 |                     tweetsItems["Content"] = " ".join(content).strip('[位置]')  # strip the trailing "[位置]" (location) marker
128 |                 if cooridinates:
129 |                     cooridinates = re.findall('center=([\d.,]+)', cooridinates[0])
130 |                     if cooridinates:
131 |                         tweetsItems["Co_oridinates"] = cooridinates[0]
132 |                 if like:
133 |                     tweetsItems["Like"] = int(like[0])
134 |                 if transfer:
135 |                     tweetsItems["Transfer"] = int(transfer[0])
136 |                 if comment:
137 |                     tweetsItems["Comment"] = int(comment[0])
138 |                 if others:
139 |                     others = others[0].split('来自')  # split publish time from the "来自" (posted via) client string
140 |                     tweetsItems["PubTime"] = others[0].replace(u"\xa0", "")
141 |                     if len(others) == 2:
142 |                         tweetsItems["Tools"] = others[1].replace(u"\xa0", "")
143 |                 yield tweetsItems
144 |             except Exception as e:
145 |                 self.logger.info(e)
146 |                 pass
147 | 
148 |         url_next = selector.xpath('body/div[@class="pa" and @id="pagelist"]/form/div/a[text()="下页"]/@href').extract()
149 |         if url_next:
150 |             yield Request(url=self.host + url_next[0], callback=self.parse_tweets, dont_filter=True)
151 | 
152 |     def parse_relationship(self, response):
153 |         """ Open a follow/fans page and collect the user IDs listed on it """
154 |         selector = Selector(response)
155 |         if "/follow" in response.url:
156 |             ID = re.findall('(\d+)/follow', response.url)[0]
157 |             flag = True
158 |         else:
159 |             ID = re.findall('(\d+)/fans', response.url)[0]
160 |             flag = False
161 |         urls = selector.xpath('//a[text()="关注他" or text()="关注她"]/@href').extract()
162 |         uids = re.findall('uid=(\d+)', ";".join(urls), re.S)
163 |         for uid in uids:
164 |             relationshipsItem = RelationshipsItem()
165 |             relationshipsItem["fan_id"] = ID if flag else uid
166 |             relationshipsItem["followed_id"] = uid if flag else ID
167 |             yield relationshipsItem
168 |             yield Request(url="https://weibo.cn/%s/info" % uid, callback=self.parse_information)
169 | 
170 |         next_url = selector.xpath('//a[text()="下页"]/@href').extract()
171 |         if next_url:
172 |             yield Request(url=self.host + next_url[0], callback=self.parse_relationship, dont_filter=True)
173 | 
--------------------------------------------------------------------------------
/sina/user_agents.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | 
4 | """ User-Agents """
5 | agents = [
6 |     "Mozilla/5.0 (Linux; U; Android 2.3.6; en-us; Nexus S Build/GRK39F) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
7 |     "Avant Browser/1.2.789rel1 (http://www.avantbrowser.com)",
8 |     "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.0 Safari/532.5",
9 |     "Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.9 (KHTML, like Gecko) Chrome/5.0.310.0 Safari/532.9",
10 |     "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.514.0 Safari/534.7",
11 |     "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/9.0.601.0 Safari/534.14",
12 |     "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/10.0.601.0 Safari/534.14",
13 |     "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20",
14 |     "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.27 (KHTML, like Gecko) Chrome/12.0.712.0 Safari/534.27",
15 |     "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.24 Safari/535.1",
16 |     "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2",
17 |     "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7",
18 |     "Mozilla/5.0 (Windows; U; 
Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421 Minefield/3.0.2pre", 19 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10", 20 | "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11 (.NET CLR 3.5.30729)", 21 | "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 GTB5", 22 | "Mozilla/5.0 (Windows; U; Windows NT 5.1; tr; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8 ( .NET CLR 3.5.30729; .NET4.0E)", 23 | "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1", 24 | "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1", 25 | "Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0", 26 | "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0a2) Gecko/20110622 Firefox/6.0a2", 27 | "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1", 28 | "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0b4pre) Gecko/20100815 Minefield/4.0b4pre", 29 | "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0 )", 30 | "Mozilla/4.0 (compatible; MSIE 5.5; Windows 98; Win 9x 4.90)", 31 | "Mozilla/5.0 (Windows; U; Windows XP) Gecko MultiZilla/1.6.1.0a", 32 | "Mozilla/2.02E (Win95; U)", 33 | "Mozilla/3.01Gold (Win95; I)", 34 | "Mozilla/4.8 [en] (Windows NT 5.1; U)", 35 | "Mozilla/5.0 (Windows; U; Win98; en-US; rv:1.4) Gecko Netscape/7.1 (ax)", 36 | "HTC_Dream Mozilla/5.0 (Linux; U; Android 1.5; en-ca; Build/CUPCAKE) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1", 37 | "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.2; U; de-DE) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/234.40.1 Safari/534.6 TouchPad/1.0", 38 | "Mozilla/5.0 (Linux; U; Android 1.5; en-us; sdk Build/CUPCAKE) AppleWebkit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1", 39 | "Mozilla/5.0 (Linux; U; Android 2.1; en-us; Nexus One Build/ERD62) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17", 40 | "Mozilla/5.0 (Linux; U; Android 2.2; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", 41 | "Mozilla/5.0 (Linux; U; Android 1.5; en-us; htc_bahamas Build/CRB17) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1", 42 | "Mozilla/5.0 (Linux; U; Android 2.1-update1; de-de; HTC Desire 1.19.161.5 Build/ERE27) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17", 43 | "Mozilla/5.0 (Linux; U; Android 2.2; en-us; Sprint APA9292KT Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", 44 | "Mozilla/5.0 (Linux; U; Android 1.5; de-ch; HTC Hero Build/CUPCAKE) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1", 45 | "Mozilla/5.0 (Linux; U; Android 2.2; en-us; ADR6300 Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", 46 | "Mozilla/5.0 (Linux; U; Android 2.1; en-us; HTC Legend Build/cupcake) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17", 47 | "Mozilla/5.0 (Linux; U; Android 1.5; de-de; HTC Magic Build/PLAT-RC33) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1 FirePHP/0.3", 48 | "Mozilla/5.0 (Linux; U; Android 1.6; en-us; HTC_TATTOO_A3288 Build/DRC79) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1", 49 | "Mozilla/5.0 (Linux; U; Android 1.0; en-us; dream) AppleWebKit/525.10 (KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2", 50 | "Mozilla/5.0 (Linux; U; Android 1.5; 
en-us; T-Mobile G1 Build/CRB43) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari 525.20.1", 51 | "Mozilla/5.0 (Linux; U; Android 1.5; en-gb; T-Mobile_G2_Touch Build/CUPCAKE) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1", 52 | "Mozilla/5.0 (Linux; U; Android 2.0; en-us; Droid Build/ESD20) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17", 53 | "Mozilla/5.0 (Linux; U; Android 2.2; en-us; Droid Build/FRG22D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", 54 | "Mozilla/5.0 (Linux; U; Android 2.0; en-us; Milestone Build/ SHOLS_U2_01.03.1) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17", 55 | "Mozilla/5.0 (Linux; U; Android 2.0.1; de-de; Milestone Build/SHOLS_U2_01.14.0) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17", 56 | "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/525.10 (KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2", 57 | "Mozilla/5.0 (Linux; U; Android 0.5; en-us) AppleWebKit/522 (KHTML, like Gecko) Safari/419.3", 58 | "Mozilla/5.0 (Linux; U; Android 1.1; en-gb; dream) AppleWebKit/525.10 (KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2", 59 | "Mozilla/5.0 (Linux; U; Android 2.0; en-us; Droid Build/ESD20) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17", 60 | "Mozilla/5.0 (Linux; U; Android 2.1; en-us; Nexus One Build/ERD62) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17", 61 | "Mozilla/5.0 (Linux; U; Android 2.2; en-us; Sprint APA9292KT Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", 62 | "Mozilla/5.0 (Linux; U; Android 2.2; en-us; ADR6300 Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", 63 | "Mozilla/5.0 (Linux; U; Android 2.2; en-ca; GT-P1000M Build/FROYO) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1", 64 | "Mozilla/5.0 (Linux; U; Android 3.0.1; fr-fr; A500 Build/HRI66) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13", 65 | "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/525.10 (KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2", 66 | "Mozilla/5.0 (Linux; U; Android 1.6; es-es; SonyEricssonX10i Build/R1FA016) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1", 67 | "Mozilla/5.0 (Linux; U; Android 1.6; en-us; SonyEricssonX10i Build/R1AA056) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1", 68 | ] --------------------------------------------------------------------------------
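A closing practical note: run.py near the top of this dump launches the crawl by shelling out to Scrapy's command line (`scrapy crawl SinaSpider`). An equivalent programmatic entry point, sketched here with Scrapy's CrawlerProcess API (not part of this repo), can be handier when embedding the crawl in a larger script:

```python
# Sketch only, not part of this repo: start the SinaSpider crawl programmatically.
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

process = CrawlerProcess(get_project_settings())  # loads sina/settings.py via scrapy.cfg
process.crawl("SinaSpider")                       # spider name declared in weibo_spider.py
process.start()                                   # blocks until the crawl finishes
```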