├── README.md
├── scrapy.cfg
└── weiboCAR
    ├── __init__.py
    ├── cookies.py
    ├── items.py
    ├── middleware.py
    ├── pipelines.py
    ├── settings.py
    ├── spiders
    │   ├── __init__.py
    │   └── spiders.py
    └── user_agents.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# weiboCAR

## Environment

1. Python 2.7

2. Windows 7 (not yet tested on OS X, but it should work there as well)

## Dependencies

1. scrapy (install: pip install scrapy)

2. BeautifulSoup (install: pip install beautifulsoup4; the code imports bs4)

## Usage

1. Fill in your Weibo account name(s) and password(s) in cookies.py:

```python
myWeiBo = [
    {'no': 'username 1 goes here', 'psw': 'password 1 goes here'},
    {'no': 'username 2 goes here', 'psw': 'password 2 goes here'},
]
```

2. Fill in the IDs of the Weibo posts to crawl in WEIBO_IDS in settings.py:

```python
WEIBO_IDS = ["Ehf8SdHyq"]  # Weibo post IDs go here
```

> What is a Weibo ID? Take the link http://weibo.com/1932835417/Ei8uMnP44?ref=home&rid=0_0_8_2596589918405690045, for example: the Weibo ID in that URL is Ei8uMnP44.

3. From the project directory, run

```
scrapy crawl weiboCAR -a method=[value]
```

> [value] can be repost (crawl reposts only), comment (crawl comments only), attitude (crawl likes only), or all (crawl all three).

For example:

```
scrapy crawl weiboCAR -a method=all
```

--------------------------------------------------------------------------------
/scrapy.cfg:
--------------------------------------------------------------------------------

# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.org/en/latest/deploy.html

[settings]
default = weiboCAR.settings

[deploy]
#url = http://localhost:6800/
project = weiboCAR

--------------------------------------------------------------------------------
/weiboCAR/__init__.py:
--------------------------------------------------------------------------------

(empty file)

--------------------------------------------------------------------------------
/weiboCAR/cookies.py:
--------------------------------------------------------------------------------

# encoding=utf-8
import json
import base64
import requests


myWeiBo = [
    {'no': 'username 1 goes here', 'psw': 'password 1 goes here'},
    {'no': 'username 2 goes here', 'psw': 'password 2 goes here'},
]


def getCookies(weibo):
    """ Log in to each account and collect its session cookies. """
    cookies = []
    loginURL = r'https://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.15)'
    for elem in weibo:
        account = elem['no']
        password = elem['psw']
        # the SSO endpoint expects the username Base64-encoded
        username = base64.b64encode(account.encode('utf-8')).decode('utf-8')
        postData = {
            "entry": "sso",
            "gateway": "1",
            "from": "null",
            "savestate": "30",
            "useticket": "0",
            "pagerefer": "",
            "vsnf": "1",
            "su": username,
            "service": "sso",
            "sp": password,
            "sr": "1440*900",
            "encoding": "UTF-8",
            "cdult": "3",
            "domain": "sina.com.cn",
            "prelt": "0",
            "returntype": "TEXT",
        }
        session = requests.Session()
        r = session.post(loginURL, data=postData)
        # the endpoint answers with GBK-encoded JSON
        jsonStr = r.content.decode('gbk')
        info = json.loads(jsonStr)
        if info["retcode"] == "0":
            print "Get Cookie Success! ( Account: %s )" % account
            cookie = session.cookies.get_dict()
            cookies.append(cookie)
        else:
            print "Failed! ( Reason: %s )" % info['reason']
    return cookies


cookies = getCookies(myWeiBo)
print "Get Cookies Finish! ( Num: %d )" % len(cookies)
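Because `getCookies` runs at import time, a failed login only shows up in the console output. Before starting a long crawl it can be worth sanity-checking the harvested cookies directly; below is a minimal sketch, where the `checkCookie` helper and its redirect heuristic are illustrative assumptions, not part of weiboCAR:

```python
# encoding=utf-8
# Minimal sketch: check whether a harvested cookie still carries a
# logged-in session. checkCookie and its heuristic are illustrative
# assumptions, not part of the weiboCAR codebase.
import requests

from cookies import cookies


def checkCookie(cookie):
    # weibo.cn tends to send anonymous visitors to a login page, so a
    # final URL without "login" in it suggests the session is valid.
    r = requests.get("http://weibo.cn", cookies=cookie)
    return "login" not in r.url


if __name__ == "__main__":
    for c in cookies:
        print "cookie valid: %s" % checkCookie(c)
```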
--------------------------------------------------------------------------------
/weiboCAR/items.py:
--------------------------------------------------------------------------------

# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# http://doc.scrapy.org/en/latest/topics/items.html

from scrapy import Item, Field


class CommentItem(Item):
    """ Weibo comment """
    weiboID = Field()
    userId = Field()        # commenter's ID
    userName = Field()      # commenter's name
    userUrl = Field()       # commenter's profile page
    commentLike = Field()   # number of likes on the comment
    commentText = Field()   # comment text
    commentTime = Field()   # time posted


class AttitudeItem(Item):
    """ Weibo like """
    weiboID = Field()
    userName = Field()      # liker's name
    userUrl = Field()       # liker's profile page
    attitudeTime = Field()  # time posted


class RepostItem(Item):
    """ Weibo repost """
    weiboID = Field()
    userName = Field()      # reposter's name
    userUrl = Field()       # reposter's profile page
    repostText = Field()    # repost text
    repostLike = Field()    # number of likes
    repostTime = Field()    # time posted

--------------------------------------------------------------------------------
/weiboCAR/middleware.py:
--------------------------------------------------------------------------------

# encoding=utf-8
import random
from cookies import cookies
from user_agents import agents


class UserAgentMiddleware(object):
    """ Rotate the User-Agent header on every request """

    def process_request(self, request, spider):
        agent = random.choice(agents)
        request.headers["User-Agent"] = agent


class CookiesMiddleware(object):
    """ Rotate the session cookie on every request """

    def process_request(self, request, spider):
        cookie = random.choice(cookies)
        request.cookies = cookie

--------------------------------------------------------------------------------
/weiboCAR/pipelines.py:
--------------------------------------------------------------------------------

# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
from weiboCAR.items import CommentItem, AttitudeItem, RepostItem


class saveTxtPipeline(object):
    """ Append each item as a tab-separated line to <weiboID>_<kind>.txt """

    def process_item(self, item, spider):
        f = item["weiboID"]
        if isinstance(item, CommentItem):
            suffix = "_comment.txt"
        elif isinstance(item, AttitudeItem):
            suffix = "_attitude.txt"
        elif isinstance(item, RepostItem):
            suffix = "_repost.txt"
        else:
            return item
        line = ""
        for v in item.values():
            line = line + v.encode("utf8") + "\t"
        with open(f + suffix, "a") as outfile:
            outfile.write(line + "\n")
        return item
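Since `saveTxtPipeline` appends one tab-separated record per item (with a trailing tab before the newline), the output files can be read back with the standard `csv` module. A minimal sketch, assuming a comment file produced for the example Weibo ID used in the README:

```python
# encoding=utf-8
# Minimal sketch: load a weiboCAR output file back into Python.
# The file name assumes the example Weibo ID from the README.
import csv

with open("Ehf8SdHyq_comment.txt", "rb") as f:
    for row in csv.reader(f, delimiter="\t"):
        if row and row[-1] == "":
            row = row[:-1]  # drop the empty column left by the trailing tab
        print [col.decode("utf8") for col in row]
```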
--------------------------------------------------------------------------------
/weiboCAR/settings.py:
--------------------------------------------------------------------------------

# -*- coding: utf-8 -*-

# Scrapy settings for weiboCAR project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# http://doc.scrapy.org/en/latest/topics/settings.html
# http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
# http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html

BOT_NAME = 'weiboCAR'

WEIBO_IDS = ["Ehf8SdHyq"]  # Weibo post IDs go here

SPIDER_MODULES = ['weiboCAR.spiders']
NEWSPIDER_MODULE = 'weiboCAR.spiders'


# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'weiboCAR (+http://www.yourdomain.com)'

# Obey robots.txt rules
#ROBOTSTXT_OBEY = True

# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Configure a delay for requests for the same website (default: 0)
# See http://scrapy.readthedocs.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
DOWNLOAD_DELAY = 3  # delay (in seconds) between requests

# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
#COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Override the default request headers:
#DEFAULT_REQUEST_HEADERS = {
#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#   'Accept-Language': 'en',
#}

# Enable or disable spider middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
#    'weiboCAR.middlewares.MyCustomSpiderMiddleware': 543,
#}

# Enable or disable downloader middlewares
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {
    "weiboCAR.middleware.UserAgentMiddleware": 401,
    "weiboCAR.middleware.CookiesMiddleware": 402,
}
# Enable or disable extensions
# See http://scrapy.readthedocs.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Configure item pipelines
# See http://scrapy.readthedocs.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    'weiboCAR.pipelines.saveTxtPipeline': 300,
}

# Enable and configure the AutoThrottle extension (disabled by default)
# See http://doc.scrapy.org/en/latest/topics/autothrottle.html
#AUTOTHROTTLE_ENABLED = True
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See http://scrapy.readthedocs.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

--------------------------------------------------------------------------------
/weiboCAR/spiders/__init__.py:
--------------------------------------------------------------------------------

# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.
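The spider defined below does not have to be launched through the `scrapy crawl` command line shown in the README; Scrapy's `CrawlerProcess` can start it from a plain Python script, with the `method` argument passed as a keyword. A minimal sketch, to be run from the project root so the project settings are picked up:

```python
# encoding=utf-8
# Minimal sketch: launch the spider from a script instead of the
# "scrapy crawl weiboCAR -a method=..." command line.
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

process = CrawlerProcess(get_project_settings())
process.crawl("weiboCAR", method="all")  # same as -a method=all
process.start()  # blocks until the crawl finishes
```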
--------------------------------------------------------------------------------
/weiboCAR/spiders/spiders.py:
--------------------------------------------------------------------------------

# encoding=utf-8

from scrapy.spiders import CrawlSpider
from weiboCAR.items import CommentItem, AttitudeItem, RepostItem
from scrapy.http import Request
from bs4 import BeautifulSoup
from weiboCAR import settings


class WeiboCAR(CrawlSpider):
    host = "http://weibo.cn"
    name = "weiboCAR"
    allowed_domains = ["weibo.cn"]
    # start_urls holds Weibo post IDs, not URLs; start_requests builds
    # the actual URLs from them.
    start_urls = settings.WEIBO_IDS
    weiboIDs = set(start_urls)

    def start_requests(self):
        # the method argument comes from the command line:
        #   scrapy crawl weiboCAR -a method=...
        method = getattr(self, 'method', None)

        for weiboID in self.start_urls:
            comment_url = "http://weibo.cn/comment/%s?page=1" % weiboID
            attitude_url = "http://weibo.cn/attitude/%s?page=1" % weiboID
            repost_url = "http://weibo.cn/repost/%s?page=1" % weiboID
            if method is not None:
                if method == "attitude":
                    yield Request(url=attitude_url, callback=self.parseA, meta={"weiboID": weiboID})
                elif method == "comment":
                    yield Request(url=comment_url, callback=self.parseC, meta={"weiboID": weiboID})
                elif method == "repost":
                    yield Request(url=repost_url, callback=self.parseR, meta={"weiboID": weiboID})
                else:
                    # "all": crawl comments, reposts and likes
                    yield Request(url=comment_url, callback=self.parseC, meta={"weiboID": weiboID})
                    yield Request(url=repost_url, callback=self.parseR, meta={"weiboID": weiboID})
                    yield Request(url=attitude_url, callback=self.parseA, meta={"weiboID": weiboID})
            else:
                print "Please supply the method argument; valid values are comment (comments only), repost (reposts only), attitude (likes only) and all (all three)"

    def parseC(self, response):
        """ Extract comments """
        html = response.text
        soup = BeautifulSoup(html, "html.parser", from_encoding="utf8")
        comments = soup.find_all("div", {"class": "c"})

        for c in comments:
            try:
                item = CommentItem()
                item["weiboID"] = response.meta["weiboID"]
                item["userId"] = str(c.get("id"))
                item["userName"] = c.find("a").text
                item["userUrl"] = c.find("a").get("href")
                item["commentLike"] = c.find("span", {"class": "cc"}).find("a").text
                item["commentText"] = c.find("span", {"class": "ctt"}).text
                item["commentTime"] = c.find("span", {"class": "ct"}).text.strip()
                yield item
            except:
                # not a comment block; skip it
                pass

        next_url = None
        try:
            # the 下页 (next page) link lives in the pagination form
            next_url = soup.find("div", {"id": "pagelist"}).find("form").find("a", text=r'下页').get("href")
        except:
            pass

        if next_url:
            yield Request(url=self.host + next_url, callback=self.parseC, meta={"weiboID": response.meta["weiboID"]})

    def parseA(self, response):
        """ Extract likes """
        html = response.text
        soup = BeautifulSoup(html, "html.parser", from_encoding="utf8")
        comments = soup.find_all("div", {"class": "c"})

        for c in comments:
            try:
                item = AttitudeItem()
                item["weiboID"] = response.meta["weiboID"]
                item["userName"] = c.find("a").text
                item["userUrl"] = c.find("a").get("href")
                item["attitudeTime"] = c.find("span", {"class": "ct"}).text.strip()
                yield item
            except:
                # not a like block; skip it
                pass

        next_url = None
        try:
            next_url = soup.find("div", {"id": "pagelist"}).find("form").find("a", text=r'下页').get("href")
        except:
            pass

        if next_url:
            yield Request(url=self.host + next_url, callback=self.parseA, meta={"weiboID": response.meta["weiboID"]})

    def parseR(self, response):
        """ Extract reposts """
        html = response.text
        soup = BeautifulSoup(html, "html.parser", from_encoding="utf8")
        comments = soup.find_all("div", {"class": "c"})

        for c in comments:
            try:
                item = RepostItem()
                item["weiboID"] = response.meta["weiboID"]
                item["userName"] = c.find("a").text
                item["userUrl"] = c.find("a").get("href")
                # everything between the user link and the trailing
                # like/time spans is the repost text
                texts = c.find_all(text=True)
                texts = [t.strip() for t in texts if t.strip() != ""]
                item["repostText"] = "".join(texts[1:-2])
                item["repostTime"] = c.find("span", {"class": "ct"}).text.strip()
                item["repostLike"] = c.find("span", {"class": "cc"}).find("a").text
                yield item
            except:
                # not a repost block; skip it
                pass

        next_url = None
        try:
            next_url = soup.find("div", {"id": "pagelist"}).find("form").find("a", text=r'下页').get("href")
        except:
            pass

        if next_url:
            yield Request(url=self.host + next_url, callback=self.parseR, meta={"weiboID": response.meta["weiboID"]})
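All three callbacks follow the same pattern: select the `div.c` blocks, pull fields out of each, then follow the 下页 ("next page") link from the pagination form. That selector logic can be exercised offline against a static fragment; the HTML below is a hand-written illustration that only approximates real weibo.cn markup:

```python
# encoding=utf-8
# Offline sketch of the BeautifulSoup selectors used by the parse
# callbacks. The HTML fragment is hand-written for illustration.
from bs4 import BeautifulSoup

html = """
<div class="c" id="C_123">
  <a href="/u/456">some_user</a>
  <span class="ctt">nice post</span>
  <span class="cc"><a>like(3)</a></span>
  <span class="ct">10-01 12:00</span>
</div>
"""

soup = BeautifulSoup(html, "html.parser")
for c in soup.find_all("div", {"class": "c"}):
    print c.get("id")                                      # C_123
    print c.find("a").text                                 # some_user
    print c.find("span", {"class": "ctt"}).text            # nice post
    print c.find("span", {"class": "cc"}).find("a").text   # like(3)
    print c.find("span", {"class": "ct"}).text.strip()     # 10-01 12:00
```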
--------------------------------------------------------------------------------
/weiboCAR/user_agents.py:
--------------------------------------------------------------------------------

# encoding=utf-8

agents = [
    "Mozilla/5.0 (Linux; U; Android 2.3.6; en-us; Nexus S Build/GRK39F) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "Avant Browser/1.2.789rel1 (http://www.avantbrowser.com)",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.0 Safari/532.5",
    "Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.9 (KHTML, like Gecko) Chrome/5.0.310.0 Safari/532.9",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko) Chrome/7.0.514.0 Safari/534.7",
    "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/9.0.601.0 Safari/534.14",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.14 (KHTML, like Gecko) Chrome/10.0.601.0 Safari/534.14",
    "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.20 (KHTML, like Gecko) Chrome/11.0.672.2 Safari/534.20",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.27 (KHTML, like Gecko) Chrome/12.0.712.0 Safari/534.27",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.24 Safari/535.1",
    "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120 Safari/535.2",
    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36 Safari/535.7",
    "Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421 Minefield/3.0.2pre",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10",
    "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11 (.NET CLR 3.5.30729)",
    "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 GTB5",
    "Mozilla/5.0 (Windows; U; Windows NT 5.1; tr; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8 ( .NET CLR 3.5.30729; .NET4.0E)",
    "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0a2) Gecko/20110622 Firefox/6.0a2",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0b4pre) Gecko/20100815 Minefield/4.0b4pre",
    "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0 )",
    "Mozilla/4.0 (compatible; MSIE 5.5; Windows 98; Win 9x 4.90)",
    "Mozilla/5.0 (Windows; U; Windows XP) Gecko MultiZilla/1.6.1.0a",
    "Mozilla/2.02E (Win95; U)",
    "Mozilla/3.01Gold (Win95; I)",
    "Mozilla/4.8 [en] (Windows NT 5.1; U)",
    "Mozilla/5.0 (Windows; U; Win98; en-US; rv:1.4) Gecko Netscape/7.1 (ax)",
    "HTC_Dream Mozilla/5.0 (Linux; U; Android 1.5; en-ca; Build/CUPCAKE) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1",
    "Mozilla/5.0 (hp-tablet; Linux; hpwOS/3.0.2; U; de-DE) AppleWebKit/534.6 (KHTML, like Gecko) wOSBrowser/234.40.1 Safari/534.6 TouchPad/1.0",
    "Mozilla/5.0 (Linux; U; Android 1.5; en-us; sdk Build/CUPCAKE) AppleWebkit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1",
    "Mozilla/5.0 (Linux; U; Android 2.1; en-us; Nexus One Build/ERD62) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17",
    "Mozilla/5.0 (Linux; U; Android 2.2; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "Mozilla/5.0 (Linux; U; Android 1.5; en-us; htc_bahamas Build/CRB17) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1",
    "Mozilla/5.0 (Linux; U; Android 2.1-update1; de-de; HTC Desire 1.19.161.5 Build/ERE27) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17",
    "Mozilla/5.0 (Linux; U; Android 2.2; en-us; Sprint APA9292KT Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "Mozilla/5.0 (Linux; U; Android 1.5; de-ch; HTC Hero Build/CUPCAKE) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1",
    "Mozilla/5.0 (Linux; U; Android 2.2; en-us; ADR6300 Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "Mozilla/5.0 (Linux; U; Android 2.1; en-us; HTC Legend Build/cupcake) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17",
    "Mozilla/5.0 (Linux; U; Android 1.5; de-de; HTC Magic Build/PLAT-RC33) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1 FirePHP/0.3",
    "Mozilla/5.0 (Linux; U; Android 1.6; en-us; HTC_TATTOO_A3288 Build/DRC79) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1",
    "Mozilla/5.0 (Linux; U; Android 1.0; en-us; dream) AppleWebKit/525.10 (KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2",
    "Mozilla/5.0 (Linux; U; Android 1.5; en-us; T-Mobile G1 Build/CRB43) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari 525.20.1",
    "Mozilla/5.0 (Linux; U; Android 1.5; en-gb; T-Mobile_G2_Touch Build/CUPCAKE) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1",
    "Mozilla/5.0 (Linux; U; Android 2.0; en-us; Droid Build/ESD20) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17",
    "Mozilla/5.0 (Linux; U; Android 2.2; en-us; Droid Build/FRG22D) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "Mozilla/5.0 (Linux; U; Android 2.0; en-us; Milestone Build/ SHOLS_U2_01.03.1) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17",
    "Mozilla/5.0 (Linux; U; Android 2.0.1; de-de; Milestone Build/SHOLS_U2_01.14.0) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17",
    "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/525.10 (KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2",
    "Mozilla/5.0 (Linux; U; Android 0.5; en-us) AppleWebKit/522 (KHTML, like Gecko) Safari/419.3",
    "Mozilla/5.0 (Linux; U; Android 1.1; en-gb; dream) AppleWebKit/525.10 (KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2",
    "Mozilla/5.0 (Linux; U; Android 2.0; en-us; Droid Build/ESD20) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17",
    "Mozilla/5.0 (Linux; U; Android 2.1; en-us; Nexus One Build/ERD62) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17",
    "Mozilla/5.0 (Linux; U; Android 2.2; en-us; Sprint APA9292KT Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "Mozilla/5.0 (Linux; U; Android 2.2; en-us; ADR6300 Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "Mozilla/5.0 (Linux; U; Android 2.2; en-ca; GT-P1000M Build/FROYO) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1",
    "Mozilla/5.0 (Linux; U; Android 3.0.1; fr-fr; A500 Build/HRI66) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13",
    "Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/525.10 (KHTML, like Gecko) Version/3.0.4 Mobile Safari/523.12.2",
    "Mozilla/5.0 (Linux; U; Android 1.6; es-es; SonyEricssonX10i Build/R1FA016) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1",
    "Mozilla/5.0 (Linux; U; Android 1.6; en-us; SonyEricssonX10i Build/R1AA056) AppleWebKit/528.5 (KHTML, like Gecko) Version/3.1.2 Mobile Safari/525.20.1",
]