├── 17monipdb.dat ├── DB.py ├── README.md ├── __init__.py ├── api.py ├── config.py ├── crawler.py ├── data └── proxy.db ├── ipip.py ├── logger.py ├── proxypool.py ├── proxysites.py ├── test.py ├── ua.json ├── ua.py └── validator.py /17monipdb.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Greyh4t/ProxyPool/09cce076e25b12840058b4d847805e20678a5691/17monipdb.dat -------------------------------------------------------------------------------- /DB.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | import sqlite3 4 | 5 | 6 | class DatabaseObject(object): 7 | def __init__(self, db_file): 8 | self.queries = { 9 | 'SELECT': 'SELECT %s FROM %s', 10 | 'INSERT': 'INSERT INTO %s (%s) VALUES(%s)', 11 | 'UPDATE': 'UPDATE %s SET %s WHERE %s', 12 | 'DELETE': 'DELETE FROM %s where %s', 13 | 'DELETE_ALL': 'DELETE FROM %s', 14 | 'CREATE_TABLE': 'CREATE TABLE IF NOT EXISTS %s(%s)', 15 | } 16 | self.db = sqlite3.connect(db_file, check_same_thread=False) 17 | self.db_file = db_file 18 | self.cursor = self.db.cursor() 19 | self.create_table('proxy') 20 | 21 | def create_table(self, table_name): 22 | values = ''' 23 | ip varchar(20) NOT NULL, 24 | port varchar(11) NOT NULL, 25 | protocol varchar(10) NOT NULL DEFAULT http, 26 | type int(1) NOT NULL DEFAULT 0, 27 | area varchar(255) NOT NULL, 28 | speed int(11) NOT NULL DEFAULT 0, 29 | updatetime TimeStamp NOT NULL DEFAULT (datetime(\'now\',\'localtime\')), 30 | lastusedtime TimeStamp NOT NULL DEFAULT '0000-00-00 00:00:00', 31 | score int(11) DEFAULT 1, 32 | PRIMARY KEY (ip,port) 33 | ''' 34 | query = self.queries['CREATE_TABLE'] % (table_name, values) 35 | self.cursor.execute(query) 36 | query = ''' 37 | CREATE INDEX IF NOT EXISTS proxy_index on proxy (protocol, type, area, speed, updatetime, lastusedtime, score); 38 | CREATE TRIGGER IF NOT EXISTS proxy_update_trig AFTER UPDATE OF speed ON proxy 39 | BEGIN 40 | UPDATE proxy SET updatetime=datetime(\'now\',\'localtime\'),score=(score+1) WHERE ip=NEW.ip AND port=NEW.port; 41 | END; 42 | CREATE TRIGGER IF NOT EXISTS proxy_insert_trig AFTER INSERT ON proxy 43 | BEGIN 44 | UPDATE proxy SET updatetime=datetime(\'now\',\'localtime\') WHERE ip=NEW.ip and port=NEW.port; 45 | END; 46 | ''' 47 | self.cursor.executescript(query) 48 | 49 | def free(self): 50 | self.cursor.close() 51 | 52 | def select(self, table_name, condition): 53 | vals = [] 54 | query = self.queries['SELECT'] % (','.join(condition['field']), table_name) 55 | if condition['where']: 56 | query += ' WHERE ' + ' and '.join(['%s %s ?' % n[:2] for n in condition['where']]) 57 | vals.extend([n[-1].decode('utf-8') for n in condition['where']]) 58 | if condition['order']: 59 | query += ' ORDER BY ' + ','.join(condition['order']) 60 | if condition['limit']: 61 | query += ' LIMIT ?' 62 | vals.append(condition['limit']) 63 | data = self.cursor.execute(query, vals).fetchall() 64 | return data 65 | 66 | def insert(self, table_name, args): 67 | result = [] 68 | for arg in args: 69 | cols = ','.join([k for k in arg]) 70 | values = ','.join(['?' for l in arg]) 71 | vals = tuple([arg[k] for k in arg]) 72 | query = self.queries['INSERT'] % (table_name, cols, values) 73 | try: 74 | self.cursor.execute(query, vals) 75 | except: 76 | result.append(arg) 77 | self.db.commit() 78 | return result 79 | 80 | def update(self, table_name, args): 81 | result = [] 82 | for arg in args: 83 | updates = ','.join(['%s=?' % k for k in arg]) 84 | conds = ' and '.join(['%s=?' % k for k in arg if k == 'ip' or k == 'port']) 85 | vals = [arg[k] for k in arg] 86 | subs = [arg[k] for k in arg if k == 'ip' or k == 'port'] 87 | query = self.queries['UPDATE'] % (table_name, updates, conds) 88 | try: 89 | self.cursor.execute(query, vals + subs) 90 | except Exception, e: 91 | print e 92 | result.append(arg) 93 | self.db.commit() 94 | return result 95 | 96 | def executesql(self, query): 97 | result = self.cursor.execute(query).fetchall() 98 | self.db.commit() 99 | return result 100 | 101 | def disconnect(self): 102 | self.db.close() 103 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ProxyPool 2 | Crawl and validate proxies from Internet 3 | 4 | # Features 5 | fake-useragent from https://github.com/hellysmile/fake-useragent 6 | 7 | # Requirement 8 | ``` 9 | requests 10 | gevent 11 | lxml 12 | beautifulsoup4 13 | ``` 14 | # How to use 15 | just run in terminal 16 | ``` 17 | python proxypool.py 18 | ``` 19 | then 20 | ``` 21 | http://localhost:8000 22 | ``` 23 | or 24 | ``` 25 | http://localhost:8000/?num=1&port=80&type=3&protocol=http&minscore=0&area=北京 26 | ``` 27 | # Other 28 | ``` 29 | the parameter "type" means anonymous level 30 | 0: unknown 31 | 1: transparent 32 | 2: anonymous 33 | 3: high anonymous 34 | ``` 35 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Greyh4t/ProxyPool/09cce076e25b12840058b4d847805e20678a5691/__init__.py -------------------------------------------------------------------------------- /api.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler 4 | from urlparse import urlparse, parse_qs 5 | from DB import DatabaseObject 6 | from config import DB_CONFIG, API_CONFIG 7 | from logger import logger 8 | import datetime 9 | import json 10 | 11 | 12 | class ProxyServer: 13 | def __init__(self, port): 14 | self.port = int(port) 15 | self.run() 16 | 17 | class ProxyPoolHandler(BaseHTTPRequestHandler): 18 | def __init__(self, request, client_address, server): 19 | try: 20 | self.sqlite = DatabaseObject(DB_CONFIG['SQLITE']) 21 | self.table_name = 'proxy' 22 | except Exception, e: 23 | self.sqlite = '' 24 | logger.error('SQLite error: %s', e) 25 | BaseHTTPRequestHandler.__init__(self, request, client_address, server) 26 | 27 | def do_GET(self): 28 | # num=1&port=80&type=3&protocol=http&area=北京 29 | if '/favicon.ico' in self.path: 30 | return 31 | params = parse_qs(urlparse(self.path).query) 32 | data = self.get_proxy(params) 33 | self.protocal_version = 'HTTP/1.1' 34 | self.send_response(200) 35 | self.send_header('Content-type', 'application/json') 36 | self.end_headers() 37 | self.wfile.write(data) 38 | 39 | def get_proxy(self, params): 40 | where_dict = {'port': 'port', 'type': 'type', 'protocol': 'protocol', 'area': 'area'} 41 | conds = { 42 | 'field': ['ip', 'port'], 43 | 'order': ['updatetime desc', 'lastusedtime', 'score desc', 'speed'], 44 | 'limit': 1, 45 | 'where': [], 46 | } 47 | if params: 48 | for (k, v) in params.items(): 49 | try: 50 | k = k.lower() 51 | if k == 'num': 52 | conds['limit'] = v[0] 53 | elif k == 'area': 54 | conds['where'].append((where_dict[k], 'like', '%%%s%%' % v[0])) 55 | elif k == 'minscore': 56 | conds['where'].append(('score', '>=', v[0])) 57 | else: 58 | conds['where'].append((where_dict[k], '=', v[0])) 59 | except: 60 | continue 61 | data = self.sqlite.select(self.table_name, conds) 62 | tmp = [{'ip': n[0], 'port': n[1], 'lastusedtime': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} for 63 | n in data] 64 | self.sqlite.update(self.table_name, tmp) 65 | data = ['%s:%s' % n for n in data] 66 | return json.dumps(data) 67 | 68 | def run(self): 69 | http_server = HTTPServer(('localhost', self.port), self.ProxyPoolHandler) 70 | logger.info('listened on localhost:%s' % API_CONFIG['PORT']) 71 | http_server.serve_forever() 72 | 73 | 74 | if __name__ == '__main__': 75 | ProxyServer(API_CONFIG['PORT']) 76 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | HEADER = { 4 | 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2693.2 Safari/537.36', 5 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 6 | 'Accept-Language': 'en-US,en;q=0.5', 7 | 'Connection': 'keep-alive', 8 | 'Accept-Encoding': 'gzip, deflate', 9 | } 10 | 11 | PROXYPOOL_CONFIG = { 12 | 'MIN_IP_NUM': 5000, # 代理池中最小可用ip数量,若检测到小于此数量,启动爬虫 13 | 'DELETE_TIME': 24 * 60, # minutes, 删除更新时间在该时间之前的ip 14 | 'DELETE_SCORE': -5, # 删除分数小于该分值的ip 15 | 'UPDATE_TIME': 10, # minutes, 重新验证该时间之前的ip 16 | 'CRAWL_TIME': 30, # minutes, 计算可用ip数量时, 取距当前多少分钟内验证过的ip 17 | 'SLEEP_TIME': 1800 # second, 两次爬取间隔 18 | } 19 | 20 | API_CONFIG = { 21 | 'PORT': 8000 22 | } 23 | 24 | CRAWLER_CONFIG = { 25 | 'THREAD_NUM': 20, 26 | 'TIMEOUT': 5, 27 | 'RETRY_TIMES': 5 28 | } 29 | 30 | VALIDATE_CONFIG = { 31 | 'THREAD_NUM': 1000, 32 | 'TIMEOUT': 20, 33 | 'PROXY_TYPE': [0, 1, 2, 3], 34 | 'HTTP_TARGET': 'http://proxy.mimvp.com/check.php', 35 | 'HTTPS_TARGET': 'https://proxy.mimvp.com/check.php' 36 | # 'TARGET1': 'http://7xr8ng.com1.z0.glb.clouddn.com/a.txt', 37 | # 'TARGET2': 'http://www.stilllistener.com/checkpoint1/test11/' 38 | } 39 | 40 | DB_CONFIG = { 41 | 'SQLITE': './data/proxy.db' 42 | } 43 | 44 | LOG_CONFIG = { 45 | 'LOG_TO_FILE': False, 46 | 'LOG_TO_PRINT': True, 47 | 'FILE_PATH': './proxyPool.log' 48 | } 49 | -------------------------------------------------------------------------------- /crawler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | from gevent import monkey 4 | monkey.patch_all() 5 | from gevent.pool import Pool 6 | import re 7 | import json 8 | import random 9 | import requests 10 | from ua import RandomHeader 11 | from logger import logger 12 | from config import CRAWLER_CONFIG, API_CONFIG 13 | from proxysites import get_proxy_sites 14 | 15 | CRAWLER_POOL = Pool(CRAWLER_CONFIG['THREAD_NUM']) 16 | rh = RandomHeader() 17 | 18 | 19 | class Crawler(object): 20 | def run(self): 21 | proxy_sites = self._get_proxy_sites() 22 | random.shuffle(proxy_sites) 23 | proxies_list = CRAWLER_POOL.map(self.crawl, proxy_sites) 24 | proxies = tuple(set([item for sublist in proxies_list for item in sublist])) 25 | logger.info('Get %s proxies' % len(proxies)) 26 | return proxies 27 | 28 | def crawl(self, site): 29 | site_url = site[0] 30 | pattern = site[1] 31 | proxies = [] 32 | r = None 33 | try: 34 | r = requests.get(site_url, headers=rh.Header(site_url), timeout=CRAWLER_CONFIG['TIMEOUT']) 35 | except: 36 | pass 37 | COUNT = 0 38 | while CRAWLER_CONFIG['RETRY_TIMES'] > COUNT: 39 | if not r or (not r.ok) or len(r.content) < 500: 40 | try: 41 | port = API_CONFIG['PORT'] 42 | pr = requests.get('http://localhost:%s/?type=3' % port) 43 | proxy = json.loads(pr.content) 44 | if proxy: 45 | proxy = proxy[0] 46 | r = requests.get(site_url, headers=rh.Header(site_url), timeout=CRAWLER_CONFIG['TIMEOUT'], 47 | proxies={'http': 'http://%s' % proxy}) 48 | else: 49 | break 50 | except Exception, e: 51 | pass 52 | COUNT += 1 53 | else: 54 | break 55 | if not r or not r.ok: 56 | return [] 57 | for match in pattern.finditer(r.content): 58 | ip = match.groupdict()['ip'] 59 | port = match.groupdict()['port'] 60 | proxies.append('%s:%s' % (ip, port)) 61 | logger.info('%s crawl ip: %s', site_url, len(proxies)) 62 | return proxies 63 | 64 | def _get_proxy_sites(self): 65 | proxy_sites = [] 66 | tmp = get_proxy_sites() 67 | for site in tmp: 68 | url = site['url'] 69 | pattern = re.compile(site['pattern']) 70 | if site['range']: 71 | for i in site['range']: 72 | proxy_sites.append((url % i, pattern)) 73 | else: 74 | proxy_sites.append((url, pattern)) 75 | return proxy_sites 76 | 77 | 78 | if __name__ == '__main__': 79 | Crawler().run() 80 | -------------------------------------------------------------------------------- /data/proxy.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Greyh4t/ProxyPool/09cce076e25b12840058b4d847805e20678a5691/data/proxy.db -------------------------------------------------------------------------------- /ipip.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | # author: frk 4 | 5 | import struct 6 | from socket import inet_aton 7 | import os 8 | 9 | 10 | class IPL: 11 | def __init__(self, file): 12 | self.offset = 0 13 | self.index = 0 14 | self.binary = "" 15 | self._unpack_V = lambda b: struct.unpack("L", b) 17 | self._unpack_C = lambda b: struct.unpack("B", b) 18 | self.load(file) 19 | 20 | def load(self, file): 21 | try: 22 | path = os.path.abspath(file) 23 | with open(path, "rb") as f: 24 | self.binary = f.read() 25 | self.offset, = self._unpack_N(self.binary[:4]) 26 | self.index = self.binary[4:self.offset] 27 | except Exception as ex: 28 | print "cannot open file %s" % file 29 | print ex.message 30 | exit(0) 31 | 32 | def find(self, ip): 33 | index = self.index 34 | offset = self.offset 35 | binary = self.binary 36 | nip = inet_aton(ip) 37 | ipdot = ip.split('.') 38 | if int(ipdot[0]) < 0 or int(ipdot[0]) > 255 or len(ipdot) != 4: 39 | return "N/A" 40 | 41 | tmp_offset = int(ipdot[0]) * 4 42 | start, = self._unpack_V(index[tmp_offset:tmp_offset + 4]) 43 | 44 | index_offset = index_length = 0 45 | max_comp_len = offset - 1028 46 | start = start * 8 + 1024 47 | while start < max_comp_len: 48 | if index[start:start + 4] >= nip: 49 | index_offset, = self._unpack_V(index[start + 4:start + 7] + chr(0).encode('utf-8')) 50 | index_length, = self._unpack_C(index[start + 7]) 51 | break 52 | start += 8 53 | 54 | if index_offset == 0: 55 | return "N/A" 56 | 57 | res_offset = offset + index_offset - 1024 58 | return binary[res_offset:res_offset + index_length].decode('utf-8') 59 | 60 | if __name__ == '__main__': 61 | IPL = IPL('17monipdb.dat') 62 | ip = '59.64.234.174' 63 | try: 64 | area = IPL.find(ip).rstrip().replace('\t', '.') 65 | except: 66 | area = 'None.None.None' 67 | print area 68 | -------------------------------------------------------------------------------- /logger.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | import logging 4 | from config import LOG_CONFIG 5 | 6 | 7 | def get_logger(): 8 | logger = logging.getLogger('proxyPool') 9 | logger.setLevel(logging.DEBUG) 10 | if not logger.handlers: 11 | # logging format 12 | fmt = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 13 | # filehandler 14 | if LOG_CONFIG['LOG_TO_FILE']: 15 | fh = logging.FileHandler(LOG_CONFIG['PATH']) 16 | fh.setFormatter(fmt) 17 | fh.setLevel(logging.DEBUG) 18 | logger.addHandler(fh) 19 | # streamhandler 20 | if LOG_CONFIG['LOG_TO_PRINT']: 21 | ch = logging.StreamHandler() 22 | ch.setFormatter(fmt) 23 | ch.setLevel(logging.INFO) 24 | logger.addHandler(ch) 25 | return logger 26 | 27 | 28 | logger = get_logger() 29 | -------------------------------------------------------------------------------- /proxypool.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | from gevent import monkey 4 | monkey.patch_all() 5 | import datetime 6 | import time 7 | import threading 8 | from logger import logger 9 | from DB import DatabaseObject 10 | from config import DB_CONFIG, PROXYPOOL_CONFIG, API_CONFIG 11 | from crawler import Crawler 12 | from validator import Validator 13 | from api import ProxyServer 14 | 15 | 16 | class ProxyPool: 17 | def __init__(self): 18 | self.sqlite = DatabaseObject(DB_CONFIG['SQLITE']) 19 | self.Validator = Validator() 20 | self.Crawler = Crawler() 21 | 22 | def _monitor(self): 23 | while True: 24 | self._update(PROXYPOOL_CONFIG['UPDATE_TIME']) 25 | self._delete(PROXYPOOL_CONFIG['DELETE_TIME'], PROXYPOOL_CONFIG['DELETE_SCORE']) 26 | self._crawl(PROXYPOOL_CONFIG['CRAWL_TIME']) 27 | time.sleep(PROXYPOOL_CONFIG['SLEEP_TIME']) 28 | 29 | def _crawl(self, minutes): 30 | query = "SELECT COUNT(*) FROM proxy WHERE updatetime>'%s'" % \ 31 | ((datetime.datetime.now() - datetime.timedelta(minutes=minutes)).strftime('%Y-%m-%d %H:%M:%S')) 32 | count = self.sqlite.executesql(query)[0] 33 | if int(count[0]) < PROXYPOOL_CONFIG['MIN_IP_NUM']: 34 | logger.info('Crawl proxy begin') 35 | proxies = self.Crawler.run() 36 | logger.info('Crawl proxy end') 37 | logger.info('Validate proxy begin') 38 | proxies = [(n, '') for n in proxies] 39 | avaliable_proxies = self.Validator.run(proxies) 40 | logger.info('Validate proxy end') 41 | if DB_CONFIG['SQLITE']: 42 | self.save2sqlite(avaliable_proxies) 43 | time.sleep(600) 44 | self._crawl(minutes) 45 | else: 46 | return 47 | 48 | def _delete(self, minutes, score): 49 | query = "DELETE FROM proxy WHERE updatetime<'%s' or score<%s" % ( 50 | (datetime.datetime.now() - datetime.timedelta(minutes=minutes)).strftime('%Y-%m-%d %H:%M:%S'), score) 51 | self.sqlite.executesql(query) 52 | 53 | def _update(self, minutes): 54 | query = "SELECT ip,port,protocol FROM proxy WHERE updatetime<'%s'" % ( 55 | (datetime.datetime.now() - datetime.timedelta(minutes=minutes)).strftime('%Y-%m-%d %H:%M:%S')) 56 | proxies = [('%s:%s' % n[:2], n[2]) for n in self.sqlite.executesql(query)] 57 | if proxies: 58 | avaliable_proxies = self.Validator.run(proxies) 59 | validated_fail_proxies = list(set([tuple(n[0].split(':')) for n in proxies]).difference(set([(n['ip'], n['port']) for n in avaliable_proxies]))) 60 | self.save2sqlite(avaliable_proxies) 61 | self._minus_score(validated_fail_proxies) 62 | 63 | def save2sqlite(self, result): 64 | failed = self.sqlite.insert('proxy', result) 65 | if failed: 66 | failed = self.sqlite.update('proxy', failed) 67 | if failed: 68 | logger.info('Some ip failed to save: %s' % (str(failed))) 69 | 70 | def _minus_score(self, result): 71 | query = "UPDATE proxy SET score=(score-1) WHERE ip=? AND port=?;" 72 | self.sqlite.cursor.executemany(query, result) 73 | self.sqlite.db.commit() 74 | 75 | def _api(self): 76 | ProxyServer(API_CONFIG['PORT']) 77 | 78 | def run(self): 79 | t1 = threading.Thread(target=self._api) 80 | t2 = threading.Thread(target=self._monitor) 81 | t1.start() 82 | t2.start() 83 | 84 | 85 | if __name__ == '__main__': 86 | ProxyPool().run() 87 | -------------------------------------------------------------------------------- /proxysites.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | 4 | 5 | def get_proxy_sites(): 6 | proxysites = [ 7 | { 8 | 'url': 'http://www.66ip.cn/%s.html', 9 | 'range': ['index'] + range(2, 11), 10 | 'pattern': '(?P(?:\d{1,3}\.){3}\d{1,3})\n?\s*\s*(?P\d{1,4})' 11 | }, 12 | { 13 | 'url': 'http://www.66ip.cn/%s', 14 | 'range': ['mo.php?tqsl=1000'] 15 | + 16 | ['nmtq.php?getnum=1000&anonymoustype=%s&proxytype=2&api=66ip' % n for n in range(1, 5)], 17 | 'pattern': '(?P(?:\d{1,3}\.){1,3}\d{1,3}):(?P\d{1,4})' 18 | }, 19 | { 20 | 'url': 'http://www.66ip.cn/mo.php?sxb=&tqsl=3000&port=&export=&ktip=&sxa=&textarea=', 21 | 'range': [], 22 | 'pattern': '(?P(?:\d{1,3}\.){1,3}\d{1,3}):(?P\d{1,4})' 23 | }, 24 | # { 25 | # 'url': 'http://www.kuaidaili.com/proxylist/%s', 26 | # 'range': range(1, 11), 27 | # 'pattern': '(?P(?:\d{1,3}\.){3}\d{1,3})\n?\s*\s*(?P\d{1,4})' 28 | # }, 29 | # { 30 | # 'url': 'http://www.kuaidaili.com/free/%s', 31 | # 'range': ['%s/%s' % (m, n) for m in ['inha', 'intr', 'outha', 'outtr'] for n in range(1, 11)], 32 | # 'pattern': '(?P(?:\d{1,3}\.){3}\d{1,3})\n?\s*\s*(?P\d{1,4})' 33 | # }, 34 | { 35 | 'url': 'http://www.xicidaili.com/%s', 36 | 'range': ['%s/%s' % (m, n) for m in ['nn', 'nt', 'wn', 'wt'] for n in range(1, 6)], 37 | 'pattern': '(?P(?:\d{1,3}\.){3}\d{1,3})\n?\s*\s*(?P\d{1,4})' 38 | }, 39 | { 40 | 'url': 'http://www.cz88.net/proxy/%s', 41 | 'range': ['index.shtml'] + ['http_%s.shtml' % n for n in range(2, 11)], 42 | 'pattern': '(?P(?:\d{1,3}\.){3}\d{1,3})\n?\s*\s*(?P\d{1,4})' 43 | }, 44 | { 45 | 'url': 'http://www.ip181.com/daili/%s.html', 46 | 'range': range(1, 11), 47 | 'pattern': '(?P(?:\d{1,3}\.){3}\d{1,3})\n?\s*\s*(?P\d{1,4})' 48 | 49 | }, 50 | { 51 | 'url': 'http://proxy.ipcn.org/proxylist%s.html', 52 | 'range': ['', '2'], 53 | 'pattern': '(?P(?:\d{1,3}\.){1,3}\d{1,3}):(?P\d{1,4})' 54 | }, 55 | { 56 | 'url': 'http://ip84.com/%s', 57 | 'range': ['dl'] + ['dl/%s' % n for n in range(1, 15)] 58 | + 59 | ['gw'] + ['gw/%s' % n for n in range(1, 40)], 60 | 'pattern': '(?P(?:\d{1,3}\.){3}\d{1,3})\n?\s*\s*(?P\d{1,4})' 61 | }, 62 | { 63 | 'url': 'http://www.mimiip.com/%s', 64 | 'range': ['%s/%s' % (m, n) for m in ['gngao', 'gnpu', 'gntou', 'hw'] for n in range(1, 5)], 65 | 'pattern': '(?P(?:\d{1,3}\.){3}\d{1,3})\n?\s*\s*(?P\d{1,4})' 66 | }, 67 | ] 68 | proxysites.extend(get_proxy_sites2()) 69 | return proxysites 70 | 71 | 72 | def get_proxy_sites2(): 73 | import requests 74 | from bs4 import BeautifulSoup as BS 75 | from config import HEADER 76 | sites = [] 77 | 78 | url = 'http://blog.kuaidaili.com/' 79 | pattern = '(?P(?:\d{1,3}\.){1,3}\d{1,3}):(?P\d{1,4})' 80 | r = requests.get(url, headers=HEADER) 81 | if r.ok: 82 | soup = BS(r.content, 'lxml') 83 | for s in soup.find_all('article')[:2]: 84 | sites.append({ 85 | 'url': s.find('a')['href'], 86 | 'range': [], 87 | 'pattern': pattern 88 | }) 89 | return sites 90 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | import requests 4 | import json 5 | from config import API_CONFIG, HEADER 6 | 7 | 8 | url = 'http://www.guokr.com' 9 | 10 | 11 | def run(): 12 | while True: 13 | port = API_CONFIG['PORT'] 14 | r = requests.get('http://localhost:%s/' % port) 15 | proxy = json.loads(r.content) 16 | if proxy: 17 | proxy = proxy[0] 18 | else: 19 | break 20 | try: 21 | r = requests.get(url, headers=HEADER, proxies={'http': 'http://%s' % proxy}) 22 | print r.ok, len(r.content), proxy 23 | except Exception, e: 24 | print proxy, e 25 | 26 | if __name__ == '__main__': 27 | run() 28 | -------------------------------------------------------------------------------- /ua.json: -------------------------------------------------------------------------------- 1 | {"browsers": {"chrome": ["Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.1 Safari/537.36", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2227.0 Safari/537.36", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2226.0 Safari/537.36", "Mozilla/5.0 (Windows NT 6.4; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2225.0 Safari/537.36", "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2224.3 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.93 Safari/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36", "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36", "Mozilla/5.0 (Windows NT 4.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36", "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36", "Mozilla/5.0 (X11; OpenBSD i386) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.125 Safari/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36", "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.3319.102 Safari/537.36", "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36", "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2117.157 Safari/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36", "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1866.237 Safari/537.36", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/4E423F", "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.116 Safari/537.36 Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.517 Safari/537.36", "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36", "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.16 Safari/537.36", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1623.0 Safari/537.36", "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.17 Safari/537.36", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.62 Safari/537.36", "Mozilla/5.0 (X11; CrOS i686 4319.74.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36", "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36", "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36", "Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1464.0 Safari/537.36", "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1500.55 Safari/537.36", "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36", "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.90 Safari/537.36", "Mozilla/5.0 (X11; NetBSD) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36", "Mozilla/5.0 (X11; CrOS i686 3912.101.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.116 Safari/537.36", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.60 Safari/537.17", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1309.0 Safari/537.17", "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.15 (KHTML, like Gecko) Chrome/24.0.1295.0 Safari/537.15", "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.14 (KHTML, like Gecko) Chrome/24.0.1292.0 Safari/537.14"], "opera": ["Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16", "Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14", "Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0) Opera 12.14", "Opera/12.80 (Windows NT 5.1; U; en) Presto/2.10.289 Version/12.02", "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00", "Opera/9.80 (Windows NT 5.1; U; zh-sg) Presto/2.9.181 Version/12.00", "Opera/12.0(Windows NT 5.2;U;en)Presto/22.9.168 Version/12.00", "Opera/12.0(Windows NT 5.1;U;en)Presto/22.9.168 Version/12.00", "Mozilla/5.0 (Windows NT 5.1) Gecko/20100101 Firefox/14.0 Opera/12.0", "Opera/9.80 (Windows NT 6.1; WOW64; U; pt) Presto/2.10.229 Version/11.62", "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.10.229 Version/11.62", "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52", "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; de) Presto/2.9.168 Version/11.52", "Opera/9.80 (Windows NT 5.1; U; en) Presto/2.9.168 Version/11.51", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; de) Opera 11.51", "Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50", "Opera/9.80 (X11; Linux i686; U; hu) Presto/2.9.168 Version/11.50", "Opera/9.80 (X11; Linux i686; U; ru) Presto/2.8.131 Version/11.11", "Opera/9.80 (X11; Linux i686; U; es-ES) Presto/2.8.131 Version/11.11", "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/5.0 Opera 11.11", "Opera/9.80 (X11; Linux x86_64; U; bg) Presto/2.8.131 Version/11.10", "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.8.99 Version/11.10", "Opera/9.80 (Windows NT 5.1; U; zh-tw) Presto/2.8.131 Version/11.10", "Opera/9.80 (Windows NT 6.1; Opera Tablet/15165; U; en) Presto/2.8.149 Version/11.1", "Opera/9.80 (X11; Linux x86_64; U; Ubuntu/10.10 (maverick); pl) Presto/2.7.62 Version/11.01", "Opera/9.80 (X11; Linux i686; U; ja) Presto/2.7.62 Version/11.01", "Opera/9.80 (X11; Linux i686; U; fr) Presto/2.7.62 Version/11.01", "Opera/9.80 (Windows NT 6.1; U; zh-tw) Presto/2.7.62 Version/11.01", "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.7.62 Version/11.01", "Opera/9.80 (Windows NT 6.1; U; sv) Presto/2.7.62 Version/11.01", "Opera/9.80 (Windows NT 6.1; U; en-US) Presto/2.7.62 Version/11.01", "Opera/9.80 (Windows NT 6.1; U; cs) Presto/2.7.62 Version/11.01", "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.7.62 Version/11.01", "Opera/9.80 (Windows NT 5.2; U; ru) Presto/2.7.62 Version/11.01", "Opera/9.80 (Windows NT 5.1; U;) Presto/2.7.62 Version/11.01", "Opera/9.80 (Windows NT 5.1; U; cs) Presto/2.7.62 Version/11.01", "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.13) Gecko/20101213 Opera/9.80 (Windows NT 6.1; U; zh-tw) Presto/2.7.62 Version/11.01", "Mozilla/5.0 (Windows NT 6.1; U; nl; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01", "Mozilla/5.0 (Windows NT 6.1; U; de; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; de) Opera 11.01", "Opera/9.80 (X11; Linux x86_64; U; pl) Presto/2.7.62 Version/11.00", "Opera/9.80 (X11; Linux i686; U; it) Presto/2.7.62 Version/11.00", "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.6.37 Version/11.00", "Opera/9.80 (Windows NT 6.1; U; pl) Presto/2.7.62 Version/11.00", "Opera/9.80 (Windows NT 6.1; U; ko) Presto/2.7.62 Version/11.00", "Opera/9.80 (Windows NT 6.1; U; fi) Presto/2.7.62 Version/11.00", "Opera/9.80 (Windows NT 6.1; U; en-GB) Presto/2.7.62 Version/11.00", "Opera/9.80 (Windows NT 6.1 x64; U; en) Presto/2.7.62 Version/11.00", "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.7.39 Version/11.00"], "firefox": ["Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1", "Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10; rv:33.0) Gecko/20100101 Firefox/33.0", "Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/31.0", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:31.0) Gecko/20130401 Firefox/31.0", "Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0", "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/29.0", "Mozilla/5.0 (X11; OpenBSD amd64; rv:28.0) Gecko/20100101 Firefox/28.0", "Mozilla/5.0 (X11; Linux x86_64; rv:28.0) Gecko/20100101 Firefox/28.0", "Mozilla/5.0 (Windows NT 6.1; rv:27.3) Gecko/20130101 Firefox/27.3", "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:27.0) Gecko/20121011 Firefox/27.0", "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:25.0) Gecko/20100101 Firefox/25.0", "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:24.0) Gecko/20100101 Firefox/24.0", "Mozilla/5.0 (Windows NT 6.0; WOW64; rv:24.0) Gecko/20100101 Firefox/24.0", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20100101 Firefox/24.0", "Mozilla/5.0 (Windows NT 6.2; rv:22.0) Gecko/20130405 Firefox/23.0", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20130406 Firefox/23.0", "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:23.0) Gecko/20131011 Firefox/23.0", "Mozilla/5.0 (Windows NT 6.2; rv:22.0) Gecko/20130405 Firefox/22.0", "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:22.0) Gecko/20130328 Firefox/22.0", "Mozilla/5.0 (Windows NT 6.1; rv:22.0) Gecko/20130405 Firefox/22.0", "Mozilla/5.0 (Microsoft Windows NT 6.2.9200.0); rv:22.0) Gecko/20130405 Firefox/22.0", "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0.1) Gecko/20121011 Firefox/21.0.1", "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:16.0.1) Gecko/20121011 Firefox/21.0.1", "Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:21.0.0) Gecko/20121011 Firefox/21.0.0", "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:21.0) Gecko/20130331 Firefox/21.0", "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:21.0) Gecko/20100101 Firefox/21.0", "Mozilla/5.0 (X11; Linux i686; rv:21.0) Gecko/20100101 Firefox/21.0", "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:21.0) Gecko/20130514 Firefox/21.0", "Mozilla/5.0 (Windows NT 6.2; rv:21.0) Gecko/20130326 Firefox/21.0", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130401 Firefox/21.0", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130331 Firefox/21.0", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20130330 Firefox/21.0", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:21.0) Gecko/20100101 Firefox/21.0", "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130401 Firefox/21.0", "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130328 Firefox/21.0", "Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20100101 Firefox/21.0", "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130401 Firefox/21.0", "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20130331 Firefox/21.0", "Mozilla/5.0 (Windows NT 5.1; rv:21.0) Gecko/20100101 Firefox/21.0", "Mozilla/5.0 (Windows NT 5.0; rv:21.0) Gecko/20100101 Firefox/21.0", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:21.0) Gecko/20100101 Firefox/21.0", "Mozilla/5.0 (Windows NT 6.2; Win64; x64;) Gecko/20100101 Firefox/20.0", "Mozilla/5.0 (Windows x86; rv:19.0) Gecko/20100101 Firefox/19.0", "Mozilla/5.0 (Windows NT 6.1; rv:6.0) Gecko/20100101 Firefox/19.0", "Mozilla/5.0 (Windows NT 6.1; rv:14.0) Gecko/20100101 Firefox/18.0.1", "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0) Gecko/20100101 Firefox/18.0", "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:17.0) Gecko/20100101 Firefox/17.0.6"], "safari": ["Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A", "Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.13+ (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.3 Safari/534.53.10", "Mozilla/5.0 (iPad; CPU OS 5_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko ) Version/5.1 Mobile/9B176 Safari/7534.48.3", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; de-at) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; da-dk) AppleWebKit/533.21.1 (KHTML, like Gecko) Version/5.0.5 Safari/533.21.1", "Mozilla/5.0 (Windows; U; Windows NT 6.1; tr-TR) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Windows; U; Windows NT 6.1; ko-KR) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Windows; U; Windows NT 6.1; fr-FR) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Windows; U; Windows NT 6.1; cs-CZ) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Windows; U; Windows NT 6.0; ja-JP) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10_5_8; zh-cn) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10_5_8; ja-jp) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; ja-jp) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; zh-cn) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; sv-se) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; ko-kr) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; ja-jp) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; it-it) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; fr-fr) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; es-es) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-us) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-gb) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; de-de) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27", "Mozilla/5.0 (Windows; U; Windows NT 6.1; sv-SE) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4", "Mozilla/5.0 (Windows; U; Windows NT 6.1; ja-JP) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4", "Mozilla/5.0 (Windows; U; Windows NT 6.1; de-DE) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4", "Mozilla/5.0 (Windows; U; Windows NT 6.0; hu-HU) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4", "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4", "Mozilla/5.0 (Windows; U; Windows NT 6.0; de-DE) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4", "Mozilla/5.0 (Windows; U; Windows NT 5.1; ru-RU) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4", "Mozilla/5.0 (Windows; U; Windows NT 5.1; ja-JP) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4", "Mozilla/5.0 (Windows; U; Windows NT 5.1; it-IT) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4", "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_7; en-us) AppleWebKit/534.16+ (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; fr-ch) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; de-de) AppleWebKit/534.15+ (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; ar) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4", "Mozilla/5.0 (Android 2.2; Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.3 Safari/533.19.4", "Mozilla/5.0 (Windows; U; Windows NT 6.1; zh-HK) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5", "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5", "Mozilla/5.0 (Windows; U; Windows NT 6.0; tr-TR) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5", "Mozilla/5.0 (Windows; U; Windows NT 6.0; nb-NO) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5", "Mozilla/5.0 (Windows; U; Windows NT 6.0; fr-FR) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5", "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-TW) AppleWebKit/533.19.4 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5", "Mozilla/5.0 (Windows; U; Windows NT 5.1; ru-RU) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; zh-cn) AppleWebKit/533.18.1 (KHTML, like Gecko) Version/5.0.2 Safari/533.18.5"], "internetexplorer": ["Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; AS; rv:11.0) like Gecko", "Mozilla/5.0 (compatible, MSIE 11, Windows NT 6.3; Trident/7.0; rv:11.0) like Gecko", "Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0", "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 7.0; InfoPath.3; .NET CLR 3.1.40767; Trident/6.0; en-IN)", "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)", "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)", "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)", "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/4.0; InfoPath.2; SV1; .NET CLR 2.0.50727; WOW64)", "Mozilla/5.0 (compatible; MSIE 10.0; Macintosh; Intel Mac OS X 10_7_3; Trident/6.0)", "Mozilla/4.0 (Compatible; MSIE 8.0; Windows NT 5.2; Trident/6.0)", "Mozilla/4.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/5.0)", "Mozilla/1.22 (compatible; MSIE 10.0; Windows 3.1)", "Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))", "Mozilla/5.0 (Windows; U; MSIE 9.0; Windows NT 9.0; en-US)", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 7.1; Trident/5.0)", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7)", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Zune 4.0; InfoPath.3; MS-RTC LM 8; .NET4.0C; .NET4.0E)", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; chromeframe/12.0.742.112)", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 2.0.50727; Media Center PC 6.0)", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Zune 4.0; Tablet PC 2.0; InfoPath.3; .NET4.0C; .NET4.0E)", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; yie8)", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.2; .NET CLR 1.1.4322; .NET4.0C; Tablet PC 2.0)", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; FunWebProducts)", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; chromeframe/13.0.782.215)", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; chromeframe/11.0.696.57)", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0) chromeframe/10.0.648.205", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/4.0; GTB7.4; InfoPath.1; SV1; .NET CLR 2.8.52393; WOW64; en-US)", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0; chromeframe/11.0.696.57)", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/4.0; GTB7.4; InfoPath.3; SV1; .NET CLR 3.1.76908; WOW64; en-US)", "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; GTB7.4; InfoPath.2; SV1; .NET CLR 3.3.69573; WOW64; en-US)", "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET CLR 1.0.3705; .NET CLR 1.1.4322)", "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; InfoPath.1; SV1; .NET CLR 3.8.36217; WOW64; en-US)", "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; .NET CLR 2.7.58687; SLCC2; Media Center PC 5.0; Zune 3.4; Tablet PC 3.6; InfoPath.3)", "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.2; Trident/4.0; Media Center PC 4.0; SLCC1; .NET CLR 3.0.04320)", "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 1.1.4322)", "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727)", "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)", "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; SLCC1; .NET CLR 1.1.4322)", "Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.0; Trident/4.0; InfoPath.1; SV1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 3.0.04506.30)", "Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 5.0; Trident/4.0; FBSMTWB; .NET CLR 2.0.34861; .NET CLR 3.0.3746.3218; .NET CLR 3.5.33652; msn OptimizedIE8;ENUS)", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.2; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0)", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; Media Center PC 6.0; InfoPath.2; MS-RTC LM 8)", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; Media Center PC 6.0; InfoPath.2; MS-RTC LM 8", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; Media Center PC 6.0; .NET CLR 3.5.30729; .NET CLR 3.0.30729; .NET4.0C)", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; InfoPath.3; .NET4.0C; .NET4.0E; .NET CLR 3.5.30729; .NET CLR 3.0.30729; MS-RTC LM 8)", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; InfoPath.2)", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Zune 3.0)"]}, "randomize": {"344": "chrome", "0": "chrome", "346": "chrome", "347": "chrome", "340": "chrome", "341": "chrome", "342": "chrome", "343": "chrome", "810": "firefox", "811": "firefox", "812": "firefox", "813": "firefox", "348": "chrome", "349": "chrome", "816": "firefox", "817": "firefox", "595": "chrome", "719": "chrome", "718": "chrome", "717": "chrome", "716": "chrome", "715": "chrome", "714": "chrome", "713": "chrome", "712": "chrome", "711": "chrome", "710": "chrome", "915": "firefox", "914": "firefox", "606": "chrome", "917": "firefox", "594": "chrome", "736": "internetexplorer", "916": "firefox", "911": "firefox", "619": "chrome", "910": "firefox", "913": "firefox", "298": "chrome", "299": "chrome", "296": "chrome", "297": "chrome", "294": "chrome", "295": "chrome", "292": "chrome", "293": "chrome", "290": "chrome", "291": "chrome", "591": "chrome", "590": "chrome", "593": "chrome", "592": "chrome", "199": "chrome", "198": "chrome", "597": "chrome", "596": "chrome", "195": "chrome", "194": "chrome", "197": "chrome", "196": "chrome", "191": "chrome", "190": "chrome", "193": "chrome", "192": "chrome", "270": "chrome", "271": "chrome", "272": "chrome", "273": "chrome", "274": "chrome", "275": "chrome", "276": "chrome", "277": "chrome", "278": "chrome", "279": "chrome", "738": "internetexplorer", "524": "chrome", "525": "chrome", "526": "chrome", "527": "chrome", "520": "chrome", "521": "chrome", "522": "chrome", "523": "chrome", "599": "chrome", "528": "chrome", "529": "chrome", "449": "chrome", "448": "chrome", "443": "chrome", "442": "chrome", "441": "chrome", "440": "chrome", "447": "chrome", "446": "chrome", "445": "chrome", "444": "chrome", "108": "chrome", "109": "chrome", "102": "chrome", "103": "chrome", "100": "chrome", "101": "chrome", "106": "chrome", "107": "chrome", "104": "chrome", "105": "chrome", "902": "firefox", "903": "firefox", "39": "chrome", "38": "chrome", "906": "firefox", "907": "firefox", "904": "firefox", "905": "firefox", "33": "chrome", "32": "chrome", "31": "chrome", "30": "chrome", "37": "chrome", "36": "chrome", "35": "chrome", "34": "chrome", "641": "chrome", "640": "chrome", "643": "chrome", "642": "chrome", "645": "chrome", "644": "chrome", "438": "chrome", "439": "chrome", "436": "chrome", "437": "chrome", "434": "chrome", "435": "chrome", "432": "chrome", "433": "chrome", "430": "chrome", "431": "chrome", "339": "chrome", "338": "chrome", "335": "chrome", "334": "chrome", "337": "chrome", "336": "chrome", "331": "chrome", "330": "chrome", "333": "chrome", "332": "chrome", "744": "internetexplorer", "745": "internetexplorer", "854": "firefox", "818": "firefox", "856": "firefox", "857": "firefox", "850": "firefox", "851": "firefox", "852": "firefox", "345": "chrome", "858": "firefox", "859": "firefox", "748": "internetexplorer", "6": "chrome", "900": "firefox", "848": "firefox", "99": "chrome", "98": "chrome", "844": "firefox", "91": "chrome", "90": "chrome", "93": "chrome", "92": "chrome", "95": "chrome", "94": "chrome", "97": "chrome", "96": "chrome", "814": "firefox", "815": "firefox", "740": "internetexplorer", "741": "internetexplorer", "742": "internetexplorer", "743": "internetexplorer", "559": "chrome", "558": "chrome", "746": "internetexplorer", "747": "internetexplorer", "555": "chrome", "554": "chrome", "557": "chrome", "556": "chrome", "551": "chrome", "550": "chrome", "553": "chrome", "552": "chrome", "238": "chrome", "239": "chrome", "234": "chrome", "235": "chrome", "236": "chrome", "237": "chrome", "230": "chrome", "231": "chrome", "232": "chrome", "233": "chrome", "1": "chrome", "614": "chrome", "146": "chrome", "147": "chrome", "144": "chrome", "145": "chrome", "142": "chrome", "143": "chrome", "140": "chrome", "141": "chrome", "612": "chrome", "613": "chrome", "610": "chrome", "611": "chrome", "616": "chrome", "617": "chrome", "148": "chrome", "149": "chrome", "912": "firefox", "951": "safari", "948": "safari", "949": "safari", "946": "safari", "947": "safari", "944": "safari", "945": "safari", "942": "safari", "943": "safari", "940": "firefox", "941": "safari", "768": "internetexplorer", "689": "chrome", "688": "chrome", "685": "chrome", "684": "chrome", "687": "chrome", "686": "chrome", "681": "chrome", "680": "chrome", "683": "chrome", "682": "chrome", "623": "chrome", "819": "firefox", "622": "chrome", "133": "chrome", "132": "chrome", "131": "chrome", "130": "chrome", "137": "chrome", "136": "chrome", "135": "chrome", "134": "chrome", "494": "chrome", "495": "chrome", "139": "chrome", "138": "chrome", "490": "chrome", "491": "chrome", "492": "chrome", "493": "chrome", "24": "chrome", "25": "chrome", "26": "chrome", "27": "chrome", "20": "chrome", "21": "chrome", "22": "chrome", "23": "chrome", "927": "firefox", "28": "chrome", "29": "chrome", "407": "chrome", "406": "chrome", "405": "chrome", "404": "chrome", "403": "chrome", "402": "chrome", "401": "chrome", "400": "chrome", "933": "firefox", "932": "firefox", "931": "firefox", "930": "firefox", "937": "firefox", "629": "chrome", "409": "chrome", "408": "chrome", "628": "chrome", "758": "internetexplorer", "379": "chrome", "378": "chrome", "829": "firefox", "828": "firefox", "371": "chrome", "370": "chrome", "373": "chrome", "372": "chrome", "375": "chrome", "374": "chrome", "377": "chrome", "376": "chrome", "708": "chrome", "709": "chrome", "704": "chrome", "705": "chrome", "706": "chrome", "707": "chrome", "700": "chrome", "618": "chrome", "702": "chrome", "703": "chrome", "393": "chrome", "392": "chrome", "88": "chrome", "89": "chrome", "397": "chrome", "396": "chrome", "395": "chrome", "394": "chrome", "82": "chrome", "83": "chrome", "80": "chrome", "81": "chrome", "86": "chrome", "87": "chrome", "84": "chrome", "85": "chrome", "797": "firefox", "796": "firefox", "795": "firefox", "794": "firefox", "793": "firefox", "792": "firefox", "791": "firefox", "790": "firefox", "799": "firefox", "798": "firefox", "7": "chrome", "601": "chrome", "607": "chrome", "586": "chrome", "587": "chrome", "584": "chrome", "585": "chrome", "582": "chrome", "583": "chrome", "580": "chrome", "581": "chrome", "588": "chrome", "589": "chrome", "245": "chrome", "244": "chrome", "247": "chrome", "246": "chrome", "241": "chrome", "240": "chrome", "243": "chrome", "242": "chrome", "615": "chrome", "249": "chrome", "248": "chrome", "924": "firefox", "970": "safari", "925": "firefox", "519": "chrome", "518": "chrome", "926": "firefox", "511": "chrome", "510": "chrome", "513": "chrome", "512": "chrome", "515": "chrome", "514": "chrome", "517": "chrome", "516": "chrome", "458": "chrome", "459": "chrome", "621": "chrome", "620": "chrome", "627": "chrome", "626": "chrome", "625": "chrome", "624": "chrome", "450": "chrome", "451": "chrome", "452": "chrome", "453": "chrome", "454": "chrome", "455": "chrome", "456": "chrome", "457": "chrome", "979": "opera", "179": "chrome", "178": "chrome", "177": "chrome", "176": "chrome", "175": "chrome", "174": "chrome", "173": "chrome", "172": "chrome", "171": "chrome", "170": "chrome", "977": "opera", "656": "chrome", "975": "safari", "974": "safari", "973": "safari", "972": "safari", "971": "safari", "657": "chrome", "654": "chrome", "253": "chrome", "978": "opera", "182": "chrome", "183": "chrome", "180": "chrome", "181": "chrome", "186": "chrome", "187": "chrome", "184": "chrome", "185": "chrome", "886": "firefox", "652": "chrome", "188": "chrome", "189": "chrome", "658": "chrome", "653": "chrome", "650": "chrome", "651": "chrome", "764": "internetexplorer", "11": "chrome", "10": "chrome", "13": "chrome", "12": "chrome", "15": "chrome", "14": "chrome", "17": "chrome", "16": "chrome", "19": "chrome", "18": "chrome", "863": "firefox", "862": "firefox", "865": "firefox", "864": "firefox", "867": "firefox", "866": "firefox", "884": "firefox", "938": "firefox", "659": "chrome", "883": "firefox", "753": "internetexplorer", "881": "firefox", "880": "firefox", "887": "firefox", "831": "firefox", "885": "firefox", "752": "internetexplorer", "928": "firefox", "62": "chrome", "888": "firefox", "950": "safari", "756": "internetexplorer", "929": "firefox", "809": "firefox", "322": "chrome", "323": "chrome", "320": "chrome", "321": "chrome", "326": "chrome", "327": "chrome", "324": "chrome", "325": "chrome", "328": "chrome", "329": "chrome", "759": "internetexplorer", "201": "chrome", "200": "chrome", "203": "chrome", "202": "chrome", "205": "chrome", "204": "chrome", "207": "chrome", "206": "chrome", "209": "chrome", "208": "chrome", "779": "firefox", "778": "firefox", "889": "firefox", "77": "chrome", "76": "chrome", "75": "chrome", "74": "chrome", "73": "chrome", "72": "chrome", "71": "chrome", "70": "chrome", "655": "chrome", "79": "chrome", "78": "chrome", "2": "chrome", "805": "firefox", "804": "firefox", "669": "chrome", "668": "chrome", "667": "chrome", "666": "chrome", "665": "chrome", "664": "chrome", "663": "chrome", "662": "chrome", "661": "chrome", "660": "chrome", "769": "internetexplorer", "692": "chrome", "693": "chrome", "690": "chrome", "691": "chrome", "696": "chrome", "697": "chrome", "694": "chrome", "695": "chrome", "698": "chrome", "699": "chrome", "542": "chrome", "543": "chrome", "540": "chrome", "541": "chrome", "546": "chrome", "547": "chrome", "544": "chrome", "545": "chrome", "8": "chrome", "548": "chrome", "549": "chrome", "68": "chrome", "598": "chrome", "869": "firefox", "868": "firefox", "120": "chrome", "121": "chrome", "122": "chrome", "123": "chrome", "124": "chrome", "125": "chrome", "126": "chrome", "127": "chrome", "128": "chrome", "129": "chrome", "765": "internetexplorer", "414": "chrome", "415": "chrome", "416": "chrome", "417": "chrome", "410": "chrome", "411": "chrome", "412": "chrome", "413": "chrome", "920": "firefox", "498": "chrome", "922": "firefox", "923": "firefox", "418": "chrome", "419": "chrome", "776": "internetexplorer", "499": "chrome", "319": "chrome", "318": "chrome", "313": "chrome", "312": "chrome", "311": "chrome", "310": "chrome", "317": "chrome", "316": "chrome", "315": "chrome", "314": "chrome", "861": "firefox", "921": "firefox", "496": "chrome", "832": "firefox", "833": "firefox", "830": "firefox", "497": "chrome", "836": "firefox", "837": "firefox", "834": "firefox", "835": "firefox", "838": "firefox", "839": "firefox", "808": "firefox", "3": "chrome", "725": "internetexplorer", "368": "chrome", "369": "chrome", "366": "chrome", "367": "chrome", "364": "chrome", "365": "chrome", "362": "chrome", "363": "chrome", "360": "chrome", "361": "chrome", "959": "safari", "952": "safari", "882": "firefox", "380": "chrome", "381": "chrome", "382": "chrome", "383": "chrome", "384": "chrome", "385": "chrome", "386": "chrome", "387": "chrome", "388": "chrome", "389": "chrome", "784": "firefox", "785": "firefox", "786": "firefox", "787": "firefox", "780": "firefox", "781": "firefox", "782": "firefox", "783": "firefox", "788": "firefox", "789": "firefox", "860": "firefox", "605": "chrome", "579": "chrome", "578": "chrome", "604": "chrome", "573": "chrome", "572": "chrome", "571": "chrome", "570": "chrome", "577": "chrome", "576": "chrome", "575": "chrome", "574": "chrome", "60": "chrome", "61": "chrome", "258": "chrome", "259": "chrome", "64": "chrome", "65": "chrome", "66": "chrome", "67": "chrome", "252": "chrome", "69": "chrome", "250": "chrome", "251": "chrome", "256": "chrome", "257": "chrome", "254": "chrome", "255": "chrome", "603": "chrome", "602": "chrome", "939": "firefox", "731": "internetexplorer", "730": "internetexplorer", "733": "internetexplorer", "732": "internetexplorer", "735": "internetexplorer", "734": "internetexplorer", "508": "chrome", "509": "chrome", "506": "chrome", "507": "chrome", "504": "chrome", "505": "chrome", "502": "chrome", "503": "chrome", "500": "chrome", "501": "chrome", "630": "chrome", "631": "chrome", "632": "chrome", "633": "chrome", "469": "chrome", "468": "chrome", "636": "chrome", "637": "chrome", "465": "chrome", "464": "chrome", "467": "chrome", "466": "chrome", "461": "chrome", "460": "chrome", "463": "chrome", "462": "chrome", "901": "firefox", "168": "chrome", "169": "chrome", "164": "chrome", "165": "chrome", "166": "chrome", "167": "chrome", "160": "chrome", "161": "chrome", "162": "chrome", "163": "chrome", "964": "safari", "965": "safari", "966": "safari", "967": "safari", "960": "safari", "961": "safari", "962": "safari", "963": "safari", "968": "safari", "969": "safari", "936": "firefox", "935": "firefox", "934": "firefox", "908": "firefox", "909": "firefox", "600": "chrome", "878": "firefox", "879": "firefox", "876": "firefox", "877": "firefox", "874": "firefox", "875": "firefox", "872": "firefox", "873": "firefox", "870": "firefox", "871": "firefox", "9": "chrome", "890": "firefox", "891": "firefox", "892": "firefox", "893": "firefox", "894": "firefox", "647": "chrome", "896": "firefox", "897": "firefox", "898": "firefox", "899": "firefox", "646": "chrome", "649": "chrome", "648": "chrome", "357": "chrome", "356": "chrome", "355": "chrome", "354": "chrome", "353": "chrome", "352": "chrome", "351": "chrome", "350": "chrome", "803": "firefox", "802": "firefox", "801": "firefox", "800": "firefox", "807": "firefox", "806": "firefox", "359": "chrome", "358": "chrome", "216": "chrome", "217": "chrome", "214": "chrome", "215": "chrome", "212": "chrome", "213": "chrome", "210": "chrome", "211": "chrome", "762": "internetexplorer", "763": "internetexplorer", "760": "internetexplorer", "761": "internetexplorer", "766": "internetexplorer", "767": "internetexplorer", "218": "chrome", "219": "chrome", "957": "safari", "956": "safari", "289": "chrome", "288": "chrome", "4": "chrome", "281": "chrome", "280": "chrome", "283": "chrome", "282": "chrome", "285": "chrome", "284": "chrome", "287": "chrome", "286": "chrome", "678": "chrome", "679": "chrome", "674": "chrome", "675": "chrome", "676": "chrome", "677": "chrome", "670": "chrome", "671": "chrome", "672": "chrome", "673": "chrome", "263": "chrome", "262": "chrome", "261": "chrome", "260": "chrome", "267": "chrome", "266": "chrome", "265": "chrome", "264": "chrome", "269": "chrome", "268": "chrome", "701": "chrome", "59": "chrome", "58": "chrome", "55": "chrome", "54": "chrome", "57": "chrome", "56": "chrome", "51": "chrome", "50": "chrome", "53": "chrome", "52": "chrome", "537": "chrome", "536": "chrome", "535": "chrome", "63": "chrome", "533": "chrome", "532": "chrome", "531": "chrome", "530": "chrome", "539": "chrome", "538": "chrome", "775": "internetexplorer", "774": "internetexplorer", "982": "opera", "983": "opera", "980": "opera", "981": "opera", "777": "internetexplorer", "984": "opera", "985": "opera", "115": "chrome", "114": "chrome", "117": "chrome", "116": "chrome", "111": "chrome", "110": "chrome", "113": "chrome", "112": "chrome", "771": "internetexplorer", "119": "chrome", "118": "chrome", "770": "internetexplorer", "773": "internetexplorer", "772": "internetexplorer", "953": "safari", "429": "chrome", "428": "chrome", "534": "chrome", "919": "firefox", "918": "firefox", "421": "chrome", "420": "chrome", "423": "chrome", "422": "chrome", "425": "chrome", "424": "chrome", "427": "chrome", "426": "chrome", "308": "chrome", "309": "chrome", "855": "firefox", "300": "chrome", "301": "chrome", "302": "chrome", "303": "chrome", "304": "chrome", "305": "chrome", "306": "chrome", "307": "chrome", "895": "firefox", "825": "firefox", "824": "firefox", "827": "firefox", "847": "firefox", "846": "firefox", "845": "firefox", "826": "firefox", "843": "firefox", "842": "firefox", "841": "firefox", "840": "firefox", "821": "firefox", "853": "firefox", "849": "firefox", "820": "firefox", "823": "firefox", "822": "firefox", "954": "safari", "568": "chrome", "569": "chrome", "751": "internetexplorer", "750": "internetexplorer", "757": "internetexplorer", "737": "internetexplorer", "755": "internetexplorer", "754": "internetexplorer", "560": "chrome", "561": "chrome", "562": "chrome", "563": "chrome", "564": "chrome", "565": "chrome", "566": "chrome", "567": "chrome", "739": "internetexplorer", "229": "chrome", "228": "chrome", "227": "chrome", "226": "chrome", "225": "chrome", "224": "chrome", "223": "chrome", "222": "chrome", "221": "chrome", "220": "chrome", "391": "chrome", "726": "internetexplorer", "727": "internetexplorer", "724": "chrome", "390": "chrome", "722": "chrome", "723": "chrome", "720": "chrome", "721": "chrome", "728": "internetexplorer", "729": "internetexplorer", "151": "chrome", "150": "chrome", "153": "chrome", "152": "chrome", "155": "chrome", "154": "chrome", "157": "chrome", "156": "chrome", "159": "chrome", "158": "chrome", "609": "chrome", "608": "chrome", "976": "opera", "634": "chrome", "399": "chrome", "635": "chrome", "749": "internetexplorer", "958": "safari", "398": "chrome", "48": "chrome", "49": "chrome", "46": "chrome", "47": "chrome", "44": "chrome", "45": "chrome", "42": "chrome", "43": "chrome", "40": "chrome", "41": "chrome", "638": "chrome", "5": "chrome", "639": "chrome", "489": "chrome", "488": "chrome", "487": "chrome", "486": "chrome", "485": "chrome", "484": "chrome", "483": "chrome", "482": "chrome", "481": "chrome", "480": "chrome", "955": "safari", "472": "chrome", "473": "chrome", "470": "chrome", "471": "chrome", "476": "chrome", "477": "chrome", "474": "chrome", "475": "chrome", "478": "chrome", "479": "chrome"}} -------------------------------------------------------------------------------- /ua.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | import codecs 3 | import json 4 | import random 5 | from config import HEADER 6 | 7 | 8 | class RandomHeader: 9 | def __init__(self): 10 | self.ua = UserAgent() 11 | self.header = HEADER 12 | 13 | def Header(self, referer=''): 14 | header = self.header 15 | header['User-Agent'] = self.ua.random 16 | if referer: 17 | header['Referer'] = referer 18 | return header 19 | 20 | 21 | class UserAgent(object): 22 | def __init__(self): 23 | self.data = self.load('ua.json') 24 | self.SHORTCUTS = { 25 | 'ie': 'internetexplorer', 26 | } 27 | 28 | def load(self, db): 29 | with codecs.open(db, encoding='utf-8', mode='rb',) as fp: 30 | return json.load(fp) 31 | 32 | def __getitem__(self, attr): 33 | return self.__getattr__(attr) 34 | 35 | def __getattr__(self, attr): 36 | attr = attr.lower() 37 | 38 | if attr == 'random': 39 | attr = self.data['randomize'][ 40 | str(random.randint(0, len(self.data['randomize']) - 1)) 41 | ] 42 | else: 43 | if attr in self.SHORTCUTS: 44 | attr = self.SHORTCUTS[attr] 45 | 46 | try: 47 | return self.data['browsers'][attr][ 48 | random.randint( 49 | 0, len(self.data['browsers'][attr]) - 1 50 | ) 51 | ] 52 | except KeyError: 53 | return None 54 | 55 | if __name__ == '__main__': 56 | ua = UserAgent() 57 | print ua['opera'] 58 | print ua['ie'] 59 | print ua.chrome 60 | print ua.firefox 61 | print ua.random 62 | -------------------------------------------------------------------------------- /validator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding:utf-8 3 | from gevent import monkey 4 | monkey.patch_all() 5 | from gevent.pool import Pool 6 | from logger import logger 7 | import re 8 | import time 9 | import ipip 10 | import requests 11 | from ua import RandomHeader 12 | from config import VALIDATE_CONFIG 13 | 14 | requests.packages.urllib3.disable_warnings() 15 | rh = RandomHeader() 16 | 17 | class Validator: 18 | def __init__(self): 19 | self.http_target = VALIDATE_CONFIG['HTTP_TARGET'] 20 | self.https_target = VALIDATE_CONFIG['HTTPS_TARGET'] 21 | self.timeout = VALIDATE_CONFIG['TIMEOUT'] 22 | self.thread_num = VALIDATE_CONFIG['THREAD_NUM'] 23 | self.pattern = re.compile( 24 | r'((?:IP:Port)|(?:HTTP_CLIENT_IP)|(?:HTTP_X_FORWARDED_FOR))\n?\s*(.*?)', re.I) 25 | self.headers = rh.Header(self.http_target) 26 | self.ip = self._get_self_ip() 27 | self.IPL = ipip.IPL('17monipdb.dat') 28 | self.pool = Pool(self.thread_num) 29 | 30 | def run(self, proxies): 31 | # 采用gevent进行处理 32 | if not self.ip: 33 | logger.error('Validating fail, self ip is empty') 34 | return [] 35 | avaliable_proxies = filter(lambda x: x, self.pool.map(self.validate, proxies)) 36 | logger.info('Get %s avaliable proxies' % len(avaliable_proxies)) 37 | return avaliable_proxies 38 | 39 | def _v(self, proxy, target): 40 | try: 41 | start = time.time() 42 | proxies = { 43 | 'http': 'http://%s' % proxy, 44 | 'https': 'http://%s' % proxy 45 | } 46 | r = requests.get(target, headers=self.headers, proxies=proxies, timeout=self.timeout, verify=False) 47 | if r.ok: 48 | speed = time.time() - start 49 | headers = self.pattern.findall(r.content) 50 | headers_info = {} 51 | for header in headers: 52 | headers_info[header[0]] = header[1].split(':')[0] 53 | REMOTE_ADDR = headers_info.get('IP:Port', '') 54 | HTTP_VIA = headers_info.get('HTTP_CLIENT_IP', '') 55 | HTTP_X_FORWARDED_FOR = headers_info.get('HTTP_X_FORWARDED_FOR', '') 56 | if REMOTE_ADDR and REMOTE_ADDR != self.ip: 57 | if not HTTP_X_FORWARDED_FOR: 58 | if not HTTP_VIA: 59 | type = 3 60 | elif HTTP_X_FORWARDED_FOR != self.ip: 61 | type = 2 62 | else: 63 | type = 1 64 | logger.info('Validating %s, success, type:%s, time:%ss', proxy, type, speed) 65 | return { 66 | 'ip': proxy.split(':')[0], 67 | 'port': proxy.split(':')[1], 68 | 'type': type, 69 | 'speed': speed, 70 | 'area': self.IPL.find(proxy.split(':')[0]).rstrip().replace('\t', '.') 71 | } 72 | except Exception as e: 73 | logger.debug('Validating %s, fail: %s', proxy, e) 74 | pass 75 | return None 76 | 77 | def validate(self, (proxy, protocol)): 78 | proxy_info = None 79 | http_proxy_info = None 80 | https_proxy_info = None 81 | if protocol == 'http': 82 | http_proxy_info = self._v(proxy, self.http_target) 83 | if protocol == 'https': 84 | https_proxy_info = self._v(proxy, self.https_target) 85 | else: 86 | http_proxy_info = self._v(proxy, self.http_target) 87 | if not http_proxy_info: 88 | https_proxy_info = self._v(proxy, self.https_target) 89 | 90 | if http_proxy_info: 91 | http_proxy_info['protocol'] = 'http' 92 | proxy_info = http_proxy_info 93 | elif https_proxy_info: 94 | https_proxy_info['protocol'] = 'https' 95 | proxy_info = https_proxy_info 96 | return proxy_info 97 | 98 | def _get_self_ip(self): 99 | # 获取自身外网ip 100 | try: 101 | r = requests.get(self.http_target, headers=self.headers, timeout=5) 102 | if r.ok: 103 | pattern = re.compile(r'IP:port\n?\s*([\d.]*?)(?::\d*)', re.I) 104 | ip = pattern.search(r.content).group(1) 105 | logger.info('Get self ip success: %s' % ip) 106 | return ip 107 | except Exception, e: 108 | logger.warn('Get self ip fail, %s' % e) 109 | return '' 110 | --------------------------------------------------------------------------------