├── .gitignore ├── README.md ├── alive_checker.py ├── config.example.py ├── proxy_getter.py ├── proxy_server.py ├── requirements.txt ├── rest_server.py ├── run.sh └── util ├── __init__.py ├── func.py └── logger.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | .idea/ 3 | log.txt 4 | config.py 5 | __pycache__/ 6 | venv/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AutoProxy 2 | IP代理池服务, 获取daili666/快代理(都是收费)提供的代理IP并验证可用性后以接口/代理方式提供服务。 3 | 从免费网站爬代理的方式效率太低,本项目不涉及。 4 | 5 | ## 环境依赖 6 | 1. python库: termcolor、web.py、requests 7 | 2. 可选进程管理工具pm2 8 | 9 | ## 文件说明 10 | 11 | 1. rest_server.py 提供restful接口服务,可通过接口获取可用的代理 12 | 2. alive_checker.py 检查代理的存活性,应该加入到crontab中 13 | 3. proxy_server.py 代理服务器 14 | 15 | ## 部署说明 16 | 1. 添加crontab: ` */2 * * * * python check_alive.py ` 17 | 2. 运行接口服务:`python rest_server.py`, 默认端口80 18 | 3. 运行代理服务:`python proxy_server.py`,默认端口 19 | 20 | ## 配置说明 21 | 内置了两个网站的代理IP服务, 需要配置购买订单号,选择默认使用的服务商 22 | 部署代理服务需要启动一个本地代理,8080,当代理服务器无法连接的时候就使用本地网络直接访问 23 | 24 | -------------------------------------------------------------------------------- /alive_checker.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | __author__ = 'Danny0' 4 | 5 | import re 6 | import socket 7 | import threading 8 | import requests 9 | from proxy_getter import ProxyGetter 10 | from util.func import * 11 | 12 | 13 | class CheckProxy(threading.Thread): 14 | def __init__(self): 15 | threading.Thread.__init__(self) 16 | 17 | @staticmethod 18 | def check_alive_by_port(ip): 19 | """ 20 | 检查proxy是否可用 21 | """ 22 | port = ip.split(":") 23 | if len(port) != 2: 24 | return False 25 | ip = port[0] 26 | port = int(port[1]) 27 | s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 28 | s.settimeout(CHECK_TIMEOUT) 29 | try: 30 | s.connect((ip, port)) 31 | s.shutdown(2) 32 | return True 33 | except Exception as e: 34 | log_ins.warning(ip + " port not open") 35 | return False 36 | 37 | @staticmethod 38 | def check_alive_by_curl(ip_str): 39 | """ 40 | 检查proxy是否可用 41 | """ 42 | ips = ip_str.split(":") 43 | ip = ips[0] 44 | try: 45 | res = requests.get("http://api.ipify.org", timeout=CHECK_TIMEOUT, 46 | headers={ 47 | "User-Agent": "curl/7.43.0" 48 | }, 49 | proxies={ 50 | "http": "http://" + ip_str 51 | }) 52 | if not re.search(ip, res.text): 53 | log_ins.warning(ip + " can't use") 54 | return False 55 | else: 56 | log_ins.info(ip + " proxy is ok ") 57 | return True 58 | except: 59 | log_ins.warning(ip + " can not connected") 60 | return False 61 | 62 | def run(self): 63 | log_ins.debug("thread " + str(threading.currentThread().ident) + " start") 64 | global ip_list 65 | while len(ip_list) > 0: 66 | ip = ip_list.pop() 67 | if not self.check_alive_by_port(ip) or not self.check_alive_by_curl(ip): 68 | log_ins.warning("remove ip : " + ip) 69 | redis_ins.zrem(REDIS_KEY, ip) 70 | 71 | 72 | def check(): 73 | # 启用多线程挨个检测IP存活 74 | log_ins.debug("start check ip") 75 | global ip_list 76 | ip_list = redis_ins.zrange(REDIS_KEY, 0, 10000) 77 | thread_pool = [] 78 | for i in range(0, CHECK_THREAD_NUM): 79 | thread_pool.append(CheckProxy()) 80 | thread_pool[i].start() 81 | for i in range(0, CHECK_THREAD_NUM): 82 | thread_pool[i].join() 83 | log_ins.debug("check ip end") 84 | 85 | 86 | def save(ips): 87 | """ 88 | 保存新ip到redis 89 | :param ips: 90 | :return: 91 | """ 92 | for ip in ips: 93 | redis_ins.zadd(REDIS_KEY, ip, 0) 94 | 95 | 96 | if __name__ == '__main__': 97 | proxy = ProxyGetter() 98 | redis_ins = get_redis_ins() 99 | log_ins = get_logger() 100 | ip_list = [] 101 | check() 102 | if redis_ins.zcount(REDIS_KEY, 0, 10000) < MIN_IP_NUM: 103 | log_ins.warn("usable ip num is 0, try get new ip") 104 | # 可用代理数量小于规定数量,启动获取新IP 105 | ips = proxy.get_ip(DEFAULT_PROXY, NEW_IP_NUM) 106 | save(ips) 107 | check() 108 | -------------------------------------------------------------------------------- /config.example.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | __author__ = 'Danny0' 4 | import os 5 | 6 | IS_DEBUG = False if os.uname()[0] != "Darwin" else True 7 | 8 | # min ip num 9 | MIN_IP_NUM = 100 10 | NEW_IP_NUM = 2 if IS_DEBUG else 100 11 | CHECK_THREAD_NUM = 5 12 | CHECK_TIMEOUT = 3 13 | 14 | # redis 15 | REDIS_HOST = "YOUR_ONLINE_REDIS_IP" if not IS_DEBUG else "127.0.0.1" 16 | REDIS_PORT = 6379 17 | REDIS_PASS = "YOUR_REDIS_PASS" 18 | REDIS_DB = 6 19 | REDIS_KEY = "proxy_ip" 20 | 21 | # http://www.tudoudaili.com 订单号 22 | ORDER_ID_TUDOU = 0 23 | # http://www.kuaidaili.com/usercenter/ 24 | ORDER_ID_KUAIDAILI = 0 25 | 26 | DEFAULT_PROXY = "kuaidaili" 27 | 28 | PROXY_SERVER_MAX_CONNECTION = 1000 29 | PROXY_PORT = 8080 30 | -------------------------------------------------------------------------------- /proxy_getter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | import requests 4 | 5 | from util.func import * 6 | 7 | __author__ = 'Danny0' 8 | 9 | 10 | class ProxyGetter(): 11 | proxy_source = [ 12 | "kuaidaili" 13 | ] 14 | 15 | def __init__(self): 16 | self.log_ins = get_logger() 17 | pass 18 | 19 | def get_ip(self, source, num): 20 | if source not in self.proxy_source: 21 | return False 22 | ips = eval("self.from_" + source + "(num)") 23 | self.log_ins.debug("got new ip num : " + str(len(ips))) 24 | return ips 25 | 26 | def from_kuaidaili(self, num): 27 | """ 28 | 获取代理ip列表 29 | """ 30 | self.log_ins.debug("start get new ip from kuaidaili") 31 | api = "http://dps.kuaidaili.com/api/getdps/" 32 | param = { 33 | "orderid": ORDER_ID_KUAIDAILI, 34 | "num": NEW_IP_NUM, 35 | "ut": 1, 36 | "sep": 1, 37 | } 38 | res = requests.get(api, params=param, headers={ 39 | "Accept-Encoding": "gzip" 40 | }) 41 | if res.text[0:5] == "ERROR": 42 | return [] 43 | ips = res.text.split("\r\n") 44 | return ips 45 | 46 | 47 | if __name__ == '__main__': 48 | getter = ProxyGetter() 49 | print(getter.get_ip("kuaidaili", 2)) 50 | -------------------------------------------------------------------------------- /proxy_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | proxy_getter.py 5 | ~~~~~~~~ 6 | 7 | HTTP Proxy Server in Python. 8 | 9 | :copyright: (c) 2013 by Abhinav Singh. 10 | :license: BSD, see LICENSE for more details. 11 | """ 12 | VERSION = (0, 2) 13 | __version__ = '.'.join(map(str, VERSION[0:2])) 14 | __description__ = 'HTTP Proxy Server in Python' 15 | __author__ = 'Abhinav Singh' 16 | __author_email__ = 'mailsforabhinav@gmail.com' 17 | __homepage__ = 'https://github.com/abhinavsingh/proxy_getter.py' 18 | __license__ = 'BSD' 19 | 20 | import argparse 21 | import datetime 22 | import logging 23 | import multiprocessing 24 | import select 25 | import socket 26 | import sys 27 | 28 | from util.func import * 29 | 30 | logger = logging.getLogger(__name__) 31 | 32 | # True if we are running on Python 3. 33 | PY3 = sys.version_info[0] == 3 34 | 35 | if PY3: 36 | text_type = str 37 | binary_type = bytes 38 | from urllib import parse as urlparse 39 | else: 40 | text_type = unicode 41 | binary_type = str 42 | import urlparse 43 | 44 | 45 | def text_(s, encoding='utf-8', errors='strict'): 46 | """ If ``s`` is an instance of ``binary_type``, return 47 | ``s.decode(encoding, errors)``, otherwise return ``s``""" 48 | if isinstance(s, binary_type): 49 | return s.decode(encoding, errors) 50 | return s # pragma: no cover 51 | 52 | 53 | def bytes_(s, encoding='utf-8', errors='strict'): 54 | """ If ``s`` is an instance of ``text_type``, return 55 | ``s.encode(encoding, errors)``, otherwise return ``s``""" 56 | if isinstance(s, text_type): # pragma: no cover 57 | return s.encode(encoding, errors) 58 | return s 59 | 60 | 61 | version = bytes_(__version__) 62 | 63 | CRLF, COLON, SP = b'\r\n', b':', b' ' 64 | 65 | HTTP_REQUEST_PARSER = 1 66 | HTTP_RESPONSE_PARSER = 2 67 | 68 | HTTP_PARSER_STATE_INITIALIZED = 1 69 | HTTP_PARSER_STATE_LINE_RCVD = 2 70 | HTTP_PARSER_STATE_RCVING_HEADERS = 3 71 | HTTP_PARSER_STATE_HEADERS_COMPLETE = 4 72 | HTTP_PARSER_STATE_RCVING_BODY = 5 73 | HTTP_PARSER_STATE_COMPLETE = 6 74 | 75 | CHUNK_PARSER_STATE_WAITING_FOR_SIZE = 1 76 | CHUNK_PARSER_STATE_WAITING_FOR_DATA = 2 77 | CHUNK_PARSER_STATE_COMPLETE = 3 78 | 79 | 80 | class ChunkParser(object): 81 | """HTTP chunked encoding response parser.""" 82 | 83 | def __init__(self): 84 | self.state = CHUNK_PARSER_STATE_WAITING_FOR_SIZE 85 | self.body = b'' 86 | self.chunk = b'' 87 | self.size = None 88 | 89 | def parse(self, data): 90 | more = True if len(data) > 0 else False 91 | while more: more, data = self.process(data) 92 | 93 | def process(self, data): 94 | if self.state == CHUNK_PARSER_STATE_WAITING_FOR_SIZE: 95 | line, data = HttpParser.split(data) 96 | self.size = int(line, 16) 97 | self.state = CHUNK_PARSER_STATE_WAITING_FOR_DATA 98 | elif self.state == CHUNK_PARSER_STATE_WAITING_FOR_DATA: 99 | remaining = self.size - len(self.chunk) 100 | self.chunk += data[:remaining] 101 | data = data[remaining:] 102 | if len(self.chunk) == self.size: 103 | data = data[len(CRLF):] 104 | self.body += self.chunk 105 | if self.size == 0: 106 | self.state = CHUNK_PARSER_STATE_COMPLETE 107 | else: 108 | self.state = CHUNK_PARSER_STATE_WAITING_FOR_SIZE 109 | self.chunk = b'' 110 | self.size = None 111 | return len(data) > 0, data 112 | 113 | 114 | class HttpParser(object): 115 | """HTTP request/response parser.""" 116 | 117 | def __init__(self, type=None): 118 | self.state = HTTP_PARSER_STATE_INITIALIZED 119 | self.type = type if type else HTTP_REQUEST_PARSER 120 | 121 | self.raw = b'' 122 | self.buffer = b'' 123 | 124 | self.headers = dict() 125 | self.body = None 126 | 127 | self.method = None 128 | self.url = None 129 | self.code = None 130 | self.reason = None 131 | self.version = None 132 | 133 | self.chunker = None 134 | 135 | def parse(self, data): 136 | self.raw += data 137 | data = self.buffer + data 138 | self.buffer = b'' 139 | 140 | more = True if len(data) > 0 else False 141 | while more: 142 | more, data = self.process(data) 143 | self.buffer = data 144 | 145 | def process(self, data): 146 | if self.state >= HTTP_PARSER_STATE_HEADERS_COMPLETE and \ 147 | (self.method == b"POST" or self.type == HTTP_RESPONSE_PARSER): 148 | if not self.body: 149 | self.body = b'' 150 | 151 | if b'content-length' in self.headers: 152 | self.state = HTTP_PARSER_STATE_RCVING_BODY 153 | self.body += data 154 | if len(self.body) >= int(self.headers[b'content-length'][1]): 155 | self.state = HTTP_PARSER_STATE_COMPLETE 156 | elif b'transfer-encoding' in self.headers and self.headers[b'transfer-encoding'][1].lower() == b'chunked': 157 | if not self.chunker: 158 | self.chunker = ChunkParser() 159 | self.chunker.parse(data) 160 | if self.chunker.state == CHUNK_PARSER_STATE_COMPLETE: 161 | self.body = self.chunker.body 162 | self.state = HTTP_PARSER_STATE_COMPLETE 163 | 164 | return False, b'' 165 | 166 | line, data = HttpParser.split(data) 167 | if line == False: return line, data 168 | 169 | if self.state < HTTP_PARSER_STATE_LINE_RCVD: 170 | self.process_line(line) 171 | elif self.state < HTTP_PARSER_STATE_HEADERS_COMPLETE: 172 | self.process_header(line) 173 | 174 | if self.state == HTTP_PARSER_STATE_HEADERS_COMPLETE and \ 175 | self.type == HTTP_REQUEST_PARSER and \ 176 | not self.method == b"POST" and \ 177 | self.raw.endswith(CRLF * 2): 178 | self.state = HTTP_PARSER_STATE_COMPLETE 179 | 180 | return len(data) > 0, data 181 | 182 | def process_line(self, data): 183 | line = data.split(SP) 184 | if self.type == HTTP_REQUEST_PARSER: 185 | self.method = line[0].upper() 186 | self.url = urlparse.urlsplit(line[1]) 187 | self.version = line[2] 188 | else: 189 | self.version = line[0] 190 | self.code = line[1] 191 | self.reason = b' '.join(line[2:]) 192 | self.state = HTTP_PARSER_STATE_LINE_RCVD 193 | 194 | def process_header(self, data): 195 | if len(data) == 0: 196 | if self.state == HTTP_PARSER_STATE_RCVING_HEADERS: 197 | self.state = HTTP_PARSER_STATE_HEADERS_COMPLETE 198 | elif self.state == HTTP_PARSER_STATE_LINE_RCVD: 199 | self.state = HTTP_PARSER_STATE_RCVING_HEADERS 200 | else: 201 | self.state = HTTP_PARSER_STATE_RCVING_HEADERS 202 | parts = data.split(COLON) 203 | key = parts[0].strip() 204 | value = COLON.join(parts[1:]).strip() 205 | self.headers[key.lower()] = (key, value) 206 | 207 | def build_url(self): 208 | if not self.url: 209 | return b'/None' 210 | 211 | url = self.url.path 212 | if url == b'': url = b'/' 213 | if not self.url.query == b'': url += b'?' + self.url.query 214 | if not self.url.fragment == b'': url += b'#' + self.url.fragment 215 | return url 216 | 217 | @staticmethod 218 | def build_header(k, v): 219 | return k + b": " + v + CRLF 220 | 221 | def build(self, del_headers=None, add_headers=None): 222 | req = b" ".join([self.method, self.build_url(), self.version]) 223 | req += CRLF 224 | 225 | if not del_headers: del_headers = [] 226 | for k in self.headers: 227 | if not k in del_headers: 228 | req += self.build_header(self.headers[k][0], self.headers[k][1]) 229 | 230 | if not add_headers: add_headers = [] 231 | for k in add_headers: 232 | req += self.build_header(k[0], k[1]) 233 | 234 | req += CRLF 235 | if self.body: 236 | req += self.body 237 | 238 | return req 239 | 240 | @staticmethod 241 | def split(data): 242 | pos = data.find(CRLF) 243 | if pos == -1: return False, data 244 | line = data[:pos] 245 | data = data[pos + len(CRLF):] 246 | return line, data 247 | 248 | 249 | class Connection(object): 250 | """TCP server/client connection abstraction.""" 251 | 252 | def __init__(self, what): 253 | self.buffer = b'' 254 | self.closed = False 255 | self.what = what # server or client 256 | 257 | def send(self, data): 258 | return self.conn.send(data) 259 | 260 | def recv(self, bytes=8192): 261 | try: 262 | data = self.conn.recv(bytes) 263 | if len(data) == 0: 264 | logger.debug('recvd 0 bytes from %s' % self.what) 265 | return None 266 | logger.debug('rcvd %d bytes from %s' % (len(data), self.what)) 267 | return data 268 | except Exception as e: 269 | logger.exception( 270 | 'Exception while receiving from connection %s %r with reason %r' % (self.what, self.conn, e)) 271 | return None 272 | 273 | def close(self): 274 | self.conn.close() 275 | self.closed = True 276 | 277 | def buffer_size(self): 278 | return len(self.buffer) 279 | 280 | def has_buffer(self): 281 | return self.buffer_size() > 0 282 | 283 | def queue(self, data): 284 | self.buffer += data 285 | 286 | def flush(self): 287 | sent = self.send(self.buffer) 288 | self.buffer = self.buffer[sent:] 289 | logger.debug('flushed %d bytes to %s' % (sent, self.what)) 290 | 291 | 292 | def pick_proxy(): 293 | """ 294 | 从redis中选取一个可用代理 295 | :return: 296 | """ 297 | redis_ins = get_redis_ins() 298 | ip_str = redis_ins.zrange(REDIS_KEY, 0, 0)[0] 299 | redis_ins.zincrby(REDIS_KEY, ip_str) 300 | logger.debug("use proxy: " + ip_str) 301 | ip = ip_str.split(":") 302 | to_address = (ip[0], int(ip[1])) 303 | return to_address 304 | 305 | 306 | class Server(Connection): 307 | """Establish connection to destination server.""" 308 | 309 | def __init__(self, host, port): 310 | super(Server, self).__init__(b'server') 311 | self.addr = (host, int(port)) 312 | 313 | def connect(self): 314 | self.conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 315 | self.conn.settimeout(CHECK_TIMEOUT) 316 | try: 317 | self.conn.connect(pick_proxy()) 318 | except Exception as error: 319 | # reconnect use local ip 320 | self.conn.connect((self.addr[0], self.addr[1])) 321 | 322 | 323 | class Client(Connection): 324 | """Accepted client connection.""" 325 | 326 | def __init__(self, conn, addr): 327 | super(Client, self).__init__(b'client') 328 | self.conn = conn 329 | self.addr = addr 330 | 331 | 332 | class ProxyError(Exception): 333 | pass 334 | 335 | 336 | class ProxyConnectionFailed(ProxyError): 337 | def __init__(self, host, port, reason): 338 | self.host = host 339 | self.port = port 340 | self.reason = reason 341 | 342 | def __str__(self): 343 | return '' % (self.host, self.port, self.reason) 344 | 345 | 346 | class Proxy(multiprocessing.Process): 347 | """HTTP proxy implementation. 348 | 349 | Accepts connection object and act as a proxy between client and server. 350 | """ 351 | 352 | def __init__(self, client): 353 | super(Proxy, self).__init__() 354 | 355 | self.start_time = self._now() 356 | self.last_activity = self.start_time 357 | 358 | self.client = client 359 | self.server = None 360 | 361 | self.request = HttpParser() 362 | self.response = HttpParser(HTTP_RESPONSE_PARSER) 363 | 364 | self.connection_established_pkt = CRLF.join([ 365 | b'HTTP/1.1 200 Connection established', 366 | CRLF 367 | ]) 368 | 369 | @staticmethod 370 | def _now(): 371 | return datetime.datetime.utcnow() 372 | 373 | def _inactive_for(self): 374 | return (self._now() - self.last_activity).seconds 375 | 376 | def _is_inactive(self): 377 | return self._inactive_for() > 30 378 | 379 | def _process_request(self, data): 380 | # once we have connection to the server 381 | # we don't parse the http request packets 382 | # any further, instead just pipe incoming 383 | # data from client to server 384 | if self.server and not self.server.closed: 385 | self.server.queue(data) 386 | return 387 | 388 | # parse http request 389 | self.request.parse(data) 390 | 391 | # once http request parser has reached the state complete 392 | # we attempt to establish connection to destination server 393 | if self.request.state == HTTP_PARSER_STATE_COMPLETE: 394 | logger.debug('request parser is in state complete') 395 | 396 | if self.request.method == b"CONNECT": 397 | host, port = self.request.url.path.split(COLON) 398 | elif self.request.url: 399 | host, port = self.request.url.hostname, self.request.url.port if self.request.url.port else 80 400 | else: 401 | raise ProxyError 402 | self.server = Server(host, port) 403 | try: 404 | logger.debug('connecting to server %s:%s' % (host, port)) 405 | self.server.connect() 406 | logger.debug('connected to server %s:%s' % (host, port)) 407 | except Exception as e: 408 | self.server.closed = True 409 | raise ProxyConnectionFailed(host, port, repr(e)) 410 | 411 | # for http connect methods (https requests) 412 | # queue appropriate response for client 413 | # notifying about established connection 414 | if self.request.method == b"CONNECT": 415 | self.client.queue(self.connection_established_pkt) 416 | # for usual http requests, re-build request packet 417 | # and queue for the server with appropriate headers 418 | else: 419 | self.server.queue(self.request.build( 420 | del_headers=[b'proxy-connection', b'connection', b'keep-alive'], 421 | add_headers=[(b'Connection', b'Close')] 422 | )) 423 | 424 | def _process_response(self, data): 425 | # parse incoming response packet 426 | # only for non-https requests 427 | if not self.request.method == b"CONNECT": 428 | self.response.parse(data) 429 | 430 | # queue data for client 431 | self.client.queue(data) 432 | 433 | def _access_log(self): 434 | host, port = self.server.addr if self.server else (None, None) 435 | if self.request.method == b"CONNECT": 436 | logger.info( 437 | "%s:%s - %s %s:%s" % (self.client.addr[0], self.client.addr[1], self.request.method, host, port)) 438 | elif self.request.method: 439 | logger.info("%s:%s - %s %s:%s%s - %s %s - %s bytes" % ( 440 | self.client.addr[0], self.client.addr[1], self.request.method, host, port, self.request.build_url(), 441 | self.response.code, self.response.reason, len(self.response.raw))) 442 | 443 | def _get_waitable_lists(self): 444 | rlist, wlist, xlist = [self.client.conn], [], [] 445 | logger.debug('*** watching client for read ready') 446 | 447 | if self.client.has_buffer(): 448 | logger.debug('pending client buffer found, watching client for write ready') 449 | wlist.append(self.client.conn) 450 | 451 | if self.server and not self.server.closed: 452 | logger.debug('connection to server exists, watching server for read ready') 453 | rlist.append(self.server.conn) 454 | 455 | if self.server and not self.server.closed and self.server.has_buffer(): 456 | logger.debug('connection to server exists and pending server buffer found, watching server for write ready') 457 | wlist.append(self.server.conn) 458 | 459 | return rlist, wlist, xlist 460 | 461 | def _process_wlist(self, w): 462 | if self.client.conn in w: 463 | logger.debug('client is ready for writes, flushing client buffer') 464 | self.client.flush() 465 | 466 | if self.server and not self.server.closed and self.server.conn in w: 467 | logger.debug('server is ready for writes, flushing server buffer') 468 | self.server.flush() 469 | 470 | def _process_rlist(self, r): 471 | if self.client.conn in r: 472 | logger.debug('client is ready for reads, reading') 473 | data = self.client.recv() 474 | self.last_activity = self._now() 475 | 476 | if not data: 477 | logger.debug('client closed connection, breaking') 478 | return True 479 | 480 | try: 481 | self._process_request(data) 482 | except ProxyConnectionFailed as e: 483 | logger.exception(e) 484 | self.client.queue(CRLF.join([ 485 | b'HTTP/1.1 502 Bad Gateway', 486 | b'Proxy-agent: proxy_getter.py v' + version, 487 | b'Content-Length: 11', 488 | b'Connection: close', 489 | CRLF 490 | ]) + b'Bad Gateway') 491 | self.client.flush() 492 | return True 493 | 494 | if self.server and not self.server.closed and self.server.conn in r: 495 | logger.debug('server is ready for reads, reading') 496 | data = self.server.recv() 497 | self.last_activity = self._now() 498 | 499 | if not data: 500 | logger.debug('server closed connection') 501 | self.server.close() 502 | else: 503 | self._process_response(data) 504 | 505 | return False 506 | 507 | def _process(self): 508 | while True: 509 | rlist, wlist, xlist = self._get_waitable_lists() 510 | r, w, x = select.select(rlist, wlist, xlist, 1) 511 | 512 | self._process_wlist(w) 513 | if self._process_rlist(r): 514 | break 515 | 516 | if self.client.buffer_size() == 0: 517 | if self.response.state == HTTP_PARSER_STATE_COMPLETE: 518 | logger.debug('client buffer is empty and response state is complete, breaking') 519 | break 520 | 521 | if self._is_inactive(): 522 | logger.debug('client buffer is empty and maximum inactivity has reached, breaking') 523 | break 524 | 525 | def run(self): 526 | logger.debug('Proxying connection %r at address %r' % (self.client.conn, self.client.addr)) 527 | try: 528 | self._process() 529 | except KeyboardInterrupt: 530 | pass 531 | except Exception as e: 532 | logger.exception('Exception while handling connection %r with reason %r' % (self.client.conn, e)) 533 | finally: 534 | logger.debug( 535 | "closing client connection with pending client buffer size %d bytes" % self.client.buffer_size()) 536 | self.client.close() 537 | if self.server: 538 | logger.debug( 539 | "closed client connection with pending server buffer size %d bytes" % self.server.buffer_size()) 540 | self._access_log() 541 | logger.debug('Closing proxy for connection %r at address %r' % (self.client.conn, self.client.addr)) 542 | 543 | 544 | class TCP(object): 545 | """TCP server implementation.""" 546 | socket = None 547 | 548 | def __init__(self, hostname='127.0.0.1', port=8899, backlog=1000): 549 | self.hostname = hostname 550 | self.port = port 551 | self.backlog = backlog 552 | 553 | def handle(self, client): 554 | raise NotImplementedError() 555 | 556 | def run(self): 557 | try: 558 | logger.info('Starting server on port %d' % self.port) 559 | self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 560 | self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) 561 | self.socket.bind((self.hostname, self.port)) 562 | self.socket.listen(self.backlog) 563 | while True: 564 | conn, addr = self.socket.accept() 565 | logger.debug('Accepted connection %r at address %r' % (conn, addr)) 566 | client = Client(conn, addr) 567 | self.handle(client) 568 | except Exception as e: 569 | logger.exception('Exception while running the server %r' % e) 570 | finally: 571 | logger.info('Closing server socket') 572 | self.socket.close() 573 | 574 | 575 | class HTTP(TCP): 576 | """HTTP proxy server implementation. 577 | 578 | Spawns new process to proxy accepted client connection. 579 | """ 580 | 581 | def handle(self, client): 582 | proc = Proxy(client) 583 | proc.daemon = True 584 | proc.start() 585 | logger.debug('Started process %r to handle connection %r' % (proc, client.conn)) 586 | 587 | 588 | def main(): 589 | logging.basicConfig(level=getattr(logging, 'DEBUG'), 590 | format='%(asctime)s - %(levelname)s - pid:%(process)d - %(message)s') 591 | try: 592 | proxy = HTTP(get_local_ip(), PROXY_PORT) 593 | proxy.run() 594 | except KeyboardInterrupt: 595 | pass 596 | 597 | 598 | if __name__ == '__main__': 599 | main() 600 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | termcolor~=1.1.0 2 | web.py~=0.62 3 | requests~=2.31.0 4 | redis~=3.5.3 5 | -------------------------------------------------------------------------------- /rest_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | __author__ = 'Danny0' 4 | 5 | import json 6 | import time 7 | import web 8 | from abc import ABCMeta, abstractmethod 9 | from util.func import * 10 | 11 | urls = ( 12 | "/", "Index", 13 | '/ip', 'Ip', 14 | ) 15 | 16 | 17 | class Api: 18 | __metaclass__ = ABCMeta 19 | 20 | def __init__(self): 21 | self.ERROR_CODE = { 22 | "0": "success", 23 | "3000": "miss param", 24 | "3001": "param num can not bigger than 5", 25 | "4003": "no auth", 26 | "5000": "ip pool is empty", 27 | "5001": "request fail", 28 | } 29 | self.params = web.input(num="5") 30 | self.time_start = time.time() 31 | 32 | @abstractmethod 33 | def GET(self): 34 | pass 35 | 36 | @staticmethod 37 | def set_json_response(): 38 | """ 39 | 设置响应content-type为json 40 | :return: 41 | """ 42 | web.header('content-type', 'application/json;charset=utf-8', unique=True) 43 | 44 | def json(self, errno, data=None): 45 | """ 46 | 发送json格式响应 47 | :param errno: 48 | :param data: 49 | :return: 50 | """ 51 | data = data if data else [] 52 | self.set_json_response() 53 | res = { 54 | "errno": errno, 55 | "message": self.ERROR_CODE[str(errno)], 56 | "data": data, 57 | "time": round(time.time() - self.time_start, 2) 58 | } 59 | return json.dumps(res) 60 | 61 | def result(self, data): 62 | """ 63 | 根据查询结果返回json 64 | :param data: 65 | """ 66 | if not data: 67 | return self.json(5001) 68 | else: 69 | return self.json(0, data) 70 | 71 | 72 | class Ip(Api): 73 | def GET(self): 74 | # 从redis中拿 75 | redis_ins = get_redis_ins() 76 | num = int(self.params.num) 77 | if num > 5: 78 | return self.json(3001) 79 | ips = redis_ins.zrange(REDIS_KEY, 0, num - 1) # 会包含结束index 80 | if not ips: 81 | # 没有可用IP 82 | return self.json(5000) 83 | # 增加使用次数 84 | for ip in ips: 85 | redis_ins.zincrby(REDIS_KEY, ip) 86 | return self.result(ips) 87 | 88 | 89 | class Index(Api): 90 | def GET(self): 91 | return "proxy rest api" 92 | 93 | 94 | if __name__ == "__main__": 95 | os.environ["PORT"] = "9090" if os.uname()[0] == "Darwin" else "80" 96 | app = web.application(urls, globals()) 97 | app.run() 98 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | pm2 start rest_server.py -x --interpreter python 2 | pm2 start proxy_server.py -x --interpreter python -------------------------------------------------------------------------------- /util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deng00/auto-proxy/ab184815f89e376273c698320a506e98eb85699b/util/__init__.py -------------------------------------------------------------------------------- /util/func.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding:utf-8 -*- 3 | __author__ = 'Danny0' 4 | import redis 5 | from config import * 6 | from .logger import Logger 7 | 8 | redis_instance = None 9 | log_instance = None 10 | 11 | 12 | def md5(string): 13 | """ 14 | md5加密 15 | :param string: 16 | :return: 17 | """ 18 | import hashlib 19 | m = hashlib.md5() 20 | m.update(string) 21 | return m.hexdigest() 22 | 23 | 24 | def get_redis_ins(): 25 | global redis_instance 26 | if redis_instance: 27 | return redis_instance 28 | redis_instance = redis.Redis(REDIS_HOST, REDIS_PORT, REDIS_DB, REDIS_PASS) 29 | return redis_instance 30 | 31 | 32 | def get_logger(): 33 | global log_instance 34 | if log_instance: 35 | return log_instance 36 | log_instance = Logger('log.txt') 37 | 38 | return log_instance 39 | 40 | 41 | def get_local_ip(): 42 | """ 43 | 获取本机IP 44 | :return: 45 | """ 46 | import socket 47 | my_name = socket.getfqdn(socket.gethostname()) 48 | return socket.gethostbyname(my_name) 49 | -------------------------------------------------------------------------------- /util/logger.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | # coding=utf-8 3 | from __future__ import print_function 4 | import logging 5 | import time 6 | from termcolor import colored 7 | 8 | 9 | class Logger: 10 | def __init__(self, path="log.txt"): 11 | log_instance = logging.getLogger('live_logger') 12 | log_instance.setLevel(logging.DEBUG) 13 | ch = logging.FileHandler(path, 'w') 14 | ch.setLevel(logging.DEBUG) 15 | formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') 16 | ch.setFormatter(formatter) 17 | log_instance.addHandler(ch) 18 | self.logger = log_instance 19 | 20 | def pr(self, message, level): 21 | color_map = { 22 | "debug": "white", 23 | "info": "green", 24 | "warning": "magenta", 25 | "error": "red", 26 | "critical": "red" 27 | } 28 | eval("self.logger." + level + "(message)") 29 | message = "[" + time.strftime('%Y-%m-%d %H:%M:%S') + "] " + "[" + level + "] " + message 30 | print(colored(message, color_map[level])) 31 | 32 | def debug(self, message): 33 | self.pr(message, "debug") 34 | 35 | def info(self, message): 36 | self.pr(message, "info") 37 | 38 | def warning(self, message): 39 | self.pr(message, "warning") 40 | 41 | warn = warning 42 | 43 | def error(self, message): 44 | self.pr(message, "error") 45 | 46 | def critical(self, message): 47 | self.pr(message, "critical") 48 | 49 | 50 | if __name__ == '__main__': 51 | log = Logger('log.txt') 52 | log.debug('一个debug信息') 53 | log.info('一个info信息') 54 | log.warn('一个warning信息') 55 | log.error('一个error信息') 56 | --------------------------------------------------------------------------------