├── .travis.yml ├── README.md └── fetch.py /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | dist: trusty 3 | sudo: false 4 | git: 5 | depth: 1 6 | python: 7 | - "2.6" 8 | - "2.7" 9 | - "3.3" 10 | - "3.6" 11 | script: 12 | - python -c "import fetch" 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | fetch-some-proxies [![Build Status](https://api.travis-ci.org/stamparm/fetch-some-proxies.svg?branch=master)](https://travis-ci.org/stamparm/fetch-some-proxies) [![Python 2.6|2.7|3.x](https://img.shields.io/badge/python-2.6|2.7|3.x-yellow.svg)](https://www.python.org/) [![License](https://img.shields.io/badge/license-Public_domain-red.svg)](https://wiki.creativecommons.org/wiki/Public_domain) 2 | ==== 3 | 4 | Simple Python script for fetching "some" (usable) proxies. It fetches (periodically updated) list of public proxies and automatically finds in a quick manner those usable in that same moment (Note: testing of SOCKS proxies is currently possible only on non-Windows platforms). 5 | 6 | Why should you use it? Well, if you've ever used free proxy lists around you'll know the pain of finding actually working proxies. This tool will automatically do the list fetching and proxy testing for you. Also, only proxies that support HTTPS traffic will be returned, which guarantees access to majority of Internet sites. 7 | 8 | ![fetch](https://i.imgur.com/WLWRGcA.png) 9 | 10 | Requirements 11 | ---- 12 | 13 | **fetch-some-proxies** works out of the box with any Python version from **2.6.x** to **3.x** on any platform. 14 | -------------------------------------------------------------------------------- /fetch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import codecs 4 | import json 5 | import locale 6 | import optparse 7 | import os 8 | import random 9 | import re 10 | import socket 11 | import subprocess 12 | import sys 13 | import threading 14 | import time 15 | import urllib 16 | 17 | if sys.version_info >= (3, 0): 18 | import queue 19 | import urllib.request 20 | 21 | build_opener = urllib.request.build_opener 22 | install_opener = urllib.request.install_opener 23 | quote = urllib.parse.quote 24 | urlopen = urllib.request.urlopen 25 | ProxyHandler = urllib.request.ProxyHandler 26 | Queue = queue.Queue 27 | Request = urllib.request.Request 28 | 29 | xrange = range 30 | else: 31 | import Queue 32 | import urllib2 33 | 34 | build_opener = urllib2.build_opener 35 | install_opener = urllib2.install_opener 36 | quote = urllib.quote 37 | urlopen = urllib2.urlopen 38 | ProxyHandler = urllib2.ProxyHandler 39 | Queue = Queue.Queue 40 | Request = urllib2.Request 41 | 42 | # Reference: http://blog.mathieu-leplatre.info/python-utf-8-print-fails-when-redirecting-stdout.html 43 | sys.stdout = codecs.getwriter(locale.getpreferredencoding())(sys.stdout) 44 | 45 | VERSION = "3.2.4" 46 | BANNER = """ 47 | +-++-++-++-++-++-++-++-++-++-++-++-++-++-++-++-++-++-+ 48 | |f||e||t||c||h||-||s||o||m||e||-||p||r||o||x||i||e||s| <- v%s 49 | +-++-++-++-++-++-++-++-++-++-++-++-++-++-++-++-++-++-+""".strip("\r\n") % VERSION 50 | 51 | ANONIMITY_LEVELS = {"high": "elite", "medium": "anonymous", "low": "transparent"} 52 | FALLBACK_METHOD = False 53 | IFCONFIG_CANDIDATES = ("https://api.ipify.org/?format=text", "https://myexternalip.com/raw", "https://wtfismyip.com/text", "https://icanhazip.com/", "https://ip4.seeip.org") 54 | IS_WIN = os.name == "nt" 55 | MAX_HELP_OPTION_LENGTH = 18 56 | PROXY_LIST_URL = "https://raw.githubusercontent.com/stamparm/aux/master/fetch-some-list.txt" 57 | ROTATION_CHARS = ('/', '-', '\\', '|') 58 | TIMEOUT = 10 59 | THREADS = 20 60 | USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0" 61 | 62 | if not IS_WIN: 63 | BANNER = re.sub(r"\|(\w)\|", lambda _: "|\033[01;41m%s\033[00;49m|" % _.group(1), BANNER) 64 | 65 | options = None 66 | counter = [0] 67 | threads = [] 68 | timeout = TIMEOUT 69 | 70 | def check_alive(address, port): 71 | result = False 72 | 73 | try: 74 | s = socket.socket() 75 | s.connect((address, port)) 76 | result = True 77 | except: 78 | pass 79 | finally: 80 | try: 81 | s.shutdown(socket.SHUT_RDWR) 82 | s.close() 83 | except: 84 | pass 85 | 86 | return result 87 | 88 | def retrieve(url, data=None, headers={"User-agent": USER_AGENT}, timeout=timeout, opener=None): 89 | try: 90 | req = Request("".join(url[i].replace(' ', "%20") if i > url.find('?') else url[i] for i in xrange(len(url))), data, headers) 91 | retval = (urlopen if not opener else opener.open)(req, timeout=timeout).read() 92 | except Exception as ex: 93 | try: 94 | retval = ex.read() if hasattr(ex, "read") else getattr(ex, "msg", str()) 95 | except: 96 | retval = None 97 | 98 | return (retval or b"").decode("utf8") 99 | 100 | def worker(queue, handle=None): 101 | try: 102 | while True: 103 | proxy = queue.get_nowait() 104 | result = "" 105 | counter[0] += 1 106 | sys.stdout.write("\r%s\r" % ROTATION_CHARS[counter[0] % len(ROTATION_CHARS)]) 107 | sys.stdout.flush() 108 | start = time.time() 109 | candidate = "%s://%s:%s" % (proxy["proto"].replace("https", "http"), proxy["ip"], proxy["port"]) 110 | if not all((proxy["ip"], proxy["port"])) or re.search(r"[^:/\w.]", candidate): 111 | continue 112 | if not check_alive(proxy["ip"], proxy["port"]): 113 | continue 114 | if not FALLBACK_METHOD: 115 | process = subprocess.Popen("curl -m %d -A \"%s\" --proxy %s %s" % (timeout, USER_AGENT, candidate, random_ifconfig()), shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 116 | result, _ = process.communicate() 117 | elif proxy["proto"] in ("http", "https"): 118 | opener = build_opener(ProxyHandler({"http": candidate, "https": candidate})) 119 | result = retrieve(random_ifconfig(), timeout=timeout, opener=opener) 120 | if (result or "").strip() == proxy["ip"].encode("utf8"): 121 | latency = time.time() - start 122 | if latency < timeout: 123 | sys.stdout.write("\r%s%s # latency: %.2f sec; country: %s; anonymity: %s (%s)\n" % (candidate, " " * (32 - len(candidate)), latency, ' '.join(_.capitalize() for _ in (proxy["country"].lower() or '-').split(' ')), proxy["type"], proxy["anonymity"])) 124 | sys.stdout.flush() 125 | if handle: 126 | os.write(handle, ("%s%s" % (candidate, os.linesep)).encode("utf8")) 127 | except: 128 | pass 129 | 130 | def random_ifconfig(): 131 | retval = random.sample(IFCONFIG_CANDIDATES, 1)[0] 132 | 133 | if options.noHttps: 134 | retval = retval.replace("https://", "http://") 135 | 136 | return retval 137 | 138 | def run(): 139 | global FALLBACK_METHOD 140 | global timeout 141 | 142 | sys.stdout.write("[i] initial testing...\n") 143 | 144 | timeout = min(options.timeout or sys.maxsize, options.maxLatency or sys.maxsize, TIMEOUT) 145 | socket.setdefaulttimeout(timeout) 146 | 147 | process = subprocess.Popen("curl -m %d -A \"%s\" %s" % (TIMEOUT, USER_AGENT, random_ifconfig()), shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) 148 | stdout, stderr = process.communicate() 149 | FALLBACK_METHOD = re.search(r"\d+\.\d+\.\d+\.\d+", (stdout or b"").decode("utf8")) is None 150 | 151 | if stderr and any(_ in stderr for _ in (b"not found", b"not recognized")): 152 | sys.stdout.write("[x] command 'curl' not available\n") 153 | 154 | sys.stdout.write("[i] retrieving list of proxies...\n") 155 | content = retrieve(PROXY_LIST_URL, headers={"User-agent": USER_AGENT}) 156 | if not all(_ in content for _ in ("proto", "country", "anonymity")): 157 | exit("[!] something went wrong during the proxy list retrieval/parsing ('%s'). Please check your network settings and try again" % content[:100]) 158 | 159 | proxies = json.loads(content) 160 | random.shuffle(proxies) 161 | 162 | if any((options.country, options.anonymity, options.type, options.port)): 163 | _ = [] 164 | 165 | if options.port: 166 | options.port = set(int(_) for _ in re.findall(r"\d+", options.port)) 167 | 168 | for proxy in proxies: 169 | if options.country and not re.search(options.country, proxy["country"], re.I): 170 | continue 171 | if options.port and not proxy["port"] in options.port: 172 | continue 173 | if options.anonymity and not re.search(options.anonymity, "%s (%s)" % (proxy["anonymity"], ANONIMITY_LEVELS.get(proxy["anonymity"].lower(), "")), re.I): 174 | continue 175 | if options.type and not re.search(options.type, proxy["proto"], re.I): 176 | continue 177 | _.append(proxy) 178 | proxies = _ 179 | 180 | if options.outputFile: 181 | handle = os.open(options.outputFile, os.O_APPEND | os.O_CREAT | os.O_TRUNC | os.O_WRONLY) 182 | sys.stdout.write("[i] storing results to '%s'...\n" % options.outputFile) 183 | else: 184 | handle = None 185 | 186 | queue = Queue() 187 | for proxy in proxies: 188 | queue.put(proxy) 189 | 190 | if len(proxies) == 0: 191 | exit("[!] no proxies found") 192 | 193 | sys.stdout.write("[i] testing %d proxies (%d threads)...\n\n" % (len(proxies) if not FALLBACK_METHOD else sum(proxy["proto"] in ("http", "https") for proxy in proxies), options.threads or THREADS)) 194 | for _ in xrange(options.threads or THREADS): 195 | thread = threading.Thread(target=worker, args=[queue, handle]) 196 | thread.daemon = True 197 | 198 | try: 199 | thread.start() 200 | except threading.ThreadError as ex: 201 | sys.stderr.write("[x] error occurred while starting new thread ('%s')" % ex.message) 202 | break 203 | 204 | threads.append(thread) 205 | 206 | try: 207 | alive = True 208 | while alive: 209 | alive = False 210 | for thread in threads: 211 | if thread.is_alive(): 212 | alive = True 213 | time.sleep(0.1) 214 | except KeyboardInterrupt: 215 | sys.stderr.write("\r \n[!] Ctrl-C pressed\n") 216 | else: 217 | sys.stdout.write("\n[i] done\n") 218 | finally: 219 | sys.stdout.flush() 220 | sys.stderr.flush() 221 | if handle: 222 | os.close(handle) 223 | os._exit(0) 224 | 225 | def main(): 226 | global options 227 | 228 | if "--raw" in sys.argv: 229 | sys._stdout = sys.stdout 230 | 231 | class _: 232 | def write(self, value): 233 | if "//" in value: 234 | sys._stdout.write("%s\n" % value.split()[0]) 235 | 236 | def flush(self): 237 | sys._stdout.flush() 238 | 239 | sys.stderr = sys.stdout = _() 240 | 241 | sys.stdout.write("%s\n\n" % BANNER) 242 | parser = optparse.OptionParser(version=VERSION) 243 | parser.add_option("--anonymity", dest="anonymity", help="Regex for filtering anonymity (e.g. \"anonymous|elite\")") 244 | parser.add_option("--country", dest="country", help="Regex for filtering country (e.g. \"china|brazil\")") 245 | parser.add_option("--max-latency", dest="maxLatency", type=float, help="Maximum (tolerable) latency in seconds (default %d)" % TIMEOUT) 246 | parser.add_option("--no-https", dest="noHttps", action="store_true", help="Disable HTTPS checking (not recommended)") 247 | parser.add_option("--output", dest="outputFile", help="Store resulting proxies to output file") 248 | parser.add_option("--port", dest="port", help="List of ports for filtering (e.g. \"1080,8000\")") 249 | parser.add_option("--raw", dest="raw", action="store_false", help="Display only results (minimal verbosity)") 250 | parser.add_option("--threads", dest="threads", type=int, help="Number of scanning threads (default %d)" % THREADS) 251 | parser.add_option("--timeout", dest="timeout", type=int, help="Request timeout in seconds (default %d)" % TIMEOUT) 252 | parser.add_option("--type", dest="type", help="Regex for filtering proxy type (e.g. \"http\")") 253 | 254 | # Dirty hack(s) for help message 255 | def _(self, *args): 256 | retval = parser.formatter._format_option_strings(*args) 257 | if len(retval) > MAX_HELP_OPTION_LENGTH: 258 | retval = ("%%.%ds.." % (MAX_HELP_OPTION_LENGTH - parser.formatter.indent_increment)) % retval 259 | return retval 260 | 261 | parser.formatter._format_option_strings = parser.formatter.format_option_strings 262 | parser.formatter.format_option_strings = type(parser.formatter.format_option_strings)(_, parser) 263 | 264 | for _ in ("-h", "--version"): 265 | option = parser.get_option(_) 266 | option.help = option.help.capitalize() 267 | 268 | try: 269 | options, _ = parser.parse_args() 270 | except SystemExit: 271 | sys.stdout.write("\n") 272 | sys.stdout.flush() 273 | raise 274 | 275 | run() 276 | 277 | if __name__ == "__main__": 278 | main() 279 | --------------------------------------------------------------------------------