├── README.md
└── proxy.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# Python proxy checker

## Description
Simple multithreaded proxy checker. Takes several text files as input.

## Usage
The script recursively walks the directory `in_directory`, collects the proxies from every `.txt` file it finds, checks each one against a test URL, and writes the working proxies to `out_filename`.
The input format is `ip:port` (e.g., `127.0.0.1:8080`), one proxy per line.

To change the input directory and the output file, edit the following lines in `proxy.py`:
```
in_directory = './input/'
out_filename = 'output/out_filtered.txt'
```

The only external dependency is colorama (`pip install colorama`); run the checker with `python proxy.py`.

For example:
```
python-proxy-checker
|-- proxy.py
|
|-- README.md
|
|-- input
|   |-- first.txt
|   |-- second.txt
|   |-- third.txt
|
|-- output
    |-- out_filtered.txt
```
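
For illustration, an input file such as `input/first.txt` might look like the block below (the addresses are placeholders from the IP documentation ranges, not real proxies):
```
192.0.2.10:8080
198.51.100.7:3128
203.0.113.25:80
```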
--------------------------------------------------------------------------------
/proxy.py:
--------------------------------------------------------------------------------
# Network
import urllib.request
import http.cookiejar

# Concurrency
import threading
import queue
import itertools

# Etc
import os
import sys
import time
from colorama import init, Fore

init()  # enable ANSI colors on Windows as well

# Global variables
in_directory = './input/'
out_filename = 'output/out_filtered.txt'
test_url = 'http://www.google.com/humans.txt'
thread_number = 100
timeout_value = 10

ok_msg = Fore.GREEN + "OK!  " + Fore.RESET
fail_msg = Fore.RED + "FAIL " + Fore.RESET

# Stats: next() on an itertools.count() is atomic in CPython,
# so it serves as a simple cross-thread counter
good_proxy_num = itertools.count()

# Safe print()
mylock = threading.Lock()
def sprint(*a, **b):
    with mylock:
        print(*a, **b)


#
# Printer
#
class PrintThread(threading.Thread):
    def __init__(self, queue, filename):
        threading.Thread.__init__(self)
        self.queue = queue
        self.output = open(filename, 'a')
        self.shutdown = False

    def write(self, line):
        print(line, file=self.output)
        self.output.flush()

    def run(self):
        while not self.shutdown:
            line = self.queue.get()
            self.write(line)
            self.queue.task_done()

    def terminate(self):
        self.shutdown = True
        self.output.close()


#
# Processor
#
class ProcessThread(threading.Thread):
    def __init__(self, id, task_queue, out_queue):
        threading.Thread.__init__(self)
        self.task_queue = task_queue
        self.out_queue = out_queue
        self.id = id

    def run(self):
        while True:
            task = self.task_queue.get()
            result = self.process(task)

            if result is not None:
                self.out_queue.put(result)
                next(good_proxy_num)

            self.task_queue.task_done()

    # Do the processing job here
    def process(self, task):
        proxy = task
        log_msg = "Thread #%3d. Trying HTTP proxy %21s\t\t" % (self.id, proxy)

        cj = http.cookiejar.CookieJar()
        opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(cj),
            urllib.request.HTTPRedirectHandler(),
            urllib.request.ProxyHandler({'http': proxy})
        )

        try:
            t1 = time.time()
            response = opener.open(test_url, timeout=timeout_value).read()
            t2 = time.time()
        except Exception as e:
            log_msg += "%s (%s)" % (fail_msg, str(e))
            sprint(log_msg)
            return None

        log_msg += ok_msg + " Response time: %d ms, length=%d" % (int((t2 - t1) * 1000), len(response))
        sprint(log_msg)
        return proxy

    def terminate(self):
        pass


#
# Main starts here
#
# Init some stuff
input_queue = queue.Queue()
result_queue = queue.Queue()

# Spawn worker threads
workers = []
for i in range(thread_number):
    t = ProcessThread(i, input_queue, result_queue)
    t.daemon = True
    t.start()
    workers.append(t)

# Spawn the printer thread
f_printer = PrintThread(result_queue, out_filename)
f_printer.daemon = True
f_printer.start()

# Add some stuff to the input queue
start_time = time.time()

proxy_list = []
for root, dirs, files in os.walk(in_directory):
    for file in files:
        if file.endswith(".txt"):
            # Read all lines from the file and append them to proxy_list
            with open(os.path.join(root, file), 'r') as f:
                proxy_list.extend(line.rstrip('\n') for line in f)

for proxy in proxy_list:
    input_queue.put(proxy)

total_proxy_num = len(proxy_list)
print("Got %d proxies to check" % total_proxy_num)

if total_proxy_num == 0:
    sys.exit()

# Wait for the queues to drain
input_queue.join()
result_queue.join()

# Shutdown
f_printer.terminate()

for worker in workers:
    worker.terminate()

# Print some info
good_count = next(good_proxy_num)
print("In: %d. Good: %d, that's %.2f%%" % (total_proxy_num, good_count, 100.0 * good_count / total_proxy_num))

end_time = time.time()
print("Time elapsed: %.1f seconds." % (end_time - start_time))
print("Bye-bye!")


# Design notes:
# Read the input files, convert them to a list of proxies.
# Add the proxies to the queue.
# Launch N worker threads.
# Use a lock when writing shared output.
# When the queues are empty, flush the results and shut down.
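
# --------------------------------------------------------------------------
# A minimal alternative sketch of the same pipeline using the standard-library
# concurrent.futures module. It is defined here but never called; the names
# check_proxy_simple and run_pool are illustrative, not part of the script.
import concurrent.futures

def check_proxy_simple(proxy):
    # Return the proxy if it can fetch test_url over HTTP, else None
    opener = urllib.request.build_opener(
        urllib.request.ProxyHandler({'http': proxy}))
    try:
        opener.open(test_url, timeout=timeout_value).read()
        return proxy
    except Exception:
        return None

def run_pool(proxies, max_workers=thread_number):
    # Check all proxies concurrently and return the ones that respond
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
        return [p for p in pool.map(check_proxy_simple, proxies) if p is not None]
--------------------------------------------------------------------------------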