├── .gitignore
├── requirements-win32.txt
├── requirements-linux.txt
├── sequential.py
├── io-models
│   ├── 1_tcp_client.py
│   ├── 2_blocking_io_single_process_tcp_server.py
│   ├── 3_blocking_io_multi_processes_tcp_server.py
│   ├── 4_blocking_io_multi_threads_tcp_server.py
│   ├── 5_nonblocking_io_tcp_server.py
│   └── 6_io_multiplexing_tcp_server.py
├── logger.py
├── common.py
├── flags.txt
├── README.md
├── threadpool.py
├── asynchronous.py
└── processpool.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.idea/
__pycache__/
downloads/
logs/

--------------------------------------------------------------------------------
/requirements-win32.txt:
--------------------------------------------------------------------------------
aiohttp
aiofiles
beautifulsoup4
lxml
progressbar2
pymongo
requests

--------------------------------------------------------------------------------
/requirements-linux.txt:
--------------------------------------------------------------------------------
aiohttp
aiofiles
beautifulsoup4
lxml
progressbar2
pymongo
requests
uvloop

--------------------------------------------------------------------------------
/sequential.py:
--------------------------------------------------------------------------------
import time
from common import setup_down_path, get_links, download_one
from logger import logger


def download_many():
    '''Download all images one after another: synchronous, blocking I/O.'''
    down_path = setup_down_path()
    links = get_links()

    for linkno, link in enumerate(links, 1):
        image = {
            'path': down_path,
            'linkno': linkno,  # image number, so the log shows which image is being downloaded
            'link': link
        }
        download_one(image)

    return len(links)

if __name__ == '__main__':
    t0 = time.time()
    count = download_many()
    msg = '{} flags downloaded in {:.2f} seconds.'
    logger.info(msg.format(count, time.time() - t0))

--------------------------------------------------------------------------------
/io-models/1_tcp_client.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import time
from datetime import datetime
import socket


server_ip = input('Please enter the TCP server ip: ')
server_port = int(input('Enter the TCP server port: '))
client_num = int(input('Enter the TCP clients count: '))

# Keep every client TCP socket that connected successfully
client_socks = []

for i in range(client_num):
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.connect((server_ip, server_port))
    client_socks.append(sock)
    print('Client {}[ID: {}] has connected to {}'.format(sock, i, (server_ip, server_port)))

while True:
    for s in client_socks:
        data = str(datetime.now()).encode('utf-8')
        s.send(data)
        print('Client {} has sent {} to {}'.format(s, data, (server_ip, server_port)))
    # Sleep for 3 seconds, then let every client connection send data to the TCP server again
    time.sleep(3)

--------------------------------------------------------------------------------
/io-models/6_io_multiplexing_tcp_server.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import selectors
import socket

# Automatically picks the best I/O multiplexing interface available on this OS;
# on Linux this is selectors.EpollSelector
sel = selectors.DefaultSelector()


def accept(sock, mask):
    '''Accept a new client connection on the listening socket.'''
    conn, addr = sock.accept()  # Should be ready
    print('accepted', conn, 'from', addr)
    conn.setblocking(False)
    sel.register(conn, selectors.EVENT_READ, read)  # register the new client socket with the epoll instance, watching read events


def read(conn, mask):
    '''Receive the client's data and echo it back unchanged.'''
    data = conn.recv(1000)  # Should be ready
    if data:
        print('echoing', repr(data), 'to', conn)
        conn.send(data)  # Hope it won't block
    else:
        print('closing', conn)
        sel.unregister(conn)
        conn.close()


sock = socket.socket()
sock.bind(('', 9090))
sock.listen(100)
sock.setblocking(False)
sel.register(sock, selectors.EVENT_READ, accept)

while True:
    events = sel.select()
    for key, mask in events:
        callback = key.data
        callback(key.fileobj, mask)
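# Unlike the blocking single-process server below, this single-threaded server
# multiplexes many clients at once: e.g. run io-models/1_tcp_client.py against
# port 9090 with a large client count and every client is echoed concurrently.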

--------------------------------------------------------------------------------
/io-models/2_blocking_io_single_process_tcp_server.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# TCP Echo Server, single process, blocking I/O
import socket


# Create the listening socket
server_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

# Sockets do not allow address reuse by default: OSError: [Errno 98] Address already in use
server_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

# Bind the IP address and a fixed port
server_address = ('', 9090)
print('TCP Server starting up on port {}'.format(server_address[1]))
server_sock.bind(server_address)

# A socket is active (connecting) by default; listen() turns it into a passive socket that can accept client connections
server_sock.listen(5)

try:
    while True:
        print('Main Process, waiting for client connection...')

        # client_sock serves this client only; client_addr is an (ip, port) tuple for the client
        client_sock, client_addr = server_sock.accept()
        print('Client {} is connected'.format(client_addr))

        try:
            while True:
                # Receive data from the client; blocks until data arrives.
                # In fact we only return to the outer while loop once the current
                # client closes, i.e. this server handles one client at a time.
                # If the client closed the connection, data is an empty bytes object
                data = client_sock.recv(4096)
                if data:
                    print('Received {}({} bytes) from {}'.format(data, len(data), client_addr))
                    # Echo the response: send the client's data back unchanged
                    client_sock.send(data)
                    print('Sent {} to {}'.format(data, client_addr))
                else:
                    print('Client {} is closed'.format(client_addr))
                    break
        finally:
            # Close the socket that served this client
            client_sock.close()
finally:
    # Close the listening socket; no more client connections will be accepted
    server_sock.close()
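# To see the single-client limitation, start this server and run
# io-models/1_tcp_client.py with a client count > 1: only the first client is
# echoed until it disconnects.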

--------------------------------------------------------------------------------
/logger.py:
--------------------------------------------------------------------------------
import os
import time
import logging


###
# 1. Create the logger instance; with no name argument this returns the root logger
###

logger = logging.getLogger('spider')
# Set the overall log level; each handler may also be given its own level
logger.setLevel(logging.DEBUG)

###
# 2. Create handlers that send log output to the console and to a file
###

# The console log and the log file share one Formatter
formatter = logging.Formatter(
    '%(asctime)s - %(filename)s[line:%(lineno)d] - <%(threadName)s %(thread)d>' +
    ' - %(levelname)s: %(message)s'
)

# FileHandler for the log file
basedir = os.path.abspath(os.path.dirname(__file__))
log_dest = os.path.join(basedir, 'logs')  # directory holding the log files
if not os.path.isdir(log_dest):
    os.mkdir(log_dest)
filename = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time())) + '.log'  # log file named after the current time
file_handler = logging.FileHandler(os.path.join(log_dest, filename), encoding='utf-8')  # create the log file handler
file_handler.setFormatter(formatter)  # set its Formatter
# file_handler.setLevel(logging.INFO)  # set a separate level for the log file

# StreamHandler for console logging
stream_handler = logging.StreamHandler()
stream_handler.setFormatter(formatter)
# stream_handler.setLevel(logging.DEBUG)  # set a separate level for the console log

###
# 3. Add the handlers to the logger
###

logger.addHandler(file_handler)
logger.addHandler(stream_handler)

###
# Alternatively, use the simple one-shot configuration
###
# logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', level=logging.DEBUG)
# logging.debug('This message should appear on the console')

# logging.basicConfig(format='%(asctime)s - %(message)s', datefmt='%Y-%m-%d', level=logging.DEBUG)
# logging.debug('datetime format has changed to 2018-06-01')

# logging.basicConfig(filename='test.log', level=logging.DEBUG)
# logging.debug('This message should go to test.log')

--------------------------------------------------------------------------------
/common.py:
--------------------------------------------------------------------------------
import os
import time
import requests
from logger import logger


basepath = os.path.abspath(os.path.dirname(__file__))  # root directory of this module file


def setup_down_path():
    '''Set up where downloaded images are saved; all images go into one directory.'''
    down_path = os.path.join(basepath, 'downloads')
    if not os.path.isdir(down_path):
        os.mkdir(down_path)
        logger.info('Create download path {}'.format(down_path))
    return down_path


def get_links():
    '''Get the download links of all images.'''
    with open(os.path.join(basepath, 'flags.txt')) as f:  # the image names are stored in this file, one name per line
        return ['http://192.168.40.121/flags/' + flag.strip() for flag in f.readlines()]


# Why does download_one() take a single dict rather than three positional arguments?
# It keeps the function usable with concurrent.futures.ThreadPoolExecutor.map() later on
def download_one(image):
    '''Download one image
    :param image: dict holding the image's save directory, sequence number and URL
    '''
    logger.info('Downloading No.{} [{}]'.format(image['linkno'], image['link']))
    t0 = time.time()

    resp = requests.get(image['link'])
    filename = os.path.split(image['link'])[1]
    with open(os.path.join(image['path'], filename), 'wb') as f:
        f.write(resp.content)  # resp.content is bytes, while resp.text is str

    t1 = time.time()
    logger.info('Task No.{} [{}] runs {:.2f} seconds.'.format(image['linkno'], image['link'], t1 - t0))


def download_one_1(path, linkno, link):
    '''Download one image
    :param path: directory to save the image in
    :param linkno: the image's sequence number
    :param link: the image's URL
    '''
    logger.info('Downloading No.{} [{}]'.format(linkno, link))
    t0 = time.time()

    resp = requests.get(link)
    filename = os.path.split(link)[1]
    with open(os.path.join(path, filename), 'wb') as f:
        f.write(resp.content)

    t1 = time.time()
    logger.info('Task No.{} [{}] runs {:.2f} seconds.'.format(linkno, link, t1 - t0))

--------------------------------------------------------------------------------
/io-models/5_nonblocking_io_tcp_server.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# TCP Echo Server, single process, nonblocking I/O
import socket


# Holds all successfully connected clients; each element is a (client_sock, client_addr) tuple
clients = []

# Create the listening socket
server_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

# Sockets do not allow address reuse by default: OSError: [Errno 98] Address already in use
server_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

# Bind the IP address and a fixed port
server_address = ('', 9090)
print('TCP Server starting up on port {}'.format(server_address[1]))
server_sock.bind(server_address)

# A socket is active (connecting) by default; listen() turns it into a passive socket that can accept client connections
server_sock.listen(5)

# Put the listening server_sock into nonblocking mode
server_sock.setblocking(False)

print('Main Process, waiting for client connection...')

try:
    while True:
        try:
            # client_sock serves this client only; client_addr is an (ip, port) tuple for the client
            client_sock, client_addr = server_sock.accept()
        except BlockingIOError:
            # With server_sock nonblocking, accept() raises if no client happens to be connecting right now
            pass
        else:
            print('Client {} is connected'.format(client_addr))
            # Put the new client connection socket into nonblocking mode as well
            client_sock.setblocking(False)
            # Add it to the clients list
            clients.append((client_sock, client_addr))

        # Poll every client connection in turn (iterate over a copy, since entries may be removed)
        for client_sock, client_addr in clients[:]:
            try:
                data = client_sock.recv(4096)
                if data:
                    print('Received {}({} bytes) from {}'.format(data, len(data), client_addr))
                    # Echo the response: send the client's data back unchanged
                    client_sock.send(data)
                    print('Sent {} to {}'.format(data, client_addr))
                else:
                    print('Client {} is closed'.format(client_addr))
                    # Close the socket that served this client
                    client_sock.close()
                    # Remove it from the list
                    clients.remove((client_sock, client_addr))
            except BlockingIOError:
                # With client_sock nonblocking, recv() raises if the client happens to have sent nothing
                pass
finally:
    # Close the listening socket; no more client connections will be accepted
    server_sock.close()
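# Note: this accept/recv polling loop never blocks, so the process spins at
# close to 100% CPU even when idle; that is the price nonblocking I/O pays
# without an I/O multiplexer (compare 6_io_multiplexing_tcp_server.py).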

--------------------------------------------------------------------------------
/io-models/3_blocking_io_multi_processes_tcp_server.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# TCP Echo Server, multiple processes, blocking I/O
import os
import socket
from multiprocessing import Process


def client_handler(client_sock, client_addr):
    '''Receive data from one client and echo it back unchanged.'''
    try:
        while True:
            # Receive data from the client; blocks until data arrives
            # If the client closed the connection, data is an empty bytes object
            data = client_sock.recv(4096)
            if data:
                print('Child Process [PID: {}], received {}({} bytes) from {}'.format(os.getpid(), data, len(data), client_addr))
                # Echo the response: send the client's data back unchanged
                client_sock.send(data)
                print('Child Process [PID: {}], sent {} to {}'.format(os.getpid(), data, client_addr))
            else:
                print('Child Process [PID: {}], client {} is closed'.format(os.getpid(), client_addr))
                break
    except ConnectionResetError:
        # Raised when the client force-closes the connection: ConnectionResetError: [Errno 104] Connection reset by peer
        pass
    finally:
        # Close the socket that served this client
        client_sock.close()


# Create the listening socket
server_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

# Sockets do not allow address reuse by default: OSError: [Errno 98] Address already in use
server_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

# Bind the IP address and a fixed port
server_address = ('', 9090)
print('TCP Server starting up on port {}'.format(server_address[1]))
server_sock.bind(server_address)

# A socket is active (connecting) by default; listen() turns it into a passive socket that can accept client connections
server_sock.listen(5)

try:
    while True:
        print('Main Process [PID: {}], waiting for client connection...'.format(os.getpid()))

        # The main process only listens for new client connections
        # client_sock serves this client only; client_addr is an (ip, port) tuple for the client
        client_sock, client_addr = server_sock.accept()
        print('Main Process [PID: {}], client {} is connected'.format(os.getpid(), client_addr))

        # Fork a child process for each new client connection to handle its data
        client = Process(target=client_handler, args=(client_sock, client_addr))
        client.start()
        # The child process has its own copy of client_sock, so the main process can close this one
        client_sock.close()
finally:
    # Close the listening socket; no more client connections will be accepted
    server_sock.close()

--------------------------------------------------------------------------------
/io-models/4_blocking_io_multi_threads_tcp_server.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# TCP Echo Server, multiple threads, blocking I/O
import socket
import threading


def client_handler(client_sock, client_addr):
    '''Receive data from one client and echo it back unchanged.'''
    try:
        while True:
            # Receive data from the client; blocks until data arrives
            # If the client closed the connection, data is an empty bytes object
            data = client_sock.recv(4096)
            if data:
                print('Child Thread [{}], received {}({} bytes) from {}'.format(threading.current_thread().name, data, len(data), client_addr))
                # Echo the response: send the client's data back unchanged
                client_sock.send(data)
                print('Child Thread [{}], sent {} to {}'.format(threading.current_thread().name, data, client_addr))
            else:
                print('Child Thread [{}], client {} is closed'.format(threading.current_thread().name, client_addr))
                break
    except ConnectionResetError:
        # Raised when the client force-closes the connection: ConnectionResetError: [Errno 104] Connection reset by peer
        pass
    finally:
        # Close the socket that served this client
        client_sock.close()


# Create the listening socket
server_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

# Sockets do not allow address reuse by default: OSError: [Errno 98] Address already in use
server_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

# Bind the IP address and a fixed port
server_address = ('', 9090)
print('TCP Server starting up on port {}'.format(server_address[1]))
server_sock.bind(server_address)

# A socket is active (connecting) by default; listen() turns it into a passive socket that can accept client connections
server_sock.listen(5)

try:
    while True:
        print('Main Thread [{}], waiting for client connection...'.format(threading.current_thread().name))

        # The main thread only listens for new client connections
        # client_sock serves this client only; client_addr is an (ip, port) tuple for the client
        client_sock, client_addr = server_sock.accept()
        print('Main Thread [{}], client {} is connected'.format(threading.current_thread().name, client_addr))

        # Spawn a thread for each new client connection to handle its data
        client = threading.Thread(target=client_handler, args=(client_sock, client_addr))
        client.start()

        # The main thread shares client_sock with the child thread, so it must not close it here
        # client_sock.close()
finally:
    # Close the listening socket; no more client connections will be accepted
    server_sock.close()
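# Note: the GIL is not the bottleneck here: a thread blocked in recv()/send()
# releases it, so threads handle concurrent I/O-bound clients well; what limits
# scaling is the per-thread memory and scheduling cost as client counts grow.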

--------------------------------------------------------------------------------
/flags.txt:
--------------------------------------------------------------------------------
ad.gif
ae.gif
af.gif
ag.gif
al.gif
am.gif
ao.gif
ar.gif
at.gif
au.gif
az.gif
ba.gif
bb.gif
bd.gif
be.gif
bf.gif
bg.gif
bh.gif
bi.gif
bj.gif
bn.gif
bo.gif
br.gif
bs.gif
bt.gif
bw.gif
by.gif
bz.gif
ca.gif
cd.gif
cf.gif
cg.gif
ch.gif
ci.gif
cl.gif
cm.gif
cn.gif
co.gif
cr.gif
cu.gif
cv.gif
cy.gif
cz.gif
de.gif
dj.gif
dk.gif
dm.gif
dz.gif
ec.gif
ee.gif
eg.gif
er.gif
es.gif
et.gif
fi.gif
fj.gif
fm.gif
fr.gif
ga.gif
gb.gif
gd.gif
ge.gif
gh.gif
gm.gif
gn.gif
gq.gif
gr.gif
gt.gif
gw.gif
gy.gif
hn.gif
hr.gif
ht.gif
hu.gif
id.gif
ie.gif
il.gif
in.gif
iq.gif
ir.gif
is.gif
it.gif
jm.gif
jo.gif
jp.gif
ke.gif
kg.gif
kh.gif
ki.gif
km.gif
kn.gif
kp.gif
kr.gif
kw.gif
kz.gif
la.gif
lb.gif
lc.gif
li.gif
lk.gif
lr.gif
ls.gif
lt.gif
lu.gif
lv.gif
ly.gif
ma.gif
mc.gif
md.gif
me.gif
mg.gif
mh.gif
mk.gif
ml.gif
mm.gif
mn.gif
mr.gif
mt.gif
mu.gif
mv.gif
mw.gif
mx.gif
my.gif
mz.gif
na.gif
ne.gif
ng.gif
ni.gif
nl.gif
no.gif
np.gif
nr.gif
nz.gif
om.gif
pa.gif
pe.gif
pg.gif
ph.gif
pk.gif
pl.gif
pt.gif
pw.gif
py.gif
qa.gif
ro.gif
rs.gif
ru.gif
rw.gif
sa.gif
sb.gif
sc.gif
sd.gif
se.gif
sg.gif
si.gif
sk.gif
sl.gif
sm.gif
sn.gif
so.gif
sr.gif
ss.gif
st.gif
sv.gif
sy.gif
sz.gif
td.gif
tg.gif
th.gif
tj.gif
tl.gif
tm.gif
tn.gif
to.gif
tr.gif
tt.gif
tv.gif
tw.gif
tz.gif
ua.gif
ug.gif
us.gif
uy.gif
uz.gif
va.gif
vc.gif
ve.gif
vn.gif
vu.gif
ws.gif
ye.gif
za.gif
zm.gif
zw.gif

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# [python3-concurrency](http://www.madmalls.com/blog/category/python3-spider/)

[![Python](https://img.shields.io/badge/python-v3.4%2B-blue.svg)](https://www.python.org/)
[![aiohttp](https://img.shields.io/badge/aiohttp-v3.3.2-brightgreen.svg)](https://aiohttp.readthedocs.io/en/stable/)
[![BeautifulSoup4](https://img.shields.io/badge/BeautifulSoup4-v4.6.3-orange.svg)](https://pypi.org/project/beautifulsoup4/)
[![requests](https://img.shields.io/badge/requests-v2.19.1-yellow.svg)](http://docs.python-requests.org/en/master/)
[![pymongo](https://img.shields.io/badge/pymongo-v3.7.1-red.svg)](https://pypi.org/project/pymongo/)
[![progressbar2](https://img.shields.io/badge/progressbar2-v3.38.0-lightgrey.svg)](https://pypi.org/project/progressbar2/)


![](http://www.madmalls.com/api/medias/uploaded/python3-concurrency-1016d526.png)


# 1. Crawler Series

- [Python 3 Crawler | Chapter 1: I/O Models, Blocking/Nonblocking and Synchronous/Asynchronous](https://madmalls.com/blog/post/io-models/)
- [Python 3 Crawler | Chapter 2: Concurrent Programming in Python](https://madmalls.com/blog/post/concurrent-programming-for-python/)
- [Python 3 Crawler | Chapter 3: Sequential Blocking Download](https://madmalls.com/blog/post/sequential-download-for-python/)
- [Python 3 Crawler | Chapter 4: Concurrent Downloads with Multiple Processes](https://madmalls.com/blog/post/multi-process-for-python3/)
- [Python 3 Crawler | Chapter 5: Concurrent Downloads with Multiple Threads](https://madmalls.com/blog/post/multi-thread-for-python/)
- [Python 3 Crawler | Chapter 6: Iterables / Iterators / Generators](https://madmalls.com/blog/post/iterable-iterator-and-generator-in-python/)
- [Python 3 Crawler | Chapter 7: Coroutines](https://madmalls.com/blog/post/coroutine-in-python/)
- [Python 3 Crawler | Chapter 8: Concurrency with the asyncio Module](https://madmalls.com/blog/post/asyncio-howto-in-python3/)
- [Python 3 Crawler | Chapter 9: Concurrent Downloads with asyncio + aiohttp](https://madmalls.com/blog/post/aiohttp-howto-in-python3/)
- [Python 3 Crawler | Chapter 10: Crawling a Small Batch of Images](https://madmalls.com/blog/post/python3-concurrency-pics-01/)
- [Python 3 Crawler | Chapter 11: Crawling a Massive Number of Images](https://madmalls.com/blog/post/python3-concurrency-pics-02/)


# 2. Usage

## 2.1 Server

To keep the tests from looking like a DDoS attack on a public site, they require a local HTTP test server; see [Python 3 Crawler Series 03 (Experiment): Sequential Blocking Download](http://www.madmalls.com/blog/post/sequential-download-for-python/) for the setup.
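If that post is unavailable, a minimal stand-in is Python's built-in `http.server` module (this sketch assumes the flag images listed in `flags.txt` sit in a local `flags/` directory, and that the base URL in `common.py` / `asynchronous.py` is pointed at this host):

```bash
# serve the directory that contains flags/ on port 80
sudo python3 -m http.server 80
```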

## 2.2 Client

### (1) Download the code

```bash
[root@CentOS ~]# git clone https://github.com/wangy8961/python3-concurrency.git
[root@CentOS ~]# cd python3-concurrency/
```

### (2) Prepare a virtual environment

If your operating system is `Linux`:

```bash
[root@CentOS python3-concurrency]# python3 -m venv venv3
[root@CentOS python3-concurrency]# source venv3/bin/activate
```

> On `Windows`, activate the virtual environment with: `venv3\Scripts\activate`

### (3) Install the dependencies

If your operating system is `Linux`:

```bash
(venv3) [root@CentOS python3-concurrency]# pip install -r requirements-linux.txt
```

If your operating system is `Windows` (`uvloop` will not be used):

```bash
(venv3) C:\Users\wangy> pip install -r requirements-win32.txt
```

### (4) Test

Sequential download:

```bash
(venv3) [root@CentOS python3-concurrency]# python sequential.py
```

Multi-process download:

```bash
(venv3) [root@CentOS python3-concurrency]# python processpool.py
```

Multi-thread download:

```bash
(venv3) [root@CentOS python3-concurrency]# python threadpool.py
```

Asynchronous download:

```bash
(venv3) [root@CentOS python3-concurrency]# python asynchronous.py
```
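The `io-models` demos can be exercised the same way; a minimal sketch (assuming the server and client run on the same host; every server script listens on port 9090):

```bash
# terminal 1: pick one of the echo servers
(venv3) [root@CentOS python3-concurrency]# python io-models/2_blocking_io_single_process_tcp_server.py

# terminal 2: answer the prompts with 127.0.0.1, 9090 and a client count
(venv3) [root@CentOS python3-concurrency]# python io-models/1_tcp_client.py
```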

--------------------------------------------------------------------------------
/threadpool.py:
--------------------------------------------------------------------------------
import time
from queue import Queue
from threading import Thread
from functools import partial
from concurrent import futures
from common import setup_down_path, get_links, download_one, download_one_1
from logger import logger


class ThreadWorker(Thread):
    def __init__(self, queue):
        Thread.__init__(self)
        self.queue = queue

    def run(self):
        while True:
            down_path, linkno, link = self.queue.get()
            download_one_1(down_path, linkno, link)
            self.queue.task_done()


def download_many():
    '''Multi-threaded: download all images concurrently (not in parallel), bounded by the thread count.'''
    down_path = setup_down_path()
    links = get_links()

    # Create the task queue
    queue = Queue()

    # Create the worker threads
    for i in range(64):
        worker = ThreadWorker(queue)
        worker.daemon = True  # let the main thread exit even if workers are blocked waiting for more tasks
        worker.start()  # start the thread

    # Put the tasks into the queue
    for linkno, link in enumerate(links, 1):  # links numbered from 1
        logger.info('Queueing No.{} {}'.format(linkno, link))
        queue.put((down_path, linkno, link))

    logger.info('Waiting for all subthread done...')
    # Causes the main thread to wait for the queue to finish processing all the tasks
    queue.join()
    logger.info('All subthread done.')

    return len(links)


def download_many_1():
    '''Multi-threaded: download all images concurrently (not in parallel), bounded by the thread count.
    Uses concurrent.futures.ThreadPoolExecutor().
    Executor.map() uses Futures rather than returning them: it returns an iterator whose
    __next__() method calls each Future's result(), so we get each Future's result, not the Future itself.

    Note that Executor.map() limits download_one() to a single argument, which is why images is a list of dicts.
    '''
    down_path = setup_down_path()
    links = get_links()

    images = []
    for linkno, link in enumerate(links, 1):
        image = {
            'path': down_path,
            'linkno': linkno,
            'link': link
        }
        images.append(image)

    workers = min(64, len(links))  # never create more threads in the pool than there are download tasks
    # The with statement calls executor.__exit__(), which calls executor.shutdown(wait=True);
    # that blocks the main thread until all the worker threads have finished
    with futures.ThreadPoolExecutor(workers) as executor:
        # executor.map() works like the built-in map(), except download_one() is called concurrently from multiple threads
        # Its return value res is an iterator; iterating it later yields each call's return value
        res = executor.map(download_one, images)  # pass one sequence

    return len(list(res))  # if any call raised an exception, it is re-raised here, as with an implicit next() on an iterator


def download_many_2():
    '''Multi-threaded: download all images concurrently (not in parallel), bounded by the thread count.
    Uses concurrent.futures.ThreadPoolExecutor().
    When the called function takes several arguments, Executor.map() can be given several sequences.
    Reference: https://yuanjiang.space/threadpoolexecutor-map-method-with-multiple-parameters
    '''
    down_path = setup_down_path()
    links = get_links()

    # Pin the save path so we don't have to pass the same down_path argument on every download call
    download_one_1_partial = partial(download_one_1, down_path)

    # Build the sequence of all linknos, numbered from 1 like the other variants
    linknos = [i for i in range(1, len(links) + 1)]

    workers = min(64, len(links))  # never create more threads in the pool than there are download tasks
    with futures.ThreadPoolExecutor(workers) as executor:
        res = executor.map(download_one_1_partial, linknos, links)  # pass several sequences to Executor.map()

    return len(list(res))


def download_many_3():
    '''Multi-threaded: download all images concurrently (not in parallel), bounded by the thread count.
    Uses concurrent.futures.ThreadPoolExecutor().
    Instead of Executor.map(), uses Executor.submit() together with concurrent.futures.as_completed().
    Executor.submit() returns a Future, whereas Executor.map() consumes Futures internally.
    '''
    down_path = setup_down_path()
    links = get_links()

    # Pin the save path so we don't have to pass the same down_path argument on every download call
    download_one_1_partial = partial(download_one_1, down_path)

    workers = min(64, len(links))  # never create more threads in the pool than there are download tasks
    with futures.ThreadPoolExecutor(workers) as executor:
        to_do = []
        # Create and schedule the Futures
        for linkno, link in enumerate(links, 1):  # links numbered from 1
            future = executor.submit(download_one_1_partial, linkno, link)
            to_do.append(future)
            logger.debug('Scheduled for No.{} {}: {}'.format(linkno, link, future))

        results = []
        # Collect the results: futures.as_completed(to_do) takes a list of Futures and returns
        # an iterator that yields each future only once it has finished running
        for future in futures.as_completed(to_do):  # each yielded future is already done, so future.result() never blocks
            res = future.result()
            results.append(res)
            logger.debug('{} result: {!r}'.format(future, res))

    return len(results)


if __name__ == '__main__':
    t0 = time.time()
    count = download_many()
    msg = '{} flags downloaded in {:.2f} seconds.'
    logger.info(msg.format(count, time.time() - t0))
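# To time the other variants, swap download_many() above for download_many_1(),
# download_many_2() or download_many_3()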

--------------------------------------------------------------------------------
/asynchronous.py:
--------------------------------------------------------------------------------
import asyncio
import logging
import os
import sys
import time
import aiohttp
import aiofiles
import progressbar


# Root directory of this module file
basepath = os.path.abspath(os.path.dirname(__file__))

# Logging setup
logger = logging.getLogger('spider')  # create the logger instance
logger.setLevel(logging.CRITICAL)  # keep the console clean: only the summary and the progress bar
formatter = logging.Formatter('%(asctime)s - %(levelname)s: %(message)s')  # console log and log file share one Formatter
log_path = os.path.join(basepath, 'logs')  # directory holding the log files
if not os.path.isdir(log_path):
    os.mkdir(log_path)
filename = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time())) + '.log'  # log file named after the current time
file_handler = logging.FileHandler(os.path.join(log_path, filename), encoding='utf-8')  # create the log file handler
file_handler.setFormatter(formatter)  # set its Formatter
file_handler.setLevel(logging.DEBUG)  # separate level for the log file; comment out to use the logger's overall level
stream_handler = logging.StreamHandler()  # StreamHandler for console logging
stream_handler.setFormatter(formatter)
logger.addHandler(file_handler)  # add the handlers to the logger
logger.addHandler(stream_handler)


def setup_down_path():
    '''Set up where downloaded images are saved; all images go into one directory.'''
    down_path = os.path.join(basepath, 'downloads')
    if not os.path.isdir(down_path):
        os.mkdir(down_path)
        logger.critical('Create download path {}'.format(down_path))
    return down_path


async def get_links():
    '''Get the download links of all images.'''
    async with aiofiles.open(os.path.join(basepath, 'flags.txt')) as f:  # the image names are stored in this file, one name per line
        flags = await f.readlines()
    return ['http://192.168.40.121/flags/' + flag.strip() for flag in flags]


async def download_one(semaphore, session, image):
    logger.debug('Downloading No.{} [{}]'.format(image['linkno'], image['link']))
    t0 = time.time()

    try:
        async with semaphore:
            async with session.get(image['link']) as response:
                if response.status == 200:
                    image_content = await response.read()  # Binary Response Content: access the response body as bytes, for non-text requests
                else:
                    logger.error('received invalid response code: {}, message: {}'.format(response.status, response.reason))
                    raise aiohttp.ClientError()
    except Exception as e:
        logger.error('Exception {} raised on No.{} [{}]'.format(e.__class__, image['linkno'], image['link']))
        return False  # tell the caller of download_one() that requesting this image URL failed

    filename = os.path.split(image['link'])[1]
    async with aiofiles.open(os.path.join(image['path'], filename), 'wb') as f:
        await f.write(image_content)

    t1 = time.time()
    logger.debug('Task No.{} [{}] runs {:.2f} seconds.'.format(image['linkno'], image['link'], t1 - t0))

    return True  # tell the caller of download_one() that this image URL was fetched successfully


async def download_many():
    down_path = setup_down_path()
    links = await get_links()
    # Used to cap the number of concurrent requests
    sem = asyncio.Semaphore(min(1000, len(links)))

    async with aiohttp.ClientSession() as session:  # aiohttp recommends one session for the whole application; don't create a session per request
        successful_images = 0  # number of images fetched successfully
        failed_images = 0  # number of images whose request failed

        if len(sys.argv) > 1 and sys.argv[1] == '-v':  # verbose output
            logger.setLevel(logging.DEBUG)

            tasks = []  # list holding all the tasks
            for linkno, link in enumerate(links, 1):
                image = {
                    'path': down_path,
                    'linkno': linkno,  # image number, so the log shows which image is being downloaded
                    'link': link
                }
                task = asyncio.create_task(download_one(sem, session, image))  # asyncio.create_task() is new in Python 3.7; use asyncio.ensure_future() before that
                tasks.append(task)
            results = await asyncio.gather(*tasks)

            for result in results:
                if result:
                    successful_images += 1
                else:
                    failed_images += 1
        else:  # progress bar output
            to_do = []
            for linkno, link in enumerate(links, 1):
                image = {
                    'path': down_path,
                    'linkno': linkno,  # image number, so the log shows which image is being downloaded
                    'link': link
                }
                to_do.append(download_one(sem, session, image))

            to_do_iter = asyncio.as_completed(to_do)

            with progressbar.ProgressBar(max_value=len(to_do)) as bar:
                for i, future in enumerate(to_do_iter):
                    result = await future
                    if result:
                        successful_images += 1
                    else:
                        failed_images += 1
                    bar.update(i)

    logger.critical('Successful [{}] images, failed [{}] images'.format(successful_images, failed_images))


if __name__ == '__main__':
    t0 = time.time()
    if sys.platform != 'win32':
        import uvloop
        asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
    loop = asyncio.get_event_loop()
    loop.run_until_complete(download_many())
    loop.close()
    logger.critical('Total Cost {:.2f} seconds'.format(time.time() - t0))
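# Note: on Python 3.7+, asyncio.run(download_many()) could replace the manual
# get_event_loop() / run_until_complete() / close() sequence above; setting the
# uvloop event loop policy beforehand works the same way.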

--------------------------------------------------------------------------------
/processpool.py:
--------------------------------------------------------------------------------
import time
from functools import partial
from multiprocessing import Pool
from concurrent import futures
from common import setup_down_path, get_links, download_one, download_one_1
from logger import logger


def download_many():
    '''Multi-process: download all images in parallel, bounded by the process count.
    Uses multiprocessing.Pool.apply_async()
    '''
    down_path = setup_down_path()
    links = get_links()

    p = Pool(4)  # number of processes in the pool
    for linkno, link in enumerate(links, 1):
        image = {
            'path': down_path,
            'linkno': linkno,
            'link': link
        }
        p.apply_async(download_one, args=(image,))

    logger.info('Waiting for all subprocesses done...')
    p.close()  # close the pool: no more tasks may be submitted
    p.join()  # the main process waits for every child process in the pool to finish
    logger.info('All subprocesses done.')

    return len(links)


def download_many_1():
    '''Multi-process: download all images in parallel, bounded by the process count.
    Uses multiprocessing.Pool.map(download_one, images).
    Note that Pool.map() limits download_one() to a single argument, which is why images is a list of dicts.
    '''
    down_path = setup_down_path()
    links = get_links()

    images = []
    for linkno, link in enumerate(links, 1):
        image = {
            'path': down_path,
            'linkno': linkno,
            'link': link
        }
        images.append(image)

    with Pool(4) as p:
        p.map(download_one, images)  # map the images sequence over download_one()

    logger.info('Waiting for all subprocesses done...')
    # p.close()  # not needed: Pool.map() blocks until every task is done, and the with statement tears the pool down on exit
    # p.join()
    logger.info('All subprocesses done.')

    return len(links)


def download_many_2():
    '''Multi-process: download all images in parallel, bounded by the process count.
    Uses multiprocessing.Pool.starmap(download_one_1, images), which was added in Python 3.3.
    It accepts a sequence of tuples and automatically unpacks each tuple into the function's multiple arguments.
    '''
    down_path = setup_down_path()
    links = get_links()

    images = []
    for linkno, link in enumerate(links, 1):
        images.append((down_path, linkno, link))

    with Pool(4) as p:
        p.starmap(download_one_1, images)  # links numbered from 1

    logger.info('Waiting for all subprocesses done...')
    # p.close()
    # p.join()
    logger.info('All subprocesses done.')

    return len(links)


def download_many_3():
    '''Multi-process: download all images in parallel, bounded by the process count.
    Uses multiprocessing.Pool.starmap(download_one_1, images), which was added in Python 3.3.
    It accepts a sequence of tuples and automatically unpacks each tuple into the function's multiple arguments.
    Since every image is saved to the same directory, functools.partial() pins that argument.
    '''
    down_path = setup_down_path()
    links = get_links()

    # Pin the save path so we don't have to pass the same down_path argument on every download call
    download_one_1_partial = partial(download_one_1, down_path)

    images = []
    for linkno, link in enumerate(links, 1):
        images.append((linkno, link))  # each tuple no longer carries the save directory

    with Pool(4) as p:
        p.starmap(download_one_1_partial, images)  # links numbered from 1

    logger.info('Waiting for all subprocesses done...')
    # p.close()
    # p.join()
    logger.info('All subprocesses done.')

    return len(links)


def download_many_4():
    '''Multi-process: download all images in parallel, bounded by the process count.
    Uses concurrent.futures.ProcessPoolExecutor().
    Executor.map() uses Futures rather than returning them: it returns an iterator whose
    __next__() method calls each Future's result(), so we get each Future's result, not the Future itself.

    Note that Executor.map() limits download_one() to a single argument, which is why images is a list of dicts.
    '''
    down_path = setup_down_path()
    links = get_links()

    images = []
    for linkno, link in enumerate(links, 1):
        image = {
            'path': down_path,
            'linkno': linkno,
            'link': link
        }
        images.append(image)

    # The with statement calls executor.__exit__(), which calls executor.shutdown(wait=True);
    # that blocks the main process until all the child processes have finished
    with futures.ProcessPoolExecutor(max_workers=16) as executor:  # without max_workers, the pool defaults to os.cpu_count() processes
        # executor.map() works like the built-in map(), except download_one() is called in parallel across multiple processes
        # Its return value res is an iterator; iterating it later yields each call's return value
        res = executor.map(download_one, images)  # pass one sequence

    return len(list(res))  # if any call raised an exception, it is re-raised here, as with an implicit next() on an iterator


def download_many_5():
    '''Multi-process: download all images in parallel, bounded by the process count.
    Uses concurrent.futures.ProcessPoolExecutor().
    When the called function takes several arguments, Executor.map() can be given several sequences.
    Reference: https://yuanjiang.space/threadpoolexecutor-map-method-with-multiple-parameters
    '''
    down_path = setup_down_path()
    links = get_links()

    # Pin the save path so we don't have to pass the same down_path argument on every download call
    download_one_1_partial = partial(download_one_1, down_path)

    # Build the sequence of all linknos, numbered from 1 like the other variants
    linknos = [i for i in range(1, len(links) + 1)]

    with futures.ProcessPoolExecutor(max_workers=16) as executor:
        res = executor.map(download_one_1_partial, linknos, links)  # pass several sequences to Executor.map()

    return len(list(res))


def download_many_6():
    '''Multi-process: download all images in parallel, bounded by the process count.
    Uses concurrent.futures.ProcessPoolExecutor().
    Instead of Executor.map(), uses Executor.submit() together with concurrent.futures.as_completed().
    Executor.submit() returns a Future, whereas Executor.map() consumes Futures internally.
    '''
    down_path = setup_down_path()
    links = get_links()

    # Pin the save path so we don't have to pass the same down_path argument on every download call
    download_one_1_partial = partial(download_one_1, down_path)

    with futures.ProcessPoolExecutor(max_workers=16) as executor:
        to_do = []
        # Create and schedule the Futures
        for linkno, link in enumerate(links, 1):  # links numbered from 1
            future = executor.submit(download_one_1_partial, linkno, link)
            to_do.append(future)
            logger.debug('Scheduled for No.{} {}: {}'.format(linkno, link, future))

        results = []
        # Collect the results: futures.as_completed(to_do) takes a list of Futures and returns
        # an iterator that yields each future only once it has finished running
        for future in futures.as_completed(to_do):  # each yielded future is already done, so future.result() never blocks
            res = future.result()
            results.append(res)
            logger.debug('{} result: {!r}'.format(future, res))

    return len(results)


if __name__ == '__main__':
    t0 = time.time()
    count = download_many_4()
    msg = '{} flags downloaded in {:.2f} seconds.'
    logger.info(msg.format(count, time.time() - t0))

--------------------------------------------------------------------------------