├── .gitignore
├── README.md
├── async.pdf
├── asyncio-checker.py
├── list.txt
├── naive-checker.py
├── subprocess-checker.py
└── thread-checker.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*~

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Talk for Python Meetup 2017.03.08
---------------------------------

Slides for the talk are in async.pdf.

--------------------------------------------------------------------------------
/async.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jimmysong/asyncio-examples/fc32073eeeac72e424adbdddc0e73f37beed3902/async.pdf

--------------------------------------------------------------------------------
/asyncio-checker.py:
--------------------------------------------------------------------------------
import aiohttp
import asyncio
import json
import sys
import time


async def get_statuses(websites):
    statuses = {}
    # one ClientSession shared by every request (the module-level
    # aiohttp.get() shortcut no longer exists in aiohttp 2.0+)
    async with aiohttp.ClientSession() as session:
        tasks = [get_website_status(session, website) for website in websites]
        # run all requests concurrently and tally the status codes
        for status in await asyncio.gather(*tasks):
            if not statuses.get(status):
                statuses[status] = 0
            statuses[status] += 1
    print(json.dumps(statuses))


async def get_website_status(session, url):
    # the async with block releases the connection back to the pool
    async with session.get(url) as response:
        return response.status


if __name__ == '__main__':
    with open(sys.argv[1], 'r') as f:
        websites = f.read().splitlines()
    t0 = time.time()
    asyncio.run(get_statuses(websites))  # Python 3.7+
    t1 = time.time()
    print("getting website statuses took {0:.1f} seconds".format(t1-t0))

--------------------------------------------------------------------------------
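Note (an aside, not a file in the repo): asyncio-checker.py above fires every
request at once via asyncio.gather, which is fine for 31 sites but can flood a
server or exhaust sockets on large lists. A minimal sketch of the same fan-out
with a concurrency cap, assuming aiohttp 3.x; the names MAX_CONCURRENT and
bounded_status are illustrative, not from the talk:

import asyncio
import sys

import aiohttp

MAX_CONCURRENT = 10  # illustrative cap on in-flight requests


async def bounded_status(semaphore, session, url):
    # at most MAX_CONCURRENT coroutines pass this gate at any moment
    async with semaphore:
        async with session.get(url) as response:
            return response.status


async def main(websites):
    semaphore = asyncio.Semaphore(MAX_CONCURRENT)
    async with aiohttp.ClientSession() as session:
        tasks = [bounded_status(semaphore, session, url) for url in websites]
        return await asyncio.gather(*tasks)


if __name__ == '__main__':
    with open(sys.argv[1], 'r') as f:
        sites = f.read().splitlines()
    print(asyncio.run(main(sites)))

The semaphore only gates how many requests are in flight; the tasks themselves
are still created eagerly, which is cheap since coroutines are lightweight.
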
/list.txt:
--------------------------------------------------------------------------------
http://facebook.com/
http://twitter.com/
http://google.com/
http://youtube.com/
http://linkedin.com/
http://instagram.com/
http://pinterest.com/
http://blogspot.com/
http://wordpress.com/
http://apple.com/
http://tumblr.com/
http://vimeo.com/
http://godaddy.com/
http://yahoo.com/
http://flickr.com/
http://microsoft.com/
http://dell.com/
http://nytimes.com/
http://blogger.com/
http://soundcloud.com/
http://digg.com/
http://feedburner.com/
http://stumbleupon.com/
http://github.com/
http://parallels.com/
http://cnn.com/
http://paypal.com/
http://creativecommons.org/
http://imdb.com/
http://huffingtonpost.com/
http://espn.com/

--------------------------------------------------------------------------------
/naive-checker.py:
--------------------------------------------------------------------------------
import json
import requests
import sys
import time


def website_statuses(websites):
    statuses = {}
    # fetch each site one at a time and tally the status codes
    for website in websites:
        response = requests.get(website)
        status = response.status_code
        if not statuses.get(status):
            statuses[status] = 0
        statuses[status] += 1
    return statuses


if __name__ == '__main__':
    with open(sys.argv[1], 'r') as f:
        websites = f.read().splitlines()
    t0 = time.time()
    print(json.dumps(website_statuses(websites)))
    t1 = time.time()
    print("getting website statuses took {0:.1f} seconds".format(t1-t0))

--------------------------------------------------------------------------------
/subprocess-checker.py:
--------------------------------------------------------------------------------
import json
import math
import subprocess
import sys
import time


if __name__ == '__main__':
    with open(sys.argv[1], 'r') as f:
        websites = f.read().splitlines()
    number_of_processes = int(sys.argv[2])
    per_process = math.ceil(len(websites) / number_of_processes)
    # split up the work based on number of processes
    for i in range(number_of_processes):
        sites = websites[i * per_process:(i + 1) * per_process]
        with open("/tmp/list-{}.txt".format(i), 'w') as f:
            f.write("\n".join(sites))
    t0 = time.time()
    processes = []
    # hand each chunk to a separate naive-checker.py process
    for i in range(number_of_processes):
        p = subprocess.Popen(
            ["python3", "naive-checker.py", "/tmp/list-{}.txt".format(i)],
            stdout=subprocess.PIPE)
        processes.append(p)
    # gather the results
    combined = {}
    for process in processes:
        result = process.communicate()[0]
        # naive-checker prints two lines: the JSON stats, then the timing line
        stats, _, _ = result.decode().split("\n")
        for key, value in json.loads(stats).items():
            if not combined.get(key):
                combined[key] = 0
            combined[key] += value
    print(combined)
    t1 = time.time()
    print("getting website statuses took {0:.1f} seconds".format(t1-t0))

--------------------------------------------------------------------------------
/thread-checker.py:
--------------------------------------------------------------------------------
import json
import requests
import sys
import threading
import time


STATS = {}


def get_website_status(url, lock):
    response = requests.get(url)
    status = response.status_code
    if status != 200:
        # log any site that did not come back with a 200
        print(url)
    # the lock keeps concurrent threads from clobbering the shared dict
    with lock:
        if not STATS.get(status):
            STATS[status] = 0
        STATS[status] += 1


if __name__ == '__main__':
    with open(sys.argv[1], 'r') as f:
        websites = f.read().splitlines()
    t0 = time.time()
    threads = []
    lock = threading.Lock()
    # one thread per website
    for website in websites:
        t = threading.Thread(target=get_website_status, args=(website, lock))
        threads.append(t)
        t.start()
    for t in threads:
        t.join()
    t1 = time.time()
    print(json.dumps(STATS))
    print("getting website statuses took {0:.1f} seconds".format(t1-t0))

--------------------------------------------------------------------------------
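Note (an aside, not a file in the repo): thread-checker.py starts one thread
per URL, which works for 31 sites but does not scale to long lists. A minimal
sketch of the same idea with a bounded pool from the standard library's
concurrent.futures; the helper name check and the worker count are
illustrative, not from the talk:

import json
import sys
from collections import Counter
from concurrent.futures import ThreadPoolExecutor

import requests


def check(url):
    # same per-URL work as thread-checker.py, but returning the status
    return requests.get(url).status_code


if __name__ == '__main__':
    with open(sys.argv[1], 'r') as f:
        websites = f.read().splitlines()
    # the pool caps concurrency at 8 worker threads (illustrative choice)
    with ThreadPoolExecutor(max_workers=8) as executor:
        statuses = Counter(executor.map(check, websites))
    print(json.dumps(statuses))

Because executor.map collects results back on the main thread, the tally
happens in one place and no lock is needed, unlike the shared STATS dict in
thread-checker.py.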