├── .gitignore
├── README.md
├── async
│   └── .gitkeep
├── code_examples
│   ├── asyncio_and_multiprocessing
│   │   ├── asyncio_only.py
│   │   ├── asyncio_with_multiprocessing.py
│   │   ├── multiprocessing_only.py
│   │   └── sync.py
│   ├── concurrency
│   │   ├── async.py
│   │   ├── sync.py
│   │   └── threads.py
│   ├── parallelism
│   │   ├── multi.py
│   │   ├── sync.py
│   │   └── threads.py
│   └── pytest_asyncio
│       ├── hello_asyncio.py
│       ├── test_hello_asyncio.py
│       └── test_hello_asyncio2.py
├── requirements.txt
├── sync
│   └── .gitkeep
└── threading
    └── .gitkeep

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# VS Code
.vscode/

# Python env
env/
venv/

# Other Python
__pycache__/
.pytest_cache/

# Generated files
*/concurrency/sync/*
*/concurrency/threading/*
*/concurrency/async/*
*/asyncio_and_multiprocessing/wiki_titles.tsv

sync/*.txt
async/*.txt
threading/*.txt
wiki_titles.tsv

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Concurrency, Parallelism, and asyncio

Blog post for [testdriven.io](https://testdriven.io/). If you want to follow along and create the projects yourself, you can find the post [here](https://testdriven.io/blog/concurrency-parallelism-asyncio/).


## Code Examples

Create and activate a new virtual environment:

*Windows PowerShell, assuming the Python version in your PATH is 3.9+:*

```powershell
PS X:> python -m venv venv
PS X:> .\venv\Scripts\Activate.ps1
(venv) PS X:>

(venv) PS X:> pip install -r requirements.txt
```

*Mac/Linux:*

```sh
$ python3.9 -m venv venv
$ source venv/bin/activate
(venv)$

(venv)$ pip install -r requirements.txt
```

### Concurrency

```sh
(venv)$ python code_examples/concurrency/sync.py
(venv)$ python code_examples/concurrency/threads.py
(venv)$ python code_examples/concurrency/async.py
```

### Parallelism

```sh
(venv)$ python code_examples/parallelism/sync.py
(venv)$ python code_examples/parallelism/threads.py
(venv)$ python code_examples/parallelism/multi.py
```

### pytest async

```sh
(venv)$ python -m pytest code_examples/pytest_asyncio/test_hello_asyncio.py
(venv)$ python -m pytest code_examples/pytest_asyncio/test_hello_asyncio2.py
```
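The `@pytest.mark.asyncio` marker used in `test_hello_asyncio.py` comes from the `pytest-asyncio` plugin. It should already be pulled in by `requirements.txt`, but if you built your environment by hand you may need to install it yourself:

```sh
(venv)$ pip install pytest pytest-asyncio
```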
### asyncio and multiprocessing

```sh
(venv)$ python code_examples/asyncio_and_multiprocessing/sync.py
(venv)$ python code_examples/asyncio_and_multiprocessing/multiprocessing_only.py
(venv)$ python code_examples/asyncio_and_multiprocessing/asyncio_only.py
(venv)$ python code_examples/asyncio_and_multiprocessing/asyncio_with_multiprocessing.py
```

--------------------------------------------------------------------------------
/async/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/based-jace/concurrency-parallelism-and-asyncio/cdc18199882911cceeb15b29f5b2f22a2d7dfa09/async/.gitkeep

--------------------------------------------------------------------------------
/code_examples/asyncio_and_multiprocessing/asyncio_only.py:
--------------------------------------------------------------------------------
import sys
import asyncio
import time

import aiofiles
import aiohttp
from bs4 import BeautifulSoup


async def get_and_scrape_pages(num_pages: int, output_file: str):
    """
    Makes {{ num_pages }} requests to Wikipedia to receive {{ num_pages }} random
    articles, then scrapes each page for its title and appends it to {{ output_file }},
    separating each title with a tab: "\\t"

    #### Arguments
    ---
    num_pages: int -
        Number of random Wikipedia pages to request and scrape

    output_file: str -
        File to append titles to
    """
    async with \
            aiohttp.ClientSession() as client, \
            aiofiles.open(output_file, "a+", encoding="utf-8") as f:

        for _ in range(num_pages):
            async with client.get("https://en.wikipedia.org/wiki/Special:Random") as response:
                if response.status > 399:
                    # I was getting a 429 Too Many Requests at a higher volume of requests
                    response.raise_for_status()

                page = await response.text()
                soup = BeautifulSoup(page, features="html.parser")
                title = soup.find("h1").text

                await f.write(title + "\t")

        await f.write("\n")


async def main():
    NUM_PAGES = 100
    OUTPUT_FILE = "./wiki_titles.tsv"  # File to append our scraped titles to

    await get_and_scrape_pages(NUM_PAGES, OUTPUT_FILE)


if __name__ == "__main__":
    # On Windows, this finishes successfully, but throws 'RuntimeError: Event loop is closed'
    # The following lines fix this
    # Source: https://github.com/encode/httpx/issues/914#issuecomment-622586610
    if sys.version_info[0] == 3 and sys.version_info[1] >= 8 and sys.platform.startswith('win'):
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

    print("Starting: Please wait (This may take a while)....")
    start = time.time()
    asyncio.run(main())
    print(f"Time to complete: {round(time.time() - start, 2)} seconds.")
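One thing worth noticing above: each request is awaited inside the loop, so this version is still sequential over the network within a single process. A variant that overlaps the requests with `asyncio.gather` could look like the sketch below — the helper names are hypothetical and not files in this repo, and the 429 warning in the original comment applies even more strongly when all requests fire at once:

```python
# Sketch only: overlap all the requests, then write the titles in one go.
# get_page_title / get_and_scrape_pages_gathered are hypothetical names.
async def get_page_title(client: aiohttp.ClientSession) -> str:
    async with client.get("https://en.wikipedia.org/wiki/Special:Random") as response:
        response.raise_for_status()
        page = await response.text()
        return BeautifulSoup(page, features="html.parser").find("h1").text


async def get_and_scrape_pages_gathered(num_pages: int, output_file: str):
    async with aiohttp.ClientSession() as client:
        titles = await asyncio.gather(*(get_page_title(client) for _ in range(num_pages)))

    async with aiofiles.open(output_file, "a+", encoding="utf-8") as f:
        await f.write("\t".join(titles) + "\n")
```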
--------------------------------------------------------------------------------
/code_examples/asyncio_and_multiprocessing/asyncio_with_multiprocessing.py:
--------------------------------------------------------------------------------
import sys
import asyncio
import concurrent.futures
import time
from math import floor
from multiprocessing import cpu_count

import aiofiles
import aiohttp
from bs4 import BeautifulSoup


async def get_and_scrape_pages(num_pages: int, output_file: str):
    """
    Makes {{ num_pages }} requests to Wikipedia to receive {{ num_pages }} random
    articles, then scrapes each page for its title and appends it to {{ output_file }},
    separating each title with a tab: "\\t"

    #### Arguments
    ---
    num_pages: int -
        Number of random Wikipedia pages to request and scrape

    output_file: str -
        File to append titles to
    """
    async with \
            aiohttp.ClientSession() as client, \
            aiofiles.open(output_file, "a+", encoding="utf-8") as f:

        for _ in range(num_pages):
            async with client.get("https://en.wikipedia.org/wiki/Special:Random") as response:
                if response.status > 399:
                    # I was getting a 429 Too Many Requests at a higher volume of requests
                    response.raise_for_status()

                page = await response.text()
                soup = BeautifulSoup(page, features="html.parser")
                title = soup.find("h1").text

                await f.write(title + "\t")

        await f.write("\n")


def start_scraping(num_pages: int, output_file: str, i: int):
    """ Starts an async process for requesting and scraping Wikipedia pages """
    # On Windows, this finishes successfully, but throws 'RuntimeError: Event loop is closed'
    # The following lines fix this
    # Source: https://github.com/encode/httpx/issues/914#issuecomment-622586610
    if sys.version_info[0] == 3 and sys.version_info[1] >= 8 and sys.platform.startswith('win'):
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

    print(f"Process {i} starting...")
    asyncio.run(get_and_scrape_pages(num_pages, output_file))
    print(f"Process {i} finished.")


def main():
    NUM_PAGES = 100  # Number of pages to scrape altogether
    NUM_CORES = cpu_count()  # Our number of CPU cores (including logical cores)
    OUTPUT_FILE = "./wiki_titles.tsv"  # File to append our scraped titles to

    PAGES_PER_CORE = floor(NUM_PAGES / NUM_CORES)
    PAGES_FOR_FINAL_CORE = PAGES_PER_CORE + NUM_PAGES % NUM_CORES  # Our final core picks up the remainder

    futures = []

    with concurrent.futures.ProcessPoolExecutor(NUM_CORES) as executor:
        for i in range(NUM_CORES - 1):
            new_future = executor.submit(
                start_scraping,  # Function to perform
                # v Arguments v
                num_pages=PAGES_PER_CORE,
                output_file=OUTPUT_FILE,
                i=i
            )
            futures.append(new_future)

        futures.append(
            executor.submit(
                start_scraping,
                PAGES_FOR_FINAL_CORE, OUTPUT_FILE, NUM_CORES - 1
            )
        )

    concurrent.futures.wait(futures)


if __name__ == "__main__":
    print("Starting: Please wait (This may take a while)....")
    start = time.time()
    main()
    print(f"Time to complete: {round(time.time() - start, 2)} seconds.")
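To make the page-split arithmetic in `main()` concrete, here is a worked example assuming a hypothetical machine where `cpu_count()` returns 8:

```python
NUM_PAGES = 100
NUM_CORES = 8                         # hypothetical cpu_count() result
PAGES_PER_CORE = 100 // 8             # 12 pages for each of the first 7 processes
PAGES_FOR_FINAL_CORE = 12 + 100 % 8   # 12 + 4 = 16 pages for the last process
assert 7 * 12 + 16 == 100             # every page is accounted for
```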
--------------------------------------------------------------------------------
/code_examples/asyncio_and_multiprocessing/multiprocessing_only.py:
--------------------------------------------------------------------------------
import concurrent.futures
import time
import urllib.request
from math import floor
from multiprocessing import cpu_count

from bs4 import BeautifulSoup


def get_and_scrape_pages(num_pages: int, output_file: str):
    """
    Makes {{ num_pages }} requests to Wikipedia to receive {{ num_pages }} random
    articles, then scrapes each page for its title and appends it to {{ output_file }},
    separating each title with a tab: "\\t"

    #### Arguments
    ---
    num_pages: int -
        Number of random Wikipedia pages to request and scrape

    output_file: str -
        File to append titles to
    """
    with open(output_file, "a+", encoding="utf-8") as f:
        for _ in range(num_pages):
            with urllib.request.urlopen("https://en.wikipedia.org/wiki/Special:Random") as response:
                if response.status > 399:
                    # I was getting a 429 Too Many Requests at a higher volume of requests
                    raise Exception(f"Received a {response.status} instead of 200.")

                page = response.read()
                soup = BeautifulSoup(page, features="html.parser")
                title = soup.find("h1").text
                f.write(title + "\t")

        f.write("\n")


def main():
    NUM_PAGES = 100  # Number of pages to scrape altogether
    NUM_CORES = cpu_count()  # Our number of CPU cores (including logical cores)
    OUTPUT_FILE = "./wiki_titles.tsv"  # File to append our scraped titles to

    PAGES_PER_CORE = floor(NUM_PAGES / NUM_CORES)
    PAGES_FOR_FINAL_CORE = PAGES_PER_CORE + NUM_PAGES % NUM_CORES  # Our final core picks up the remainder

    futures = []
    with concurrent.futures.ProcessPoolExecutor(NUM_CORES) as executor:
        for _ in range(NUM_CORES - 1):
            new_future = executor.submit(
                get_and_scrape_pages,  # Function to perform
                # v Arguments v
                num_pages=PAGES_PER_CORE,
                output_file=OUTPUT_FILE,
            )
            futures.append(new_future)

        futures.append(
            executor.submit(
                get_and_scrape_pages,
                PAGES_FOR_FINAL_CORE, OUTPUT_FILE
            )
        )

    concurrent.futures.wait(futures)


if __name__ == "__main__":
    print("Starting: Please wait (This may take a while)....")
    start = time.time()
    main()
    print(f"Time to complete: {round(time.time() - start, 2)} seconds.")

--------------------------------------------------------------------------------
/code_examples/asyncio_and_multiprocessing/sync.py:
--------------------------------------------------------------------------------
import time
import urllib.request

from bs4 import BeautifulSoup


def get_and_scrape_pages(num_pages: int, output_file: str):
    """
    Makes {{ num_pages }} requests to Wikipedia to receive {{ num_pages }} random
    articles, then scrapes each page for its title and appends it to {{ output_file }},
    separating each title with a tab: "\\t"

    #### Arguments
    ---
    num_pages: int -
        Number of random Wikipedia pages to request and scrape

    output_file: str -
        File to append titles to
    """
    with open(output_file, "a+", encoding="utf-8") as f:
        for _ in range(num_pages):
            with urllib.request.urlopen("https://en.wikipedia.org/wiki/Special:Random") as response:
                if response.status > 399:
                    # I was getting a 429 Too Many Requests at a higher volume of requests
                    raise Exception(f"Received a {response.status} instead of 200.")

                page = response.read()
                soup = BeautifulSoup(page, features="html.parser")
                title = soup.find("h1").text
                f.write(title + "\t")

        f.write("\n")


def main():
    NUM_PAGES = 100  # Number of pages to scrape altogether
    OUTPUT_FILE = "./wiki_titles.tsv"  # File to append our scraped titles to

    get_and_scrape_pages(NUM_PAGES, OUTPUT_FILE)


if __name__ == "__main__":
    print("Starting: Please wait (This may take a while)....")
    start = time.time()
    main()
    print(f"Time to complete: {round(time.time() - start, 2)} seconds.")

--------------------------------------------------------------------------------
/code_examples/concurrency/async.py:
--------------------------------------------------------------------------------
import sys
import asyncio
import time

import aiohttp
import aiofiles


async def write_genre(file_name):
    """
    Uses genrenator from binaryjazz.us to write a random genre to
    the given file
    """

    async with aiohttp.ClientSession() as session:
        async with session.get("https://binaryjazz.us/wp-json/genrenator/v1/genre/") as response:
            genre = await response.json()

    async with aiofiles.open(file_name, "w") as new_file:
        print(f"Writing '{genre}' to '{file_name}'...")
        await new_file.write(genre)


async def main():
    tasks = []

    print("Starting...")
    start = time.time()

    for i in range(5):
        tasks.append(write_genre(f"./async/new_file{i}.txt"))

    await asyncio.gather(*tasks)

    end = time.time()
    print(f"Time to complete asyncio read/writes: {round(end - start, 2)} seconds")


if __name__ == "__main__":
    # On Windows, this finishes successfully, but throws 'RuntimeError: Event loop is closed'
    # The following lines fix this
    # Source: https://github.com/encode/httpx/issues/914#issuecomment-622586610
    if sys.version_info[0] == 3 and sys.version_info[1] >= 8 and sys.platform.startswith('win'):
        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

    asyncio.run(main())
--------------------------------------------------------------------------------
/code_examples/concurrency/sync.py:
--------------------------------------------------------------------------------
import json
import time
from urllib.request import Request, urlopen


def write_genre(file_name):
    """
    Uses genrenator from binaryjazz.us to write a random genre to
    the given file
    """

    req = Request("https://binaryjazz.us/wp-json/genrenator/v1/genre/", headers={"User-Agent": "Mozilla/5.0"})
    genre = json.load(urlopen(req))

    with open(file_name, "w") as new_file:
        print(f"Writing '{genre}' to '{file_name}'...")
        new_file.write(genre)


if __name__ == "__main__":

    print("Starting...")
    start = time.time()

    for i in range(5):
        write_genre(f"./sync/new_file{i}.txt")

    end = time.time()
    print(f"Time to complete synchronous read/writes: {round(end - start, 2)} seconds")

--------------------------------------------------------------------------------
/code_examples/concurrency/threads.py:
--------------------------------------------------------------------------------
import json
import threading
import time
from urllib.request import Request, urlopen


def write_genre(file_name):
    """
    Uses genrenator from binaryjazz.us to write a random genre to
    the given file
    """

    req = Request("https://binaryjazz.us/wp-json/genrenator/v1/genre/", headers={"User-Agent": "Mozilla/5.0"})
    genre = json.load(urlopen(req))

    with open(file_name, "w") as new_file:
        print(f"Writing '{genre}' to '{file_name}'...")
        new_file.write(genre)


if __name__ == "__main__":

    threads = []

    print("Starting...")
    start = time.time()

    for i in range(5):
        thread = threading.Thread(
            target=write_genre,
            args=[f"./threading/new_file{i}.txt"]
        )
        thread.start()
        threads.append(thread)

    for thread in threads:
        thread.join()

    end = time.time()
    print(f"Time to complete threading read/writes: {round(end - start, 2)} seconds")
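The manual start/join bookkeeping above is the point of the example, but for reference, the same fan-out can be expressed with the `concurrent.futures.ThreadPoolExecutor` API that the parallelism examples below use. A sketch, not a file in this repo, assuming the `write_genre` defined above:

```python
import concurrent.futures

# Same five downloads; the pool handles thread start/join for us.
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = [
        executor.submit(write_genre, f"./threading/new_file{i}.txt")
        for i in range(5)
    ]
    concurrent.futures.wait(futures)
```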
{round(end - start, 2)}") 19 | -------------------------------------------------------------------------------- /code_examples/parallelism/sync.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | print("Starting...") 5 | start = time.time() 6 | 7 | for i in range(1000000, 1000016): 8 | pow(i, i) 9 | print("okay") 10 | 11 | end = time.time() 12 | print(f"Time to complete: {round(end - start, 2)}") 13 | -------------------------------------------------------------------------------- /code_examples/parallelism/threads.py: -------------------------------------------------------------------------------- 1 | import concurrent.futures 2 | import time 3 | 4 | 5 | if __name__ == "__main__": 6 | pow_list = [i for i in range(1000000, 1000016)] 7 | 8 | print("Starting...") 9 | start = time.time() 10 | 11 | with concurrent.futures.ThreadPoolExecutor() as executor: 12 | futures = [executor.submit(pow, i, i) for i in pow_list] 13 | 14 | for f in concurrent.futures.as_completed(futures): 15 | print("okay") 16 | 17 | end = time.time() 18 | print(f"Time to complete: {round(end - start, 2)}") 19 | -------------------------------------------------------------------------------- /code_examples/pytest_asyncio/hello_asyncio.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | 4 | async def say_hello(name: str): 5 | """ Sleeps for two seconds, then prints 'Hello, {{ name }}!' """ 6 | try: 7 | if type(name) != str: 8 | raise TypeError("'name' must be a string") 9 | if name == "": 10 | raise ValueError("'name' cannot be empty") 11 | except (TypeError, ValueError): 12 | raise 13 | 14 | print("Sleeping...") 15 | await asyncio.sleep(2) 16 | print(f"Hello, {name}!") 17 | -------------------------------------------------------------------------------- /code_examples/pytest_asyncio/test_hello_asyncio.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from hello_asyncio import say_hello 4 | 5 | 6 | @pytest.mark.parametrize("name", [ 7 | "Robert Paulson", 8 | "Seven of Nine", 9 | "x Æ a-12" 10 | ]) 11 | @pytest.mark.asyncio 12 | async def test_say_hello(name): 13 | await say_hello(name) 14 | 15 | 16 | class TestSayHelloThrowsExceptions: 17 | @pytest.mark.parametrize("name", [ 18 | "", 19 | ]) 20 | @pytest.mark.asyncio 21 | async def test_say_hello_value_error(self, name): 22 | with pytest.raises(ValueError): 23 | await say_hello(name) 24 | 25 | @pytest.mark.parametrize("name", [ 26 | 19, 27 | {"name", "Diane"}, 28 | [] 29 | ]) 30 | @pytest.mark.asyncio 31 | async def test_say_hello_type_error(self, name): 32 | with pytest.raises(TypeError): 33 | await say_hello(name) 34 | -------------------------------------------------------------------------------- /code_examples/pytest_asyncio/test_hello_asyncio2.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import pytest 3 | 4 | from hello_asyncio import say_hello 5 | 6 | 7 | @pytest.fixture 8 | def event_loop(): 9 | loop = asyncio.get_event_loop() 10 | yield loop 11 | 12 | 13 | @pytest.mark.parametrize("name", [ 14 | "Robert Paulson", 15 | "Seven of Nine", 16 | "x Æ a-12" 17 | ]) 18 | def test_say_hello(event_loop, name): 19 | event_loop.run_until_complete(say_hello(name)) 20 | 21 | 22 | class TestSayHelloThrowsExceptions: 23 | @pytest.mark.parametrize("name", [ 24 | "", 25 | ]) 26 | def test_say_hello_value_error(self, event_loop, name): 27 | 
--------------------------------------------------------------------------------
/code_examples/pytest_asyncio/test_hello_asyncio.py:
--------------------------------------------------------------------------------
import pytest

from hello_asyncio import say_hello


@pytest.mark.parametrize("name", [
    "Robert Paulson",
    "Seven of Nine",
    "x Æ a-12"
])
@pytest.mark.asyncio
async def test_say_hello(name):
    await say_hello(name)


class TestSayHelloThrowsExceptions:
    @pytest.mark.parametrize("name", [
        "",
    ])
    @pytest.mark.asyncio
    async def test_say_hello_value_error(self, name):
        with pytest.raises(ValueError):
            await say_hello(name)

    @pytest.mark.parametrize("name", [
        19,
        {"name", "Diane"},
        []
    ])
    @pytest.mark.asyncio
    async def test_say_hello_type_error(self, name):
        with pytest.raises(TypeError):
            await say_hello(name)

--------------------------------------------------------------------------------
/code_examples/pytest_asyncio/test_hello_asyncio2.py:
--------------------------------------------------------------------------------
import asyncio
import pytest

from hello_asyncio import say_hello


@pytest.fixture
def event_loop():
    loop = asyncio.get_event_loop()
    yield loop


@pytest.mark.parametrize("name", [
    "Robert Paulson",
    "Seven of Nine",
    "x Æ a-12"
])
def test_say_hello(event_loop, name):
    event_loop.run_until_complete(say_hello(name))


class TestSayHelloThrowsExceptions:
    @pytest.mark.parametrize("name", [
        "",
    ])
    def test_say_hello_value_error(self, event_loop, name):
        with pytest.raises(ValueError):
            event_loop.run_until_complete(say_hello(name))

    @pytest.mark.parametrize("name", [
        19,
        {"name", "Diane"},
        []
    ])
    def test_say_hello_type_error(self, event_loop, name):
        with pytest.raises(TypeError):
            event_loop.run_until_complete(say_hello(name))

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/based-jace/concurrency-parallelism-and-asyncio/cdc18199882911cceeb15b29f5b2f22a2d7dfa09/requirements.txt

--------------------------------------------------------------------------------
/sync/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/based-jace/concurrency-parallelism-and-asyncio/cdc18199882911cceeb15b29f5b2f22a2d7dfa09/sync/.gitkeep

--------------------------------------------------------------------------------
/threading/.gitkeep:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/based-jace/concurrency-parallelism-and-asyncio/cdc18199882911cceeb15b29f5b2f22a2d7dfa09/threading/.gitkeep

--------------------------------------------------------------------------------