├── .gitignore
├── cpu_bound.py
├── io_bound.py
├── media
│   ├── asyncio.png
│   ├── concurrency_and_parallelism.png
│   ├── event_loop.png
│   ├── os_thread.png
│   └── synchronous_IO_bound.png
└── readme.md

/.gitignore:
--------------------------------------------------------------------------------
# .gitignore
log/
--------------------------------------------------------------------------------
/cpu_bound.py:
--------------------------------------------------------------------------------
import time
import multiprocessing
import os

import logging
import sys

# Make sure the log directory exists before attaching the file handler below
os.makedirs('log', exist_ok=True)

logging.basicConfig(
    level = logging.INFO,
    format = "[%(asctime)s] - [%(levelname)s] - [Process %(process)d, Thread %(thread)d] - %(message)s",
    datefmt = "%Y-%m-%d %H:%M:%S",
    handlers = [
        logging.StreamHandler(sys.stdout),
        logging.FileHandler('log/multiprocessing_log.txt')
    ]
)

logger = logging.getLogger('log output')

def is_prime(n):
    if n <= 1:
        return False
    if n <= 3:
        return True
    if n % 2 == 0 or n % 3 == 0:
        return False
    i = 5
    while i * i <= n:
        if n % i == 0 or n % (i + 2) == 0:
            return False
        i += 6
    return True

def find_primes(start, end):
    primes = []
    for number in range(start, end + 1):
        logger.info(f'Processing number {number}')
        if is_prime(number):
            primes.append(number)
    return primes

def multiprocessing_find_primes(prime_range: tuple, number_of_processors: int):
    chunk_size = (prime_range[1] - prime_range[0] + 1) // number_of_processors

    with multiprocessing.Pool(processes=number_of_processors) as pool:
        # Split the range into one chunk per worker; the last chunk absorbs any remainder
        results = pool.starmap(find_primes, [
            (
                prime_range[0] + i * chunk_size,
                prime_range[1] if i == number_of_processors - 1
                else prime_range[0] + (i + 1) * chunk_size - 1
            )
            for i in range(number_of_processors)
        ])

    primes = [prime for sublist in results for prime in sublist]
    return primes

if __name__ == "__main__":
    num_processes = 3
    prime_range = (1, 10100000)

    start_time = time.time()

    find_primes(*prime_range)                                  # running the sequential version
    # multiprocessing_find_primes(prime_range, num_processes)  # running the multiprocessing version

    end_time = time.time()
    print(f"Execution time: {end_time - start_time} seconds")
--------------------------------------------------------------------------------
/io_bound.py:
--------------------------------------------------------------------------------
import requests
import threading
import concurrent.futures
import asyncio
import aiohttp
import time

import os
import logging
import sys


logging.basicConfig(
    level = logging.INFO,
    format = "[%(asctime)s] - [%(levelname)s] - [Process %(process)d, Thread %(thread)d] - %(message)s",
    datefmt = "%Y-%m-%d %H:%M:%S",
    handlers = [
        logging.StreamHandler(sys.stdout)
    ]
)

logger = logging.getLogger('log output')


DELAY_FACTOR = 2

# Get a response from the API using requests
def get_character_data(character_index: int):
    logger.info(f'Ingesting character number {character_index}')
    response = requests.get(f'https://rickandmortyapi.com/api/character/{character_index}')

    if response.status_code == 200:
        logger.info(f"Ingested character number {character_index} successfully")
    else:
        logger.error(f"Ingestion failed for character number {character_index}!")

    time.sleep(DELAY_FACTOR)
    return response

# Synchronous programming
def synchronous_api_call(number_of_apis: int):
    for i in range(1, number_of_apis + 1):
        response = get_character_data(i)

# Multi-threading
def threading_api_call(number_of_apis: int):
    # Create and start multiple threads
    threads = []
    for i in range(1, number_of_apis + 1):
        thread = threading.Thread(target=get_character_data, args=(i,))
        threads.append(thread)
        thread.start()

    # Wait for all threads to finish
    for thread in threads:
        thread.join()

# Thread pool
def thread_pool_api_call(number_of_apis: int, number_of_threads: int):
    with concurrent.futures.ThreadPoolExecutor(max_workers=number_of_threads) as executor:
        # Use a list comprehension to submit API requests to the thread pool
        results = [executor.submit(get_character_data, i) for i in range(1, number_of_apis + 1)]

        # Retrieve results from the submitted tasks
        for future in concurrent.futures.as_completed(results):
            result = future.result()

# AsyncIO
async def asyncio_get_character_data(character_index: int):
    async with aiohttp.ClientSession() as session:
        async with session.get(f'https://rickandmortyapi.com/api/character/{character_index}') as response:

            if response.status == 200:
                logger.info(f'Ingesting character number {character_index}')
                data = await response.json()
                await asyncio.sleep(DELAY_FACTOR)

                logger.info(f"Ingested character number {character_index} successfully")
                return data
            else:
                logger.error(f"Ingestion failed for character number {character_index}!")

async def main():
    list_of_characters = range(1, 11)

    tasks = [asyncio_get_character_data(index) for index in list_of_characters]

    results = await asyncio.gather(*tasks)


if __name__ == "__main__":
    start_time = time.time()

    synchronous_api_call(10)       # running the synchronous function
    # threading_api_call(10)       # running the multi-threading function
    # thread_pool_api_call(10, 3)  # running the thread-pool function
    # asyncio.run(main())          # running the asyncio function

    total_execution_time = time.time() - start_time
    print(f"Execution time: {total_execution_time} seconds")
--------------------------------------------------------------------------------
/media/asyncio.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vuthanhhai2302/understand-asynchronous-programming/8f3fe1135f91231b16d73562f0ee2c048458aecf/media/asyncio.png
--------------------------------------------------------------------------------
/media/concurrency_and_parallelism.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vuthanhhai2302/understand-asynchronous-programming/8f3fe1135f91231b16d73562f0ee2c048458aecf/media/concurrency_and_parallelism.png
--------------------------------------------------------------------------------
/media/event_loop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vuthanhhai2302/understand-asynchronous-programming/8f3fe1135f91231b16d73562f0ee2c048458aecf/media/event_loop.png
--------------------------------------------------------------------------------
/media/os_thread.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vuthanhhai2302/understand-asynchronous-programming/8f3fe1135f91231b16d73562f0ee2c048458aecf/media/os_thread.png
--------------------------------------------------------------------------------
/media/synchronous_IO_bound.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/vuthanhhai2302/understand-asynchronous-programming/8f3fe1135f91231b16d73562f0ee2c048458aecf/media/synchronous_IO_bound.png
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
# Apply asynchronous programming to speed up your Python code

I wanted to take a moment to say thank you to [@nguyenanhhao998](https://github.com/nguyenanhhao998), [@chunguyenduc](https://github.com/chunguyenduc), [@hieuwu](https://github.com/hieuwu) and vot zo for the great coffee talks that inspired me to write this article. I'm grateful for your support in my learning journey.

## Introduction
Traditionally, your programs run sequentially: operations execute in a linear order, and each operation must complete before the next one can begin. This is also known as synchronous programming. You can find this style of programming everywhere, from simple projects to complex systems, because it is easy to write and understand, intuitive to debug, and predictable to run.

However, this style of programming can lead to long execution times and limit the scalability of your code, especially when dealing with long-running tasks that depend on an external source or on heavy processing by your CPU. These operations are generally called **I/O bound** and **CPU bound**.

### What are I/O bound and CPU bound?
When we describe an operation as either CPU bound or I/O bound, we are referring to the limitation that prevents our program from running faster. If we can speed up whatever the operation is bound on, the program completes in less time. **I/O bound** operations spend their time waiting on a network (transmitting data over the internet, ...) or on an I/O device (searching our system's hard drive storage, ...). **CPU bound** operations spend their time on computation and processing, such as looping through a dataset with ten thousand rows, aggregating the elements or applying business logic to them, and then calculating figures for reports.

I/O bound and CPU bound operations live side by side in real life. First, we make an API call to rickandmortyapi.com (I/O bound); once we have the response, we loop through the list of results and extract the data we need (CPU bound); then we write the strings to our storage as a second I/O bound operation. These situations occur daily, so how can we tackle them?
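To make the distinction concrete, here is a minimal stand-alone sketch of the two kinds of work. It is an illustration rather than code from this repository: the character endpoint simply mirrors the one used in `io_bound.py`, and the squared-number loop is a placeholder for any heavy computation.

```python
import time
import requests

def io_bound_task():
    # Time is spent waiting on the network, not on our CPU,
    # so a faster processor would not make this finish sooner.
    response = requests.get('https://rickandmortyapi.com/api/character/1')
    return response.json()

def cpu_bound_task(numbers):
    # Time is spent inside our own loop, so only more (or faster)
    # CPU cores could make this finish sooner.
    return sum(n * n for n in numbers)

start = time.time()
io_bound_task()
cpu_bound_task(range(10_000_000))
print(f"Sequential execution took {time.time() - start:.2f} seconds")
```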
### The key is concurrency and parallelism
Concurrency and parallelism are both used to manage and execute multiple tasks, but the way they execute them is different. Let's take a look at the diagram below to see how the two paradigms differ from each other and how they compare to the synchronous approach:

<div>
  <img src="media/concurrency_and_parallelism.png" alt="Synchronous execution compared with concurrency and parallelism">
</div>
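As a rough sketch of that difference, the snippet below uses two hypothetical helpers (`wait_for_api` and `crunch_numbers`, which are not part of this repository's code): concurrency lets several tasks take turns on one core while they wait, whereas parallelism runs tasks on several cores at the same time.

```python
import time
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

def wait_for_api(index):
    time.sleep(1)                              # stand-in for waiting on a network response
    return index

def crunch_numbers(limit):
    return sum(i * i for i in range(limit))    # pure computation, no waiting

if __name__ == "__main__":
    # Concurrency: five threads overlap their waiting on a single core,
    # so the whole batch takes roughly 1 second instead of roughly 5.
    with ThreadPoolExecutor(max_workers=5) as pool:
        print(list(pool.map(wait_for_api, range(5))))

    # Parallelism: three processes compute on separate CPU cores at the same time.
    with ProcessPoolExecutor(max_workers=3) as pool:
        print(list(pool.map(crunch_numbers, [2_000_000] * 3)))
```

A rule of thumb that the rest of this repository follows: reach for threads, thread pools, or asyncio when the work is I/O bound (`io_bound.py`), and for multiprocessing when the work is CPU bound (`cpu_bound.py`).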