├── 2022 ├── 06 - June │ ├── accessing-the-clipboard-with-pyperclip-library.ipynb │ ├── async-producer-consumer │ │ ├── Pipfile │ │ ├── consumer.py │ │ ├── controller.py │ │ ├── main.py │ │ ├── producer.py │ │ └── resulthandler.py │ ├── lists-deleting-elements.ipynb │ └── looping-n-times.ipynb ├── 07 - July │ ├── case-insensitive-string-comparisons.ipynb │ ├── comparing-lists.ipynb │ ├── concatenating-sequences.ipynb │ ├── pairwise-iteration-using-zip.ipynb │ ├── pydantic.ipynb │ ├── type-hinting.ipynb │ └── unicode.ipynb ├── 08 - August │ ├── black_isort │ │ ├── Makefile │ │ ├── README.md │ │ ├── badly_formatted.py │ │ ├── mod1.py │ │ ├── mod2.py │ │ └── pyproject.toml │ ├── click │ │ ├── Makefile │ │ ├── Pipfile │ │ ├── Pipfile.lock │ │ ├── README.md │ │ ├── converters │ │ │ ├── cli.py │ │ │ └── csv_converter.py │ │ ├── data │ │ │ ├── population.csv │ │ │ └── sample.json │ │ ├── main.py │ │ ├── pyproject.toml │ │ ├── requirements.txt │ │ ├── setup.py │ │ └── viewers │ │ │ ├── __init__.py │ │ │ ├── csv_viewer.py │ │ │ ├── enums.py │ │ │ └── json_viewer.py │ ├── humanize.ipynb │ └── tabulate │ │ ├── population.csv │ │ └── tabulate.ipynb ├── 09 - September │ ├── defaultdict.ipynb │ └── pathlib.ipynb ├── 10 - October │ ├── dotenv │ │ ├── README.md │ │ ├── env-template │ │ ├── main.py │ │ └── requirements.txt │ ├── faker.ipynb │ └── flake8 │ │ ├── .flake8 │ │ ├── Pipfile │ │ ├── Pipfile.lock │ │ ├── README.md │ │ ├── bad_code.bak │ │ ├── bad_code.py │ │ ├── pyproject.toml │ │ └── requirements.txt └── 12 - December │ ├── assignment_expressions.ipynb │ └── structural_pattern_matching.ipynb ├── 2023 ├── 01 - January │ ├── distributed_computations │ │ ├── README.md │ │ ├── app │ │ │ ├── config.py │ │ │ ├── main.py │ │ │ └── requirements.txt │ │ ├── docker-compose.yaml │ │ ├── main_concept.png │ │ ├── redis_queue.png │ │ └── worker │ │ │ ├── config.py │ │ │ ├── requirements.txt │ │ │ └── worker.py │ └── flattening_nested_iterables.ipynb ├── 02 - February │ ├── base64_encoding_decoding.ipynb │ └── threading_issues_caveats │ │ ├── Pipfile │ │ ├── Pipfile.lock │ │ ├── README.md │ │ ├── p1_solution_01.py │ │ ├── p1_solution_02.py │ │ ├── p1_solution_03.py │ │ ├── p1_solution_03a.py │ │ ├── p1_solution_04.py │ │ ├── p1_solution_05.py │ │ ├── p1_solution_06.py │ │ ├── p1_solution_07.py │ │ ├── p1_solution_08.py │ │ ├── p2_solution_01.py │ │ └── p2_solution_02.py ├── 03 - March │ ├── breaking_out_of_nested_loops.ipynb │ └── concurrency_concepts_in_python.pdf ├── 04 - April │ └── decorator_factories_optional_arguments.ipynb ├── 05 - May │ ├── dataclasses_part_1.ipynb │ └── dataclasses_part_2.ipynb ├── 10 - October │ └── Migrating to Pydantic V2.ipynb ├── 11 - November │ └── Speed up your Apps using Caching.ipynb └── 12 - December │ └── Intro to Pydantic V2.ipynb ├── 2024 ├── 03 - March │ ├── arrow_library.ipynb │ └── multiprocessing_pools │ │ ├── example_1.py │ │ ├── example_2.py │ │ ├── example_3.py │ │ └── notes.md ├── 04 - April │ ├── pyyaml_library │ │ ├── config.yaml │ │ ├── docker-compose.yaml │ │ ├── nobel_prizes.json │ │ └── pyyaml_library.ipynb │ └── wrapt_library.ipynb ├── 05 - May │ ├── python-benedict-library.ipynb │ └── python-tenacity-library.ipynb ├── 06 - June │ ├── SimpleNamespace and JSON Data.ipynb │ └── postgres_pydantic │ │ ├── README.md │ │ ├── data │ │ └── .gitkeep │ │ ├── docker-compose.yml │ │ ├── example_1.py │ │ ├── example_2.py │ │ ├── example_3.py │ │ ├── migrations │ │ ├── 20240604_01_u0XKn-db-init.py │ │ ├── 20240604_02_n6kZK-rename-employee-table.py │ │ ├── 
20240604_03_sYLbA-generate-sample-employee-data.py │ │ └── 20240604_04_Qqsgz-change-departments-to-fk.py │ │ └── yoyo.ini ├── 07 - July │ ├── icecream.ipynb │ └── tqdm.ipynb └── 08 - August │ ├── pydantic-partial.ipynb │ └── python-logging │ ├── README.md │ ├── example_01 │ └── main.py │ ├── example_02 │ ├── logger_config.yaml │ └── main.py │ ├── example_03 │ └── main.py │ ├── example_04 │ ├── logger_config.yaml │ └── main.py │ ├── example_05 │ ├── logger_config.yaml │ └── main.py │ ├── example_06 │ ├── logger_config.yaml │ └── main.py │ ├── example_07 │ ├── logger_config.yaml │ └── main.py │ ├── example_08 │ ├── logger_config.yaml │ └── main.py │ ├── example_09 │ ├── logger_config.yaml │ └── main.py │ ├── example_10 │ ├── logger_config.yaml │ └── main.py │ ├── example_11 │ ├── logger_config.yaml │ └── main.py │ └── example_12 │ ├── configs │ ├── log_config.py │ └── logger_formatters.py │ ├── logger_config.yaml │ ├── main.py │ ├── services │ ├── aws.py │ ├── azure.py │ └── gcp.py │ └── utils │ ├── formatters.py │ ├── loggers.py │ ├── times.py │ └── validators.py ├── .gitignore ├── Idiomatic_Python ├── 01_iter_function.ipynb ├── 02_tuple_unpacking.ipynb ├── 03_named_tuples.ipynb ├── 04_iterating_over_collections.ipynb ├── 05_itertools.ipynb ├── 06_comprehensions.ipynb ├── 07_deques.ipynb ├── 08_custom_sorts.ipynb ├── 09_generators.ipynb ├── 10_iterating_dictionaries.ipynb ├── 11_counting_things_with_dictionaries.ipynb ├── 12_merging_dictionaries.ipynb ├── 13_using_named_arguments.ipynb └── 14_decomposition │ ├── README.ipynb │ ├── original │ └── swapi.py │ ├── refactor_1 │ ├── main.py │ └── services │ │ └── swapi │ │ └── api.py │ ├── refactor_2 │ ├── main.py │ ├── services │ │ └── swapi │ │ │ └── api.py │ └── utils.py │ ├── refactor_3 │ ├── main.py │ ├── services │ │ └── swapi │ │ │ └── api.py │ └── utils.py │ ├── refactor_4 │ ├── main.py │ ├── services │ │ └── swapi │ │ │ ├── api.py │ │ │ └── paging.py │ └── utils.py │ └── refactor_5 │ ├── main.py │ ├── services │ └── swapi │ │ ├── api.py │ │ ├── paging.py │ │ └── retries.py │ └── utils.py ├── LICENSE ├── Pipfile ├── Pipfile.lock ├── README.md └── pyproject.toml /2022/06 - June/async-producer-consumer/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | 8 | [dev-packages] 9 | 10 | [requires] 11 | python_version = "3.10" 12 | -------------------------------------------------------------------------------- /2022/06 - June/async-producer-consumer/consumer.py: -------------------------------------------------------------------------------- 1 | """The actual task 'worker' or 'processor' - the work queue consumer""" 2 | import asyncio 3 | from random import random 4 | from time import perf_counter 5 | 6 | 7 | async def do_work(work_queue: asyncio.Queue, result_queue: asyncio.Queue) -> None: 8 | """ 9 | This function (coroutine) will perform the actual work, by pulling an item 10 | from the work queue, doing some work, and pushing the result 11 | to the result queue, and repeating indefinitely. 12 | 13 | This coroutine never terminates itself - it just keeps looking for work to 14 | do in the work_queue. It will instead be terminated by the controller when 15 | the controller decides all work has been done. 
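    Each work item pulled from the work queue is a dict with a 'task_id' and a 'number'; the simulated work squares the number, and the elapsed time is pushed to the result queue along with the result.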
16 | 17 | :param work_queue: the work queue the task consumes (pulls from) 18 | :param result_queue: the result queue the task pushes a result to 19 | once the work is complete 20 | :return: 21 | """ 22 | 23 | while True: 24 | # grab an item from the queue (if there is one) 25 | task_data = await work_queue.get() 26 | 27 | # read the data we need to perform the work 28 | task_id = task_data["task_id"] 29 | number = task_data["number"] 30 | 31 | # do some work that takes some time - simulated here with an async sleep 32 | start = perf_counter() 33 | await asyncio.sleep(random() * 2) # random wait time up to 2 seconds 34 | result = number * number 35 | end = perf_counter() 36 | 37 | # push result to result queue 38 | await result_queue.put( 39 | { 40 | "task_id": task_id, 41 | "result": result, 42 | "time_secs": end - start 43 | } 44 | ) 45 | 46 | # inform work queue the task is complete 47 | work_queue.task_done() 48 | -------------------------------------------------------------------------------- /2022/06 - June/async-producer-consumer/controller.py: -------------------------------------------------------------------------------- 1 | """Main Controller 2 | 3 | This is the main controller that orchestrates the producer, the 'workers' (aka tasks), 4 | sets up the various queues, and tracks when all work is completed to shut down everything 5 | and issue the final job completed callback. 6 | """ 7 | import asyncio 8 | from time import perf_counter 9 | from typing import Callable, List 10 | 11 | import consumer 12 | import producer 13 | import resulthandler 14 | 15 | 16 | # some constants, but could be defined in a config file, or simply passed to run_job function when called 17 | NUM_WORKERS = 25 18 | WORK_QUEUE_MAX_SIZE = 100 19 | 20 | NUM_RESULT_HANDLERS = 10 21 | RESULT_QUEUE_MAX_SIZE = 100 22 | 23 | 24 | async def _controller( 25 | batch: List[dict], task_completed_callback: Callable, job_completed_callback: Callable 26 | ) -> None: 27 | """ 28 | This is the async controller. 
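    It creates the work and result queues, starts the producer, the pool of worker (consumer) tasks, and the result handler tasks, waits for the producer to finish and for both queues to drain, then cancels the still-running tasks and issues the job-completed callback.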
29 | 30 | :param batch: a list of dictionaries received that defines the parameters for each task that has to run 31 | :param task_completed_callback: the callback to use when each task result becomes available 32 | :param job_completed_callback: the callback to use when the overall job is completed 33 | :return: 34 | """ 35 | start = perf_counter() 36 | 37 | # create the work and result queues 38 | work_queue = asyncio.Queue(maxsize=WORK_QUEUE_MAX_SIZE) 39 | result_queue = asyncio.Queue(maxsize=RESULT_QUEUE_MAX_SIZE) 40 | 41 | # create a list of all the tasks that will need to run async 42 | tasks = [] 43 | 44 | # Define the producer task, defining the event we'll look for when the producer is done 45 | producer_completed = asyncio.Event() 46 | producer_completed.clear() # set the event status to False for starters 47 | tasks.append( 48 | asyncio.create_task(producer.produce_work(batch, work_queue, producer_completed)) 49 | ) 50 | 51 | # Create the worker (consumer) tasks 52 | for _ in range(NUM_WORKERS): 53 | tasks.append( 54 | asyncio.create_task(consumer.do_work(work_queue, result_queue)) 55 | ) 56 | 57 | # Create the result handler tasks 58 | for _ in range(NUM_RESULT_HANDLERS): 59 | tasks.append( 60 | asyncio.create_task(resulthandler.handle_task_result(result_queue, task_completed_callback)) 61 | ) 62 | 63 | # Now wait completion of producer, and kick off the consumers and result handlers 64 | await producer_completed.wait() 65 | await work_queue.join() 66 | await result_queue.join() 67 | 68 | # once we reach here, we're all done, so cancel all tasks 69 | for task in tasks: 70 | task.cancel() 71 | 72 | end = perf_counter() 73 | 74 | # all done, callback using the provided callback function 75 | job_completed_callback({"elapsed_secs": end - start}) 76 | 77 | 78 | def run_job( 79 | batch: List[dict], task_completed_callback: Callable, job_completed_callback: Callable 80 | ) -> None: 81 | """ 82 | This is the function caller calls to kick off the job. 83 | Note that this function is not a coroutine - it's a standard function that will run the top-level 84 | entry point for our async processing. 85 | 86 | :param batch: a list of dictionaries received that defines the parameters for each task that has to run 87 | :param task_completed_callback: the callback to use when each task result becomes available 88 | :param job_completed_callback: the callback to use when the overall job is completed 89 | :return: 90 | """ 91 | asyncio.run(_controller(batch, task_completed_callback, job_completed_callback)) 92 | -------------------------------------------------------------------------------- /2022/06 - June/async-producer-consumer/main.py: -------------------------------------------------------------------------------- 1 | """This is the main controller for the producer/consumer 2 | 3 | Here we'll define 4 | - the parameters for each task that needs to run, 5 | - the callback handler that will handle when each task runs 6 | to completion 7 | - the callback handler that will handle the message that all tasks 8 | have completed 9 | 10 | And finally, we'll kick off the process using the main() function. 
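Running this module directly (``python main.py``) seeds the random number generator (so the simulated sleep times are repeatable) and kicks off a job of 10 tasks identified by a random UUID.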
11 | """ 12 | from functools import partial 13 | from random import seed 14 | from uuid import uuid4 15 | 16 | from controller import run_job 17 | 18 | 19 | def main(job_id: str) -> None: 20 | """ 21 | Main app that kicks a "job" that consists of running multiple tasks 22 | 23 | :param job_id: some job identifier 24 | :return: 25 | """ 26 | 27 | print(f"Starting Job {job_id}") 28 | 29 | # define callbacks, "injecting" the job_id 30 | task_callback = partial(task_completed_callback_handler, job_id) 31 | job_callback = partial(job_completed_callback_handler, job_id) 32 | 33 | # define the parameters for the tasks that will need to run 34 | task_data = [ 35 | {"task_id": i, "number": i} 36 | for i in range(10) 37 | ] 38 | 39 | # start job 40 | run_job(task_data, task_callback, job_callback) 41 | 42 | 43 | def task_completed_callback_handler(job_id: str, callback_message: dict) -> None: 44 | print(f"Task completed in {job_id=}: {callback_message=}") 45 | 46 | 47 | def job_completed_callback_handler(job_id: str, callback_message: dict) -> None: 48 | print(f"Job {job_id} completed: {callback_message=}") 49 | 50 | 51 | if __name__ == '__main__': 52 | seed(0) # just to get repeatability in various sleeps we use to simulate long-running processes 53 | main(str(uuid4())) 54 | -------------------------------------------------------------------------------- /2022/06 - June/async-producer-consumer/producer.py: -------------------------------------------------------------------------------- 1 | """This code defines the producer, which populates the work queue""" 2 | import asyncio 3 | from typing import List 4 | 5 | 6 | async def produce_work( 7 | batch: List[dict], work_queue: asyncio.Queue, producer_completed: asyncio.Event 8 | ): 9 | """ 10 | Puts all the requested work into the work queue. 11 | 12 | :param batch: list of all the params that were submitted to be processed (this is the list of 13 | dicts the main application sent over for processing) 14 | :param work_queue: main work queue that contains each individual task params 15 | :param producer_completed: event to indicate the producer has finished producing 16 | all requested work 17 | :return: 18 | """ 19 | for data in batch: 20 | await work_queue.put(data) 21 | 22 | # finished putting all the data into the work queue - indicate we are done using 23 | # the producer_completed event 24 | producer_completed.set() 25 | -------------------------------------------------------------------------------- /2022/06 - June/async-producer-consumer/resulthandler.py: -------------------------------------------------------------------------------- 1 | """Contains the code to handle results waiting in the result queue""" 2 | import asyncio 3 | from typing import Callable 4 | 5 | 6 | async def handle_task_result(result_queue: asyncio.Queue, callback: Callable[[dict], None]): 7 | """Result item handler 8 | This function (coroutine) will handle any results sitting in the result queue by 9 | issuing the callback. 10 | 11 | Just like the task worker, this coroutine never terminates itself. It will instead 12 | be terminated by the controller when the controller decides all work has been done. 
13 | 14 | :param result_queue: the result queue this task consumes (pulls from) 15 | :param callback: the callback function to call with the results pulled from the queue 16 | :return: 17 | """ 18 | while True: 19 | result = await result_queue.get() 20 | callback(result) 21 | result_queue.task_done() # tell the queue we are done with the item 22 | -------------------------------------------------------------------------------- /2022/06 - June/looping-n-times.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "a5b4f219", 6 | "metadata": {}, 7 | "source": [ 8 | "## Looping N Times" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "ee9babc3", 14 | "metadata": {}, 15 | "source": [ 16 | "Sometimes we just want to run a loop N times - but without caring about the actual loop variable." 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "id": "f87873d6", 22 | "metadata": {}, 23 | "source": [ 24 | "In Python we do not have the same for-loop construct as languages such as C or Java, we can only iterate over iterables (using iterators)." 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "id": "225a0fc4", 30 | "metadata": {}, 31 | "source": [ 32 | "So, to run a loop N times, we might write something like this:" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 1, 38 | "id": "ecd5f55f", 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "name": "stdout", 43 | "output_type": "stream", 44 | "text": [ 45 | "loop...\n", 46 | "loop...\n", 47 | "loop...\n", 48 | "loop...\n", 49 | "loop...\n", 50 | "loop...\n", 51 | "loop...\n", 52 | "loop...\n", 53 | "loop...\n", 54 | "loop...\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "for i in range(10):\n", 60 | " print('loop...')" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "id": "133210b9", 66 | "metadata": {}, 67 | "source": [ 68 | "It is customary to use the variable name `_` to indicate we don't care about the loop variable:" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 2, 74 | "id": "4ea21e3e", 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "loop...\n", 82 | "loop...\n", 83 | "loop...\n", 84 | "loop...\n", 85 | "loop...\n", 86 | "loop...\n", 87 | "loop...\n", 88 | "loop...\n", 89 | "loop...\n", 90 | "loop...\n" 91 | ] 92 | } 93 | ], 94 | "source": [ 95 | "for _ in range(10):\n", 96 | " print(\"loop...\")" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "id": "180fd296", 102 | "metadata": {}, 103 | "source": [ 104 | "Note that there's nothing special about `_` here - it is simply a *convention* most Python developers will use to indicate to *humans* reading the code that we don't actually care about the loop variable." 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "id": "1c845163", 110 | "metadata": {}, 111 | "source": [ 112 | "But, there is an alternative, and more efficient method of achieving the same thing, use the `repeat` function from the `itertools` module." 
113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 3, 118 | "id": "8f6082d2", 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "from itertools import repeat" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 5, 128 | "id": "ca561987", 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "name": "stdout", 133 | "output_type": "stream", 134 | "text": [ 135 | "loop...\n", 136 | "loop...\n", 137 | "loop...\n", 138 | "loop...\n", 139 | "loop...\n", 140 | "loop...\n", 141 | "loop...\n", 142 | "loop...\n", 143 | "loop...\n", 144 | "loop...\n" 145 | ] 146 | } 147 | ], 148 | "source": [ 149 | "for _ in repeat(None, 10):\n", 150 | " print(\"loop...\")" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "id": "37e46266", 156 | "metadata": {}, 157 | "source": [ 158 | "Now let's look at some timings for both approaches:" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 6, 164 | "id": "ceac1086", 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "from timeit import timeit" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 7, 174 | "id": "2dd59927", 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "def loop_range(n):\n", 179 | " for _ in range(n):\n", 180 | " pass" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 8, 186 | "id": "7b969074", 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "def loop_repeat(n):\n", 191 | " for _ in repeat(None, n):\n", 192 | " pass" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "id": "df80dac8", 198 | "metadata": {}, 199 | "source": [ 200 | "And let's time this for large loops:" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 12, 206 | "id": "cffaf61c", 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/plain": [ 212 | "1.034368499997072" 213 | ] 214 | }, 215 | "execution_count": 12, 216 | "metadata": {}, 217 | "output_type": "execute_result" 218 | } 219 | ], 220 | "source": [ 221 | "n = 100_000\n", 222 | "timeit(\"loop_range(n)\", globals=globals(), number=1000)" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 13, 228 | "id": "57836783", 229 | "metadata": {}, 230 | "outputs": [ 231 | { 232 | "data": { 233 | "text/plain": [ 234 | "0.4371508750191424" 235 | ] 236 | }, 237 | "execution_count": 13, 238 | "metadata": {}, 239 | "output_type": "execute_result" 240 | } 241 | ], 242 | "source": [ 243 | "n = 100_000\n", 244 | "timeit(\"loop_repeat(n)\", globals=globals(), number=1000)" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "id": "89235261", 250 | "metadata": {}, 251 | "source": [ 252 | "As you can see, for large loops this can make a difference." 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "id": "d6b27004", 258 | "metadata": {}, 259 | "source": [ 260 | "Does this mean you should always use `repeat` instead of `range`? Up to you - for large loops I would most likely use `repeat`, but for small loops I may not bother importing the `itertools` module, plus using `repeat` may cause some confusion for beginner Python devs looking at your code." 
261 | ] 262 | }, 263 | { 264 | "cell_type": "markdown", 265 | "id": "41a3007c", 266 | "metadata": {}, 267 | "source": [ 268 | "The usual caveat I give when I discuss optimizing your code - **do not optimize prematurely**.\n", 269 | "\n", 270 | "Write your code in the most readable manner possible (without a total disregard for efficiency of course!) - but don't start optimizing your code and refactoring until you understand **where** your code is slow. In the above example, we saved about less than a second - but if your code takes 10 minutes to run, then shaving off one second might be meaningless (by itself). \n", 271 | "\n", 272 | "**First** identify the bottlenecks in your code, **then** optimize your code." 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "id": "3b95e7eb", 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [] 282 | } 283 | ], 284 | "metadata": { 285 | "kernelspec": { 286 | "display_name": "Python 3 (ipykernel)", 287 | "language": "python", 288 | "name": "python3" 289 | }, 290 | "language_info": { 291 | "codemirror_mode": { 292 | "name": "ipython", 293 | "version": 3 294 | }, 295 | "file_extension": ".py", 296 | "mimetype": "text/x-python", 297 | "name": "python", 298 | "nbconvert_exporter": "python", 299 | "pygments_lexer": "ipython3", 300 | "version": "3.9.10" 301 | } 302 | }, 303 | "nbformat": 4, 304 | "nbformat_minor": 5 305 | } 306 | -------------------------------------------------------------------------------- /2022/08 - August/black_isort/Makefile: -------------------------------------------------------------------------------- 1 | standardize-diff: 2 | isort --diff . 3 | black --diff . 4 | 5 | standardize: 6 | isort . 7 | black . -------------------------------------------------------------------------------- /2022/08 - August/black_isort/README.md: -------------------------------------------------------------------------------- 1 | # Python `black` and `isort` 2 | 3 | `black` and `isort` are code formatters that will modify your Python code to conform to (PEP8) standards. 4 | 5 | By using tools like `black` and `isort`, development teams ensure a common style for their Python code. 6 | 7 | `black` looks at things like indentation, line length, use of double quote for strings, trailing commas in multi-line lists/tuples/dicts, and much more. 8 | 9 | `isort` is primarily focused on the location and order of **imports** in your modules. It will automatically sort your imports alphabetically, as well as split up your imports into sections, normally for standard library, 3rd party libraries, and your own project's modules/packages. It also does some formatting 10 | of imports (sometimes splitting an import statement over multiple lines) 11 | 12 | In another video, we'll take a look at the `flake8` package, and knowing how to use `isort` and `black` before we do that will be beneficial. 13 | 14 | 15 | > Note: Although this folder is part of the overall blog repo, you should consider it as a standalone project, i.e. treat this folder as your application root, create your virtual env for this root, etc. 16 | > 17 | 18 | ## `black` Installation 19 | Home page for `black` is located [here](https://black.readthedocs.io/en/stable/). 20 | 21 | You will need to install `black` to your virtual environment. 
22 | 23 | You can pip install it: 24 | ```bash 25 | pip install black 26 | ``` 27 | 28 | Alternatively, if you are using this repo with `pipenv`, then the dependency is already included in the `Pipfile`, so you can just install it using 29 | ```bash 30 | pipenv install --dev 31 | ``` 32 | 33 | 34 | ## `isort` Installation 35 | Home page for `isort` is located [here](https://pycqa.github.io/isort/). 36 | 37 | You will need to install `isort` to your virtual environment. 38 | 39 | You can pip install it: 40 | ```bash 41 | pip install isort 42 | ``` 43 | 44 | Alternatively, if you are using this repo with `pipenv`, then the dependency is already included in the `Pipfile`, so you can just install it using 45 | ```bash 46 | pipenv install --dev 47 | ``` 48 | 49 | 50 | ## Running `black` 51 | When you run `black`, by default it will **modify** your Python code files. 52 | 53 | You can run `black` on a file, or a directory via the command line: 54 | 55 | ```bash 56 | black 57 | ``` 58 | 59 | If you only want to see what changes `black` would make but **without** modifying your code fiels, use the `--diff` flag - in that case `black` will produce a diff of all the changes: 60 | ```bash 61 | black --diff 62 | ``` 63 | 64 | ## Running `isort` 65 | Just like `black`, `isort` will modify your files by default. 66 | 67 | You can run `isort` on a file, or a directory via the command line: 68 | 69 | ```bash 70 | isort 71 | ``` 72 | 73 | If you only want to see what changes `isort` would make but **without** modifying your code fiels, use the `--diff` flag - in that case `isort` will produce a diff of all the changes: 74 | ```bash 75 | isort --diff 76 | ``` 77 | 78 | ## Configuring `black` 79 | Since `black` is highly opiniated, it has very few configurations! 80 | 81 | We can provide a configuration file for `black` by creating the file `pyproject.toml` - usually placed in the root of the project (this way the config will apply to every Python file in your project). 82 | 83 | One configuration I see used often is to change the maximum allowed line length from `88` (`black`'s default, to something larger or smaller). In this example, I have set the max line length to `100` by adding this to the `pyproject.toml` file: 84 | 85 | ```toml 86 | [tool.black] 87 | line-length = 100 88 | ``` 89 | 90 | 91 | ## Configuring `isort` 92 | We can also configure `isort` using the same `pyproject.toml` file. 93 | 94 | One of the things we **have** to do, is tell `isort` we are using `black`. 95 | 96 | By default `isort` applies a style to format multi-line imports that is not compatible with `black`. 97 | 98 | That setting can be added to the `pyproject.toml` file in a section for `isort`: 99 | ```toml 100 | [tool.isort] 101 | profile = "black" 102 | ``` 103 | 104 | There are more settings that you can read up on in the link I provided earlier, but usually that's pretty much all we need. 105 | 106 | ## Using a Makefile 107 | If you are running on a *nix system, you can use GNU [make](https://www.gnu.org/software/make/manual/make.html) and a `Makefile` to make running `isort` and `black` a little simpler than typing all the commands directly. 108 | 109 | If you are running in Windows, then I do not believe you have `make` available, and I am not aware of Windows based alternatives (not saying there aren't, just that I don't know - as I have not used Windows in years). 
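For reference, here is the `Makefile` included in this folder:

```makefile
standardize-diff:
	isort --diff .
	black --diff .

standardize:
	isort .
	black .
```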
110 | 111 | To run black and isort together, I have created two commands (or **rules**) in the `Makefile` - one for running with `--diff`, and one for running without (i.e. making changes to your files) 112 | 113 | You can use run them from the command line: 114 | 115 | ```bash 116 | make standardize-diff 117 | ``` 118 | to just do a diff, and 119 | 120 | ```bash 121 | make standardize 122 | ``` 123 | to actually change your code files. 124 | 125 | 126 | And that's it, you now know how to use `black` and `isort` to standardize your code. It's a good idea to do this, and most Python software developers do - it brings some consistency to your code, and will help you write better looking code. 127 | -------------------------------------------------------------------------------- /2022/08 - August/black_isort/badly_formatted.py: -------------------------------------------------------------------------------- 1 | import mod2 2 | import pyperclip 3 | from mod1 import TestA, abc, TestB, xyz, TestC 4 | import json 5 | from typing import List, Union, Any 6 | import numpy 7 | 8 | 9 | 10 | 11 | def mult( 12 | a: int | str, 13 | b: int 14 | ): 15 | return a*b 16 | 17 | 18 | 19 | 20 | if __name__ == '__main__': 21 | print( mult( 'a', 10 ) ) 22 | 23 | long_var_name_a=10 24 | long_var_name_b=20 25 | # a long line of code that goes over the max line length 26 | if (long_var_name_a and long_var_name_b and long_var_name_a > 10 and long_var_name_b < 100 and long_var_name_a > long_var_name_b): 27 | print("no idea what that expression actually checks for :-)") 28 | -------------------------------------------------------------------------------- /2022/08 - August/black_isort/mod1.py: -------------------------------------------------------------------------------- 1 | def abc(): 2 | pass 3 | 4 | def xyz(): 5 | pass 6 | 7 | class TestA: 8 | pass 9 | 10 | 11 | 12 | 13 | class TestB: 14 | pass 15 | 16 | class TestC: 17 | pass -------------------------------------------------------------------------------- /2022/08 - August/black_isort/mod2.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | class Numbers(Enum): 4 | one = 1 5 | two = 2 6 | 7 | class Levels(Enum): 8 | critical = 1 9 | warning = 2 10 | info = 3 -------------------------------------------------------------------------------- /2022/08 - August/black_isort/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 100 3 | 4 | [tool.isort] 5 | profile = "black" -------------------------------------------------------------------------------- /2022/08 - August/click/Makefile: -------------------------------------------------------------------------------- 1 | SHELL := /bin/bash 2 | 3 | .DEFAULT_GOAL := help 4 | 5 | # COLORS 6 | GREEN := $(shell tput -Txterm setaf 2) 7 | YELLOW := $(shell tput -Txterm setaf 3) 8 | WHITE := $(shell tput -Txterm setaf 7) 9 | RESET := $(shell tput -Txterm sgr0) 10 | 11 | ## show black and isort diffs 12 | standardize-diff: 13 | isort --diff . 14 | black --diff . 15 | 16 | ## run black and isort 17 | standardize: 18 | isort . 19 | black . 
20 | 21 | 22 | ## Show help 23 | TARGET_MAX_CHAR_NUM=30 24 | help: 25 | @awk '/^[a-zA-Z\-\_0-9\.]+:/ { \ 26 | helpMessage = match(lastLine, /^## (.*)/); \ 27 | if (helpMessage) { \ 28 | helpCommand = substr($$1, 0, index($$1, ":")); \ 29 | helpMessage = substr(lastLine, RSTART + 3, RLENGTH); \ 30 | printf " ${YELLOW}%-$(TARGET_MAX_CHAR_NUM)s${RESET} ${GREEN}%s${RESET}\n", helpCommand, helpMessage; \ 31 | } \ 32 | } \ 33 | { lastLine = $$0 }' $(MAKEFILE_LIST) 34 | -------------------------------------------------------------------------------- /2022/08 - August/click/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | click = "*" 8 | setuptools = "*" 9 | tabulate = "*" 10 | 11 | [dev-packages] 12 | isort = "*" 13 | black = "*" 14 | 15 | [requires] 16 | python_version = "3.10" 17 | -------------------------------------------------------------------------------- /2022/08 - August/click/Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "706ec5f3a333f274465683b53122adb201fddbd04acb0476eb74021b721fa2bd" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.10" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": { 19 | "click": { 20 | "hashes": [ 21 | "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e", 22 | "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48" 23 | ], 24 | "index": "pypi", 25 | "version": "==8.1.3" 26 | }, 27 | "setuptools": { 28 | "hashes": [ 29 | "sha256:0d33c374d41c7863419fc8f6c10bfe25b7b498aa34164d135c622e52580c6b16", 30 | "sha256:c04b44a57a6265fe34a4a444e965884716d34bae963119a76353434d6f18e450" 31 | ], 32 | "index": "pypi", 33 | "version": "==63.2.0" 34 | }, 35 | "tabulate": { 36 | "hashes": [ 37 | "sha256:0ba055423dbaa164b9e456abe7920c5e8ed33fcc16f6d1b2f2d152c8e1e8b4fc", 38 | "sha256:436f1c768b424654fce8597290d2764def1eea6a77cfa5c33be00b1bc0f4f63d", 39 | "sha256:6c57f3f3dd7ac2782770155f3adb2db0b1a269637e42f27599925e64b114f519" 40 | ], 41 | "index": "pypi", 42 | "version": "==0.8.10" 43 | } 44 | }, 45 | "develop": { 46 | "black": { 47 | "hashes": [ 48 | "sha256:074458dc2f6e0d3dab7928d4417bb6957bb834434516f21514138437accdbe90", 49 | "sha256:187d96c5e713f441a5829e77120c269b6514418f4513a390b0499b0987f2ff1c", 50 | "sha256:2ea29072e954a4d55a2ff58971b83365eba5d3d357352a07a7a4df0d95f51c78", 51 | "sha256:4af5bc0e1f96be5ae9bd7aaec219c901a94d6caa2484c21983d043371c733fc4", 52 | "sha256:560558527e52ce8afba936fcce93a7411ab40c7d5fe8c2463e279e843c0328ee", 53 | "sha256:568ac3c465b1c8b34b61cd7a4e349e93f91abf0f9371eda1cf87194663ab684e", 54 | "sha256:6797f58943fceb1c461fb572edbe828d811e719c24e03375fd25170ada53825e", 55 | "sha256:6c1734ab264b8f7929cef8ae5f900b85d579e6cbfde09d7387da8f04771b51c6", 56 | "sha256:6c6d39e28aed379aec40da1c65434c77d75e65bb59a1e1c283de545fb4e7c6c9", 57 | "sha256:7ba9be198ecca5031cd78745780d65a3f75a34b2ff9be5837045dce55db83d1c", 58 | "sha256:94783f636bca89f11eb5d50437e8e17fbc6a929a628d82304c80fa9cd945f256", 59 | "sha256:a218d7e5856f91d20f04e931b6f16d15356db1c846ee55f01bac297a705ca24f", 60 | "sha256:a3db5b6409b96d9bd543323b23ef32a1a2b06416d525d27e0f67e74f1446c8f2", 61 | "sha256:ac609cf8ef5e7115ddd07d85d988d074ed00e10fbc3445aee393e70164a2219c", 62 | 
"sha256:b154e6bbde1e79ea3260c4b40c0b7b3109ffcdf7bc4ebf8859169a6af72cd70b", 63 | "sha256:b270a168d69edb8b7ed32c193ef10fd27844e5c60852039599f9184460ce0807", 64 | "sha256:b9fd45787ba8aa3f5e0a0a98920c1012c884622c6c920dbe98dbd05bc7c70fbf", 65 | "sha256:c85928b9d5f83b23cee7d0efcb310172412fbf7cb9d9ce963bd67fd141781def", 66 | "sha256:c9a3ac16efe9ec7d7381ddebcc022119794872abce99475345c5a61aa18c45ad", 67 | "sha256:cfaf3895a9634e882bf9d2363fed5af8888802d670f58b279b0bece00e9a872d", 68 | "sha256:e439798f819d49ba1c0bd9664427a05aab79bfba777a6db94fd4e56fae0cb849", 69 | "sha256:f586c26118bc6e714ec58c09df0157fe2d9ee195c764f630eb0d8e7ccce72e69", 70 | "sha256:f6fe02afde060bbeef044af7996f335fbe90b039ccf3f5eb8f16df8b20f77666" 71 | ], 72 | "index": "pypi", 73 | "version": "==22.6.0" 74 | }, 75 | "click": { 76 | "hashes": [ 77 | "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e", 78 | "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48" 79 | ], 80 | "index": "pypi", 81 | "version": "==8.1.3" 82 | }, 83 | "isort": { 84 | "hashes": [ 85 | "sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7", 86 | "sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951" 87 | ], 88 | "index": "pypi", 89 | "version": "==5.10.1" 90 | }, 91 | "mypy-extensions": { 92 | "hashes": [ 93 | "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d", 94 | "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8" 95 | ], 96 | "version": "==0.4.3" 97 | }, 98 | "pathspec": { 99 | "hashes": [ 100 | "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a", 101 | "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1" 102 | ], 103 | "version": "==0.9.0" 104 | }, 105 | "platformdirs": { 106 | "hashes": [ 107 | "sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788", 108 | "sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19" 109 | ], 110 | "markers": "python_version >= '3.7'", 111 | "version": "==2.5.2" 112 | }, 113 | "tomli": { 114 | "hashes": [ 115 | "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", 116 | "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f" 117 | ], 118 | "markers": "python_full_version < '3.11.0a7'", 119 | "version": "==2.0.1" 120 | } 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /2022/08 - August/click/converters/cli.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from converters.csv_converter import convert_csv_to_json 4 | 5 | 6 | @click.group(name="converters") 7 | def converters_group(): 8 | """Converter commands.""" 9 | 10 | 11 | @click.command(name="csv") 12 | @click.option( 13 | "--infile", 14 | "-i", 15 | "csv_file_name", 16 | type=click.Path(exists=True, dir_okay=False), 17 | help="specifies the input CSV file", 18 | ) 19 | @click.option( 20 | "--outfile", 21 | "-o", 22 | "out_file_name", 23 | type=click.Path(exists=False, dir_okay=False, writable=True), 24 | help="specifies an output file - note that if the file exists, it will be overwritten", 25 | ) 26 | @click.option( 27 | "--has-headers", 28 | is_flag=True, 29 | default=False, 30 | help="provide this flag to indicate the CSV file has a headers row", 31 | ) 32 | @click.option( 33 | "--columns", 34 | "-c", 35 | "custom_headers", 36 | default=None, 37 | multiple=True, 38 | type=str, 39 | help="specify custom columns names for 
each column in the CSV input file", 40 | ) 41 | def csv_to_json(csv_file_name, out_file_name, has_headers, custom_headers): 42 | """Specify an input CSV file to convert to JSON. 43 | 44 | If desired, you can provide custom column names using the `--columns` option. 45 | 46 | Note that if you do not specify custom headers using --columns, then you **must** 47 | use the --has-headers flag. 48 | """ 49 | convert_csv_to_json( 50 | csv_file_name=csv_file_name, 51 | out_file_name=out_file_name, 52 | has_headers=has_headers, 53 | custom_headers=custom_headers, 54 | ) 55 | 56 | 57 | converters_group.add_command(csv_to_json) 58 | -------------------------------------------------------------------------------- /2022/08 - August/click/converters/csv_converter.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import json 3 | 4 | 5 | def _validate_header_count(csv_file_name: str, expected_column_count: int): 6 | with open(csv_file_name) as f: 7 | reader = csv.reader(f) 8 | row = next(reader) 9 | if len(row) != expected_column_count: 10 | raise ValueError("Header count and actual CSV file column mismatch.") 11 | 12 | 13 | def convert_csv_to_json( 14 | csv_file_name: str, 15 | out_file_name: str, 16 | has_headers: bool = False, 17 | custom_headers: list[str] = None, 18 | ): 19 | # If has_headers is False, then custom_headers *must* be provided 20 | if not has_headers and not custom_headers: 21 | raise ValueError("Headers must be provided if CSV does not contain them.") 22 | 23 | # Validate custom_headers count (if specified) 24 | if custom_headers: 25 | _validate_header_count(csv_file_name, len(custom_headers)) 26 | 27 | # Instead of reading the entire CSV file into memory, we'll read 28 | # it in row by row, and output the JSON as the same time - more memory efficient 29 | with open(csv_file_name) as in_file: 30 | with open(out_file_name, "w") as out_file: 31 | csv_data = csv.reader(in_file) 32 | 33 | file_headers = next(csv_data) if has_headers else None 34 | csv_headers = custom_headers or file_headers 35 | 36 | out_file.write("[") 37 | first_element = True 38 | for row in csv_data: 39 | if first_element: 40 | first_element = False 41 | else: 42 | out_file.write(",") 43 | output_dict = dict(zip(csv_headers, row)) 44 | out_file.write(json.dumps(output_dict, indent=0)) 45 | out_file.write("]") 46 | -------------------------------------------------------------------------------- /2022/08 - August/click/data/population.csv: -------------------------------------------------------------------------------- 1 | Country Name,Country Code,2020,2021 2 | Aruba,ABW,106766,107195 3 | Africa Eastern and Southern,AFE,677243299,694665117 4 | Afghanistan,AFG,38928341,39835428 5 | Africa Western and Central,AFW,458803476,470898870 6 | Angola,AGO,32866268,33933611 7 | Albania,ALB,2837849,2811666 8 | Andorra,AND,77265,77354 9 | Arab World,ARB,436080728,444517783 10 | United Arab Emirates,ARE,9890400,9991083 11 | Argentina,ARG,45376763,45808747 12 | Armenia,ARM,2963234,2968128 13 | American Samoa,ASM,55197,55103 14 | Antigua and Barbuda,ATG,97928,98728 15 | Australia,AUS,25693267,25739256 16 | Austria,AUT,8916864,8956279 17 | Azerbaijan,AZE,10093121,10145212 18 | Burundi,BDI,11890781,12255429 19 | Belgium,BEL,11544241,11587882 20 | Benin,BEN,12123198,12451031 21 | Burkina Faso,BFA,20903278,21497097 22 | Bangladesh,BGD,164689383,166303494 23 | Bulgaria,BGR,6934015,6899125 24 | Bahrain,BHR,1701583,1748295 25 | "Bahamas, The",BHS,393248,396914 26 | Bosnia and 
Herzegovina,BIH,3280815,3263459 27 | Belarus,BLR,9379952,9340314 28 | Belize,BLZ,397621,404915 29 | Bermuda,BMU,63893,63867 30 | Bolivia,BOL,11673029,11832936 31 | Brazil,BRA,212559409,213993441 32 | Barbados,BRB,287371,287708 33 | Brunei Darussalam,BRN,437483,441532 34 | Bhutan,BTN,771612,779900 35 | Botswana,BWA,2351625,2397240 36 | Central African Republic,CAF,4829764,4919987 37 | Canada,CAN,38037204,38246108 38 | Central Europe and the Baltics,CEB,102172351,101669618 39 | Switzerland,CHE,8636561,8697723 40 | Channel Islands,CHI,173859,175244 41 | Chile,CHL,19116209,19212362 42 | China,CHN,1411100000,1412360000 43 | Cote d'Ivoire,CIV,26378275,27053629 44 | Cameroon,CMR,26545864,27224262 45 | "Congo, Dem. Rep.",COD,89561404,92377986 46 | "Congo, Rep.",COG,5518092,5657017 47 | Colombia,COL,50882884,51265841 48 | Comoros,COM,869595,888456 49 | Cabo Verde,CPV,555988,561901 50 | Costa Rica,CRI,5094114,5139053 51 | Caribbean small states,CSS,7442291,7481631 52 | Cuba,CUB,11326616,11317498 53 | Curacao,CUW,154947,152369 54 | Cayman Islands,CYM,65720,66498 55 | Cyprus,CYP,1207361,1215588 56 | Czech Republic,CZE,10697858,10703446 57 | Germany,DEU,83160871,83129285 58 | Djibouti,DJI,988002,1002197 59 | Dominica,DMA,71991,72172 60 | Denmark,DNK,5831404,5856733 61 | Dominican Republic,DOM,10847904,10953714 62 | Algeria,DZA,43851043,44616626 63 | East Asia & Pacific (excluding high income),EAP,2114009483,2122086315 64 | Early-demographic dividend,EAR,3332103561,3373866849 65 | East Asia & Pacific,EAS,2361517682,2368622859 66 | Europe & Central Asia (excluding high income),ECA,400895993,401828885 67 | Europe & Central Asia,ECS,922976036,923753699 68 | Ecuador,ECU,17643060,17888474 69 | "Egypt, Arab Rep.",EGY,102334403,104258327 70 | Euro area,EMU,342708355,342566541 71 | Eritrea,ERI,"","" 72 | Spain,ESP,47363419,47326687 73 | Estonia,EST,1329479,1329254 74 | Ethiopia,ETH,114963583,117876226 75 | European Union,EUU,447479493,446946712 76 | Fragile and conflict affected situations,FCS,930004306,952400963 77 | Finland,FIN,5529543,5541696 78 | Fiji,FJI,896444,902899 79 | France,FRA,67379908,67499343 80 | Faroe Islands,FRO,48865,49053 81 | "Micronesia, Fed. 
Sts.",FSM,115021,116255 82 | Gabon,GAB,2225728,2278829 83 | United Kingdom,GBR,67081000,67326569 84 | Georgia,GEO,3722716,3708610 85 | Ghana,GHA,31072945,31732128 86 | Gibraltar,GIB,33691,33691 87 | Guinea,GIN,13132792,13497237 88 | "Gambia, The",GMB,2416664,2486937 89 | Guinea-Bissau,GNB,1967998,2015490 90 | Equatorial Guinea,GNQ,1402985,1449891 91 | Greece,GRC,10700556,10664568 92 | Grenada,GRD,112519,113015 93 | Greenland,GRL,56367,56653 94 | Guatemala,GTM,16858333,17109746 95 | Guam,GUM,168783,170184 96 | Guyana,GUY,786559,790329 97 | High income,HIC,1240684527,1241374277 98 | "Hong Kong SAR, China",HKG,7481000,7413100 99 | Honduras,HND,9904608,10062994 100 | Heavily indebted poor countries (HIPC),HPC,823480038,845522272 101 | Croatia,HRV,4047680,3899000 102 | Haiti,HTI,11402533,11541683 103 | Hungary,HUN,9750149,9709886 104 | IBRD only,IBD,4862446431,4895295243 105 | IDA & IBRD total,IBT,6571053159,6642614110 106 | IDA total,IDA,1708606728,1747318867 107 | IDA blend,IDB,574159138,586883422 108 | Indonesia,IDN,273523621,276361788 109 | IDA only,IDX,1134447590,1160435445 110 | Isle of Man,IMN,85032,85410 111 | India,IND,1380004385,1393409033 112 | Not classified,INX,"","" 113 | Ireland,IRL,4985674,5028230 114 | "Iran, Islamic Rep.",IRN,83992953,85028760 115 | Iraq,IRQ,40222503,41179351 116 | Iceland,ISL,366463,372295 117 | Israel,ISR,9215100,9364000 118 | Italy,ITA,59449527,59066225 119 | Jamaica,JAM,2961161,2973462 120 | Jordan,JOR,10203140,10269022 121 | Japan,JPN,126261000,125681593 122 | Kazakhstan,KAZ,18755666,19002586 123 | Kenya,KEN,53771300,54985702 124 | Kyrgyz Republic,KGZ,6579900,6694200 125 | Cambodia,KHM,16718971,16946446 126 | Kiribati,KIR,119446,121388 127 | St. Kitts and Nevis,KNA,53192,53546 128 | "Korea, Rep.",KOR,51836239,51744876 129 | Kuwait,KWT,4270563,4328553 130 | Latin America & Caribbean (excluding high income),LAC,590928198,596217644 131 | Lao PDR,LAO,7275556,7379358 132 | Lebanon,LBN,6825442,6769151 133 | Liberia,LBR,5057677,5180208 134 | Libya,LBY,6871287,6958538 135 | St. Lucia,LCA,183629,184401 136 | Latin America & Caribbean,LCN,652365260,658089208 137 | Least developed countries: UN classification,LDC,1057131013,1081504804 138 | Low income,LIC,683532991,701926973 139 | Liechtenstein,LIE,38137,38254 140 | Sri Lanka,LKA,21919000,22156000 141 | Lower middle income,LMC,3318682068,3363196656 142 | Low & middle income,LMY,6494812232,6566551568 143 | Lesotho,LSO,2142252,2159067 144 | Late-demographic dividend,LTE,2316803603,2322070390 145 | Lithuania,LTU,2794885,2795321 146 | Luxembourg,LUX,630419,639070 147 | Latvia,LVA,1900449,1883162 148 | "Macao SAR, China",MAC,649342,658391 149 | St. 
Martin (French part),MAF,38659,39239 150 | Morocco,MAR,36910558,37344787 151 | Monaco,MCO,39244,39520 152 | Moldova,MDA,2620495,2573928 153 | Madagascar,MDG,27691019,28427333 154 | Maldives,MDV,540542,543620 155 | Middle East & North Africa,MEA,464542370,472494995 156 | Mexico,MEX,128932753,130262220 157 | Marshall Islands,MHL,59194,59618 158 | Middle income,MIC,5811279241,5864624595 159 | North Macedonia,MKD,2072531,2065092 160 | Mali,MLI,20250834,20855724 161 | Malta,MLT,515332,516869 162 | Myanmar,MMR,54409794,54806014 163 | Middle East & North Africa (excluding high income),MNA,396147843,403051615 164 | Montenegro,MNE,621306,620173 165 | Mongolia,MNG,3278292,3329282 166 | Northern Mariana Islands,MNP,57557,57910 167 | Mozambique,MOZ,31255435,32163045 168 | Mauritania,MRT,4649660,4775110 169 | Mauritius,MUS,1265740,1266060 170 | Malawi,MWI,19129955,19647681 171 | Malaysia,MYS,32365998,32776195 172 | North America,NAC,369602177,370203720 173 | Namibia,NAM,2540916,2587344 174 | New Caledonia,NCL,271960,272620 175 | Niger,NER,24206636,25130810 176 | Nigeria,NGA,206139587,211400704 177 | Nicaragua,NIC,6624554,6702379 178 | Netherlands,NLD,17441500,17533405 179 | Norway,NOR,5379475,5408320 180 | Nepal,NPL,29136808,29674920 181 | Nauru,NRU,10834,10873 182 | New Zealand,NZL,5090200,5122600 183 | OECD members,OED,1372980201,1375820281 184 | Oman,OMN,5106622,5223376 185 | Other small states,OSS,31929881,32464282 186 | Pakistan,PAK,220892331,225199929 187 | Panama,PAN,4314768,4381583 188 | Peru,PER,32971846,33359416 189 | Philippines,PHL,109581085,111046910 190 | Palau,PLW,18092,18174 191 | Papua New Guinea,PNG,8947027,9119005 192 | Poland,POL,37899070,37781024 193 | Pre-demographic dividend,PRE,970795671,997158227 194 | Puerto Rico,PRI,3281538,3263584 195 | "Korea, Dem. 
People's Rep.",PRK,25778815,25887045 196 | Portugal,PRT,10297081,10299423 197 | Paraguay,PRY,7132530,7219641 198 | West Bank and Gaza,PSE,4803269,4922749 199 | Pacific island small states,PSS,2528958,2566494 200 | Post-demographic dividend,PST,1117278019,1116655196 201 | French Polynesia,PYF,280904,282534 202 | Qatar,QAT,2881060,2930524 203 | Romania,ROU,19257520,19115146 204 | Russian Federation,RUS,144073139,143446060 205 | Rwanda,RWA,12952209,13276517 206 | South Asia,SAS,1856882402,1877902324 207 | Saudi Arabia,SAU,34813867,35340680 208 | Sudan,SDN,43849269,44909351 209 | Senegal,SEN,16743930,17196308 210 | Singapore,SGP,5685807,5453566 211 | Solomon Islands,SLB,686878,703995 212 | Sierra Leone,SLE,7976985,8141343 213 | El Salvador,SLV,6486201,6518500 214 | San Marino,SMR,33938,34010 215 | Somalia,SOM,15893219,16359500 216 | Serbia,SRB,6899126,6844078 217 | Sub-Saharan Africa (excluding high income),SSA,1135948313,1165464785 218 | South Sudan,SSD,11193729,11381377 219 | Sub-Saharan Africa,SSF,1136046775,1165563987 220 | Small states,SST,41901130,42512407 221 | Sao Tome and Principe,STP,219161,223364 222 | Suriname,SUR,586634,591798 223 | Slovak Republic,SVK,5458827,5447247 224 | Slovenia,SVN,2102419,2107007 225 | Sweden,SWE,10353442,10415811 226 | Eswatini,SWZ,1160164,1172369 227 | Sint Maarten (Dutch part),SXM,42310,42846 228 | Seychelles,SYC,98462,99202 229 | Syrian Arab Republic,SYR,17500657,18275704 230 | Turks and Caicos Islands,TCA,38718,39226 231 | Chad,TCD,16425859,16914985 232 | East Asia & Pacific (IDA & IBRD countries),TEA,2088186305,2096155040 233 | Europe & Central Asia (IDA & IBRD countries),TEC,462100263,462624055 234 | Togo,TGO,8278737,8478242 235 | Thailand,THA,69799978,69950844 236 | Tajikistan,TJK,9537642,9749625 237 | Turkmenistan,TKM,6031187,6117933 238 | Latin America & the Caribbean (IDA & IBRD countries),TLA,636492840,642239838 239 | Timor-Leste,TLS,1318442,1343875 240 | Middle East & North Africa (IDA & IBRD countries),TMN,391344574,398128866 241 | Tonga,TON,105697,106759 242 | South Asia (IDA & IBRD),TSA,1856882402,1877902324 243 | Sub-Saharan Africa (IDA & IBRD countries),TSS,1136046775,1165563987 244 | Trinidad and Tobago,TTO,1399491,1403374 245 | Tunisia,TUN,11818618,11935764 246 | Turkiye,TUR,84339067,85042736 247 | Tuvalu,TUV,11792,11925 248 | Tanzania,TZA,59734213,61498438 249 | Uganda,UGA,45741000,47123533 250 | Ukraine,UKR,44132049,43814581 251 | Upper middle income,UMC,2492597173,2501427939 252 | Uruguay,URY,3473727,3485152 253 | United States,USA,331501080,331893745 254 | Uzbekistan,UZB,34232050,34915100 255 | St. 
Vincent and the Grenadines,VCT,110947,111269 256 | "Venezuela, RB",VEN,28435943,28704947 257 | British Virgin Islands,VGB,30237,30423 258 | Virgin Islands (U.S.),VIR,106290,105870 259 | Vietnam,VNM,97338583,98168829 260 | Vanuatu,VUT,307150,314464 261 | World,WLD,7763932702,7836630792 262 | Samoa,WSM,198410,200144 263 | Kosovo,XKX,1790133,1806279 264 | "Yemen, Rep.",YEM,29825968,30490639 265 | South Africa,ZAF,59308690,60041996 266 | Zambia,ZMB,18383956,18920657 267 | Zimbabwe,ZWE,14862927,15092171 -------------------------------------------------------------------------------- /2022/08 - August/click/data/sample.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "name": "John von Neumann", 4 | "placeOfBirth": "Hungary", 5 | "bornYear": 1903 6 | }, 7 | { 8 | "name": "Isaac Newton", 9 | "placeOfBirth": "United Kingdom", 10 | "bornYear": 1643 11 | }, 12 | { 13 | "name": "Richard Feynman", 14 | "placeOfBirth": "United States", 15 | "bornYear": 1918 16 | }, 17 | { 18 | "name": "Albert Einstein", 19 | "placeOfBirth": "Germany", 20 | "bornYear": 1879 21 | }, 22 | { 23 | "name": "Subrahmanyan Chandrasekhar", 24 | "placeOfBirth": "Pakistan", 25 | "bornYear": 1910 26 | } 27 | ] -------------------------------------------------------------------------------- /2022/08 - August/click/main.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | import click 4 | 5 | from converters.cli import converters_group 6 | from viewers import viewers_group 7 | 8 | 9 | # first create a command group - this will be the top level CLI 10 | @click.group 11 | def main_cli(): 12 | pass 13 | 14 | 15 | @click.command 16 | def date(): 17 | click.echo(datetime.utcnow().date().isoformat()) 18 | 19 | 20 | @click.command 21 | def time(): 22 | click.echo(datetime.utcnow().time().isoformat()) 23 | 24 | 25 | @click.command(name="click-docs") 26 | def view_click_docs(): 27 | """View the click library documentation""" 28 | 29 | """ 30 | Since our decorator provides a `name` argument, the group name 31 | will be that, otherwise it would default to the function name 32 | with underscores replaced by dashes (it is customary for 33 | commands to use dashes to separate words, not underscores) 34 | 35 | If you do not wish to override the `name`, you can omit that (and the parentheses) 36 | from the decorator altogether e.g. 37 | 38 | Writing this: 39 | 40 | ```python 41 | @click.group 42 | def click_docs(): 43 | pass 44 | ``` 45 | 46 | would achieve the same thing as we have above. 
47 | 48 | Writing this: 49 | 50 | ```python 51 | @click.group 52 | def view_click_docs(): 53 | pass 54 | ``` 55 | 56 | would result in a command group named `viewers-click-docs` 57 | """ 58 | click.launch("https://click.palletsprojects.com/") 59 | 60 | 61 | # add commands to CLI root 62 | main_cli.add_command(date) 63 | main_cli.add_command(time) 64 | main_cli.add_command(view_click_docs) 65 | 66 | # add command groups to CLI root 67 | main_cli.add_command(viewers_group) 68 | main_cli.add_command(converters_group) 69 | 70 | 71 | # if you do not use setuptools, you'll need to uncomment this code, 72 | # and invoke the CLI as explained in the README (better yet, check out the video in my YouTube 73 | # channel - https://www.youtube.com/channel/UCOsGw17tMhM4-GBjvQnXGzQ 74 | 75 | # if __name__ == '__main__': 76 | # main_cli() 77 | -------------------------------------------------------------------------------- /2022/08 - August/click/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 100 3 | 4 | [tool.isort] 5 | profile = "black" -------------------------------------------------------------------------------- /2022/08 - August/click/requirements.txt: -------------------------------------------------------------------------------- 1 | # this project was created using Python 3.10 - YMMV if using a different version. 2 | 3 | click 4 | setuptools 5 | tabulate 6 | isort 7 | black -------------------------------------------------------------------------------- /2022/08 - August/click/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name="my-super-duper-cli", 5 | version="0.1.0", 6 | py_modules=["main"], 7 | install_requires=["click", "tabulate"], 8 | entry_points={"console_scripts": ["cli = main:main_cli"]}, 9 | ) 10 | -------------------------------------------------------------------------------- /2022/08 - August/click/viewers/__init__.py: -------------------------------------------------------------------------------- 1 | import click 2 | 3 | from viewers.csv_viewer import preview_csv 4 | from viewers.enums import TableFormat 5 | from viewers.json_viewer import preview_json 6 | 7 | 8 | @click.group(name="viewers") 9 | def viewers_group(): 10 | """CLI commands for viewing CSV and JSON files""" 11 | 12 | 13 | @click.command(name="json") 14 | @click.argument("file_name", type=click.Path(exists=True, dir_okay=False)) 15 | @click.option( 16 | "--numlines", # this will be the argument name used in the function being decorated 17 | "-n", 18 | default=None, 19 | type=click.IntRange(1), 20 | help="Number of lines of the JSON object to display, or omit to view the entire file", 21 | ) 22 | @click.option( 23 | "-i", 24 | "--indent", 25 | default=4, 26 | type=click.IntRange(0), 27 | help="Specifies the indentation level for viewing the JSON object", 28 | ) 29 | def view_json(file_name, numlines, indent): 30 | """Use FILE_NAME to Specify a path to the JSON file you wish to preview""" 31 | result = preview_json(file_name=file_name, first_n=numlines, indent=indent) 32 | click.echo(result) 33 | 34 | 35 | @click.command(name="csv") 36 | @click.argument("file_name", type=click.Path(exists=True, dir_okay=False)) 37 | @click.option( 38 | "--has-header", 39 | "has_headers", 40 | is_flag=True, 41 | default=False, 42 | help="Specify this flag is the CSV file has a header row", 43 | ) 44 | @click.option( 45 | "--numlines", 46 | "-n", 47 | 
"line_count", 48 | default=None, 49 | type=click.IntRange(1), 50 | help="Number of rows to display (excluding header row, if any)", 51 | ) 52 | @click.option( 53 | "--format", 54 | "-f", 55 | "format_", # actually need this format is a Python built-in function 56 | default=TableFormat.fancy_outline.name, 57 | type=click.Choice([e.name for e in TableFormat], case_sensitive=True), 58 | help="Specify the formatting style", 59 | ) 60 | def view_csv(file_name, line_count, has_headers, format_): 61 | """View CSV files""" 62 | format_ = TableFormat[format_] 63 | result = preview_csv( 64 | file_name=file_name, first_n=line_count, has_header_row=has_headers, table_format=format_ 65 | ) 66 | click.echo(result) 67 | 68 | 69 | viewers_group.add_command(view_json) 70 | viewers_group.add_command(view_csv) 71 | -------------------------------------------------------------------------------- /2022/08 - August/click/viewers/csv_viewer.py: -------------------------------------------------------------------------------- 1 | import csv 2 | from itertools import islice 3 | 4 | from tabulate import tabulate 5 | 6 | from viewers.enums import TableFormat 7 | 8 | 9 | def preview_csv( 10 | file_name: str, 11 | first_n: int = None, 12 | has_header_row: bool = False, 13 | table_format: TableFormat = TableFormat.fancy_outline, 14 | ) -> str: 15 | with open(file_name) as f: 16 | data = csv.reader(f) 17 | 18 | if has_header_row: 19 | headers = next(data) 20 | else: 21 | headers = None 22 | 23 | if first_n: 24 | rows = list(islice(data, first_n)) 25 | else: 26 | rows = list(data) 27 | 28 | if headers: 29 | return tabulate(rows, headers=headers, tablefmt=table_format.value) 30 | else: 31 | return tabulate(rows) 32 | -------------------------------------------------------------------------------- /2022/08 - August/click/viewers/enums.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class TableFormat(Enum): 5 | fancy_grid = "fancy_grid" 6 | fancy_outline = "fancy_outline" 7 | github = "github" 8 | grid = "grid" 9 | html = "html" 10 | jira = "jira" 11 | latex = "latex" 12 | latex_booktabs = "latex_booktabs" 13 | latex_longtable = "latex_longtable" 14 | latex_raw = "latex_raw" 15 | mediawiki = "mediawiki" 16 | moinmoin = "moinmoin" 17 | orgtbl = "orgtbl" 18 | pipe = "pipe" 19 | plain = "plain" 20 | presto = "presto" 21 | pretty = "pretty" 22 | psql = "psql" 23 | rst = "rst" 24 | simple = "simple" 25 | textile = "textile" 26 | tsv = "tsv" 27 | unsafehtml = "unsafehtml" 28 | youtrack = "youtrack2" 29 | -------------------------------------------------------------------------------- /2022/08 - August/click/viewers/json_viewer.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | def preview_json(file_name: str, first_n: int = None, indent: int = 2) -> str: 5 | with open(file_name) as f: 6 | data = json.load(f) 7 | 8 | formatted_data = json.dumps(data, indent=indent) 9 | 10 | if indent == 0: 11 | result = formatted_data.replace("\n", "") 12 | else: 13 | formatted_data = formatted_data.split("\n") 14 | if first_n: 15 | formatted_data = formatted_data[:first_n] 16 | result = "\n".join(formatted_data) 17 | return result 18 | -------------------------------------------------------------------------------- /2022/10 - October/dotenv/README.md: -------------------------------------------------------------------------------- 1 | # The `python-dotenv` Library 2 | 3 | ## Installation and Documentation 4 | The 
project can be found [here](https://github.com/theskumar/python-dotenv). 5 | 6 | You will need to pip install `python-dotenv` to use this library 7 | 8 | ## For those using `pipenv` 9 | If you use `pipenv`, that system already supports using a `.env` file to automatically load environment 10 | variables defined in a `.env` file when the virtual environment is activated. However, not everyone 11 | uses `pipenv` for their virtual environments, and this is where `python-dotenv` comes in useful. 12 | 13 | For this demo I am therefore **not** going to use `pipenv`, but instead use Python's built-in virtual environments: 14 | ```bash 15 | python3.10 -m venv .venv 16 | ``` 17 | Then activate the environment: 18 | ```bash 19 | source .venv/bin/activate 20 | ``` 21 | And finally install the library: 22 | ```bash 23 | pip install -r requirements.txt 24 | ``` 25 | 26 | We can check that the library was installed: 27 | ```bash 28 | pip freeze | grep dot 29 | ``` 30 | 31 | ## Overview 32 | Often we use environment variables when our Python applications are deployed to production 33 | systems to configure a variety of settings used by our apps. 34 | 35 | Typically various mechanisms are used in CI/CD pipelines to populate those environment variables. 36 | 37 | When we are running the system locally however, we need to make sure that these environment variables 38 | exist prior to us running the app. Certain IDEs allow you to configure those environment variables, 39 | or you could use some other mechanism (such as a Makefile, or specifying the env variables on the command line 40 | that runs your Python app), but a simpler way is to use the `python-dotenv` library. 41 | 42 | This library basically "injects" environment variables defined in a `.env` file in your project root. 43 | 44 | The advantage of using a file to store the environment variables is that it is easy to add to `git` (or other 45 | versioning system), although if certain secrets (such as API keys) are defined in your `.env` file you may not want 46 | to include those in the repo. 47 | 48 | Typically the way I approach this is to create two files: 49 | 50 | - `env-template`: a file that has the basic environment variables, but omits any sensitive values, for example: 51 | ```txt 52 | SWAPI_BASE_URL=https://swapi.dev/api 53 | SECRET_1= 54 | SECRET_2= 55 | ``` 56 | - `.env`: a file that has all the environment variables populated, including the secrets, for example: 57 | ```txt 58 | SWAPI_BASE_URL=https://swapi.dev/api 59 | SECRET_1=secret_value_1 60 | SECRET_2=secret_value_2 61 | ``` 62 | 63 | I then **include** `env_template` in the git repo, but **exclude** (aka **ignore**) the `.env` file. That way I can 64 | let other people know what the `.env` file should look like, but I do not run the risk of accidentally including 65 | my `.env` file with secrets populated. (For this demo I am including the `.env` file in the repo, but in practice 66 | I do not). 
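One small, entirely optional helper I find handy with this workflow - a sketch (not part of the demo code) that compares the variable names declared in `env-template` against your local `.env`, so you notice when the template gains a new variable you have not yet populated locally:

```python
from pathlib import Path


def env_keys(env_file: str) -> set[str]:
    # collect the variable names defined in a dotenv-style file (ignoring blanks and comments)
    return {
        line.split("=", 1)[0].strip()
        for line in Path(env_file).read_text().splitlines()
        if line.strip() and not line.strip().startswith("#")
    }


if __name__ == "__main__":
    missing = env_keys("env-template") - env_keys(".env")
    if missing:
        print(f"Variables missing from .env: {sorted(missing)}")
```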
67 | 68 | Once we have this in place, we can use `python-dotenv` to "inject" the environment variables from the `.env` file, 69 | and access them in Python the usual way (`os.getenv`), for example: 70 | 71 | ```python 72 | import os 73 | from dotenv import load_dotenv 74 | 75 | load_dotenv() # this "injects" the environment variables located in .env 76 | 77 | swapi_base_url = os.getenv('SWAPI_BASE_URL') 78 | secret_1 = os.getenv('SECRET_1') 79 | secret_2 = os.getenv('SECRET_2') 80 | ``` 81 | 82 | One thing to note is that we now have several ways to define environment variables: 83 | - in the `.env` file 84 | - as regular environment variables in our terminal session 85 | - as command line arguments, e.g. 86 | ```bash 87 | SWAPI_BASE_URL=https://swapi.dev/api SECRET_1=100 SECRET_2=200 python main.py 88 | ``` 89 | 90 | What happens when we mix these different ways of defining env vars? 91 | 92 | The command line version overrides any pre-existing standard environment variable (so if you have an env var 93 | defined in your terminal session, the command line definition will override it). 94 | 95 | Any value in `.env` will be **overridden** by a standard env var definition, and in turn will be overridden 96 | by the command line definition. 97 | 98 | So the **priority order** in which environment variables are picked up is: 99 | 1. command line argument 100 | 2. session environment variable 101 | 3. `.env` file. 102 | 103 | 104 | For example, if you have this scenario: 105 | - `MY_VAR_1=100` in your terminal session 106 | - `MY_VAR_1=200` in your `.env` file 107 | then once you load `.env` using `load_dotenv()`, the value of the env var `MY_VAR_1` will be `100`. 108 | 109 | And if you have this: 110 | - `MY_VAR_1=100` in your terminal session 111 | - `MY_VAR_1=200` in your `.env` file 112 | - run your script using `MY_VAR_1=300 python myscript.py` 113 | then once you load `.env` using `load_dotenv()`, the value of the env var `MY_VAR_1` will be `300`. 114 | 115 | 116 | ## More Advanced Features 117 | There are few more tricks up `python-dotenv`'s sleeve. One very useful one in particular is the ability 118 | for it to do variable interpolation. 119 | 120 | For example, we could define an `.env` file this way: 121 | ```text 122 | BASE_URL=https://swapi.dev/api 123 | FILMS_URL=${BASE_URL}/films 124 | ``` 125 | 126 | When the `.env` file is loaded, `FILMS_URL` will actually be `https://swapi.dev/api/films` 127 | 128 | ## Trying it out 129 | To try all this out, we'll run `main.py` in different ways. 130 | 131 | First, let's make sure those environment variables are not defined in our session: 132 | ```bash 133 | env | grep 'SECRET\|SWAPI' 134 | ``` 135 | 136 | First, we'll run `main.py` "normally": 137 | ```bash 138 | python main.py 139 | ``` 140 | 141 | Notice the results: 142 | ```text 143 | SWAPI_BASE_URL: https://swapi.dev/api 144 | SWAPI_FILMS: https://swapi.dev/api/films 145 | SECRET_1: 100 146 | SECRET_2: 200 147 | ``` 148 | 149 | Also notice that the environment variables are still not defined in our session. 
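As an aside, the default behavior described above (variables already set in your session win over `.env`) can be flipped: `load_dotenv` accepts an `override` argument. A minimal sketch:

```python
from dotenv import load_dotenv

# default: variables already present in the environment take precedence over .env
load_dotenv()

# override=True: values from .env replace any pre-existing environment variables
load_dotenv(override=True)
```

This can be handy when you want the `.env` file to be the single source of truth, regardless of what is already set in your session.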
150 | 151 | 152 | Next, we are going to set `SECRET_1` to some specific value in our current terminal session: 153 | ```bash 154 | export SECRET_1=abcdefg 155 | ``` 156 | and let's check that the variable is set in our session: 157 | ```bash 158 | env | grep 'SECRET\|SWAPI' 159 | ``` 160 | 161 | If we run 162 | ```bash 163 | python main.py 164 | ``` 165 | 166 | we get this output: 167 | ```text 168 | SWAPI_BASE_URL: https://swapi.dev/api 169 | SWAPI_FILMS: https://swapi.dev/api/films 170 | SECRET_1: abcdefg 171 | SECRET_2: 200 172 | ``` 173 | 174 | You'll notice that `SECRET_1` is set to the value we defined in our session environment variable. 175 | 176 | Finally, with that session variable set, let's override it using a command line arg when we run our script: 177 | ```bash 178 | SECRET_1=tuvwxyz python main.py 179 | ``` 180 | 181 | We get this output: 182 | ```text 183 | SWAPI_BASE_URL: https://swapi.dev/api 184 | SWAPI_FILMS: https://swapi.dev/api/films 185 | SECRET_1: tuvwxyz 186 | SECRET_2: 200 187 | ``` 188 | 189 | and you'll notice that `SECRET_1` now has the value we specified on the command line. 190 | -------------------------------------------------------------------------------- /2022/10 - October/dotenv/env-template: -------------------------------------------------------------------------------- 1 | SWAPI_BASE_URL=https://swapi.dev/api 2 | SWAPI_FILMS=${SWAPI_BASE_URL}/films 3 | SECRET_1= 4 | SECRET_2= 5 | -------------------------------------------------------------------------------- /2022/10 - October/dotenv/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | 5 | 6 | load_dotenv() 7 | 8 | print('SWAPI_BASE_URL: ', os.getenv('SWAPI_BASE_URL')) 9 | print('SWAPI_FILMS: ', os.getenv('SWAPI_FILMS')) 10 | print('SECRET_1: ', os.getenv('SECRET_1')) 11 | print('SECRET_2: ', os.getenv('SECRET_2')) 12 | 13 | -------------------------------------------------------------------------------- /2022/10 - October/dotenv/requirements.txt: -------------------------------------------------------------------------------- 1 | python-dotenv -------------------------------------------------------------------------------- /2022/10 - October/flake8/.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 80 3 | 4 | # McCabe complexity 5 | max-complexity = 11 6 | 7 | # B901: return statements in generators - in Python 3 that's allowed. 
8 | # E203: 9 | # E226: 10 | # E302: 11 | # E722: duplicate of B001, so can be excluded 12 | # S101 13 | # S311 14 | # I004 15 | # N818 16 | # T002 17 | # T003 18 | extend-ignore=B901,E203,E226,E302,E722,S101,S311,I004,N818,T002,T003 19 | -------------------------------------------------------------------------------- /2022/10 - October/flake8/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | 8 | [dev-packages] 9 | black = "*" 10 | isort = "*" 11 | flake8 = "*" 12 | flake8-bugbear = "*" 13 | flake8-comprehensions = "*" 14 | flake8-implicit-str-concat = "*" 15 | pep8-naming = "*" 16 | flake8-builtins = "*" 17 | flake8-bandit = "*" 18 | flake8-eradicate = "*" 19 | flake8-print = "*" 20 | 21 | [requires] 22 | python_version = "3.10" 23 | -------------------------------------------------------------------------------- /2022/10 - October/flake8/README.md: -------------------------------------------------------------------------------- 1 | # The `flake8` Library 2 | 3 | ## Introduction 4 | > Note: Please treat this as an independent project - it has its own `requirements.txt` file (for `pip` installing), 5 | > and a corresponding `Pipfile` (for `pipenv` installations). 6 | 7 | `flake8` is a tool for Python style enforcements according to conventions generally agreed upon by Python developers 8 | worldwide. But it is also more than just style - it can help you find bugs in your code, possible security issues, bad 9 | design, etc. 10 | 11 | We're going to add `flake8` to our existing `black` and `isort` tools that I covered in a previous video. 12 | 13 | The `flake8` system has a base system, and a whole slew of extensions, or **plugins** using its terminology, that 14 | extend the checks that `flake8` can perform. 15 | 16 | When you run `flake8` against your code, any "violation" is reported using a code- these are generally numbers with 17 | a letter prefix such as `F401`, `E201`, etc. In general every plugin (built-in, or added) has it's own, unique, 18 | character prefix (sometimes more than one character). 19 | 20 | The documentation for `flake8` can be found [here](https://flake8.pycqa.org/en/latest/#). 21 | 22 | ## Plugins 23 | By default, `flake8` includes the `pycodestyle`, `pyflakes` and `mccabe` checkers: 24 | - [pycodestyle](https://pycodestyle.pycqa.org/en/latest/) - largely style (pep8) related 25 | - [pyflakes](https://github.com/PyCQA/pyflakes) - things like unused imports, duplicate function args, etc 26 | - [mccabe](https://github.com/pycqa/mccabe) - a measure of code complexity 27 | 28 | You can read more about the McCabe cyclomatic complexity [here](https://en.wikipedia.org/wiki/Cyclomatic_complexity) - 29 | it can be a tough read, but basically think of it as something that will warn you if your code is too complex 30 | (not necessarily hard to read, just too much complexity, usually caused by too many levels of nesting) 31 | 32 | We're also going to add a few plugins, the ones that I typically use on all my Python projects - feel free to change 33 | it up as fits your needs - but really ask yourself **why** you would **not** want to perform these checks! I'm not 34 | saying you absolutely have to, I'm saying think about why you would want or not want certain checks in place - and 35 | this may very well vary from project to project. 
36 | 37 | 38 | - [bugbear](https://github.com/PyCQA/flake8-bugbear): can find a variety of bugs and design problems 39 | - [flake8-comprehensions](https://github.com/adamchainz/flake8-comprehensions): helps improve your comprehension code 40 | - [flake8-implicit-str-concat](https://github.com/flake8-implicit-str-concat/flake8-implicit-str-concat): this one helps 41 | with implicit string concatenation - I use mainly because `black` can introduce some weirdness when reformatting a 42 | string literal that was written over several lines, but would actually fit on a single line, e.g. 43 | ```python 44 | a = ( 45 | "this is a string literal " 46 | "that would fit on a single line" 47 | ) 48 | ``` 49 | will be changed to: 50 | ```python 51 | a = "this is a string literal " "that would fit on a single line" 52 | ``` 53 | This `flake8` extension will identify those kinds of issues that may arise after running `black`. 54 | 55 | - [pep8-naming](https://github.com/PyCQA/pep8-naming): Checks pep8 naming conventions 56 | - [flake8-builtins](https://pypi.org/project/flake8-builtins/): Makes sure you don't accidentally use a Python builtin 57 | for variable or parameter names 58 | - [flake8-bandit](https://github.com/tylerwince/flake8-bandit): Runs the `bandit` tool set against your Python 59 | code - more info on `bandit` is [here](https://bandit.readthedocs.io/en/latest/). This toolset can identify common 60 | security issues in your Python code. 61 | - [flake8-eradicate](https://github.com/wemake-services/flake8-eradicate): warns you of any commented code - for larger 62 | code projects, you should be using some source control system, such as git - you do not need to comment code out, 63 | instead delete it and use version control to retain the history of your code. 64 | - [flake8-print](https://github.com/jbkahn/flake8-print): checks for print statements in your code (production code 65 | usually should not contain print statements) 66 | 67 | ## Violation Codes 68 | Here you'll find links to pages that list and describe the various violation codes, both for `flake` itself, as 69 | well as the additional plugins we're using. 
70 | - `flake8` pre-installed: 71 | - `pycodestyle` - Exxx, Wxxx 72 | - `pyflakes` - Fxxx 73 | - `mccabe` - C901 74 | - [codes list](https://www.flake8rules.com/) 75 | 76 | - `flake8-builtins`: 77 | - `Axxx` 78 | - No formal docs for these codes, but from the 79 | [source](https://github.com/gforcada/flake8-builtins/blob/master/flake8_builtins.py) we see there are 80 | only three codes: 81 | - `A001` (variable shadows a built-in) 82 | - `A002` (callable parameter name shadows a built-in) 83 | - `A003` (class attribute shadows a built-in) 84 | - `flake8-bugbear`: 85 | - `Bxxx` 86 | - [codes list](https://github.com/PyCQA/flake8-bugbear) 87 | - `flake8-comprehensions` 88 | - `Cxxx` (except C901, McCabe complexity which is reported by `flake8` itself) 89 | - [codes list](https://github.com/adamchainz/flake8-comprehensions) 90 | - `flake8-eradicate` 91 | - only one: `E800` (found commented code) 92 | - [codes list](https://github.com/wemake-services/flake8-eradicate) 93 | - `flake8-implicit-str-concat` 94 | - `ISCxxx` 95 | - [codes list](https://github.com/flake8-implicit-str-concat/flake8-implicit-str-concat) 96 | - `flake8-naming` 97 | - `Nxxx` 98 | - [codes list](https://github.com/PyCQA/pep8-naming) 99 | - `flake8-bandit` 100 | - `Sxxx` 101 | - Technically bandit uses codes in the range `Bxxx`, but since that code is already taken by bugbear, it is 102 | remapped to `Sxxx` 103 | - [codes list](https://bandit.readthedocs.io/en/latest/plugins/index.html#complete-test-plugin-listing) 104 | - `flake8-print` 105 | - `Txxx` 106 | - [codes list](https://github.com/jbkahn/flake8-print) 107 | 108 | 109 | ## Other Plugins 110 | There are many articles and blog posts around people's favorite `flake8` extensions - just do some web searches 111 | and you're bound to find some. 112 | 113 | This particular repo 114 | ([https://github.com/DmytroLitvinov/awesome-flake8-extensions](https://github.com/DmytroLitvinov/awesome-flake8-extensions)) 115 | has a fairly comprehensive list - but before you decide to use one of the extensions, just make sure they are 116 | (a) relatively popular, and (b) are still actively being developed. 117 | 118 | 119 | ## Installation and Configuration 120 | To install everything for this video we'll pip install the following: 121 | ```bash 122 | pip install flake8 123 | pip install flake8-bugbear 124 | pip install flake8-comprehensions 125 | pip install flake8-implicit-str-concat 126 | pip install pep8-naming 127 | pip install flake8-builtins 128 | pip install flake8-bandit 129 | pip install flake8-eradicate 130 | pip install flake8-print 131 | ``` 132 | 133 | We'll create the same configuration as before for `black` and `isort`, in the `pyproject.toml` file. 134 | 135 | (I'm setting my line length to 80 to ensure I have code formatted to a shorter line length as I have to make the font 136 | larger for the videos, feel free to set it to whatever you want.) 137 | 138 | 139 | We can also configure `flake8`, but it will not work with our `pyproject.toml` file, so we'll create a file 140 | named `.flake8` which will contain the configs for flake. 141 | 142 | I'll explain what I have in my `.flake8` config file in the video, as these are the most common config options, but 143 | there are many more configuration options available, 144 | documented [here](https://flake8.pycqa.org/en/latest/user/configuration.html) and 145 | [here](https://flake8.pycqa.org/en/latest/user/options.html#options-list). 
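Before moving on to running the tools, here is a small, purely hypothetical snippet annotated with the kinds of issues the plugins above would typically flag (exact violation codes can vary a little between plugin versions, so treat them as illustrative):

```python
import random


def lookup(id, list):  # flake8-builtins: `id` and `list` shadow Python built-ins (A002)
    token = random.random()  # flake8-bandit: pseudo-random generators unsuitable for security purposes (S311)
    # token = secrets.token_hex()  # flake8-eradicate: commented-out code (E800)
    print(token)  # flake8-print: print statement found
    return [item for item in list if item == id]
```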
146 | 147 | 148 | ## Running `flake8` 149 | In this repo you will find a Python file (called `bad_code.py`, and a copy of it in `bad_code_original.py`) that 150 | contains a slew of issues - the code works, but the style is terrible, and should be refactored - `black`, `isort` 151 | and `flake8` will help us get there. 152 | 153 | To run `flake8` on all your files simply run this from the command line: 154 | ```bash 155 | flake8 156 | ``` 157 | 158 | You can target specific files also: 159 | ```bash 160 | flake8 ./bad_code.py 161 | ``` 162 | 163 | (You can also specify multiple files, or even paths if you prefer.) 164 | 165 | 166 | ### Steps 167 | 168 | 1. Let's start by running `flake8` just to get an idea of what's wrong: `flake8 bad_code.py` 169 | 2. Now let's use black and isort to clean things up: 170 | 1. `black ./bad_code.py` 171 | 2. `isort ./bad_code.py` 172 | 173 | 3. That eliminated quite a few linter issues, but now we have to do the rest by hand - just tackle one at a time. 174 | 175 | > Note: When refactoring code that is already in "production", best practice is to ensure you have full unit 176 | > test coverage for all the code you are refactoring - that way, you can refactor and be (fairly) certain your changes 177 | > did not break the existing functionality. How would you know otherwise? :-) 178 | -------------------------------------------------------------------------------- /2022/10 - October/flake8/bad_code.bak: -------------------------------------------------------------------------------- 1 | """This is an example that shows poorly written Python code""" 2 | import csv 3 | import json 4 | import pathlib 5 | import random 6 | 7 | 8 | def make_TempDirectory(dirName): 9 | if pathlib.Path("./" + dirName).exists(): 10 | # dir already exists, no need to do anything 11 | pass 12 | else: 13 | pathlib.Path("./" + dirName).mkdir() 14 | 15 | 16 | def create_csv_file(fileName, cols, rows, types=[]): 17 | try: 18 | f = open(temp_dir + "/" + fileName, "w") 19 | except: 20 | return 21 | if not types: 22 | types = [float, float, str] 23 | 24 | for i in range(rows): 25 | row = list( 26 | str(types[j % len(types)](random.random())) for j in range(cols) 27 | ) 28 | f.write(",".join(row)) 29 | f.write("\n") 30 | f.close() 31 | 32 | 33 | class GildedRose(object): 34 | def __init__(self, items): 35 | self.items = items 36 | 37 | def update_quality(self): 38 | for item in self.items: 39 | if ( 40 | item.name != "Aged Brie" 41 | and item.name != "Backstage passes to a TAFKAL80ETC concert" 42 | ): 43 | if item.quality > 0: 44 | if item.name != "Sulfuras, Hand of Ragnaros": 45 | item.quality = item.quality - 1 46 | else: 47 | if item.quality < 50: 48 | item.quality = item.quality + 1 49 | if item.name == "Backstage passes to a TAFKAL80ETC concert": 50 | if item.sell_in < 11: 51 | if item.quality < 50: 52 | item.quality = item.quality + 1 53 | if item.sell_in < 6: 54 | if item.quality < 50: 55 | item.quality = item.quality + 1 56 | if item.name != "Sulfuras, Hand of Ragnaros": 57 | item.sell_in = item.sell_in - 1 58 | if item.sell_in < 0: 59 | if item.name != "Aged Brie": 60 | if item.name != "Backstage passes to a TAFKAL80ETC concert": 61 | if item.quality > 0: 62 | if item.name != "Sulfuras, Hand of Ragnaros": 63 | item.quality = item.quality - 1 64 | else: 65 | item.quality = item.quality - item.quality 66 | else: 67 | if item.quality < 50: 68 | item.quality = item.quality + 1 69 | 70 | 71 | class Item: 72 | def __init__(self, name, sell_in, quality): 73 | self.name = name 74 | 
self.sell_in = sell_in 75 | self.quality = quality 76 | 77 | def __repr__(self): 78 | return "%s, %s, %s" % (self.name, self.sell_in, self.quality) 79 | 80 | 81 | # program 82 | if __name__ == "__main__": 83 | const = ( 84 | "a string" 85 | "written " 86 | "over multiple lines " 87 | "that could fit on one" 88 | ) 89 | temp_dir = "temp" 90 | make_TempDirectory(temp_dir) 91 | create_csv_file("test.csv", 3, 10) 92 | -------------------------------------------------------------------------------- /2022/10 - October/flake8/bad_code.py: -------------------------------------------------------------------------------- 1 | """This is an example that shows poorly written Python code""" 2 | import csv 3 | import json 4 | import pathlib 5 | import random 6 | 7 | 8 | def make_TempDirectory(dirName): 9 | if pathlib.Path("./" + dirName).exists(): 10 | # dir already exists, no need to do anything 11 | pass 12 | else: 13 | pathlib.Path("./" + dirName).mkdir() 14 | 15 | 16 | def create_csv_file(fileName, cols, rows, types=[]): 17 | try: 18 | f = open(temp_dir + "/" + fileName, "w") 19 | except: 20 | return 21 | if not types: 22 | types = [float, float, str] 23 | 24 | for i in range(rows): 25 | row = list( 26 | str(types[j % len(types)](random.random())) for j in range(cols) 27 | ) 28 | f.write(",".join(row)) 29 | f.write("\n") 30 | f.close() 31 | 32 | 33 | class GildedRose(object): 34 | def __init__(self, items): 35 | self.items = items 36 | 37 | def update_quality(self): 38 | for item in self.items: 39 | if ( 40 | item.name != "Aged Brie" 41 | and item.name != "Backstage passes to a TAFKAL80ETC concert" 42 | ): 43 | if item.quality > 0: 44 | if item.name != "Sulfuras, Hand of Ragnaros": 45 | item.quality = item.quality - 1 46 | else: 47 | if item.quality < 50: 48 | item.quality = item.quality + 1 49 | if item.name == "Backstage passes to a TAFKAL80ETC concert": 50 | if item.sell_in < 11: 51 | if item.quality < 50: 52 | item.quality = item.quality + 1 53 | if item.sell_in < 6: 54 | if item.quality < 50: 55 | item.quality = item.quality + 1 56 | if item.name != "Sulfuras, Hand of Ragnaros": 57 | item.sell_in = item.sell_in - 1 58 | if item.sell_in < 0: 59 | if item.name != "Aged Brie": 60 | if item.name != "Backstage passes to a TAFKAL80ETC concert": 61 | if item.quality > 0: 62 | if item.name != "Sulfuras, Hand of Ragnaros": 63 | item.quality = item.quality - 1 64 | else: 65 | item.quality = item.quality - item.quality 66 | else: 67 | if item.quality < 50: 68 | item.quality = item.quality + 1 69 | 70 | 71 | class Item: 72 | def __init__(self, name, sell_in, quality): 73 | self.name = name 74 | self.sell_in = sell_in 75 | self.quality = quality 76 | 77 | def __repr__(self): 78 | return "%s, %s, %s" % (self.name, self.sell_in, self.quality) 79 | 80 | 81 | # program 82 | if __name__ == "__main__": 83 | const = ( 84 | "a string" 85 | "written " 86 | "over multiple lines " 87 | "that could fit on one" 88 | ) 89 | temp_dir = "temp" 90 | make_TempDirectory(temp_dir) 91 | create_csv_file("test.csv", 3, 10) 92 | -------------------------------------------------------------------------------- /2022/10 - October/flake8/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 80 3 | 4 | [tool.isort] 5 | profile = "black" -------------------------------------------------------------------------------- /2022/10 - October/flake8/requirements.txt: -------------------------------------------------------------------------------- 1 | black 2 | flake8 
3 | flake8-bandit 4 | flake8-bugbear 5 | flake8-builtins 6 | flake8-comprehensions 7 | flake8-eradicate 8 | flake8-implicit-str-concat 9 | flake8-print 10 | isort 11 | pep8-naming 12 | -------------------------------------------------------------------------------- /2023/01 - January/distributed_computations/README.md: -------------------------------------------------------------------------------- 1 | # Distributed Computations with Python and Redis 2 | 3 | In this post I want to show you one frequent way I use a queue mechanism to distribute work 4 | in a highly scalable way. 5 | 6 | I often get questions about using sub processes in Python to scale applications. Although 7 | this is definitely an option, it is not one I use (I don't use multi threading either, but that's for 8 | another discussion!). So, yes, sub processes can scale to multiple cores or CPUs on your computer, but it is limited 9 | to a **single** computer. 10 | 11 | Most problems that need scaling these days will need to scale beyond just a single computer - maybe not in the 12 | short term, but eventually, as our data sets grow ever larger and larger, scaling to a single computer, even with 16 13 | or 32 cores is not going to be enough. In that case, we have to find a solution that can scale across 14 | multiple computers, and this would mean refactoring a lot of the code that I would have had to put in place for 15 | multiprocessing. 16 | 17 | The method I use (almost always) involves setting up data processing pipelines using message queues. 18 | 19 | The idea is extremely simple. 20 | 21 | 1. A "main" program kicks off the entire process, and eventually "divides" the concurrent work to be done into 22 | chunks that are put into a queue. The queue items, or *messages*, essentially contain all the information they need to 23 | define the inputs for the work that has to be done (it might even be links to records in a database, but the 24 | idea is the same - each item in the queue contains the information needed to complete the required work) 25 | 2. I create a second application which simply takes items from the queue, one by one, performs the work 26 | required for that queue item, "saves" the result (maybe to a database, maybe to another queue), and once done 27 | processing the item, looks in the queue for the next item to process. Rinse and repeat. This application can 28 | be run on as many computers (nodes) as we want, and even as many times on the same computer as we want. 29 | These running instances of this application are often called **workers** ( or worker processes). 30 | 31 | ![main concept](main_concept.png "Main Concept") 32 | 33 | Like many concurrency problems, one of the key things here is that the **order** in which the items are handled 34 | from the queue should not have any bearing on the final result. If that is not the case, then the approach we are 35 | looking at may not work (or would require some additional code). Even if we deal with a FIFO queue 36 | (first-in first-out), there is no guarantee that the results of processing the queue items would be emitted 37 | in that same order (one worker in the system may simply be running faster than another). 38 | 39 | So, given that we are OK with that, we can then run as many of these worker processes as we want, and scale the number 40 | of worker processes up or down as needed. We may even elect to run these workers on a single computer, even the 41 | same one generating the queue items in the first place. 
The advantage is that we can start small, running everything on 42 | a single computer, but if need be, we can easily scale the system to multiple processing nodes. 43 | 44 | Here's a concrete example where I have used this in the past. 45 | 46 | We need to do some image processing, which involves converting an image to some standard format (say JPEG), create 47 | multiple versions at different resolutions and crops. These images come in over time via some mechanism (maybe someone 48 | uploads the image on our web site). This original image, and associated data is then saved to a database and file 49 | system, and a queue item is created to point to the file and the database record. Independent worker processes, are 50 | listening for items in the queue, and once one becomes available, one of the workers picks up the item and starts 51 | the image processing. Once complete, new images are saved to a file system somewhere, and the database record updated 52 | to reflect processing was completed, and links to the newly created images. 53 | 54 | As you can see, this approach is **very** easy to understand (I don't do complicated well, I like to keep things 55 | simple), and yet we have a highly scalable system if we need it. 56 | 57 | So, to do this we need: 58 | 1. a message queue where we can push and pop messages that can be accessed on the network 59 | 2. a Python app that pushes concurrent chunks of work to this queue 60 | 3. another Python app that pops items from the queue, and performs the work 61 | 62 | One important thing for this to work, is the ability to make sure that once a worker "picks up" an item from the 63 | queue, no other worker can pick it up (after all, we only need to process each item just once). 64 | 65 | There are many queue platforms that would work very well for this. My most frequent goto platform is RabbitMQ - it 66 | is feature rich and provides a very robust and scalable queue infrastructure. Slightly simpler to set up would be 67 | Amazon's SQS or elasticMQ, but many other message queue platforms exist as well (Kafka, ActiveMQ, etc). 68 | 69 | Another possibility is using Redis list as a queue - it is simple enough, and comes 70 | without the additional complexity of using some of the other options, is often already available in projects I work on, 71 | but unlike Rabbit or SQS it is not necessarily 100% foolproof (worker could crash in the middle of processing a 72 | queue item, in which case the event may get lost - but there is a solution to that, see the last section in this 73 | README.) 74 | 75 | To use Redis as a queue, we are going to use a list item, and push/pull from either the left or the right of the list. 76 | 77 | ![redis queue](redis_queue.png "Redis Queue") 78 | 79 | 80 | In future episodes I could cover how to work around the potential message loss issue in Redis, or cover a more robust 81 | queue mechanism such as SQS (or elasticMQ) - let me know in the comments, and if there's enough interest I'll make 82 | another video(s). 83 | 84 | 85 | ## Getting things set up 86 | In this folder you will find a few things: 87 | 1. A docker compose file that we'll use to stand up a local Redis instance (you'll need Docker) 88 | 2. A folder named `app` that contains our main app (the one that pushes data to Redis) 89 | 3. A folder named `worker` that contains the Python worker app 90 | 91 | In each Python app folder, you will find a `requirements.txt` file that you will need to create the virtual environment 92 | to run each Python app. 
93 | 94 | Our apps are not going to do much. 95 | 96 | The main app is just going to push items onto the queue, and the worker app is going to pop items off the queue and 97 | just generate a log entry that it has processed the item. 98 | 99 | In practice, I would add a few things to make the system more robust - in particular, a way to requeue items that the 100 | worker failed to process for some reason (many full-blown queue systems have a concept of a dead letter queue (DLQ) that 101 | helps with this). Redis does not have a DLQ concept, but we could create another queue in Redis to mimic that. 102 | 103 | Since I do want to handle re-queuing in this demo, I will keep a counter of how many attempts were made to process 104 | an item, and simply emit an error level log after N times. (in practice, we would probably dead letter the message, 105 | and set up some monitoring/alerting on the DLQ). 106 | 107 | To start a local Redis instance, use: 108 | ```bash 109 | docker compose up -d 110 | ``` 111 | 112 | The docker compose file maps defines which port to use for communicating with Redis - here we have `6379`, but 113 | you can always map it to another port on your computer if it clashes with something else. 114 | 115 | For example, to map the container Redis port to `6699`, you would simply change your docker compose file from 116 | ```yaml 117 | ports: 118 | - '6379:6379' 119 | ``` 120 | 121 | to 122 | ```yaml 123 | ports: 124 | - '6699:6379' 125 | ``` 126 | 127 | If you do change the port, you will need to change that in the config files for both main and worker apps as well. 128 | 129 | Once you have Redis up and running, you can run the main app to generate events, and as many instances of the worker app 130 | as you want. 131 | 132 | 133 | ## Running Things 134 | First, we'll run the main app - this will push one message per second to the Redis queue. 135 | 136 | We can use the Redis cli to quickly query Redis and see if your messages are being pushed. 137 | 138 | To get the Redis cli, we can use 139 | 140 | ```bash 141 | docker compose run redis redis-cli -h redis -a secret -n 0 142 | ``` 143 | 144 | We can then issue commands such as: 145 | - `ping` - should get `PONG` response 146 | - `keys *` - lists out all keys in Redis - initially this should be empty, but once we start our main 147 | app, we should see one entry for our queue (key should be `demo-1`) 148 | - `lrange demo-1 0 -1` - this lists our all the messages in the queue (a Redis list) 149 | 150 | 151 | Next we can start as many workers as we want, and we should observe the processing happening. We can 152 | also verify, using the Redis query above, that the queue (list) is changing over time as messages are added 153 | and removed. 154 | 155 | 156 | ## Possible Enhancements 157 | - use another Redis database to de-duplicate messages - should not happen, but there may be concurrency 158 | race conditions (haven't encountered any yet, but just to be sure!), so you could store a successfully 159 | processed message ID (with some TTL), and double check that the message was not already handled successfully 160 | before handling it. 161 | - implement another Redis queue as a DLQ, and monitor that for messages that simply cannot be handled for some reason 162 | - potential loss of a message is there (worker dies after popping from the queue, but before completing its work). 
163 | In that case, it may be possible to use another Redis queue to store "in-process" items, and processing - see the 164 | Redis documentation for [Pattern: Reliable Queue](https://redis.io/commands/lmove/) for more details. But if this 165 | becomes an issue, I will typically look at a more complete solution such as RabbitMQ, or SQS/elasticMQ. 166 | -------------------------------------------------------------------------------- /2023/01 - January/distributed_computations/app/config.py: -------------------------------------------------------------------------------- 1 | """Various configs for the app 2 | 3 | Values are hardcoded here, but in practice you would probably use 4 | some better mechanism (such as ENV variables) 5 | """ 6 | 7 | redis_host = "localhost" 8 | redis_port = 6379 9 | redis_db_number = 0 10 | redis_password = "secret" 11 | redis_queue_name = "demo-1" 12 | -------------------------------------------------------------------------------- /2023/01 - January/distributed_computations/app/main.py: -------------------------------------------------------------------------------- 1 | """Main app 2 | 3 | This app pushes messages into the redis queue 4 | """ 5 | import random 6 | from datetime import datetime 7 | from json import dumps 8 | from time import sleep 9 | from uuid import uuid4 10 | 11 | import redis 12 | 13 | import config 14 | 15 | 16 | def redis_db(): 17 | db = redis.Redis( 18 | host=config.redis_host, 19 | port=config.redis_port, 20 | db=config.redis_db_number, 21 | password=config.redis_password, 22 | decode_responses=True, 23 | ) 24 | 25 | # make sure redis is up and running 26 | db.ping() 27 | 28 | return db 29 | 30 | 31 | def redis_queue_push(db, message): 32 | # push to tail of the queue (left of the list) 33 | db.lpush(config.redis_queue_name, message) 34 | 35 | 36 | 37 | def main(num_messages: int, delay: float = 1): 38 | """ 39 | Generates `num_messages` and pushes them to a Redis queue 40 | :param num_messages: 41 | :return: 42 | """ 43 | 44 | # connect to Redis 45 | db = redis_db() 46 | 47 | for i in range(num_messages): 48 | # Create message data 49 | message = { 50 | "id": str(uuid4()), 51 | "ts": datetime.utcnow().isoformat(), 52 | "data": { 53 | "message_number": i, 54 | "x": random.randrange(0, 100), 55 | "y": random.randrange(0, 100), 56 | }, 57 | } 58 | 59 | # We'll store the data as JSON in Redis 60 | message_json = dumps(message) 61 | 62 | # Push message to Redis queue 63 | print(f"Sending message {i+1} (id={message['id']})") 64 | redis_queue_push(db, message_json) 65 | 66 | # wait a bit so we have time to start up workers and see how things interact 67 | sleep(delay) 68 | 69 | 70 | 71 | if __name__ == '__main__': 72 | main(30, 0.1) 73 | -------------------------------------------------------------------------------- /2023/01 - January/distributed_computations/app/requirements.txt: -------------------------------------------------------------------------------- 1 | redis < 4.5 -------------------------------------------------------------------------------- /2023/01 - January/distributed_computations/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: '3' 3 | 4 | services: 5 | redis: 6 | image: redis:latest 7 | container_name: redis_queue 8 | restart: always 9 | ports: 10 | - '6379:6379' 11 | command: redis-server --save 20 1 --loglevel warning --requirepass secret 12 | volumes: 13 | - data-volume:/data 14 | 15 | volumes: 16 | data-volume: 17 | 
-------------------------------------------------------------------------------- /2023/01 - January/distributed_computations/main_concept.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fbaptiste/python-blog/f10c3b29c4d5e80a65d23bc36d2e1d51e8851856/2023/01 - January/distributed_computations/main_concept.png -------------------------------------------------------------------------------- /2023/01 - January/distributed_computations/redis_queue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fbaptiste/python-blog/f10c3b29c4d5e80a65d23bc36d2e1d51e8851856/2023/01 - January/distributed_computations/redis_queue.png -------------------------------------------------------------------------------- /2023/01 - January/distributed_computations/worker/config.py: -------------------------------------------------------------------------------- 1 | """Various configs for the app 2 | 3 | Values are hardcoded here, but in practice you would probably use 4 | some better mechanism (such as ENV variables) 5 | """ 6 | 7 | redis_host = "localhost" 8 | redis_port = 6379 9 | redis_db_number = 0 10 | redis_password = "secret" 11 | redis_queue_name = "demo-1" 12 | -------------------------------------------------------------------------------- /2023/01 - January/distributed_computations/worker/requirements.txt: -------------------------------------------------------------------------------- 1 | redis < 4.5 -------------------------------------------------------------------------------- /2023/01 - January/distributed_computations/worker/worker.py: -------------------------------------------------------------------------------- 1 | """Main Worker app 2 | 3 | This app listens messages into the redis queue 4 | """ 5 | import random 6 | from json import loads 7 | 8 | import redis 9 | 10 | import config 11 | 12 | 13 | def redis_db(): 14 | db = redis.Redis( 15 | host=config.redis_host, 16 | port=config.redis_port, 17 | db=config.redis_db_number, 18 | password=config.redis_password, 19 | decode_responses=True, 20 | ) 21 | 22 | # make sure redis is up and running 23 | db.ping() 24 | 25 | return db 26 | 27 | 28 | def redis_queue_push(db, message): 29 | # push to tail of the queue (left of the list) 30 | db.lpush(config.redis_queue_name, message) 31 | 32 | 33 | def redis_queue_pop(db): 34 | # pop from head of the queue (right of the list) 35 | # the `b` in `brpop` indicates this is a blocking call (waits until an item becomes available) 36 | _, message_json = db.brpop(config.redis_queue_name) 37 | return message_json 38 | 39 | 40 | def process_message(db, message_json: str): 41 | message = loads(message_json) 42 | print(f"Message received: id={message['id']}, message_number={message['data']['message_number']}") 43 | 44 | # mimic potential processing errors 45 | processed_ok = random.choices((True, False), weights=(5, 1), k=1)[0] 46 | if processed_ok: 47 | print(f"\tProcessed successfully") 48 | else: 49 | print(f"\tProcessing failed - requeuing...") 50 | redis_queue_push(db, message_json) 51 | 52 | 53 | def main(): 54 | """ 55 | Consumes items from the Redis queue 56 | """ 57 | 58 | # connect to Redis 59 | db = redis_db() 60 | 61 | while True: 62 | message_json = redis_queue_pop(db) # this blocks until an item is received 63 | process_message(db, message_json) 64 | 65 | 66 | if __name__ == '__main__': 67 | main() 68 | 
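The README mentions the Redis "Reliable Queue" pattern as a way to avoid losing a message when a worker dies mid-processing. A minimal sketch of how the worker above could adopt it, reusing its existing `config` and `db` objects, assuming a Redis server (6.2+) and a redis-py version that expose `LMOVE`/`BLMOVE`, and using a hypothetical `demo-1-processing` list that is not part of this demo:

```python
PROCESSING_QUEUE = "demo-1-processing"


def redis_queue_pop_reliable(db):
    # atomically move the next message from the tail of the work queue onto a
    # "processing" list, so it is never in limbo if this worker crashes
    return db.blmove(config.redis_queue_name, PROCESSING_QUEUE, timeout=0, src="RIGHT", dest="LEFT")


def acknowledge(db, message_json):
    # once processing succeeds, remove the message from the processing list;
    # messages left behind by crashed workers can be re-queued by a separate reaper
    db.lrem(PROCESSING_QUEUE, 1, message_json)
```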
-------------------------------------------------------------------------------- /2023/02 - February/threading_issues_caveats/Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | 8 | [dev-packages] 9 | 10 | [requires] 11 | python_version = "3.11" 12 | -------------------------------------------------------------------------------- /2023/02 - February/threading_issues_caveats/Pipfile.lock: -------------------------------------------------------------------------------- 1 | { 2 | "_meta": { 3 | "hash": { 4 | "sha256": "ed6d5d614626ae28e274e453164affb26694755170ccab3aa5866f093d51d3e4" 5 | }, 6 | "pipfile-spec": 6, 7 | "requires": { 8 | "python_version": "3.11" 9 | }, 10 | "sources": [ 11 | { 12 | "name": "pypi", 13 | "url": "https://pypi.org/simple", 14 | "verify_ssl": true 15 | } 16 | ] 17 | }, 18 | "default": {}, 19 | "develop": {} 20 | } 21 | -------------------------------------------------------------------------------- /2023/02 - February/threading_issues_caveats/p1_solution_01.py: -------------------------------------------------------------------------------- 1 | NUM_ITER = 100 2 | counter = 0 3 | sum_ = 0 4 | 5 | 6 | def do_work(): 7 | global counter 8 | global sum_ 9 | 10 | counter = counter + 1 11 | next_sum = sum_ + counter 12 | print(f"{sum_} + {counter} = {next_sum}") 13 | print("-" * 20) 14 | sum_ = next_sum 15 | 16 | 17 | if __name__ == '__main__': 18 | for i in range(NUM_ITER): 19 | do_work() 20 | 21 | print(f"DONE: solution = {sum_}") 22 | -------------------------------------------------------------------------------- /2023/02 - February/threading_issues_caveats/p1_solution_02.py: -------------------------------------------------------------------------------- 1 | import threading 2 | 3 | 4 | NUM_ITER = 100 5 | counter = 0 6 | sum_ = 0 7 | 8 | 9 | def do_work(): 10 | global counter 11 | global sum_ 12 | 13 | counter += 1 14 | next_sum = sum_ + counter 15 | print(f"{sum_} + {counter} = {next_sum}") 16 | print("-" * 20) 17 | sum_ = next_sum 18 | 19 | 20 | if __name__ == '__main__': 21 | threads = [] 22 | 23 | # create the threads 24 | for i in range(NUM_ITER): 25 | threads.append(threading.Thread(target=do_work)) 26 | 27 | # start the threads 28 | for thread in threads: 29 | thread.start() 30 | 31 | # wait until all threads are done 32 | for thread in threads: 33 | thread.join() 34 | 35 | print(f"DONE: solution = {sum_}") 36 | -------------------------------------------------------------------------------- /2023/02 - February/threading_issues_caveats/p1_solution_03.py: -------------------------------------------------------------------------------- 1 | import threading 2 | 3 | 4 | NUM_ITER = 100 5 | counter = 0 6 | sum_ = 0 7 | p_lock = threading.Lock() 8 | 9 | 10 | def do_work(): 11 | global counter 12 | global sum_ 13 | 14 | counter += 1 15 | next_sum = sum_ + counter 16 | p_lock.acquire() 17 | print(f"{sum_} + {counter} = {next_sum}") 18 | print("-" * 20) 19 | p_lock.release() 20 | sum_ = next_sum 21 | 22 | 23 | if __name__ == '__main__': 24 | threads = [] 25 | 26 | # create the threads 27 | for i in range(NUM_ITER): 28 | threads.append(threading.Thread(target=do_work)) 29 | 30 | # start the threads 31 | for thread in threads: 32 | thread.start() 33 | 34 | # wait until all threads are done 35 | for thread in threads: 36 | thread.join() 37 | 38 | print(f"DONE: solution = {sum_}") 39 | 
-------------------------------------------------------------------------------- /2023/02 - February/threading_issues_caveats/p1_solution_03a.py: -------------------------------------------------------------------------------- 1 | import threading 2 | 3 | 4 | NUM_ITER = 100 5 | counter = 0 6 | sum_ = 0 7 | p_lock = threading.Lock() 8 | 9 | 10 | def do_work(): 11 | global counter 12 | global sum_ 13 | 14 | counter += 1 15 | next_sum = sum_ + counter 16 | with p_lock: 17 | print(f"{sum_} + {counter} = {next_sum}") 18 | print("-" * 20) 19 | sum_ = next_sum 20 | 21 | 22 | if __name__ == '__main__': 23 | threads = [] 24 | 25 | # create the threads 26 | for i in range(100): 27 | threads.append(threading.Thread(target=do_work)) 28 | 29 | # start the threads 30 | for thread in threads: 31 | thread.start() 32 | 33 | # wait until all threads are done 34 | for thread in threads: 35 | thread.join() 36 | 37 | print(f"DONE: solution = {sum_}") 38 | -------------------------------------------------------------------------------- /2023/02 - February/threading_issues_caveats/p1_solution_04.py: -------------------------------------------------------------------------------- 1 | import threading 2 | 3 | 4 | NUM_ITER = 100 5 | counter = 0 6 | sum_ = 0 7 | p_lock = threading.Lock() 8 | c_lock = threading.Lock() 9 | 10 | 11 | def do_work(): 12 | global counter 13 | global sum_ 14 | 15 | with c_lock: 16 | prev_sum = sum_ 17 | counter += 1 18 | next_sum = sum_ + counter 19 | sum_ = next_sum 20 | 21 | with p_lock: 22 | print(f"{prev_sum} + {counter} = {next_sum}") 23 | print("-" * 20) 24 | 25 | 26 | if __name__ == '__main__': 27 | threads = [] 28 | 29 | # create the threads 30 | for i in range(NUM_ITER): 31 | threads.append(threading.Thread(target=do_work)) 32 | 33 | # start the threads 34 | for thread in threads: 35 | thread.start() 36 | 37 | # wait until all threads are done 38 | for thread in threads: 39 | thread.join() 40 | 41 | print(f"DONE: solution = {sum_}") 42 | -------------------------------------------------------------------------------- /2023/02 - February/threading_issues_caveats/p1_solution_05.py: -------------------------------------------------------------------------------- 1 | import random 2 | import threading 3 | from time import sleep 4 | 5 | 6 | NUM_ITER = 100 7 | counter = 0 8 | sum_ = 0 9 | p_lock = threading.Lock() 10 | c_lock = threading.Lock() 11 | 12 | 13 | def fuzz(): 14 | sleep(random.random() / 10) 15 | 16 | 17 | def do_work(): 18 | global counter 19 | global sum_ 20 | 21 | fuzz() 22 | with c_lock: 23 | counter += 1 24 | curr_counter = counter 25 | prev_sum = sum_ 26 | next_sum = sum_ + counter 27 | sum_ = next_sum 28 | fuzz() 29 | with p_lock: 30 | print(f"{prev_sum} + {curr_counter} = {next_sum}") 31 | print("-" * 20) 32 | fuzz() 33 | 34 | 35 | if __name__ == '__main__': 36 | threads = [] 37 | 38 | # create the threads 39 | for i in range(NUM_ITER): 40 | threads.append(threading.Thread(target=do_work)) 41 | 42 | # start the threads 43 | for thread in threads: 44 | thread.start() 45 | 46 | # wait until all threads are done 47 | for thread in threads: 48 | thread.join() 49 | 50 | print(f"DONE: solution = {sum_}") 51 | -------------------------------------------------------------------------------- /2023/02 - February/threading_issues_caveats/p1_solution_06.py: -------------------------------------------------------------------------------- 1 | import queue 2 | import random 3 | import threading 4 | from time import sleep 5 | 6 | 7 | NUM_ITER = 100 8 | counter = 0 9 | sum_ = 0 10 | 
c_lock = threading.Lock() 11 | print_queue = queue.Queue() 12 | 13 | 14 | def fuzz(): 15 | sleep(random.random() / 10) 16 | 17 | 18 | def print_queue_watcher(): 19 | while True: 20 | item = print_queue.get() 21 | fuzz() 22 | print(item) 23 | fuzz() 24 | print_queue.task_done() 25 | fuzz() 26 | 27 | 28 | def do_work(): 29 | global counter 30 | global sum_ 31 | 32 | fuzz() 33 | with c_lock: 34 | counter += 1 35 | next_sum = sum_ + counter 36 | print_queue.put(f"{sum_} + {counter} = {next_sum}") 37 | print_queue.put("-" * 20) 38 | sum_ = next_sum 39 | fuzz() 40 | 41 | 42 | if __name__ == '__main__': 43 | threads = [] 44 | 45 | # start daemon print watcher thread 46 | threading.Thread(target=print_queue_watcher, daemon=True).start() 47 | 48 | # create the threads 49 | for i in range(NUM_ITER): 50 | threads.append(threading.Thread(target=do_work)) 51 | 52 | # start the threads with some fuzzing between starts 53 | for thread in threads: 54 | thread.start() 55 | fuzz() 56 | 57 | # wait until all threads are done 58 | for thread in threads: 59 | thread.join() 60 | 61 | # wait until the print queue is empty 62 | print_queue.join() 63 | 64 | print(f"DONE: solution = {sum_}") 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /2023/02 - February/threading_issues_caveats/p1_solution_07.py: -------------------------------------------------------------------------------- 1 | from time import perf_counter 2 | 3 | NUM_ITER = 100_000 4 | counter = 0 5 | sum_ = 0 6 | 7 | 8 | def do_work(): 9 | global counter 10 | global sum_ 11 | 12 | counter = counter + 1 13 | next_sum = sum_ + counter 14 | print(f"{sum_} + {counter} = {next_sum}") 15 | print("-" * 20) 16 | sum_ = next_sum 17 | 18 | 19 | if __name__ == '__main__': 20 | start = perf_counter() 21 | for i in range(NUM_ITER): 22 | do_work() 23 | 24 | end = perf_counter() 25 | print(f"DONE: solution = {sum_}") 26 | print(f"elapsed: {end - start:.2f} seconds") 27 | -------------------------------------------------------------------------------- /2023/02 - February/threading_issues_caveats/p1_solution_08.py: -------------------------------------------------------------------------------- 1 | import queue 2 | import threading 3 | from time import perf_counter 4 | 5 | NUM_ITER = 100_000 6 | counter = 0 7 | sum_ = 0 8 | c_lock = threading.Lock() 9 | print_queue = queue.Queue() 10 | 11 | 12 | def print_queue_watcher(): 13 | while True: 14 | item = print_queue.get() 15 | print(item) 16 | print_queue.task_done() 17 | 18 | 19 | def do_work(): 20 | global counter 21 | global sum_ 22 | 23 | with c_lock: 24 | counter += 1 25 | next_sum = sum_ + counter 26 | print_queue.put(f"{sum_} + {counter} = {next_sum}") 27 | print_queue.put("-" * 20) 28 | sum_ = next_sum 29 | 30 | 31 | if __name__ == '__main__': 32 | start = perf_counter() 33 | threads = [] 34 | 35 | # start daemon print watcher thread 36 | threading.Thread(target=print_queue_watcher, daemon=True).start() 37 | 38 | # create the threads 39 | for i in range(NUM_ITER): 40 | threads.append(threading.Thread(target=do_work)) 41 | 42 | # start the threads with some fuzzing between starts 43 | for thread in threads: 44 | thread.start() 45 | 46 | # wait until all threads are done 47 | for thread in threads: 48 | thread.join() 49 | 50 | # wait until the print queue is empty 51 | print_queue.join() 52 | 53 | end = perf_counter() 54 | print(f"DONE: solution = {sum_}") 55 | print(f"elapsed: {end - start:.2f} seconds") 56 | 57 | 58 | 59 | 
-------------------------------------------------------------------------------- /2023/02 - February/threading_issues_caveats/p2_solution_01.py: -------------------------------------------------------------------------------- 1 | import math 2 | from time import perf_counter 3 | 4 | 5 | NUM_INTERVALS = 10_000_000 6 | 7 | 8 | def func(x): 9 | # semi-circle, radius 1, centered at (0, 0) 10 | # integral from -1 to 1 of this function should gives us 11 | # the area of this semi-circle: pi / 2 12 | return math.sqrt(1 - x * x) 13 | 14 | 15 | def riemann_sum(func, delta, a, i_start, i_end): 16 | # calculates the right Riemann sums 17 | area = 0 18 | for i in range(i_start, i_end): 19 | x = a + delta * i 20 | area += func(x) * delta 21 | return area 22 | 23 | 24 | if __name__ == '__main__': 25 | start = perf_counter() 26 | a = -1 27 | b = 1 28 | delta = (b - a) / NUM_INTERVALS 29 | area = riemann_sum(func, delta, a, 0, NUM_INTERVALS) 30 | end = perf_counter() 31 | print(f"Area: {area:.10f}, pi/2={math.pi/2:.10f}") 32 | print(f"Elapsed: {end - start:.4f} seconds") 33 | -------------------------------------------------------------------------------- /2023/02 - February/threading_issues_caveats/p2_solution_02.py: -------------------------------------------------------------------------------- 1 | import math 2 | import threading 3 | from time import perf_counter 4 | 5 | 6 | NUM_INTERVALS = 10_000_000 7 | NUM_THREADS = 10_000 8 | results = [] 9 | 10 | 11 | def func(x): 12 | # semi-circle, radius 1, centered at (0, 0) 13 | # integral from -1 to 1 of this function should gives us 14 | # the area of this semi-circle: pi / 2 15 | return math.sqrt(1 - x * x) 16 | 17 | 18 | def riemann_sum(func, delta, a, i_start, i_end): 19 | # calculates the right Riemann sums 20 | area = 0 21 | for i in range(i_start, i_end): 22 | x = a + delta * i 23 | area += func(x) * delta 24 | results.append(area) 25 | 26 | 27 | def split(num_intervals, n): 28 | k, m = divmod(num_intervals, n) 29 | return [(i * k + min(i, m), (i + 1) * k + min(i+1, m)) for i in range(n)] 30 | 31 | 32 | if __name__ == '__main__': 33 | start = perf_counter() 34 | a = -1 35 | b = 1 36 | delta = (b - a) / NUM_INTERVALS 37 | 38 | # split the intervals into NUM_THREADS 39 | chunks = split(NUM_INTERVALS, NUM_THREADS) 40 | 41 | # Create the threads 42 | threads = [] 43 | for i_start, i_end in chunks: 44 | threads.append( 45 | threading.Thread(target=riemann_sum, args=(func, delta, a, i_start, i_end)) 46 | ) 47 | 48 | # Start and join the threads 49 | for thread in threads: 50 | thread.start() 51 | 52 | for thread in threads: 53 | thread.join() 54 | 55 | # all threads done processing - add all results up for final answer 56 | area = sum(results) 57 | 58 | end = perf_counter() 59 | print(f"Area: {area:.10f}, pi/2={math.pi / 2:.10f}") 60 | print(f"Elapsed: {end - start:.4f} seconds") -------------------------------------------------------------------------------- /2023/03 - March/concurrency_concepts_in_python.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fbaptiste/python-blog/f10c3b29c4d5e80a65d23bc36d2e1d51e8851856/2023/03 - March/concurrency_concepts_in_python.pdf -------------------------------------------------------------------------------- /2024/03 - March/multiprocessing_pools/example_1.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool 2 | from random import randint, seed 3 | from time import perf_counter, 
sleep 4 | 5 | 6 | def long_running_func(job_id, arg1, arg2, sleep_time): 7 | print(f"running job #{job_id} (sleep={sleep_time})") 8 | sleep(sleep_time) 9 | print(f"finished running job #{job_id}") 10 | return arg1 + arg2 11 | 12 | 13 | def run_pool(job_size, pool_size): 14 | jobs = [ 15 | (i, randint(1, 100), randint(1, 100), randint(1, 3)) 16 | for i in range(job_size) 17 | ] 18 | # kick off all the processes 19 | with Pool(processes=pool_size) as pool: 20 | all_results = pool.starmap(long_running_func, jobs) 21 | 22 | # gather all results from all processes 23 | for result in all_results: 24 | print(result) 25 | 26 | 27 | if __name__ == "__main__": 28 | start = perf_counter() 29 | seed(0) 30 | run_pool(job_size=50, pool_size=50) 31 | print(f"Elapsed time: {perf_counter() - start:.2f}") 32 | -------------------------------------------------------------------------------- /2024/03 - March/multiprocessing_pools/example_2.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool 2 | from random import randint, seed 3 | from time import perf_counter, sleep 4 | 5 | 6 | def sieve(upper_bound): 7 | print(f"running sieve: {upper_bound=}") 8 | candidates = [False] * 2 + [True] * (upper_bound - 2) 9 | primes = [] 10 | 11 | for i, isprime in enumerate(candidates): 12 | if isprime: 13 | primes.append(i) 14 | for n in range(i*i, upper_bound, i): 15 | candidates[n] = False 16 | 17 | return primes 18 | 19 | 20 | def run_pool(job_size, pool_size): 21 | jobs = [ 22 | randint(1_000_000, 10_000_000) 23 | for i in range(job_size) 24 | ] 25 | # kick off all the processes 26 | with Pool(processes=pool_size) as pool: 27 | all_results = pool.map(sieve, jobs) 28 | 29 | # gather all results from all processes 30 | print(all_results[0]) 31 | for result in all_results: 32 | print(f"number of primes found: {len(result)}") 33 | 34 | 35 | if __name__ == "__main__": 36 | start = perf_counter() 37 | seed(0) 38 | run_pool(job_size=100, pool_size=10) 39 | print(f"Elapsed time: {perf_counter() - start:.2f}") 40 | -------------------------------------------------------------------------------- /2024/03 - March/multiprocessing_pools/example_3.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import Pool 2 | from random import randint, seed 3 | from time import perf_counter, sleep 4 | 5 | 6 | def func(a: int, b: int, *, upper_bound: int, job_id: int): 7 | print(f"Job #{job_id}: {a=}, {b=}, {job_id=}, {upper_bound=}") 8 | candidates = [False] * 2 + [True] * (upper_bound - 2) 9 | primes = [] 10 | 11 | for i, isprime in enumerate(candidates): 12 | if isprime: 13 | primes.append(i) 14 | for n in range(i * i, upper_bound, i): 15 | candidates[n] = False 16 | 17 | return primes 18 | 19 | 20 | def run_pool(job_size, pool_size): 21 | jobs = [ 22 | ( 23 | (i, i + 1), 24 | { 25 | "job_id": i, 26 | "upper_bound": randint(1_000_000, 10_000_000) 27 | } 28 | ) 29 | for i in range(job_size) 30 | ] 31 | # kick off all the processes 32 | pool = Pool(processes=pool_size) 33 | 34 | async_results = [ 35 | pool.apply_async(func, args=positionals, kwds=kwargs) 36 | for positionals, kwargs in jobs 37 | ] 38 | pool.close() 39 | 40 | # wait for async results to come back 41 | pool.join() 42 | 43 | # get all the results 44 | results = [result.get() for result in async_results] 45 | print(results[0]) 46 | 47 | 48 | if __name__ == "__main__": 49 | start = perf_counter() 50 | seed(0) 51 | run_pool(job_size=100, pool_size=10) 52 | 
print(f"Elapsed time: {perf_counter() - start:.2f}") 53 | -------------------------------------------------------------------------------- /2024/03 - March/multiprocessing_pools/notes.md: -------------------------------------------------------------------------------- 1 | # Python Multiprocessing Pools 2 | 3 | In this video we look at how to spread CPU-bound workloads across multiple cores on your machine using 4 | multiprocessing pools. 5 | 6 | Note, that for I/O bound workloads, a much better alternative is to use asyncio (or threading). I explain this 7 | an earlier video [here](https://youtu.be/S05-MZAJqNM). 8 | 9 | ## Example 1 10 | In this example we have a long-running function where we use a blocking `sleep()` to simulate a CPU bound 11 | function - a lot of the examples I see everywhere uses that, so let's give it a shot too... Or should we? Oh well, 12 | let's just do it and see what happens. 13 | 14 | This function is simple, it takes two positional arguments, sleeps a certain amount of time, and returns the sum of 15 | the two values. 16 | 17 | We'll want to run this function a certain number of times with different input values. 18 | Then, we'll want to spread this computational workload across multiple cores. 19 | To do this we'll use a multiprocessing `Pool`. 20 | 21 | So, we need to: 22 | - spawn multiple parallel processes 23 | - pass some values to the function 24 | - receive a result back 25 | 26 | In this example, our function will look for just two positional arguments. 27 | 28 | The `Pool` instance supports passing positional arguments via a tuple for each function call, and using the `starmap` 29 | method to "spread" the input arguments as positional arguments - much like the `starmap` function in the `functools` 30 | module - Python docs [here]() (or better yet, check out my 31 | [Deep Dive](https://www.udemy.com/course/python-3-deep-dive-part-2/?referralCode=3E7AFEF5174F04E5C8D4) 32 | course that covers `starmap`, and a lot more!) 33 | 34 | We'll start with a pool sized at 1 - this will essentially run all the function calls sequentially, so we can establish 35 | a baseline for how long this takes to run. 36 | 37 | Then, we'll start increasing the pool size (the max number of parallel processes), and see what we get. 38 | 39 | To make things a bit more realistic, the long-running function is going to sleep a variable amount of time (from 1 40 | to 3 seconds). To ensure repeatability, we'll set a specific seed from our `random` module. 41 | 42 | Note that there is no guarantee of the order in which the functions are going to be called ( a consequence of the way 43 | we are starting the functions, later I'll show you a different method that starts the functions in a specific 44 | sequence) - so, in order to keep things consistent from one run to another, I will generate the random sleep 45 | times **outside** the called function itself. 46 | 47 | Another important thing to understand, Python may or may not use the max number of processes available, and I do not 48 | think there is any guarantee of how it spreads the processes across the available cores. Also, setting a pool size 49 | that is smaller than the number of cores on your machine does not guarantee that the load will be spread out 50 | across that smaller number of cores only. There are ways to actually control that, but way too complex for me! 
51 | As I mentioned in a previous video
52 | ([Distributed Computing](https://youtu.be/XCSARhkRg7g)),
53 | I rarely use multiprocessing - instead, I built a distributed computing system, which lets me scale beyond
54 | just a single machine if I need to.
55 |
56 | Back to multiprocessing...
57 |
58 | For testing this out, I have a Mac with 10 cores (M1 Max), and here are my results, running a workload of `50`
59 | function calls, with varying pool size:
60 |
61 |
62 | | Pool Size | Total Time (sec) |
63 | |-----------|------------------|
64 | | 1 | 96 |
65 | | 2 | 54 |
66 | | 4 | 28 |
67 | | 6 | 19 |
68 | | 8 | 16 |
69 | | 10 | 13 |
70 | | 12 | 12 |
71 | | 14 | 9 |
72 | | 18 | 8 |
73 | | 50 | 6 |
74 |
75 | Now these results are a bit suspicious... We definitely see a speed improvement as we increase the pool size.
76 | But why does the time keep dropping if I am running more processes than available cores?
77 |
78 | The problem is that my long-running function is **not actually doing any computations** - so each core that's running a
79 | process actually has plenty of bandwidth to run a few more at the same time.
80 |
81 | In fact, using a `sleep()` does **NOT** simulate a CPU-bound workload!! So much for collective wisdom. In this case,
82 | it's OK, but only up to a point.
83 |
84 | In the next example we'll remedy that.
85 |
86 | ## Example 2
87 | In the previous example we saw how to set up multiprocessing. But our long-running function was not truly CPU-bound,
88 | so let's change this, and once more benchmark our results.
89 |
90 | For the computation, I'll implement a sieve of Eratosthenes - if you don't know what it is or how it works,
91 | don't worry about it - the only thing that matters here is that we want a function that is computationally intensive.
92 |
93 | Note that since we only need to pass a single argument to the sieve function, we no longer need to use
94 | `starmap` to "spread out" multiple positional arguments - instead we can just use `map`.
95 |
96 | Here are my results now, setting a job size of `100`:
97 |
98 | | Pool Size | Total Time (sec) |
99 | |-----------|------------------|
100 | | 1 | 55 |
101 | | 2 | 31 |
102 | | 4 | 17 |
103 | | 6 | 12 |
104 | | 8 | 11 |
105 | | 10 | 9 |
106 | | 12 | 10 |
107 | | 14 | 10 |
108 | | 18 | 10 |
109 | | 50 | 11 |
110 | | 100 | 13 |
111 |
112 | As you can see, going beyond the number of cores you have available does not increase performance as long as each
113 | function call is essentially using up all the CPU resources on that core. And in fact, if you start going
114 | way beyond your total number of cores, you'll start to see performance decrease.
115 |
116 | Personally, I usually do not set my pool size greater than my number of cores minus 2. That way I don't starve the OS
117 | of resources for running other things, and my machine stays (somewhat) responsive.
118 |
119 |
120 | ## Example 3
121 | One last thing I want to show you is how to pass **named** arguments when spawning your processes.
122 |
123 | We can't use `map` or `starmap` - those only work for positional arguments.
124 |
125 | Instead, we can use the `apply_async` method (there is also an `apply()` method - but it's blocking, so not very useful
126 | in our case where we want all the work to be parallelized as much as possible).
127 |
128 | We will also have to deal with the results a bit differently - each `apply_async` call returns its own result object,
129 | so we need to collect those somehow.
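To make the mechanics concrete before we look at the actual code, here is a minimal, self-contained sketch (the `add` function and its argument values are made up purely for illustration). Note that it collects the results inside the `with` block, which is why a context manager is fine here; the next paragraph explains why the real example takes a slightly different route:

```python
from multiprocessing import Pool


def add(a, b, *, job_id):
    return a + b


if __name__ == "__main__":
    with Pool(processes=4) as pool:
        # apply_async accepts both positional (args) and keyword (kwds) arguments
        async_results = [
            pool.apply_async(add, args=(i, i + 1), kwds={"job_id": i})
            for i in range(10)
        ]
        # each call returns an AsyncResult; .get() blocks until that result is ready
        results = [r.get() for r in async_results]
    print(results)
```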
130 | 131 | To implement this I won't use a Pool context manager - I need to control closing the pool and waiting for all async 132 | results to come back (in essence similar to joining multiple threads), using the pool's `join()` method. 133 | 134 | Getting results back from an async result also needs to be done via a `.get()` method - the result object itself is 135 | an async object, so it does not contain the result value directly. 136 | 137 | You'll notice, by the way, that the jobs are started in sequence now - simply because we are starting them that way 138 | (they may not complete in sequence, but they start that way). 139 | 140 | 141 | ## Conclusion 142 | And there you have it, how to use multiprocessing pools to speed up your workloads. Of course, you're limited to 143 | a single machine. In large production systems, this is usually not enough, but if you really want to push your single 144 | machine to the limit, multiprocessing can help you. 145 | 146 | There's a lot of other options for multiprocessing, I just scratched the surface here. But honestly, this is 147 | probably going to be the 80/20 rule. -------------------------------------------------------------------------------- /2024/04 - April/pyyaml_library/config.yaml: -------------------------------------------------------------------------------- 1 | observer: 2 | latitude: 33.4 3 | longitude: -111.8 4 | horizon_file: data/horizon.csv 5 | 6 | catalog: 7 | file: data/dso_catalog.csv 8 | categories: 9 | - emission_nebula 10 | - reflection_nebula 11 | - hii_regions 12 | - galaxies 13 | - galaxy_clusters -------------------------------------------------------------------------------- /2024/04 - April/pyyaml_library/docker-compose.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: '3' 3 | 4 | services: 5 | redis: 6 | image: redis:latest 7 | container_name: redis_queue 8 | restart: always 9 | ports: 10 | - '6379:6379' 11 | command: redis-server --save 20 1 --loglevel warning --requirepass secret 12 | volumes: 13 | - data-volume:/data 14 | 15 | volumes: 16 | data-volume: 17 | -------------------------------------------------------------------------------- /2024/06 - June/postgres_pydantic/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fbaptiste/python-blog/f10c3b29c4d5e80a65d23bc36d2e1d51e8851856/2024/06 - June/postgres_pydantic/data/.gitkeep -------------------------------------------------------------------------------- /2024/06 - June/postgres_pydantic/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.9' 2 | 3 | services: 4 | postgres: 5 | image: postgres:16-alpine 6 | restart: always 7 | shm_size: 128mb 8 | ports: 9 | - 5432:5432 10 | volumes: 11 | - ./data:/var/lib/postgresql/data 12 | environment: 13 | - POSTGRES_PASSWORD=secret 14 | - POSTGRES_USER=admin 15 | - POSTGRES_DB=mathbyte 16 | -------------------------------------------------------------------------------- /2024/06 - June/postgres_pydantic/example_1.py: -------------------------------------------------------------------------------- 1 | """Example 1""" 2 | # embedding configs here to keep things simple 3 | # never do this in actual code - especially if it gets committed to a VCS 4 | from dataclasses import dataclass 5 | 6 | import psycopg 7 | from psycopg.rows import dict_row, namedtuple_row, class_row 8 | from pydantic import BaseModel, Field 9 | 10 | 11 
| POSTGRES_HOST = "localhost" 12 | POSTGRES_PORT = 5432 13 | POSTGRES_DB = "mathbyte" 14 | POSTGRES_USER = "admin" 15 | POSTGRES_PWD = "secret" 16 | 17 | 18 | @dataclass 19 | class DCEmployee: 20 | id: int 21 | first_name: str 22 | last_name: str 23 | nickname: str 24 | department_id: int 25 | 26 | 27 | class Employee(BaseModel): 28 | employee_id: int = Field(alias="id") 29 | first_name: str 30 | last_name: str 31 | nickname: str | None = None 32 | department_id: int 33 | 34 | 35 | def run_query_tuple(conn): 36 | with conn.cursor() as cur: 37 | cur.execute("SELECT * FROM employees;") 38 | rows = cur.fetchall() 39 | 40 | print("Standard Default Return") 41 | print("=" * 50) 42 | if rows: 43 | print(f"type(row)={type(rows[0])}") 44 | for row in rows: 45 | print(row) 46 | print("\n") 47 | 48 | 49 | def run_query_with_factory(conn, row_factory): 50 | with conn.cursor(row_factory=row_factory) as cur: 51 | cur.execute("SELECT * FROM employees;") 52 | rows = cur.fetchall() 53 | 54 | print(f"{row_factory.__name__} Factory") 55 | print("=" * 50) 56 | if rows: 57 | print(f"type(row)={type(rows[0])}") 58 | for row in rows: 59 | print(row) 60 | print("\n") 61 | 62 | 63 | if __name__ == "__main__": 64 | conn_str = ( 65 | f"host={POSTGRES_HOST} " 66 | f"port={POSTGRES_PORT} " 67 | f"dbname={POSTGRES_DB} " 68 | f"user={POSTGRES_USER} " 69 | f"password={POSTGRES_PWD}" 70 | ) 71 | 72 | with psycopg.connect(conn_str) as conn: 73 | # example 1: return data as a tuple 74 | run_query_tuple(conn) 75 | 76 | # example 2: return data as a named tuple 77 | run_query_with_factory(conn, namedtuple_row) 78 | 79 | # example 3: return data as a Python dict 80 | run_query_with_factory(conn, dict_row) 81 | 82 | # example 4: return data as a custom dataclass 83 | run_query_with_factory(conn, class_row(DCEmployee)) 84 | 85 | # example 5: return data as a custom Pydantic model 86 | run_query_with_factory(conn, class_row(Employee)) -------------------------------------------------------------------------------- /2024/06 - June/postgres_pydantic/example_2.py: -------------------------------------------------------------------------------- 1 | """Example 2""" 2 | # embedding configs here to keep things simple 3 | # never do this in actual code - especially if it gets committed to a VCS 4 | 5 | import psycopg 6 | from psycopg.rows import dict_row, namedtuple_row, class_row 7 | from pydantic import BaseModel, Field 8 | 9 | 10 | POSTGRES_HOST = "localhost" 11 | POSTGRES_PORT = 5432 12 | POSTGRES_DB = "mathbyte" 13 | POSTGRES_USER = "admin" 14 | POSTGRES_PWD = "secret" 15 | 16 | 17 | class Employee(BaseModel): 18 | employee_id: int = Field(alias="id") 19 | first_name: str 20 | last_name: str 21 | nickname: str | None = None 22 | department: str 23 | 24 | 25 | if __name__ == "__main__": 26 | conn_str = ( 27 | f"host={POSTGRES_HOST} " 28 | f"port={POSTGRES_PORT} " 29 | f"dbname={POSTGRES_DB} " 30 | f"user={POSTGRES_USER} " 31 | f"password={POSTGRES_PWD}" 32 | ) 33 | 34 | with psycopg.connect(conn_str) as conn: 35 | with conn.cursor(row_factory=class_row(Employee)) as cur: 36 | cur.execute(""" 37 | SELECT employees.id, 38 | employees.first_name, 39 | employees.last_name, 40 | employees.nickname, 41 | departments.name as department 42 | FROM employees 43 | INNER JOIN departments on employees.department_id=departments.id 44 | ORDER BY last_name; 45 | """) 46 | rows = cur.fetchall() 47 | 48 | for row in rows: 49 | print(row) 50 | -------------------------------------------------------------------------------- /2024/06 - 
June/postgres_pydantic/example_3.py: -------------------------------------------------------------------------------- 1 | """Example 3""" 2 | # embedding configs here to keep things simple 3 | # never do this in actual code - especially if it gets committed to a VCS 4 | 5 | import psycopg 6 | from psycopg.rows import dict_row, namedtuple_row, class_row 7 | from pydantic import BaseModel, ConfigDict, Field 8 | 9 | 10 | POSTGRES_HOST = "localhost" 11 | POSTGRES_PORT = 5432 12 | POSTGRES_DB = "mathbyte" 13 | POSTGRES_USER = "admin" 14 | POSTGRES_PWD = "secret" 15 | 16 | 17 | class Employee(BaseModel): 18 | model_config = ConfigDict(extras="ignore") 19 | 20 | employee_id: int = Field(alias="id") 21 | first_name: str 22 | last_name: str 23 | nickname: str | None = None 24 | department: str = Field(alias="name") 25 | 26 | 27 | if __name__ == "__main__": 28 | conn_str = ( 29 | f"host={POSTGRES_HOST} " 30 | f"port={POSTGRES_PORT} " 31 | f"dbname={POSTGRES_DB} " 32 | f"user={POSTGRES_USER} " 33 | f"password={POSTGRES_PWD}" 34 | ) 35 | 36 | with psycopg.connect(conn_str) as conn: 37 | with conn.cursor(row_factory=class_row(Employee)) as cur: 38 | cur.execute(""" 39 | SELECT employees.*, departments.name 40 | FROM employees 41 | INNER JOIN departments on employees.department_id=departments.id 42 | ORDER BY last_name; 43 | """) 44 | rows = cur.fetchall() 45 | 46 | for row in rows: 47 | print(row) 48 | -------------------------------------------------------------------------------- /2024/06 - June/postgres_pydantic/migrations/20240604_01_u0XKn-db-init.py: -------------------------------------------------------------------------------- 1 | """ 2 | db init 3 | """ 4 | 5 | from yoyo import step 6 | 7 | __depends__ = {} 8 | 9 | steps = [ 10 | step( 11 | """ 12 | CREATE TABLE employee ( 13 | id SERIAL PRIMARY KEY, 14 | first_name text NOT NULL, 15 | last_name text NOT NULL, 16 | nickname text, 17 | department text NOT NULL 18 | ); 19 | """, 20 | "DROP TABLE employee;" 21 | ) 22 | ] 23 | -------------------------------------------------------------------------------- /2024/06 - June/postgres_pydantic/migrations/20240604_02_n6kZK-rename-employee-table.py: -------------------------------------------------------------------------------- 1 | """ 2 | rename employee table 3 | """ 4 | 5 | from yoyo import step 6 | 7 | __depends__ = {'20240604_01_u0XKn-db-init'} 8 | 9 | steps = [ 10 | step( 11 | "ALTER TABLE employee RENAME TO employees;", 12 | "ALTER TABLE employees RENAME TO employee;", 13 | ), 14 | ] 15 | -------------------------------------------------------------------------------- /2024/06 - June/postgres_pydantic/migrations/20240604_03_sYLbA-generate-sample-employee-data.py: -------------------------------------------------------------------------------- 1 | """ 2 | generate sample employee data 3 | """ 4 | 5 | from yoyo import step 6 | 7 | __depends__ = {'20240604_02_n6kZK-rename-employee-table'} 8 | 9 | steps = [ 10 | step( 11 | """ 12 | INSERT INTO employees (first_name, last_name, nickname, department) values 13 | ('Isaac', 'Newton', null, 'Physics'), 14 | ('Albert', 'Einstein', null, 'Physics'), 15 | ('John', 'von Neumann', 'Johnny', 'Mathematics'), 16 | ('Joseph', 'Fourier', 'Joe', 'Mathematics'), 17 | ('Blaise', 'Pascal', null, 'Mathematics'), 18 | ('John', 'Cleese', null, 'Drama'), 19 | ('William', 'Shakespeare', 'Willie', 'English Lit') 20 | ; 21 | """, 22 | "DELETE FROM employees;" 23 | ) 24 | ] 25 | -------------------------------------------------------------------------------- /2024/06 - 
June/postgres_pydantic/migrations/20240604_04_Qqsgz-change-departments-to-fk.py: -------------------------------------------------------------------------------- 1 | """ 2 | change departments to FK 3 | """ 4 | 5 | from yoyo import step 6 | 7 | __depends__ = {'20240604_03_sYLbA-generate-sample-employee-data'} 8 | 9 | steps = [ 10 | step( 11 | "CREATE TABLE departments (id SERIAL PRIMARY KEY, name text);", 12 | "DROP TABLE departments;" 13 | ), 14 | step( 15 | "INSERT INTO departments (name) SELECT distinct department from employees;", 16 | "DELETE FROM departments;" 17 | ), 18 | step( 19 | "ALTER TABLE employees ADD COLUMN department_id integer;", 20 | "ALTER TABLE employees DROP COLUMN department_id;" 21 | ), 22 | step( 23 | """ 24 | UPDATE employees 25 | SET department_id=departments.id 26 | FROM departments 27 | WHERE employees.department = departments.name; 28 | """, 29 | "UPDATE employees set department_id = null;" 30 | ), 31 | step( 32 | "ALTER TABLE employees DROP COLUMN department;", 33 | """ 34 | ALTER TABLE employees ADD COLUMN department text; 35 | UPDATE employees 36 | SET department = departments.name 37 | FROM departments 38 | WHERE employees.department_id = departments.id; 39 | """ 40 | ), 41 | step( 42 | """ 43 | ALTER TABLE employees 44 | ADD CONSTRAINT fk_employees_departments FOREIGN KEY (department_id) REFERENCES departments (id); 45 | """, 46 | """ALTER TABLE employees DROP CONSTRAINT fk_employees_departments;""" 47 | ) 48 | ] 49 | -------------------------------------------------------------------------------- /2024/06 - June/postgres_pydantic/yoyo.ini: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | sources = %(here)s/migrations 3 | database = postgresql+psycopg://admin:secret@localhost/mathbyte?port=5432 4 | verbosity = 2 5 | batch_mode = on -------------------------------------------------------------------------------- /2024/08 - August/python-logging/README.md: -------------------------------------------------------------------------------- 1 | # Demystifying Python Logging - Coding 2 | 3 | This is a follow up to my previous post Demystifying Python logging - Concepts. 4 | If you have not watched it, and are not already familiar with Python logging concepts such as log records, levels, 5 | loggers and logger trees, handlers, formatters, filters, etc, I recommend that you watch that video first. 6 | 7 | This video is going to be a hands-on coding session where we'll explore all these topics. 8 | 9 | ## Useful Links and Info 10 | The following are useful links and information that we'll sometimes refer to in this video: 11 | - Python logging documentation: https://docs.python.org/3/library/logging.html 12 | - Logging levels: https://docs.python.org/3/library/logging.html#logging-levels 13 | - LogRecord attributes: https://docs.python.org/3/library/logging.html#logrecord-attributes 14 | - Built-in Handlers: https://docs.python.org/3/library/logging.handlers.html 15 | 16 | ## Examples 17 | We are going to work through a variety of examples in this video: 18 | 19 | 1. Example 1: Creating the root logger, inspecting a logger to determine some of its properties 20 | 2. Example 2: Creating the main application logger with a YAML file configuration 21 | 3. Example 3: Using code instead of a YAML file to configure the logger 22 | 4. Example 4: Setting up a FileHandler 23 | 5. Example 5: Setting up a RotatingFileHandler 24 | 6. Example 6: Setting up multiple Handlers 25 | 7. 
Example 7: Customizing the String Formatter 26 | 8. Example 8: Using the extra Parameter 27 | 9. Example 9: Creating Custom Filters 28 | 10. Example 10: Creating Custom Formatters 29 | 11. Example 11: Suppressing Logging Exceptions for Production 30 | 12. Example 12: Setting up and using Multiple Loggers 31 | 32 | 33 | ## Conclusion 34 | As you will have seen from these examples and my last video, conceptually, Python logging is not complicated. 35 | 36 | What can make things complex is the flexibility offered by the logging system, which combined with rather poor 37 | documentation, can make it difficult to sometimes understand what is going on with your logs. 38 | 39 | For this reason, I recommend that you always start simple, and only add complexity as you need it. 40 | 41 | There is also an additional way to configure your logging, which is to use the `basicConfig()` function. This is 42 | basically trying to simplify the dict configuration method, and is simpler than the code based setup I showed you. 43 | However, it is not introducing any new functionality, so I don't cover it here. If you understand the code 44 | we have discussed in this video, you'll understand exactly how to use `basicConfig`. 45 | 46 | Alternatively, you have 3rd party logging libraries that are more modern and easier to use, such as structlog, 47 | loguru, etc. However, I recommend that you first understand the Python logging module, as it is part of the 48 | standard library and you will likely encounter it in many projects. Many of the concepts in these 3rd party logging 49 | libraries are similar to those of the standard library logging module, and in fact can often integrate directly with it. 50 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_01/main.py: -------------------------------------------------------------------------------- 1 | """Example 1 2 | 3 | In this example we just create a root logger with no configuration, and log to that root logger, to see what happens. 4 | Note: as I mentioned in the concepts video, we do not normally log to the root logger, but here I just want to show you 5 | the default configuration for the root logger, as well as the "last resort" functionality. 6 | 7 | The default configuration for the root logger is a WARNING level, and no handlers, with last resort turned on. 
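Side note (worth verifying on your own interpreter - the exact repr can differ between Python versions): the last
resort handler is exposed as `logging.lastResort`, a stderr handler preconfigured at WARNING level:

    import logging

    print(logging.lastResort)                            # e.g. <_StderrHandler (WARNING)>
    print(logging.lastResort.level == logging.WARNING)   # True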
8 | """ 9 | 10 | import logging 11 | 12 | # With last resort on (the default) 13 | logger = logging.getLogger() 14 | 15 | # Inspect root logger configuration 16 | print(f"{logger.hasHandlers()=}") 17 | print(f"{logger.getEffectiveLevel()=}, {logging.WARNING=}") 18 | 19 | # Send various level log messages to the root logger 20 | logger.debug("This is a debug message") 21 | logger.info("This is an info message") 22 | logger.warning("This is a warning message") 23 | logger.error("This is an error message") 24 | logger.critical("This is a critical message") 25 | 26 | 27 | # Turn off last resort handler: 28 | logging.lastResort = False 29 | logger.warning("This is a warning message") 30 | logger.error("This is an error message") 31 | logger.critical("This is a critical message") 32 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_02/logger_config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 1 # this is needed, and is always 1 (at least at this time) 3 | disable_existing_loggers: true # turn off any existing loggers - this is the default, so don't actually need this 4 | 5 | # Define one or more formatters 6 | # You can name them whatever you want, here I chose to name it `simple` 7 | formatters: 8 | simple: 9 | style: "{" 10 | format: "SIMPLE FORMATTER: {asctime} - {name} - {levelname} - {message}" 11 | 12 | # Define one or more handlers 13 | # You can name them whatever you want, here I chose to name it `console` 14 | handlers: 15 | console: 16 | formatter: simple 17 | class: logging.StreamHandler 18 | stream: ext://sys.stdout 19 | 20 | # Configure root logger, and app logger 21 | loggers: 22 | root: 23 | level: DEBUG 24 | handlers: [console] 25 | 26 | app: 27 | level: INFO 28 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_02/main.py: -------------------------------------------------------------------------------- 1 | """Example 2 2 | 3 | In this example, we configure two loggers, the root logger and an app logger. 4 | 5 | In our first configuration we do the following: 6 | 1. Define a console handler, and a custom string formatter 7 | 2. Attach that handler to the root logger only, and set that logger to DEBUG level. 8 | 3. Define the app logger, setting it's level to INFO, with no handlers, but with propagation turned on (the default) 9 | 10 | When we do this, notice that debug logs do not show up in the console, whereas info and higher logs do. 11 | This is because the app logger is set to INFO, so debug messages will be rejected by that logger. Higher levels 12 | however, are accepted, and therefore propagate up to the root logger, where the console handler is attached. 13 | 14 | Let's tweak our configuration a bit: 15 | 1. Change app logger level to DEBUG 16 | So now, both the root logger and app logger are set to DEBUG level. 17 | When we do this, notice that the debug logs now show up. 18 | 19 | Let's tweak things a bit more: 20 | 1. Change root logger to WARNING. 21 | So now, the root logger is set to INFO, and the app logger is set to DEBUG. 22 | When we do this, any debug logs sent to the app **still** get handled by the root logger, this is because the app 23 | logger already accepted the request and propagated it up the logger chain. 24 | 25 | Finally, let's do one more tweak: 26 | 1. 
Change the app logger to disable propagation (`propagate: no` (or `false`, or `0`) 27 | 28 | When we do this, we essentially are logging to a logger that has **no** handlers, and does not propagate, so 29 | Python logging will automatically use the "last resort" logger, which is set to WARNING level, hence we only see 30 | log messages with levels of WARNING and up. 31 | """ 32 | 33 | import logging 34 | import logging.config 35 | 36 | from yaml import safe_load 37 | 38 | logger = logging.getLogger("app") 39 | 40 | 41 | def configure_loggers(): 42 | with open("logger_config.yaml") as f: 43 | config = safe_load(f) 44 | 45 | logging.config.dictConfig(config) 46 | 47 | 48 | def main(): 49 | logger.debug("This is a debug message") 50 | logger.info("This is an info message") 51 | logger.warning("This is a warning message") 52 | 53 | 54 | if __name__ == "__main__": 55 | configure_loggers() 56 | main() 57 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_03/main.py: -------------------------------------------------------------------------------- 1 | """Example 3 2 | 3 | In this example we are going to configure our loggers the same way we did in Example 2, but we'll use cde instead of 4 | a config dict. 5 | 6 | Notice how the app logger is a singleton object. 7 | """ 8 | 9 | import logging 10 | import sys 11 | 12 | 13 | def configure_loggers(): 14 | root_logger = logging.getLogger() 15 | app_logger = logging.getLogger("app") 16 | print(f"app logger id: {hex(id(app_logger))}") 17 | 18 | stream_handler = logging.StreamHandler(stream=sys.stdout) 19 | simple_formatter = logging.Formatter( 20 | "SIMPLE FORMATTER: {asctime} - {name} - {levelname} - {message}", 21 | style="{", 22 | ) 23 | stream_handler.setFormatter(simple_formatter) 24 | root_logger.addHandler(stream_handler) 25 | root_logger.setLevel(logging.DEBUG) 26 | 27 | app_logger.setLevel(logging.INFO) 28 | 29 | 30 | def main(): 31 | logger = logging.getLogger("app") 32 | print(f"app logger id: {hex(id(logger))}") 33 | logger.debug("This is a debug message") 34 | logger.info("This is an info message") 35 | logger.warning("This is a warning message") 36 | 37 | 38 | if __name__ == "__main__": 39 | configure_loggers() 40 | main() 41 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_04/logger_config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 1 3 | 4 | formatters: 5 | file_formatter: 6 | style: "{" 7 | format: "{asctime} - {name} - {levelname} - {module} - {funcName} - {message}" 8 | 9 | handlers: 10 | simple_file: 11 | formatter: file_formatter 12 | class: logging.FileHandler 13 | filename: logs/example4.log 14 | mode: a 15 | 16 | loggers: 17 | root: 18 | level: DEBUG 19 | handlers: [simple_file] 20 | 21 | app: 22 | level: INFO 23 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_04/main.py: -------------------------------------------------------------------------------- 1 | """Example 4 2 | 3 | In this example we are going to set up a simple File handler. 4 | 5 | Note that this file handler will just keep growing the log file over time, so you will need to 6 | establish some process to purge it yourself. 
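One simple approach (an illustrative sketch only, not part of this example - the size threshold is arbitrary) is to
truncate or delete the file at application startup, before logging is configured:

    from pathlib import Path

    log_file = Path("logs/example4.log")
    if log_file.exists() and log_file.stat().st_size > 10_000_000:  # ~10 MB
        log_file.unlink()  # or archive it somewhere instead of deleting it

A rotating file handler (see the next example) is usually the better answer.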
7 | """ 8 | 9 | import logging 10 | import logging.config 11 | 12 | from yaml import safe_load 13 | 14 | logger = logging.getLogger("app") 15 | 16 | 17 | def configure_loggers(): 18 | with open("logger_config.yaml") as f: 19 | config = safe_load(f) 20 | 21 | logging.config.dictConfig(config) 22 | 23 | 24 | def main(): 25 | logger.error("This is an error message: %s, %f.3f", "arg1", 0.333) 26 | logger.info("This is an info message") 27 | 28 | 29 | if __name__ == "__main__": 30 | configure_loggers() 31 | main() 32 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_05/logger_config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 1 3 | 4 | formatters: 5 | file_formatter: 6 | style: "{" 7 | format: "{asctime} - {name} - {levelname} - {module} - {funcName} - {message}" 8 | 9 | handlers: 10 | rotating_file: 11 | formatter: file_formatter 12 | class: logging.handlers.RotatingFileHandler 13 | filename: logs/app.log 14 | maxBytes: 1000 15 | backupCount: 3 16 | 17 | loggers: 18 | root: 19 | level: DEBUG 20 | handlers: [rotating_file] 21 | 22 | app: 23 | level: INFO 24 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_05/main.py: -------------------------------------------------------------------------------- 1 | """Example 5 2 | 3 | In this example, we are going to set up a rotating file handler. We'll do a size based handler 4 | with a ridiculously small size in order to see the rotation happening quickly. 5 | 6 | As you can see, as the log files fill up they are renamed with an appended number, and only 3 of the 7 | "old" log files are retained. The current log file is always named "app.log", as we set in our config. 
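For reference, the code-based equivalent of the handler defined in the YAML config would look roughly like this
(a sketch only - this example uses the YAML configuration):

    import logging
    import logging.handlers

    handler = logging.handlers.RotatingFileHandler(
        "logs/app.log", maxBytes=1000, backupCount=3
    )
    logging.getLogger().addHandler(handler)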
8 | """ 9 | 10 | import logging 11 | import logging.config 12 | 13 | from yaml import safe_load 14 | 15 | logger = logging.getLogger("app") 16 | 17 | 18 | def configure_loggers(): 19 | with open("logger_config.yaml") as f: 20 | config = safe_load(f) 21 | 22 | logging.config.dictConfig(config) 23 | 24 | 25 | def main(): 26 | for i in range(100): 27 | logger.info("Info message #%i", i) 28 | 29 | 30 | if __name__ == "__main__": 31 | configure_loggers() 32 | main() 33 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_06/logger_config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 1 3 | 4 | formatters: 5 | file_formatter: 6 | format: '{"time": "%(asctime)s", "logger": "%(name)s", "level": "%(levelname)s", "message": "%(message)s"}' 7 | console_formatter: 8 | style: "{" 9 | format: "{asctime} - {name} - {levelname} - {module} - {funcName} - {message}" 10 | 11 | handlers: 12 | rotating_file: 13 | class: logging.handlers.RotatingFileHandler 14 | formatter: file_formatter 15 | filename: logs/app.log 16 | maxBytes: 1000 17 | backupCount: 3 18 | 19 | console: 20 | level: WARNING 21 | class: logging.StreamHandler 22 | formatter: console_formatter 23 | 24 | loggers: 25 | root: 26 | level: DEBUG 27 | handlers: [rotating_file, console] 28 | 29 | app: 30 | level: INFO 31 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_06/main.py: -------------------------------------------------------------------------------- 1 | """Example 6 2 | 3 | In this example, we are going to set up a system to log all our logs to a rotating file, 4 | but also set things up, so that WARNING and higher logs will **also** log to the console. 5 | Also, while we're at it, we'll use a file formatter to emit JSON strings. I'll use the printf syntax just 6 | to show how that works. As we'll see in this example, this approach to generating JSON is only partially 7 | adequate - things go off the rails when we log exceptions (so, if emitting JSON this way, do not log 8 | exceptions, as the stack trace is going to interfere with your output). We'll come back to this at a 9 | later time. 
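For reference, the console handler defined in the YAML config is roughly equivalent to this code (a sketch only -
this example uses the YAML configuration; note the YAML does not specify a stream, so the handler defaults to stderr):

    import logging

    console = logging.StreamHandler()       # defaults to sys.stderr
    console.setLevel(logging.WARNING)       # handler-level filtering: only WARNING and up reach the console
    logging.getLogger().addHandler(console)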
10 | """ 11 | 12 | import logging 13 | import logging.config 14 | import time 15 | 16 | from yaml import safe_load 17 | 18 | logger = logging.getLogger("app") 19 | 20 | 21 | def configure_loggers(): 22 | with open("logger_config.yaml") as f: 23 | config = safe_load(f) 24 | 25 | logging.config.dictConfig(config) 26 | 27 | 28 | def main(): 29 | logger.info("Info message") 30 | time.sleep(0.1) 31 | logger.error("Error message") 32 | 33 | time.sleep(0.1) 34 | try: 35 | raise ValueError("A value error occurred.") 36 | except ValueError: 37 | logger.exception("An exception occurred.") 38 | 39 | 40 | if __name__ == "__main__": 41 | configure_loggers() 42 | main() 43 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_07/logger_config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 1 3 | 4 | formatters: 5 | simple: 6 | style: "{" 7 | format: "{asctime} - {name} - {levelname} - {message}" 8 | 9 | handlers: 10 | console: 11 | formatter: simple 12 | class: logging.StreamHandler 13 | stream: ext://sys.stdout 14 | 15 | loggers: 16 | root: 17 | level: DEBUG 18 | handlers: [console] 19 | 20 | app: 21 | level: DEBUG 22 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_07/main.py: -------------------------------------------------------------------------------- 1 | """Example 7 2 | 3 | In this example, we are going to customize the datetime output in our logs. 4 | 5 | By default, the logging system uses **local** time (usually servers are configured in UTC, but your local 6 | system probably is not. So, we'll want to somehow ensure that no matter what the local timezone is, the 7 | logs records are **always** in UTC. Additionally, we'll change the serialization format to something other 8 | than `2024-07-20 21:41:06,651`. 9 | 10 | One way to do this is to specify this globally in the logging library, using the `logging.Formatter.converter` 11 | attribute. We can set it to use the `gmtime` function in the `time` module. We could do this using pure 12 | configuration, but I'm not going to do this because in addition I want to customize the format string. 13 | 14 | For the actual formatting, we have to be a bit careful. The time converters (`time.localtime()` or `time.gmtime()` do 15 | not provide fractional seconds. The logging library gets around this by actually defining two format strings,one 16 | for the datetime at a seconds resolution, and a second format string just for the milliseconds. 17 | We can override both those string at the logging library level using the `logging.Formatter.default_time_format` and 18 | `logging.Formatter.default_msec_format` attributes. 19 | 20 | There are other ways of doing this, but this is probably the simplest way to do it (that I'm aware of). 21 | 22 | Note also that this method will affect all formatters, which is usually what we want. If you need to have different 23 | formatters with different time formats, you'll have to do something more complicated, like creating a custom Formatter 24 | subclass. 
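Such a subclass might look roughly like this (illustrative sketch only - this example sticks with the class-level
attributes described above; it assumes Python 3.11+ for datetime.UTC, as used elsewhere in this repo):

    import logging
    from datetime import UTC, datetime

    class UTCFormatter(logging.Formatter):
        def formatTime(self, record, datefmt=None):
            # always render the record's creation time in UTC, with millisecond precision
            dt = datetime.fromtimestamp(record.created, UTC)
            if datefmt:
                return dt.strftime(datefmt)
            return dt.strftime("%Y-%m-%dT%H:%M:%S.") + f"{dt.microsecond // 1000:03d}Z"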
25 | """ 26 | 27 | import logging 28 | import logging.config 29 | import time 30 | 31 | from yaml import safe_load 32 | 33 | logger = logging.getLogger("app") 34 | 35 | 36 | def configure_loggers(): 37 | with open("logger_config.yaml") as f: 38 | config = safe_load(f) 39 | 40 | logging.Formatter.converter = time.gmtime 41 | logging.Formatter.default_time_format = "%Y-%m-%dT%H:%M:%S" 42 | logging.Formatter.default_msec_format = ( 43 | "%s.%03dZ" # we know times will be UTC, so we can hardcode the Z suffix 44 | ) 45 | 46 | logging.config.dictConfig(config) 47 | 48 | 49 | def main(): 50 | logger.info("Info message") 51 | time.sleep(0.1) 52 | logger.error("Error message") 53 | 54 | 55 | if __name__ == "__main__": 56 | configure_loggers() 57 | main() 58 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_08/logger_config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 1 3 | 4 | formatters: 5 | simple: 6 | style: "{" 7 | format: "{asctime} - {name} - {levelname} - {message} (arg1={arg1}, arg2={arg2})" 8 | 9 | handlers: 10 | console: 11 | formatter: simple 12 | class: logging.StreamHandler 13 | stream: ext://sys.stdout 14 | 15 | loggers: 16 | root: 17 | level: DEBUG 18 | handlers: [console] 19 | 20 | app: 21 | level: DEBUG 22 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_08/main.py: -------------------------------------------------------------------------------- 1 | """Example 8 2 | 3 | In this example, we are going to look at how we can use the `extra` parameter. 4 | 5 | Note that in this example, because of the way de configured our formatter's format string, that `extra` 6 | dictionary **must** always contain the keys we use in the formatter. 
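One way to soften that requirement (an assumption to verify for your Python version - the `defaults` parameter of
`logging.Formatter` was added in Python 3.10) is to give the formatter fallback values for any keys that a log call
does not supply:

    import logging

    formatter = logging.Formatter(
        "{asctime} - {name} - {levelname} - {message} (arg1={arg1}, arg2={arg2})",
        style="{",
        defaults={"arg1": "-", "arg2": "-"},
    )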
We'll come back to this later 7 | in the context of structured logging and custom formatters 8 | """ 9 | 10 | import logging 11 | import logging.config 12 | import time 13 | 14 | from yaml import safe_load 15 | 16 | logger = logging.getLogger("app") 17 | 18 | 19 | def configure_loggers(): 20 | with open("logger_config.yaml") as f: 21 | config = safe_load(f) 22 | 23 | logging.config.dictConfig(config) 24 | 25 | 26 | def main(): 27 | logger.info("Info message", extra={"arg1": 100, "arg2": "test 1"}) 28 | time.sleep(0.1) 29 | logger.error("Error message", extra={"arg1": 200, "arg2": "test 2"}) 30 | 31 | 32 | if __name__ == "__main__": 33 | configure_loggers() 34 | main() 35 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_09/logger_config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 1 3 | 4 | formatters: 5 | simple: 6 | style: "{" 7 | format: "{asctime} - {name} - {levelname} - {message}" 8 | special: 9 | style: "{" 10 | format: "Special handling: {asctime} - {name} - {levelname} - {message}" 11 | 12 | filters: 13 | 'my_filter': 14 | '()': __main__.CustomFilter # () simply tells logging system that this is a callable 15 | 'arg_name': 'my_arg' 16 | 'arg_threshold': 100 17 | 18 | handlers: 19 | console: 20 | formatter: simple 21 | class: logging.StreamHandler 22 | stream: ext://sys.stdout 23 | special: 24 | level: WARNING 25 | formatter: special 26 | class: logging.StreamHandler 27 | stream: ext://sys.stdout 28 | filters: ['my_filter'] 29 | 30 | loggers: 31 | root: 32 | level: DEBUG 33 | handlers: [console, special] 34 | 35 | app: 36 | level: INFO 37 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_09/main.py: -------------------------------------------------------------------------------- 1 | """Example 9 2 | 3 | In this example we'll look at how to create custom filters. 4 | 5 | We'll set up two handlers, one of which will look for some value in the `extra` dictionary, and only handle the log 6 | record if that value satisfies some condition. 7 | 8 | Specifically we are going to configure our logger this way: 9 | 1. App logger will be set to WARNING 10 | 2. App logger wil have two handlers, the "regular" handler which will log all 11 | log records it receives (INFO and up), and a "special" handler, which will 12 | only handle WARNING and higher levels, as well as check that some value(s) in the `extra` 13 | data meet some condition(s) before it will handle the log. 14 | 15 | The custom filter needs to be defined in Python code, that's not something we can 16 | specify in the configuration, but we will make a reference to it in the config when we attach it 17 | to the specific handler. Side note, you can also attach filters to the logger itself, using the same 18 | approach, just setting the configuration at the logger level instead of the handler level. 19 | 20 | This example also shows how we can pass filter configuration values from the YAML config file to the filter class 21 | when it gets instantiated by the logging system. 22 | 23 | You'll see in the configuration file that we use something weird in the filter definition: `()`. 24 | This is meant to be a hint to the logging system that it should instantiate the class, and pass the 25 | configuration. 
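In plain code (a sketch of what the config machinery effectively does for us - not something this example runs),
this amounts to instantiating the filter ourselves and attaching it to the handler, or to the logger:

    my_filter = CustomFilter(arg_name="my_arg", arg_threshold=100)
    some_handler.addFilter(my_filter)              # attach to a handler (some_handler is hypothetical here)...
    logging.getLogger("app").addFilter(my_filter)  # ...or attach it to the logger itself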
26 | This is documented here: https://docs.python.org/3/library/logging.config.html#user-defined-objects 27 | """ 28 | 29 | import logging 30 | import logging.config 31 | 32 | from yaml import safe_load 33 | 34 | logger = logging.getLogger("app") 35 | 36 | 37 | class CustomFilter(logging.Filter): 38 | def __init__(self, arg_name: str = None, arg_threshold: int = None): 39 | self.arg_name = arg_name 40 | self.arg_threshold = arg_threshold 41 | 42 | def filter(self, record: logging.LogRecord): # noqa: A003 43 | # Here we look for the arg attribute. If the attribute is present, and it's value 44 | # is > self.arg_threshold, then we allow the record to be processed (return True), 45 | # otherwise we filter it out (return False) 46 | return ( 47 | self.arg_name 48 | and self.arg_threshold 49 | and hasattr(record, self.arg_name) 50 | and getattr(record, self.arg_name) > self.arg_threshold 51 | ) 52 | 53 | 54 | def configure_loggers(): 55 | with open("logger_config.yaml") as f: 56 | config = safe_load(f) 57 | 58 | logging.config.dictConfig(config) 59 | 60 | 61 | def main(): 62 | logger.error("Error message 1, my_arg not specified") 63 | logger.error("Error message 2, my_arg=200", extra={"my_arg": 200}) 64 | logger.error("Error message 3, my_arg=50", extra={"my_arg": 50}) 65 | 66 | 67 | if __name__ == "__main__": 68 | configure_loggers() 69 | main() 70 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_10/logger_config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 1 3 | 4 | formatters: 5 | json: 6 | "()": __main__.JSONFormatter 7 | 8 | handlers: 9 | console: 10 | formatter: json 11 | class: logging.StreamHandler 12 | stream: ext://sys.stdout 13 | 14 | loggers: 15 | root: 16 | level: DEBUG 17 | handlers: [console] 18 | 19 | app: 20 | level: INFO 21 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_10/main.py: -------------------------------------------------------------------------------- 1 | """Example 10 2 | 3 | In this example we'll look at how to write a custom formatter. 4 | 5 | We'll go back to our previous JSON formatter, but this time we'll make it a bit more complex 6 | and give it the ability to handle exceptions properly. 7 | 8 | Note that you do have 3rd party libraries that will do this, and a lot more, for you. 9 | 10 | For example the structlog library is a very powerful library that can handle structured logging. Slightly simpler 11 | alternatives exist too, that just focus on a JSON formatter - but, like libraries in general, they need to cater 12 | to a wide range of use cases, and so can be a bit more complex than what you need. 13 | 14 | Here, I want to do things from first principles, so we understand what's going on under the hood. 15 | Once you do, then feel free to use those 3rd party libraries. 16 | 17 | Also, to be completely honest, I rarely need the advanced functionality these 3rd party libraries provide, 18 | and often, for me at least, simpler is better. One less library to learn, one less set of unverified code 19 | included in my code base (do you really check that the library you are using is safe??), and one less thing that 20 | can go wrong or functionality that gets deprecated over time. Basically, if I can implement a piece of functionality 21 | quickly and effectively, I'll do it myself, rather than rely on a library. Enough with the soapbox! 
22 | """ 23 | 24 | import inspect 25 | import json 26 | import logging 27 | import logging.config 28 | from datetime import UTC, datetime 29 | 30 | from yaml import safe_load 31 | 32 | logger = logging.getLogger("app") 33 | 34 | 35 | def serialize_local_timestamp(t: float) -> str: 36 | dt = datetime.fromtimestamp(t, UTC) 37 | return dt.strftime("%Y-%m-%dT%H:%M:%S.%fZ") 38 | 39 | 40 | class JSONFormatter(logging.Formatter): 41 | def format(self, record: logging.LogRecord): # noqa: A003 42 | # Create a dictionary that gathers all the info we want to log 43 | # We'll need to make sure whatever we gather is JSON serializable (or 44 | # we can alternatively create a custom JSON encoder) 45 | # We could even leverage Pydantic to do this for us, especially if we already use Pydantic 46 | # in our application. But let's keep it simple, and just see how custom formatters work. 47 | 48 | # Notes: 49 | # The attribute record.message is the raw (non-interpolated message string). Instead, we use 50 | # .getMessage() to have the log record return the interpolated message string. 51 | # The log creation time is available in record.created, which is a float representing the time in 52 | # seconds, but as a local time. We can convert this to UTC and with whatever string serialization we want. 53 | # We'll want to serialize the exception info and stack trace, and although the record provides us that info, 54 | # we would need to serialize that ourselves - instead, we can use the Formatter class's built-in methods 55 | # formatException() and formatStack() to do this for us. 56 | 57 | log_dict = { 58 | "time": serialize_local_timestamp(record.created), 59 | "loggerName": record.name, 60 | "levelName": record.levelname, 61 | "levelNumber": record.levelno, 62 | "message": record.getMessage(), 63 | "module": record.module, 64 | "filename": record.filename, 65 | "filePath": record.pathname, 66 | "funcName": record.funcName, 67 | "exceptionInfo": ( 68 | self.formatException(record.exc_info) 69 | if record.exc_info 70 | else None 71 | ), 72 | "stackTrace": ( 73 | self.formatStack(record.stack_info) 74 | if record.stack_info 75 | else None 76 | ), 77 | 78 | } 79 | 80 | # Identify attributes that were passed in extras - no direct way to do this, so we'll 81 | # compare what a regular log record without any extras looks like, and identify keys 82 | # in the current log record that are not present in the plain-vanilla log record. 
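        # For reference, LogRecord's positional parameters below are:
        # name, level, pathname, lineno, msg, args, exc_info - the bare minimum
        # needed to construct a throwaway record to compare against.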
83 | blank_record = logging.LogRecord("", 0, "", 0, "", (), None) 84 | standard_fields = {key for key, _ in inspect.getmembers(blank_record)} 85 | for key, value in inspect.getmembers(record): 86 | if key not in standard_fields: 87 | log_dict[key] = value 88 | 89 | return json.dumps(log_dict) 90 | 91 | 92 | def configure_loggers(): 93 | with open("logger_config.yaml") as f: 94 | config = safe_load(f) 95 | 96 | logging.config.dictConfig(config) 97 | 98 | 99 | def main(): 100 | logger.info("Message 1") 101 | logger.info("Message 2: val=%s", "value") 102 | logger.error("Message 3", extra={"a": 1, "b": 2, "c": 3}) 103 | 104 | try: 105 | raise ValueError("This is a test exception") 106 | except ValueError: 107 | logger.exception("An exception occurred", stack_info=True) 108 | 109 | 110 | if __name__ == "__main__": 111 | configure_loggers() 112 | main() 113 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_11/logger_config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 1 3 | 4 | formatters: 5 | json: 6 | "()": __main__.JSONFormatter 7 | 8 | handlers: 9 | console: 10 | formatter: json 11 | class: logging.StreamHandler 12 | stream: ext://sys.stdout 13 | file: 14 | formatter: json 15 | class: logging.FileHandler 16 | filename: logs/app.log 17 | 18 | loggers: 19 | root: 20 | level: DEBUG 21 | handlers: [console, file] 22 | 23 | app: 24 | level: INFO 25 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_11/main.py: -------------------------------------------------------------------------------- 1 | """Example 11 2 | 3 | In this example I just want to talk about what happens to our application when an exception occurs during logging. 4 | 5 | As we are developing our application, this is fine - we definitely want to be aware of issues with our logging code. 6 | 7 | But, when we move to production, we probably do not want an exception in our logging to affect the application itself. 8 | 9 | We can tell the logging library to suppress these exceptions, and let our app continue running uninterrupted, by 10 | setting a flag, at the library level: logging.raiseExceptions = False. Unfortunately, we this flag does not seem 11 | to be supported in the dict config, so we have to do this in code (so we'd probably want to use an ENV var in order to easily 12 | set it differently in various environments) 13 | 14 | The app will continue running in both cases, but in the latter case, we won't see the 15 | exception in the console. And if we are logging to that same console, then we are essentially "polluting" our logs. 16 | 17 | To demonstrate this, I'm going to take the custom handler code we did in the last example, and introduce a bug. 18 | 19 | We'll see how the application behaves, then we'll set that flag and see what changes. 
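In a real application the flag might be driven by an environment variable, so it can differ per environment
(illustrative sketch only - the variable name LOGGING_RAISE_EXCEPTIONS is made up for this example):

    import logging
    import os

    logging.raiseExceptions = os.getenv("LOGGING_RAISE_EXCEPTIONS", "true").lower() in ("1", "true", "yes")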
20 | """ 21 | 22 | import json 23 | import logging 24 | import logging.config 25 | from datetime import UTC, datetime 26 | 27 | from yaml import safe_load 28 | 29 | logger = logging.getLogger("app") 30 | 31 | 32 | def serialize_local_timestamp(t: float) -> str: 33 | dt = datetime.fromtimestamp(t, UTC) 34 | return dt.strftime("%Y-%m-%dT%H:%M:%S.%fZ") 35 | 36 | 37 | class JSONFormatter(logging.Formatter): 38 | def format(self, record: logging.LogRecord): # noqa: A003 39 | log_dict = { 40 | "time": serialize_local_timestamp(record.created), 41 | "loggerName": record.name, 42 | "levelName": record.levelname, 43 | "levelNumber": record.levelno, 44 | "message": record.getMessage(), 45 | "module": record.module, 46 | "filename": record.filename, 47 | "filePath": record.pathname, 48 | "funcName": record.funcName, 49 | "exceptionInfo": record.exc_info, 50 | } 51 | 52 | return json.dumps(log_dict) 53 | 54 | 55 | def configure_loggers(raise_exceptions: bool = True): 56 | with open("logger_config.yaml") as f: 57 | config = safe_load(f) 58 | 59 | logging.config.dictConfig(config) 60 | logging.raiseExceptions = raise_exceptions 61 | 62 | 63 | def main(): 64 | try: 65 | raise ValueError("This is a test exception") 66 | except ValueError: 67 | logger.exception("An exception occurred", stack_info=True) 68 | 69 | logger.info("App continues running after logging exception") 70 | 71 | 72 | if __name__ == "__main__": 73 | configure_loggers(raise_exceptions=False) 74 | main() 75 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_12/configs/log_config.py: -------------------------------------------------------------------------------- 1 | """App Configuration""" 2 | 3 | import logging 4 | import logging.config 5 | import sys 6 | 7 | from yaml import safe_load 8 | 9 | 10 | def inspect_logger_disabling(): 11 | # sourced from: https://stackoverflow.com/a/28694704 12 | @property 13 | def disabled(self): 14 | try: 15 | return self._disabled 16 | except AttributeError: 17 | return False 18 | 19 | @disabled.setter 20 | def disabled(self, disabled): 21 | if disabled: 22 | frame = sys._getframe(1) 23 | print( 24 | f"{frame.f_code.co_filename}:{frame.f_lineno} " 25 | f"disabled the {self.name} logger" 26 | ) 27 | self._disabled = disabled 28 | 29 | logging.Logger.disabled = disabled 30 | 31 | 32 | def configure_loggers(raise_exceptions: bool = True): 33 | with open("logger_config.yaml") as f: 34 | config = safe_load(f) 35 | 36 | logging.config.dictConfig(config) 37 | logging.raiseExceptions = raise_exceptions 38 | 39 | 40 | inspect_logger_disabling() 41 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_12/configs/logger_formatters.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | from datetime import UTC, datetime 4 | 5 | 6 | def serialize_local_timestamp(t: float) -> str | None: 7 | dt = datetime.fromtimestamp(t, UTC) 8 | return dt.strftime("%Y-%m-%dT%H:%M:%S.%fZ") 9 | 10 | 11 | class JSONFormatter(logging.Formatter): 12 | def format(self, record: logging.LogRecord): # noqa: A003 13 | log_dict = { 14 | "time": serialize_local_timestamp(record.created), 15 | "loggerName": record.name, 16 | "levelName": record.levelname, 17 | "levelNumber": record.levelno, 18 | "message": record.getMessage(), 19 | "module": record.module, 20 | "filename": record.filename, 21 | "filePath": record.pathname, 22 | "funcName": 
record.funcName, 23 | "exceptionInfo": record.exc_info, 24 | } 25 | 26 | return json.dumps(log_dict) 27 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_12/logger_config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | version: 1 3 | disable_existing_loggers: false 4 | 5 | formatters: 6 | json: 7 | "()": configs.logger_formatters.JSONFormatter 8 | simple: 9 | style: "{" 10 | format: "{asctime} - {name} - {levelname} - {message}" 11 | 12 | handlers: 13 | console: 14 | formatter: json 15 | class: logging.StreamHandler 16 | stream: ext://sys.stdout 17 | 18 | utils_handler: 19 | formatter: simple 20 | class: logging.FileHandler 21 | filename: logs/utils.log 22 | mode: w # this mode re-creates the file every time app starts 23 | 24 | loggers: 25 | root: 26 | level: DEBUG 27 | handlers: [console] 28 | 29 | my_app: 30 | level: DEBUG 31 | 32 | utils: 33 | level: DEBUG 34 | handlers: [utils_handler] 35 | propagate: false 36 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_12/main.py: -------------------------------------------------------------------------------- 1 | """Example 12 2 | 3 | In this example we are going to see how to use the logger hierarchy. This example will use one logger 4 | per module in some parts of our application, and also use less granular loggers for other parts, 5 | leveraging the fact that loggers are singleton objects. 6 | 7 | There is one major kind of issue we need to contend with. The problem is that logging was probably 8 | not designed around using module-level global variables to hold the logger instances. 9 | When you get a logger (getLogger(name)), it will create that logger if it does not exist. 10 | 11 | If you use a global variable in your module to get and store a reference to the logger for that module (so that you can 12 | then use it in various classes and functions in that module without having to call getLogger(name) inside every 13 | function you implement), then be aware that the logger is created immediately upon import. 14 | 15 | The problem arises if this import happens **before** you configure the logging system. 16 | The way logging is implemented, when you run the configuration for your logging system, it will run through each 17 | existing logger and disable any existing loggers not defined in your config (unless you explicitly set the 18 | disable_existing_loggers setting to False in the config file - it defaults to True). 19 | I will show you this in the code. 20 | 21 | So, if disable_existing_loggers is set to True (or not set, since the default is True), **and** you create loggers 22 | in your code **before** the configuration happens, those loggers will be disabled and won't work as expected. 23 | 24 | So, the moral of the story is that you need to make sure you do not run getLogger(name) before you configure 25 | the logging system, or just set disable_existing_loggers to False. 26 | 27 | Here I chose to set disable_existing_loggers to False.
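
A minimal sketch of the pitfall (illustration only - this snippet is not part of the example code below,
and it assumes disable_existing_loggers is left at its default of True):

    import logging
    import logging.config

    early_logger = logging.getLogger("created_too_early")  # created at import time, before configuration runs

    logging.config.dictConfig({"version": 1})  # disable_existing_loggers defaults to True

    early_logger.warning("this record is silently dropped")  # the logger was disabled by dictConfig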
28 | """ 29 | 30 | import logging 31 | from datetime import datetime 32 | 33 | from configs import log_config 34 | from services import aws, azure, gcp 35 | from utils import formatters 36 | 37 | if __name__ == "__main__": 38 | print("*** Running log configuration...") 39 | log_config.configure_loggers() 40 | 41 | # Logging to my_app logger 42 | # Create the logger here is fine, not only is it created after configuration has happened, but it is also 43 | # explicitly defined in the configs 44 | logger = logging.getLogger("my_app") 45 | logger.info("This is a test message from the main module") 46 | 47 | # Will log to services.aws, services.azure and services.gcp loggers 48 | # Those loggers are not explicitly defined in the configs 49 | aws.list_s3_bucket("aws_bucket") 50 | azure.get_sql_server_connection() 51 | gcp.upload_file("source", "target") 52 | 53 | # Will log to utils logger 54 | # That logger is explicitly defined in the configs 55 | formatters.format_date_standard(datetime.now()) 56 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_12/services/aws.py: -------------------------------------------------------------------------------- 1 | """AWS Helper Functions""" 2 | 3 | import logging 4 | 5 | from utils.loggers import inspect_logger 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | def list_s3_bucket(bucket_name: str): 11 | inspect_logger(__name__) 12 | logger.debug("Listing bucket contents", extra={"bucketName": bucket_name}) 13 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_12/services/azure.py: -------------------------------------------------------------------------------- 1 | """Azure Helper Functions""" 2 | 3 | import logging 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | 8 | def get_sql_server_connection(): 9 | logger.info("Getting SQL server connection") 10 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_12/services/gcp.py: -------------------------------------------------------------------------------- 1 | """GCP Helper Functions""" 2 | 3 | import logging 4 | 5 | logger = logging.getLogger(__name__) 6 | 7 | 8 | def upload_file(source, target): 9 | logger.warning( 10 | "Uploading file using deprecated function", 11 | extra={"source": source, "target": target}, 12 | ) 13 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_12/utils/formatters.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import datetime 3 | 4 | from utils.loggers import inspect_logger 5 | 6 | logger = logging.getLogger("utils") 7 | inspect_logger("utils") 8 | 9 | 10 | def format_date_standard(dt: datetime) -> str: 11 | inspect_logger("utils") 12 | logger.info("Formatting datetime", extra={"datetime": dt}) 13 | return dt.strftime("%Y-%m-%d %H:%M:%S") 14 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_12/utils/loggers.py: -------------------------------------------------------------------------------- 1 | """Some utility functions for loggers""" 2 | 3 | import logging 4 | 5 | 6 | def inspect_logger(name: str): 7 | if name not in logging.Logger.manager.loggerDict: 8 | print(f"Logger {name} not found") 9 | else: 10 | logger = logging.getLogger(name) 11 | 
print("-" * len(name)) 12 | print(name) 13 | print("-" * len(name)) 14 | print("Effective level: ", logger.getEffectiveLevel()) 15 | print("Handlers:", logger.handlers) 16 | print("Disabled? ", logger.disabled) 17 | print("=" * len(name)) 18 | -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_12/utils/times.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fbaptiste/python-blog/f10c3b29c4d5e80a65d23bc36d2e1d51e8851856/2024/08 - August/python-logging/example_12/utils/times.py -------------------------------------------------------------------------------- /2024/08 - August/python-logging/example_12/utils/validators.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fbaptiste/python-blog/f10c3b29c4d5e80a65d23bc36d2e1d51e8851856/2024/08 - August/python-logging/example_12/utils/validators.py -------------------------------------------------------------------------------- /Idiomatic_Python/13_using_named_arguments.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "fd184b7a", 6 | "metadata": {}, 7 | "source": [ 8 | "### Idiomatic Python: Using Named Arguments" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "8dc2f59f", 14 | "metadata": {}, 15 | "source": [ 16 | "When we write a function in Python, we have the option of creating **positional** arguments as well as **keyword-only** arguments.\n", 17 | "\n", 18 | "A recent addition to Python is the ability to also define **positional-only** arguments for our functions, just like the built-ins (which are written in C) always could do, but we couldn't in user defined callables." 
19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "id": "e3a919e6", 24 | "metadata": {}, 25 | "source": [ 26 | "Let's take a look at an example:" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 1, 32 | "id": "282284df", 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "def my_func(a, b, c):\n", 37 | " return(f\"{a=}, {b=}, {c=}\")" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "id": "0f4bdf70", 43 | "metadata": {}, 44 | "source": [ 45 | "In this case `my_func` has three positional arguments, which means we can call the function this way:" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 2, 51 | "id": "eb48c12a", 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/plain": [ 57 | "'a=1, b=2, c=3'" 58 | ] 59 | }, 60 | "execution_count": 2, 61 | "metadata": {}, 62 | "output_type": "execute_result" 63 | } 64 | ], 65 | "source": [ 66 | "my_func(1, 2, 3)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "id": "a552f979", 72 | "metadata": {}, 73 | "source": [ 74 | "But, Python also allows us to pass these same arguments by name:" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 3, 80 | "id": "602c03c3", 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "'a=1, b=2, c=3'" 87 | ] 88 | }, 89 | "execution_count": 3, 90 | "metadata": {}, 91 | "output_type": "execute_result" 92 | } 93 | ], 94 | "source": [ 95 | "my_func(a=1, b=2, c=3)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "id": "bd145d96", 101 | "metadata": {}, 102 | "source": [ 103 | "Or even, just some of them by name:" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 4, 109 | "id": "45f46cd2", 110 | "metadata": {}, 111 | "outputs": [ 112 | { 113 | "data": { 114 | "text/plain": [ 115 | "'a=1, b=2, c=3'" 116 | ] 117 | }, 118 | "execution_count": 4, 119 | "metadata": {}, 120 | "output_type": "execute_result" 121 | } 122 | ], 123 | "source": [ 124 | "my_func(1, b=2, c=3)" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "id": "05e45e6c", 130 | "metadata": {}, 131 | "source": [ 132 | "The advantage of using named arguments is that we do not have to worry about the specific order in which the arguments have to be passed." 
133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 5, 138 | "id": "be94306f", 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "data": { 143 | "text/plain": [ 144 | "'a=1, b=2, c=3'" 145 | ] 146 | }, 147 | "execution_count": 5, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": [ 153 | "my_func(b=2, c=3, a=1)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "id": "0b883357", 159 | "metadata": {}, 160 | "source": [ 161 | "Not so with positional arguments:" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 6, 167 | "id": "6942808c", 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "data": { 172 | "text/plain": [ 173 | "'a=2, b=3, c=1'" 174 | ] 175 | }, 176 | "execution_count": 6, 177 | "metadata": {}, 178 | "output_type": "execute_result" 179 | } 180 | ], 181 | "source": [ 182 | "my_func(2, 3, 1)" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "id": "f00532a7", 188 | "metadata": {}, 189 | "source": [ 190 | "So, the issue, as we are writing code that calls functions that have many arguments, is that if we pass those arguments positionally, we have to **remember** the specific order in which the arguments need to be passed." 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "id": "2f4560ef", 196 | "metadata": {}, 197 | "source": [ 198 | "This can easily lead to decreased productivity (keep having to go back to your function definition, or docs), and easily introducing bugs in your app." 199 | ] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "id": "93664cac", 204 | "metadata": {}, 205 | "source": [ 206 | "One of my favorite examples is if a function requires both a longitude and a latitude:" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 7, 212 | "id": "c3adadad", 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "def position(longitude, latitude):\n", 217 | " return f\"{longitude=}, {latitude=}\"" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "id": "6a845449", 223 | "metadata": {}, 224 | "source": [ 225 | "Now when I call `position`, I have to everytime think \"does longitude come before latitude?\".\n", 226 | "\n", 227 | "And if I get it wrong, I have mistakenly swapped the longitude and the latitude." 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "id": "ed99b37b", 233 | "metadata": {}, 234 | "source": [ 235 | "So, even though the function itself does not enforce passing the arguments by name, as a developer I can still choose to do so:" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 8, 241 | "id": "837cdaef", 242 | "metadata": {}, 243 | "outputs": [ 244 | { 245 | "data": { 246 | "text/plain": [ 247 | "'longitude=-3, latitude=10'" 248 | ] 249 | }, 250 | "execution_count": 8, 251 | "metadata": {}, 252 | "output_type": "execute_result" 253 | } 254 | ], 255 | "source": [ 256 | "position(latitude=10, longitude=-3)" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "id": "5a95df55", 262 | "metadata": {}, 263 | "source": [ 264 | "When you are calling a function that has multiple positional arguments, the better, more pythonic way, is to call that function with named arguments." 
265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "id": "f6a0b4b6", 270 | "metadata": {}, 271 | "source": [ 272 | "In fact, I will often write my own functions this way:" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 9, 278 | "id": "166f5817", 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [ 282 | "def position(*, longitude, latitude):\n", 283 | " return f\"{longitude=}, {latitude=}\"" 284 | ] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "id": "22218fb3", 289 | "metadata": {}, 290 | "source": [ 291 | "I often get asked \"what does that do?\" by people reviewing my code. \n", 292 | "\n", 293 | "They usually just don't understand what that `*,` syntax does - it makes the parameters defined after it **keyword-only** arguments. This way, I can **force** someone who calls my `position` function to used named arguments." 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "id": "04e7c497", 299 | "metadata": {}, 300 | "source": [ 301 | "Why do I do this?\n", 302 | "\n", 303 | "I do this to help other developers using my function decrease the odds of introducing a bug. Not every developer is going to use named arguments in these cases, so I force them to. Out of kindness and concern for their well-being (and also so I don't have to sit three months later trying to debug their code when something breaks in production in the middle of the night 😉) " 304 | ] 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "id": "eb6bbb0b", 309 | "metadata": {}, 310 | "source": [ 311 | "> **Bottom line**: when calling a function that has multiple arguments, and the order of these arguments is meaningful, call the function using named arguments if the functions allows it (even if the function does not force you to do so).\n", 312 | ">\n", 313 | ">It makes the code far more expressive, and decreases the chances for introducing bugs. It takes just a few extra keystrokes, and does not impact performance in any significant way.\n", 314 | ">\n", 315 | ">You'll be a better programmer for it!" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "id": "88bb8663", 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [] 325 | } 326 | ], 327 | "metadata": { 328 | "kernelspec": { 329 | "display_name": "Python 3 (ipykernel)", 330 | "language": "python", 331 | "name": "python3" 332 | }, 333 | "language_info": { 334 | "codemirror_mode": { 335 | "name": "ipython", 336 | "version": 3 337 | }, 338 | "file_extension": ".py", 339 | "mimetype": "text/x-python", 340 | "name": "python", 341 | "nbconvert_exporter": "python", 342 | "pygments_lexer": "ipython3", 343 | "version": "3.11.1" 344 | } 345 | }, 346 | "nbformat": 4, 347 | "nbformat_minor": 5 348 | } 349 | -------------------------------------------------------------------------------- /Idiomatic_Python/14_decomposition/original/swapi.py: -------------------------------------------------------------------------------- 1 | """SW API Demo 2 | 3 | A perfect example of how NOT to write code. 
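Paging, timeout/retry handling, response parsing, filtering, and the top-level flow are all tangled together
in one long function - the refactor_1 through refactor_5 folders progressively decompose it.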
4 | """ 5 | import requests 6 | from requests.exceptions import Timeout, RequestException 7 | 8 | 9 | def starship_films(min_capacity): 10 | # query API, with retry if timeout happens, up to some max number of times 11 | attempt_number = 1 12 | max_attempts = 5 13 | initial_timeout = 1 14 | current_timeout = initial_timeout 15 | ships = [] # holds collection of all ships retried from paging API 16 | request_url = "https://swapi.dev/api/starships" # default to first page 17 | while True: 18 | print(f"\trequesting: {request_url}") 19 | # max number of retries exceeded 20 | if attempt_number > max_attempts: 21 | print("Exceeded max number of attempts") 22 | return 23 | 24 | # query API (with specific page) 25 | try: 26 | response = requests.get( 27 | request_url, 28 | headers={"Content-Type": "application/json"}, 29 | timeout=current_timeout 30 | ) 31 | response.raise_for_status() 32 | except Timeout: 33 | print("Request time out. Trying again with a longer timeout.") 34 | current_timeout *= 2 35 | attempt_number += 1 36 | continue 37 | except RequestException as ex: 38 | print(f"API query failed - aborting: {ex}") 39 | return 40 | 41 | # get JSON from response, and add ships from results to main ships list 42 | data = response.json() 43 | ships.extend(data.get("results", [])) 44 | 45 | # is there a next page? 46 | if data.get('next'): 47 | request_url = data['next'] 48 | # reset retry count and timeout (we start fresh for each page) 49 | current_timeout = initial_timeout 50 | attempt_number = 1 51 | else: 52 | # done getting all results 53 | break 54 | 55 | # find all ships with a minimum cargo capacity 56 | film_urls = set() 57 | for ship in ships: 58 | try: 59 | cargo_capacity = int(ship["cargo_capacity"]) 60 | except (KeyError, ValueError): 61 | # could not get a numeric cargo capacity, skip this ship 62 | continue 63 | if cargo_capacity >= min_capacity: 64 | if ship.get("films"): 65 | film_urls.update(ship["films"]) 66 | 67 | # Collect film titles for each film identified (paging not needed, but want timeout retries) 68 | film_titles = set() 69 | for film_url in film_urls: 70 | attempt_number = 1 71 | initial_timeout = 2 72 | current_timeout = initial_timeout 73 | try: 74 | while True: 75 | print(f"\trequesting: {film_url}") 76 | # max number of retries exceeded 77 | if attempt_number > max_attempts: 78 | print("Exceeded max number of attempts") 79 | raise StopIteration # we don't want to abort call, we'll just skip this film 80 | 81 | # query API 82 | try: 83 | response = requests.get( 84 | film_url, 85 | headers={"Content-Type": "application/json"}, 86 | timeout=current_timeout 87 | ) 88 | response.raise_for_status() 89 | except Timeout: 90 | print("Request time out. 
Trying again with a longer timeout.") 91 | current_timeout *= 2 92 | attempt_number += 1 93 | continue 94 | except RequestException as ex: 95 | print(f"API query failed - aborting: {ex}") 96 | return 97 | 98 | data = response.json() 99 | title = data.get("title") 100 | if title: 101 | film_titles.add(title) 102 | break 103 | except StopIteration: 104 | # inner loop wants to just move on to next film 105 | continue 106 | 107 | # return all film names as a sorted list 108 | return sorted(film_titles) 109 | 110 | 111 | if __name__ == '__main__': 112 | films = starship_films(1_000) 113 | print("=" * 50) 114 | for film in films: 115 | print(film) -------------------------------------------------------------------------------- /Idiomatic_Python/14_decomposition/refactor_1/main.py: -------------------------------------------------------------------------------- 1 | """SWAPI Demo 2 | 3 | Note: Technically SWAPI provides an SDK, but here my goal is to show 4 | how to use decomposition to structure our code in a more readable, manageable, 5 | and testable way. 6 | """ 7 | 8 | from services.swapi import api as swapi 9 | 10 | def main(min_capacity: int): 11 | # get all ships 12 | ships = swapi.all_starships() 13 | 14 | # filter the ones we want, and just store the film urls 15 | film_urls = set() 16 | for ship in ships: 17 | try: 18 | cargo_capacity = int(ship["cargo_capacity"]) 19 | except (KeyError, ValueError): 20 | # could not get a numeric cargo capacity, skip this ship 21 | continue 22 | if cargo_capacity >= min_capacity: 23 | if ship.get("films"): 24 | film_urls.update(ship["films"]) 25 | 26 | # retrieve and return all the film titles 27 | return swapi.film_titles(film_urls) 28 | 29 | 30 | if __name__ == '__main__': 31 | titles = main(1_000) 32 | print("=" * 50) 33 | for title in titles: 34 | print(title) -------------------------------------------------------------------------------- /Idiomatic_Python/14_decomposition/refactor_1/services/swapi/api.py: -------------------------------------------------------------------------------- 1 | """SWAPI Service 2 | 3 | Used for querying the SWAPI API. 4 | """ 5 | import requests 6 | from requests.exceptions import Timeout, RequestException 7 | 8 | 9 | BASE_URL = "https://swapi.dev/api" 10 | MAX_ATTEMPTS = 5 11 | INITIAL_TIMEOUT = 1 # seconds 12 | 13 | 14 | def all_starships(): 15 | """ 16 | Retrieves a list of all starships from SWAPI. 17 | :return: 18 | """ 19 | attempt_number = 1 20 | current_timeout = INITIAL_TIMEOUT 21 | ships = [] 22 | request_url = f"{BASE_URL}/starships" 23 | 24 | while True: 25 | print(f"\trequesting: {request_url}") 26 | # max number of retries exceeded 27 | if attempt_number > MAX_ATTEMPTS: 28 | print("Exceeded max number of attempts") 29 | return 30 | 31 | # query API (with specific page) 32 | try: 33 | response = requests.get( 34 | request_url, 35 | headers={"Content-Type": "application/json"}, 36 | timeout=current_timeout 37 | ) 38 | response.raise_for_status() 39 | except Timeout: 40 | print("Request time out. Trying again with a longer timeout.") 41 | current_timeout *= 2 42 | attempt_number += 1 43 | continue 44 | except RequestException as ex: 45 | print(f"API query failed - aborting: {ex}") 46 | return 47 | 48 | # get JSON from response, and add ships from results to main ships list 49 | data = response.json() 50 | ships.extend(data.get("results", [])) 51 | 52 | # is there a next page? 
53 | if data.get('next'): 54 | request_url = data['next'] 55 | # reset retry count and timeout (we start fresh for each page) 56 | current_timeout = INITIAL_TIMEOUT 57 | attempt_number = 1 58 | else: 59 | # done getting all results 60 | break 61 | 62 | return ships 63 | 64 | 65 | def film_titles(film_urls): 66 | results = set() 67 | for film_url in film_urls: 68 | attempt_number = 1 69 | current_timeout = INITIAL_TIMEOUT 70 | try: 71 | while True: 72 | print(f"\trequesting: {film_url}") 73 | # max number of retries exceeded 74 | if attempt_number > MAX_ATTEMPTS: 75 | print("Exceeded max number of attempts") 76 | raise StopIteration # we don't want to abort call, we'll just skip this film 77 | 78 | # query API 79 | try: 80 | response = requests.get( 81 | film_url, 82 | headers={"Content-Type": "application/json"}, 83 | timeout=current_timeout 84 | ) 85 | response.raise_for_status() 86 | except Timeout: 87 | print("Request time out. Trying again with a longer timeout.") 88 | current_timeout *= 2 89 | attempt_number += 1 90 | continue 91 | except RequestException as ex: 92 | print(f"API query failed - aborting: {ex}") 93 | return 94 | 95 | data = response.json() 96 | title = data.get("title") 97 | if title: 98 | results.add(title) 99 | break 100 | except StopIteration: 101 | # inner loop wants to just move on to next film 102 | continue 103 | 104 | # return all film names as a sorted list 105 | return sorted(results) -------------------------------------------------------------------------------- /Idiomatic_Python/14_decomposition/refactor_2/main.py: -------------------------------------------------------------------------------- 1 | """App to demo SWAPI Usage 2 | 3 | Note: Technically SWAPI provides an SDK, but here my goal is to show 4 | how to use decomposition to structure our code in a more readable, manageable, 5 | and testable way. 6 | """ 7 | from services.swapi import api as swapi 8 | from utils import films_for_min_capacity_ships 9 | 10 | 11 | def main(min_capacity: int): 12 | ships = swapi.all_starships() 13 | film_urls = films_for_min_capacity_ships(ships, min_capacity) 14 | return swapi.film_titles(list(film_urls)) 15 | 16 | 17 | if __name__ == '__main__': 18 | titles = main(1_000) 19 | print("=" * 50) 20 | for title in titles: 21 | print(title) -------------------------------------------------------------------------------- /Idiomatic_Python/14_decomposition/refactor_2/services/swapi/api.py: -------------------------------------------------------------------------------- 1 | """SWAPI Service 2 | 3 | Used for querying the SWAPI API. 4 | """ 5 | import requests 6 | from requests.exceptions import Timeout, RequestException 7 | 8 | 9 | BASE_URL = "https://swapi.dev/api" 10 | MAX_ATTEMPTS = 5 11 | INITIAL_TIMEOUT = 2 # seconds 12 | 13 | 14 | def all_starships(): 15 | """ 16 | Retrieves a list of all starships from SWAPI. 17 | :return: 18 | """ 19 | attempt_number = 1 20 | current_timeout = INITIAL_TIMEOUT 21 | ships = [] 22 | request_url = f"{BASE_URL}/starships" 23 | 24 | while True: 25 | print(f"\trequesting: {request_url}") 26 | # max number of retries exceeded 27 | if attempt_number > MAX_ATTEMPTS: 28 | print("Exceeded max number of attempts") 29 | return 30 | 31 | # query API (with specific page) 32 | try: 33 | response = requests.get( 34 | request_url, 35 | headers={"Content-Type": "application/json"}, 36 | timeout=current_timeout 37 | ) 38 | response.raise_for_status() 39 | except Timeout: 40 | print("Request time out. 
Trying again with a longer timeout.") 41 | current_timeout *= 2 42 | attempt_number += 1 43 | continue 44 | except RequestException as ex: 45 | print(f"API query failed - aborting: {ex}") 46 | return 47 | 48 | # get JSON from response, and add ships from results to main ships list 49 | data = response.json() 50 | ships.extend(data.get("results", [])) 51 | 52 | # is there a next page? 53 | if data.get('next'): 54 | request_url = data['next'] 55 | # reset retry count and timeout (we start fresh for each page) 56 | current_timeout = INITIAL_TIMEOUT 57 | attempt_number = 1 58 | else: 59 | # done getting all results 60 | break 61 | 62 | return ships 63 | 64 | 65 | def film_titles(film_urls): 66 | results = set() 67 | for film_url in film_urls: 68 | attempt_number = 1 69 | current_timeout = INITIAL_TIMEOUT 70 | try: 71 | while True: 72 | print(f"\trequesting: {film_url}") 73 | # max number of retries exceeded 74 | if attempt_number > MAX_ATTEMPTS: 75 | print("Exceeded max number of attempts") 76 | raise StopIteration # we don't want to abort call, we'll just skip this film 77 | 78 | # query API 79 | try: 80 | response = requests.get( 81 | film_url, 82 | headers={"Content-Type": "application/json"}, 83 | timeout=current_timeout 84 | ) 85 | response.raise_for_status() 86 | except Timeout: 87 | print("Request time out. Trying again with a longer timeout.") 88 | current_timeout *= 2 89 | attempt_number += 1 90 | continue 91 | except RequestException as ex: 92 | print(f"API query failed - aborting: {ex}") 93 | return 94 | 95 | data = response.json() 96 | title = data.get("title") 97 | if title: 98 | results.add(title) 99 | break 100 | except StopIteration: 101 | # inner loop wants to just move on to next film 102 | continue 103 | 104 | # return all film names as a sorted list 105 | return sorted(results) -------------------------------------------------------------------------------- /Idiomatic_Python/14_decomposition/refactor_2/utils.py: -------------------------------------------------------------------------------- 1 | """Various utility functions used in SWAPI app""" 2 | 3 | def films_for_min_capacity_ships(ships, min_capacity): 4 | """ 5 | Determine films in which ships of a minimum capacity appear. 6 | 7 | :param ships: list of ship objects 8 | :param min_capacity: the minimum capacity (inclusive) of each ship 9 | :return: a list of film URLs 10 | """ 11 | film_urls = set() 12 | for ship in ships: 13 | try: 14 | cargo_capacity = int(ship["cargo_capacity"]) 15 | except (KeyError, ValueError): 16 | # could not get a numeric cargo capacity, skip this ship 17 | continue 18 | if cargo_capacity >= min_capacity: 19 | if ship.get("films"): 20 | film_urls.update(ship["films"]) 21 | 22 | return list(film_urls) 23 | -------------------------------------------------------------------------------- /Idiomatic_Python/14_decomposition/refactor_3/main.py: -------------------------------------------------------------------------------- 1 | """App to demo SWAPI Usage 2 | 3 | Note: Technically SWAPI provides an SDK, but here my goal is to show 4 | how to use decomposition to structure our code in a more readable, manageable, 5 | and testable way. 
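
In this step, the single filtering helper from refactor_2 is split into two small, composable generator
functions in utils.py: filter_ships_by_capacity and extract_film_urls.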
6 | """ 7 | from services.swapi import api as swapi 8 | from utils import extract_film_urls, filter_ships_by_capacity 9 | 10 | 11 | def main(min_capacity): 12 | ships = swapi.all_starships() 13 | filtered_ships = filter_ships_by_capacity(ships, min_capacity) 14 | film_urls = set(extract_film_urls(filtered_ships)) 15 | return swapi.film_titles(film_urls) 16 | 17 | 18 | if __name__ == '__main__': 19 | titles = main(1_000) 20 | print("=" * 50) 21 | for title in titles: 22 | print(title) -------------------------------------------------------------------------------- /Idiomatic_Python/14_decomposition/refactor_3/services/swapi/api.py: -------------------------------------------------------------------------------- 1 | """SWAPI Service 2 | 3 | Used for querying the SWAPI API. 4 | """ 5 | import requests 6 | from requests.exceptions import Timeout, RequestException 7 | 8 | 9 | BASE_URL = "https://swapi.dev/api" 10 | MAX_ATTEMPTS = 5 11 | INITIAL_TIMEOUT = 2 # seconds 12 | 13 | 14 | def all_starships(): 15 | """ 16 | Retrieves a list of all starships from SWAPI. 17 | :return: 18 | """ 19 | attempt_number = 1 20 | current_timeout = INITIAL_TIMEOUT 21 | ships = [] 22 | request_url = f"{BASE_URL}/starships" 23 | 24 | while True: 25 | print(f"\trequesting: {request_url}") 26 | # max number of retries exceeded 27 | if attempt_number > MAX_ATTEMPTS: 28 | print("Exceed max number of retries") 29 | return 30 | 31 | # query API (with specific page) 32 | try: 33 | response = requests.get( 34 | request_url, 35 | headers={"Content-Type": "application/json"}, 36 | timeout=current_timeout 37 | ) 38 | response.raise_for_status() 39 | except Timeout: 40 | print("Request time out. Trying again with a longer timeout.") 41 | current_timeout *= 2 42 | attempt_number += 1 43 | continue 44 | except RequestException as ex: 45 | print(f"API query failed - aborting: {ex}") 46 | return 47 | 48 | # get JSON from response, and add ships from results to main ships list 49 | data = response.json() 50 | ships.extend(data.get("results", [])) 51 | 52 | # is there a next page? 53 | if data.get('next'): 54 | request_url = data['next'] 55 | # reset retry count and timeout (we start fresh for each page) 56 | current_timeout = INITIAL_TIMEOUT 57 | attempt_number = 1 58 | else: 59 | # done getting all results 60 | break 61 | 62 | return ships 63 | 64 | 65 | def film_titles(film_urls): 66 | results = set() 67 | for film_url in film_urls: 68 | try_number = 1 69 | current_timeout = INITIAL_TIMEOUT 70 | try: 71 | while True: 72 | print(f"\trequesting: {film_url}") 73 | # max number of retries exceeded 74 | if try_number > MAX_ATTEMPTS: 75 | print("Exceed max number of retries") 76 | raise StopIteration # we don't want to abort call, we'll just skip this film 77 | 78 | # query API 79 | try: 80 | response = requests.get( 81 | film_url, 82 | headers={"Content-Type": "application/json"}, 83 | timeout=current_timeout 84 | ) 85 | response.raise_for_status() 86 | except Timeout: 87 | print("Request time out. 
Trying again with a longer timeout.") 88 | current_timeout *= 2 89 | try_number += 1 90 | continue 91 | except RequestException as ex: 92 | print(f"API query failed - aborting: {ex}") 93 | return 94 | 95 | data = response.json() 96 | title = data.get("title") 97 | if title: 98 | results.add(title) 99 | break 100 | except StopIteration: 101 | # inner loop wants to just move on to next film 102 | continue 103 | 104 | # return all film names as a sorted list 105 | return sorted(results) -------------------------------------------------------------------------------- /Idiomatic_Python/14_decomposition/refactor_3/utils.py: -------------------------------------------------------------------------------- 1 | """Various utility functions used in SWAPI app""" 2 | 3 | def filter_ships_by_capacity(ships, min_capacity): 4 | """ 5 | Filters a list of ships based on some minimum capacity 6 | 7 | :param ships: list of ship objects 8 | :param min_capacity: the minimum capacity (inclusive) of each ship 9 | :return: a generator of ships 10 | """ 11 | for ship in ships: 12 | try: 13 | cargo_capacity = int(ship["cargo_capacity"]) 14 | except (KeyError, ValueError): 15 | # could not get a numeric cargo capacity, skip this ship 16 | continue 17 | if cargo_capacity >= min_capacity: 18 | yield ship 19 | 20 | 21 | def extract_film_urls(ships): 22 | """ 23 | For some iterable of ships, extracts the film URLs 24 | 25 | :param ships: list of ship objects 26 | :return: a generator (of potentially non-unique) film URLs 27 | """ 28 | for ship in ships: 29 | if ship.get("films"): 30 | yield from ship["films"] 31 | -------------------------------------------------------------------------------- /Idiomatic_Python/14_decomposition/refactor_4/main.py: -------------------------------------------------------------------------------- 1 | """App to demo SWAPI Usage 2 | 3 | Note: Technically SWAPI provides an SDK, but here my goal is to show 4 | how to use decomposition to structure our code in a more readable, manageable, 5 | and testable way. 6 | """ 7 | from services.swapi import api as swapi 8 | from utils import extract_film_urls, filter_ships_by_capacity 9 | 10 | 11 | def main(min_capacity): 12 | ships = swapi.all_starships() 13 | filtered_ships = filter_ships_by_capacity(ships, min_capacity) 14 | film_urls = set(extract_film_urls(filtered_ships)) 15 | return swapi.film_titles(film_urls) 16 | 17 | 18 | if __name__ == '__main__': 19 | titles = main(1_000) 20 | print("=" * 50) 21 | for title in titles: 22 | print(title) -------------------------------------------------------------------------------- /Idiomatic_Python/14_decomposition/refactor_4/services/swapi/api.py: -------------------------------------------------------------------------------- 1 | """SWAPI Service 2 | 3 | Used for querying the SWAPI API. 
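
In this refactor, the page-walking loop is factored out into services/swapi/paging.py (the paged generator),
so all_starships simply delegates to it; the timeout/retry loop in film_titles is still inline here (it gets
extracted into a decorator in refactor_5).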
4 | """ 5 | import requests 6 | from requests.exceptions import Timeout, RequestException 7 | 8 | from services.swapi.paging import paged 9 | 10 | 11 | BASE_URL = "https://swapi.dev/api" 12 | MAX_ATTEMPTS = 5 13 | INITIAL_TIMEOUT = 2 # seconds 14 | 15 | 16 | def starships(request_url): 17 | """ 18 | Returns a single page's worth of starships 19 | 20 | :param request_url: the url to query 21 | :return: list of starships, and the URL for the next page (if any) 22 | """ 23 | print(f"\trunning query: {request_url}") 24 | response = requests.get( 25 | request_url, 26 | headers={"Content-Type": "application/json"} 27 | ) 28 | response.raise_for_status() 29 | 30 | data = response.json() 31 | results = data.get("results", []) 32 | next_page = data.get("next", None) 33 | 34 | return results, next_page 35 | 36 | 37 | def all_starships(): 38 | """ 39 | Retrieves a list of all starships from SWAPI. 40 | :return: a generator of all starships 41 | """ 42 | yield from paged(starships, f"{BASE_URL}/starships") 43 | 44 | 45 | 46 | def film_titles(film_urls): 47 | results = set() 48 | for film_url in film_urls: 49 | attempt_number = 1 50 | current_timeout = INITIAL_TIMEOUT 51 | try: 52 | while True: 53 | print(f"\trequesting: {film_url}") 54 | # max number of retries exceeded 55 | if attempt_number > MAX_ATTEMPTS: 56 | print("Exceed max number of attempts") 57 | raise StopIteration # we don't want to abort call, we'll just skip this film 58 | 59 | # query API 60 | try: 61 | response = requests.get( 62 | film_url, 63 | headers={"Content-Type": "application/json"}, 64 | timeout=current_timeout 65 | ) 66 | response.raise_for_status() 67 | except Timeout: 68 | print("Request time out. Trying again with a longer timeout.") 69 | current_timeout *= 2 70 | attempt_number += 1 71 | continue 72 | except RequestException as ex: 73 | print(f"API query failed - aborting: {ex}") 74 | return 75 | 76 | data = response.json() 77 | title = data.get("title") 78 | if title: 79 | results.add(title) 80 | break 81 | except StopIteration: 82 | # inner loop wants to just move on to next film 83 | continue 84 | 85 | # return all film names as a sorted list 86 | return sorted(results) -------------------------------------------------------------------------------- /Idiomatic_Python/14_decomposition/refactor_4/services/swapi/paging.py: -------------------------------------------------------------------------------- 1 | """Utilities to implement api paging""" 2 | def paged(single_page_func, initial_request_url): 3 | next_request_url = initial_request_url 4 | while next_request_url: 5 | results, next_request_url = single_page_func(next_request_url) 6 | yield from results 7 | -------------------------------------------------------------------------------- /Idiomatic_Python/14_decomposition/refactor_4/utils.py: -------------------------------------------------------------------------------- 1 | """Various utility functions used in SWAPI app""" 2 | 3 | def filter_ships_by_capacity(ships, min_capacity): 4 | """ 5 | Filters a list of ships based on some minimum capacity 6 | 7 | :param ships: list of ship objects 8 | :param min_capacity: the minimum capacity (inclusive) of each ship 9 | :return: a generator of ships 10 | """ 11 | for ship in ships: 12 | try: 13 | cargo_capacity = int(ship["cargo_capacity"]) 14 | except (KeyError, ValueError): 15 | # could not get a numeric cargo capacity, skip this ship 16 | continue 17 | if cargo_capacity >= min_capacity: 18 | yield ship 19 | 20 | 21 | def extract_film_urls(ships): 22 | """ 23 | For some 
iterable of ships, extracts the film URLs 24 | 25 | :param ships: list of ship objects 26 | :return: a generator (of potentially non-unique) film URLs 27 | """ 28 | for ship in ships: 29 | if ship.get("films"): 30 | yield from ship["films"] 31 | -------------------------------------------------------------------------------- /Idiomatic_Python/14_decomposition/refactor_5/main.py: -------------------------------------------------------------------------------- 1 | """App to demo SWAPI Usage""" 2 | from services.swapi import api as swapi 3 | from utils import extract_film_urls, filter_ships_by_capacity 4 | 5 | 6 | def main(min_capacity): 7 | ships = swapi.all_starships() 8 | filtered_ships = filter_ships_by_capacity(ships, min_capacity) 9 | film_urls = set(extract_film_urls(filtered_ships)) 10 | return sorted(swapi.film_titles(film_urls)) 11 | 12 | 13 | if __name__ == '__main__': 14 | titles = main(1_000) 15 | print("=" * 50) 16 | for title in titles: 17 | print(title) -------------------------------------------------------------------------------- /Idiomatic_Python/14_decomposition/refactor_5/services/swapi/api.py: -------------------------------------------------------------------------------- 1 | """SWAPI Service 2 | 3 | Used for querying the SWAPI API. 4 | """ 5 | import requests 6 | from requests.exceptions import Timeout, RequestException 7 | 8 | from services.swapi.paging import paged 9 | from services.swapi.retries import timeout_retry 10 | 11 | 12 | BASE_URL = "https://swapi.dev/api" 13 | MAX_ATTEMPTS = 5 14 | INITIAL_TIMEOUT = 1 # seconds 15 | 16 | 17 | 18 | def starships(request_url, *, timeout=None): 19 | """ 20 | Returns a single page's worth of starships 21 | 22 | :param request_url: the url to query 23 | :param timeout: timeout for request query 24 | :return: list of starships, and the URL for the next page (if any) 25 | """ 26 | print(f"running query: {request_url}") 27 | 28 | retry_get = timeout_retry(max_attempts=MAX_ATTEMPTS, initial_timeout=INITIAL_TIMEOUT)(requests.get) 29 | response = retry_get( 30 | request_url, 31 | headers={"Content-Type": "application/json"} 32 | ) 33 | response.raise_for_status() 34 | 35 | data = response.json() 36 | results = data.get("results", []) 37 | next_page = data.get("next", None) 38 | 39 | return results, next_page 40 | 41 | 42 | def all_starships(): 43 | """ 44 | Retrieves a list of all starships from SWAPI. 
45 | :return: a generator of all starships 46 | """ 47 | yield from paged(starships, f"{BASE_URL}/starships") 48 | 49 | 50 | def film_title(film_url): 51 | """ 52 | Gets title for a specific film 53 | :param film_url: url to query 54 | :return: a string title (or None if title is missing) 55 | """ 56 | print(f"running query: {film_url}") 57 | 58 | retry_get = timeout_retry(max_attempts=MAX_ATTEMPTS, initial_timeout=INITIAL_TIMEOUT)(requests.get) 59 | response = retry_get( 60 | film_url, 61 | headers={"Content-Type": "application/json"} 62 | ) 63 | response.raise_for_status() 64 | data = response.json() 65 | return data.get("title") 66 | 67 | 68 | def film_titles(film_urls): 69 | """ 70 | Gets film titles for each film url passed to function 71 | :param film_urls: a list of film urls 72 | :return: a generator of titles 73 | """ 74 | for film_url in film_urls: 75 | yield film_title(film_url) 76 | -------------------------------------------------------------------------------- /Idiomatic_Python/14_decomposition/refactor_5/services/swapi/paging.py: -------------------------------------------------------------------------------- 1 | """Utilities to implement api paging""" 2 | def paged(single_page_func, initial_request_url): 3 | """ 4 | Used to paginate API requests. 5 | 6 | :param single_page_func: a function that takes a single argument for the url to query 7 | :param initial_request_url: the initial starting page - this paging function will 8 | provide url for subsequent pages 9 | :return: a generator of results from all requested pages 10 | """ 11 | next_request_url = initial_request_url 12 | while next_request_url: 13 | results, next_request_url = single_page_func(next_request_url) 14 | yield from results 15 | -------------------------------------------------------------------------------- /Idiomatic_Python/14_decomposition/refactor_5/services/swapi/retries.py: -------------------------------------------------------------------------------- 1 | """Utilities for a timeout retry decorator""" 2 | from requests.exceptions import Timeout, RequestException 3 | 4 | 5 | def timeout_retry(max_attempts, initial_timeout): 6 | """ 7 | Decorator factory that abstracts out timeout retry logic. 8 | 9 | :param max_attempts: maximum number of attempts 10 | :param initial_timeout: the initial timeout, gets doubled if query times out 11 | :return: decorator function 12 | """ 13 | attempt_number = 1 14 | current_timeout = initial_timeout 15 | 16 | def decorator(fn): 17 | def inner(*args, **kwargs): 18 | nonlocal attempt_number 19 | nonlocal current_timeout 20 | 21 | while True: 22 | try: 23 | print(f"\tattempt #{attempt_number}") 24 | result = fn(*args, **kwargs, timeout=current_timeout) 25 | 26 | return result 27 | except Timeout: 28 | if attempt_number >= max_attempts: 29 | # reached max number of attempts 30 | raise RequestException("Max attempts exceeded") 31 | attempt_number += 1 32 | current_timeout *= 2 33 | print(f"\t\tRequest timed out. 
Trying again with a longer timeout ({current_timeout} s).") 34 | 35 | return inner 36 | return decorator 37 | 38 | 39 | -------------------------------------------------------------------------------- /Idiomatic_Python/14_decomposition/refactor_5/utils.py: -------------------------------------------------------------------------------- 1 | """Various utility functions used in SWAPI app""" 2 | 3 | def filter_ships_by_capacity(ships, min_capacity): 4 | """ 5 | Filters a list of ships based on some minimum capacity 6 | 7 | :param ships: list of ship objects 8 | :param min_capacity: the minimum capacity (inclusive) of each ship 9 | :return: a generator of ships 10 | """ 11 | for ship in ships: 12 | try: 13 | cargo_capacity = int(ship["cargo_capacity"]) 14 | except (KeyError, ValueError): 15 | # could not get a numeric cargo capacity, skip this ship 16 | continue 17 | if cargo_capacity >= min_capacity: 18 | yield ship 19 | 20 | 21 | def extract_film_urls(ships): 22 | """ 23 | For some iterable of ships, extracts the film URLs 24 | 25 | :param ships: list of ship objects 26 | :return: a generator (of potentially non-unique) film URLs 27 | """ 28 | for ship in ships: 29 | if ship.get("films"): 30 | yield from ship["films"] 31 | -------------------------------------------------------------------------------- /Pipfile: -------------------------------------------------------------------------------- 1 | [[source]] 2 | url = "https://pypi.org/simple" 3 | verify_ssl = true 4 | name = "pypi" 5 | 6 | [packages] 7 | notebook = "*" 8 | pandas = "*" 9 | tabulate = "==0.8.9" 10 | numpy = "*" 11 | pyperclip = "*" 12 | jupyterthemes = "*" 13 | pydantic = "*" 14 | python-dateutil = "*" 15 | humanize = "*" 16 | faker = "*" 17 | faker-airtravel = "*" 18 | jupytext = "*" 19 | matplotlib = "*" 20 | redis = "*" 21 | requests = "*" 22 | pyhumps = "*" 23 | arrow = "*" 24 | wrapt = "*" 25 | pyyaml = "*" 26 | charset-normalizer = "*" 27 | pyjwt = "*" 28 | multidict = "*" 29 | jupyterlab = "*" 30 | python-benedict = {extras = ["all"], version = "*"} 31 | tenacity = "*" 32 | psycopg = {extras = ["binary"], version = "*"} 33 | yoyo-migrations = "*" 34 | icecream = "*" 35 | executing = ">=2.0.0" 36 | tdqm = "*" 37 | ipywidgets = "*" 38 | 39 | [dev-packages] 40 | black = "*" 41 | isort = "*" 42 | 43 | [requires] 44 | python_version = "3.12" 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python Blog 2 | 3 | These are resources that correspond to my Python blog videos located [here](https://www.youtube.com/channel/UCOsGw17tMhM4-GBjvQnXGzQ). 
4 | 5 | Some of my other Python related repos you might be interested in: 6 | 7 | - [Python Fundamentals](https://github.com/fbaptiste/python-fundamentals) - code repo for my Python Fundamentals course 8 | - [Python 3 Deep Dive](https://github.com/fbaptiste/python-deepdive) - code repo for my Python Deep Dive courses 9 | - [Pydantic V2: Essentials](https://github.com/fbaptiste/pydantic-essentials) - code repo for my Pydantic V2 course 10 | - [Python Primer](https://github.com/fbaptiste/python-primer) - quick intro to Python for developers with experience in another language (such as Java) 11 | 12 | My online courses (all on Udemy): 13 | 14 | - [Python Fundamentals](https://www.udemy.com/course/python3-fundamentals/?referralCode=DA09C6F40CEC38C942F6) 15 | - [Pydantic V2: Essentials](https://www.udemy.com/course/pydantic/?referralCode=581AD0DC27E0E1EDB538) 16 | - [Python 3 Deep Dive (Part 1)](https://www.udemy.com/course/python-3-deep-dive-part-1/?referralCode=E46B931C71EE01845062) 17 | - [Python 3 Deep Dive (Part 2)](https://www.udemy.com/course/python-3-deep-dive-part-2/?referralCode=3E7AFEF5174F04E5C8D4) 18 | - [Python 3 Deep Dive (Part 3)](https://www.udemy.com/course/python-3-deep-dive-part-3/?referralCode=C5B0D9AB965B9BF4C49F) 19 | - [Python 3 Deep Dive (Part 4)](https://www.udemy.com/course/python-3-deep-dive-part-4/?referralCode=3BB758BE4C04FB983E6F) 20 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 100 3 | 4 | [tool.isort] 5 | profile = "black" --------------------------------------------------------------------------------