├── .github ├── dependabot.yml └── workflows │ └── build.yml ├── .gitignore ├── .pre-commit-config.yaml ├── README.md ├── _config.yml ├── _toc.yml ├── environment.yml ├── images ├── LevelUpYourPython.png └── LevelUpYourPython.svg ├── notebooks ├── 0 Intro.ipynb ├── 1.1 Intro to Classes.ipynb ├── 1.2 Inspection.ipynb ├── 1.3 Logging.ipynb ├── 1.4 Debugging.ipynb ├── 1.5 Profiling.ipynb ├── 2.1 Errors.ipynb ├── 2.2 Generators.ipynb ├── 2.3 Decorators.ipynb ├── 2.4 Context Managers.ipynb ├── 2.5 Static Typing.ipynb ├── 2.6 Using Packages.ipynb ├── 2.7 Creating Packages.ipynb ├── 2.8 Pattern Matching.ipynb ├── 3.1 pytest.ipynb ├── 3.2 NumPy.ipynb ├── 3.3 Pandas.ipynb ├── 3.4 Numba.ipynb ├── 3.5 pybind11.ipynb ├── 3.6 Code Quality and CI.ipynb ├── 4.1 Memory Model.ipynb ├── 4.2 Classes.ipynb └── save_and_run.py ├── noxfile.py └── postBuild /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # Maintain dependencies for GitHub Actions 4 | - package-ecosystem: "github-actions" 5 | directory: "/" 6 | schedule: 7 | interval: "weekly" 8 | groups: 9 | actions: 10 | patterns: 11 | - "*" 12 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: deploy-book 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | 8 | permissions: 9 | contents: read 10 | pages: write 11 | id-token: write 12 | 13 | concurrency: 14 | group: "pages" 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | build-book: 19 | runs-on: ubuntu-latest 20 | defaults: 21 | run: 22 | shell: bash -l {0} 23 | steps: 24 | - uses: actions/checkout@v3 25 | 26 | - uses: conda-incubator/setup-miniconda@v2 27 | with: 28 | environment-file: environment.yml 29 | activate-environment: level-up-your-python 30 | miniforge-variant: Mambaforge 31 | use-mamba: true 32 | 33 | - name: Install the kernel 34 | 
run: | 35 | python -m ipykernel install --user --name conda-env-level-up-your-python-py 36 | 37 | - name: Build the book 38 | run: | 39 | jupyter-book build . 40 | 41 | - uses: actions/upload-artifact@v3 42 | with: 43 | name: jupyterbook 44 | path: _build/html/* 45 | 46 | 47 | build-pyodide: 48 | runs-on: ubuntu-latest 49 | steps: 50 | - uses: actions/checkout@v3 51 | 52 | - name: Build the Pyodide output 53 | run: | 54 | pipx run nox -s pyodide 55 | 56 | - uses: actions/upload-artifact@v3 57 | with: 58 | name: jupyterlite 59 | path: _output/* 60 | 61 | 62 | deploy: 63 | environment: 64 | name: github-pages 65 | url: ${{ steps.deployment.outputs.page_url }} 66 | runs-on: ubuntu-latest 67 | needs: [build-book, build-pyodide] 68 | steps: 69 | - name: Setup Pages 70 | uses: actions/configure-pages@v1 71 | 72 | - uses: actions/download-artifact@v3 73 | with: 74 | name: jupyterbook 75 | path: public 76 | 77 | - uses: actions/download-artifact@v3 78 | with: 79 | name: jupyterlite 80 | path: public/live 81 | 82 | - name: Upload artifact 83 | uses: actions/upload-pages-artifact@v1 84 | with: 85 | path: public 86 | 87 | - name: Deploy to GitHub Pages 88 | id: deployment 89 | uses: actions/deploy-pages@v1 90 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | *tmp*.py 163 | *temp*.py 164 | *tmp*.csv 165 | 166 | _build/ 167 | _output/ 168 | 169 | *.doit.db 170 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: "v4.4.0" 4 | hooks: 5 | - id: check-added-large-files 6 | - id: check-case-conflict 7 | - id: check-merge-conflict 8 | - id: check-symlinks 9 | - id: check-yaml 10 | - id: debug-statements 11 | - id: end-of-file-fixer 12 | - id: mixed-line-ending 13 | - id: requirements-txt-fixer 14 | - id: trailing-whitespace 15 | 16 | - repo: https://github.com/psf/black 17 | rev: 23.7.0 18 | hooks: 19 | - id: black-jupyter 20 | args: [--target-version=py310] 21 | 22 | - repo: https://github.com/kynan/nbstripout 23 | rev: 0.6.1 24 | hooks: 25 | - id: nbstripout 26 | 27 | - repo: https://github.com/codespell-project/codespell 28 | rev: "v2.2.5" 29 | hooks: 30 | - id: codespell 31 | args: ["-L", "hist,whet"] 32 | 33 | - repo: local 34 | hooks: 35 | - id: disallow-caps 36 | name: Disallow improper capitalization 37 | language: pygrep 38 | entry: PyBind|Numpy|Cmake|CCache|Github|PyTest 39 | exclude: .pre-commit-config.yaml 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Level Up Your Python 2 | 3 | Website here: 4 | 5 | Live (binder) instance here: 6 | 7 | Live WebAssymbly (Pyodide + JupyterLite) instance here: 8 | 9 | This project comes with a conda environment file. To install, run: 10 | 11 | ```bash 12 | conda create env 13 | ``` 14 | 15 | Then you can just start up a jupyter lab server: 16 | 17 | ```bash 18 | juptyer lab 19 | ``` 20 | 21 | You only need to have [conda installed](https://docs.conda.io/en/latest/miniconda.html). 
If you want to have it pre-configured with [conda-forge](https://conda-forge.org/), that's [available here](https://github.com/conda-forge/miniforge). The Mambaforge version is _FAST_! 22 | 23 | --- 24 | 25 | ## Princeton mini-course 26 | 27 | This was first taught as a Princeton mini-course in 2020. 28 | 29 | ## PyHEP 2021 30 | 31 | This was taught at PyHEP 2021 32 | 33 | ## Wintersession 2022 34 | 35 | This was at Wintersession, 2022 at Princeton. 36 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | # Book settings 2 | title: Level Up Your Python 3 | author: Henry Schreiner 4 | logo: images/LevelUpYourPython.png 5 | 6 | repository: 7 | url: https://github.com/henryiii/level-up-your-python 8 | branch: main 9 | 10 | execute: 11 | allow_errors: true 12 | 13 | html: 14 | use_repository_button: true 15 | 16 | launch_buttons: 17 | notebook_interface: jupyterlab 18 | -------------------------------------------------------------------------------- /_toc.yml: -------------------------------------------------------------------------------- 1 | format: jb-book 2 | root: notebooks/0 Intro 3 | parts: 4 | - caption: Object Model 5 | numbered: true 6 | chapters: 7 | - file: notebooks/1.1 Intro to Classes 8 | - file: notebooks/1.2 Inspection 9 | - file: notebooks/1.3 Logging 10 | - file: notebooks/1.4 Debugging 11 | - file: notebooks/1.5 Profiling 12 | - caption: Syntax 13 | numbered: true 14 | chapters: 15 | - file: notebooks/2.1 Errors 16 | - file: notebooks/2.2 Generators 17 | - file: notebooks/2.3 Decorators 18 | - file: notebooks/2.4 Context Managers 19 | - file: notebooks/2.5 Static Typing 20 | - file: notebooks/2.6 Using Packages 21 | - file: notebooks/2.7 Creating Packages 22 | - file: notebooks/2.8 Pattern Matching 23 | - caption: Libraries 24 | numbered: true 25 | chapters: 26 | - file: notebooks/3.1 pytest 27 | - file: notebooks/3.2 NumPy 
28 | - file: notebooks/3.3 Pandas 29 | - file: notebooks/3.4 Numba 30 | - file: notebooks/3.5 pybind11 31 | - file: notebooks/3.6 Code Quality and CI 32 | - caption: Advanced 33 | numbered: true 34 | chapters: 35 | - file: notebooks/4.1 Memory Model 36 | - file: notebooks/4.2 Classes 37 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: level-up-your-python 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - black 6 | - ipykernel >=6.0 7 | - ipython >=8.0 8 | - ipywidgets 9 | - jupyter-book 10 | - jupyterlab >=3 11 | - line_profiler 12 | - matplotlib >=3.5 13 | - mypy >=0.9r760 14 | - nb_conda_kernels 15 | - numba >=0.55 16 | - numpy >=1.20 17 | - pandas >=1.3 18 | - pip 19 | - pybind11 >=2.10 20 | - pytest >7 21 | - pytest-mock 22 | - python ==3.10.* 23 | - rich >=11 24 | -------------------------------------------------------------------------------- /images/LevelUpYourPython.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/henryiii/level-up-your-python/513027e3bc832d0a3272019b6446d562cc7fe7e5/images/LevelUpYourPython.png -------------------------------------------------------------------------------- /images/LevelUpYourPython.svg: -------------------------------------------------------------------------------- 1 | 2 | 20 | 22 | 45 | 47 | 48 | 50 | image/svg+xml 51 | 53 | 54 | 55 | 56 | 57 | 62 | Your 73 | Level 87 | Up 98 | Python 109 | + 120 | 136 | 137 | 138 | -------------------------------------------------------------------------------- /notebooks/0 Intro.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Level Up Your Python\n", 8 | "\n", 9 | "A course in _intermediate_ Python for a beginner ready to move up. 
[Binder version](https://mybinder.org/v2/gh/henryiii/level-up-your-python/main?urlpath=lab/tree/notebooks/0%20Intro.ipynb) and [Live WebAssembly version available too!](https://henryiii.github.io/level-up-your-python/live)\n", 10 | "\n", 11 | "**Henry Schreiner**\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "tags": [ 19 | "remove-cell" 20 | ] 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "# WebAssembly version using Pyodide!\n", 25 | "# The following code is specific to the Pyodide backend.\n", 26 | "\n", 27 | "import sys\n", 28 | "\n", 29 | "if sys.platform.startswith(\"emscripten\"):\n", 30 | " import micropip\n", 31 | "\n", 32 | " await micropip.install(\"rich\")" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## Introduction\n", 40 | "\n", 41 | "### Expected Knowledge\n", 42 | "\n", 43 | "You should already know:\n", 44 | "\n", 45 | "* Basic Python syntax\n", 46 | "* Functions\n", 47 | "* Basic classes (will cover advanced usages mostly)\n", 48 | "* Basic NumPy (will mention, but not cover)\n", 49 | "* Git - _CRITICAL FOR ANY SOFTWARE WORK!_\n", 50 | "\n", 51 | "And we will be using notebooks in JupyterLab." 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "## About the author\n", 59 | "\n", 60 | "[![Henryiii's github stats](https://github-readme-stats.vercel.app/api?username=henryiii)](https://github.com/anuraghazra/github-readme-stats)\n", 61 | "\n", 62 | "Most important link: \n", 63 | "\n", 64 | "[PyPA](https://github.com/pypa) member. 
[Scikit-HEP](https://scikit-hep.org) admin, [scikit-build](https://github.com/scikit-build) admin, member of [IRIS-HEP](https://iris-hep.org).\n", 65 | "\n", 66 | "### Projects\n", 67 | "\n", 68 | "[pybind11](https://pybind11.readthedocs.io) •\n", 69 | "[cibuildwheel](https://cibuildwheel.readthedocs.io) •\n", 70 | "[build](https://pypa-build.readthedocs.io) •\n", 71 | "[scikit-build](https://github.com/scikit-build) •\n", 72 | "[boost-histogram](https://github.com/scikit-hep/boost-histogram) •\n", 73 | "[Hist](https://github.com/scikit-hep/hist) •\n", 74 | "[UHI](https://github.com/scikit-hep/uhi) •\n", 75 | "[Vector](https://github.com/scikit-hep/vector) •\n", 76 | "[CLI11](https://github.com/CLIUtils/CLI11) •\n", 77 | "[Plumbum](https://plumbum.readthedocs.io/en/latest) •\n", 78 | "[GooFit](https://github.com/GooFit/GooFit) •\n", 79 | "[Particle](https://github.com/scikit-hep/particle) •\n", 80 | "[DecayLanguage](https://github.com/scikit-hep/decaylanguage) •\n", 81 | "[Conda-Forge ROOT](https://github.com/conda-forge/root-feedstock) •\n", 82 | "[POVM](https://github.com/Princeton-Penn-Vents/princeton-penn-flowmeter) •\n", 83 | "[Jekyll-Indico](https://github.com/iris-hep/jekyll-indico)\n", 84 | "\n", 85 | "### Interests:\n", 86 | "\n", 87 | "Packaging and building • Bindings • Building a HEP analysis toolchain in Python, JITable\n", 88 | "\n", 89 | "\n", 90 | "#### My books and workshops\n", 91 | "[Modern CMake](https://cliutils.gitlab.io/modern-cmake/) •\n", 92 | "[CMake Workshop](https://hsf-training.github.io/hsf-training-cmake-webpage/) •\n", 93 | "[Computational Physics Class](https://henryiii.github.io/compclass) •\n", 94 | "Python [CPU](https://github.com/henryiii/python-performance-minicourse),\n", 95 | "[GPU](https://github.com/henryiii/pygpu-minicourse),\n", 96 | "[Compiled](https://github.com/henryiii/python-compiled-minicourse) minicourses\n" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "## 
Tempering your expectations\n", 104 | "\n", 105 | "### What can you expect?\n", 106 | "\n", 107 | "* I don't know what you know;\n", 108 | "* We don't have time to study any topic in depth.\n", 109 | "\n", 110 | "So, we will move _fast_, and cover a _lot_.\n", 111 | "\n", 112 | "You are not expected to able to master everything you see.\n", 113 | "\n", 114 | "### Instead, you are expected to:\n", 115 | "\n", 116 | "1. Know what is possible, so you know to look for it;\n", 117 | "2. Get pointers on where to look (lots of links!);\n", 118 | "3. Refer back to this material later." 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "## Theory: New features in programming\n", 126 | "\n", 127 | "Programming is all about _organization_. This is not always obvious, and has some odd consequences. Let's look at one: **new features remove functionality from the user**. And that's a good thing.\n", 128 | "\n", 129 | "Don't believe me? Pick one. Let's go with an old, simple one you should already know: `goto` vs. loops (`for`/`while`). (This is my favorite example, even though Python thankfully came along late enough to not even have `goto` in the first place, except as [an April fools joke](http://entrian.com/goto/) or [a proof of concept library](https://pypi.org/project/goto-statement/).)\n", 130 | "\n", 131 | "\n", 132 | "You have total power with goto! Jump from anywhere, to anywhere! You can recreate all loops (for loops, for each loops, while loops, do while (C) loops) with it, and more (like functions)! So why are loops the newer, better feature?" 
133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "**goto ([partially hypothetical](https://github.com/snoack/python-goto) in Python)**\n", 140 | "```python\n", 141 | "i = 0\n", 142 | "label .start\n", 143 | "print(f\"Hi {i}\")\n", 144 | "i + 1\n", 145 | "if i <= 10:\n", 146 | " goto .start\n", 147 | "```" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "**Compare to for loop:**\n", 155 | " " 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "for i in range(10):\n", 165 | " print(f\"Hi {i}\")" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "A programmer has to spend time to recognize what is happening in the first example - in the second example, even a fairly new Python programmer will immediately say \"that prints 0 to 9\". The second example lets you build more complex programs, because you are working at a 'higher level', humans tend to do better which high level concepts (while computers work up from low level).\n", 173 | "\n", 174 | "Also, we now need several features to make up for the loss of goto; the for loop, the while loop, and functions. Each is more restricted, with less functionality, but better readability and composability.\n", 175 | "\n", 176 | "![GOTO xkcd comic](https://imgs.xkcd.com/comics/goto.png)\n", 177 | "\n", 178 | "We will see lots of examples of this -- in section 2, especially." 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "## Notebooks\n", 186 | "\n", 187 | "We will be using notebooks today. Notebooks are fantastic for teaching, quick experimentation, for developing, or for driving a final analysis product. They are not for serious programming - that happens in `.py` files. 
Once you write something and get it working, move it to a `.py` file and add a test. Then import it into your notebook!" 188 | ] 189 | }, 190 | { 191 | "cell_type": "markdown", 192 | "metadata": {}, 193 | "source": [ 194 | "## Python version\n", 195 | "\n", 196 | "We will be using Python 3.11. I'll try to point out when something is newer than 3.8. [NEP 29](https://numpy.org/neps/nep-0029-deprecation_policy.html) mandates that data science libraries currently support 3.9+ (support dropped 42 months after release), while [general Python EOL is 3.8+](https://endoflife.date/python) (5 year support window). IPython and NumPy have already dropped Python 3.8.\n", 197 | "\n", 198 | "Key upcoming dates:\n", 199 | "\n", 200 | "| Python | Release | NEP 29 drop | General EOL |\n", 201 | "|----------------|--------------|--------------|--------------|\n", 202 | "| ~~Python 3.7~~ | ~~Jun 2018~~ | ~~Dec 2021~~ | ~~Jun 2023~~ |\n", 203 | "| Python 3.8 | Oct 2019 | ~~Apr 2023~~ | Oct 2024 |\n", 204 | "| Python 3.9 | Oct 2020 | Apr 2024 | Oct 2025 |\n", 205 | "| Python 3.10 | Oct 2021 | Apr 2025 | Oct 2026 |\n", 206 | "| Python 3.11 | Oct 2022 | Apr 2026 | Oct 2027 |\n", 207 | "| Python 3.12 | Oct 2023 | Apr 2027 | Oct 2028 |\n", 208 | "| Python 3.X | Oct 2011+X | Apr 2015+X | Oct 2016+X |\n", 209 | "\n", 210 | "Since Python 3.8, Python releases yearly, so you can expect a new Python release every October, and an EOL every December (April a year before for data science).\n", 211 | "\n", 212 | "Note that SPEC 0 seems to be replacing NEP 29, and it has an even shorter support cycle, 36 months. So releases get dropped in October three years after they debut." 
213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "metadata": {}, 218 | "source": [ 219 | "## Extra: saving and running a file from Jupyter\n", 220 | "\n", 221 | "For teaching purposes, we will be running some tools (pytest and mypy) from notebooks; this is not what they were designed to do, so we will use the following small extension to save a cell to a file and then run it with a Python module. I'm using a third-party library, `rich`, to render this with nice syntax highlighting in the notebook." 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "import rich\n", 231 | "from rich.syntax import Syntax\n", 232 | "from pathlib import Path\n", 233 | "\n", 234 | "filepath = Path(\"save_and_run.py\")\n", 235 | "rich.print(Syntax(filepath.read_text(), \"python\", theme=\"default\"))" 236 | ] 237 | } 238 | ], 239 | "metadata": { 240 | "kernelspec": { 241 | "display_name": "conda-env-level-up-your-python-py", 242 | "language": "python", 243 | "name": "conda-env-level-up-your-python-py" 244 | }, 245 | "language_info": { 246 | "codemirror_mode": { 247 | "name": "ipython", 248 | "version": 3 249 | }, 250 | "file_extension": ".py", 251 | "mimetype": "text/x-python", 252 | "name": "python", 253 | "nbconvert_exporter": "python", 254 | "pygments_lexer": "ipython3", 255 | "version": "3.10.5" 256 | }, 257 | "toc-autonumbering": false, 258 | "toc-showmarkdowntxt": false, 259 | "toc-showtags": false 260 | }, 261 | "nbformat": 4, 262 | "nbformat_minor": 4 263 | } 264 | -------------------------------------------------------------------------------- /notebooks/1.1 Intro to Classes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "26d2c6fc-6368-4335-9a2c-1faa75ff4ece", 6 | "metadata": {}, 7 | "source": [ 8 | "# Intro to classes\n", 9 | "\n", 10 | "Your first step into 
intermediate Python begins with classes. Classes are at the core of Python: everything is an object in Python, which means everything has a class. Even built-in objects written in C are still Python objects with classes." 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "474071b5-839b-4328-b7ce-3ddfeada6bf3", 16 | "metadata": {}, 17 | "source": [ 18 | "## What is an object?\n", 19 | "\n", 20 | "An object is simply a collection of data and functions that operate on that data.\n", 21 | "\n", 22 | "For example, let's say we wanted to represent our home directory as an object. It might look something like this:\n", 23 | "\n", 24 | "```\n", 25 | "home_directory:\n", 26 | " string_location = \"/home/me\"\n", 27 | " exists(self) -> bool\n", 28 | "```\n", 29 | "\n", 30 | "This object holds a single data \"member\" (`string_location`), and has a function, called a \"method\", to see if the directory exists.\n", 31 | "\n", 32 | "We could produce lots of these, each with different `string_location` values, and we could use them in our code to track directories and see if they exist. All of these objects are interchangeable, and all of them have identical functions - only the contents of the data are different. This suggests we could make a further improvement to the model. (Unless we were in JavaScript, by the way, where this really _is_ how objects were implemented!)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "a99596a7-c949-4c3d-b1ac-27f16fb1abc9", 38 | "metadata": {}, 39 | "source": [ 40 | "## What is a class?\n", 41 | "\n", 42 | "Now we will make a \"template\" for creating new objects; called a class." 
43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "id": "23d3aaeb-5f9b-4ecc-8604-c4b846e17d97", 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "import os\n", 53 | "\n", 54 | "\n", 55 | "class Path:\n", 56 | " def __init__(self, string_location):\n", 57 | " self.string_location = string_location\n", 58 | "\n", 59 | " def exists(self):\n", 60 | " return os.path.exists(self.string_location)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "id": "a396914b-764c-48e1-bf8a-cf3cfe10cfeb", 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "usr_bin = Path(\"/usr/bin\")\n", 71 | "usr_bin.exists()" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "id": "aa5e34a8-75e8-415d-bac0-e60318ef9eae", 77 | "metadata": {}, 78 | "source": [ 79 | "The `__init__` method is special to Python: if you \"call\" the class, Python will create a new instance of the class, then call it's `__init__` method, passing in the new, empty instance into \"self\". Inside this method, we add `string_location` to `self`." 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "id": "3078983b-f817-4434-aae2-37d9cc7c2e18", 85 | "metadata": {}, 86 | "source": [ 87 | "Notice that Python automatically knows that calling a method on a class instance should pass the instance as the first arguments. We could have written this instead, which is identical:" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "id": "054af9d8-6171-4b27-bd82-f1cb58f3eded", 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "Path.exists(usr_bin)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "id": "089888da-844f-4ce2-92bc-c1d4cf20c0da", 103 | "metadata": {}, 104 | "source": [ 105 | "But it's a lot more convenient and concise to call it on the instance itself." 
106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "id": "501a35e5-f61f-4184-97c1-0e0a833d41f1", 111 | "metadata": {}, 112 | "source": [ 113 | "The thing called `usr_bin` only caries the data we've assigned to it:" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "id": "a462be59-96bb-4484-8b6e-1a8a3f2af9eb", 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "usr_bin.__dict__" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "id": "b73033ca-b610-43fc-9cca-038eb7736239", 129 | "metadata": {}, 130 | "source": [ 131 | "It remembers its class, though:" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "id": "6f82c7cf-283e-40c6-94f2-7755a51c63c8", 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "usr_bin.__class__" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "id": "a2025612-a9e6-4ff6-bbef-344ec50683cd", 147 | "metadata": {}, 148 | "source": [ 149 | "When you try to access attributes, it checks, the object first, then it tries the class:" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "id": "3fe69d31-6bfd-4679-b6f5-f84aa31fa6ad", 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "usr_bin.__class__.__dict__" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "id": "c3a5208c-13e5-4bf5-8805-ddd205ae9221", 165 | "metadata": {}, 166 | "source": [ 167 | "There's some autogenerated stuff in there, but you can see `exists` is there too!" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "id": "e2003ba6-77bd-4ffb-ad52-256aae38dafe", 173 | "metadata": {}, 174 | "source": [ 175 | "### Advanced: subclassing" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "id": "205e0bf4-ff50-4d87-90ec-fa476069b2cf", 181 | "metadata": {}, 182 | "source": [ 183 | "Why stop there? It's often useful to organize in further levels. 
This is accomplished by subclassing - a class can be \"based on\" another class - what this means is that the most specific class is checked first, but then it keeps going up the chain. This lookup mechanism is called the mro (method resolution order), and you can check it explicitly:" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "id": "49fcd227-e0bf-4096-90c5-9ce62db1530d", 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "usr_bin.__class__.__mro__" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "id": "39f751e9-2952-4f00-b9fe-c5ddca6e20f9", 199 | "metadata": {}, 200 | "source": [ 201 | "All classes are eventually subclasses of `object` - the last item in this list. That's where the default behaviors come from." 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "id": "5880afb3-8550-4fe3-9304-95b02ef7571c", 207 | "metadata": {}, 208 | "source": [ 209 | "## Simpler: dataclasses\n", 210 | "\n", 211 | "If you come from a compiled language, the syntax for making a class might be unusual for you. You might be more used to simply listing the members and methods together, something like this:\n", 212 | "\n", 213 | "\n", 214 | "```python\n", 215 | "class BadPath:\n", 216 | " string_location = ...\n", 217 | "\n", 218 | " def exists(self):\n", 219 | " return os.path.exists(self.string_location)\n", 220 | "```\n", 221 | "\n", 222 | "\n", 223 | "Question: Why is this wrong?\n", 224 | "\n", 225 | "Answer: This member variable is _on the class_. That means all `BadPath`'s would have the same `string_location`! We also don't know what to assign it too (it should be assigned when you make an instance for an instance variable).\n", 226 | "\n", 227 | "\n", 228 | "But... Wouldn't it be nice if we didn't have to be so repetitive? 
Well, we can have the best of both worlds:" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "id": "ee0237cd-1cc8-4a94-8a03-3b98255384c9", 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "import dataclasses\n", 239 | "\n", 240 | "\n", 241 | "@dataclasses.dataclass\n", 242 | "class DataPath:\n", 243 | " string_location: str\n", 244 | "\n", 245 | " def exists(self):\n", 246 | " return os.path.exists(self.string_location)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "id": "d5793cbf-9435-49b6-9986-bda69b6322ff", 252 | "metadata": {}, 253 | "source": [ 254 | "We just add a decorator (we'll cover these later, for now it's just a marker that processes this class into the correct output. In fact, that's kind of always what they are). We also add a type annotation since Python doesn't allow a variable declaration without at least a type annotation or a value. The type annotation just tells the reader (not Python) what type this expects. Python still doesn't care what you really do with this." 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "id": "5c550062-e742-43bb-abe3-3aca08edc68f", 260 | "metadata": {}, 261 | "source": [ 262 | "Now, we get an `__init__` for free!" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "id": "a044dd61-83db-4779-abd3-d97978dda232", 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "DataPath(\"/usr/local\")" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "id": "a8c5a843-53af-4e7c-8833-d9b66e38fb95", 278 | "metadata": {}, 279 | "source": [ 280 | "In fact, we got a lot more for free. Notice now nicely it printed out? 
Compare that to our old class:" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "id": "efa6f95a-8eec-448d-8970-f922acb03afe", 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [ 290 | "Path(\"/usr/local\")" 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "id": "f0532ebf-8f82-4134-b58d-a623d8258285", 296 | "metadata": {}, 297 | "source": [ 298 | "That's the default `object` repr, which just tells you the `__class__.__name__` and memory location (ugh), instead of something more helpful. We would have had to do a lot more work to make a nice class with the vanilla syntax!" 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "id": "eb78bbd1-3e04-4a39-9880-40b9285cd83d", 304 | "metadata": {}, 305 | "source": [ 306 | "There are a lot of useful options in dataclasses that can help you make useful classes; here are most of them:\n", 307 | "\n", 308 | "- `init`: Make a init function (default: True)\n", 309 | "- `repr`: Make a nice repr (default: True)\n", 310 | "- `eq`: Allow equality (default: True)\n", 311 | "- `order`: Allow comparisons (default: False)\n", 312 | "- `frozen`: Disallow mutation (default: False)\n", 313 | "- `slots`: Keep the class from accepting new members (Python 3.10+, default: False, slots classes have no `__dict__`)\n", 314 | "- `kw_only`: Do not allow pass-by position (Python 3.10, default False, frees up subclassing a lot)\n", 315 | "- `match_args`: Support Python 3.10 pattern matching via position (Python 3.10, default: True)\n", 316 | "\n", 317 | "You can also control each attribute (field in dataclass terms) with options, and you can specify `__post_init__`, which runs after the generated `__init__`.\n", 318 | "\n", 319 | "If you like dataclasses, feel free to check out `attrs`, which inspired dataclasses and is a little more powerful, `cattrs`, which handles conversions for both the stdlib dataclasses and attrs, as well as `pydantic`, which is an all-in-one solution for 
data conversion and validation too, but less flexible." 320 | ] 321 | }, 322 | { 323 | "cell_type": "markdown", 324 | "id": "7464ff6b-e8f9-46cb-a587-41c8faddfac9", 325 | "metadata": {}, 326 | "source": [ 327 | "## Using classes" 328 | ] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "id": "478f4796-44ea-4097-b37a-13e56c637568", 333 | "metadata": {}, 334 | "source": [ 335 | "Let's look at a built-in class, `int`:" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "id": "abd7fc01-73de-4996-b2b2-a83e902cf170", 342 | "metadata": {}, 343 | "outputs": [], 344 | "source": [ 345 | "my_int = int(3)" 346 | ] 347 | }, 348 | { 349 | "cell_type": "markdown", 350 | "id": "26ee85ce-d455-4ce9-9c66-eef5815fd169", 351 | "metadata": {}, 352 | "source": [ 353 | "Since this is so common, there's a built in shortcut for this - we could have used `my_int = 3` directly - Python turns numbers into integers when it sees them. We can call methods, too:" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": null, 359 | "id": "bd9234d1-b2a9-4526-8207-8d15665f9831", 360 | "metadata": {}, 361 | "outputs": [], 362 | "source": [ 363 | "my_int.bit_length()" 364 | ] 365 | }, 366 | { 367 | "cell_type": "markdown", 368 | "id": "298108a5-965f-4943-baf8-42899e0dd328", 369 | "metadata": {}, 370 | "source": [ 371 | "It takes 2 bits to be able to represent this integer. Python uses many more than that, but this is useful information about integers.\n", 372 | "\n", 373 | "> Note: you cannot write `3.bit_length()`; due to the Python parser, this is invalid syntax due to Python thinking you've started writing a float. You can, however, do this with a float. `2.0.is_integer()` is valid, for example. As is `(2).bit_length()`." 
374 | ] 375 | }, 376 | { 377 | "cell_type": "markdown", 378 | "id": "e48fe023-c3a1-458a-8019-0688de5367fd", 379 | "metadata": {}, 380 | "source": [ 381 | "## Special methods\n", 382 | "\n", 383 | "We can't go very far without writing a special method - `__init__` we've already seen. Python has a lot of special methods that have double underscores before and after the name - called \"dunder methods\". These customize all sorts of things about the class. Let's try to make our \"plain\" class look more like our \"dataclass\":" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "id": "c90dbacc-eb8f-4e8d-a605-3a8fefb8c09c", 390 | "metadata": {}, 391 | "outputs": [], 392 | "source": [ 393 | "class Path:\n", 394 | " def __init__(self, string_location):\n", 395 | " self.string_location = string_location\n", 396 | "\n", 397 | " def exists(self):\n", 398 | " return os.path.exists(self.string_location)\n", 399 | "\n", 400 | " def __repr__(self):\n", 401 | " return f\"{self.__class__.__name__}(string_location={self.string_location!r})\"" 402 | ] 403 | }, 404 | { 405 | "cell_type": "code", 406 | "execution_count": null, 407 | "id": "0e623a06-264d-4164-9302-21f7f18107f5", 408 | "metadata": {}, 409 | "outputs": [], 410 | "source": [ 411 | "Path(\"/usr/local\")" 412 | ] 413 | }, 414 | { 415 | "cell_type": "markdown", 416 | "id": "9b8ad967-a069-40d6-9f55-87e252d43711", 417 | "metadata": {}, 418 | "source": [ 419 | "This looks like our `DataPath` now! We've customized what the \"representation\" of the object of this class looks like. We could also separately control what the string representation (`__str__`) looks like, which will allow the printed form and the REPL form to look different, which is a really nice feature of Python missing from some other languages like Matlab. `repr` is usually programmer friendly, and `str` is usually user friendly." 
420 | ] 421 | }, 422 | { 423 | "cell_type": "markdown", 424 | "id": "38619c31-ee6b-464c-a492-7c4b7a24019e", 425 | "metadata": {}, 426 | "source": [ 427 | "You can also control what most operators do on the class, like comparison:" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": null, 433 | "id": "8a5b6874-7fb0-4417-98c4-2888bf0b6a1a", 434 | "metadata": {}, 435 | "outputs": [], 436 | "source": [ 437 | "DataPath(\"/usr/local\") == DataPath(\"/usr/local\")" 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "execution_count": null, 443 | "id": "ecaec26a-7ce1-493e-8de0-1989a7799798", 444 | "metadata": {}, 445 | "outputs": [], 446 | "source": [ 447 | "Path(\"/usr/local\") == Path(\"/usr/local\")" 448 | ] 449 | }, 450 | { 451 | "cell_type": "markdown", 452 | "id": "6c2f987e-cb9e-4338-aa59-c7a0b81df33c", 453 | "metadata": {}, 454 | "source": [ 455 | "Yeah, like `__repr__`, dataclasses generated a reasonable default `__eq__` method for us, while the vanilla class just falls back on `object`'s `__eq__`, which checks to see if the objects share the same memory (which these do not).\n", 456 | "\n", 457 | "We could add this manually. 
Let's use inheritance to add it, since we are ~~lazy~~ good programmers and don't like repeating ourselves:" 458 | ] 459 | }, 460 | { 461 | "cell_type": "code", 462 | "execution_count": null, 463 | "id": "851975ab-81a9-4ea8-a058-9c7ed12a1617", 464 | "metadata": {}, 465 | "outputs": [], 466 | "source": [ 467 | "class EqPath(Path):\n", 468 | " def __eq__(self, other):\n", 469 | " return self.string_location == other.string_location" 470 | ] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": null, 475 | "id": "e07e9c21-4614-4bc3-9a38-2f8f0cbf0dd0", 476 | "metadata": {}, 477 | "outputs": [], 478 | "source": [ 479 | "EqPath(\"/usr/local\") == EqPath(\"/usr/local\")" 480 | ] 481 | }, 482 | { 483 | "cell_type": "markdown", 484 | "id": "3c969b1c-3e35-49e4-8f73-e8dd4892b33d", 485 | "metadata": {}, 486 | "source": [ 487 | "What if we wanted to sort paths alphabetically? Neither of our class families support it out of the box:" 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": null, 493 | "id": "b4579c60-bf67-439c-a379-c0e7192b77d3", 494 | "metadata": {}, 495 | "outputs": [], 496 | "source": [ 497 | "# sorted([EqPath(\"/loc/a\"), EqPath(\"/loc/b\")])\n", 498 | "# sorted([DataPath(\"/loc/a\"), DataPath(\"/loc/b\")])" 499 | ] 500 | }, 501 | { 502 | "cell_type": "markdown", 503 | "id": "a0079558-222b-46c5-893e-c73b3a1e7753", 504 | "metadata": {}, 505 | "source": [ 506 | "Adding this would require adding `__lt__` (or dataclasses to the rescue again with `order=True`):" 507 | ] 508 | }, 509 | { 510 | "cell_type": "code", 511 | "execution_count": null, 512 | "id": "2f9cb863-7db2-4a48-a3a4-ae97cdadc035", 513 | "metadata": {}, 514 | "outputs": [], 515 | "source": [ 516 | "import dataclasses\n", 517 | "\n", 518 | "\n", 519 | "@dataclasses.dataclass(order=True)\n", 520 | "class DataPath:\n", 521 | " string_location: str\n", 522 | "\n", 523 | " def exists(self):\n", 524 | " return os.path.exists(self.string_location)" 525 | ] 526 | }, 527 | { 
528 | "cell_type": "code", 529 | "execution_count": null, 530 | "id": "20cdba99-1779-4da7-9fb4-ad3f7e78b481", 531 | "metadata": {}, 532 | "outputs": [], 533 | "source": [ 534 | "print(*sorted([DataPath(\"/loc/b\"), DataPath(\"/loc/a\")]), sep=\"\\n\")" 535 | ] 536 | }, 537 | { 538 | "cell_type": "markdown", 539 | "id": "aaad6e69-bc9e-4966-a73e-32982eeba54a", 540 | "metadata": {}, 541 | "source": [ 542 | "How does this work? It treats these like a tuple when sorting: the first field is sorted first." 543 | ] 544 | } 545 | ], 546 | "metadata": { 547 | "kernelspec": { 548 | "display_name": "Python [conda env:level-up-your-python] *", 549 | "language": "python", 550 | "name": "conda-env-level-up-your-python-py" 551 | }, 552 | "language_info": { 553 | "codemirror_mode": { 554 | "name": "ipython", 555 | "version": 3 556 | }, 557 | "file_extension": ".py", 558 | "mimetype": "text/x-python", 559 | "name": "python", 560 | "nbconvert_exporter": "python", 561 | "pygments_lexer": "ipython3", 562 | "version": "3.10.5" 563 | } 564 | }, 565 | "nbformat": 4, 566 | "nbformat_minor": 5 567 | } 568 | -------------------------------------------------------------------------------- /notebooks/1.2 Inspection.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "621d873b-a8e7-4ed3-88ed-9575a726a0a0", 6 | "metadata": {}, 7 | "source": [ 8 | "# Inspection\n", 9 | "\n", 10 | "Everything in Python is an object. You can inspect objects! Objects usually contain help information, and you can see what's available, and often can even find the source all through inspection." 
11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "226b3dd9-c64b-4b58-927d-024a75b6d79e", 17 | "metadata": { 18 | "tags": [ 19 | "remove-cell" 20 | ] 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "# WebAssembly version using Pyodide!\n", 25 | "# The following code is specific to the Pyodide backend.\n", 26 | "\n", 27 | "import sys\n", 28 | "\n", 29 | "if sys.platform.startswith(\"emscripten\"):\n", 30 | " import micropip\n", 31 | "\n", 32 | " await micropip.install(\"rich\")" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "9d19dab0-4474-48cf-9f4c-e02ee5bc24e1", 38 | "metadata": {}, 39 | "source": [ 40 | "## Inspecting\n", 41 | "\n", 42 | "You can inspect objects. There are lots of ways.\n", 43 | "\n", 44 | "* In a Jupyter notebook, use `object.` to bring up completions, shift tab for help.\n", 45 | "* You can use `dir(object)` to see all attributes (more or less)\n", 46 | "* You can use `help(object)` or `object?` (IPython only) to see help\n", 47 | "* You can `import inspect` and use the tools there\n", 48 | "* You can install the rich library and use `rich.inspect()`" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "id": "d8b218eb-a08b-46cb-948b-8cfde5a27f40", 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "def f(x: float) -> float:\n", 59 | " \"\"\"I am a square!\"\"\"\n", 60 | " return x**2" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "id": "d8eb6422-d609-4360-814d-6e106306b194", 66 | "metadata": {}, 67 | "source": [ 68 | "The help of an object includes its signature and its docstring:" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "id": "7b600d81-dd98-442f-9b92-829cf57a6c51", 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "help(f)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "markdown", 83 | "id": "a530a3df-961a-4623-abc1-2c39460bd894", 84 | "metadata": {}, 85 | "source": [ 86 | "You can see 
a list of methods (or use `` in iPython or the Python REPL, but underscored methods often require you start by typing an underscore first):" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "id": "67de20b0-f7a1-46ed-bcc9-729012f24027", 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "dir(f)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "id": "bc9dba3e-ce68-4d52-8ed1-a4ce7693c6ad", 102 | "metadata": {}, 103 | "source": [ 104 | "The inspect module is a built-in module that can provide a lot of other information:" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "id": "bf2fe178-a08f-4888-a24a-433eb973e2f7", 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "import inspect\n", 115 | "\n", 116 | "print(inspect.getsourcefile(f))\n", 117 | "print()\n", 118 | "print(inspect.getsource(f))\n", 119 | "# WARNING! THIS DOES NOT ALWAYS WORK!" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "id": "fc8ec338-8629-4156-891d-0489d0b39a8f", 125 | "metadata": {}, 126 | "source": [ 127 | "**WARNING: You cannot *always* see the source of a function, so this is a user trick, not one to use in a library!**\n", 128 | "\n", 129 | "Python does a three stage procedure when interpreting. It converts source to bytecode (pyc files), then runs the bytecode in the interpreter. When loading a file that has been run before (or came from a wheel, more on that later), it only loads the bytecode if the source hasn't changed - the source is not re-parsed. So inspect works by looking up the original file location. _But you can delete the original file and run from bytecode only!_ Don't do that, but you can. Also, you can run from a zip file, and the original file might not be openable. 
And, finally, when running live in a REPL, there may not be a source (it works in IPython for us, though).\n" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "id": "83a3d9c7-0bdc-450c-b20c-080467cd6135", 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "import rich\n", 140 | "\n", 141 | "rich.inspect(f)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "id": "dfd8bbf0-9b57-402c-8c41-5184a5949f6b", 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "rich.inspect(3)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "id": "5cbb730b-2a13-4bc5-80cd-d84610f0805d", 157 | "metadata": {}, 158 | "source": [ 159 | "> Try adding different keyword arguments to `rich.inspect`. Shift-tab in IPython to see options. `methods=True` on the int, for example." 160 | ] 161 | } 162 | ], 163 | "metadata": { 164 | "kernelspec": { 165 | "display_name": "Python [conda env:level-up-your-python] *", 166 | "language": "python", 167 | "name": "conda-env-level-up-your-python-py" 168 | }, 169 | "language_info": { 170 | "codemirror_mode": { 171 | "name": "ipython", 172 | "version": 3 173 | }, 174 | "file_extension": ".py", 175 | "mimetype": "text/x-python", 176 | "name": "python", 177 | "nbconvert_exporter": "python", 178 | "pygments_lexer": "ipython3", 179 | "version": "3.10.5" 180 | } 181 | }, 182 | "nbformat": 4, 183 | "nbformat_minor": 5 184 | } 185 | -------------------------------------------------------------------------------- /notebooks/1.3 Logging.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Logging" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "Something broke. Add a print statement! Fixed. Take it out! We've all been here. 
A steady stream of adding and (hopefully) removing print statements. But there is a better way, if you are willing to pay the (rather ugly) cost of setting it up: Logging. Here's what it looks like (yes, it looks like it was designed in the 80's, even though Python only dates back to '91):" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import logging" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "# Global setting\n", 33 | "logging.basicConfig(level=\"INFO\")" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "This is a global setting that affects all loggers, including the ones in the libraries you are using (hopefully). You can also change in individual logger.\n", 41 | "\n", 42 | "Next we get a logger, these are usually given names that match the package they are in (globally unique). If there is no logger to match, one is created." 
43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "log = logging.getLogger(\"unique\")" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "log.warning(\"Very important\")" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "log.info(\"Logging this here\")" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "log.debug(\"Logging this here\")" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "log.setLevel(\"DEBUG\")\n", 88 | "\n", 89 | "log.debug(\"Try again\")" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "You can see that messages at or above the current level get printed out. You can set fancier handlers, too, which can add timestamps and such.\n", 97 | "\n", 98 | "This is _very_ powerful for adding printouts that only show up if you ask for info or debug printouts (the normal setting is \"WARN\"). Sadly the design is _very_ old, with classic `%` style formatting baked in (use f-strings in the logging messages, though; that works well), global logger pool, and such. See Rich for a much more beautiful setting.\n", 99 | "\n", 100 | "The hardest part is generally setting up the infrastructure for controlling the logger, usually; it's best if you have a flag or environment variable that can control this, and you have to decide or allow a choice on whether you want all loggers or just yours to change level. And you might want to log to a file, rotate logs, etc; everything doable but not all that pretty." 
101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "## Test logging" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "We'll cover testing later, but you can have pytest add your logs whenever tests fail! This can save a lot of time debugging failures.\n", 115 | "\n", 116 | "This is the configuration line to do that:\n", 117 | "\n", 118 | "```toml\n", 119 | "[tool.pytest.ini_options]\n", 120 | "log_cli_level = \"info\"\n", 121 | "```" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "## More logging" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "If you need more from your logging, check out [structlog](https://www.structlog.org)! Also [rich](https://rich.readthedocs.io) can print beautiful logs. (And yes, you can combine structlog and rich!)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "This is how you would use Rich:\n", 143 | "\n", 144 | "```python\n", 145 | "from rich.logging import RichHandler\n", 146 | "\n", 147 | "FORMAT = \"%(message)s\"\n", 148 | "logging.basicConfig(\n", 149 | " level=\"NOTSET\", format=FORMAT, datefmt=\"[%X]\", handlers=[RichHandler()]\n", 150 | ")\n", 151 | "\n", 152 | "log = logging.getLogger(\"rich\")\n", 153 | "log.info(\"Hello, World!\")\n", 154 | "```\n", 155 | "\n", 156 | "(Must be set on first call to `basicConfig`)" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "This should be set for an _application_, not a library - libraries should stick to defaults and let applications override things like this." 
164 | ] 165 | } 166 | ], 167 | "metadata": { 168 | "kernelspec": { 169 | "display_name": "Python [conda env:level-up-your-python] *", 170 | "language": "python", 171 | "name": "conda-env-level-up-your-python-py" 172 | }, 173 | "language_info": { 174 | "codemirror_mode": { 175 | "name": "ipython", 176 | "version": 3 177 | }, 178 | "file_extension": ".py", 179 | "mimetype": "text/x-python", 180 | "name": "python", 181 | "nbconvert_exporter": "python", 182 | "pygments_lexer": "ipython3", 183 | "version": "3.10.5" 184 | } 185 | }, 186 | "nbformat": 4, 187 | "nbformat_minor": 4 188 | } 189 | -------------------------------------------------------------------------------- /notebooks/1.4 Debugging.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Debugging\n", 8 | "\n", 9 | "Code always eventually breaks. Let's look at some broken code: " 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "tags": [ 17 | "remove-cell" 18 | ] 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "# WebAssembly version using Pyodide!\n", 23 | "# The following code is specific to the Pyodide backend.\n", 24 | "\n", 25 | "import sys\n", 26 | "\n", 27 | "if sys.platform.startswith(\"emscripten\"):\n", 28 | " import micropip\n", 29 | "\n", 30 | " await micropip.install(\"rich\")" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "from typing import Tuple\n", 40 | "import sys" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "def broken() -> None:\n", 50 | " 1 / 0" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "def my_broken_function() -> 
Tuple[int, int]:\n", 60 | " x = 6\n", 61 | " y = 4\n", 62 | " x += 2\n", 63 | " y *= 2\n", 64 | " x -= y\n", 65 | " y /= x\n", 66 | " return x, y" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "my_broken_function()" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "## IPython debugger\n", 83 | "Try writing `%debug` into the cell below! (you can even skip the `%`)" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "# %debug" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "The mini-language here is `pdb`, and is similar to `gdb` and many other debuggers. You can step forward, up, etc. You can set breakpoints, or in Python 3.7+, you can just write `breakpoint()` anywhere, and the \"current\" debugger will pick up there!" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "## Rich tracebacks\n", 107 | "\n", 108 | "Another trick comes from the Rich library. You can install a nicer traceback handler. Never do this in a _library_, but only in applications and user code." 
109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "import rich.traceback\n", 118 | "\n", 119 | "rich.traceback.install(show_locals=True)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "This needs to be in a file (normally it will be) for the traceback to show up nicely:" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "%%writefile tmp_rich.py\n", 136 | "def my_broken_function():\n", 137 | " x = 6\n", 138 | " y = 4\n", 139 | " x += 2\n", 140 | " y *= 2\n", 141 | " x -= y\n", 142 | " y /= x\n", 143 | " return x, y" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "import tmp_rich\n", 153 | "\n", 154 | "tmp_rich.my_broken_function()" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "## Debugging in Jupyter Lab\n", 162 | "\n", 163 | "> This used to require the Xeus Python kernel instead of IPython, but IPyKernel 6+ now supports the visual debugger protocol directly.\n", 164 | "\n", 165 | "Turn on the debugger with the switch on the top right. Click on the line numbers to set a breakpoint. Then run." 
166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "def my_broken_function():\n", 175 | " # breakpoint()\n", 176 | " x = 6\n", 177 | " y = 4\n", 178 | " x += 2\n", 179 | " y *= 2\n", 180 | " x -= y\n", 181 | " y /= x\n", 182 | " return x, y" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "my_broken_function()" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [] 200 | } 201 | ], 202 | "metadata": { 203 | "kernelspec": { 204 | "display_name": "Python [conda env:level-up-your-python] *", 205 | "language": "python", 206 | "name": "conda-env-level-up-your-python-py" 207 | }, 208 | "language_info": { 209 | "codemirror_mode": { 210 | "name": "ipython", 211 | "version": 3 212 | }, 213 | "file_extension": ".py", 214 | "mimetype": "text/x-python", 215 | "name": "python", 216 | "nbconvert_exporter": "python", 217 | "pygments_lexer": "ipython3", 218 | "version": "3.10.5" 219 | } 220 | }, 221 | "nbformat": 4, 222 | "nbformat_minor": 4 223 | } 224 | -------------------------------------------------------------------------------- /notebooks/1.5 Profiling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "dede0c60-90c2-4b53-9c77-3e3bc588b552", 6 | "metadata": { 7 | "tags": [] 8 | }, 9 | "source": [ 10 | "# Profiling\n", 11 | "\n", 12 | "If you want to make code faster, always _profile first_; you want to spend your time on the slow parts, and you only want to have the important bits of your code designed to go fast - everything else should be designed to be clean and maintainable.\n", 13 | "\n", 14 | "## Line profiler\n", 15 | "\n", 16 | "There is a built-in cProfile (one of the very, very 
few modules to still have the \"classic\" Python 2 naming scheme), but instead, let's use `line_profiler` and its IPython magic. Let's start with this simple file that contains a MC sampler:" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "id": "df1b3e0f-592a-4c20-98eb-b9c6b082fa26", 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "# This notebook does not work on WebAssembly (no line profiler)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "id": "58cc8a65-82f7-40aa-ba18-04b3141ec7db", 33 | "metadata": { 34 | "tags": [ 35 | "remove-cell" 36 | ] 37 | }, 38 | "outputs": [], 39 | "source": [ 40 | "import numpy as np" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "26714775-0a2f-46b4-8c12-302b1a40a55e", 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "%%writefile temp_prof.py\n", 51 | "\n", 52 | "import numpy as np\n", 53 | "\n", 54 | "def norm(loc: float, scale: float, x: np.ndarray) -> np.ndarray:\n", 55 | " y = (x - loc) / scale\n", 56 | " return np.exp(-y**2/2)/np.sqrt(2*np.pi) / scale\n", 57 | "\n", 58 | "def sampler(\n", 59 | " data: np.ndarray,\n", 60 | " samples: int,\n", 61 | " *,\n", 62 | " mu_init: float = 0.5,\n", 63 | " proposal_width: float = 0.5,\n", 64 | " mu_prior_mu: float = 0,\n", 65 | " mu_prior_sd: float = 1.0,\n", 66 | ") -> np.ndarray:\n", 67 | " \n", 68 | " mu_current = mu_init\n", 69 | " posterior = [mu_current]\n", 70 | " for i in range(samples):\n", 71 | " # Suggest new position\n", 72 | " mu_proposal = np.random.normal(mu_current, proposal_width)\n", 73 | "\n", 74 | " # Compute likelihood by multiplying probabilities of each data point\n", 75 | " likelihood_current = np.prod(norm(mu_current, 1, data))\n", 76 | " likelihood_proposal = np.prod(norm(mu_proposal, 1, data))\n", 77 | "\n", 78 | " # Compute prior probability of current and proposed mu\n", 79 | " prior_current = norm(mu_prior_mu, 
mu_prior_sd, mu_current)\n", 80 | " prior_proposal = norm(mu_prior_mu, mu_prior_sd, mu_proposal)\n", 81 | "\n", 82 | " p_current = likelihood_current * prior_current\n", 83 | " p_proposal = likelihood_proposal * prior_proposal\n", 84 | "\n", 85 | " # Accept proposal?\n", 86 | " p_accept = p_proposal / p_current\n", 87 | "\n", 88 | " # Usually would include prior probability, which we neglect here for simplicity\n", 89 | " accept = np.random.rand() < p_accept\n", 90 | "\n", 91 | " if accept:\n", 92 | " # Update position\n", 93 | " mu_current = mu_proposal\n", 94 | "\n", 95 | " posterior.append(mu_current)\n", 96 | "\n", 97 | " return np.asarray(posterior)\n" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "id": "bd26b2f4-976d-42c5-9b9e-071cbea78a48", 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "np.random.seed(123)\n", 108 | "data = np.random.randn(20)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "id": "e84c8489-b741-4c66-9ff9-85893846cfe1", 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "from temp_prof import sampler" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "id": "d8d9dba8-4978-4e94-a16f-ceeed9bae54b", 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "%load_ext line_profiler" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "id": "cc40559b-0684-40fc-9493-719931daff25", 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "%lprun -f sampler posterior = sampler(data, samples=10_000, mu_init=1.)" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "id": "4eba2b7d-c47e-40cc-8d4f-f0a3dbcb8148", 144 | "metadata": {}, 145 | "source": [ 146 | "## Memory profiling\n", 147 | "\n", 148 | "Please see [Fil](https://pythonspeed.com/articles/memory-profiler-data-scientists/). 
It doesn't support Python 3.10 yet, and doesn't work with notebooks, so not included here." 149 | ] 150 | } 151 | ], 152 | "metadata": { 153 | "kernelspec": { 154 | "display_name": "Python [conda env:level-up-your-python] *", 155 | "language": "python", 156 | "name": "conda-env-level-up-your-python-py" 157 | }, 158 | "language_info": { 159 | "codemirror_mode": { 160 | "name": "ipython", 161 | "version": 3 162 | }, 163 | "file_extension": ".py", 164 | "mimetype": "text/x-python", 165 | "name": "python", 166 | "nbconvert_exporter": "python", 167 | "pygments_lexer": "ipython3", 168 | "version": "3.10.5" 169 | } 170 | }, 171 | "nbformat": 4, 172 | "nbformat_minor": 5 173 | } 174 | -------------------------------------------------------------------------------- /notebooks/2.1 Errors.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Errors and catching them\n", 8 | "\n", 9 | "There are two types of errors in Python. A few really bad errors are segfaults. These are almost always something triggered via the C interface (such as by using `ctypes`), and are not due to problems in your Python code. _Most_ errors in Python are part of the language, called Exceptions.\n", 10 | "\n", 11 | "An Exception is just a special control flow feature for things that are \"exceptional\"; often errors, but they are used for other things. In fact, internally, loops end by triggering a special exception!\n", 12 | "\n", 13 | "Exceptions \"bubble up\" through the stack to the outermost scope. If you catch an exception before it reaches the top, you can handle it. If you don't, then it shows up on the screen or in your logs as a traceback." 
14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "# Uncomment this to see an exception:\n", 23 | "# 1 / 0" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "try:\n", 33 | " 1 / 0\n", 34 | "except ZeroDivisionError:\n", 35 | " pass" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "Exceptions use inheritance to form a tree structure, so you can be as tight or as loose as needed in catching them. Let's see the parents of Zero Division error:" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "ZeroDivisionError.__mro__" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "You could catch any of these instead:" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "try:\n", 68 | " 1 / 0\n", 69 | "except ArithmeticError as e:\n", 70 | " print(repr(e))" 71 | ] 72 | }, 73 | { 74 | "cell_type": "markdown", 75 | "metadata": {}, 76 | "source": [ 77 | "Always catch the most narrow exception you can! 
Never try to catch a really broad class or all exceptions, because things like running out of memory, exit signals, and more are exceptions too, and you don't want to catch those if you didn't mean to!\n", 78 | "\n", 79 | "Here's a basic example of making your own:" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "class MyNewException(RuntimeError):\n", 89 | " pass\n", 90 | "\n", 91 | "\n", 92 | "try:\n", 93 | " raise MyNewException()\n", 94 | "except MyNewException:\n", 95 | " pass" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "There can be as many `except` blocks as you need, there's an `else` block if you want something to run only if nothing was caught, and there's also a `finally` block, which will always run, even if the exception is caught:" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "try:\n", 112 | " 1 / 0\n", 113 | "except ArithmeticError:\n", 114 | " print(\"Caught the exception!\")\n", 115 | "finally:\n", 116 | " print(\"I can run cleanup, regardless of what happens above!\")" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "Where would you want something like this? How about closing a file!\n", 124 | "\n", 125 | "```python\n", 126 | "try:\n", 127 | " f = open(...)\n", 128 | " # do stuff with f that might throw an exception (basically anything)\n", 129 | "finally:\n", 130 | " f.close()\n", 131 | "```\n", 132 | "\n", 133 | "This way, if an exception is thrown, the file still gets nicely closed. In fact, this is so important, we'll see a feature built around it soon!" 
134 | ] 135 | } 136 | ], 137 | "metadata": { 138 | "kernelspec": { 139 | "display_name": "Python [conda env:level-up-your-python] *", 140 | "language": "python", 141 | "name": "conda-env-level-up-your-python-py" 142 | }, 143 | "language_info": { 144 | "codemirror_mode": { 145 | "name": "ipython", 146 | "version": 3 147 | }, 148 | "file_extension": ".py", 149 | "mimetype": "text/x-python", 150 | "name": "python", 151 | "nbconvert_exporter": "python", 152 | "pygments_lexer": "ipython3", 153 | "version": "3.10.5" 154 | }, 155 | "toc-autonumbering": false, 156 | "toc-showmarkdowntxt": false, 157 | "toc-showtags": false 158 | }, 159 | "nbformat": 4, 160 | "nbformat_minor": 4 161 | } 162 | -------------------------------------------------------------------------------- /notebooks/2.2 Generators.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Generators and Iterators\n", 8 | "\n", 9 | "Let's change topics just for a moment (we'll get to context managers in a moment, which we are building toward). Let's look into iterators, which are a form of generator. I'm sure you've seen one:" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "for i in range(4):\n", 19 | " print(i)" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "But what is `range(4)`? It's not a list, it's a custom object:" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "range(4)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "Python has built into it the concept of iteration. 
What the various looping structures do is call `iter()` on the object first, then call `next()` over and over until a `StopIteration` Exception is raised. Try it:" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "it = iter(range(0, 4))" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "next(it)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "## Defining iterators\n", 68 | "\n", 69 | "You could implement `__iter__` and `__next__` yourself, but Python has a built in syntax shortcut for making iterators (and generators, which have a `__send__` too): " 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "def range4():\n", 79 | " yield 0\n", 80 | " yield 1\n", 81 | " yield 2\n", 82 | " yield 3" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "A function that has at least one `yield` in it creates a factory function that returns a generator (iterator)." 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "range4()" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "for i in range4():\n", 108 | " print(i)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "The presence of a single yield anywhere in a function turns it into an iterator. Notice \"calling\" the iterator factory function just produces an iterable object, it does not run anything yet. 
Then, when you iterate it, it \"pauses\" at each yield.\n", 116 | "\n", 117 | "Many Python functions take iterables, like `list` and `tuple`:" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "list(range4())" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "If Python were rewritten today, there would likely be a keyword, like `iter def`, to indicate that a `def` is making an iterator instead of a normal function; but for historical reasons, you just have to look for `yield`'s inside the function. " 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "If you like list comprehensions:" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "[a for a in range(4)]" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "Then you'll be glad to know there is a generator comprehension too:" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "(a for a in range(4))" 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "**What about restarting?**\n", 173 | "\n", 174 | "Generators are often \"one shot\", and expected to be recreated if needed again - this is true for the `yield` based syntax above. But you can make an object that supports multiple passes and restarting if you do it yourself. 
In fact, `range` supports this:" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "r = range(4)\n", 184 | "print(list(r), list(r))" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "But normally, you call them inline, such as `list(range(4))`, so it is not often missed if they can't be restarted." 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": { 197 | "tags": [] 198 | }, 199 | "source": [ 200 | "## Factoring iterators and generators" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": { 206 | "tags": [] 207 | }, 208 | "source": [ 209 | "You can also factor out generators, just like you can factor out functions:" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "def middle_two():\n", 219 | " yield 1\n", 220 | " yield 2\n", 221 | "\n", 222 | "\n", 223 | "def range4_factored():\n", 224 | " yield 0\n", 225 | " yield from middle_two()\n", 226 | " yield 3" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "list(range4_factored())" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": {}, 241 | "source": [ 242 | "You might be tempted to place a loop inside the generator with a yield (`for item in middle_two(): yield item`), but `yield from` is simpler and also works correctly with generators (next section)." 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "## General generators\n", 250 | "\n" 251 | ] 252 | }, 253 | { 254 | "cell_type": "markdown", 255 | "metadata": {}, 256 | "source": [ 257 | "A generator that only returns values is called an iterator, and that's mostly what you directly see. 
Generators that are not iterators support two-way communication. You rarely need these, and there really isn't a nice syntax method for sending information to a generator, but here is one, just as an example:" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "metadata": {}, 264 | "outputs": [], 265 | "source": [ 266 | "def generator():\n", 267 | " received = yield 1\n", 268 | " print(\"Received\", received)\n", 269 | " received = yield 2\n", 270 | " print(\"Received\", received)" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "# Prepare generator\n", 280 | "active = iter(generator())\n", 281 | "print(\"Running first send\")\n", 282 | "print(f\"{active.send(None) = }\")\n", 283 | "print(\"Running second send\")\n", 284 | "print(f\"{active.send(10) = }\")\n", 285 | "try:\n", 286 | " active.send(20)\n", 287 | "except StopIteration:\n", 288 | " print(\"Done\")" 289 | ] 290 | }, 291 | { 292 | "cell_type": "markdown", 293 | "metadata": {}, 294 | "source": [ 295 | "`next(active)` is the same thing as `active.send(None)` - the first send must always be `None`, since it hasn't reached the first `=` sign yet. The final send does not need to be `None` - above, the final `active.send(20)` simply ends the generator, and its value is discarded since nothing receives it after the last `yield`." 
296 | ] 297 | } 298 | ], 299 | "metadata": { 300 | "kernelspec": { 301 | "display_name": "Python [conda env:level-up-your-python] *", 302 | "language": "python", 303 | "name": "conda-env-level-up-your-python-py" 304 | }, 305 | "language_info": { 306 | "codemirror_mode": { 307 | "name": "ipython", 308 | "version": 3 309 | }, 310 | "file_extension": ".py", 311 | "mimetype": "text/x-python", 312 | "name": "python", 313 | "nbconvert_exporter": "python", 314 | "pygments_lexer": "ipython3", 315 | "version": "3.10.5" 316 | }, 317 | "toc-autonumbering": false, 318 | "toc-showmarkdowntxt": false, 319 | "toc-showtags": false 320 | }, 321 | "nbformat": 4, 322 | "nbformat_minor": 4 323 | } 324 | -------------------------------------------------------------------------------- /notebooks/2.3 Decorators.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Decorators\n", 8 | "\n", 9 | "This is likely the simplest syntactic sugar you'll see today, but maybe one with some of the furthest reaching consequences. Let's say you have a bit of code that looks like this:\n", 10 | "\n", 11 | "```python\n", 12 | "def f(): ...\n", 13 | "f = g(f)\n", 14 | "```\n", 15 | "\n", 16 | "So `g` is a function that takes a function and (hopefully) returns a function, probably a very similar one since you are giving it the same name as the old \"f\". In Python 2.5, we gained the ability to write this instead:\n", 17 | "\n", 18 | "```python\n", 19 | "@g\n", 20 | "def f(): ...\n", 21 | "```\n", 22 | "\n", 23 | "That's it. The thing after the `@` \"decorates\" (or transforms) the function you are defining and the output is saved with the name `f`." 
24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "def bad_decorator(func):\n", 33 | " print(f\"You don't need {func.__name__}!\")\n", 34 | " return 2\n", 35 | "\n", 36 | "\n", 37 | "@bad_decorator\n", 38 | "def f(x):\n", 39 | " return x**2" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "f" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "Okay, so that's useless (well, except for the printout, which could be good for logging). What could this be used for? Turns out, almost anything. Having a syntax for \"modifying\" a function (it also works on methods and classes, too) is fantastic, and lets you think in a different way.\n", 56 | "\n", 57 | "There are several decorators in builtins, `property`, `classmethod`, and `staticmethod`. For example:" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "class BagOfFunctions:\n", 67 | " @staticmethod\n", 68 | " def f(x):\n", 69 | " return x**2" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "What's missing from the above function? Self! It's static, it doesn't need an instance, or even the current class." 
77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "BagOfFunctions().f(2)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "BagOfFunctions.f(2)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "The decorator took our method and added the correct handling to it so it works with or without an instance.\n", 102 | "\n", 103 | "If the thing after the `@` is called, this is called a decorator factory; it's exactly the same as above, just slightly more unusual in structure to what you normally see:\n", 104 | "\n", 105 | "```python\n", 106 | "def f...\n", 107 | "f = g()(f)\n", 108 | "\n", 109 | "# same as\n", 110 | "\n", 111 | "@g()\n", 112 | "def f...\n", 113 | "```\n", 114 | "\n", 115 | "You can also nest decorators." 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "You could have a rate decorator, which causes a function to wait after completing so that it always takes the same amount of time. You could have a logging decorator, which prints to a log every time the wrapped function is called. There are quite a few decorators in the standard library; we'll see more later, but here are a couple interesting ones:" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "## Examples\n", 130 | "### Least Recently Used Caching (LRU)\n", 131 | "\n", 132 | "This is all you need to implement a cache based on the input arguments. When you call this again with recently used arguments (the cache size is adjustable), it pulls from a cache instead of rerunning the function." 
133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": {}, 139 | "outputs": [], 140 | "source": [ 141 | "import functools\n", 142 | "import time\n", 143 | "\n", 144 | "\n", 145 | "@functools.lru_cache\n", 146 | "def slow(x: int) -> int:\n", 147 | " time.sleep(2)\n", 148 | " return x" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": null, 154 | "metadata": {}, 155 | "outputs": [], 156 | "source": [ 157 | "slow(4)" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": {}, 164 | "outputs": [], 165 | "source": [ 166 | "slow(4)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "### Single Dispatch\n", 174 | "\n", 175 | "Another magical decorator is `functools.singledispatch`, which lets you simulate type based dispatch (but only on the first argument) from other languages:" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "@functools.singledispatch\n", 185 | "def square(x):\n", 186 | " print(\"Not implemented\")\n", 187 | "\n", 188 | "\n", 189 | "@square.register\n", 190 | "def square_int(x: int) -> int:\n", 191 | " return x**2\n", 192 | "\n", 193 | "\n", 194 | "@square.register\n", 195 | "def square_str(x: str) -> str:\n", 196 | " return f\"{x}²\"" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "square(2)" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [ 214 | "square(\"x\")" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "### Other functools decorators\n", 222 | "\n", 223 | "There's also `@functools.total_ordering`, which when applied to a class, 
fills in the missing comparison operators from the ones that are already there (`==`, `!=`, `<`, `<=`, `>`, `>=` can be computed from just two functions)\n", 224 | "\n", 225 | "And `@functools.wraps` is a decorator that helps you write decorators that wrap functions. Also see [decorator](https://github.com/micheles/decorator) and the newer, fancier [wrapt](https://github.com/GrahamDumpleton/wrapt) libraries on PyPI." 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "### Dataclasses\n", 233 | "\n", 234 | "Another use case we've briefly seen is dataclasses from Python 3.7:" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": null, 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [ 243 | "from dataclasses import dataclass\n", 244 | "\n", 245 | "\n", 246 | "@dataclass\n", 247 | "class Vector:\n", 248 | " x: float\n", 249 | " y: float" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "This `@dataclass` is taking the class you pass in, converting the class annotations to instance variables, making an `__init__`, `__repr__`, and much more. When you are viewing a class as data + functionality, this is a very natural way to work." 257 | ] 258 | }, 259 | { 260 | "cell_type": "code", 261 | "execution_count": null, 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [ 265 | "Vector(1, y=2)" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": {}, 271 | "source": [ 272 | "If you need Dataclasses in 3.6, there's a pip install dataclasses backport, and this was based on the popular [attrs](https://www.attrs.org/en/stable/) library, which is much more powerful and can do all sorts of tricks, like validate and transform values." 
273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "### Third party: Click\n", 280 | "\n", 281 | "[Click](https://click.palletsprojects.com/en/7.x/) is a package that lets you write command line interfaces using decorators on functions:\n", 282 | "\n", 283 | "```python\n", 284 | "import click\n", 285 | "\n", 286 | "@click.command()\n", 287 | "@click.option('--count', default=1, help='Number of greetings.')\n", 288 | "@click.option('--name', prompt='Your name',\n", 289 | " help='The person to greet.')\n", 290 | "def hello(count, name):\n", 291 | " \"\"\"Simple program that greets NAME for a total of COUNT times.\"\"\"\n", 292 | " for x in range(count):\n", 293 | " click.echo('Hello %s!' % name)\n", 294 | "\n", 295 | "if __name__ == '__main__':\n", 296 | " hello()\n", 297 | "```" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "We'll see more decorators, don't worry!" 305 | ] 306 | } 307 | ], 308 | "metadata": { 309 | "kernelspec": { 310 | "display_name": "Python [conda env:level-up-your-python] *", 311 | "language": "python", 312 | "name": "conda-env-level-up-your-python-py" 313 | }, 314 | "language_info": { 315 | "codemirror_mode": { 316 | "name": "ipython", 317 | "version": 3 318 | }, 319 | "file_extension": ".py", 320 | "mimetype": "text/x-python", 321 | "name": "python", 322 | "nbconvert_exporter": "python", 323 | "pygments_lexer": "ipython3", 324 | "version": "3.10.5" 325 | }, 326 | "toc-autonumbering": false, 327 | "toc-showmarkdowntxt": false, 328 | "toc-showtags": false 329 | }, 330 | "nbformat": 4, 331 | "nbformat_minor": 4 332 | } 333 | -------------------------------------------------------------------------------- /notebooks/2.4 Context Managers.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Context Managers\n", 8 
| "\n", 9 | "Yes! Our journey is complete, we are where I wanted to be. Context managers are one of my favorites, and a little underused, especially in user code, when they are really easy to both write and use (while decorators, for comparison, are really easy to use but a bit tricky to write). A context manager has a specific purpose.\n", 10 | "\n", 11 | "A context manager is used for what I call \"action at a distance\". It lets you schedule an action for later that is sure to always happen (unless you get a segfault, or exit a really nasty way). This is likely the most famous context manager:\n", 12 | "\n", 13 | "```python\n", 14 | "with open(...) as f:\n", 15 | " txt = f.readlines()\n", 16 | "```\n", 17 | "\n", 18 | "When you enter the with block, `__enter__` is called and the result is assigned to the `as` target, if there is one. Then when you leave the block, `__exit__` is called. If you leave via an exception, `__exit__` gets special arguments that let you even decide what to do based on that the exception is - or even handle the exception and continue. `contextlib` has several simple context managers, like `redirect_stdout`, `redirect_stderr`, and `suppress`:" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import contextlib" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "with contextlib.suppress(ZeroDivisionError):\n", 37 | " 1 / 0\n", 38 | " print(\"This is never reached\")" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "But the real star of contextlib is `contextmanager`, which is a decorator that makes writing context managers really easy. You use \"yield\" to break the before and after code. 
Let's try one of my favorites, a timer context manager:" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "import time\n", 55 | "\n", 56 | "\n", 57 | "@contextlib.contextmanager\n", 58 | "def timer():\n", 59 | " old_time = time.monotonic()\n", 60 | " try:\n", 61 | " yield\n", 62 | " finally:\n", 63 | " new_time = time.monotonic()\n", 64 | " print(f\"Time taken: {new_time - old_time} seconds\")" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "with timer():\n", 74 | " print(\"Start\")\n", 75 | " time.sleep(1.5)\n", 76 | " print(\"End\")" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "As an extra bonus, `contextmanager` uses `ContextDecorator`, so the objects it makes can also be used as Decorators!" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "@timer()\n", 93 | "def long_function():\n", 94 | " print(\"Start\")\n", 95 | " time.sleep(1.5)\n", 96 | " print(\"End\")\n", 97 | "\n", 98 | "\n", 99 | "long_function()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "**Just a quick word on this**: if you are coming from a language like JavaScript or Ruby, you might be thinking these look like blocks/Procs/lambdas. They are not; they are unscoped, and you cannot access the code inside the with block from the context manager (unlike a decorator, too). So you cannot create a \"run this twice\" context manager, for example. They are only for action-at-a-distance." 
107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "[Pretty much everything](https://docs.python.org/3/library/contextlib.html) in the `contextlib` module that does not have the word `async` in it is worth learning. `contextlib.closing` turns an object with a `.close()` into a context manager, and `contextlib.ExitStack` lets you nest context managers without eating up massive amounts of whitespace." 114 | ] 115 | }, 116 | { 117 | "cell_type": "markdown", 118 | "metadata": {}, 119 | "source": [ 120 | "## Quick note: Async\n", 121 | "\n", 122 | "Everything we've been doing has built on itself, and we seemed to be going somewhere; the pinnacle of this direction was actually not context managers, but `async/await`. All of this feeds into `async/await`, which was formally introduced as a language component in Python 3.6. However, we did skip a necessary step; we didn't talk about generators (iterators can actually \"send\" values in, not just produce them, but there's no specific construct for doing that, like there is for consuming values in a for loop). The main reason we didn't try to reach `async` though is that I've never found a great use for it in scientific programming; it is much more intrusive than normal threading, it doesn't really \"live\" side-by-side with normal synchronous programming all that well (it's better now, though), and the libraries for it are a little young. Feel free to investigate on your own, though! I've also discussed the mechanisms behind it in detail in my blog a few years ago." 
123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "Let's whet your appetite with a quick example, though:" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "import asyncio\n", 139 | "\n", 140 | "\n", 141 | "# This is an \"async\" function, like a generator\n", 142 | "async def slow(t: int) -> int:\n", 143 | " print(f\"About to sleep for {t} seconds\")\n", 144 | " await asyncio.sleep(t)\n", 145 | " print(f\"Slept for {t} seconds\")\n", 146 | " return t\n", 147 | "\n", 148 | "\n", 149 | "# Gather runs its arguments in parallel when awaited on\n", 150 | "await asyncio.gather(slow(3), slow(1), slow(2))\n", 151 | "\n", 152 | "# Only works if running in an eventloop already, like IPython or with python -m asyncio\n", 153 | "# Otherwise, use: asyncio.run(...)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "Notice _there are no locks_! We don't have to worry about printing being interleaved, because it's not running at the same time. Only the explicit \"await\" lines \"wait\" at the same time!" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "Once we start using Python 3.11 (probably early-mid 2023, based on Pyodide), an asyncio section using TaskGroups will likely be added." 
168 | ] 169 | } 170 | ], 171 | "metadata": { 172 | "kernelspec": { 173 | "display_name": "Python [conda env:level-up-your-python] *", 174 | "language": "python", 175 | "name": "conda-env-level-up-your-python-py" 176 | }, 177 | "language_info": { 178 | "codemirror_mode": { 179 | "name": "ipython", 180 | "version": 3 181 | }, 182 | "file_extension": ".py", 183 | "mimetype": "text/x-python", 184 | "name": "python", 185 | "nbconvert_exporter": "python", 186 | "pygments_lexer": "ipython3", 187 | "version": "3.10.5" 188 | }, 189 | "toc-autonumbering": false, 190 | "toc-showmarkdowntxt": false, 191 | "toc-showtags": false 192 | }, 193 | "nbformat": 4, 194 | "nbformat_minor": 4 195 | } 196 | -------------------------------------------------------------------------------- /notebooks/2.5 Static Typing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Static Type Hinting" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "tags": [ 15 | "remove-cell" 16 | ] 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "# This notebook does not work on WebAssembly (no line profiler)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "# Small local extension\n", 30 | "%load_ext save_and_run" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "## Basics\n", 38 | "\n", 39 | "The most exciting thing happening right now in Python development is static typing. Since Python 3.0, we've had function annotations, and since 3.6, variable annotations. In 3.5, we got a \"typing\" library, which provides tools to describe types. 
You've already seen me using type hints:" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "def f(x: int) -> int:\n", 49 | " return x * 5" 50 | ] 51 | }, 52 | { 53 | "cell_type": "markdown", 54 | "metadata": {}, 55 | "source": [ 56 | "You might have been asking yourself, what does that do? Does it limit what I can use here?" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "f([\"hi\"])" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "No. It does *nothing* at runtime, except store the object. And in the upcoming Python 3.11 or 3.12 (or 3.7+ with `from __future__ import annotations`), it doesn't even store the actual object, just the string you type here, so then anything that can pass the Python parser is allowed here.\n", 73 | "\n", 74 | "It is not useless though! For one, it helps the reader. Knowing the types expected really gives you a much better idea of what is going on and what you can do and can't do.\n", 75 | "\n", 76 | "But the key goal is: static type checking! There are a collection of static type checkers, the most \"official\" and famous of which is MyPy. You can think of this as the \"compiler\" for compiled languages like C++; it checks to make sure you are not lying about the types. For example:" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "%%save_and_run mypy\n", 86 | "def f(x: int) -> int:\n", 87 | " return x * 5\n", 88 | "\n", 89 | "f([\"hi\"])" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "There we go! And, most importantly, _we didn't have to run any code to see this error_! Your tests cannot test every possible branch, every line of code. 
MyPy can (though it doesn't by default, due to gradual typing). You may have code that runs rarely, that requires remote resources, that is slow, etc. All those can be checked by MyPy. It also keeps you (too?) truthful in your types." 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "### Catching an error" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "Let's see an example of an error that MyPy can catch:" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "%%save_and_run mypy\n", 120 | "from __future__ import annotations # Python 3.7+\n", 121 | "\n", 122 | "\n", 123 | "def f(x: int | None) -> int | None:\n", 124 | " return x * 5\n", 125 | "\n", 126 | "f(4)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "Your test suite may have forgotten to run with a `None` input. You may not run into `None` often, until you are in a critical situation. But MyPy can find it and tell you there's a logic issue, your function cannot take `None` like it claims it can." 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "### Adding types\n", 141 | "\n", 142 | "There are three ways to add types.\n", 143 | "\n", 144 | "1. They can be inline as annotations. Best for Python 3 code, usually.\n", 145 | "2. They can be in special \"type comments\". Required mostly for Python 2 code, and still requires the proper imports (one reason why the packaging section is so important, depending on libraries like backports is important).\n", 146 | "3. They can be in a separate file with the same name but with a `.pyi` extension. This is important for type stubs or for cases where you don't want to add imports or touch the original code. 
You can annotate compiled files or libraries you don't control this way.\n", 147 | "\n", 148 | "If you have a library you don't control, you can add \"type stubs\" for it, then give MyPy your stubs directory. MyPy will pull the types from your stubs. If you are writing code for a Raspberry Pi, for example, you could add the stubs for the Pi libraries, and then validate your code, without ever even installing the Pi-only libraries!" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "### Configuration\n", 156 | "\n", 157 | "By default, MyPy does as little as possible, so that you can add it iteratively to a code base. By default:\n", 158 | " \n", 159 | "* All untyped variables and return values will be Any\n", 160 | "* Code inside untyped functions is not checked _at all_\n", 161 | "\n", 162 | "\n", 163 | "You can add configuration to `pyproject.toml` (and a little bit to the files themselves), or you can go all the way and pass `--strict`, which will turn on everything.\n", 164 | "\n", 165 | "For a library to support typing, it has to a) add types using any of the three methods, and b) add a `py.typed` empty file to indicate that it's okay to look for types inside it. MyPy also looks in `typeshed`, which is a library full of type hints for (mostly) the standard library.\n", 166 | "\n", 167 | "Third party libraries that are typed sometimes forget this last step, by the way!" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "Personally, I recommend using pre-commit to run all your checks except pytest (and that only because it's likely slow), and including MyPy in your pre-commit testing. Try to turn on as much as possible, and increase it until you can run with full `strict` checking." 
175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "### Other features\n", 182 | "\n", 183 | "Static typing has some great features worth checking out:\n", 184 | "\n", 185 | "* Unions (New syntax in Python 3.10)\n", 186 | "* Generic Types (New syntax in Python 3.9)\n", 187 | "* Protocols\n", 188 | "* Literals\n", 189 | "* TypedDict\n", 190 | "* Nicer NamedTuple definition (very popular in Python 3 code)\n", 191 | "* MyPy validates the Python version you ask for" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "## Extended example\n", 199 | "\n", 200 | "Here's the classic syntax, which you need to use if support 3.6+." 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": {}, 207 | "outputs": [], 208 | "source": [ 209 | "%%save_and_run mypy --strict\n", 210 | "from typing import Union, List\n", 211 | "\n", 212 | "\n", 213 | "# Generic types take bracket arguments\n", 214 | "def f(x: int) -> List[int]:\n", 215 | " return list(range(x))\n", 216 | "\n", 217 | "# Unions are a list of types that all could be allowed\n", 218 | "def g(x: Union[str, int]) -> None:\n", 219 | " # Type narrowing - Unions get narrowed\n", 220 | " if isinstance(x, str):\n", 221 | " print(\"string\", x.lower())\n", 222 | " else:\n", 223 | " print(\"int\", x)\n", 224 | " \n", 225 | " # Calling x.lower() is invalid here!" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "metadata": {}, 231 | "source": [ 232 | "With `from __future__ import annotations` in Python 3.7, annotations no longer get evaluated at runtime, and so this is valid on Python 3.7 and MyPy!" 
233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": null, 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "%%save_and_run mypy --strict\n", 242 | "from __future__ import annotations\n", 243 | "\n", 244 | "\n", 245 | "def f(x: int) -> list[int]:\n", 246 | " return list(range(x))\n", 247 | "\n", 248 | "def g(x: str | int) -> None:\n", 249 | " if isinstance(x, str):\n", 250 | " print(\"string\", x.lower())\n", 251 | " else:\n", 252 | " print(\"int\", x)" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "Notice that I didn't even have to import anything from typing! Note that you cannot use the \"new\" syntax in non annotation locations (like unions in `isinstance`) until Python supports it.\n", 260 | "\n", 261 | "You can use the above in earlier Python versions if you use strings.\n", 262 | "\n", 263 | "When run alongside a good linter like flake8, this can catch a huge number of issues before tests or they are discovered in the wild! It also prompts _better design_, because you are thinking about how types work and interact. It's also more readable, since if I give you code like this:\n", 264 | "\n", 265 | "```python\n", 266 | "def compute(timestamp):\n", 267 | " ...\n", 268 | "```\n", 269 | "\n", 270 | "You don't know \"what\" timestamp is. Is it an int? A float? An object? With types, you'll know what I was intending to give you. You can use type aliases to really give expressive names here!" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "## Protocols\n", 278 | "\n", 279 | "One of the best features of MyPy is support for structural subtyping via Protocols - formalized duck-typing, basically. This allows _cross library interoperability_, unlike traditional inheritance. 
Here's how it works:" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": null, 285 | "metadata": {}, 286 | "outputs": [], 287 | "source": [ 288 | "from typing import Protocol\n", 289 | "\n", 290 | "\n", 291 | "class Duck(Protocol):\n", 292 | " def quack(self) -> str:\n", 293 | " ..." 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "Yes, the `...` is actually part of the code here; it's conventional to use it instead of `pass` for typing." 301 | ] 302 | }, 303 | { 304 | "cell_type": "markdown", 305 | "metadata": {}, 306 | "source": [ 307 | "Now any object that can \"quack\" (and return a string) is a Duck. We can even add `@runtime_checkable` which will allow us to check this (minus the types) at runtime in `isinstance`. So now we can design code like this: " 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": null, 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [ 316 | "def pester_duck(a_duck: Duck) -> None:\n", 317 | " print(a_duck.quack())\n", 318 | " print(a_duck.quack())" 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "metadata": {}, 324 | "source": [ 325 | "And the type checker will ensure we only write code valid on all \"Duck\"s. 
And, we can write a duck implementation and test it like this:" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "class MyDuck:\n", 335 | " def quack() -> str:\n", 336 | " return \"quack\"" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "This will pass a check for being a Duck, for example something like this:" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [ 352 | "import typing\n", 353 | "\n", 354 | "if typing.TYPE_CHECKING:\n", 355 | " _: Duck = typing.cast(MyDuck, None)" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "metadata": {}, 361 | "source": [ 362 | "Notice the complete lack of dependencies here. We don't need `MyDuck` to write `pester_duck`, or vice-versa. And, we don't even need `Duck` to write either one at runtime! The dependence on Duck for `pester_duck` is entirely a type-check-time dependence (unless we want to use a `runtime_checkable` powered `isinstance`)." 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "metadata": {}, 368 | "source": [ 369 | "There are _lots_ of built-in Protocols, most of which pre-date typing and are available in an Abstract Base Class form. Most of them check for one or more special methods, like Iterable, Iterator, etc." 
370 | ] 371 | } 372 | ], 373 | "metadata": { 374 | "kernelspec": { 375 | "display_name": "Python [conda env:level-up-your-python] *", 376 | "language": "python", 377 | "name": "conda-env-level-up-your-python-py" 378 | }, 379 | "language_info": { 380 | "codemirror_mode": { 381 | "name": "ipython", 382 | "version": 3 383 | }, 384 | "file_extension": ".py", 385 | "mimetype": "text/x-python", 386 | "name": "python", 387 | "nbconvert_exporter": "python", 388 | "pygments_lexer": "ipython3", 389 | "version": "3.10.5" 390 | }, 391 | "toc-autonumbering": false, 392 | "toc-showmarkdowntxt": false, 393 | "toc-showtags": false 394 | }, 395 | "nbformat": 4, 396 | "nbformat_minor": 4 397 | } 398 | -------------------------------------------------------------------------------- /notebooks/2.6 Using Packages.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Using packages" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "When you are programming, usually you are solving a problem - the programming is simply a means to an end. If that's the case, then packaging is a means to a means to an end." 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "Packaging is of my favorite subjects, because it's absolutely critical as soon as you:\n", 22 | "\n", 23 | "* Work on more than one thing\n", 24 | "* Share your work with anyone (even if not as a package)\n", 25 | "* Work in more than one place\n", 26 | "* Upgrade or change anything on your computer\n", 27 | "\n", 28 | "Unfortunately, packing has a _lot_ of historical cruft, bad practices that have easy solutions today but are still propagated.\n", 29 | "\n", 30 | "We will split our focus into two situations, then pull both ideas together." 
31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "## Installing a package\n", 38 | "\n", 39 | "You will see two _very_ common recommendations:\n", 40 | " \n", 41 | "```bash\n", 42 | "pip install # Use only in virtual environment!\n", 43 | "pip install --user # Almost never use\n", 44 | "```\n", 45 | "\n", 46 | "Don't use them unless you know exactly what you are doing! The first one will try to install globally, and if you don't have permission, will install to your user site packages (as of a recent pip update). In global site packages, you can get conflicting versions of libraries, you can't tell what you've installed for what, it's a mess. And user site packages are worse, because all installs of Python on your computer share it, so you might override and break things you didn't intend to.\n", 47 | "\n", 48 | "The solution depends on what you are doing:" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "### Safe libraries" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "There are likely a _few_ libraries (possibly one) that you just have to install globally. Go ahead, but be careful (and always use your system package manager instead if you can, like [`brew` on macOS](https://brew.sh) or the Windows ones - Linux package managers tend to be too old to use for Python libraries).\n", 63 | "\n", 64 | "Ideas for safe libraries: the other libraries you see listed in this lesson! It's likely better than bootstrapping them. In fact, you can get away with just one:" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "### pipx: pip for executables!" 
72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "If you are installing an \"application\", that is, it has a script end-point and you don't expect to import it, *do not use pip*; use [pipx](https://pypa.github.io/pipx/). It will isolate it in a virtual environment, but hide all that for you, and then you'll just have an application you can use with no global/user side effects!\n", 79 | "\n", 80 | "```bash\n", 81 | "pip install pipx # Easier to install like this\n", 82 | "\n", 83 | "pipx install black\n", 84 | "black myfile.py\n", 85 | "```\n", 86 | "\n", 87 | "Now you have \"black\", but nothing has changed in your global site packages! You cannot import black or any of it's dependencies! There are no conflicting requirements (more common in pip 20.3+, which now will refuse to install two packages that have incompatible requirements)." 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "#### Directly running applications" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "Pipx also has a very powerful feature: you can install and run an application in a temporary environment!\n", 102 | "\n", 103 | "For example, this works just as well as the second two lines above:\n", 104 | "\n", 105 | "```bash\n", 106 | "pipx run black myfile.py\n", 107 | "```\n", 108 | "\n", 109 | "The first time you do this, pipx create a venv and puts black in it, then runs it. If you run it again, it will reuse the cached environment if it hasn't been cleaned up yet, so it's fast.\n", 110 | "\n", 111 | "Another example:\n", 112 | "\n", 113 | "```bash\n", 114 | "pipx run build\n", 115 | "```\n", 116 | "\n", 117 | "> This is great for CI! 
Pipx is installed by default in GitHub Actions (GHA); you do not need `actions/setup-python` to run it.\n", 118 | "\n", 119 | "If the command and the package have different names, then you may have to write this with a `--spec`, though pipx has a way to customize this, and it will try to guess if there's only one command in the package. You can also pin exactly, specify extras, etc:\n", 120 | "\n", 121 | "```bash\n", 122 | "pipx run --spec cibuildwheel==2.14.1 cibuildwheel --platform linux\n", 123 | "```" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "### Environment tools" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "There are other tools we are about to talk about, like `virtualenv`, `poetry`, `pipenv`, `nox`, `tox`, etc. that you could also install with `pip` (or better yet, with `pipx`), and are _not too_ likely to interfere or break down if you use `pip`. But keep it to a minimum or use `pipx`." 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "### Nox and Tox" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "metadata": {}, 150 | "source": [ 151 | "You can also use a task runner tool like `nox` or `tox`. These create and manage virtual environment for each task (called sessions in `nox`). This is a very simple way to avoid making and entering an environment, and is great for less common tasks, like scripts and docs." 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "### Python launcher" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "The Python launcher for Unix (a Rust port of the one bundled with Python on Windows by a Python core developer) supports virtual environments in a `.venv` folder. 
So if you make a virtual environment with `python -m venv .venv` or `virtualenv .venv`, then you can just run `py ` instead of `python ` and it uses the virtual environment for you. This feature has not been back-ported to the Windows version yet." 166 | ] 167 | }, 168 | { 169 | "cell_type": "markdown", 170 | "metadata": {}, 171 | "source": [ 172 | "## Environments" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "There are several environment systems available for Python, and they generally come in two categories. The Python Packaging Authority supports PyPI (Python Package Index), and all the systems except one build on this (usually by pip somewhere). The lone exception is Conda, which has a completely separate set of packages (often but not always with matching names)." 180 | ] 181 | }, 182 | { 183 | "cell_type": "markdown", 184 | "metadata": {}, 185 | "source": [ 186 | "### Environment specification" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "All systems have an environment specification, something like this:\n", 194 | "\n", 195 | "```\n", 196 | "requests\n", 197 | "rich >=9.8\n", 198 | "```\n", 199 | "\n", 200 | "This is technically a valid `requirements.txt` file. If you wanted to use it, you would do:\n", 201 | "\n", 202 | "\n", 203 | "```bash\n", 204 | "python3 -m venv venv\n", 205 | ". venv/bin/activate\n", 206 | "pip install -r requirements.txt\n", 207 | "```\n", 208 | "Use `deactivate` to \"leave\" the virtual environment.\n", 209 | "\n", 210 | "These two tools (venv to isolate a virtual environment) and the requirements file let you set up non-interacting places to work for each project, and you can set up again anywhere." 
211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "### Locking an environment" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "metadata": {}, 223 | "source": [ 224 | "But now you want to share your environment with someone else. But let's say `rich` updated and now something doesn't work. You have a working environment (until you update), but your friend does not, theirs installed broken (this just happened to me with `IPython` and `jedi`, by the way). How do you recover a working version without going back to your computer? With a lock file! This would look something like this:\n", 225 | "\n", 226 | "```\n", 227 | "requests ==2.25.1\n", 228 | "rich ==9.8.0\n", 229 | "typing-extensions ==3.7.4\n", 230 | "...\n", 231 | "```\n", 232 | "\n", 233 | "This file lists all installed packages with exact versions, so now you can restore your environment if you need to. However, managing these by hand is not ideal and easy to forget. If you like this, check out `pdm`, `poetry`, and `pipenv`. You can look into them off-line, but we are moving on. We'll encounter this idea again." 234 | ] 235 | }, 236 | { 237 | "cell_type": "markdown", 238 | "metadata": {}, 239 | "source": [ 240 | "### Dev environments or Extras" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "Some environment tools have the idea of a \"dev\" environment, or optional components to the environment that you can ask for. Look for them wherever fine environments are made.\n", 248 | "\n", 249 | "When you install a package via pip or any of the (non-locked) methods, you can also ask for \"extras\", though you have to know about them beforehand. For example, `pip install rich[jupyter]` will add some extra requirements for interacting with notebooks. *These add requirements only*, you can't change the package with an extra." 
250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "### Conda environments" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "If you use Conda, the environment file is called `environment.yaml`. The one we are using can be seen here:" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": { 270 | "tags": [ 271 | "remove-cell" 272 | ] 273 | }, 274 | "outputs": [], 275 | "source": [ 276 | "# WebAssembly version using Pyodide!\n", 277 | "# The following code is specific to the Pyodide backend.\n", 278 | "\n", 279 | "import sys\n", 280 | "\n", 281 | "if sys.platform.startswith(\"emscripten\"):\n", 282 | " import micropip\n", 283 | "\n", 284 | " await micropip.install(\"rich\")" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "import rich\n", 294 | "from rich.syntax import Syntax\n", 295 | "from pathlib import Path\n", 296 | "\n", 297 | "filepath = Path(\"../environment.yml\")\n", 298 | "rich.print(Syntax(filepath.read_text(), \"yaml\", theme=\"default\"))" 299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "metadata": {}, 304 | "source": [ 305 | "You can specify pip dependencies, too:\n", 306 | "\n", 307 | "```yaml\n", 308 | "- pip:\n", 309 | " - i_couldnt_think_of_a_library_missing_from_conda\n", 310 | "```" 311 | ] 312 | } 313 | ], 314 | "metadata": { 315 | "kernelspec": { 316 | "display_name": "conda-env-level-up-your-python-py", 317 | "language": "python", 318 | "name": "conda-env-level-up-your-python-py" 319 | }, 320 | "language_info": { 321 | "codemirror_mode": { 322 | "name": "ipython", 323 | "version": 3 324 | }, 325 | "file_extension": ".py", 326 | "mimetype": "text/x-python", 327 | "name": "python", 328 | "nbconvert_exporter": "python", 329 | "pygments_lexer": "ipython3", 330 | "version": "3.10.5" 331 
| }, 332 | "toc-autonumbering": false, 333 | "toc-showmarkdowntxt": false, 334 | "toc-showtags": false 335 | }, 336 | "nbformat": 4, 337 | "nbformat_minor": 4 338 | } 339 | -------------------------------------------------------------------------------- /notebooks/2.7 Creating Packages.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "cbc0cade-d116-47e5-bb55-2cb2b4ad4da6", 6 | "metadata": {}, 7 | "source": [ 8 | "# Creating packages" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "975f48c1-d333-43c5-b4e1-5431cc2caa1a", 15 | "metadata": { 16 | "tags": [ 17 | "remove-cell" 18 | ] 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "# This notebook does not work in WebAssymbly (no shell)" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "id": "535aefce-e403-4751-ad9e-6ed88883de0c", 28 | "metadata": {}, 29 | "source": [ 30 | "Now, let's change gears and look at creating our own packages. If you want to make your code accessible to someone else to use via `pip` install, you need to make it a package. In fact, as you'll see at the end of this section, even if you just want to develop an application, it's much better to be working in a package. I won't show you the internals of setting up a setuptools package, but we'll just go over how you work with it and how it is distributed.\n", 31 | "\n", 32 | "To install a local package, use:\n", 33 | "\n", 34 | "```bash\n", 35 | "pip install .\n", 36 | "```\n", 37 | "\n", 38 | "This will _copy_ the files into site-packages. If you want to actively develop a module, use this instead (setuptools only, command varies on other tools):\n", 39 | "\n", 40 | "```bash\n", 41 | "pip install -e .\n", 42 | "```\n", 43 | "\n", 44 | "This uses symlink(s) so that you can edit the local files and immediately see the changes (after restarting Python, as usual)." 
45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "id": "944d2903-1e6a-4a02-bddc-090e139b54a5", 50 | "metadata": {}, 51 | "source": [ 52 | "If you want to produce an SDist for distributing the source, use\n", 53 | "\n", 54 | "\n", 55 | "```bash\n", 56 | "pip install build\n", 57 | "python -m build --sdist\n", 58 | "```\n", 59 | "\n", 60 | "If you want to produce a wheel for distributing, use\n", 61 | "\n", 62 | "```bash\n", 63 | "python -m build --wheel\n", 64 | "```\n", 65 | "\n", 66 | "You'll see old tutorials directly call `python setup.py ...`; if you can possibly avoid doing that, please do! The `setup.py` file is still a good idea for setuptools, but it's not even required there (and doesn't exist for any other packaging software). (It's also quite valid to use pipx to install build, but remember the command is `pyproject-build` if you do that)." 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "id": "82a23486-3fa4-4a9e-ae6f-b0254b67e551", 72 | "metadata": {}, 73 | "source": [ 74 | "## Distributions" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "id": "d0fe488c-496e-4ad9-8957-15fc7ccc7138", 80 | "metadata": {}, 81 | "source": [ 82 | "### Wheel: fast and simple" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "id": "0369ba7a-19aa-449c-9d1c-6112016bc400", 88 | "metadata": {}, 89 | "source": [ 90 | "A wheel is just a normal zipped file with the extension `.whl`. 
It contains folders that get copied to specific locations, and a metadata folder.\n", 91 | "\n", 92 | "It _does not_ contain `setup.py`/`setup.cfg`/`pyproject.toml`.\n", 93 | "\n", 94 | "\n", 95 | "Why use wheels?\n", 96 | "\n", 97 | "* Secure installs - arbitrary code does not run\n", 98 | "* Fast installs - files are just copied inplace\n", 99 | "* Reliable - does not depend on pretty much anything being on user's machine, including setuptools!\n", 100 | "* Faster first imports - pip makes .pyc files when it installs\n", 101 | "* Can be tagged for Python version, OS, and/or architecture (supports binaries!).\n", 102 | "\n", 103 | "See " 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "id": "1c476e05-fb9c-491d-8c1b-8f0439087c50", 109 | "metadata": {}, 110 | "source": [ 111 | "### SDist: Source distribution" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "id": "df3a7ace-7fd4-4da6-9b2a-10e9be02671b", 117 | "metadata": {}, 118 | "source": [ 119 | "This is a `.tar.gz` file holding the files needed to make a wheel. It is often a subset of the files in the GitHub repo, though sometimes it contains generated files, like `version.py` or maybe Cython/SWIG generated source files. If there is no matching wheel (only for projects with binary components, in general), then pip gets the SDist and builds/installs manually." 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "id": "735fd27b-1e24-4937-82c4-27de73870663", 125 | "metadata": {}, 126 | "source": [ 127 | "## PDM/Hatch/Flit/Poetry: A breath of fresh air" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "id": "7abb1008-2ec5-4631-aef0-455e3d700cc5", 133 | "metadata": {}, 134 | "source": [ 135 | "See for a complete setup!\n", 136 | "\n", 137 | "Let's look at an all-in-one solution: PDM. It is a bit younger than Poetry, the current leader of all-in-one solutions, but it follows standards much better. 
There are some caveats:\n", 138 | "\n", 139 | "* Should be pure Python (no compiled extensions in your code)\n", 140 | "* Should be PyPI based (no Conda integration AFAIK)\n", 141 | " \n", 142 | " \n", 143 | "I'm [strongly against](https://iscinumpy.dev/post/bound-version-constraints/) some of the decisions in Poetry and to a much lesser extent, PDM, along with many of the other PyPA members and Python core developers. These decisions were mostly made for \"application\" uses, so they are only problematic when making a library. You can avoid them, just follow the following rules:\n", 144 | "\n", 145 | "* Never add an upper limit to your Python version. `^3.8` should be changed to `>=3.8`. Poetry will _force_ you to add an upper limit if a package you include does this, though, so the bad practice percolates.\n", 146 | "* Never add an upper limit to a project you don't heavily depend on unless you know you really don't support some version. It's much more likely that you will support the next version than you won't, and makes a mess for solving this later, and forces you to constantly \"maintain\" the upper limit." 
147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "id": "a1a85c29-ea64-43b6-a27a-a87bd32c3905", 152 | "metadata": {}, 153 | "source": [ 154 | "### Step 1: make a new project" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "id": "c8364a76-ddb3-4c72-8bf9-16eae6ab9b3c", 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "!pdm new tmp_project" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "id": "9c84e66d-75ba-41b2-a9c8-84dacf7db8e0", 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "%ls tmp_project/" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "id": "6a1eaddb-a42b-4a89-9483-8b3da45db198", 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "%cat tmp_project/pyproject.toml" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "id": "fa155325-7bf5-454b-aa89-b8fcc0a1b943", 190 | "metadata": {}, 191 | "source": [ 192 | "The following commands I'll demo in a shell, if I have time.\n", 193 | "\n", 194 | "```bash\n", 195 | "# Create a virtual environment, start the poetry.lock file\n", 196 | "pdm install\n", 197 | "\n", 198 | "# \"Enter\" the environment (Ctrl-D or exit to exit)\n", 199 | "pdm shell\n", 200 | "\n", 201 | "# Run without entering the environment\n", 202 | "pdm run ...\n", 203 | "\n", 204 | "# Add a new package (--dev to make it development only)\n", 205 | "# Modifies your pyproject.toml\n", 206 | "pdm add rich\n", 207 | "\n", 208 | "# Update the environment and lock files\n", 209 | "pdm update\n", 210 | "\n", 211 | "# You can use python -m build, or you can use pdm build\n", 212 | "# You can publish to PyPI with pdm publish\n", 213 | "# And that's package + environment management!\n", 214 | "```" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "id": "d9fd1f8a-2706-4212-ab9c-87d69da0922f", 220 | "metadata": {}, 221 | "source": [ 222 | "When you publish 
your package, it makes completely normal wheels, so `pip install` works exactly as expected.\n", 223 | "\n", 224 | "New developers can start developing right away by getting your repository and running `pdm install`. They _even get the dev dependencies_ by default! (which was a brilliant choice, IMO). They start with the lock file if it exists, so they always get what you have, and anyone can run `pdm update` if needed." 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "id": "aa1185ff-d32e-465b-9384-d483a7ed1850", 230 | "metadata": {}, 231 | "source": [ 232 | "With PDM, you can even select _any_ PEP 621 backend, including the excellent Hatchling, and the (too) minimal flit-core! Poetry does _not_ support standards like this, at least yet." 233 | ] 234 | }, 235 | { 236 | "cell_type": "markdown", 237 | "id": "79d74bdb-0a5d-4069-8fd1-cd90f458dc6f", 238 | "metadata": {}, 239 | "source": [ 240 | "## Hatch / hatchling\n", 241 | "\n", 242 | "The \"Hatch\" tool is like PDM/Poetry, but is based on multiple environments. This allows it to be a \"true\" all-in-one tool by replacing nox/tox. It comes with a fantastic \"Hatchling\" backend that is currently the nicest PEP 517 builder; this is what I nearly always use.\n", 243 | "\n", 244 | "Hatch doesn't support locking environments yet (was waiting on an official solution, but that's been hard to agree on). But Hatchling is currently the nicest and most extensible PEP 517 builder available! I'd highly recommend using it (even with PDM, which is what I usually do)." 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "id": "4fcb46d9-c26c-4492-b2d2-a0315a375c0f", 250 | "metadata": {}, 251 | "source": [ 252 | "## Flit: Lightweight, (too) simple" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "id": "bb0f745b-e8f0-497c-b57f-7d0aaf758014", 258 | "metadata": {}, 259 | "source": [ 260 | "Flit is great for simple projects that don't need all the bells and whistles. 
Ironically, it's currently more stable than setuptools is or will be till Python 3.12, since setuptools is fighting through the distutils deprecation process. The PyPA is likely to start moving some core packages to using Flit. Short guide for Flit:\n", 261 | "\n", 262 | "* Consider using the flit command line tool for a streamlined experience (though you don't need to, and I don't)\n", 263 | "* Use the PEP 621 (new metadata) format - it's better and can be used more places (like with PDM!)\n", 264 | "* One design feature/problem is that SDists are _exactly_ tars of your repo - there's no build step. If you need that, look elsewhere. This includes Git-based versioning, sadly.\n", 265 | "* Ahh, another problem: Standards-based SDist builds do not use Git info - so you have to check the files explicitly." 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "id": "a0f2ff90-48f9-400e-b04e-0a0a9c478153", 271 | "metadata": {}, 272 | "source": [ 273 | "## Setuptools: Classic, powerful, verbose" 274 | ] 275 | }, 276 | { 277 | "cell_type": "markdown", 278 | "id": "197d8a4c-0e61-42b1-bbaa-ce4c2107dd74", 279 | "metadata": {}, 280 | "source": [ 281 | "The most powerful (and originally, forced by pip) tool is setuptools. This is a collection of hacks built on top of distutils, which is a collection of hacks to build packages (which was the standard library tool that is now deprecated and may be removed in Python 3.12). 
There are some awful examples around on using it, so look at for a proper example.\n", 282 | "\n", 283 | "The short version:\n", 284 | "\n", 285 | "* Use declarative `setup.cfg` for everything you can\n", 286 | " - Use file: to read files\n", 287 | " - Always use find: for packages - include or exclude if you need to\n", 288 | " - Always set `python_requires`!\n", 289 | "* Logic goes in `setup.py`; often it's just `from setuptools import setup; setup()`\n", 290 | " - Binary extensions go here too\n", 291 | " - You don't need this file at all much of the time.\n", 292 | "* Always include a `pyproject.toml`, often it's just 5 or so lines\n", 293 | "* Check your `MANIFEST.in` to make sure it's not missing things going into the SDist" 294 | ] 295 | } 296 | ], 297 | "metadata": { 298 | "kernelspec": { 299 | "display_name": "conda-env-level-up-your-python-py", 300 | "language": "python", 301 | "name": "conda-env-level-up-your-python-py" 302 | }, 303 | "language_info": { 304 | "codemirror_mode": { 305 | "name": "ipython", 306 | "version": 3 307 | }, 308 | "file_extension": ".py", 309 | "mimetype": "text/x-python", 310 | "name": "python", 311 | "nbconvert_exporter": "python", 312 | "pygments_lexer": "ipython3", 313 | "version": "3.10.5" 314 | } 315 | }, 316 | "nbformat": 4, 317 | "nbformat_minor": 5 318 | } 319 | -------------------------------------------------------------------------------- /notebooks/3.1 pytest.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# pytest: Unit Testing" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": { 14 | "tags": [ 15 | "remove-cell" 16 | ] 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "# This notebook does not work in WebAssembly (no shell)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": 
[], 28 | "source": [ 29 | "# Small local extension\n", 30 | "%load_ext save_and_run" 31 | ] 32 | }, 33 | { 34 | "cell_type": "markdown", 35 | "metadata": {}, 36 | "source": [ 37 | "I highly recommend taking some time to learn advanced pytest, as anything that makes writing tests easier enables more and better testing, which is always a plus!" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "## Tests should be easy\n", 45 | "\n", 46 | "Always use pytest. The built-in unittest is _very_ verbose; the simpler the writing of tests, the more tests you will write!" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "%%save_and_run unittest\n", 56 | "import unittest\n", 57 | "\n", 58 | "class MyTestCase(unittest.TestCase):\n", 59 | " def test_something(self):\n", 60 | " x = 1\n", 61 | " self.assertEqual(x, 2)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "Contrast this to pytest:" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "%%save_and_run pytest\n", 78 | "def test_something():\n", 79 | " x = 1\n", 80 | " assert x == 2" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "pytest still gives you clear breakdowns, including what the value of `x` actually is, even though it seems to use the Python `assert` statement! You don't need to set up a class (though you can), and you don't need to remember 50 or so different `self.assert*` functions! 
pytest can also run unittest tests, as well as the old `nose` package's tests, too.\n", 88 | "\n", 89 | "Approximately equals is normally ugly to check, but pytest makes it easy too:" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "%%save_and_run pytest\n", 99 | "from pytest import approx\n", 100 | "\n", 101 | "def test_approx():\n", 102 | "    assert .3333333333333 == approx(1/3)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "## Tests should test for failures too\n", 110 | "\n", 111 | "\n", 112 | "You should make sure that expected errors are thrown:" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "%%save_and_run pytest --no-header\n", 122 | "import pytest\n", 123 | "\n", 124 | "def test_raises():\n", 125 | " with pytest.raises(ZeroDivisionError):\n", 126 | " 1 / 0" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "metadata": {}, 132 | "source": [ 133 | "You can check for warnings as well, with `pytest.warns` or `pytest.deprecated_call`." 
134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "## Tests should stay easy when scaling out\n", 141 | "\n", 142 | "pytest [uses fixtures](https://docs.pytest.org/en/stable/fixture.html) to represent complex ideas, like setup/teardown, temporary resources, or parameterization.\n", 143 | "\n", 144 | "\n", 145 | "A fixture looks like a function argument; pytest recognizes them by name:\n" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "%%save_and_run pytest --no-header\n", 155 | "def test_printout(capsys):\n", 156 | " print(\"hello\")\n", 157 | " \n", 158 | " captured = capsys.readouterr()\n", 159 | " assert \"hello\" in captured.out" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "Making a new fixture is not too hard, and can be placed in the test file or in `conftest.py`:" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "%%save_and_run pytest --no-header\n", 176 | "import pytest\n", 177 | "\n", 178 | "@pytest.fixture(params=[1,2,3], ids=[\"one\", \"two\", \"three\"])\n", 179 | "def ints(request):\n", 180 | " return request.param\n", 181 | "\n", 182 | "def test_on_ints(ints):\n", 183 | " assert ints**2 == ints*ints" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "We could have left off `ids`, but for complex inputs, this lets the tests have beautiful names.\n", 191 | "\n", 192 | "\n", 193 | "Now you will get three tests, `test_on_ints[one]`, `test_on_ints[two]`, and `test_on_ints[three]`!\n", 194 | "\n", 195 | "Fixtures can be scoped, allowing simple setup/teardown (use `yield` if you need to run teardown). You can even set `autouse=True` to use a fixture always in a file or module (via `conftest.py`). 
You can have `conftest.py`'s in nested folders, too!" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "Here's an advanced example, which also uses `monkeypatch`, which is a great way for making things hard to split into units into unit tests. Let's say you wanted to make a test that \"tricked\" your code into thinking that it was running on different platforms:" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "%%save_and_run pytest --no-header\n", 212 | "\n", 213 | "import platform\n", 214 | "import pytest\n", 215 | "\n", 216 | "@pytest.fixture(params=['Linux', 'Darwin', 'Windows'], autouse=True)\n", 217 | "def platform_system(request, monkeypatch):\n", 218 | " monkeypatch.setattr(platform, \"system\", lambda : request.param)\n", 219 | " \n", 220 | "def test_anything():\n", 221 | " assert platform.system() in {\"Linux\", \"Darwin\", \"Windows\"}" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "Now every test in the file this is in (or the directory that this is in if in conftest) will run three times, and each time will identify as a different `platform.system()`! Leave `autouse` off, and it becomes opt-in; adding `platform_system` to the list of arguments will opt in." 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "## Tests should be organized\n", 236 | "\n", 237 | "You can use `pytest.mark.*` to mark tests, so you can easily turn on or off groups of tests, or do something else special with marked tests, like tests marked \"slow\", for example. 
Probably the most useful built-in mark is `skipif`:" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": {}, 244 | "outputs": [], 245 | "source": [ 246 | "%%save_and_run pytest --no-header\n", 247 | "import pytest\n", 248 | "\n", 249 | "@pytest.mark.skipif(\"sys.version_info < (3, 8)\")\n", 250 | "def test_only_on_37plus():\n", 251 | " x = 3\n", 252 | " assert f\"{x = }\" == \"x = 3\"" 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "Now this test will only run on Python 3.8, and will be skipped on earlier versions. You don't have to use a string for the condition, but if you don't, add a `reason=` so there will still be nice printout explaining why the test was skipped.\n", 260 | "\n", 261 | "You can also use `xfail` for tests that are expected to fail (you can even strictly test them as failing if you want). You can use `parametrize` to make a single parameterized test instead of sharing them (with fixtures). There's a `filterwarnings` mark, too.\n", 262 | "\n", 263 | "Many pytest plugins support new marks too, like `pytest-parametrize`. You can also use custom marks to enable/disable groups of tests, or to pass data into fixtures." 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "## Tests should test the installed version, not the local version\n", 271 | "\n", 272 | "Your tests should run against an _installed_ version of your code. Testing against the _local_ version might work while the installed version does not (due to a missing file, changed paths, etc). This is one of the big reasons to use `/src/package` instead of `/package`, as `python -m pytest` will pick up local directories and `pytest` does not. 
Also, there may come a time when someone (possibly you) needs to run your tests off of a wheel or a conda package, or in a build system, and if you are unable to test against an installed version, you won't be able to run your tests! (It happens more than you might think)." 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "### Mock expensive or tricky calls\n", 280 | "\n", 281 | "If you have to call something that is expensive or hard to call, it is often better to mock it. To isolate parts of your own code for \"unit\" testing, mocking is useful too. Combined with monkey patching (shown in an earlier example), this is a very powerful tool!\n", 282 | "\n", 283 | "Say we want to write a function that calls matplotlib. We could use `pytest-mpl` to capture images and compare them in our test, but that's an integration test, not a unit test; and if something does go wrong, we are stuck comparing pictures, and we don't know how our usage of matplotlib changed from the test report. Let's see how we could mock it. We will use the `pytest-mock` plugin for pytest, which simply adapts the built-in `unittest.mock` in a more native pytest fashion as fixtures and such." 
284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "%%save_and_run pytest --no-header --disable-pytest-warnings\n", 293 | "import pytest\n", 294 | "from pytest import approx\n", 295 | "import matplotlib.pyplot\n", 296 | "from types import SimpleNamespace\n", 297 | "\n", 298 | "def my_plot(ax):\n", 299 | " ax.plot([1,3,2], label=None, linewidth=1.5)\n", 300 | "\n", 301 | "@pytest.fixture\n", 302 | "def mock_matplotlib(mocker):\n", 303 | " fig = mocker.Mock(spec=matplotlib.pyplot.Figure)\n", 304 | " ax = mocker.Mock(spec=matplotlib.pyplot.Axes)\n", 305 | " line2d = mocker.Mock(name=\"step\", spec=matplotlib.lines.Line2D)\n", 306 | " ax.plot.return_value = (line2d,)\n", 307 | "\n", 308 | " # Patch the main library if used directly\n", 309 | " mpl = mocker.patch(\"matplotlib.pyplot\", autospec=True)\n", 310 | " mocker.patch(\"matplotlib.pyplot.subplots\", return_value=(fig, ax))\n", 311 | "\n", 312 | " return SimpleNamespace(fig=fig, ax=ax, mpl=mpl)\n", 313 | "\n", 314 | "\n", 315 | "def test_my_plot(mock_matplotlib):\n", 316 | " ax = mock_matplotlib.ax\n", 317 | " my_plot(ax=ax)\n", 318 | "\n", 319 | " assert len(ax.mock_calls) == 1\n", 320 | "\n", 321 | " ax.plot.assert_called_once_with(\n", 322 | " approx([1.0, 3.0, 2.0]),\n", 323 | " label=None,\n", 324 | " linewidth=approx(1.5),\n", 325 | " )" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "metadata": {}, 331 | "source": [ 332 | "We've just mocked the parts we touch in our plot function that we need to test. We use `spec=` to get the mock to just respond to the same things that the original object would have responded to. We can set return values so that our objects behave like the real thing. \n", 333 | "\n", 334 | "If this changes, we immediately know exactly what changed - and this runs\n", 335 | "instantly, we aren't making any images! 
While this is a little work to set up,\n", 336 | "it pays off in the long run.\n", 337 | "\n", 338 | "The documentation at [pytest-mock](https://pypi.org/project/pytest-mock/) is\n", 339 | "helpful, though most of it just redirects to the standard library\n", 340 | "[unittest.mock](https://docs.python.org/3/library/unittest.mock.html)." 341 | ] 342 | } 343 | ], 344 | "metadata": { 345 | "kernelspec": { 346 | "display_name": "conda-env-level-up-your-python-py", 347 | "language": "python", 348 | "name": "conda-env-level-up-your-python-py" 349 | }, 350 | "language_info": { 351 | "codemirror_mode": { 352 | "name": "ipython", 353 | "version": 3 354 | }, 355 | "file_extension": ".py", 356 | "mimetype": "text/x-python", 357 | "name": "python", 358 | "nbconvert_exporter": "python", 359 | "pygments_lexer": "ipython3", 360 | "version": "3.10.5" 361 | } 362 | }, 363 | "nbformat": 4, 364 | "nbformat_minor": 4 365 | } 366 | -------------------------------------------------------------------------------- /notebooks/3.2 NumPy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# NumPy: Numeric computing\n", 8 | "\n", 9 | "The core of all modern scientific computing. You should know this one! This is NumPy + SciPy + Matplotlib, which form the core of the modern computing stack for [Scientific Python](https://scientific-python.org)." 
10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import matplotlib.pyplot as plt" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "metadata": {}, 25 | "source": [ 26 | "It is used for N-dimensional arrays:" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "v = np.array([1, 2, 3])\n", 36 | "v" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "The clever trick is that computations on an array are pre-compiled, and can be much faster than Python:" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "v**2" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "Note that it was designed for large arrays; around 10 elements or less you _might_ be faster using plain Python (though it's still more expressive). You should \"vectorize\" your code (by making the arrays bigger with more dimensions) if your arrays are very small and you care about performance." 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "In the spirit of the course, I'll avoid covering the basics of NumPy here (arrays, simple dtypes, operations), and instead focus on intermediate features." 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "## Slicing\n", 74 | "\n", 75 | "NumPy slicing may not make copies of arrays, but can just adjust the start and strides of the data. This is usually true of reshaping and adding empty (length 1) dimensions, as well. 
For example:" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "arr = np.zeros([2, 3, 4], dtype=int)\n", 85 | "print(arr.flags)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "Slicing produces a view; we can see the effect of setting values on the original array, for example:" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "x = arr[:, :2, 2:]\n", 102 | "x[...] = 1\n", 103 | "print(arr)" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "Flattening also avoids a copy if the data is contiguous:" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "y = arr.ravel()\n", 120 | "y[:3] = 2\n", 121 | "print(arr)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "However, flattening / reshaping an array without a copy may not always be possible, such as if the data is non-contiguous:" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "y = arr.T.ravel()\n", 138 | "y[:3] = 3\n", 139 | "print(arr)" 140 | ] 141 | }, 142 | { 143 | "cell_type": "markdown", 144 | "metadata": { 145 | "tags": [] 146 | }, 147 | "source": [ 148 | "## Random Numbers\n", 149 | "\n", 150 | "The simple random number interface `np.random.` should only be used for highly interactive work. 
Normal usage should use a more modern, object oriented approach; first construct a random number generator:" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "rng = np.random.default_rng()" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "You can pass an explicit seed if you want reproducibility. Then this generator has distributions as methods:" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "dist = rng.normal(0, 1, size=1_000_000)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "fix, ax = plt.subplots()\n", 185 | "ax.hist(dist, bins=100)\n", 186 | "plt.show()" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": { 192 | "jp-MarkdownHeadingCollapsed": true, 193 | "tags": [] 194 | }, 195 | "source": [ 196 | "The benefits include explicit control over the generator, reproducibility, and support for multiple independent generators." 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": {}, 202 | "source": [ 203 | "## Broadcasting\n", 204 | "\n", 205 | "One benefit of true 1D arrays (vs. languages that don't have 1D arrays) is that NumPy can support broadcasting. Broadcasting occurs whenever an operation (element-wise or matrix multiplication) encounters an array that is the wrong shape. The rules can be viewed two ways:\n", 206 | "\n", 207 | "1. If the number of dimensions does not match, prepend length-1 dimensions until they do.\n", 208 | "2. If the size of a dimension does not match and one of the dimensions is 1, duplicate along that axis to make it match.\n", 209 | "\n", 210 | "Or:\n", 211 | "1. 
Moving right to left, if a dimension is missing or 1, set the stride to 0 on that array, so the iteration pointer does not change along that dimension.\n", 212 | "\n", 213 | "The former is often easier to reason about, while the latter is how it's implemented (so it is very efficient)." 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": {}, 220 | "outputs": [], 221 | "source": [ 222 | "(np.ones((2, 3, 4)) * np.ones((1, 4))).shape" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "metadata": {}, 228 | "source": [ 229 | "You can predict the final shape with `broadcast_shapes`:" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "np.broadcast_shapes((2, 3, 4), (1, 4))" 239 | ] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "You can also explicitly expand with `np.broadcast_arrays`.\n", 246 | "\n", 247 | "Broadcasting is extremely useful for array-at-a-time programming, such as for coordinate arrays and the like. If you organize your calculations to delay expansion, you can optimize a lot of computation out." 248 | ] 249 | }, 250 | { 251 | "cell_type": "markdown", 252 | "metadata": {}, 253 | "source": [ 254 | "For example, we can use `ogrid` or `meshgrid(..., sparse=True)` to replace `mgrid` but with compressed arrays ready for broadcasting. 
Here we make a grid with 101 points on x from 2.5 to 3.5 and 501 points on y from 0 to 5 (ends inclusive):" 255 | ] 256 | }, 257 | { 258 | "cell_type": "code", 259 | "execution_count": null, 260 | "metadata": {}, 261 | "outputs": [], 262 | "source": [ 263 | "x, y = np.mgrid[2.5:3.5:101j, 0:5:501j]\n", 264 | "radius = np.sqrt(x**2 + y**2)\n", 265 | "print(f\"{x.shape=}, {y.shape=}\")\n", 266 | "print(f\"{radius[50,400] = }\")" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [ 275 | "x, y = np.ogrid[2.5:3.5:101j, 0:5:501j]\n", 276 | "radius = np.sqrt(x**2 + y**2)\n", 277 | "print(f\"{x.shape=}, {y.shape=}\")\n", 278 | "print(f\"{radius[50,400] = }\")" 279 | ] 280 | }, 281 | { 282 | "cell_type": "markdown", 283 | "metadata": {}, 284 | "source": [ 285 | "## Masked arrays\n", 286 | "\n", 287 | "A powerful and under supported feature is masked arrays. If values are missing, you can \"mask\" them.\n", 288 | "\n", 289 | "This is quite nice logically, but the downside is not all interfaces support masked arrays. You also use extra space for the boolean mask. Another trick is to use NaN's in a floating point array, or to use Pandas's support for None's in all arrays." 
290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [ 298 | "x, y = np.mgrid[-5:5:101j, -5:5:101j]\n", 299 | "radius = np.sqrt(x**2 + y**2)\n", 300 | "mradius = np.ma.masked_where(radius > 5, radius)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": {}, 307 | "outputs": [], 308 | "source": [ 309 | "fix, ax = plt.subplots()\n", 310 | "ax.pcolormesh(x, y, mradius)\n", 311 | "ax.set_aspect(\"equal\")\n", 312 | "plt.show()" 313 | ] 314 | }, 315 | { 316 | "cell_type": "markdown", 317 | "metadata": {}, 318 | "source": [ 319 | "## Record arrays (AKA preview for Pandas!)\n", 320 | "\n", 321 | "NumPy has support for complex DTypes. While Pandas or xarray handles structured dtypes more elegantly (and in a columnar form), but if you actually have data from some source that is structured and in an array form, this can be incredibly useful." 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": null, 327 | "metadata": {}, 328 | "outputs": [], 329 | "source": [ 330 | "arr = np.array([(1, 2.0), (3, 4.0)], dtype=[(\"id\", int), (\"val\", float)])\n", 331 | "arr" 332 | ] 333 | }, 334 | { 335 | "cell_type": "markdown", 336 | "metadata": {}, 337 | "source": [ 338 | "You can select out a single named dtype (without copy) as well:" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "arr[\"val\"]" 348 | ] 349 | }, 350 | { 351 | "cell_type": "markdown", 352 | "metadata": {}, 353 | "source": [ 354 | "## Smaller features" 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "### Buffer protocol\n", 362 | "\n", 363 | "There's a buffer protocol in Python 3 that allows different libraries to communicate this sort of data without copies. 
This is also useful to communicate things like shared memory or existing memory to NumPy." 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": null, 369 | "metadata": {}, 370 | "outputs": [], 371 | "source": [ 372 | "import array" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": null, 378 | "metadata": {}, 379 | "outputs": [], 380 | "source": [ 381 | "python_array = array.array(\"d\", (1, 2, 3, 4))" 382 | ] 383 | }, 384 | { 385 | "cell_type": "markdown", 386 | "metadata": {}, 387 | "source": [ 388 | "We can explicitly convert from a buffer:" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "metadata": {}, 395 | "outputs": [], 396 | "source": [ 397 | "np.frombuffer(python_array)" 398 | ] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | "metadata": {}, 403 | "source": [ 404 | "Or most NumPy functions also work directly on buffers by converting them:" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "metadata": {}, 411 | "outputs": [], 412 | "source": [ 413 | "arr = np.asarray(python_array)" 414 | ] 415 | }, 416 | { 417 | "cell_type": "markdown", 418 | "metadata": {}, 419 | "source": [ 420 | "This \"conversion\" does not copy! 
You still are looking at the buffer's original memory:" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": null, 426 | "metadata": {}, 427 | "outputs": [], 428 | "source": [ 429 | "arr[1] = 42" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": null, 435 | "metadata": {}, 436 | "outputs": [], 437 | "source": [ 438 | "python_array" 439 | ] 440 | }, 441 | { 442 | "cell_type": "markdown", 443 | "metadata": {}, 444 | "source": [ 445 | "You can verify that the NumPy array doesn't own the memory:" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": null, 451 | "metadata": {}, 452 | "outputs": [], 453 | "source": [ 454 | "arr.flags.owndata" 455 | ] 456 | }, 457 | { 458 | "cell_type": "markdown", 459 | "metadata": { 460 | "tags": [] 461 | }, 462 | "source": [ 463 | "## Custom arrays\n", 464 | "\n", 465 | "### UFuncs\n", 466 | "\n", 467 | "NumPy has the concept of UFuncs; functions that can take array arguments (broadcastable) and will return a broadcast result. 
For example:" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": null, 473 | "metadata": {}, 474 | "outputs": [], 475 | "source": [ 476 | "np.sin(1)" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": null, 482 | "metadata": {}, 483 | "outputs": [], 484 | "source": [ 485 | "np.sin(np.array([0, 1, 2]))" 486 | ] 487 | }, 488 | { 489 | "cell_type": "code", 490 | "execution_count": null, 491 | "metadata": {}, 492 | "outputs": [], 493 | "source": [ 494 | "np.add(7, np.arange(3))" 495 | ] 496 | }, 497 | { 498 | "cell_type": "markdown", 499 | "metadata": {}, 500 | "source": [ 501 | "UFuncs have several standard keyword arguments:\n", 502 | "\n", 503 | "* `out`: Output to existing array, skip allocation\n", 504 | "* `where`: Mask computation\n", 505 | "* `axes`, `axis`: axes to operate on (generalized UFuncs only)\n", 506 | "* `keepdims`: To remove reduced dims (some generalized UFuncs only)\n", 507 | "* `casting='same_kind'`: rules for casting different DTypes\n", 508 | "* `order='K'`: memory layout (\"K\"eep)\n", 509 | "* `dtype=None`: Dtype for output array\n", 510 | "* `subok=True`: Output can be a subclass\n", 511 | "* `signature`/`extobj`: Exact control over dtypes/buffers\n", 512 | "\n", 513 | "(A generalized UFunc supports specific patterns, like matrix multiplication, rather than being element-wise)" 514 | ] 515 | }, 516 | { 517 | "cell_type": "markdown", 518 | "metadata": {}, 519 | "source": [ 520 | "UFuncs also support several methods and have a few properties. 
There are exactly six methods:\n", 521 | "\n", 522 | "* `__call__`: Elementwise computation\n", 523 | "* `at`: Local elementwise computation (provide indices)\n", 524 | "* `reduce`: A reduction\n", 525 | "* `reduceat`: A local reduction (provide indices)\n", 526 | "* `accumulate`: An accumulation\n", 527 | "* `outer`: An outer apply (`np.multiply.outer` is identical to `np.tensordot`)" 528 | ] 529 | }, 530 | { 531 | "cell_type": "markdown", 532 | "metadata": {}, 533 | "source": [ 534 | "### NEP 13/18\n", 535 | "\n", 536 | "If you have a custom class, you can also customize UFuncs via NEP 13!" 537 | ] 538 | }, 539 | { 540 | "cell_type": "code", 541 | "execution_count": null, 542 | "metadata": {}, 543 | "outputs": [], 544 | "source": [ 545 | "# Aside: let's add pytest's raises decorator\n", 546 | "import pytest" 547 | ] 548 | }, 549 | { 550 | "cell_type": "code", 551 | "execution_count": null, 552 | "metadata": {}, 553 | "outputs": [], 554 | "source": [ 555 | "class SimpleUnc(np.ndarray):\n", 556 | " pass\n", 557 | "\n", 558 | "\n", 559 | "def simple_unc(val, unc):\n", 560 | " arr = np.array(list(zip(val, unc)), dtype=[(\"val\", float), (\"unc\", float)])\n", 561 | " return arr.view(SimpleUnc)" 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | "execution_count": null, 567 | "metadata": {}, 568 | "outputs": [], 569 | "source": [ 570 | "a = simple_unc([1, 2, 3], [0.1, 0.1, 0.1])\n", 571 | "a" 572 | ] 573 | }, 574 | { 575 | "cell_type": "code", 576 | "execution_count": null, 577 | "metadata": {}, 578 | "outputs": [], 579 | "source": [ 580 | "a == a" 581 | ] 582 | }, 583 | { 584 | "cell_type": "markdown", 585 | "metadata": {}, 586 | "source": [ 587 | "We can see that this does raise an error if you try to add it, though:" 588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "execution_count": null, 593 | "metadata": {}, 594 | "outputs": [], 595 | "source": [ 596 | "with pytest.raises(TypeError):\n", 597 | " a + a" 598 | ] 599 | }, 600 | { 601 | "cell_type": 
"markdown", 602 | "metadata": {}, 603 | "source": [ 604 | "Now let's provide `__array_ufunc__`, which will allow us to customize the behavior of UFuncs. This will use Python 3.10's pattern matching for simplicity." 605 | ] 606 | }, 607 | { 608 | "cell_type": "code", 609 | "execution_count": null, 610 | "metadata": {}, 611 | "outputs": [], 612 | "source": [ 613 | "class SimpleUnc(np.ndarray):\n", 614 | " def __array_ufunc__(self, ufunc, method: str, *inputs, **kwargs):\n", 615 | " # Avoid infinite recursion\n", 616 | " raw_inputs = [np.asarray(x) for x in inputs]\n", 617 | "\n", 618 | " match (ufunc, method, raw_inputs):\n", 619 | " # Custom add / subtract\n", 620 | " case np.add | np.subtract, \"__call__\", (a, b):\n", 621 | " # This can waste an allocation\n", 622 | " (result,) = kwargs.pop(\"out\", [np.empty(self.shape, self.dtype)])\n", 623 | "\n", 624 | " ufunc(a[\"val\"], b[\"val\"], out=result[\"val\"], **kwargs)\n", 625 | " np.add(a[\"unc\"] ** 2, b[\"unc\"] ** 2, out=result[\"unc\"], **kwargs)\n", 626 | " np.sqrt(result[\"unc\"], out=result[\"unc\"], **kwargs)\n", 627 | "\n", 628 | " return result.view(self.__class__)\n", 629 | "\n", 630 | " # Fall back on whatever it would have done before - do not return this subclass\n", 631 | " case _:\n", 632 | " return super().__array_ufunc__(ufunc, method, *raw_inputs, **kwargs)" 633 | ] 634 | }, 635 | { 636 | "cell_type": "code", 637 | "execution_count": null, 638 | "metadata": {}, 639 | "outputs": [], 640 | "source": [ 641 | "a = simple_unc([1, 2, 3], [0.1, 0.1, 0.1])\n", 642 | "a + a" 643 | ] 644 | }, 645 | { 646 | "cell_type": "code", 647 | "execution_count": null, 648 | "metadata": {}, 649 | "outputs": [], 650 | "source": [ 651 | "a - a" 652 | ] 653 | }, 654 | { 655 | "cell_type": "markdown", 656 | "metadata": {}, 657 | "source": [ 658 | "This is very simple and doesn't account for correlations, but it does show that it works." 
659 | ] 660 | }, 661 | { 662 | "cell_type": "code", 663 | "execution_count": null, 664 | "metadata": {}, 665 | "outputs": [], 666 | "source": [ 667 | "a == a" 668 | ] 669 | }, 670 | { 671 | "cell_type": "markdown", 672 | "metadata": {}, 673 | "source": [ 674 | "There's a lot more to NumPy; it's worth looking around in the docs. An example of a function you might find is `np.diff(a)`, which is equivalent to `a[:-1] - a[1:]` but reads better without repeating the array name. Also look at the huge number of useful methods in SciPy." 675 | ] 676 | } 677 | ], 678 | "metadata": { 679 | "kernelspec": { 680 | "display_name": "conda-env-level-up-your-python-py", 681 | "language": "python", 682 | "name": "conda-env-level-up-your-python-py" 683 | }, 684 | "language_info": { 685 | "codemirror_mode": { 686 | "name": "ipython", 687 | "version": 3 688 | }, 689 | "file_extension": ".py", 690 | "mimetype": "text/x-python", 691 | "name": "python", 692 | "nbconvert_exporter": "python", 693 | "pygments_lexer": "ipython3", 694 | "version": "3.10.5" 695 | } 696 | }, 697 | "nbformat": 4, 698 | "nbformat_minor": 4 699 | } 700 | -------------------------------------------------------------------------------- /notebooks/3.3 Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Pandas: DataFrames for Python\n", 8 | "\n", 9 | "Python is a general purpose language. It doesn't have to be better than a specialized language, it just has to have a good enough library - it is better at all the other parts, like dealing with files, CLI/GUI, etc.\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "DataFrames (well known from R) are like Excel spreadsheets in Python. (In fact, it can open Excel files). They are for _structured data_. 
If a NumPy axis has a meaning you want to assign a name to, it's probably structured." 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "import pandas as pd" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "We could make a DataFrame by hand, but most of the time you'll load them from various data sources. So let's make a CSV:" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "%%writefile tmp.csv\n", 42 | "id, version, os, arch\n", 43 | "cp37-macos_arm64, 3.7, macos, arm64\n", 44 | "cp38-macos_arm64, 3.8, macos, arm64\n", 45 | "cp39-macos_arm64, 3.9, macos, arm64\n", 46 | "cp37-macos_x86_64, 3.7, macos, x86_64\n", 47 | "cp38-macos_x86_64, 3.8, macos, x86_64\n", 48 | "cp39-macos_x86_64, 3.9, macos, x86_64" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "By default, pandas can read it, and even nicely format something for your screen:" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "pd.read_csv(\"tmp.csv\")" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "There are lots of powerful tools when reading and for later cleanup; let's do a better job of importing."
72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "df = pd.read_csv(\n", 81 | " \"tmp.csv\",\n", 82 | " index_col=0,\n", 83 | " skipinitialspace=True,\n", 84 | " dtype={\"os\": \"category\", \"arch\": \"category\"},\n", 85 | ")\n", 86 | "df" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "df.info()" 96 | ] 97 | }, 98 | { 99 | "cell_type": "markdown", 100 | "metadata": {}, 101 | "source": [ 102 | "We can query columns (or anything else):" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "df[\"os\"]" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "For simple names, columns can be even easier to access:" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "df.arch" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "You have quick, easy access to lots of analysis tools:" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "df.version.plot.bar();" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "You can select using a variety of methods, including NumPy style boolean arrays:" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "df[df.arch == \"arm64\"]" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "The powerful groupby lets you collect and analyze with ease. 
For example, to compute the mean for each possible arch:" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [ 175 | "df.groupby(\"arch\").version.mean()" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "Pandas pioneered a lot of DSL (Domain Specific Language) for Python, taking over the Python language to keep things simple and consistent within DataFrames. For example, it provides accessors, like the `.str` accessor, that apply normal methods to a series instead:" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "df.arch.str.upper()" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "This is just scratching the surface. Besides manipulating these dataframes and series, Pandas also offers:\n", 199 | "\n", 200 | "* Fantastic date manipulation, including holidays, work weeks, and more\n", 201 | "* Great periodic tools, rolling calculations, and more\n", 202 | "\n", 203 | "Great Pandas, like vectorized NumPy, can be a little hard to write, taking a few iterations, but once you have it written, it is easy to read and very expressive." 
204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "## More reading" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "See this notebook that analyzes COVID data and runs daily on my website: " 218 | ] 219 | } 220 | ], 221 | "metadata": { 222 | "kernelspec": { 223 | "display_name": "conda-env-level-up-your-python-py", 224 | "language": "python", 225 | "name": "conda-env-level-up-your-python-py" 226 | }, 227 | "language_info": { 228 | "codemirror_mode": { 229 | "name": "ipython", 230 | "version": 3 231 | }, 232 | "file_extension": ".py", 233 | "mimetype": "text/x-python", 234 | "name": "python", 235 | "nbconvert_exporter": "python", 236 | "pygments_lexer": "ipython3", 237 | "version": "3.10.5" 238 | } 239 | }, 240 | "nbformat": 4, 241 | "nbformat_minor": 4 242 | } 243 | -------------------------------------------------------------------------------- /notebooks/3.4 Numba.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Numba: JIT for Speed!\n", 8 | "\n", 9 | "\n", 10 | "Numba is one of the most exciting things to happen to Python. It is a library that takes a Python function, converts the bytecode to LLVM, compiles it, and runs it at full machine speed!"
11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": { 17 | "tags": [ 18 | "remove-cell" 19 | ] 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "# This notebook does not work on WebAssembly (no numba JIT)" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "import numba\n", 33 | "import numpy as np\n", 34 | "import matplotlib.pyplot as plt" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "## First example" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "def f1(a, b):\n", 51 | " return 2 * a**3 + 3 * b**0.5\n", 52 | "\n", 53 | "\n", 54 | "@numba.vectorize\n", 55 | "def f2(a, b):\n", 56 | " return 2 * a**3 + 3 * b**0.5" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "a, b = np.random.random_sample(size=(2, 100_000))" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "%%timeit\n", 75 | "c = f1(a, b)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "%%time\n", 85 | "c = f2(a, b)" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "This probably took a bit longer. The very first time you JIT compile something, it takes time to do the compilation. Numba is pretty fast, but you probably still pay a cost. There are things you can do to control when this happens, but there is a small cost." 
93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "%%timeit\n", 102 | "c = f2(a, b)" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "It took the function we defined, pulled it apart, and turned into Low Level Virtual Machine (LLVM) code, and compiled it. No special strings or special syntax; it is just a (large) subset of Python and NumPy. And users and libraries can extend it too. It also supports:\n", 110 | "\n", 111 | "* Vectorized, general vectorized, or regular functions\n", 112 | "* Ahead of time compilation, JIT, or dynamic JIT\n", 113 | "* Parallelized targets\n", 114 | "* GPU targets via CUDA or ROCm\n", 115 | "* Nesting\n", 116 | "* Creating cfunction callbacks\n", 117 | "\n", 118 | "It is almost always as fast or faster than any other compiled solution (minus the JIT time). A couple of years ago it became much easier to install (via PIP with LLVMLite's lightweight and independent LLVM build)." 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "## JIT example\n", 126 | "\n", 127 | "The example above using `@numba.vectorize` to make \"ufunc\" like functions. These can take any (broadcastable) size of array(s) and produces an output array. It's similar to `@numpy.vectorize` which just loops in Python. Let's try controlling the looping ourselves, using a ODE solver:" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "### Problem setup" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "Let's setup an ODE function to solve. 
We can write our ODE as a system of linear first order ODE equations:" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "The harmonic motion equation can be written in terms of $\\mathbf{f}(t, \\mathbf{y}) = \\dot{\\mathbf{y}}$, where this is in the standard form:\n", 149 | "\n", 150 | "$$\n", 151 | "\\mathbf{y} =\n", 152 | "\\left(\n", 153 | "\\begin{matrix}\n", 154 | "\\dot{x} \\\\\n", 155 | "x\n", 156 | "\\end{matrix}\n", 157 | "\\right)\n", 158 | "$$\n", 159 | "\n", 160 | "$$\n", 161 | "\\mathbf{f}(t, \\mathbf{y}) = \n", 162 | "\\dot{\\mathbf{y}}\n", 163 | "=\n", 164 | "\\left(\n", 165 | "\\begin{matrix}\n", 166 | "\\ddot{x} \\\\\n", 167 | "\\dot{x}\n", 168 | "\\end{matrix}\n", 169 | "\\right)\n", 170 | "=\n", 171 | "\\left(\n", 172 | "\\begin{matrix}\n", 173 | "-\\frac{k}{m} x \\\\\n", 174 | "\\dot{x}\n", 175 | "\\end{matrix}\n", 176 | "\\right)\n", 177 | "=\n", 178 | "\\left(\n", 179 | "\\begin{matrix}\n", 180 | "-\\frac{k}{m} y_1 \\\\\n", 181 | "y_0\n", 182 | "\\end{matrix}\n", 183 | "\\right)\n", 184 | "$$" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "x_max = 1 # Size of x max\n", 194 | "v_0 = 0\n", 195 | "koverm = 1 # k / m\n", 196 | "\n", 197 | "\n", 198 | "def f(t, y):\n", 199 | " \"Y has two elements, x and v\"\n", 200 | " return np.array([-koverm * y[1], y[0]])" 201 | ] 202 | }, 203 | { 204 | "cell_type": "markdown", 205 | "metadata": {}, 206 | "source": [ 207 | "### Runge-Kutta introduction\n", 208 | "\n", 209 | "Note that $h = t_{n+1} - t_n $.\n", 210 | "\n", 211 | "$$\n", 212 | "\\dot{y} = f(t,y)\n", 213 | "$$\n", 214 | "$$\n", 215 | "\\implies y = \\int f(t,y) \\, dt\n", 216 | "$$\n", 217 | "$$\n", 218 | "\\implies y_{n+1} = y_{n} + \\int_{t_n}^{t_{n+1}} f(t,y) \\, dt\n", 219 | "$$" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "Now, 
expand $f$ in a Taylor series around the *midpoint* of the interval:\n", 227 | "\n", 228 | "$$\n", 229 | "f(t,y) \\approx f(t_{n+\\frac{1}{2}},y_{n+\\frac{1}{2}})\n", 230 | " + \\left( t - t_{n+\\frac{1}{2}}\\right)\n", 231 | " \\dot{f}(t_{n+\\frac{1}{2}})\n", 232 | " + \\mathcal{O}(h^2)\n", 233 | "$$\n", 234 | "\n", 235 | "The second term here is symmetric in the interval, so all we have left is the first term in the integral:\n", 236 | "\n", 237 | "$$\n", 238 | "\\int_{t_n}^{t_{n+1}} f(t,y) \\, dt \\approx\n", 239 | " h\\, f(t_{n+\\frac{1}{2}},y_{n+\\frac{1}{2}}) + \\mathcal{O}(h^3)\n", 240 | "$$" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "Back into the original statement, we get:\n", 248 | "\n", 249 | "$$\n", 250 | "y_{n+1} \\approx \n", 251 | "\\color{blue}{\n", 252 | "y_{n}\n", 253 | "+ h\\, f(t_{n+\\frac{1}{2}},y_{n+\\frac{1}{2}})\n", 254 | "}\n", 255 | "+ \\mathcal{O}(h^3)\n", 256 | "\\tag{rk2}\n", 257 | "$$\n", 258 | "\n", 259 | "We've got one more problem! How do we calculate $f(t_{n+\\frac{1}{2}},y_{n+\\frac{1}{2}})$? 
We can use the Euler's algorithm that we saw last time:\n", 260 | "\n", 261 | "$$\n", 262 | "y_{n+\\frac{1}{2}}\n", 263 | "\\approx y_n + \\frac{1}{2} h \\dot{y}\n", 264 | "= \\color{red}{\n", 265 | "y_n + \\frac{1}{2} h f(t_{n},y_{n})\n", 266 | "}\n", 267 | "$$" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": {}, 273 | "source": [ 274 | "Putting it together, this is our RK2 algorithm:\n", 275 | "\n", 276 | "$$\n", 277 | "\\mathbf{y}_{n+1} \\approx\n", 278 | "\\color{blue}{\n", 279 | "\\mathbf{y}_{n}\n", 280 | "+ \\mathbf{k}_2\n", 281 | "}\n", 282 | "\\tag{1.0}\n", 283 | "$$\n", 284 | "\n", 285 | "\n", 286 | "$$\n", 287 | "\\mathbf{k}_1 = h \\mathbf{f}(t_n,\\, \\mathbf{y}_n)\n", 288 | "\\tag{1.1}\n", 289 | "$$\n", 290 | "\n", 291 | "$$\n", 292 | "\\mathbf{k}_2 = h \\mathbf{f}(t_n + \\frac{h}{2},\\, \\color{red}{\\mathbf{y}_n\n", 293 | "+ \\frac{\\mathbf{k}_1}{2}})\n", 294 | "\\tag{1.2}\n", 295 | "$$\n", 296 | "\n", 297 | "We've picked up bold face to indicate that we can have a vector of ODEs." 
298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "We can get the RK4 algorithm by keeping another non-zero term in the Taylor series:\n", 305 | "\n", 306 | "$$\n", 307 | "\\mathbf{y}_{n+1} \\approx\n", 308 | "\\mathbf{y}_{n}\n", 309 | "+ \\frac{1}{6} (\\mathbf{k}_1 + 2 \\mathbf{k}_2 + 2 \\mathbf{k}_3 + \\mathbf{k}_4 )\n", 310 | "\\tag{2.0}\n", 311 | "$$\n", 312 | "\n", 313 | "$$\n", 314 | "\\mathbf{k}_1 = h \\mathbf{f}(t_n,\\, \\mathbf{y}_n)\n", 315 | "\\tag{2.1}\n", 316 | "$$\n", 317 | "\n", 318 | "$$\n", 319 | "\\mathbf{k}_2 = h \\mathbf{f}(t_n + \\frac{h}{2},\\,\n", 320 | " \\mathbf{y}_n + \\frac{\\mathrm{k}_1}{2})\n", 321 | "\\tag{2.2}\n", 322 | "$$\n", 323 | "\n", 324 | "$$\n", 325 | "\\mathbf{k}_3 = h \\mathbf{f}(t_n + \\frac{h}{2},\\,\n", 326 | " \\mathbf{y}_n + \\frac{\\mathrm{k}_2}{2})\n", 327 | "\\tag{2.3}\n", 328 | "$$\n", 329 | "\n", 330 | "$$\n", 331 | "\\mathbf{k}_4 = h \\mathbf{f}(t_n + h,\\,\n", 332 | " \\mathbf{y}_n + \\mathrm{k}_3)\n", 333 | "\\tag{2.4}\n", 334 | "$$" 335 | ] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": null, 340 | "metadata": {}, 341 | "outputs": [], 342 | "source": [ 343 | "def rk4_ivp(f, init_y, t):\n", 344 | " steps = len(t)\n", 345 | " order = len(init_y)\n", 346 | "\n", 347 | " y = np.empty((steps, order))\n", 348 | " y[0] = init_y\n", 349 | "\n", 350 | " for n in range(steps - 1):\n", 351 | " h = t[n + 1] - t[n]\n", 352 | "\n", 353 | " k1 = h * f(t[n], y[n]) # 2.1\n", 354 | " k2 = h * f(t[n] + h / 2, y[n] + k1 / 2) # 2.2\n", 355 | " k3 = h * f(t[n] + h / 2, y[n] + k2 / 2) # 2.3\n", 356 | " k4 = h * f(t[n] + h, y[n] + k3) # 2.4\n", 357 | "\n", 358 | " y[n + 1] = y[n] + 1 / 6 * (k1 + 2 * k2 + 2 * k3 + k4) # 2.0\n", 359 | "\n", 360 | " return y" 361 | ] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "metadata": {}, 366 | "source": [ 367 | "Let's plot this:" 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | 
"metadata": {}, 374 | "outputs": [], 375 | "source": [ 376 | "ts = np.linspace(0, 40, 100 + 1)\n", 377 | "y = rk4_ivp(f, [x_max, v_0], ts)\n", 378 | "plt.plot(ts, np.cos(ts))\n", 379 | "plt.plot(ts, y[:, 0], \"--\");" 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "execution_count": null, 385 | "metadata": {}, 386 | "outputs": [], 387 | "source": [ 388 | "%%timeit\n", 389 | "ts = np.linspace(0, 40, 1000 + 1)\n", 390 | "y = rk4_ivp(f, [x_max, v_0], ts)" 391 | ] 392 | }, 393 | { 394 | "cell_type": "markdown", 395 | "metadata": {}, 396 | "source": [ 397 | "### Adding Numba" 398 | ] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | "metadata": {}, 403 | "source": [ 404 | "Normally, you'd use a decorator here, but I'm lazy and don't want to rewrite the function, so I'll just manually apply the decorator, since we covered what the syntax actually does." 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "execution_count": null, 410 | "metadata": {}, 411 | "outputs": [], 412 | "source": [ 413 | "f_jit = numba.njit(f)\n", 414 | "rk4_ivp_jit = numba.njit(rk4_ivp)" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": null, 420 | "metadata": {}, 421 | "outputs": [], 422 | "source": [ 423 | "%%timeit\n", 424 | "ts = np.linspace(0, 40, 1000 + 1)\n", 425 | "y = rk4_ivp_jit(f_jit, np.array([x_max, v_0]), ts)" 426 | ] 427 | }, 428 | { 429 | "cell_type": "markdown", 430 | "metadata": {}, 431 | "source": [ 432 | "You can inspect the types if you'd like to add them after running once:" 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": null, 438 | "metadata": {}, 439 | "outputs": [], 440 | "source": [ 441 | "f_jit.inspect_types()" 442 | ] 443 | }, 444 | { 445 | "cell_type": "markdown", 446 | "metadata": {}, 447 | "source": [ 448 | "## See also:\n", 449 | "\n", 450 | "* [CompClass: RK](https://nbviewer.jupyter.org/github/henryiii/compclass/blob/master/classes/week10/2_rk.ipynb)" 451 | ] 452 | } 453 | ], 454 | 
"metadata": { 455 | "kernelspec": { 456 | "display_name": "conda-env-level-up-your-python-py", 457 | "language": "python", 458 | "name": "conda-env-level-up-your-python-py" 459 | }, 460 | "language_info": { 461 | "codemirror_mode": { 462 | "name": "ipython", 463 | "version": 3 464 | }, 465 | "file_extension": ".py", 466 | "mimetype": "text/x-python", 467 | "name": "python", 468 | "nbconvert_exporter": "python", 469 | "pygments_lexer": "ipython3", 470 | "version": "3.10.5" 471 | } 472 | }, 473 | "nbformat": 4, 474 | "nbformat_minor": 4 475 | } 476 | -------------------------------------------------------------------------------- /notebooks/3.5 pybind11.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# pybind11: Use C++ libraries\n", 8 | "\n", 9 | "\n", 10 | "[pybind11](https://pybind11.readthedocs.io) lets you write Python extensions using pure C++; no special tool or processing step needed. It's just a header-only library that works just about everywhere. Used by SciPy, PyTorch, and many more libraries." 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "metadata": {}, 16 | "source": [ 17 | "## Example\n", 18 | "\n", 19 | "A Python extension in pybind11 looks like this:\n", 20 | "\n", 21 | "---\n", 22 | "\n", 23 | "```cpp\n", 24 | "#include \n", 25 | "\n", 26 | "namespace py = pybind11;\n", 27 | "\n", 28 | "int square(int x) {\n", 29 | " return x * x;\n", 30 | "}\n", 31 | "\n", 32 | "PYBIND11_MODULE(somecode, m) {\n", 33 | " m.def(\"square\", &square);\n", 34 | "}\n", 35 | "```\n", 36 | "\n", 37 | "---" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "metadata": {}, 43 | "source": [ 44 | "You can use `cppimport` to import it for a quick test, or a build system like scikit-build-core to build. I'm not including a compiler in this environment, so I'm not going to build here - see one of my other classes. 
This is a minimal `CMakeLists.txt`:\n", 45 | "\n", 46 | "---\n", 47 | "\n", 48 | "```cmake\n", 49 | "cmake_minimum_required(VERSION 3.15...3.26)\n", 50 | "project(python_example LANGUAGES CXX)\n", 51 | "\n", 52 | "set(PYBIND11_FINDPYTHON ON)\n", 53 | "find_package(pybind11 CONFIG REQUIRED)\n", 54 | "\n", 55 | "pybind11_add_module(python_example MODULE src/main.cpp)\n", 56 | "\n", 57 | "install(TARGETS python_example DESTINATION .)\n", 58 | "```\n", 59 | "\n", 60 | "---" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "And, your pyproject.toml:\n", 68 | "\n", 69 | "\n", 70 | "---\n", 71 | "\n", 72 | "```toml\n", 73 | "[build-system]\n", 74 | "requires = [\"scikit-build-core\", \"pybind11\"]\n", 75 | "build-backend = \"scikit_build_core.build\"\n", 76 | "\n", 77 | "[project]\n", 78 | "name = \"example\"\n", 79 | "version = \"0.0.1\"\n", 80 | "requires-python = \">=3.8\"\n", 81 | "```\n", 82 | "---" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "metadata": {}, 88 | "source": [ 89 | "If you want to build and distribute, use [cibuildwheel](https://cibuildwheel.readthedocs.io), which is used by Scikit-Learn, Matplotlib, MyPy, and many more; it can be setup for Linux, macOS, and Windows and all common CPython and PyPy versions in just 13 lines:\n", 90 | "\n", 91 | "---\n", 92 | "\n", 93 | "```yaml\n", 94 | "on: [push, pull_request]\n", 95 | "\n", 96 | "jobs:\n", 97 | " build_wheels:\n", 98 | " strategy:\n", 99 | " matrix:\n", 100 | " os: [ubuntu-latest, windows-latest, macos-latest]\n", 101 | " runs-on: ${{ matrix.os }}\n", 102 | "\n", 103 | " steps:\n", 104 | " - uses: actions/checkout@v3\n", 105 | " \n", 106 | " - uses: pypa/cibuildwheel@v2.14\n", 107 | "\n", 108 | " - uses: actions/upload-artifact@v3\n", 109 | " with:\n", 110 | " path: ./wheelhouse/*.whl\n", 111 | "```\n", 112 | "---\n" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "## So much more\n", 120
| "\n", 121 | "Some examples of classes:\n", 122 | "\n", 123 | "```cpp\n", 124 | "#include <pybind11/pybind11.h>\n", 125 | "\n", 126 | "using namespace pybind11::literals;\n", 127 | "\n", 128 | "PYBIND11_MODULE(example, m) {\n", 129 | " py::class_<Vector>(m, \"Vector\")\n", 130 | " .def(py::init<float, float>())\n", 131 | " .def_property(\"x\", &Vector::getX, &Vector::setX)\n", 132 | " .def_property(\"y\", &Vector::getY, &Vector::setY)\n", 133 | " .def(\"mag\", &Vector::mag, \"I am a mag function\")\n", 134 | " \n", 135 | " .def(\"unit\", [](const Vector& self){return self.unit();})\n", 136 | " \n", 137 | " .def(\"__str__\", [](const Vector& self){return py::str(\"[{}, {}]\").format(self.getX(), self.getY());})\n", 138 | " \n", 139 | " .def(py::self *= float())\n", 140 | " .def(float() * py::self)\n", 141 | " .def(py::self * float());\n", 142 | "}\n", 143 | "```" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "You can use lambda functions almost anywhere, and you can ask for `py::object` or the C++ type interchangeably, or cast between them. 
" 151 | ] 152 | } 153 | ], 154 | "metadata": { 155 | "kernelspec": { 156 | "display_name": "conda-env-level-up-your-python-py", 157 | "language": "python", 158 | "name": "conda-env-level-up-your-python-py" 159 | }, 160 | "language_info": { 161 | "codemirror_mode": { 162 | "name": "ipython", 163 | "version": 3 164 | }, 165 | "file_extension": ".py", 166 | "mimetype": "text/x-python", 167 | "name": "python", 168 | "nbconvert_exporter": "python", 169 | "pygments_lexer": "ipython3", 170 | "version": "3.10.5" 171 | } 172 | }, 173 | "nbformat": 4, 174 | "nbformat_minor": 4 175 | } 176 | -------------------------------------------------------------------------------- /notebooks/3.6 Code Quality and CI.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Code Quality and CI" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "For more information, please see the [Scientific Python Development Guidelines](https://learn.scientific-python.org/development), which covers this in much more detail!" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "## Pre-commit" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "One of my favorite tools is [pre-commit](https://pre-commit.com). It allows you to drive almost any \"fixer\" or \"linter\" available, all from one place. 
It handles environments and caching and even updates for you.\n", 29 | "\n", 30 | "To configure, add a `.pre-commit-config.yaml` file like this:\n", 31 | "\n", 32 | "```yaml\n", 33 | "repos:\n", 34 | "# Some \"common\" checks useful for almost any repo\n", 35 | "- repo: https://github.com/pre-commit/pre-commit-hooks\n", 36 | " rev: v4.4.0\n", 37 | " hooks:\n", 38 | " - id: check-added-large-files\n", 39 | " - id: check-case-conflict\n", 40 | " - id: check-merge-conflict\n", 41 | " - id: check-symlinks\n", 42 | " - id: check-yaml\n", 43 | " - id: debug-statements\n", 44 | " - id: end-of-file-fixer\n", 45 | " - id: mixed-line-ending\n", 46 | " - id: requirements-txt-fixer\n", 47 | " - id: trailing-whitespace\n", 48 | "\n", 49 | "# Automatically format Python code\n", 50 | "- repo: https://github.com/psf/black\n", 51 | " rev: \"23.7.0\"\n", 52 | " hooks:\n", 53 | " - id: black\n", 54 | "```\n", 55 | "\n", 56 | "The file has a list of repos (local checks can be written too). Each repo contains pre-commit hooks that you can run and configure. You should put modifying \"fixer\" checks before the \"linter\" checks, just in case they fix something that then gets linted." 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "You can install pre-commit from `brew` (macOS), or via `pipx`/`pip` for anything with Python.\n", 64 | "\n", 65 | "You can then run it like this:\n", 66 | "\n", 67 | "```bash\n", 68 | "pre-commit run -a\n", 69 | "```\n", 70 | "\n", 71 | "That will check everything. You don't need to know anything about how to run the checkers or linters, it's a single standard interface for all projects. Each hook gets a unique, cached environment, so the next time you run it, it's lightning fast. 
If you leave off the `-a`, it _only checks the changed files in your staging area, even partially staged ones!_.\n", 72 | "\n", 73 | "If you want to update to the latest versions of all your hooks, run:\n", 74 | "\n", 75 | "```bash\n", 76 | "pre-commit autoupdate\n", 77 | "```\n", 78 | "\n", 79 | "If you want to use it in the namesake \"pre-commit\" mode, then run:\n", 80 | "\n", 81 | "```bash\n", 82 | "pre-commit install\n", 83 | "```\n", 84 | "\n", 85 | "Now it runs before every commit, and you'll never check in \"bad\" code again! Use `-n` to skip the pre-commit check when committing for emergencies.\n", 86 | "\n", 87 | "One of the recent exciting advancements is , where you can just add your repo to the GitHub list, and then you get your PRs autocorrected and checked, and you get weekly update PRs to your config!\n", 88 | "\n", 89 | "PS: This is generally not used for pytest (though it could be), since tests are generally slower and take more setup, including being installed properly." 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "## CI: GitHub Actions" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "One of the most important aspect of good code is Continuous Integration (CI); every change should be tested and ideally not allowed to be merged unless it passes. If you support multiple versions of Python or OSs, you should test on each of them. CI lets you do this, and other contributors get the benefit too; if you have good tests you can feel comfortable about making and accepting changes.\n", 104 | "\n", 105 | "There are many services, but the most popular and possibly one of the best designed ones is GitHub Actions. It is really easy to setup, doesn't require extra permissions or accounts, and runs 10(!) parallel jobs, and supports all three OSs, often with the same code, and is highly modular. 
This is what a simple job would look like:\n", 106 | "\n", 107 | "`.github/workflows/ci.yml`:\n", 108 | "\n", 109 | "```yaml\n", 110 | "name: Python tests\n", 111 | "\n", 112 | "on: [push]\n", 113 | "\n", 114 | "jobs:\n", 115 | " build:\n", 116 | " runs-on: ${{ matrix.runs-on }}\n", 117 | " strategy:\n", 118 | " matrix:\n", 119 | " python-version: [\"3.8\", \"3.11\"]\n", 120 | " runs-on: [ubuntu-latest, macos-latest, windows-latest]\n", 121 | "\n", 122 | " steps:\n", 123 | " - uses: actions/checkout@v3\n", 124 | " \n", 125 | " - uses: actions/setup-python@v4\n", 126 | " with:\n", 127 | " python-version: ${{ matrix.python-version }}\n", 128 | " \n", 129 | " - name: Install with dev requirements\n", 130 | " run: pip install .[dev]\n", 131 | "\n", 132 | " - name: Test with pytest\n", 133 | " run: pytest\n", 134 | "```\n", 135 | "\n", 136 | "That's it, 6 jobs run and test your code!" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "* [Official docs](https://docs.github.com/en/actions/guides/building-and-testing-python) are good\n", 144 | "* [Scientific Python Development Guidelines - GHA](https://learn.scientific-python.org/development/guides/gha-basic) has some good help, too!\n", 145 | "\n", 146 | "CI is not just for tests! You can use it for deploying static websites, for building binaries for distribution, for making releases, for compiling documents or documentation, for monitoring things at a regular interval, for automating tasks and making PRs, and much, much more! You can have as many workflows or jobs in a workflow as you want (up to 255).\n", 147 | "\n", 148 | "If you are on GitLab, GitLab CI is excellent too, just not quite as modular.\n", 149 | "\n", 150 | "The website for this course is built in GHA. Can you find the config for it?" 
151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": {}, 156 | "source": [ 157 | "## Quick package construction: Cookiecutter\n", 158 | "\n", 159 | "If you want to set up a project fast, you can use cookiecutter, a Python application (as usual, install with `brew` on macOS or `pipx`/`pip` elsewhere). Then you can stamp out a new package based on online templates. If you are following the [Scientific Python Development Guidelines](https://learn.scientific-python.org/development), then run:\n", 160 | "\n", 161 | "```bash\n", 162 | "cookiecutter gh:scientific-python/cookie\n", 163 | "```\n", 164 | "\n", 165 | "Answer a few questions, then you get a working package with CI and strict style checking in one of around a dozen backends, including C++ pybind11 with cibuildwheel wheels!" 166 | ] 167 | } 168 | ], 169 | "metadata": { 170 | "kernelspec": { 171 | "display_name": "conda-env-level-up-your-python-py", 172 | "language": "python", 173 | "name": "conda-env-level-up-your-python-py" 174 | }, 175 | "language_info": { 176 | "codemirror_mode": { 177 | "name": "ipython", 178 | "version": 3 179 | }, 180 | "file_extension": ".py", 181 | "mimetype": "text/x-python", 182 | "name": "python", 183 | "nbconvert_exporter": "python", 184 | "pygments_lexer": "ipython3", 185 | "version": "3.10.5" 186 | } 187 | }, 188 | "nbformat": 4, 189 | "nbformat_minor": 4 190 | } 191 | -------------------------------------------------------------------------------- /notebooks/save_and_run.py: -------------------------------------------------------------------------------- 1 | from IPython.core.magic import Magics, magics_class, cell_magic 2 | 3 | import sys 4 | import subprocess 5 | from pathlib import Path 6 | 7 | 8 | @magics_class 9 | class AutoMagics(Magics): 10 | @cell_magic 11 | def save_and_run(self, line, cell, local_ns=None): 12 | commands = line.split() 13 | filename = "tmp.py" 14 | Path(filename).write_text(cell) 15 | 16 | subprocess.run( 17 | [sys.executable, "-m", 
*commands, filename], 18 | # This is just to support colors in the notebook
# NOTE(review): passing env= replaces the ENTIRE inherited environment (PATH, HOME, etc. are
# dropped for the child process); presumably acceptable here since sys.executable is an
# absolute path, but {**os.environ, ...} would be safer — TODO confirm, especially on Windows. 19 | env={"FORCE_COLOR": "1", "MYPY_FORCE_COLOR": "1", "TERM": "xterm-color"}, 20 | ) 21 | 22 | 23 | # Why sys.executable here? 24 | # 25 | # We are running inside a virtual environment, which also has mypy installed. 26 | # However, this is a shell command (the starting `!`), so it will not 27 | # necessarily run in the same environment. So we'll use the current python 28 | # interpreter `sys.executable` and use the `python -m mypy` expression instead 29 | # of plain `mypy` to make sure we get our installed MyPy. You don't usually run 30 | # mypy from a notebook. 31 | 32 | 33 | # Standard IPython extension hook: `%load_ext save_and_run` calls this. def load_ipython_extension(ipython): 34 | ipython.register_magics(AutoMagics) 35 | -------------------------------------------------------------------------------- /noxfile.py: -------------------------------------------------------------------------------- 1 | import nox 2 | 3 | nox.needs_version = ">=2022.1.7" 4 | 5 | 6 | # Build a JupyterLite site from ./notebooks; pass `-- --serve` to preview it locally. @nox.session(reuse_venv=True) 7 | def pyodide(session: nox.Session) -> None: 8 | session.install("jupyterlite[lab]") 9 | session.run("jupyter", "lite", "init") 10 | session.run("jupyter", "lite", "build", "--contents=notebooks") 11 | 12 | # Only serve when invoked as: nox -s pyodide -- --serve if "--serve" in session.posargs: 13 | session.run("jupyter", "lite", "serve") 14 | -------------------------------------------------------------------------------- /postBuild: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | 4 | # Avoid having a no kernel popup 5 | python3 -m ipykernel install --user --name conda-env-level-up-your-python-py 6 | --------------------------------------------------------------------------------