├── .gitignore ├── LICENSE ├── README.md ├── code ├── derivatives.py ├── derivatives_numba.py ├── draw_triangles-v1.py ├── draw_triangles-v2.py ├── draw_triangles-v3.py ├── flip_list-v1.py ├── flip_list-v2.py ├── flip_string.py ├── graphics.py ├── my_csv.py ├── test_flip_string-v1.py ├── test_flip_string-v2.py ├── test_flip_string-v3.py ├── test_flip_string-v4.py ├── test_flip_string-v5.py └── test_strflip.py ├── data ├── feet.csv ├── images │ ├── 00.txt │ ├── 01.txt │ ├── 02.txt │ ├── 03.txt │ ├── 04.txt │ ├── 05.txt │ ├── 06.txt │ ├── 07.txt │ ├── 08.txt │ ├── 09.txt │ ├── 10.txt │ ├── 11.txt │ ├── 12.txt │ ├── 13.txt │ ├── 14.txt │ ├── 15.txt │ ├── 16.txt │ ├── 17.txt │ ├── 18.txt │ ├── 19.txt │ ├── 20.txt │ ├── 21.txt │ ├── 22.txt │ ├── 23.txt │ ├── 24.txt │ ├── 25.txt │ ├── 26.txt │ ├── 27.txt │ ├── 28.txt │ ├── 29.txt │ ├── 30.txt │ ├── 31.txt │ ├── 32.txt │ ├── 33.txt │ ├── 34.txt │ ├── 35.txt │ ├── 36.txt │ ├── 37.txt │ ├── 38.txt │ ├── 39.txt │ ├── 40.txt │ ├── 41.txt │ ├── 42.txt │ ├── 43.txt │ ├── 44.txt │ ├── 45.txt │ ├── 46.txt │ ├── 47.txt │ ├── 48.txt │ └── 49.txt └── small.csv ├── docs ├── Makefile ├── conf.py ├── documenting.rst ├── images │ └── triangle.png ├── index.rst ├── packaging.rst ├── performance.rst ├── testing.rst └── usability.rst └── scripts └── make-triangles.py /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/python 3 | 4 | ### Python ### 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | 63 | # Flask stuff: 64 | instance/ 65 | .webassets-cache 66 | 67 | # Scrapy stuff: 68 | .scrapy 69 | 70 | # Sphinx documentation 71 | _build/ 72 | 73 | # PyBuilder 74 | target/ 75 | 76 | # Jupyter Notebook 77 | .ipynb_checkpoints 78 | 79 | # pyenv 80 | .python-version 81 | 82 | # celery beat schedule file 83 | celerybeat-schedule 84 | 85 | # SageMath parsed files 86 | *.sage.py 87 | 88 | # Environments 89 | .env 90 | .venv 91 | env/ 92 | venv/ 93 | ENV/ 94 | env.bak/ 95 | venv.bak/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | .spyproject 100 | 101 | # Rope project settings 102 | .ropeproject 103 | 104 | # mkdocs documentation 105 | /site 106 | 107 | # mypy 108 | .mypy_cache/ 109 | 110 | ### Python Patch ### 111 | .venv/ 112 | 113 | 114 | # End of https://www.gitignore.io/api/python 115 | 116 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Ashwin Srinath 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Python 102 for Scientific Computing and Data Analysis 2 | ----------------------------------------------------- 3 | 4 | 5 | 6 | [![Documentation Status](https://readthedocs.org/projects/python-102/badge/?version=latest)](https://python-102.readthedocs.io/en/latest/?badge=latest) 7 | -------------------------------------------------------------------------------- /code/derivatives.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def dfdx(f, dx, y): 4 | y[1:-1] = (f[2:] - f[:-2]) / (2*dx) 5 | y[0] = (f[1] - f[0]) / dx 6 | y[1] = (f[-2] - f[-1]) / dx 7 | return y 8 | -------------------------------------------------------------------------------- /code/derivatives_numba.py: -------------------------------------------------------------------------------- 1 | from numba import jit, prange 2 | 3 | @jit(parallel=True, nopython=True) 4 | def dfdx(f, dx, y): 5 | for i in prange(1, len(y)-1): 6 | y[i] = (f[i+1] - f[i-1]) / 2*dx 7 | y[0] = (f[1] - f[0]) / dx 8 | y[-1] = (f[-1] - f[-2]) / dx 9 | return y 10 | -------------------------------------------------------------------------------- /code/draw_triangles-v1.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | fig, ax = plt.subplots() 4 | 5 | ax.set_xlabel('x') 6 | ax.set_ylabel('y') 7 | 8 | patch = plt.Polygon([ 9 | (0.2, 0.2), 10 | (0.2, 0.6), 11 | (0.4, 0.4) 12 | ]) 13 | 14 | ax.add_patch(patch) 15 | 16 | ax.text(0.2, 0.4, '(0.2, 0.4)') 17 | ax.text(0.2, 0.6, '(0.2, 0.6)') 18 | ax.text(0.2, 0.4, '(0.2, 0.4)') 19 | 20 | patch = plt.Polygon([ 21 | (0.6, 0.8), 22 | (0.8, 0.8), 23 | (0.5, 0.5) 24 | ]) 25 | 26 | ax.add_patch(patch) 27 | 28 | ax.text(0.6, 0.8, '(0.6, 0.8)') 29 | ax.text(0.8, 0.8, '(0.8, 0.8)') 30 | ax.text(0.5, 0.5, '(0.5, 0.5)') 31 | 32 | patch = plt.Polygon([ 33 | (0.6, 0.1), 34 | (0.7, 0.3), 35 | (0.9, 0.2) 36 | ]) 37 | 38 | ax.add_patch(patch) 39 | 40 | ax.text(0.6, 0.1, '(0.6, 0.1)') 41 | ax.text(0.7, 0.3, '(0.7, 0.3)') 42 | ax.text(0.9, 0.2, '(0.9, 0.2)') 43 | 44 | plt.show() 45 | -------------------------------------------------------------------------------- /code/draw_triangles-v2.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | def draw_triangle(points, ax=None): 4 | if ax is None: 5 | ax = plt.gca() 6 | else: 7 | fig, ax = plt.subplots() 8 | ax.set_xlabel('x') 9 | ax.set_ylabel('y') 10 | 11 | patch = plt.Polygon(points) 12 | ax.add_patch(patch) 13 | 14 | for pt in points: 15 | x, y = pt 16 | ax.text(x, y, '({}, {})'.format(x, y)) 17 | 18 | draw_triangle([ 19 | (0.2, 0.2), 20 | (0.2, 0.6), 21 | (0.4, 0.4) 22 | ]) 23 | 24 | draw_triangle([ 25 | (0.6, 0.8), 26 | (0.8, 0.8), 27 | (0.5, 0.5) 28 | ]) 29 | 30 | draw_triangle([ 31 | (0.6, 0.1), 32 | (0.7, 0.3), 33 | (0.9, 0.2) 34 | ]) 35 | 36 | plt.show() 37 | -------------------------------------------------------------------------------- /code/draw_triangles-v3.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | def draw_triangle(points, ax=None): 4 | if ax is None: 5 | ax = plt.gca() 6 | else: 7 | fig, ax = plt.subplots() 8 | ax.set_xlabel('x') 9 | ax.set_ylabel('y') 10 | 11 | patch = plt.Polygon(points) 12 | ax.add_patch(patch) 13 | 14 | for pt in points: 15 | x, y = pt 16 | ax.text(x, y, '({}, {})'.format(x, y)) 17 | 18 | if __name__ == "__main__": 19 | 20 | draw_triangle([ 21 | (0.2, 0.2), 22 | (0.2, 0.6), 23 | (0.4, 0.4) 24 | ]) 25 | 26 | draw_triangle([ 27 | (0.6, 0.8), 28 | (0.8, 0.8), 29 | (0.5, 0.5) 30 | ]) 31 | 32 | draw_triangle([ 33 | (0.6, 0.1), 34 | (0.7, 0.3), 35 | (0.9, 0.2) 36 | ]) 37 | 38 | plt.show() 39 | -------------------------------------------------------------------------------- /code/flip_list-v1.py: -------------------------------------------------------------------------------- 1 | def flip_list(a, inplace=False): 2 | """ 3 | Flip (reverse) a list. 4 | 5 | Parameters 6 | ---------- 7 | a : list 8 | List to be reversed. 9 | inplace : bool, optional 10 | Specifies whether to flip the list "in place", 11 | or return a new list (default). 12 | 13 | Returns 14 | ------- 15 | flipped : list (or None) 16 | The flipped list. If `inplace=True`, None is returned. 17 | """ 18 | if inplace is True: 19 | a[:] = a[::-1] 20 | return None 21 | else: 22 | return a[::-1] 23 | -------------------------------------------------------------------------------- /code/flip_list-v2.py: -------------------------------------------------------------------------------- 1 | def flip_list(a, inplace=False): 2 | """ 3 | Flip (reverse) a list. 4 | 5 | Parameters 6 | ---------- 7 | a : list 8 | List to be reversed. 9 | inplace : bool, optional 10 | Specifies whether to flip the list "in place", 11 | or return a new list (default). 12 | 13 | Returns 14 | ------- 15 | flipped : list (or None) 16 | The flipped list. If `inplace=True`, None is returned. 17 | 18 | 19 | >>> flip_list([1, 2, 3]) 20 | [3, 2, 1] 21 | 22 | >>> a = [1, 2, 3] 23 | >>> flip_list(a, inplace=True) 24 | >>> a 25 | [3, 2, 1] 26 | """ 27 | if inplace is True: 28 | a[:] = a[::-1] 29 | return None 30 | else: 31 | return a[::-1] 32 | -------------------------------------------------------------------------------- /code/flip_string.py: -------------------------------------------------------------------------------- 1 | def flip_string(s): 2 | """ 3 | flip_string: Flip a string 4 | 5 | Parameters 6 | ---------- 7 | s : str 8 | String to reverse 9 | 10 | Returns 11 | ------- 12 | flipped : str 13 | Copy of `s` with characters arranged in reverse order 14 | """ 15 | 16 | flipped = '' 17 | 18 | # Starting from the last character in `s`, 19 | # add the character to `flipped`, 20 | # and proceed to the previous character in `s`. 21 | # Stop whenever we reach the first character. 22 | 23 | i = len(s) 24 | 25 | while True: 26 | i = i-1 27 | char = s[i] 28 | flipped = flipped + char 29 | 30 | # stop if we have reached the first character: 31 | if char == s[0]: 32 | break 33 | 34 | return flipped 35 | -------------------------------------------------------------------------------- /code/graphics.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | def draw_triangle(points, ax=None): 4 | if ax is None: 5 | ax = plt.gca() 6 | else: 7 | fig, ax = plt.subplots() 8 | ax.set_xlabel('x') 9 | ax.set_ylabel('y') 10 | 11 | patch = plt.Polygon(points) 12 | ax.add_patch(patch) 13 | 14 | for pt in points: 15 | x, y = pt 16 | ax.text(x, y, '({}, {})'.format(x, y)) 17 | -------------------------------------------------------------------------------- /code/my_csv.py: -------------------------------------------------------------------------------- 1 | import pandas 2 | 3 | def read_csv(fname): 4 | with open(fname) as f: 5 | col_names = f.readline().rstrip().split(',') 6 | df = pandas.DataFrame(columns=col_names) 7 | for line in f: 8 | record = pandas.DataFrame([line.rstrip().split(',')], columns=col_names) 9 | df = df.append(record, ignore_index=True) 10 | return df 11 | -------------------------------------------------------------------------------- /code/test_flip_string-v1.py: -------------------------------------------------------------------------------- 1 | from flip_string import flip_string 2 | 3 | flipped = flip_string("mario") 4 | print("mario flipped is:", flipped) 5 | 6 | flipped = flip_string("luigi") 7 | print("luigi flipped is:", flipped) 8 | -------------------------------------------------------------------------------- /code/test_flip_string-v2.py: -------------------------------------------------------------------------------- 1 | from flip_string import flip_string 2 | 3 | assert flip_string('mario') == 'oiram' 4 | assert flip_string('luigi') == 'igiul' 5 | -------------------------------------------------------------------------------- /code/test_flip_string-v3.py: -------------------------------------------------------------------------------- 1 | from flip_string import flip_string 2 | 3 | assert flip_string('mario') == 'oiram' 4 | assert flip_string('luigi') == 'igiul' 5 | assert flip_string('samus') == 'sumas' 6 | -------------------------------------------------------------------------------- /code/test_flip_string-v4.py: -------------------------------------------------------------------------------- 1 | from flip_string import flip_string 2 | 3 | def test_flip_mario(): 4 | assert flip_string('mario') == 'oiram' 5 | 6 | def test_flip_luigi(): 7 | assert flip_string('luigi') == 'igiul' 8 | 9 | def test_flip_samus(): 10 | assert flip_string('samus') == 'sumas' 11 | -------------------------------------------------------------------------------- /code/test_flip_string-v5.py: -------------------------------------------------------------------------------- 1 | from flip_string import flip_string 2 | 3 | def test_flip_one_char(): 4 | assert flip_string('a') == 'a' 5 | 6 | def test_flp_two_charsi(): 7 | assert flip_string('ab') == 'ba' 8 | 9 | def test_flip_palindrome(): 10 | assert flip_string('aba') == 'aba' 11 | -------------------------------------------------------------------------------- /code/test_strflip.py: -------------------------------------------------------------------------------- 1 | from strflip import strflip 2 | 3 | def test_flip_empty_string(): 4 | assert strflip('') == '' 5 | 6 | def test_flip_one_char(): 7 | assert strflip('a') == 'a' 8 | 9 | def test_flip_repeated_char(): 10 | assert strflip('abca') == 'acba' 11 | -------------------------------------------------------------------------------- /data/small.csv: -------------------------------------------------------------------------------- 1 | plot_id,species_id,hindfoot_length 2 | 2,NL,32.0 3 | 3,NL,33.0 4 | 2,DM,37.0 5 | 7,DM,36.0 6 | 3,DM,35.0 7 | 1,PF,14.0 8 | 2,PE, 9 | 1,DM,37.0 10 | 1,DM,34.0 11 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = python-102 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # This file does only contain a selection of the most common options. For a 6 | # full list see the documentation: 7 | # http://www.sphinx-doc.org/en/master/config 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | # import os 16 | # import sys 17 | # sys.path.insert(0, os.path.abspath('.')) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'Python 102' 23 | copyright = '2018, Ashwin Srinath' 24 | author = 'Ashwin Srinath' 25 | 26 | # The short X.Y version 27 | version = '' 28 | # The full version, including alpha/beta/rc tags 29 | release = '0.1' 30 | 31 | 32 | # -- General configuration --------------------------------------------------- 33 | 34 | # If your documentation needs a minimal Sphinx version, state it here. 35 | # 36 | # needs_sphinx = '1.0' 37 | 38 | # Add any Sphinx extension module names here, as strings. They can be 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 40 | # ones. 41 | extensions = [ 42 | 'sphinx.ext.intersphinx', 43 | 'sphinx.ext.mathjax', 44 | 'sphinx.ext.githubpages', 45 | ] 46 | 47 | # Add any paths that contain templates here, relative to this directory. 48 | templates_path = ['_templates'] 49 | 50 | # The suffix(es) of source filenames. 51 | # You can specify multiple suffix as a list of string: 52 | # 53 | # source_suffix = ['.rst', '.md'] 54 | source_suffix = '.rst' 55 | 56 | # The master toctree document. 57 | master_doc = 'index' 58 | 59 | # The language for content autogenerated by Sphinx. Refer to documentation 60 | # for a list of supported languages. 61 | # 62 | # This is also used if you do content translation via gettext catalogs. 63 | # Usually you set "language" from the command line for these cases. 64 | language = None 65 | 66 | # List of patterns, relative to source directory, that match files and 67 | # directories to ignore when looking for source files. 68 | # This pattern also affects html_static_path and html_extra_path . 69 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 70 | 71 | # The name of the Pygments (syntax highlighting) style to use. 72 | pygments_style = 'sphinx' 73 | 74 | 75 | # -- Options for HTML output ------------------------------------------------- 76 | 77 | # The theme to use for HTML and HTML Help pages. See the documentation for 78 | # a list of builtin themes. 79 | # 80 | html_theme = 'sphinx_rtd_theme' 81 | 82 | # Theme options are theme-specific and customize the look and feel of a theme 83 | # further. For a list of options available for each theme, see the 84 | # documentation. 85 | # 86 | # html_theme_options = {} 87 | 88 | # Add any paths that contain custom static files (such as style sheets) here, 89 | # relative to this directory. They are copied after the builtin static files, 90 | # so a file named "default.css" will overwrite the builtin "default.css". 91 | html_static_path = ['_static'] 92 | 93 | # Custom sidebar templates, must be a dictionary that maps document names 94 | # to template names. 95 | # 96 | # The default sidebars (for documents that don't match any pattern) are 97 | # defined by theme itself. Builtin themes are using these templates by 98 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html', 99 | # 'searchbox.html']``. 100 | # 101 | # html_sidebars = {} 102 | 103 | html_title = 'Python 102' 104 | 105 | # -- Options for HTMLHelp output --------------------------------------------- 106 | 107 | # Output file base name for HTML help builder. 108 | htmlhelp_basename = 'python-102doc' 109 | 110 | 111 | # -- Options for LaTeX output ------------------------------------------------ 112 | 113 | latex_elements = { 114 | # The paper size ('letterpaper' or 'a4paper'). 115 | # 116 | # 'papersize': 'letterpaper', 117 | 118 | # The font size ('10pt', '11pt' or '12pt'). 119 | # 120 | # 'pointsize': '10pt', 121 | 122 | # Additional stuff for the LaTeX preamble. 123 | # 124 | # 'preamble': '', 125 | 126 | # Latex figure (float) alignment 127 | # 128 | # 'figure_align': 'htbp', 129 | } 130 | 131 | # Grouping the document tree into LaTeX files. List of tuples 132 | # (source start file, target name, title, 133 | # author, documentclass [howto, manual, or own class]). 134 | latex_documents = [ 135 | (master_doc, 'python-102.tex', 'python-102 Documentation', 136 | 'Ashwin Srinath', 'manual'), 137 | ] 138 | 139 | 140 | # -- Options for manual page output ------------------------------------------ 141 | 142 | # One entry per manual page. List of tuples 143 | # (source start file, name, description, authors, manual section). 144 | man_pages = [ 145 | (master_doc, 'python-102', 'python-102 Documentation', 146 | [author], 1) 147 | ] 148 | 149 | 150 | # -- Options for Texinfo output ---------------------------------------------- 151 | 152 | # Grouping the document tree into Texinfo files. List of tuples 153 | # (source start file, target name, title, author, 154 | # dir menu entry, description, category) 155 | texinfo_documents = [ 156 | (master_doc, 'python-102', 'python-102 Documentation', 157 | author, 'python-102', 'One line description of project.', 158 | 'Miscellaneous'), 159 | ] 160 | 161 | 162 | # -- Extension configuration ------------------------------------------------- 163 | 164 | # -- Options for intersphinx extension --------------------------------------- 165 | 166 | # Example configuration for intersphinx: refer to the Python standard library. 167 | intersphinx_mapping = {'https://docs.python.org/3/': None} 168 | -------------------------------------------------------------------------------- /docs/documenting.rst: -------------------------------------------------------------------------------- 1 | Documenting your code 2 | ===================== 3 | 4 | Most people think of writing documentation as 5 | an unpleasant, but necessary task, 6 | done for the benefit of othe people 7 | with no real benefit to themselves. 8 | So they choose not to do it, 9 | or they do it with little care. 10 | 11 | But even if you are the only person who will ever use your code, 12 | it's still a good idea to document it well. 13 | Being able to document your own code gives you confidence that you understand it yourself, 14 | and a sign of well-written code is that it can be easily documented. 15 | Code you wrote a few weeks ago 16 | may as well have been written by someone else, 17 | and you will be glad that you documented it. 18 | 19 | The good news is that writing documentation can be fun, 20 | and you really don't need to write a lot of it. 21 | 22 | Docstrings and comments 23 | ----------------------- 24 | 25 | Documentation is *not* comments. 26 | 27 | A *docstring* in Python is a string literal 28 | that appears at the beginning of a module, function, class, or method. 29 | 30 | .. code-block:: python 31 | 32 | """ 33 | A docstring in Python that appears 34 | at the beginning of a module, function, class or method. 35 | """ 36 | 37 | The *docstring* of a module, function, class or method 38 | becomes the ``__doc__`` attribute of that object, 39 | and is printed if you type ``help(object)``: 40 | 41 | .. code-block:: python 42 | 43 | In [1]: def fahr_to_celsius(F): 44 | ...: """ 45 | ...: Convert temperature from Fahrenheit to Celsius. 46 | ...: """ 47 | ...: return (F - 32) * (5/9) 48 | 49 | In [2]: help(fahr_to_celsius) 50 | 51 | Help on function fahr_to_celsius in module __main__: 52 | 53 | fahr_to_celsius(F) 54 | Convert temperature from Fahrenheit to Celsius. 55 | 56 | A *comment* in Python is any line that begins with a ``#``: 57 | 58 | .. code-block:: python 59 | 60 | # a comment. 61 | 62 | The purpose of a docstring is to document a module, function, class, or method. 63 | The purpose of a comment is to explain a very difficult piece of code, 64 | or to justify a choice that was made while writing it. 65 | 66 | Docstrings should not be used in place of comments, 67 | or vice versa. **Don't do the following**: 68 | 69 | .. code-block:: python 70 | 71 | In [1]: def fahr_to_celsius(F): 72 | ...: # Convert temperature from Fahrenheit to Celsius. 73 | ...: return (F - 32) * (5/9) 74 | 75 | Deleting code 76 | ^^^^^^^^^^^^^ 77 | 78 | Incidentally, many people use comments and string literals 79 | as a way of "deleting" code - also known as *commenting out* code. 80 | See `this article `_ on a better way to delete code. 81 | 82 | What to document? 83 | ----------------- 84 | 85 | So what goes in a dosctring? 86 | 87 | At minimum, the docstring for a function or method should consist of the following: 88 | 89 | 1. A **Summary** section that describes in a sentence or two 90 | what the function does. 91 | 2. A **Parameters** section that provides a 92 | description of the parameters to the function, 93 | their types, 94 | and default values (in the case of optional arguments). 95 | 3. A **Returns** section that similarly describes the return values. 96 | 4. Optionally, 97 | a **Notes** section that describes the implementation, 98 | and includes references. 99 | 100 | Here is a simple example of this in action: 101 | 102 | .. literalinclude:: ../code/flip_list-v1.py 103 | 104 | NumPy's `documentation guidelines `_ are a great 105 | reference for more information about what and how to document your code. 106 | 107 | Doctests 108 | -------- 109 | 110 | In addition to the sections above, 111 | your documentation can also contain runnable tests. 112 | This is possible using the 113 | `doctest `_ module. 114 | 115 | .. literalinclude:: ../code/flip_list-v2.py 116 | :caption: flip_list.py 117 | 118 | You can tell ``pytest`` to run doctests as well as other tests 119 | using the ``--doctest-modules`` switch: 120 | 121 | :: 122 | 123 | $ pytest --doctest-modules flip_list.py 124 | 125 | collected 1 item 126 | 127 | flip_list.py . [100%] 128 | 129 | =========================== 1 passed in 0.03 seconds =========================== 130 | 131 | Doctests are great because they double up 132 | as documentation as well as tests. 133 | But they shouldn't be the *only* kind of tests you write. 134 | 135 | Documentation generation 136 | ------------------------ 137 | 138 | Finally, you can turn your documentation into a beautiful website (like this one!), 139 | a PDF manual, and various other formats, 140 | using a document generator such as 141 | `Sphinx `_. 142 | You can use services like 143 | `readthedocs `_ 144 | to build and host your website for free. 145 | -------------------------------------------------------------------------------- /docs/images/triangle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shwina/python-102/8843c1f273d3857016193874267f22096d8fb0ea/docs/images/triangle.png -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. python-102 documentation master file, created by 2 | sphinx-quickstart on Thu Jul 12 10:12:51 2018. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Python 102 for scientific computing and data analysis 7 | ===================================================== 8 | 9 | This tutorial covers topics that are 10 | essential for scientific computing and data analysis in Python, 11 | but typically *not* covered in an introductory course or workshop. 12 | 13 | These are the thing you *need* to know if you are writing software 14 | that meets any of the following criteria: 15 | 16 | * You expect to be working on it for more than a couple of weeks. 17 | * You expect that it will be composed of 18 | more than a hundred 19 | or so lines of code. 20 | * You want it to produce results that can be trusted - for example, 21 | if you are publishing a research paper based on those results. 22 | * You expect that it will be used by one or more other people. 23 | * You are contributing to another project - 24 | e.g., an open-source software package. 25 | 26 | What you will learn 27 | ------------------- 28 | 29 | 1. How to organize the code for your project, 30 | and how to make it an installable *package* 31 | rather than a loose collection of files. 32 | 2. How to write tests for your code so that 33 | you can be sure it always produces the correct answer, 34 | even as you make changes to it. 35 | 3. How to document your code so that it is easy for 36 | you and others to use and navigate. 37 | 4. How to improve the usability of your code. 38 | 5. How to improve the performance of your code. 39 | 40 | What you need to know 41 | --------------------- 42 | 43 | This tutorial assumes you know the 44 | very basics of programming with Python. 45 | 46 | If you can write a loop and a function in Python, 47 | and if you know how to run a ``.py`` script, 48 | you should be able to follow this tutorial easily. 49 | 50 | What you need to have 51 | --------------------- 52 | 53 | If you plan to participate in the hands-on exercises, 54 | you will need: 55 | 56 | * A laptop with `Anaconda `_ installed on it 57 | * 1 or more friends. 58 | It is **highly** encouraged to work in groups, 59 | so if you haven't already, 60 | please introduce yourself to your neighbour(s). 61 | 62 | .. toctree:: 63 | :maxdepth: 2 64 | :caption: Contents: 65 | 66 | packaging 67 | testing 68 | documenting 69 | usability 70 | performance 71 | 72 | 73 | Indices and tables 74 | ================== 75 | 76 | * :ref:`genindex` 77 | * :ref:`modindex` 78 | * :ref:`search` 79 | -------------------------------------------------------------------------------- /docs/packaging.rst: -------------------------------------------------------------------------------- 1 | Organizing code for a Python project 2 | ==================================== 3 | 4 | A well structured project is 5 | easy to navigate and make changes and improvements to. 6 | It's also more likely to be used by other people -- 7 | and that includes *you* a few weeks from now! 8 | 9 | Organization basics 10 | ------------------- 11 | 12 | We want to write a Python program that draws triangles: 13 | 14 | .. image:: images/triangle.png 15 | :width: 60% 16 | 17 | We use the the 18 | `Polygon `_ class 19 | of the `matplotlib `_ library 20 | and write a script called ``draw_triangles.py`` to do this: 21 | 22 | .. literalinclude:: ../code/draw_triangles-v1.py 23 | :caption: draw_triangles.py 24 | :language: python 25 | 26 | Do you think this is a good way to organize the code? 27 | What do you think could be improved in the script ``draw_triangles.py``? 28 | 29 | Functions 30 | ^^^^^^^^^ 31 | 32 | Functions facilitate code reuse. 33 | Whenever you see yourself 34 | typing the same code twice 35 | in the same program or project, 36 | it is a clear indication that the code belongs in a function. 37 | 38 | A good function: 39 | 40 | * has a descriptive name. ``draw_triangle`` is a better name 41 | than ``plot``. 42 | * is small -- no more than a couple of dozen lines -- 43 | and does **one** thing. 44 | If a function is doing too much, 45 | then it should probably be broken into smaller functions. 46 | * can be easily tested -- more on this soon. 47 | * is well documented -- more on this later. 48 | 49 | In the script ``draw_triangles.py`` above, 50 | it would be a good idea to define a function 51 | called ``draw_triangle`` that draws a single triangle, 52 | and re-use this function every time we need to draw a triangle: 53 | 54 | .. literalinclude:: ../code/draw_triangles-v2.py 55 | :caption: draw_triangles.py 56 | 57 | Python scripts and modules 58 | ^^^^^^^^^^^^^^^^^^^^^^^^^^ 59 | 60 | A *module* is a file containing a collection of Python definitions and statements, 61 | typically named with a ``.py`` suffix. 62 | 63 | A *script* is a module that is intended to be run by the Python interpreter. 64 | For example, 65 | the script ``draw_triangles.py`` can be run from the command-line 66 | using the command: 67 | 68 | :: 69 | 70 | $ python draw_triangles.py 71 | 72 | If you are using an Integrated Development Environment 73 | like Spyder or `PyCharm `_, 74 | then the script can be run by opening it in the IDE and clicking on the "Run" button. 75 | 76 | Modules, or specific functions from a module can be imported 77 | using the ``import`` statement: 78 | 79 | .. code-block:: python 80 | 81 | import draw_triangles 82 | from draw_triangles import draw_triangle 83 | 84 | When a module is imported, 85 | all the statements in the module are executed by the Python interpreter. 86 | This happens only the first time the module is imported. 87 | 88 | It is sometimes useful to have both 89 | importable functions 90 | as well as executable statements 91 | in a single module. 92 | When importing functions from this module, 93 | it is possible to avoid running other code by placing it under 94 | ``if __name__ == "__main__"``: 95 | 96 | .. literalinclude:: ../code/draw_triangles-v3.py 97 | :caption: draw_triangles.py 98 | 99 | When another module imports the module ``draw_triangles`` above, 100 | the code under ``if __name__ == "__main__"`` is **not** executed. 101 | 102 | How to structure a Python project? 103 | ---------------------------------- 104 | 105 | Let us now imagine we had a lot more code; 106 | for example, a *collection* of functions for: 107 | 108 | * plotting shapes (like ``draw_triangle`` above) 109 | * calculating areas 110 | * geometric transformations 111 | 112 | What are the different ways to organize code for a Python project 113 | that is more than a handful of lines long? 114 | 115 | A single module 116 | ^^^^^^^^^^^^^^^ 117 | 118 | :: 119 | 120 | geometry 121 | └── draw_triangles.py 122 | 123 | One way to organize your code 124 | is to put all of it 125 | in a single ``.py`` file (module) 126 | like ``draw_triangles.py`` above. 127 | 128 | Multiple modules 129 | ^^^^^^^^^^^^^^^^ 130 | 131 | For a small number of functions 132 | the approach above is fine, 133 | and even recommended, 134 | but as the size and/or scope of the project grows, 135 | it may be necessary to 136 | divide up code into different modules, 137 | each containing related data and functionality. 138 | 139 | :: 140 | 141 | geometry 142 | ├── draw_triangles.py 143 | └── graphics.py 144 | 145 | .. literalinclude:: ../code/graphics.py 146 | :caption: graphics.py 147 | 148 | Typically, the "top-level" executable code is 149 | put in a separate script 150 | which imports functions and data from other modules: 151 | 152 | .. code-block:: python 153 | :caption: draw_triangles.py 154 | 155 | import graphics 156 | 157 | graphics.draw_triangle([ 158 | (0.2, 0.2), 159 | (0.2, 0.6), 160 | (0.4, 0.4) 161 | ]) 162 | 163 | graphics.draw_triangle([ 164 | (0.6, 0.8), 165 | (0.8, 0.8), 166 | (0.5, 0.5) 167 | ]) 168 | 169 | graphics.draw_triangle([ 170 | (0.6, 0.1), 171 | (0.7, 0.3), 172 | (0.9, 0.2) 173 | ]) 174 | 175 | Packages 176 | ^^^^^^^^ 177 | 178 | A Python **package** is a directory 179 | containing a file called ``__init__.py``, 180 | which can be empty. 181 | Packages can contain modules 182 | as well as other packages 183 | (sometimes referred to as *sub-packages*). 184 | 185 | For example, ``geometry`` below is a package, 186 | containing various modules: 187 | 188 | :: 189 | 190 | draw_triangles.py 191 | geometry 192 | ├── graphics.py 193 | └── __init__.py 194 | 195 | A module from the package can be imported using the "dot" notation: 196 | 197 | .. code-block:: python 198 | 199 | import geometry.graphics 200 | geometry.graphics.draw_triangle(args) 201 | 202 | It's also possible to import a specific function 203 | from the module: 204 | 205 | .. code-block:: python 206 | 207 | from geometry.graphics import draw_triangle 208 | draw_triangle(args) 209 | 210 | Packages can themselves be imported, 211 | which really just imports the ``__init__.py`` module. 212 | 213 | .. code-block:: python 214 | 215 | import geometry 216 | 217 | If ``__init__.py`` is empty, 218 | there is "nothing" in the imported ``geometry`` package, 219 | and the following line gives an error: 220 | 221 | .. code-block:: python 222 | 223 | geometry.graphics.draw_triangle(args) 224 | 225 | .. code-block:: python 226 | 227 | AttributeError: module 'geometry' has no attribute 'graphics' 228 | 229 | Importing from anywhere 230 | ----------------------- 231 | 232 | sys.path 233 | ^^^^^^^^ 234 | 235 | To improve their reusability, 236 | you typically want to be able to 237 | ``import`` your modules and packages 238 | from anywhere, 239 | i.e., from any directory on your computer. 240 | 241 | One way to do this is to use ``sys.path``: 242 | 243 | .. code-block:: python 244 | 245 | import sys 246 | sys.path.append('/path/to/geometry') 247 | 248 | import graphics 249 | 250 | ``sys.path`` is a list of directories 251 | that Python looks for modules and packages in 252 | when you ``import`` them. 253 | 254 | Installable projects 255 | ^^^^^^^^^^^^^^^^^^^^ 256 | 257 | A better way is to make your project "installable" 258 | using `setuptools `_. 259 | To do this, you will need to 260 | include a ``setup.py`` with your project. 261 | Your project should be organized as follows: 262 | 263 | .. code-block:: bash 264 | 265 | draw_triangles.py 266 | geometry 267 | ├── graphics.py 268 | └── __init__.py 269 | setup.py 270 | 271 | A minimal ``setup.py`` can include the following 272 | 273 | .. code-block:: python 274 | :caption: setup.py 275 | 276 | from setuptools import setup 277 | 278 | setup(name='geometry', 279 | version='0.1', 280 | author='Ashwin Srinath', 281 | packages=['geometry']) 282 | 283 | You can install the package using ``pip`` 284 | with the following command 285 | (run from the same directory as ``setup.py``): 286 | 287 | .. code-block:: bash 288 | 289 | $ pip install -e . --user 290 | 291 | This installs the package in *editable* mode, 292 | creating a link to it in the user's ``site-packages`` directory, 293 | which happens to already be in ``sys.path``. 294 | 295 | Once your project is installed, 296 | you don't need to worry about 297 | adding it manually to ``sys.path`` each time you need to use it. 298 | 299 | It's also easy to *uninstall* a package; 300 | just run the following command from the same directory as ``setup.py``: 301 | 302 | .. code-block:: bash 303 | 304 | $ pip uninstall . 305 | -------------------------------------------------------------------------------- /docs/performance.rst: -------------------------------------------------------------------------------- 1 | Improving the performance of Python programs 2 | ============================================ 3 | 4 | Timing code and identifying bottlenecks 5 | --------------------------------------- 6 | 7 | Of course, 8 | the first step toward improving performance 9 | is to figure out where to focus your efforts. 10 | This means identifying the section of code in your program 11 | that is taking the most time, 12 | i.e., the "bottleneck". 13 | 14 | Sometimes, 15 | the bottleneck is very obvious 16 | (e.g., the training step in a machine learning application), 17 | and sometimes it may not be clear. 18 | In the latter case, 19 | you need to be able to measure the time taken by various parts of your program. 20 | 21 | The ``time`` function 22 | ^^^^^^^^^^^^^^^^^^^^^ 23 | 24 | The `time `_ 25 | function can be used to time a section of code as follows: 26 | 27 | .. code-block:: python 28 | 29 | import time 30 | import numpy as np 31 | 32 | t1 = time.time() 33 | a = np.random.rand(5000, 5000) 34 | t2 = time.time() 35 | print("Generating random array took {} seconds".format(t2-t1)) 36 | 37 | :: 38 | 39 | Generating random array took 0.44880104064941406 seconds 40 | 41 | 42 | ``%timeit`` and ``%%timeit`` 43 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 44 | 45 | ``%timeit%`` and ``%%timeit`` are 46 | `magic statements `_ 47 | that can be used in IPython 48 | or in Jupyter Notebook 49 | for timing a single line of code or a block of code 50 | conveniently: 51 | 52 | :: 53 | 54 | In [1]: import numpy as np 55 | 56 | In [2]: %timeit np.random.rand(5000, 5000) 57 | 410 ms ± 2.59 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) 58 | 59 | In [3]: %%timeit 60 | ...: a = np.random.rand(5000, 5000) 61 | ...: b = np.random.rand(5000, 5000) 62 | ...: c = a * b 63 | ...: 64 | 897 ms ± 10.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) 65 | 66 | Profilers 67 | ^^^^^^^^^ 68 | 69 | ``time`` and ``timeit`` should help with most of your measurement needs, 70 | but if you need to profile a very long program with lots of functions, 71 | you may benefit from using 72 | a `profiler `_. 73 | 74 | There is also a 75 | `line_profiler `_ 76 | that can help you automatically profile 77 | each line in a script, 78 | and a `memory_profiler `_ 79 | to measure memory consumption. 80 | 81 | Install optimized versions of libraries 82 | --------------------------------------- 83 | 84 | This is the easiest way to get "free" performance improvements. 85 | If your computer supports it, 86 | install optimized version of Python libraries, 87 | for example, those provided by 88 | the `Intel Distribution for Python `_. 89 | 90 | Another option is `PyPy `_. 91 | 92 | Choose the right algorithm 93 | -------------------------- 94 | 95 | This is one of the most effective ways to 96 | improve the performance of a program. 97 | 98 | When choosing a function from a library 99 | or writing your own, 100 | ensure that you understand how it will perform 101 | for the type and size of data you have, 102 | and what options there may be to boost its performance. 103 | Always benchmark to compare with other functions and libraries. 104 | 105 | For example, 106 | if you are doing linear algebra, 107 | you may benefit from the use of 108 | `sparse `_ matrices and algorithms 109 | if you are dealing with very large matrices with relatively few non-zeros. 110 | 111 | As another example, many kinds of algorithms are iterative 112 | and require an initial "guess" for the solution. 113 | Typically, the closer this initial guess is to the actual solution, 114 | the faster the algorithm performs. 115 | 116 | Choose the appropriate data format 117 | ---------------------------------- 118 | 119 | Familiarize yourself with 120 | the various data formats available for the type of data you are dealing with, 121 | and the performance considerations for each. 122 | For example, 123 | `this page `_ 124 | provides a good overview of various data formats for 125 | tabular data supported by the Pandas library. 126 | Performance for each is reported 127 | `here `_. 128 | 129 | Don't reinvent the wheel 130 | ------------------------ 131 | 132 | Resist any temptation 133 | to write your own implementation for a 134 | common task or a well-known algorithm. 135 | Rely instead on other well-tested and well-used implementations. 136 | 137 | For instance, it's easy to write a few lines of Python to 138 | read data from a ``.csv`` file into a Pandas DataFrame: 139 | 140 | .. code-block:: python 141 | :caption: my_csv.py 142 | 143 | def read_csv(fname): 144 | with open(fname) as f: 145 | col_names = f.readline().rstrip().split(',') 146 | df = pandas.DataFrame(columns=col_names) 147 | for line in f: 148 | record = pandas.DataFrame([line.rstrip().split(',')], columns=col_names) 149 | df = df.append(record, ignore_index=True) 150 | return df 151 | 152 | But such code performs poorly. 153 | Compare the performance with Pandas' ``read_csv`` function: 154 | 155 | .. code-block:: python 156 | 157 | In [1]: from my_csv import read_csv 158 | 159 | In [2]: %time data = read_csv('feet.csv') 160 | CPU times: user 2min 3s, sys: 1.39 s, total: 2min 4s 161 | Wall time: 2min 5s 162 | 163 | .. code-block:: python 164 | 165 | In [1]: from pandas import read_csv 166 | 167 | In [2]: %time data = read_csv('feet.csv') 168 | CPU times: user 28.5 ms, sys: 10.8 ms, total: 39.3 ms 169 | Wall time: 54.2 ms 170 | 171 | It also isn't nearly as versatile, 172 | and doesn't account for the dozens of edge cases than Pandas does. 173 | 174 | Benchmark, benchmark, benchmark! 175 | -------------------------------- 176 | 177 | If there are two ways of doing the same thing, 178 | *benchmark* to see which is faster for different problem sizes. 179 | 180 | For example, let's say we want to compute 181 | the average ``hindfooth_length`` for 182 | all species in ``plot_id`` 13 in the following dataset: 183 | 184 | .. code-block:: python 185 | 186 | In [1]: data = pandas.read_csv('feet.csv') 187 | 188 | In [2]: data.head() 189 | Out[2]: 190 | plot_id species_id hindfoot_length 191 | 0 2 NL 32.0 192 | 1 3 NL 33.0 193 | 2 2 DM 37.0 194 | 3 7 DM 36.0 195 | 4 3 DM 35.0 196 | 197 | One way to do this would be to group by the ``plot_id``, 198 | compute the mean hindfoot length for each group, 199 | and extract the result for the group with ``plot_id`` 13: 200 | 201 | .. code-block:: python 202 | 203 | In [2]: data.groupby('plot_id')['hindfoot_length'].mean()[13] 204 | Out[2]: 27.570887035633056 205 | 206 | Another way would be to filter the data first, 207 | keeping only records with ``plot_id`` 13, 208 | and then computing the mean of the ``hindfoot_length`` column: 209 | 210 | .. code-block:: python 211 | 212 | In [3]: data[data['plot_id'] == 13]['hindfoot_length'].mean() 213 | Out[3]: 27.570887035633056 214 | 215 | Both methods give identical results, 216 | but the difference in performance is significant: 217 | 218 | .. code-block:: python 219 | 220 | In [4]: %timeit data.groupby('plot_id')['hindfoot_length'].mean()[13] 221 | 1.34 ms ± 24.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each) 222 | 223 | In [5]: %timeit data[data['plot_id'] == 13]['hindfoot_length'].mean() 224 | 750 µs ± 506 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each) 225 | 226 | Why do you think the first method is slower? 227 | 228 | Avoid explicit loops 229 | -------------------- 230 | 231 | Very often, you need to operate on multiple elements of a collection 232 | such as a 233 | NumPy array or 234 | Pandas DataFrame. 235 | 236 | In such cases, it is almost always a bad idea to write 237 | an explicit ``for`` loop over the elements. 238 | 239 | For instance, 240 | looping over the rows (a.k.a, *indices* or *records*) 241 | of a Pandas DataFrame is considered poor practice, 242 | and is very slow. 243 | Consider replacing values in a column of a dataframe: 244 | 245 | .. code-block:: python 246 | 247 | In [5]: %%timeit 248 | ...: for i in range(len(data['species_id'])): 249 | ...: if data.loc[i, 'species_id'] == 'NL': 250 | ...: data.loc[i, 'species_id'] = 'NZ' 251 | ...: 252 | 308 ms ± 4.49 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) 253 | 254 | A better way to do this is 255 | simply to use the ``replace()`` method: 256 | 257 | .. code-block:: python 258 | 259 | In [2]: %time data['species_id'].replace('NL', 'NZ', inplace=True) 260 | CPU times: user 3.1 ms, sys: 652 µs, total: 3.75 ms 261 | Wall time: 3.34 ms 262 | 263 | In addition to being faster, 264 | this also leads to more readable code. 265 | 266 | Of course, loops are unavoidable in many situations; 267 | but look for alternatives before you write a ``for`` loop 268 | over the elements of an array, DataFrame, or similar data structure. 269 | 270 | Avoid repeatedly allocating, copying and rearranging data 271 | --------------------------------------------------------- 272 | 273 | Repeatedly creating and destroying new data can be very expensive 274 | especially if you are working with very large arrays or data frames. 275 | So avoid, for instance, creating a new array each time inside a loop. 276 | When operating on NumPy arrays, 277 | memory is allocated for intermediate results. 278 | Packages like `numexpr `_ aim to help with this. 279 | 280 | Understand when data needs to be copied v/s when data can be operated "in-place". 281 | It also helps to know *when* copies are made. 282 | For example, do you think 283 | the following code results in two copies of the same array? 284 | 285 | .. code-block:: python 286 | 287 | import numpy as np 288 | 289 | a = np.random.rand(50, 50) 290 | b = a 291 | 292 | `This article `_ 293 | clears up a lot of confusion 294 | about how names and values work in Python 295 | and when copies are made v/s when they are not. 296 | 297 | Access data from memory efficiently 298 | ----------------------------------- 299 | 300 | Accessing data in the "wrong order": 301 | it is always more efficient to access values that are 302 | "closer together" in memory than values that are farther apart. 303 | For example, looping over the elements along the rows of a 2-d NumPy array 304 | is *much* more efficient than looping over the elements along its columns. 305 | Similarly, looping over the columns of a DataFrame in Pandas will be faster 306 | than looping over its rows. 307 | 308 | * Redundant computations / computing "too much": 309 | if you only need to compute on a subset of your data, 310 | filter *before* doing the computation 311 | rather than after. 312 | 313 | 314 | Interfacing with compiled code 315 | ------------------------------ 316 | 317 | You may have heard that Python is "slow" 318 | compared to other languages like C, C++, or Fortran. 319 | This is somewhat true in that Python programs 320 | written in "pure Python", i.e., without the use 321 | of any libraries except the standard libraries, 322 | will be slow compared to their C/Fortran counterparts. 323 | One of the reasons that C is so much faster than Python 324 | is that it is a 325 | `compiled language `_, 326 | while Python is an 327 | `interpreted language `_. 328 | 329 | However, 330 | the core of libraries like NumPy 331 | are actually written in C, 332 | making them much faster than "pure Python". 333 | 334 | It's also possible for you to write your own code 335 | so that it interfaces with languages like C, C++ or Fortran. 336 | Better still, 337 | you often don't even need to write any code in those languages, 338 | and instead can have other libraries "generate" them for you. 339 | 340 | `Numba `_ is a library that lets you compile 341 | code written in Python using 342 | a very convenient "decorator" syntax. 343 | 344 | As an example, 345 | consider numerically evaluating the derivative 346 | of a function using finite differences. 347 | A function that uses NumPy to do this might look like the following: 348 | 349 | .. literalinclude:: ../code/derivatives.py 350 | :language: python 351 | :caption: derivatives.py 352 | 353 | Below, we time the function for a grid of 10000000 points: 354 | 355 | :: 356 | 357 | In [1]: x = np.linspace(0, 1, 10000000) 358 | 359 | In [2]: dx = x[1] - x[0] 360 | 361 | In [3]: f = np.sin(2 * np.pi * x / 1000000) 362 | 363 | In [4]: y = np.zeros_like(f) 364 | 365 | In [5]: %timeit dfdx(f, dx, y) 366 | 61.1 ms ± 2.62 ms per loop (mean ± std. dev. of 7 runs, 10 loops each) 367 | 368 | Below is a function that is compiled using Numba to do the same task: 369 | 370 | .. literalinclude:: ../code/derivatives_numba.py 371 | :language: python 372 | :caption: derivatives.py 373 | 374 | We see much better performance for the same grid size: 375 | 376 | :: 377 | 378 | In [1]: %timeit dfdx(f, dx, y) 379 | 14.6 ms ± 282 µs per loop (mean ± std. dev. of 7 runs, 1 loop each) 380 | 381 | `Cython `_ is another option for interfacing with compiled code. 382 | It performs about the same as Numba but requires much more effort; 383 | although it can do many things that Numba cannot, 384 | such as generating C code, and 385 | interface with C/C++ libraries. 386 | 387 | Parallelization 388 | --------------- 389 | 390 | Finally, 391 | if your computer has multiple cores, 392 | or if you have access to a bigger computer (e.g., a high-performance computing cluster), 393 | parallelizing your code may be an option. 394 | 395 | * Note that many libraries support parallelization without any effort on your part. 396 | Libraries like Numba and `Tensorflow `_ 397 | can use all the cores on your CPU, 398 | and even your GPU for accelerating computations. 399 | 400 | * `Dask `_ is a great library for 401 | parallelizing computations 402 | and operating on large datasets that don't fit in RAM. 403 | 404 | * The `multiprocessing `_ package 405 | is useful when you have several independent tasks that can all be done concurrently. 406 | `joblib `_ is another popular library for this. 407 | 408 | -------------------------------------------------------------------------------- /docs/testing.rst: -------------------------------------------------------------------------------- 1 | Testing your code 2 | ================= 3 | 4 | .. note:: 5 | 6 | This section is based heavily on Ned Batchelder's 7 | excellent article and PyCon 2014 talk 8 | `Getting Started Testing `_. 9 | 10 | | *Tests are the dental floss of development: everyone knows they should do it more,* 11 | | *but they don’t, and they feel guilty about it.* 12 | | - Ned Batchelder 13 | 14 | | *Code without tests should be approached with a 10-foot pole.* 15 | | - me 16 | 17 | How can you write 18 | modular, extensible, and reusable code? 19 | 20 | After making changes to a program, 21 | how do you ensure that 22 | it will still give the same answers as before? 23 | 24 | How can we make finding and fixing bugs 25 | an easy, fun and rewarding experience? 26 | 27 | These seemingly unrelated questions all 28 | have the same answer, 29 | and it is 30 | **automated testing**. 31 | 32 | Testing by example: ``flip_string`` 33 | ----------------------------------- 34 | 35 | Here is a function called ``flip_string`` that flips (reverses) a string. 36 | There are bug(s) in this function that we need to find and fix. 37 | Test the function for 38 | various inputs and compare the results obtained with expected output. 39 | 40 | .. literalinclude:: ../code/flip_string.py 41 | :caption: flip_string.py 42 | 43 | * What tests did you come up with? Why did you choose those tests? 44 | * How did you organize and execute your tests? 45 | * Can the results of your tests help you figure out what problem(s) 46 | there might be with the code? 47 | 48 | Testing interactively 49 | ^^^^^^^^^^^^^^^^^^^^^ 50 | 51 | This is the most common type of testing, 52 | and something you have probably done before. 53 | To test a function or a line of code, 54 | you simply fire up an interactive Python interpreter, 55 | import the function, 56 | and test away: 57 | 58 | .. code-block:: python 59 | 60 | >>> from flip_string import flip_string 61 | >>> flip_string('mario') 62 | 'oiram' 63 | >>> flip_string('luigi') 64 | 'igiul' 65 | 66 | While this kind of testing is better than not doing any testing at all, 67 | it leaves much to be desired. 68 | First, 69 | it needs to be done 70 | each time ``flip_string`` is changed. 71 | It also requires that we manually inspect the output from each test to 72 | decide if the code "passes" or "fails" that test. 73 | Further, 74 | we need to remember all the tests came up with today 75 | if we want to test again tomorrow. 76 | 77 | Writing a test script 78 | ^^^^^^^^^^^^^^^^^^^^^ 79 | 80 | A *much* better way to write tests is to put them in a script: 81 | 82 | .. literalinclude:: ../code/test_flip_string-v1.py 83 | :caption: test_flip_string.py 84 | 85 | Now, running and re-running our tests is very easy - we just run the script: 86 | 87 | .. code-block:: bash 88 | 89 | $ python test_flip_string.py 90 | mario flipped is: oiram 91 | luigi flipped is: igiul 92 | 93 | It's also easy to add new tests, 94 | and there's no need to remember 95 | all the tests we come up with. 96 | 97 | Testing with assertions 98 | ^^^^^^^^^^^^^^^^^^^^^^^ 99 | 100 | One problem with the method above is that 101 | we *still* need to manually inspect the results of our tests. 102 | 103 | Assertions can help with this. 104 | 105 | The ``assert`` statement in Python is very simple: 106 | Given a condition, like ``1 == 2``, 107 | it checks to see if the condition is true or false. 108 | If it is true, then ``assert`` does nothing, 109 | and if it false, it raises an ``AssertionError``: 110 | 111 | .. code-block:: python 112 | 113 | >>> assert 1 == 1 114 | >>> assert 1 < 2 115 | >>> assert 1 > 2 116 | Traceback (most recent call last): 117 | File "", line 1, in 118 | AssertionError 119 | 120 | We can re-write our script ``test_flip_string.py`` 121 | using assertions as follows: 122 | 123 | .. literalinclude:: ../code/test_flip_string-v2.py 124 | :caption: test_flip_string.py 125 | 126 | And we still run our tests the same way: 127 | 128 | .. code-block:: bash 129 | 130 | $ python test_flip_string.py 131 | 132 | This time, there's no need to inspect the test results. 133 | If we get an ``AssertionError``, then we had a test fail, 134 | and if not, all our tests passed. 135 | 136 | However, there's no way to know if *more* than one test failed. 137 | The script stops executing after the first ``AssertionError`` is encountered. 138 | 139 | Let's add another test to our test script and re-run it: 140 | 141 | .. literalinclude:: ../code/test_flip_string-v3.py 142 | :caption: test_flip_string.py 143 | 144 | .. code-block:: bash 145 | 146 | $ python test_flip_string.py 147 | 148 | Traceback (most recent call last): 149 | File "test_flip_string.py", line 5, in 150 | assert flip_string('samus') == 'sumas' 151 | AssertionError 152 | 153 | This time we get a failed test, 154 | because - as we said - our code has bugs in it. 155 | Before adding more tests to investigate further, 156 | we'll discuss one more method for running tests. 157 | 158 | Using a test runner 159 | ^^^^^^^^^^^^^^^^^^^ 160 | 161 | A test runner takes a bunch of tests, 162 | executes them all, 163 | and then reports which of them passed 164 | and which of them failed. 165 | 166 | A very popular test runner for Python is 167 | `pytest `_. 168 | 169 | To run our tests using pytest, 170 | we need to re-write them as follows 171 | (essentially, wrap each test in a function): 172 | 173 | .. code-block:: python 174 | :caption: test_flip_string.py 175 | 176 | from flip_string import flip_string 177 | 178 | def test_flip_mario(): 179 | assert flip_string('mario') == 'oiram' 180 | 181 | def test_flip_luigi(): 182 | assert flip_string('luigi') = 'igiul' 183 | 184 | def test_flip_samus(): 185 | assert flip_string('samus') == 'sumas' 186 | 187 | To run our tests, 188 | we simply type ``pytest`` on the command line. 189 | When we do this, pytest will 190 | look for all files containing tests, 191 | run all the tests in those files, 192 | and report what it found: 193 | 194 | .. code-block:: bash 195 | 196 | $ pytest 197 | 198 | collected 3 items 199 | 200 | test_flip_string.py ..F [100%] 201 | 202 | =================================== FAILURES =================================== 203 | _______________________________ test_flip_samus ________________________________ 204 | 205 | def test_flip_samus(): 206 | > assert flip_string('samus') == 'sumas' 207 | E AssertionError: assert 's' == 'sumas' 208 | E - s 209 | E + sumas 210 | 211 | test_flip_string.py:10: AssertionError 212 | ====================== 1 failed, 2 passed in 0.07 seconds ====================== 213 | 214 | As you can see above, 215 | pytest prints a lot of useful information in its report. 216 | First, 217 | it prints a summary of passed v/s failed tests: 218 | 219 | :: 220 | 221 | test_flip_string.py ..F [100%] 222 | 223 | A dot (``.``) indicates a passed test, 224 | while a ``F`` indicates a failed test. 225 | 226 | For each failed test, 227 | it provides further information, 228 | including the 229 | expected value as well as the obtained value 230 | in the failed assertion: 231 | 232 | :: 233 | 234 | =================================== FAILURES =================================== 235 | _______________________________ test_flip_samus ________________________________ 236 | 237 | def test_flip_samus(): 238 | > assert flip_string('samus') == 'sumas' 239 | E AssertionError: assert 's' == 'sumas' 240 | E - s 241 | E + sumas 242 | 243 | test_flip_string.py:10: AssertionError 244 | 245 | Useful tests 246 | ^^^^^^^^^^^^ 247 | 248 | Now that we know how to write and run tests, 249 | what kind of tests should we write? 250 | Testing ``flip_string`` for arbitrary words like ``'mario'`` and ``'luigi'`` 251 | might not tell us much about where the problem might be. 252 | 253 | Instead, we should choose tests that exercise specific functionality 254 | of the code we are testing, 255 | or represent different conditions that the code may be exposed to. 256 | 257 | Here are some examples of more useful tests: 258 | 259 | * Flipping a string with a single character (no work needs to be done) 260 | * Flipping a string with two characters (minmum amount of work needs to be done) 261 | * Flipping a string that reads the same forwards and backwards 262 | 263 | .. literalinclude:: ../code/test_flip_string-v5.py 264 | :caption: test_flip_string.py 265 | 266 | :: 267 | 268 | collected 3 items 269 | 270 | test_flip_string-v5.py ..F [100%] 271 | 272 | =================================== FAILURES =================================== 273 | _____________________________ test_flip_palindrome _____________________________ 274 | 275 | def test_flip_palindrome(): 276 | > assert flip_string('aba') == 'aba' 277 | E AssertionError: assert 'a' == 'aba' 278 | E - a 279 | E + aba 280 | 281 | test_flip_string.py:10: AssertionError 282 | ====================== 2 failed, 1 passed in 0.08 seconds ====================== 283 | 284 | Fixing the code 285 | ^^^^^^^^^^^^^^^ 286 | 287 | From the test results above, we see that ``flip_string`` failed 288 | for the input ``'aba'``. 289 | Now, can you trace the execution of the code 290 | in the function ``flip_string`` for this input 291 | and figure out why it returned ``a``? 292 | 293 | After fixing the code, 294 | re-run the tests to make sure you didn't break anything else 295 | in the process of fixing this bug -- 296 | this is one of the reasons tests are so valuable! 297 | 298 | Types of testing 299 | ---------------- 300 | 301 | Software testing is a vast topic 302 | and there are 303 | `many levels and types `_ 304 | of software testing. 305 | 306 | For scientific and research software, 307 | the focus of testing efforts is primarily: 308 | 309 | 1. **Unit tests**: Unit tests aim to test small, independent sections of code 310 | (a function or parts of a function), 311 | so that when a test fails, 312 | the failure can easily be associated with that section of code. 313 | This is the kind of testing that we have been doing so far. 314 | 315 | 2. **Regression tests**: Regression tests aim to check whether 316 | changes to the program result in it producing 317 | different results from before. 318 | Regression tests can test 319 | larger sections of code 320 | than unit tests. 321 | As an example, if you are writing a machine learning application, 322 | you may want to run your model on small data 323 | in an automated way 324 | each time your software undergoes changes, 325 | and make sure that the same (or a better) result is produced. 326 | 327 | Test-driven development 328 | ----------------------- 329 | 330 | `Test-driven development (TDD) `_ 331 | is the practice of writing tests for a function or method 332 | *before* actually writing any code for that function or method. 333 | The TDD process is to: 334 | 335 | 1. Write a test for a function or method 336 | 2. Write just enough code that the function or method passes that test 337 | 3. Ensure that all tests written so far pass 338 | 4. Repeat the above steps until you are satisfied with the code 339 | 340 | Proponents of TDD suggest that this results in better code. 341 | Whether or not TDD sounds appealing to you, 342 | writing tests should be *part* of your development process, 343 | and never an afterthought. 344 | In the process of writing tests, 345 | you often come up with new corner cases for your code, 346 | and realize better ways to organize it. 347 | The result is usually code that is 348 | more modular, 349 | more reusable 350 | and of course, 351 | more testable, 352 | than if you didn't do any testing. 353 | 354 | Growing a useful test suite 355 | --------------------------- 356 | 357 | More tests are always better than less, 358 | and your code should have as many tests as you are willing to write. 359 | That being said, 360 | some tests are more useful than others. 361 | Designing a useful suite of tests is a challenge in itself, 362 | and it helps to keep the following in mind when growing tests: 363 | 364 | 1. **Tests should run quickly**: testing is meant to be done as often as possible. 365 | Your entire test suite should complete in no more than a few seconds, 366 | otherwise you won't run your tests often enough for them to be useful. 367 | Always test your functions or algorithms on very small and simple data; 368 | even if in practice they will be dealing with more complex and large datasets. 369 | 370 | 2. **Tests should be focused**: each test should exercise a small part of your code. 371 | When a test fails, 372 | it should be easy for you to 373 | figure out which part of your program you need to focus debugging efforts on. 374 | This can be difficult if your code isn't modular, 375 | i.e., if different parts of your code depend heavily on each other. 376 | This is one of the reasons TDD is said to produce more modular code. 377 | 378 | 3. **Tests should cover all possible code paths**: if your function has multiple code paths 379 | (e.g., an if-else statement), 380 | write tests that execute both the "if" part 381 | and the "else" part. 382 | Otherwise, you might have bugs in your code and still have all tests pass. 383 | 384 | 4. **Test data should include difficult and edge cases**: it's easy to 385 | write code that only handles cases with well-defined inputs and outputs. 386 | In practice however, your code may have to deal with 387 | input data for which it isn't clear what the behaviour should be. 388 | For example, what should ``flip_string('')`` return? 389 | Make sure you write tests for such cases, 390 | so that you force your code to handle them. 391 | -------------------------------------------------------------------------------- /docs/usability.rst: -------------------------------------------------------------------------------- 1 | Improving the usability of Python programs 2 | ========================================== 3 | 4 | Logging 5 | ------- 6 | 7 | It can be useful to print out either a message or the value of some variable, 8 | etc., while your code is running. This is quite common and is usually 9 | accomplished with a simple call to the ``print`` function. 10 | 11 | .. code-block:: python 12 | 13 | x = 1.234 14 | print("The value of x is {0:0.4f}.".format(x)) 15 | 16 | .. code-block:: none 17 | 18 | The value of x is 1.2340. 19 | 20 | Doing this is a good idea to keep track of milestones in your code. That way, 21 | both when you are developing your code but also when other users are running 22 | the code, they can be notified of an event, progress, or value. 23 | 24 | Printing a message is also useful for notifying the user when something is not 25 | going as expected. These are all different *levels* of messaging. 26 | 27 | *Logging* is simply engaging in this behavior of printing out messages, with 28 | the added feature that you include meta data (e.g., a timestamp, the message 29 | category) with the message, as well as a filter where only messages with a high 30 | enough level of criticality are actually allowed to be printed. 31 | 32 | 33 | Logging Basics 34 | ~~~~~~~~~~~~~~ 35 | 36 | 37 | The general idea is that there are multiple levels of messages that can be 38 | printed. Typically these include: 39 | 40 | 1. DEBUG - diagnostic purposes. 41 | 2. INFO - basic information (most common). 42 | 3. WARNING - indicating non-normal behavior. 43 | 4. ERROR - error (the operation cannot continue). 44 | 5. CRITICAL - error (the program cannot continue). 45 | 46 | During the initialization portion of your code, you would configure a *logger* 47 | object with a format, where to print messages (e.g., console, file, or both), 48 | and what level to use by default. Usually, you would set the default log level 49 | to ``INFO`` and the debugging messages used for diagnostics would not actually 50 | be printed. Then, allow the user to override this with a `command line argument 51 | <#command-line-arguments>`_ (e.g., ``--debug``). 52 | 53 | 54 | Example Setup 55 | ~~~~~~~~~~~~~ 56 | 57 | Python has a `logging `_ module 58 | as part of the standard library. It is very comprehensive and allows the user 59 | to heavily customize many parts of the behavior. It is pretty strait forward to 60 | implement your own logging functionality; unless you're doing something special 61 | why not use the standard library? 62 | 63 | .. code-block:: python 64 | 65 | import logging 66 | 67 | log = logging.getLogger("ProjectName") 68 | 69 | file_handler = logging.FileHandler("path/for/output.log") 70 | console_handler = logging.StreamHandler() 71 | 72 | formatter = logging.Formatter("%(levelname)s %(asctime)s %(name)s - %(message)s") 73 | file_handler.setFormatter(formatter) 74 | console_handler.setFormatter(formatter) 75 | 76 | log.addHandler(file_handler) 77 | log.addHandler(console_handler) 78 | log.setLevel(logging.INFO) 79 | 80 | Then, somewhere in the code: 81 | 82 | .. code-block:: python 83 | 84 | log.debug("report on some variable") 85 | log.info("notification of milestone") 86 | log.warn("non-standard behavior") 87 | log.error("unrecoverable issue") 88 | log.critical("panic!") 89 | 90 | .. code-block:: none 91 | 92 | INFO 2018-07-24 09:41:56,683 ProjectName - notification of milestone 93 | WARNING 2018-07-24 09:41:56,835 ProjectName - non-standard behavior 94 | ERROR 2018-07-24 09:41:57,103 ProjectName - unrecoverable issue 95 | CRITICAL 2018-07-24 09:41:57,103 ProjectName - panic! 96 | 97 | Notice that the debug message was not printed. This is because we set the log 98 | level to ``INFO``. Only messages with a level equal to or higher then the 99 | assigned level will make it passed the filter. 100 | 101 | 102 | Logging with Color 103 | ~~~~~~~~~~~~~~~~~~ 104 | 105 | Finally, another common feature of logging is to add color as an indicator of 106 | the message type. Obviously, this only applies to messages that are printed to 107 | the console. If you've ever started up a *Jupyter* notebook server you might 108 | have noticed the logging messages it puts out a similar format as used here and 109 | the meta data is a bold color. The color codes are generally as follows: 110 | 111 | - DEBUG (blue) 112 | - INFO (green) 113 | - WARNING (orange or yellow) 114 | - ERROR (red) 115 | - CRITICAL (purple) 116 | 117 | 118 | Command Line Arguments 119 | ---------------------- 120 | 121 | In addition to packaging your code in a way that other users or projects can 122 | import for use in their code, often it makes sense to also make elements of the 123 | code executable from the command line as stand alone scripts. Python has 124 | everything you need to do this built right in. 125 | 126 | As with logging, there are several python packages available that 127 | handle command line argument parsing for you, including a robust 128 | implementation provided right in the standard library - `argparse 129 | `_. 130 | 131 | The *argparse* module, as well as the others, rely on a universally excepted 132 | convention for how command line arguments should be structured. Nearly all of 133 | the standard utilities on Unix/Linux systems use this same syntax. This 134 | convention covers both the command line argument syntax as well as the structure 135 | of *usage* statements that your script prints out (e.g., when supplying the 136 | ``--help`` option). The *argparse* module actually takes care of all of this 137 | for you. 138 | 139 | Unix Convention 140 | ~~~~~~~~~~~~~~~ 141 | 142 | There is a fair bit of complexity to the convention surrounding the *usage* 143 | statements, but the argument syntax is fairly simple. 144 | 145 | *Positional arguments* are those that don't have names. These are usually file 146 | paths in the context of analysis scripts. *Optional arguments* are those that 147 | have defaults and may or may not accept a value. 148 | 149 | Optional arguments can be specified with *short form* or *long form* names 150 | (usually both). The short form names are a single letter preceded by a single 151 | dash (e.g., ``-a``). Short form options that don't take an argument can be 152 | stacked (e.g., ``-abc``). Long form arguments are whole words and preceded by 153 | two dashes (e.g., ``--debug``). Long form arguments that are multiple words are 154 | usually joined with dashes (e.g., ``--output-directory``). 155 | 156 | There is more, but these are the basics. 157 | 158 | Simple Example 159 | ~~~~~~~~~~~~~~ 160 | 161 | The best (most robust and cross-platform) way of providing a stand along script 162 | with your package is to let your `setup.py` file handle it. Doing the following 163 | will create the proper executable on both Windows and Unix systems and put it 164 | in a place that is readily callable (i.e., on the user's `PATH`). 165 | 166 | .. code-block:: python 167 | 168 | # setup.py 169 | 170 | # use "entry_points" to point to function and setuptools 171 | # will create executables on your behalf. 172 | setup( 173 | # ... 174 | # syntax: "{name}={package}.{module}:{function}" 175 | # "{name}" will be on your PATH in the same "/bin/" 176 | # alongside python/pip executables. 177 | entry_points = {"console_scripts": [ 178 | "do_science=my_package.do_science:main", 179 | ]}, 180 | # ... 181 | ) 182 | 183 | This says that I have a file, ``my_package/do_science.py``, with a function 184 | called ``main`` that when called does the thing I want the script to do. The 185 | function won't be given any arguments, but we can get what we need from 186 | ``sys.argv``. This has the effect of creating an executable we can invoke with 187 | the name ``do_science`` that behaves equivalent to the following. 188 | 189 | .. code-block:: python 190 | 191 | import sys 192 | from my_package.do_science import main 193 | sys.exit(main()) 194 | 195 | With this in mind, your function can and should return integer values which 196 | will be used as the exit status of the command. This is another Unix 197 | convention; returning zero is for success, returning a non-zero status 198 | indicates some specific error has occurred. 199 | 200 | The following shows a basic usage of ``argparse`` and how to define your "main" 201 | function. 202 | 203 | .. code-block:: python 204 | 205 | # do_science.py 206 | # script for doing cool science things 207 | 208 | import argparse 209 | 210 | parser = argparse.ArgumentParser(prog="do_science", 211 | description="do cool science thing") 212 | 213 | # positional argument 214 | parser.add_argument("input_file", help="path to input data file") 215 | 216 | # optional argument 217 | parser.add_argument("-d", "--debug", action="store_true", 218 | help="enable debugging messages") 219 | 220 | def main() -> int: 221 | """Main entry point for `do_science`. 222 | 223 | Returns: 224 | exit_status: int 225 | 0 if success, non-zero otherwise. 226 | """ 227 | 228 | # parse_args() automatically grabs sys.argv if you don't provide them. 229 | opts = parser.parse_args() 230 | # opts is a namespace 231 | # opts.input_file is a string with the value from the command line 232 | # opts.debug is True or False (default is False w/ "store_true") 233 | return 0 234 | 235 | 236 | After the package is installed, ``pip install my_package ...``, you'll be able to 237 | call the script: 238 | 239 | .. code-block:: none 240 | 241 | > do_science 242 | usage: do_science [-h] [-d] input_file 243 | 244 | .. code-block:: none 245 | 246 | > do_science --help 247 | usage: do_science [-h] [-d] input_file 248 | 249 | do cool science thing 250 | 251 | positional arguments: 252 | input_file path to input data file 253 | 254 | optional arguments: 255 | -h, --help show this help message and exit 256 | -d, --debug enable debugging messages 257 | 258 | 259 | -------------------------------------------------------------------------------- /scripts/make-triangles.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script generates a plot of 3 | some triangles. 4 | """ 5 | 6 | import matplotlib.pyplot as plt 7 | 8 | def plot_triangle(points, ax=None): 9 | if ax is None: 10 | ax = plt.gca() 11 | else: 12 | fig, ax = plt.subplots() 13 | ax.set_xlabel('x') 14 | ax.set_ylabel('y') 15 | 16 | patch = plt.Polygon(points) 17 | ax.add_patch(patch) 18 | 19 | for pt in points: 20 | x, y = pt 21 | ax.text(x, y, '({}, {})'.format(x, y)) 22 | 23 | if __name__ == "__main__": 24 | 25 | plot_triangle([ 26 | (0.2, 0.2), 27 | (0.2, 0.6), 28 | (0.4, 0.4) 29 | ]) 30 | 31 | plot_triangle([ 32 | (0.6, 0.8), 33 | (0.8, 0.8), 34 | (0.5, 0.5) 35 | ]) 36 | 37 | plot_triangle([ 38 | (0.6, 0.1), 39 | (0.7, 0.3), 40 | (0.9, 0.2) 41 | ]) 42 | 43 | plt.savefig('../docs/images/triangle.png', transparent=True) 44 | --------------------------------------------------------------------------------