├── .gitignore
├── LICENSE
├── README.md
├── code
    ├── derivatives.py
    ├── derivatives_numba.py
    ├── draw_triangles-v1.py
    ├── draw_triangles-v2.py
    ├── draw_triangles-v3.py
    ├── flip_list-v1.py
    ├── flip_list-v2.py
    ├── flip_string.py
    ├── graphics.py
    ├── my_csv.py
    ├── test_flip_string-v1.py
    ├── test_flip_string-v2.py
    ├── test_flip_string-v3.py
    ├── test_flip_string-v4.py
    ├── test_flip_string-v5.py
    └── test_strflip.py
├── data
    ├── feet.csv
    ├── images
    │   ├── 00.txt
    │   ├── 01.txt
    │   ├── 02.txt
    │   ├── 03.txt
    │   ├── 04.txt
    │   ├── 05.txt
    │   ├── 06.txt
    │   ├── 07.txt
    │   ├── 08.txt
    │   ├── 09.txt
    │   ├── 10.txt
    │   ├── 11.txt
    │   ├── 12.txt
    │   ├── 13.txt
    │   ├── 14.txt
    │   ├── 15.txt
    │   ├── 16.txt
    │   ├── 17.txt
    │   ├── 18.txt
    │   ├── 19.txt
    │   ├── 20.txt
    │   ├── 21.txt
    │   ├── 22.txt
    │   ├── 23.txt
    │   ├── 24.txt
    │   ├── 25.txt
    │   ├── 26.txt
    │   ├── 27.txt
    │   ├── 28.txt
    │   ├── 29.txt
    │   ├── 30.txt
    │   ├── 31.txt
    │   ├── 32.txt
    │   ├── 33.txt
    │   ├── 34.txt
    │   ├── 35.txt
    │   ├── 36.txt
    │   ├── 37.txt
    │   ├── 38.txt
    │   ├── 39.txt
    │   ├── 40.txt
    │   ├── 41.txt
    │   ├── 42.txt
    │   ├── 43.txt
    │   ├── 44.txt
    │   ├── 45.txt
    │   ├── 46.txt
    │   ├── 47.txt
    │   ├── 48.txt
    │   └── 49.txt
    └── small.csv
├── docs
    ├── Makefile
    ├── conf.py
    ├── documenting.rst
    ├── images
    │   └── triangle.png
    ├── index.rst
    ├── packaging.rst
    ├── performance.rst
    ├── testing.rst
    └── usability.rst
└── scripts
    └── make-triangles.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | 
  2 | # Created by https://www.gitignore.io/api/python
  3 | 
  4 | ### Python ###
  5 | # Byte-compiled / optimized / DLL files
  6 | __pycache__/
  7 | *.py[cod]
  8 | *$py.class
  9 | 
 10 | # C extensions
 11 | *.so
 12 | 
 13 | # Distribution / packaging
 14 | .Python
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | *.egg-info/
 28 | .installed.cfg
 29 | *.egg
 30 | MANIFEST
 31 | 
 32 | # PyInstaller
 33 | #  Usually these files are written by a python script from a template
 34 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 35 | *.manifest
 36 | *.spec
 37 | 
 38 | # Installer logs
 39 | pip-log.txt
 40 | pip-delete-this-directory.txt
 41 | 
 42 | # Unit test / coverage reports
 43 | htmlcov/
 44 | .tox/
 45 | .coverage
 46 | .coverage.*
 47 | .cache
 48 | nosetests.xml
 49 | coverage.xml
 50 | *.cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | 
 63 | # Flask stuff:
 64 | instance/
 65 | .webassets-cache
 66 | 
 67 | # Scrapy stuff:
 68 | .scrapy
 69 | 
 70 | # Sphinx documentation
 71 | _build/
 72 | 
 73 | # PyBuilder
 74 | target/
 75 | 
 76 | # Jupyter Notebook
 77 | .ipynb_checkpoints
 78 | 
 79 | # pyenv
 80 | .python-version
 81 | 
 82 | # celery beat schedule file
 83 | celerybeat-schedule
 84 | 
 85 | # SageMath parsed files
 86 | *.sage.py
 87 | 
 88 | # Environments
 89 | .env
 90 | .venv
 91 | env/
 92 | venv/
 93 | ENV/
 94 | env.bak/
 95 | venv.bak/
 96 | 
 97 | # Spyder project settings
 98 | .spyderproject
 99 | .spyproject
100 | 
101 | # Rope project settings
102 | .ropeproject
103 | 
104 | # mkdocs documentation
105 | /site
106 | 
107 | # mypy
108 | .mypy_cache/
109 | 
110 | ### Python Patch ###
111 | .venv/
112 | 
113 | 
114 | # End of https://www.gitignore.io/api/python
115 | 
116 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Ashwin Srinath
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Python 102 for Scientific Computing and Data Analysis
2 | -----------------------------------------------------
3 | 
4 | <https://python-102.readthedocs.io>
5 | 
6 | [![Documentation Status](https://readthedocs.org/projects/python-102/badge/?version=latest)](https://python-102.readthedocs.io/en/latest/?badge=latest)
7 | 


--------------------------------------------------------------------------------
/code/derivatives.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
def dfdx(f, dx, y):
    """
    Finite-difference first derivative of uniformly spaced samples.

    Parameters
    ----------
    f : numpy.ndarray
        Sampled function values (at least two samples are needed for
        the end-point formulas).
    dx : float
        Spacing between consecutive samples.
    y : numpy.ndarray
        Output array; it is overwritten in place.
        NOTE(review): assumed to have the same length as `f`.

    Returns
    -------
    y : numpy.ndarray
        The array passed in as `y`, now holding the derivative.
    """
    # Central difference at the interior points.
    y[1:-1] = (f[2:] - f[:-2]) / (2*dx)
    # One-sided differences at the two end points.
    y[0] = (f[1] - f[0]) / dx
    # The last point uses a backward difference and must be stored in
    # y[-1]; writing it to y[1] would clobber an interior value.
    y[-1] = (f[-1] - f[-2]) / dx
    return y
8 | 


--------------------------------------------------------------------------------
/code/derivatives_numba.py:
--------------------------------------------------------------------------------
 1 | from numba import jit, prange
 2 | 
 3 | @jit(parallel=True, nopython=True)
 4 | def dfdx(f, dx, y):
 5 |     for i in prange(1, len(y)-1):
 6 |         y[i] = (f[i+1] - f[i-1]) / 2*dx
 7 |     y[0] = (f[1] - f[0]) / dx
 8 |     y[-1] = (f[-1] - f[-2]) / dx
 9 |     return y
10 | 


--------------------------------------------------------------------------------
/code/draw_triangles-v1.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | 
 3 | fig, ax = plt.subplots()
 4 | 
 5 | ax.set_xlabel('x')
 6 | ax.set_ylabel('y')
 7 | 
 8 | patch = plt.Polygon([
 9 |     (0.2, 0.2),
10 |     (0.2, 0.6),
11 |     (0.4, 0.4)
12 | ])
13 | 
14 | ax.add_patch(patch)
15 | 
16 | ax.text(0.2, 0.4, '(0.2, 0.4)')
17 | ax.text(0.2, 0.6, '(0.2, 0.6)')
18 | ax.text(0.2, 0.4, '(0.2, 0.4)')
19 | 
20 | patch = plt.Polygon([
21 |     (0.6, 0.8),
22 |     (0.8, 0.8),
23 |     (0.5, 0.5)
24 | ])
25 | 
26 | ax.add_patch(patch)
27 | 
28 | ax.text(0.6, 0.8, '(0.6, 0.8)')
29 | ax.text(0.8, 0.8, '(0.8, 0.8)')
30 | ax.text(0.5, 0.5, '(0.5, 0.5)')
31 | 
32 | patch = plt.Polygon([
33 |     (0.6, 0.1),
34 |     (0.7, 0.3),
35 |     (0.9, 0.2)
36 | ])
37 | 
38 | ax.add_patch(patch)
39 | 
40 | ax.text(0.6, 0.1, '(0.6, 0.1)')
41 | ax.text(0.7, 0.3, '(0.7, 0.3)')
42 | ax.text(0.9, 0.2, '(0.9, 0.2)')
43 | 
44 | plt.show()
45 | 


--------------------------------------------------------------------------------
/code/draw_triangles-v2.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | 
 3 | def draw_triangle(points, ax=None):
 4 |     if ax is None:
 5 |         ax = plt.gca()
 6 |     else:
 7 |         fig, ax = plt.subplots()
 8 |         ax.set_xlabel('x')
 9 |         ax.set_ylabel('y')
10 | 
11 |     patch = plt.Polygon(points)
12 |     ax.add_patch(patch)
13 | 
14 |     for pt in points:
15 |         x, y = pt
16 |         ax.text(x, y, '({}, {})'.format(x, y))
17 | 
18 | draw_triangle([
19 |     (0.2, 0.2),
20 |     (0.2, 0.6),
21 |     (0.4, 0.4)
22 | ])
23 | 
24 | draw_triangle([
25 |     (0.6, 0.8),
26 |     (0.8, 0.8),
27 |     (0.5, 0.5)
28 | ])
29 | 
30 | draw_triangle([
31 |     (0.6, 0.1),
32 |     (0.7, 0.3),
33 |     (0.9, 0.2)
34 | ])
35 | 
36 | plt.show()
37 | 


--------------------------------------------------------------------------------
/code/draw_triangles-v3.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | 
 3 | def draw_triangle(points, ax=None):
 4 |     if ax is None:
 5 |         ax = plt.gca()
 6 |     else:
 7 |         fig, ax = plt.subplots()
 8 |         ax.set_xlabel('x')
 9 |         ax.set_ylabel('y')
10 | 
11 |     patch = plt.Polygon(points)
12 |     ax.add_patch(patch)
13 | 
14 |     for pt in points:
15 |         x, y = pt
16 |         ax.text(x, y, '({}, {})'.format(x, y))
17 | 
18 | if __name__ == "__main__":
19 | 
20 |     draw_triangle([
21 |         (0.2, 0.2),
22 |         (0.2, 0.6),
23 |         (0.4, 0.4)
24 |     ])
25 | 
26 |     draw_triangle([
27 |         (0.6, 0.8),
28 |         (0.8, 0.8),
29 |         (0.5, 0.5)
30 |     ])
31 | 
32 |     draw_triangle([
33 |         (0.6, 0.1),
34 |         (0.7, 0.3),
35 |         (0.9, 0.2)
36 |     ])
37 | 
38 |     plt.show()
39 | 


--------------------------------------------------------------------------------
/code/flip_list-v1.py:
--------------------------------------------------------------------------------
 1 | def flip_list(a, inplace=False):
 2 |     """
 3 |     Flip (reverse) a list.
 4 | 
 5 |     Parameters
 6 |     ----------
 7 |     a : list 
 8 |         List to be reversed.
 9 |     inplace : bool, optional
10 |         Specifies whether to flip the list "in place",
11 |         or return a new list (default).
12 | 
13 |     Returns
14 |     -------
15 |     flipped : list (or None)
16 |         The flipped list. If `inplace=True`, None is returned.
17 |     """
18 |     if inplace is True:
19 |         a[:] = a[::-1]
20 |         return None
21 |     else:
22 |         return a[::-1]
23 | 


--------------------------------------------------------------------------------
/code/flip_list-v2.py:
--------------------------------------------------------------------------------
 1 | def flip_list(a, inplace=False):
 2 |     """
 3 |     Flip (reverse) a list.
 4 | 
 5 |     Parameters
 6 |     ----------
 7 |     a : list 
 8 |         List to be reversed.
 9 |     inplace : bool, optional
10 |         Specifies whether to flip the list "in place",
11 |         or return a new list (default).
12 | 
13 |     Returns
14 |     -------
15 |     flipped : list (or None)
16 |         The flipped list. If `inplace=True`, None is returned.
17 | 
18 | 
19 |     >>> flip_list([1, 2, 3])
20 |     [3, 2, 1]
21 | 
22 |     >>> a = [1, 2, 3]
23 |     >>> flip_list(a, inplace=True)
24 |     >>> a
25 |     [3, 2, 1]
26 |     """
27 |     if inplace is True:
28 |         a[:] = a[::-1]
29 |         return None
30 |     else:
31 |         return a[::-1]
32 | 


--------------------------------------------------------------------------------
/code/flip_string.py:
--------------------------------------------------------------------------------
 1 | def flip_string(s):
 2 |     """
 3 |     flip_string: Flip a string
 4 |  
 5 |     Parameters
 6 |     ----------
 7 |     s : str
 8 |         String to reverse
 9 |  
10 |     Returns
11 |     -------
12 |     flipped : str
13 |         Copy of `s` with characters arranged in reverse order
14 |     """
15 |  
16 |     flipped = ''
17 |  
18 |     # Starting from the last character in `s`,
19 |     # add the character to `flipped`,
20 |     # and proceed to the previous character in `s`.
21 |     # Stop whenever we reach the first character.
22 |  
23 |     i = len(s)
24 |  
25 |     while True:
26 |         i = i-1
27 |         char = s[i]
28 |         flipped = flipped + char
29 |  
30 |         # stop if we have reached the first character:
31 |         if char == s[0]:
32 |            break
33 |  
34 |     return flipped
35 | 


--------------------------------------------------------------------------------
/code/graphics.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | 
 3 | def draw_triangle(points, ax=None):
 4 |     if ax is None:
 5 |         ax = plt.gca()
 6 |     else:
 7 |         fig, ax = plt.subplots()
 8 |         ax.set_xlabel('x')
 9 |         ax.set_ylabel('y')
10 | 
11 |     patch = plt.Polygon(points)
12 |     ax.add_patch(patch)
13 | 
14 |     for pt in points:
15 |         x, y = pt
16 |         ax.text(x, y, '({}, {})'.format(x, y))
17 | 


--------------------------------------------------------------------------------
/code/my_csv.py:
--------------------------------------------------------------------------------
 1 | import pandas
 2 | 
 3 | def read_csv(fname):
 4 |     with open(fname) as f:
 5 |         col_names = f.readline().rstrip().split(',')
 6 |         df = pandas.DataFrame(columns=col_names)
 7 |         for line in f:
 8 |             record = pandas.DataFrame([line.rstrip().split(',')], columns=col_names)
 9 |             df = df.append(record, ignore_index=True)
10 |     return df
11 | 


--------------------------------------------------------------------------------
/code/test_flip_string-v1.py:
--------------------------------------------------------------------------------
1 | from flip_string import flip_string
2 | 
3 | flipped = flip_string("mario")
4 | print("mario flipped is:", flipped)
5 | 
6 | flipped = flip_string("luigi")
7 | print("luigi flipped is:", flipped)
8 | 


--------------------------------------------------------------------------------
/code/test_flip_string-v2.py:
--------------------------------------------------------------------------------
1 | from flip_string import flip_string
2 | 
3 | assert flip_string('mario') == 'oiram'
4 | assert flip_string('luigi') == 'igiul'
5 | 


--------------------------------------------------------------------------------
/code/test_flip_string-v3.py:
--------------------------------------------------------------------------------
1 | from flip_string import flip_string
2 | 
3 | assert flip_string('mario') == 'oiram'
4 | assert flip_string('luigi') == 'igiul'
5 | assert flip_string('samus') == 'sumas'
6 | 


--------------------------------------------------------------------------------
/code/test_flip_string-v4.py:
--------------------------------------------------------------------------------
 1 | from flip_string import flip_string
 2 | 
 3 | def test_flip_mario():
 4 |     assert flip_string('mario') == 'oiram'
 5 | 
 6 | def test_flip_luigi():
 7 |     assert flip_string('luigi') == 'igiul'
 8 | 
 9 | def test_flip_samus():
10 |     assert flip_string('samus') == 'sumas'
11 | 


--------------------------------------------------------------------------------
/code/test_flip_string-v5.py:
--------------------------------------------------------------------------------
 1 | from flip_string import flip_string
 2 | 
 3 | def test_flip_one_char():
 4 |     assert flip_string('a') == 'a'
 5 | 
 6 | def test_flp_two_charsi():
 7 |     assert flip_string('ab') == 'ba'
 8 | 
 9 | def test_flip_palindrome():
10 |     assert flip_string('aba') == 'aba'
11 | 


--------------------------------------------------------------------------------
/code/test_strflip.py:
--------------------------------------------------------------------------------
 1 | from strflip import strflip
 2 | 
 3 | def test_flip_empty_string():
 4 |     assert strflip('') == ''
 5 | 
 6 | def test_flip_one_char():
 7 |     assert strflip('a') == 'a'
 8 | 
 9 | def test_flip_repeated_char():
10 |     assert strflip('abca') == 'acba'
11 | 


--------------------------------------------------------------------------------
/data/small.csv:
--------------------------------------------------------------------------------
 1 | plot_id,species_id,hindfoot_length
 2 | 2,NL,32.0
 3 | 3,NL,33.0
 4 | 2,DM,37.0
 5 | 7,DM,36.0
 6 | 3,DM,35.0
 7 | 1,PF,14.0
 8 | 2,PE,
 9 | 1,DM,37.0
10 | 1,DM,34.0
11 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SPHINXPROJ    = python-102
 8 | SOURCEDIR     = .
 9 | BUILDDIR      = _build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # Configuration file for the Sphinx documentation builder.
  4 | #
  5 | # This file does only contain a selection of the most common options. For a
  6 | # full list see the documentation:
  7 | # http://www.sphinx-doc.org/en/master/config
  8 | 
  9 | # -- Path setup --------------------------------------------------------------
 10 | 
 11 | # If extensions (or modules to document with autodoc) are in another directory,
 12 | # add these directories to sys.path here. If the directory is relative to the
 13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 14 | #
 15 | # import os
 16 | # import sys
 17 | # sys.path.insert(0, os.path.abspath('.'))
 18 | 
 19 | 
 20 | # -- Project information -----------------------------------------------------
 21 | 
 22 | project = 'Python 102'
 23 | copyright = '2018, Ashwin Srinath'
 24 | author = 'Ashwin Srinath'
 25 | 
 26 | # The short X.Y version
 27 | version = ''
 28 | # The full version, including alpha/beta/rc tags
 29 | release = '0.1'
 30 | 
 31 | 
 32 | # -- General configuration ---------------------------------------------------
 33 | 
 34 | # If your documentation needs a minimal Sphinx version, state it here.
 35 | #
 36 | # needs_sphinx = '1.0'
 37 | 
 38 | # Add any Sphinx extension module names here, as strings. They can be
 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 40 | # ones.
 41 | extensions = [
 42 |     'sphinx.ext.intersphinx',
 43 |     'sphinx.ext.mathjax',
 44 |     'sphinx.ext.githubpages',
 45 | ]
 46 | 
 47 | # Add any paths that contain templates here, relative to this directory.
 48 | templates_path = ['_templates']
 49 | 
 50 | # The suffix(es) of source filenames.
 51 | # You can specify multiple suffix as a list of string:
 52 | #
 53 | # source_suffix = ['.rst', '.md']
 54 | source_suffix = '.rst'
 55 | 
 56 | # The master toctree document.
 57 | master_doc = 'index'
 58 | 
 59 | # The language for content autogenerated by Sphinx. Refer to documentation
 60 | # for a list of supported languages.
 61 | #
 62 | # This is also used if you do content translation via gettext catalogs.
 63 | # Usually you set "language" from the command line for these cases.
 64 | language = None
 65 | 
 66 | # List of patterns, relative to source directory, that match files and
 67 | # directories to ignore when looking for source files.
 68 | # This pattern also affects html_static_path and html_extra_path .
 69 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 70 | 
 71 | # The name of the Pygments (syntax highlighting) style to use.
 72 | pygments_style = 'sphinx'
 73 | 
 74 | 
 75 | # -- Options for HTML output -------------------------------------------------
 76 | 
 77 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 78 | # a list of builtin themes.
 79 | #
 80 | html_theme = 'sphinx_rtd_theme'
 81 | 
 82 | # Theme options are theme-specific and customize the look and feel of a theme
 83 | # further.  For a list of options available for each theme, see the
 84 | # documentation.
 85 | #
 86 | # html_theme_options = {}
 87 | 
 88 | # Add any paths that contain custom static files (such as style sheets) here,
 89 | # relative to this directory. They are copied after the builtin static files,
 90 | # so a file named "default.css" will overwrite the builtin "default.css".
 91 | html_static_path = ['_static']
 92 | 
 93 | # Custom sidebar templates, must be a dictionary that maps document names
 94 | # to template names.
 95 | #
 96 | # The default sidebars (for documents that don't match any pattern) are
 97 | # defined by theme itself.  Builtin themes are using these templates by
 98 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
 99 | # 'searchbox.html']``.
100 | #
101 | # html_sidebars = {}
102 | 
103 | html_title = 'Python 102'
104 | 
105 | # -- Options for HTMLHelp output ---------------------------------------------
106 | 
107 | # Output file base name for HTML help builder.
108 | htmlhelp_basename = 'python-102doc'
109 | 
110 | 
111 | # -- Options for LaTeX output ------------------------------------------------
112 | 
113 | latex_elements = {
114 |     # The paper size ('letterpaper' or 'a4paper').
115 |     #
116 |     # 'papersize': 'letterpaper',
117 | 
118 |     # The font size ('10pt', '11pt' or '12pt').
119 |     #
120 |     # 'pointsize': '10pt',
121 | 
122 |     # Additional stuff for the LaTeX preamble.
123 |     #
124 |     # 'preamble': '',
125 | 
126 |     # Latex figure (float) alignment
127 |     #
128 |     # 'figure_align': 'htbp',
129 | }
130 | 
131 | # Grouping the document tree into LaTeX files. List of tuples
132 | # (source start file, target name, title,
133 | #  author, documentclass [howto, manual, or own class]).
134 | latex_documents = [
135 |     (master_doc, 'python-102.tex', 'python-102 Documentation',
136 |      'Ashwin Srinath', 'manual'),
137 | ]
138 | 
139 | 
140 | # -- Options for manual page output ------------------------------------------
141 | 
142 | # One entry per manual page. List of tuples
143 | # (source start file, name, description, authors, manual section).
144 | man_pages = [
145 |     (master_doc, 'python-102', 'python-102 Documentation',
146 |      [author], 1)
147 | ]
148 | 
149 | 
150 | # -- Options for Texinfo output ----------------------------------------------
151 | 
152 | # Grouping the document tree into Texinfo files. List of tuples
153 | # (source start file, target name, title, author,
154 | #  dir menu entry, description, category)
155 | texinfo_documents = [
156 |     (master_doc, 'python-102', 'python-102 Documentation',
157 |      author, 'python-102', 'One line description of project.',
158 |      'Miscellaneous'),
159 | ]
160 | 
161 | 
162 | # -- Extension configuration -------------------------------------------------
163 | 
164 | # -- Options for intersphinx extension ---------------------------------------
165 | 
166 | # Example configuration for intersphinx: refer to the Python standard library.
167 | intersphinx_mapping = {'https://docs.python.org/3/': None}
168 | 


--------------------------------------------------------------------------------
/docs/documenting.rst:
--------------------------------------------------------------------------------
  1 | Documenting your code
  2 | =====================
  3 | 
  4 | Most people think of writing documentation as
  5 | an unpleasant, but necessary task,
  6 | done for the benefit of other people
  7 | with no real benefit to themselves.
  8 | So they choose not to do it,
  9 | or they do it with little care.
 10 | 
 11 | But even if you are the only person who will ever use your code,
 12 | it's still a good idea to document it well.
 13 | Being able to document your own code gives you confidence that you understand it yourself,
 14 | and a sign of well-written code is that it can be easily documented.
 15 | Code you wrote a few weeks ago
 16 | may as well have been written by someone else,
 17 | and you will be glad that you documented it.
 18 | 
 19 | The good news is that writing documentation can be fun,
 20 | and you really don't need to write a lot of it.
 21 | 
 22 | Docstrings and comments
 23 | -----------------------
 24 | 
 25 | Documentation is *not* comments.
 26 | 
 27 | A *docstring* in Python is a string literal
 28 | that appears at the beginning of a module, function, class, or method.
 29 | 
 30 | .. code-block:: python
 31 | 
 32 |    """
 33 |    A docstring in Python that appears
 34 |    at the beginning of a module, function, class or method.
 35 |    """
 36 | 
 37 | The *docstring* of a module, function, class or method
 38 | becomes the ``__doc__`` attribute of that object,
 39 | and is printed if you type ``help(object)``:
 40 | 
 41 | .. code-block:: python
 42 | 
 43 |    In [1]: def fahr_to_celsius(F):
 44 |       ...:     """
 45 |       ...:     Convert temperature from Fahrenheit to Celsius.
 46 |       ...:     """
 47 |       ...:     return (F - 32) * (5/9)
 48 | 
 49 |    In [2]: help(fahr_to_celsius)
 50 | 
 51 |    Help on function fahr_to_celsius in module __main__:
 52 | 
 53 |    fahr_to_celsius(F)
 54 |     Convert temperature from Fahrenheit to Celsius. 
 55 | 
 56 | A *comment* in Python is any line that begins with a ``#``:
 57 | 
 58 | .. code-block:: python
 59 | 
 60 |    # a comment.
 61 | 
 62 | The purpose of a docstring is to document a module, function, class, or method.
 63 | The purpose of a comment is to explain a very difficult piece of code,
 64 | or to justify a choice that was made while writing it.
 65 | 
 66 | Docstrings should not be used in place of comments,
 67 | or vice versa. **Don't do the following**:
 68 | 
 69 | .. code-block:: python
 70 | 
 71 |    In [1]: def fahr_to_celsius(F):
 72 |       ...:     # Convert temperature from Fahrenheit to Celsius.
 73 |       ...:     return (F - 32) * (5/9)
 74 | 
 75 | Deleting code
 76 | ^^^^^^^^^^^^^
 77 | 
 78 | Incidentally, many people use comments and string literals
 79 | as a way of "deleting" code - also known as *commenting out* code.
 80 | See `this article <https://nedbatchelder.com/text/deleting-code.html>`_ on a better way to delete code.
 81 | 
 82 | What to document?
 83 | -----------------
 84 | 
 85 | So what goes in a docstring?
 86 | 
 87 | At minimum, the docstring for a function or method should consist of the following:
 88 | 
 89 | 1. A **Summary** section that describes in a sentence or two
 90 |    what the function does.
 91 | 2. A **Parameters** section that provides a
 92 |    description of the parameters to the function,
 93 |    their types,
 94 |    and default values (in the case of optional arguments).
 95 | 3. A **Returns** section that similarly describes the return values.
 96 | 4. Optionally,
 97 |    a **Notes** section that describes the implementation,
 98 |    and includes references.
 99 | 
100 | Here is a simple example of this in action:
101 | 
102 | .. literalinclude:: ../code/flip_list-v1.py
103 | 
104 | NumPy's `documentation guidelines <https://numpydoc.readthedocs.io/en/latest/>`_ are a great
105 | reference for more information about what and how to document your code.
106 | 
107 | Doctests
108 | --------
109 | 
110 | In addition to the sections above,
111 | your documentation can also contain runnable tests.
112 | This is possible using the
113 | `doctest <https://docs.python.org/3/library/doctest.html>`_ module.
114 | 
115 | .. literalinclude:: ../code/flip_list-v2.py
116 |    :caption: flip_list.py
117 | 
118 | You can tell ``pytest`` to run doctests as well as other tests
119 | using the ``--doctest-modules`` switch:
120 | 
121 | ::
122 | 
123 |    $ pytest --doctest-modules flip_list.py
124 | 
125 |    collected 1 item
126 | 
127 |    flip_list.py .                                                            [100%]
128 | 
129 |    =========================== 1 passed in 0.03 seconds ===========================
130 | 
131 | Doctests are great because they double up
132 | as documentation as well as tests.
133 | But they shouldn't be the *only* kind of tests you write.
134 | 
135 | Documentation generation
136 | ------------------------
137 | 
138 | Finally, you can turn your documentation into a beautiful website (like this one!),
139 | a PDF manual, and various other formats,
140 | using a document generator such as
141 | `Sphinx <http://www.sphinx-doc.org/en/master/>`_.
142 | You can use services like
143 | `readthedocs <http://readthedocs.org/>`_
144 | to build and host your website for free.
145 | 


--------------------------------------------------------------------------------
/docs/images/triangle.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shwina/python-102/8843c1f273d3857016193874267f22096d8fb0ea/docs/images/triangle.png


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. python-102 documentation master file, created by
 2 |    sphinx-quickstart on Thu Jul 12 10:12:51 2018.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Python 102 for scientific computing and data analysis
 7 | =====================================================
 8 | 
 9 | This tutorial covers topics that are
10 | essential for scientific computing and data analysis in Python,
11 | but typically *not* covered in an introductory course or workshop.
12 | 
13 | These are the things you *need* to know if you are writing software
14 | that meets any of the following criteria:
15 | 
16 | * You expect to be working on it for more than a couple of weeks.
17 | * You expect that it will be composed of
18 |   more than a hundred
19 |   or so lines of code.
20 | * You want it to produce results that can be trusted - for example,
21 |   if you are publishing a research paper based on those results.
22 | * You expect that it will be used by one or more other people.
23 | * You are contributing to another project -
24 |   e.g., an open-source software package.
25 | 
26 | What you will learn
27 | -------------------
28 | 
29 | 1. How to organize the code for your project,
30 |    and how to make it an installable *package*
31 |    rather than a loose collection of files.
32 | 2. How to write tests for your code so that
33 |    you can be sure it always produces the correct answer,
34 |    even as you make changes to it.
35 | 3. How to document your code so that it is easy for
36 |    you and others to use and navigate.
37 | 4. How to improve the usability of your code.
38 | 5. How to improve the performance of your code.
39 | 
40 | What you need to know
41 | ---------------------
42 | 
43 | This tutorial assumes you know the
44 | very basics of programming with Python.
45 | 
46 | If you can write a loop and a function in Python,
47 | and if you know how to run a ``.py`` script,
48 | you should be able to follow this tutorial easily.
49 | 
50 | What you need to have
51 | ---------------------
52 | 
53 | If you plan to participate in the hands-on exercises,
54 | you will need:
55 | 
56 | * A laptop with `Anaconda <https://www.anaconda.com/download/>`_ installed on it
57 | * 1 or more friends.
58 |   It is **highly** encouraged to work in groups,
59 |   so if you haven't already,
60 |   please introduce yourself to your neighbour(s).
61 | 
62 | .. toctree::
63 |    :maxdepth: 2
64 |    :caption: Contents:
65 |    
66 |    packaging
67 |    testing
68 |    documenting
69 |    usability
70 |    performance
71 | 
72 | 
73 | Indices and tables
74 | ==================
75 | 
76 | * :ref:`genindex`
77 | * :ref:`modindex`
78 | * :ref:`search`
79 | 


--------------------------------------------------------------------------------
/docs/packaging.rst:
--------------------------------------------------------------------------------
  1 | Organizing code for a Python project
  2 | ====================================
  3 | 
  4 | A well structured project is
  5 | easy to navigate and make changes and improvements to.
  6 | It's also more likely to be used by other people --
  7 | and that includes *you* a few weeks from now!
  8 | 
  9 | Organization basics
 10 | -------------------
 11 | 
 12 | We want to write a Python program that draws triangles:
 13 | 
 14 | .. image:: images/triangle.png
 15 |    :width: 60%
 16 | 
 17 | We use the
 18 | `Polygon <https://matplotlib.org/gallery/api/patch_collection.html>`_ class
 19 | of the `matplotlib <https://matplotlib.org/>`_ library
 20 | and write a script called ``draw_triangles.py`` to do this:
 21 | 
 22 | .. literalinclude:: ../code/draw_triangles-v1.py
 23 |    :caption: draw_triangles.py
 24 |    :language: python
 25 | 
 26 | Do you think this is a good way to organize the code?
 27 | What do you think could be improved in the script ``draw_triangles.py``?
 28 | 
 29 | Functions
 30 | ^^^^^^^^^
 31 | 
 32 | Functions facilitate code reuse.
 33 | Whenever you see yourself
 34 | typing the same code twice
 35 | in the same program or project,
 36 | it is a clear indication that the code belongs in a function.
 37 | 
 38 | A good function:
 39 | 
 40 | * has a descriptive name. ``draw_triangle`` is a better name
 41 |   than ``plot``.
 42 | * is small -- no more than a couple of dozen lines --
 43 |   and does **one** thing.
 44 |   If a function is doing too much,
 45 |   then it should probably be broken into smaller functions.
 46 | * can be easily tested -- more on this soon.
 47 | * is well documented -- more on this later.
 48 | 
 49 | In the script ``draw_triangles.py`` above,
 50 | it would be a good idea to define a function
 51 | called ``draw_triangle`` that draws a single triangle,
 52 | and re-use this function every time we need to draw a triangle:
 53 | 
 54 | .. literalinclude:: ../code/draw_triangles-v2.py
 55 |    :caption: draw_triangles.py
 56 | 
 57 | Python scripts and modules
 58 | ^^^^^^^^^^^^^^^^^^^^^^^^^^
 59 | 
 60 | A *module* is a file containing a collection of Python definitions and statements,
 61 | typically named with a ``.py`` suffix.
 62 | 
 63 | A *script* is a module that is intended to be run by the Python interpreter.
 64 | For example,
 65 | the script ``draw_triangles.py`` can be run from the command-line
 66 | using the command:
 67 | 
 68 | ::
 69 | 
 70 |   $ python draw_triangles.py
 71 | 
 72 | If you are using an Integrated Development Environment
 73 | like Spyder or `PyCharm <https://www.jetbrains.com/pycharm/>`_,
 74 | then the script can be run by opening it in the IDE and clicking on the "Run" button.
 75 | 
 76 | Modules, or specific functions from a module can be imported
 77 | using the ``import`` statement:
 78 | 
 79 | .. code-block:: python
 80 | 
 81 |    import draw_triangles
 82 |    from draw_triangles import draw_triangle
 83 | 
 84 | When a module is imported,
 85 | all the statements in the module are executed by the Python interpreter.
 86 | This happens only the first time the module is imported.
 87 | 
 88 | It is sometimes useful to have both
 89 | importable functions
 90 | as well as executable statements
 91 | in a single module.
 92 | When importing functions from this module,
 93 | it is possible to avoid running other code by placing it under
 94 | ``if __name__ == "__main__"``:
 95 | 
 96 | .. literalinclude:: ../code/draw_triangles-v3.py
 97 |    :caption: draw_triangles.py
 98 | 
 99 | When another module imports the module ``draw_triangles`` above,
100 | the code under ``if __name__ == "__main__"`` is **not** executed.
101 | 
102 | How to structure a Python project?
103 | ----------------------------------
104 | 
105 | Let us now imagine we had a lot more code;
106 | for example, a *collection* of functions for:
107 | 
108 | * plotting shapes (like ``draw_triangle`` above)
109 | * calculating areas
110 | * geometric transformations
111 | 
112 | What are the different ways to organize code for a Python project
113 | that is more than a handful of lines long?
114 | 
115 | A single module
116 | ^^^^^^^^^^^^^^^
117 | 
118 | ::
119 | 
120 |    geometry
121 |    └── draw_triangles.py
122 | 
123 | One way to organize your code
124 | is to put all of it
125 | in a single ``.py`` file (module)
126 | like ``draw_triangles.py`` above.
127 | 
128 | Multiple modules
129 | ^^^^^^^^^^^^^^^^
130 | 
131 | For a small number of functions
132 | the approach above is fine,
133 | and even recommended,
134 | but as the size and/or scope of the project grows,
135 | it may be necessary to
136 | divide up code into different modules,
137 | each containing related data and functionality.
138 | 
139 | ::
140 | 
141 |    geometry
142 |    ├── draw_triangles.py
143 |    └── graphics.py
144 | 
145 | .. literalinclude:: ../code/graphics.py
146 |    :caption: graphics.py
147 | 
148 | Typically, the "top-level" executable code is
149 | put in a separate script
150 | which imports functions and data from other modules:
151 | 
152 | .. code-block:: python 
153 |    :caption: draw_triangles.py
154 | 
155 |    import graphics 
156 | 
157 |    graphics.draw_triangle([
158 |        (0.2, 0.2),
159 |        (0.2, 0.6),
160 |        (0.4, 0.4)
161 |    ])
162 | 
163 |    graphics.draw_triangle([
164 |        (0.6, 0.8),
165 |        (0.8, 0.8),
166 |        (0.5, 0.5)
167 |    ])
168 | 
169 |    graphics.draw_triangle([
170 |        (0.6, 0.1),
171 |        (0.7, 0.3),
172 |        (0.9, 0.2)
173 |    ])
174 | 
175 | Packages
176 | ^^^^^^^^
177 | 
178 | A Python **package** is a directory
179 | containing a file called ``__init__.py``,
180 | which can be empty.
181 | Packages can contain modules
182 | as well as other packages
183 | (sometimes referred to as *sub-packages*).
184 | 
185 | For example, ``geometry`` below is a package,
186 | containing various modules:
187 | 
188 | ::
189 | 
190 |    draw_triangles.py
191 |    geometry
192 |    ├── graphics.py
193 |    └── __init__.py
194 | 
195 | A module from the package can be imported using the "dot" notation:
196 | 
197 | .. code-block:: python
198 | 
199 |    import geometry.graphics
200 |    geometry.graphics.draw_triangle(args)
201 | 
202 | It's also possible to import a specific function
203 | from the module:
204 | 
205 | .. code-block:: python
206 |    
207 |    from geometry.graphics import draw_triangle
208 |    draw_triangle(args)
209 | 
210 | Packages can themselves be imported,
211 | which really just imports the ``__init__.py`` module.
212 | 
213 | .. code-block:: python
214 |    
215 |    import geometry
216 | 
217 | If ``__init__.py`` is empty,
218 | there is "nothing" in the imported ``geometry`` package,
219 | and the following line gives an error:
220 | 
221 | .. code-block:: python
222 | 
223 |    geometry.graphics.draw_triangle(args)
224 | 
225 | .. code-block:: python
226 | 
227 |    AttributeError: module 'geometry' has no attribute 'graphics'
228 | 
229 | Importing from anywhere
230 | -----------------------
231 | 
232 | sys.path
233 | ^^^^^^^^
234 | 
235 | To improve their reusability,
236 | you typically want to be able to
237 | ``import`` your modules and packages
238 | from anywhere,
239 | i.e., from any directory on your computer.
240 | 
241 | One way to do this is to use ``sys.path``:
242 | 
243 | .. code-block:: python
244 | 
245 |    import sys
246 |    sys.path.append('/path/to/geometry')
247 | 
248 |    import graphics
249 | 
250 | ``sys.path`` is a list of directories
251 | that Python looks for modules and packages in
252 | when you ``import`` them.
253 | 
254 | Installable projects
255 | ^^^^^^^^^^^^^^^^^^^^
256 | 
257 | A better way is to make your project "installable"
258 | using `setuptools <https://setuptools.readthedocs.io/en/latest/>`_.
259 | To do this, you will need to
260 | include a ``setup.py`` with your project.
261 | Your project should be organized as follows:
262 | 
263 | .. code-block:: bash
264 | 
265 |    draw_triangles.py
266 |    geometry
267 |    ├── graphics.py
268 |    └── __init__.py
269 |    setup.py
270 | 
271 | A minimal ``setup.py`` can include the following
272 | 
273 | .. code-block:: python 
274 |    :caption: setup.py
275 | 
276 |    from setuptools import setup
277 | 
278 |    setup(name='geometry',
279 |       version='0.1',
280 |       author='Ashwin Srinath',
281 |       packages=['geometry'])
282 | 
283 | You can install the package using ``pip``
284 | with the following command
285 | (run from the same directory as ``setup.py``):
286 | 
287 | .. code-block:: bash
288 | 
289 |    $ pip install -e . --user
290 | 
291 | This installs the package in *editable* mode,
292 | creating a link to it in the user's ``site-packages`` directory,
293 | which happens to already be in ``sys.path``.
294 | 
295 | Once your project is installed,
296 | you don't need to worry about
297 | adding it manually to ``sys.path`` each time you need to use it.
298 | 
299 | It's also easy to *uninstall* a package;
300 | just pass the package name (the ``name`` given in ``setup.py``) to ``pip uninstall``:
301 | 
302 | .. code-block:: bash
303 |    
304 |    $ pip uninstall geometry
305 | 


--------------------------------------------------------------------------------
/docs/performance.rst:
--------------------------------------------------------------------------------
  1 | Improving the performance of Python programs
  2 | ============================================
  3 | 
  4 | Timing code and identifying bottlenecks
  5 | ---------------------------------------
  6 | 
  7 | Of course,
  8 | the first step toward improving performance
  9 | is to figure out where to focus your efforts.
 10 | This means identifying the section of code in your program
 11 | that is taking the most time,
 12 | i.e., the "bottleneck".
 13 | 
 14 | Sometimes,
 15 | the bottleneck is very obvious
 16 | (e.g., the training step in a machine learning application),
 17 | and sometimes it may not be clear.
 18 | In the latter case,
 19 | you need to be able to measure the time taken by various parts of your program.
 20 | 
 21 | The ``time`` function
 22 | ^^^^^^^^^^^^^^^^^^^^^
 23 | 
 24 | The `time <https://docs.python.org/3/library/time.html#time.time>`_
 25 | function can be used to time a section of code as follows:
 26 | 
 27 | .. code-block:: python
 28 | 
 29 |    import time
 30 |    import numpy as np
 31 | 
 32 |    t1 = time.time()
 33 |    a = np.random.rand(5000, 5000)
 34 |    t2 = time.time()
 35 |    print("Generating random array took {} seconds".format(t2-t1))
 36 | 
 37 | ::
 38 | 
 39 |    Generating random array took 0.44880104064941406 seconds
 40 | 
 41 | 
 42 | ``%timeit`` and ``%%timeit``
 43 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 44 | 
 45 | ``%timeit`` and ``%%timeit`` are
 46 | `magic statements <https://ipython.readthedocs.io/en/stable/interactive/magics.html>`_
 47 | that can be used in IPython
 48 | or in Jupyter Notebook
 49 | for timing a single line of code or a block of code
 50 | conveniently:
 51 | 
 52 | ::
 53 |     
 54 |    In [1]: import numpy as np
 55 | 
 56 |    In [2]: %timeit np.random.rand(5000, 5000)
 57 |    410 ms ± 2.59 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
 58 | 
 59 |    In [3]: %%timeit
 60 |       ...: a = np.random.rand(5000, 5000)
 61 |       ...: b = np.random.rand(5000, 5000)
 62 |       ...: c = a * b
 63 |       ...:
 64 |    897 ms ± 10.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
 65 | 
 66 | Profilers
 67 | ^^^^^^^^^
 68 | 
 69 | ``time`` and ``timeit`` should help with most of your measurement needs,
 70 | but if you need to profile a very long program with lots of functions,
 71 | you may benefit from using
 72 | a  `profiler <https://docs.python.org/3/library/profile.html>`_.
 73 | 
 74 | There is also a
 75 | `line_profiler <https://github.com/rkern/line_profiler>`_
 76 | that can help you automatically profile
 77 | each line in a script,
 78 | and a `memory_profiler <https://github.com/pythonprofilers/memory_profiler>`_
 79 | to measure memory consumption.
 80 | 
 81 | Install optimized versions of libraries
 82 | ---------------------------------------
 83 | 
 84 | This is the easiest way to get "free" performance improvements.
 85 | If your computer supports it,
 86 | install optimized versions of Python libraries,
 87 | for example, those provided by
 88 | the `Intel Distribution for Python <https://software.intel.com/en-us/distribution-for-python>`_.
 89 | 
 90 | Another option is `PyPy <https://pypy.org/compat.html>`_.
 91 | 
 92 | Choose the right algorithm
 93 | --------------------------
 94 | 
 95 | This is one of the most effective ways to
 96 | improve the performance of a program.
 97 | 
 98 | When choosing a function from a library
 99 | or writing your own,
100 | ensure that  you understand how it will perform
101 | for the type and size of data you have,
102 | and what options there may be to boost its performance.
103 | Always benchmark to compare with other functions and libraries.
104 | 
105 | For example,
106 | if you are doing linear algebra,
107 | you may benefit from the use of
108 | `sparse <https://en.wikipedia.org/wiki/Sparse_matrix>`_ matrices and algorithms
109 | if you are dealing with very large matrices with relatively few non-zeros.
110 | 
111 | As another example, many kinds of algorithms are iterative
112 | and require an initial "guess" for the solution.
113 | Typically, the closer this initial guess is to the actual solution,
114 | the faster the algorithm performs.
115 | 
116 | Choose the appropriate data format
117 | ----------------------------------
118 | 
119 | Familiarize yourself with
120 | the various data formats available for the type of data you are dealing with,
121 | and the performance considerations for each.
122 | For example,
123 | `this page <https://pandas.pydata.org/pandas-docs/stable/io.html>`_
124 | provides a good overview of various data formats for
125 | tabular data supported by the Pandas library.
126 | Performance for each is reported
127 | `here <https://pandas.pydata.org/pandas-docs/stable/io.html#performance-considerations>`_.
128 | 
129 | Don't reinvent the wheel
130 | ------------------------
131 | 
132 | Resist any temptation
133 | to write your own implementation for a
134 | common task or a well-known algorithm.
135 | Rely instead on other well-tested and well-used implementations.
136 | 
137 | For instance, it's easy to write a few lines of Python to
138 | read data from a ``.csv`` file into a Pandas DataFrame:
139 |    
140 | .. code-block:: python
141 |    :caption: my_csv.py
142 | 
143 |    def read_csv(fname):
144 |        with open(fname) as f:
145 |            col_names = f.readline().rstrip().split(',')
146 |            df = pandas.DataFrame(columns=col_names)
147 |            for line in f:
148 |                record = pandas.DataFrame([line.rstrip().split(',')], columns=col_names)
149 |                df = df.append(record, ignore_index=True)
150 |        return df
151 | 
152 | But such code performs poorly.
153 | Compare the performance with Pandas' ``read_csv`` function:
154 | 
155 | .. code-block:: python
156 | 
157 |    In [1]: from my_csv import read_csv
158 | 
159 |    In [2]: %time data = read_csv('feet.csv')
160 |    CPU times: user 2min 3s, sys: 1.39 s, total: 2min 4s
161 |    Wall time: 2min 5s
162 | 
163 | .. code-block:: python
164 |    
165 |    In [1]: from pandas import read_csv
166 | 
167 |    In [2]: %time data = read_csv('feet.csv')
168 |    CPU times: user 28.5 ms, sys: 10.8 ms, total: 39.3 ms
169 |    Wall time: 54.2 ms
170 | 
171 | It also isn't nearly as versatile,
172 | and doesn't account for the dozens of edge cases that Pandas does.
173 | 
174 | Benchmark, benchmark, benchmark!
175 | --------------------------------
176 | 
177 | If there are two ways of doing the same thing,
178 | *benchmark* to see which is faster for different problem sizes.
179 | 
180 | For example, let's say we want to compute
181 | the average ``hindfoot_length`` for
182 | all species in ``plot_id`` 13 in the following dataset:
183 | 
184 | .. code-block:: python
185 | 
186 |     In [1]: data = pandas.read_csv('feet.csv')
187 | 
188 |     In [2]: data.head()
189 |     Out[2]:
190 |        plot_id species_id  hindfoot_length
191 |     0        2         NL             32.0
192 |     1        3         NL             33.0
193 |     2        2         DM             37.0
194 |     3        7         DM             36.0
195 |     4        3         DM             35.0
196 | 
197 | One way to do this would be to group by the ``plot_id``,
198 | compute the mean hindfoot length for each group,
199 | and extract the result for the group with ``plot_id`` 13:
200 | 
201 | .. code-block:: python
202 | 
203 |     In [2]: data.groupby('plot_id')['hindfoot_length'].mean()[13]
204 |     Out[2]: 27.570887035633056
205 | 
206 | Another way would be to filter the data first,
207 | keeping only records with ``plot_id`` 13,
208 | and then computing the mean of the ``hindfoot_length`` column:
209 | 
210 | .. code-block:: python
211 | 
212 |     In [3]: data[data['plot_id'] == 13]['hindfoot_length'].mean()
213 |     Out[3]: 27.570887035633056
214 | 
215 | Both methods give identical results,
216 | but the difference in performance is significant:
217 | 
218 | .. code-block:: python
219 | 
220 |     In [4]: %timeit data.groupby('plot_id')['hindfoot_length'].mean()[13]
221 |     1.34 ms ± 24.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
222 | 
223 |     In [5]: %timeit data[data['plot_id'] == 13]['hindfoot_length'].mean()
224 |     750 µs ± 506 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
225 | 
226 | Why do you think the first method is slower?
227 | 
228 | Avoid explicit loops
229 | --------------------
230 | 
231 | Very often, you need to operate on multiple elements of a collection
232 | such as a
233 | NumPy array or
234 | Pandas DataFrame.
235 | 
236 | In such cases, it is almost always a bad idea to write
237 | an explicit ``for`` loop over the elements.
238 | 
239 | For instance,
240 | looping over the rows (a.k.a, *indices* or *records*)
241 | of a Pandas DataFrame is considered poor practice,
242 | and is very slow.
243 | Consider replacing values in a column of a dataframe:
244 | 
245 | .. code-block:: python
246 | 
247 |    In [5]: %%timeit
248 |       ...: for i in range(len(data['species_id'])):
249 |       ...:     if data.loc[i, 'species_id'] == 'NL':
250 |       ...:         data.loc[i, 'species_id'] = 'NZ'
251 |       ...:
252 |    308 ms ± 4.49 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
253 | 
254 | A better way to do this is
255 | simply to use the ``replace()`` method:
256 | 
257 | .. code-block:: python
258 | 
259 |     In [2]: %time data['species_id'].replace('NL', 'NZ', inplace=True)
260 |     CPU times: user 3.1 ms, sys: 652 µs, total: 3.75 ms
261 |     Wall time: 3.34 ms
262 | 
263 | In addition to being faster,
264 | this also leads to more readable code.
265 | 
266 | Of course, loops are unavoidable in many situations;
267 | but look for alternatives before you write a ``for`` loop
268 | over the elements of an array, DataFrame, or similar data structure.
269 | 
270 | Avoid repeatedly allocating, copying and rearranging data
271 | ---------------------------------------------------------
272 | 
273 | Repeatedly creating and destroying new data can be very expensive
274 | especially if you are working with very large arrays or data frames.
275 | So avoid, for instance, creating a new array each time inside a loop.
276 | When operating on NumPy arrays,
277 | memory is allocated for intermediate results.
278 | Packages like `numexpr <https://github.com/pydata/numexpr>`_ aim to help with this.
279 | 
280 | Understand when data needs to be copied v/s when data can be operated "in-place".
281 | It also helps to know *when* copies are made.
282 | For example, do you think
283 | the following code results in two copies of the same array?
284 | 
285 | .. code-block:: python
286 | 
287 |    import numpy as np
288 | 
289 |    a = np.random.rand(50, 50)
290 |    b = a
291 | 
292 | `This article <https://nedbatchelder.com/text/names.html>`_
293 | clears up a lot of confusion
294 | about how names and values work in Python
295 | and when copies are made v/s when they are not.
296 | 
297 | Access data from memory efficiently
298 | -----------------------------------
299 | 
300 | * Accessing data in the "wrong order":
301 |   it is always more efficient to access values that are
302 |   "closer together" in memory than values that are farther apart.
303 |   For example, looping over the elements along the rows of a 2-d NumPy array
304 |   is *much* more efficient than looping over the elements along its columns.
305 |   Similarly, looping over the columns of a DataFrame in Pandas will be faster
306 |   than looping over its rows.
307 | 
308 | * Redundant computations / computing "too much":
309 |   if you only need to compute on a subset of your data,
310 |   filter *before* doing the computation
311 |   rather than after.
312 | 
313 | 
314 | Interfacing with compiled code
315 | ------------------------------
316 | 
317 | You may have heard that Python is "slow"
318 | compared to other languages like C, C++, or Fortran.
319 | This is somewhat true in that Python programs
320 | written in "pure Python", i.e., without the use
321 | of any libraries except the standard libraries,
322 | will be slow compared to their C/Fortran counterparts.
323 | One of the reasons that C is so much faster than Python
324 | is that it is a
325 | `compiled language <https://en.wikipedia.org/wiki/Compiled_language>`_,
326 | while Python is an
327 | `interpreted language <https://en.wikipedia.org/wiki/Interpreted_language>`_.
328 | 
329 | However,
330 | the core of libraries like NumPy
331 | is actually written in C,
332 | making them much faster than "pure Python".
333 | 
334 | It's also possible for you to write your own code
335 | so that it interfaces with languages like C, C++ or Fortran.
336 | Better still,
337 | you often don't even need to write any code in those languages,
338 | and instead can have other libraries "generate" them for you.
339 | 
340 | `Numba <https://numba.pydata.org/>`_ is a library that lets you compile
341 | code written in Python using
342 | a very convenient "decorator" syntax.
343 | 
344 | As an example,
345 | consider numerically evaluating the derivative
346 | of a function using finite differences.
347 | A function that uses NumPy to do this might look like the following:
348 | 
349 | .. literalinclude:: ../code/derivatives.py
350 |    :language: python
351 |    :caption: derivatives.py
352 | 
353 | Below, we time the function for a grid of 10000000 points:
354 | 
355 | :: 
356 | 
357 |    In [1]: x = np.linspace(0, 1, 10000000)
358 | 
359 |    In [2]: dx = x[1] - x[0]
360 | 
361 |    In [3]: f = np.sin(2 * np.pi * x / 1000000)
362 | 
363 |    In [4]: y = np.zeros_like(f)
364 | 
365 |    In [5]: %timeit dfdx(f, dx, y)
366 |    61.1 ms ± 2.62 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
367 | 
368 | Below is a function that is compiled using Numba to do the same task:
369 | 
370 | .. literalinclude:: ../code/derivatives_numba.py
371 |    :language: python
372 |    :caption: derivatives.py
373 | 
374 | We see much better performance for the same grid size:
375 | 
376 | ::
377 | 
378 |    In [1]: %timeit dfdx(f, dx, y)
379 |    14.6 ms ± 282 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
380 | 
381 | `Cython <http://cython.org/>`_ is another option for interfacing with compiled code.
382 | It performs about the same as Numba but requires much more effort;
383 | although it can do many things that Numba cannot,
384 | such as generating C code, and
385 | interfacing with C/C++ libraries.
386 | 
387 | Parallelization
388 | ---------------
389 | 
390 | Finally,
391 | if your computer has multiple cores,
392 | or if you have access to a bigger computer (e.g., a high-performance computing cluster),
393 | parallelizing your code may be an option.
394 | 
395 | * Note that many libraries support parallelization without any effort on your part.
396 |   Libraries like Numba and `Tensorflow <https://www.tensorflow.org/>`_
397 |   can use all the cores on your CPU,
398 |   and even your GPU for accelerating computations.
399 | 
400 | * `Dask <https://dask.pydata.org/en/latest/>`_ is a great library for
401 |   parallelizing computations
402 |   and operating on large datasets that don't fit in RAM.
403 | 
404 | * The `multiprocessing <https://docs.python.org/3/library/multiprocessing.html>`_ package
405 |   is useful when you have several independent tasks that can all be done concurrently.
406 |   `joblib <https://pythonhosted.org/joblib/>`_ is another popular library for this.
407 | 
408 | 


--------------------------------------------------------------------------------
/docs/testing.rst:
--------------------------------------------------------------------------------
  1 | Testing your code
  2 | =================
  3 | 
  4 | .. note::
  5 | 
  6 |    This section is based heavily on Ned Batchelder's
  7 |    excellent article and PyCon 2014 talk
  8 |    `Getting Started Testing <https://nedbatchelder.com/text/test0.html>`_.
  9 | 
 10 |    | *Tests are the dental floss of development: everyone knows they should do it more,*
 11 |    | *but they don’t, and they feel guilty about it.*
 12 |    | - Ned Batchelder
 13 | 
 14 |    | *Code without tests should be approached with a 10-foot pole.*
 15 |    | - me
 16 | 
 17 | How can you write
 18 | modular, extensible, and reusable code?
 19 | 
 20 | After making changes to a program,
 21 | how do you ensure that
 22 | it will still give the same answers as before?
 23 | 
 24 | How can we make finding and fixing bugs
 25 | an easy, fun and rewarding experience? 
 26 | 
 27 | These seemingly unrelated questions all
 28 | have the same answer,
 29 | and it is
 30 | **automated testing**.
 31 | 
 32 | Testing by example: ``flip_string``
 33 | -----------------------------------
 34 | 
 35 | Here is a function called ``flip_string`` that flips (reverses) a string.
 36 | There are bug(s) in this function that we need to find and fix.
 37 | Test the function for
 38 | various inputs and compare the results obtained with expected output.
 39 | 
 40 | .. literalinclude:: ../code/flip_string.py
 41 |    :caption: flip_string.py
 42 | 
 43 | * What tests did you come up with? Why did you choose those tests?
 44 | * How did you organize and execute your tests?
 45 | * Can the results of your tests help you figure out what problem(s)
 46 |   there might be with the code?
 47 | 
 48 | Testing interactively
 49 | ^^^^^^^^^^^^^^^^^^^^^
 50 | 
 51 | This is the most common type of testing,
 52 | and something you have probably done before.
 53 | To test a function or a line of code,
 54 | you simply fire up an interactive Python interpreter,
 55 | import the function,
 56 | and test away:
 57 | 
 58 | .. code-block:: python
 59 | 
 60 |    >>> from flip_string import flip_string
 61 |    >>> flip_string('mario')
 62 |    'oiram'
 63 |    >>> flip_string('luigi')
 64 |    'igiul'
 65 | 
 66 | While this kind of testing is better than not doing any testing at all,
 67 | it leaves much to be desired.
 68 | First,
 69 | it needs to be done
 70 | each time ``flip_string`` is changed.
 71 | It also requires that we manually inspect the output from each test to
 72 | decide if the code "passes" or "fails" that test.
 73 | Further,
 74 | we need to remember all the tests we came up with today
 75 | if we want to test again tomorrow.
 76 | 
 77 | Writing a test script
 78 | ^^^^^^^^^^^^^^^^^^^^^
 79 | 
 80 | A *much* better way to write tests is to put them in a script:
 81 | 
 82 | .. literalinclude:: ../code/test_flip_string-v1.py
 83 |    :caption: test_flip_string.py
 84 | 
 85 | Now, running and re-running our tests is very easy - we just run the script:
 86 | 
 87 | .. code-block:: bash
 88 | 
 89 |    $ python test_flip_string.py
 90 |    mario flipped is: oiram
 91 |    luigi flipped is: igiul
 92 | 
 93 | It's also easy to add new tests,
 94 | and there's no need to remember
 95 | all the tests we come up with.
 96 | 
 97 | Testing with assertions
 98 | ^^^^^^^^^^^^^^^^^^^^^^^
 99 | 
100 | One problem with the method above is that
101 | we *still* need to manually inspect the results of our tests.
102 | 
103 | Assertions can help with this.
104 | 
105 | The ``assert`` statement in Python is very simple:
106 | Given a condition, like ``1 == 2``,
107 | it checks to see if the condition is true or false.
108 | If it is true, then ``assert`` does nothing,
109 | and if it is false, it raises an ``AssertionError``:
110 | 
111 | .. code-block:: python
112 | 
113 |    >>> assert 1 == 1
114 |    >>> assert 1 < 2
115 |    >>> assert 1 > 2
116 |    Traceback (most recent call last):
117 |      File "<stdin>", line 1, in <module>
118 |    AssertionError
119 | 
120 | We can re-write our script ``test_flip_string.py``
121 | using assertions as follows:
122 | 
123 | .. literalinclude:: ../code/test_flip_string-v2.py
124 |    :caption: test_flip_string.py
125 | 
126 | And we still run our tests the same way:
127 | 
128 | .. code-block:: bash
129 | 
130 |    $ python test_flip_string.py
131 | 
132 | This time, there's no need to inspect the test results.
133 | If we get an ``AssertionError``, then we had a test fail,
134 | and if not, all our tests passed.
135 | 
136 | However, there's no way to know if *more* than one test failed.
137 | The script stops executing after the first ``AssertionError`` is encountered.
138 | 
139 | Let's add another test to our test script and re-run it:
140 | 
141 | .. literalinclude:: ../code/test_flip_string-v3.py
142 |    :caption: test_flip_string.py
143 | 
144 | .. code-block:: bash
145 | 
146 |    $ python test_flip_string.py
147 | 
148 |    Traceback (most recent call last):
149 |      File "test_flip_string.py", line 5, in <module>
150 |        assert flip_string('samus') == 'sumas'
151 |    AssertionError
152 | 
153 | This time we get a failed test,
154 | because - as we said - our code has bugs in it.
155 | Before adding more tests to investigate further,
156 | we'll discuss one more method for running tests.
157 | 
158 | Using a test runner
159 | ^^^^^^^^^^^^^^^^^^^
160 | 
161 | A test runner takes a bunch of tests,
162 | executes them all,
163 | and then reports which of them passed
164 | and which of them failed.
165 | 
166 | A very popular test runner for Python is
167 | `pytest <https://docs.pytest.org/en/latest/>`_.
168 | 
169 | To run our tests using pytest,
170 | we need to re-write them as follows
171 | (essentially, wrap each test in a function):
172 | 
173 | .. code-block:: python
174 |    :caption: test_flip_string.py
175 | 
176 |     from flip_string import flip_string
177 | 
178 |     def test_flip_mario():
179 |         assert flip_string('mario') == 'oiram'
180 | 
181 |     def test_flip_luigi():
182 |         assert flip_string('luigi') == 'igiul'
183 | 
184 |     def test_flip_samus():
185 |         assert flip_string('samus') == 'sumas'
186 | 
187 | To run our tests,
188 | we simply type ``pytest`` on the command line.
189 | When we do this, pytest will
190 | look for all files containing tests,
191 | run all the tests in those files,
192 | and report what it found:
193 | 
194 | .. code-block:: bash
195 | 
196 |    $ pytest
197 | 
198 |    collected 3 items
199 | 
200 |    test_flip_string.py ..F                                               [100%]
201 | 
202 |    =================================== FAILURES ===================================
203 |    _______________________________ test_flip_samus ________________________________
204 | 
205 |        def test_flip_samus():
206 |    >       assert flip_string('samus') == 'sumas'
207 |    E       AssertionError: assert 's' == 'sumas'
208 |    E         - s
209 |    E         + sumas
210 | 
211 |    test_flip_string.py:10: AssertionError
212 |    ====================== 1 failed, 2 passed in 0.07 seconds ======================
213 | 
214 | As you can see above,
215 | pytest prints a lot of useful information in its report.
216 | First,
217 | it prints a summary of passed vs. failed tests:
218 | 
219 | ::
220 | 
221 |     test_flip_string.py ..F                                               [100%]
222 | 
223 | A dot (``.``) indicates a passed test,
224 | while an ``F`` indicates a failed test.
225 | 
226 | For each failed test,
227 | it provides further information,
228 | including the
229 | expected value as well as the obtained value
230 | in the failed assertion:
231 | 
232 | ::
233 | 
234 |     =================================== FAILURES ===================================
235 |     _______________________________ test_flip_samus ________________________________
236 |     
237 |         def test_flip_samus():
238 |     >       assert flip_string('samus') == 'sumas'
239 |     E       AssertionError: assert 's' == 'sumas'
240 |     E         - s
241 |     E         + sumas
242 |     
243 |     test_flip_string.py:10: AssertionError
244 | 
245 | Useful tests
246 | ^^^^^^^^^^^^
247 | 
248 | Now that we know how to write and run tests,
249 | what kind of tests should we write?
250 | Testing ``flip_string`` for arbitrary words like ``'mario'`` and ``'luigi'``
251 | might not tell us much about where the problem might be.
252 | 
253 | Instead, we should choose tests that exercise specific functionality
254 | of the code we are testing,
255 | or represent different conditions that the code may be exposed to.
256 | 
257 | Here are some examples of more useful tests:
258 | 
259 | * Flipping a string with a single character (no work needs to be done)
260 | * Flipping a string with two characters (minimum amount of work needs to be done)
261 | * Flipping a string that reads the same forwards and backwards
262 | 
263 | .. literalinclude:: ../code/test_flip_string-v5.py
264 |    :caption: test_flip_string.py
265 | 
266 | :: 
267 | 
268 |    collected 3 items
269 | 
270 |    test_flip_string.py ..F                                               [100%]
271 | 
272 |    =================================== FAILURES ===================================
273 |    _____________________________ test_flip_palindrome _____________________________
274 | 
275 |        def test_flip_palindrome():
276 |    >       assert flip_string('aba') == 'aba'
277 |    E       AssertionError: assert 'a' == 'aba'
278 |    E         - a
279 |    E         + aba
280 | 
281 |    test_flip_string.py:10: AssertionError
282 |    ====================== 1 failed, 2 passed in 0.08 seconds ======================
283 | 
284 | Fixing the code
285 | ^^^^^^^^^^^^^^^
286 | 
287 | From the test results above, we see that ``flip_string`` failed
288 | for the input ``'aba'``.
289 | Now, can you trace the execution of the code
290 | in the function ``flip_string`` for this input
291 | and figure out why it returned ``a``?
292 | 
293 | After fixing the code,
294 | re-run the tests to make sure you didn't break anything else
295 | in the process of fixing this bug --
296 | this is one of the reasons tests are so valuable!
297 | 
298 | Types of testing
299 | ----------------
300 | 
301 | Software testing is a vast topic
302 | and there are
303 | `many levels and types <https://en.wikipedia.org/wiki/Software_testing>`_
304 | of software testing.
305 | 
306 | For scientific and research software,
307 | the focus of testing efforts is primarily:
308 | 
309 | 1. **Unit tests**: Unit tests aim to test small, independent sections of code
310 |    (a function or parts of a function),
311 |    so that when a test fails,
312 |    the failure can easily be associated with that section of code.
313 |    This is the kind of testing that we have been doing so far.
314 | 
315 | 2. **Regression tests**: Regression tests aim to check whether
316 |    changes to the program result in it producing
317 |    different results from before.
318 |    Regression tests can test
319 |    larger sections of code
320 |    than unit tests.
321 |    As an example, if you are writing a machine learning application,
322 |    you may want to run your model on small data
323 |    in an automated way
324 |    each time your software undergoes changes,
325 |    and make sure that the same (or a better) result is produced.
326 | 
327 | Test-driven development
328 | -----------------------
329 | 
330 | `Test-driven development (TDD) <https://en.wikipedia.org/wiki/Test-driven_development>`_
331 | is the practice of writing tests for a function or method
332 | *before* actually writing any code for that function or method.
333 | The TDD process is to:
334 | 
335 | 1. Write a test for a function or method
336 | 2. Write just enough code that the function or method passes that test
337 | 3. Ensure that all tests written so far pass
338 | 4. Repeat the above steps until you are satisfied with the code
339 | 
340 | Proponents of TDD suggest that this results in better code.
341 | Whether or not TDD sounds appealing to you,
342 | writing tests should be *part* of your development process,
343 | and never an afterthought.
344 | In the process of writing tests,
345 | you often come up with new corner cases for your code,
346 | and realize better ways to organize it.
347 | The result is usually code that is
348 | more modular,
349 | more reusable
350 | and of course,
351 | more testable,
352 | than if you didn't do any testing.
353 | 
354 | Growing a useful test suite
355 | ---------------------------
356 | 
357 | More tests are always better than fewer,
358 | and your code should have as many tests as you are willing to write.
359 | That being said,
360 | some tests are more useful than others.
361 | Designing a useful suite of tests is a challenge in itself,
362 | and it helps to keep the following in mind when growing tests:
363 | 
364 | 1. **Tests should run quickly**: testing is meant to be done as often as possible.
365 |    Your entire test suite should complete in no more than a few seconds,
366 |    otherwise you won't run your tests often enough for them to be useful.
367 |    Always test your functions or algorithms on very small and simple data;
368 |    even if in practice they will be dealing with more complex and large datasets.
369 | 
370 | 2. **Tests should be focused**: each test should exercise a small part of your code.
371 |    When a test fails,
372 |    it should be easy for you to
373 |    figure out which part of your program you need to focus debugging efforts on.
374 |    This can be difficult if your code isn't modular,
375 |    i.e., if different parts of your code depend heavily on each other.
376 |    This is one of the reasons TDD is said to produce more modular code.
377 |    
378 | 3. **Tests should cover all possible code paths**: if your function has multiple code paths
379 |    (e.g., an if-else statement),
380 |    write tests that execute both the "if" part
381 |    and the "else" part.
382 |    Otherwise, you might have bugs in your code and still have all tests pass.
383 | 
384 | 4. **Test data should include difficult and edge cases**: it's easy to
385 |    write code that only handles cases with well-defined inputs and outputs.
386 |    In practice however, your code may have to deal with
387 |    input data for which it isn't clear what the behaviour should be.
388 |    For example, what should ``flip_string('')`` return?
389 |    Make sure you write tests for such cases,
390 |    so that you force your code to handle them.
391 | 


--------------------------------------------------------------------------------
/docs/usability.rst:
--------------------------------------------------------------------------------
  1 | Improving the usability of Python programs
  2 | ==========================================
  3 | 
  4 | Logging
  5 | -------
  6 | 
  7 | It can be useful  to print out either a message or the  value of some variable,
  8 | etc.,  while  your  code is  running.  This  is  quite  common and  is  usually
  9 | accomplished with a simple call to the ``print`` function.
 10 | 
 11 | .. code-block:: python
 12 | 
 13 |     x = 1.234
 14 |     print("The value of x is {0:0.4f}.".format(x))
 15 | 
 16 | .. code-block:: none
 17 | 
 18 |     The value of x is 1.2340.
 19 | 
 20 | Doing this is a good way to keep track of milestones in your code. That way,
 21 | both you, while developing the code, and other users, while running it,
 22 | can be notified of an event, progress, or value.
 23 | 
 24 | Printing a message is also useful for  notifying the user when something is not
 25 | going as expected. These are all different *levels* of messaging.
 26 | 
 27 | *Logging* is  simply engaging in this  behavior of printing out  messages, with
 28 | the added  feature that you include  meta data (e.g., a  timestamp, the message
 29 | category) with the message, as well as a filter where only messages with a high
 30 | enough level of criticality are actually allowed to be printed.
 31 | 
 32 | 
 33 | Logging Basics
 34 | ~~~~~~~~~~~~~~
 35 | 
 36 | 
 37 | The general  idea is  that there are  multiple levels of  messages that  can be
 38 | printed. Typically these include:
 39 | 
 40 | 1. DEBUG    - diagnostic purposes.
 41 | 2. INFO     - basic information (most common).
 42 | 3. WARNING  - indicating non-normal behavior.
 43 | 4. ERROR    - error (the operation cannot continue).
 44 | 5. CRITICAL - error (the program cannot continue).
 45 | 
 46 | During the initialization portion of your  code, you would configure a *logger*
 47 | object with a  format, where to print messages (e.g.,  console, file, or both),
 48 | and what level to use by default.  Usually, you would set the default log level
 49 | to ``INFO`` and the debugging messages  used for diagnostics would not actually
 50 | be printed. Then, allow the user to override this with a `command line argument
 51 | <#command-line-arguments>`_ (e.g., ``--debug``).
 52 | 
 53 | 
 54 | Example Setup
 55 | ~~~~~~~~~~~~~
 56 | 
 57 | Python has a `logging <https://docs.python.org/3/library/logging.html>`_ module
 58 | as part of the  standard library. It is very comprehensive  and allows the user
 59 | to heavily customize many parts of the behavior. It is pretty straightforward to
 60 | implement your own logging functionality, but unless you're doing something
 61 | special, why not use the standard library?
 62 | 
 63 | .. code-block:: python
 64 | 
 65 |     import logging
 66 | 
 67 |     log = logging.getLogger("ProjectName")
 68 | 
 69 |     file_handler = logging.FileHandler("path/for/output.log")
 70 |     console_handler = logging.StreamHandler()
 71 | 
 72 |     formatter = logging.Formatter("%(levelname)s %(asctime)s %(name)s - %(message)s")
 73 |     file_handler.setFormatter(formatter)
 74 |     console_handler.setFormatter(formatter)
 75 | 
 76 |     log.addHandler(file_handler)
 77 |     log.addHandler(console_handler)
 78 |     log.setLevel(logging.INFO)
 79 | 
 80 | Then, somewhere in the code:
 81 | 
 82 | .. code-block:: python
 83 | 
 84 |     log.debug("report on some variable")
 85 |     log.info("notification of milestone")
 86 |     log.warning("non-standard behavior")
 87 |     log.error("unrecoverable issue")
 88 |     log.critical("panic!")
 89 | 
 90 | .. code-block:: none
 91 | 
 92 |     INFO 2018-07-24 09:41:56,683 ProjectName - notification of milestone
 93 |     WARNING 2018-07-24 09:41:56,835 ProjectName - non-standard behavior
 94 |     ERROR 2018-07-24 09:41:57,103 ProjectName - unrecoverable issue
 95 |     CRITICAL 2018-07-24 09:41:57,103 ProjectName - panic!
 96 | 
 97 | Notice that the debug  message was not printed. This is because  we set the log
 98 | level to  ``INFO``. Only  messages with  a level  equal to  or higher  than the
 99 | assigned level will make it past the filter.
100 | 
101 | 
102 | Logging with Color
103 | ~~~~~~~~~~~~~~~~~~
104 | 
105 | Finally, another common feature  of logging is to add color  as an indicator of
106 | the message type. Obviously, this only  applies to messages that are printed to
107 | the console.  If you've ever started  up a *Jupyter* notebook  server you might
108 | have noticed that its log messages use a format similar to the one used here,
109 | with the metadata in a bold color. The color codes are generally as follows:
110 | 
111 | - DEBUG (blue)
112 | - INFO (green)
113 | - WARNING (orange or yellow)
114 | - ERROR (red)
115 | - CRITICAL (purple)
116 | 
117 | 
118 | Command Line Arguments
119 | ----------------------
120 | 
121 | In addition to  packaging your code in  a way that other users  or projects can
122 | import for use in their code, often it makes sense to also make elements of the
123 | code  executable from  the  command line  as stand  alone  scripts. Python  has
124 | everything you need to do this built right in.
125 | 
126 | As   with  logging,   there  are   several  python   packages  available   that
127 | handle   command  line   argument   parsing  for   you,   including  a   robust
128 | implementation   provided   right  in   the   standard   library  -   `argparse
129 | <https://docs.python.org/3/library/argparse.html>`_.
130 | 
131 | The *argparse*  module, as  well as  the others, relies on  a universally accepted
132 | convention for how  command line arguments should be structured.  Nearly all of
133 | the  standard  utilities on  Unix/Linux  systems  use  this same  syntax.  This
134 | convention covers both the command line argument syntax as well as the structure
135 | of *usage*  statements that your  script prints  out (e.g., when  supplying the
136 | ``--help`` option).  The *argparse* module actually  takes care of all  of this
137 | for you.
138 | 
139 | Unix Convention
140 | ~~~~~~~~~~~~~~~
141 | 
142 | There is  a fair bit  of complexity to  the convention surrounding  the *usage*
143 | statements, but the argument syntax is fairly simple.
144 | 
145 | *Positional arguments* are those that don't  have names. These are usually file
146 | paths in the  context of analysis scripts. *Optional arguments*  are those that
147 | have defaults and may or may not accept a value.
148 | 
149 | Optional arguments  can be  specified with  *short form*  or *long  form* names
150 | (usually both). The short  form names are a single letter  preceded by a single
151 | dash (e.g.,  ``-a``). Short  form options  that don't take  an argument  can be
152 | stacked (e.g., ``-abc``).  Long form arguments are whole words  and preceded by
153 | two dashes (e.g., ``--debug``). Long form arguments that are multiple words are
154 | usually joined with dashes (e.g., ``--output-directory``).
155 | 
156 | There is more, but these are the basics.
157 | 
158 | Simple Example
159 | ~~~~~~~~~~~~~~
160 | 
161 | The best (most robust and cross-platform) way of providing a stand-alone script
162 | with your package is to let your ``setup.py`` file handle it. Doing the following
163 | will create the proper  executable on both Windows and Unix  systems and put it
164 | in a place that is readily callable (i.e., on the user's ``PATH``).
165 | 
166 | .. code-block:: python
167 | 
168 |     # setup.py
169 | 
170 |     # use "entry_points" to point to function and setuptools
171 |     # will create executables on your behalf.
172 |     setup(
173 |     # ...
174 |         # syntax: "{name}={package}.{module}:{function}"
175 |         # "{name}" will be on your PATH in the same "/bin/"
176 |         # alongside python/pip executables.
177 |         entry_points = {"console_scripts": [
178 |             "do_science=my_package.do_science:main",
179 |         ]},
180 |     # ...
181 |     )
182 | 
183 | This says  that I  have a file,  ``my_package/do_science.py``, with  a function
184 | called ``main`` that  when called does the  thing I want the script  to do. The
185 | function  won't be  given any  arguments,  but we  can  get what  we need  from
186 | ``sys.argv``. This has the effect of  creating an executable we can invoke with
187 | the name ``do_science`` that behaves equivalently to the following.
188 | 
189 | .. code-block:: python
190 | 
191 |     import sys
192 |     from my_package.do_science import main
193 |     sys.exit(main())
194 | 
195 | With this  in mind, your  function can and  should return integer  values which
196 | will  be  used  as the  exit  status  of  the  command. This  is  another  Unix
197 | convention;  returning  zero  is  for  success,  returning  a  non-zero  status
198 | indicates some specific error has occurred.
199 | 
200 | The following shows a basic usage of ``argparse`` and how to define your "main"
201 | function.
202 | 
203 | .. code-block:: python
204 | 
205 |     # do_science.py
206 |     # script for doing cool science things
207 | 
208 |     import argparse
209 | 
210 |     parser = argparse.ArgumentParser(prog="do_science",
211 |                                      description="do cool science thing")
212 | 
213 |     # positional argument
214 |     parser.add_argument("input_file", help="path to input data file")
215 | 
216 |     # optional argument
217 |     parser.add_argument("-d", "--debug", action="store_true",
218 |                         help="enable debugging messages")
219 | 
220 |     def main() -> int:
221 |         """Main entry point for `do_science`.
222 | 
223 |            Returns:
224 |            exit_status: int
225 |                0 if success, non-zero otherwise.
226 |         """
227 | 
228 |         # parse_args() automatically grabs sys.argv if you don't provide them.
229 |         opts = parser.parse_args()
230 |         # opts is a namespace
231 |         # opts.input_file is a string with the value from the command line
232 |         # opts.debug is True or False (default is False w/ "store_true")
233 |         return 0
234 | 
235 | 
236 | After the package is installed, ``pip install my_package ...``, you'll be able to
237 | call the script:
238 | 
239 | .. code-block:: none
240 | 
241 |     > do_science 
242 |     usage: do_science [-h] [-d] input_file
243 | 
244 | .. code-block:: none
245 | 
246 |     > do_science --help
247 |     usage: do_science [-h] [-d] input_file
248 |     
249 |     do cool science thing
250 | 
251 |     positional arguments:
252 |       input_file   path to input data file
253 |     
254 |     optional arguments:
255 |       -h, --help   show this help message and exit
256 |       -d, --debug  enable debugging messages
257 | 
258 | 
259 | 


--------------------------------------------------------------------------------
/scripts/make-triangles.py:
--------------------------------------------------------------------------------
 1 | """
 2 | This script generates a plot of
 3 | some triangles.
 4 | """
 5 | 
 6 | import matplotlib.pyplot as plt
 7 | 
 8 | def plot_triangle(points, ax=None):
 9 |     if ax is None:
10 |         ax = plt.gca()
11 |     else:
12 |         fig, ax = plt.subplots()
13 |         ax.set_xlabel('x')
14 |         ax.set_ylabel('y')
15 | 
16 |     patch = plt.Polygon(points)
17 |     ax.add_patch(patch)
18 | 
19 |     for pt in points:
20 |         x, y = pt
21 |         ax.text(x, y, '({}, {})'.format(x, y))
22 | 
23 | if __name__ == "__main__":
24 | 
25 |     plot_triangle([
26 |         (0.2, 0.2),
27 |         (0.2, 0.6),
28 |         (0.4, 0.4)
29 |     ])
30 | 
31 |     plot_triangle([
32 |         (0.6, 0.8),
33 |         (0.8, 0.8),
34 |         (0.5, 0.5)
35 |     ])
36 | 
37 |     plot_triangle([
38 |         (0.6, 0.1),
39 |         (0.7, 0.3),
40 |         (0.9, 0.2)
41 |     ])
42 | 
43 |     plt.savefig('../docs/images/triangle.png', transparent=True)
44 | 


--------------------------------------------------------------------------------