├── .github
└── workflows
│ └── sphinx.yml
├── .gitignore
├── LICENSE
├── Makefile
├── content
├── a_list.dot
├── a_list.svg
├── binder.rst
├── conf.py
├── data-formats.rst
├── dependencies.rst
├── exercises.md
├── format_comparison_array.csv
├── format_comparison_tidy.csv
├── guide.rst
├── img
│ ├── binder
│ │ ├── binder.jpg
│ │ └── python_unmasked.jpg
│ ├── installation
│ │ ├── anaconda-navigator-jupyterlab.png
│ │ ├── anaconda-prompt.png
│ │ ├── jupyterlab-notebook.png
│ │ └── jupyterlab-terminal.png
│ ├── jupyter
│ │ ├── main-ui.png
│ │ └── notebook-ui.png
│ ├── numpy-advanced
│ │ ├── 01_memory_layout.svg
│ │ └── 02_views.svg
│ ├── pandas
│ │ ├── 01_table_dataframe.svg
│ │ └── tidy_data.png
│ └── xarray
│ │ ├── xarray_1d_plot.png
│ │ ├── xarray_2d_plot.png
│ │ ├── xarray_dataset_image.png
│ │ └── xarray_hist.png
├── index.rst
├── installation.rst
├── jupyter.ipynb
├── libraries.rst
├── ndarray.dot
├── ndarray.svg
├── numpy-advanced.rst
├── numpy.rst
├── packaging-example-project
│ ├── calculator
│ │ ├── __init__.py
│ │ ├── adding.py
│ │ ├── integrating.py
│ │ └── subtracting.py
│ ├── pyproject.toml
│ ├── test.py
│ └── test_editable.py
├── packaging.rst
├── pandas.rst
├── parallel-pi-multiprocessing.ipynb
├── parallel.rst
├── plotting-matplotlib.md
├── plotting-matplotlib
│ ├── customizing
│ │ ├── gapminder-larger-font.png
│ │ ├── gapminder-linear.png
│ │ └── gapminder-log.png
│ └── first-plot
│ │ ├── exercise.png
│ │ └── getting-started.png
├── plotting-vega-altair.md
├── plotting-vega-altair
│ ├── precipitation-on-top-yearmonth.svg
│ ├── precipitation-on-top.svg
│ ├── precipitation-side.svg
│ ├── precipitation-stacked-x.svg
│ ├── precipitation-stacked-y.svg
│ ├── snow-depth-circles.svg
│ ├── snow-depth-color.svg
│ ├── snow-depth-plasma.svg
│ ├── snow-depth.svg
│ ├── temperature-ranges-combined.svg
│ └── temperature-ranges-side.svg
├── productivity.md
├── productivity
│ ├── chatgpt.png
│ └── code-completion.gif
├── profiling.md
├── profiling
│ ├── exercise.png
│ └── exercise.py
├── python.rst
├── quick-reference.rst
├── scipy.rst
├── scripts.rst
├── web-apis.ipynb
├── work-with-data.rst
└── xarray.rst
├── extras
├── data-formats-comparison-array.ipynb
└── data-formats-comparison-tidy.ipynb
├── make.bat
├── requirements.txt
├── resources
├── code
│ └── scripts
│ │ ├── __pycache__
│ │ ├── optionsparser.cpython-38.pyc
│ │ ├── weather_functions.cpython-38.pyc
│ │ └── weather_functions_config.cpython-38.pyc
│ │ ├── optionsparser.py
│ │ ├── out.png
│ │ ├── rain_in_cairo.png
│ │ ├── weather.png
│ │ ├── weather_functions.py
│ │ ├── weather_functions_config.py
│ │ ├── weather_observations.ipynb
│ │ ├── weather_observations.py
│ │ ├── weather_observations_argparse.py
│ │ ├── weather_observations_config.py
│ │ └── weather_options.yml
├── data
│ ├── laureate.csv
│ ├── plotting
│ │ ├── README.md
│ │ ├── exercise-2.csv
│ │ ├── oslo-daily.csv
│ │ ├── oslo-monthly.csv
│ │ ├── tromso-daily.csv
│ │ └── tromso-monthly.csv
│ └── scripts
│ │ ├── weather_cairo.csv
│ │ └── weather_tapiola.csv
└── notebooks
│ ├── plotting-exercise-2.ipynb
│ └── plotting.ipynb
└── software
└── environment.yml
/.github/workflows/sphinx.yml:
--------------------------------------------------------------------------------
1 | # Deploy Sphinx. This could be shorter, but we also do some extra
2 | # stuff.
3 | #
4 | # License: CC-0. This is the canonical location of this file, which
5 | # you may want to link to anyway:
6 | # https://github.com/coderefinery/sphinx-lesson-template/blob/main/.github/workflows/sphinx.yml
7 | # https://raw.githubusercontent.com/coderefinery/sphinx-lesson-template/main/.github/workflows/sphinx.yml
8 |
9 |
10 | name: sphinx
11 | on: [push, pull_request]
12 |
13 | env:
14 | DEFAULT_BRANCH: "master"
15 | # If these SPHINXOPTS are enabled, then be strict about the
16 | # builds and fail on any warnings.
17 | #SPHINXOPTS: "-W --keep-going -T"
18 | GENERATE_PDF: true # to enable, must be 'true' lowercase
19 | GENERATE_SINGLEHTML: true # to enable, must be 'true' lowercase
20 | PDF_FILENAME: lesson.pdf
21 | MULTIBRANCH: true # to enable, must be 'true' lowercase
22 |
23 |
24 | jobs:
25 | build:
26 | name: Build
27 | runs-on: ubuntu-latest
28 | permissions:
29 | contents: read
30 |
31 | steps:
32 | # https://github.com/marketplace/actions/checkout
33 | - uses: actions/checkout@v4
34 | with:
35 | fetch-depth: 0
36 | lfs: true
37 |
38 | # https://github.com/marketplace/actions/setup-python
39 | # ^-- This gives info on matrix testing.
40 | - name: Install Python
41 | uses: actions/setup-python@v4
42 | with:
43 | python-version: '3.11'
44 | cache: 'pip'
45 |
46 | # https://docs.github.com/en/actions/guides/building-and-testing-python#installing-dependencies
47 | # ^-- This gives info on installing dependencies with pip
48 | - name: Install dependencies
49 | run: |
50 | python -m pip install --upgrade pip
51 | pip install -r requirements.txt
52 |
53 | # Debug
54 | - name: Debugging information
55 | env:
56 | ref: ${{github.ref}}
57 | event_name: ${{github.event_name}}
58 | head_ref: ${{github.head_ref}}
59 | base_ref: ${{github.base_ref}}
60 | run: |
61 | echo "github.ref: ${ref}"
62 | echo "github.event_name: ${event_name}"
63 | echo "github.head_ref: ${head_ref}"
64 | echo "github.base_ref: ${base_ref}"
65 | echo "GENERATE_PDF: ${GENERATE_PDF}"
66 | echo "GENERATE_SINGLEHTML: ${GENERATE_SINGLEHTML}"
67 | set -x
68 | git rev-parse --abbrev-ref HEAD
69 | git branch
70 | git branch -a
71 | git remote -v
72 | python -V
73 | pip list --not-required
74 | pip list
75 |
76 |
77 | # Build
78 | - uses: ammaraskar/sphinx-problem-matcher@master
79 | - name: Build Sphinx docs (dirhtml)
80 | # SPHINXOPTS used via environment variables
81 | run: |
82 | make dirhtml
83 | # This fixes broken copy button icons, as explained in
84 | # https://github.com/coderefinery/sphinx-lesson/issues/50
85 | # https://github.com/executablebooks/sphinx-copybutton/issues/110
86 | # This can be removed once these PRs are accepted (but the
87 | # fixes also need to propagate to other themes):
88 | # https://github.com/sphinx-doc/sphinx/pull/8524
89 | # https://github.com/readthedocs/sphinx_rtd_theme/pull/1025
90 | sed -i 's/url_root="#"/url_root=""/' _build/dirhtml/index.html || true
91 |
92 | # singlehtml
93 | - name: Generate singlehtml
94 | if: ${{ env.GENERATE_SINGLEHTML == 'true' }}
95 | run: |
96 | make singlehtml
97 | mv _build/singlehtml/ _build/dirhtml/singlehtml/
98 |
99 | # PDF if requested
100 | - name: Generate PDF
101 | if: ${{ env.GENERATE_PDF == 'true' }}
102 | run: |
103 | pip install https://github.com/rkdarst/sphinx_pyppeteer_builder/archive/refs/heads/main.zip
104 | make pyppeteer
105 | mv _build/pyppeteer/*.pdf _build/dirhtml/${PDF_FILENAME}
106 |
107 | # Stage all deployed assets in _gh-pages/ for simplicity, and to
108 | # prepare to do a multi-branch deployment.
109 | - name: Copy deployment data to _gh-pages/
110 | if: ${{ github.event_name == 'push' }}
111 | run:
112 | rsync -a _build/dirhtml/ _gh-pages/
113 |
114 | # Use gh-pages-multibranch to multiplex different branches into
115 | # one deployment. See
116 | # https://github.com/coderefinery/gh-pages-multibranch
117 | - name: gh-pages multibranch
118 | uses: coderefinery/gh-pages-multibranch@main
119 | if: ${{ github.event_name == 'push' && env.MULTIBRANCH == 'true' }}
120 | with:
121 | directory: _gh-pages/
122 | default_branch: ${{ env.DEFAULT_BRANCH }}
123 | publish_branch: gh-pages
124 |
125 | # Add the .nojekyll file
126 | - name: nojekyll
127 | if: ${{ github.event_name == 'push' }}
128 | run: |
129 | touch _gh-pages/.nojekyll
130 |
131 | # Save artifact for the next step.
132 | - uses: actions/upload-artifact@v4
133 | if: ${{ github.event_name == 'push' }}
134 | with:
135 | name: gh-pages-build
136 | path: _gh-pages/
137 |
138 | # Deploy in a separate job so that write permissions are restricted
139 | # to the minimum steps.
140 | deploy:
141 | name: Deploy
142 | runs-on: ubuntu-latest
143 | needs: build
144 | # This if can't use the env context - find better way later.
145 | if: ${{ github.event_name == 'push' }}
146 | permissions:
147 | contents: write
148 |
149 | steps:
150 | - uses: actions/download-artifact@v4
151 | if: ${{ github.event_name == 'push' && ( env.MULTIBRANCH == 'true' || github.ref == format('refs/heads/{0}', env.DEFAULT_BRANCH )) }}
152 | with:
153 | name: gh-pages-build
154 | path: _gh-pages/
155 |
156 | # As of 2023, we could publish to pages via a Deployment. This
157 | # isn't done yet to give it time to stabilize (out of beta), and
158 | # also having a gh-pages branch to check out is rather
159 | # convenient.
160 |
161 | # Deploy
162 | # https://github.com/peaceiris/actions-gh-pages
163 | - name: Deploy
164 | uses: peaceiris/actions-gh-pages@v3
165 | if: ${{ github.event_name == 'push' && ( env.MULTIBRANCH == 'true' || github.ref == format('refs/heads/{0}', env.DEFAULT_BRANCH )) }}
166 | with:
167 | publish_branch: gh-pages
168 | github_token: ${{ secrets.GITHUB_TOKEN }}
169 | publish_dir: _gh-pages/
170 | force_orphan: true
171 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /_build
2 | /venv
3 | .ipynb_checkpoints/
4 | .vscode
5 | catfacts.jsonl
6 | jupyter_execute/
7 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Creative Commons Attribution 4.0
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Minimal makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line, and also
5 | # from the environment for the first two.
6 | SPHINXOPTS ?=
7 | SPHINXBUILD ?= sphinx-build
8 | SOURCEDIR = content
9 | BUILDDIR = _build
10 |
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 |
15 | .PHONY: help Makefile
16 |
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
21 |
22 | # Live reload site documents for local development
23 | livehtml:
24 | sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
25 |
--------------------------------------------------------------------------------
/content/a_list.dot:
--------------------------------------------------------------------------------
1 | strict digraph a_list {
2 | graph [compound=true];
3 |
4 | node [style = filled, color=cyan];
5 |
6 | a_list [label="Variable a_list (lvalue)", color=gold];
7 | aobj [label="PyObject a_list"];
8 | one [label="PyObject 1"];
9 | hello [label="PyObject hello"];
10 | oneptwo [label="PyObject 1.2"];
11 |
12 | a_list -> aobj;
13 |
14 |
15 |
16 | subgraph cluster_adata {
17 | label = "Data array for a_list PyObject";
18 | color = aquamarine;
19 | style = filled;
20 |
21 | adata_0 [label="element [0]"];
22 | adata_1 [label="element [1]"];
23 | adata_2 [label="element [2]"];
24 | }
25 |
26 | adata_0 -> one;
27 | adata_1 -> hello;
28 | adata_2 -> oneptwo;
29 |
30 |
31 | aobj -> adata_1 [lhead=cluster_adata];
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/content/a_list.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
83 |
--------------------------------------------------------------------------------
/content/binder.rst:
--------------------------------------------------------------------------------
1 | Binder
2 | ======
3 |
4 | .. questions::
5 |
6 |    - Why is sharing code alone sometimes not sufficient?
7 | - How to share a computational environment?
8 | - What is Binder?
9 | - How to binderize my Python repository?
10 | - How to publish my Python repository?
11 |
12 | .. objectives::
13 |
14 | - Learn about reproducible computational environments.
15 | - Learn to create and share custom computing environments with Binder.
16 | - Learn to get a DOI from Zenodo for a repository.
17 |
18 |
19 | Why is it sometimes not enough to share your code?
20 | --------------------------------------------------
21 |
22 | .. image:: img/binder/python_unmasked.jpg
23 |
24 |
25 | Exercise 1
26 | ~~~~~~~~~~
27 |
28 | .. challenge:: Binder-1: Discuss better strategies than only code sharing (10 min)
29 |
30 | Lea is a PhD student in computational biology and after 2 years of intensive
31 | work, she is finally ready to publish her first paper. The code she has used
32 | for analyzing her data is available on GitHub but her supervisor who is an
33 | advocate of open science told her that sharing code is not sufficient.
34 |
35 | **Why is it possibly not enough to share "just" your code?
36 | What problems can you anticipate 2-5 years from now?**
37 |
38 | We form small groups (4-5 persons) and discuss in groups. If the workshop is
39 | online, each group will join a breakout room.
40 | If joining a group is not possible or practical, we use the shared document
41 | to discuss this collaboratively.
42 |
43 |    Each group writes a summary (bullet points) of the discussion in the workshop
44 | shared document (the link will be provided by your instructors).
45 |
46 |
47 | Sharing a computing environment with Binder
48 | -------------------------------------------
49 |
50 | `Binder <https://mybinder.org>`__ allows you to create
51 | custom computing environments that can be shared and used by many remote users.
52 | It uses `repo2docker `__ to
53 | create a container image (`docker `__ image) of a
54 | project using information contained in included configuration files.
55 |
56 | Repo2docker is a standalone package that you can install locally on your laptop
57 | but an `online Binder `__ service is freely available.
58 | This is what we will be using in the tutorial.
59 |
60 | The main objective of this exercise is to learn to fork a repository and add a
61 | requirement file to share the computational environment with Binder.
62 |
63 | .. image:: https://opendreamkit.org/public/images/use-cases/reproducible_logbook.png
64 |
65 | Credit: `Juliette Taka, Logilab and the OpenDreamKit project (2017) `_
66 |
67 |
68 | Binder exercise/demo
69 | ~~~~~~~~~~~~~~~~~~~~
70 |
71 | In an earlier episode (Data visualization with Matplotlib) we have created this notebook:
72 |
73 | .. code-block:: python
74 |
75 | import pandas as pd
76 | import matplotlib.pyplot as plt
77 |
78 | url = "https://raw.githubusercontent.com/plotly/datasets/master/gapminder_with_codes.csv"
79 | data = pd.read_csv(url)
80 | data_2007 = data[data["year"] == 2007]
81 |
82 | fig, ax = plt.subplots()
83 |
84 | ax.scatter(x=data_2007["gdpPercap"], y=data_2007["lifeExp"], alpha=0.5)
85 |
86 | ax.set_xscale("log")
87 |
88 | ax.set_xlabel("GDP (USD) per capita")
89 | ax.set_ylabel("life expectancy (years)")
90 |
91 | We will now first share it via `GitHub `__ "statically",
92 | then using `Binder `__.
93 |
94 | .. challenge:: Binder-2: Exercise/demo: Make your notebooks reproducible by anyone (15 min)
95 |
96 | Instructor demonstrates this. **This exercise (and all following)
97 | requires git/GitHub knowledge and accounts, which wasn't a
98 | prerequisite of this course. Thus, this is a demo (and might even
99 | be too fast for you to type-along). Watch the video if you
100 | are reading this later on**:
101 |
102 | - Creates a GitHub repository
103 | - Uploads the notebook file
104 | - Then we look at the statically rendered version of the notebook on GitHub
105 | - Create a ``requirements.txt`` file which contains:
106 |
107 | .. code-block:: none
108 |
109 | pandas==1.2.3
110 | matplotlib==3.4.2
111 |
112 | - Commit and push also this file to your notebook repository.
113 | - Visit https://mybinder.org and copy paste the code under "Copy the text below ..." into your `README.md`:
114 |
115 | .. image:: img/binder/binder.jpg
116 |
117 | - Check that your notebook repository now has a "launch binder"
118 | badge in your `README.md` file on GitHub.
119 | - Try clicking the button and see how your repository is launched
120 | on Binder (can take a minute or two). Your notebooks can now be explored and executed in the cloud.
121 | - Enjoy being fully reproducible!
122 |
123 |
124 | How can I get a DOI from Zenodo?
125 | ---------------------------------
126 |
127 | `Zenodo <https://zenodo.org>`__ is a general purpose open-access
128 | repository built and operated by `CERN `__ and `OpenAIRE
129 | `__ that allows researchers to archive and get a
130 | `Digital Object Identifier (DOI) `__ to data that they
131 | share.
132 |
133 | .. challenge:: Binder-3: Link a Github repository with Zenodo (optional)
134 |
135 | **Everything you deposit on Zenodo is meant to be kept (long-term archive).
136 | Therefore we recommend to practice with the Zenodo "sandbox" (practice/test area)
137 | instead:** https://sandbox.zenodo.org
138 |
139 | 1. **Link GitHub with Zenodo**:
140 |
141 | - Go to https://sandbox.zenodo.org (or to https://zenodo.org for the real upload later, after practicing).
142 | - Log in to Zenodo with your GitHub account. Be aware that you may need to
143 | authorize Zenodo application (Zenodo will redirect you back to GitHub for
144 | Authorization).
145 | - Choose the repository webhooks options.
146 | - From the drop-down menu next to your email address at the top of the page, select GitHub.
147 | - You will be presented with a list of all your Github repositories.
148 |
149 | 2. **Archiving a repo**:
150 |
151 | - Select a repository you want to archive on Zenodo.
152 |    - Toggle the "on" button next to the repository you need to archive.
153 | - Click on the Repo that you want to reserve.
154 | - Click on Create release button at the top of the page. Zenodo will redirect you back to GitHub’s repo page to generate a release.
155 |
156 | 3. **Trigger Zenodo to Archive your repository**
157 |
158 | - Go to GitHub and create a release. Zenodo will automatically download a .zip-ball of each new release and register a DOI.
159 | - If this is the first release of your code then you should give it a
160 | version number of v1.0.0. Add description for your release then click the
161 | Publish release button.
162 | - Zenodo takes an archive of your GitHub repository each time you create a new Release.
163 |
164 | 4. **To ensure that everything is working**:
165 |
166 | - Go to https://zenodo.org/account/settings/github/ (or the corresponding
167 | sandbox at https://sandbox.zenodo.org/account/settings/github/), or the
168 | Upload page (https://zenodo.org/deposit), you will find your repo is
169 | listed.
170 |    - Click on the repo, Zenodo will redirect you to a page that contains a DOI for your repo with the information that you added to the repo.
171 | - You can edit the archive on Zenodo and/or publish a new version of your software.
172 | - It is recommended that you add a description for your repo and fill in other metadata in the edit page. Instead of editing metadata
173 | manually, you can also add a ``.zenodo.json`` or a ``CITATION.cff`` file to your repo and Zenodo will infer the metadata from this file.
174 | - Your code is now published on a Github public repository and archived on Zenodo.
175 | - Update the README file in your repository with the newly created zenodo badge.
176 |
177 |
178 | Create a Binder link for your Zenodo DOI
179 | ----------------------------------------
180 |
181 | Rather than specifying a GitHub repository when launching binder, you can instead use a Zenodo DOI.
182 |
183 | .. challenge:: Binder-4: Link Binder with Zenodo (10 min)
184 |
185 | We will be using an existing Zenodo DOI `10.5281/zenodo.3886864 `_ to start Binder:
186 |
187 |    - Go to `https://mybinder.org <https://mybinder.org>`__ and fill information using Zenodo DOI (as shown on the animation below):
188 |
189 | .. image:: https://miro.medium.com/max/1050/1*xOABVY2hNtVmjV5-LXreFw.gif
190 |
191 | - You can also get a Binder badge and update the README file in the
192 | repository. It is good practice to add both the Zenodo badge and the
193 | corresponding Binder badge.
194 |
195 | .. keypoints::
196 |
197 |    - It is easy to share reproducible computational environments
198 | - Binder provides a way for anyone to test and run code - without
199 | you needing to set up a dedicated server for it.
200 | - Zenodo provides permanent archives and a DOI.
201 |
--------------------------------------------------------------------------------
/content/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | # import os
14 | # import sys
15 | # sys.path.insert(0, os.path.abspath('.'))
16 |
17 |
18 | # -- Project information -----------------------------------------------------
19 |
20 | project = 'Python for Scientific Computing'
21 | copyright = '2020-2024, The contributors'
22 | author = 'The contributors'
23 | github_user = 'AaltoSciComp'
24 | github_repo_name = 'python-for-scicomp' # auto-detected from dirname if blank
25 | github_version = 'master/content/' # with trailing slash
26 |
27 |
28 | # -- General configuration ---------------------------------------------------
29 |
30 | highlight_language = 'python3'
31 |
32 | # Add any Sphinx extension module names here, as strings. They can be
33 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
34 | # ones.
35 | extensions = [
36 | 'sphinx_lesson',
37 | 'sphinx_rtd_theme_ext_color_contrast',
38 | 'sphinx.ext.todo',
39 | 'sphinx.ext.intersphinx',
40 | 'sphinx.ext.mathjax',
41 | 'sphinx_aaltoscicomp_branding',
42 | 'sphinxext.opengraph',
43 | 'sphinx_thebe',
44 | ]
45 | myst_enable_extensions = ['colon_fence']
46 |
47 | thebe_config = {
48 | "selector": "div.highlight"
49 | }
50 |
51 | nb_execution_mode = "off"
52 |
53 | ogp_site_name = "Python for Scientific Computing"
54 | ogp_site_url = 'https://aaltoscicomp.github.io/python-for-scicomp/'
55 | import datetime
56 | if datetime.date.today() < datetime.date(2022,12,15):
57 | ogp_image = 'https://www.aalto.fi/sites/g/files/flghsv161/files/styles/o_914w_ah_n/public/2022-11/PFSC22_v2.png'
58 | ogp_image_alt = 'Python for Scientific Computing course logo with date of 22-25/11/2022, twitch.tv/coderefinery, and partner logos'
59 |
60 | copybutton_exclude = '.linenos, .gp'
61 |
62 | import os
63 | if (
64 | 'GITHUB_ACTION' in os.environ
65 | and os.environ.get('GITHUB_REPOSITORY', '').lower() == 'aaltoscicomp/python-for-scicomp'
66 | and os.environ.get('GITHUB_REF') == 'refs/heads/master'
67 | ):
68 | html_js_files = [
69 | ('https://plausible.cs.aalto.fi/js/script.js', {"data-domain": "aaltoscicomp.github.io/python-for-scicomp", "defer": "defer"}),
70 | ]
71 |
72 |
73 | # Add any paths that contain templates here, relative to this directory.
74 | #templates_path = ['_templates']
75 |
76 | # List of patterns, relative to source directory, that match files and
77 | # directories to ignore when looking for source files.
78 | # This pattern also affects html_static_path and html_extra_path.
79 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'venv', 'jupyter_execute']
80 |
81 |
82 | # -- Options for HTML output -------------------------------------------------
83 |
84 | # The theme to use for HTML and HTML Help pages. See the documentation for
85 | # a list of builtin themes.
86 | #
87 | html_theme = 'sphinx_rtd_theme'
88 |
89 | # Add any paths that contain custom static files (such as style sheets) here,
90 | # relative to this directory. They are copied after the builtin static files,
91 | # so a file named "default.css" will overwrite the builtin "default.css".
92 | #html_static_path = ['_static']
93 |
94 |
95 | # HTML context:
96 | from os.path import dirname, realpath, basename
97 | html_context = {'display_github': True,
98 | 'github_user': github_user,
99 | # Auto-detect directory name. This can break, but
100 | # useful as a default.
101 | 'github_repo': github_repo_name or basename(dirname(realpath(__file__))),
102 | 'github_version': github_version,
103 | }
104 |
105 |
106 | intersphinx_mapping = {
107 | 'python': ('https://docs.python.org/3', None),
108 | 'numpy': ('https://numpy.org/doc/stable', None),
109 | 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None),
110 | 'matplotlib': ('https://matplotlib.org/stable', None),
111 | 'requests': ('https://requests.readthedocs.io/en/latest/', None),
112 | }
113 |
--------------------------------------------------------------------------------
/content/exercises.md:
--------------------------------------------------------------------------------
1 | # List of exercises
2 |
3 | ## Full list
4 |
5 | This is a list of all exercises and solutions in this lesson, mainly
6 | as a reference for helpers and instructors. This list is
7 | automatically generated from all of the other pages in the lesson.
8 | Any single teaching event will probably cover only a subset of these,
9 | depending on their interests.
10 |
11 | ```{exerciselist}
12 | ```
13 |
--------------------------------------------------------------------------------
/content/format_comparison_array.csv:
--------------------------------------------------------------------------------
1 | File format,File size [MB],Write time [ms],Read time [ms],Data matches exactly
2 | CSV,23.8,690,294,True
3 | npy,7.63,13.8,2.72,True
4 | HDF5,7.63,27,3.97,True
5 | NetCDF4,7.64,28.8,12.2,True
6 |
--------------------------------------------------------------------------------
/content/format_comparison_tidy.csv:
--------------------------------------------------------------------------------
1 | File format,File size [MB],Write time [ms],Read time [ms],Data matches exactly
2 | CSV,4.57,360,81.2,False
3 | Feather,2.2,12.9,6.67,True
4 | Parquet,1.82,35.1,8.96,True
5 | HDF5,4.89,41.7,29.6,True
6 | NetCDF4,6.9,92.9,74.2,True
7 |
--------------------------------------------------------------------------------
/content/guide.rst:
--------------------------------------------------------------------------------
1 | Instructor's guide
2 | ==================
3 |
4 | Learner personas
5 | ----------------
6 |
7 | A is an early career PhD researcher who has been using Python a bit,
8 | but is not sure what they know or don't know. They want to be able to
9 | do their research more efficiently and make sure that they are using
10 | the right tools. A may know that numpy exists, etc. and could
11 | theoretically read some about it themselves, but aren't sure if they
12 | are going in the right direction.
13 |
14 | A2 can use numpy and pandas, but has learned little bits here and
15 | there and hasn't had a comprehensive introduction. They want to
16 | ensure they are using best practices. (Baseline of high-level
17 | packages)
18 |
19 | B is a mid-to-late undergraduate student who has used Python in some
20 | classes. They have possibly learned the syntax and enough to use it
21 | in courses, but in a course-like manner where they are expected to
22 | create everything themselves.
23 |
24 |
25 | Prerequisites:
26 | - Knowing basic Python syntax
27 | - Watch the command line crash course, if you aren't familiar.
28 |
29 | Not prerequisites:
30 | - Any external libraries, e.g. numpy
31 | - Knowing how to make scripts or use Jupyter
32 |
33 |
34 |
35 | About each section
36 | ------------------
37 |
38 | In general, "Python for Scientific Computing" could be a multi-year
39 | course. We can't even pretend to really teach even a small fraction
40 | of it. We can, however, introduce people to things that can very
41 | easily be missed in the typical academic career path.
42 |
43 | * **Python intro:** We can't really replace a Python tutorial, but
44 | here we try to outline some of the main points. We don't go over
45 | this in the course.
46 |
47 | * **Jupyter:** Jupyter is somewhat useful, but the main reason we go
48 | over it is that it provides a convenient user interface for the
49 | other programming lessons (it's easier to spend a bit of time with
50 | Jupyter than expect people to be able to use some
51 | editor/IDE/shell/etc). So, we do start from the beginning, so that
52 | people can do the other lessons, but also try to teach some advanced
53 | tips and tricks.
54 |
55 | * **Numpy:** The basic of much of the rest of scipy, so we need to
56 | cover it. We try to get the main principles out, but if someone
57 | already knows it this can be a bit boring. We try to make sure
58 | everyone comes out with an appreciation for vectorization and
59 | broadcasting.
60 |
61 | * **Pandas:** A lot of similar goals to the Numpy section, especially
62 | the concepts behind Dataframes that one needs to know in order to
63 | read other documentation.
64 |
65 | * **Visualization:** Matplotlib is getting a bit old, but is still the
66 |   backbone of other plotting packages.  We try to get across the ideas
67 | of the matplotlib API that can be seen in other packages and the
68 | importance of scripted plots.
69 |
70 | * **Data formats:** Input/output/storage is a common task, and can
71 |   easily either be a bottleneck or a huge mess.  This lesson tries to
72 | show some best practices with data formats and, as usual, get the
73 | idea to not "do it yourself". Pandas is used as a common framework,
74 | but we should point out there are plenty of other options.
75 |
76 | * **Scripts:** The most important lesson here is to break out of
77 | Jupyter/run buttons of editors. If you can't make actual programs
78 | with an actual interface, you can't scale up.
79 |
80 | * This is the first lesson to introduce the command line. We
81 | recommend being as simple as possible: at least demonstrate the
82 | JupyterLab terminal and discuss the bigger picture behind what it
83 | means and why.
84 |
85 |   * This is also the first lesson to use a non-Jupyter code editor.  We
86 | recommend again being simple: use the JupyterLab code editor to
87 | start off, and carefully explain what is going on.
88 |
89 | * **Scipy:** We don't cover much here (this is super short), but the
90 | point is scipy exists and the concept of wrapping existing C/fortran
91 | libraries and so on.
92 |
93 | * **Library ecosystem:** This was an overview of the types of packages
94 | available in the "scipy ecosystem", which is a large and ill-defined
95 | thing. But there is another point: choosing what to use. Do you
96 | trust a half-done thing published on someone's personal webpage? If
97 | it's on Github? How do you make your code more reusable? When
98 | coming from academic courses, you get a "build it yourself" idea,
99 | which isn't sustainable in research.
100 |
101 | * **Parallel programming:**
102 |
103 | * **Dependencies:** The main point here is environments, another thing
104 | you often don't learn in courses.
105 |
106 | * There is a lot of material here. Consider what you will demo,
107 | what will be done as exercises, and what is advanced/optional.
108 | However, it is the fourth-day lesson that is most interactive, so
109 |     it is OK if it takes a while to go through everything.
110 |
111 | * If someone else installs Anaconda for a user (e.g. admin-managed
112 | laptop), the conda environment creations (with ``--name``,
113 | possibly with ``--prefix`` too?) may not work. Be prepared for
114 | this and mention it. You don't need to solve the problem but
115 | acknowledge that the lesson becomes a demo. The virtualenv part
116 | should hopefully work for them.
117 |
118 | * **Binder:** Binder exists and can help make code
119 | reproducible/reusable by others.
120 |
121 | * **Packaging:** How to make your code reusable by others. By the
122 | time we get here, people are tired and the topics get involved. We
123 | more explicitly say "you might want to watch and take this as a
124 | demo".
125 |
126 |
--------------------------------------------------------------------------------
/content/img/binder/binder.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/img/binder/binder.jpg
--------------------------------------------------------------------------------
/content/img/binder/python_unmasked.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/img/binder/python_unmasked.jpg
--------------------------------------------------------------------------------
/content/img/installation/anaconda-navigator-jupyterlab.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/img/installation/anaconda-navigator-jupyterlab.png
--------------------------------------------------------------------------------
/content/img/installation/anaconda-prompt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/img/installation/anaconda-prompt.png
--------------------------------------------------------------------------------
/content/img/installation/jupyterlab-notebook.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/img/installation/jupyterlab-notebook.png
--------------------------------------------------------------------------------
/content/img/installation/jupyterlab-terminal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/img/installation/jupyterlab-terminal.png
--------------------------------------------------------------------------------
/content/img/jupyter/main-ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/img/jupyter/main-ui.png
--------------------------------------------------------------------------------
/content/img/jupyter/notebook-ui.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/img/jupyter/notebook-ui.png
--------------------------------------------------------------------------------
/content/img/numpy-advanced/02_views.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
337 |
--------------------------------------------------------------------------------
/content/img/pandas/01_table_dataframe.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
263 |
--------------------------------------------------------------------------------
/content/img/pandas/tidy_data.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/img/pandas/tidy_data.png
--------------------------------------------------------------------------------
/content/img/xarray/xarray_1d_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/img/xarray/xarray_1d_plot.png
--------------------------------------------------------------------------------
/content/img/xarray/xarray_2d_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/img/xarray/xarray_2d_plot.png
--------------------------------------------------------------------------------
/content/img/xarray/xarray_dataset_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/img/xarray/xarray_dataset_image.png
--------------------------------------------------------------------------------
/content/img/xarray/xarray_hist.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/img/xarray/xarray_hist.png
--------------------------------------------------------------------------------
/content/index.rst:
--------------------------------------------------------------------------------
1 | ===============================
2 | Python for Scientific Computing
3 | ===============================
4 |
5 | .. admonition:: Attending the course 5-7 November, 2024?
6 |
7 | `See the course page here
8 | `__
9 | and watch at https://twitch.tv/coderefinery.
10 | Whether you are or aren't, the course material is below. Videos
11 | will appear in `this playlist `__ (Last year's videos: `playlist `__).
12 |
13 |
14 | Python is a modern, object-oriented programming language, which has
15 | become popular in several areas of software development. This course
16 | discusses how Python can be utilized in scientific computing. The
17 | course starts by introducing some of the main Python tools for
18 | computing: Jupyter for interactive analysis, NumPy and SciPy for
19 | numerical analysis, Matplotlib for visualization, and so on. In
20 | addition, it talks about *how* python is used:
21 | related scientific libraries, reproducibility, and the broader
22 | ecosystem of science in Python, because your work is more than the raw
23 | code you write.
24 |
25 | This course (like any course) can't teach you Python... it can show
26 | you some examples, let you see how experts do things, and prepare you
27 | to learn yourself as you need to.
28 |
29 | .. _prerequisites:
30 |
31 | .. prereq::
32 |
33 | - Knowing basic Python syntax. We assume that you can do some
34 | Python programming, but not much more than that. We don't cover
35 | standard Python programming. `Here is a short course on basic Python
36 | syntax, with further references `__.
37 | - Watch or read the `command line crash course
38 | `__, if you aren't
39 | familiar.
40 | - You should be able to use a text editor to edit files.
41 | - The :doc:`software installation ` described below
42 | (basically, anaconda).
43 |
44 | These are not prerequisites:
45 |
46 | - Any external libraries, e.g. numpy
47 | - Knowing how to make scripts or use Jupyter
48 |
49 |
50 | .. admonition:: Videos and archived Q&A
51 |
52 | Videos and material from past instances:
53 |
54 | * 2021: `this YouTube playlist
55 | `__.
56 | * 2022: `here
57 | `__,
58 | Q&A: `days 1-2
59 | `__, `days 3-4
60 | `__
61 |
62 | * 2023: `Videos
63 | `__
64 |
65 | * 2024 (Please contact us if you would like to help to process the videos): `Videos `__
66 |
67 |
68 | .. csv-table::
69 | :widths: auto
70 | :delim: ;
71 |
72 | (prereq) ; :doc:`python`
73 | 30 min ; :doc:`jupyter`
74 | 60 min ; :doc:`numpy` or :doc:`numpy-advanced`
75 | 60 min ; :doc:`pandas`
76 | 30 min ; :doc:`xarray`
77 | 60 min ; :doc:`plotting-matplotlib`
78 | 60 min ; :doc:`plotting-vega-altair`
79 | 30 min ; :doc:`work-with-data`
80 | 60 min ; :doc:`scripts`
81 | 40 min ; :doc:`profiling`
82 | 20 min ; :doc:`productivity`
83 | 30 min ; :doc:`web-apis`
84 | 15 min ; :doc:`scipy`
85 | 30 min ; :doc:`libraries`
86 | 45 min ; :doc:`parallel`
87 | 45 min ; :doc:`dependencies`
88 | 30 min ; :doc:`binder`
89 | 60 min ; :doc:`packaging`
90 |
91 |
92 | .. toctree::
93 | :maxdepth: 1
94 | :caption: The lesson
95 | :hidden:
96 |
97 | python
98 | jupyter
99 | numpy
100 | numpy-advanced
101 | pandas
102 | xarray
103 | plotting-matplotlib
104 | plotting-vega-altair
105 | work-with-data
106 | scripts
107 | profiling
108 | productivity
109 | scipy
110 | libraries
111 | dependencies
112 | binder
113 | parallel
114 | packaging
115 | web-apis
116 |
117 | .. toctree::
118 | :maxdepth: 1
119 | :caption: Reference
120 |
121 | installation
122 | quick-reference
123 | exercises
124 | guide
125 | data-formats
126 |
127 |
128 | .. _learner-personas:
129 |
130 | Who is the course for?
131 | ======================
132 |
133 | The course is targeted towards these learner personas:
134 |
135 | * A is an early-career PhD researcher who has been using Python a bit,
136 | but is not sure what they know or don't know. They want to be able
137 | to do their research more efficiently and make sure that they are
138 | using the right tools. A may know that numpy exists, etc. and could
139 | theoretically read some about it themselves, but aren't sure if they
140 | are going in the right direction.
141 |
142 | * A2 can use numpy and pandas, but has learned little bits here and
143 | there and hasn't had a comprehensive introduction. They want to
144 | ensure they are using best practices. (Baseline of high-level
145 | packages)
146 |
147 | * B is a mid-to-late undergraduate student who has used Python in some
148 | classes. They have possibly learned the syntax and enough to use it
149 | in courses, but in a course-like manner where they are expected to
150 | create everything themselves: they want to know how to reuse tools
151 | that already exist.
152 |
153 |
154 | Motivation
155 | ==========
156 |
157 | Why Python
158 | ----------
159 |
160 | Python has become popular, largely due to good reasons. It's very easy
161 | to get started, there's lots of educational material, a huge amount of
162 | libraries for doing everything imaginable. Particularly in the
163 | scientific computing space, there is the Numpy, Scipy, and matplotlib
164 | libraries which form the basis of almost everything. Numpy and Scipy
165 | are excellent examples of using Python as a glue language, meaning to
166 | glue together battle-tested and well performing code and present them
167 | with an easy to use interface. Also machine learning and deep
168 | learning frameworks have embraced python as the glue language of
169 | choice. And finally, Python is open source, meaning that anybody can
170 | download and install it on their computer, without having to bother
171 | with acquiring a license or such. This makes it easier to distribute
172 | your code e.g. to collaborators in different universities.
173 |
174 |
175 | Why not Python for Scientific Computing
176 | ---------------------------------------
177 |
178 | While Python is extremely popular in scientific computing today, there
179 | are certainly things better left to other tools.
180 |
181 | - Implementing performance-critical kernels. Python is a **very**
182 | slow language, which often doesn't matter if you can offload the
183 | heavy lifting to fast compiled code, e.g. by using Numpy array
184 | operations. But if what you're trying to do isn't *vectorizable*
185 | then you're out of luck. An alternative to Python, albeit much less
186 | mature and with a smaller ecosystem, but which provides very fast
187 | generated code, is *Julia*.
188 |
189 | - Creating libraries that can be called from other languages. In this
190 | case you'll often want to create a library with a C interface, which
191 | can then be called from most languages. Suitable languages for this
192 | sort of task, depending on what you are doing, could be Rust, C,
193 | C++, or Fortran.
194 |
195 | - You really like static typing, or functional programming
196 | approaches. *Haskell* might be what you're looking for.
197 |
198 |
199 | Python 2 vs Python 3
200 | --------------------
201 |
202 | Python 3.0 came out in September 2008 and was just slightly different
203 | enough that most code had to be changed, which meant that many
204 | projects ignored it for many years. It was about 3-5 years until the
205 | differences were reduced enough (and better transition plans came out,
206 | so that it was reasonable to use a single code for both versions) that
207 | it became more and more adopted in the scientific community. Python 2
208 | finally became unsupported in 2020, and by now Python 3 is the de facto
209 | standard.
210 |
211 | At this point, all new projects should use Python 3, and existing
212 | actively developed projects should be upgraded to use it. Still, you
213 | might find some old unmaintained tools that are only compatible with
214 | Python 2.
215 |
216 |
217 |
218 | Credits
219 | =======
220 |
221 | This course was originally designed by Janne Blomqvist.
222 |
223 | In 2020 it was completely redesigned by a team of the following:
224 |
225 | * Authors: Radovan Bast, Richard Darst, Anne Fouilloux, Thor Wikfeldt, ...
226 | * Editor:
227 | * Testers and advisors: Enrico Glerean
228 |
229 | We follow The Carpentries Code of Conduct: https://docs.carpentries.org/topic_folders/policies/code-of-conduct.html
230 |
231 |
232 | See also
233 | ========
234 |
235 | * `High Performance Data Analytics in Python
236 | `__ is a logical follow-up to
237 | this lesson that goes more in-depth to tools of high-performance
238 | and large-scale Python.
239 |
--------------------------------------------------------------------------------
/content/installation.rst:
--------------------------------------------------------------------------------
1 | Software installation
2 | =====================
3 |
4 | This course is interactive and demonstrates many different tools.
5 | Thus, even beyond Python, extra software (Python libraries) needs to
6 | be installed. This page contains the instructions.
7 |
8 | **Once the course starts, we don't have time to stop for installing
9 | software.**
10 |
11 | Please make sure before the course that you have all the required
12 | software installed or some other way access to it. For example, the
13 | workshop could be done with a remote Jupyter server, as long as you
14 | can use the terminal from the Jupyter (you need to be able to access
15 | the command line for some lessons).
16 |
17 | .. admonition:: Do you need help?
18 | :class: important
19 |
20 | Participants from a partner institution are invited to installation help
21 | sessions. (Hint: ask your institution to become a partner if it
22 | isn't already!)
23 |
24 | Otherwise, if you need installation help, show this page to someone
25 | around you and they can probably help. These are relatively
26 | standard tools.
27 |
28 | Don't be afraid to ask for help. Installing scientific software is
29 | *harder than it should be* and it helps to have someone guide you
30 | through it.
31 |
32 | .. highlight:: console
33 |
34 |
35 |
36 | Python
37 | ------
38 |
39 | We expect you to have a working Python installation with some common
40 | libraries. **We currently recommend Miniforge, which includes the base
41 | installation and provides packages through a different, freely usable channel.** You can
42 | explore the options in the tabs below.
43 |
44 | .. admonition:: Python, conda, anaconda, miniforge, etc?
45 | :class: dropdown
46 |
47 | Unfortunately there's a lot of jargon. We'll go over this in the
48 | course but here is a crash course:
49 |
50 | * **Python** is a programming language very commonly used in
51 | science, it's the topic of this course.
52 | * **Conda** is a package manager: it allows distributing and
53 | installing packages, and is designed for complex scientific
54 | code.
55 | * **Mamba** is a re-implementation of Conda to be much faster with
56 | resolving dependencies and installing things.
57 | * An **Environment** is a self-contained collection of packages
58 | which can be installed separately from others. They are used so
59 | each project can install what it needs without affecting others.
60 | * **Anaconda** is a commercial distribution of Python+Conda+many
61 | packages that all work together. It used to be freely usable for
62 | research, but since ~2023-2024 it's more limited. Thus, we don't
63 | recommend it (even though it has a nice graphical user interface).
64 | * **conda-forge** is another channel of distributing packages that
65 | is maintained by the community, and thus can be used by anyone.
66 | (Anaconda's parent company also hosts conda-forge packages)
67 | * **miniforge** is a distribution of conda pre-configured for
68 | conda-forge. It operates via the command line.
69 | * **miniconda** is a distribution of conda pre-configured to use
70 | the Anaconda channels.
71 |
72 | .. tabs::
73 |
74 | .. group-tab:: Miniforge
75 |
76 | This is our recommended method - it can be used for any purpose
77 | and makes a strong base for the future.
78 |
79 | Follow the `instructions on the miniforge web page
80 | `__. This installs
81 | the base, and from here other packages can be installed.
82 |
83 | ..
84 | You can read how to install miniconda from the `CodeRefinery
85 | installation instructions
86 | `__.
87 |
88 | Miniforge uses the command line - this gives you the most power
89 | but can feel unfamiliar. See the `command line crash course
90 | `__ for an intro.
91 |
92 | .. group-tab:: Anaconda
93 |
94 | Anaconda is easier to get started with, but may be more limiting
95 | in the future. The Anaconda Navigator provides a graphical
96 | interface to most of what you would need.
97 |
98 | The `Anaconda Python distribution
99 | `__ conveniently packages
100 | everything, but its license does not allow large organizations to
101 | use it for free (and has actually been enforced against
102 | universities).
103 |
104 | Note the license of Anaconda - there were recently issues with
105 | it being used by large universities for free, and this is not
106 | yet fully resolved.
107 |
108 | .. group-tab:: Other options
109 |
110 | There are many ways to install Python. Other methods can work,
111 | as long as you can install the libraries from the
112 | ``environment.yml`` file mentioned in the Miniforge
113 | instructions.
114 |
115 | We don't currently provide a ``requirements.txt`` for installing
116 | the required packages without Conda/Mamba, though.
117 |
118 |
119 |
120 | Starting Python
121 | ---------------
122 |
123 | You need to start Python in a way that activates conda/mamba.
124 |
125 | .. tabs::
126 |
127 | .. group-tab:: Miniforge
128 |
129 | .. tabs::
130 |
131 | .. group-tab:: Linux / MacOS
132 |
133 | Linux/MacOS: Each time you start a new command line terminal,
134 | you can activate Miniforge by running the command below. This is needed so that
135 | Miniforge is usable wherever you need, but doesn't affect any
136 | other software on your computer (this is not needed if you
137 | choose "Do you wish to update your shell profile to
138 | automatically initialize conda?", but then it will always be
139 | active)::
140 |
141 | $ source ~/miniforge3/bin/activate
142 |
143 | .. group-tab:: Windows
144 |
145 | Windows: Use the "Miniforge Prompt" to start Miniforge. This
146 | will set up everything so that ``conda`` and ``mamba`` are
147 | available.
148 |
149 | .. group-tab:: Anaconda
150 |
151 | The `Anaconda Navigator
152 | `__ provides a convenient
153 | way to access the software. It can be installed from that page.
154 |
155 |
156 | .. group-tab:: Other options
157 |
158 | You are on your own here.
159 |
160 |
161 | Python for SciComp software environment
162 | ---------------------------------------
163 |
164 | Once Python and conda/mamba are installed, you can use it to install
165 | an environment. An **environment** is a self-contained set of extra
166 | libraries - different projects can use different environments to not
167 | interfere with each other. This environment will have all of the
168 | software needed for this particular course.
169 |
170 | .. tabs::
171 |
172 | .. group-tab:: Miniforge
173 |
174 | This `environment file
175 | `__
176 | contains all packages needed for the course, and can be
177 | installed as shown below. The following command will install an
178 | environment named ``python-for-scicomp`` (there may be lots of
179 | warning messages: this is OK if it still goes through):
180 |
181 | .. tabs::
182 |
183 | .. group-tab:: Linux / MacOS
184 |
185 | ::
186 |
187 | $ mamba env create -n python-for-scicomp -f https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/master/software/environment.yml
188 |
189 | .. group-tab:: Windows
190 |
191 | ::
192 |
193 | $ mamba env create -n python-for-scicomp -f https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/master/software/environment.yml
194 |
195 | Each time you start a new command line, you need to activate
196 | miniforge and this environment:
197 |
198 | .. tabs::
199 |
200 | .. group-tab:: Linux / MacOS
201 |
202 | ::
203 |
204 | $ source ~/miniforge3/bin/activate
205 | $ conda activate python-for-scicomp
206 |
207 | .. group-tab:: Windows
208 |
209 | ::
210 |
211 | $ # Start the Miniforge Prompt.
212 | $ conda activate python-for-scicomp
213 |
214 | .. group-tab:: Anaconda
215 |
216 | Anaconda includes most of the things needed for the course
217 | automatically, but as of 2024 not everything. You can use the
218 | navigator to create new environments from this `environment
219 | file
220 | `__.
221 | You'll have to download it and then `import it
222 | `__.
223 |
224 | When running this course's exercises, make sure the
225 | ``python-for-scicomp`` environment is activated before starting
226 | JupyterLab or any code. You need to start terminals or
227 | JupyterLab from the Anaconda Navigator for the
228 | ``python-for-scicomp`` environment to be used.
229 |
230 | .. group-tab:: Other options
231 |
232 | **Miniconda, Anaconda command line, other conda/mamba command
233 | line tools**: see "Miniforge" instructions.
234 |
235 | Virtual environments: we don't currently provide a
236 | ``requirements.txt`` but many package names can probably be
237 | copied from the ``environment.yml`` file. We really recommend
238 | conda/mamba based systems: it's designed for complex scientific
239 | software.
240 |
241 | Any other Python distribution which you can install libraries into
242 | would work, but because there are so many different ways to do this,
243 | we don't support them. You would need the extra libraries mentioned
244 | in the Miniforge instructions.
245 |
246 | Remember you need to activate the environment each time you use it.
247 |
248 |
249 |
250 | JupyterLab
251 | ----------
252 |
253 | We do most of the lessons from JupyterLab (and JupyterLab provides
254 | most of the other tools we need).
255 |
256 | .. tabs::
257 |
258 | .. group-tab:: Miniforge
259 |
260 | JupyterLab was installed in the previous step. To run it, first,
261 | start the Miniforge command line interface. Remember, you may
262 | need to activate Miniforge and the environment first.
263 |
264 | .. tabs::
265 |
266 | .. group-tab:: Linux / MacOS
267 |
268 | ::
269 |
270 | $ source ~/miniforge3/bin/activate
271 | $ conda activate python-for-scicomp
272 | $ jupyter-lab
273 |
274 | .. group-tab:: Windows
275 |
276 | ::
277 |
278 | $ # Start the Miniforge Prompt.
279 | $ conda activate python-for-scicomp
280 | $ jupyter-lab
281 |
282 | .. group-tab:: Anaconda
283 |
284 | If you install the full Anaconda distribution, this will be
285 | available and can be started either through Anaconda Navigator
286 | or command line.
287 |
288 | Make sure the ``python-for-scicomp`` environment is selected and
289 | you can start JupyterLab.
290 |
291 |
292 |
293 | Verification of Python and JupyterLab
294 | -------------------------------------
295 |
296 | .. admonition:: Watch the video
297 |
298 | See this `verification in video form
299 | `__ - if you can do this, you are
300 | ready to go for day one. Your exact steps may be a bit different.
301 |
302 | Remember that you need to activate the environment first - see the
303 | step above.
304 |
305 | .. tabs::
306 |
307 | .. group-tab:: Miniforge
308 |
309 | You can start JupyterLab from the command line::
310 |
311 | $ jupyter-lab
312 | (... Jupyter starts in a web browser)
313 |
314 |
315 | .. group-tab:: Anaconda
316 |
317 | **You should be able to start JupyterLab.** You can do this from the
318 | `Anaconda Navigator `__ (recommended if you have it):
319 |
320 | .. figure:: img/installation/anaconda-navigator-jupyterlab.png
321 | :class: with-border
322 |
323 | Starting JupyterLab from the Anaconda Navigator.
324 |
325 | ... or you can start JupyterLab from the command line::
326 |
327 | $ jupyter-lab
328 | (... Jupyter starts in a web browser)
329 |
330 |
331 |
332 | **Verify that you can start a Jupyter notebook.** We will learn how to
333 | do this in day 1, but you can try running ``print("Hello, world!")``
334 | if you want.
335 |
336 | .. figure:: img/installation/jupyterlab-notebook.png
337 | :class: with-border
338 |
339 | Starting a Jupyter Notebook from JupyterLab.
340 |
341 |
342 |
343 | Text editor
344 | -----------
345 |
346 | For one portion of the course, you will need a text editor. **If you
347 | don't know what to use, you can use the text editor that comes from
348 | JupyterLab and it will do everything you need - no extra installation
349 | needed.**
350 |
351 | .. admonition:: Other editors
352 | :class: toggle
353 |
354 | Because we need to be simple in our teaching, we only teach the
355 | most basic editors. We encourage you to try out more advanced ones
356 | yourself.
357 |
358 | For other editors, see the `CodeRefinery instructions
359 | `__. You don't
360 | exactly need a terminal editor - the graphical ones, such as VSCode or
361 | whatever you use now, will work as well.
362 |
363 |
364 |
365 | Command line
366 | ------------
367 |
368 | **You need access to the command line for some lessons. JupyterLab
369 | includes it, so no extra installation is needed.** If you want to
370 | test in advance:
371 |
372 | * You can start it from JupyterLab (recommended):
373 |
374 | .. figure:: img/installation/jupyterlab-terminal.png
375 | :class: with-border
376 | :scale: 75%
377 |
378 | From the JupyterLab launcher, select "Terminal".
379 |
380 | .. admonition:: Other ways to access the command line
381 | :class: toggle
382 |
383 | * From the Anaconda Navigator:
384 |
385 | .. figure:: img/installation/anaconda-prompt.png
386 | :class: with-border
387 |
388 | From the Anaconda Navigator, you can select "environments" on the
389 | left, then click on one, then the arrow, then "Open terminal".
390 |
391 | * From your operating system's terminal applications, if you activate
392 | Anaconda.
393 |
394 |
395 |
396 | Verification of the command line
397 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
398 |
399 | To verify command line usage, type the following commands (without the
400 | ``$``), and you should see the corresponding output that lists the
401 | Python version:
402 |
403 | .. code-block:: console
404 |
405 | $ python3 -V
406 | Python 3.8.3
407 |
408 | ## Or python... if it's installed as that
409 | $ python -V
410 | Python 3.8.3
411 |
412 | Any recent version of Python 3 should work for the course (for example
413 | 3.8 or higher).
414 |
415 |
416 |
417 | Zoom
418 | ----
419 |
420 | If this is an online workshop, it might use Zoom. You can see
421 | `CodeRefinery instructions for it
422 | `__.
423 |
424 |
425 |
426 | Need help?
427 | ----------
428 |
429 | If you have access, come to one of the installation help sessions.
430 | Or, ask your colleagues: these are standard tools and you can
431 | definitely find someone who can help you get set up!
432 |
433 |
434 |
435 | See also
436 | --------
437 |
438 | * `Research Software Hour on conda
439 | `__
440 | * `Conda manual `__ (technical)
441 | * `Anaconda individual edition home
442 | `__
443 | * `Anaconda getting started
444 | `__
445 |
--------------------------------------------------------------------------------
/content/libraries.rst:
--------------------------------------------------------------------------------
1 | Library ecosystem
2 | =================
3 |
4 | .. questions::
5 |
6 | - What happens when you need some method beyond what we discuss in this course, what is available?
7 | - How do you decide what to build on for your work?
8 |
9 | .. objectives::
10 |
11 | - Know of some other available packages, but don't necessarily know
12 | how to use them.
13 | - Be able to evaluate what you should reuse and what you should
14 | develop yourself.
15 |
16 | You can't do everything yourself. In fact, once we heard a quote such
17 | as this:
18 |
19 | When you are a student, you are expected to do everything
20 | yourself, and that is how you are evaluated. When you become a
21 | researcher, you *have* to be able to reuse what others have done.
22 | We don't have much practice in doing this.
23 | -- A student
24 |
25 | In this lesson, we'll talk about the broader ecosystem in Python: all
26 | the resources you have available to you. Perhaps we can even classify
27 | this into two types:
28 |
29 | - Well-maintained libraries that are used by many others.
30 | - A wide variety of public code that might work but isn't necessarily
31 | well-maintained (for example, code from articles).
32 |
33 | We'll start with the first then go to the second.
34 |
35 |
36 |
37 | Glossary
38 | --------
39 |
40 | Library
41 | A collection of code used by a program.
42 |
43 | Package
44 | A library that has been made easily installable and reusable.
45 | Often published on public repositories such as the `Python Package
46 | Index `__
47 |
48 | Dependency
49 | A requirement of another program, not included in that program.
50 |
51 |
52 |
53 | The Python/SciPy ecosystem
54 | --------------------------
55 |
56 | This section is nothing more than a tour of what exists in Python.
57 | You aren't expected to particularly remember any of these right now,
58 | but searching for these repositories is a starting point of a lot of
59 | future work.
60 |
61 | The "core" packages `could be considered
62 | `__. Many other packages build on
63 | these, and others that try to do similar things often try to conform
64 | to their interfaces (especially numpy):
65 |
66 | * Python
67 | * Numpy - arrays, everything builds on this
68 | * Scipy - scientific functions (not necessarily a lot builds on this)
69 | * matplotlib - plotting, many other plotting tools build on this
70 | * pandas - data structures
71 | * IPython / Jupyter: interactive work
72 |
73 |
74 | Core numerics libraries
75 | ~~~~~~~~~~~~~~~~~~~~~~~
76 |
77 | * `numpy `__ - Arrays and array math.
78 | * `scipy `__ - Software
79 | for math, science, and engineering.
80 |
81 |
82 | Plotting
83 | ~~~~~~~~
84 |
85 | * `matplotlib `__ - Base plotting package,
86 | somewhat low level but almost everything builds on it.
87 | * `seaborn `__ - Higher level plotting
88 | interface; statistical graphics.
89 | * `Vega-Altair `__ - Declarative Python
90 | plotting.
91 | * `mayavi `__ - 3D plotting
92 | * `Plotly `__ - Big graphing library.
93 |
94 |
95 | Data analysis and other important core packages
96 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
97 |
98 | * `pandas `__ - Columnar
99 |   data analysis.
100 | * `polars `__ - Alternative to pandas that uses similar
101 | API, but is re-imagined for more speed.
102 | * `Vaex `__ - Alternative for pandas
103 | that uses similar API for lazy-loading and processing huge DataFrames.
104 | * `Dask `__ - Alternative to Pandas that uses
105 | similar API and can do analysis in parallel.
106 | * `xarray `__ - Framework for
107 |   working with multi-dimensional arrays.
108 | * `statsmodels `__ - Statistical
109 | models and tests.
110 | * `SymPy `__ - Symbolic math.
111 | * `networkx `__ - Graph and network analysis.
112 | * `graph-tool `__ - Graph and network analysis
113 | toolkit implemented in C++.
114 |
115 |
116 | Interactive computing and human interface
117 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
118 | * Interactive computing
119 |
120 | * `IPython `__ - Nicer interactive interpreter
121 | * `Jupyter `__ - Web-based interface to IPython
122 | and other languages (includes projects such as jupyter notebook,
123 | lab, hub, ...)
124 |
125 | * Testing
126 |
127 | * `pytest `__ - Automated testing interface
128 |
129 | * Documentation
130 |
131 | * `Sphinx `__ - Documentation generator
132 | (also used for this lesson...)
133 |
134 | * Development environments
135 |
136 | * `Spyder `__ - Interactive Python
137 | development environment.
138 | * `Visual Studio Code `__ - Microsoft's
139 | flagship code editor.
140 | * `PyCharm `__ - JetBrains's
141 | Python IDE.
142 |
143 | * `Binder `__ - load any git repository in
144 | Jupyter automatically, good for reproducible research
145 |
146 |
147 | Data format support and data ingestion
148 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
149 |
150 | * `pillow `__ - Image manipulation. The
151 | original PIL is no longer maintained, the new "Pillow" is a drop-in
152 | replacement.
153 | * `h5py `__ and `PyTables `__ -
154 | Interfaces to the `HDF5 `__
155 | file format.
156 |
157 |
158 | Speeding up code and parallelism
159 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
160 |
161 | * `MPI for Python (mpi4py) `__ - Message
162 | Passing Interface (MPI) in Python for parallelizing jobs.
163 | * `cython `__ - easily make C extensions for
164 | Python, also interface to C libraries
165 | * `numba `__ - just in time compiling of
166 | functions for speed-up
167 | * `PyPy `__ - Python written in Python so that
168 | it can internally optimize more.
169 | * `Dask `__ - Distributed array data structure for
170 | distributed computation
171 | * `Joblib `__ - Easy embarrassingly
172 | parallel computing
173 | * `IPyParallel `__ - Easy
174 | parallel task engine.
175 | * `numexpr `__ - Fast evaluation of
176 | array expressions by automatically compiling the arithmetic.
177 |
178 |
179 | Machine learning
180 | ~~~~~~~~~~~~~~~~
181 |
182 | * `nltk `__ - Natural language processing
183 | toolkit.
184 | * `scikit-learn `__ - Traditional
185 | machine learning toolkit.
186 | * `xgboost `__ - Toolkit for
187 | gradient boosting algorithms.
188 |
189 |
190 | Deep learning
191 | ~~~~~~~~~~~~~
192 |
193 | * `tensorflow `__ - Deep learning
194 | library by Google.
195 | * `pytorch `__ - Currently the most popular
196 | deep learning library.
197 | * `keras `__ - Simple library for doing deep learning.
198 | * `huggingface `__ - Ecosystem for sharing
199 |   and running deep learning models and datasets. Includes packages
200 | like ``transformers``, ``datasets``, ``accelerate``, etc.
201 | * `jax `__ - Google's
202 | Python library for running NumPy and automatic differentiation
203 | on GPUs.
204 | * `flax `__ - Neural network
205 | framework built on Jax.
206 | * `equinox `__ - Another neural
207 | network framework built on Jax.
208 | * `DeepSpeed `__ - Algorithms for running
209 | massive scale trainings. Included in many of the frameworks.
210 | * `PyTorch Lightning `__ -
211 | Framework for creating and training PyTorch models.
212 | * `Tensorboard `__ - Tool
213 | for visualizing model training on a web page.
214 |
215 |
216 | Other packages for special cases
217 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
218 |
219 | * `dateutil `__ and `pytz
220 | `__ - Date arithmetic and handling,
221 | timezone database and conversion.
222 |
223 |
224 |
225 |
226 | Connecting Python to other languages
227 | ------------------------------------
228 |
229 | As we discussed with Scipy, very many of the above packages aren't
230 | written in Python: they are written in some other language and have a
231 | Python interface. Python is written in C, and thus has great C
232 | interfaces. This contributes to two things:
233 |
234 | * **Extending Python** by writing your own modules in C.
235 |
236 | * It's actually common to first have (or write) an analysis package
237 | in C or C++, then make the Python interface. Then it can be
238 | supported by other languages, too.
239 |
240 | * Or one starts an analysis package in Python, and slowly moves bits
241 | of it to C over time as there is need.
242 |
243 | * **Embedding Python**, where you have another primary application
244 | that uses Python under the hood as an internal scripting language.
245 |
246 | These features aren't exactly unique to Python, but Python does
247 | support them very well. Read more: `Extending and embedding Python
248 | `__.
249 |
250 |
251 | Tools for interfacing with other languages
252 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
253 |
254 | These days, one rarely directly extends the Python interpreter, but uses
255 |
256 | * `cffi `__ and `ctypes
257 | `__ - interface to C
258 | and compatible libraries
259 | * `cython `__ - easily make C extensions for
260 | Python, also interface to C libraries
261 | * `f2py `__ - interface to Fortran
262 | code
263 | * `swig `__ - connect to a variety of programming languages.
264 | * ``Boost.python`` - Another Python/C++ interface
265 | * TODO: Julia modules for Python?
266 |
267 |
268 |
269 | Evaluating Python packages for reuse
270 | ------------------------------------
271 |
272 | Above, we talked about well-maintained mainstream packages. **Do you
273 | trust random code you find online (for example included in a paper)?**
274 |
275 | Especially consider scientific results, which *have* to be correct.
276 | Still, you also *can't* build everything yourself, so you have to
277 | carefully evaluate the situation.
278 |
279 | Below are some things to consider:
280 |
281 | * Are there releases? Have they been going on for a while?
282 |
283 | * Are releases installable without copy-paste?
284 |
285 | * Are dependencies handled well?
286 |
287 | * Does the code randomly change, so that it no longer works with your
288 |   code? Is this relevant?
289 |
290 | * Is there good documentation that not only tells how to use it, but
291 |   also explains how it works?
292 |
293 | * Is there automated testing? What's your evaluation of the risk of
294 | undetectable scientific errors?
295 |
296 | * Is there a community, or is it one person? Is it backed by some
297 | organization? Does it have a permanent home?
298 |
299 | * Is it on a public hosting site (GitLab, GitHub, Bitbucket, etc)
300 | where a community *could* form?
301 |
302 | * Do others post issues and make contributions? Are these issues
303 | dealt with in a timely manner? Can you search past bug reports?
304 |
305 | * Is the software citeable?
306 |
307 |
308 |
309 | Is your work reuseable?
310 | -----------------------
311 |
312 | Every small project you do contributes a little bit to the Python and
313 | SciPy ecosystem. This course has sort of started you on that path,
314 | and a `CodeRefinery workshop `__ will make
315 | sure you have the tools to produce high-quality, reusable code.
316 |
317 |
318 |
319 | What's next?
320 | ------------
321 |
322 | * The `CodeRefinery workshop `__ mentioned
323 | above will prepare you for others to reuse your code and for you to
324 | contribute to other code.
325 | * The upcoming :doc:`dependencies` lesson will teach you how to
326 | record and manage dependencies so that anyone can seamlessly reuse
327 | your code.
328 |
329 |
330 |
331 | Exercises
332 | ---------
333 |
334 | .. exercise:: Libraries 1.1: Libraries in your work
335 |
336 | What libraries do you use in your work? What have you made, which
337 | you could have reused from some other source. What have you used
338 | from some other source that you wished you had re-created?
339 |
340 | Discuss in your groups or HackMD.
341 |
342 | .. solution:: Libraries 1.1
343 |
344 | ... is there anything to say here?
345 |
346 |
347 | .. exercise:: Libraries 1.2: Evaluating packages
348 |
349 | Below are some links to some packages, both public and made by the
350 | authors of this lesson. Evaluate them, considering "would I use
351 | this in my project?"
352 |
353 | a) https://github.com/networkx/networkx/
354 | b) some code on webpage in a paper's footnote
355 | c) https://github.com/rkdarst/pcd
356 | d) https://github.com/dftlibs/numgrid
357 | e) https://github.com/rkdarst/dynbench
358 | f) https://vpython.org/
359 |
360 | .. solution:: Libraries 1.2
361 |
362 | a) networkx: This seems to be a relatively large, active project
363 | using best practices. Probably usable.
364 | b) I would probably use it if I had to, but would prefer not to.
365 | c) This (written by one of the authors of this lesson) has no
366 |      documentation, no community, no best practices, and is very old.
367 |      Probably not a good idea to try to use it.
368 | d) This project uses best practices, but doesn't seem to have a big
369 | community. It's probably fine to use, but who knows if it will
370 | be maintained 10 years from now. It does have automated tests
371 | via Github Actions (``.github/workflows`` and the green checks),
372 | so the authors have put some work into making it correct.
373 | e) This (also written by one of the authors) looks like it was made
374 | for a paper of some sort. It has some minimal documentation,
375 | but still is missing many best practices and is clearly not
376 | maintained anymore (look at the ancient pull request). Probably
377 | not a good idea to use unless you have to.
378 | f) This project has a pretty website, and some information. But
379 | seems to not be using best practices of an open repository, and
380 | custom locations which could disappear at any time.
381 |
382 | You notice that several of the older projects here were written by
383 | one of the authors of this lesson. It goes to show that everyone
384 | starts somewhere and improves over time - don't feel bad if your
385 | work isn't perfect, as long as you keep trying to get better!
386 |
387 |
388 |
389 | See also
390 | --------
391 |
392 | * `Topical Software in the SciPy ecosystem
393 | `__ - relatively
394 | detailed (but not comprehensive) list of projects
395 |
396 |
397 | .. keypoints::
398 |
399 | - Almost everything you need can already be found, except your
400 | incremental work.
401 | - When do you build on that other work, and when do you create
402 | things yourself?
403 |
--------------------------------------------------------------------------------
/content/ndarray.dot:
--------------------------------------------------------------------------------
1 | strict digraph ndarray {  // how a Python variable name leads to NumPy array data
2 | graph [compound=true];  // compound=true lets an edge target a cluster (see lhead below)
3 | 
4 | node [style = filled, color=cyan];  // default appearance for all nodes
5 | 
6 | n [label="Variable n (lvalue)", color=gold];  // the Python-level name
7 | nobj [label="PyObject n"];  // the Python object the name refers to
8 | ndesc [label="ndarray metadata"];  // the ndarray's metadata record
9 | 
10 | n -> nobj;  // name -> object
11 | nobj -> ndesc;  // object -> metadata
12 | 
13 | subgraph cluster_n {  // grouped box representing the underlying data buffer
14 | label = "Data array for n";
15 | color = aquamarine;
16 | style = filled;
17 | node [shape=box];  // individual data elements drawn as boxes
18 | 
19 | ndata_0 [label="3"];
20 | ndata_1 [label="2"];
21 | ndata_2 [label="1"];
22 | }
23 | 
24 | 
25 | ndesc -> ndata_1 [lhead=cluster_n];  // lhead makes the edge end at the whole cluster, not one element
26 | 
27 | }
28 |
--------------------------------------------------------------------------------
/content/ndarray.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
6 |
7 |
63 |
--------------------------------------------------------------------------------
/content/packaging-example-project/calculator/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Example calculator package.
3 | """
4 |
5 | from .adding import add
6 | from .subtracting import subtract
7 | from .integrating import integral
8 |
9 | __version__ = "0.1.0"
10 |
--------------------------------------------------------------------------------
/content/packaging-example-project/calculator/adding.py:
--------------------------------------------------------------------------------
def add(x, y):
    """Return the sum of the two operands."""
    total = x + y
    return total
3 |
--------------------------------------------------------------------------------
/content/packaging-example-project/calculator/integrating.py:
--------------------------------------------------------------------------------
1 | from scipy import integrate
2 |
3 |
def integral(function, lower_limit, upper_limit):
    """Numerically integrate ``function`` over [lower_limit, upper_limit].

    Returns the ``(value, estimated_error)`` pair produced by
    ``scipy.integrate.quad``.
    """
    value_and_error = integrate.quad(function, lower_limit, upper_limit)
    return value_and_error
6 |
--------------------------------------------------------------------------------
/content/packaging-example-project/calculator/subtracting.py:
--------------------------------------------------------------------------------
def subtract(x, y):
    """Return the difference ``x - y``."""
    difference = x - y
    return difference
3 |
--------------------------------------------------------------------------------
/content/packaging-example-project/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=61.0"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "calculator-myname"
7 | description = "A small example package"
8 | version = "0.1.0"
9 | readme = "README.md"
10 | authors = [
11 | { name = "Firstname Lastname", email = "firstname.lastname@example.org" }
12 | ]
13 | dependencies = [
14 | "scipy"
15 | ]
16 |
--------------------------------------------------------------------------------
/content/packaging-example-project/test.py:
--------------------------------------------------------------------------------
1 | from calculator import add, subtract, integral
2 | 
3 | print("2 + 3 =", add(2, 3))  # add() from calculator/adding.py
4 | print("2 - 3 =", subtract(2, 3))  # subtract() from calculator/subtracting.py
5 | integral_x_squared, error = integral(lambda x: x * x, 0.0, 1.0)  # (value, error) pair from scipy.integrate.quad
6 | print(f"{integral_x_squared = }")
7 |
--------------------------------------------------------------------------------
/content/packaging-example-project/test_editable.py:
--------------------------------------------------------------------------------
1 | from calculator import subtract
2 | 
3 | print("2 - 3 =", subtract(2, 3))  # re-run after editing subtract() to see an editable install pick up the change
4 |
--------------------------------------------------------------------------------
/content/packaging.rst:
--------------------------------------------------------------------------------
1 | Packaging
2 | =========
3 |
4 | .. questions::
5 |
6 | - How to organize Python projects larger than one script?
7 | - What is a good file and folder structure for Python projects?
8 | - How can you make your Python functions most usable by your collaborators?
9 | - How to prepare your code to make a Python package?
10 | - How to publish your Python package?
11 |
12 | .. objectives::
13 |
14 | - Learn to identify the components of a Python package
15 | - Learn to create a Python package
16 | - Learn to publish a Python package
17 |
18 |
19 | Organizing Python projects
20 | --------------------------
21 |
22 | Python projects often start as a single script or Jupyter notebook but
23 | they can grow out of a single file.
24 |
25 | In the :ref:`scripts` episode we have also learned how to import functions
26 | and objects from other Python files (modules). Now we will take it a step further.
27 |
28 | **Recommendations**:
29 |
30 | - Collect related functions into modules (files).
31 | - Collect related modules into packages (we will show how).
32 | - Add a ``LICENSE`` file to your code from `choosealicense.com `__
33 | (see `Software Licensing and Open source explained with cakes `__).
34 | - Write a ``README.md`` file describing what the code does and how to use it.
35 | - It is also recommended to `document your package `__.
36 | - When the project grows, you might need `automated testing `__.
37 |
38 | To have a concrete but still simple example, we will create a project
39 | consisting of 3 functions, each in its own file. We can then imagine that each
40 | file would contain many more functions. To make it more interesting,
41 | one of these functions will depend on an external library: ``scipy``.
42 |
43 | These are the 3 files:
44 |
45 | .. literalinclude:: packaging-example-project/calculator/adding.py
46 | :caption: adding.py
47 |
48 | .. literalinclude:: packaging-example-project/calculator/subtracting.py
49 | :caption: subtracting.py
50 |
51 | .. literalinclude:: packaging-example-project/calculator/integrating.py
52 | :caption: integrating.py
53 |
54 | We will add a fourth file:
55 |
56 | .. literalinclude:: packaging-example-project/calculator/__init__.py
57 | :caption: __init__.py
58 |
59 | This ``__init__.py`` file will be the interface of our package/library.
60 | It also holds the package docstring and the version string.
61 | Note how it imports functions from the various modules using *relative imports*
62 | (with the dot).
63 |
64 | This is how we will arrange the files in the project folder/repository:
65 |
66 | .. code-block:: none
67 | :emphasize-lines: 3-6
68 |
69 | project-folder
70 | ├── calculator
71 | │ ├── adding.py
72 | │ ├── __init__.py
73 | │ ├── integrating.py
74 | │ └── subtracting.py
75 | ├── LICENSE
76 | └── README.md
77 |
78 | Now we are ready to test the package. For this we need to be in the "root"
79 | folder, which we have called the *project-folder*. We also need to have
80 | ``scipy`` available in our environment:
81 |
82 | .. literalinclude:: packaging-example-project/test.py
83 |
84 | The package is not yet pip-installable, though. We will make this possible in
85 | the next section.
86 |
87 |
88 | Testing a local pip install
89 | ---------------------------
90 |
91 | To make our example package pip-installable we need to add one more file:
92 |
93 | .. code-block:: none
94 | :emphasize-lines: 9
95 |
96 | project-folder
97 | ├── calculator
98 | │ ├── adding.py
99 | │ ├── __init__.py
100 | │ ├── integrating.py
101 | │ └── subtracting.py
102 | ├── LICENSE
103 | ├── README.md
104 | └── pyproject.toml
105 |
106 | This is how ``pyproject.toml`` looks:
107 |
108 | .. literalinclude:: packaging-example-project/pyproject.toml
109 | :caption: pyproject.toml
110 | :emphasize-lines: 13-15
111 |
112 | Note how our package requires ``scipy`` and we decided to not pin the version
113 | here (see :ref:`version_pinning`).
114 |
115 | Now we have all the building blocks to test a local pip install. This is a good
116 | test before trying to upload a package to PyPI or test-PyPI
117 | (see :ref:`pypi`)
118 |
119 | .. note::
120 |
121 |    Sometimes you need to rely on unreleased, development versions as
122 | dependencies and this is also possible. For example, to use the
123 | latest ``xarray`` you could add::
124 |
125 | dependencies = [
126 | "scipy",
127 | "xarray @ https://github.com/pydata/xarray/archive/main.zip"
128 | ]
129 |
130 | .. seealso::
131 | - `pip requirement specifiers `__
132 | - pyOpenSci tutorial on
133 | `pyproject.toml metadata `__
134 |
135 |
136 |
137 | Exercise 1
138 | ----------
139 |
140 | .. challenge:: Packaging-1
141 |
142 | To test a local pip install:
143 |
144 | - Create a new folder outside of our example project
145 | - Create a new virtual environment (:ref:`dependency_management`)
146 | - Install the example package from the project folder
147 | into the new environment::
148 |
149 | pip install --editable /path/to/project-folder/
150 |
151 | - Test the local installation:
152 |
153 | .. literalinclude:: packaging-example-project/test.py
154 |
155 | - Make a change in the ``subtract`` function above such that it always
156 | returns a float ``return float(x - y)``.
157 |
158 | - Open a new Python console and test the following lines. Compare it with
159 | the previous output.
160 |
161 | .. literalinclude:: packaging-example-project/test_editable.py
162 |
163 | Sharing packages via PyPI
164 | -------------------------
165 |
166 | .. demo::
167 |
168 | Most people will watch and observe this, due to the speed with which we will
169 | move.
170 |
171 | Once we are able to pip-install the example package locally, we are ready for
172 | upload.
173 |
174 | We exercise by uploading to test-PyPI_, not the
175 | real `PyPI `__, so that if we mess things up, nothing bad
176 | happens.
177 |
178 | We need two more things:
179 |
180 | - We will do this using `Twine `__ so you need
181 | to pip install that, too.
182 | - You need an account on test-PyPI_
183 |
184 | .. _test-PyPI: https://test.pypi.org/
185 |
186 | .. highlight:: console
187 |
188 | Let's try it out. First we create the distribution package::
189 |
190 | $ python3 -m build
191 |
192 | We need twine::
193 |
194 | $ pip install twine
195 |
196 | And use twine to upload the distribution files to test-PyPI::
197 |
198 | $ twine upload -r testpypi dist/*
199 |
200 | Uploading distributions to https://test.pypi.org/legacy/
201 | Enter your API token:
202 |
203 |
204 | .. _Create API token: https://test.pypi.org/manage/account/token/
205 |
206 | .. note::
207 |
208 | To generate an API token, proceed to the `Create API token`_ page in test-PyPI.
209 | You will be prompted for your password.
210 |
211 | .. solution:: The long-version for finding the *Create API token* page
212 |
213 | 1. Log on to test-PyPI_ at https://test.pypi.org
214 | 2. In the top-right corner, click on the drop-down menu and click **Account settings** or
215 | follow this `link `__.
216 | 3. Scroll down to the section **API tokens** and click the button **Add API token**,
217 | which opens up the
218 | `Create API token`_ page.
219 |
220 |
221 | #. Under **Token name** write something memorable.
222 | It should remind you the *purpose*
223 | or the *name of the computer*, such that when you are done
224 | using it, you can safely delete it.
225 | #. Under **Scope** select ``Entire account (all projects)``.
226 | #. Click on **Create token**.
227 | #. Click on **Copy token** once a long string which starts
228 | with ``pypi-`` is generated.
229 |
230 | Paste that token back into the terminal where ``twine upload ...`` is running and press ENTER.
231 |
232 | Once this is done, create yet another virtual environment and try to install from test-PyPI (adapt ``myname``).
233 |
234 | .. tabs::
235 |
236 | .. tab:: Linux / macOS
237 |
238 | .. code-block:: console
239 | :emphasize-lines: 4-7
240 |
241 | $ python3 -m venv venv-calculator
242 | $ source venv-calculator/bin/activate
243 | $ which python
244 | $ python3 -m pip install \
245 | -i https://test.pypi.org/simple/ \
246 | --extra-index-url https://pypi.org/simple/ \
247 | calculator-myname
248 | $ deactivate
249 |
250 | .. tab:: Windows
251 |
252 | .. code-block:: console
253 | :emphasize-lines: 4
254 |
255 | $ python3 -m venv venv-calculator
256 | $ venv-calculator\Scripts\activate
257 | $ where python
258 | $ python3 -m pip install -i https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ calculator-myname
259 | $ deactivate
260 |
261 | Tools that simplify sharing via PyPI
262 | ------------------------------------
263 |
264 | The solution that we have used to create the example package (using
265 | ``setuptools`` and ``twine``) is not the only approach. There are many ways to
266 | achieve this and we avoided going into too many details and comparisons to not
267 | confuse too much. If you web-search this, you will also see that recently the
268 | trend goes towards using ``pyproject.toml`` as more general
269 | alternative to the previous ``setup.py``.
270 |
271 | There are at least two tools which try to make the packaging and PyPI interaction easier:
272 |
273 | - `Poetry `__
274 | - `Flit `__
275 |
276 | If you upload packages to PyPI or test PyPI often you can create an API token and
277 | `save it in the .pypirc file `__.
278 |
279 | Building a conda package and share it
280 | -------------------------------------
281 |
282 |
283 | .. callout:: Prerequisites
284 |
285 | To generate a conda build recipe, the package ``grayskull`` and
286 | to build it, the package ``conda-build`` are required.
287 | You may install these with **Anaconda Navigator** or from the command line::
288 |
289 | $ conda install -n base grayskull conda-build
290 |
291 |
292 | The simplest way for creating a conda package for your python script is to
293 | first publish it in `PyPI `__ following the steps explained
294 | above.
295 |
296 |
297 | Building a python package with grayskull and conda-build
298 | ********************************************************
299 |
300 | Once built, the conda package can be installed locally. For this example, we
301 | will use `runtest `__. `runtest
302 | `__ is a numerically tolerant end-to-end test
303 | library for research software.
304 |
305 | 1. Generate the *recipe* by executing (``grayskull`` or ``conda grayskull``)::
306 |
307 | $ conda grayskull pypi runtest
308 |
309 | The command above will create a new folder called `runtest` containing a file `meta.yaml`,
310 | the conda recipe for building the `runtest` package.
311 |
312 | 2. View the contents of `meta.yaml` and ensure the requirements:
313 |
314 | .. code-block:: yaml
315 |
316 | requirements:
317 | host:
318 | - python
319 | - flit-core >=2,<4
320 | - pip
321 | run:
322 | - python
323 |
324 | In the requirements above, we specified what is required for the `host `__ and for `running `__ the package.
325 |
326 | .. callout:: Remark
327 |
328 | For pure python recipes, this is all you need for building a python package with conda.
329 | If your package needs to be built (for instance compilation), you would need additional files e.g. `build.sh` (to build on Linux/Mac-OSX) and `bld.bat` (to build on Windows systems). You can also add test scripts for testing your package. See `documentation `__
330 |
331 |
332 | 3. Build your package with conda
333 |
334 | Your package is now ready to be build with conda::
335 |
336 | $ conda build runtest
337 |
338 |
339 | .. callout:: Conda package location
340 |
341 | Look at the messages produced while building. The location of the local conda package is given (search for `anaconda upload`):
342 |
343 | .. code-block:: none
344 |
345 | /home/username/miniforge3/conda-bld/noarch/runtest-2.3.4-py_0.tar.bz2
346 |
347 |    The prefix ``/home/username/miniforge3/`` may be different on your machine,
348 |    depending on your operating system (Linux, Mac-OSX or Windows). The sub-folder is named ``noarch`` since
349 | it is a pure-python package and the recipe indicates the same.
350 |
351 |    If the package contained compiled code then the sub-folder would have been named ``win-64`` or ``linux-64``.
352 | It could then be converted to other platforms using
353 | `conda convert `__.
354 |
355 | 4. Check within new environment
356 |
357 | It is not necessary to create a new conda environment to install it but as explained in previous episode, it is good practice to have isolated environments.
358 |
359 | ::
360 |
361 | $ conda create -n local-runtest --use-local runtest
362 |
363 | We can then check `runtest` has been successfully installed in the `local-runtest` conda environment. Open a new Terminal with the `local-runtest` environment, either from the command line::
364 |
365 | $ conda activate local-runtest
366 |
367 | or via **Anaconda Navigator** (Open Terminal), import runtest and
368 | check its version:
369 |
370 | .. code-block:: python
371 |
372 | import runtest
373 | print(runtest.__version__)
374 |
375 |
376 | .. callout:: Building a conda package from scratch
377 |
378 | It is possible to build a conda package from scratch without using conda grayskull.
379 | We recommend you to check the
380 | `conda-build documentation `__
381 | for more information.
382 |
383 | To be able to share and install your local conda package anywhere (on other platforms), you would need to upload it to a `conda channel `__ (see below).
384 |
385 |
386 |
387 | Publishing a python package
388 | ***************************
389 |
390 | - Upload your package to `conda-forge `__:
391 | conda-forge is a conda channel: it contains community-led collection of
392 | recipes, build infrastructure and distributions for the conda package
393 | manager. Anyone can
394 | `publish conda packages to conda-forge `__
395 | if certain
396 | `guidelines `__ are respected.
397 |
398 | - Upload your package to `bioconda `_: bioconda is
399 | a very popular channel for the conda package manager specializing in
400 | bioinformatics software. As for conda-forge, you need to follow their
401 | `guidelines `__ when
402 | building conda recipes.
403 |
404 | You can also `create your own conda channel
405 | `__
406 | for publishing your packages.
407 |
408 |
409 | .. keypoints::
410 |
411 | - It is worth it to organize your code for publishing, even if only
412 | you are using it.
413 | - PyPI is a place for Python packages
414 | - conda is similar but is not limited to Python
415 |
--------------------------------------------------------------------------------
/content/parallel-pi-multiprocessing.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Python multithreading solution\n",
8 | "Here, we will create a simple stochastic calculation of pi, and then parallelize it using multiprocessing (and multithreading to compare)."
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "metadata": {},
15 | "outputs": [],
16 | "source": [
17 | "import random"
18 | ]
19 | },
20 | {
21 | "cell_type": "code",
22 | "execution_count": 2,
23 | "metadata": {},
24 | "outputs": [],
25 | "source": [
26 | "def sample(n):\n",
27 | " \"\"\"Make n trials of points in the square. Return (n, number_in_circle)\n",
28 | " \n",
29 | "    This is our basic function. By design, it returns everything it\n",
30 | " needs to compute the final answer: both n (even though it is an input\n",
31 | " argument) and n_inside_circle. To compute our final answer, all we\n",
32 | " have to do is sum up the n:s and the n_inside_circle:s and do our\n",
33 | " computation\"\"\"\n",
34 | " n_inside_circle = 0\n",
35 | " for i in range(n):\n",
36 | " x = random.random()\n",
37 | " y = random.random()\n",
38 | " if x**2 + y**2 < 1.0:\n",
39 | " n_inside_circle += 1\n",
40 | " return n, n_inside_circle"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 3,
46 | "metadata": {},
47 | "outputs": [
48 | {
49 | "name": "stdout",
50 | "output_type": "stream",
51 | "text": [
52 | "598 ms ± 29.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
53 | ]
54 | }
55 | ],
56 | "source": [
57 | "%%timeit\n",
58 | "# Do it just for timing\n",
59 | "n, n_inside_circle = sample(10**6)"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 4,
65 | "metadata": {},
66 | "outputs": [],
67 | "source": [
68 | "# Do the actual calculation (the previous result doesn't get saved)\n",
69 | "n, n_inside_circle = sample(10**6)"
70 | ]
71 | },
72 | {
73 | "cell_type": "markdown",
74 | "metadata": {},
75 | "source": [
76 | "This is the \"calculate answer\" phase."
77 | ]
78 | },
79 | {
80 | "cell_type": "code",
81 | "execution_count": 5,
82 | "metadata": {},
83 | "outputs": [
84 | {
85 | "data": {
86 | "text/plain": [
87 | "3.144548"
88 | ]
89 | },
90 | "execution_count": 5,
91 | "metadata": {},
92 | "output_type": "execute_result"
93 | }
94 | ],
95 | "source": [
96 | "pi = 4.0 * (n_inside_circle / n)\n",
97 | "pi"
98 | ]
99 | },
100 | {
101 | "cell_type": "markdown",
102 | "metadata": {},
103 | "source": [
104 | "## Do it in parallel with multiprocessing\n",
105 | "This divides the calculation into 10 tasks and runs `sample` on each of them. Then it re-combines the results."
106 | ]
107 | },
108 | {
109 | "cell_type": "code",
110 | "execution_count": 6,
111 | "metadata": {},
112 | "outputs": [],
113 | "source": [
114 | "import multiprocessing.pool\n",
115 | "pool = multiprocessing.pool.Pool()\n",
116 | "# The default pool makes one process per CPU"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": 7,
122 | "metadata": {},
123 | "outputs": [
124 | {
125 | "name": "stdout",
126 | "output_type": "stream",
127 | "text": [
128 | "320 ms ± 38.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
129 | ]
130 | }
131 | ],
132 | "source": [
133 | "%%timeit\n",
134 | "# Do it once to time it\n",
135 | "results = pool.map(sample, [10**5] * 10)"
136 | ]
137 | },
138 | {
139 | "cell_type": "code",
140 | "execution_count": 8,
141 | "metadata": {},
142 | "outputs": [],
143 | "source": [
144 | "# Do it again to get the results, since the results of the above\n",
145 | "# cell aren't accessible because of the %%timeit magic.\n",
146 | "results = pool.map(sample, [10**5] * 10)"
147 | ]
148 | },
149 | {
150 | "cell_type": "code",
151 | "execution_count": 9,
152 | "metadata": {},
153 | "outputs": [],
154 | "source": [
155 | "pool.close()"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": 10,
161 | "metadata": {},
162 | "outputs": [
163 | {
164 | "data": {
165 | "text/plain": [
166 | "3.140768"
167 | ]
168 | },
169 | "execution_count": 10,
170 | "metadata": {},
171 | "output_type": "execute_result"
172 | }
173 | ],
174 | "source": [
175 | "n_sum = sum(x[0] for x in results)\n",
176 | "n_inside_circle_sum = sum(x[1] for x in results)\n",
177 | "pi = 4.0 * (n_inside_circle_sum / n_sum)\n",
178 | "pi"
179 | ]
180 | },
181 | {
182 | "cell_type": "markdown",
183 | "metadata": {},
184 | "source": [
185 | "## Do it in \"parallel\" with threads\n",
186 | "To compare. This should not be any faster, because multiple Python functions cannot run at the same time in the same process."
187 | ]
188 | },
189 | {
190 | "cell_type": "code",
191 | "execution_count": 11,
192 | "metadata": {},
193 | "outputs": [],
194 | "source": [
195 | "threadpool = multiprocessing.pool.ThreadPool()"
196 | ]
197 | },
198 | {
199 | "cell_type": "code",
200 | "execution_count": 12,
201 | "metadata": {},
202 | "outputs": [
203 | {
204 | "name": "stdout",
205 | "output_type": "stream",
206 | "text": [
207 | "635 ms ± 28.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
208 | ]
209 | },
210 | {
211 | "data": {
212 | "text/plain": [
213 | ""
214 | ]
215 | },
216 | "execution_count": 12,
217 | "metadata": {},
218 | "output_type": "execute_result"
219 | }
220 | ],
221 | "source": [
222 | "%%timeit -o\n",
223 | "# Do it once to time it\n",
224 | "threadpool.map(sample, [10**5] * 10)"
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "execution_count": 13,
230 | "metadata": {},
231 | "outputs": [],
232 | "source": [
233 | "# Do it again to get the results, since the results of the above\n",
234 | "# cell aren't accessible because of the %%timeit magic.\n",
235 | "results = threadpool.map(sample, [10**5] * 10)"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": 14,
241 | "metadata": {},
242 | "outputs": [],
243 | "source": [
244 | "threadpool.close()"
245 | ]
246 | },
247 | {
248 | "cell_type": "code",
249 | "execution_count": 15,
250 | "metadata": {},
251 | "outputs": [
252 | {
253 | "data": {
254 | "text/plain": [
255 | "3.142388"
256 | ]
257 | },
258 | "execution_count": 15,
259 | "metadata": {},
260 | "output_type": "execute_result"
261 | }
262 | ],
263 | "source": [
264 | "n_sum = sum(x[0] for x in results)\n",
265 | "n_inside_circle_sum = sum(x[1] for x in results)\n",
266 | "pi = 4.0 * (n_inside_circle_sum / n_sum)\n",
267 | "pi"
268 | ]
269 | },
270 | {
271 | "cell_type": "markdown",
272 | "metadata": {},
273 | "source": [
274 | "## Future ideas\n",
275 | "\n",
276 | "You could make a separate `calculate` function that take a list of results and returns pi. This can be used regardless of if it is done with multiprocessing or without.\n",
277 | "\n",
278 | "Notice the similarity to [split-apply-combine](https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html) or [map-reduce](https://en.wikipedia.org/wiki/MapReduce) which is a specialization of split-apply-combine."
279 | ]
280 | }
281 | ],
282 | "metadata": {
283 | "kernelspec": {
284 | "display_name": "Python 3",
285 | "language": "python",
286 | "name": "python3"
287 | },
288 | "language_info": {
289 | "codemirror_mode": {
290 | "name": "ipython",
291 | "version": 3
292 | },
293 | "file_extension": ".py",
294 | "mimetype": "text/x-python",
295 | "name": "python",
296 | "nbconvert_exporter": "python",
297 | "pygments_lexer": "ipython3",
298 | "version": "3.8.5"
299 | }
300 | },
301 | "nbformat": 4,
302 | "nbformat_minor": 4
303 | }
304 |
--------------------------------------------------------------------------------
/content/plotting-matplotlib/customizing/gapminder-larger-font.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/plotting-matplotlib/customizing/gapminder-larger-font.png
--------------------------------------------------------------------------------
/content/plotting-matplotlib/customizing/gapminder-linear.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/plotting-matplotlib/customizing/gapminder-linear.png
--------------------------------------------------------------------------------
/content/plotting-matplotlib/customizing/gapminder-log.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/plotting-matplotlib/customizing/gapminder-log.png
--------------------------------------------------------------------------------
/content/plotting-matplotlib/first-plot/exercise.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/plotting-matplotlib/first-plot/exercise.png
--------------------------------------------------------------------------------
/content/plotting-matplotlib/first-plot/getting-started.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/plotting-matplotlib/first-plot/getting-started.png
--------------------------------------------------------------------------------
/content/plotting-vega-altair/temperature-ranges-combined.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/content/productivity.md:
--------------------------------------------------------------------------------
1 | # Productivity tools
2 |
3 | :::{objectives}
4 | - Know about tools that can help you **spot code problems** and help you follow
5 | a **consistent code style** without you having to do it manually.
6 | - Get an overview of **AI-based tools** and how they can help you
7 |   write code.
8 | :::
9 |
10 | :::{instructor-note}
11 | - Demo/discussion: 20 min
12 | :::
13 |
14 |
15 | ## Linters and formatters
16 |
17 | **Linter**: Tool that analyzes source code to detect potential errors, unused
18 | imports, unused variables, code style violations, and to improve readability.
19 | - Popular linters:
20 | - [Autoflake](https://pypi.org/project/autoflake/)
21 | - [Flake8](https://flake8.pycqa.org/)
22 | - [Pyflakes](https://pypi.org/project/pyflakes/)
23 | - [Pycodestyle](https://pycodestyle.pycqa.org/)
24 | - [Pylint](https://pylint.readthedocs.io/)
25 | - [Ruff](https://docs.astral.sh/ruff/)
26 |
27 | **Formatter**: Tool that automatically formats your code to a consistent style,
28 | for instance following [PEP 8](https://peps.python.org/pep-0008/).
29 |
30 | - Popular formatters:
31 | - [Black](https://black.readthedocs.io/)
32 | - [YAPF](https://github.com/google/yapf)
33 | - [Ruff](https://docs.astral.sh/ruff/)
34 |
35 | In this course we will focus on [Ruff](https://docs.astral.sh/ruff/) since it
36 | can do **both checking and formatting** and you don't have to switch between
37 | multiple tools.
38 |
39 | :::{discussion} Linters and formatters can be configured to your liking
40 | These tools typically have good defaults. But if you don't like the defaults,
41 | you can configure what they should ignore or how they should format or not format.
42 | :::
43 |
44 |
45 | ## Examples
46 |
47 | This code example (which we possibly recognize from the previous section about
48 | {ref}`profiling`)
49 | has a few problems (highlighted):
50 | ```{code-block} python
51 | ---
52 | emphasize-lines: 2, 7, 10
53 | ---
54 | import re
55 | import requests
56 |
57 |
58 | def count_unique_words(file_path: str) -> int:
59 | unique_words = set()
60 | forgotten_variable = 13
61 | with open(file_path, "r", encoding="utf-8") as file:
62 | for line in file:
63 | words = re.findall(r"\b\w+\b", line.lower()))
64 | for word in words:
65 | unique_words.add(word)
66 | return len(unique_words)
67 | ```
68 |
69 | Please try whether you can locate these problems using Ruff:
70 | ```console
71 | $ ruff check
72 | ```
73 |
74 | Next, let us try to auto-format a code example which is badly formatted and also difficult
75 | to read:
76 | :::::{tabs}
77 | ::::{tab} Badly formatted
78 | ```python
79 | import re
80 | def count_unique_words (file_path : str)->int:
81 | unique_words=set()
82 | with open(file_path,"r",encoding="utf-8") as file:
83 | for line in file:
84 | words=re.findall(r"\b\w+\b",line.lower())
85 | for word in words:
86 | unique_words.add(word)
87 | return len( unique_words )
88 | ```
89 | ::::
90 |
91 | ::::{tab} Auto-formatted
92 | ```python
93 | import re
94 |
95 |
96 | def count_unique_words(file_path: str) -> int:
97 | unique_words = set()
98 | with open(file_path, "r", encoding="utf-8") as file:
99 | for line in file:
100 | words = re.findall(r"\b\w+\b", line.lower())
101 | for word in words:
102 | unique_words.add(word)
103 | return len(unique_words)
104 | ```
105 |
106 | This was done using:
107 | ```console
108 | $ ruff format
109 | ```
110 | ::::
111 | :::::
112 |
113 |
114 | ## Type checking
115 |
116 | A (static) type checker is a tool that checks whether the types of variables in your
117 | code match the types that you have specified.
118 | - Tools:
119 | - [Mypy](https://mypy.readthedocs.io/)
120 | - [Pyright](https://github.com/microsoft/pyright) (Microsoft)
121 | - [Pyre](https://pyre-check.org/) (Meta)
122 |
123 |
124 | ## Integration with editors
125 |
126 | Many/most of the above tools can be integrated with your editor. For instance,
127 | you can configure your editor to automatically format your code when you save
128 | the file. However, this only makes sense when all team members agree to follow
129 | the same style, otherwise saving and possibly committing changes to version
130 | control will show up changes to code written by others which you possibly
131 | didn't intend to make.
132 |
133 |
134 | ## Integration with Jupyter notebooks
135 |
136 | It is possible to automatically format your code in Jupyter notebooks!
137 | For this to work you need
138 | the following three dependencies installed:
139 | - `jupyterlab-code-formatter`
140 | - `black`
141 | - `isort`
142 |
143 | More information and a screen-cast of how this works can be found at
144 | <https://jupyterlab-code-formatter.readthedocs.io/>.
145 |
146 |
147 | ## Integration with version control
148 |
149 | If you use version control and like to have your code checked or formatted
150 | **before you commit the change**, you can use tools like [pre-commit](https://pre-commit.com/).
151 |
152 |
153 | ## AI-assisted coding
154 |
155 | We can use AI as an assistant/apprentice:
156 | - Code completion
157 | - Write a test based on an implementation
158 | - Write an implementation based on a test
159 |
160 | Or we can use AI as a mentor:
161 | - Explain a concept
162 | - Improve code
163 | - Show a different (possibly better) way of implementing the same thing
164 |
165 |
166 | :::{figure} productivity/chatgpt.png
167 | :alt: Screenshot of ChatGPT
168 | :width: 100%
169 |
170 | Example for using a chat-based AI tool.
171 | :::
172 |
173 | :::{figure} productivity/code-completion.gif
174 | :alt: Screen-cast of working with GitHub Copilot
175 | :width: 100%
176 |
177 | Example for using AI to complete code in an editor.
178 | :::
179 |
180 | :::{admonition} AI tools open up a box of questions
181 | - Legal
182 | - Ethical
183 | - Privacy
184 | - Lock-in/ monopolies
185 | - Lack of diversity
186 | - Will we still need to learn programming?
187 | - How will it affect learning and teaching programming?
188 | :::
189 |
--------------------------------------------------------------------------------
/content/productivity/chatgpt.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/productivity/chatgpt.png
--------------------------------------------------------------------------------
/content/productivity/code-completion.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/productivity/code-completion.gif
--------------------------------------------------------------------------------
/content/profiling.md:
--------------------------------------------------------------------------------
1 | # Profiling
2 |
3 | :::{objectives}
4 | - Understand when improving code performance is worth the time and effort.
5 | - Knowing how to find performance bottlenecks in Python code.
6 | - Try `scalene` as one of many tools to profile Python code.
7 | :::
8 |
9 | :::{instructor-note}
10 | - Discussion: 20 min
11 | - Exercise: 20 min
12 | :::
13 |
14 |
15 | ## Should we even optimize the code?
16 |
17 | Classic quote to keep in mind: "Premature optimization is the root of all evil." [Donald Knuth]
18 |
19 | :::{discussion}
20 | It is important to ask ourselves whether it is worth it.
21 | - Is it worth spending e.g. 2 days to make a program run 20% faster?
22 | - Is it worth optimizing the code so that it spends 90% less memory?
23 |
24 | Depends. What does it depend on?
25 | :::
26 |
27 |
28 | ## Measure instead of guessing
29 |
30 | Before doing code surgery to optimize the run time or lower the memory usage,
31 | we should **measure** where the bottlenecks are. This is called **profiling**.
32 |
33 | Analogy: Medical doctors don't start surgery based on guessing. They first measure
34 | (X-ray, MRI, ...) to know precisely where the problem is.
35 |
36 | Not only can programming beginners otherwise guess wrong; experienced
37 | programmers can also be surprised by the results of profiling.
38 |
39 |
40 | ## One of the simplest tools is to insert timers
41 |
42 | Below we will list some tools that can be used to profile Python code.
43 | But even without these tools you can find **time-consuming parts** of your code
44 | by inserting timers:
45 |
46 |
47 |
48 | ```{code-block} python
49 | ---
50 | emphasize-lines: 1,8,10
51 | ---
52 | import time
53 |
54 |
55 | # ...
56 | # code before the function
57 |
58 |
59 | start = time.time()
60 | result = some_function()
61 | print(f"some_function took {time.time() - start} seconds")
62 |
63 |
64 | # code after the function
65 | # ...
66 | ```
67 |
68 |
69 | ## Many tools exist
70 |
71 | The list below is probably not complete, but it gives an overview of the
72 | different tools available for profiling Python code.
73 |
74 | CPU profilers:
75 | - [cProfile and profile](https://docs.python.org/3/library/profile.html)
76 | - [line_profiler](https://kernprof.readthedocs.io/)
77 | - [py-spy](https://github.com/benfred/py-spy)
78 | - [Yappi](https://github.com/sumerc/yappi)
79 | - [pyinstrument](https://pyinstrument.readthedocs.io/)
80 | - [Perfetto](https://perfetto.dev/docs/analysis/trace-processor-python)
81 |
82 | Memory profilers:
83 | - [memory_profiler](https://pypi.org/project/memory-profiler/) (not actively maintained)
84 | - [Pympler](https://pympler.readthedocs.io/)
85 | - [tracemalloc](https://docs.python.org/3/library/tracemalloc.html)
86 | - [guppy/heapy](https://github.com/zhuyifei1999/guppy3/)
87 |
88 | Both CPU and memory:
89 | - [Scalene](https://github.com/plasma-umass/scalene)
90 |
91 | In the exercise below, we will use Scalene to profile a Python program. Scalene
92 | is a sampling profiler that can profile CPU, memory, and GPU usage of Python.
93 |
94 |
95 | ## Tracing profilers vs. sampling profilers
96 |
97 | **Tracing profilers** record every function call and event in the program,
98 | logging the exact sequence and duration of events.
99 | - **Pros:**
100 | - Provides detailed information on the program's execution.
101 | - Deterministic: Captures exact call sequences and timings.
102 | - **Cons:**
103 | - Higher overhead, slowing down the program.
104 |   - Can generate large amounts of data.
105 |
106 | **Sampling profilers** periodically sample the program's state (where it is
107 | and how much memory is used), providing a statistical view of where time is
108 | spent.
109 | - **Pros:**
110 | - Lower overhead, as it doesn't track every event.
111 | - Scales better with larger programs.
112 | - **Cons:**
113 | - Less precise, potentially missing infrequent or short calls.
114 | - Provides an approximation rather than exact timing.
115 |
116 | :::{discussion} Analogy: Imagine we want to optimize the London Underground (subway) system
117 | We wish to detect bottlenecks in the system to improve the service and for this we have
118 | asked a few passengers to help us by tracking their journey.
119 | - **Tracing**: We follow every train and passenger, recording every stop
120 | and delay. When passengers enter and exit the train, we record the exact time
121 | and location.
122 | - **Sampling**: Every 5 minutes the phone notifies the passenger to note
123 | down their current location. We then use this information to estimate
124 | the most crowded stations and trains.
125 | :::
126 |
127 |
128 | ## Choosing the right system size
129 |
130 | Sometimes we can configure the system size (for instance the time step in a simulation
131 | or the number of time steps or the matrix dimensions) to make the program finish sooner.
132 |
133 | For profiling, we should choose a system size that is **representative of the real-world**
134 | use case. If we profile a program with a small input size, we might not see the same
135 | bottlenecks as when running the program with a larger input size.
136 |
137 | Often, when we scale up the system size, or scale the number of processors, new bottlenecks
138 | might appear which we didn't see before. This brings us back to: "measure instead of guessing".
139 |
140 |
141 | ## Exercises
142 |
143 | ::::{exercise} Exercise: Practicing profiling
144 | In this exercise we will use the Scalene profiler to find out where most of the time is spent
145 | and most of the memory is used in a given code example.
146 |
147 | Please try to go through the exercise in the following steps:
148 | 1. Make sure `scalene` is installed in your environment (if you have followed
149 | this course from the start and installed the recommended software
150 | environment, then it is).
151 | 1. Download Leo Tolstoy's "War and Peace" from the following link (the text is
152 | provided by [Project Gutenberg](https://www.gutenberg.org/)):
153 | <https://www.gutenberg.org/cache/epub/2600/pg2600.txt>
154 | (right-click and "save as" to download the file and **save it as "book.txt"**).
155 | 1. **Before** you run the profiler, try to predict in which function the code
156 | (the example code is below)
157 | will spend most of the time and in which function it will use most of the
158 | memory.
159 | 1. Save the example code as `example.py` and
160 | run the `scalene` profiler on the following code example and browse the
161 | generated HTML report to find out where most of the time is spent and where
162 | most of the memory is used:
163 | ```console
164 | $ scalene example.py
165 | ```
166 | Alternatively you can do this (and then open the generated file in a browser):
167 | ```console
168 | $ scalene example.py --html > profile.html
169 | ```
170 | You can find an example of the generated HTML report in the solution below.
171 | 1. Does the result match your prediction? Can you explain the results?
172 |
173 | Example code (`example.py`):
174 | :::{literalinclude} profiling/exercise.py
175 | :::
176 |
177 | :::{solution}
178 | ```{figure} profiling/exercise.png
179 | :alt: Result of the profiling run for the above code example.
180 | :width: 100%
181 |
182 | Result of the profiling run for the above code example. You can click on the image to make it larger.
183 | ```
184 |
185 | Results:
186 | - Most time is spent in the `count_unique_words2` function.
187 | - Most memory is used in the `count_unique_words1` function.
188 |
189 | Explanation:
190 | - The `count_unique_words2` function is the slowest because it **uses a list**
191 | to store unique words and checks if a word is already in the list before
192 | adding it.
193 | Checking whether a list contains an element might require traversing the
194 | whole list, which is an O(n) operation. As the list grows in size,
195 | the lookup time increases with the size of the list.
196 | - The `count_unique_words1` and `count_unique_words3` functions are faster
197 | because they **use a set** to store unique words.
198 | Checking whether a set contains an element is an O(1) operation.
199 | - The `count_unique_words1` function uses the most memory because it **creates
200 | a list of all words** in the text file and then **creates a set** from that
201 | list.
202 | - The `count_unique_words3` function uses less memory because it traverses
203 | the text file line by line instead of reading the whole file into memory.
204 |
205 | What we can learn from this exercise:
206 | - When processing large files, it can be good to read them line by line
207 | or in batches
208 | instead of reading the whole file into memory.
209 | - It is good to get an overview over standard data structures and their
210 | advantages and disadvantages (e.g. adding an element to a list is fast but checking whether
211 | it already contains the element can be slow).
212 | :::
213 | ::::
214 |
215 |
216 | ## Additional resources
217 |
218 | - [Python performance workshop (by ENCCS)](https://enccs.github.io/python-perf/profile/)
219 |
--------------------------------------------------------------------------------
/content/profiling/exercise.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/content/profiling/exercise.png
--------------------------------------------------------------------------------
/content/profiling/exercise.py:
--------------------------------------------------------------------------------
1 | """
2 | The code below reads a text file and counts the number of unique words in it
3 | (case-insensitive).
4 | """
5 | import re
6 |
7 |
8 | def count_unique_words1(file_path: str) -> int:
9 | with open(file_path, "r", encoding="utf-8") as file:
10 | text = file.read()
11 | words = re.findall(r"\b\w+\b", text.lower())
12 | return len(set(words))
13 |
14 |
15 | def count_unique_words2(file_path: str) -> int:
16 | unique_words = []
17 | with open(file_path, "r", encoding="utf-8") as file:
18 | for line in file:
19 | words = re.findall(r"\b\w+\b", line.lower())
20 | for word in words:
21 | if word not in unique_words:
22 | unique_words.append(word)
23 | return len(unique_words)
24 |
25 |
26 | def count_unique_words3(file_path: str) -> int:
27 | unique_words = set()
28 | with open(file_path, "r", encoding="utf-8") as file:
29 | for line in file:
30 | words = re.findall(r"\b\w+\b", line.lower())
31 | for word in words:
32 | unique_words.add(word)
33 | return len(unique_words)
34 |
35 |
36 | def main():
37 | # book.txt is downloaded from https://www.gutenberg.org/cache/epub/2600/pg2600.txt
38 | _result = count_unique_words1("book.txt")
39 | _result = count_unique_words2("book.txt")
40 | _result = count_unique_words3("book.txt")
41 |
42 |
43 | if __name__ == "__main__":
44 | main()
45 |
--------------------------------------------------------------------------------
/content/python.rst:
--------------------------------------------------------------------------------
1 | Introduction to Python
2 | ======================
3 |
4 | .. questions::
5 |
6 | - What are the basic blocks of Python language?
7 | - How are functions and classes defined in Python?
8 |
9 | .. objectives::
10 |
11 | - Get a *very* short introduction to Python types and syntax
12 | - Be able to follow the rest of the examples in the course, even if you don't understand everything perfectly.
13 |
14 | We expect everyone to be able to know the following basic material
15 | to follow the course (though it is not *everything* you need to
16 | know about Python).
17 |
18 | If you are not familiar with Python, here is a *very* short
19 | introduction. It will not be enough to do everything in this course,
20 | but you will be able to follow along a bit more than you would otherwise.
21 |
22 | .. seealso::
23 |
24 | This page contains an overview of the basics of Python. You can
25 | also refer to `This Python overview from a different lesson
26 | `__
27 | which is slightly more engaging.
28 |
29 |
30 |
31 | Scalars
32 | -------
33 |
34 | Scalar types, that is, single elements of various types:
35 |
36 | ::
37 |
38 | i = 42 # integer
39 | i = 2**77 # Integers have arbitrary precision
40 | g = 3.14 # floating point number
41 | c = 2 - 3j # Complex number
42 | b = True # boolean
43 | s = "Hello!" # String (Unicode)
44 | q = b'Hello' # bytes (8-bit values)
45 |
46 | Read more: :class:`int`, :class:`float`, :class:`complex`,
47 | :class:`bool`, :class:`str`, :class:`bytes`.
48 |
49 |
50 | Collections
51 | -----------
52 |
53 | Collections are data structures capable of storing multiple values.
54 |
55 | ::
56 |
57 | l = [1, 2, 3] # list
58 | l[1] # lists are indexed by int
59 | l[1] = True # list elements can be any type
60 | d = {"Janne": 123, "Richard": 456} # dictionary
61 | d["Janne"]
62 | s = set(("apple", "cherry", "banana", "apple")) # Set of unique values
63 | s
64 |
65 | Read more: :class:`list`, :class:`tuple`, :class:`dict`, :class:`set`.
66 |
67 |
68 | Control structures
69 | ------------------
70 |
71 | Python has the usual control structures, that is conditional
72 | statements and loops. For example, the :ref:`if` statement:
73 |
74 | ::
75 |
76 | x = 2
77 | if x == 3:
78 | print('x is 3')
79 | elif x == 2:
80 | print('x is 2')
81 | else:
82 | print('x is something else')
83 |
84 | :ref:`While ` loops loop until some condition is met:
85 |
86 | ::
87 |
88 | x = 0
89 | while x < 42:
90 | print('x is ', x)
91 | x += 0.2
92 |
93 | :ref:`For ` loops loop over some collection of values:
94 |
95 | ::
96 |
97 | xs = [1, 2, 3, 4]
98 | for x in xs:
99 | print(x)
100 |
101 |
102 | Often you want to loop over a sequence of integers, in that case the
103 | :class:`range` function is useful:
104 |
105 | ::
106 |
107 | for x in range(9):
108 | print(x)
109 |
110 | Another common need is to iterate over a collection, but at the same
111 | time also have an index number. For this there is the :func:`enumerate`
112 | function:
113 |
114 | ::
115 |
116 | xs = [1, 'hello', 'world']
117 | for ii, x in enumerate(xs):
118 | print(ii, x)
119 |
120 |
121 | Functions and classes
122 | ---------------------
123 |
124 | Python functions are defined by the :ref:`def` keyword. They take a
125 | number of arguments, and return a number of return values.
126 |
127 | ::
128 |
129 | def hello(name):
130 | """Say hello to the person given by the argument"""
131 | print('Hello', name)
132 | return 'Hello ' + name
133 |
134 | hello("Anne")
135 |
136 | Classes are defined by the :ref:`class` keyword:
137 |
138 | ::
139 |
140 | class Hello:
141 | def __init__(self, name):
142 | self._name = name
143 | def say(self):
144 | print('Hello', self._name)
145 |
146 | h = Hello("Richard")
147 | h.say()
148 |
149 |
150 | Python type system
151 | ------------------
152 |
153 | Python is strongly and dynamically typed.
154 |
155 | Strong here means, roughly, that it's not possible to circumvent the
156 | type system (at least, not easily, and not without invoking undefined
157 | behavior).
158 |
159 | ::
160 |
161 | x = 42
162 | type(x)
163 | x + "hello"
164 |
165 | Dynamic typing means that types are determined at runtime, and a
166 | variable can be redefined to refer to an instance of another type:
167 |
168 | ::
169 |
170 | x = 42
171 | x = "hello"
172 |
173 |
174 | *Jargon*: Types are associated with rvalues, not lvalues. In
175 | statically typed language, types are associated with lvalues, and are
176 | (typically) reified during compilation.
177 |
178 |
179 | ??? (lesson here)
180 |
181 |
182 |
183 | .. keypoints::
184 |
185 | - Python offers a nice set of basic types as many other programming languages
186 | - Python is strongly typed and dynamically typed
187 |
--------------------------------------------------------------------------------
/content/quick-reference.rst:
--------------------------------------------------------------------------------
1 | Quick reference
2 | ===============
3 |
4 | * `Pandas cheatsheet
5 | `__ (pandas.pydata.org)
6 |
7 | * `Pandas cheatsheet
8 | `__
9 | (via `Datacamp
10 | `__)
11 |
12 | * `Numpy cheatsheet
13 | `__
14 | (via `Datacamp
15 | `__)
16 |
17 | * `JupyterLab cheatsheet
18 | `__
19 |
20 | * `Matplotlib cheatsheet
21 | `__
22 | (via `Datacamp
23 | `__)
24 |
25 | * `Numpy, Pandas, Matplotlib, Scikit-learn all together
26 | `__
27 |
--------------------------------------------------------------------------------
/content/scipy.rst:
--------------------------------------------------------------------------------
1 | SciPy
2 | =====
3 |
4 | .. questions::
5 |
6 | - When you need more advanced mathematical functions, where do you
7 | look?
8 |
9 | .. objectives::
10 |
11 | - Understand that SciPy exists and what kinds of things it has.
12 | - Understand the importance of using external libraries and how to
13 | use them.
14 | - Understand the purpose of wrapping existing C/Fortran code.
15 | - Non-objective: know details of everything (or anything) in SciPy.
16 |
17 | .. seealso::
18 |
19 | * Main article: `SciPy documentation `__
20 |
21 |
22 |
23 | SciPy is a library that builds on top of NumPy. It contains a lot of
24 | interfaces to battle-tested numerical routines written in Fortran or
25 | C, as well as python implementations of many common algorithms.
26 |
27 |
28 |
29 | What's in SciPy?
30 | ----------------
31 |
32 | Briefly, it contains functionality for
33 |
34 | - Special functions (Bessel, Gamma, etc.)
35 | - Numerical integration
36 | - Optimization
37 | - Interpolation
38 | - Fast Fourier Transform (FFT)
39 | - Signal processing
40 | - Linear algebra (more complete than in NumPy)
41 | - Sparse matrices
42 | - Statistics
43 | - More I/O routine, e.g. Matrix Market format for sparse matrices,
44 | MATLAB files (.mat), etc.
45 |
46 | Many (most?) of these are not written specifically for SciPy, but use
47 | the best available open source C or Fortran libraries. Thus, you get
48 | the best of Python and the best of compiled languages.
49 |
50 | Most functions are documented ridiculously well from a scientific
51 | standpoint: you aren't just using some unknown function, but have a
52 | full scientific description and citation to the method and
53 | implementation.
54 |
55 |
56 |
57 | Exercises: use SciPy
58 | --------------------
59 |
60 | These exercises do not exist because *you* might need *these*
61 | functions someday. They are because *you* will need to *read
62 | documentation and understand documentation of an external library*
63 | eventually.
64 |
65 | 1: Numerical integration
66 | ~~~~~~~~~~~~~~~~~~~~~~~~
67 |
68 | .. challenge::
69 |
70 | Do the following exercise **or** read the documentation and
71 | understand the relevant functions of SciPy:
72 |
73 | Define a function of one variable and using
74 | `scipy.integrate.quad `__
75 | calculate the integral of your function in the
76 | interval ``[0.0, 4.0]``. Then vary the interval and also modify the function and check
77 | whether scipy can integrate it.
78 |
79 |
80 | .. solution::
81 |
82 | .. code-block:: python
83 |
84 | from scipy import integrate
85 |
86 | def myfunction(x):
87 | # you need to define result
88 | return result
89 |
90 | integral = integrate.quad(myfunction, 0.0, 4.0)
91 | print(integral)
92 |
93 | `quad
94 | `__
95 | uses the Fortran library QUADPACK, which one can assume is pretty
96 | good. You can also see a whole lot of scientific information about
97 | the function on the docs page - including the scientific names of
98 | the methods used.
99 |
100 |
101 |
102 | 2: Sparse matrices
103 | ~~~~~~~~~~~~~~~~~~
104 |
105 | .. challenge::
106 |
107 | Do the following exercise **or** read the documentation and
108 | understand the relevant functions of SciPy:
109 |
110 | Use the SciPy sparse matrix functionality to create a random sparse
111 | matrix with a probability of non-zero elements of 0.05 and size 10000
112 | x 10000. Then use the SciPy sparse linear algebra support to calculate
113 | the matrix-vector product of the sparse matrix you just created and a
114 | random vector. Use the %timeit macro to measure how long it
115 | takes. Does the optional ``format`` argument when you create the
116 | sparse matrix make a difference?
117 |
118 | Then, compare to how long it takes if you'd instead first convert the
119 | sparse matrix to a normal NumPy dense array, and use the NumPy ``dot``
120 | method to calculate the matrix-vector product.
121 |
122 | Can you figure out a quick rule of thumb when it's worth using a
123 | sparse matrix representation vs. a dense representation?
124 |
125 | .. solution::
126 |
127 | The basic code to do the test is:
128 |
129 | .. code-block::
130 |
131 | import numpy
132 | import scipy.sparse
133 |
134 | vector = numpy.random.random(10000)
135 | matrix = scipy.sparse.rand(10000, 10000, density=.05, format='csc')
136 |
137 | # We time this line
138 | matrix.dot(vector)
139 |
140 | From the top of the `sparse matrix module documentation
141 | `__, we can
142 | see there are a variety of different available sparse matrix types:
143 | ``bsr``, ``coo``, ``csr``, ``csc``, etc. These each represent a
144 | different way of storing the matrices.
145 |
146 | It seems that ``csr`` and ``csc`` are fairly fast. ``lil`` and
147 | ``dok`` are slow but it says that these are good for creating
148 | matrices with random insertions.
149 |
150 | For example, ``csr`` takes 7ms, ``lil`` 42ms, ``dok`` 1600ms, and
151 | converting to a non-sparse array ``matrix.toarray()`` and
152 | multiplying takes 64ms on one particular computer.
153 |
154 | This code allows us to time the performance at different
155 | densities. It seems that with the ``csr`` format, sparse is better
156 | below densities of around .4 to .5:
157 |
158 | .. code-block::
159 |
160 | for density in [.01, .05, .1, .2, .3, .4, .5]:
161 | matrix = scipy.sparse.rand(10000, 10000, density=density, format='csr')
162 | time_sparse = timeit.timeit('matrix.dot(vector)', number=10, globals=globals())
163 | matrix2 = matrix.toarray()
164 | time_full = timeit.timeit('matrix2.dot(vector)', number=10, globals=globals())
165 | print(f"{density} {time_sparse:.3f} {time_full:.3f}")
166 |
167 |
168 |
169 | See also
170 | --------
171 |
172 | * `SciPy general introduction `__
173 | * `SciPy documentation
174 | `__
175 |
176 |
177 |
178 | .. keypoints::
179 |
180 | - When you need advanced math or scientific functions, let's just
181 | admit it: you do a web search first.
182 | - But when you see something in SciPy come up, you know your
183 | solutions are in good hands.
184 |
--------------------------------------------------------------------------------
/content/work-with-data.rst:
--------------------------------------------------------------------------------
1 | Working with Data
2 | =================
3 |
4 | .. questions::
5 |
6 | - How do you store your data right now?
7 | - Are you doing data cleaning / preprocessing every time you load the data?
8 |
9 | .. objectives::
10 |
11 | - Learn benefits/drawbacks of common data formats.
12 | - Learn how you can read and write data in a variety of formats.
13 |
14 |
15 | .. figure:: https://imgs.xkcd.com/comics/norm_normal_file_format.png
16 |
17 | Source: `xkcd #2116 `__
18 |
19 |
20 | What is a data format?
21 | ----------------------
22 |
23 | Data format can mean two different things
24 |
25 | 1. `data structure `__ or how
26 | you're storing the data in memory while you're working on it;
27 | 2. `file format `__ or the way you're
28 | storing the data in the disk.
29 |
30 | Let's consider this randomly generated DataFrame with various columns::
31 |
32 | import pandas as pd
33 | import numpy as np
34 |
35 | n_rows = 100000
36 |
37 | dataset = pd.DataFrame(
38 | data={
39 | 'string': np.random.choice(('apple', 'banana', 'carrot'), size=n_rows),
40 | 'timestamp': pd.date_range("20130101", periods=n_rows, freq="s"),
41 | 'integer': np.random.choice(range(0,10), size=n_rows),
42 | 'float': np.random.uniform(size=n_rows),
43 | },
44 | )
45 |
46 | dataset.info()
47 |
48 | This DataFrame is structured in the *tidy data* format.
49 | In tidy data we have multiple columns of data that are collected in a Pandas
50 | DataFrame, where each column represents a value of a specific type.
51 |
52 | .. image:: img/pandas/tidy_data.png
53 |
54 | Let's consider another example::
55 |
56 | n = 1000
57 |
58 | data_array = np.random.uniform(size=(n,n))
59 | np.info(data_array)
60 |
61 |
62 | Here we have a different data structure: we have a two-dimensional array of numbers.
63 | This is different to a Pandas DataFrame as data is stored as one contiguous block
64 | instead of individual columns. This also means that the whole array must have one
65 | data type.
66 |
67 |
68 | .. figure:: https://github.com/elegant-scipy/elegant-scipy/raw/master/figures/NumPy_ndarrays_v2.png
69 |
70 | Source: `Elegant Scipy `__
71 |
72 | Now the question is: **Can the data be saved to the disk without changing the
73 | data format?**
74 |
75 | For this we need a **file format** that can easily store our **data structure**.
76 |
77 | .. admonition:: Data type vs. data structure vs. file format
78 | :class: dropdown
79 |
80 | - **Data type:** Type of a single piece of data (integer, string,
81 | float, ...).
82 | - **Data structure:** How the data is organized in memory (individual
83 | columns, 2D-array, nested dictionaries, ...).
84 | - **File format:** How the data is organized when it is saved to the disk
85 | (columns of strings, block of binary data, ...).
86 |
87 | For example, a black and white image stored as a .png-file (**file format**)
88 | might be stored in memory as an NxM array (**data structure**) of integers
89 | (**data type**) with each entry representing the color value of the pixel.
90 |
91 | What to look for in a file format?
92 | ----------------------------------
93 |
94 | When deciding which file format you should use for your program, you should
95 | remember the following:
96 |
97 | **There is no file format that is good for every use case.**
98 |
99 | and
100 |
101 | **It is very likely, that a good format already exists for your use case.**
102 |
103 | There are, indeed, various standard file formats for various use cases:
104 |
105 | .. figure:: https://imgs.xkcd.com/comics/standards.png
106 |
107 | Source: `xkcd #927 `__.
108 |
109 | Usually, you'll want to consider the following things when choosing a file
110 | format:
111 |
112 | 1. Is the file format good for my data structure (is it fast/space
113 | efficient/easy to use)?
114 | 2. Is everybody else / leading authorities in my field recommending a certain
115 | format?
116 | 3. Do I need a human-readable format or is it enough to work on it using code?
117 | 4. Do I want to archive / share the data or do I just want to store it while
118 | I'm working?
119 |
120 | Pandas supports
121 | `many file formats `__
122 | for tidy data and Numpy supports
123 | `some file formats `__
124 | for array data. However, there are many other file formats that can be used
125 | through other libraries.
126 |
127 | Table below describes some data formats:
128 |
129 | .. list-table::
130 | :header-rows: 1
131 |
132 | * - | Name:
133 | - | Human
134 | | readable:
135 | - | Space
136 | | efficiency:
137 | - | Arbitrary
138 | | data:
139 | - | Tidy
140 | | data:
141 | - | Array
142 | | data:
143 | - | Long term
144 | | storage/sharing:
145 |
146 | * - :ref:`Pickle `
147 | - ❌
148 | - 🟨
149 | - ✅
150 | - 🟨
151 | - 🟨
152 | - ❌
153 |
154 | * - :ref:`CSV `
155 | - ✅
156 | - ❌
157 | - ❌
158 | - ✅
159 | - 🟨
160 | - ✅
161 |
162 | * - :ref:`Feather `
163 | - ❌
164 | - ✅
165 | - ❌
166 | - ✅
167 | - ❌
168 | - ❌
169 |
170 | * - :ref:`Parquet `
171 | - ❌
172 | - ✅
173 | - 🟨
174 | - ✅
175 | - 🟨
176 | - ✅
177 |
178 | * - :ref:`npy `
179 | - ❌
180 | - 🟨
181 | - ❌
182 | - ❌
183 | - ✅
184 | - ❌
185 |
186 | * - :ref:`HDF5 `
187 | - ❌
188 | - ✅
189 | - ❌
190 | - ❌
191 | - ✅
192 | - ✅
193 |
194 | * - :ref:`NetCDF4 `
195 | - ❌
196 | - ✅
197 | - ❌
198 | - ❌
199 | - ✅
200 | - ✅
201 |
202 | * - :ref:`JSON `
203 | - ✅
204 | - ❌
205 | - 🟨
206 | - ❌
207 | - ❌
208 | - ✅
209 |
210 | * - :ref:`Excel `
211 | - ❌
212 | - ❌
213 | - ❌
214 | - 🟨
215 | - ❌
216 | - 🟨
217 |
218 | * - :ref:`Graph formats `
219 | - 🟨
220 | - 🟨
221 | - ❌
222 | - ❌
223 | - ❌
224 | - ✅
225 |
226 | .. important::
227 |
228 | - ✅ : Good
229 | - 🟨 : Ok / depends on a case
230 | - ❌ : Bad
231 |
232 |
233 | A more in-depth analysis of the file formats mentioned above, can be found
234 | :doc:`here `.
235 |
236 | Pros and cons
237 | -------------
238 |
239 | Let's have a general look at pros and cons of some types of file formats
240 |
241 | Binary File formats
242 | ~~~~~~~~~~~~~~~~~~~
243 |
244 | Good things
245 | +++++++++++
246 |
247 | - Can represent floating point numbers with full precision.
248 | - Can potentially save lots of space, especially, when storing numbers.
249 | - Data reading and writing is usually much faster than loading from text files,
250 | since the format contains information about the data structure, and thus
251 | memory allocation can be done more efficiently.
252 | - More explicit specification for storing multiple data sets and metadata in
253 | the same file.
254 | - Many binary formats allow for partial loading of the data.
255 | This makes it possible to work with datasets that are larger than your
256 | computer's memory.
257 |
258 | Bad things
259 | ++++++++++
260 |
261 | - Commonly requires the use of a specific library to read and write the data.
262 | - Library specific formats can be version dependent.
263 | - Not human readable.
264 | - Sharing can be more difficult (requires some expertise to be able to
265 | read the data).
266 | - Might require more documentation efforts.
267 |
268 | Textual formats
269 | ~~~~~~~~~~~~~~~
270 |
271 | Good things
272 | +++++++++++
273 |
274 | - Human readable.
275 | - Easy to check for (structural) errors.
276 | - Supported by many tools out of the box.
277 | - Easily shared.
278 |
279 | Bad things
280 | ++++++++++
281 |
282 | - Can be slow to read and write.
283 | - High potential to increase required disk space substantially (e.g. when
284 | storing floating point numbers as text).
285 | - Prone to losing precision when storing floating point numbers.
286 | - Multi-dimensional data can be hard to represent.
287 | - While the data format might be specified, the data structure might not be
288 | clear when starting to read the data.
289 |
290 | Further considerations
291 | ~~~~~~~~~~~~~~~~~~~~~~
292 |
293 | - The closer your stored data is to the code, the more likely it depends on the
294 | environment you are working in. If you ``pickle``, e.g. a generated model,
295 | you can only be sure that the model will work as intended if you load it in
296 | an environment that has the same versions of all libraries the model depends
297 | on.
298 |
299 |
300 | Exercise
301 | --------
302 |
303 | .. challenge::
304 |
305 | You have a model that you have been training for a while.
306 | Let's assume it's a relatively simple neural network (consisting of a
307 | network structure and its associated weights).
308 |
309 | Let's consider 2 scenarios
310 |
311 | A: You have a different project, that is supposed to take this model, and
312 | do some processing with it to determine its efficiency after different
313 | times of training.
314 |
315 | B: You want to publish the model and make it available to others.
316 |
317 | What are good options to store the model in each of these scenarios?
318 |
319 | .. solution::
320 |
321 | A:
322 |
323 | Some export into a binary format that can be easily read. E.g. pickle
324 | or a specific export function from the library you use.
325 |
326 | It also depends on whether you intend to make the intermediary steps
327 | available to others. If you do, you might also want to consider storing
328 | structure and weights separately or use a format specific for the
329 | type of model you are training to keep the data independent of the
330 | library.
331 |
332 | B:
333 |
334 | You might want to consider a more general format that is supported by
335 | many libraries, e.g. ONNX, or a format that is specifically designed
336 | for the type of model you are training.
337 |
338 | You might also want to consider additionally storing the model in a way
339 | that is easily readable by humans, to make it easier for others to
340 | understand the model.
341 |
342 |
343 | Case study: Converting untidy data to tidy data
344 | -----------------------------------------------
345 |
346 | Many data analysis tools (like Pandas) are designed to work with tidy data,
347 | but some data is not in a suitable format. What we have seen often in the
348 | past is people then not using the powerful tools, but write complicated
349 | scripts that extract individual pieces from the data each time they need
350 | to do a calculation.
351 |
352 | As an example, let's see how we can use country data from an example REST API
353 | endpoint (for more information on how to work with web APIs, see
354 | :doc:`this page `). Let's get the data with the following piece
355 | of code:
356 |
357 | .. code-block:: python
358 |
359 | import json
360 | import requests
361 |
362 | url = 'https://api.sampleapis.com/countries/countries'
363 |
364 | response = requests.get(url)
365 |
366 | countries_json = json.loads(response.content)
367 |
368 | Let's try to find the country with the largest population.
369 |
370 | An example of a "questionable" way of solving this problem would be something
371 | like the following piece of code that is written in pure Python:
372 |
373 | .. code-block:: python
374 |
375 | max_population = 0
376 | top_population_country = ''
377 |
378 | for country in countries_json:
379 | if country.get('population', 0) > max_population:
380 | top_population_country = country['name']
381 | max_population = country.get('population', 0)
382 |
383 | print(top_population_country)
384 |
385 | This is a very natural way of writing a solution for the problem, but it has
386 | major caveats:
387 |
388 | 1. We throw all of the other data out so we cannot answer any
389 | follow up questions.
390 | 2. For bigger data, this would be very slow and ineffective.
391 | 3. We have to write lots of code to do a simple thing.
392 |
393 | Another typical solution would be something like the following code,
394 | which picks some of the data and creates a Pandas dataframe out of it:
395 |
396 | .. code-block:: python
397 |
398 | import pandas as pd
399 |
400 | countries_list = []
401 |
402 | for country in countries_json:
403 | countries_list.append([country['name'], country.get('population',0)])
404 |
405 | countries_df = pd.DataFrame(countries_list, columns=['name', 'population'])
406 |
407 | print(countries_df.nlargest(1, 'population')['name'].values[0])
408 |
409 | This solution has many of the same problems as the previous one, but now we can
410 | use Pandas to do follow up analysis.
411 |
412 | A better solution would be to use Pandas'
413 | `pandas.DataFrame.from_dict `__
414 | or `pandas.json_normalize `__
415 | to read the full data in:
416 |
417 | .. code-block:: python
418 |
419 | countries_df = pd.DataFrame.from_dict(countries_json)
420 | print(countries_df.nlargest(1, 'population')['name'].values[0])
421 |
422 | countries_df = pd.json_normalize(countries_json)
423 | print(countries_df.nlargest(1, 'population')['name'].values[0])
424 |
425 | .. admonition:: Key points
426 |
427 | - Convert your data to a format where it is easy to do analysis on it.
428 | - Check the tools you're using if they have an existing feature that can help
429 | you read the data in.
430 |
431 |
432 | Things to remember
433 | ------------------
434 |
435 | 1. **There is no file format that is good for every use case.**
436 | 2. Usually, your research question determines which libraries you want to use
437 | to solve it. Similarly, the data format you have determines file format you
438 | want to use.
439 | 3. However, if you're using a previously existing framework or tools or you
440 | work in a specific field, you should prioritize using the formats that are
441 | used in said framework/tools/field.
442 | 4. When you're starting your project, it's a good idea to take your initial
443 | data, clean it, and store the results in a good binary format that works as
444 | a starting point for your future analysis. If you've written the cleaning
445 | procedure as a script, you can always reproduce it.
446 | 5. Throughout your work, you should use code to turn important data to
447 | a human-readable format (e.g. plots, averages,
448 | :meth:`pandas.DataFrame.head`), not to keep your full data in a
449 | human-readable format.
450 | 6. Once you've finished, you should store the data in a format that can be
451 | easily shared to other people.
452 |
453 |
454 | See also
455 | --------
456 |
457 | - `Pandas' IO tools `__
458 | - `Tidy data comparison notebook `__
459 | - `Array data comparison notebook `__
460 |
461 |
462 | .. keypoints::
463 |
464 | - Pandas can read and write a variety of data formats.
465 | - There are many good, standard formats, and you don't need to create your own.
466 | - There are plenty of other libraries dedicated to various formats.
467 |
--------------------------------------------------------------------------------
/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=content
11 | set BUILDDIR=_build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | Sphinx
2 | sphinx_rtd_theme
3 | sphinx_rtd_theme_ext_color_contrast
4 | myst_nb
5 | sphinx-lesson
6 | https://github.com/aaltoscicomp/sphinx-aaltoscicomp-branding/archive/master.zip
7 | sphinxext-opengraph
8 | sphinx-thebe
9 |
10 | # for web-apis execution
11 | jsonlines
12 | bs4
13 |
--------------------------------------------------------------------------------
/resources/code/scripts/__pycache__/optionsparser.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/resources/code/scripts/__pycache__/optionsparser.cpython-38.pyc
--------------------------------------------------------------------------------
/resources/code/scripts/__pycache__/weather_functions.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/resources/code/scripts/__pycache__/weather_functions.cpython-38.pyc
--------------------------------------------------------------------------------
/resources/code/scripts/__pycache__/weather_functions_config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/resources/code/scripts/__pycache__/weather_functions_config.cpython-38.pyc
--------------------------------------------------------------------------------
/resources/code/scripts/optionsparser.py:
--------------------------------------------------------------------------------
1 | import yaml
2 |
def get_parameters(config_file, required, defaults):
    '''
    Read options from a YAML config file and return them as an object.

    Parameters:
        config_file: file name of the YAML file containing the options
        required: dict mapping required argument names to their expected types
        defaults: dict mapping optional argument names to their default values

    Returns: An object with fields named according to required and optional values.

    Raises:
        Exception: if a required argument is missing from the file, or if any
            argument present in the file has a different type than expected.
    '''
    # Context manager guarantees the file handle is closed (the previous
    # version left it open); safe_load avoids executing arbitrary YAML tags.
    with open(config_file) as f:
        options = yaml.safe_load(f)
    # Create an empty object that allows setting arbitrary attributes.
    parameters = type('Options', (), {})()
    # Required arguments: must be present and match the declared type.
    for arg in required:
        if arg not in options:
            raise Exception("Could not find required Argument " + arg + " aborting...")
        if not isinstance(options[arg], required[arg]):
            raise Exception("Expected input of type " + str(required[arg]) + " but got " + str(type(options[arg])))
        print("Setting " + arg + " to " + str(options[arg]))
        setattr(parameters, arg, options[arg])
    # Optional arguments: use the file's value when present (type-checked
    # against the type of the default), otherwise fall back to the default.
    for arg in defaults:
        if arg in options:
            if not isinstance(options[arg], type(defaults[arg])):
                # Wrong type for the parameter
                raise Exception("Expected input of type " + str(type(defaults[arg])) + " but got " + str(type(options[arg])))
            print("Setting " + arg + " to " + str(options[arg]))
            setattr(parameters, arg, options[arg])
        else:
            print(arg + " not found in option file. Using default: " + str(defaults[arg]))
            setattr(parameters, arg, defaults[arg])
    return parameters
37 |
38 |
39 |
--------------------------------------------------------------------------------
/resources/code/scripts/out.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/resources/code/scripts/out.png
--------------------------------------------------------------------------------
/resources/code/scripts/rain_in_cairo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/resources/code/scripts/rain_in_cairo.png
--------------------------------------------------------------------------------
/resources/code/scripts/weather.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/80042c08efd912d18034a3d977c31288922b93cf/resources/code/scripts/weather.png
--------------------------------------------------------------------------------
/resources/code/scripts/weather_functions.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import matplotlib.pyplot as plt
3 |
def preprocessing(dataset, start_date, end_date):
    """Parse the 'Local time' column as datetimes and keep only rows
    inside the [start_date, end_date] window (inclusive).

    Note: the 'Local time' column of the passed-in DataFrame is converted
    in place; the filtered DataFrame is returned.
    """
    # The dates in the file are day-first, which matplotlib does not
    # understand, so convert them to proper datetimes first.
    dataset['Local time'] = pd.to_datetime(dataset['Local time'], dayfirst=True)
    # Boolean mask selecting the requested date window.
    in_window = dataset['Local time'].between(start_date, end_date)
    return dataset[in_window]
10 |
11 |
def plot_data(dates, values):
    """Draw a line plot of values against dates and return (axes, figure)."""
    fig, ax = plt.subplots()
    ax.plot(dates, values)
    # Axis labels and title in a single call instead of three setters.
    ax.set(xlabel="Date of observation",
           ylabel="Temperature in Celsius",
           title="Temperature Observations")
    # Rotate/align the date tick labels so they do not overlap.
    fig.autofmt_xdate()
    return ax, fig
22 |
23 |
--------------------------------------------------------------------------------
/resources/code/scripts/weather_functions_config.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import matplotlib.pyplot as plt
3 |
def preprocessing(dataset, start_date, end_date):
    """Convert 'Local time' to datetime (day-first format, which
    matplotlib does not understand as strings) and return only the rows
    between start_date and end_date, inclusive.

    The conversion of the 'Local time' column mutates the caller's frame.
    """
    dataset['Local time'] = pd.to_datetime(dataset['Local time'], dayfirst=True)
    selected = dataset['Local time'].between(start_date, end_date)
    return dataset[selected]
10 |
11 |
def plot_data(dates, values, labels):
    """Line-plot values over dates, labelling the axes and title from the
    labels object (attributes xlabel, ylabel, title); return (axes, figure)."""
    fig, ax = plt.subplots()
    ax.plot(dates, values)
    # Apply the configured labels in one call.
    ax.set(xlabel=labels.xlabel, ylabel=labels.ylabel, title=labels.title)
    # Rotate/align the date tick labels so they read nicely.
    fig.autofmt_xdate()
    return ax, fig
22 |
23 |
--------------------------------------------------------------------------------
/resources/code/scripts/weather_observations.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8

# Download a weather dataset, select a date window, plot the temperature
# column and save the plot to 'weather.png'.

import pandas as pd
import weather_functions
# NOTE(review): weather_functions is imported but never used here — the
# preprocessing below duplicates weather_functions.preprocessing; confirm
# whether this script is meant to stay as the "before refactoring" version.

url = "https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/master/resources/data/scripts/weather_tapiola.csv"
weather = pd.read_csv(url,comment='#')

# define the start and end time for the plot (day-first DD/MM/YYYY)
start_date=pd.to_datetime('01/06/2021', dayfirst=True)
end_date=pd.to_datetime('01/10/2021', dayfirst=True)
# Preprocess the data: the dates in the file are day-first strings
weather['Local time'] = pd.to_datetime(weather['Local time'], dayfirst=True)
# select the data between start and end date (inclusive)
weather = weather[weather['Local time'].between(start_date,end_date)]

# Now, we have the data loaded, and adapted to our needs. So lets get plotting
import matplotlib.pyplot as plt
# start the figure.
fig, ax = plt.subplots()
ax.plot(weather['Local time'], weather['T'])
# label the axes
ax.set_xlabel("Date of observation")
ax.set_ylabel("Temperature in Celsius")
ax.set_title("Temperature Observations")
# adjust the date labels, so that they look nicer
fig.autofmt_xdate()

# save the figure
fig.savefig('weather.png')
--------------------------------------------------------------------------------
/resources/code/scripts/weather_observations_argparse.py:
--------------------------------------------------------------------------------
# Command-line version of the weather plotting script: reads a weather CSV,
# selects a date window and saves a temperature plot to the given output file.

import pandas as pd
import argparse

# Input/output files and the date window come from the command line;
# dates are given day-first (DD/MM/YYYY).
parser = argparse.ArgumentParser()
parser.add_argument("input", type=str, help="Input data file")
parser.add_argument("output", type=str, help="Output plot file")
parser.add_argument("-s", "--start", default="01/01/2019", type=str, help="Start date in DD/MM/YYYY format")
parser.add_argument("-e", "--end", default="16/10/2021", type=str, help="End date in DD/MM/YYYY format")

args = parser.parse_args()

# load the data
weather = pd.read_csv(args.input,comment='#')

# define the start and end time for the plot
start_date=pd.to_datetime(args.start, dayfirst=True)
end_date=pd.to_datetime(args.end, dayfirst=True)

# preprocess the data: the dates in the file are day-first strings
weather['Local time'] = pd.to_datetime(weather['Local time'], dayfirst=True)
# select the data between start and end date (inclusive)
weather = weather[weather['Local time'].between(start_date,end_date)]

# plot the data
import matplotlib.pyplot as plt
# start the figure.
fig, ax = plt.subplots()
ax.plot(weather['Local time'], weather['T'])
# label the axes
ax.set_xlabel("Date of observation")
ax.set_ylabel("Temperature in Celsius")
ax.set_title("Temperature Observations")
# adjust the date labels, so that they look nicer
fig.autofmt_xdate()

# save the figure
fig.savefig(args.output)
--------------------------------------------------------------------------------
/resources/code/scripts/weather_observations_config.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# coding: utf-8

# Plot a column of a weather dataset based on options given in a YAML
# configuration file (parsed by optionsparser.get_parameters).

import pandas as pd
from optionsparser import get_parameters
import argparse

# Lets start reading our config file. We'll use argparse to get the config file.
parser = argparse.ArgumentParser()
parser.add_argument('input', type=str,
                    help="Config File name ")
args = parser.parse_args()

# Set optional parameters with default values and required parameter values with their type
defaults = {
    "xlabel" : "Date of observation",
    "title" : "Weather Observations",
    "start" : "01/06/2021",
    "end" : "01/10/2021",
    "output" : "weather.png",
    "ylabel" : "Temperature in Celsius",
    "data_column" : "T",
}

required = {
    "input" : str
}

# now, parse the config file
parameters = get_parameters(args.input, required, defaults)

# load the data
weather = pd.read_csv(parameters.input, comment='#')

# obtain start and end date (day-first DD/MM/YYYY strings)
start_date = pd.to_datetime(parameters.start, dayfirst=True)
end_date = pd.to_datetime(parameters.end, dayfirst=True)

# Data preprocessing: the dates in the file are day-first strings
weather['Local time'] = pd.to_datetime(weather['Local time'], dayfirst=True)
# select the data inside the requested window (inclusive)
weather = weather[weather['Local time'].between(start_date, end_date)]

# Data plotting
import matplotlib.pyplot as plt
# start the figure.
fig, ax = plt.subplots()
# BUGFIX: plot the configured data column and apply the configured labels.
# Previously the column 'T' and the temperature labels were hard-coded, so
# config options like data_column/xlabel/ylabel/title (e.g. data_column: RRR
# in weather_options.yml) were silently ignored.
ax.plot(weather['Local time'], weather[parameters.data_column])
ax.set_xlabel(parameters.xlabel)
ax.set_ylabel(parameters.ylabel)
ax.set_title(parameters.title)
# adjust the date labels, so that they look nicer
fig.autofmt_xdate()

# save the figure
fig.savefig(parameters.output)
59 |
--------------------------------------------------------------------------------
/resources/code/scripts/weather_options.yml:
--------------------------------------------------------------------------------
1 | input: https://raw.githubusercontent.com/AaltoSciComp/python-for-scicomp/master/resources/data/scripts/weather_cairo.csv  # CSV data source (URL or local path)
2 | output: rain_in_cairo.png  # file name the figure is saved to
3 | xlabel: Days in June
4 | ylabel: Rainfall in mm
5 | title: Rainfall in Cairo
6 | data_column: RRR  # CSV column to plot (rainfall here, per the labels — confirm against the CSV header)
7 | start: 01/06/2021  # first date to include; parsed day-first (DD/MM/YYYY)
8 | end: 30/06/2021  # last date to include; parsed day-first (DD/MM/YYYY)
9 |
--------------------------------------------------------------------------------
/resources/data/plotting/README.md:
--------------------------------------------------------------------------------
1 | Data obtained from [Norsk
2 | Klimaservicesenter](https://seklima.met.no/observations/), Meteorologisk
3 | institutt (MET) (CC BY 4.0).
4 |
5 | The following changes were applied to the data to make it easier to work with:
6 | - The decimal separator was changed from a comma to a period.
7 | - The column separator was changed from a semicolon to a comma.
8 | - Missing values were replaced with zeros instead of a dash.
9 |
--------------------------------------------------------------------------------
/resources/data/plotting/exercise-2.csv:
--------------------------------------------------------------------------------
1 | xval,yval
2 | 01,7.7
3 | 02,6.6
4 | 03,4.5
5 | 04,9.8
6 | 05,17.7
7 | 06,25.4
8 | 07,26.7
9 | 08,25.1
10 | 09,19.3
11 | 10,9.8
12 |
--------------------------------------------------------------------------------
/resources/data/plotting/oslo-monthly.csv:
--------------------------------------------------------------------------------
1 | name,station,date,max temperature,precipitation,min temperature
2 | Oslo - Blindern,SN18700,10.2022,17.1,82.9,-0.4
3 | Oslo - Blindern,SN18700,11.2022,15.1,83.4,-2.1
4 | Oslo - Blindern,SN18700,12.2022,6.5,85.5,-14.6
5 | Oslo - Blindern,SN18700,01.2023,7.2,100.5,-13.4
6 | Oslo - Blindern,SN18700,02.2023,10.2,46,-9.4
7 | Oslo - Blindern,SN18700,03.2023,9.8,72.6,-12.6
8 | Oslo - Blindern,SN18700,04.2023,19.8,99.7,-4.7
9 | Oslo - Blindern,SN18700,05.2023,24.2,17,-0.8
10 | Oslo - Blindern,SN18700,06.2023,31.8,39.9,4.6
11 | Oslo - Blindern,SN18700,07.2023,28.4,146.9,8.6
12 | Oslo - Blindern,SN18700,08.2023,24.5,259.8,9.8
13 | Oslo - Blindern,SN18700,09.2023,25.1,105.8,5.3
14 | Oslo - Blindern,SN18700,10.2023,17.1,7.3,-0.7
15 |
--------------------------------------------------------------------------------
/resources/data/plotting/tromso-monthly.csv:
--------------------------------------------------------------------------------
1 | name,station,date,max temperature,precipitation,min temperature
2 | Tromso - Langnes,SN90490,10.2022,10.7,187,-4.2
3 | Tromso - Langnes,SN90490,11.2022,8.5,41.5,-7
4 | Tromso - Langnes,SN90490,12.2022,5.6,88.8,-11.7
5 | Tromso - Langnes,SN90490,01.2023,7.7,111.4,-13.9
6 | Tromso - Langnes,SN90490,02.2023,6.6,171.3,-10.7
7 | Tromso - Langnes,SN90490,03.2023,4.5,157,-15.1
8 | Tromso - Langnes,SN90490,04.2023,9.8,85,-7.1
9 | Tromso - Langnes,SN90490,05.2023,17.7,101.2,-4.6
10 | Tromso - Langnes,SN90490,06.2023,25.4,43.4,-0.4
11 | Tromso - Langnes,SN90490,07.2023,26.7,14,6
12 | Tromso - Langnes,SN90490,08.2023,25.1,43.4,5.4
13 | Tromso - Langnes,SN90490,09.2023,19.3,163.7,0.3
14 | Tromso - Langnes,SN90490,10.2023,9.8,64.8,-0.6
15 |
--------------------------------------------------------------------------------
/software/environment.yml:
--------------------------------------------------------------------------------
 1 | name: python-for-scicomp
 2 | channels:
 3 | - conda-forge
 4 | dependencies:
 5 | - jsonlines
 6 | - jupyterlab  # Jupyter: lab interface
 7 | - notebook  # Jupyter: classic notebook interface
 8 | - ipywidgets  # Jupyter: interactive widgets
 9 | - requests
10 | - numpy
11 | - scipy
12 | - matplotlib
13 | - seaborn
14 | - mpi4py  # parallel episode (MPI)
15 | - dask
16 | - setuptools  # packaging episode
17 | - twine  # packaging: uploading distributions
18 | - poetry
19 | - flit
20 | - scikit-learn
21 | - scalene  # profiling episode
22 | - ruff
23 | - altair-all  # plotting: Vega-Altair episode
24 | - vega_datasets
25 | - xarray
26 | - netcdf4
27 | - yfinance
28 | - pip
29 | - pip:
30 |   - pythia_datasets  # installed via pip (presumably not on conda-forge — confirm)
31 |
--------------------------------------------------------------------------------