├── .github └── workflows │ ├── build-and-deploy.yml │ ├── build.yml │ └── nightly.yml ├── .gitignore ├── README.md ├── book ├── _config.yml ├── _static │ └── lecture_specific │ │ ├── about_py │ │ ├── bn_density1.png │ │ ├── career_vf.png │ │ ├── pandas_vs_matlab.png │ │ ├── python_vs_matlab.png │ │ ├── qs.png │ │ └── qs.py │ │ ├── getting_started │ │ ├── editing_vim.png │ │ ├── ipython_shell.png │ │ ├── jp_demo.png │ │ ├── nb.png │ │ ├── nb2.png │ │ ├── nb3.png │ │ ├── nb6.png │ │ ├── nb6a.png │ │ ├── nb7.png │ │ ├── nb8.png │ │ ├── nb_run.png │ │ ├── nb_upload.png │ │ ├── nb_wntest2.png │ │ └── starting_nb.png │ │ ├── matplotlib │ │ └── matplotlib_ex1.png │ │ ├── pandas │ │ ├── data │ │ │ ├── test_pwt.csv │ │ │ └── ticker_data.csv │ │ ├── pandas_indices_pctchange.png │ │ ├── pandas_share_prices.png │ │ ├── pandas_vs_rest.png │ │ └── wb_download.py │ │ ├── parallelization │ │ └── htop_parallel_npmat.png │ │ ├── python_advanced_features │ │ ├── numbers.txt │ │ └── test_table.csv │ │ ├── python_by_example │ │ ├── pbe_ex2_fig.png │ │ └── test_program_1_updated.png │ │ ├── python_foundations │ │ ├── test_table.csv │ │ └── us_cities.txt │ │ ├── sci_libs │ │ └── nfs_ex1.png │ │ └── troubleshooting │ │ └── launch.png ├── _toc.yml ├── docs │ ├── about_py.md │ ├── debugging.md │ ├── functions.md │ ├── getting_started.md │ ├── index.md │ ├── matplotlib.md │ ├── need_for_speed.md │ ├── numba.md │ ├── numpy.md │ ├── oop_intro.md │ ├── pandas.md │ ├── parallelization.md │ ├── python_advanced_features.md │ ├── python_by_example.md │ ├── python_essentials.md │ ├── python_oop.md │ ├── quant-econ.bib │ ├── scipy.md │ └── writing_good_code.md └── qe-logo-large.png ├── environment.yml └── source_rst ├── about_lectures.rst ├── about_py.rst ├── debugging.rst ├── functions.rst ├── getting_started.rst ├── matplotlib.rst ├── need_for_speed.rst ├── numba.rst ├── numpy.rst ├── oop_intro.rst ├── pandas.rst ├── parallelization.rst ├── python_advanced_features.rst ├── python_by_example.rst 
├── python_essentials.rst ├── python_oop.rst ├── scipy.rst ├── troubleshooting.rst └── writing_good_code.rst /.github/workflows/build-and-deploy.yml: -------------------------------------------------------------------------------- 1 | name: Build and Deploy 2 | on: 3 | push: 4 | branches: 5 | - master 6 | jobs: 7 | build-and-deploy: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout 🛎️ 11 | uses: actions/checkout@v2 12 | with: 13 | persist-credentials: false 14 | 15 | - name: Setup Miniconda 16 | uses: goanpeca/setup-miniconda@v1 17 | with: 18 | auto-update-conda: true 19 | auto-activate-base: false 20 | miniconda-version: 'latest' 21 | python-version: 3.7 22 | environment-file: environment.yml 23 | activate-environment: qe-example 24 | 25 | - name: Install jupyter_book 26 | shell: bash -l {0} 27 | run: pip install git+https://github.com/ExecutableBookProject/jupyter-book.git@master 28 | 29 | - name: Build QuantEcon Example 30 | shell: bash -l {0} 31 | run: jb build book/ 32 | 33 | - name: Install SSH Client 🔑 34 | uses: webfactory/ssh-agent@v0.2.0 35 | with: 36 | ssh-private-key: ${{ secrets.DEPLOY_KEY }} 37 | 38 | - name: Deploy 🚀 39 | uses: JamesIves/github-pages-deploy-action@releases/v3 40 | with: 41 | SSH: true 42 | BRANCH: gh-pages 43 | FOLDER: book/_build/html 44 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Test Build 2 | on: 3 | push: 4 | branches-ignore: 5 | - master 6 | jobs: 7 | build: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout 🛎️ 11 | uses: actions/checkout@v2 12 | with: 13 | persist-credentials: false 14 | 15 | - name: Setup Miniconda 16 | uses: goanpeca/setup-miniconda@v1 17 | with: 18 | auto-update-conda: true 19 | auto-activate-base: false 20 | miniconda-version: 'latest' 21 | python-version: 3.7 22 | environment-file: environment.yml 23 | activate-environment: 
qe-example 24 | 25 | - name: Install jupyter_book 26 | shell: bash -l {0} 27 | run: pip install git+https://github.com/ExecutableBookProject/jupyter-book.git@master 28 | 29 | - name: Build QuantEcon Example 30 | shell: bash -l {0} 31 | run: jb build book/ -------------------------------------------------------------------------------- /.github/workflows/nightly.yml: -------------------------------------------------------------------------------- 1 | name: Build and Deploy (Nightly) 2 | on: 3 | schedule: 4 | - cron: '0 0 * * *' 5 | jobs: 6 | build-and-deploy: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - name: Checkout 🛎️ 10 | uses: actions/checkout@v2 11 | with: 12 | persist-credentials: false 13 | 14 | - name: Setup Miniconda 15 | uses: goanpeca/setup-miniconda@v1 16 | with: 17 | auto-update-conda: true 18 | auto-activate-base: false 19 | miniconda-version: 'latest' 20 | python-version: 3.7 21 | environment-file: environment.yml 22 | activate-environment: qe-example 23 | 24 | - name: Install jupyter_book 25 | shell: bash -l {0} 26 | run: pip install git+https://github.com/ExecutableBookProject/jupyter-book.git@master 27 | 28 | - name: Build QuantEcon Example 29 | shell: bash -l {0} 30 | run: jb build book/ 31 | 32 | - name: Install SSH Client 🔑 33 | uses: webfactory/ssh-agent@v0.2.0 34 | with: 35 | ssh-private-key: ${{ secrets.DEPLOY_KEY }} 36 | 37 | - name: Deploy 🚀 38 | uses: JamesIves/github-pages-deploy-action@releases/v3 39 | with: 40 | SSH: true 41 | BRANCH: gh-pages 42 | FOLDER: book/_build/html 43 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _build 2 | .ipynb_checkpoints 3 | .DS_Store 4 | __pycache__/ 5 | foo.py 6 | math2.py 7 | mod.py 8 | newfile.txt 9 | numbers.txt 10 | test.py 11 | test_table.csv 12 | us_cities.txt -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | # quantecon-example 2 | 3 | A demonstration of Jupyter Book functionality using QuantEcon Python 4 | programming source material. 5 | 6 | ## How to create your own Jupyter Book 7 | 8 | For the purposes of this project, we are replicating the content under [Python 9 | Programming for Quantitative Economics](https://python-programming.quantecon.org). To demonstrate at a high 10 | level what has been done, we first convert each source file from rST to 11 | MyST-syntax markdown, then build the book by following the instructions in the 12 | [Books with Jupyter documentation](https://beta.jupyterbook.org/intro.html). 13 | 14 | ### Creating an environment 15 | 16 | 17 | 1. `conda env create -f environment.yml` 18 | 2. `conda activate qe-example` 19 | 20 | 21 | ### Building a Jupyter Book 22 | 23 | Run the following command in your terminal: `jb build book/`. 24 | If you would like to work with a clean build, you can empty the build folder by running `jb clean book/`. If the Jupyter execution is cached, this command will not delete the cached folder. To remove the build folder, you can run `jb clean --all book/`. 25 | 26 | ### Publishing this Jupyter Book 27 | 28 | Run `ghp-import -n -p -f book/_build/html` 29 | 30 | If you are working on improving the `quantecon-example`, the publishing of your work is taken care of by GitHub workflows. -------------------------------------------------------------------------------- /book/_config.yml: -------------------------------------------------------------------------------- 1 | ####################################################################################### 2 | # Book settings 3 | title : Programming for Quantitative Economics 4 | author : Thomas J.
Sargent and John Stachurski 5 | logo : 'qe-logo-large.png' 6 | email : admin@quantecon.org 7 | description: >- 8 | This book presents a set of lectures on Python programming for quantitative economics, designed and written by **Thomas J. Sargent** and **John Stachurski**. 9 | 10 | 11 | execute: 12 | execute_notebooks : cache 13 | 14 | 15 | html: 16 | favicon : 'qe-logo-large.png' 17 | home_page_in_navbar : false 18 | use_edit_page_button : true 19 | use_repository_button : true 20 | use_issue_button : true 21 | 22 | 23 | repository: 24 | url : https://github.com/executablebooks/quantecon-example/ 25 | branch : master 26 | path_to_book : book 27 | 28 | 29 | # launch_buttons: 30 | # notebook_interface : classic 31 | # binderhub_url : "https://mybinder.org" 32 | # colab_url : "https://colab.research.google.com" 33 | # thebe : true 34 | -------------------------------------------------------------------------------- /book/_static/lecture_specific/about_py/bn_density1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/about_py/bn_density1.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/about_py/career_vf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/about_py/career_vf.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/about_py/pandas_vs_matlab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/about_py/pandas_vs_matlab.png 
-------------------------------------------------------------------------------- /book/_static/lecture_specific/about_py/python_vs_matlab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/about_py/python_vs_matlab.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/about_py/qs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/about_py/qs.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/about_py/qs.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | from scipy.stats import norm 5 | from matplotlib import cm 6 | 7 | xmin, xmax = -4, 12 8 | x = 10 9 | α = 0.5 10 | 11 | m, v = x, 10 12 | 13 | xgrid = np.linspace(xmin, xmax, 200) 14 | 15 | fig, ax = plt.subplots() 16 | 17 | ax.spines['right'].set_color('none') 18 | ax.spines['top'].set_color('none') 19 | ax.spines['left'].set_color('none') 20 | ax.xaxis.set_ticks_position('bottom') 21 | ax.spines['bottom'].set_position(('data', 0)) 22 | 23 | ax.set_ylim(-0.05, 0.5) 24 | ax.set_xticks((x,)) 25 | ax.set_xticklabels((r'$x$', ), fontsize=18) 26 | ax.set_yticks(()) 27 | 28 | K = 3 29 | for i in range(K): 30 | m = α * m 31 | v = α * α * v + 1 32 | f = norm(loc=m, scale=np.sqrt(v)) 33 | k = (i + 0.5) / K 34 | ax.plot(xgrid, f.pdf(xgrid), lw=1, color='black', alpha=0.4) 35 | ax.fill_between(xgrid, 0 * xgrid, f.pdf(xgrid), color=cm.jet(k), alpha=0.4) 36 | 37 | 38 | ax.annotate(r'$Q(x,\cdot)$', xy=(6.6, 0.2), xycoords='data', 39 | xytext=(20, 90), 
textcoords='offset points', fontsize=16, 40 | arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=-0.2")) 41 | ax.annotate(r'$Q^2(x,\cdot)$', xy=(3.6, 0.24), xycoords='data', 42 | xytext=(20, 90), textcoords='offset points', fontsize=16, 43 | arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=-0.2")) 44 | ax.annotate(r'$Q^3(x,\cdot)$', xy=(-0.2, 0.28), xycoords='data', 45 | xytext=(-90, 90), textcoords='offset points', fontsize=16, 46 | arrowprops=dict(arrowstyle="->", connectionstyle="arc3,rad=0.2")) 47 | fig.show() 48 | -------------------------------------------------------------------------------- /book/_static/lecture_specific/getting_started/editing_vim.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/getting_started/editing_vim.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/getting_started/ipython_shell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/getting_started/ipython_shell.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/getting_started/jp_demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/getting_started/jp_demo.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/getting_started/nb.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/getting_started/nb.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/getting_started/nb2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/getting_started/nb2.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/getting_started/nb3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/getting_started/nb3.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/getting_started/nb6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/getting_started/nb6.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/getting_started/nb6a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/getting_started/nb6a.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/getting_started/nb7.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/getting_started/nb7.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/getting_started/nb8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/getting_started/nb8.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/getting_started/nb_run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/getting_started/nb_run.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/getting_started/nb_upload.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/getting_started/nb_upload.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/getting_started/nb_wntest2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/getting_started/nb_wntest2.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/getting_started/starting_nb.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/getting_started/starting_nb.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/matplotlib/matplotlib_ex1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/matplotlib/matplotlib_ex1.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/pandas/data/test_pwt.csv: -------------------------------------------------------------------------------- 1 | "country","country isocode","year","POP","XRAT","tcgdp","cc","cg" 2 | "Argentina","ARG","2000","37335.653","0.9995","295072.21869","75.716805379","5.5788042896" 3 | "Australia","AUS","2000","19053.186","1.72483","541804.6521","67.759025993","6.7200975332" 4 | "India","IND","2000","1006300.297","44.9416","1728144.3748","64.575551328","14.072205773" 5 | "Israel","ISR","2000","6114.57","4.07733","129253.89423","64.436450847","10.266688415" 6 | "Malawi","MWI","2000","11801.505","59.543808333","5026.2217836","74.707624181","11.658954494" 7 | "South Africa","ZAF","2000","45064.098","6.93983","227242.36949","72.718710427","5.7265463933" 8 | "United States","USA","2000","282171.957","1","9898700","72.347054303","6.0324539789" 9 | "Uruguay","URY","2000","3219.793","12.099591667","25255.961693","78.978740282","5.108067988" 10 | -------------------------------------------------------------------------------- /book/_static/lecture_specific/pandas/pandas_indices_pctchange.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/pandas/pandas_indices_pctchange.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/pandas/pandas_share_prices.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/pandas/pandas_share_prices.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/pandas/pandas_vs_rest.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/pandas/pandas_vs_rest.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/pandas/wb_download.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import requests 3 | import pandas as pd 4 | 5 | # == Get data and read into file gd.xls == # 6 | wb_data_query = "http://api.worldbank.org/v2/en/indicator/gc.dod.totl.gd.zs?downloadformat=excel" 7 | r = requests.get(wb_data_query) 8 | with open('gd.xls', 'wb') as output: 9 | output.write(r.content) 10 | 11 | # == Parse data into a DataFrame == # 12 | govt_debt = pd.read_excel('gd.xls', sheet_name='Data', skiprows=3, index_col=1) 13 | 14 | # == Take desired values and plot == # 15 | govt_debt = govt_debt.transpose() 16 | govt_debt = govt_debt[['AUS', 'USA']] 17 | govt_debt = govt_debt[38:] 18 | govt_debt.plot(lw=2) 19 | plt.show() 20 | -------------------------------------------------------------------------------- 
/book/_static/lecture_specific/parallelization/htop_parallel_npmat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/parallelization/htop_parallel_npmat.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/python_advanced_features/numbers.txt: -------------------------------------------------------------------------------- 1 | prices 2 | 3 3 | 8 4 | 5 | 7 6 | 21 -------------------------------------------------------------------------------- /book/_static/lecture_specific/python_by_example/pbe_ex2_fig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/python_by_example/pbe_ex2_fig.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/python_by_example/test_program_1_updated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/python_by_example/test_program_1_updated.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/python_foundations/us_cities.txt: -------------------------------------------------------------------------------- 1 | new york: 8244910 2 | los angeles: 3819702 3 | chicago: 2707120 4 | houston: 2145146 5 | philadelphia: 1536471 6 | phoenix: 1469471 7 | san antonio: 1359758 8 | san diego: 1326179 9 | dallas: 1223229 10 | -------------------------------------------------------------------------------- /book/_static/lecture_specific/sci_libs/nfs_ex1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/sci_libs/nfs_ex1.png -------------------------------------------------------------------------------- /book/_static/lecture_specific/troubleshooting/launch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/_static/lecture_specific/troubleshooting/launch.png -------------------------------------------------------------------------------- /book/_toc.yml: -------------------------------------------------------------------------------- 1 | - file: docs/index 2 | 3 | - part: Introduction to Python 4 | chapters: 5 | - file: docs/about_py 6 | - file: docs/getting_started 7 | - file: docs/python_by_example 8 | - file: docs/functions 9 | - file: docs/python_essentials 10 | - file: docs/oop_intro 11 | - file: docs/python_oop 12 | 13 | - part: Scientific Libraries 14 | chapters: 15 | - file: docs/need_for_speed 16 | - file: docs/numpy 17 | - file: docs/matplotlib 18 | - file: docs/scipy 19 | - file: docs/numba 20 | - file: docs/parallelization 21 | - file: docs/pandas 22 | 23 | - part: Advanced Python 24 | chapters: 25 | - file: docs/writing_good_code 26 | - file: docs/python_advanced_features 27 | - file: docs/debugging -------------------------------------------------------------------------------- /book/docs/about_py.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | text_representation: 4 | extension: .md 5 | format_name: myst 6 | kernelspec: 7 | display_name: Python 3 8 | language: python 9 | name: python3 10 | --- 11 | 12 | (about_py)= 13 | 14 | # About Python 15 | 16 | > \"Python has gotten sufficiently weapons grade that we don't descend 17 | > 
into R anymore. Sorry, R people. I used to be one of you but we no 18 | > longer descend into R.\" -- Chris Wiggins 19 | 20 | ## Overview 21 | 22 | 23 | In this lecture we will 24 | 25 | - outline what Python is 26 | - showcase some of its abilities 27 | - compare it to some other languages. 28 | 29 | At this stage, it\'s **not** our intention that you try to replicate all 30 | you see. 31 | 32 | We will work through what follows at a slow pace later in the lecture 33 | series. 34 | 35 | Our only objective for this lecture is to give you some feel of what 36 | Python is, and what it can do. 37 | 38 | ## What\'s Python? 39 | 40 | [Python](https://www.python.org) is a general-purpose programming 41 | language conceived in 1989 by Dutch programmer [Guido van 42 | Rossum](https://en.wikipedia.org/wiki/Guido_van_Rossum). 43 | 44 | Python is free and open source, with development coordinated through the 45 | [Python Software Foundation](https://www.python.org/psf/). 46 | 47 | Python has experienced rapid adoption in the last decade and is now one 48 | of the most popular programming languages. 49 | 50 | ### Common Uses 51 | 52 | Python is a general-purpose language used in almost all application domains 53 | such as 54 | 55 | - communications 56 | - web development 57 | - CGI and graphical user interfaces 58 | - game development 59 | - multimedia, data processing, security, etc., etc., etc. 60 | 61 | Used extensively by Internet services and high tech companies including 62 | 63 | - [Google](https://www.google.com/) 64 | - [Dropbox](https://www.dropbox.com/) 65 | - [Reddit](https://www.reddit.com/) 66 | - [YouTube](https://www.youtube.com/) 67 | - [Walt Disney 68 | Animation](https://pydanny-event-notes.readthedocs.org/en/latest/socalpiggies/20110526-wda.html). 
69 | 70 | Python is very beginner-friendly and is often used to [teach computer 71 | science and 72 | programming](http://cacm.acm.org/blogs/blog-cacm/176450-python-is-now-the-most-popular-introductory-teaching-language-at-top-us-universities/fulltext). 73 | 74 | For reasons we will discuss, Python is particularly popular within the 75 | scientific community with users including NASA, CERN and practically all 76 | branches of academia. 77 | 78 | It is also [replacing familiar tools like 79 | Excel](https://news.efinancialcareers.com/us-en/3002556/python-replaced-excel-banking) 80 | in the fields of finance and banking. 81 | 82 | ### Relative Popularity 83 | 84 | The following chart, produced using Stack Overflow Trends, shows one 85 | measure of the relative popularity of Python 86 | 87 | ```{figure} /_static/lecture_specific/about_py/python_vs_matlab.png 88 | :scale: 72% 89 | ``` 90 | 91 | The figure indicates not only that Python is widely used but also that 92 | adoption of Python has accelerated significantly since 2012. 93 | 94 | We suspect this is driven at least in part by uptake in the scientific 95 | domain, particularly in rapidly growing fields like data science. 96 | 97 | For example, the popularity of [pandas](http://pandas.pydata.org/), a 98 | library for data analysis with Python has exploded, as seen here. 99 | 100 | (The corresponding time path for MATLAB is shown for comparison) 101 | 102 | ```{figure} /_static/lecture_specific/about_py/pandas_vs_matlab.png 103 | :scale: 23% 104 | ``` 105 | 106 | Note that pandas takes off in 2012, which is the same year that we see 107 | Python\'s popularity begin to spike in the first figure. 108 | 109 | Overall, it\'s clear that 110 | 111 | - Python is [one of the most popular programming languages 112 | worldwide](http://spectrum.ieee.org/computing/software/the-2017-top-programming-languages). 
113 | - Python is a major tool for scientific computing, accounting for a 114 | rapidly rising share of scientific work around the globe. 115 | 116 | ### Features 117 | 118 | Python is a [high-level 119 | language](https://en.wikipedia.org/wiki/High-level_programming_language) 120 | suitable for rapid development. 121 | 122 | It has a relatively small core language supported by many libraries. 123 | 124 | Other features of Python: 125 | 126 | - multiple programming styles are supported (procedural, 127 | object-oriented, functional, etc.) 128 | - it is interpreted rather than compiled. 129 | 130 | ### Syntax and Design 131 | 132 | One nice feature of Python is its elegant syntax --- we\'ll see many 133 | examples later on. 134 | 135 | Elegant code might sound superfluous but in fact it\'s highly beneficial 136 | because it makes the syntax easy to read and easy to remember. 137 | 138 | Remembering how to read from files, sort dictionaries and other such 139 | routine tasks means that you don\'t need to break your flow in order to 140 | hunt down correct syntax. 141 | 142 | Closely related to elegant syntax is an elegant design. 143 | 144 | Features like iterators, generators, decorators and list comprehensions 145 | make Python highly expressive, allowing you to get more done with less 146 | code. 147 | 148 | [Namespaces](https://en.wikipedia.org/wiki/Namespace) improve 149 | productivity by cutting down on bugs and syntax errors. 150 | 151 | ## Scientific Programming 152 | 153 | Python has become one of the core languages of scientific computing. 
154 | 155 | It\'s either the dominant player or a major player in 156 | 157 | - [machine learning and data science](http://scikit-learn.org/stable/) 158 | - [astronomy](http://www.astropy.org/) 159 | - [artificial 160 | intelligence](https://wiki.python.org/moin/PythonForArtificialIntelligence) 161 | - [chemistry](http://chemlab.github.io/chemlab/) 162 | - [computational biology](http://biopython.org/wiki/Main_Page) 163 | - [meteorology](https://pypi.org/project/meteorology/) 164 | 165 | Its popularity in economics is also beginning to rise. 166 | 167 | This section briefly showcases some examples of Python for scientific 168 | programming. 169 | 170 | - All of these topics will be covered in detail later on. 171 | 172 | ### Numerical Programming 173 | 174 | Fundamental matrix and array processing capabilities are provided by the 175 | excellent [NumPy](http://www.numpy.org/) library. 176 | 177 | NumPy provides the basic array data type plus some simple processing 178 | operations. 179 | 180 | For example, let\'s build some arrays 181 | 182 | ```{code-cell} ipython3 183 | import numpy as np # Load the library 184 | 185 | a = np.linspace(-np.pi, np.pi, 100) # Create even grid from -π to π 186 | b = np.cos(a) # Apply cosine to each element of a 187 | c = np.sin(a) # Apply sin to each element of a 188 | ``` 189 | 190 | Now let\'s take the inner product 191 | 192 | ```{code-cell} ipython3 193 | b @ c 194 | ``` 195 | 196 | The number you see here might vary slightly but it\'s essentially zero. 197 | 198 | (For older versions of Python and NumPy you need to use the 199 | [np.dot](http://docs.scipy.org/doc/numpy/reference/generated/numpy.dot.html) 200 | function) 201 | 202 | The [SciPy](http://www.scipy.org) library is built on top of NumPy and 203 | provides additional functionality. 204 | 205 | (tuple_unpacking_example)= 206 | For example, let\'s calculate $\int_{-2}^2 \phi(z) dz$ where $\phi$ is 207 | the standard normal density. 
208 | 209 | ```{code-cell} ipython3 210 | from scipy.stats import norm 211 | from scipy.integrate import quad 212 | 213 | ϕ = norm() 214 | value, error = quad(ϕ.pdf, -2, 2) # Integrate using Gaussian quadrature 215 | value 216 | ``` 217 | 218 | SciPy includes many of the standard routines used in 219 | 220 | - [linear 221 | algebra](http://docs.scipy.org/doc/scipy/reference/linalg.html) 222 | - [integration](http://docs.scipy.org/doc/scipy/reference/integrate.html) 223 | - [interpolation](http://docs.scipy.org/doc/scipy/reference/interpolate.html) 224 | - [optimization](http://docs.scipy.org/doc/scipy/reference/optimize.html) 225 | - [distributions and random number 226 | generation](http://docs.scipy.org/doc/scipy/reference/stats.html) 227 | - [signal 228 | processing](http://docs.scipy.org/doc/scipy/reference/signal.html) 229 | 230 | See them all 231 | [here](http://docs.scipy.org/doc/scipy/reference/index.html). 232 | 233 | ### Graphics 234 | 235 | The most popular and comprehensive Python library for creating figures 236 | and graphs is [Matplotlib](http://matplotlib.org/), with functionality 237 | including 238 | 239 | - plots, histograms, contour images, 3D graphs, bar charts etc. 240 | - output in many formats (PDF, PNG, EPS, etc.) 241 | - LaTeX integration 242 | 243 | Example 2D plot with embedded LaTeX annotations 244 | 245 | ```{figure} /_static/lecture_specific/about_py/qs.png 246 | :scale: 55% 247 | ``` 248 | 249 | Example contour plot 250 | 251 | ```{figure} /_static/lecture_specific/about_py/bn_density1.png 252 | :scale: 55% 253 | ``` 254 | 255 | Example 3D plot 256 | 257 | ```{figure} /_static/lecture_specific/about_py/career_vf.png 258 | :scale: 80% 259 | ``` 260 | 261 | More examples can be found in the [Matplotlib thumbnail 262 | gallery](http://matplotlib.org/gallery.html). 
263 | 264 | Other graphics libraries include 265 | 266 | - [Plotly](https://plot.ly/python/) 267 | - [Bokeh](http://bokeh.pydata.org/en/latest/) 268 | - [VPython](http://www.vpython.org/) --- 3D graphics and animations 269 | 270 | ### Symbolic Algebra 271 | 272 | It\'s useful to be able to manipulate symbolic expressions, as in 273 | Mathematica or Maple. 274 | 275 | The [SymPy](http://www.sympy.org/) library provides this functionality 276 | from within the Python shell. 277 | 278 | ```{code-cell} ipython3 279 | from sympy import Symbol 280 | 281 | x, y = Symbol('x'), Symbol('y') # Treat 'x' and 'y' as algebraic symbols 282 | x + x + x + y 283 | ``` 284 | 285 | We can manipulate expressions 286 | 287 | ```{code-cell} ipython3 288 | expression = (x + y)**2 289 | expression.expand() 290 | ``` 291 | 292 | solve polynomials 293 | 294 | ```{code-cell} ipython3 295 | from sympy import solve 296 | 297 | solve(x**2 + x + 2) 298 | ``` 299 | 300 | and calculate limits, derivatives and integrals 301 | 302 | ```{code-cell} ipython3 303 | from sympy import limit, sin, diff 304 | 305 | limit(1 / x, x, 0) 306 | ``` 307 | 308 | ```{code-cell} ipython3 309 | limit(sin(x) / x, x, 0) 310 | ``` 311 | 312 | ```{code-cell} ipython3 313 | diff(sin(x), x) 314 | ``` 315 | 316 | The beauty of importing this functionality into Python is that we are 317 | working within a fully fledged programming language. 318 | 319 | We can easily create tables of derivatives, generate LaTeX output, add 320 | that output to figures and so on. 321 | 322 | ### Statistics 323 | 324 | Python\'s data manipulation and statistics libraries have improved 325 | rapidly over the last few years. 326 | 327 | #### Pandas 328 | 329 | One of the most popular libraries for working with data is 330 | [pandas](http://pandas.pydata.org/). 331 | 332 | Pandas is fast, efficient, flexible and well designed. 
333 | 334 | Here\'s a simple example, using some dummy data generated with Numpy\'s 335 | excellent `random` functionality. 336 | 337 | ```{code-cell} ipython3 338 | import pandas as pd 339 | np.random.seed(1234) 340 | 341 | data = np.random.randn(5, 2) # 5x2 matrix of N(0, 1) random draws 342 | dates = pd.date_range('28/12/2010', periods=5) 343 | 344 | df = pd.DataFrame(data, columns=('price', 'weight'), index=dates) 345 | print(df) 346 | ``` 347 | 348 | ```{code-cell} ipython3 349 | df.mean() 350 | ``` 351 | 352 | #### Other Useful Statistics Libraries 353 | 354 | - [statsmodels](http://statsmodels.sourceforge.net/) --- various 355 | statistical routines 356 | 357 | - [scikit-learn](http://scikit-learn.org/) --- machine learning in 358 | Python (sponsored by Google, among others) 359 | 360 | - [pyMC](http://pymc-devs.github.io/pymc/) --- for Bayesian data 361 | analysis 362 | 363 | - [pystan](https://pystan.readthedocs.org/en/latest/) Bayesian 364 | analysis based on [stan](http://mc-stan.org/) 365 | 366 | ### Networks and Graphs 367 | 368 | Python has many libraries for studying graphs. 369 | 370 | One well-known example is [NetworkX](http://networkx.github.io/). Its 371 | features include, among many other things: 372 | 373 | - standard graph algorithms for analyzing networks 374 | - plotting routines 375 | 376 | Here\'s some example code that generates and plots a random graph, with 377 | node color determined by shortest path length from a central node. 
378 | 379 | ```{code-cell} ipython3 380 | import networkx as nx 381 | import matplotlib.pyplot as plt 382 | %matplotlib inline 383 | np.random.seed(1234) 384 | 385 | # Generate a random graph 386 | p = dict((i, (np.random.uniform(0, 1), np.random.uniform(0, 1))) 387 | for i in range(200)) 388 | g = nx.random_geometric_graph(200, 0.12, pos=p) 389 | pos = nx.get_node_attributes(g, 'pos') 390 | 391 | # Find node nearest the center point (0.5, 0.5) 392 | dists = [(x - 0.5)**2 + (y - 0.5)**2 for x, y in list(pos.values())] 393 | ncenter = np.argmin(dists) 394 | 395 | # Plot graph, coloring by path length from central node 396 | p = nx.single_source_shortest_path_length(g, ncenter) 397 | plt.figure() 398 | nx.draw_networkx_edges(g, pos, alpha=0.4) 399 | nx.draw_networkx_nodes(g, 400 | pos, 401 | nodelist=list(p.keys()), 402 | node_size=120, alpha=0.5, 403 | node_color=list(p.values()), 404 | cmap=plt.cm.jet_r) 405 | plt.show() 406 | ``` 407 | 408 | ### Cloud Computing 409 | 410 | Running your Python code on massive servers in the cloud is becoming 411 | easier and easier. 412 | 413 | A nice example is [Anaconda 414 | Enterprise](https://www.anaconda.com/enterprise/). 415 | 416 | See also 417 | 418 | - [Amazon Elastic Compute Cloud](http://aws.amazon.com/ec2/) 419 | - The [Google App Engine](https://cloud.google.com/appengine/) 420 | (Python, Java, PHP or Go) 421 | - [Pythonanywhere](https://www.pythonanywhere.com/) 422 | - [Sagemath Cloud](https://cloud.sagemath.com/) 423 | 424 | ### Parallel Processing 425 | 426 | Apart from the cloud computing options listed above, you might like to 427 | consider 428 | 429 | - [Parallel computing through IPython 430 | clusters](http://ipython.org/ipython-doc/stable/parallel/parallel_demos.html). 431 | - The [Starcluster](http://star.mit.edu/cluster/) interface to 432 | Amazon\'s EC2. 
433 | - GPU programming through [PyCuda](https://wiki.tiker.net/PyCuda), 434 | [PyOpenCL](https://mathema.tician.de/software/pyopencl/), 435 | [Theano](http://deeplearning.net/software/theano/) or similar. 436 | 437 | (intfc)= 438 | 439 | ### Other Developments 440 | 441 | There are many other interesting developments with scientific 442 | programming in Python. 443 | 444 | Some representative examples include 445 | 446 | - [Jupyter](http://jupyter.org/) --- Python in your browser with 447 | interactive code cells, embedded images and other useful features. 448 | - [Numba](http://numba.pydata.org/) --- Make Python run at the same 449 | speed as native machine code! 450 | - [Blaze](http://blaze.pydata.org/) --- a generalization of NumPy. 451 | - [PyTables](http://www.pytables.org) --- manage large data sets. 452 | - [CVXPY](https://github.com/cvxgrp/cvxpy) --- convex optimization 453 | in Python. 454 | 455 | ## Learn More 456 | 457 | - Browse some Python projects on 458 | [GitHub](https://github.com/trending?l=python). 459 | - Read more about [Python\'s history and rise in 460 | popularity](https://www.welcometothejungle.com/en/articles/btc-python-popular). 461 | - Have a look at [some of the Jupyter 462 | notebooks](http://nbviewer.jupyter.org/) people have shared on 463 | various scientific topics. 464 | - Visit the [Python Package Index](https://pypi.org/). 465 | - View some of the questions people are asking about Python on 466 | [Stackoverflow](http://stackoverflow.com/questions/tagged/python). 467 | - Keep up to date on what\'s happening in the Python community with 468 | the [Python subreddit](https://www.reddit.com:443/r/Python/). 
-------------------------------------------------------------------------------- /book/docs/debugging.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | text_representation: 4 | extension: .md 5 | format_name: myst 6 | kernelspec: 7 | display_name: Python 3 8 | language: python 9 | name: python3 10 | --- 11 | 12 | (debugging)= 13 | 14 | # Debugging 15 | 16 | > \"Debugging is twice as hard as writing the code in the first place. 17 | > Therefore, if you write the code as cleverly as possible, you are, by 18 | > definition, not smart enough to debug it.\" -- Brian Kernighan 19 | 20 | ## Overview 21 | 22 | Are you one of those programmers who fills their code with `print` 23 | statements when trying to debug their programs? 24 | 25 | Hey, we all used to do that. 26 | 27 | (OK, sometimes we still do that...) 28 | 29 | But once you start writing larger programs you\'ll need a better system. 30 | 31 | Debugging tools for Python vary across platforms, IDEs and editors. 32 | 33 | Here we\'ll focus on Jupyter and leave you to explore other settings. 34 | 35 | We\'ll need the following imports 36 | 37 | ```{code-cell} ipython3 38 | :tags: [remove-output] 39 | 40 | import numpy as np 41 | import matplotlib.pyplot as plt 42 | %matplotlib inline 43 | ``` 44 | 45 | ## Debugging 46 | 47 | ### The `debug` Magic 48 | 49 | Let\'s consider a simple (and rather contrived) example 50 | 51 | ```{code-cell} ipython3 52 | :tags: [raises-exception] 53 | 54 | def plot_log(): 55 | fig, ax = plt.subplots(2, 1) 56 | x = np.linspace(1, 2, 10) 57 | ax.plot(x, np.log(x)) 58 | plt.show() 59 | 60 | plot_log() # Call the function, generate plot 61 | ``` 62 | 63 | This code is intended to plot the `log` function over the interval 64 | $[1, 2]$. 65 | 66 | But there\'s an error here: `plt.subplots(2, 1)` should be just 67 | `plt.subplots()`. 
68 | 69 | (The call `plt.subplots(2, 1)` returns a NumPy array containing two axes 70 | objects, suitable for having two subplots on the same figure) 71 | 72 | The traceback shows that the error occurs at the method call 73 | `ax.plot(x, np.log(x))`. 74 | 75 | The error occurs because we have mistakenly made `ax` a NumPy array, and 76 | a NumPy array has no `plot` method. 77 | 78 | But let\'s pretend that we don\'t understand this for the moment. 79 | 80 | We might suspect there\'s something wrong with `ax` but when we try to 81 | investigate this object, we get the following exception: 82 | 83 | ```{code-cell} ipython3 84 | :tags: [raises-exception] 85 | 86 | ax 87 | ``` 88 | 89 | The problem is that `ax` was defined inside `plot_log()`, and the name 90 | is lost once that function terminates. 91 | 92 | Let\'s try doing it a different way. 93 | 94 | We run the first cell block again, generating the same error 95 | 96 | ```{code-cell} ipython3 97 | :tags: [raises-exception] 98 | 99 | def plot_log(): 100 | fig, ax = plt.subplots(2, 1) 101 | x = np.linspace(1, 2, 10) 102 | ax.plot(x, np.log(x)) 103 | plt.show() 104 | 105 | plot_log() # Call the function, generate plot 106 | ``` 107 | 108 | But this time we type in the following cell block 109 | 110 | ```python 111 | %debug 112 | ``` 113 | 114 | You should be dropped into a new prompt that looks something like this 115 | 116 | ```{code-block} none 117 | ipdb> 118 | ``` 119 | 120 | (You might see `pdb>` instead) 121 | 122 | Now we can investigate the value of our variables at this point in the 123 | program, step forward through the code, etc. 124 | 125 | For example, here we simply type the name `ax` to see what\'s happening 126 | with this object: 127 | 128 | ```{code-block} none 129 | ipdb> ax 130 | array([<matplotlib.axes._subplots.AxesSubplot object at 0x...>, 131 | <matplotlib.axes._subplots.AxesSubplot object at 0x...>], dtype=object) 132 | ``` 133 | 134 | It\'s now very clear that `ax` is an array, which clarifies the source 135 | of the problem.
136 | 137 | To find out what else you can do from inside `ipdb` (or `pdb`), use the 138 | online help 139 | 140 | ```{code-block} none 141 | ipdb> h 142 | 143 | Documented commands (type help <topic>): 144 | ======================================== 145 | EOF bt cont enable jump pdef r tbreak w 146 | a c continue exit l pdoc restart u whatis 147 | alias cl d h list pinfo return unalias where 148 | args clear debug help n pp run unt 149 | b commands disable ignore next q s until 150 | break condition down j p quit step up 151 | 152 | Miscellaneous help topics: 153 | ========================== 154 | exec pdb 155 | 156 | Undocumented commands: 157 | ====================== 158 | retval rv 159 | 160 | ipdb> h c 161 | c(ont(inue)) 162 | Continue execution, only stop when a breakpoint is encountered. 163 | ``` 164 | 165 | ### Setting a Break Point 166 | 167 | The preceding approach is handy but sometimes insufficient. 168 | 169 | Consider the following modified version of our function above 170 | 171 | ```{code-cell} ipython3 172 | def plot_log(): 173 | fig, ax = plt.subplots() 174 | x = np.logspace(1, 2, 10) 175 | ax.plot(x, np.log(x)) 176 | plt.show() 177 | 178 | plot_log() 179 | ``` 180 | 181 | Here the original problem is fixed, but we\'ve accidentally written 182 | `np.logspace(1, 2, 10)` instead of `np.linspace(1, 2, 10)`. 183 | 184 | Now there won\'t be any exception, but the plot won\'t look right. 185 | 186 | To investigate, it would be helpful if we could inspect variables like 187 | `x` during execution of the function.
188 | 189 | To this end, we add a \"break point\" by inserting `breakpoint()` inside 190 | the function code block 191 | 192 | ```python 193 | def plot_log(): 194 | breakpoint() 195 | fig, ax = plt.subplots() 196 | x = np.logspace(1, 2, 10) 197 | ax.plot(x, np.log(x)) 198 | plt.show() 199 | 200 | plot_log() 201 | ``` 202 | 203 | Now let\'s run the script, and investigate via the debugger 204 | 205 | ```{code-block} none 206 | > <ipython-input-...>(6)plot_log() 207 | -> fig, ax = plt.subplots() 208 | (Pdb) n 209 | > <ipython-input-...>(7)plot_log() 210 | -> x = np.logspace(1, 2, 10) 211 | (Pdb) n 212 | > <ipython-input-...>(8)plot_log() 213 | -> ax.plot(x, np.log(x)) 214 | (Pdb) x 215 | array([ 10. , 12.91549665, 16.68100537, 21.5443469 , 216 | 27.82559402, 35.93813664, 46.41588834, 59.94842503, 217 | 77.42636827, 100. ]) 218 | ``` 219 | 220 | We used `n` twice to step forward through the code (one line at a time). 221 | 222 | Then we printed the value of `x` to see what was happening with that 223 | variable. 224 | 225 | To exit from the debugger, use `q`. 226 | 227 | ## Other Useful Magics 228 | 229 | In this lecture, we used the `%debug` IPython magic. 230 | 231 | There are many other useful magics: 232 | 233 | - `%precision 4` sets printed precision for floats to 4 decimal places 234 | - `%whos` gives a list of variables and their values 235 | - `%quickref` gives a list of magics 236 | 237 | The full list of magics is 238 | [here](http://ipython.readthedocs.org/en/stable/interactive/magics.html).
239 | -------------------------------------------------------------------------------- /book/docs/functions.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | text_representation: 4 | extension: .md 5 | format_name: myst 6 | kernelspec: 7 | display_name: Python 3 8 | language: python 9 | name: python3 10 | --- 11 | 12 | (functions)= 13 | 14 | # Functions 15 | 16 | ## Overview 17 | 18 | One construct that\'s extremely useful and provided by almost all 19 | programming languages is **functions**. 20 | 21 | We have already met several functions, such as 22 | 23 | - the `sqrt()` function from NumPy and 24 | - the built-in `print()` function 25 | 26 | In this lecture we\'ll treat functions systematically and begin to learn 27 | just how useful and important they are. 28 | 29 | One of the things we will learn to do is build our own user-defined 30 | functions 31 | 32 | We will use the following imports. 33 | 34 | ```{code-cell} ipython3 35 | :tags: [remove-output] 36 | 37 | import numpy as np 38 | import matplotlib.pyplot as plt 39 | %matplotlib inline 40 | ``` 41 | 42 | ## Function Basics 43 | 44 | A function is a named section of a program that implements a specific 45 | task. 46 | 47 | Many functions exist already and we can use them off the shelf. 48 | 49 | First we review these functions and then discuss how we can build our 50 | own. 51 | 52 | ### Built-In Functions 53 | 54 | Python has a number of *built-in* functions that are available without 55 | `import`. 
56 | 57 | We have already met some 58 | 59 | ```{code-cell} ipython3 60 | max(19, 20) 61 | ``` 62 | 63 | ```{code-cell} ipython3 64 | print('foobar') 65 | ``` 66 | 67 | ```{code-cell} ipython3 68 | str(22) 69 | ``` 70 | 71 | ```{code-cell} ipython3 72 | type(22) 73 | ``` 74 | 75 | Two more useful built-in functions are `any()` and `all()` 76 | 77 | ```{code-cell} ipython3 78 | bools = False, True, True 79 | all(bools) # True if all are True and False otherwise 80 | ``` 81 | 82 | ```{code-cell} ipython3 83 | any(bools) # False if all are False and True otherwise 84 | ``` 85 | 86 | The full list of Python built-ins is 87 | [here](https://docs.python.org/library/functions.html). 88 | 89 | ### Third Party Functions 90 | 91 | If the built-in functions don\'t cover what we need, we either need to 92 | import functions or create our own. 93 | 94 | Examples of importing and using functions were given in the 95 | {ref}`previous lecture ` 96 | 97 | Here\'s another one, which tests whether a given year is a leap year: 98 | 99 | ```{code-cell} ipython3 100 | import calendar 101 | 102 | calendar.isleap(2020) 103 | ``` 104 | 105 | ## Defining Functions 106 | 107 | In many instances, it is useful to be able to define our own functions. 108 | 109 | This will become clearer as you see more examples. 110 | 111 | Let\'s start by discussing how it\'s done. 112 | 113 | ### Syntax 114 | 115 | Here\'s a very simple Python function, that implements the mathematical 116 | function $f(x) = 2 x + 1$ 117 | 118 | ```{code-cell} ipython3 119 | def f(x): 120 | return 2 * x + 1 121 | ``` 122 | 123 | Now that we\'ve *defined* this function, let\'s *call* it and check 124 | whether it does what we expect: 125 | 126 | ```{code-cell} ipython3 127 | f(1) 128 | ``` 129 | 130 | ```{code-cell} ipython3 131 | f(10) 132 | ``` 133 | 134 | Here\'s a longer function, that computes the absolute value of a given 135 | number. 
136 | 137 | (Such a function already exists as a built-in, but let\'s write our own 138 | for the exercise.) 139 | 140 | ```{code-cell} ipython3 141 | def new_abs_function(x): 142 | 143 | if x < 0: 144 | abs_value = -x 145 | else: 146 | abs_value = x 147 | 148 | return abs_value 149 | ``` 150 | 151 | Let\'s review the syntax here. 152 | 153 | - `def` is a Python keyword used to start function definitions. 154 | - `def new_abs_function(x):` indicates that the function is called 155 | `new_abs_function` and that it has a single argument `x`. 156 | - The indented code is a code block called the *function body*. 157 | - The `return` keyword indicates that `abs_value` is the object that 158 | should be returned to the calling code. 159 | 160 | This whole function definition is read by the Python interpreter and 161 | stored in memory. 162 | 163 | Let\'s call it to check that it works: 164 | 165 | ```{code-cell} ipython3 166 | print(new_abs_function(3)) 167 | print(new_abs_function(-3)) 168 | ``` 169 | 170 | ### Why Write Functions? 171 | 172 | User-defined functions are important for improving the clarity of your 173 | code by 174 | 175 | - separating different strands of logic 176 | - facilitating code reuse 177 | 178 | (Writing the same thing twice is [almost always a bad 179 | idea](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself)) 180 | 181 | We will say more about this 182 | {ref}`later `. 183 | 184 | ## Applications 185 | 186 | ### Random Draws 187 | 188 | Consider again this code from the 189 | {ref}`previous lecture ` 190 | 191 | ```{code-cell} ipython3 192 | ts_length = 100 193 | ϵ_values = [] # empty list 194 | 195 | for i in range(ts_length): 196 | e = np.random.randn() 197 | ϵ_values.append(e) 198 | 199 | plt.plot(ϵ_values) 200 | plt.show() 201 | ``` 202 | 203 | We will break this program into two parts: 204 | 205 | 1. A user-defined function that generates a list of random variables. 206 | 2. The main part of the program that 207 | 1. 
calls this function to get data 208 | 2. plots the data 209 | 210 | This is accomplished in the next program 211 | 212 | (funcloopprog)= 213 | 214 | ```{code-cell} ipython3 215 | def generate_data(n): 216 | ϵ_values = [] 217 | for i in range(n): 218 | e = np.random.randn() 219 | ϵ_values.append(e) 220 | return ϵ_values 221 | 222 | data = generate_data(100) 223 | plt.plot(data) 224 | plt.show() 225 | ``` 226 | 227 | When the interpreter gets to the expression `generate_data(100)`, it 228 | executes the function body with `n` set equal to 100. 229 | 230 | The net result is that the name `data` is *bound* to the list `ϵ_values` 231 | returned by the function. 232 | 233 | ### Adding Conditions 234 | 235 | Our function `generate_data()` is rather limited. 236 | 237 | Let\'s make it slightly more useful by giving it the ability to return 238 | either standard normals or uniform random variables on $(0, 1)$ as 239 | required. 240 | 241 | This is achieved in the next piece of code. 242 | 243 | (funcloopprog2)= 244 | 245 | ```{code-cell} ipython3 246 | def generate_data(n, generator_type): 247 | ϵ_values = [] 248 | for i in range(n): 249 | if generator_type == 'U': 250 | e = np.random.uniform(0, 1) 251 | else: 252 | e = np.random.randn() 253 | ϵ_values.append(e) 254 | return ϵ_values 255 | 256 | data = generate_data(100, 'U') 257 | plt.plot(data) 258 | plt.show() 259 | ``` 260 | 261 | Hopefully, the syntax of the if/else clause is self-explanatory, with 262 | indentation again delimiting the extent of the code blocks. 263 | 264 | ```{note} 265 | - We are passing the argument `U` as a string, which is why we write it as `'U'`. 266 | - Notice that equality is tested with the `==` syntax, not `=`. 267 | - For example, the statement `a = 10` assigns the name `a` to the value `10`. 268 | - The expression `a == 10` evaluates to either `True` or `False`, depending on the value of `a`. 269 | ``` 270 | 271 | Now, there are several ways that we can simplify the code above. 
272 | 273 | For example, we can get rid of the conditionals all together by just 274 | passing the desired generator type *as a function*. 275 | 276 | To understand this, consider the following version. 277 | 278 | (test_program_6)= 279 | 280 | ```{code-cell} ipython3 281 | def generate_data(n, generator_type): 282 | ϵ_values = [] 283 | for i in range(n): 284 | e = generator_type() 285 | ϵ_values.append(e) 286 | return ϵ_values 287 | 288 | data = generate_data(100, np.random.uniform) 289 | plt.plot(data) 290 | plt.show() 291 | ``` 292 | 293 | Now, when we call the function `generate_data()`, we pass 294 | `np.random.uniform` as the second argument. 295 | 296 | This object is a *function*. 297 | 298 | When the function call `generate_data(100, np.random.uniform)` is 299 | executed, Python runs the function code block with `n` equal to 100 and 300 | the name `generator_type` \"bound\" to the function `np.random.uniform`. 301 | 302 | - While these lines are executed, the names `generator_type` and 303 | `np.random.uniform` are \"synonyms\", and can be used in identical 304 | ways. 305 | 306 | This principle works more generally---for example, consider the 307 | following piece of code 308 | 309 | ```{code-cell} ipython3 310 | max(7, 2, 4) # max() is a built-in Python function 311 | ``` 312 | 313 | ```{code-cell} ipython3 314 | m = max 315 | m(7, 2, 4) 316 | ``` 317 | 318 | Here we created another name for the built-in function `max()`, which 319 | could then be used in identical ways. 320 | 321 | In the context of our program, the ability to bind new names to 322 | functions means that there is no problem \*passing a function as an 323 | argument to another function\*---as we did above. 324 | 325 | ## Exercises 326 | 327 | ### Exercise 1 328 | 329 | Recall that $n!$ is read as \"$n$ factorial\" and defined as 330 | $n! = n \times (n - 1) \times \cdots \times 2 \times 1$. 
331 | 332 | There are functions to compute this in various modules, but let\'s write 333 | our own version as an exercise. 334 | 335 | In particular, write a function `factorial` such that `factorial(n)` 336 | returns $n!$ for any positive integer $n$. 337 | 338 | ### Exercise 2 339 | 340 | The [binomial random 341 | variable](https://en.wikipedia.org/wiki/Binomial_distribution) 342 | $Y \sim Bin(n, p)$ represents the number of successes in $n$ binary 343 | trials, where each trial succeeds with probability $p$. 344 | 345 | Without any import besides `from numpy.random import uniform`, write a 346 | function `binomial_rv` such that `binomial_rv(n, p)` generates one draw 347 | of $Y$. 348 | 349 | Hint: If $U$ is uniform on $(0, 1)$ and $p \in (0,1)$, then the 350 | expression `U < p` evaluates to `True` with probability $p$. 351 | 352 | ### Exercise 3 353 | 354 | First, write a function that returns one realization of the following 355 | random device 356 | 357 | 1. Flip an unbiased coin 10 times. 358 | 2. If a head occurs `k` or more times consecutively within this 359 | sequence at least once, pay one dollar. 360 | 3. If not, pay nothing. 361 | 362 | Second, write another function that does the same task except that the 363 | second rule of the above random device becomes 364 | 365 | - If a head occurs `k` or more times within this sequence, pay one 366 | dollar. 367 | 368 | Use no import besides `from numpy.random import uniform`. 369 | 370 | ## Solutions 371 | 372 | ### Exercise 1 373 | 374 | Here\'s one solution. 
375 | 376 | ```{code-cell} ipython3 377 | def factorial(n): 378 | k = 1 379 | for i in range(n): 380 | k = k * (i + 1) 381 | return k 382 | 383 | factorial(4) 384 | ``` 385 | 386 | ### Exercise 2 387 | 388 | ```{code-cell} ipython3 389 | from numpy.random import uniform 390 | 391 | def binomial_rv(n, p): 392 | count = 0 393 | for i in range(n): 394 | U = uniform() 395 | if U < p: 396 | count = count + 1 # Or count += 1 397 | return count 398 | 399 | binomial_rv(10, 0.5) 400 | ``` 401 | 402 | ### Exercise 3 403 | 404 | Here\'s a function for the first random device. 405 | 406 | ```{code-cell} ipython3 407 | from numpy.random import uniform 408 | 409 | def draw(k): # pays if k consecutive successes in a sequence 410 | 411 | payoff = 0 412 | count = 0 413 | 414 | for i in range(10): 415 | U = uniform() 416 | count = count + 1 if U < 0.5 else 0 417 | print(count) # print counts for clarity 418 | if count == k: 419 | payoff = 1 420 | 421 | return payoff 422 | 423 | draw(3) 424 | ``` 425 | 426 | Here\'s another function for the second random device. 427 | 428 | ```{code-cell} ipython3 429 | def draw_new(k): # pays if k successes in a sequence 430 | 431 | payoff = 0 432 | count = 0 433 | 434 | for i in range(10): 435 | U = uniform() 436 | count = count + ( 1 if U < 0.5 else 0 ) 437 | print(count) 438 | if count == k: 439 | payoff = 1 440 | 441 | return payoff 442 | 443 | draw_new(3) 444 | ``` 445 | -------------------------------------------------------------------------------- /book/docs/index.md: -------------------------------------------------------------------------------- 1 | # Programming for Quantitative Economics 2 | 3 | ```{note} 4 | This collection of lectures was built using [Jupyter 5 | Book](https://beta.jupyterbook.org/intro.html), as part of the 6 | [ExecutableBookProject](https://ebp.jupyterbook.org/en/latest/). These tools 7 | are still at an early stage of development and breaking changes may occur. 8 | Feedback and participation is very welcome. 
9 | ``` 10 | 11 | These notes present a set of lectures on Python programming for quantitative economics, designed and written by **[Thomas J. Sargent](http://www.tomsargent.com/)** and **[John Stachurski](https://johnstachurski.net/)**. 12 | 13 | They are one part of a larger set of lectures on open source computing, 14 | economics and finance backed by [QuantEcon](https://quantecon.org). 15 | 16 | 17 | (troubleshooting)= 18 | 19 | ## Troubleshooting 20 | 21 | ```{note} This page is for readers experiencing errors when running the code from 22 | the lectures. 23 | ``` 24 | 25 | The basic assumption of the lectures is that code in a lecture should 26 | execute whenever 27 | 28 | 1. it is executed in a Jupyter notebook and 29 | 2. the notebook is running on a machine with the latest version of 30 | Anaconda Python. 31 | 32 | To install Anaconda, follow the instructions in {ref}`this lecture <getting_started>`. 33 | 34 | The most common source of problems for our 35 | readers is that their Anaconda distribution is not up to date. 36 | 37 | [Here\'s a useful article](https://www.anaconda.com/keeping-anaconda-date/) on how to 38 | update Anaconda. 39 | 40 | Another option is to simply remove Anaconda and reinstall. 41 | 42 | You also need to keep the external code libraries, such as [QuantEcon.py](https://quantecon.org/quantecon-py) up to date. 43 | 44 | For this task you can either 45 | 46 | - use `pip install --upgrade quantecon` on the command 47 | line, or 48 | - execute `!pip install --upgrade quantecon` within a 49 | Jupyter notebook. 50 | 51 | ## Feedback 52 | 53 | To provide feedback on these lectures you can 54 | 55 | - use our [discourse forum](https://discourse.quantecon.org/) or 56 | - provide direct feedback to <contact@quantecon.org>.
57 | -------------------------------------------------------------------------------- /book/docs/matplotlib.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | text_representation: 4 | extension: .md 5 | format_name: myst 6 | kernelspec: 7 | display_name: Python 3 8 | language: python 9 | name: python3 10 | --- 11 | 12 | (matplotlib)= 13 | 14 | # Matplotlib 15 | 16 | ## Overview 17 | 18 | We\'ve already generated quite a few figures in these lectures using 19 | [Matplotlib](http://matplotlib.org/). 20 | 21 | Matplotlib is an outstanding graphics library, designed for scientific 22 | computing, with 23 | 24 | - high-quality 2D and 3D plots 25 | - output in all the usual formats (PDF, PNG, etc.) 26 | - LaTeX integration 27 | - fine-grained control over all aspects of presentation 28 | - animation, etc. 29 | 30 | ### Matplotlib\'s Split Personality 31 | 32 | Matplotlib is unusual in that it offers two different interfaces to 33 | plotting. 34 | 35 | One is a simple MATLAB-style API (Application Programming Interface) 36 | that was written to help MATLAB refugees find a ready home. 37 | 38 | The other is a more \"Pythonic\" object-oriented API. 39 | 40 | For reasons described below, we recommend that you use the second API. 41 | 42 | But first, let\'s discuss the difference. 43 | 44 | ## The APIs 45 | 46 | ### The MATLAB-style API 47 | 48 | Here\'s the kind of easy example you might find in introductory 49 | treatments 50 | 51 | ```{code-cell} ipython3 52 | import matplotlib.pyplot as plt 53 | %matplotlib inline 54 | import numpy as np 55 | 56 | x = np.linspace(0, 10, 200) 57 | y = np.sin(x) 58 | 59 | plt.plot(x, y, 'b-', linewidth=2) 60 | plt.show() 61 | ``` 62 | 63 | This is simple and convenient, but also somewhat limited and 64 | un-Pythonic. 65 | 66 | For example, in the function calls, a lot of objects get created and 67 | passed around without making themselves known to the programmer. 
68 | 69 | Python programmers tend to prefer a more explicit style of programming 70 | (run `import this` in a code block and look at the second line). 71 | 72 | This leads us to the alternative, object-oriented Matplotlib API. 73 | 74 | ### The Object-Oriented API 75 | 76 | Here\'s the code corresponding to the preceding figure using the 77 | object-oriented API 78 | 79 | ```{code-cell} ipython3 80 | fig, ax = plt.subplots() 81 | ax.plot(x, y, 'b-', linewidth=2) 82 | plt.show() 83 | ``` 84 | 85 | Here the call `fig, ax = plt.subplots()` returns a pair, where 86 | 87 | - `fig` is a `Figure` instance---like a blank canvas. 88 | - `ax` is an `AxesSubplot` instance---think of a frame for plotting 89 | in. 90 | 91 | The `plot()` function is actually a method of `ax`. 92 | 93 | While there\'s a bit more typing, the more explicit use of objects gives 94 | us better control. 95 | 96 | This will become more clear as we go along. 97 | 98 | ### Tweaks 99 | 100 | Here we\'ve changed the line to red and added a legend 101 | 102 | ```{code-cell} ipython3 103 | fig, ax = plt.subplots() 104 | ax.plot(x, y, 'r-', linewidth=2, label='sine function', alpha=0.6) 105 | ax.legend() 106 | plt.show() 107 | ``` 108 | 109 | We\'ve also used `alpha` to make the line slightly transparent---which 110 | makes it look smoother. 111 | 112 | The location of the legend can be changed by replacing `ax.legend()` 113 | with `ax.legend(loc='upper center')`. 
114 | 115 | ```{code-cell} ipython3 116 | fig, ax = plt.subplots() 117 | ax.plot(x, y, 'r-', linewidth=2, label='sine function', alpha=0.6) 118 | ax.legend(loc='upper center') 119 | plt.show() 120 | ``` 121 | 122 | If everything is properly configured, then adding LaTeX is trivial 123 | 124 | ```{code-cell} ipython3 125 | fig, ax = plt.subplots() 126 | ax.plot(x, y, 'r-', linewidth=2, label='$y=\sin(x)$', alpha=0.6) 127 | ax.legend(loc='upper center') 128 | plt.show() 129 | ``` 130 | 131 | Controlling the ticks, adding titles and so on is also straightforward 132 | 133 | ```{code-cell} ipython3 134 | fig, ax = plt.subplots() 135 | ax.plot(x, y, 'r-', linewidth=2, label='$y=\sin(x)$', alpha=0.6) 136 | ax.legend(loc='upper center') 137 | ax.set_yticks([-1, 0, 1]) 138 | ax.set_title('Test plot') 139 | plt.show() 140 | ``` 141 | 142 | ## More Features 143 | 144 | Matplotlib has a huge array of functions and features, which you can 145 | discover over time as you have need for them. 146 | 147 | We mention just a few. 148 | 149 | ### Multiple Plots on One Axis 150 | 151 | It\'s straightforward to generate multiple plots on the same axes. 152 | 153 | Here\'s an example that randomly generates three normal densities and 154 | adds a label with their mean 155 | 156 | ```{code-cell} ipython3 157 | from scipy.stats import norm 158 | from random import uniform 159 | 160 | fig, ax = plt.subplots() 161 | x = np.linspace(-4, 4, 150) 162 | for i in range(3): 163 | m, s = uniform(-1, 1), uniform(1, 2) 164 | y = norm.pdf(x, loc=m, scale=s) 165 | current_label = f'$\mu = {m:.2}$' 166 | ax.plot(x, y, linewidth=2, alpha=0.6, label=current_label) 167 | ax.legend() 168 | plt.show() 169 | ``` 170 | 171 | ### Multiple Subplots 172 | 173 | Sometimes we want multiple subplots in one figure. 
174 | 175 | Here\'s an example that generates 6 histograms 176 | 177 | ```{code-cell} ipython3 178 | num_rows, num_cols = 3, 2 179 | fig, axes = plt.subplots(num_rows, num_cols) 180 | for i in range(num_rows): 181 | for j in range(num_cols): 182 | m, s = uniform(-1, 1), uniform(1, 2) 183 | x = norm.rvs(loc=m, scale=s, size=100) 184 | axes[i, j].hist(x, alpha=0.6, bins=20) 185 | t = f'$\mu = {m:.2}, \quad \sigma = {s:.2}$' 186 | axes[i, j].set(title=t, xticks=[-4, 0, 4], yticks=[]) 187 | 188 | plt.tight_layout() 189 | plt.show() 190 | ``` 191 | 192 | ### 3D Plots 193 | 194 | Matplotlib does a nice job of 3D plots --- here is one example 195 | 196 | ```{code-cell} ipython3 197 | from mpl_toolkits.mplot3d.axes3d import Axes3D 198 | from matplotlib import cm 199 | 200 | 201 | def f(x, y): 202 | return np.cos(x**2 + y**2) / (1 + x**2 + y**2) 203 | 204 | xgrid = np.linspace(-3, 3, 50) 205 | ygrid = xgrid 206 | x, y = np.meshgrid(xgrid, ygrid) 207 | 208 | fig = plt.figure() 209 | ax = fig.add_subplot(111, projection='3d') 210 | ax.plot_surface(x, 211 | y, 212 | f(x, y), 213 | rstride=2, cstride=2, 214 | cmap=cm.jet, 215 | alpha=0.7, 216 | linewidth=0.25) 217 | ax.set_zlim(-0.5, 1.0) 218 | plt.show() 219 | ``` 220 | 221 | ### A Customizing Function 222 | 223 | Perhaps you will find a set of customizations that you regularly use. 224 | 225 | Suppose we usually prefer our axes to go through the origin, and to have 226 | a grid. 227 | 228 | Here\'s a nice example from [Matthew Doty](https://github.com/xcthulhu) 229 | of how the object-oriented API can be used to build a custom `subplots` 230 | function that implements these changes. 
231 | 232 | Read carefully through the code and see if you can follow what\'s going 233 | on 234 | 235 | ```{code-cell} ipython3 236 | def subplots(): 237 | "Custom subplots with axes through the origin" 238 | fig, ax = plt.subplots() 239 | 240 | # Set the axes through the origin 241 | for spine in ['left', 'bottom']: 242 | ax.spines[spine].set_position('zero') 243 | for spine in ['right', 'top']: 244 | ax.spines[spine].set_color('none') 245 | 246 | ax.grid() 247 | return fig, ax 248 | 249 | 250 | fig, ax = subplots() # Call the local version, not plt.subplots() 251 | x = np.linspace(-2, 10, 200) 252 | y = np.sin(x) 253 | ax.plot(x, y, 'r-', linewidth=2, label='sine function', alpha=0.6) 254 | ax.legend(loc='lower right') 255 | plt.show() 256 | ``` 257 | 258 | The custom `subplots` function 259 | 260 | 1. calls the standard `plt.subplots` function internally to generate 261 | the `fig, ax` pair, 262 | 2. makes the desired customizations to `ax`, and 263 | 3. passes the `fig, ax` pair back to the calling code. 264 | 265 | ## Further Reading 266 | 267 | - The [Matplotlib gallery](http://matplotlib.org/gallery.html) 268 | provides many examples. 269 | - A nice [Matplotlib 270 | tutorial](http://scipy-lectures.org/intro/matplotlib/index.html) by 271 | Nicolas Rougier, Mike Muller and Gael Varoquaux. 272 | - [mpltools](http://tonysyu.github.io/mpltools/index.html) allows easy 273 | switching between plot styles. 274 | - [Seaborn](https://github.com/mwaskom/seaborn) facilitates common 275 | statistics plots in Matplotlib. 276 | 277 | ## Exercises 278 | 279 | ### Exercise 1 280 | 281 | Plot the function 282 | 283 | $$ 284 | f(x) = \cos(\pi \theta x) \exp(-x) 285 | $$ 286 | 287 | over the interval $[0, 5]$ for each $\theta$ in `np.linspace(0, 2, 10)`. 288 | 289 | Place all the curves in the same figure. 
290 | 291 | The output should look like this 292 | 293 | ```{glue:} matplotlib_ex1 294 | ``` 295 | 296 | ## Solutions 297 | 298 | ### Exercise 1 299 | 300 | Here\'s one solution 301 | 302 | ```{code-cell} ipython3 303 | def f(x, θ): 304 | return np.cos(np.pi * θ * x ) * np.exp(- x) 305 | 306 | θ_vals = np.linspace(0, 2, 10) 307 | x = np.linspace(0, 5, 200) 308 | fig, ax = plt.subplots() 309 | 310 | for θ in θ_vals: 311 | ax.plot(x, f(x, θ)) 312 | 313 | plt.show() 314 | ``` 315 | 316 | ```{code-cell} ipython3 317 | :tags: [remove-cell] 318 | 319 | from myst_nb import glue 320 | glue("matplotlib_ex1", fig, display=False) 321 | ``` 322 | -------------------------------------------------------------------------------- /book/docs/need_for_speed.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | text_representation: 4 | extension: .md 5 | format_name: myst 6 | kernelspec: 7 | display_name: Python 3 8 | language: python 9 | name: python3 10 | --- 11 | 12 | (speed)= 13 | 14 | # Python for Scientific Computing 15 | 16 | In addition to what\'s in Anaconda, this lecture will need the following 17 | libraries: 18 | 19 | ```{code-cell} ipython3 20 | :tags: [remove-output] 21 | 22 | !pip install --upgrade quantecon 23 | ``` 24 | 25 | ## Overview 26 | 27 | Python is extremely popular for scientific computing, due to such 28 | factors as 29 | 30 | - the accessible and flexible nature of the language itself, 31 | - the huge range of high quality scientific libraries now available, 32 | - the fact that the language and libraries are open source, 33 | - the popular Anaconda Python distribution, which simplifies 34 | installation and management of those libraries, and 35 | - the recent surge of interest in using Python for machine learning 36 | and artificial intelligence. 
37 | 38 | In this lecture we give a short overview of scientific computing in 39 | Python, addressing the following questions: 40 | 41 | - What are the relative strengths and weaknesses of Python for these 42 | tasks? 43 | - What are the main elements of the scientific Python ecosystem? 44 | - How is the situation changing over time? 45 | 46 | ## Scientific Libraries 47 | 48 | Let\'s briefly review Python\'s scientific libraries, starting with why 49 | we need them. 50 | 51 | ### The Role of Scientific Libraries 52 | 53 | One obvious reason we use scientific libraries is because they implement 54 | routines we want to use. 55 | 56 | For example, it\'s almost always better to use an existing routine for 57 | root finding than to write a new one from scratch. 58 | 59 | (For standard algorithms, efficiency is maximized if the community can 60 | coordinate on a common set of implementations, written by experts and 61 | tuned by users to be as fast and robust as possible.) 62 | 63 | But this is not the only reason that we use Python\'s scientific 64 | libraries. 65 | 66 | Another is that pure Python, while flexible and elegant, is not fast. 67 | 68 | So we need libraries that are designed to accelerate execution of Python 69 | code. 70 | 71 | As we\'ll see below, there are now Python libraries that can do this 72 | extremely well. 73 | 74 | ### Python\'s Scientific Ecosystem 75 | 76 | In terms of popularity, the big four in the world of scientific Python 77 | libraries are 78 | 79 | - NumPy 80 | - SciPy 81 | - Matplotlib 82 | - Pandas 83 | 84 | For us, there\'s another (relatively new) library that will also be 85 | essential for numerical computing: 86 | 87 | - Numba 88 | 89 | Over the next few lectures we\'ll see how to use these libraries. 90 | 91 | But first, let\'s quickly review how they fit together. 
92 | 93 | - NumPy forms the foundations by providing a basic array data type 94 | (think of vectors and matrices) and functions for acting on these 95 | arrays (e.g., matrix multiplication). 96 | - SciPy builds on NumPy by adding the kinds of numerical methods that 97 | are routinely used in science (interpolation, optimization, root 98 | finding, etc.). 99 | - Matplotlib is used to generate figures, with a focus on plotting 100 | data stored in NumPy arrays. 101 | - Pandas provides types and functions for empirical work (e.g., 102 | manipulating data). 103 | - Numba accelerates execution via JIT compilation --- we\'ll learn 104 | about this soon. 105 | 106 | ## The Need for Speed 107 | 108 | Now let\'s discuss execution speed. 109 | 110 | Higher-level languages like Python are optimized for humans. 111 | 112 | This means that the programmer can leave many details to the runtime 113 | environment 114 | 115 | - specifying variable types 116 | - memory allocation/deallocation, etc. 117 | 118 | The upside is that, compared to low-level languages, Python is typically 119 | faster to write, less error-prone and easier to debug. 120 | 121 | The downside is that Python is harder to optimize --- that is, turn 122 | into fast machine code --- than languages like C or Fortran. 123 | 124 | Indeed, the standard implementation of Python (called CPython) cannot 125 | match the speed of compiled languages such as C or Fortran. 126 | 127 | Does that mean that we should just switch to C or Fortran for 128 | everything? 129 | 130 | The answer is: No, no and one hundred times no! 131 | 132 | (This is what you should say to the senior professor insisting that the 133 | model needs to be rewritten in Fortran or C++.) 134 | 135 | There are two reasons why: 136 | 137 | First, for any given program, relatively few lines are ever going to be 138 | time-critical. 139 | 140 | Hence it is far more efficient to write most of our code in a high 141 | productivity language like Python. 
142 | 143 | Second, even for those lines of code that *are* time-critical, we can 144 | now achieve the same speed as C or Fortran using Python\'s scientific 145 | libraries. 146 | 147 | ### Where are the Bottlenecks? 148 | 149 | Before we learn how to do this, let\'s try to understand why plain 150 | vanilla Python is slower than C or Fortran. 151 | 152 | This will, in turn, help us figure out how to speed things up. 153 | 154 | #### Dynamic Typing 155 | 156 | Consider this Python operation 157 | 158 | ```{code-cell} ipython3 159 | a, b = 10, 10 160 | a + b 161 | ``` 162 | 163 | Even for this simple operation, the Python interpreter has a fair bit of 164 | work to do. 165 | 166 | For example, in the statement `a + b`, the interpreter has to know which 167 | operation to invoke. 168 | 169 | If `a` and `b` are strings, then `a + b` requires string concatenation 170 | 171 | ```{code-cell} ipython3 172 | a, b = 'foo', 'bar' 173 | a + b 174 | ``` 175 | 176 | If `a` and `b` are lists, then `a + b` requires list concatenation 177 | 178 | ```{code-cell} ipython3 179 | a, b = ['foo'], ['bar'] 180 | a + b 181 | ``` 182 | 183 | (We say that the operator `+` is *overloaded* --- its action depends 184 | on the type of the objects on which it acts) 185 | 186 | As a result, Python must check the type of the objects and then call the 187 | correct operation. 188 | 189 | This involves substantial overheads. 190 | 191 | #### Static Types 192 | 193 | Compiled languages avoid these overheads with explicit, static types. 194 | 195 | For example, consider the following C code, which sums the integers from 196 | 1 to 10 197 | 198 | ```c 199 | #include <stdio.h> 200 | 201 | int main(void) { 202 | int i; 203 | int sum = 0; 204 | for (i = 1; i <= 10; i++) { 205 | sum = sum + i; 206 | } 207 | printf("sum = %d\n", sum); 208 | return 0; 209 | } 210 | ``` 211 | 212 | The variables `i` and `sum` are explicitly declared to be integers.
213 | 214 | Hence, the meaning of addition here is completely unambiguous. 215 | 216 | ### Data Access 217 | 218 | Another drag on speed for high-level languages is data access. 219 | 220 | To illustrate, let\'s consider the problem of summing some data --- 221 | say, a collection of integers. 222 | 223 | #### Summing with Compiled Code 224 | 225 | In C or Fortran, these integers would typically be stored in an array, 226 | which is a simple data structure for storing homogeneous data. 227 | 228 | Such an array is stored in a single contiguous block of memory 229 | 230 | - In modern computers, memory addresses are allocated to each byte 231 | (one byte = 8 bits). 232 | - For example, a 64 bit integer is stored in 8 bytes of memory. 233 | - An array of $n$ such integers occupies $8n$ **consecutive** memory 234 | slots. 235 | 236 | Moreover, the compiler is made aware of the data type by the programmer. 237 | 238 | - In this case 64 bit integers 239 | 240 | Hence, each successive data point can be accessed by shifting forward in 241 | memory space by a known and fixed amount. 242 | 243 | - In this case 8 bytes 244 | 245 | #### Summing in Pure Python 246 | 247 | Python tries to replicate these ideas to some degree. 248 | 249 | For example, in the standard Python implementation (CPython), list 250 | elements are placed in memory locations that are in a sense contiguous. 251 | 252 | However, these list elements are more like pointers to data rather than 253 | actual data. 254 | 255 | Hence, there is still overhead involved in accessing the data values 256 | themselves. 257 | 258 | This is a considerable drag on speed. 259 | 260 | In fact, it\'s generally true that memory traffic is a major culprit 261 | when it comes to slow execution. 262 | 263 | Let\'s look at some ways around these problems. 264 | 265 | ## Vectorization 266 | 267 | There is a clever method called **vectorization** that can be used to 268 | speed up high level languages in numerical applications. 
269 | 270 | The key idea is to send array processing operations in batch to 271 | pre-compiled and efficient native machine code. 272 | 273 | The machine code itself is typically compiled from carefully optimized C 274 | or Fortran. 275 | 276 | For example, when working in a high level language, the operation of 277 | inverting a large matrix can be subcontracted to efficient machine code 278 | that is pre-compiled for this purpose and supplied to users as part of a 279 | package. 280 | 281 | This clever idea dates back to MATLAB, which uses vectorization 282 | extensively. 283 | 284 | Vectorization can greatly accelerate many numerical computations (but 285 | not all, as we shall see). 286 | 287 | Let\'s see how vectorization works in Python, using NumPy. 288 | 289 | ### Operations on Arrays 290 | 291 | First, let\'s run some imports 292 | 293 | ```{code-cell} ipython3 294 | import random 295 | import numpy as np 296 | import quantecon as qe 297 | ``` 298 | 299 | Next let\'s try some non-vectorized code, which uses a native Python 300 | loop to generate, square and then sum a large number of random 301 | variables: 302 | 303 | ```{code-cell} ipython3 304 | n = 1_000_000 305 | ``` 306 | 307 | ```{code-cell} ipython3 308 | %%time 309 | 310 | y = 0 # Will accumulate and store sum 311 | for i in range(n): 312 | x = random.uniform(0, 1) 313 | y += x**2 314 | ``` 315 | 316 | The following vectorized code achieves the same thing. 317 | 318 | ```{code-cell} ipython3 319 | %%time 320 | 321 | x = np.random.uniform(0, 1, n) 322 | y = np.sum(x**2) 323 | ``` 324 | 325 | As you can see, the second code block runs much faster. Why? 326 | 327 | The second code block breaks the loop down into three basic operations 328 | 329 | 1. draw `n` uniforms 330 | 2. square them 331 | 3. sum them 332 | 333 | These are sent as batch operators to optimized machine code. 
334 | 335 | Apart from minor overheads associated with sending data back and forth, 336 | the result is C or Fortran-like speed. 337 | 338 | When we run batch operations on arrays like this, we say that the code 339 | is *vectorized*. 340 | 341 | Vectorized code is typically fast and efficient. 342 | 343 | It is also surprisingly flexible, in the sense that many operations can 344 | be vectorized. 345 | 346 | The next section illustrates this point. 347 | 348 | (ufuncs)= 349 | 350 | ### Universal Functions 351 | 352 | Many functions provided by NumPy are so-called *universal functions* 353 | --- also called 354 | [ufuncs](https://docs.scipy.org/doc/numpy/reference/ufuncs.html). 355 | 356 | This means that they 357 | 358 | - map scalars into scalars, as expected 359 | - map arrays into arrays, acting element-wise 360 | 361 | For example, `np.cos` is a ufunc: 362 | 363 | ```{code-cell} ipython3 364 | np.cos(1.0) 365 | ``` 366 | 367 | ```{code-cell} ipython3 368 | np.cos(np.linspace(0, 1, 3)) 369 | ``` 370 | 371 | By exploiting ufuncs, many operations can be vectorized. 372 | 373 | For example, consider the problem of maximizing a function $f$ of two 374 | variables $(x,y)$ over the square $[-a, a] \times [-a, a]$. 
375 | 376 | For $f$ and $a$ let\'s choose 377 | 378 | $$ 379 | f(x,y) = \frac{\cos(x^2 + y^2)}{1 + x^2 + y^2} 380 | \quad \text{and} \quad 381 | a = 3 382 | $$ 383 | 384 | Here\'s a plot of $f$ 385 | 386 | ```{code-cell} ipython3 387 | import matplotlib.pyplot as plt 388 | %matplotlib inline 389 | from mpl_toolkits.mplot3d.axes3d import Axes3D 390 | from matplotlib import cm 391 | 392 | def f(x, y): 393 | return np.cos(x**2 + y**2) / (1 + x**2 + y**2) 394 | 395 | xgrid = np.linspace(-3, 3, 50) 396 | ygrid = xgrid 397 | x, y = np.meshgrid(xgrid, ygrid) 398 | 399 | fig = plt.figure() 400 | ax = fig.add_subplot(111, projection='3d') 401 | ax.plot_surface(x, 402 | y, 403 | f(x, y), 404 | rstride=2, cstride=2, 405 | cmap=cm.jet, 406 | alpha=0.7, 407 | linewidth=0.25) 408 | ax.set_zlim(-0.5, 1.0) 409 | ax.set_xlabel('$x$', fontsize=14) 410 | ax.set_ylabel('$y$', fontsize=14) 411 | plt.show() 412 | ``` 413 | 414 | To maximize it, we\'re going to use a naive grid search: 415 | 416 | 1. Evaluate $f$ for all $(x,y)$ in a grid on the square. 417 | 2. Return the maximum of observed values. 418 | 419 | The grid will be 420 | 421 | ```{code-cell} ipython3 422 | grid = np.linspace(-3, 3, 1000) 423 | ``` 424 | 425 | Here\'s a non-vectorized version that uses Python loops. 426 | 427 | ```{code-cell} ipython3 428 | %%time 429 | 430 | m = -np.inf 431 | 432 | for x in grid: 433 | for y in grid: 434 | z = f(x, y) 435 | if z > m: 436 | m = z 437 | ``` 438 | 439 | And here\'s a vectorized version 440 | 441 | ```{code-cell} ipython3 442 | %%time 443 | 444 | x, y = np.meshgrid(grid, grid) 445 | np.max(f(x, y)) 446 | ``` 447 | 448 | In the vectorized version, all the looping takes place in compiled code. 449 | 450 | As you can see, the second version is **much** faster. 451 | 452 | (We\'ll make it even faster again later on, using more scientific 453 | programming tricks.) 
454 | 455 | (numba-p_c_vectorization)= 456 | 457 | ## Beyond Vectorization 458 | 459 | At its best, vectorization yields fast, simple code. 460 | 461 | However, it\'s not without disadvantages. 462 | 463 | One issue is that it can be highly memory-intensive. 464 | 465 | For example, the vectorized maximization routine above is far more 466 | memory intensive than the non-vectorized version that preceded it. 467 | 468 | This is because vectorization tends to create many intermediate arrays 469 | before producing the final calculation. 470 | 471 | Another issue is that not all algorithms can be vectorized. 472 | 473 | In these kinds of settings, we need to go back to loops. 474 | 475 | Fortunately, there are alternative ways to speed up Python loops that 476 | work in almost any setting. 477 | 478 | For example, in the last few years, a new Python library called 479 | [Numba](http://numba.pydata.org/) has appeared that solves 480 | the main problems with vectorization listed above. 481 | 482 | It does so through something called **just in time (JIT) compilation**, 483 | which can generate extremely fast and efficient code. 484 | 485 | We\'ll learn how to use Numba [soon](numba). 486 | -------------------------------------------------------------------------------- /book/docs/oop_intro.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | text_representation: 4 | extension: .md 5 | format_name: myst 6 | kernelspec: 7 | display_name: Python 3 8 | language: python 9 | name: python3 10 | --- 11 | 12 | (oop_intro)= 13 | 14 | # OOP I: Introduction to Object Oriented Programming 15 | 16 | ## Overview 17 | 18 | [OOP](https://en.wikipedia.org/wiki/Object-oriented_programming) is one 19 | of the major paradigms in programming. 20 | 21 | The traditional programming paradigm (think Fortran, C, MATLAB, etc.) is 22 | called *procedural*.
23 | 24 | It works as follows 25 | 26 | - The program has a state corresponding to the values of its 27 | variables. 28 | - Functions are called to act on these data. 29 | - Data are passed back and forth via function calls. 30 | 31 | In contrast, in the OOP paradigm 32 | 33 | - data and functions are \"bundled together\" into \"objects\" 34 | 35 | (Functions in this context are referred to as **methods**) 36 | 37 | ### Python and OOP 38 | 39 | Python is a pragmatic language that blends object-oriented and 40 | procedural styles, rather than taking a purist approach. 41 | 42 | However, at a foundational level, Python *is* object-oriented. 43 | 44 | In particular, in Python, *everything is an object*. 45 | 46 | In this lecture, we explain what that statement means and why it 47 | matters. 48 | 49 | ## Objects 50 | 51 | In Python, an *object* is a collection of data and instructions held in 52 | computer memory that consists of 53 | 54 | 1. a type 55 | 2. a unique identity 56 | 3. data (i.e., content) 57 | 4. methods 58 | 59 | These concepts are defined and discussed sequentially below. 60 | 61 | ### Type 62 | 63 | Python provides for different types of objects, to accommodate different 64 | categories of data. 65 | 66 | For example 67 | 68 | ```{code-cell} ipython3 69 | s = 'This is a string' 70 | type(s) 71 | ``` 72 | 73 | ```{code-cell} ipython3 74 | x = 42 # Now let's create an integer 75 | type(x) 76 | ``` 77 | 78 | The type of an object matters for many expressions. 
79 | 80 | For example, the addition operator between two strings means 81 | concatenation 82 | 83 | ```{code-cell} ipython3 84 | '300' + 'cc' 85 | ``` 86 | 87 | On the other hand, between two numbers it means ordinary addition 88 | 89 | ```{code-cell} ipython3 90 | 300 + 400 91 | ``` 92 | 93 | Consider the following expression 94 | 95 | ```{code-cell} ipython3 96 | :tags: [raises-exception] 97 | 98 | '300' + 400 99 | ``` 100 | 101 | Here we are mixing types, and it\'s unclear to Python whether the user 102 | wants to 103 | 104 | - convert `'300'` to an integer and then add it to `400`, or 105 | - convert `400` to string and then concatenate it with `'300'` 106 | 107 | Some languages might try to guess but Python is *strongly typed* 108 | 109 | - Type is important, and implicit type conversion is rare. 110 | - Python will respond instead by raising a `TypeError`. 111 | 112 | To avoid the error, you need to clarify by changing the relevant type. 113 | 114 | For example, 115 | 116 | ```{code-cell} ipython3 117 | int('300') + 400 # To add as numbers, change the string to an integer 118 | ``` 119 | 120 | ### Identity 121 | 122 | In Python, each object has a unique identifier, which helps Python (and 123 | us) keep track of the object. 124 | 125 | The identity of an object can be obtained via the `id()` function 126 | 127 | ```{code-cell} ipython3 128 | y = 2.5 129 | z = 2.5 130 | id(y) 131 | ``` 132 | 133 | ```{code-cell} ipython3 134 | id(z) 135 | ``` 136 | 137 | In this example, `y` and `z` happen to have the same value (i.e., 138 | `2.5`), but they are not the same object. 139 | 140 | The identity of an object is in fact just the address of the object in 141 | memory. 142 | 143 | ### Object Content: Data and Attributes 144 | 145 | If we set `x = 42` then we create an object of type `int` that contains 146 | the data `42`. 
147 | 148 | In fact, it contains more, as the following example shows 149 | 150 | ```{code-cell} ipython3 151 | x = 42 152 | x 153 | ``` 154 | 155 | ```{code-cell} ipython3 156 | x.imag 157 | ``` 158 | 159 | ```{code-cell} ipython3 160 | x.__class__ 161 | ``` 162 | 163 | When Python creates this integer object, it stores with it various 164 | auxiliary information, such as the imaginary part, and the type. 165 | 166 | Any name following a dot is called an *attribute* of the object to the 167 | left of the dot. 168 | 169 | - e.g.,`imag` and `__class__` are attributes of `x`. 170 | 171 | We see from this example that objects have attributes that contain 172 | auxiliary information. 173 | 174 | They also have attributes that act like functions, called *methods*. 175 | 176 | These attributes are important, so let\'s discuss them in-depth. 177 | 178 | ### Methods 179 | 180 | Methods are *functions that are bundled with objects*. 181 | 182 | Formally, methods are attributes of objects that are callable (i.e., can 183 | be called as functions) 184 | 185 | ```{code-cell} ipython3 186 | x = ['foo', 'bar'] 187 | callable(x.append) 188 | ``` 189 | 190 | ```{code-cell} ipython3 191 | callable(x.__doc__) 192 | ``` 193 | 194 | Methods typically act on the data contained in the object they belong 195 | to, or combine that data with other data 196 | 197 | ```{code-cell} ipython3 198 | x = ['a', 'b'] 199 | x.append('c') 200 | s = 'This is a string' 201 | s.upper() 202 | ``` 203 | 204 | ```{code-cell} ipython3 205 | s.lower() 206 | ``` 207 | 208 | ```{code-cell} ipython3 209 | s.replace('This', 'That') 210 | ``` 211 | 212 | A great deal of Python functionality is organized around method calls. 
213 | 214 | For example, consider the following piece of code 215 | 216 | ```{code-cell} ipython3 217 | x = ['a', 'b'] 218 | x[0] = 'aa' # Item assignment using square bracket notation 219 | x 220 | ``` 221 | 222 | It doesn\'t look like there are any methods used here, but in fact the 223 | square bracket assignment notation is just a convenient interface to a 224 | method call. 225 | 226 | What actually happens is that Python calls the `__setitem__` method, as 227 | follows 228 | 229 | ```{code-cell} ipython3 230 | x = ['a', 'b'] 231 | x.__setitem__(0, 'aa') # Equivalent to x[0] = 'aa' 232 | x 233 | ``` 234 | 235 | (If you wanted to you could modify the `__setitem__` method, so that 236 | square bracket assignment does something totally different) 237 | 238 | ## Summary 239 | 240 | In Python, *everything in memory is treated as an object*. 241 | 242 | This includes not just lists, strings, etc., but also less obvious 243 | things, such as 244 | 245 | - functions (once they have been read into memory) 246 | - modules (ditto) 247 | - files opened for reading or writing 248 | - integers, etc. 249 | 250 | Consider, for example, functions. 251 | 252 | When Python reads a function definition, it creates a **function 253 | object** and stores it in memory. 254 | 255 | The following code illustrates 256 | 257 | ```{code-cell} ipython3 258 | def f(x): return x**2 259 | f 260 | ``` 261 | 262 | ```{code-cell} ipython3 263 | type(f) 264 | ``` 265 | 266 | ```{code-cell} ipython3 267 | id(f) 268 | ``` 269 | 270 | ```{code-cell} ipython3 271 | f.__name__ 272 | ``` 273 | 274 | We can see that `f` has type, identity, attributes and so on---just 275 | like any other object. 276 | 277 | It also has methods. 278 | 279 | One example is the `__call__` method, which just evaluates the function 280 | 281 | ```{code-cell} ipython3 282 | f.__call__(3) 283 | ``` 284 | 285 | Another is the `__dir__` method, which returns a list of attributes. 
286 | 287 | Modules loaded into memory are also treated as objects 288 | 289 | ```{code-cell} ipython3 290 | import math 291 | 292 | id(math) 293 | ``` 294 | 295 | This uniform treatment of data in Python (everything is an object) helps 296 | keep the language simple and consistent. 297 | -------------------------------------------------------------------------------- /book/docs/parallelization.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | text_representation: 4 | extension: .md 5 | format_name: myst 6 | kernelspec: 7 | display_name: Python 3 8 | language: python 9 | name: python3 10 | --- 11 | 12 | (parallel)= 13 | 14 | # Parallelization 15 | 16 | In addition to what\'s in Anaconda, this lecture will need the following 17 | libraries: 18 | 19 | ```{code-cell} ipython3 20 | :tags: [remove-output] 21 | 22 | !pip install --upgrade quantecon 23 | ``` 24 | 25 | ## Overview 26 | 27 | The growth of CPU clock speed (i.e., the speed at which a single chain 28 | of logic can be run) has slowed dramatically in recent years. 29 | 30 | This is unlikely to change in the near future, due to inherent physical 31 | limitations on the construction of chips and circuit boards. 32 | 33 | Chip designers and computer programmers have responded to the slowdown 34 | by seeking a different path to fast execution: parallelization. 35 | 36 | Hardware makers have increased the number of cores (physical CPUs) 37 | embedded in each machine. 38 | 39 | For programmers, the challenge has been to exploit these multiple CPUs 40 | by running many processes in parallel (i.e., simultaneously). 41 | 42 | This is particularly important in scientific programming, which requires 43 | handling 44 | 45 | - large amounts of data and 46 | - CPU intensive simulations and other calculations. 47 | 48 | In this lecture we discuss parallelization for scientific computing, 49 | with a focus on 50 | 51 | 1. 
the best tools for parallelization in Python and 52 | 2. how these tools can be applied to quantitative economic problems. 53 | 54 | Let\'s start with some imports: 55 | 56 | ```{code-cell} ipython3 57 | import numpy as np 58 | import quantecon as qe 59 | import matplotlib.pyplot as plt 60 | 61 | %matplotlib inline 62 | ``` 63 | 64 | ## Types of Parallelization 65 | 66 | Large textbooks have been written on different approaches to 67 | parallelization but we will keep a tight focus on what\'s most useful to 68 | us. 69 | 70 | We will briefly review the two main kinds of parallelization commonly 71 | used in scientific computing and discuss their pros and cons. 72 | 73 | ### Multiprocessing 74 | 75 | Multiprocessing means concurrent execution of multiple processes using 76 | more than one processor. 77 | 78 | In this context, a **process** is a chain of instructions (i.e., a 79 | program). 80 | 81 | Multiprocessing can be carried out on one machine with multiple CPUs or 82 | on a collection of machines connected by a network. 83 | 84 | In the latter case, the collection of machines is usually called a 85 | **cluster**. 86 | 87 | With multiprocessing, each process has its own memory space, although 88 | the physical memory chip might be shared. 89 | 90 | ### Multithreading 91 | 92 | Multithreading is similar to multiprocessing, except that, during 93 | execution, the threads all share the same memory space. 94 | 95 | Native Python struggles to implement multithreading due to some [legacy 96 | design features](https://wiki.python.org/moin/GlobalInterpreterLock). 97 | 98 | But this is not a restriction for scientific libraries like NumPy and 99 | Numba. 100 | 101 | Functions imported from these libraries and JIT-compiled code run in low 102 | level execution environments where Python\'s legacy restrictions don\'t 103 | apply. 
104 | 105 | ### Advantages and Disadvantages 106 | 107 | Multithreading is more lightweight because most system and memory 108 | resources are shared by the threads. 109 | 110 | In addition, the fact that multiple threads all access a shared pool of 111 | memory is extremely convenient for numerical programming. 112 | 113 | On the other hand, multiprocessing is more flexible and can be 114 | distributed across clusters. 115 | 116 | For the great majority of what we do in these lectures, multithreading 117 | will suffice. 118 | 119 | ## Implicit Multithreading in NumPy 120 | 121 | Actually, you have already been using multithreading in your Python 122 | code, although you might not have realized it. 123 | 124 | (We are, as usual, assuming that you are running the latest version of 125 | Anaconda Python.) 126 | 127 | This is because NumPy cleverly implements multithreading in a lot of its 128 | compiled code. 129 | 130 | Let\'s look at some examples to see this in action. 131 | 132 | ### A Matrix Operation 133 | 134 | The next piece of code computes the eigenvalues of a large number of 135 | randomly generated matrices. 136 | 137 | It takes a few seconds to run. 138 | 139 | ```{code-cell} ipython3 140 | n = 20 141 | m = 1000 142 | for i in range(n): 143 | X = np.random.randn(m, m) 144 | λ = np.linalg.eigvals(X) 145 | ``` 146 | 147 | Now, let\'s look at the output of the `htop` system monitor 148 | on our machine while this code is running: 149 | 150 | ```{figure} /_static/lecture_specific/parallelization/htop_parallel_npmat.png 151 | :scale: 65% 152 | ``` 153 | 154 | We can see that 4 of the 8 CPUs are running at full speed. 155 | 156 | This is because NumPy\'s `eigvals` routine neatly splits up the tasks 157 | and distributes them to different threads. 158 | 159 | ### A Multithreaded Ufunc 160 | 161 | Over the last few years, NumPy has managed to push this kind of 162 | multithreading out to more and more operations. 
163 | 164 | For example, let\'s return to a maximization problem 165 | {ref}`discussed previously `: 166 | 167 | ```{code-cell} ipython3 168 | def f(x, y): 169 | return np.cos(x**2 + y**2) / (1 + x**2 + y**2) 170 | 171 | grid = np.linspace(-3, 3, 5000) 172 | x, y = np.meshgrid(grid, grid) 173 | ``` 174 | 175 | ```{code-cell} ipython3 176 | %timeit np.max(f(x, y)) 177 | ``` 178 | 179 | If you have a system monitor such as `htop` (Linux/Mac) or 180 | `perfmon` (Windows), then try running this and then 181 | observing the load on your CPUs. 182 | 183 | (You will probably need to bump up the grid size to see large effects.) 184 | 185 | At least on our machine, the output shows that the operation is 186 | successfully distributed across multiple threads. 187 | 188 | This is one of the reasons why the vectorized code above is fast. 189 | 190 | ### A Comparison with Numba 191 | 192 | To get some basis for comparison for the last example, let\'s try the 193 | same thing with Numba. 194 | 195 | In fact there is an easy way to do this, since Numba can also be used to 196 | create custom {ref}`ufuncs ` with the 197 | [@vectorize](http://numba.pydata.org/numba-doc/dev/user/vectorize.html) 198 | decorator. 199 | 200 | ```{code-cell} ipython3 201 | from numba import vectorize 202 | 203 | @vectorize 204 | def f_vec(x, y): 205 | return np.cos(x**2 + y**2) / (1 + x**2 + y**2) 206 | 207 | np.max(f_vec(x, y)) # Run once to compile 208 | ``` 209 | 210 | ```{code-cell} ipython3 211 | %timeit np.max(f_vec(x, y)) 212 | ``` 213 | 214 | At least on our machine, the difference in the speed between the Numba 215 | version and the vectorized NumPy version shown above is not large. 216 | 217 | But there\'s quite a bit going on here so let\'s try to break down what 218 | is happening. 219 | 220 | Both Numba and NumPy use efficient machine code that\'s specialized to 221 | these floating point operations. 222 | 223 | However, the code NumPy uses is, in some ways, less efficient. 
224 | 225 | The reason is that, in NumPy, the operation 226 | `np.cos(x**2 + y**2) / (1 + x**2 + y**2)` generates several intermediate 227 | arrays. 228 | 229 | For example, a new array is created when `x**2` is calculated. 230 | 231 | The same is true when `y**2` is calculated, and then `x**2 + y**2` and 232 | so on. 233 | 234 | Numba avoids creating all these intermediate arrays by compiling one 235 | function that is specialized to the entire operation. 236 | 237 | But if this is true, then why isn\'t the Numba code faster? 238 | 239 | The reason is that NumPy makes up for its disadvantages with implicit 240 | multithreading, as we\'ve just discussed. 241 | 242 | ### Multithreading a Numba Ufunc 243 | 244 | Can we get both of these advantages at once? 245 | 246 | In other words, can we pair 247 | 248 | - the efficiency of Numba\'s highly specialized JIT compiled function 249 | and 250 | - the speed gains from parallelization obtained by NumPy\'s implicit 251 | multithreading? 252 | 253 | It turns out that we can, by adding some type information plus 254 | `target='parallel'`. 255 | 256 | ```{code-cell} ipython3 257 | @vectorize('float64(float64, float64)', target='parallel') 258 | def f_vec(x, y): 259 | return np.cos(x**2 + y**2) / (1 + x**2 + y**2) 260 | 261 | np.max(f_vec(x, y)) # Run once to compile 262 | ``` 263 | 264 | ```{code-cell} ipython3 265 | %timeit np.max(f_vec(x, y)) 266 | ``` 267 | 268 | Now our code runs significantly faster than the NumPy version. 269 | 270 | ## Multithreaded Loops in Numba 271 | 272 | We just saw one approach to parallelization in Numba, using the 273 | `parallel` flag in `@vectorize`. 274 | 275 | This is neat but, it turns out, not well suited to many problems we 276 | consider. 277 | 278 | Fortunately, Numba provides another approach to multithreading that will 279 | work for us almost everywhere parallelization is possible. 
@njit
def h(w, r=0.1, s=0.3, v1=0.1, v2=1.0):
    """
    Apply one period of the wealth update rule w' = R s w + y.

    The gross return R and labor income y are independent lognormal
    draws, with volatilities v1 and v2 respectively; r shifts the mean
    gross return and s is the household savings rate.
    """
    # Lognormal gross return on assets (drawn first, matching the
    # original draw order so random streams line up)
    gross_return = (1 + r) * np.exp(v1 * randn())

    # Lognormal labor income
    labor_income = np.exp(v2 * randn())

    # Next-period wealth: return on reinvested savings plus income
    return gross_return * s * w + labor_income
@njit(parallel=True)
def compute_long_run_median_parallel(w0=1, T=1000, num_reps=50_000):
    """
    Parallel version of the median-wealth simulation.

    Simulates num_reps independent households, each starting from wealth
    w0 and updated T times via h, and returns the cross-sectional median
    of terminal wealth.

    The outer loop uses prange: each household's path is independent of
    the others, so Numba can distribute iterations across threads.  The
    inner loop stays an ordinary range because wealth at t+1 depends on
    wealth at t, so those steps cannot run concurrently.
    """
    obs = np.empty(num_reps)
    for i in prange(num_reps):   # iterations run concurrently across threads
        w = w0
        for t in range(T):       # sequential: each step depends on the last
            w = h(w)
        obs[i] = w               # record one terminal-wealth observation

    return np.median(obs)
417 | 418 | ### A Warning 419 | 420 | Parallelization works well in the outer loop of the last example because 421 | the individual tasks inside the loop are independent of each other. 422 | 423 | If this independence fails then parallelization is often problematic. 424 | 425 | For example, each step inside the inner loop depends on the last step, 426 | so independence fails, and this is why we use ordinary `range` instead 427 | of `prange`. 428 | 429 | When you see us using `prange` in later lectures, it is because the 430 | independence of tasks holds true. 431 | 432 | When you see us using ordinary `range` in a jitted function, it is 433 | either because the speed gain from parallelization is small or because 434 | independence fails. 435 | 436 | ## Exercises 437 | 438 | ### Exercise 1 439 | 440 | In {ref}`an earlier exercise `, we 441 | used Numba to accelerate an effort to compute the constant $\pi$ by 442 | Monte Carlo. 443 | 444 | Now try adding parallelization and see if you get further speed gains. 445 | 446 | You should not expect huge gains here because, while there are many 447 | independent tasks (draw point and test if in circle), each one has low 448 | execution time. 449 | 450 | Generally speaking, parallelization is less effective when the 451 | individual tasks to be parallelized are very small relative to total 452 | execution time. 453 | 454 | This is due to overheads associated with spreading all of these small 455 | tasks across multiple CPUs. 456 | 457 | Nevertheless, with suitable hardware, it is possible to get nontrivial 458 | speed gains in this exercise. 459 | 460 | For the size of the Monte Carlo simulation, use something substantial, 461 | such as `n = 100_000_000`. 
@njit(parallel=True)
def calculate_pi(n=1_000_000):
    """
    Estimate π by Monte Carlo.

    Draws n points uniformly on the unit square and counts the fraction
    landing inside the inscribed circle of radius 0.5, whose area is
    π/4.  The draws are independent, so the loop is parallelized with
    prange; Numba recognizes `count += 1` as a reduction and combines
    the per-thread counts safely.
    """
    count = 0
    for i in prange(n):
        # A uniform random point in the unit square
        u, v = uniform(0, 1), uniform(0, 1)
        # Distance from the square's center (0.5, 0.5)
        d = np.sqrt((u - 0.5)**2 + (v - 0.5)**2)
        if d < 0.5:        # point falls inside the inscribed circle
            count += 1

    area_estimate = count / n      # ≈ area of circle = π r² with r = 0.5
    return area_estimate * 4  # i.e. dividing by radius**2 = 0.25
504 | -------------------------------------------------------------------------------- /book/docs/quant-econ.bib: -------------------------------------------------------------------------------- 1 | @book{lasota2013chaos, 2 | title={Chaos, fractals, and noise: stochastic aspects of dynamics}, 3 | author={Lasota, Andrzej and Mackey, Michael C}, 4 | volume={97}, 5 | year={2013}, 6 | publisher={Springer Science \& Business Media} 7 | } -------------------------------------------------------------------------------- /book/docs/scipy.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupytext: 3 | text_representation: 4 | extension: .md 5 | format_name: myst 6 | kernelspec: 7 | display_name: Python 3 8 | language: python 9 | name: python3 10 | --- 11 | 12 | (sp)= 13 | 14 | # SciPy 15 | 16 | ## Overview 17 | 18 | [SciPy](http://www.scipy.org) builds on top of NumPy to provide common 19 | tools for scientific programming such as 20 | 21 | - [linear algebra](http://docs.scipy.org/doc/scipy/reference/linalg.html) 22 | - [numerical 23 | integration](http://docs.scipy.org/doc/scipy/reference/integrate.html) 24 | - [interpolation](http://docs.scipy.org/doc/scipy/reference/interpolate.html) 25 | - [optimization](http://docs.scipy.org/doc/scipy/reference/optimize.html) 26 | - [distributions and random number generation](http://docs.scipy.org/doc/scipy/reference/stats.html) 27 | - [signal processing](http://docs.scipy.org/doc/scipy/reference/signal.html) 28 | - etc., etc 29 | 30 | Like NumPy, SciPy is stable, mature and widely used. 31 | 32 | Many SciPy routines are thin wrappers around industry-standard Fortran 33 | libraries such as [LAPACK](https://en.wikipedia.org/wiki/LAPACK), 34 | [BLAS](https://en.wikipedia.org/wiki/Basic_Linear_Algebra_Subprograms), 35 | etc. 36 | 37 | It\'s not really necessary to \"learn\" SciPy as a whole. 
In fact, when we import SciPy we also get NumPy, as can be seen from
this excerpt of the SciPy initialization file:
84 | - some estimation procedures 85 | - some statistical tests 86 | 87 | ### Random Variables and Distributions 88 | 89 | Recall that `numpy.random` provides functions for generating random 90 | variables 91 | 92 | ```{code-cell} ipython3 93 | np.random.beta(5, 5, size=3) 94 | ``` 95 | 96 | This generates a draw from the distribution with the density function 97 | below when `a, b = 5, 5` 98 | 99 | $$ 100 | f(x; a, b) = \frac{x^{(a - 1)} (1 - x)^{(b - 1)}} 101 | {\int_0^1 u^{(a - 1)} (1 - u)^{(b - 1)} du} 102 | \qquad (0 \leq x \leq 1) 103 | $$ 104 | 105 | Sometimes we need access to the density itself, or the cdf, the 106 | quantiles, etc. 107 | 108 | For this, we can use `scipy.stats`, which provides all of this 109 | functionality as well as random number generation in a single consistent 110 | interface. 111 | 112 | Here\'s an example of usage 113 | 114 | ```{code-cell} ipython3 115 | from scipy.stats import beta 116 | import matplotlib.pyplot as plt 117 | %matplotlib inline 118 | 119 | q = beta(5, 5) # Beta(a, b), with a = b = 5 120 | obs = q.rvs(2000) # 2000 observations 121 | grid = np.linspace(0.01, 0.99, 100) 122 | 123 | fig, ax = plt.subplots() 124 | ax.hist(obs, bins=40, density=True) 125 | ax.plot(grid, q.pdf(grid), 'k-', linewidth=2) 126 | plt.show() 127 | ``` 128 | 129 | The object `q` that represents the distribution has additional useful 130 | methods, including 131 | 132 | ```{code-cell} ipython3 133 | q.cdf(0.4) # Cumulative distribution function 134 | ``` 135 | 136 | ```{code-cell} ipython3 137 | q.ppf(0.8) # Quantile (inverse cdf) function 138 | ``` 139 | 140 | ```{code-cell} ipython3 141 | q.mean() 142 | ``` 143 | 144 | The general syntax for creating these objects that represent 145 | distributions (of type `rv_frozen`) is 146 | 147 | > `name = scipy.stats.distribution_name(shape_parameters, loc=c, scale=d)` 148 | 149 | Here `distribution_name` is one of the distribution names in 150 | 
[scipy.stats](http://docs.scipy.org/doc/scipy/reference/stats.html). 151 | 152 | The `loc` and `scale` parameters transform the original random variable 153 | $X$ into $Y = c + d X$. 154 | 155 | ### Alternative Syntax 156 | 157 | There is an alternative way of calling the methods described above. 158 | 159 | For example, the code that generates the figure above can be replaced by 160 | 161 | ```{code-cell} ipython3 162 | obs = beta.rvs(5, 5, size=2000) 163 | grid = np.linspace(0.01, 0.99, 100) 164 | 165 | fig, ax = plt.subplots() 166 | ax.hist(obs, bins=40, density=True) 167 | ax.plot(grid, beta.pdf(grid, 5, 5), 'k-', linewidth=2) 168 | plt.show() 169 | ``` 170 | 171 | ### Other Goodies in scipy.stats 172 | 173 | There are a variety of statistical functions in `scipy.stats`. 174 | 175 | For example, `scipy.stats.linregress` implements simple linear 176 | regression 177 | 178 | ```{code-cell} ipython3 179 | from scipy.stats import linregress 180 | 181 | x = np.random.randn(200) 182 | y = 2 * x + 0.1 * np.random.randn(200) 183 | gradient, intercept, r_value, p_value, std_err = linregress(x, y) 184 | gradient, intercept 185 | ``` 186 | 187 | To see the full list, consult the 188 | [documentation](https://docs.scipy.org/doc/scipy/reference/stats.html#statistical-functions-scipy-stats). 189 | 190 | ## Roots and Fixed Points 191 | 192 | A **root** or **zero** of a real function $f$ on $[a,b]$ is an 193 | $x \in [a, b]$ such that $f(x)=0$. 
def bisect(f, a, b, tol=10e-5):
    """
    Find a root of f on [a, b] by bisection.

    Assumes f is a real-valued, increasing continuous function with
    f(a) < 0 < f(b).  The bracketing interval is repeatedly halved
    until its width is at most tol, and the midpoint of the final
    interval is returned.
    """
    lo, hi = a, b

    while hi - lo > tol:
        mid = 0.5 * (hi + lo)
        if f(mid) > 0:
            # f is positive at the midpoint, so the sign change
            # (and hence the root) lies in the lower half
            hi = mid
        else:
            # f is non-positive at the midpoint: root lies in the
            # upper half (or mid itself is the root)
            lo = mid

    return 0.5 * (hi + lo)
315 | - If not, then the choice of algorithm involves a trade-off between 316 | speed and robustness. 317 | 318 | In practice, most default algorithms for root-finding, optimization and 319 | fixed points use *hybrid* methods. 320 | 321 | These methods typically combine a fast method with a robust method in 322 | the following manner: 323 | 324 | 1. Attempt to use a fast method 325 | 2. Check diagnostics 326 | 3. If diagnostics are bad, then switch to a more robust algorithm 327 | 328 | In `scipy.optimize`, the function `brentq` is such a hybrid method and a 329 | good default 330 | 331 | ```{code-cell} ipython3 332 | from scipy.optimize import brentq 333 | 334 | brentq(f, 0, 1) 335 | ``` 336 | 337 | Here the correct solution is found and the speed is better than 338 | bisection: 339 | 340 | ```{code-cell} ipython3 341 | %timeit brentq(f, 0, 1) 342 | ``` 343 | 344 | ```{code-cell} ipython3 345 | %timeit bisect(f, 0, 1) 346 | ``` 347 | 348 | ### Multivariate Root-Finding 349 | 350 | Use `scipy.optimize.fsolve`, a wrapper for a hybrid method in MINPACK. 351 | 352 | See the 353 | [documentation](http://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.fsolve.html) 354 | for details. 355 | 356 | ### Fixed Points 357 | 358 | A **fixed point** of a real function $f$ on $[a,b]$ is an $x \in [a, b]$ 359 | such that $f(x)=x$. 360 | 361 | SciPy has a function for finding (scalar) fixed points too 362 | 363 | ```{code-cell} ipython3 364 | from scipy.optimize import fixed_point 365 | 366 | fixed_point(lambda x: x**2, 10.0) # 10.0 is an initial guess 367 | ``` 368 | 369 | If you don\'t get good results, you can always switch back to the 370 | `brentq` root finder, since the fixed point of a function $f$ is the 371 | root of $g(x) := x - f(x)$. 372 | 373 | ## Optimization 374 | 375 | Most numerical packages provide only functions for *minimization*. 
376 | 377 | Maximization can be performed by recalling that the maximizer of a 378 | function $f$ on domain $D$ is the minimizer of $-f$ on $D$. 379 | 380 | Minimization is closely related to root-finding: For smooth functions, 381 | interior optima correspond to roots of the first derivative. 382 | 383 | The speed/robustness trade-off described above is present with numerical 384 | optimization too. 385 | 386 | Unless you have some prior information you can exploit, it\'s usually 387 | best to use hybrid methods. 388 | 389 | For constrained, univariate (i.e., scalar) minimization, a good hybrid 390 | option is `fminbound` 391 | 392 | ```{code-cell} ipython3 393 | from scipy.optimize import fminbound 394 | 395 | fminbound(lambda x: x**2, -1, 2) # Search in [-1, 2] 396 | ``` 397 | 398 | ### Multivariate Optimization 399 | 400 | Multivariate local optimizers include `minimize`, `fmin`, `fmin_powell`, 401 | `fmin_cg`, `fmin_bfgs`, and `fmin_ncg`. 402 | 403 | Constrained multivariate local optimizers include `fmin_l_bfgs_b`, 404 | `fmin_tnc`, `fmin_cobyla`. 405 | 406 | See the 407 | [documentation](http://docs.scipy.org/doc/scipy/reference/optimize.html) 408 | for details. 409 | 410 | ## Integration 411 | 412 | Most numerical integration methods work by computing the integral of an 413 | approximating polynomial. 414 | 415 | The resulting error depends on how well the polynomial fits the 416 | integrand, which in turn depends on how \"regular\" the integrand is. 417 | 418 | In SciPy, the relevant module for numerical integration is 419 | `scipy.integrate`. 420 | 421 | A good default for univariate integration is `quad` 422 | 423 | ```{code-cell} ipython3 424 | from scipy.integrate import quad 425 | 426 | integral, error = quad(lambda x: x**2, 0, 1) 427 | integral 428 | ``` 429 | 430 | In fact, `quad` is an interface to a very standard numerical integration 431 | routine in the Fortran library QUADPACK. 
def bisect(f, a, b, tol=10e-5):
    """
    Implements the bisection root-finding algorithm, assuming that f is a
    real-valued function on [a, b] satisfying f(a) < 0 < f(b).

    Recursive version: each call halves [a, b] and recurses on the half
    containing the sign change, until the interval is narrower than tol,
    at which point the midpoint is returned.
    """
    lower, upper = a, b
    if upper - lower < tol:
        return 0.5 * (upper + lower)
    else:
        middle = 0.5 * (upper + lower)
        print(f'Current mid point = {middle}')
        # Pass tol through to the recursive calls.  Without this, each
        # recursive call fell back to the default tolerance, so a
        # caller-supplied tol was silently ignored after the first level.
        if f(middle) > 0:   # Implies root is between lower and middle
            return bisect(f, lower, middle, tol)
        else:               # Implies root is between middle and upper
            return bisect(f, middle, upper, tol)
We also touch on modern developments in scientific computing --- such
as just-in-time compilation --- and how they affect good program
design.
= (0.05, 0.1, 0.15) 114 | α = 0.33 115 | s = 0.4 116 | 117 | for j in range(3): 118 | k[0] = 1 119 | for t in range(49): 120 | k[t+1] = s * k[t]**α + (1 - δ[j]) * k[t] 121 | axes[2].plot(k, 'o-', label=rf"$\alpha = {α},\; s = {s},\; \delta={δ[j]}$") 122 | 123 | axes[2].set_ylim(0, 18) 124 | axes[2].set_xlabel('time') 125 | axes[2].set_ylabel('capital') 126 | axes[2].grid(lw=0.2) 127 | axes[2].legend(loc='upper left', frameon=True) 128 | 129 | plt.show() 130 | ``` 131 | 132 | True, the code more or less follows 133 | [PEP8](https://www.python.org/dev/peps/pep-0008/). 134 | 135 | At the same time, it\'s very poorly structured. 136 | 137 | Let\'s talk about why that\'s the case, and what we can do about it. 138 | 139 | ## Good Coding Practice 140 | 141 | There are usually many different ways to write a program that 142 | accomplishes a given task. 143 | 144 | For small programs, like the one above, the way you write code doesn\'t 145 | matter too much. 146 | 147 | But if you are ambitious and want to produce useful things, you\'ll 148 | write medium to large programs too. 149 | 150 | In those settings, coding style matters **a great deal**. 151 | 152 | Fortunately, lots of smart people have thought about the best way to 153 | write code. 154 | 155 | Here are some basic precepts. 156 | 157 | ### Don\'t Use Magic Numbers 158 | 159 | If you look at the code above, you\'ll see numbers like `50` and `49` 160 | and `3` scattered through the code. 161 | 162 | These kinds of numeric literals in the body of your code are sometimes 163 | called \"magic numbers\". 164 | 165 | This is not a compliment. 166 | 167 | While numeric literals are not all evil, the numbers shown in the 168 | program above should certainly be replaced by named constants. 169 | 170 | For example, the code above could declare the variable 171 | `time_series_length = 50`. 172 | 173 | Then in the loops, `49` should be replaced by `time_series_length - 1`. 
174 | 175 | The advantages are: 176 | 177 | - the meaning is much clearer throughout 178 | - to alter the time series length, you only need to change one value 179 | 180 | ### Don\'t Repeat Yourself 181 | 182 | The other mortal sin in the code snippet above is repetition. 183 | 184 | Blocks of logic (such as the loop to generate time series) are repeated 185 | with only minor changes. 186 | 187 | This violates a fundamental tenet of programming: Don\'t repeat yourself 188 | (DRY). 189 | 190 | - Also called DIE (duplication is evil). 191 | 192 | Yes, we realize that you can just cut and paste and change a few 193 | symbols. 194 | 195 | But as a programmer, your aim should be to **automate** repetition, 196 | **not** do it yourself. 197 | 198 | More importantly, repeating the same logic in different places means 199 | that eventually one of them will likely be wrong. 200 | 201 | If you want to know more, read the excellent summary found on [this 202 | page](https://code.tutsplus.com/tutorials/3-key-software-principles-you-must-understand--net-25161). 203 | 204 | We\'ll talk about how to avoid repetition below. 205 | 206 | ### Minimize Global Variables 207 | 208 | Sure, global variables (i.e., names assigned to values outside of any 209 | function or class) are convenient. 210 | 211 | Rookie programmers typically use global variables with abandon --- as 212 | we once did ourselves. 213 | 214 | But global variables are dangerous, especially in medium to large size 215 | programs, since 216 | 217 | - they can affect what happens in any part of your program 218 | - they can be changed by any function 219 | 220 | This makes it much harder to be certain about what some small part of a 221 | given piece of code actually commands. 222 | 223 | Here\'s a [useful discussion on the 224 | topic](http://wiki.c2.com/?GlobalVariablesAreBad). 225 | 226 | While the odd global in small scripts is no big deal, we recommend that 227 | you teach yourself to avoid them. 
def plot_path(ax, αs, s_vals, δs, time_series_length=50):
    """
    Add a time series plot to the axes ax for all given parameters.

    One trajectory is drawn for every (α, s, δ) combination taken from
    the Cartesian product of αs, s_vals and δs, starting from k_0 = 1
    and iterating the Solow law of motion.
    """
    path = np.empty(time_series_length)

    for α, s, δ in product(αs, s_vals, δs):
        path[0] = 1
        for i in range(1, time_series_length):
            previous = path[i-1]
            path[i] = s * previous**α + (1 - δ) * previous
        ax.plot(path, 'o-', label=rf"$\alpha = {α},\; s = {s},\; \delta = {δ}$")

    ax.set_xlabel('time')
    ax.set_ylabel('capital')
    ax.set_ylim(0, 18)
    ax.legend(loc='upper left', frameon=True)
From this we get the equilibrium quantity by $q^* = q_s(p^*)$
393 | 394 | For example, let\'s see what happens when demand shifts up, with 395 | $\gamma$ increasing to $1.25$: 396 | 397 | ```{code-cell} ipython3 398 | # Compute equilibrium 399 | def h(p): 400 | return 1.25 * p**(-1) - (np.exp(0.1 * p) - 1) 401 | 402 | p_star = brentq(h, 2, 4) 403 | q_star = np.exp(0.1 * p_star) - 1 404 | 405 | print(f'Equilibrium price is {p_star: .2f}') 406 | print(f'Equilibrium quantity is {q_star: .2f}') 407 | ``` 408 | 409 | ```{code-cell} ipython3 410 | # Now plot 411 | p_grid = np.linspace(2, 4, 100) 412 | fig, ax = plt.subplots() 413 | 414 | qs = np.exp(0.1 * p_grid) - 1 415 | qd = 1.25 * p_grid**(-1) 416 | 417 | 418 | ax.plot(grid, qd, 'b-', lw=2, label='demand') 419 | ax.plot(grid, qs, 'g-', lw=2, label='supply') 420 | 421 | ax.set_xlabel('price') 422 | ax.set_ylabel('quantity') 423 | ax.legend(loc='upper center') 424 | 425 | plt.show() 426 | ``` 427 | 428 | Now we might consider supply shifts, but you already get the idea that 429 | there\'s a lot of repeated code here. 430 | 431 | Refactor and improve clarity in the code above using the principles 432 | discussed in this lecture. 
433 | 434 | ## Solutions 435 | 436 | ### Exercise 1 437 | 438 | Here\'s one solution, that uses a class: 439 | 440 | ```{code-cell} ipython3 441 | class Equilibrium: 442 | 443 | def __init__(self, α=0.1, β=1, γ=1, δ=1): 444 | self.α, self.β, self.γ, self.δ = α, β, γ, δ 445 | 446 | def qs(self, p): 447 | return np.exp(self.α * p) - self.β 448 | 449 | def qd(self, p): 450 | return self.γ * p**(-self.δ) 451 | 452 | def compute_equilibrium(self): 453 | def h(p): 454 | return self.qd(p) - self.qs(p) 455 | p_star = brentq(h, 2, 4) 456 | q_star = np.exp(self.α * p_star) - self.β 457 | 458 | print(f'Equilibrium price is {p_star: .2f}') 459 | print(f'Equilibrium quantity is {q_star: .2f}') 460 | 461 | def plot_equilibrium(self): 462 | # Now plot 463 | grid = np.linspace(2, 4, 100) 464 | fig, ax = plt.subplots() 465 | 466 | ax.plot(grid, self.qd(grid), 'b-', lw=2, label='demand') 467 | ax.plot(grid, self.qs(grid), 'g-', lw=2, label='supply') 468 | 469 | ax.set_xlabel('price') 470 | ax.set_ylabel('quantity') 471 | ax.legend(loc='upper center') 472 | 473 | plt.show() 474 | ``` 475 | 476 | Let\'s create an instance at the default parameter values. 477 | 478 | ```{code-cell} ipython3 479 | eq = Equilibrium() 480 | ``` 481 | 482 | Now we\'ll compute the equilibrium and plot it. 
483 | 484 | ```{code-cell} ipython3 485 | eq.compute_equilibrium() 486 | ``` 487 | 488 | ```{code-cell} ipython3 489 | eq.plot_equilibrium() 490 | ``` 491 | 492 | One of the nice things about our refactored code is that, when we change 493 | parameters, we don\'t need to repeat ourselves: 494 | 495 | ```{code-cell} ipython3 496 | eq.γ = 1.25 497 | ``` 498 | 499 | ```{code-cell} ipython3 500 | eq.compute_equilibrium() 501 | ``` 502 | 503 | ```{code-cell} ipython3 504 | eq.plot_equilibrium() 505 | ``` 506 | -------------------------------------------------------------------------------- /book/qe-logo-large.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/executablebooks/quantecon-example/e0783e8768bbffcd3966007013dbb994335c9034/book/qe-logo-large.png -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: qe-example 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - python=3.7.* 7 | - jupyter 8 | - sphinx=2.4.4 9 | - pydata-sphinx-theme 10 | - ghp-import 11 | - matplotlib 12 | - numpy 13 | - scipy 14 | - sympy 15 | - pandas 16 | - networkx 17 | - numba 18 | - pip 19 | - pip: 20 | - jupyter-book 21 | 22 | -------------------------------------------------------------------------------- /source_rst/about_lectures.rst: -------------------------------------------------------------------------------- 1 | ************** 2 | About Lectures 3 | ************** 4 | 5 | TBD -------------------------------------------------------------------------------- /source_rst/debugging.rst: -------------------------------------------------------------------------------- 1 | .. _debugging: 2 | 3 | .. include:: /_static/includes/header.raw 4 | 5 | *********************************** 6 | Debugging 7 | *********************************** 8 | 9 | .. 
(The call ``plt.subplots(2, 1)`` returns a pair whose second element is a NumPy array containing two axes objects, suitable for having two subplots on the same figure)
78 | 79 | The error occurs because we have mistakenly made ``ax`` a NumPy array, and a NumPy array has no ``plot`` method. 80 | 81 | But let's pretend that we don't understand this for the moment. 82 | 83 | We might suspect there's something wrong with ``ax`` but when we try to investigate this object, we get the following exception: 84 | 85 | .. code-block:: python3 86 | :class: skip-test 87 | 88 | ax 89 | 90 | The problem is that ``ax`` was defined inside ``plot_log()``, and the name is 91 | lost once that function terminates. 92 | 93 | Let's try doing it a different way. 94 | 95 | We run the first cell block again, generating the same error 96 | 97 | .. code-block:: python3 98 | :class: skip-test 99 | 100 | def plot_log(): 101 | fig, ax = plt.subplots(2, 1) 102 | x = np.linspace(1, 2, 10) 103 | ax.plot(x, np.log(x)) 104 | plt.show() 105 | 106 | plot_log() # Call the function, generate plot 107 | 108 | 109 | But this time we type in the following cell block 110 | 111 | .. code-block:: ipython 112 | :class: no-execute 113 | 114 | %debug 115 | 116 | You should be dropped into a new prompt that looks something like this 117 | 118 | .. code-block:: ipython 119 | :class: no-execute 120 | 121 | ipdb> 122 | 123 | (You might see `pdb>` instead) 124 | 125 | Now we can investigate the value of our variables at this point in the program, step forward through the code, etc. 126 | 127 | For example, here we simply type the name ``ax`` to see what's happening with 128 | this object: 129 | 130 | .. code-block:: ipython 131 | :class: no-execute 132 | 133 | ipdb> ax 134 | array([, 135 | ], dtype=object) 136 | 137 | It's now very clear that ``ax`` is an array, which clarifies the source of the 138 | problem. 139 | 140 | To find out what else you can do from inside ``ipdb`` (or ``pdb``), use the 141 | online help 142 | 143 | .. 
code-block:: ipython 144 | :class: no-execute 145 | 146 | ipdb> h 147 | 148 | Documented commands (type help ): 149 | ======================================== 150 | EOF bt cont enable jump pdef r tbreak w 151 | a c continue exit l pdoc restart u whatis 152 | alias cl d h list pinfo return unalias where 153 | args clear debug help n pp run unt 154 | b commands disable ignore next q s until 155 | break condition down j p quit step up 156 | 157 | Miscellaneous help topics: 158 | ========================== 159 | exec pdb 160 | 161 | Undocumented commands: 162 | ====================== 163 | retval rv 164 | 165 | ipdb> h c 166 | c(ont(inue)) 167 | Continue execution, only stop when a breakpoint is encountered. 168 | 169 | 170 | Setting a Break Point 171 | ---------------------- 172 | 173 | The preceding approach is handy but sometimes insufficient. 174 | 175 | Consider the following modified version of our function above 176 | 177 | .. code-block:: python3 178 | :class: skip-test 179 | 180 | def plot_log(): 181 | fig, ax = plt.subplots() 182 | x = np.logspace(1, 2, 10) 183 | ax.plot(x, np.log(x)) 184 | plt.show() 185 | 186 | plot_log() 187 | 188 | Here the original problem is fixed, but we've accidentally written 189 | ``np.logspace(1, 2, 10)`` instead of ``np.linspace(1, 2, 10)``. 190 | 191 | Now there won't be any exception, but the plot won't look right. 192 | 193 | To investigate, it would be helpful if we could inspect variables like ``x`` during execution of the function. 194 | 195 | To this end, we add a "break point" by inserting ``breakpoint()`` inside the function code block 196 | 197 | .. code-block:: python3 198 | :class: no-execute 199 | 200 | def plot_log(): 201 | breakpoint() 202 | fig, ax = plt.subplots() 203 | x = np.logspace(1, 2, 10) 204 | ax.plot(x, np.log(x)) 205 | plt.show() 206 | 207 | plot_log() 208 | 209 | Now let's run the script, and investigate via the debugger 210 | 211 | .. 
code-block:: ipython 212 | :class: no-execute 213 | 214 | > (6)plot_log() 215 | -> fig, ax = plt.subplots() 216 | (Pdb) n 217 | > (7)plot_log() 218 | -> x = np.logspace(1, 2, 10) 219 | (Pdb) n 220 | > (8)plot_log() 221 | -> ax.plot(x, np.log(x)) 222 | (Pdb) x 223 | array([ 10. , 12.91549665, 16.68100537, 21.5443469 , 224 | 27.82559402, 35.93813664, 46.41588834, 59.94842503, 225 | 77.42636827, 100. ]) 226 | 227 | We used ``n`` twice to step forward through the code (one line at a time). 228 | 229 | Then we printed the value of ``x`` to see what was happening with that variable. 230 | 231 | To exit from the debugger, use ``q``. 232 | 233 | 234 | 235 | Other Useful Magics 236 | ================================== 237 | 238 | In this lecture, we used the ``%debug`` IPython magic. 239 | 240 | There are many other useful magics: 241 | 242 | * ``%precision 4`` sets printed precision for floats to 4 decimal places 243 | 244 | * ``%whos`` gives a list of variables and their values 245 | 246 | * ``%quickref`` gives a list of magics 247 | 248 | The full list of magics is `here `_. 249 | -------------------------------------------------------------------------------- /source_rst/functions.rst: -------------------------------------------------------------------------------- 1 | .. _functions: 2 | 3 | .. include:: /_static/includes/header.raw 4 | 5 | .. highlight:: python3 6 | 7 | 8 | 9 | ********* 10 | Functions 11 | ********* 12 | 13 | 14 | 15 | .. index:: 16 | single: Python; User-defined functions 17 | 18 | .. contents:: :depth: 2 19 | 20 | 21 | 22 | Overview 23 | ======== 24 | 25 | One construct that's extremely useful and provided by almost all programming 26 | languages is **functions**. 27 | 28 | We have already met several functions, such as 29 | 30 | * the ``sqrt()`` function from NumPy and 31 | * the built-in ``print()`` function 32 | 33 | In this lecture we'll treat functions systematically and begin to learn just how 34 | useful and important they are. 
35 | 36 | One of the things we will learn to do is build our own user-defined functions 37 | 38 | 39 | We will use the following imports. 40 | 41 | .. code-block:: ipython 42 | 43 | import numpy as np 44 | import matplotlib.pyplot as plt 45 | %matplotlib inline 46 | 47 | 48 | 49 | 50 | 51 | Function Basics 52 | ================== 53 | 54 | 55 | A function is a named section of a program that implements a specific task. 56 | 57 | Many functions exist already and we can use them off the shelf. 58 | 59 | First we review these functions and then discuss how we can build our own. 60 | 61 | 62 | Built-In Functions 63 | ------------------ 64 | 65 | Python has a number of *built-in* functions that are available without ``import``. 66 | 67 | 68 | We have already met some 69 | 70 | .. code-block:: python3 71 | 72 | max(19, 20) 73 | 74 | .. code-block:: python3 75 | 76 | print('foobar') 77 | 78 | .. code-block:: python3 79 | 80 | str(22) 81 | 82 | .. code-block:: python3 83 | 84 | type(22) 85 | 86 | 87 | Two more useful built-in functions are ``any()`` and ``all()`` 88 | 89 | .. code-block:: python3 90 | 91 | bools = False, True, True 92 | all(bools) # True if all are True and False otherwise 93 | 94 | .. code-block:: python3 95 | 96 | any(bools) # False if all are False and True otherwise 97 | 98 | 99 | The full list of Python built-ins is `here `_. 100 | 101 | 102 | Third Party Functions 103 | --------------------- 104 | 105 | If the built-in functions don't cover what we need, we either need to import 106 | functions or create our own. 107 | 108 | Examples of importing and using functions 109 | were given in the :doc:`previous lecture ` 110 | 111 | Here's another one, which tests whether a given year is a leap year: 112 | 113 | 114 | .. code-block:: python3 115 | 116 | import calendar 117 | 118 | calendar.isleap(2020) 119 | 120 | 121 | 122 | Defining Functions 123 | ================== 124 | 125 | In many instances, it is useful to be able to define our own functions. 
126 | 127 | This will become clearer as you see more examples. 128 | 129 | Let's start by discussing how it's done. 130 | 131 | 132 | Syntax 133 | ------ 134 | 135 | Here's a very simple Python function, that implements the mathematical function 136 | :math:`f(x) = 2 x + 1` 137 | 138 | .. code-block:: python3 139 | 140 | def f(x): 141 | return 2 * x + 1 142 | 143 | Now that we've *defined* this function, let's *call* it and check whether it 144 | does what we expect: 145 | 146 | .. code-block:: python3 147 | 148 | f(1) 149 | 150 | .. code-block:: python3 151 | 152 | f(10) 153 | 154 | 155 | 156 | Here's a longer function, that computes the absolute value of a given number. 157 | 158 | (Such a function already exists as a built-in, but let's write our own for the 159 | exercise.) 160 | 161 | .. code-block:: python3 162 | 163 | def new_abs_function(x): 164 | 165 | if x < 0: 166 | abs_value = -x 167 | else: 168 | abs_value = x 169 | 170 | return abs_value 171 | 172 | Let's review the syntax here. 173 | 174 | * ``def`` is a Python keyword used to start function definitions. 175 | 176 | * ``def new_abs_function(x):`` indicates that the function is called ``new_abs_function`` and that it has a single argument ``x``. 177 | 178 | * The indented code is a code block called the *function body*. 179 | 180 | * The ``return`` keyword indicates that ``abs_value`` is the object that should be returned to the calling code. 181 | 182 | This whole function definition is read by the Python interpreter and stored in memory. 183 | 184 | Let's call it to check that it works: 185 | 186 | 187 | .. code-block:: python3 188 | 189 | print(new_abs_function(3)) 190 | print(new_abs_function(-3)) 191 | 192 | 193 | 194 | Why Write Functions? 
195 | -------------------- 196 | 197 | User-defined functions are important for improving the clarity of your code by 198 | 199 | * separating different strands of logic 200 | 201 | * facilitating code reuse 202 | 203 | (Writing the same thing twice is `almost always a bad idea `_) 204 | 205 | We will say more about this :doc:`later `. 206 | 207 | 208 | Applications 209 | ============ 210 | 211 | 212 | Random Draws 213 | ------------ 214 | 215 | 216 | Consider again this code from the :doc:`previous lecture ` 217 | 218 | .. code-block:: python3 219 | 220 | ts_length = 100 221 | ϵ_values = [] # empty list 222 | 223 | for i in range(ts_length): 224 | e = np.random.randn() 225 | ϵ_values.append(e) 226 | 227 | plt.plot(ϵ_values) 228 | plt.show() 229 | 230 | 231 | We will break this program into two parts: 232 | 233 | #. A user-defined function that generates a list of random variables. 234 | 235 | #. The main part of the program that 236 | 237 | #. calls this function to get data 238 | 239 | #. plots the data 240 | 241 | This is accomplished in the next program 242 | 243 | .. _funcloopprog: 244 | 245 | .. code-block:: python3 246 | 247 | def generate_data(n): 248 | ϵ_values = [] 249 | for i in range(n): 250 | e = np.random.randn() 251 | ϵ_values.append(e) 252 | return ϵ_values 253 | 254 | data = generate_data(100) 255 | plt.plot(data) 256 | plt.show() 257 | 258 | 259 | When the interpreter gets to the expression ``generate_data(100)``, it executes the function body with ``n`` set equal to 100. 260 | 261 | The net result is that the name ``data`` is *bound* to the list ``ϵ_values`` returned by the function. 262 | 263 | 264 | 265 | Adding Conditions 266 | ----------------- 267 | 268 | .. index:: 269 | single: Python; Conditions 270 | 271 | Our function ``generate_data()`` is rather limited. 272 | 273 | Let's make it slightly more useful by giving it the ability to return either standard normals or uniform random variables on :math:`(0, 1)` as required. 
274 | 275 | This is achieved in the next piece of code. 276 | 277 | 278 | .. _funcloopprog2: 279 | 280 | .. code-block:: python3 281 | 282 | 283 | def generate_data(n, generator_type): 284 | ϵ_values = [] 285 | for i in range(n): 286 | if generator_type == 'U': 287 | e = np.random.uniform(0, 1) 288 | else: 289 | e = np.random.randn() 290 | ϵ_values.append(e) 291 | return ϵ_values 292 | 293 | data = generate_data(100, 'U') 294 | plt.plot(data) 295 | plt.show() 296 | 297 | Hopefully, the syntax of the if/else clause is self-explanatory, with indentation again delimiting the extent of the code blocks. 298 | 299 | Notes 300 | 301 | * We are passing the argument ``U`` as a string, which is why we write it as ``'U'``. 302 | 303 | * Notice that equality is tested with the ``==`` syntax, not ``=``. 304 | 305 | * For example, the statement ``a = 10`` assigns the name ``a`` to the value ``10``. 306 | 307 | * The expression ``a == 10`` evaluates to either ``True`` or ``False``, depending on the value of ``a``. 308 | 309 | Now, there are several ways that we can simplify the code above. 310 | 311 | For example, we can get rid of the conditionals all together by just passing the desired generator type *as a function*. 312 | 313 | To understand this, consider the following version. 314 | 315 | .. _test_program_6: 316 | 317 | .. code-block:: python3 318 | 319 | 320 | def generate_data(n, generator_type): 321 | ϵ_values = [] 322 | for i in range(n): 323 | e = generator_type() 324 | ϵ_values.append(e) 325 | return ϵ_values 326 | 327 | data = generate_data(100, np.random.uniform) 328 | plt.plot(data) 329 | plt.show() 330 | 331 | 332 | Now, when we call the function ``generate_data()``, we pass ``np.random.uniform`` 333 | as the second argument. 334 | 335 | This object is a *function*. 
336 | 337 | When the function call ``generate_data(100, np.random.uniform)`` is executed, Python runs the function code block with ``n`` equal to 100 and the name ``generator_type`` "bound" to the function ``np.random.uniform``. 338 | 339 | * While these lines are executed, the names ``generator_type`` and ``np.random.uniform`` are "synonyms", and can be used in identical ways. 340 | 341 | This principle works more generally---for example, consider the following piece of code 342 | 343 | .. code-block:: python3 344 | 345 | max(7, 2, 4) # max() is a built-in Python function 346 | 347 | .. code-block:: python3 348 | 349 | m = max 350 | m(7, 2, 4) 351 | 352 | Here we created another name for the built-in function ``max()``, which could 353 | then be used in identical ways. 354 | 355 | In the context of our program, the ability to bind new names to functions 356 | means that there is no problem *passing a function as an argument to another 357 | function*---as we did above. 358 | 359 | 360 | 361 | 362 | 363 | 364 | Exercises 365 | ========= 366 | 367 | 368 | 369 | Exercise 1 370 | ---------- 371 | 372 | Recall that :math:`n!` is read as ":math:`n` factorial" and defined as 373 | :math:`n! = n \times (n - 1) \times \cdots \times 2 \times 1`. 374 | 375 | There are functions to compute this in various modules, but let's 376 | write our own version as an exercise. 377 | 378 | In particular, write a function ``factorial`` such that ``factorial(n)`` returns :math:`n!` 379 | for any positive integer :math:`n`. 380 | 381 | 382 | 383 | Exercise 2 384 | ---------- 385 | 386 | The `binomial random variable `_ :math:`Y \sim Bin(n, p)` represents the number of successes in :math:`n` binary trials, where each trial succeeds with probability :math:`p`. 387 | 388 | Without any import besides ``from numpy.random import uniform``, write a function 389 | ``binomial_rv`` such that ``binomial_rv(n, p)`` generates one draw of :math:`Y`. 
390 | 391 | Hint: If :math:`U` is uniform on :math:`(0, 1)` and :math:`p \in (0,1)`, then the expression ``U < p`` evaluates to ``True`` with probability :math:`p`. 392 | 393 | 394 | 395 | 396 | Exercise 3 397 | ---------- 398 | 399 | First, write a function that returns one realization of the following random device 400 | 401 | 1. Flip an unbiased coin 10 times. 402 | 2. If a head occurs ``k`` or more times consecutively within this sequence at least once, pay one dollar. 403 | 3. If not, pay nothing. 404 | 405 | Second, write another function that does the same task except that the second rule of the above random device becomes 406 | 407 | - If a head occurs ``k`` or more times within this sequence, pay one dollar. 408 | 409 | Use no import besides ``from numpy.random import uniform``. 410 | 411 | 412 | 413 | 414 | 415 | 416 | Solutions 417 | ========= 418 | 419 | 420 | Exercise 1 421 | ---------- 422 | 423 | Here's one solution. 424 | 425 | .. code-block:: python3 426 | 427 | def factorial(n): 428 | k = 1 429 | for i in range(n): 430 | k = k * (i + 1) 431 | return k 432 | 433 | factorial(4) 434 | 435 | 436 | 437 | Exercise 2 438 | ---------- 439 | 440 | .. code-block:: python3 441 | 442 | from numpy.random import uniform 443 | 444 | def binomial_rv(n, p): 445 | count = 0 446 | for i in range(n): 447 | U = uniform() 448 | if U < p: 449 | count = count + 1 # Or count += 1 450 | return count 451 | 452 | binomial_rv(10, 0.5) 453 | 454 | 455 | 456 | Exercise 3 457 | ---------- 458 | 459 | Here's a function for the first random device. 460 | 461 | .. 
code-block:: python3 462 | 463 | from numpy.random import uniform 464 | 465 | def draw(k): # pays if k consecutive successes in a sequence 466 | 467 | payoff = 0 468 | count = 0 469 | 470 | for i in range(10): 471 | U = uniform() 472 | count = count + 1 if U < 0.5 else 0 473 | print(count) # print counts for clarity 474 | if count == k: 475 | payoff = 1 476 | 477 | return payoff 478 | 479 | draw(3) 480 | 481 | Here's another function for the second random device. 482 | 483 | .. code-block:: python3 484 | 485 | def draw_new(k): # pays if k successes in a sequence 486 | 487 | payoff = 0 488 | count = 0 489 | 490 | for i in range(10): 491 | U = uniform() 492 | count = count + ( 1 if U < 0.5 else 0 ) 493 | print(count) 494 | if count == k: 495 | payoff = 1 496 | 497 | return payoff 498 | 499 | draw_new(3) 500 | 501 | 502 | -------------------------------------------------------------------------------- /source_rst/matplotlib.rst: -------------------------------------------------------------------------------- 1 | .. _matplotlib: 2 | 3 | .. include:: /_static/includes/header.raw 4 | 5 | ******************* 6 | :index:`Matplotlib` 7 | ******************* 8 | 9 | .. index:: 10 | single: Python; Matplotlib 11 | 12 | .. contents:: :depth: 2 13 | 14 | Overview 15 | ======== 16 | 17 | We've already generated quite a few figures in these lectures using `Matplotlib `__. 18 | 19 | Matplotlib is an outstanding graphics library, designed for scientific computing, with 20 | 21 | * high-quality 2D and 3D plots 22 | 23 | * output in all the usual formats (PDF, PNG, etc.) 24 | 25 | * LaTeX integration 26 | 27 | * fine-grained control over all aspects of presentation 28 | 29 | * animation, etc. 30 | 31 | 32 | 33 | Matplotlib's Split Personality 34 | ------------------------------ 35 | 36 | 37 | Matplotlib is unusual in that it offers two different interfaces to plotting. 
38 | 39 | One is a simple MATLAB-style API (Application Programming Interface) that was written to help MATLAB refugees find a ready home. 40 | 41 | The other is a more "Pythonic" object-oriented API. 42 | 43 | For reasons described below, we recommend that you use the second API. 44 | 45 | But first, let's discuss the difference. 46 | 47 | 48 | 49 | 50 | The APIs 51 | ======== 52 | 53 | .. index:: 54 | single: Matplotlib; Simple API 55 | 56 | The MATLAB-style API 57 | -------------------- 58 | 59 | Here's the kind of easy example you might find in introductory treatments 60 | 61 | .. code-block:: ipython 62 | 63 | import matplotlib.pyplot as plt 64 | %matplotlib inline 65 | import numpy as np 66 | 67 | x = np.linspace(0, 10, 200) 68 | y = np.sin(x) 69 | 70 | plt.plot(x, y, 'b-', linewidth=2) 71 | plt.show() 72 | 73 | 74 | This is simple and convenient, but also somewhat limited and un-Pythonic. 75 | 76 | For example, in the function calls, a lot of objects get created and passed around without making themselves known to the programmer. 77 | 78 | Python programmers tend to prefer a more explicit style of programming (run ``import this`` in a code block and look at the second line). 79 | 80 | This leads us to the alternative, object-oriented Matplotlib API. 81 | 82 | The Object-Oriented API 83 | ----------------------- 84 | 85 | Here's the code corresponding to the preceding figure using the object-oriented API 86 | 87 | .. code-block:: python3 88 | 89 | fig, ax = plt.subplots() 90 | ax.plot(x, y, 'b-', linewidth=2) 91 | plt.show() 92 | 93 | 94 | Here the call ``fig, ax = plt.subplots()`` returns a pair, where 95 | 96 | * ``fig`` is a ``Figure`` instance---like a blank canvas. 97 | 98 | * ``ax`` is an ``AxesSubplot`` instance---think of a frame for plotting in. 99 | 100 | The ``plot()`` function is actually a method of ``ax``. 101 | 102 | While there's a bit more typing, the more explicit use of objects gives us better control. 
103 | 104 | This will become more clear as we go along. 105 | 106 | 107 | Tweaks 108 | ------ 109 | 110 | Here we've changed the line to red and added a legend 111 | 112 | .. code-block:: python3 113 | 114 | fig, ax = plt.subplots() 115 | ax.plot(x, y, 'r-', linewidth=2, label='sine function', alpha=0.6) 116 | ax.legend() 117 | plt.show() 118 | 119 | We've also used ``alpha`` to make the line slightly transparent---which makes it look smoother. 120 | 121 | The location of the legend can be changed by replacing ``ax.legend()`` with ``ax.legend(loc='upper center')``. 122 | 123 | .. code-block:: python3 124 | 125 | fig, ax = plt.subplots() 126 | ax.plot(x, y, 'r-', linewidth=2, label='sine function', alpha=0.6) 127 | ax.legend(loc='upper center') 128 | plt.show() 129 | 130 | If everything is properly configured, then adding LaTeX is trivial 131 | 132 | .. code-block:: python3 133 | 134 | fig, ax = plt.subplots() 135 | ax.plot(x, y, 'r-', linewidth=2, label='$y=\sin(x)$', alpha=0.6) 136 | ax.legend(loc='upper center') 137 | plt.show() 138 | 139 | Controlling the ticks, adding titles and so on is also straightforward 140 | 141 | .. code-block:: python3 142 | 143 | fig, ax = plt.subplots() 144 | ax.plot(x, y, 'r-', linewidth=2, label='$y=\sin(x)$', alpha=0.6) 145 | ax.legend(loc='upper center') 146 | ax.set_yticks([-1, 0, 1]) 147 | ax.set_title('Test plot') 148 | plt.show() 149 | 150 | 151 | More Features 152 | ============= 153 | 154 | Matplotlib has a huge array of functions and features, which you can discover 155 | over time as you have need for them. 156 | 157 | We mention just a few. 158 | 159 | 160 | Multiple Plots on One Axis 161 | -------------------------- 162 | 163 | .. index:: 164 | single: Matplotlib; Multiple Plots on One Axis 165 | 166 | It's straightforward to generate multiple plots on the same axes. 167 | 168 | Here's an example that randomly generates three normal densities and adds a label with their mean 169 | 170 | .. 
code-block:: python3 171 | 172 | from scipy.stats import norm 173 | from random import uniform 174 | 175 | fig, ax = plt.subplots() 176 | x = np.linspace(-4, 4, 150) 177 | for i in range(3): 178 | m, s = uniform(-1, 1), uniform(1, 2) 179 | y = norm.pdf(x, loc=m, scale=s) 180 | current_label = f'$\mu = {m:.2}$' 181 | ax.plot(x, y, linewidth=2, alpha=0.6, label=current_label) 182 | ax.legend() 183 | plt.show() 184 | 185 | 186 | Multiple Subplots 187 | ----------------- 188 | 189 | .. index:: 190 | single: Matplotlib; Subplots 191 | 192 | Sometimes we want multiple subplots in one figure. 193 | 194 | Here's an example that generates 6 histograms 195 | 196 | .. code-block:: python3 197 | 198 | num_rows, num_cols = 3, 2 199 | fig, axes = plt.subplots(num_rows, num_cols, figsize=(10, 12)) 200 | for i in range(num_rows): 201 | for j in range(num_cols): 202 | m, s = uniform(-1, 1), uniform(1, 2) 203 | x = norm.rvs(loc=m, scale=s, size=100) 204 | axes[i, j].hist(x, alpha=0.6, bins=20) 205 | t = f'$\mu = {m:.2}, \quad \sigma = {s:.2}$' 206 | axes[i, j].set(title=t, xticks=[-4, 0, 4], yticks=[]) 207 | plt.show() 208 | 209 | 210 | 3D Plots 211 | -------- 212 | 213 | .. index:: 214 | single: Matplotlib; 3D Plots 215 | 216 | Matplotlib does a nice job of 3D plots --- here is one example 217 | 218 | 219 | .. 
code-block:: python3 220 | 221 | from mpl_toolkits.mplot3d.axes3d import Axes3D 222 | from matplotlib import cm 223 | 224 | 225 | def f(x, y): 226 | return np.cos(x**2 + y**2) / (1 + x**2 + y**2) 227 | 228 | xgrid = np.linspace(-3, 3, 50) 229 | ygrid = xgrid 230 | x, y = np.meshgrid(xgrid, ygrid) 231 | 232 | fig = plt.figure(figsize=(8, 6)) 233 | ax = fig.add_subplot(111, projection='3d') 234 | ax.plot_surface(x, 235 | y, 236 | f(x, y), 237 | rstride=2, cstride=2, 238 | cmap=cm.jet, 239 | alpha=0.7, 240 | linewidth=0.25) 241 | ax.set_zlim(-0.5, 1.0) 242 | plt.show() 243 | 244 | 245 | 246 | A Customizing Function 247 | ---------------------- 248 | 249 | Perhaps you will find a set of customizations that you regularly use. 250 | 251 | Suppose we usually prefer our axes to go through the origin, and to have a grid. 252 | 253 | Here's a nice example from `Matthew Doty `_ of how the object-oriented API can be used to build a custom ``subplots`` function that implements these changes. 254 | 255 | Read carefully through the code and see if you can follow what's going on 256 | 257 | .. code-block:: python3 258 | 259 | def subplots(): 260 | "Custom subplots with axes through the origin" 261 | fig, ax = plt.subplots() 262 | 263 | # Set the axes through the origin 264 | for spine in ['left', 'bottom']: 265 | ax.spines[spine].set_position('zero') 266 | for spine in ['right', 'top']: 267 | ax.spines[spine].set_color('none') 268 | 269 | ax.grid() 270 | return fig, ax 271 | 272 | 273 | fig, ax = subplots() # Call the local version, not plt.subplots() 274 | x = np.linspace(-2, 10, 200) 275 | y = np.sin(x) 276 | ax.plot(x, y, 'r-', linewidth=2, label='sine function', alpha=0.6) 277 | ax.legend(loc='lower right') 278 | plt.show() 279 | 280 | The custom ``subplots`` function 281 | 282 | #. calls the standard ``plt.subplots`` function internally to generate the ``fig, ax`` pair, 283 | 284 | #. makes the desired customizations to ``ax``, and 285 | 286 | #. 
passes the ``fig, ax`` pair back to the calling code. 287 | 288 | 289 | 290 | Further Reading 291 | =============== 292 | 293 | 294 | * The `Matplotlib gallery `__ provides many examples. 295 | 296 | * A nice `Matplotlib tutorial `__ by Nicolas Rougier, Mike Muller and Gael Varoquaux. 297 | 298 | * `mpltools `_ allows easy 299 | switching between plot styles. 300 | 301 | * `Seaborn `_ facilitates common statistics plots in Matplotlib. 302 | 303 | 304 | 305 | Exercises 306 | ========= 307 | 308 | 309 | Exercise 1 310 | ---------- 311 | 312 | 313 | Plot the function 314 | 315 | .. math:: 316 | 317 | f(x) = \cos(\pi \theta x) \exp(-x) 318 | 319 | over the interval :math:`[0, 5]` for each :math:`\theta` in ``np.linspace(0, 2, 10)``. 320 | 321 | Place all the curves in the same figure. 322 | 323 | The output should look like this 324 | 325 | 326 | .. figure:: /_static/lecture_specific/matplotlib/matplotlib_ex1.png 327 | 328 | 329 | 330 | 331 | Solutions 332 | ========= 333 | 334 | 335 | Exercise 1 336 | ---------- 337 | 338 | 339 | Here's one solution 340 | 341 | .. code:: ipython3 342 | 343 | def f(x, θ): 344 | return np.cos(np.pi * θ * x ) * np.exp(- x) 345 | 346 | θ_vals = np.linspace(0, 2, 10) 347 | x = np.linspace(0, 5, 200) 348 | fig, ax = plt.subplots() 349 | 350 | for θ in θ_vals: 351 | ax.plot(x, f(x, θ)) 352 | 353 | plt.show() 354 | -------------------------------------------------------------------------------- /source_rst/need_for_speed.rst: -------------------------------------------------------------------------------- 1 | .. _speed: 2 | 3 | .. include:: /_static/includes/header.raw 4 | 5 | ******************************* 6 | Python for Scientific Computing 7 | ******************************* 8 | 9 | .. contents:: :depth: 2 10 | 11 | In addition to what's in Anaconda, this lecture will need the following libraries: 12 | 13 | .. 
code-block:: ipython 14 | :class: hide-output 15 | 16 | !pip install --upgrade quantecon 17 | 18 | 19 | 20 | 21 | Overview 22 | ======== 23 | 24 | Python is extremely popular for scientific computing, due to such factors as 25 | 26 | * the accessible and flexible nature of the language itself, 27 | 28 | * the huge range of high quality scientific libraries now available, 29 | 30 | * the fact that the language and libraries are open source, 31 | 32 | * the popular Anaconda Python distribution, which simplifies installation and 33 | management of those libraries, and 34 | 35 | * the recent surge of interest in using Python for machine learning and 36 | artificial intelligence. 37 | 38 | In this lecture we give a short overview of scientific computing in Python, 39 | addressing the following questions: 40 | 41 | * What are the relative strengths and weaknesses of Python for these tasks? 42 | 43 | * What are the main elements of the scientific Python ecosystem? 44 | 45 | * How is the situation changing over time? 46 | 47 | 48 | 49 | Scientific Libraries 50 | ============================= 51 | 52 | Let's briefly review Python's scientific libraries, starting with why we need 53 | them. 54 | 55 | The Role of Scientific Libraries 56 | -------------------------------- 57 | 58 | One obvious reason we use scientific libraries is because they implement 59 | routines we want to use. 60 | 61 | For example, it's almost always better to use an existing routine for root 62 | finding than to write a new one from scratch. 63 | 64 | (For standard algorithms, efficiency is maximized if the community can coordinate on a 65 | common set of implementations, written by experts and tuned by users to be as fast and robust as possible.) 66 | 67 | But this is not the only reason that we use Python's scientific libraries. 68 | 69 | Another is that pure Python, while flexible and elegant, is not fast. 70 | 71 | So we need libraries that are designed to accelerate execution of Python code. 
72 | 73 | As we'll see below, there are now Python libraries that can do this extremely well. 74 | 75 | 76 | 77 | Python's Scientific Ecosystem 78 | ----------------------------- 79 | 80 | 81 | In terms of popularity, the big four in the world of scientific Python 82 | libraries are 83 | 84 | * NumPy 85 | * SciPy 86 | * Matplotlib 87 | * Pandas 88 | 89 | For us, there's another (relatively new) library that will also be essential for 90 | numerical computing: 91 | 92 | * Numba 93 | 94 | Over the next few lectures we'll see how to use these libraries. 95 | 96 | But first, let's quickly review how they fit together. 97 | 98 | * NumPy forms the foundations by providing a basic array data type (think of 99 | vectors and matrices) and functions for acting on these arrays (e.g., matrix 100 | multiplication). 101 | 102 | * SciPy builds on NumPy by adding the kinds of numerical methods that are 103 | routinely used in science (interpolation, optimization, root finding, etc.). 104 | 105 | * Matplotlib is used to generate figures, with a focus on plotting data stored in NumPy arrays. 106 | 107 | * Pandas provides types and functions for empirical work (e.g., manipulating data). 108 | 109 | * Numba accelerates execution via JIT compilation --- we'll learn about this 110 | soon. 111 | 112 | 113 | 114 | 115 | 116 | The Need for Speed 117 | ================== 118 | 119 | Now let's discuss execution speed. 120 | 121 | Higher-level languages like Python are optimized for humans. 122 | 123 | This means that the programmer can leave many details to the runtime environment 124 | 125 | * specifying variable types 126 | 127 | * memory allocation/deallocation, etc. 128 | 129 | The upside is that, compared to low-level languages, Python is typically faster to write, less error-prone and easier to debug. 130 | 131 | The downside is that Python is harder to optimize --- that is, turn into fast machine code --- than languages like C or Fortran. 
132 | 133 | Indeed, the standard implementation of Python (called CPython) cannot match the speed of compiled languages such as C or Fortran. 134 | 135 | Does that mean that we should just switch to C or Fortran for everything? 136 | 137 | The answer is: No, no and one hundred times no! 138 | 139 | (This is what you should say to the senior professor insisting that the model 140 | needs to be rewritten in Fortran or C++.) 141 | 142 | There are two reasons why: 143 | 144 | First, for any given program, relatively few lines are ever going to 145 | be time-critical. 146 | 147 | Hence it is far more efficient to write most of our code in a high productivity language like Python. 148 | 149 | Second, even for those lines of code that *are* time-critical, we can now achieve the same speed as C or Fortran using Python's scientific libraries. 150 | 151 | 152 | Where are the Bottlenecks? 153 | -------------------------- 154 | 155 | Before we learn how to do this, let's try to understand why plain vanilla 156 | Python is slower than C or Fortran. 157 | 158 | This will, in turn, help us figure out how to speed things up. 159 | 160 | 161 | Dynamic Typing 162 | ^^^^^^^^^^^^^^ 163 | 164 | .. index:: 165 | single: Dynamic Typing 166 | 167 | Consider this Python operation 168 | 169 | .. code-block:: python3 170 | 171 | a, b = 10, 10 172 | a + b 173 | 174 | 175 | Even for this simple operation, the Python interpreter has a fair bit of work to do. 176 | 177 | For example, in the statement ``a + b``, the interpreter has to know which 178 | operation to invoke. 179 | 180 | If ``a`` and ``b`` are strings, then ``a + b`` requires string concatenation 181 | 182 | .. code-block:: python3 183 | 184 | a, b = 'foo', 'bar' 185 | a + b 186 | 187 | 188 | If ``a`` and ``b`` are lists, then ``a + b`` requires list concatenation 189 | 190 | .. 
code-block:: python3 191 | 192 | a, b = ['foo'], ['bar'] 193 | a + b 194 | 195 | 196 | (We say that the operator ``+`` is *overloaded* --- its action depends on the 197 | type of the objects on which it acts) 198 | 199 | As a result, Python must check the type of the objects and then call the correct operation. 200 | 201 | This involves substantial overheads. 202 | 203 | Static Types 204 | ^^^^^^^^^^^^ 205 | 206 | .. index:: 207 | single: Static Types 208 | 209 | Compiled languages avoid these overheads with explicit, static types. 210 | 211 | For example, consider the following C code, which sums the integers from 1 to 10 212 | 213 | .. code-block:: c 214 | :class: no-execute 215 | 216 | #include <stdio.h> 217 | 218 | int main(void) { 219 | int i; 220 | int sum = 0; 221 | for (i = 1; i <= 10; i++) { 222 | sum = sum + i; 223 | } 224 | printf("sum = %d\n", sum); 225 | return 0; 226 | } 227 | 228 | The variables ``i`` and ``sum`` are explicitly declared to be integers. 229 | 230 | Hence, the meaning of addition here is completely unambiguous. 231 | 232 | Data Access 233 | ----------- 234 | 235 | Another drag on speed for high-level languages is data access. 236 | 237 | To illustrate, let's consider the problem of summing some data --- say, a collection of integers. 238 | 239 | Summing with Compiled Code 240 | ^^^^^^^^^^^^^^^^^^^^^^^^^^ 241 | 242 | In C or Fortran, these integers would typically be stored in an array, which 243 | is a simple data structure for storing homogeneous data. 244 | 245 | Such an array is stored in a single contiguous block of memory 246 | 247 | * In modern computers, memory addresses are allocated to each byte (one byte = 8 bits). 248 | 249 | * For example, a 64 bit integer is stored in 8 bytes of memory. 250 | 251 | * An array of :math:`n` such integers occupies :math:`8n` **consecutive** memory slots. 252 | 253 | Moreover, the compiler is made aware of the data type by the programmer.
254 | 255 | * In this case 64 bit integers 256 | 257 | Hence, each successive data point can be accessed by shifting forward in memory 258 | space by a known and fixed amount. 259 | 260 | * In this case 8 bytes 261 | 262 | 263 | Summing in Pure Python 264 | ^^^^^^^^^^^^^^^^^^^^^^ 265 | 266 | Python tries to replicate these ideas to some degree. 267 | 268 | For example, in the standard Python implementation (CPython), list elements are placed in memory locations that are in a sense contiguous. 269 | 270 | However, these list elements are more like pointers to data rather than actual data. 271 | 272 | Hence, there is still overhead involved in accessing the data values themselves. 273 | 274 | This is a considerable drag on speed. 275 | 276 | In fact, it's generally true that memory traffic is a major culprit when it comes to slow execution. 277 | 278 | Let's look at some ways around these problems. 279 | 280 | 281 | 282 | :index:`Vectorization` 283 | ====================== 284 | 285 | .. index:: 286 | single: Python; Vectorization 287 | 288 | There is a clever method called **vectorization** that can be 289 | used to speed up high level languages in numerical applications. 290 | 291 | The key idea is to send array processing operations in batch to pre-compiled 292 | and efficient native machine code. 293 | 294 | The machine code itself is typically compiled from carefully optimized C or Fortran. 295 | 296 | For example, when working in a high level language, the operation of inverting a large matrix can be subcontracted to efficient machine code that is pre-compiled for this purpose and supplied to users as part of a package. 297 | 298 | This clever idea dates back to MATLAB, which uses vectorization extensively. 299 | 300 | Vectorization can greatly accelerate many numerical computations (but not all, 301 | as we shall see). 302 | 303 | Let's see how vectorization works in Python, using NumPy. 
304 | 305 | 306 | Operations on Arrays 307 | -------------------- 308 | 309 | .. index:: 310 | single: Vectorization; Operations on Arrays 311 | 312 | First, let's run some imports 313 | 314 | .. code-block:: python3 315 | 316 | import random 317 | import numpy as np 318 | import quantecon as qe 319 | 320 | Next let's try some non-vectorized code, which uses a native Python loop to generate, 321 | square and then sum a large number of random variables: 322 | 323 | .. code-block:: python3 324 | 325 | n = 1_000_000 326 | 327 | .. code-block:: python3 328 | 329 | %%time 330 | 331 | y = 0 # Will accumulate and store sum 332 | for i in range(n): 333 | x = random.uniform(0, 1) 334 | y += x**2 335 | 336 | The following vectorized code achieves the same thing. 337 | 338 | .. code-block:: ipython 339 | 340 | %%time 341 | 342 | x = np.random.uniform(0, 1, n) 343 | y = np.sum(x**2) 344 | 345 | 346 | As you can see, the second code block runs much faster. Why? 347 | 348 | The second code block breaks the loop down into three basic operations 349 | 350 | #. draw ``n`` uniforms 351 | 352 | #. square them 353 | 354 | #. sum them 355 | 356 | These are sent as batch operators to optimized machine code. 357 | 358 | Apart from minor overheads associated with sending data back and forth, the result is C or Fortran-like speed. 359 | 360 | When we run batch operations on arrays like this, we say that the code is *vectorized*. 361 | 362 | Vectorized code is typically fast and efficient. 363 | 364 | It is also surprisingly flexible, in the sense that many operations can be vectorized. 365 | 366 | The next section illustrates this point. 367 | 368 | 369 | 370 | 371 | 372 | .. _ufuncs: 373 | 374 | 375 | Universal Functions 376 | ------------------- 377 | 378 | .. index:: 379 | single: NumPy; Universal Functions 380 | 381 | Many functions provided by NumPy are so-called *universal functions* --- also called `ufuncs `__. 
382 | 383 | This means that they 384 | 385 | * map scalars into scalars, as expected 386 | 387 | * map arrays into arrays, acting element-wise 388 | 389 | For example, ``np.cos`` is a ufunc: 390 | 391 | .. code-block:: python3 392 | 393 | np.cos(1.0) 394 | 395 | .. code-block:: python3 396 | 397 | np.cos(np.linspace(0, 1, 3)) 398 | 399 | 400 | By exploiting ufuncs, many operations can be vectorized. 401 | 402 | For example, consider the problem of maximizing a function :math:`f` of two 403 | variables :math:`(x,y)` over the square :math:`[-a, a] \times [-a, a]`. 404 | 405 | For :math:`f` and :math:`a` let's choose 406 | 407 | .. math:: 408 | 409 | f(x,y) = \frac{\cos(x^2 + y^2)}{1 + x^2 + y^2} 410 | \quad \text{and} \quad 411 | a = 3 412 | 413 | 414 | Here's a plot of :math:`f` 415 | 416 | .. code-block:: ipython 417 | 418 | import matplotlib.pyplot as plt 419 | %matplotlib inline 420 | from mpl_toolkits.mplot3d.axes3d import Axes3D 421 | from matplotlib import cm 422 | 423 | def f(x, y): 424 | return np.cos(x**2 + y**2) / (1 + x**2 + y**2) 425 | 426 | xgrid = np.linspace(-3, 3, 50) 427 | ygrid = xgrid 428 | x, y = np.meshgrid(xgrid, ygrid) 429 | 430 | fig = plt.figure(figsize=(8, 6)) 431 | ax = fig.add_subplot(111, projection='3d') 432 | ax.plot_surface(x, 433 | y, 434 | f(x, y), 435 | rstride=2, cstride=2, 436 | cmap=cm.jet, 437 | alpha=0.7, 438 | linewidth=0.25) 439 | ax.set_zlim(-0.5, 1.0) 440 | ax.set_xlabel('$x$', fontsize=14) 441 | ax.set_ylabel('$y$', fontsize=14) 442 | plt.show() 443 | 444 | To maximize it, we're going to use a naive grid search: 445 | 446 | #. Evaluate :math:`f` for all :math:`(x,y)` in a grid on the square. 447 | 448 | #. Return the maximum of observed values. 449 | 450 | The grid will be 451 | 452 | .. code-block:: python3 453 | 454 | grid = np.linspace(-3, 3, 1000) 455 | 456 | Here's a non-vectorized version that uses Python loops. 457 | 458 | .. 
code-block:: python3 459 | 460 | %%time 461 | 462 | m = -np.inf 463 | 464 | for x in grid: 465 | for y in grid: 466 | z = f(x, y) 467 | if z > m: 468 | m = z 469 | 470 | 471 | And here's a vectorized version 472 | 473 | .. code-block:: python3 474 | 475 | %%time 476 | 477 | x, y = np.meshgrid(grid, grid) 478 | np.max(f(x, y)) 479 | 480 | 481 | In the vectorized version, all the looping takes place in compiled code. 482 | 483 | As you can see, the second version is **much** faster. 484 | 485 | (We'll make it even faster again later on, using more scientific programming tricks.) 486 | 487 | 488 | 489 | .. _numba-p_c_vectorization: 490 | 491 | Beyond Vectorization 492 | ==================== 493 | 494 | 495 | At its best, vectorization yields fast, simple code. 496 | 497 | However, it's not without disadvantages. 498 | 499 | One issue is that it can be highly memory-intensive. 500 | 501 | For example, the vectorized maximization routine above is far more memory 502 | intensive than the non-vectorized version that preceded it. 503 | 504 | This is because vectorization tends to create many intermediate arrays before 505 | producing the final calculation. 506 | 507 | Another issue is that not all algorithms can be vectorized. 508 | 509 | In these kinds of settings, we need to go back to loops. 510 | 511 | Fortunately, there are alternative ways to speed up Python loops that work in 512 | almost any setting. 513 | 514 | For example, in the last few years, a new Python library called `Numba 515 | `__ has appeared that solves the main problems 516 | with vectorization listed above. 517 | 518 | It does so through something called **just in time (JIT) compilation**, 519 | which can generate extremely fast and efficient code. 520 | 521 | We'll learn how to use Numba :doc:`soon `. 522 | -------------------------------------------------------------------------------- /source_rst/oop_intro.rst: -------------------------------------------------------------------------------- 1 | .. 
_oop_intro: 2 | 3 | .. include:: /_static/includes/header.raw 4 | 5 | ************************************************** 6 | OOP I: Introduction to Object Oriented Programming 7 | ************************************************** 8 | 9 | .. contents:: :depth: 2 10 | 11 | Overview 12 | ============ 13 | 14 | 15 | `OOP `_ is one of the major paradigms in programming. 16 | 17 | 18 | The traditional programming paradigm (think Fortran, C, MATLAB, etc.) is called *procedural*. 19 | 20 | It works as follows 21 | 22 | * The program has a state corresponding to the values of its variables. 23 | 24 | * Functions are called to act on these data. 25 | 26 | * Data are passed back and forth via function calls. 27 | 28 | 29 | In contrast, in the OOP paradigm 30 | 31 | * data and functions are "bundled together" into "objects" 32 | 33 | (Functions in this context are referred to as **methods**) 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | Python and OOP 42 | -------------- 43 | 44 | Python is a pragmatic language that blends object-oriented and procedural styles, rather than taking a purist approach. 45 | 46 | However, at a foundational level, Python *is* object-oriented. 47 | 48 | In particular, in Python, *everything is an object*. 49 | 50 | In this lecture, we explain what that statement means and why it matters. 51 | 52 | 53 | 54 | 55 | 56 | Objects 57 | ======= 58 | 59 | .. index:: 60 | single: Python; Objects 61 | 62 | 63 | In Python, an *object* is a collection of data and instructions held in computer memory that consists of 64 | 65 | #. a type 66 | 67 | #. a unique identity 68 | 69 | #. data (i.e., content) 70 | 71 | #. methods 72 | 73 | These concepts are defined and discussed sequentially below. 74 | 75 | 76 | 77 | 78 | 79 | 80 | .. _type: 81 | 82 | Type 83 | ---- 84 | 85 | .. index:: 86 | single: Python; Type 87 | 88 | Python provides for different types of objects, to accommodate different categories of data. 89 | 90 | For example 91 | 92 | .. 
code-block:: python3 93 | 94 | s = 'This is a string' 95 | type(s) 96 | 97 | .. code-block:: python3 98 | 99 | x = 42 # Now let's create an integer 100 | type(x) 101 | 102 | The type of an object matters for many expressions. 103 | 104 | For example, the addition operator between two strings means concatenation 105 | 106 | .. code-block:: python3 107 | 108 | '300' + 'cc' 109 | 110 | On the other hand, between two numbers it means ordinary addition 111 | 112 | .. code-block:: python3 113 | 114 | 300 + 400 115 | 116 | Consider the following expression 117 | 118 | .. code-block:: python3 119 | :class: skip-test 120 | 121 | '300' + 400 122 | 123 | 124 | Here we are mixing types, and it's unclear to Python whether the user wants to 125 | 126 | * convert ``'300'`` to an integer and then add it to ``400``, or 127 | 128 | * convert ``400`` to string and then concatenate it with ``'300'`` 129 | 130 | Some languages might try to guess but Python is *strongly typed* 131 | 132 | * Type is important, and implicit type conversion is rare. 133 | 134 | * Python will respond instead by raising a ``TypeError``. 135 | 136 | 137 | To avoid the error, you need to clarify by changing the relevant type. 138 | 139 | For example, 140 | 141 | .. code-block:: python3 142 | 143 | int('300') + 400 # To add as numbers, change the string to an integer 144 | 145 | 146 | 147 | .. _identity: 148 | 149 | Identity 150 | -------- 151 | 152 | .. index:: 153 | single: Python; Identity 154 | 155 | In Python, each object has a unique identifier, which helps Python (and us) keep track of the object. 156 | 157 | The identity of an object can be obtained via the ``id()`` function 158 | 159 | .. code-block:: python3 160 | 161 | y = 2.5 162 | z = 2.5 163 | id(y) 164 | 165 | .. code-block:: python3 166 | 167 | id(z) 168 | 169 | In this example, ``y`` and ``z`` happen to have the same value (i.e., ``2.5``), but they are not the same object. 
170 | 171 | The identity of an object is in fact just the address of the object in memory. 172 | 173 | 174 | 175 | 176 | 177 | Object Content: Data and Attributes 178 | ----------------------------------- 179 | 180 | .. index:: 181 | single: Python; Content 182 | 183 | If we set ``x = 42`` then we create an object of type ``int`` that contains 184 | the data ``42``. 185 | 186 | In fact, it contains more, as the following example shows 187 | 188 | 189 | .. code-block:: python3 190 | 191 | x = 42 192 | x 193 | 194 | .. code-block:: python3 195 | 196 | x.imag 197 | 198 | .. code-block:: python3 199 | 200 | x.__class__ 201 | 202 | When Python creates this integer object, it stores with it various auxiliary information, such as the imaginary part, and the type. 203 | 204 | Any name following a dot is called an *attribute* of the object to the left of the dot. 205 | 206 | * e.g.,``imag`` and ``__class__`` are attributes of ``x``. 207 | 208 | 209 | We see from this example that objects have attributes that contain auxiliary information. 210 | 211 | 212 | They also have attributes that act like functions, called *methods*. 213 | 214 | These attributes are important, so let's discuss them in-depth. 215 | 216 | 217 | .. _methods: 218 | 219 | Methods 220 | ------- 221 | 222 | .. index:: 223 | single: Python; Methods 224 | 225 | Methods are *functions that are bundled with objects*. 226 | 227 | 228 | 229 | Formally, methods are attributes of objects that are callable (i.e., can be called as functions) 230 | 231 | .. code-block:: python3 232 | 233 | x = ['foo', 'bar'] 234 | callable(x.append) 235 | 236 | .. code-block:: python3 237 | 238 | callable(x.__doc__) 239 | 240 | 241 | 242 | Methods typically act on the data contained in the object they belong to, or combine that data with other data 243 | 244 | .. code-block:: python3 245 | 246 | x = ['a', 'b'] 247 | x.append('c') 248 | s = 'This is a string' 249 | s.upper() 250 | 251 | .. 
code-block:: python3 252 | 253 | s.lower() 254 | 255 | .. code-block:: python3 256 | 257 | s.replace('This', 'That') 258 | 259 | A great deal of Python functionality is organized around method calls. 260 | 261 | For example, consider the following piece of code 262 | 263 | .. code-block:: python3 264 | 265 | x = ['a', 'b'] 266 | x[0] = 'aa' # Item assignment using square bracket notation 267 | x 268 | 269 | It doesn't look like there are any methods used here, but in fact the square bracket assignment notation is just a convenient interface to a method call. 270 | 271 | What actually happens is that Python calls the ``__setitem__`` method, as follows 272 | 273 | .. code-block:: python3 274 | 275 | x = ['a', 'b'] 276 | x.__setitem__(0, 'aa') # Equivalent to x[0] = 'aa' 277 | x 278 | 279 | (If you wanted to you could modify the ``__setitem__`` method, so that square bracket assignment does something totally different) 280 | 281 | 282 | 283 | 284 | Summary 285 | ========== 286 | 287 | In Python, *everything in memory is treated as an object*. 288 | 289 | This includes not just lists, strings, etc., but also less obvious things, such as 290 | 291 | * functions (once they have been read into memory) 292 | 293 | * modules (ditto) 294 | 295 | * files opened for reading or writing 296 | 297 | * integers, etc. 298 | 299 | Consider, for example, functions. 300 | 301 | When Python reads a function definition, it creates a **function object** and stores it in memory. 302 | 303 | 304 | The following code illustrates 305 | 306 | .. code-block:: python3 307 | 308 | def f(x): return x**2 309 | f 310 | 311 | 312 | .. code-block:: python3 313 | 314 | type(f) 315 | 316 | .. code-block:: python3 317 | 318 | id(f) 319 | 320 | .. code-block:: python3 321 | 322 | f.__name__ 323 | 324 | We can see that ``f`` has type, identity, attributes and so on---just like any other object. 325 | 326 | It also has methods. 
327 | 328 | One example is the ``__call__`` method, which just evaluates the function 329 | 330 | .. code-block:: python3 331 | 332 | f.__call__(3) 333 | 334 | Another is the ``__dir__`` method, which returns a list of attributes. 335 | 336 | 337 | Modules loaded into memory are also treated as objects 338 | 339 | .. code-block:: python3 340 | 341 | import math 342 | 343 | id(math) 344 | 345 | 346 | This uniform treatment of data in Python (everything is an object) helps keep the language simple and consistent. 347 | -------------------------------------------------------------------------------- /source_rst/parallelization.rst: -------------------------------------------------------------------------------- 1 | .. _parallel: 2 | 3 | .. include:: /_static/includes/header.raw 4 | 5 | *************** 6 | Parallelization 7 | *************** 8 | 9 | .. contents:: :depth: 2 10 | 11 | In addition to what's in Anaconda, this lecture will need the following libraries: 12 | 13 | .. code-block:: ipython 14 | :class: hide-output 15 | 16 | !pip install --upgrade quantecon 17 | 18 | 19 | Overview 20 | ======== 21 | 22 | 23 | The growth of CPU clock speed (i.e., the speed at which a single chain of logic can 24 | be run) has slowed dramatically in recent years. 25 | 26 | This is unlikely to change in the near future, due to inherent physical 27 | limitations on the construction of chips and circuit boards. 28 | 29 | Chip designers and computer programmers have responded to the slowdown by 30 | seeking a different path to fast execution: parallelization. 31 | 32 | Hardware makers have increased the number of cores (physical CPUs) embedded in each machine. 33 | 34 | For programmers, the challenge has been to exploit these multiple CPUs by running many processes in parallel (i.e., simultaneously). 
35 | 36 | This is particularly important in scientific programming, which requires handling 37 | 38 | * large amounts of data and 39 | 40 | * CPU intensive simulations and other calculations. 41 | 42 | In this lecture we discuss parallelization for scientific computing, with a focus on 43 | 44 | #. the best tools for parallelization in Python and 45 | 46 | #. how these tools can be applied to quantitative economic problems. 47 | 48 | 49 | Let's start with some imports: 50 | 51 | .. code-block:: ipython 52 | 53 | import numpy as np 54 | import quantecon as qe 55 | import matplotlib.pyplot as plt 56 | 57 | %matplotlib inline 58 | 59 | 60 | Types of Parallelization 61 | ======================== 62 | 63 | Large textbooks have been written on different approaches to parallelization but we will keep a tight focus on what's most useful to us. 64 | 65 | We will briefly review the two main kinds of parallelization commonly used in 66 | scientific computing and discuss their pros and cons. 67 | 68 | 69 | Multiprocessing 70 | --------------- 71 | 72 | Multiprocessing means concurrent execution of multiple processes using more than one processor. 73 | 74 | In this context, a **process** is a chain of instructions (i.e., a program). 75 | 76 | Multiprocessing can be carried out on one machine with multiple CPUs or on a 77 | collection of machines connected by a network. 78 | 79 | In the latter case, the collection of machines is usually called a 80 | **cluster**. 81 | 82 | With multiprocessing, each process has its own memory space, although the 83 | physical memory chip might be shared. 84 | 85 | 86 | Multithreading 87 | -------------- 88 | 89 | Multithreading is similar to multiprocessing, except that, during execution, the threads all share the same memory space. 90 | 91 | Native Python struggles to implement multithreading due to some `legacy design 92 | features `__. 93 | 94 | But this is not a restriction for scientific libraries like NumPy and Numba. 
95 | 96 | Functions imported from these libraries and JIT-compiled code run in low level 97 | execution environments where Python's legacy restrictions don't apply. 98 | 99 | 100 | Advantages and Disadvantages 101 | ---------------------------- 102 | 103 | Multithreading is more lightweight because most system and memory resources 104 | are shared by the threads. 105 | 106 | In addition, the fact that multiple threads all access a shared pool of memory 107 | is extremely convenient for numerical programming. 108 | 109 | On the other hand, multiprocessing is more flexible and can be distributed 110 | across clusters. 111 | 112 | For the great majority of what we do in these lectures, multithreading will 113 | suffice. 114 | 115 | 116 | Implicit Multithreading in NumPy 117 | ================================ 118 | 119 | Actually, you have already been using multithreading in your Python code, 120 | although you might not have realized it. 121 | 122 | (We are, as usual, assuming that you are running the latest version of 123 | Anaconda Python.) 124 | 125 | This is because NumPy cleverly implements multithreading in a lot of its 126 | compiled code. 127 | 128 | Let's look at some examples to see this in action. 129 | 130 | A Matrix Operation 131 | ------------------ 132 | 133 | The next piece of code computes the eigenvalues of a large number of randomly 134 | generated matrices. 135 | 136 | It takes a few seconds to run. 137 | 138 | .. code-block:: python3 139 | 140 | n = 20 141 | m = 1000 142 | for i in range(n): 143 | X = np.random.randn(m, m) 144 | λ = np.linalg.eigvals(X) 145 | 146 | Now, let's look at the output of the `htop` system monitor on our machine while 147 | this code is running: 148 | 149 | .. figure:: /_static/lecture_specific/parallelization/htop_parallel_npmat.png 150 | 151 | We can see that 4 of the 8 CPUs are running at full speed. 
152 | 153 | 154 | This is because NumPy's ``eigvals`` routine neatly splits up the tasks and 155 | distributes them to different threads. 156 | 157 | 158 | A Multithreaded Ufunc 159 | --------------------- 160 | 161 | Over the last few years, NumPy has managed to push this kind of multithreading 162 | out to more and more operations. 163 | 164 | For example, let's return to a maximization problem :ref:`discussed previously `: 165 | 166 | .. code-block:: python3 167 | 168 | def f(x, y): 169 | return np.cos(x**2 + y**2) / (1 + x**2 + y**2) 170 | 171 | grid = np.linspace(-3, 3, 5000) 172 | x, y = np.meshgrid(grid, grid) 173 | 174 | .. code-block:: ipython3 175 | 176 | %timeit np.max(f(x, y)) 177 | 178 | If you have a system monitor such as `htop` (Linux/Mac) or `perfmon` 179 | (Windows), then try running this and then observing the load on your CPUs. 180 | 181 | (You will probably need to bump up the grid size to see large effects.) 182 | 183 | At least on our machine, the output shows that the operation is successfully 184 | distributed across multiple threads. 185 | 186 | This is one of the reasons why the vectorized code above is fast. 187 | 188 | A Comparison with Numba 189 | ----------------------- 190 | 191 | To get some basis for comparison for the last example, let's try the same 192 | thing with Numba. 193 | 194 | In fact there is an easy way to do this, since Numba can also be used to 195 | create custom :ref:`ufuncs ` with the `@vectorize 196 | `__ decorator. 197 | 198 | .. code-block:: python3 199 | 200 | from numba import vectorize 201 | 202 | @vectorize 203 | def f_vec(x, y): 204 | return np.cos(x**2 + y**2) / (1 + x**2 + y**2) 205 | 206 | np.max(f_vec(x, y)) # Run once to compile 207 | 208 | .. code-block:: ipython3 209 | 210 | %timeit np.max(f_vec(x, y)) 211 | 212 | At least on our machine, the difference in the speed between the 213 | Numba version and the vectorized NumPy version shown above is not large. 
214 | 215 | But there's quite a bit going on here so let's try to break down what is 216 | happening. 217 | 218 | Both Numba and NumPy use efficient machine code that's specialized to these 219 | floating point operations. 220 | 221 | However, the code NumPy uses is, in some ways, less efficient. 222 | 223 | The reason is that, in NumPy, the operation ``np.cos(x**2 + y**2) / (1 + 224 | x**2 + y**2)`` generates several intermediate arrays. 225 | 226 | For example, a new array is created when ``x**2`` is calculated. 227 | 228 | The same is true when ``y**2`` is calculated, and then ``x**2 + y**2`` and so on. 229 | 230 | Numba avoids creating all these intermediate arrays by compiling one 231 | function that is specialized to the entire operation. 232 | 233 | But if this is true, then why isn't the Numba code faster? 234 | 235 | The reason is that NumPy makes up for its disadvantages with implicit 236 | multithreading, as we've just discussed. 237 | 238 | Multithreading a Numba Ufunc 239 | ---------------------------- 240 | 241 | Can we get both of these advantages at once? 242 | 243 | In other words, can we pair 244 | 245 | * the efficiency of Numba's highly specialized JIT compiled function and 246 | 247 | * the speed gains from parallelization obtained by NumPy's implicit 248 | multithreading? 249 | 250 | It turns out that we can, by adding some type information plus ``target='parallel'``. 251 | 252 | .. code-block:: python3 253 | 254 | @vectorize('float64(float64, float64)', target='parallel') 255 | def f_vec(x, y): 256 | return np.cos(x**2 + y**2) / (1 + x**2 + y**2) 257 | 258 | np.max(f_vec(x, y)) # Run once to compile 259 | 260 | .. code-block:: ipython3 261 | 262 | %timeit np.max(f_vec(x, y)) 263 | 264 | Now our code runs significantly faster than the NumPy version. 
265 | 266 | 267 | 268 | Multithreaded Loops in Numba 269 | ============================ 270 | 271 | We just saw one approach to parallelization in Numba, using the ``parallel`` 272 | flag in ``@vectorize``. 273 | 274 | This is neat but, it turns out, not well suited to many problems we consider. 275 | 276 | Fortunately, Numba provides another approach to multithreading that will work 277 | for us almost everywhere parallelization is possible. 278 | 279 | To illustrate, let's look first at a simple, single-threaded (i.e., non-parallelized) piece of code. 280 | 281 | The code simulates updating the wealth :math:`w_t` of a household via the rule 282 | 283 | .. math:: 284 | 285 | w_{t+1} = R_{t+1} s w_t + y_{t+1} 286 | 287 | Here 288 | 289 | * :math:`R` is the gross rate of return on assets 290 | * :math:`s` is the savings rate of the household and 291 | * :math:`y` is labor income. 292 | 293 | We model both :math:`R` and :math:`y` as independent draws from a lognormal 294 | distribution. 295 | 296 | Here's the code: 297 | 298 | .. code-block:: ipython 299 | 300 | from numpy.random import randn 301 | from numba import njit 302 | 303 | @njit 304 | def h(w, r=0.1, s=0.3, v1=0.1, v2=1.0): 305 | """ 306 | Updates household wealth. 307 | """ 308 | 309 | # Draw shocks 310 | R = np.exp(v1 * randn()) * (1 + r) 311 | y = np.exp(v2 * randn()) 312 | 313 | # Update wealth 314 | w = R * s * w + y 315 | return w 316 | 317 | 318 | Let's have a look at how wealth evolves under this rule. 319 | 320 | .. code-block:: ipython 321 | 322 | fig, ax = plt.subplots() 323 | 324 | T = 100 325 | w = np.empty(T) 326 | w[0] = 5 327 | for t in range(T-1): 328 | w[t+1] = h(w[t]) 329 | 330 | ax.plot(w) 331 | ax.set_xlabel('$t$', fontsize=12) 332 | ax.set_ylabel('$w_{t}$', fontsize=12) 333 | plt.show() 334 | 335 | Now let's suppose that we have a large population of households and we want to 336 | know what median wealth will be. 
337 | 338 | This is not easy to solve with pencil and paper, so we will use simulation 339 | instead. 340 | 341 | In particular, we will simulate a large number of households and then 342 | calculate median wealth for this group. 343 | 344 | Suppose we are interested in the long-run average of this median over time. 345 | 346 | It turns out that, for the specification that we've chosen above, we can 347 | calculate this by taking a one-period snapshot of what has happened to median 348 | wealth of the group at the end of a long simulation. 349 | 350 | Moreover, provided the simulation period is long enough, initial conditions 351 | don't matter. 352 | 353 | * This is due to something called ergodicity, which we will discuss `later on `_. 354 | 355 | So, in summary, we are going to simulate 50,000 households by 356 | 357 | #. arbitrarily setting initial wealth to 1 and 358 | 359 | #. simulating forward in time for 1,000 periods. 360 | 361 | Then we'll calculate median wealth at the end period. 362 | 363 | Here's the code: 364 | 365 | .. code-block:: ipython 366 | 367 | @njit 368 | def compute_long_run_median(w0=1, T=1000, num_reps=50_000): 369 | 370 | obs = np.empty(num_reps) 371 | for i in range(num_reps): 372 | w = w0 373 | for t in range(T): 374 | w = h(w) 375 | obs[i] = w 376 | 377 | return np.median(obs) 378 | 379 | Let's see how fast this runs: 380 | 381 | .. code-block:: ipython 382 | 383 | %%time 384 | compute_long_run_median() 385 | 386 | 387 | To speed this up, we're going to parallelize it via multithreading. 388 | 389 | To do so, we add the ``parallel=True`` flag and change ``range`` to ``prange``: 390 | 391 | .. 
code-block:: ipython 392 | 393 | from numba import prange 394 | 395 | @njit(parallel=True) 396 | def compute_long_run_median_parallel(w0=1, T=1000, num_reps=50_000): 397 | 398 | obs = np.empty(num_reps) 399 | for i in prange(num_reps): 400 | w = w0 401 | for t in range(T): 402 | w = h(w) 403 | obs[i] = w 404 | 405 | return np.median(obs) 406 | 407 | Let's look at the timing: 408 | 409 | .. code-block:: ipython 410 | 411 | %%time 412 | compute_long_run_median_parallel() 413 | 414 | The speed-up is significant. 415 | 416 | A Warning 417 | --------- 418 | 419 | Parallelization works well in the outer loop of the last example because the individual tasks inside the loop are independent of each other. 420 | 421 | If this independence fails then parallelization is often problematic. 422 | 423 | For example, each step inside the inner loop depends on the last step, so 424 | independence fails, and this is why we use ordinary ``range`` instead of ``prange``. 425 | 426 | When you see us using ``prange`` in later lectures, it is because the 427 | independence of tasks holds true. 428 | 429 | When you see us using ordinary ``range`` in a jitted function, it is either because the speed gain from parallelization is small or because independence fails. 430 | 431 | .. Dask 432 | 433 | .. To be added. 434 | 435 | 436 | .. GPUs 437 | 438 | .. Just say a few words about them. How do they relate to the foregoing? Explain that we can't introduce executable GPU code here. 439 | 440 | 441 | Exercises 442 | ========= 443 | 444 | Exercise 1 445 | ---------- 446 | 447 | In :ref:`an earlier exercise `, we used Numba to accelerate an 448 | effort to compute the constant :math:`\pi` by Monte Carlo. 449 | 450 | Now try adding parallelization and see if you get further speed gains. 451 | 452 | You should not expect huge gains here because, while there are many 453 | independent tasks (draw point and test if in circle), each one has low 454 | execution time. 
455 | 456 | Generally speaking, parallelization is less effective when the individual 457 | tasks to be parallelized are very small relative to total execution time. 458 | 459 | This is due to overheads associated with spreading all of these small tasks across multiple CPUs. 460 | 461 | Nevertheless, with suitable hardware, it is possible to get nontrivial speed gains in this exercise. 462 | 463 | For the size of the Monte Carlo simulation, use something substantial, such as 464 | ``n = 100_000_000``. 465 | 466 | 467 | Solutions 468 | ========= 469 | 470 | Exercise 1 471 | ---------- 472 | 473 | Here is one solution: 474 | 475 | .. code-block:: python3 476 | 477 | from random import uniform 478 | 479 | @njit(parallel=True) 480 | def calculate_pi(n=1_000_000): 481 | count = 0 482 | for i in prange(n): 483 | u, v = uniform(0, 1), uniform(0, 1) 484 | d = np.sqrt((u - 0.5)**2 + (v - 0.5)**2) 485 | if d < 0.5: 486 | count += 1 487 | 488 | area_estimate = count / n 489 | return area_estimate * 4 # dividing by radius**2 490 | 491 | Now let's see how fast it runs: 492 | 493 | .. code-block:: ipython3 494 | 495 | %time calculate_pi() 496 | 497 | .. code-block:: ipython3 498 | 499 | %time calculate_pi() 500 | 501 | By switching parallelization on and off (selecting ``True`` or 502 | ``False`` in the ``@njit`` annotation), we can test the speed gain that 503 | multithreading provides on top of JIT compilation. 504 | 505 | On our workstation, we find that parallelization increases execution speed by 506 | a factor of 2 or 3. 507 | 508 | (If you are executing locally, you will get different numbers, depending mainly 509 | on the number of CPUs on your machine.) 510 | 511 | 512 | 513 | 514 | -------------------------------------------------------------------------------- /source_rst/troubleshooting.rst: -------------------------------------------------------------------------------- 1 | .. _troubleshooting: 2 | 3 | .. include:: /_static/includes/header.raw 4 | 5 | .. 
highlight:: python3 6 | 7 | *************** 8 | Troubleshooting 9 | *************** 10 | 11 | .. contents:: :depth: 2 12 | 13 | This page is for readers experiencing errors when running the code from the lectures. 14 | 15 | Fixing Your Local Environment 16 | ============================== 17 | 18 | The basic assumption of the lectures is that code in a lecture should execute whenever 19 | 20 | #. it is executed in a Jupyter notebook and 21 | 22 | #. the notebook is running on a machine with the latest version of Anaconda Python. 23 | 24 | You have installed Anaconda, haven't you, following the instructions in :doc:`this lecture `? 25 | 26 | Assuming that you have, the most common source of problems for our readers is that their Anaconda distribution is not up to date. 27 | 28 | `Here's a useful article `__ 29 | on how to update Anaconda. 30 | 31 | Another option is to simply remove Anaconda and reinstall. 32 | 33 | You also need to keep the external code libraries, such as `QuantEcon.py 34 | `__ up to date. 35 | 36 | For this task you can either 37 | 38 | * use `pip install --upgrade quantecon` on the command line, or 39 | 40 | * execute `!pip install --upgrade quantecon` within a Jupyter notebook. 41 | 42 | If your local environment is still not working you can do two things. 43 | 44 | First, you can use a remote machine instead, by clicking on the `Launch Notebook` icon available for each lecture 45 | 46 | .. image:: _static/lecture_specific/troubleshooting/launch.png 47 | 48 | Second, you can report an issue, so we can try to fix your local set up. 49 | 50 | We like getting feedback on the lectures so please don't hesitate to get in 51 | touch. 52 | 53 | Reporting an Issue 54 | =================== 55 | 56 | One way to give feedback is to raise an issue through our `issue tracker 57 | `__. 58 | 59 | Please be as specific as possible. Tell us where the problem is and as much 60 | detail about your local set up as you can provide. 
61 | 62 | Another feedback option is to use our `discourse forum `__. 63 | 64 | Finally, you can provide direct feedback to contact@quantecon.org 65 | 66 | -------------------------------------------------------------------------------- /source_rst/writing_good_code.rst: -------------------------------------------------------------------------------- 1 | .. _writing_good_code: 2 | 3 | .. include:: /_static/includes/header.raw 4 | 5 | .. highlight:: python3 6 | 7 | ***************** 8 | Writing Good Code 9 | ***************** 10 | 11 | .. index:: 12 | single: Models; Code style 13 | 14 | .. contents:: :depth: 2 15 | 16 | 17 | 18 | Overview 19 | ======== 20 | 21 | When computer programs are small, poorly written code is not overly costly. 22 | 23 | But more data, more sophisticated models, and more computer power are enabling us to take on more challenging problems that involve writing longer programs. 24 | 25 | For such programs, investment in good coding practices will pay high returns. 26 | 27 | The main payoffs are higher productivity and faster code. 28 | 29 | In this lecture, we review some elements of good coding practice. 30 | 31 | We also touch on modern developments in scientific computing --- such as just in time compilation --- and how they affect good program design. 32 | 33 | 34 | 35 | 36 | An Example of Poor Code 37 | ======================= 38 | 39 | Let's have a look at some poorly written code. 40 | 41 | The job of the code is to generate and plot time series of the simplified Solow model 42 | 43 | .. math:: 44 | :label: gc_solmod 45 | 46 | k_{t+1} = s k_t^{\alpha} + (1 - \delta) k_t, 47 | \quad t = 0, 1, 2, \ldots 48 | 49 | 50 | Here 51 | 52 | * :math:`k_t` is capital at time :math:`t` and 53 | 54 | * :math:`s, \alpha, \delta` are parameters (savings, a productivity parameter and depreciation) 55 | 56 | For each parameterization, the code 57 | 58 | #. sets :math:`k_0 = 1` 59 | 60 | #. 
iterates using :eq:`gc_solmod` to produce a sequence :math:`k_0, k_1, k_2 \ldots , k_T` 61 | 62 | #. plots the sequence 63 | 64 | The plots will be grouped into three subfigures. 65 | 66 | In each subfigure, two parameters are held fixed while another varies 67 | 68 | .. code-block:: ipython 69 | 70 | import numpy as np 71 | import matplotlib.pyplot as plt 72 | %matplotlib inline 73 | 74 | # Allocate memory for time series 75 | k = np.empty(50) 76 | 77 | fig, axes = plt.subplots(3, 1, figsize=(6, 14)) 78 | 79 | # Trajectories with different α 80 | δ = 0.1 81 | s = 0.4 82 | α = (0.25, 0.33, 0.45) 83 | 84 | for j in range(3): 85 | k[0] = 1 86 | for t in range(49): 87 | k[t+1] = s * k[t]**α[j] + (1 - δ) * k[t] 88 | axes[0].plot(k, 'o-', label=rf"$\alpha = {α[j]},\; s = {s},\; \delta={δ}$") 89 | 90 | axes[0].grid(lw=0.2) 91 | axes[0].set_ylim(0, 18) 92 | axes[0].set_xlabel('time') 93 | axes[0].set_ylabel('capital') 94 | axes[0].legend(loc='upper left', frameon=True) 95 | 96 | # Trajectories with different s 97 | δ = 0.1 98 | α = 0.33 99 | s = (0.3, 0.4, 0.5) 100 | 101 | for j in range(3): 102 | k[0] = 1 103 | for t in range(49): 104 | k[t+1] = s[j] * k[t]**α + (1 - δ) * k[t] 105 | axes[1].plot(k, 'o-', label=rf"$\alpha = {α},\; s = {s[j]},\; \delta={δ}$") 106 | 107 | axes[1].grid(lw=0.2) 108 | axes[1].set_xlabel('time') 109 | axes[1].set_ylabel('capital') 110 | axes[1].set_ylim(0, 18) 111 | axes[1].legend(loc='upper left', frameon=True) 112 | 113 | # Trajectories with different δ 114 | δ = (0.05, 0.1, 0.15) 115 | α = 0.33 116 | s = 0.4 117 | 118 | for j in range(3): 119 | k[0] = 1 120 | for t in range(49): 121 | k[t+1] = s * k[t]**α + (1 - δ[j]) * k[t] 122 | axes[2].plot(k, 'o-', label=rf"$\alpha = {α},\; s = {s},\; \delta={δ[j]}$") 123 | 124 | axes[2].set_ylim(0, 18) 125 | axes[2].set_xlabel('time') 126 | axes[2].set_ylabel('capital') 127 | axes[2].grid(lw=0.2) 128 | axes[2].legend(loc='upper left', frameon=True) 129 | 130 | plt.show() 131 | 132 | 133 | True, the code 
more or less follows `PEP8 <https://www.python.org/dev/peps/pep-0008/>`__.
190 | 191 | Yes, we realize that you can just cut and paste and change a few symbols. 192 | 193 | But as a programmer, your aim should be to **automate** repetition, **not** do it yourself. 194 | 195 | More importantly, repeating the same logic in different places means that eventually one of them will likely be wrong. 196 | 197 | If you want to know more, read the excellent summary found on `this page `__. 198 | 199 | We'll talk about how to avoid repetition below. 200 | 201 | 202 | Minimize Global Variables 203 | ------------------------- 204 | 205 | Sure, global variables (i.e., names assigned to values outside of any function or class) are convenient. 206 | 207 | Rookie programmers typically use global variables with abandon --- as we once did ourselves. 208 | 209 | But global variables are dangerous, especially in medium to large size programs, since 210 | 211 | * they can affect what happens in any part of your program 212 | 213 | * they can be changed by any function 214 | 215 | This makes it much harder to be certain about what some small part of a given piece of code actually commands. 216 | 217 | Here's a `useful discussion on the topic `__. 218 | 219 | While the odd global in small scripts is no big deal, we recommend that you teach yourself to avoid them. 220 | 221 | (We'll discuss how just below). 222 | 223 | 224 | JIT Compilation 225 | ^^^^^^^^^^^^^^^ 226 | 227 | For scientific computing, there is another good reason to avoid global variables. 228 | 229 | As :doc:`we've seen in previous lectures `, JIT compilation can generate excellent performance for scripting languages like Python. 230 | 231 | But the task of the compiler used for JIT compilation becomes harder when global variables are present. 232 | 233 | Put differently, the type inference required for JIT compilation is safer and 234 | more effective when variables are sandboxed inside a function. 
235 | 236 | 237 | Use Functions or Classes 238 | ------------------------ 239 | 240 | Fortunately, we can easily avoid the evils of global variables and WET code. 241 | 242 | * WET stands for "we enjoy typing" and is the opposite of DRY. 243 | 244 | We can do this by making frequent use of functions or classes. 245 | 246 | In fact, functions and classes are designed specifically to help us avoid shaming ourselves by repeating code or excessive use of global variables. 247 | 248 | 249 | Which One, Functions or Classes? 250 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 251 | 252 | Both can be useful, and in fact they work well with each other. 253 | 254 | We'll learn more about these topics over time. 255 | 256 | (Personal preference is part of the story too) 257 | 258 | What's really important is that you use one or the other or both. 259 | 260 | 261 | 262 | Revisiting the Example 263 | ====================== 264 | 265 | Here's some code that reproduces the plot above with better coding style. 266 | 267 | 268 | .. code-block:: python3 269 | 270 | from itertools import product 271 | 272 | def plot_path(ax, αs, s_vals, δs, time_series_length=50): 273 | """ 274 | Add a time series plot to the axes ax for all given parameters. 
275 | """ 276 | k = np.empty(time_series_length) 277 | 278 | for (α, s, δ) in product(αs, s_vals, δs): 279 | k[0] = 1 280 | for t in range(time_series_length-1): 281 | k[t+1] = s * k[t]**α + (1 - δ) * k[t] 282 | ax.plot(k, 'o-', label=rf"$\alpha = {α},\; s = {s},\; \delta = {δ}$") 283 | 284 | ax.set_xlabel('time') 285 | ax.set_ylabel('capital') 286 | ax.set_ylim(0, 18) 287 | ax.legend(loc='upper left', frameon=True) 288 | 289 | fig, axes = plt.subplots(3, 1, figsize=(6, 14)) 290 | 291 | # Parameters (αs, s_vals, δs) 292 | set_one = ([0.25, 0.33, 0.45], [0.4], [0.1]) 293 | set_two = ([0.33], [0.3, 0.4, 0.5], [0.1]) 294 | set_three = ([0.33], [0.4], [0.05, 0.1, 0.15]) 295 | 296 | for (ax, params) in zip(axes, (set_one, set_two, set_three)): 297 | αs, s_vals, δs = params 298 | plot_path(ax, αs, s_vals, δs) 299 | 300 | plt.show() 301 | 302 | 303 | If you inspect this code, you will see that 304 | 305 | * it uses a function to avoid repetition. 306 | * Global variables are quarantined by collecting them together at the end, not the start of the program. 307 | * Magic numbers are avoided. 308 | * The loop at the end where the actual work is done is short and relatively simple. 309 | 310 | Exercises 311 | ========= 312 | 313 | Exercise 1 314 | ---------- 315 | 316 | Here is some code that needs improving. 317 | 318 | It involves a basic supply and demand problem. 319 | 320 | Supply is given by 321 | 322 | .. math:: q_s(p) = \exp(\alpha p) - \beta. 323 | 324 | The demand curve is 325 | 326 | .. math:: q_d(p) = \gamma p^{-\delta}. 327 | 328 | The values :math:`\alpha`, :math:`\beta`, :math:`\gamma` and 329 | :math:`\delta` are **parameters** 330 | 331 | The equilibrium :math:`p^*` is the price such that 332 | :math:`q_d(p) = q_s(p)`. 333 | 334 | We can solve for this equilibrium using a root finding algorithm. 335 | Specifically, we will find the :math:`p` such that :math:`h(p) = 0`, 336 | where 337 | 338 | .. 
math:: h(p) := q_d(p) - q_s(p) 339 | 340 | This yields the equilibrium price :math:`p^*`. From this we get the 341 | equilibrium price by :math:`q^* = q_s(p^*)` 342 | 343 | The parameter values will be 344 | 345 | - :math:`\alpha = 0.1` 346 | - :math:`\beta = 1` 347 | - :math:`\gamma = 1` 348 | - :math:`\delta = 1` 349 | 350 | .. code:: ipython3 351 | 352 | from scipy.optimize import brentq 353 | 354 | # Compute equilibrium 355 | def h(p): 356 | return p**(-1) - (np.exp(0.1 * p) - 1) # demand - supply 357 | 358 | p_star = brentq(h, 2, 4) 359 | q_star = np.exp(0.1 * p_star) - 1 360 | 361 | print(f'Equilibrium price is {p_star: .2f}') 362 | print(f'Equilibrium quantity is {q_star: .2f}') 363 | 364 | Let's also plot our results. 365 | 366 | .. code:: ipython3 367 | 368 | # Now plot 369 | grid = np.linspace(2, 4, 100) 370 | fig, ax = plt.subplots() 371 | 372 | qs = np.exp(0.1 * grid) - 1 373 | qd = grid**(-1) 374 | 375 | 376 | ax.plot(grid, qd, 'b-', lw=2, label='demand') 377 | ax.plot(grid, qs, 'g-', lw=2, label='supply') 378 | 379 | ax.set_xlabel('price') 380 | ax.set_ylabel('quantity') 381 | ax.legend(loc='upper center') 382 | 383 | plt.show() 384 | 385 | We also want to consider supply and demand shifts. 386 | 387 | For example, let's see what happens when demand shifts up, with :math:`\gamma` increasing to :math:`1.25`: 388 | 389 | .. code:: ipython3 390 | 391 | # Compute equilibrium 392 | def h(p): 393 | return 1.25 * p**(-1) - (np.exp(0.1 * p) - 1) 394 | 395 | p_star = brentq(h, 2, 4) 396 | q_star = np.exp(0.1 * p_star) - 1 397 | 398 | print(f'Equilibrium price is {p_star: .2f}') 399 | print(f'Equilibrium quantity is {q_star: .2f}') 400 | 401 | .. 
code:: ipython3 402 | 403 | # Now plot 404 | p_grid = np.linspace(2, 4, 100) 405 | fig, ax = plt.subplots() 406 | 407 | qs = np.exp(0.1 * p_grid) - 1 408 | qd = 1.25 * p_grid**(-1) 409 | 410 | 411 | ax.plot(grid, qd, 'b-', lw=2, label='demand') 412 | ax.plot(grid, qs, 'g-', lw=2, label='supply') 413 | 414 | ax.set_xlabel('price') 415 | ax.set_ylabel('quantity') 416 | ax.legend(loc='upper center') 417 | 418 | plt.show() 419 | 420 | 421 | Now we might consider supply shifts, but you already get the idea that there's 422 | a lot of repeated code here. 423 | 424 | Refactor and improve clarity in the code above using the principles discussed 425 | in this lecture. 426 | 427 | 428 | 429 | Solutions 430 | ========= 431 | 432 | Exercise 1 433 | ---------- 434 | 435 | Here's one solution, that uses a class: 436 | 437 | 438 | .. code:: ipython3 439 | 440 | class Equilibrium: 441 | 442 | def __init__(self, α=0.1, β=1, γ=1, δ=1): 443 | self.α, self.β, self.γ, self.δ = α, β, γ, δ 444 | 445 | def qs(self, p): 446 | return np.exp(self.α * p) - self.β 447 | 448 | def qd(self, p): 449 | return self.γ * p**(-self.δ) 450 | 451 | def compute_equilibrium(self): 452 | def h(p): 453 | return self.qd(p) - self.qs(p) 454 | p_star = brentq(h, 2, 4) 455 | q_star = np.exp(self.α * p_star) - self.β 456 | 457 | print(f'Equilibrium price is {p_star: .2f}') 458 | print(f'Equilibrium quantity is {q_star: .2f}') 459 | 460 | def plot_equilibrium(self): 461 | # Now plot 462 | grid = np.linspace(2, 4, 100) 463 | fig, ax = plt.subplots() 464 | 465 | ax.plot(grid, self.qd(grid), 'b-', lw=2, label='demand') 466 | ax.plot(grid, self.qs(grid), 'g-', lw=2, label='supply') 467 | 468 | ax.set_xlabel('price') 469 | ax.set_ylabel('quantity') 470 | ax.legend(loc='upper center') 471 | 472 | plt.show() 473 | 474 | Let's create an instance at the default parameter values. 475 | 476 | .. code:: ipython3 477 | 478 | eq = Equilibrium() 479 | 480 | Now we'll compute the equilibrium and plot it. 481 | 482 | .. 
code:: ipython3 483 | 484 | eq.compute_equilibrium() 485 | 486 | .. code:: ipython3 487 | 488 | eq.plot_equilibrium() 489 | 490 | One of the nice things about our refactored code is that, when we change 491 | parameters, we don't need to repeat ourselves: 492 | 493 | .. code:: ipython3 494 | 495 | eq.γ = 1.25 496 | 497 | .. code:: ipython3 498 | 499 | eq.compute_equilibrium() 500 | 501 | .. code:: ipython3 502 | 503 | eq.plot_equilibrium() 504 | 505 | --------------------------------------------------------------------------------