├── docs ├── index.md ├── basics │ └── images │ │ └── distributions.png ├── glossary.md ├── advanced │ ├── diagnostics.ipynb │ └── regression.ipynb └── pandoc.css ├── src ├── bayes_tutorial │ ├── __init__.py │ ├── solutions │ │ ├── __init__.py │ │ ├── inference.py │ │ ├── estimation.py │ │ ├── probability.py │ │ ├── simulation.py │ │ └── hierarchical.py │ └── data.py └── setup.py ├── .dockerignore ├── data ├── .gitignore ├── manuscirpt_figures_larger_bins.xlsx ├── baseballdb │ ├── README.txt │ └── core │ │ ├── TeamsHalf.csv │ │ ├── ManagersHalf.csv │ │ ├── TeamsFranchises.csv │ │ ├── AwardsManagers.csv │ │ └── SeriesPost.csv ├── microtubules_14.csv ├── Prussion Horse-Kick Data.csv ├── iq.csv ├── finch_beaks_2012.csv ├── sterilization.csv ├── sanitization.csv └── finch_beaks_1975.csv ├── .gitattributes ├── images ├── badchains.png ├── Galton_box.jpg ├── coin_model.pdf ├── pydata-jake.png ├── bacteria_model.jpg ├── bacteria_model.pdf ├── baseball-model.jpg ├── baseball-model.pdf ├── kruschke_model.jpg ├── kruschke_model.pdf ├── saturn-laplace.png ├── darwins-finches-model.jpg ├── darwins-finches-model.pdf ├── radioactive-decay-model.jpg ├── radioactive-decay-model.pdf ├── baseball-hierarchical-model.jpg ├── baseball-hierarchical-model.pdf ├── Exponential_probability_density.png ├── darwins-finches-hierarchical-model.jpg └── darwins-finches-hierarchical-model.pdf ├── notebooks ├── matplotlibrc ├── SciPy-2020 │ ├── matplotlibrc │ ├── utils.py │ └── data.py ├── SciPy-2021 │ ├── matplotlibrc │ ├── utils.py │ └── data.py ├── SciPy-2022 │ ├── matplotlibrc │ ├── utils.py │ ├── Untitled.ipynb │ └── data.py ├── ODSC-East-2020-04-14 │ ├── matplotlibrc │ ├── utils.py │ └── data.py ├── ODSC-Europe-2021-06-08 │ ├── matplotlibrc │ ├── utils.py │ └── data.py ├── URGsADS-NYC-2020-02-19 │ ├── matplotlibrc │ ├── utils.py │ └── data.py ├── utils.py ├── data.py ├── archive │ ├── 07-student-hierarchical-finches.ipynb │ ├── 07-instructor-hierarchical-finches.ipynb │ ├── 05-student-two-group-comparison-finches.ipynb │ └── extra-practice-student-multi-group-comparsion-sterilization.ipynb └── 05-student-bayesian-curve-regression.ipynb ├── .devcontainer ├── noop.txt ├── devcontainer.json └── Dockerfile ├── .azure-pipelines ├── templates │ ├── nb-docs-win.yml │ ├── nb-docs-nix.yml │ ├── setup-script-win.yml │ └── setup-script-nix.yml ├── macos.yml ├── linux.yml └── azure-pipelines.yml ├── pyproject.toml ├── .pre-commit-config.yaml ├── checkenv.py ├── binder └── environment.yml ├── reminders.md ├── Dockerfile ├── scripts ├── make_iq.py └── ice_cream_shop_simulator.ipynb ├── LICENSE ├── .gitignore ├── .travis.yml ├── mkdocs.yml └── README.md /docs/index.md: -------------------------------------------------------------------------------- 1 | # Welcome! 
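Before working through the notebooks, you can sanity-check your setup with the `checkenv.py` script at the repository root. A minimal sketch of the workflow, assuming you first create the conda environment from `binder/environment.yml`:

```bash
conda env create -f binder/environment.yml
conda activate bayesian-modelling-tutorial
python checkenv.py
```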
2 | -------------------------------------------------------------------------------- /src/bayes_tutorial/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/bayes_tutorial/solutions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .dockerignore 2 | Dockerfile 3 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | ice_cream_shop_hierarchical_posterior.nc 2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb filter=nbstripout 2 | 3 | *.ipynb diff=ipynb 4 | -------------------------------------------------------------------------------- /images/badchains.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/badchains.png -------------------------------------------------------------------------------- /images/Galton_box.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/Galton_box.jpg -------------------------------------------------------------------------------- /images/coin_model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/coin_model.pdf -------------------------------------------------------------------------------- /images/pydata-jake.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/pydata-jake.png -------------------------------------------------------------------------------- /images/bacteria_model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/bacteria_model.jpg -------------------------------------------------------------------------------- /images/bacteria_model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/bacteria_model.pdf -------------------------------------------------------------------------------- /images/baseball-model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/baseball-model.jpg -------------------------------------------------------------------------------- /images/baseball-model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/baseball-model.pdf -------------------------------------------------------------------------------- /images/kruschke_model.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/kruschke_model.jpg -------------------------------------------------------------------------------- /images/kruschke_model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/kruschke_model.pdf -------------------------------------------------------------------------------- /images/saturn-laplace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/saturn-laplace.png -------------------------------------------------------------------------------- /images/darwins-finches-model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/darwins-finches-model.jpg -------------------------------------------------------------------------------- /images/darwins-finches-model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/darwins-finches-model.pdf -------------------------------------------------------------------------------- /images/radioactive-decay-model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/radioactive-decay-model.jpg -------------------------------------------------------------------------------- /images/radioactive-decay-model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/radioactive-decay-model.pdf -------------------------------------------------------------------------------- /docs/basics/images/distributions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/docs/basics/images/distributions.png -------------------------------------------------------------------------------- /images/baseball-hierarchical-model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/baseball-hierarchical-model.jpg -------------------------------------------------------------------------------- /images/baseball-hierarchical-model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/baseball-hierarchical-model.pdf -------------------------------------------------------------------------------- /data/manuscirpt_figures_larger_bins.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/data/manuscirpt_figures_larger_bins.xlsx -------------------------------------------------------------------------------- /images/Exponential_probability_density.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/Exponential_probability_density.png -------------------------------------------------------------------------------- /images/darwins-finches-hierarchical-model.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/darwins-finches-hierarchical-model.jpg -------------------------------------------------------------------------------- /images/darwins-finches-hierarchical-model.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ericmjl/bayesian-stats-modelling-tutorial/HEAD/images/darwins-finches-hierarchical-model.pdf -------------------------------------------------------------------------------- /notebooks/matplotlibrc: -------------------------------------------------------------------------------- 1 | axes.spines.left : True # display axis spines 2 | axes.spines.bottom : True 3 | axes.spines.top : False 4 | axes.spines.right : False 5 | 6 | -------------------------------------------------------------------------------- /notebooks/SciPy-2020/matplotlibrc: -------------------------------------------------------------------------------- 1 | axes.spines.left : True # display axis spines 2 | axes.spines.bottom : True 3 | axes.spines.top : False 4 | axes.spines.right : False 5 | 6 | -------------------------------------------------------------------------------- /notebooks/SciPy-2021/matplotlibrc: -------------------------------------------------------------------------------- 1 | axes.spines.left : True # display axis spines 2 | axes.spines.bottom : True 3 | axes.spines.top : False 4 | axes.spines.right : False 5 | 6 | -------------------------------------------------------------------------------- /notebooks/SciPy-2022/matplotlibrc: -------------------------------------------------------------------------------- 1 | axes.spines.left : True # display axis spines 2 | axes.spines.bottom : True 3 | axes.spines.top : False 4 | axes.spines.right : False 5 | 6 | -------------------------------------------------------------------------------- /notebooks/ODSC-East-2020-04-14/matplotlibrc: -------------------------------------------------------------------------------- 1 | axes.spines.left : True # display axis spines 2 | axes.spines.bottom : True 3 | axes.spines.top : False 4 | axes.spines.right : False 5 | 6 | -------------------------------------------------------------------------------- /notebooks/ODSC-Europe-2021-06-08/matplotlibrc: -------------------------------------------------------------------------------- 1 | axes.spines.left : True # display axis spines 2 | axes.spines.bottom : True 3 | axes.spines.top : False 4 | axes.spines.right : False 5 | 6 | -------------------------------------------------------------------------------- /notebooks/URGsADS-NYC-2020-02-19/matplotlibrc: -------------------------------------------------------------------------------- 1 | axes.spines.left : True # display axis spines 2 | axes.spines.bottom : True 3 | axes.spines.top : False 4 | axes.spines.right : False 5 | 6 | -------------------------------------------------------------------------------- /.devcontainer/noop.txt: -------------------------------------------------------------------------------- 1 | This file is copied into the container along with environment.yml* from the 2 | parent folder. 
This is done to prevent the Dockerfile COPY instruction from
3 | failing if no environment.yml is found.
4 | 
--------------------------------------------------------------------------------
/src/setup.py:
--------------------------------------------------------------------------------
 1 | """Setup script."""
 2 | from setuptools import setup, find_packages
 3 | 
 4 | setup(
 5 |     # mandatory
 6 |     name="bayes_tutorial",
 7 |     # mandatory
 8 |     version="0.1",
 9 |     # mandatory
10 |     author="Eric J. Ma, Hugo Bowne-Anderson",
11 |     packages=find_packages(),
12 | )
13 | 
--------------------------------------------------------------------------------
/docs/glossary.md:
--------------------------------------------------------------------------------
1 | ## Random Processes
2 | 
3 | A "random process" is a _stochastic_ (as opposed to _deterministic_) sequence of outcome-producing _events/trials_.
4 | 
5 | An example is a series of coin flips.
6 | It is _stochastic_, because we are unable to deterministically control
7 | what the result of the coin flip (_event/trial_) is,
8 | and over multiple coin flips (_events/trials_), we get back a sequence of heads and tails (outcomes).
9 | 
--------------------------------------------------------------------------------
/notebooks/SciPy-2020/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def ECDF(data):
 5 |     x = np.sort(data)
 6 |     y = np.arange(1, len(x) + 1) / len(x)  # fraction of observations <= each sorted value
 7 | 
 8 |     return x, y
 9 | 
10 | 
11 | def despine(ax):
12 |     ax.spines['right'].set_visible(False)
13 |     ax.spines['top'].set_visible(False)
14 | 
15 | 
16 | def despine_traceplot(traceplot):
17 |     for row in traceplot:
18 |         for ax in row:
19 |             despine(ax)
--------------------------------------------------------------------------------
/notebooks/SciPy-2021/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def ECDF(data):
 5 |     x = np.sort(data)
 6 |     y = np.arange(1, len(x) + 1) / len(x)  # fraction of observations <= each sorted value
 7 | 
 8 |     return x, y
 9 | 
10 | 
11 | def despine(ax):
12 |     ax.spines['right'].set_visible(False)
13 |     ax.spines['top'].set_visible(False)
14 | 
15 | 
16 | def despine_traceplot(traceplot):
17 |     for row in traceplot:
18 |         for ax in row:
19 |             despine(ax)
--------------------------------------------------------------------------------
/notebooks/SciPy-2022/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def ECDF(data):
 5 |     x = np.sort(data)
 6 |     y = np.arange(1, len(x) + 1) / len(x)  # fraction of observations <= each sorted value
 7 | 
 8 |     return x, y
 9 | 
10 | 
11 | def despine(ax):
12 |     ax.spines['right'].set_visible(False)
13 |     ax.spines['top'].set_visible(False)
14 | 
15 | 
16 | def despine_traceplot(traceplot):
17 |     for row in traceplot:
18 |         for ax in row:
19 |             despine(ax)
--------------------------------------------------------------------------------
/.azure-pipelines/templates/nb-docs-win.yml:
--------------------------------------------------------------------------------
 1 | steps:
 2 |   - script: |
 3 |       activate bayesian-modelling-tutorial
 4 |       mkdir -p docs/notebooks
 5 |       jupyter nbconvert --config nbconvert_config.py --execute --template full
 6 |     displayName: 'Convert notebooks to HTML'
 7 | 
 8 |   - script: |
 9 |       activate bayesian-modelling-tutorial
10 |       pandoc README.md -o docs/index.html -c static/pandoc.css -s
11 |     displayName: "Make index page."
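    # pandoc renders the repository README into docs/index.html (styled with static/pandoc.css), so the docs landing page tracks the README.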
12 | 
--------------------------------------------------------------------------------
/notebooks/ODSC-East-2020-04-14/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def ECDF(data):
 5 |     x = np.sort(data)
 6 |     y = np.arange(1, len(x) + 1) / len(x)  # fraction of observations <= each sorted value
 7 | 
 8 |     return x, y
 9 | 
10 | 
11 | def despine(ax):
12 |     ax.spines['right'].set_visible(False)
13 |     ax.spines['top'].set_visible(False)
14 | 
15 | 
16 | def despine_traceplot(traceplot):
17 |     for row in traceplot:
18 |         for ax in row:
19 |             despine(ax)
--------------------------------------------------------------------------------
/notebooks/ODSC-Europe-2021-06-08/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def ECDF(data):
 5 |     x = np.sort(data)
 6 |     y = np.arange(1, len(x) + 1) / len(x)  # fraction of observations <= each sorted value
 7 | 
 8 |     return x, y
 9 | 
10 | 
11 | def despine(ax):
12 |     ax.spines['right'].set_visible(False)
13 |     ax.spines['top'].set_visible(False)
14 | 
15 | 
16 | def despine_traceplot(traceplot):
17 |     for row in traceplot:
18 |         for ax in row:
19 |             despine(ax)
--------------------------------------------------------------------------------
/notebooks/URGsADS-NYC-2020-02-19/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def ECDF(data):
 5 |     x = np.sort(data)
 6 |     y = np.arange(1, len(x) + 1) / len(x)  # fraction of observations <= each sorted value
 7 | 
 8 |     return x, y
 9 | 
10 | 
11 | def despine(ax):
12 |     ax.spines['right'].set_visible(False)
13 |     ax.spines['top'].set_visible(False)
14 | 
15 | 
16 | def despine_traceplot(traceplot):
17 |     for row in traceplot:
18 |         for ax in row:
19 |             despine(ax)
--------------------------------------------------------------------------------
/.azure-pipelines/templates/nb-docs-nix.yml:
--------------------------------------------------------------------------------
 1 | steps:
 2 |   - script: |
 3 |       source activate bayesian-modelling-tutorial
 4 |       mkdir -p docs/notebooks
 5 |       jupyter nbconvert --config nbconvert_config.py --execute --template full
 6 |     displayName: 'Convert notebooks to HTML'
 7 | 
 8 |   - script: |
 9 |       source activate bayesian-modelling-tutorial
10 |       pandoc README.md -o docs/index.html -c static/pandoc.css -s
11 |     displayName: "Make index page."
12 | 
--------------------------------------------------------------------------------
/.azure-pipelines/templates/setup-script-win.yml:
--------------------------------------------------------------------------------
 1 | steps:
 2 |   - script: |
 3 |       conda env create -f binder/environment.yml
 4 |       activate bayesian-modelling-tutorial
 5 |       conda install -y python=$(python.version)
 6 |       python -m ipykernel install --user --name bayesian-modelling-tutorial
 7 |     displayName: 'Create environment, install correct Python, and activate kernel.'
 8 | 
 9 |   - script: |
10 |       activate bayesian-modelling-tutorial
11 |       conda list
12 |     displayName: 'Display all packages, for diagnostic purposes.'
13 | 
--------------------------------------------------------------------------------
/.azure-pipelines/templates/setup-script-nix.yml:
--------------------------------------------------------------------------------
 1 | steps:
 2 |   - script: |
 3 |       conda env create -f binder/environment.yml
 4 |       source activate bayesian-modelling-tutorial
 5 |       conda install -y python=$(python.version)
 6 |       python -m ipykernel install --user --name bayesian-modelling-tutorial
 7 |     displayName: 'Create environment, install correct Python, and activate kernel.'
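    # Note: `source activate` (rather than `conda activate`) works in the agent's non-interactive shell without requiring `conda init` first.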
 8 | 
 9 |   - script: |
10 |       source activate bayesian-modelling-tutorial
11 |       conda list
12 |     displayName: 'Display all packages, for diagnostic purposes.'
13 | 
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.black]
 2 | line-length = 79
 3 | target-version = ['py38']
 4 | include = '\.pyi?$'
 5 | exclude = '''
 6 | 
 7 | (
 8 |   /(
 9 |       \.eggs         # exclude a few common directories in the
10 |     | \.git          # root of the project
11 |     | \.hg
12 |     | \.mypy_cache
13 |     | \.tox
14 |     | \.venv
15 |     | _build
16 |     | buck-out
17 |     | build
18 |     | dist
19 |   )/
20 |   | foo.py           # also separately exclude a file named foo.py in
21 |                      # the root of the project
22 | )
23 | '''
24 | 
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # See https://pre-commit.com for more information
 2 | # See https://pre-commit.com/hooks.html for more hooks
 3 | repos:
 4 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 5 |     rev: v2.4.0
 6 |     hooks:
 7 |       - id: trailing-whitespace
 8 |       - id: end-of-file-fixer
 9 |       - id: check-yaml
10 |       - id: check-added-large-files
11 |   - repo: https://github.com/kynan/nbstripout
12 |     rev: master
13 |     hooks:
14 |       - id: nbstripout
15 |         files: ".ipynb"
16 |   - repo: https://github.com/psf/black
17 |     rev: stable
18 |     hooks:
19 |       - id: black
20 | 
--------------------------------------------------------------------------------
/notebooks/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def ECDF(data):
 5 |     """Compute ECDF for a one-dimensional array of measurements."""
 6 |     # Number of data points
 7 |     n = len(data)
 8 | 
 9 |     # x-data for the ECDF
10 |     x = np.sort(data)
11 | 
12 |     # y-data for the ECDF
13 |     y = np.arange(1, n+1) / n
14 | 
15 |     return x, y
16 | 
17 | 
18 | def despine(ax):
19 |     ax.spines['right'].set_visible(False)
20 |     ax.spines['top'].set_visible(False)
21 | 
22 | 
23 | def despine_traceplot(traceplot):
24 |     for row in traceplot:
25 |         for ax in row:
26 |             despine(ax)
27 | 
--------------------------------------------------------------------------------
/checkenv.py:
--------------------------------------------------------------------------------
 1 | # Check that the packages are installed.
 2 | from pkgutil import iter_modules
 3 | import sys
 4 | 
 5 | 
 6 | def check_import(packagename):
 7 |     return packagename in (name for _, name, _ in iter_modules())
 8 | 
 9 | 
10 | assert sys.version_info >= (3, 6), 'Please install Python 3.6 or newer!'
11 | 
12 | packages = ['jupyter', 'pymc3', 'seaborn', 'matplotlib', 'numpy', 'scipy',
13 |             'pandas', 'tqdm', 'jupyterlab']
14 | 
15 | for p in packages:
16 |     assert check_import(p), \
17 |         '{0} not present. Please install via pip or conda.'.format(p)
18 | 
19 | print('All checks passed. Your environment is good to go!')
--------------------------------------------------------------------------------
/binder/environment.yml:
--------------------------------------------------------------------------------
 1 | name: bayesian-modelling-tutorial
 2 | channels:
 3 |   - conda-forge
 4 |   - defaults
 5 |   - anaconda
 6 |   - ericmjl
 7 | dependencies:
 8 |   - python>=3.8
 9 |   - jupyter
10 |   - jupyterlab>=3.0
11 |   - pymc=4.0
12 |   - pip
13 |   - seaborn
14 |   - matplotlib
15 |   - numpy
16 |   - scipy
17 |   - pandas
18 |   - tqdm
19 |   - missingno
20 |   - scikit-learn
21 |   - nodejs
22 |   - pyjanitor
23 |   - xarray
24 |   - pandoc
25 |   - black
26 |   - nbstripout
27 |   - pylint
28 |   - arviz
29 |   - theano
30 |   - ipykernel
31 |   - hvplot
32 |   - bokeh
33 |   - holoviews
34 |   - mkl
35 |   - mkl-service
36 |   - pre-commit
37 |   - networkx
38 |   - mamba
39 |   - "python-graphviz"
40 |   - pyprojroot
41 |   - faker
42 |   - pip:
43 |       - mkdocs
44 |       - mkdocs-material
45 |       - mknotebooks
46 |       - daft
47 |       - gapminder
48 |       # Need to add pip install from github for repo, so solutions can be executed.
--------------------------------------------------------------------------------
/reminders.md:
--------------------------------------------------------------------------------
1 | # Reminders
2 | 
3 | - The datasets we are playing with here are idiosyncratic. There may be questions regarding the data. Remember to encourage participants to ask questions about the data publicly, so that everybody can benefit together.
4 | - The corollary to this fact: we may end up with 5-10 minutes of answering questions regarding the data. We should announce up-front that this is okay to do.
5 | 
6 | 
7 | 
--------------------------------------------------------------------------------
/data/baseballdb/README.txt:
--------------------------------------------------------------------------------
 1 | Baseball Databank is a compilation of historical baseball data in a
 2 | convenient, tidy format, distributed under Open Data terms.
 3 | 
 4 | This work is licensed under a Creative Commons Attribution-ShareAlike
 5 | 3.0 Unported License. For details see:
 6 | http://creativecommons.org/licenses/by-sa/3.0/
 7 | 
 8 | Person identification and demographics data are provided by
 9 | Chadwick Baseball Bureau (http://www.chadwick-bureau.com),
10 | from its Register of baseball personnel.
11 | 
12 | Player performance data for 1871 through 2014 is based on the
13 | Lahman Baseball Database, version 2015-01-24, which is
14 | Copyright (C) 1996-2015 by Sean Lahman.
15 | 
16 | The tables Parks.csv and HomeGames.csv are based on the game logs
17 | and park code table published by Retrosheet.
18 | This information is available free of charge from and is copyrighted
19 | by Retrosheet. Interested parties may contact Retrosheet at
20 | http://www.retrosheet.org.
21 | 
22 | 
23 | 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Base image: miniconda3
 2 | FROM continuumio/miniconda3
 3 | 
 4 | # Install GCC for Theano (refresh the package lists first, so the install does not fail on a fresh image)
 5 | RUN apt-get update && apt-get install -y build-essential
 6 | 
 7 | # Install environment
 8 | COPY ./binder/environment.yml /environment.yml
 9 | RUN conda env create -f /environment.yml
10 | RUN rm /environment.yml
11 | 
12 | ENV PATH /opt/conda/envs/bayesian-modelling-tutorial/bin:$PATH
13 | # For debugging purposes during environment build
14 | RUN conda list -n bayesian-modelling-tutorial
15 | 
16 | # Install jupyterlab extensions
17 | RUN jupyter labextension install @pyviz/jupyterlab_pyviz
18 | RUN jupyter labextension install @jupyter-widgets/jupyterlab-manager
19 | 
20 | # Copy contents of repository into the image
21 | COPY . /root/bayes
22 | WORKDIR /root/bayes
23 | 
24 | # Create Jupyter kernel to match the kernelspec name used by the notebooks
25 | RUN python -m ipykernel install --user --name bayesian-modelling-tutorial
26 | 
27 | # Entry point is Jupyter lab
28 | ENTRYPOINT jupyter lab --port 8999 --ip="*" --allow-root --no-browser
--------------------------------------------------------------------------------
/docs/advanced/diagnostics.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": null,
 6 |    "metadata": {},
 7 |    "outputs": [],
 8 |    "source": [
 9 |     "%load_ext autoreload\n",
10 |     "%autoreload 2\n",
11 |     "%matplotlib inline\n",
12 |     "%config InlineBackend.figure_format = 'retina'"
13 |    ]
14 |   },
15 |   {
16 |    "cell_type": "code",
17 |    "execution_count": null,
18 |    "metadata": {},
19 |    "outputs": [],
20 |    "source": []
21 |   }
22 |  ],
23 |  "metadata": {
24 |   "kernelspec": {
25 |    "display_name": "bayesian-modelling-tutorial",
26 |    "language": "python",
27 |    "name": "bayesian-modelling-tutorial"
28 |   },
29 |   "language_info": {
30 |    "codemirror_mode": {
31 |     "name": "ipython",
32 |     "version": 3
33 |    },
34 |    "file_extension": ".py",
35 |    "mimetype": "text/x-python",
36 |    "name": "python",
37 |    "nbconvert_exporter": "python",
38 |    "pygments_lexer": "ipython3",
39 |    "version": "3.8.5"
40 |   }
41 |  },
42 |  "nbformat": 4,
43 |  "nbformat_minor": 4
44 | }
45 | 
--------------------------------------------------------------------------------
/.azure-pipelines/macos.yml:
--------------------------------------------------------------------------------
 1 | trigger:
 2 | - master
 3 | 
 4 | variables:
 5 |   miniconda.url: https://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh
 6 | 
 7 | pool:
 8 |   vmImage: macOS-10.14
 9 | 
10 | steps:
11 |   # Add conda to PATH.
12 | 
13 |   - bash: echo "##vso[task.prependpath]$CONDA/bin"
14 |     displayName: Add conda to PATH
15 | 
16 |   # - script: |
17 |   #     # Install Python, py.test, and required packages.
18 |   #     conda env create -f binder/environment.yml
19 |   #     source activate bayesian-modelling-tutorial
20 |   #     conda install -y python=$(python.version)
21 |   #     python -m ipykernel install --user --name bayesian-modelling-tutorial
22 |   #   displayName: 'Create environment, install correct Python, and activate kernel.'
23 | 
24 |   # # Q: Does second script not recognize environment context from 1st script?
25 |   # - script: |
26 |   #     source activate bayesian-modelling-tutorial
27 |   #     mkdir -p docs/notebooks
28 |   #     jupyter nbconvert --config nbconvert_config.py --execute --template full
29 |   #     pandoc README.md -o docs/index.html -c static/pandoc.css -s
30 |   #   displayName: 'Build docs pages'
31 | 
--------------------------------------------------------------------------------
/scripts/make_iq.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | 
 3 | 
 4 | def main():
 5 |     drug = [ 99., 110., 107., 104., 103., 105., 105., 110.,  99.,
 6 |             109., 100., 102., 104., 104., 100., 104., 101., 104.,
 7 |             101., 100., 109., 104., 105., 112.,  97., 106., 103.,
 8 |             101., 101., 104.,  96., 102., 101., 100.,  92., 108.,
 9 |              97., 106.,  96.,  90., 109., 108., 105., 104., 110.,
10 |              92., 100.]
11 | 
12 |     placebo = [ 95., 105., 103.,  99., 104.,  98., 103., 104., 102.,
13 |                 91.,  97., 101., 100., 113.,  98., 102., 100., 105.,
14 |                 97.,  94., 104.,  92.,  98., 105., 106., 101., 106.,
15 |                105., 101., 105., 102.,  95.,  91.,  99.,  96., 102.,
16 |                 94.,  93.,  99.,  99., 113.,  96.]
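    # Stack both arms into a tidy (treatment, iq) table; load_kruschke() in notebooks/data.py reads back the CSV written below.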
17 | 18 | 19 | data = dict() 20 | data['treatment'] = ['drug'] * len(drug) + ['placebo'] * len(placebo) 21 | data['iq'] = drug + placebo 22 | 23 | df = pd.DataFrame(data) 24 | df.to_csv('../data/iq.csv') 25 | 26 | 27 | if __name__ == "__main__": 28 | main() 29 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Eric Ma 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.azure-pipelines/linux.yml: -------------------------------------------------------------------------------- 1 | trigger: 2 | - master 3 | 4 | variables: 5 | miniconda.url: https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh 6 | 7 | strategy: 8 | matrix: 9 | py37: 10 | python.version: "3.7" 11 | 12 | pool: 13 | vmImage: ubuntu-16.04 14 | 15 | steps: 16 | # Add conda to PATH. 17 | - bash: echo "##vso[task.prependpath]$CONDA/bin" 18 | displayName: Add conda to PATH 19 | 20 | - script: | 21 | # Install Python, py.test, and required packages. 22 | conda env create -f binder/environment.yml 23 | source activate bayesian-modelling-tutorial 24 | conda install -y python=$(python.version) 25 | python -m ipykernel install --user --name bayesian-modelling-tutorial 26 | displayName: 'Create environment, install correct Python, and activate kernel.' 27 | 28 | # Q: Does second script not recognize environment context from 1st script? 29 | - script: | 30 | source activate bayesian-modelling-tutorial 31 | mkdir -p docs/notebooks 32 | jupyter nbconvert --config nbconvert_config.py --execute --template full 33 | pandoc README.md -o docs/index.html -c static/pandoc.css -s 34 | displayName: 'Build docs pages' 35 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/vscode-remote/devcontainer.json or this file's README at: 2 | // https://github.com/microsoft/vscode-dev-containers/tree/v0.117.1/containers/python-3-miniconda 3 | { 4 | "name": "Python 3 - Miniconda", 5 | "context": "..", 6 | "dockerFile": "Dockerfile", 7 | // Set *default* container specific settings.json values on container create. 
8 | "settings": { 9 | "terminal.integrated.shell.linux": "/bin/bash", 10 | "python.pythonPath": "/opt/conda/bin/python", 11 | "python.linting.enabled": true, 12 | "python.linting.pylintEnabled": true, 13 | "python.linting.pylintPath": "/opt/conda/bin/pylint" 14 | }, 15 | // Add the IDs of extensions you want installed when the container is created. 16 | "extensions": [ 17 | "ms-python.python" 18 | ], 19 | // Use 'forwardPorts' to make a list of ports inside the container available locally. 20 | "forwardPorts": [ 21 | 5959, 22 | 8000, 23 | 8888, 24 | 8889, 25 | 8890, 26 | 8891, 27 | ], 28 | // Use 'postCreateCommand' to run commands after the container is created. 29 | // "postCreateCommand": "python --version", 30 | // Uncomment to connect as a non-root user. See https://aka.ms/vscode-remote/containers/non-root. 31 | // "remoteUser": "vscode" 32 | } 33 | -------------------------------------------------------------------------------- /data/microtubules_14.csv: -------------------------------------------------------------------------------- 1 | 60 2 | 75 3 | 75 4 | 85 5 | 115 6 | 115 7 | 135 8 | 140 9 | 145 10 | 150 11 | 155 12 | 165 13 | 180 14 | 190 15 | 210 16 | 210 17 | 215 18 | 220 19 | 225 20 | 235 21 | 235 22 | 240 23 | 240 24 | 245 25 | 260 26 | 260 27 | 265 28 | 265 29 | 265 30 | 275 31 | 275 32 | 275 33 | 280 34 | 280 35 | 280 36 | 280 37 | 290 38 | 295 39 | 295 40 | 300 41 | 310 42 | 310 43 | 315 44 | 320 45 | 325 46 | 325 47 | 325 48 | 325 49 | 325 50 | 330 51 | 330 52 | 335 53 | 345 54 | 345 55 | 345 56 | 355 57 | 355 58 | 365 59 | 365 60 | 365 61 | 370 62 | 370 63 | 385 64 | 385 65 | 395 66 | 405 67 | 410 68 | 415 69 | 425 70 | 430 71 | 435 72 | 435 73 | 435 74 | 435 75 | 440 76 | 445 77 | 445 78 | 450 79 | 450 80 | 450 81 | 455 82 | 460 83 | 460 84 | 465 85 | 465 86 | 470 87 | 470 88 | 470 89 | 480 90 | 490 91 | 500 92 | 505 93 | 505 94 | 515 95 | 525 96 | 525 97 | 545 98 | 545 99 | 555 100 | 560 101 | 570 102 | 570 103 | 585 104 | 590 105 | 590 106 | 615 107 | 625 108 | 630 109 | 640 110 | 640 111 | 650 112 | 655 113 | 680 114 | 685 115 | 685 116 | 690 117 | 695 118 | 695 119 | 705 120 | 730 121 | 755 122 | 760 123 | 765 124 | 770 125 | 770 126 | 790 127 | 795 128 | 795 129 | 820 130 | 830 131 | 840 132 | 850 133 | 875 134 | 890 135 | 975 136 | 1000 137 | 1005 138 | 1135 139 | 1305 140 | 1400 141 | 1420 -------------------------------------------------------------------------------- /data/Prussion Horse-Kick Data.csv: -------------------------------------------------------------------------------- 1 | "Year","GC","C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11","C14","C15" 2 | "1875","0","0","0","0","0","0","0","1","1","0","0","0","1","0" 3 | "1876","2","0","0","0","1","0","0","0","0","0","0","0","1","1" 4 | "1877","2","0","0","0","0","0","1","1","0","0","1","0","2","0" 5 | "1878","1","2","2","1","1","0","0","0","0","0","1","0","1","0" 6 | "1879","0","0","0","1","1","2","2","0","1","0","0","2","1","0" 7 | "1880","0","3","2","1","1","1","0","0","0","2","1","4","3","0" 8 | "1881","1","0","0","2","1","0","0","1","0","1","0","0","0","0" 9 | "1882","1","2","0","0","0","0","1","0","1","1","2","1","4","1" 10 | "1883","0","0","1","2","0","1","2","1","0","1","0","3","0","0" 11 | "1884","3","0","1","0","0","0","0","1","0","0","2","0","1","1" 12 | "1885","0","0","0","0","0","0","1","0","0","2","0","1","0","1" 13 | "1886","2","1","0","0","1","1","1","0","0","1","0","1","3","0" 14 | "1887","1","1","2","1","0","0","3","2","1","1","0","1","2","0" 15 | 
"1888","0","1","1","0","0","1","1","0","0","0","0","1","1","0" 16 | "1889","0","0","1","1","0","1","1","0","0","1","2","2","0","2" 17 | "1890","1","2","0","2","0","1","1","2","0","2","1","1","2","2" 18 | "1891","0","0","0","1","1","1","0","1","1","0","3","3","1","0" 19 | "1892","1","3","2","0","1","1","3","0","1","1","0","1","1","0" 20 | "1893","0","1","0","0","0","1","0","2","0","0","1","3","0","0" 21 | "1894","1","0","0","0","0","0","0","0","1","0","1","1","0","0" -------------------------------------------------------------------------------- /data/iq.csv: -------------------------------------------------------------------------------- 1 | ,treatment,iq 2 | 0,drug,99.0 3 | 1,drug,110.0 4 | 2,drug,107.0 5 | 3,drug,104.0 6 | 4,drug,103.0 7 | 5,drug,105.0 8 | 6,drug,105.0 9 | 7,drug,110.0 10 | 8,drug,99.0 11 | 9,drug,109.0 12 | 10,drug,100.0 13 | 11,drug,102.0 14 | 12,drug,104.0 15 | 13,drug,104.0 16 | 14,drug,100.0 17 | 15,drug,104.0 18 | 16,drug,101.0 19 | 17,drug,104.0 20 | 18,drug,101.0 21 | 19,drug,100.0 22 | 20,drug,109.0 23 | 21,drug,104.0 24 | 22,drug,105.0 25 | 23,drug,112.0 26 | 24,drug,97.0 27 | 25,drug,106.0 28 | 26,drug,103.0 29 | 27,drug,101.0 30 | 28,drug,101.0 31 | 29,drug,104.0 32 | 30,drug,96.0 33 | 31,drug,102.0 34 | 32,drug,101.0 35 | 33,drug,100.0 36 | 34,drug,92.0 37 | 35,drug,108.0 38 | 36,drug,97.0 39 | 37,drug,106.0 40 | 38,drug,96.0 41 | 39,drug,90.0 42 | 40,drug,109.0 43 | 41,drug,108.0 44 | 42,drug,105.0 45 | 43,drug,104.0 46 | 44,drug,110.0 47 | 45,drug,92.0 48 | 46,drug,100.0 49 | 47,placebo,95.0 50 | 48,placebo,105.0 51 | 49,placebo,103.0 52 | 50,placebo,99.0 53 | 51,placebo,104.0 54 | 52,placebo,98.0 55 | 53,placebo,103.0 56 | 54,placebo,104.0 57 | 55,placebo,102.0 58 | 56,placebo,91.0 59 | 57,placebo,97.0 60 | 58,placebo,101.0 61 | 59,placebo,100.0 62 | 60,placebo,113.0 63 | 61,placebo,98.0 64 | 62,placebo,102.0 65 | 63,placebo,100.0 66 | 64,placebo,105.0 67 | 65,placebo,97.0 68 | 66,placebo,94.0 69 | 67,placebo,104.0 70 | 68,placebo,92.0 71 | 69,placebo,98.0 72 | 70,placebo,105.0 73 | 71,placebo,106.0 74 | 72,placebo,101.0 75 | 73,placebo,106.0 76 | 74,placebo,105.0 77 | 75,placebo,101.0 78 | 76,placebo,105.0 79 | 77,placebo,102.0 80 | 78,placebo,95.0 81 | 79,placebo,91.0 82 | 80,placebo,99.0 83 | 81,placebo,96.0 84 | 82,placebo,102.0 85 | 83,placebo,94.0 86 | 84,placebo,93.0 87 | 85,placebo,99.0 88 | 86,placebo,99.0 89 | 87,placebo,113.0 90 | 88,placebo,96.0 91 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # Custom 104 | *.md.tmp 105 | .vscode/* 106 | -------------------------------------------------------------------------------- /docs/advanced/regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "%load_ext autoreload\n", 10 | "%autoreload 2\n", 11 | "%matplotlib inline\n", 12 | "%config InlineBackend.figure_format = 'retina'" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "## Introduction\n", 20 | "\n", 21 | "In this chapter, we are going to look at how to do _regression_ modelling.\n", 22 | "\n", 23 | "Regression modelling takes estimation one step further\n", 24 | "beyond what we've been doing these past few chapters.\n", 25 | "Prior to this chapter, we were doing pure _estimation_.\n", 26 | "There was a key parameter of interest, $p$,\n", 27 | "which represented something useful about our system under study\n", 28 | "(average favourability of ice cream shops).\n", 29 | "We collected data that gave us hints\n", 30 | "as to what the \"true\" value of the key parameter of interest might be." 
31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [] 39 | } 40 | ], 41 | "metadata": { 42 | "kernelspec": { 43 | "display_name": "bayesian-modelling-tutorial", 44 | "language": "python", 45 | "name": "bayesian-modelling-tutorial" 46 | }, 47 | "language_info": { 48 | "codemirror_mode": { 49 | "name": "ipython", 50 | "version": 3 51 | }, 52 | "file_extension": ".py", 53 | "mimetype": "text/x-python", 54 | "name": "python", 55 | "nbconvert_exporter": "python", 56 | "pygments_lexer": "ipython3", 57 | "version": "3.8.5" 58 | } 59 | }, 60 | "nbformat": 4, 61 | "nbformat_minor": 4 62 | } 63 | -------------------------------------------------------------------------------- /data/baseballdb/core/TeamsHalf.csv: -------------------------------------------------------------------------------- 1 | yearID,lgID,teamID,Half,divID,DivWin,Rank,G,W,L 2 | 1981,NL,ATL,1,W,N,4,54,25,29 3 | 1981,NL,ATL,2,W,N,5,52,25,27 4 | 1981,AL,BAL,1,E,N,2,54,31,23 5 | 1981,AL,BAL,2,E,N,4,51,28,23 6 | 1981,AL,BOS,1,E,N,5,56,30,26 7 | 1981,AL,BOS,2,E,N,2,52,29,23 8 | 1981,AL,CAL,1,W,N,4,60,31,29 9 | 1981,AL,CAL,2,W,N,6,50,20,30 10 | 1981,AL,CHA,1,W,N,3,53,31,22 11 | 1981,AL,CHA,2,W,N,6,53,23,30 12 | 1981,NL,CHN,1,E,N,6,52,15,37 13 | 1981,NL,CHN,2,E,N,5,51,23,28 14 | 1981,NL,CIN,1,W,N,2,56,35,21 15 | 1981,NL,CIN,2,W,N,2,52,31,21 16 | 1981,AL,CLE,1,E,N,6,50,26,24 17 | 1981,AL,CLE,2,E,N,5,53,26,27 18 | 1981,AL,DET,1,E,N,4,57,31,26 19 | 1981,AL,DET,2,E,N,2,52,29,23 20 | 1981,NL,HOU,1,W,N,3,57,28,29 21 | 1981,NL,HOU,2,W,N,1,53,33,20 22 | 1981,AL,KCA,1,W,N,5,50,20,30 23 | 1981,AL,KCA,2,W,N,1,53,30,23 24 | 1981,NL,LAN,1,W,N,1,57,36,21 25 | 1981,NL,LAN,2,W,N,4,53,27,26 26 | 1981,AL,MIN,1,W,N,7,56,17,39 27 | 1981,AL,MIN,2,W,N,4,53,24,29 28 | 1981,AL,ML4,1,E,N,3,56,31,25 29 | 1981,AL,ML4,2,E,N,1,53,31,22 30 | 1981,NL,MON,1,E,N,3,55,30,25 31 | 1981,NL,MON,2,E,N,1,53,30,23 32 | 1981,AL,NYA,1,E,N,1,56,34,22 33 | 1981,AL,NYA,2,E,N,5,51,25,26 34 | 1981,NL,NYN,1,E,N,5,51,17,34 35 | 1981,NL,NYN,2,E,N,4,52,24,28 36 | 1981,AL,OAK,1,W,N,1,60,37,23 37 | 1981,AL,OAK,2,W,N,2,49,27,22 38 | 1981,NL,PHI,1,E,N,1,55,34,21 39 | 1981,NL,PHI,2,E,N,3,52,25,27 40 | 1981,NL,PIT,1,E,N,4,48,25,23 41 | 1981,NL,PIT,2,E,N,6,54,21,33 42 | 1981,NL,SDN,1,W,N,6,56,23,33 43 | 1981,NL,SDN,2,W,N,6,54,18,36 44 | 1981,AL,SEA,1,W,N,6,57,21,36 45 | 1981,AL,SEA,2,W,N,5,52,23,29 46 | 1981,NL,SFN,1,W,N,5,59,27,32 47 | 1981,NL,SFN,2,W,N,3,52,29,23 48 | 1981,NL,SLN,1,E,N,2,50,30,20 49 | 1981,NL,SLN,2,E,N,2,52,29,23 50 | 1981,AL,TEX,1,W,N,2,55,33,22 51 | 1981,AL,TEX,2,W,N,3,50,24,26 52 | 1981,AL,TOR,1,E,N,7,58,16,42 53 | 1981,AL,TOR,2,E,N,6,48,21,27 54 | -------------------------------------------------------------------------------- /notebooks/SciPy-2022/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "d4082f80-a5f2-4f51-98ae-42b7e3949f89", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import nbformat\n", 11 | "\n" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 3, 17 | "id": "6b2c6b19-dc33-47a7-ba62-0d6ca5e6362b", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "# Reading the notebooks\n", 22 | "first_notebook = nbformat.read('02-Instructor-Parameter_estimation_hypothesis_testing.ipynb', 4)\n", 23 | "second_notebook = nbformat.read('03-Instructor-PPL-first-steps.ipynb', 4)\n", 24 | "\n", 25 | "# Creating a new 
notebook\n", 26 | "final_notebook = nbformat.v4.new_notebook(metadata=first_notebook.metadata)\n", 27 | "\n", 28 | "# Concatenating the notebooks\n", 29 | "final_notebook.cells = first_notebook.cells + second_notebook.cells\n", 30 | "\n", 31 | "# Saving the new notebook \n", 32 | "nbformat.write(final_notebook, 'final_notebook.ipynb')" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "id": "803fc9af-4a3a-4acc-9b9c-b657f85c44a9", 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [] 42 | } 43 | ], 44 | "metadata": { 45 | "kernelspec": { 46 | "display_name": "Python 3 (ipykernel)", 47 | "language": "python", 48 | "name": "python3" 49 | }, 50 | "language_info": { 51 | "codemirror_mode": { 52 | "name": "ipython", 53 | "version": 3 54 | }, 55 | "file_extension": ".py", 56 | "mimetype": "text/x-python", 57 | "name": "python", 58 | "nbconvert_exporter": "python", 59 | "pygments_lexer": "ipython3", 60 | "version": "3.9.12" 61 | } 62 | }, 63 | "nbformat": 4, 64 | "nbformat_minor": 5 65 | } 66 | -------------------------------------------------------------------------------- /.azure-pipelines/azure-pipelines.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | - job: macOS 3 | strategy: 4 | matrix: 5 | py37: 6 | python.version: "3.7" 7 | pool: 8 | vmImage: macOS-10.14 9 | steps: 10 | # https://docs.microsoft.com/en-us/azure/devops/pipelines/languages/anaconda?view=azure-devops&tabs=macos 11 | - bash: echo "##vso[task.prependpath]$CONDA/bin" 12 | displayName: Add conda to PATH 13 | 14 | # On Hosted macOS, the agent user doesn't have ownership of Miniconda's installation directory/ 15 | # We need to take ownership if we want to update conda or install packages globally 16 | - bash: sudo chown -R $USER $CONDA 17 | displayName: Take ownership of conda installation 18 | - template: templates/setup-script-nix.yml 19 | - template: templates/nb-docs-nix.yml 20 | 21 | - job: linux 22 | strategy: 23 | matrix: 24 | py37: 25 | python.version: "3.7" 26 | pool: 27 | vmImage: ubuntu-16.04 28 | steps: 29 | # https://docs.microsoft.com/en-us/azure/devops/pipelines/languages/anaconda?view=azure-devops&tabs=ubuntu-16-04 30 | - bash: echo "##vso[task.prependpath]$CONDA/bin" 31 | displayName: Add conda to PATH 32 | 33 | - template: templates/setup-script-nix.yml 34 | - template: templates/nb-docs-nix.yml 35 | 36 | # - job: windows 37 | # strategy: 38 | # matrix: 39 | # py37: 40 | # python.version: "3.7" 41 | # py36: 42 | # python.version: "3.6" 43 | # pool: 44 | # vmImage: vs2017-win2016 45 | # steps: 46 | # # https://docs.microsoft.com/en-us/azure/devops/pipelines/languages/anaconda?view=azure-devops&tabs=vs2017 47 | # - powershell: Write-Host "##vso[task.prependpath]$env:CONDA\Scripts" 48 | # displayName: Add conda to PATH 49 | 50 | # - template: templates/setup-script-win.yml 51 | # - template: templates/nb-docs-win.yml 52 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # This is necessary because matplotlib is involved. 
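# xvfb provides a virtual X display so matplotlib can render figures on the headless CI machine.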
2 | before_script: 3 | - "export DISPLAY=:99.0" 4 | - "sh -e /etc/init.d/xvfb start" 5 | - sleep 5 # give xvfb some time to start 6 | 7 | language: python 8 | matrix: 9 | include: 10 | - python: 3.5 # we don't actually use this 11 | env: PYTHON_VERSION=3.7 12 | # command to install dependencies 13 | install: 14 | - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh 15 | - bash miniconda.sh -b -p $HOME/anaconda 16 | - export PATH="$HOME/anaconda/bin:$PATH" 17 | - hash -r 18 | - conda config --set always_yes yes --set changeps1 no 19 | - conda update -q conda 20 | - conda config --add channels conda-forge 21 | 22 | # Useful for debugging any issues with conda 23 | - conda info -a 24 | 25 | # Install Python, py.test, and required packages. 26 | - conda env create -f binder/environment.yml 27 | - source activate bayesian-modelling-tutorial 28 | # This guarantees that the Python version is matrixed. 29 | - conda install python=$PYTHON_VERSION 30 | - python -m ipykernel install --user --name bayesian-modelling-tutorial 31 | # command to run tests 32 | script: 33 | - mkdir -p docs/notebooks 34 | # All notebooks that need to be converted are inside nbconvert_config.py 35 | # We have to call on --execute - without doing so, the execution config 36 | # inside nbconvert_config.py won't run. 37 | - jupyter nbconvert --config nbconvert_config.py --execute --template full 38 | # Build the index page. 39 | - pandoc README.md -o docs/index.html -c static/pandoc.css -s 40 | 41 | 42 | deploy: 43 | provider: pages 44 | skip-cleanup: true 45 | github-token: $GITHUB_TOKEN # Set in the settings page of your repository, as a secure variable 46 | keep-history: true 47 | # We read the master branch 48 | on: 49 | branch: master 50 | # Take the docs/ directory 51 | local-dir: docs 52 | # Publish to the gh-pages branch 53 | target-branch: gh-pages 54 | verbose: true 55 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: Bayesian Stats Modelling Tutorial 2 | site_url: https://ericmjl.github.io/bayesian-stats-modelling-tutorial 3 | 4 | theme: 5 | name: "material" 6 | palette: 7 | primary: "yellow" 8 | accent: "yellow" 9 | icon: 10 | logo: "fontawesome/solid/book" 11 | features: 12 | - instant 13 | - tabs 14 | language: en 15 | 16 | # We customize the navigation by hand to control the order 17 | # in which pages show up. 
18 | nav:
19 |   - Bayesian Stats Modelling Tutorial:
20 |       - Welcome: index.md
21 |   - Basics:
22 |       - Probability Distributions: "basics/probability.ipynb"
23 |       - Statistical Simulation: "basics/probabilistic_stories.ipynb"
24 |       - Basic Inference: "basics/inference.ipynb"
25 |   - Intermediate Topics:
26 |       - Multi-Group Estimation: "intermediate/estimation.ipynb"
27 |       - Hierarchical Modelling: "intermediate/hierarchical.ipynb"
28 |       - MCMC Sampling BTS: "intermediate/sampling.ipynb"
29 |   - Advanced Topics:
30 |       - Reparametrization: "advanced/reparametrization.ipynb"
31 |       - MCMC Diagnostics: "advanced/diagnostics.ipynb"
32 |   - Resources:
33 |       - Glossary: "glossary.md"
34 | 
35 | plugins:
36 |   - search
37 |   - mknotebooks:
38 |       execute: true
39 |       write_markdown: true
40 |       allow_errors: true
41 |       timeout: 1200
42 | 
43 | # Taken from here: https://squidfunk.github.io/mkdocs-material/extensions/codehilite/
44 | markdown_extensions:
45 |   - codehilite
46 |   - admonition
47 |   - pymdownx.tabbed
48 |   - pymdownx.arithmatex
49 |   - pymdownx.details
50 |   - pymdownx.superfences
51 |   - markdown.extensions.attr_list
52 | 
53 | extra_javascript:
54 |   - https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.0/MathJax.js?config=TeX-AMS-MML_HTMLorMML
55 | 
56 | # extra_css:
57 | #   - css/nb_mods.css
58 | #   - css/apidocs.css
59 | 
60 | # repo_name: null
61 | # repo_url: null
62 | 
63 | extra:
64 |   social:
65 |     - icon: "fontawesome/brands/github"
66 |       link: "https://github.com/ericmjl"
67 |     - icon: "fontawesome/brands/twitter"
68 |       link: "https://twitter.com/ericmjl"
69 |     - icon: "fontawesome/brands/linkedin"
70 |       link: "https://linkedin.com/in/ericmjl"
--------------------------------------------------------------------------------
/src/bayes_tutorial/solutions/inference.py:
--------------------------------------------------------------------------------
 1 | import matplotlib.pyplot as plt
 2 | from scipy.stats import beta, poisson, norm
 3 | import numpy as np
 4 | import pymc3 as pm
 5 | import arviz as az
 6 | from functools import lru_cache
 7 | 
 8 | 
 9 | def plot_betadist_pdf(a, b):
10 |     dist = beta(a, b)  # avoid shadowing the parameter `b`
11 |     x = np.linspace(0, 1, 1000)
12 |     pdf = dist.pdf(x)
13 |     plt.plot(x, pdf)
14 | 
15 | 
16 | def coin_flip_data():
17 |     return np.array([1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1,])
18 | 
19 | 
20 | def car_crash_data():
21 |     return poisson(3).rvs(25)
22 | 
23 | 
24 | def finch_beak_data():
25 |     return norm(12, 1).rvs(38)
26 | 
27 | 
28 | def car_crash_model_generator():
29 |     data = car_crash_data()
30 |     with pm.Model() as car_crash_model:
31 |         mu = pm.Exponential("mu", lam=1 / 29.0)
32 |         like = pm.Poisson("like", mu=mu, observed=data)
33 |     return car_crash_model
34 | 
35 | 
36 | def car_crash_interpretation():
37 |     ans = """
38 | We believe that the rate of car crashes per week
39 | is anywhere between 2.3 and 3.6 (94% posterior HDI),
40 | with a mean of 2.9.
41 | """ 42 | return ans 43 | 44 | 45 | def model_inference_answer(model): 46 | with model: 47 | trace = pm.sample(2000) 48 | trace = az.from_pymc3(trace) 49 | return trace 50 | 51 | 52 | def model_trace_answer(trace): 53 | az.plot_trace(trace) 54 | 55 | 56 | def model_posterior_answer(trace): 57 | az.plot_posterior(trace) 58 | 59 | 60 | def finch_beak_model_generator(): 61 | data = finch_beak_data() 62 | with pm.Model() as finch_beak_model: 63 | mu = pm.Normal("mu", mu=10, sigma=3) 64 | sigma = pm.Exponential("sigma", lam=1 / 29.0) 65 | like = pm.Normal("like", mu=mu, sigma=sigma, observed=data) 66 | return finch_beak_model 67 | 68 | 69 | def finch_beak_interpretation(): 70 | ans = """ 71 | Having seen the data, we believe that finch beak lengths 72 | are expected to be between 11.7 and 12.1 (approx, 94% HDI), 73 | with an average of 11.9. 74 | 75 | We also believe that the intrinsic variance of finch beak lengths 76 | across the entire population of finches 77 | is estimated to be around 0.56 to 0.87, 78 | with an average of 0.7. 79 | """ 80 | return ans 81 | -------------------------------------------------------------------------------- /notebooks/data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.preprocessing import LabelEncoder 3 | import janitor as jn 4 | import numpy as np 5 | 6 | 7 | def load_finches_2012(): 8 | path = '../data/finch_beaks_2012.csv' 9 | return load_finches(path) 10 | 11 | 12 | def load_finches_1975(): 13 | path = '../data/finch_beaks_1975.csv' 14 | df = load_finches(path) 15 | df = df.rename_column('beak_length_mm', 'beak_length').rename_column('beak_depth_mm', 'beak_depth') 16 | return df 17 | 18 | 19 | def load_finches(path): 20 | # Load the data 21 | df = ( 22 | pd.read_csv(path) 23 | .clean_names() # clean column names 24 | .rename_column('blength', 'beak_length') # rename blength to beak_length (readability fix) 25 | .rename_column('bdepth', 'beak_depth') # rename bdepth to beak_depth (readability fix) 26 | .label_encode('species') # create a `species_enc` column that has the species encoded numerically 27 | ) 28 | return df 29 | 30 | 31 | def load_baseball(): 32 | df = pd.read_csv('../data/baseballdb/core/Batting.csv') 33 | df['AB'] = df['AB'].replace(0, np.nan) 34 | df = df.dropna() 35 | df['batting_avg'] = df['H'] / df['AB'] 36 | df = df[df['yearID'] >= 2016] 37 | df = df.iloc[0:15] 38 | df.head(5) 39 | return df 40 | 41 | 42 | def load_sterilization(): 43 | df = ( 44 | pd.read_csv('../data/sterilization.csv', na_filter=True, na_values=['#DIV/0!']) 45 | .clean_names() 46 | .label_encode('treatment') 47 | ) 48 | mapping = dict(zip(df['treatment'], df['treatment_enc'])) 49 | return df, mapping 50 | 51 | 52 | def load_kruschke(): 53 | df = ( 54 | pd.read_csv('../data/iq.csv', index_col=0) # comment out the path to the file for students. 
55 | .label_encode('treatment') 56 | ) 57 | return df 58 | 59 | 60 | # Constants for load_decay 61 | tau = 71.9 # indium decay half life 62 | A = 42 # starting magnitude 63 | C = 21 # measurement error 64 | noise_scale = 1 65 | 66 | 67 | def load_decay(): 68 | t = np.arange(0, 1000) 69 | def decay_func(ts, noise): 70 | return A * np.exp(-t/tau) + C + np.random.normal(0, noise, size=(len(t))) 71 | 72 | data = {'t': t, 'activity': decay_func(t, noise_scale)} 73 | df = pd.DataFrame(data) 74 | return df -------------------------------------------------------------------------------- /notebooks/SciPy-2020/data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.preprocessing import LabelEncoder 3 | import janitor as jn 4 | import numpy as np 5 | 6 | 7 | def load_finches_2012(): 8 | path = '../data/finch_beaks_2012.csv' 9 | return load_finches(path) 10 | 11 | 12 | def load_finches_1975(): 13 | path = '../data/finch_beaks_1975.csv' 14 | df = load_finches(path) 15 | df = df.rename_column('beak_length_mm', 'beak_length').rename_column('beak_depth_mm', 'beak_depth') 16 | return df 17 | 18 | 19 | def load_finches(path): 20 | # Load the data 21 | df = ( 22 | pd.read_csv(path) 23 | .clean_names() # clean column names 24 | .rename_column('blength', 'beak_length') # rename blength to beak_length (readability fix) 25 | .rename_column('bdepth', 'beak_depth') # rename bdepth to beak_depth (readability fix) 26 | .label_encode('species') # create a `species_enc` column that has the species encoded numerically 27 | ) 28 | return df 29 | 30 | 31 | def load_baseball(): 32 | df = pd.read_csv('../data/baseballdb/core/Batting.csv') 33 | df['AB'] = df['AB'].replace(0, np.nan) 34 | df = df.dropna() 35 | df['batting_avg'] = df['H'] / df['AB'] 36 | df = df[df['yearID'] >= 2016] 37 | df = df.iloc[0:15] 38 | df.head(5) 39 | return df 40 | 41 | 42 | def load_sterilization(): 43 | df = ( 44 | pd.read_csv('../data/sterilization.csv', na_filter=True, na_values=['#DIV/0!']) 45 | .clean_names() 46 | .label_encode('treatment') 47 | ) 48 | mapping = dict(zip(df['treatment'], df['treatment_enc'])) 49 | return df, mapping 50 | 51 | 52 | def load_kruschke(): 53 | df = ( 54 | pd.read_csv('../data/iq.csv', index_col=0) # comment out the path to the file for students. 
55 | .label_encode('treatment') 56 | ) 57 | return df 58 | 59 | 60 | # Constants for load_decay 61 | tau = 71.9 # indium decay half life 62 | A = 42 # starting magnitude 63 | C = 21 # measurement error 64 | noise_scale = 1 65 | 66 | 67 | def load_decay(): 68 | t = np.arange(0, 1000) 69 | def decay_func(ts, noise): 70 | return A * np.exp(-t/tau) + C + np.random.normal(0, noise, size=(len(t))) 71 | 72 | data = {'t': t, 'activity': decay_func(t, noise_scale)} 73 | df = pd.DataFrame(data) 74 | return df -------------------------------------------------------------------------------- /notebooks/SciPy-2021/data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.preprocessing import LabelEncoder 3 | import janitor as jn 4 | import numpy as np 5 | 6 | 7 | def load_finches_2012(): 8 | path = '../data/finch_beaks_2012.csv' 9 | return load_finches(path) 10 | 11 | 12 | def load_finches_1975(): 13 | path = '../data/finch_beaks_1975.csv' 14 | df = load_finches(path) 15 | df = df.rename_column('beak_length_mm', 'beak_length').rename_column('beak_depth_mm', 'beak_depth') 16 | return df 17 | 18 | 19 | def load_finches(path): 20 | # Load the data 21 | df = ( 22 | pd.read_csv(path) 23 | .clean_names() # clean column names 24 | .rename_column('blength', 'beak_length') # rename blength to beak_length (readability fix) 25 | .rename_column('bdepth', 'beak_depth') # rename bdepth to beak_depth (readability fix) 26 | .label_encode('species') # create a `species_enc` column that has the species encoded numerically 27 | ) 28 | return df 29 | 30 | 31 | def load_baseball(): 32 | df = pd.read_csv('../data/baseballdb/core/Batting.csv') 33 | df['AB'] = df['AB'].replace(0, np.nan) 34 | df = df.dropna() 35 | df['batting_avg'] = df['H'] / df['AB'] 36 | df = df[df['yearID'] >= 2016] 37 | df = df.iloc[0:15] 38 | df.head(5) 39 | return df 40 | 41 | 42 | def load_sterilization(): 43 | df = ( 44 | pd.read_csv('../data/sterilization.csv', na_filter=True, na_values=['#DIV/0!']) 45 | .clean_names() 46 | .label_encode('treatment') 47 | ) 48 | mapping = dict(zip(df['treatment'], df['treatment_enc'])) 49 | return df, mapping 50 | 51 | 52 | def load_kruschke(): 53 | df = ( 54 | pd.read_csv('../data/iq.csv', index_col=0) # comment out the path to the file for students. 
55 | .label_encode('treatment') 56 | ) 57 | return df 58 | 59 | 60 | # Constants for load_decay 61 | tau = 71.9 # indium decay half life 62 | A = 42 # starting magnitude 63 | C = 21 # measurement error 64 | noise_scale = 1 65 | 66 | 67 | def load_decay(): 68 | t = np.arange(0, 1000) 69 | def decay_func(ts, noise): 70 | return A * np.exp(-t/tau) + C + np.random.normal(0, noise, size=(len(t))) 71 | 72 | data = {'t': t, 'activity': decay_func(t, noise_scale)} 73 | df = pd.DataFrame(data) 74 | return df -------------------------------------------------------------------------------- /notebooks/SciPy-2022/data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.preprocessing import LabelEncoder 3 | import janitor as jn 4 | import numpy as np 5 | 6 | 7 | def load_finches_2012(): 8 | path = '../data/finch_beaks_2012.csv' 9 | return load_finches(path) 10 | 11 | 12 | def load_finches_1975(): 13 | path = '../data/finch_beaks_1975.csv' 14 | df = load_finches(path) 15 | df = df.rename_column('beak_length_mm', 'beak_length').rename_column('beak_depth_mm', 'beak_depth') 16 | return df 17 | 18 | 19 | def load_finches(path): 20 | # Load the data 21 | df = ( 22 | pd.read_csv(path) 23 | .clean_names() # clean column names 24 | .rename_column('blength', 'beak_length') # rename blength to beak_length (readability fix) 25 | .rename_column('bdepth', 'beak_depth') # rename bdepth to beak_depth (readability fix) 26 | .label_encode('species') # create a `species_enc` column that has the species encoded numerically 27 | ) 28 | return df 29 | 30 | 31 | def load_baseball(): 32 | df = pd.read_csv('../data/baseballdb/core/Batting.csv') 33 | df['AB'] = df['AB'].replace(0, np.nan) 34 | df = df.dropna() 35 | df['batting_avg'] = df['H'] / df['AB'] 36 | df = df[df['yearID'] >= 2016] 37 | df = df.iloc[0:15] 38 | df.head(5) 39 | return df 40 | 41 | 42 | def load_sterilization(): 43 | df = ( 44 | pd.read_csv('../data/sterilization.csv', na_filter=True, na_values=['#DIV/0!']) 45 | .clean_names() 46 | .label_encode('treatment') 47 | ) 48 | mapping = dict(zip(df['treatment'], df['treatment_enc'])) 49 | return df, mapping 50 | 51 | 52 | def load_kruschke(): 53 | df = ( 54 | pd.read_csv('../data/iq.csv', index_col=0) # comment out the path to the file for students. 
55 | .label_encode('treatment') 56 | ) 57 | return df 58 | 59 | 60 | # Constants for load_decay 61 | tau = 71.9 # indium decay half life 62 | A = 42 # starting magnitude 63 | C = 21 # measurement error 64 | noise_scale = 1 65 | 66 | 67 | def load_decay(): 68 | t = np.arange(0, 1000) 69 | def decay_func(ts, noise): 70 | return A * np.exp(-t/tau) + C + np.random.normal(0, noise, size=(len(t))) 71 | 72 | data = {'t': t, 'activity': decay_func(t, noise_scale)} 73 | df = pd.DataFrame(data) 74 | return df -------------------------------------------------------------------------------- /notebooks/ODSC-East-2020-04-14/data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.preprocessing import LabelEncoder 3 | import janitor as jn 4 | import numpy as np 5 | 6 | 7 | def load_finches_2012(): 8 | path = '../data/finch_beaks_2012.csv' 9 | return load_finches(path) 10 | 11 | 12 | def load_finches_1975(): 13 | path = '../data/finch_beaks_1975.csv' 14 | df = load_finches(path) 15 | df = df.rename_column('beak_length_mm', 'beak_length').rename_column('beak_depth_mm', 'beak_depth') 16 | return df 17 | 18 | 19 | def load_finches(path): 20 | # Load the data 21 | df = ( 22 | pd.read_csv(path) 23 | .clean_names() # clean column names 24 | .rename_column('blength', 'beak_length') # rename blength to beak_length (readability fix) 25 | .rename_column('bdepth', 'beak_depth') # rename bdepth to beak_depth (readability fix) 26 | .label_encode('species') # create a `species_enc` column that has the species encoded numerically 27 | ) 28 | return df 29 | 30 | 31 | def load_baseball(): 32 | df = pd.read_csv('../data/baseballdb/core/Batting.csv') 33 | df['AB'] = df['AB'].replace(0, np.nan) 34 | df = df.dropna() 35 | df['batting_avg'] = df['H'] / df['AB'] 36 | df = df[df['yearID'] >= 2016] 37 | df = df.iloc[0:15] 38 | df.head(5) 39 | return df 40 | 41 | 42 | def load_sterilization(): 43 | df = ( 44 | pd.read_csv('../data/sterilization.csv', na_filter=True, na_values=['#DIV/0!']) 45 | .clean_names() 46 | .label_encode('treatment') 47 | ) 48 | mapping = dict(zip(df['treatment'], df['treatment_enc'])) 49 | return df, mapping 50 | 51 | 52 | def load_kruschke(): 53 | df = ( 54 | pd.read_csv('../data/iq.csv', index_col=0) # comment out the path to the file for students. 
55 | .label_encode('treatment') 56 | ) 57 | return df 58 | 59 | 60 | # Constants for load_decay 61 | tau = 71.9 # indium decay half life 62 | A = 42 # starting magnitude 63 | C = 21 # measurement error 64 | noise_scale = 1 65 | 66 | 67 | def load_decay(): 68 | t = np.arange(0, 1000) 69 | def decay_func(ts, noise): 70 | return A * np.exp(-t/tau) + C + np.random.normal(0, noise, size=(len(t))) 71 | 72 | data = {'t': t, 'activity': decay_func(t, noise_scale)} 73 | df = pd.DataFrame(data) 74 | return df -------------------------------------------------------------------------------- /notebooks/ODSC-Europe-2021-06-08/data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.preprocessing import LabelEncoder 3 | import janitor as jn 4 | import numpy as np 5 | 6 | 7 | def load_finches_2012(): 8 | path = '../data/finch_beaks_2012.csv' 9 | return load_finches(path) 10 | 11 | 12 | def load_finches_1975(): 13 | path = '../data/finch_beaks_1975.csv' 14 | df = load_finches(path) 15 | df = df.rename_column('beak_length_mm', 'beak_length').rename_column('beak_depth_mm', 'beak_depth') 16 | return df 17 | 18 | 19 | def load_finches(path): 20 | # Load the data 21 | df = ( 22 | pd.read_csv(path) 23 | .clean_names() # clean column names 24 | .rename_column('blength', 'beak_length') # rename blength to beak_length (readability fix) 25 | .rename_column('bdepth', 'beak_depth') # rename bdepth to beak_depth (readability fix) 26 | .label_encode('species') # create a `species_enc` column that has the species encoded numerically 27 | ) 28 | return df 29 | 30 | 31 | def load_baseball(): 32 | df = pd.read_csv('../data/baseballdb/core/Batting.csv') 33 | df['AB'] = df['AB'].replace(0, np.nan) 34 | df = df.dropna() 35 | df['batting_avg'] = df['H'] / df['AB'] 36 | df = df[df['yearID'] >= 2016] 37 | df = df.iloc[0:15] 38 | df.head(5) 39 | return df 40 | 41 | 42 | def load_sterilization(): 43 | df = ( 44 | pd.read_csv('../data/sterilization.csv', na_filter=True, na_values=['#DIV/0!']) 45 | .clean_names() 46 | .label_encode('treatment') 47 | ) 48 | mapping = dict(zip(df['treatment'], df['treatment_enc'])) 49 | return df, mapping 50 | 51 | 52 | def load_kruschke(): 53 | df = ( 54 | pd.read_csv('../data/iq.csv', index_col=0) # comment out the path to the file for students. 
55 | .label_encode('treatment') 56 | ) 57 | return df 58 | 59 | 60 | # Constants for load_decay 61 | tau = 71.9 # indium decay half life 62 | A = 42 # starting magnitude 63 | C = 21 # measurement error 64 | noise_scale = 1 65 | 66 | 67 | def load_decay(): 68 | t = np.arange(0, 1000) 69 | def decay_func(ts, noise): 70 | return A * np.exp(-t/tau) + C + np.random.normal(0, noise, size=(len(t))) 71 | 72 | data = {'t': t, 'activity': decay_func(t, noise_scale)} 73 | df = pd.DataFrame(data) 74 | return df -------------------------------------------------------------------------------- /notebooks/URGsADS-NYC-2020-02-19/data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.preprocessing import LabelEncoder 3 | import janitor as jn 4 | import numpy as np 5 | 6 | 7 | def load_finches_2012(): 8 | path = '../data/finch_beaks_2012.csv' 9 | return load_finches(path) 10 | 11 | 12 | def load_finches_1975(): 13 | path = '../data/finch_beaks_1975.csv' 14 | df = load_finches(path) 15 | df = df.rename_column('beak_length_mm', 'beak_length').rename_column('beak_depth_mm', 'beak_depth') 16 | return df 17 | 18 | 19 | def load_finches(path): 20 | # Load the data 21 | df = ( 22 | pd.read_csv(path) 23 | .clean_names() # clean column names 24 | .rename_column('blength', 'beak_length') # rename blength to beak_length (readability fix) 25 | .rename_column('bdepth', 'beak_depth') # rename bdepth to beak_depth (readability fix) 26 | .label_encode('species') # create a `species_enc` column that has the species encoded numerically 27 | ) 28 | return df 29 | 30 | 31 | def load_baseball(): 32 | df = pd.read_csv('../data/baseballdb/core/Batting.csv') 33 | df['AB'] = df['AB'].replace(0, np.nan) 34 | df = df.dropna() 35 | df['batting_avg'] = df['H'] / df['AB'] 36 | df = df[df['yearID'] >= 2016] 37 | df = df.iloc[0:15] 38 | df.head(5) 39 | return df 40 | 41 | 42 | def load_sterilization(): 43 | df = ( 44 | pd.read_csv('../data/sterilization.csv', na_filter=True, na_values=['#DIV/0!']) 45 | .clean_names() 46 | .label_encode('treatment') 47 | ) 48 | mapping = dict(zip(df['treatment'], df['treatment_enc'])) 49 | return df, mapping 50 | 51 | 52 | def load_kruschke(): 53 | df = ( 54 | pd.read_csv('../data/iq.csv', index_col=0) # comment out the path to the file for students. 55 | .label_encode('treatment') 56 | ) 57 | return df 58 | 59 | 60 | # Constants for load_decay 61 | tau = 71.9 # indium decay half life 62 | A = 42 # starting magnitude 63 | C = 21 # measurement error 64 | noise_scale = 1 65 | 66 | 67 | def load_decay(): 68 | t = np.arange(0, 1000) 69 | def decay_func(ts, noise): 70 | return A * np.exp(-t/tau) + C + np.random.normal(0, noise, size=(len(t))) 71 | 72 | data = {'t': t, 'activity': decay_func(t, noise_scale)} 73 | df = pd.DataFrame(data) 74 | return df -------------------------------------------------------------------------------- /.devcontainer/Dockerfile: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. See https://go.microsoft.com/fwlink/?linkid=2090316 for license information. 
4 | #-------------------------------------------------------------------------------------------------------------
5 |
6 | FROM continuumio/miniconda3
7 |
8 | # Avoid warnings by switching to noninteractive
9 | ENV DEBIAN_FRONTEND=noninteractive
10 |
11 | # This Dockerfile adds a non-root user with sudo access. Use the "remoteUser"
12 | # property in devcontainer.json to use it. On Linux, the container user's GID/UIDs
13 | # will be updated to match your local UID/GID (when using the dockerFile property).
14 | # See https://aka.ms/vscode-remote/containers/non-root-user for details.
15 | ARG USERNAME=vscode
16 | ARG USER_UID=1000
17 | ARG USER_GID=$USER_UID
18 |
19 | # Copy environment.yml (if found) to a temp location so we can update the environment. Also
20 | # copy "noop.txt" so the COPY instruction does not fail if no environment.yml exists.
21 | COPY binder/environment.yml* .devcontainer/noop.txt /tmp/conda-tmp/
22 |
23 | # Configure apt and install packages
24 | RUN apt-get update \
25 | && apt-get -y install --no-install-recommends apt-utils dialog 2>&1 \
26 | #
27 | # Verify git, process tools, lsb-release (common in install instructions for CLIs) installed
28 | && apt-get -y install git openssh-client less iproute2 procps lsb-release \
29 | #
30 | # Install pylint
31 | && /opt/conda/bin/pip install pylint \
32 | #
33 | # Update Python environment based on environment.yml (if present)
34 | && if [ -f "/tmp/conda-tmp/environment.yml" ]; then /opt/conda/bin/conda env update -n base -f /tmp/conda-tmp/environment.yml; fi \
35 | && rm -rf /tmp/conda-tmp \
36 | #
37 | # Create a non-root user to use if preferred - see https://aka.ms/vscode-remote/containers/non-root-user.
38 | && groupadd --gid $USER_GID $USERNAME \
39 | && useradd -s /bin/bash --uid $USER_UID --gid $USER_GID -m $USERNAME \
40 | # [Optional] Add sudo support for the non-root user
41 | && apt-get install -y sudo \
42 | && echo $USERNAME ALL=\(root\) NOPASSWD:ALL > /etc/sudoers.d/$USERNAME \
43 | && chmod 0440 /etc/sudoers.d/$USERNAME \
44 | #
45 | # Clean up
46 | && apt-get autoremove -y \
47 | && apt-get clean -y \
48 | && rm -rf /var/lib/apt/lists/*
49 |
50 | # Switch back to dialog for any ad-hoc use of apt-get
51 | ENV DEBIAN_FRONTEND=dialog
52 |
-------------------------------------------------------------------------------- /src/bayes_tutorial/solutions/estimation.py: --------------------------------------------------------------------------------
1 | import janitor
2 | import numpy as np
3 | import pandas as pd
4 | from pyprojroot import here
5 | import pymc3 as pm
6 | import arviz as az
7 |
8 |
9 | def naive_estimate(data):
10 | estimated_p = data.join_apply(
11 | lambda x: x["num_favs"] / x["num_customers"]
12 | if x["num_customers"] > 0
13 | else np.nan,
14 | "p_hat",
15 | )
16 | return estimated_p
17 |
18 |
19 | def assumptions():
20 | ans = """
21 | When we choose to represent p_hat with 0,
22 | we are oftentimes implicitly placing a strong assumption
23 | that the estimate 0 is an unbiased estimator of the true p.
24 |
25 | When we choose to represent p_hat with NaN,
26 | we are implicitly placing a strong assumption
27 | that we don't have enough information to know p.
28 |
29 | Either way, we have assumed something,
30 | and there's no "objectivity" escape hatch here.
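(Concretely: for a shop that has seen zero customers,
p_hat = 0 asserts "nobody likes this shop",
while p_hat = NaN merely asserts "we cannot say yet".
Those are two very different modelling commitments.)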
31 | """ 32 | return ans 33 | 34 | 35 | def ice_cream_store_model(data: pd.DataFrame) -> pm.Model: 36 | with pm.Model() as model: 37 | p = pm.Beta("p", alpha=2, beta=2, shape=(len(data),)) 38 | like = pm.Binomial( 39 | "like", n=data["num_customers"], p=p, observed=data["num_favs"] 40 | ) 41 | return model 42 | 43 | 44 | def posterior_quantile(trace, q): 45 | trace_reshaped = trace.posterior.stack(draws=("chain", "draw")) 46 | return trace_reshaped.quantile(q=q, dim="draws").to_dataframe() 47 | 48 | 49 | def trace_all_stores(data): 50 | with ice_cream_store_model(data): 51 | trace = pm.sample(2000) 52 | trace = az.from_pymc3(trace, coords={"p_dim_0": data["shopname"]}) 53 | return trace 54 | 55 | 56 | from daft import PGM 57 | 58 | 59 | def ice_cream_one_group_pgm(): 60 | G = PGM() 61 | G.add_node("alpha", content=r"$\alpha$", x=-1, y=1, scale=1.2, fixed=True) 62 | G.add_node("beta", content=r"$\beta$", x=1, y=1, scale=1.2, fixed=True) 63 | 64 | G.add_node("p", content="p", x=0, y=1, scale=1.2) 65 | G.add_node("likes", content="l", x=0, y=0, scale=1.2, observed=True) 66 | G.add_edge("alpha", "p") 67 | G.add_edge("beta", "p") 68 | G.add_edge("p", "likes") 69 | G.show() 70 | 71 | 72 | def ice_cream_n_group_pgm(): 73 | G = PGM() 74 | G.add_node("alpha", content=r"$\alpha$", x=-1, y=1, scale=1.2, fixed=True) 75 | G.add_node("beta", content=r"$\beta$", x=1, y=1, scale=1.2, fixed=True) 76 | 77 | G.add_node("p", content=r"$p_{i}$", x=0, y=1, scale=1.2) 78 | G.add_node("likes", content=r"$l_{i}$", x=0, y=0, scale=1.2, observed=True) 79 | G.add_edge("alpha", "p") 80 | G.add_edge("beta", "p") 81 | G.add_edge("p", "likes") 82 | G.add_plate([-0.5, -0.8, 1, 2.3], label=r"shop $i$") 83 | G.show() 84 | -------------------------------------------------------------------------------- /src/bayes_tutorial/data.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from sklearn.preprocessing import LabelEncoder 3 | import janitor 4 | import numpy as np 5 | from pyprojroot import here 6 | 7 | 8 | def load_finches_2012(): 9 | path = here() / "data/finch_beaks_2012.csv" 10 | return load_finches(path) 11 | 12 | 13 | def load_finches_1975(): 14 | path = here() / "data/finch_beaks_1975.csv" 15 | df = ( 16 | load_finches(path) 17 | .rename_column("beak_length_mm", "beak_length") 18 | .rename_column("beak_depth_mm", "beak_depth") 19 | ) 20 | return df 21 | 22 | 23 | def load_finches(path): 24 | # Load the data 25 | df = ( 26 | pd.read_csv(path) 27 | .clean_names() # clean column names 28 | .rename_column( 29 | "blength", "beak_length" 30 | ) # rename blength to beak_length (readability fix) 31 | .rename_column( 32 | "bdepth", "beak_depth" 33 | ) # rename bdepth to beak_depth (readability fix) 34 | .label_encode( 35 | "species" 36 | ) # create a `species_enc` column that has the species encoded numerically 37 | ) 38 | return df 39 | 40 | 41 | def load_baseball(): 42 | df = pd.read_csv(here() / "data/baseballdb/core/Batting.csv") 43 | df["AB"] = df["AB"].replace(0, np.nan) 44 | df = df.dropna() 45 | df["batting_avg"] = df["H"] / df["AB"] 46 | df = df[df["yearID"] >= 2016] 47 | df = df.iloc[0:15] 48 | df.head(5) 49 | return df 50 | 51 | 52 | def load_sterilization(): 53 | df = ( 54 | pd.read_csv( 55 | here() / "data/sterilization.csv", 56 | na_filter=True, 57 | na_values=["#DIV/0!"], 58 | ) 59 | .clean_names() 60 | .label_encode("treatment") 61 | ) 62 | mapping = dict(zip(df["treatment"], df["treatment_enc"])) 63 | return df, mapping 64 | 65 | 66 | def 
load_kruschke():
67 | df = pd.read_csv(
68 | here() / "data/iq.csv", index_col=0
69 | ).label_encode( # comment out the path to the file for students.
70 | "treatment"
71 | )
72 | return df
73 |
74 |
75 | # Constants for load_decay
76 | tau = 71.9 # indium decay half life
77 | A = 42 # starting magnitude
78 | C = 21 # measurement error
79 | noise_scale = 1
80 |
81 |
82 | def load_decay():
83 | t = np.arange(0, 1000)
84 |
85 | def decay_func(ts, noise):
86 | return (
87 | A * np.exp(-ts / tau)
88 | + C
89 | + np.random.normal(0, noise, size=(len(ts)))
90 | )
91 |
92 | data = {"t": t, "activity": decay_func(t, noise_scale)}
93 | df = pd.DataFrame(data)
94 | return df
95 |
96 |
97 | def load_ice_cream():
98 | data = (
99 | pd.read_csv(here() / "data/ice_cream_shop.csv", index_col=0)
100 | .reset_index(drop=True)
101 | .select_columns(["shopname", "num_customers", "owner_idx", "num_favs"])
102 | )
103 | return data
104 |
-------------------------------------------------------------------------------- /src/bayes_tutorial/solutions/probability.py: --------------------------------------------------------------------------------
1 | from scipy.stats import bernoulli, binom, multinomial
2 | import numpy as np
3 |
4 |
5 | def coin_distribution():
6 | return {
7 | "H": 1 / 3,
8 | "T": 2 / 3,
9 | }
10 |
11 |
12 | def test_coin_distribution():
13 | cred_points = coin_distribution().values()
14 | assert np.allclose(sum(cred_points), 1)
15 |
16 |
17 | def dice_distribution():
18 | return {
19 | 1: 1 / 6,
20 | 2: 1 / 6,
21 | 3: 1 / 6,
22 | 4: 1 / 6,
23 | 5: 1 / 6,
24 | 6: 1 / 6,
25 | }
26 |
27 |
28 | def test_dice_distribution():
29 | cred_points = dice_distribution().values()
30 | assert np.allclose(sum(cred_points), 1)
31 |
32 |
33 | def binomial_answer():
34 | binomial_dist = binom(n=1, p=0.8)
35 | draws = binomial_dist.rvs(10)
36 | return draws
37 |
38 |
39 | def multinomial_answer():
40 | multinomial_dist = multinomial(n=1, p=[1 / 6] * 6)
41 | draws = multinomial_dist.rvs(10)
42 | return draws
43 |
44 |
45 | def likelihood_coin_toss():
46 | answer = """
47 | This does not surprise me.
48 | Under a fair coin toss model,
49 | a continuous sequence of 0s has the same likelihood
50 | as a mixed sequence of 0s and 1s.
51 | As such, fair coin tosses are expected to be quite "clumpy"!
52 |
53 | HOWEVER...
54 |
55 | A continuous sequence of 0s should compel me
56 | to think about whether the fair coin toss model is right or not...
57 | """
58 | return answer
59 |
60 |
61 | def fair_coin_model():
62 | return bernoulli(0.5)
63 |
64 |
65 | def coin_data_likelihood(coin_data_1, coin_data_2):
66 | coin = fair_coin_model()
67 | return (coin.pmf(coin_data_1), coin.pmf(coin_data_2))
68 |
69 |
70 | def coin_data_joint_likelihood(coin_data_1, coin_data_2):
71 | coin = fair_coin_model()
72 | return (
73 | np.prod(coin.pmf(coin_data_1)),
74 | np.prod(coin.pmf(coin_data_2)),
75 | )
76 |
77 |
78 | def coin_data_joint_loglikelihood(coin_data_1, coin_data_2):
79 | coin = fair_coin_model()
80 | return (np.sum(coin.logpmf(coin_data_1)), np.sum(coin.logpmf(coin_data_2)))
81 |
82 |
83 | def spaces_of_p():
84 | answer = """
85 | There is actually an infinite number of possible Bernoullis that we could instantiate!
86 |
87 | Because there is an infinite set of numbers in the [0, 1] interval,
88 | there is an infinite number of possible Bernoullis that we could instantiate.
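(Concretely: p = 0.5, p = 0.55, p = 0.555, and so on all give distinct Bernoullis,
and we can keep refining p forever without exhausting [0, 1].)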
89 | """ 90 | return answer 91 | 92 | 93 | def spaces_of_data(): 94 | answer = """ 95 | If you assume that there are no restrictions on the outcome, 96 | then there should be $2^5$ ways to configure five Bernoulli draws. 97 | 98 | More generally... 99 | 100 | First off, there's no reason why we always have to have three 1s and two 0s in five draws; 101 | it could have been five 1s or five 0s. 102 | Secondly, the order of data (though it doesn't really matter in this case) 103 | for three 1s and two 0s might well have been different. 104 | """ 105 | return answer 106 | -------------------------------------------------------------------------------- /data/baseballdb/core/ManagersHalf.csv: -------------------------------------------------------------------------------- 1 | playerID,yearID,teamID,lgID,inseason,half,G,W,L,rank 2 | hanlone01,1892,BLN,NL,3,1,56,17,39,12 3 | hanlone01,1892,BLN,NL,3,2,77,26,46,10 4 | vanhage01,1892,BLN,NL,1,1,11,1,10,12 5 | waltzjo99,1892,BLN,NL,2,1,8,2,6,12 6 | wardjo01,1892,BRO,NL,1,1,78,51,26,2 7 | wardjo01,1892,BRO,NL,1,2,80,44,33,3 8 | seleefr99,1892,BSN,NL,1,1,75,52,22,1 9 | seleefr99,1892,BSN,NL,1,2,77,50,26,2 10 | ansonca01,1892,CHN,NL,1,1,71,31,39,8 11 | ansonca01,1892,CHN,NL,1,2,76,39,37,7 12 | comisch01,1892,CIN,NL,1,1,77,44,31,4 13 | comisch01,1892,CIN,NL,1,2,78,38,37,8 14 | tebeapa01,1892,CL4,NL,1,1,74,40,33,5 15 | tebeapa01,1892,CL4,NL,1,2,79,53,23,1 16 | chapmja01,1892,LS3,NL,1,1,54,21,33,11 17 | pfefffr01,1892,LS3,NL,2,1,23,9,14,11 18 | pfefffr01,1892,LS3,NL,2,2,77,33,42,9 19 | powerpa99,1892,NY1,NL,1,1,74,31,43,10 20 | powerpa99,1892,NY1,NL,1,2,79,40,37,6 21 | wrighha01,1892,PHI,NL,1,1,77,46,30,3 22 | wrighha01,1892,PHI,NL,1,2,78,41,36,5 23 | buckeal99,1892,PIT,NL,1,1,29,15,14,6 24 | buckeal99,1892,PIT,NL,3,2,66,38,27,4 25 | burnsto01,1892,PIT,NL,2,1,47,22,25,6 26 | burnsto01,1892,PIT,NL,2,2,13,5,7,4 27 | carutbo01,1892,SLN,NL,5,2,50,16,32,11 28 | crookja01,1892,SLN,NL,3,1,47,24,22,9 29 | crookja01,1892,SLN,NL,3,2,15,3,11,11 30 | glassja01,1892,SLN,NL,1,1,4,1,3,9 31 | gorege01,1892,SLN,NL,4,2,16,6,9,11 32 | striccu01,1892,SLN,NL,2,1,23,6,17,9 33 | barnibi01,1892,WAS,NL,1,1,2,0,2,7 34 | irwinar01,1892,WAS,NL,2,1,74,35,39,7 35 | irwinar01,1892,WAS,NL,2,2,34,11,21,12 36 | richada01,1892,WAS,NL,3,2,43,12,31,12 37 | coxbo01,1981,ATL,NL,1,1,55,25,29,4 38 | coxbo01,1981,ATL,NL,1,2,52,25,27,5 39 | weaveea99,1981,BAL,AL,1,1,54,31,23,2 40 | weaveea99,1981,BAL,AL,1,2,51,28,23,4 41 | houkra01,1981,BOS,AL,1,1,56,30,26,5 42 | houkra01,1981,BOS,AL,1,2,52,29,23,2 43 | fregoji01,1981,CAL,AL,1,1,47,22,25,4 44 | mauchge01,1981,CAL,AL,2,1,13,9,4,4 45 | mauchge01,1981,CAL,AL,2,2,50,20,30,7 46 | larusto01,1981,CHA,AL,1,1,53,31,22,3 47 | larusto01,1981,CHA,AL,1,2,53,23,30,6 48 | amalfjo01,1981,CHN,NL,1,1,54,15,37,6 49 | amalfjo01,1981,CHN,NL,1,2,52,23,28,5 50 | mcnamjo99,1981,CIN,NL,1,1,56,35,21,2 51 | mcnamjo99,1981,CIN,NL,1,2,52,31,21,2 52 | garcida99,1981,CLE,AL,1,1,50,26,24,6 53 | garcida99,1981,CLE,AL,1,2,53,26,27,5 54 | andersp01,1981,DET,AL,1,1,57,31,26,4 55 | andersp01,1981,DET,AL,1,2,52,29,23,2 56 | virdobi01,1981,HOU,NL,1,1,57,28,29,3 57 | virdobi01,1981,HOU,NL,1,2,53,33,20,1 58 | freyji99,1981,KCA,AL,1,1,50,20,30,5 59 | freyji99,1981,KCA,AL,1,2,20,10,10,1 60 | howsedi01,1981,KCA,AL,2,2,33,20,13,1 61 | lasorto01,1981,LAN,NL,1,1,57,36,21,1 62 | lasorto01,1981,LAN,NL,1,2,53,27,26,4 63 | gardnbi02,1981,MIN,AL,2,1,20,6,14,7 64 | gardnbi02,1981,MIN,AL,2,2,53,24,29,4 65 | goryljo01,1981,MIN,AL,1,1,37,11,25,7 66 | rodgebu01,1981,ML4,AL,1,1,56,31,25,3 67 | 
rodgebu01,1981,ML4,AL,1,2,53,31,22,1 68 | fanniji01,1981,MON,NL,2,2,27,16,11,1 69 | willidi02,1981,MON,NL,1,1,55,30,25,3 70 | willidi02,1981,MON,NL,1,2,26,14,12,1 71 | lemonbo01,1981,NYA,AL,2,2,25,11,14,6 72 | michage01,1981,NYA,AL,1,1,56,34,22,1 73 | michage01,1981,NYA,AL,1,2,26,14,12,6 74 | torrejo01,1981,NYN,NL,1,1,52,17,34,5 75 | torrejo01,1981,NYN,NL,1,2,53,24,28,4 76 | martibi02,1981,OAK,AL,1,1,60,37,23,1 77 | martibi02,1981,OAK,AL,1,2,49,27,22,2 78 | greenda02,1981,PHI,NL,1,1,55,34,21,1 79 | greenda02,1981,PHI,NL,1,2,52,25,27,3 80 | tannech01,1981,PIT,NL,1,1,49,25,23,4 81 | tannech01,1981,PIT,NL,1,2,54,21,33,6 82 | howarfr01,1981,SDN,NL,1,1,56,23,33,6 83 | howarfr01,1981,SDN,NL,1,2,54,18,36,6 84 | lachere01,1981,SEA,AL,2,1,33,15,18,6 85 | lachere01,1981,SEA,AL,2,2,52,23,29,5 86 | willsma01,1981,SEA,AL,1,1,25,6,18,6 87 | robinfr02,1981,SFN,NL,1,1,59,27,32,5 88 | robinfr02,1981,SFN,NL,1,2,52,29,23,3 89 | herzowh01,1981,SLN,NL,1,1,51,30,20,2 90 | herzowh01,1981,SLN,NL,1,2,52,29,23,2 91 | zimmedo01,1981,TEX,AL,1,1,55,33,22,2 92 | zimmedo01,1981,TEX,AL,1,2,50,24,26,3 93 | mattibo01,1981,TOR,AL,1,1,58,16,42,7 94 | mattibo01,1981,TOR,AL,1,2,48,21,27,7 95 | -------------------------------------------------------------------------------- /data/baseballdb/core/TeamsFranchises.csv: -------------------------------------------------------------------------------- 1 | franchID,franchName,active,NAassoc 2 | ALT,Altoona Mountain City,N, 3 | ANA,Los Angeles Angels of Anaheim,Y, 4 | ARI,Arizona Diamondbacks,Y, 5 | ATH,Philadelphia Athletics,N,PNA 6 | ATL,Atlanta Braves,Y,BNA 7 | BAL,Baltimore Orioles,Y, 8 | BFB,Buffalo Bisons,N, 9 | BFL,Buffalo Bisons,N, 10 | BLC,Baltimore Canaries,NA, 11 | BLO,Baltimore Orioles,N, 12 | BLT,Baltimore Terrapins,N, 13 | BLU,Baltimore Monumentals,N, 14 | BNA,Boston Red Stockings,NA,ATL 15 | BOS,Boston Red Sox,Y, 16 | BRA,Brooklyn Atlantics,NA, 17 | BRD,Boston Reds,N, 18 | BRG,Brooklyn Gladiators,N, 19 | BRS,Boston Reds,N, 20 | BTT,Brooklyn Tip-Tops,N, 21 | BUF,Buffalo Bisons,N, 22 | BWW,Brooklyn Ward's Wonders,N, 23 | CBK,Columbus Buckeyes,N, 24 | CBL,Cleveland Blues,N, 25 | CEN,Philadelphia Centennials,NA, 26 | CFC,Cleveland Forest Citys,NA, 27 | CHC,Chicago Cubs,Y,CNA 28 | CHH,Chicago Whales,N, 29 | CHP,Chicago Pirates,N, 30 | CHW,Chicago White Sox,Y, 31 | CIN,Cincinnati Reds,Y, 32 | CKK,Cincinnati Kelly's Killers,N, 33 | CLE,Cleveland Indians,Y, 34 | CLI,Cleveland Infants,N, 35 | CLS,Columbus Solons,N, 36 | CLV,Cleveland Spiders,N, 37 | CNA,Chicago White Stockings,NA,CHC 38 | CNR,Cincinnati Reds,N, 39 | COL,Colorado Rockies,Y, 40 | COR,Cincinnati Outlaw Reds,N, 41 | CPI,Chicago/Pittsburgh (Union League),N, 42 | DET,Detroit Tigers,Y, 43 | DTN,Detroit Wolverines,N, 44 | ECK,Brooklyn Eckfords,NA, 45 | FLA,Florida Marlins,Y, 46 | HAR,Hartford Dark Blues,N,HNA 47 | HNA,Hartford Dark Blues,NA,HAR 48 | HOU,Houston Astros,Y, 49 | IBL,Indianapolis Blues,N, 50 | IHO,Indianapolis Hoosiers,N, 51 | IND,Indianapolis Hoosiers,N, 52 | KCC,Kansas City Cowboys,N, 53 | KCN,Kansas City Cowboys,N, 54 | KCP,Kansas City Packers,N, 55 | KCR,Kansas City Royals,Y, 56 | KCU,Kansas City Cowboys,N, 57 | KEK,Fort Wayne Kekiongas,NA, 58 | LAD,Los Angeles Dodgers,Y, 59 | LGR,Louisville Grays,N, 60 | LOU,Louisville Colonels,N, 61 | MAN,Middletown Mansfields,NA, 62 | MAR,Baltimore Marylands,NA, 63 | MIL,Milwaukee Brewers,Y, 64 | MIN,Minnesota Twins,Y, 65 | MLA,Milwaukee Brewers,N, 66 | MLG,Milwaukee Grays,N, 67 | MLU,Milwaukee Brewers,N, 68 | NAT,Washington Nationals,NA, 69 | NEW,Newark Pepper,N, 70 
| NHV,New Haven Elm Citys,NA, 71 | NNA,New York Mutuals,NA,NYU 72 | NYI,New York Giants,N, 73 | NYM,New York Mets,Y, 74 | NYP,New York Metropolitans,N, 75 | NYU,New York Mutuals,N,NNA 76 | NYY,New York Yankees,Y, 77 | OAK,Oakland Athletics,Y, 78 | OLY,Washington Olympics,NA, 79 | PBB,Pittsburgh Burghers,N, 80 | PBS,Pittsburgh Rebels,N, 81 | PHA,Philadelphia Athletics,N, 82 | PHI,Philadelphia Phillies,Y, 83 | PHK,Philadelphia Keystones,N, 84 | PHQ,Philadelphia Athletics,N, 85 | PIT,Pittsburgh Pirates,Y, 86 | PNA,Philadelphia Athletics,NA,ATH 87 | PRO,Providence Grays,N, 88 | PWS,Philadelphia White Stockings,NA, 89 | RES,Elizabeth Resolutes,NA, 90 | RIC,Richmond Virginians,N, 91 | ROC,Rochester Broncos,N, 92 | ROK,Rockford Forest Citys,NA, 93 | SBS,St. Louis Brown Stockings,N,SNA 94 | SDP,San Diego Padres,Y, 95 | SEA,Seattle Mariners,Y, 96 | SFG,San Francisco Giants,Y, 97 | SLI,St. Louis Terriers,N, 98 | SLM,St. Louis Maroons,N, 99 | SLR,St. Louis Red Stockings,NA, 100 | SNA,St. Louis Brown Stockings,NA,SBS 101 | STL,St. Louis Cardinals,Y, 102 | STP,St. Paul Apostles,N, 103 | SYR,Syracuse Stars,N, 104 | SYS,Syracuse Stars,N, 105 | TBD,Tampa Bay Rays,Y, 106 | TEX,Texas Rangers,Y, 107 | TLM,Toledo Maumees,N, 108 | TOL,Toledo Blue Stockings,N, 109 | TOR,Toronto Blue Jays,Y, 110 | TRO,Troy Haymakers,NA, 111 | TRT,Troy Trojans,N, 112 | WAS,Washington Senators,N, 113 | WBL,Washington Blue Legs,NA, 114 | WES,Keokuk Westerns,NA, 115 | WIL,Wilmington Quicksteps,N, 116 | WNA,Washington Nationals,N, 117 | WNL,Washington Nationals,N, 118 | WNT,Washington Nationals,NA, 119 | WOR,Worcester Ruby Legs,N, 120 | WSN,Washington Nationals,Y, 121 | WST,Washington Statesmen,N, 122 | -------------------------------------------------------------------------------- /src/bayes_tutorial/solutions/simulation.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from typing import List 3 | import numpy as np 4 | import scipy.stats as sts 5 | from daft import PGM 6 | 7 | 8 | def coin_flip_pgm(): 9 | G = PGM() 10 | G.add_node("alpha", content=r"$\alpha$", x=-1, y=1, scale=1.2, fixed=True) 11 | G.add_node("beta", content=r"$\beta$", x=1, y=1, scale=1.2, fixed=True) 12 | G.add_node("p", content="p", x=0, y=1, scale=1.2) 13 | G.add_node("result", content="result", x=0, y=0, scale=1.2, observed=True) 14 | G.add_edge("alpha", "p") 15 | G.add_edge("beta", "p") 16 | G.add_edge("p", "result") 17 | G.show() 18 | 19 | 20 | def coin_flip_generator_v2(alpha: float, beta: float) -> np.ndarray: 21 | """ 22 | Coin flip generator for a `p` that is not precisely known. 23 | """ 24 | if alpha < 0: 25 | raise ValueError(f"alpha must be positive, but you passed in {alpha}") 26 | if beta < 0: 27 | raise ValueError(f"beta must be positive, but you passed in {beta}.") 28 | p = sts.beta(a=alpha, b=beta).rvs(1) 29 | result = sts.bernoulli(p=p).rvs(1) 30 | return result 31 | 32 | 33 | def generate_many_coin_flips( 34 | n_draws: int, alpha: float, beta: float 35 | ) -> List[int]: 36 | """ 37 | Generate n draws from the coin flip generator. 
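:param n_draws: number of simulated coin flips to generate.
:param alpha: alpha parameter of the Beta prior over p, forwarded to coin_flip_generator_v2.
:param beta: beta parameter of the Beta prior over p, forwarded to coin_flip_generator_v2.
:returns: a flattened NumPy array of 0/1 outcomes, one per draw.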
38 | """ 39 | data = [coin_flip_generator_v2(alpha, beta) for _ in range(n_draws)] 40 | return np.array(data).flatten() 41 | 42 | 43 | def coin_flip_joint_loglike(data: List[int], p: float) -> float: 44 | p_loglike = sts.beta(a=10, b=10).logpdf( 45 | p 46 | ) # evaluate guesses of `p` against the prior distribution 47 | data_loglike = sts.bernoulli(p=p).logpmf(data) 48 | 49 | return np.sum(data_loglike) + np.sum(p_loglike) 50 | 51 | 52 | def car_crash_pgm(): 53 | G = PGM() 54 | G.add_node("crashes", content="crashes", x=0, y=0, scale=1.5) 55 | G.add_node("rate", content="rate", x=0, y=1, scale=1.5) 56 | G.add_edge("rate", "crashes") 57 | G.show() 58 | 59 | 60 | def car_crash_data(): 61 | data = [1, 5, 2, 3, 8, 4, 5] 62 | return data 63 | 64 | 65 | def car_crash_loglike(rate: float, crashes: List[int]) -> float: 66 | """Evaluate likelihood of per-week car crash data points.""" 67 | rate_like = np.sum(sts.expon(scale=0.5).logpdf(rate)) 68 | crashes_like = np.sum(sts.poisson(mu=rate).logpmf(crashes)) 69 | return rate_like + crashes_like 70 | 71 | 72 | def car_crash_loglike_plot(): 73 | data = car_crash_data() 74 | _, ax = plt.subplots() 75 | rates = np.arange(0, 10, 0.1) 76 | loglike = [] 77 | for rate in rates: 78 | loglike.append(car_crash_loglike(rate, data)) 79 | ax.plot(rates, loglike) 80 | return ax 81 | 82 | 83 | def korea_pgm(): 84 | G = PGM() 85 | G.add_node("s_mean", r"$\mu_{s}$", x=0, y=1) 86 | G.add_node("s_scale", r"$\sigma_{s}$", x=1, y=1) 87 | G.add_node("s_height", r"$h_s$", x=0.5, y=0) 88 | G.add_edge("s_mean", "s_height") 89 | G.add_edge("s_scale", "s_height") 90 | 91 | G.add_node("n_mean", r"$\mu_{n}$", x=2, y=1) 92 | G.add_node("n_scale", r"$\sigma_{n}$", x=3, y=1) 93 | G.add_node("n_height", r"$h_n$", x=2.5, y=0) 94 | G.add_edge("n_mean", "n_height") 95 | G.add_edge("n_scale", "n_height") 96 | 97 | G.show() 98 | 99 | 100 | def s_korea_generator(): 101 | s_korea_mean = sts.norm(loc=180, scale=3).rvs() 102 | s_korea_scale = sts.expon(scale=1).rvs() 103 | height = sts.norm(loc=s_korea_mean, scale=s_korea_scale).rvs() 104 | return height 105 | 106 | 107 | def n_korea_generator(): 108 | n_korea_mean = sts.norm(loc=165, scale=3).rvs() 109 | n_korea_scale = sts.expon(scale=1).rvs() 110 | height = sts.norm(loc=n_korea_mean, scale=n_korea_scale).rvs() 111 | return height 112 | 113 | 114 | def s_korea_height_loglike( 115 | mean: float, scale: float, heights: List[float] 116 | ) -> float: 117 | mean_loglike = sts.norm(loc=180, scale=3).logpdf(mean) 118 | scale_loglike = sts.expon(scale=1).logpdf(scale) 119 | height_loglike = sts.norm(loc=mean, scale=scale).logpdf(heights) 120 | return ( 121 | np.sum(height_loglike) + np.sum(mean_loglike) + np.sum(scale_loglike) 122 | ) 123 | 124 | 125 | def n_korea_height_loglike( 126 | mean: float, scale: float, heights: List[float] 127 | ) -> float: 128 | mean_loglike = sts.norm(loc=165, scale=3).logpdf(mean) 129 | scale_loglike = sts.expon(scale=1).logpdf(scale) 130 | height_loglike = sts.norm(loc=mean, scale=scale).logpdf(heights) 131 | return ( 132 | np.sum(height_loglike) + np.sum(scale_loglike) + np.sum(mean_loglike) 133 | ) 134 | 135 | 136 | def joint_height_loglike( 137 | s_mean: float, 138 | s_scale: float, 139 | n_mean: float, 140 | n_scale: float, 141 | s_heights: List[int], 142 | n_heights: List[int], 143 | ) -> float: 144 | s_korea_loglike = s_korea_height_loglike(s_mean, s_scale, s_heights) 145 | n_korea_loglike = n_korea_height_loglike(n_mean, n_scale, n_heights) 146 | return s_korea_loglike + n_korea_loglike 147 | 148 | 149 | def 
korea_height_generator( 150 | mean_loc: float, mean_scale: float, scale_loc: float 151 | ) -> float: 152 | mean = sts.norm(loc=mean_loc, scale=mean_scale).rvs() 153 | scale = sts.expon(loc=scale_loc).rvs() 154 | height = sts.norm(loc=mean, scale=scale).rvs() 155 | return height 156 | 157 | 158 | def s_korea_height_data(): 159 | return [korea_height_generator(175, 0.3, 5) for _ in range(1000)] 160 | 161 | 162 | def n_korea_height_data(): 163 | return [korea_height_generator(165, 0.2, 3) for _ in range(1000)] 164 | -------------------------------------------------------------------------------- /notebooks/archive/07-student-hierarchical-finches.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction\n", 8 | "\n", 9 | "This notebook is designed to be the \"exercise\" notebook for you to practice defining hierarchical models. We will do this with the finches dataset again." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "import pymc3 as pm\n", 20 | "from data import load_finches_2012\n", 21 | "import arviz as az\n", 22 | "\n", 23 | "%load_ext autoreload\n", 24 | "%autoreload 2\n", 25 | "%matplotlib inline\n", 26 | "%config InlineBackend.figure_format = 'retina'" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "df = load_finches_2012()\n", 36 | "df.groupby('species').size()" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "df.sample(5)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "df.groupby('species')['beak_depth'].describe()" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "fortis_filter = df['species'] == 'fortis'\n", 64 | "scandens_filter = df['species'] == 'scandens'\n", 65 | "unknown_filter = df['species'] == 'unknown'" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "**Exercise:** Define a hierarchical model for the finches beak depths. 
For bonus points, use NumPy-like fancy indexing!\n", 73 | "\n", 74 | "If you'd like a hint, one possible model you can implement is shown below.\n", 75 | "\n", 76 | "" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "with pm.Model() as beak_depth_model:\n", 86 | " # SD can only be positive, therefore it is reasonable to constrain to >0\n", 87 | " # Likewise for betas.\n", 88 | " sd_hyper = pm._________('sd_hyper', _________)\n", 89 | " beta_hyper = pm._________('beta_hyper', _________)\n", 90 | " \n", 91 | " # Beaks cannot be of \"negative\" mean, therefore, HalfNormal is \n", 92 | " # a reasonable, constrained prior.\n", 93 | " mean = pm._________('mean', _________, shape=_________)\n", 94 | " sd = pm._________('sd', _________, shape=_________)\n", 95 | " nu = pm._________('nu', _________) + 1\n", 96 | " \n", 97 | " # Define the likelihood distribution for the data.\n", 98 | " " 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "Sample from the posterior distribution!" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "# Your code below.\n" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": {}, 120 | "source": [ 121 | "Visualize the traceplots to check for convergence." 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": {}, 128 | "outputs": [], 129 | "source": [ 130 | "# Your code below\n" 131 | ] 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "metadata": {}, 136 | "source": [ 137 | "Visualize the posterior distributions using the `plot_posterior` or `forestplot` functions." 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [ 146 | "ax1, ax2, ax3 = pm.plot_posterior(trace, varnames=['mean'])\n", 147 | "ax1.set_title('fortis')\n", 148 | "ax2.set_title('scandens')\n", 149 | "ax3.set_title('unknown')" 150 | ] 151 | }, 152 | { 153 | "cell_type": "markdown", 154 | "metadata": {}, 155 | "source": [ 156 | "Now, repeat the model specification for beak length." 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "# Model definition" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "# Sample from posterior" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "# Check for convergence" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "# Plot posterior distribution" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "**Discuss:** \n", 200 | "\n", 201 | "- Are the estimates for the unknown species' beak depth and beak length more reasonable? How so?" 
202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [] 210 | } 211 | ], 212 | "metadata": { 213 | "kernelspec": { 214 | "display_name": "bayesian-modelling-tutorial", 215 | "language": "python", 216 | "name": "bayesian-modelling-tutorial" 217 | }, 218 | "language_info": { 219 | "codemirror_mode": { 220 | "name": "ipython", 221 | "version": 3 222 | }, 223 | "file_extension": ".py", 224 | "mimetype": "text/x-python", 225 | "name": "python", 226 | "nbconvert_exporter": "python", 227 | "pygments_lexer": "ipython3", 228 | "version": "3.7.2" 229 | } 230 | }, 231 | "nbformat": 4, 232 | "nbformat_minor": 2 233 | } 234 | -------------------------------------------------------------------------------- /docs/pandoc.css: -------------------------------------------------------------------------------- 1 | /* 2 | * I add this to html files generated with pandoc. 3 | */ 4 | 5 | html { 6 | font-size: 100%; 7 | overflow-y: scroll; 8 | -webkit-text-size-adjust: 100%; 9 | -ms-text-size-adjust: 100%; 10 | } 11 | 12 | body { 13 | color: #444; 14 | font-family: Georgia, Palatino, 'Palatino Linotype', Times, 'Times New Roman', serif; 15 | font-size: 12px; 16 | line-height: 1.7; 17 | padding: 1em; 18 | margin: auto; 19 | max-width: 42em; 20 | background: #fefefe; 21 | } 22 | 23 | a { 24 | color: #0645ad; 25 | text-decoration: none; 26 | } 27 | 28 | a:visited { 29 | color: #0b0080; 30 | } 31 | 32 | a:hover { 33 | color: #06e; 34 | } 35 | 36 | a:active { 37 | color: #faa700; 38 | } 39 | 40 | a:focus { 41 | outline: thin dotted; 42 | } 43 | 44 | *::-moz-selection { 45 | background: rgba(255, 255, 0, 0.3); 46 | color: #000; 47 | } 48 | 49 | *::selection { 50 | background: rgba(255, 255, 0, 0.3); 51 | color: #000; 52 | } 53 | 54 | a::-moz-selection { 55 | background: rgba(255, 255, 0, 0.3); 56 | color: #0645ad; 57 | } 58 | 59 | a::selection { 60 | background: rgba(255, 255, 0, 0.3); 61 | color: #0645ad; 62 | } 63 | 64 | p { 65 | margin: 1em 0; 66 | } 67 | 68 | img { 69 | max-width: 100%; 70 | } 71 | 72 | h1, h2, h3, h4, h5, h6 { 73 | color: #111; 74 | line-height: 125%; 75 | margin-top: 2em; 76 | font-weight: normal; 77 | } 78 | 79 | h4, h5, h6 { 80 | font-weight: bold; 81 | } 82 | 83 | h1 { 84 | font-size: 2.5em; 85 | } 86 | 87 | h2 { 88 | font-size: 2em; 89 | } 90 | 91 | h3 { 92 | font-size: 1.5em; 93 | } 94 | 95 | h4 { 96 | font-size: 1.2em; 97 | } 98 | 99 | h5 { 100 | font-size: 1em; 101 | } 102 | 103 | h6 { 104 | font-size: 0.9em; 105 | } 106 | 107 | blockquote { 108 | color: #666666; 109 | margin: 0; 110 | padding-left: 3em; 111 | border-left: 0.5em #EEE solid; 112 | } 113 | 114 | hr { 115 | display: block; 116 | height: 2px; 117 | border: 0; 118 | border-top: 1px solid #aaa; 119 | border-bottom: 1px solid #eee; 120 | margin: 1em 0; 121 | padding: 0; 122 | } 123 | 124 | pre, code, kbd, samp { 125 | color: #000; 126 | font-family: monospace, monospace; 127 | _font-family: 'courier new', monospace; 128 | font-size: 0.98em; 129 | } 130 | 131 | pre { 132 | white-space: pre; 133 | white-space: pre-wrap; 134 | word-wrap: break-word; 135 | } 136 | 137 | b, strong { 138 | font-weight: bold; 139 | } 140 | 141 | dfn { 142 | font-style: italic; 143 | } 144 | 145 | ins { 146 | background: #ff9; 147 | color: #000; 148 | text-decoration: none; 149 | } 150 | 151 | mark { 152 | background: #ff0; 153 | color: #000; 154 | font-style: italic; 155 | font-weight: bold; 156 | } 157 | 158 | sub, sup { 159 | font-size: 75%; 160 | line-height: 0; 
161 | position: relative; 162 | vertical-align: baseline; 163 | } 164 | 165 | sup { 166 | top: -0.5em; 167 | } 168 | 169 | sub { 170 | bottom: -0.25em; 171 | } 172 | 173 | ul, ol { 174 | margin: 1em 0; 175 | padding: 0 0 0 2em; 176 | } 177 | 178 | li p:last-child { 179 | margin-bottom: 0; 180 | } 181 | 182 | ul ul, ol ol { 183 | margin: .3em 0; 184 | } 185 | 186 | dl { 187 | margin-bottom: 1em; 188 | } 189 | 190 | dt { 191 | font-weight: bold; 192 | margin-bottom: .8em; 193 | } 194 | 195 | dd { 196 | margin: 0 0 .8em 2em; 197 | } 198 | 199 | dd:last-child { 200 | margin-bottom: 0; 201 | } 202 | 203 | img { 204 | border: 0; 205 | -ms-interpolation-mode: bicubic; 206 | vertical-align: middle; 207 | } 208 | 209 | figure { 210 | display: block; 211 | text-align: center; 212 | margin: 1em 0; 213 | } 214 | 215 | figure img { 216 | border: none; 217 | margin: 0 auto; 218 | } 219 | 220 | figcaption { 221 | font-size: 0.8em; 222 | font-style: italic; 223 | margin: 0 0 .8em; 224 | } 225 | 226 | table { 227 | margin-bottom: 2em; 228 | border-bottom: 1px solid #ddd; 229 | border-right: 1px solid #ddd; 230 | border-spacing: 0; 231 | border-collapse: collapse; 232 | } 233 | 234 | table th { 235 | padding: .2em 1em; 236 | background-color: #eee; 237 | border-top: 1px solid #ddd; 238 | border-left: 1px solid #ddd; 239 | } 240 | 241 | table td { 242 | padding: .2em 1em; 243 | border-top: 1px solid #ddd; 244 | border-left: 1px solid #ddd; 245 | vertical-align: top; 246 | } 247 | 248 | .author { 249 | font-size: 1.2em; 250 | text-align: center; 251 | } 252 | 253 | @media only screen and (min-width: 480px) { 254 | body { 255 | font-size: 14px; 256 | } 257 | } 258 | @media only screen and (min-width: 768px) { 259 | body { 260 | font-size: 16px; 261 | } 262 | } 263 | @media print { 264 | * { 265 | background: transparent !important; 266 | color: black !important; 267 | filter: none !important; 268 | -ms-filter: none !important; 269 | } 270 | 271 | body { 272 | font-size: 12pt; 273 | max-width: 100%; 274 | } 275 | 276 | a, a:visited { 277 | text-decoration: underline; 278 | } 279 | 280 | hr { 281 | height: 1px; 282 | border: 0; 283 | border-bottom: 1px solid black; 284 | } 285 | 286 | a[href]:after { 287 | content: " (" attr(href) ")"; 288 | } 289 | 290 | abbr[title]:after { 291 | content: " (" attr(title) ")"; 292 | } 293 | 294 | .ir a:after, a[href^="javascript:"]:after, a[href^="#"]:after { 295 | content: ""; 296 | } 297 | 298 | pre, blockquote { 299 | border: 1px solid #999; 300 | padding-right: 1em; 301 | page-break-inside: avoid; 302 | } 303 | 304 | tr, img { 305 | page-break-inside: avoid; 306 | } 307 | 308 | img { 309 | max-width: 100% !important; 310 | } 311 | 312 | @page :left { 313 | margin: 15mm 20mm 15mm 10mm; 314 | } 315 | 316 | @page :right { 317 | margin: 15mm 10mm 15mm 20mm; 318 | } 319 | 320 | p, h2, h3 { 321 | orphans: 3; 322 | widows: 3; 323 | } 324 | 325 | h2, h3 { 326 | page-break-after: avoid; 327 | } 328 | } 329 | -------------------------------------------------------------------------------- /src/bayes_tutorial/solutions/hierarchical.py: -------------------------------------------------------------------------------- 1 | from daft import PGM, Node 2 | 3 | 4 | def hierarchical_p(): 5 | """A naive representation of the hierarchical p that we desire.""" 6 | G = PGM() 7 | G.add_node("p_shop", content=r"$p_{j, i}$", x=1, y=2, scale=1.2) 8 | G.add_node( 9 | "likes", content="$l_{j, i}$", x=1, y=1, scale=1.2, observed=True 10 | ) 11 | G.add_node("p_owner", content=r"$p_{j}$", x=1, y=3, 
scale=1.2) 12 | G.add_node("p_pop", content=r"$p$", x=1, y=4, scale=1.2) 13 | 14 | G.add_edge("p_pop", "p_owner") 15 | G.add_edge("p_owner", "p_shop") 16 | G.add_edge("p_shop", "likes") 17 | 18 | G.add_plate(plate=[0.3, 0.3, 1.5, 2.2], label=r"shop $i$") 19 | G.add_plate(plate=[0, -0.1, 2.1, 3.6], label=r"owner $j$") 20 | 21 | G.render() 22 | 23 | 24 | def convoluted_hierarchical_p(): 25 | G = PGM() 26 | G.add_node( 27 | "likes", content="$l_{j, i}$", x=1, y=1, scale=1.2, observed=True 28 | ) 29 | G.add_node("p_shop", content="$p_{j, i}$", x=1, y=2, scale=1.2) 30 | G.add_node("alpha_owner", content=r"$\alpha_{j}$", x=0, y=3, scale=1.2) 31 | G.add_node("beta_owner", content=r"$\beta_{j}$", x=2, y=3, scale=1.2) 32 | G.add_node( 33 | "lambda_a_pop", content=r"$\lambda_{\alpha}$", x=0, y=4, scale=1.2 34 | ) 35 | G.add_node( 36 | "lambda_b_pop", content=r"$\lambda_{\beta}$", x=2, y=4, scale=1.2 37 | ) 38 | G.add_node( 39 | "tau_lambda_a", 40 | content=r"$\tau_{\lambda_{\alpha}}$", 41 | x=0, 42 | y=5, 43 | fixed=True, 44 | ) 45 | G.add_node( 46 | "tau_lambda_b", 47 | content=r"$\tau_{\lambda_{\beta}}$", 48 | x=2, 49 | y=5, 50 | fixed=True, 51 | ) 52 | 53 | G.add_edge("alpha_owner", "p_shop") 54 | G.add_edge("beta_owner", "p_shop") 55 | G.add_edge("p_shop", "likes") 56 | G.add_edge("lambda_a_pop", "alpha_owner") 57 | G.add_edge("lambda_b_pop", "beta_owner") 58 | G.add_edge("tau_lambda_a", "lambda_a_pop") 59 | G.add_edge("tau_lambda_b", "lambda_b_pop") 60 | 61 | G.add_plate(plate=[0.5, 0.2, 1, 2.3], label=r"shop $i$") 62 | G.add_plate(plate=[-0.5, 0, 3, 3.5], label=r"owner $j$") 63 | G.render() 64 | 65 | 66 | import matplotlib.pyplot as plt 67 | from scipy.stats import norm 68 | from scipy.special import expit 69 | import numpy as np 70 | import seaborn as sns 71 | 72 | 73 | def plot_mu_p(mu, sigma): 74 | xs = np.linspace(mu - sigma * 4, mu + sigma * 4, 1000) 75 | ys = norm(loc=mu, scale=sigma).pdf(xs) 76 | 77 | fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4), sharey=True) 78 | ax[0].plot(xs, ys) 79 | ax[0].set_xlabel(r"$\mu$") 80 | ax[0].set_ylabel("PDF") 81 | ax[0].axvline(x=0, color="red") 82 | ax[0].set_title("Gaussian Space") 83 | ax[1].plot(expit(xs), ys) 84 | ax[1].set_xlim(0, 1) 85 | ax[1].set_xlabel(r"p = invlogit($\mu$)") 86 | ax[1].set_title("Bounded space") 87 | sns.despine() 88 | plt.show() 89 | 90 | 91 | def hierarchical_pgm(): 92 | G = PGM() 93 | 94 | tfm_plot_params = {"ec": "red"} 95 | 96 | G.add_node( 97 | "likes", content=r"$l_{j,i}$", x=0, y=0, scale=1.2, observed=True 98 | ) 99 | G.add_node( 100 | "p_shop", 101 | content=r"$p_{j,i}$", 102 | x=0, 103 | y=1, 104 | scale=1.2, 105 | plot_params=tfm_plot_params, 106 | ) 107 | G.add_node("mu_shop", content=r"$\mu_{j,i}$", x=1, y=1, scale=1.2) 108 | G.add_node("mu_owner", content=r"$\mu_{j}$", x=1, y=2, scale=1.2) 109 | G.add_node("sigma_owner", content=r"$\sigma_{j}$", x=2, y=2, scale=1.2) 110 | G.add_node( 111 | "p_owner", 112 | content=r"$p_{j}$", 113 | x=0, 114 | y=2, 115 | scale=1.2, 116 | plot_params=tfm_plot_params, 117 | ) 118 | G.add_node("mu_population", content=r"$\mu$", x=1, y=3, scale=1.2) 119 | G.add_node( 120 | "sigma_population", 121 | content=r"$\sigma$", 122 | x=2, 123 | y=3, 124 | scale=1.2, 125 | fixed=True, 126 | ) 127 | G.add_node( 128 | "p_population", 129 | content="p", 130 | x=0, 131 | y=3, 132 | scale=1.2, 133 | plot_params=tfm_plot_params, 134 | ) 135 | G.add_node("lambda", content=r"$\lambda$", x=3, y=2, scale=1.2, fixed=True) 136 | G.add_node( 137 | "mean_population", content="mean", x=1, y=4, 
scale=1.2, fixed=True 138 | ) 139 | G.add_node( 140 | "variance_population", 141 | content="variance", 142 | x=2, 143 | y=4, 144 | scale=1.2, 145 | fixed=True, 146 | ) 147 | 148 | G.add_edge("mu_shop", "p_shop") 149 | G.add_edge("p_shop", "likes") 150 | G.add_edge("mu_owner", "mu_shop") 151 | G.add_edge("sigma_owner", "mu_shop") 152 | G.add_edge("mu_owner", "p_owner") 153 | G.add_edge("mu_population", "mu_owner") 154 | G.add_edge("sigma_population", "mu_owner") 155 | G.add_edge("mu_population", "p_population") 156 | G.add_edge("lambda", "sigma_owner") 157 | G.add_edge("mean_population", "mu_population") 158 | G.add_edge("variance_population", "mu_population") 159 | 160 | G.add_plate([-0.5, -0.5, 2, 2], label="shop $i$", position="bottom right") 161 | G.add_plate( 162 | [-0.7, -0.7, 3.2, 3.2], label="owner $j$", position="bottom right" 163 | ) 164 | 165 | G.render() 166 | 167 | 168 | import pymc3 as pm 169 | 170 | 171 | def ice_cream_hierarchical_model(data): 172 | """Hierarchical model for ice cream shops""" 173 | n_owners = len(data["owner_idx"].unique()) 174 | with pm.Model() as model: 175 | logit_p_overall = pm.Normal("logit_p_overall", mu=0, sigma=1) 176 | logit_p_owner_mean = pm.Normal( 177 | "logit_p_owner_mean", 178 | mu=logit_p_overall, 179 | sigma=1, 180 | shape=(n_owners,), 181 | ) 182 | logit_p_owner_scale = pm.Exponential( 183 | "logit_p_owner_scale", lam=1 / 5.0, shape=(n_owners,) 184 | ) 185 | logit_p_shop = pm.Normal( 186 | "logit_p_shop", 187 | mu=logit_p_owner_mean[data["owner_idx"]], 188 | sigma=logit_p_owner_scale[data["owner_idx"]], 189 | shape=(len(data),), 190 | ) 191 | 192 | p_overall = pm.Deterministic("p_overall", pm.invlogit(logit_p_overall)) 193 | p_shop = pm.Deterministic("p_shop", pm.invlogit(logit_p_shop)) 194 | p_owner = pm.Deterministic("p_owner", pm.invlogit(logit_p_owner_mean)) 195 | like = pm.Binomial( 196 | "like", 197 | n=data["num_customers"], 198 | p=p_shop, 199 | observed=data["num_favs"], 200 | ) 201 | return model 202 | -------------------------------------------------------------------------------- /data/finch_beaks_2012.csv: -------------------------------------------------------------------------------- 1 | band,species,blength,bdepth 2 | 19022,fortis,10,8.5 3 | 19028,fortis,12.5,8.9 4 | 19032,fortis,9.3,7.5 5 | 19041,fortis,10.3,9.6 6 | 19044,fortis,11,9.2 7 | 19048,fortis,10.1,8.2 8 | 19072,fortis,9.6,7.8 9 | 19082,fortis,10.9,8.6 10 | 19104,fortis,10.3,8.4 11 | 19114,fortis,9.8,7.7 12 | 19121,fortis,10.1,8 13 | 19126,fortis,10.4,8.7 14 | 19146,fortis,9.6,8.1 15 | 19164,fortis,10.6,8.8 16 | 19174,fortis,10.6,9.4 17 | 19203,fortis,11.9,10 18 | 19210,fortis,11.3,9.6 19 | 19217,fortis,11.3,9.6 20 | 19224,fortis,9.7,8.1 21 | 19226,fortis,9.7,7.5 22 | 19252,fortis,10.1,8.4 23 | 19263,fortis,10,7.9 24 | 19274,fortis,10,8.3 25 | 19280,fortis,10,8.9 26 | 19288,fortis,11.5,9.1 27 | 19328,fortis,9.5,7.7 28 | 19349,fortis,11.2,8.3 29 | 19362,fortis,10.7,8.6 30 | 19372,fortis,10,8.4 31 | 19382,fortis,9.8,7.7 32 | 19384,fortis,10.9,9.1 33 | 19392,fortis,9.2,7.7 34 | 19394,fortis,10.2,9 35 | 19422,fortis,11.3,10.2 36 | 19439,fortis,10.3,8.1 37 | 19461,fortis,10.7,8.6 38 | 19482,fortis,10,8.4 39 | 19502,fortis,9.7,8.1 40 | 19511,fortis,9.9,8 41 | 19536,fortis,10.7,8.3 42 | 19563,fortis,11,10.3 43 | 19568,fortis,9.7,8 44 | 19602,fortis,10.5,8.8 45 | 19604,fortis,11.7,9 46 | 19614,fortis,10.8,9.3 47 | 19623,fortis,9.1,7.6 48 | 19627,fortis,10.9,8.2 49 | 19642,fortis,12.2,10 50 | 19649,fortis,10.9,8.2 51 | 19654,fortis,10.7,8.2 52 | 19674,fortis,10.4,8.3 53 | 
19682,fortis,9.7,7.8 54 | 19712,fortis,10.3,8 55 | 19734,fortis,9.8,8.4 56 | 19746,fortis,10.6,9.3 57 | 19749,fortis,10.5,8.9 58 | 19774,fortis,12.5,9.7 59 | 19782,fortis,9.2,7.6 60 | 19815,fortis,10.1,9 61 | 19820,fortis,10.6,8.6 62 | 19821,fortis,11.5,8.9 63 | 19829,fortis,10.8,8.6 64 | 19832,fortis,10.5,8.8 65 | 19835,fortis,10.1,8.5 66 | 19840,fortis,10.7,9.2 67 | 19849,fortis,9.6,8 68 | 19874,fortis,10.7,9.4 69 | 19878,fortis,10.1,7.7 70 | 19889,fortis,10,8.4 71 | 19914,fortis,10,8.5 72 | 19921,fortis,10.4,9 73 | 19922,fortis,10.4,9 74 | 19928,fortis,11.7,9.3 75 | 19932,fortis,10.6,8.7 76 | 19942,fortis,11.5,8.1 77 | 19946,fortis,10.7,8.7 78 | 19947,fortis,10.4,7.9 79 | 19952,fortis,10.1,7.7 80 | 19974,fortis,10.8,8.4 81 | 19993,fortis,11.5,9.25 82 | 19994,fortis,11.1,8.1 83 | 21049,fortis,9.9,8.3 84 | 21052,fortis,10.5,8.5 85 | 21080,fortis,12.2,9.9 86 | 21082,fortis,10.9,8.4 87 | 21087,fortis,12.9,9.9 88 | 21088,fortis,9.7,7.2 89 | 21089,fortis,9.7,8.2 90 | 21090,fortis,10,8.2 91 | 21129,fortis,10.1,8.3 92 | 21160,fortis,10.2,8.4 93 | 21161,fortis,9.6,7.5 94 | 21162,fortis,9.9,8.2 95 | 21165,fortis,10.6,8.6 96 | 21169,fortis,9.8,8.2 97 | 21191,fortis,11.1,8.8 98 | 21244,fortis,10,8.5 99 | 21247,fortis,10.9,8.1 100 | 21249,fortis,10,8.3 101 | 21258,fortis,10.6,8.5 102 | 21259,fortis,10.7,8.1 103 | 21261,fortis,9.4,7.3 104 | 21262,fortis,10.1,8 105 | 21265,fortis,11.8,10.2 106 | 21266,fortis,12.2,11.1 107 | 21272,fortis,12.9,9.9 108 | 21273,fortis,10.1,8.7 109 | 21276,fortis,10.1,8.3 110 | 21277,fortis,9,7.8 111 | 21282,fortis,11.7,9.9 112 | 21283,fortis,10.9,10.3 113 | 21287,fortis,10.4,8.4 114 | 21293,fortis,12.7,8.7 115 | 21294,fortis,10.5,9.8 116 | 21296,fortis,9.6,8.7 117 | 21298,fortis,10.6,9 118 | 21299,fortis,10.4,7.8 119 | 21341,fortis,10.5,8.5 120 | 21343,fortis,10.1,8.2 121 | 21349,fortis,10.6,9.2 122 | 22000,fortis,10.6,9 123 | 19026,scandens,14.3,9.4 124 | 19028,scandens,12.5,8.9 125 | 19029,scandens,13.7,9.5 126 | 19094,scandens,13.8,11 127 | 19122,scandens,12,8.7 128 | 19125,scandens,13,8.4 129 | 19129,scandens,13,9.1 130 | 19172,scandens,13.6,8.7 131 | 19182,scandens,12.8,10.2 132 | 19212,scandens,13.6,9.6 133 | 19214,scandens,12.95,8.85 134 | 19244,scandens,13.1,8.8 135 | 19251,scandens,13.4,9.5 136 | 19260,scandens,13.9,9.2 137 | 19270,scandens,12.3,9 138 | 19278,scandens,14,9.8 139 | 19289,scandens,12.5,9.3 140 | 19299,scandens,12.3,9 141 | 19312,scandens,13.9,10.2 142 | 19326,scandens,13.1,7.7 143 | 19343,scandens,12.5,9 144 | 19374,scandens,13.9,9.5 145 | 19401,scandens,13.7,9.4 146 | 19406,scandens,12,8 147 | 19408,scandens,14.4,8.9 148 | 19426,scandens,13.5,9.4 149 | 19430,scandens,13.8,9.5 150 | 19433,scandens,13,8 151 | 19438,scandens,14.9,10 152 | 19452,scandens,12.5,8.95 153 | 19466,scandens,12.3,8.2 154 | 19469,scandens,12.8,8.8 155 | 19486,scandens,13.4,9.2 156 | 19492,scandens,13.8,9.4 157 | 19493,scandens,13.5,9.5 158 | 19494,scandens,13.5,8.1 159 | 19495,scandens,13.4,9.5 160 | 19496,scandens,12.3,8.4 161 | 19497,scandens,14.35,9.3 162 | 19510,scandens,13.2,9.3 163 | 19513,scandens,13.8,9.6 164 | 19518,scandens,14.6,9.2 165 | 19526,scandens,14.3,10 166 | 19527,scandens,13.8,8.9 167 | 19528,scandens,13.6,10.5 168 | 19543,scandens,12.9,8.9 169 | 19553,scandens,13,8.6 170 | 19554,scandens,13.5,8.8 171 | 19573,scandens,13.2,9.15 172 | 19592,scandens,13.7,9.5 173 | 19594,scandens,13.1,9.1 174 | 19597,scandens,13.2,10.2 175 | 19598,scandens,12.6,8.4 176 | 19599,scandens,13,10 177 | 19619,scandens,13.9,10.2 178 | 19622,scandens,13.2,9.3 179 | 
19652,scandens,15,10.8 180 | 19653,scandens,13.37,8.3 181 | 19664,scandens,11.4,7.8 182 | 19692,scandens,13.8,9.8 183 | 19720,scandens,13,7.9 184 | 19740,scandens,13,8.9 185 | 19747,scandens,13.1,7.7 186 | 19766,scandens,12.8,8.9 187 | 19783,scandens,13.3,9.4 188 | 19844,scandens,13.5,9.4 189 | 19848,scandens,12.4,8.5 190 | 19852,scandens,13.1,8.5 191 | 19854,scandens,14,9.6 192 | 19855,scandens,13.5,10.2 193 | 19868,scandens,11.8,8.8 194 | 19882,scandens,13.7,9.5 195 | 19900,scandens,13.2,9.3 196 | 19910,scandens,12.2,9 197 | 19936,scandens,13,9.2 198 | 19940,scandens,13.1,8.7 199 | 19941,scandens,14.7,9 200 | 19951,scandens,13.7,9.1 201 | 19955,scandens,13.5,8.7 202 | 19956,scandens,13.3,9.4 203 | 21040,scandens,14.1,9.8 204 | 21041,scandens,12.5,8.6 205 | 21045,scandens,13.7,10.6 206 | 21047,scandens,14.6,9 207 | 21053,scandens,14.1,9.5 208 | 21057,scandens,12.9,8.1 209 | 21070,scandens,13.9,9.3 210 | 21081,scandens,13.4,9.6 211 | 21092,scandens,13,8.5 212 | 21093,scandens,12.7,8.2 213 | 21106,scandens,12.1,8 214 | 21109,scandens,14,9.5 215 | 21111,scandens,14.9,9.7 216 | 21113,scandens,13.9,9.9 217 | 21131,scandens,12.9,9.1 218 | 21135,scandens,14.6,9.5 219 | 21136,scandens,14,9.8 220 | 21159,scandens,13,8.4 221 | 21167,scandens,12.7,8.3 222 | 21176,scandens,14,9.6 223 | 21248,scandens,14.1,9.4 224 | 21253,scandens,14.1,10 225 | 21255,scandens,13,8.9 226 | 21256,scandens,13.5,9.1 227 | 21257,scandens,13.4,9.8 228 | 21260,scandens,13.9,9.3 229 | 21263,scandens,13.1,9.9 230 | 21267,scandens,12.9,8.9 231 | 21268,scandens,14,8.5 232 | 21270,scandens,14,10.6 233 | 21271,scandens,14.1,9.3 234 | 21278,scandens,14.7,8.9 235 | 21279,scandens,13.4,8.9 236 | 21280,scandens,13.8,9.7 237 | 21281,scandens,13.4,9.8 238 | 21285,scandens,13.8,10.5 239 | 21286,scandens,12.4,8.4 240 | 21288,scandens,14.1,10 241 | 21289,scandens,12.9,9 242 | 21290,scandens,13.9,8.7 243 | 21291,scandens,14.3,8.8 244 | 21292,scandens,13.2,8.4 245 | 21295,scandens,14.2,9.3 246 | 21297,scandens,13,9.8 247 | 21340,scandens,14.6,8.9 248 | 21342,scandens,13.1,9.8 249 | 21347,scandens,15.2,9.1 250 | 99999,unknown,19.3,6.8 251 | -------------------------------------------------------------------------------- /notebooks/05-student-bayesian-curve-regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pymc3 as pm\n", 10 | "import matplotlib.pyplot as plt\n", 11 | "import numpy as np\n", 12 | "from utils import ECDF\n", 13 | "from data import load_decay\n", 14 | "import pandas as pd\n", 15 | "import theano.tensor as tt\n", 16 | "import arviz as az\n", 17 | "\n", 18 | "%load_ext autoreload\n", 19 | "%autoreload 2\n", 20 | "%matplotlib inline\n", 21 | "%config InlineBackend.figure_format = 'retina'" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "# Arbitrary Curve Regression\n", 29 | "\n", 30 | "Now that you've learned about Bayesian estimation, we're going to explore one more topic: Bayesian curve fitting.\n", 31 | "\n", 32 | "By \"curve fitting\", we're really talking about any curve: those that are bendy, those that are straight, and those that are in between. \n", 33 | "\n", 34 | "In order to reinforce this point, rather than show you plain vanilla linear regression, we will work through an exponential decay curve example." 
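, "\n", "Concretely, one convenient family of curves for this scenario (the same functional form listed in the summary table at the end of this notebook) is\n", "\n", "$$y = Ae^{-t/\tau} + C$$\n", "\n", "where $A$ is the activity at time zero, $\tau$ is the decay constant that we want to characterize, and $C$ absorbs any constant bias in the measurements."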
35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "## Problem Setup\n", 42 | "\n", 43 | "You've taken radioactive decay measurements of an unknown element in a secure facility. The measurements are noisy, though, and potentially have some bias. In the face of this, we would like to be able to characterize the decay constant of this unknown material, potentially leading to an identification of the material." 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "### Load Data\n", 51 | "\n", 52 | "Let's load in the data." 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "np.random.seed(42)\n", 62 | "\n", 63 | "df = load_decay()\n", 64 | "df.head(5)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "markdown", 69 | "metadata": {}, 70 | "source": [ 71 | "Plot `activity` vs. `time`." 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "ax = df['activity'].plot()" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "### Discussion\n", 88 | "\n", 89 | "- For the scenario that we're in, what is a plausible equation that links time to activity?\n", 90 | "- What are the key parameters that we need to worry about?\n", 91 | "- What might be justifiable priors for them?" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "### Link Functions\n", 99 | "\n", 100 | "If we were to draw out a model for the curve above, what might it look like?\n", 101 | "\n", 102 | "(To reveal one possible model, double-click on this Markdown cell and remove the `z` from the end of the filename.)\n", 103 | "\n", 104 | "![](../images/radioactive-decay-model.jpgz)\n", 105 | "\n", 106 | "The most important part of this diagram is the \"link function\" - this is what \"links\" the data to the output. In this case, we've used the exponential decay curve as the link function, but if you were doing a linear regression model, all you would have to do is to change the link function for the $y=mx+c$ \"straight curve\", and do another curve fit with the appropriate priors for $m$ and $c$.\n", 107 | "\n", 108 | "If you're familiar with the mathematical groundings of deep learning, you'll immediately recognize that a deep neural network model is merely another instance of a really complicated link function that links the input data $x$ to the observed data $y$, with the model weights and biases corresponding to the parameters (let's collectively call this set of parameters $\theta$)." 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "### Hands-on: Model Implementation\n", 116 | "\n", 117 | "Now that you've seen a pictorial description of the model, implement it below in PyMC3."
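, "\n", "If you'd like a hint, here is a minimal sketch of one possible implementation. It assumes the exponential decay link function from the diagram, a time column named `t` (check `df.head()` for the actual column name in your copy of the data), and weakly-informative priors chosen purely for illustration:\n", "\n", "```python\n", "with pm.Model() as decay_model:\n", "    # Link function parameters; all constrained to be positive.\n", "    A = pm.HalfNormal(\"A\", sigma=10)      # activity at time zero\n", "    tau = pm.HalfNormal(\"tau\", sigma=10)  # decay constant\n", "    C = pm.HalfNormal(\"C\", sigma=5)       # constant measurement bias\n", "\n", "    # The link function: expected activity at each time point.\n", "    mu = A * pm.math.exp(-df[\"t\"] / tau) + C\n", "\n", "    # Noise model for the measurements around the curve.\n", "    sigma = pm.HalfCauchy(\"sigma\", beta=5)\n", "    pm.Normal(\"activity\", mu=mu, sigma=sigma, observed=df[\"activity\"])\n", "```"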
118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "with pm.Model() as model:\n", 127 | " # Put the model here.\n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " " 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "### Sample from the Posterior" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "# Put your code here\n" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "### Visual Diagnostics\n", 159 | "\n", 160 | "Check that sampling has converged." 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "az.plot_trace(trace);" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "az.plot_posterior(trace);" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "# Summary\n", 186 | "\n", 187 | "- In lieu of showing you a \"straight curve\" (line) fit, you've now seen an arbitrary curve fit.\n", 188 | "- As long as you can find a way to parameterize the curve with a function, you can perform inference on the curve's parameters.\n", 189 | "- That function is called the \"link function\", which provides the link between the parameters, data and the output.\n", 190 | "\n", 191 | "More generally, if\n", 192 | "\n", 193 | "$$y = f(x, \\theta)$$\n", 194 | "\n", 195 | "where $\\theta$ are merely a set of parameters, then you can perform inference on the curve's parameters $\\theta$. 
To make this clear:\n", 196 | "\n", 197 | "| curve name | functional form | parameters |\n", 198 | "|------------|-----------------|---------------------|\n", 199 | "| exponential decay | $y = Ae^{-t/\\tau} + C$ | $A$, $\\tau$, $C$|\n", 200 | "| sine curves | $y = A\\sin(\\omega x + \\phi)$ | $A$, $\\omega$, $\\phi$ |\n", 201 | "| linear regression | $y = mx + c$ | $m$, $c$ |\n", 202 | "| logistic regression | $y = L(mx + c)$ | $m$, $c$ |\n", 203 | "| 4-parameter IC50 | $y = \\frac{a - i}{1 + 10^{\\beta(log(\\tau) - x)}} + i$ | $a$, $i$, $\\tau$, $\\beta$ |\n", 204 | "| deep learning | $y = f(x, \\theta)$ | $\\theta$ |" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "metadata": {}, 211 | "outputs": [], 212 | "source": [] 213 | } 214 | ], 215 | "metadata": { 216 | "kernelspec": { 217 | "display_name": "bayesian-modelling-tutorial", 218 | "language": "python", 219 | "name": "bayesian-modelling-tutorial" 220 | }, 221 | "language_info": { 222 | "codemirror_mode": { 223 | "name": "ipython", 224 | "version": 3 225 | }, 226 | "file_extension": ".py", 227 | "mimetype": "text/x-python", 228 | "name": "python", 229 | "nbconvert_exporter": "python", 230 | "pygments_lexer": "ipython3", 231 | "version": "3.7.3" 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 2 236 | } 237 | -------------------------------------------------------------------------------- /notebooks/archive/07-instructor-hierarchical-finches.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction\n", 8 | "\n", 9 | "This notebook is designed to be the \"exercise\" notebook for you to practice defining hierarchical models. We will do this with the finches dataset again." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "import pymc3 as pm\n", 20 | "from data import load_finches_2012\n", 21 | "from utils import despine_traceplot\n", 22 | "import arviz as az\n", 23 | "\n", 24 | "%load_ext autoreload\n", 25 | "%autoreload 2\n", 26 | "%matplotlib inline\n", 27 | "%config InlineBackend.figure_format = 'retina'" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "df = load_finches_2012()\n", 37 | "df.groupby('species').size()" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "df.sample(5)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "df.groupby('species')['beak_depth'].describe()" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "fortis_filter = df['species'] == 'fortis'\n", 65 | "scandens_filter = df['species'] == 'scandens'\n", 66 | "unknown_filter = df['species'] == 'unknown'" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "**Exercise:** Define a hierarchical model for the finches beak depths. 
For bonus points, use NumPy-like fancy indexing!\n", 74 | "\n", 75 | "If you'd like a hint, one possible model you can implement is shown below.\n", 76 | "\n", 77 | "" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "with pm.Model() as beak_depth_model:\n", 87 | " # SD can only be positive, therefore it is reasonable to constrain to >0\n", 88 | " # Likewise for betas.\n", 89 | " sd_hyper = pm.HalfCauchy('sd_hyper', beta=100)\n", 90 | " beta_hyper = pm.HalfCauchy('beta_hyper', beta=100)\n", 91 | " \n", 92 | " # Beaks cannot be of \"negative\" mean, therefore, HalfNormal is \n", 93 | " # a reasonable, constrained prior.\n", 94 | " mean = pm.HalfNormal('mean', sd=sd_hyper, shape=(3,))\n", 95 | " sd = pm.HalfCauchy('sd', beta=beta_hyper, shape=(3,))\n", 96 | " nu = pm.Exponential('nu', lam=1/29.) + 1\n", 97 | " \n", 98 | " # Define the likelihood distribution for the data.\n", 99 | " like = pm.StudentT('likelihood', \n", 100 | " nu=nu,\n", 101 | " mu=mean[df['species_enc']], \n", 102 | " sd=sd[df['species_enc']], \n", 103 | " observed=df['beak_depth'])" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "Sample from the posterior distribution!" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": {}, 117 | "outputs": [], 118 | "source": [ 119 | "with beak_depth_model:\n", 120 | " trace = pm.sample(2000, nuts_kwargs={'target_accept': 0.95})" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "Visualize the traceplots to check for convergence." 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "traces = az.plot_trace(trace, var_names=['mean'])\n", 137 | "despine_traceplot(traces)" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "metadata": {}, 143 | "source": [ 144 | "Visualize the posterior distributions using the `plot_posterior` or `forestplot` functions." 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "ax1, ax2, ax3 = az.plot_posterior(trace, var_names=['mean'])\n", 154 | "ax1.set_title('fortis')\n", 155 | "ax2.set_title('scandens')\n", 156 | "ax3.set_title('unknown')" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "Now, repeat the model specification for beak length." 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": {}, 170 | "outputs": [], 171 | "source": [ 172 | "with pm.Model() as beak_length_model:\n", 173 | " # SD can only be positive, therefore it is reasonable to constrain to >0\n", 174 | " # Likewise for betas.\n", 175 | " sd_hyper = pm.HalfCauchy('sd_hyper', beta=100)\n", 176 | " beta_hyper = pm.HalfCauchy('beta_hyper', beta=100)\n", 177 | " \n", 178 | " # Beaks cannot be of \"negative\" mean, therefore, HalfNormal is \n", 179 | " # a reasonable, constrained prior.\n", 180 | " mean = pm.HalfNormal('mean', sd=sd_hyper, shape=(3,))\n", 181 | " sd = pm.HalfCauchy('sd', beta=beta_hyper, shape=(3,))\n", 182 | " nu = pm.Exponential('nu', lam=1/29.) 
+ 1\n", 183 | " \n", 184 | " # Define the likelihood distribution for the data.\n", 185 | " like = pm.StudentT('likelihood', \n", 186 | " nu=nu,\n", 187 | " mu=mean[df['species_enc']], \n", 188 | " sd=sd[df['species_enc']], \n", 189 | " observed=df['beak_length'])" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": {}, 196 | "outputs": [], 197 | "source": [ 198 | "with beak_length_model:\n", 199 | " trace = pm.sample(2000, nuts_kwargs={'target_accept': 0.95})" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "traces = az.plot_trace(trace)\n", 209 | "despine_traceplot(traces)" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "ax1, ax2, ax3 = az.plot_posterior(trace, var_names=['mean'])\n", 219 | "ax1.set_title('fortis')\n", 220 | "ax2.set_title('scandens')\n", 221 | "ax3.set_title('unknown')" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "**Discuss:** \n", 229 | "\n", 230 | "- Are the estimates for the unknown species' beak depth and beak length more reasonable? How so?" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [] 239 | } 240 | ], 241 | "metadata": { 242 | "kernelspec": { 243 | "display_name": "bayesian-modelling-tutorial", 244 | "language": "python", 245 | "name": "bayesian-modelling-tutorial" 246 | }, 247 | "language_info": { 248 | "codemirror_mode": { 249 | "name": "ipython", 250 | "version": 3 251 | }, 252 | "file_extension": ".py", 253 | "mimetype": "text/x-python", 254 | "name": "python", 255 | "nbconvert_exporter": "python", 256 | "pygments_lexer": "ipython3", 257 | "version": "3.7.2" 258 | } 259 | }, 260 | "nbformat": 4, 261 | "nbformat_minor": 2 262 | } 263 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | If you're taking this tutorial at SciPy 2022, please pull the repository 9am CT the day of the tutorial to make sure that you have the most recent version! 2 | 3 | # bayesian-stats-modelling-tutorial 4 | 5 | [](https://mybinder.org/v2/gh/ericmjl/bayesian-stats-modelling-tutorial/master) 6 | 7 | How to do Bayesian statistical modelling using numpy and PyMC3. 8 | 9 | # for conference tutorial attendees 10 | 11 | If you're looking for the material for a specific conference tutorial, navigate to the notebooks directory and look for a subdirectory for the conference you're interested. For example, `notebooks/ODSC-East-2020-04-14` contains the material for [Hugo's ODSC East tutorial on April 14, 2020](https://odsc.com/speakers/bayesian-data-science-probabilistic-programming/). 12 | 13 | # getting started 14 | 15 | To get started, first identify whether you: 16 | 17 | - Would like to run the tutorial material on servers hosted elsewhere, to avoid installation, 18 | - Prefer to use the `conda` package manager (which ships with the Anaconda distribution of Python), 19 | - Prefer to use `pipenv`, which is a package authored by Kenneth Reitz for package management with `pip` and `virtualenv`, or 20 | - Only want to view the website version of the notebooks. 
21 | 22 | ## To run the tutorial material on servers elsewhere 23 | 24 | 25 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/ericmjl/bayesian-stats-modelling-tutorial/master) 26 | 27 | To do this, click on the [Binder](https://mybinder.readthedocs.io/en/latest/) badge above. This will spin up the necessary computational environment for you so you can write and execute Python code from the comfort of your browser. It is a free service. Due to this, the resources are not guaranteed, though they usually work well. If you want as close to a guarantee as possible, follow the instructions below to set up your computational environment locally (that is, on your own computer). 28 | 29 | ## 1. Clone the repository locally 30 | 31 | In your terminal, use `git` to clone the repository locally. 32 | 33 | ```bash 34 | git clone https://github.com/ericmjl/bayesian-stats-modelling-tutorial 35 | ``` 36 | 37 | Alternatively, you can download the zip file of the repository at the top of the main page of the repository. 38 | If you prefer not to use git or don't have experience with it, this is a good option. 39 | 40 | ## 2. Download Anaconda (if you haven't already) 41 | 42 | If you do not already have the [Anaconda distribution](https://www.anaconda.com/download/) of Python 3, 43 | go get it 44 | (note: you can also set up your project environment w/out Anaconda using `pip` to install the required packages; 45 | however Anaconda is great for Data Science and we encourage you to use it). 46 | 47 | ## 3. Set up your environment 48 | 49 | ### 3a. `conda` users 50 | 51 | If this is the first time you're setting up your compute environment, 52 | use the `conda` package manager 53 | to **install all the necessary packages** 54 | from the provided `environment.yml` file. 55 | 56 | ```bash 57 | conda env create -f binder/environment.yml 58 | ``` 59 | 60 | To **activate the environment**, use the `conda activate` command. 61 | 62 | ```bash 63 | conda activate bayesian-modelling-tutorial 64 | ``` 65 | 66 | **If you get an error activating the environment**, use the older `source activate` command. 67 | 68 | ```bash 69 | source activate bayesian-modelling-tutorial 70 | ``` 71 | 72 | To **update the environment** based on the `environment.yml` specification file, use the `conda env update` command. 73 | 74 | ```bash 75 | conda env update -f binder/environment.yml 76 | ``` 77 | 78 | ### 3b. `pip` users 79 | 80 | Please install all of the packages listed in the `environment.yml` file manually. 81 | An example command would be: 82 | 83 | ```bash 84 | pip install networkx scipy ... 85 | ``` 86 | 87 | ### 3c. don't want to mess with dev-ops 88 | 89 | If you don't want to mess around with dev-ops, click the following badge to get a Binder session on which you can compute and write code. 90 | 91 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/ericmjl/bayesian-stats-modelling-tutorial/master) 92 | 93 | 94 | ### 4a. Open your Jupyter notebook 95 | 96 | 1. You will have to install a new IPython kernelspec if you created a new conda environment with `binder/environment.yml`. 97 | 98 | python -m ipykernel install --user --name bayesian-modelling-tutorial --display-name "Python (bayesian-modelling-tutorial)" 99 | 100 | You can change the `--display-name` to anything you want, though if you leave it out, the kernel's display name will default to the value passed to the `--name` flag. 101 | 102 | 2. In the terminal, execute `jupyter notebook`.
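Before opening anything, you can optionally confirm that the kernelspec from step 1 was registered, by listing the kernels Jupyter knows about and looking for `bayesian-modelling-tutorial` in the output: ```bash jupyter kernelspec list ```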
103 | 104 | Navigate to the notebooks directory 105 | and open the notebook `01-Student-Probability_a_simulated_introduction.ipynb`. 106 | 107 | ### 4b. Open your Jupyter notebook in Jupyter Lab! 108 | 109 | 110 | In the terminal, execute `jupyter lab`. 111 | 112 | Navigate to the notebooks directory 113 | and open the notebook `01-Student-Probability_a_simulated_introduction.ipynb`. 114 | 115 | Now, if you're using Jupyter Lab, you'll need to get ipywidgets working for Notebook 2. 116 | The documentation is [here](https://ipywidgets.readthedocs.io/en/latest/user_install.html#installing-the-jupyterlab-extension). 117 | 118 | In short, you'll need node installed & you'll need to run the following in your terminal: 119 | 120 | `jupyter labextension install @jupyter-widgets/jupyterlab-manager` 121 | 122 | ### 4c. Open your Jupyter notebook using Binder. 123 | 124 | Launch Binder using the button at the top of this README.md. Voila! 125 | 126 | ### 4d. Want to view static HTML notebooks 127 | 128 | If you're interested in only viewing the static HTML versions of the notebooks, 129 | the links are provided below: 130 | 131 | Part 1: Bayesian Data Science by Simulation 132 | 133 | - [Introduction to Probability](https://ericmjl.github.io/bayesian-stats-modelling-tutorial/notebooks/01-Instructor-Probability_a_simulated_introduction.html) 134 | - [Parameter Estimation and Hypothesis Testing](https://ericmjl.github.io/bayesian-stats-modelling-tutorial/notebooks/02-Instructor-Parameter_estimation_hypothesis_testing.html) 135 | 136 | Part 2: Bayesian Data Science by Probabilistic Programming 137 | 138 | - [Two Group Comparisons: Drug effect on IQ](https://ericmjl.github.io/bayesian-stats-modelling-tutorial/notebooks/03-instructor-two-group-iq.html) 139 | - [Multi-Group Comparisons: Multiple ways of sterilizing phones](https://ericmjl.github.io/bayesian-stats-modelling-tutorial/notebooks/04-instructor-multi-group-comparsion-sterilization.html) 140 | - [Two Group Comparisons: Darwin's Finches](https://ericmjl.github.io/bayesian-stats-modelling-tutorial/notebooks/05-instructor-two-group-comparison-finches.html) 141 | - [Hierarchical Modelling: Baseball](https://ericmjl.github.io/bayesian-stats-modelling-tutorial/notebooks/06-instructor-hierarchical-baseball.html) 142 | - [Hierarchical Modelling: Darwin's Finches](https://ericmjl.github.io/bayesian-stats-modelling-tutorial/notebooks/07-instructor-hierarchical-finches.html) 143 | - [Bayesian Curve Regression: Identifying Radioactive Element](https://ericmjl.github.io/bayesian-stats-modelling-tutorial/notebooks/08-bayesian-curve-regression.html) 144 | 145 | 146 | # Acknowledgements 147 | 148 | Development of this type of material is almost always a result of years of discussions between members of a community. 149 | We'd like to thank the community and to mention several people who have played pivotal roles in our understanding of the material: 150 | Michael Betancourt, 151 | Justin Bois, 152 | Allen Downey, 153 | Chris Fonnesbeck, 154 | Jake VanderPlas. 155 | Also, Andrew Gelman rocks! 156 | 157 | 158 | # Feedback 159 | 160 | Please leave feedback for us [here](https://ericma1.typeform.com/to/j88n8P)! 161 | We'll use this information to help improve the teaching and delivery of the material. 162 | 163 | # data credits 164 | 165 | Please see individual notebooks for dataset attribution. 166 | 167 | # Further Reading & Resources 168 | 169 | Further reading resources that are not specifically tied to any notebooks.
170 | 171 | - [Visualization in Bayesian workflow](https://arxiv.org/abs/1709.01449) 172 | - [PyMC3 examples gallery](https://docs.pymc.io/nb_examples/index.html) 173 | - [Bayesian Analysis Recipes](https://github.com/ericmjl/bayesian-analysis-recipes) 174 | - [Communicating uncertainty about facts, numbers and science](https://royalsocietypublishing.org/doi/full/10.1098/rsos.181870) 175 | - [Bernoulli's Fallacy by Aubrey Clayton](https://cup.columbia.edu/book/bernoullis-fallacy/9780231199940) 176 | -------------------------------------------------------------------------------- /data/baseballdb/core/AwardsManagers.csv: -------------------------------------------------------------------------------- 1 | playerID,awardID,yearID,lgID,tie,notes 2 | larusto01,BBWAA Manager of the Year,1983,AL,, 3 | lasorto01,BBWAA Manager of the Year,1983,NL,, 4 | andersp01,BBWAA Manager of the Year,1984,AL,, 5 | freyji99,BBWAA Manager of the Year,1984,NL,, 6 | coxbo01,BBWAA Manager of the Year,1985,AL,, 7 | herzowh01,BBWAA Manager of the Year,1985,NL,, 8 | mcnamjo99,BBWAA Manager of the Year,1986,AL,, 9 | lanieha01,BBWAA Manager of the Year,1986,NL,, 10 | andersp01,BBWAA Manager of the Year,1987,AL,, 11 | rodgebu01,BBWAA Manager of the Year,1987,NL,, 12 | larusto01,BBWAA Manager of the Year,1988,AL,, 13 | lasorto01,BBWAA Manager of the Year,1988,NL,, 14 | robinfr02,BBWAA Manager of the Year,1989,AL,, 15 | zimmedo01,BBWAA Manager of the Year,1989,NL,, 16 | torboje01,BBWAA Manager of the Year,1990,AL,, 17 | leylaji99,BBWAA Manager of the Year,1990,NL,, 18 | kellyto01,BBWAA Manager of the Year,1991,AL,, 19 | coxbo01,BBWAA Manager of the Year,1991,NL,, 20 | larusto01,BBWAA Manager of the Year,1992,AL,, 21 | leylaji99,BBWAA Manager of the Year,1992,NL,, 22 | lamonge01,BBWAA Manager of the Year,1993,AL,, 23 | bakerdu01,BBWAA Manager of the Year,1993,NL,, 24 | showabu99,BBWAA Manager of the Year,1994,AL,, 25 | aloufe01,BBWAA Manager of the Year,1994,NL,, 26 | pinielo01,BBWAA Manager of the Year,1995,AL,, 27 | baylodo01,BBWAA Manager of the Year,1995,NL,, 28 | oatesjo01,BBWAA Manager of the Year,1996,AL,Y, 29 | torrejo01,BBWAA Manager of the Year,1996,AL,Y, 30 | bochybr01,BBWAA Manager of the Year,1996,NL,, 31 | johnsda02,BBWAA Manager of the Year,1997,AL,, 32 | bakerdu01,BBWAA Manager of the Year,1997,NL,, 33 | torrejo01,BBWAA Manager of the Year,1998,AL,, 34 | dierkla01,BBWAA Manager of the Year,1998,NL,, 35 | williji03,BBWAA Manager of the Year,1999,AL,, 36 | mckeoja99,BBWAA Manager of the Year,1999,NL,, 37 | manueje01,BBWAA Manager of the Year,2000,AL,, 38 | bakerdu01,BBWAA Manager of the Year,2000,NL,, 39 | pinielo01,BBWAA Manager of the Year,2001,AL,, 40 | bowala01,BBWAA Manager of the Year,2001,NL,, 41 | sciosmi01,BBWAA Manager of the Year,2002,AL,, 42 | larusto01,BBWAA Manager of the Year,2002,NL,, 43 | penato01,BBWAA Manager of the Year,2003,AL,, 44 | mckeoja99,BBWAA Manager of the Year,2003,NL,, 45 | showabu99,BBWAA Manager of the Year,2004,AL,, 46 | coxbo01,BBWAA Manager of the Year,2004,NL,, 47 | guilloz01,BBWAA Manager of the Year,2005,AL,, 48 | coxbo01,BBWAA Manager of the Year,2005,NL,, 49 | leylaji99,BBWAA Manager of the Year,2006,AL,, 50 | girarjo01,BBWAA Manager of the Year,2006,NL,, 51 | wedgeer01,BBWAA Manager of the Year,2007,AL,, 52 | melvibo01,BBWAA Manager of the Year,2007,NL,, 53 | maddojo99,BBWAA Manager of the Year,2008,AL,, 54 | pinielo01,BBWAA Manager of the Year,2008,NL,, 55 | sciosmi01,BBWAA Manager of the Year,2009,AL,, 56 | tracyji01,BBWAA Manager of the Year,2009,NL,, 57 | 
gardero01,BBWAA Manager of the Year,2010,AL,, 58 | blackbu02,BBWAA Manager of the Year,2010,NL,, 59 | maddojo99,BBWAA Manager of the Year,2011,AL,, 60 | gibsoki01,BBWAA Manager of the Year,2011,NL,, 61 | melvibo01,BBWAA Manager of the Year,2012,AL,, 62 | johnsda02,BBWAA Manager of the Year,2012,NL,, 63 | francte01,BBWAA Manager of the Year,2013,AL,, 64 | hurdlcl01,BBWAA Manager of the Year,2013,NL,, 65 | mccarjo99,TSN Manager of the Year,1936,ML,, 66 | mckecbi01,TSN Manager of the Year,1937,ML,, 67 | mccarjo99,TSN Manager of the Year,1938,ML,, 68 | durocle01,TSN Manager of the Year,1939,ML,, 69 | mckecbi01,TSN Manager of the Year,1940,ML,, 70 | southbi01,TSN Manager of the Year,1941,ML,, 71 | southbi01,TSN Manager of the Year,1942,ML,, 72 | mccarjo99,TSN Manager of the Year,1943,ML,, 73 | sewellu01,TSN Manager of the Year,1944,ML,, 74 | bluegos01,TSN Manager of the Year,1945,ML,, 75 | dyered01,TSN Manager of the Year,1946,ML,, 76 | harribu01,TSN Manager of the Year,1947,ML,, 77 | meyerbi01,TSN Manager of the Year,1948,ML,, 78 | stengca01,TSN Manager of the Year,1949,ML,, 79 | rolfere01,TSN Manager of the Year,1950,ML,, 80 | durocle01,TSN Manager of the Year,1951,ML,, 81 | stanked01,TSN Manager of the Year,1952,ML,, 82 | stengca01,TSN Manager of the Year,1953,ML,, 83 | durocle01,TSN Manager of the Year,1954,ML,, 84 | alstowa01,TSN Manager of the Year,1955,ML,, 85 | tebbebi01,TSN Manager of the Year,1956,ML,, 86 | hutchfr01,TSN Manager of the Year,1957,ML,, 87 | stengca01,TSN Manager of the Year,1958,ML,, 88 | alstowa01,TSN Manager of the Year,1959,ML,, 89 | murtada01,TSN Manager of the Year,1960,ML,, 90 | houkra01,TSN Manager of the Year,1961,ML,, 91 | rignebi01,TSN Manager of the Year,1962,ML,, 92 | alstowa01,TSN Manager of the Year,1963,ML,, 93 | keanejo99,TSN Manager of the Year,1964,ML,, 94 | melesa01,TSN Manager of the Year,1965,ML,, 95 | bauerha01,TSN Manager of the Year,1966,ML,, 96 | willidi02,TSN Manager of the Year,1967,ML,, 97 | smithma01,TSN Manager of the Year,1968,ML,, 98 | hodgegi01,TSN Manager of the Year,1969,ML,, 99 | murtada01,TSN Manager of the Year,1970,ML,, 100 | foxch01,TSN Manager of the Year,1971,ML,, 101 | tannech01,TSN Manager of the Year,1972,ML,, 102 | mauchge01,TSN Manager of the Year,1973,ML,, 103 | virdobi01,TSN Manager of the Year,1974,ML,, 104 | johnsda01,TSN Manager of the Year,1975,ML,, 105 | ozarkda99,TSN Manager of the Year,1976,ML,, 106 | weaveea99,TSN Manager of the Year,1977,ML,, 107 | bambege01,TSN Manager of the Year,1978,ML,, 108 | weaveea99,TSN Manager of the Year,1979,ML,, 109 | virdobi01,TSN Manager of the Year,1980,ML,, 110 | martibi02,TSN Manager of the Year,1981,ML,, 111 | herzowh01,TSN Manager of the Year,1982,ML,, 112 | larusto01,TSN Manager of the Year,1983,ML,, 113 | freyji99,TSN Manager of the Year,1984,ML,, 114 | coxbo01,TSN Manager of the Year,1985,ML,, 115 | lanieha01,TSN Manager of the Year,1986,NL,, 116 | mcnamjo99,TSN Manager of the Year,1986,AL,, 117 | andersp01,TSN Manager of the Year,1987,AL,, 118 | rodgebu01,TSN Manager of the Year,1987,NL,, 119 | larusto01,TSN Manager of the Year,1988,AL,, 120 | leylaji99,TSN Manager of the Year,1988,NL,, 121 | robinfr02,TSN Manager of the Year,1989,AL,, 122 | zimmedo01,TSN Manager of the Year,1989,NL,, 123 | leylaji99,TSN Manager of the Year,1990,NL,, 124 | torboje01,TSN Manager of the Year,1990,AL,, 125 | coxbo01,TSN Manager of the Year,1991,NL,, 126 | kellyto01,TSN Manager of the Year,1991,AL,, 127 | larusto01,TSN Manager of the Year,1992,AL,, 128 | leylaji99,TSN Manager of the 
Year,1992,NL,, 129 | coxbo01,TSN Manager of the Year,1993,NL,, 130 | oatesjo01,TSN Manager of the Year,1993,AL,, 131 | aloufe01,TSN Manager of the Year,1994,NL,, 132 | showabu99,TSN Manager of the Year,1994,AL,, 133 | baylodo01,TSN Manager of the Year,1995,NL,, 134 | hargrmi01,TSN Manager of the Year,1995,AL,, 135 | bochybr01,TSN Manager of the Year,1996,NL,, 136 | oatesjo01,TSN Manager of the Year,1996,AL,, 137 | bakerdu01,TSN Manager of the Year,1997,NL,, 138 | johnsda02,TSN Manager of the Year,1997,AL,, 139 | bochybr01,TSN Manager of the Year,1998,NL,, 140 | torrejo01,TSN Manager of the Year,1998,AL,, 141 | coxbo01,TSN Manager of the Year,1999,NL,, 142 | williji03,TSN Manager of the Year,1999,AL,, 143 | bakerdu01,TSN Manager of the Year,2000,NL,, 144 | manueje01,TSN Manager of the Year,2000,AL,, 145 | bowala01,TSN Manager of the Year,2001,NL,, 146 | pinielo01,TSN Manager of the Year,2001,AL,, 147 | coxbo01,TSN Manager of the Year,2002,NL,, 148 | sciosmi01,TSN Manager of the Year,2002,AL,, 149 | coxbo01,TSN Manager of the Year,2003,NL,, 150 | penato01,TSN Manager of the Year,2003,AL,, 151 | coxbo01,TSN Manager of the Year,2004,NL,, 152 | gardero01,TSN Manager of the Year,2004,AL,, 153 | coxbo01,TSN Manager of the Year,2005,NL,, 154 | guilloz01,TSN Manager of the Year,2005,AL,, 155 | girarjo01,TSN Manager of the Year,2006,NL,, 156 | leylaji99,TSN Manager of the Year,2006,AL,, 157 | melvibo01,TSN Manager of the Year,2007,NL,, 158 | wedgeer01,TSN Manager of the Year,2007,AL,, 159 | maddojo99,TSN Manager of the Year,2008,AL,, 160 | gonzafr99,TSN Manager of the Year,2008,NL,, 161 | sciosmi01,TSN Manager of the Year,2009,AL,, 162 | tracyji01,TSN Manager of the Year,2009,NL,, 163 | gardero01,TSN Manager of the Year,2010,AL,, 164 | blackbu02,TSN Manager of the Year,2010,NL,, 165 | maddojo99,TSN Manager of the Year,2011,AL,, 166 | gibsoki01,TSN Manager of the Year,2011,NL,, 167 | showabu99,TSN Manager of the Year,2012,AL,, 168 | johnsda02,TSN Manager of the Year,2012,NL,, 169 | farrejo03,TSN Manager of the Year,2013,AL,, 170 | hurdlcl01,TSN Manager of the Year,2013,NL,, 171 | showabu99,TSN Manager of the Year,2014,AL,, 172 | willima04,TSN Manager of the Year,2014,NL,, 173 | molitpa01,TSN Manager of the Year,2015,AL,, 174 | collite99,TSN Manager of the Year,2015,NL,, 175 | showabu99,BBWAA Manager of the Year,2014,AL,, 176 | willima04,BBWAA Manager of the Year,2014,NL,, 177 | banisje01,BBWAA Manager of the Year,2015,AL,, 178 | maddojo99,BBWAA Manager of the Year,2015,NL,, 179 | francte01,BBWAA Manager of the Year,2016,AL,, 180 | roberda07,BBWAA Manager of the Year,2016,NL,, 181 | -------------------------------------------------------------------------------- /notebooks/archive/05-student-two-group-comparison-finches.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import janitor as jn\n", 11 | "import pymc3 as pm\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "import seaborn as sns\n", 14 | "import numpy as np\n", 15 | "from utils import ECDF\n", 16 | "import arviz as az\n", 17 | "\n", 18 | "%load_ext autoreload\n", 19 | "%autoreload 2\n", 20 | "%matplotlib inline\n", 21 | "%config InlineBackend.figure_format = 'retina'" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "# Darwin's Finches\n", 29 | "\n", 30 | "A research group has taken 
measurements of the descendants of the finches that Charles Darwin observed when he postulated the theory of evolution.\n", 31 | "\n", 32 | "We will be using Bayesian methods to analyze this data, specifically answering the question of how quantitatively different two species of birds' beaks are.\n", 33 | "\n", 34 | "## Data Credits\n", 35 | "\n", 36 | "The Darwin's finches datasets come from the paper, [40 years of evolution. Darwin's finches on Daphne Major Island][data]. \n", 37 | "\n", 38 | "One row of data has been added for pedagogical purposes.\n", 39 | "\n", 40 | "[data]: https://datadryad.org/resource/doi:10.5061/dryad.g6g3h \n", 41 | "\n", 42 | "Let's get started and load the data." 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "from data import load_finches_2012\n", 52 | "df = load_finches_2012()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "**Exercise:** View a random sample of the data to get a feel for the structure of the dataset." 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "# Your code below\n" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "**Note:** I have added one row of data, simulating the discovery of an \"unknown\" species of finch for which beak measurements have been taken.\n", 76 | "\n", 77 | "For pedagogical brevity, we will analyze only beak depth during the class. However, I would encourage you to perform a similar analysis for beak length as well." 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": {}, 84 | "outputs": [], 85 | "source": [ 86 | "# These are filters that we can use later on.\n", 87 | "fortis_filter = df['species'] == 'fortis'\n", 88 | "scandens_filter = df['species'] == 'scandens'\n", 89 | "unknown_filter = df['species'] == 'unknown'" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "**Exercise:** Recreate the estimation model for finch beak depths. A few things to note:\n", 97 | "\n", 98 | "- Practice using numpy-like fancy indexing.\n", 99 | "- Difference of means & effect size are optional.\n", 100 | "- Feel free to play around with other priors." 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "A visual representation of the model using distribution diagrams is as follows:\n", 108 | "\n", 109 | "![](../../images/darwins-finches-model.jpg)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "with pm.Model() as beak_depth_model:\n", 119 | "\n", 120 | " # Your model defined here.\n" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "**Exercise:** Perform MCMC sampling to estimate the posterior distribution of each parameter." 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "# Your code below.\n" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "**Exercise:** Diagnose whether the sampling has converged or not using trace plots."
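, "\n", "If you need a starting point, one possible check (a sketch that assumes the trace object from your sampling cell is named `trace`) is:\n", "\n", "```python\n", "az.plot_trace(trace);\n", "```\n", "\n", "Look for well-mixed, stationary chains that agree with each other."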
144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# Your code below.\n" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "**Exercise:** Visualize the posterior distribution over the parameters using the forest plot." 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": null, 165 | "metadata": {}, 166 | "outputs": [], 167 | "source": [ 168 | "# Your code below.\n" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "**Exercise:** Visualize the posterior distribution over the parameters using the `plot_posterior` function." 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": {}, 182 | "outputs": [], 183 | "source": [ 184 | "# Your code below.\n" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "**Discuss:**\n", 192 | "- Is the posterior distribution of beaks for the unknown species reasonable?" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "**Exercise:** Perform a posterior predictive check to visually diagnose whether the model describes the data generating process well or not." 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": null, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "samples = pm.sample_ppc(trace, model=beak_depth_model, samples=2000)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "Hint: Each column in the samples (key: \"likelihood\") corresponds to simulated measurements of each finch in the dataset. We can use fancy indexing along the columns (axis 1) to select out simulated measurements for each category, and then flatten the resultant array to get the full estimated distribution of values for each class." 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": {}, 222 | "outputs": [], 223 | "source": [ 224 | "fig = plt.figure()\n", 225 | "ax_fortis = fig.add_subplot(2, 1, 1)\n", 226 | "ax_scandens = fig.add_subplot(2, 1, 2, sharex=ax_fortis)\n", 227 | "\n", 228 | "# Extract just the fortis samples.\n", 229 | "\n", 230 | "# Compute the ECDF for the fortis samples.\n", 231 | "\n", 232 | "ax_fortis.plot(x_s, y_s, label='samples')\n", 233 | "\n", 234 | "# Extract just the fortis measurements.\n", 235 | "\n", 236 | "# Compute the ECDF for the fortis measurements.\n", 237 | "\n", 238 | "ax_fortis.plot(x, y, label='data')\n", 239 | "\n", 240 | "ax_fortis.legend()\n", 241 | "ax_fortis.set_title('fortis')\n", 242 | "\n", 243 | "# Extract just the scandens samples.\n", 244 | "\n", 245 | "# Compute the ECDF for the scandens samples\n", 246 | "\n", 247 | "ax_scandens.plot(x_s, y_s, label='samples')\n", 248 | "\n", 249 | "# Extract just the scandens measurements.\n", 250 | "\n", 251 | "# Compute the ECDF for the scandens measurements.\n", 252 | "\n", 253 | "\n", 254 | "ax_scandens.plot(x, y, label='data')\n", 255 | "ax_scandens.legend()\n", 256 | "ax_scandens.set_title('scandens')\n", 257 | "ax_scandens.set_xlabel('beak depth')\n", 258 | "\n", 259 | "plt.tight_layout()" 260 | ] 261 | }, 262 | { 263 | "cell_type": "markdown", 264 | "metadata": {}, 265 | "source": [ 266 | "## Summary\n", 267 | "\n", 268 | "1.
NumPy-like fancy indexing lets us write models in a concise fashion.\n", 269 | "1. Posterior estimates can show up as being \"unreasonable\", \"absurd\", or at the minimum, counter-intuitive, if we do not impose the right set of assumptions on the model.\n" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": null, 275 | "metadata": {}, 276 | "outputs": [], 277 | "source": [] 278 | } 279 | ], 280 | "metadata": { 281 | "kernelspec": { 282 | "display_name": "bayesian-modelling-tutorial", 283 | "language": "python", 284 | "name": "bayesian-modelling-tutorial" 285 | }, 286 | "language_info": { 287 | "codemirror_mode": { 288 | "name": "ipython", 289 | "version": 3 290 | }, 291 | "file_extension": ".py", 292 | "mimetype": "text/x-python", 293 | "name": "python", 294 | "nbconvert_exporter": "python", 295 | "pygments_lexer": "ipython3", 296 | "version": "3.7.2" 297 | } 298 | }, 299 | "nbformat": 4, 300 | "nbformat_minor": 2 301 | } 302 | -------------------------------------------------------------------------------- /data/sterilization.csv: -------------------------------------------------------------------------------- 1 | sample_id,treatment,colonies_pre,colonies_post,perc_reduction colonies,morphologies_pre,morphologies_post,year,month,day,perc_reduction morph,site,phone ID,no case,screen protector 2 | 1,phonesoap,0,0,#DIV/0!,0,0,2015,10,7,#DIV/0!,phone,4,, 3 | 2,phonesoap,12,0,100,3,0,2015,10,7,100,junction,4,, 4 | 3,phonesoap,14,4,71.42857143,5,2,2015,10,7,60,case,4,, 5 | 4,ethanol,0,0,#DIV/0!,0,0,2016,4,20,#DIV/0!,phone,4,, 6 | 5,ethanol,0,1,#DIV/0!,0,1,2016,4,20,#DIV/0!,junction,4,, 7 | 6,ethanol,1,0,100,1,0,2016,4,20,100,case,4,, 8 | 7,phonesoap,19,0,100,7,0,2015,10,7,100,phone,5,, 9 | 8,phonesoap,29,0,100,3,0,2015,10,7,100,junction,5,, 10 | 9,phonesoap,50,8,84,8,3,2015,10,7,62.5,case,5,, 11 | 10,bleachwipe,0,0,#DIV/0!,0,0,2015,11,12,#DIV/0!,phone,15,, 12 | 11,bleachwipe,12,1,91.66666667,5,1,2015,11,12,80,junction,15,, 13 | 12,bleachwipe,47,1,97.87234043,4,1,2015,11,12,75,case,15,, 14 | 13,quatricide,1,1,0,1,1,2015,12,8,0,phone,15,, 15 | 14,quatricide,23,0,100,4,0,2015,12,8,100,junction,15,, 16 | 15,quatricide,100,1,99,6,1,2015,12,8,83.33333333,case,15,, 17 | 16,kimwipe,25,5,80,4,2,2016,5,10,50,phone,15,, 18 | 17,kimwipe,35,18,48.57142857,4,3,2016,5,10,25,junction,15,, 19 | 18,kimwipe,40,7,82.5,3,1,2016,5,10,66.66666667,case,15,, 20 | 19,phonesoap,20,0,100,4,0,2015,10,7,100,phone,17,,plastic 21 | 20,phonesoap,50,1,98,5,1,2015,10,7,80,junction,17,,plastic 22 | 21,phonesoap,41,5,87.80487805,5,4,2015,10,7,20,case,17,,plastic 23 | 22,bleachwipe,55,6,89.09090909,5,3,2015,11,12,40,phone,17,,plastic 24 | 23,bleachwipe,19,3,84.21052632,4,1,2015,11,12,75,junction,17,,plastic 25 | 24,bleachwipe,93,9,90.32258065,6,2,2015,11,12,66.66666667,case,17,,plastic 26 | 25,ethanol,38,13,65.78947368,3,3,2016,4,21,0,phone,17,,plastic 27 | 26,ethanol,6,3,50,2,1,2016,4,21,50,junction,17,,plastic 28 | 27,ethanol,44,2,95.45454545,5,2,2016,4,21,60,case,17,,plastic 29 | 28,kimwipe,50,3,94,4,2,2016,5,10,50,phone,17,,plastic 30 | 29,kimwipe,25,37,-48,5,3,2016,5,10,40,junction,17,,plastic 31 | 30,kimwipe,5,0,100,2,0,2016,5,10,100,case,17,,plastic 32 | 31,quatricide,8,28,-250,2,2,,,,0,phone,17,,plastic 33 | 32,quatricide,10,1,90,2,1,,,,50,junction,17,,plastic 34 | 33,quatricide,14,4,71.42857143,3,2,,,,33.33333333,case,17,,plastic 35 | 34,phonesoap,12,0,100,4,0,2015,10,7,100,phone,18,,plastic 36 | 35,phonesoap,21,0,100,6,0,2015,10,7,100,junction,18,,plastic 37 | 
36,phonesoap,52,0,100,6,0,2015,10,7,100,case,18,,plastic 38 | 37,bleachwipe,5,0,100,2,0,2015,11,12,100,phone,18,, 39 | 38,bleachwipe,1,0,100,1,0,2015,11,12,100,junction,18,, 40 | 39,bleachwipe,4,6,-50,1,3,2015,11,12,-200,case,18,, 41 | 40,ethanol,8,0,100,2,0,2016,4,20,100,phone,18,, 42 | 41,ethanol,6,3,50,2,1,2016,4,20,50,junction,18,, 43 | 42,ethanol,44,2,95.45454545,5,2,2016,4,20,60,case,18,, 44 | 43,quatricide,3,0,100,3,0,2015,12,8,100,phone,19,, 45 | 44,quatricide,54,3,94.44444444,7,3,2015,12,8,57.14285714,junction,19,, 46 | 45,quatricide,13,1,92.30769231,6,1,2015,12,8,83.33333333,case,19,, 47 | 46,ethanol,3,0,100,2,0,2016,4,20,100,phone,19,, 48 | 47,ethanol,13,0,100,3,0,2016,4,20,100,junction,19,, 49 | 48,ethanol,3,0,100,1,0,2016,4,20,100,case,19,, 50 | 49,kimwipe,1,0,100,1,0,2016,5,10,100,phone,19,, 51 | 50,kimwipe,4,0,100,3,0,2016,5,10,100,junction,19,, 52 | 51,kimwipe,27,0,100,4,0,2016,5,10,100,case,19,, 53 | 52,kimwipe,7,1,85.71428571,2,1,,,,50,phone,19,, 54 | 53,kimwipe,17,6,64.70588235,4,3,,,,25,junction,19,, 55 | 54,kimwipe,4,9,-125,2,3,,,,-50,case,19,, 56 | 55,bleachwipe,0,0,#DIV/0!,0,0,2015,11,12,#DIV/0!,phone,20,, 57 | 56,bleachwipe,1,0,100,1,0,2015,11,12,100,junction,20,, 58 | 57,bleachwipe,34,13,61.76470588,6,2,2015,11,12,66.66666667,case,20,, 59 | 58,phonesoap,0,0,#DIV/0!,0,0,2016,5,10,#DIV/0!,phone,21,, 60 | 59,phonesoap,18,0,100,4,0,,,,100,case,21,, 61 | 60,kimwipe,54,1,98.14814815,4,1,,,,75,phone,28,, 62 | 61,kimwipe,53,26,50.94339623,4,4,,,,0,junction,28,, 63 | 62,kimwipe,100,47,53,5,4,,,,20,case,28,, 64 | 63,bleachwipe,62,26,58.06451613,6,5,,,,16.66666667,phone,29,, 65 | 64,bleachwipe,31,13,58.06451613,5,3,,,,40,junction,29,, 66 | 65,bleachwipe,8,0,100,3,0,,,,100,case,29,, 67 | 66,ethanol,0,0,#DIV/0!,0,0,,,,#DIV/0!,phone,30,, 68 | 67,ethanol,12,0,100,6,0,,,,100,junction,30,, 69 | 68,ethanol,18,1,94.44444444,3,1,,,,66.66666667,case,30,, 70 | 69,quatricide,0,0,#DIV/0!,0,0,,,,#DIV/0!,phone,31,,plastic 71 | 70,quatricide,73,4,94.52054795,5,3,,,,40,junction,31,,plastic 72 | 71,quatricide,5,3,40,2,2,,,,0,case,31,,plastic 73 | 72,kimwipe,1,1,0,1,1,,,,0,phone,32,, 74 | 73,kimwipe,4,1,75,3,1,,,,66.66666667,junction,32,, 75 | 74,kimwipe,1,0,100,1,0,,,,100,case,32,, 76 | 75,kimwipe,4,0,100,1,0,,,,100,phone,33,,glass 77 | 76,kimwipe,12,10,16.66666667,5,3,,,,40,junction,33,,glass 78 | 77,kimwipe,8,4,50,2,1,,,,50,case,33,,glass 79 | 78,quatricide,0,2,#DIV/0!,0,1,,,,#DIV/0!,phone,34,, 80 | 79,quatricide,43,19,55.81395349,4,3,,,,25,junction,34,, 81 | 80,quatricide,19,8,57.89473684,3,3,,,,0,case,34,, 82 | 81,phonesoap,1,0,100,1,0,,,,100,phone,37,,plastic 83 | 82,phonesoap,10,0,100,3,0,,,,100,junction,37,,plastic 84 | 83,phonesoap,2,31,-1450,2,1,,,,50,case,37,,plastic 85 | 84,ethanol,2,0,100,1,0,,,,100,phone,38,, 86 | 85,ethanol,7,1,85.71428571,3,1,,,,66.66666667,junction,38,, 87 | 86,ethanol,2,9,-350,2,1,,,,50,case,38,, 88 | 87,phonesoap,2,0,100,2,0,,,,100,phone,39,,glass 89 | 88,phonesoap,13,0,100,4,0,,,,100,junction,39,,glass 90 | 89,phonesoap,6,0,100,3,0,,,,100,case,39,,glass 91 | 90,phonesoap,0,0,#DIV/0!,0,0,2015,10,7,#DIV/0!,phone,1,, 92 | 91,phonesoap,16,0,100,4,0,2015,10,7,100,junction,1,, 93 | 92,phonesoap,32,0,100,4,0,2015,10,7,100,case,1,, 94 | 93,bleachwipe,12,0,100,3,0,2015,11,12,100,phone,1,, 95 | 94,bleachwipe,18,1,94.44444444,3,1,2015,11,12,66.66666667,junction,1,, 96 | 95,bleachwipe,11,4,63.63636364,3,2,2015,11,12,33.33333333,case,1,, 97 | 96,quatricide,10,0,100,3,0,2015,12,8,100,phone,1,, 98 | 97,quatricide,16,6,62.5,4,4,2015,12,8,0,junction,1,, 99 | 
98,quatricide,15,4,73.33333333,5,2,2015,12,8,60,case,1,, 100 | 99,phonesoap,56,0,100,3,0,2016,5,10,100,phone,3,, 101 | 100,phonesoap,86,0,100,4,0,2016,5,10,100,junction,3,, 102 | 101,phonesoap,5,0,100,3,0,2016,5,10,100,case,3,, 103 | 102,quatricide,6,0,100,3,0,2015,12,8,100,phone,3,, 104 | 103,quatricide,21,0,100,4,0,2015,12,8,100,junction,3,, 105 | 104,quatricide,0,4,#DIV/0!,0,3,2015,12,8,#DIV/0!,case,3,, 106 | 105,ethanol,100,1,99,5,1,2016,4,21,80,phone,3,, 107 | 106,ethanol,14,1,92.85714286,4,1,2016,4,21,75,junction,3,, 108 | 107,ethanol,8,2,75,3,2,2016,4,21,33.33333333,case,3,, 109 | 108,bleachwipe,7,3,57.14285714,1,2,,,,-100,phone,3,, 110 | 109,bleachwipe,17,2,88.23529412,3,2,,,,33.33333333,junction,3,, 111 | 110,bleachwipe,9,1,88.88888889,2,1,,,,50,case,3,, 112 | 111,FBM_2,19,0,100,2,0,,,,100,phone,3,, 113 | 112,FBM_2,11,0,100,3,0,,,,100,junction,3,, 114 | 113,FBM_2,2,0,100,1,0,,,,100,case,3,, 115 | 114,FBM_2,10,0,100,2,0,,,,100,phone,15,, 116 | 115,FBM_2,17,0,100,4,0,,,,100,junction,15,, 117 | 116,FBM_2,15,0,100,2,0,,,,100,case,15,, 118 | 117,FBM_2,15,0,100,3,0,,,,100,phone,17,, 119 | 118,FBM_2,7,0,100,2,0,,,,100,junction,17,, 120 | 119,FBM_2,7,0,100,3,0,,,,100,case,17,, 121 | 120,FBM_2,4,0,100,4,0,,,,100,phone,47,, 122 | 121,FBM_2,12,0,100,3,0,,,,100,junction,47,, 123 | 122,FBM_2,15,0,100,3,0,,,,100,case,47,, 124 | 123,FBM_2,1,0,100,1,0,,,,100,phone,48,, 125 | 124,FBM_2,42,0,100,2,0,,,,100,junction,48,, 126 | 125,FBM_2,4,0,100,3,0,,,,100,case,48,, 127 | 126,FBM_2,9,0,100,4,0,,,,100,phone,49,, 128 | 127,FBM_2,10,0,100,3,0,,,,100,junction,49,, 129 | 128,FBM_2,9,0,100,3,0,,,,100,case,49,, 130 | 129,FBM_2,7,0,100,2,0,,,,100,phone,50,, 131 | 130,FBM_2,9,0,100,3,0,,,,100,junction,50,, 132 | 131,FBM_2,7,0,100,3,0,,,,100,case,50,, 133 | 132,CB30,50,1,98,4,1,,,,75,phone,3,, 134 | 133,CB30,8,1,87.5,4,1,,,,75,junction,3,, 135 | 134,CB30,15,4,73.33333333,4,2,,,,50,case,3,, 136 | 135,CB30,0,0,#DIV/0!,0,0,,,,#DIV/0!,phone,17,, 137 | 136,CB30,0,0,#DIV/0!,0,0,,,,#DIV/0!,junction,17,, 138 | 137,CB30,4,5,-25,2,2,,,,0,case,17,, 139 | 138,CB30,2,0,100,1,0,,,,100,phone,19,, 140 | 139,CB30,12,0,100,2,0,,,,100,junction,19,, 141 | 140,CB30,8,3,62.5,3,2,,,,33.33333333,case,19,, 142 | 141,CB30,2,0,100,2,0,,,,100,phone,32,, 143 | 142,CB30,0,0,#DIV/0!,0,0,,,,#DIV/0!,junction,32,, 144 | 143,CB30,5,0,100,2,0,,,,100,case,32,, 145 | 144,CB30,7,1,85.71428571,3,1,,,,66.66666667,phone,51,, 146 | 145,CB30,15,0,100,3,0,,,,100,junction,51,, 147 | 146,CB30,12,6,50,3,3,,,,0,case,51,, 148 | 147,CB30,6,1,83.33333333,1,1,,,,0,phone,52,, 149 | 148,CB30,9,0,100,1,0,,,,100,junction,52,, 150 | 149,CB30,4,3,25,1,1,,,,0,case,52,, 151 | 150,CB30,0,0,#DIV/0!,0,0,,,,#DIV/0!,phone,53,, 152 | 151,CB30,3,1,66.66666667,2,1,,,,50,junction,53,, 153 | 152,CB30,1,1,0,1,1,,,,0,case,53,, 154 | 153,CB30,1,0,100,1,0,,,,100,phone,18,, 155 | 154,CB30,3,0,100,2,0,,,,100,junction,18,, 156 | 155,CB30,30,5,83.33333333,4,3,,,,25,case,18,, 157 | 156,CB30,2,3,-50,2,1,,,,50,phone,54,, 158 | 157,CB30,15,7,53.33333333,3,3,,,,0,junction,54,, 159 | 158,CB30,42,17,59.52380952,4,3,,,,25,case,54,, 160 | 159,cellblaster,7,0,100,2,0,,,,100,phone,4,, 161 | 160,cellblaster,100,6,94,7,4,,,,42.85714286,junction,4,, 162 | 161,cellblaster,9,0,100,3,0,,,,100,case,4,, 163 | 162,cellblaster,33,22,33.33333333,5,4,,,,20,phone,5,, 164 | 163,cellblaster,100,26,74,7,5,,,,28.57142857,junction,5,, 165 | 164,cellblaster,13,3,76.92307692,3,3,,,,0,case,5,, 166 | 165,cellblaster,5,3,40,3,3,,,,0,phone,11,, 167 | 166,cellblaster,27,15,44.44444444,6,5,,,,16.66666667,junction,11,, 168 | 
167,cellblaster,22,9,59.09090909,4,3,,,,25,case,11,, 169 | 168,cellblaster,53,6,88.67924528,2,3,,,,-50,phone,14,, 170 | 169,cellblaster,25,9,64,6,4,,,,33.33333333,junction,14,, 171 | 170,cellblaster,3,11,-266.6666667,1,3,,,,-200,case,14,, 172 | 171,cellblaster,1,1,0,1,1,,,,0,phone,16,,plastic 173 | 172,cellblaster,11,2,81.81818182,2,1,,,,50,junction,16,,plastic 174 | 173,cellblaster,12,3,75,6,3,,,,50,case,16,,plastic 175 | 174,cellblaster,100,16,84,4,4,,,,0,phone,18,, 176 | 175,cellblaster,55,4,92.72727273,5,3,,,,40,junction,18,, 177 | 176,cellblaster,6,43,-616.6666667,3,4,,,,-33.33333333,case,18,, 178 | 177,cellblaster,48,5,89.58333333,5,2,,,,60,phone,35,,plastic 179 | 178,cellblaster,54,14,74.07407407,4,3,,,,25,junction,35,,plastic 180 | 179,cellblaster,12,13,-8.333333333,3,3,,,,0,case,35,,plastic 181 | 180,cellblaster,22,5,77.27272727,2,1,,,,50,phone,36,,plastic 182 | 181,cellblaster,40,11,72.5,3,2,,,,33.33333333,junction,36,,plastic 183 | 182,cellblaster,5,4,20,2,1,,,,50,case,36,,plastic -------------------------------------------------------------------------------- /data/sanitization.csv: -------------------------------------------------------------------------------- 1 | sample_id,treatment,colonies_pre,colonies_post,perc_reduction colonies,morphologies_pre,morphologies_post,year,month,day,perc_reduction morph,site,phone ID,no case,screen protector 2 | 1,phonesoap,0,0,#DIV/0!,0,0,2015,10,7,#DIV/0!,phone,4,, 3 | 2,phonesoap,12,0,100,3,0,2015,10,7,100,junction,4,, 4 | 3,phonesoap,14,4,71.42857143,5,2,2015,10,7,60,case,4,, 5 | 4,ethanol,0,0,#DIV/0!,0,0,2016,4,20,#DIV/0!,phone,4,, 6 | 5,ethanol,0,1,#DIV/0!,0,1,2016,4,20,#DIV/0!,junction,4,, 7 | 6,ethanol,1,0,100,1,0,2016,4,20,100,case,4,, 8 | 7,phonesoap,19,0,100,7,0,2015,10,7,100,phone,5,, 9 | 8,phonesoap,29,0,100,3,0,2015,10,7,100,junction,5,, 10 | 9,phonesoap,50,8,84,8,3,2015,10,7,62.5,case,5,, 11 | 10,bleachwipe,0,0,#DIV/0!,0,0,2015,11,12,#DIV/0!,phone,15,, 12 | 11,bleachwipe,12,1,91.66666667,5,1,2015,11,12,80,junction,15,, 13 | 12,bleachwipe,47,1,97.87234043,4,1,2015,11,12,75,case,15,, 14 | 13,quatricide,1,1,0,1,1,2015,12,8,0,phone,15,, 15 | 14,quatricide,23,0,100,4,0,2015,12,8,100,junction,15,, 16 | 15,quatricide,100,1,99,6,1,2015,12,8,83.33333333,case,15,, 17 | 16,kimwipe,25,5,80,4,2,2016,5,10,50,phone,15,, 18 | 17,kimwipe,35,18,48.57142857,4,3,2016,5,10,25,junction,15,, 19 | 18,kimwipe,40,7,82.5,3,1,2016,5,10,66.66666667,case,15,, 20 | 19,phonesoap,20,0,100,4,0,2015,10,7,100,phone,17,,plastic 21 | 20,phonesoap,50,1,98,5,1,2015,10,7,80,junction,17,,plastic 22 | 21,phonesoap,41,5,87.80487805,5,4,2015,10,7,20,case,17,,plastic 23 | 22,bleachwipe,55,6,89.09090909,5,3,2015,11,12,40,phone,17,,plastic 24 | 23,bleachwipe,19,3,84.21052632,4,1,2015,11,12,75,junction,17,,plastic 25 | 24,bleachwipe,93,9,90.32258065,6,2,2015,11,12,66.66666667,case,17,,plastic 26 | 25,ethanol,38,13,65.78947368,3,3,2016,4,21,0,phone,17,,plastic 27 | 26,ethanol,6,3,50,2,1,2016,4,21,50,junction,17,,plastic 28 | 27,ethanol,44,2,95.45454545,5,2,2016,4,21,60,case,17,,plastic 29 | 28,kimwipe,50,3,94,4,2,2016,5,10,50,phone,17,,plastic 30 | 29,kimwipe,25,37,-48,5,3,2016,5,10,40,junction,17,,plastic 31 | 30,kimwipe,5,0,100,2,0,2016,5,10,100,case,17,,plastic 32 | 31,quatricide,8,28,-250,2,2,,,,0,phone,17,,plastic 33 | 32,quatricide,10,1,90,2,1,,,,50,junction,17,,plastic 34 | 33,quatricide,14,4,71.42857143,3,2,,,,33.33333333,case,17,,plastic 35 | 34,phonesoap,12,0,100,4,0,2015,10,7,100,phone,18,,plastic 36 | 35,phonesoap,21,0,100,6,0,2015,10,7,100,junction,18,,plastic 37 
| 36,phonesoap,52,0,100,6,0,2015,10,7,100,case,18,,plastic 38 | 37,bleachwipe,5,0,100,2,0,2015,11,12,100,phone,18,, 39 | 38,bleachwipe,1,0,100,1,0,2015,11,12,100,junction,18,, 40 | 39,bleachwipe,4,6,-50,1,3,2015,11,12,-200,case,18,, 41 | 40,ethanol,8,0,100,2,0,2016,4,20,100,phone,18,, 42 | 41,ethanol,6,3,50,2,1,2016,4,20,50,junction,18,, 43 | 42,ethanol,44,2,95.45454545,5,2,2016,4,20,60,case,18,, 44 | 43,quatricide,3,0,100,3,0,2015,12,8,100,phone,19,, 45 | 44,quatricide,54,3,94.44444444,7,3,2015,12,8,57.14285714,junction,19,, 46 | 45,quatricide,13,1,92.30769231,6,1,2015,12,8,83.33333333,case,19,, 47 | 46,ethanol,3,0,100,2,0,2016,4,20,100,phone,19,, 48 | 47,ethanol,13,0,100,3,0,2016,4,20,100,junction,19,, 49 | 48,ethanol,3,0,100,1,0,2016,4,20,100,case,19,, 50 | 49,kimwipe,1,0,100,1,0,2016,5,10,100,phone,19,, 51 | 50,kimwipe,4,0,100,3,0,2016,5,10,100,junction,19,, 52 | 51,kimwipe,27,0,100,4,0,2016,5,10,100,case,19,, 53 | 52,kimwipe,7,1,85.71428571,2,1,,,,50,phone,19,, 54 | 53,kimwipe,17,6,64.70588235,4,3,,,,25,junction,19,, 55 | 54,kimwipe,4,9,-125,2,3,,,,-50,case,19,, 56 | 55,bleachwipe,0,0,#DIV/0!,0,0,2015,11,12,#DIV/0!,phone,20,, 57 | 56,bleachwipe,1,0,100,1,0,2015,11,12,100,junction,20,, 58 | 57,bleachwipe,34,13,61.76470588,6,2,2015,11,12,66.66666667,case,20,, 59 | 58,phonesoap,0,0,#DIV/0!,0,0,2016,5,10,#DIV/0!,phone,21,, 60 | 59,phonesoap,18,0,100,4,0,,,,100,case,21,, 61 | 60,kimwipe,54,1,98.14814815,4,1,,,,75,phone,28,, 62 | 61,kimwipe,53,26,50.94339623,4,4,,,,0,junction,28,, 63 | 62,kimwipe,100,47,53,5,4,,,,20,case,28,, 64 | 63,bleachwipe,62,26,58.06451613,6,5,,,,16.66666667,phone,29,, 65 | 64,bleachwipe,31,13,58.06451613,5,3,,,,40,junction,29,, 66 | 65,bleachwipe,8,0,100,3,0,,,,100,case,29,, 67 | 66,ethanol,0,0,#DIV/0!,0,0,,,,#DIV/0!,phone,30,, 68 | 67,ethanol,12,0,100,6,0,,,,100,junction,30,, 69 | 68,ethanol,18,1,94.44444444,3,1,,,,66.66666667,case,30,, 70 | 69,quatricide,0,0,#DIV/0!,0,0,,,,#DIV/0!,phone,31,,plastic 71 | 70,quatricide,73,4,94.52054795,5,3,,,,40,junction,31,,plastic 72 | 71,quatricide,5,3,40,2,2,,,,0,case,31,,plastic 73 | 72,kimwipe,1,1,0,1,1,,,,0,phone,32,, 74 | 73,kimwipe,4,1,75,3,1,,,,66.66666667,junction,32,, 75 | 74,kimwipe,1,0,100,1,0,,,,100,case,32,, 76 | 75,kimwipe,4,0,100,1,0,,,,100,phone,33,,glass 77 | 76,kimwipe,12,10,16.66666667,5,3,,,,40,junction,33,,glass 78 | 77,kimwipe,8,4,50,2,1,,,,50,case,33,,glass 79 | 78,quatricide,0,2,#DIV/0!,0,1,,,,#DIV/0!,phone,34,, 80 | 79,quatricide,43,19,55.81395349,4,3,,,,25,junction,34,, 81 | 80,quatricide,19,8,57.89473684,3,3,,,,0,case,34,, 82 | 81,phonesoap,1,0,100,1,0,,,,100,phone,37,,plastic 83 | 82,phonesoap,10,0,100,3,0,,,,100,junction,37,,plastic 84 | 83,phonesoap,2,31,-1450,2,1,,,,50,case,37,,plastic 85 | 84,ethanol,2,0,100,1,0,,,,100,phone,38,, 86 | 85,ethanol,7,1,85.71428571,3,1,,,,66.66666667,junction,38,, 87 | 86,ethanol,2,9,-350,2,1,,,,50,case,38,, 88 | 87,phonesoap,2,0,100,2,0,,,,100,phone,39,,glass 89 | 88,phonesoap,13,0,100,4,0,,,,100,junction,39,,glass 90 | 89,phonesoap,6,0,100,3,0,,,,100,case,39,,glass 91 | 90,phonesoap,0,0,#DIV/0!,0,0,2015,10,7,#DIV/0!,phone,1,, 92 | 91,phonesoap,16,0,100,4,0,2015,10,7,100,junction,1,, 93 | 92,phonesoap,32,0,100,4,0,2015,10,7,100,case,1,, 94 | 93,bleachwipe,12,0,100,3,0,2015,11,12,100,phone,1,, 95 | 94,bleachwipe,18,1,94.44444444,3,1,2015,11,12,66.66666667,junction,1,, 96 | 95,bleachwipe,11,4,63.63636364,3,2,2015,11,12,33.33333333,case,1,, 97 | 96,quatricide,10,0,100,3,0,2015,12,8,100,phone,1,, 98 | 97,quatricide,16,6,62.5,4,4,2015,12,8,0,junction,1,, 99 | 
98,quatricide,15,4,73.33333333,5,2,2015,12,8,60,case,1,, 100 | 99,phonesoap,56,0,100,3,0,2016,5,10,100,phone,3,, 101 | 100,phonesoap,86,0,100,4,0,2016,5,10,100,junction,3,, 102 | 101,phonesoap,5,0,100,3,0,2016,5,10,100,case,3,, 103 | 102,quatricide,6,0,100,3,0,2015,12,8,100,phone,3,, 104 | 103,quatricide,21,0,100,4,0,2015,12,8,100,junction,3,, 105 | 104,quatricide,0,4,#DIV/0!,0,3,2015,12,8,#DIV/0!,case,3,, 106 | 105,ethanol,100,1,99,5,1,2016,4,21,80,phone,3,, 107 | 106,ethanol,14,1,92.85714286,4,1,2016,4,21,75,junction,3,, 108 | 107,ethanol,8,2,75,3,2,2016,4,21,33.33333333,case,3,, 109 | 108,bleachwipe,7,3,57.14285714,1,2,,,,-100,phone,3,, 110 | 109,bleachwipe,17,2,88.23529412,3,2,,,,33.33333333,junction,3,, 111 | 110,bleachwipe,9,1,88.88888889,2,1,,,,50,case,3,, 112 | 111,FBM_2,19,0,100,2,0,,,,100,phone,3,, 113 | 112,FBM_2,11,0,100,3,0,,,,100,junction,3,, 114 | 113,FBM_2,2,0,100,1,0,,,,100,case,3,, 115 | 114,FBM_2,10,0,100,2,0,,,,100,phone,15,, 116 | 115,FBM_2,17,0,100,4,0,,,,100,junction,15,, 117 | 116,FBM_2,15,0,100,2,0,,,,100,case,15,, 118 | 117,FBM_2,15,0,100,3,0,,,,100,phone,17,, 119 | 118,FBM_2,7,0,100,2,0,,,,100,junction,17,, 120 | 119,FBM_2,7,0,100,3,0,,,,100,case,17,, 121 | 120,FBM_2,4,0,100,4,0,,,,100,phone,47,, 122 | 121,FBM_2,12,0,100,3,0,,,,100,junction,47,, 123 | 122,FBM_2,15,0,100,3,0,,,,100,case,47,, 124 | 123,FBM_2,1,0,100,1,0,,,,100,phone,48,, 125 | 124,FBM_2,42,0,100,2,0,,,,100,junction,48,, 126 | 125,FBM_2,4,0,100,3,0,,,,100,case,48,, 127 | 126,FBM_2,9,0,100,4,0,,,,100,phone,49,, 128 | 127,FBM_2,10,0,100,3,0,,,,100,junction,49,, 129 | 128,FBM_2,9,0,100,3,0,,,,100,case,49,, 130 | 129,FBM_2,7,0,100,2,0,,,,100,phone,50,, 131 | 130,FBM_2,9,0,100,3,0,,,,100,junction,50,, 132 | 131,FBM_2,7,0,100,3,0,,,,100,case,50,, 133 | 132,CB30,50,1,98,4,1,,,,75,phone,3,, 134 | 133,CB30,8,1,87.5,4,1,,,,75,junction,3,, 135 | 134,CB30,15,4,73.33333333,4,2,,,,50,case,3,, 136 | 135,CB30,0,0,#DIV/0!,0,0,,,,#DIV/0!,phone,17,, 137 | 136,CB30,0,0,#DIV/0!,0,0,,,,#DIV/0!,junction,17,, 138 | 137,CB30,4,5,-25,2,2,,,,0,case,17,, 139 | 138,CB30,2,0,100,1,0,,,,100,phone,19,, 140 | 139,CB30,12,0,100,2,0,,,,100,junction,19,, 141 | 140,CB30,8,3,62.5,3,2,,,,33.33333333,case,19,, 142 | 141,CB30,2,0,100,2,0,,,,100,phone,32,, 143 | 142,CB30,0,0,#DIV/0!,0,0,,,,#DIV/0!,junction,32,, 144 | 143,CB30,5,0,100,2,0,,,,100,case,32,, 145 | 144,CB30,7,1,85.71428571,3,1,,,,66.66666667,phone,51,, 146 | 145,CB30,15,0,100,3,0,,,,100,junction,51,, 147 | 146,CB30,12,6,50,3,3,,,,0,case,51,, 148 | 147,CB30,6,1,83.33333333,1,1,,,,0,phone,52,, 149 | 148,CB30,9,0,100,1,0,,,,100,junction,52,, 150 | 149,CB30,4,3,25,1,1,,,,0,case,52,, 151 | 150,CB30,0,0,#DIV/0!,0,0,,,,#DIV/0!,phone,53,, 152 | 151,CB30,3,1,66.66666667,2,1,,,,50,junction,53,, 153 | 152,CB30,1,1,0,1,1,,,,0,case,53,, 154 | 153,CB30,1,0,100,1,0,,,,100,phone,18,, 155 | 154,CB30,3,0,100,2,0,,,,100,junction,18,, 156 | 155,CB30,30,5,83.33333333,4,3,,,,25,case,18,, 157 | 156,CB30,2,3,-50,2,1,,,,50,phone,54,, 158 | 157,CB30,15,7,53.33333333,3,3,,,,0,junction,54,, 159 | 158,CB30,42,17,59.52380952,4,3,,,,25,case,54,, 160 | 159,cellblaster,7,0,100,2,0,,,,100,phone,4,, 161 | 160,cellblaster,100,6,94,7,4,,,,42.85714286,junction,4,, 162 | 161,cellblaster,9,0,100,3,0,,,,100,case,4,, 163 | 162,cellblaster,33,22,33.33333333,5,4,,,,20,phone,5,, 164 | 163,cellblaster,100,26,74,7,5,,,,28.57142857,junction,5,, 165 | 164,cellblaster,13,3,76.92307692,3,3,,,,0,case,5,, 166 | 165,cellblaster,5,3,40,3,3,,,,0,phone,11,, 167 | 166,cellblaster,27,15,44.44444444,6,5,,,,16.66666667,junction,11,, 168 | 
167,cellblaster,22,9,59.09090909,4,3,,,,25,case,11,, 169 | 168,cellblaster,53,6,88.67924528,2,3,,,,-50,phone,14,, 170 | 169,cellblaster,25,9,64,6,4,,,,33.33333333,junction,14,, 171 | 170,cellblaster,3,11,-266.6666667,1,3,,,,-200,case,14,, 172 | 171,cellblaster,1,1,0,1,1,,,,0,phone,16,,plastic 173 | 172,cellblaster,11,2,81.81818182,2,1,,,,50,junction,16,,plastic 174 | 173,cellblaster,12,3,75,6,3,,,,50,case,16,,plastic 175 | 174,cellblaster,100,16,84,4,4,,,,0,phone,18,, 176 | 175,cellblaster,55,4,92.72727273,5,3,,,,40,junction,18,, 177 | 176,cellblaster,6,43,-616.6666667,3,4,,,,-33.33333333,case,18,, 178 | 177,cellblaster,48,5,89.58333333,5,2,,,,60,phone,35,,plastic 179 | 178,cellblaster,54,14,74.07407407,4,3,,,,25,junction,35,,plastic 180 | 179,cellblaster,12,13,-8.333333333,3,3,,,,0,case,35,,plastic 181 | 180,cellblaster,22,5,77.27272727,2,1,,,,50,phone,36,,plastic 182 | 181,cellblaster,40,11,72.5,3,2,,,,33.33333333,junction,36,,plastic 183 | 182,cellblaster,5,4,20,2,1,,,,50,case,36,,plastic 184 | -------------------------------------------------------------------------------- /data/finch_beaks_1975.csv: -------------------------------------------------------------------------------- 1 | band,species,"Beak length, mm","Beak depth, mm" 2 | 2,fortis,9.4,8 3 | 9,fortis,9.2,8.3 4 | 12,fortis,9.5,7.5 5 | 15,fortis,9.5,8 6 | 305,fortis,11.5,9.9 7 | 307,fortis,11.1,8.6 8 | 308,fortis,9.9,8.4 9 | 309,fortis,11.5,9.8 10 | 311,fortis,10.8,9.2 11 | 312,fortis,11.3,9 12 | 313,fortis,11.5,9.5 13 | 314,fortis,11.5,8.9 14 | 315,fortis,9.7,8.8 15 | 316,fortis,10.9,9.7 16 | 316,fortis,10.9,9.85 17 | 320,fortis,10.1,8.9 18 | 321,fortis,10.6,8.5 19 | 323,fortis,9.6,8.2 20 | 324,fortis,10.3,9.3 21 | 326,fortis,10.5,9.1 22 | 328,fortis,10.5,8.8 23 | 329,fortis,10.9,9.4 24 | 330,fortis,11.2,9.6 25 | 340,fortis,9.4,8.5 26 | 341,fortis,10.1,8.1 27 | 342,fortis,9.6,8 28 | 343,fortis,10.5,9.7 29 | 344,fortis,9.9,8.2 30 | 345,fortis,9.9,8.4 31 | 346,fortis,9.6,7.9 32 | 347,fortis,10.7,9.3 33 | 348,fortis,11.8,10.5 34 | 349,fortis,10.2,9.7 35 | 350,fortis,10.9,9.6 36 | 352,fortis,9.3,7.7 37 | 354,fortis,11.6,9.8 38 | 355,fortis,11.2,9.8 39 | 356,fortis,10.1,8.5 40 | 360,fortis,10.7,9.2 41 | 366,fortis,11,10.1 42 | 368,fortis,10.8,9 43 | 369,fortis,10.2,9 44 | 401,fortis,10.8,9.4 45 | 402,fortis,10.4,8.8 46 | 403,fortis,11.2,9.9 47 | 405,fortis,11.4,9.9 48 | 410,fortis,10.9,8.9 49 | 411,fortis,10.4,9.4 50 | 413,fortis,9.7,8.2 51 | 414,fortis,10.2,8.2 52 | 415,fortis,9.2,8.1 53 | 416,fortis,10.1,8.4 54 | 417,fortis,11,10.1 55 | 418,fortis,10.3,8.6 56 | 419,fortis,10.2,8.5 57 | 420,fortis,11,9.7 58 | 422,fortis,11,10.3 59 | 423,fortis,10.1,8.6 60 | 424,fortis,10.7,9.3 61 | 425,fortis,10.7,8.9 62 | 426,fortis,11,9.7 63 | 427,fortis,12,10.2 64 | 428,fortis,11.6,10.2 65 | 450,fortis,9.9,8.5 66 | 451,fortis,10.8,8.8 67 | 452,fortis,10.5,8.9 68 | 453,fortis,10,8.7 69 | 454,fortis,9.6,9.3 70 | 455,fortis,11,9.7 71 | 456,fortis,10.2,9.6 72 | 457,fortis,9.7,7.85 73 | 458,fortis,11.1,9.6 74 | 459,fortis,10.4,9.3 75 | 461,fortis,11.1,9.8 76 | 462,fortis,10.2,8.8 77 | 463,fortis,10.3,9.1 78 | 465,fortis,9.9,9 79 | 466,fortis,11,10.4 80 | 468,fortis,10.8,9 81 | 473,fortis,10.5,9.5 82 | 474,fortis,10.5,8.7 83 | 475,fortis,11.2,9.6 84 | 476,fortis,11.2,9.4 85 | 477,fortis,11.7,9.5 86 | 479,fortis,10.5,8.6 87 | 501,fortis,10.5,9.9 88 | 502,fortis,11.2,9.4 89 | 503,fortis,10,9.1 90 | 504,fortis,10.4,8.6 91 | 505,fortis,10.8,9.3 92 | 506,fortis,11.1,9.2 93 | 507,fortis,10.3,8.8 94 | 509,fortis,11.1,9.2 95 | 510,fortis,9.8,8.3 96 | 
511,fortis,10.5,8.8 97 | 512,fortis,11,9.4 98 | 513,fortis,11.2,10.4 99 | 514,fortis,9.8,8.5 100 | 515,fortis,9.8,7.9 101 | 516,fortis,9.8,7.9 102 | 517,fortis,10.3,10.1 103 | 518,fortis,11.3,9.8 104 | 519,fortis,10,8.3 105 | 520,fortis,11.1,9.4 106 | 521,fortis,10,9 107 | 522,fortis,10.3,8.4 108 | 524,fortis,10.2,8.7 109 | 526,fortis,10.4,9.2 110 | 527,fortis,11,9.5 111 | 528,fortis,11.5,10.1 112 | 529,fortis,11.8,9.8 113 | 560,fortis,10.2,8.5 114 | 561,fortis,11.7,10.2 115 | 563,fortis,10.1,8.6 116 | 564,fortis,10.2,9.3 117 | 565,fortis,9.8,8.7 118 | 566,fortis,10.6,8.9 119 | 567,fortis,10,8.4 120 | 572,fortis,12.1,10.3 121 | 574,fortis,11.1,9.9 122 | 576,fortis,10.5,8.9 123 | 578,fortis,10,9.1 124 | 579,fortis,10.2,8.6 125 | 601,fortis,9.7,7.8 126 | 602,fortis,10,8.6 127 | 603,fortis,10.7,9.5 128 | 604,fortis,11,9.5 129 | 605,fortis,10.9,10.2 130 | 606,fortis,10.5,9.1 131 | 607,fortis,10.9,9.7 132 | 608,fortis,10.7,9.4 133 | 609,fortis,11.9,10.5 134 | 610,fortis,10.2,9 135 | 611,fortis,10.5,9.8 136 | 615,fortis,11.2,10 137 | 616,fortis,10.7,9.6 138 | 617,fortis,10.3,9.6 139 | 618,fortis,11.6,9.9 140 | 619,fortis,10.5,9.3 141 | 620,fortis,10.9,9.6 142 | 621,fortis,9.8,7.6 143 | 622,fortis,10.9,9.2 144 | 623,fortis,10.3,8.8 145 | 624,fortis,11.7,10.1 146 | 625,fortis,11,8.9 147 | 626,fortis,9.8,9.5 148 | 627,fortis,10.4,8.2 149 | 628,fortis,10.8,9.7 150 | 629,fortis,11,9.8 151 | 670,fortis,10.6,9.3 152 | 671,fortis,11.3,9.8 153 | 672,fortis,10.1,8.8 154 | 673,fortis,11.4,10.1 155 | 674,fortis,11.8,10.5 156 | 675,fortis,9.9,8.6 157 | 676,fortis,11,9.7 158 | 677,fortis,10.8,10 159 | 678,fortis,11.3,9.7 160 | 679,fortis,11.1,10.5 161 | 680,fortis,11,10.4 162 | 681,fortis,10.4,9.2 163 | 682,fortis,9.1,8.1 164 | 683,fortis,10.8,9.4 165 | 684,fortis,10.6,8.9 166 | 685,fortis,8.7,8.2 167 | 686,fortis,10.7,9.3 168 | 687,fortis,10.3,8.6 169 | 688,fortis,10.5,9 170 | 689,fortis,9.1,7.6 171 | 701,fortis,9.5,8.7 172 | 702,fortis,10.8,9.8 173 | 703,fortis,10.6,9.4 174 | 704,fortis,9.9,9.9 175 | 705,fortis,11.5,10.1 176 | 706,fortis,11.2,9.8 177 | 707,fortis,9.7,7.9 178 | 708,fortis,12.2,10.8 179 | 709,fortis,9.2,7.9 180 | 710,fortis,10.5,10 181 | 711,fortis,10.5,8.4 182 | 712,fortis,10.8,9.5 183 | 713,fortis,10.7,8.8 184 | 714,fortis,11.8,10.4 185 | 715,fortis,9.1,8.1 186 | 716,fortis,10.1,9.8 187 | 717,fortis,10,8.2 188 | 719,fortis,11,9.5 189 | 720,fortis,12.2,10.5 190 | 721,fortis,11.4,9.7 191 | 723,fortis,10.9,9.6 192 | 725,fortis,10.6,9.6 193 | 726,fortis,9.3,7.9 194 | 780,fortis,10.9,10.1 195 | 781,fortis,10.6,9.3 196 | 785,fortis,11,10.3 197 | 786,fortis,9,7.9 198 | 787,fortis,11,9.1 199 | 788,fortis,10.4,8.9 200 | 789,fortis,10.4,8.3 201 | 790,fortis,9.6,9.4 202 | 801,fortis,10.6,9.5 203 | 802,fortis,10.1,8.5 204 | 803,fortis,9.7,8.6 205 | 804,fortis,9.6,8.5 206 | 805,fortis,10.1,8.2 207 | 807,fortis,9.9,7.9 208 | 808,fortis,11,10 209 | 809,fortis,10.9,9.4 210 | 810,fortis,9.7,8 211 | 811,fortis,10,8.6 212 | 812,fortis,10.4,9 213 | 813,fortis,11.6,9.9 214 | 814,fortis,9.6,8 215 | 815,fortis,10.8,9.6 216 | 817,fortis,10.9,9.2 217 | 818,fortis,10.2,9 218 | 818,fortis,10.2,9 219 | 819,fortis,10.4,9.2 220 | 820,fortis,11,9.5 221 | 821,fortis,10.7,9.2 222 | 822,fortis,11.1,9.7 223 | 823,fortis,10.8,9.1 224 | 824,fortis,10.9,9.4 225 | 825,fortis,9.9,8.6 226 | 826,fortis,11.8,9.8 227 | 827,fortis,9.7,8 228 | 828,fortis,11.9,10.4 229 | 829,fortis,9.6,8.1 230 | 830,fortis,10.9,9.9 231 | 831,fortis,10.2,8.6 232 | 891,fortis,9.9,8 233 | 892,fortis,11.3,9 234 | 893,fortis,10.9,9.7 235 | 894,fortis,9.8,8.1 
236 | 895,fortis,10.3,8.8 237 | 896,fortis,8.8,7.5 238 | 897,fortis,11.6,10.2 239 | 898,fortis,9.9,8.2 240 | 899,fortis,9.9,8.6 241 | 900,fortis,9.9,8.7 242 | 902,fortis,10.4,8.6 243 | 903,fortis,11.2,9.9 244 | 904,fortis,10.8,9.5 245 | 905,fortis,9.9,8.4 246 | 906,fortis,9.9,8.7 247 | 907,fortis,10.2,8.2 248 | 908,fortis,10.4,9.2 249 | 909,fortis,9.4,8.2 250 | 910,fortis,10.2,9.2 251 | 911,fortis,11,9.3 252 | 912,fortis,10.6,8.9 253 | 913,fortis,10.5,9 254 | 914,fortis,10.4,9.1 255 | 915,fortis,10.2,8.8 256 | 917,fortis,10.2,8.8 257 | 919,fortis,11,9.6 258 | 921,fortis,11.1,10.2 259 | 922,fortis,9.7,8.2 260 | 923,fortis,10.5,8.9 261 | 924,fortis,10.9,9 262 | 925,fortis,10.7,8.8 263 | 926,fortis,11,9.9 264 | 927,fortis,10.9,9.3 265 | 928,fortis,11.2,9.9 266 | 929,fortis,11.5,9.5 267 | 930,fortis,10.8,10 268 | 931,fortis,10.2,8.9 269 | 932,fortis,11,9.8 270 | 933,fortis,10.9,9.9 271 | 934,fortis,10.5,8.7 272 | 936,fortis,10.8,9.2 273 | 941,fortis,9.8,8.2 274 | 942,fortis,9.7,7.9 275 | 943,fortis,10.3,9.1 276 | 944,fortis,10.3,8.3 277 | 944,fortis,10.3,8.3 278 | 945,fortis,11.6,10.8 279 | 945,fortis,11.6,10.8 280 | 951,fortis,10.9,9.9 281 | 952,fortis,10.6,9 282 | 954,fortis,11.5,9.3 283 | 991,fortis,10.8,9.6 284 | 1040,fortis,10.83,9.3 285 | 1368,fortis,11.73,10.2 286 | 1420,fortis,10.23,8.6 287 | 1433,fortis,11.83,10.98 288 | 1560,fortis,11.43,10.28 289 | 1581,fortis,10.73,9.6 290 | 1770,fortis,10.33,9.28 291 | 1857,fortis,10.23,9.5 292 | 1860,fortis,11.53,9.4 293 | 1884,fortis,11.03,9.15 294 | 2102,fortis,11.73,9.8 295 | 2105,fortis,10.53,9.35 296 | 2220,fortis,9.93,8.5 297 | 2381,fortis,11.23,10.5 298 | 2482,fortis,9.83,8.5 299 | 2939,fortis,9.63,8.31 300 | 2955,fortis,10.6,9.9 301 | 2974,fortis,11.88,10.36 302 | 3642,fortis,11.03,10.28 303 | 8016,fortis,10.73,8.9 304 | 8020,fortis,10.13,8.7 305 | 20139,fortis,10.15,9.15 306 | 20165,fortis,10.85,10.35 307 | 20166,fortis,10.95,8.85 308 | 20168,fortis,9.85,8.55 309 | 20204,fortis,10.25,8.95 310 | 20238,fortis,11.75,10.75 311 | 20267,fortis,10.95,10.05 312 | 20273,fortis,10.95,10.15 313 | 20283,fortis,10.55,8.45 314 | 20285,fortis,9.65,8.85 315 | 20286,fortis,11.45,10.25 316 | 20293,fortis,10.25,9.75 317 | 20298,fortis,10.05,8.95 318 | 302,scandens,13.9,8.4 319 | 304,scandens,14,8.8 320 | 306,scandens,12.9,8.4 321 | 310,scandens,13.5,8 322 | 317,scandens,12.9,7.9 323 | 318,scandens,14.6,8.9 324 | 319,scandens,13,8.6 325 | 322,scandens,14.2,8.5 326 | 325,scandens,14,8.9 327 | 327,scandens,14.2,9.1 328 | 351,scandens,13.1,8.6 329 | 353,scandens,15.1,9.8 330 | 357,scandens,13.5,8.2 331 | 358,scandens,14.4,9 332 | 359,scandens,14.9,9.7 333 | 361,scandens,12.9,8.6 334 | 362,scandens,13,8.2 335 | 363,scandens,14.9,9 336 | 364,scandens,14,8.4 337 | 365,scandens,13.8,8.6 338 | 367,scandens,13,8.9 339 | 404,scandens,14.75,9.1 340 | 406,scandens,13.7,8.3 341 | 407,scandens,13.8,8.7 342 | 408,scandens,14,9.6 343 | 409,scandens,14.6,8.5 344 | 412,scandens,15.2,9.1 345 | 421,scandens,13.5,9 346 | 429,scandens,15.1,9.2 347 | 460,scandens,15,9.9 348 | 464,scandens,12.8,8.6 349 | 467,scandens,14.9,9.2 350 | 469,scandens,15.3,8.4 351 | 470,scandens,13.4,8.9 352 | 471,scandens,14.2,8.5 353 | 472,scandens,15.1,10.4 354 | 478,scandens,15.1,9.6 355 | 508,scandens,14,9.1 356 | 523,scandens,13.6,9.3 357 | 525,scandens,14,9.3 358 | 562,scandens,14,8.8 359 | 568,scandens,13.9,8.3 360 | 569,scandens,14,8.8 361 | 570,scandens,14.9,9.1 362 | 571,scandens,15.6,10.1 363 | 573,scandens,13.8,8.9 364 | 575,scandens,14.4,9.2 365 | 577,scandens,12.8,8.5 366 | 
612,scandens,14.2,10.2 367 | 613,scandens,13.4,10.1 368 | 614,scandens,14,9.2 369 | 718,scandens,14.8,9.7 370 | 722,scandens,14.2,9.1 371 | 724,scandens,13.5,8.5 372 | 727,scandens,13.4,8.2 373 | 728,scandens,14.6,9 374 | 729,scandens,13.5,9.3 375 | 782,scandens,13.7,8 376 | 783,scandens,13.9,9.1 377 | 784,scandens,13.1,8.1 378 | 806,scandens,13.4,8.3 379 | 816,scandens,13.8,8.7 380 | 890,scandens,13.6,8.8 381 | 901,scandens,14,8.6 382 | 916,scandens,13.5,8.7 383 | 918,scandens,12.8,8 384 | 920,scandens,14,8.8 385 | 935,scandens,13.4,9 386 | 953,scandens,14.9,9.1 387 | 1014,scandens,15.54,9.74 388 | 1642,scandens,14.63,9.1 389 | 1748,scandens,14.73,9.8 390 | 1841,scandens,15.73,10.4 391 | 1842,scandens,14.83,8.3 392 | 2084,scandens,15.94,9.44 393 | 2397,scandens,15.14,9.04 394 | 8110,scandens,14.23,9 395 | 20122,scandens,14.15,9.05 396 | 20151,scandens,14.35,9.65 397 | 20188,scandens,14.95,9.45 398 | 20210,scandens,13.95,8.65 399 | 20223,scandens,14.05,9.45 400 | 20225,scandens,14.55,9.45 401 | 20252,scandens,14.05,9.05 402 | 20255,scandens,14.45,8.75 403 | 20266,scandens,15.05,9.45 404 | 20279,scandens,13.25,8.35 405 | -------------------------------------------------------------------------------- /notebooks/archive/extra-practice-student-multi-group-comparsion-sterilization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pymc3 as pm\n", 10 | "import matplotlib.pyplot as plt\n", 11 | "import pandas as pd\n", 12 | "import numpy as np\n", 13 | "import missingno as msno\n", 14 | "from sklearn.preprocessing import LabelEncoder\n", 15 | "import theano.tensor as tt\n", 16 | "from utils import ECDF\n", 17 | "import arviz as az\n", 18 | "\n", 19 | "%load_ext autoreload\n", 20 | "%autoreload 2\n", 21 | "%matplotlib inline\n", 22 | "%config InlineBackend.figure_format = 'retina'" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "# Introduction\n", 30 | "\n", 31 | "This notebook is designed to give you more practice with PyMC3 syntax. \n", 32 | "\n", 33 | "It is intentionally designed to provide more guidance w.r.t. model definition, i.e. which parameters to use, such that the focus is more on PyMC3 syntax than on the mechanics of model definition. \n", 34 | "\n", 35 | "If you are already feeling comfortable with PyMC3 syntax, and would like to move on to practice with model definition, then feel free to move on to notebook 5 instead, where you can play with the Darwin's Finches dataset. That notebook is intentionally designed with much more freedom.\n", 36 | "\n", 37 | "## Setup\n", 38 | "\n", 39 | "You will be experimentally analyzing the effectiveness of six different phone sterilization methods against two control methods. This research was conducted at MIT's Division of Comparative Medicine, and was published in 2018 in the Journal of the American Association for Laboratory Animal Science. If you're interested, you can read the paper [here][jaalas].\n", 40 | "\n", 41 | "[jaalas]: https://www.ncbi.nlm.nih.gov/pubmed/29402348\n", 42 | "\n", 43 | "### Experiment Design\n", 44 | "\n", 45 | "Briefly, the experiments were set up as follows.\n", 46 | "\n", 47 | "1. Pre-sterilization, three sites on the phone were swabbed and the number of colony forming units (CFUs) was determined by letting the swabbed bacteria grow on an agar plate.\n", 48 | "1. 
Post-sterilization, the same three sites were swabbed and the number of CFUs was counted.\n", 49 | "1. Sterilization efficacy was determined by taking the ratio of the difference in CFUs (pre minus post) to the pre-sterilization count.\n", 50 | "\n", 51 | "In the paper, we used the following formula to compute the percentage reduction:\n", 52 | "\n", 53 | "$$\delta_{method} = \frac{{count}_{pre} - {count}_{post}}{{count}_{pre}}$$\n", 54 | "\n", 55 | "In retrospect, a better definition would have been:\n", 56 | "\n", 57 | "$$x = \frac{{count}_{pre} - {count}_{post}}{{count}_{pre}}$$\n", 58 | "$$\delta_{method} = \begin{cases}\n", 59 | " 0 & \text{if } x \lt 0, \\\n", 60 | " 1 & \text{if } x \gt 1, \\\n", 61 | " x & \text{otherwise}\n", 62 | " \end{cases}$$\n", 63 | "\n", 64 | "Sometimes, due to experimental variation, there are more colonies post-sterilization than pre-sterilization. This may occur particularly when the number of colonies counted on a plate is low (e.g. countable on our fingers). Thus, clipping the values helps us avoid negative percentage reductions, which one might consider to be an \"absurd\" scenario to be in. (A short NumPy sketch of this clipping rule appears at the end of this notebook's discussion section.)\n", 65 | "\n", 66 | "### Data\n", 67 | "\n", 68 | "The data for this notebook came from [Evaluation of 6 Methods for Aerobic Bacterial Sanitization of Smartphones](https://www.ncbi.nlm.nih.gov/pubmed/29402348)." 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "### Step 1: Define Data Generating Process\n", 76 | "\n", 77 | "Just as in the previous notebook, you may want to spend 5-10 minutes talking through the data generating process before proceeding. Most important is to list out the distributions that you think are most relevant to the problem." 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "### Step 2: Explore the Data\n", 85 | "\n", 86 | "Let's load the data. Here is the source code for the `load_sterilization` function, which will return the cleaned data as a pandas dataframe, as well as a mapping dictionary that we will use later to label groups." 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": null, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "from data import load_sterilization\n", 96 | "\n", 97 | "load_sterilization??" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "df, mapping = load_sterilization()" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "**Exercise:** \n", 114 | "View a random sample of 5 rows to get a feel for the structure of the data." 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "# Your code below\n" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "metadata": {}, 129 | "source": [ 130 | "**Exercise:** To help you visualize what data are available and missing in the dataframe, run the cell below to get a visual matrix (using MissingNo). 
(By the way, be sure to make use of this awesome tool in your data analysis!)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": null, 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "# Run this cell, no coding required.\n", 140 | "msno.matrix(df)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "**Exercise:** Plot the average percentage reduction in colonies for each treatment." 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": null, 153 | "metadata": {}, 154 | "outputs": [], 155 | "source": [ 156 | "# Write your code here.\n" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "### Step 3: Implement and Fit Model\n", 164 | "\n", 165 | "**Exercise:** Write the generative model for the data. \n", 166 | "\n", 167 | "To help you, this is a diagrammed version of the model below.\n", 168 | "\n", 169 | "" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": null, 175 | "metadata": {}, 176 | "outputs": [], 177 | "source": [ 178 | "# Implement the model following the hints provided.\n", 179 | "with pm.Model() as _________:\n", 180 | " \n", 181 | " # Define an exponential distribution: it is positive-only, which fits our assumptions about the\n", 182 | " # data. Also, we choose a lam parameter that allows the prior to be flat.\n", 183 | " # Remember that there are 8 treatments, so the shape of these distributions must be set correctly.\n", 184 | " mu_pre = \n", 185 | " mu_post = \n", 186 | " \n", 187 | " # Define the likelihoods using Poisson distributions.\n", 188 | " like_pre = \n", 189 | " like_post = \n", 190 | " \n", 191 | " # Compute the percentage reduction.\n", 192 | " # hint: tt.clip performs the same operation as \n", 193 | " # np.clip; here, you want to bound your values between 0 and 1.\n", 194 | " perc_reduction = " 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "**Exercise:** Now, sample from the posterior!" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "# Your code below\n" 211 | ] 212 | }, 213 | { 214 | "cell_type": "markdown", 215 | "metadata": {}, 216 | "source": [ 217 | "**Exercise:** Check the traces to make sure that sampling has converged." 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": null, 223 | "metadata": {}, 224 | "outputs": [], 225 | "source": [ 226 | "# Your code below. Use pm.traceplot(trace)\n" 227 | ] 228 | }, 229 | { 230 | "cell_type": "markdown", 231 | "metadata": {}, 232 | "source": [ 233 | "**Exercise:** Visualize the posterior distributions of percentage reduction." 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": {}, 240 | "outputs": [], 241 | "source": [ 242 | "# Use the `forestplot` or `plot_posterior` functions.\n", 243 | "az._______(_______, var_names=______)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "markdown", 248 | "metadata": {}, 249 | "source": [ 250 | "**Discussion:** Find a neighbour who is working on the same notebook, and discuss this together.\n", 251 | "\n", 252 | "- Which method of sterilization is the most effective? \n", 253 | "- Observe the posterior distribution. Is there any uncertainty surrounding this method's effectiveness? Could we still be wrong about the uncertainty?"
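,
 "\n",
 "\n",
 "If you want a concrete feel for the clipping rule defined at the top of this notebook, here is a minimal NumPy sketch. The counts below are made up for illustration; they are not taken from the dataset.\n",
 "\n",
 "```python\n",
 "import numpy as np\n",
 "\n",
 "pre = np.array([10, 5, 0, 8])  # toy pre-sterilization CFU counts\n",
 "post = np.array([2, 7, 0, 0])  # toy post-sterilization CFU counts\n",
 "with np.errstate(divide='ignore', invalid='ignore'):\n",
 "    x = (pre - post) / pre  # raw fractional reduction\n",
 "delta = np.clip(x, 0, 1)  # negatives clip to 0; 0/0 stays NaN, mirroring #DIV/0! in the raw CSV\n",
 "```"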
254 | ] 255 | }, 256 | { 257 | "cell_type": "markdown", 258 | "metadata": {}, 259 | "source": [ 260 | "# Summary\n", 261 | "\n", 262 | "- Two-group (and multi-group) comparisons involve a Bayesian estimation procedure.\n", 263 | "- We estimate the parameter of interest for each group, and then compare the parameter posterior distributions." 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [] 272 | } 273 | ], 274 | "metadata": { 275 | "kernelspec": { 276 | "display_name": "bayesian-modelling-tutorial", 277 | "language": "python", 278 | "name": "bayesian-modelling-tutorial" 279 | }, 280 | "language_info": { 281 | "codemirror_mode": { 282 | "name": "ipython", 283 | "version": 3 284 | }, 285 | "file_extension": ".py", 286 | "mimetype": "text/x-python", 287 | "name": "python", 288 | "nbconvert_exporter": "python", 289 | "pygments_lexer": "ipython3", 290 | "version": "3.7.2" 291 | } 292 | }, 293 | "nbformat": 4, 294 | "nbformat_minor": 2 295 | } 296 | -------------------------------------------------------------------------------- /data/baseballdb/core/SeriesPost.csv: -------------------------------------------------------------------------------- 1 | yearID,round,teamIDwinner,lgIDwinner,teamIDloser,lgIDloser,wins,losses,ties 2 | 1884,WS,PRO,NL,NY4,AA,3,0,0 3 | 1885,WS,CHN,NL,SL4,AA,3,3,1 4 | 1886,WS,SL4,AA,CHN,NL,4,2,0 5 | 1887,WS,DTN,NL,SL4,AA,10,5,0 6 | 1888,WS,NY1,NL,SL4,AA,6,4,0 7 | 1889,WS,NY1,NL,BR3,AA,6,3,0 8 | 1890,WS,BRO,NL,LS2,AA,3,3,1 9 | 1892,CS,BSN,NL,CL4,NL,5,0,1 10 | 1903,WS,BOS,AL,PIT,NL,5,3,0 11 | 1905,WS,NY1,NL,PHA,AL,4,1,0 12 | 1906,WS,CHA,AL,CHN,NL,4,2,0 13 | 1907,WS,CHN,NL,DET,AL,4,0,0 14 | 1908,WS,CHN,NL,DET,AL,4,1,0 15 | 1909,WS,PIT,NL,DET,AL,4,3,0 16 | 1910,WS,PHA,AL,CHN,NL,4,1,0 17 | 1911,WS,PHA,AL,NY1,NL,4,2,0 18 | 1912,WS,BOS,AL,NY1,NL,4,3,0 19 | 1913,WS,PHA,AL,NY1,NL,4,1,0 20 | 1914,WS,BSN,NL,PHA,AL,4,0,0 21 | 1915,WS,BOS,AL,PHI,NL,4,1,0 22 | 1916,WS,BOS,AL,BRO,NL,4,1,0 23 | 1917,WS,CHA,AL,NY1,NL,4,2,0 24 | 1918,WS,BOS,AL,CHN,NL,4,2,0 25 | 1919,WS,CIN,NL,CHA,AL,5,3,0 26 | 1920,WS,CLE,AL,BRO,NL,5,2,0 27 | 1921,WS,NY1,NL,NYA,AL,5,3,0 28 | 1922,WS,NY1,NL,NYA,AL,4,0,0 29 | 1923,WS,NYA,AL,NY1,NL,4,2,0 30 | 1924,WS,WS1,AL,NY1,NL,4,3,0 31 | 1925,WS,PIT,NL,WS1,AL,4,3,0 32 | 1926,WS,SLN,NL,NYA,AL,4,3,0 33 | 1927,WS,NYA,AL,PIT,NL,4,0,0 34 | 1928,WS,NYA,AL,SLN,NL,4,0,0 35 | 1929,WS,PHA,AL,CHN,NL,4,1,0 36 | 1930,WS,PHA,AL,SLN,NL,4,2,0 37 | 1931,WS,SLN,NL,PHA,AL,4,3,0 38 | 1932,WS,NYA,AL,CHN,NL,4,0,0 39 | 1933,WS,NY1,NL,WS1,AL,4,1,0 40 | 1934,WS,SLN,NL,DET,AL,4,3,0 41 | 1935,WS,DET,AL,CHN,NL,4,2,0 42 | 1936,WS,NYA,AL,NY1,NL,4,2,0 43 | 1937,WS,NYA,AL,NY1,NL,4,1,0 44 | 1938,WS,NYA,AL,CHN,NL,4,0,0 45 | 1939,WS,NYA,AL,CIN,NL,4,0,0 46 | 1940,WS,CIN,NL,DET,AL,4,3,0 47 | 1941,WS,NYA,AL,BRO,NL,4,1,0 48 | 1942,WS,SLN,NL,NYA,AL,4,1,0 49 | 1943,WS,NYA,AL,SLN,NL,4,1,0 50 | 1944,WS,SLN,NL,SLA,AL,4,2,0 51 | 1945,WS,DET,AL,CHN,NL,4,3,0 52 | 1946,WS,SLN,NL,BOS,AL,4,3,0 53 | 1947,WS,NYA,AL,BRO,NL,4,3,0 54 | 1948,WS,CLE,AL,BSN,NL,4,2,0 55 | 1949,WS,NYA,AL,BRO,NL,4,1,0 56 | 1950,WS,NYA,AL,PHI,NL,4,0,0 57 | 1951,WS,NYA,AL,NY1,NL,4,2,0 58 | 1952,WS,NYA,AL,BRO,NL,4,3,0 59 | 1953,WS,NYA,AL,BRO,NL,4,2,0 60 | 1954,WS,NY1,NL,CLE,AL,4,0,0 61 | 1955,WS,BRO,NL,NYA,AL,4,3,0 62 | 1956,WS,NYA,AL,BRO,NL,4,3,0 63 | 1957,WS,ML1,NL,NYA,AL,4,3,0 64 | 1958,WS,NYA,AL,ML1,NL,4,3,0 65 | 1959,WS,LAN,NL,CHA,AL,4,2,0 66 | 1960,WS,PIT,NL,NYA,AL,4,3,0 67 | 1961,WS,NYA,AL,CIN,NL,4,1,0 68 | 1962,WS,NYA,AL,SFN,NL,4,3,0 69 | 
1963,WS,LAN,NL,NYA,AL,4,0,0 70 | 1964,WS,SLN,NL,NYA,AL,4,3,0 71 | 1965,WS,LAN,NL,MIN,AL,4,3,0 72 | 1966,WS,BAL,AL,LAN,NL,4,0,0 73 | 1967,WS,SLN,NL,BOS,AL,4,3,0 74 | 1968,WS,DET,AL,SLN,NL,4,3,0 75 | 1969,ALCS,BAL,AL,MIN,AL,3,0,0 76 | 1969,NLCS,NYN,NL,ATL,NL,3,0,0 77 | 1969,WS,NYN,NL,BAL,AL,4,1,0 78 | 1970,ALCS,BAL,AL,MIN,AL,3,0,0 79 | 1970,NLCS,CIN,NL,PIT,NL,3,0,0 80 | 1970,WS,BAL,AL,CIN,NL,4,1,0 81 | 1971,ALCS,BAL,AL,OAK,AL,3,0,0 82 | 1971,NLCS,PIT,NL,SFN,NL,3,1,0 83 | 1971,WS,PIT,NL,BAL,AL,4,3,0 84 | 1972,ALCS,OAK,AL,DET,AL,3,2,0 85 | 1972,NLCS,CIN,NL,PIT,NL,3,2,0 86 | 1972,WS,OAK,AL,CIN,NL,4,3,0 87 | 1973,ALCS,OAK,AL,BAL,AL,3,2,0 88 | 1973,NLCS,NYN,NL,CIN,NL,3,2,0 89 | 1973,WS,OAK,AL,NYN,NL,4,3,0 90 | 1974,ALCS,OAK,AL,BAL,AL,3,1,0 91 | 1974,NLCS,LAN,NL,PIT,NL,3,1,0 92 | 1974,WS,OAK,AL,LAN,NL,4,1,0 93 | 1975,ALCS,BOS,AL,OAK,AL,3,0,0 94 | 1975,NLCS,CIN,NL,PIT,NL,3,0,0 95 | 1975,WS,CIN,NL,BOS,AL,4,3,0 96 | 1976,ALCS,NYA,AL,KCA,AL,3,2,0 97 | 1976,NLCS,CIN,NL,PHI,NL,3,0,0 98 | 1976,WS,CIN,NL,NYA,AL,4,0,0 99 | 1977,ALCS,NYA,AL,KCA,AL,3,2,0 100 | 1977,NLCS,LAN,NL,PHI,NL,3,1,0 101 | 1977,WS,NYA,AL,LAN,NL,4,2,0 102 | 1978,ALCS,NYA,AL,KCA,AL,3,1,0 103 | 1978,NLCS,LAN,NL,PHI,NL,3,1,0 104 | 1978,WS,NYA,AL,LAN,NL,4,2,0 105 | 1979,ALCS,BAL,AL,CAL,AL,3,1,0 106 | 1979,NLCS,PIT,NL,CIN,NL,3,0,0 107 | 1979,WS,PIT,NL,BAL,AL,4,3,0 108 | 1980,ALCS,KCA,AL,NYA,AL,3,0,0 109 | 1980,NLCS,PHI,NL,HOU,NL,3,2,0 110 | 1980,WS,PHI,NL,KCA,AL,4,2,0 111 | 1981,AEDIV,NYA,AL,ML4,AL,3,2,0 112 | 1981,ALCS,NYA,AL,OAK,AL,3,0,0 113 | 1981,AWDIV,OAK,AL,KCA,AL,3,0,0 114 | 1981,NEDIV,MON,NL,PHI,NL,3,2,0 115 | 1981,NLCS,LAN,NL,MON,NL,3,2,0 116 | 1981,NWDIV,LAN,NL,HOU,NL,3,2,0 117 | 1981,WS,LAN,NL,NYA,AL,4,2,0 118 | 1982,ALCS,ML4,AL,CAL,AL,3,2,0 119 | 1982,NLCS,SLN,NL,ATL,NL,3,0,0 120 | 1982,WS,SLN,NL,ML4,AL,4,3,0 121 | 1983,ALCS,BAL,AL,CHA,AL,3,1,0 122 | 1983,NLCS,PHI,NL,LAN,NL,3,1,0 123 | 1983,WS,BAL,AL,PHI,NL,4,1,0 124 | 1984,ALCS,DET,AL,KCA,AL,3,0,0 125 | 1984,NLCS,SDN,NL,CHN,NL,3,2,0 126 | 1984,WS,DET,AL,SDN,NL,4,1,0 127 | 1985,ALCS,KCA,AL,TOR,AL,4,3,0 128 | 1985,NLCS,SLN,NL,LAN,NL,4,2,0 129 | 1985,WS,KCA,AL,SLN,NL,4,3,0 130 | 1986,ALCS,BOS,AL,CAL,AL,4,3,0 131 | 1986,NLCS,NYN,NL,HOU,NL,4,2,0 132 | 1986,WS,NYN,NL,BOS,AL,4,3,0 133 | 1987,ALCS,MIN,AL,DET,AL,4,1,0 134 | 1987,NLCS,SLN,NL,SFN,NL,4,3,0 135 | 1987,WS,MIN,AL,SLN,NL,4,3,0 136 | 1988,ALCS,OAK,AL,BOS,AL,4,0,0 137 | 1988,NLCS,LAN,NL,NYN,NL,4,3,0 138 | 1988,WS,LAN,NL,OAK,AL,4,1,0 139 | 1989,ALCS,OAK,AL,TOR,AL,4,1,0 140 | 1989,NLCS,SFN,NL,CHN,NL,4,1,0 141 | 1989,WS,OAK,AL,SFN,NL,4,0,0 142 | 1990,ALCS,OAK,AL,BOS,AL,4,0,0 143 | 1990,NLCS,CIN,NL,PIT,NL,4,2,0 144 | 1990,WS,CIN,NL,OAK,AL,4,0,0 145 | 1991,ALCS,MIN,AL,TOR,AL,4,1,0 146 | 1991,NLCS,ATL,NL,PIT,NL,4,3,0 147 | 1991,WS,MIN,AL,ATL,NL,4,3,0 148 | 1992,ALCS,TOR,AL,OAK,AL,4,2,0 149 | 1992,NLCS,ATL,NL,PIT,NL,4,3,0 150 | 1992,WS,TOR,AL,ATL,NL,4,2,0 151 | 1993,ALCS,TOR,AL,CHA,AL,4,2,0 152 | 1993,NLCS,PHI,NL,ATL,NL,4,2,0 153 | 1993,WS,TOR,AL,PHI,NL,4,2,0 154 | 1995,ALCS,CLE,AL,SEA,AL,4,2,0 155 | 1995,ALDS1,CLE,AL,BOS,AL,3,0,0 156 | 1995,ALDS2,SEA,AL,NYA,AL,3,2,0 157 | 1995,NLCS,ATL,NL,CIN,NL,4,0,0 158 | 1995,NLDS1,ATL,NL,COL,NL,3,1,0 159 | 1995,NLDS2,CIN,NL,LAN,NL,3,0,0 160 | 1995,WS,ATL,NL,CLE,AL,4,2,0 161 | 1996,ALCS,NYA,AL,BAL,AL,4,1,0 162 | 1996,ALDS1,BAL,AL,CLE,AL,3,1,0 163 | 1996,ALDS2,NYA,AL,TEX,AL,3,1,0 164 | 1996,NLCS,ATL,NL,SLN,NL,4,3,0 165 | 1996,NLDS1,ATL,NL,LAN,NL,3,0,0 166 | 1996,NLDS2,SLN,NL,SDN,NL,3,0,0 167 | 1996,WS,NYA,AL,ATL,NL,4,2,0 168 | 1997,ALCS,CLE,AL,BAL,AL,4,2,0 169 | 1997,ALDS1,CLE,AL,NYA,AL,3,2,0 170 | 
1997,ALDS2,BAL,AL,SEA,AL,3,1,0 171 | 1997,NLCS,FLO,NL,ATL,NL,4,2,0 172 | 1997,NLDS1,FLO,NL,SFN,NL,3,0,0 173 | 1997,NLDS2,ATL,NL,HOU,NL,3,0,0 174 | 1997,WS,FLO,NL,CLE,AL,4,3,0 175 | 1998,ALCS,NYA,AL,CLE,AL,4,2,0 176 | 1998,ALDS1,CLE,AL,BOS,AL,3,1,0 177 | 1998,ALDS2,NYA,AL,TEX,AL,3,0,0 178 | 1998,NLCS,SDN,NL,ATL,NL,4,2,0 179 | 1998,NLDS1,ATL,NL,CHN,NL,3,0,0 180 | 1998,NLDS2,SDN,NL,HOU,NL,3,1,0 181 | 1998,WS,NYA,AL,SDN,NL,4,0,0 182 | 1999,ALCS,NYA,AL,BOS,AL,4,1,0 183 | 1999,ALDS1,BOS,AL,CLE,AL,3,2,0 184 | 1999,ALDS2,NYA,AL,TEX,AL,3,0,0 185 | 1999,NLCS,ATL,NL,NYN,NL,4,2,0 186 | 1999,NLDS1,ATL,NL,HOU,NL,3,1,0 187 | 1999,NLDS2,NYN,NL,ARI,NL,3,1,0 188 | 1999,WS,NYA,AL,ATL,NL,4,0,0 189 | 2000,ALCS,NYA,AL,SEA,AL,4,2,0 190 | 2000,ALDS1,NYA,AL,OAK,AL,3,2,0 191 | 2000,ALDS2,SEA,AL,CHA,AL,3,0,0 192 | 2000,NLCS,NYN,NL,SLN,NL,4,1,0 193 | 2000,NLDS1,SLN,NL,ATL,NL,3,0,0 194 | 2000,NLDS2,NYN,NL,SFN,NL,3,1,0 195 | 2000,WS,NYA,AL,NYN,NL,4,1,0 196 | 2001,ALCS,NYA,AL,SEA,AL,4,1,0 197 | 2001,ALDS1,SEA,AL,CLE,AL,3,2,0 198 | 2001,ALDS2,NYA,AL,OAK,AL,3,2,0 199 | 2001,NLCS,ARI,NL,ATL,NL,4,1,0 200 | 2001,NLDS1,ATL,NL,HOU,NL,3,0,0 201 | 2001,NLDS2,ARI,NL,SLN,NL,3,2,0 202 | 2001,WS,ARI,NL,NYA,AL,4,3,0 203 | 2002,ALCS,ANA,AL,MIN,AL,4,1,0 204 | 2002,ALDS1,ANA,AL,NYA,AL,3,1,0 205 | 2002,ALDS2,MIN,AL,OAK,AL,3,2,0 206 | 2002,NLCS,SFN,NL,SLN,NL,4,1,0 207 | 2002,NLDS1,SFN,NL,ATL,NL,3,2,0 208 | 2002,NLDS2,SLN,NL,ARI,NL,3,0,0 209 | 2002,WS,ANA,AL,SFN,NL,4,3,0 210 | 2003,ALCS,NYA,AL,BOS,AL,4,3,0 211 | 2003,ALDS1,NYA,AL,MIN,AL,3,1,0 212 | 2003,ALDS2,BOS,AL,OAK,AL,3,2,0 213 | 2003,NLCS,FLO,NL,CHN,NL,4,3,0 214 | 2003,NLDS1,FLO,NL,SFN,NL,3,1,0 215 | 2003,NLDS2,CHN,NL,ATL,NL,3,2,0 216 | 2003,WS,FLO,NL,NYA,AL,4,2,0 217 | 2004,ALCS,BOS,AL,NYA,AL,4,3,0 218 | 2004,ALDS1,BOS,AL,ANA,AL,3,0,0 219 | 2004,ALDS2,NYA,AL,MIN,AL,3,1,0 220 | 2004,NLCS,SLN,NL,HOU,NL,4,3,0 221 | 2004,NLDS1,SLN,NL,LAN,NL,3,1,0 222 | 2004,NLDS2,HOU,NL,ATL,NL,3,2,0 223 | 2004,WS,BOS,AL,SLN,NL,4,0,0 224 | 2005,ALCS,CHA,AL,LAA,AL,4,1,0 225 | 2005,ALDS1,CHA,AL,BOS,AL,3,0,0 226 | 2005,ALDS2,LAA,AL,NYA,AL,3,2,0 227 | 2005,NLCS,HOU,NL,SLN,NL,4,2,0 228 | 2005,NLDS1,SLN,NL,SDN,NL,3,0,0 229 | 2005,NLDS2,HOU,NL,ATL,NL,3,1,0 230 | 2005,WS,CHA,AL,HOU,NL,4,0,0 231 | 2006,ALCS,DET,AL,OAK,AL,4,0,0 232 | 2006,ALDS1,DET,AL,NYA,AL,3,1,0 233 | 2006,ALDS2,OAK,AL,MIN,AL,3,0,0 234 | 2006,NLCS,SLN,NL,NYN,NL,4,3,0 235 | 2006,NLDS1,NYN,NL,LAN,NL,3,0,0 236 | 2006,NLDS2,SLN,NL,SDN,NL,3,1,0 237 | 2006,WS,SLN,NL,DET,AL,4,1,0 238 | 2007,ALCS,BOS,AL,CLE,AL,4,3,0 239 | 2007,ALDS1,BOS,AL,LAA,AL,3,0,0 240 | 2007,ALDS2,CLE,AL,NYA,AL,3,1,0 241 | 2007,NLCS,COL,NL,ARI,NL,4,0,0 242 | 2007,NLDS1,ARI,NL,CHN,NL,3,0,0 243 | 2007,NLDS2,COL,NL,PHI,NL,3,0,0 244 | 2007,WS,BOS,AL,COL,NL,4,0,0 245 | 2008,ALCS,TBA,AL,BOS,AL,4,3,0 246 | 2008,ALDS1,BOS,AL,LAA,AL,3,1,0 247 | 2008,ALDS2,TBA,AL,CHA,AL,3,1,0 248 | 2008,NLCS,PHI,NL,LAN,NL,4,1,0 249 | 2008,NLDS1,LAN,NL,CHN,NL,3,0,0 250 | 2008,NLDS2,PHI,NL,MIL,NL,3,1,0 251 | 2008,WS,PHI,NL,TBA,AL,4,1,0 252 | 2009,ALCS,NYA,AL,LAA,AL,4,2,0 253 | 2009,ALDS1,NYA,AL,MIN,AL,3,0,0 254 | 2009,ALDS2,LAA,AL,BOS,AL,3,0,0 255 | 2009,NLCS,PHI,NL,LAN,NL,4,1,0 256 | 2009,NLDS1,LAN,NL,SLN,NL,3,0,0 257 | 2009,NLDS2,PHI,NL,COL,NL,3,1,0 258 | 2009,WS,NYA,AL,PHI,NL,4,2,0 259 | 2010,ALCS,TEX,AL,NYA,AL,4,2,0 260 | 2010,ALDS1,TEX,AL,TBA,AL,3,2,0 261 | 2010,ALDS2,NYA,AL,MIN,AL,3,0,0 262 | 2010,NLCS,SFN,NL,PHI,NL,4,2,0 263 | 2010,NLDS1,PHI,NL,CIN,NL,3,0,0 264 | 2010,NLDS2,SFN,NL,ATL,NL,3,1,0 265 | 2010,WS,SFN,NL,TEX,AL,4,1,0 266 | 2011,ALCS,TEX,AL,DET,AL,4,2,0 267 | 2011,ALDS1,DET,AL,NYA,AL,3,2,0 268 
| 2011,ALDS2,TEX,AL,TBA,AL,3,1,0 269 | 2011,NLCS,SLN,NL,MIL,NL,4,2,0 270 | 2011,NLDS1,SLN,NL,PHI,NL,3,2,0 271 | 2011,NLDS2,MIL,NL,ARI,NL,3,2,0 272 | 2011,WS,SLN,NL,TEX,AL,4,3,0 273 | 2012,ALWC,BAL,AL,TEX,AL,1,0,0 274 | 2012,ALCS,DET,AL,NYA,AL,4,0,0 275 | 2012,ALDS1,NYA,AL,BAL,AL,3,2,0 276 | 2012,ALDS2,DET,AL,OAK,AL,3,2,0 277 | 2012,NLWC,SLN,NL,ATL,NL,1,0,0 278 | 2012,NLCS,SFN,NL,SLN,NL,4,3,0 279 | 2012,NLDS1,SLN,NL,WAS,NL,3,2,0 280 | 2012,NLDS2,SFN,NL,CIN,NL,3,2,0 281 | 2012,WS,SFN,NL,DET,AL,4,0,0 282 | 2013,ALWC,TBA,AL,CLE,AL,1,0,0 283 | 2013,ALCS,BOS,AL,DET,AL,4,2,0 284 | 2013,ALDS1,BOS,AL,TBA,AL,3,1,0 285 | 2013,ALDS2,DET,AL,OAK,AL,3,2,0 286 | 2013,NLWC,PIT,NL,CIN,NL,1,0,0 287 | 2013,NLCS,SLN,NL,LAN,NL,4,2,0 288 | 2013,NLDS1,SLN,NL,PIT,NL,3,2,0 289 | 2013,NLDS2,LAN,NL,ATL,NL,3,1,0 290 | 2013,WS,BOS,AL,SLN,NL,4,3,0 291 | 2014,ALWC,KCA,AL,OAK,AL,1,0,0 292 | 2014,ALCS,KCA,AL,BAL,AL,4,0,0 293 | 2014,ALDS1,KCA,AL,LAA,AL,3,0,0 294 | 2014,ALDS2,BAL,AL,DET,AL,3,0,0 295 | 2014,NLWC,SFN,NL,PIT,NL,1,0,0 296 | 2014,NLCS,SFN,NL,SLN,NL,4,1,0 297 | 2014,NLDS1,SFN,NL,WAS,NL,3,1,0 298 | 2014,NLDS2,SLN,NL,LAN,NL,3,1,0 299 | 2014,WS,SFN,NL,KCA,AL,4,3,0 300 | 2015,ALWC,HOU,AL,NYA,AL,1,0,0 301 | 2015,ALCS,KCA,AL,TOR,AL,4,2,0 302 | 2015,ALDS1,KCA,AL,HOU,AL,3,2,0 303 | 2015,ALDS2,TOR,AL,TEX,AL,3,2,0 304 | 2015,NLWC,CHN,NL,PIT,NL,1,0,0 305 | 2015,NLCS,NYN,NL,CHN,NL,4,0,0 306 | 2015,NLDS1,CHN,NL,SLN,NL,3,1,0 307 | 2015,NLDS2,NYN,NL,LAN,NL,3,2,0 308 | 2015,WS,KCA,AL,NYN,NL,4,1,0 309 | 2016,ALWC,TOR,AL,BAL,AL,1,0,0 310 | 2016,ALCS,CLE,AL,TOR,AL,4,1,0 311 | 2016,ALDS1,TOR,AL,TEX,AL,3,0,0 312 | 2016,ALDS2,CLE,AL,BOS,AL,3,0,0 313 | 2016,NLWC,SFN,NL,NYN,NL,1,0,0 314 | 2016,NLCS,CHN,NL,LAN,NL,4,2,0 315 | 2016,NLDS1,CHN,NL,SFN,NL,3,1,0 316 | 2016,NLDS2,LAN,NL,WAS,NL,3,2,0 317 | 2016,WS,CHN,NL,CLE,AL,4,3,0 318 | -------------------------------------------------------------------------------- /scripts/ice_cream_shop_simulator.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Introduction\n", 8 | "\n", 9 | "This notebook is just here to try making up some ice cream shop data.\n", 10 | "One thing I do want to generate is a dataset that involves causal inference concepts. 
" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "from bayes_tutorial.data import load_baseball\n", 21 | "import pandas as pd\n", 22 | "from pyprojroot import here\n", 23 | "import namegenerator\n", 24 | "from faker.providers import company" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "data = pd.read_csv(here() / 'data/baseballdb/core/Batting.csv')" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "from faker import Faker\n", 43 | "\n", 44 | "f = Faker()" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "import janitor\n", 54 | "import numpy as np\n", 55 | "\n", 56 | "starter_data = (\n", 57 | " data\n", 58 | " .query(\"yearID == 2016\")\n", 59 | " .select_columns([\"playerID\", \"AB\", \"H\"])\n", 60 | " .rename_columns(\n", 61 | " {\n", 62 | " \"playerID\":\"shopname\",\n", 63 | " \"AB\": \"num_customers\", # this is the column that matters the most\n", 64 | " \"H\": \"num_likes\" # this one isn't as important, because I will be generating data.\n", 65 | " }\n", 66 | " )\n", 67 | " .transform_column(\"shopname\", lambda dummy : namegenerator.gen())\n", 68 | " .transform_column(\"shopname\", lambda x: \" \".join(x.split(\"-\")))\n", 69 | " .transform_column(\"shopname\", lambda x: x.capitalize())\n", 70 | " .join_apply(lambda x: x[\"num_likes\"] / x[\"num_customers\"] if x[\"num_customers\"] > 0 else np.nan, \"fraction_likes\")\n", 71 | ")" 72 | ] 73 | }, 74 | { 75 | "cell_type": "markdown", 76 | "metadata": {}, 77 | "source": [ 78 | "What we are going to do now is generate values of $p$ using a presumed hierarchical model.\n", 79 | "\n", 80 | "Firstly, likes and dislikes could be correlated by their parent chain.\n", 81 | "(Some chains are run well, while others are not.)\n", 82 | "\n", 83 | "The distribution of shops per company is such that most of them are independent and locally-owned businesses, while a few are large chains." 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "from scipy.stats import poisson\n", 93 | "\n", 94 | "num_chain_held_stores = poisson(50).rvs(8)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "# Was generated from the previous cell in one particular run\n", 104 | "num_chain_held_stores = [55, 48, 48, 54, 44, 62, 38, 58]" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "There are 8 \"chains\", and they each have the aforementioned number of stores per chain. (Some healthy competition going on there!) 
For the purposes of generating data, there is a 9th \"chain\" that is really just a placeholder.\n", 112 | "\n", 113 | "Let's now build the index that maps store to chain (or independent business).\n", 114 | "\n", 115 | "To do this, we will work in two steps:" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": {}, 122 | "outputs": [], 123 | "source": [ 124 | "owner_indices = []\n", 125 | "# Firstly, populate chain indices.\n", 126 | "for i, n in enumerate(num_chain_held_stores):\n", 127 | " owner_indices.extend([i] * n)\n", 128 | "\n", 129 | "# # Secondly, populate independently-owned businesses' indices.\n", 130 | "# for i in range(len(starter_data) - sum(num_chain_held_stores)):\n", 131 | "# owner_indices.append(i + len(num_chain_held_stores))\n", 132 | "owner_indices.extend([i + 1] * (len(starter_data) - sum(num_chain_held_stores)))" 133 | ] 134 | }, 135 | { 136 | "cell_type": "markdown", 137 | "metadata": {}, 138 | "source": [ 139 | "Now, we shuffle them up!" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "from random import shuffle\n", 149 | "\n", 150 | "shuffle(owner_indices)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "starter_data.add_column(\"owner_idx\", owner_indices).shuffle(reset_index=False)" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "Now, we are going to generate the $p$ for each of the shops.\n", 167 | "\n", 168 | "Firstly, I'm going to start with a hard-coded population parameter. Most of the shops _are_ going to have a generally positive rating at about 0.7." 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "p_pop = 0.7" 178 | ] 179 | }, 180 | { 181 | "cell_type": "markdown", 182 | "metadata": {}, 183 | "source": [ 184 | "We are going to go into logit space because it allows us to _more easily_\n", 185 | "reason about \"central tendencies\"." 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [ 194 | "from scipy.special import logit, expit" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "expit(logit(p_pop))" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "Because there are 8 chains, I will generate a $p$ for each of them."
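,
 "\n",
 "\n",
 "As a minimal sketch of what one such draw will look like (the sigma here is an arbitrary assumption, not one of the values hard-coded below):\n",
 "\n",
 "```python\n",
 "from scipy.special import expit, logit\n",
 "from scipy.stats import norm\n",
 "\n",
 "mu = logit(0.7)  # chain-level central tendency, in logit space\n",
 "p_shop = expit(norm(mu, 0.3).rvs())  # one shop-level p, squashed back into (0, 1)\n",
 "```"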
211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "metadata": {}, 217 | "outputs": [], 218 | "source": [ 219 | "from scipy.stats import beta, norm\n", 220 | "\n", 221 | "logit(beta(13, 17).rvs(6))" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": null, 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "beta(35, 8).rvs(2)" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "company_ps = [\n", 240 | " 0.48427595, # chain 0\n", 241 | " 0.52588245, # chain 1\n", 242 | " 0.34491850, # chain 2\n", 243 | " 0.30949678, # chain 3\n", 244 | " 0.43965704, # chain 4\n", 245 | " 0.31991239, # chain 5\n", 246 | " 0.80628789, # chain 6\n", 247 | " 0.78982137, # chain 7\n", 248 | " 0.86220633, # independent chains\n", 249 | "]" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": {}, 256 | "outputs": [], 257 | "source": [ 258 | "company_mus = logit(company_ps)\n", 259 | "company_mus" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "metadata": {}, 266 | "outputs": [], 267 | "source": [ 268 | "from scipy.stats import expon\n", 269 | "\n", 270 | "expon(scale=1/4).rvs(9)  # expon's first positional argument is loc, so pass scale by keyword" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": null, 276 | "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "company_sigmas = np.array([\n", 280 | " 0.21505173, # chain 0\n", 281 | " 0.60319852, # chain 1\n", 282 | " 0.30978955, # chain 2\n", 283 | " 0.16837932, # chain 3\n", 284 | " 0.14264645, # chain 4\n", 285 | " 0.54077756, # chain 5\n", 286 | " 0.18131425, # chain 6\n", 287 | " 0.16748833, # chain 7\n", 288 | " 1.20746328, # independently-held businesses\n", 289 | "])" 290 | ] 291 | }, 292 | { 293 | "cell_type": "markdown", 294 | "metadata": {}, 295 | "source": [ 296 | "Now, we can start drawing numbers!"
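,
 "\n",
 "\n",
 "Each shop's number of likes is then a Binomial draw, with n equal to the shop's customer count and p equal to the expit of its logit-space draw. A one-shop sketch, with every number assumed for illustration:\n",
 "\n",
 "```python\n",
 "from scipy.special import expit\n",
 "from scipy.stats import binom, norm\n",
 "\n",
 "logit_p = norm(0.85, 0.2).rvs()  # assumed chain-level mu and sigma\n",
 "num_favs = binom(120, expit(logit_p)).rvs()  # out of an assumed 120 customers\n",
 "```"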
297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": null, 302 | "metadata": {}, 303 | "outputs": [], 304 | "source": [ 305 | "data_generator = starter_data.add_column(\"owner_idx\", owner_indices)\n", 306 | "data_generator.shuffle(reset_index=False)" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [ 315 | "assert len(company_mus) == len(company_sigmas), f\"lengths differ: {len(company_mus)} vs {len(company_sigmas)}\"" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "metadata": {}, 322 | "outputs": [], 323 | "source": [ 324 | "company_logit_p = norm(loc=company_mus, scale=company_sigmas)\n", 325 | "company_logit_p" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": {}, 332 | "outputs": [], 333 | "source": [ 334 | "from scipy.stats import binom \n", 335 | "(\n", 336 | " data_generator\n", 337 | " .add_column(\"mus\", company_mus[data_generator[\"owner_idx\"]])\n", 338 | " .add_column(\"sigmas\", company_sigmas[data_generator[\"owner_idx\"]])\n", 339 | " .join_apply(lambda x: norm(x[\"mus\"], x[\"sigmas\"]).rvs(), \"logit_p\")\n", 340 | " .join_apply(lambda x: binom(x[\"num_customers\"], expit(x[\"logit_p\"])).rvs(), \"num_favs\")\n", 341 | ").to_csv(here() / \"data/ice_cream_shop.csv\")" 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": {}, 348 | "outputs": [], 349 | "source": [] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": null, 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [] 357 | } 358 | ], 359 | "metadata": { 360 | "kernelspec": { 361 | "display_name": "bayesian-modelling-tutorial", 362 | "language": "python", 363 | "name": "bayesian-modelling-tutorial" 364 | }, 365 | "language_info": { 366 | "codemirror_mode": { 367 | "name": "ipython", 368 | "version": 3 369 | }, 370 | "file_extension": ".py", 371 | "mimetype": "text/x-python", 372 | "name": "python", 373 | "nbconvert_exporter": "python", 374 | "pygments_lexer": "ipython3", 375 | "version": "3.7.6" 376 | } 377 | }, 378 | "nbformat": 4, 379 | "nbformat_minor": 4 380 | } 381 | --------------------------------------------------------------------------------