├── .gitignore ├── LICENSE ├── README.md ├── notebooks ├── Beta Distribution.ipynb ├── Binomial Distribution.ipynb ├── Cauchy Distribution.ipynb ├── Chi-Squared Distribution.ipynb ├── Geometric Distribution.ipynb ├── Hypergeometric Distribution.ipynb ├── Normal Distribution.ipynb └── Poisson Distribution.ipynb └── src ├── beta ├── 01_general.py ├── 02_sum.py ├── 03_fraction.py └── 04_MCMC_estimation.py ├── binomial ├── 01_general.py ├── 02_p.py ├── 03_n.py └── 04_mcmc_estimation.py ├── cauchy ├── 01_general.py ├── 02_x_0.py ├── 03_lambda.py └── 04_MCMC_estimation.py ├── chi2 ├── 01_general.py ├── 02_k.py ├── 03_estimation.py └── 04_MCMC_estimation.py ├── geometric ├── 01_general.py ├── 02_p.py ├── 03_estimation.py └── 04_MCMC_estimation.py ├── normal ├── 01_general.py ├── 02_mu.py ├── 03_sigma.py ├── 04_estimation.py └── 05_MCMC_estimation.py └── poisson ├── 01_general.py ├── 02_lambda.py ├── 03_estimation.py └── 04_mcmc_estimation.py /.gitignore: -------------------------------------------------------------------------------- 1 | Template.ipynb 2 | .ipynb_checkpoints/ 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Joshua Görner 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Distribution-cheatsheet 2 | A lookup repo for a variety of distributions (implemented in Python). 3 | *** 4 | 5 | ## Distribution Characteristics 6 | The characteristics of each distribution are separated into the following paragraphs: 7 | 1. Definition & Formula 8 | 2. Parameter 9 | 3. Implementation in Python 10 | 4. 
Inference of Parameter 11 | 12 | ## Overview 13 | The following distributions are (or will be) implemented in this repository: 14 | - [Beta Distribution](https://github.com/jgoerner/distribution-cheatsheet/blob/master/notebooks/Beta%20Distribution.ipynb) 15 | - [Binomial Distribution](https://github.com/jgoerner/distribution-cheatsheet/blob/master/notebooks/Binomial%20Distribution.ipynb) 16 | - [Cauchy Distribution](https://github.com/jgoerner/distribution-cheatsheet/blob/master/notebooks/Cauchy%20Distribution.ipynb) 17 | - [Chi-Squared Distribution](https://github.com/jgoerner/distribution-cheatsheet/blob/master/notebooks/Chi-Squared%20Distribution.ipynb) 18 | - F Distribution 19 | - Gamma Distribution 20 | - [Geometric Distribution](https://github.com/jgoerner/distribution-cheatsheet/blob/master/notebooks/Geometric%20Distribution.ipynb) 21 | - [Hypergeometric Distribution](https://github.com/jgoerner/distribution-cheatsheet/blob/master/notebooks/Hypergeometric%20Distribution.ipynb) 22 | - [Normal Distribution](https://github.com/jgoerner/distribution-cheatsheet/blob/master/notebooks/Normal%20Distribution.ipynb) 23 | - [Poisson Distribution](https://github.com/jgoerner/distribution-cheatsheet/blob/master/notebooks/Poisson%20Distribution.ipynb) 24 | - T Distribution 25 | -------------------------------------------------------------------------------- /notebooks/Hypergeometric Distribution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Hypergeometric Distribution\n", 8 | "***\n", 9 | "## Definition\n", 10 | ">The Hypergeometric is a discrete probability distribution and used (e.g.) \"to calculate probabilities when sampling without replacement\" $ ^{[1]}$." 
11 | ] 12 | } 13 | ], 14 | "metadata": { 15 | "kernelspec": { 16 | "display_name": "Python 3", 17 | "language": "python", 18 | "name": "python3" 19 | }, 20 | "language_info": { 21 | "codemirror_mode": { 22 | "name": "ipython", 23 | "version": 3 24 | }, 25 | "file_extension": ".py", 26 | "mimetype": "text/x-python", 27 | "name": "python", 28 | "nbconvert_exporter": "python", 29 | "pygments_lexer": "ipython3", 30 | "version": "3.6.2" 31 | } 32 | }, 33 | "nbformat": 4, 34 | "nbformat_minor": 2 35 | } 36 | -------------------------------------------------------------------------------- /src/beta/01_general.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import numpy as np 3 | import scipy.stats as stats 4 | import matplotlib.pyplot as plt 5 | import matplotlib.style as style 6 | from IPython.core.display import HTML 7 | 8 | # PLOTTING CONFIG 9 | %matplotlib inline 10 | style.use('fivethirtyeight') 11 | plt.rcParams["figure.figsize"] = (14, 7) 12 | HTML(""" 13 | 20 | """) 21 | plt.figure(dpi=100) 22 | 23 | # PDF 24 | plt.plot(np.linspace(0, 1, 100), 25 | stats.beta.pdf(np.linspace(0, 1, 100),a=2,b=2) / np.max(stats.beta.pdf(np.linspace(0, 1, 100),a=2,b=2)), 26 | ) 27 | plt.fill_between(np.linspace(0, 1, 100), 28 | stats.beta.pdf(np.linspace(0, 1, 100),a=2,b=2) / np.max(stats.beta.pdf(np.linspace(0, 1, 100),a=2,b=2)), 29 | alpha=.15, 30 | ) 31 | 32 | # CDF 33 | plt.plot(np.linspace(0, 1, 100), 34 | stats.beta.cdf(np.linspace(0, 1, 100),a=2,b=2), 35 | ) 36 | 37 | # LEGEND 38 | plt.text(x=0.1, y=.7, s="pdf (normed)", rotation=52, alpha=.75, weight="bold", color="#008fd5") 39 | plt.text(x=0.45, y=.5, s="cdf", rotation=40, alpha=.75, weight="bold", color="#fc4f30") 40 | 41 | # TICKS 42 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 43 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7) 44 | 45 | # TITLE, SUBTITLE & FOOTER 46 | plt.text(x = -.125, y = 1.25, s = "Beta Distribution - 
Overview", 47 | fontsize = 26, weight = 'bold', alpha = .75) 48 | plt.text(x = -.125, y = 1.1, 49 | s = 'Depicted below are the normed probability density function (pdf) and the cumulative density\nfunction (cdf) of a beta distributed random variable ' + r'$ y \sim Beta(\alpha, \beta)$, given $ \alpha = 2 $ and $ \beta = 2$.', 50 | fontsize = 19, alpha = .85) 51 | plt.text(x = -.125,y = -0.2, 52 | s = ' ©Joshua Görner github.com/jgoerner ', 53 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/beta/02_sum.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import numpy as np 3 | import scipy.stats as stats 4 | import matplotlib.pyplot as plt 5 | import matplotlib.style as style 6 | from IPython.core.display import HTML 7 | 8 | # PLOTTING CONFIG 9 | %matplotlib inline 10 | style.use('fivethirtyeight') 11 | plt.rcParams["figure.figsize"] = (14, 7) 12 | HTML(""" 13 | 20 | """) 21 | plt.figure(dpi=100) 22 | 23 | # A = B = 1 24 | plt.plot(np.linspace(0, 1, 200), 25 | stats.beta.pdf(np.linspace(0, 1, 200), a=1, b=1), 26 | ) 27 | plt.fill_between(np.linspace(0, 1, 200), 28 | stats.beta.pdf(np.linspace(0, 1, 200), a=1, b=1), 29 | alpha=.15, 30 | ) 31 | 32 | # A = B = 10 33 | plt.plot(np.linspace(0, 1, 200), 34 | stats.beta.pdf(np.linspace(0, 1, 200), a=10, b=10), 35 | ) 36 | plt.fill_between(np.linspace(0, 1, 200), 37 | stats.beta.pdf(np.linspace(0, 1, 200), a=10, b=10), 38 | alpha=.15, 39 | ) 40 | 41 | # A = B = 100 42 | plt.plot(np.linspace(0, 1, 200), 43 | stats.beta.pdf(np.linspace(0, 1, 200), a=100, b=100), 44 | ) 45 | plt.fill_between(np.linspace(0, 1, 200), 46 | stats.beta.pdf(np.linspace(0, 1, 200), a=100, b=100), 47 | alpha=.15, 48 | ) 49 | 50 | # LEGEND 51 | plt.text(x=0.1, y=1.45, s=r"$ \alpha = 1, \beta = 1$", alpha=.75, weight="bold", color="#008fd5") 52 | plt.text(x=0.325, y=3.5, s=r"$ \alpha = 10, \beta = 10$", 
rotation=35, alpha=.75, weight="bold", color="#fc4f30") 53 | plt.text(x=0.4125, y=8, s=r"$ \alpha = 100, \beta = 100$", rotation=80, alpha=.75, weight="bold", color="#e5ae38") 54 | 55 | 56 | # TICKS 57 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 58 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7) 59 | 60 | # TITLE, SUBTITLE & FOOTER 61 | plt.text(x = -.1, y = 13.75, s = r"Beta Distribution - constant $\frac{\alpha}{\beta}$, varying $\alpha + \beta$", 62 | fontsize = 26, weight = 'bold', alpha = .75) 63 | plt.text(x = -.1, y = 12, 64 | s = 'Depicted below are three beta distributed random variables with '+ r'equal $\frac{\alpha}{\beta} $ and varying $\alpha+\beta$'+'.\nAs one can see the sum of ' + r'$\alpha + \beta$ (mainly) sharpens the distribution (the bigger the sharper).', 65 | fontsize = 19, alpha = .85) 66 | plt.text(x = -.1,y = -2, 67 | s = ' ©Joshua Görner github.com/jgoerner ', 68 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/beta/03_fraction.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import numpy as np 3 | import scipy.stats as stats 4 | import matplotlib.pyplot as plt 5 | import matplotlib.style as style 6 | from IPython.core.display import HTML 7 | 8 | # PLOTTING CONFIG 9 | %matplotlib inline 10 | style.use('fivethirtyeight') 11 | plt.rcParams["figure.figsize"] = (14, 7) 12 | HTML(""" 13 | 20 | """) 21 | plt.figure(dpi=100) 22 | 23 | # A / B = 1/3 24 | plt.plot(np.linspace(0, 1, 200), 25 | stats.beta.pdf(np.linspace(0, 1, 200), a=25, b=75), 26 | ) 27 | plt.fill_between(np.linspace(0, 1, 200), 28 | stats.beta.pdf(np.linspace(0, 1, 200), a=25, b=75), 29 | alpha=.15, 30 | ) 31 | 32 | # A / B = 1 33 | plt.plot(np.linspace(0, 1, 200), 34 | stats.beta.pdf(np.linspace(0, 1, 200), a=50, b=50), 35 | ) 36 | plt.fill_between(np.linspace(0, 1, 200), 37 | 
stats.beta.pdf(np.linspace(0, 1, 200), a=50, b=50), 38 | alpha=.15, 39 | ) 40 | 41 | # A / B = 3 42 | plt.plot(np.linspace(0, 1, 200), 43 | stats.beta.pdf(np.linspace(0, 1, 200), a=75, b=25), 44 | ) 45 | plt.fill_between(np.linspace(0, 1, 200), 46 | stats.beta.pdf(np.linspace(0, 1, 200), a=75, b=25), 47 | alpha=.15, 48 | ) 49 | 50 | # LEGEND 51 | plt.text(x=0.15, y=5, s=r"$ \alpha = 25, \beta = 75$", rotation=80, alpha=.75, weight="bold", color="#008fd5") 52 | plt.text(x=0.39, y=5, s=r"$ \alpha = 50, \beta = 50$", rotation=80, alpha=.75, weight="bold", color="#fc4f30") 53 | plt.text(x=0.65, y=5, s=r"$ \alpha = 100, \beta = 100$", rotation=80, alpha=.75, weight="bold", color="#e5ae38") 54 | 55 | 56 | # TICKS 57 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 58 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7) 59 | 60 | # TITLE, SUBTITLE & FOOTER 61 | plt.text(x = -.1, y = 11.75, s = r"Beta Distribution - constant $\alpha + \beta$, varying $\frac{\alpha}{\beta}$", 62 | fontsize = 26, weight = 'bold', alpha = .75) 63 | plt.text(x = -.1, y = 10, 64 | s = 'Depicted below are three beta distributed random variables with '+ r'equal $\alpha+\beta$ and varying $\frac{\alpha}{\beta} $'+'.\nAs one can see the fraction of ' + r'$\frac{\alpha}{\beta} $ (mainly) shifts the distribution ' + r'($\alpha$ towards 1, $\beta$ towards 0).', 65 | fontsize = 19, alpha = .85) 66 | plt.text(x = -.1,y = -2, 67 | s = ' ©Joshua Görner github.com/jgoerner ', 68 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/beta/04_MCMC_estimation.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import pymc3 as pm 3 | import numpy as np 4 | from scipy import stats 5 | import matplotlib.pyplot as plt 6 | import matplotlib.style as style 7 | from IPython.core.display import HTML 8 | 9 | # PLOTTING CONFIG 10 | %matplotlib 
inline 11 | style.use('fivethirtyeight') 12 | plt.rcParams["figure.figsize"] = (14, 7) 13 | HTML(""" 14 | 21 | """) 22 | plt.figure(dpi=100) 23 | 24 | ##### GENERATE DATA ##### 25 | A_TRUE = 75 26 | B_TRUE = 100 27 | np.random.seed(42) 28 | sample = stats.beta.rvs(a=A_TRUE, b=B_TRUE, size=200) 29 | 30 | ##### SIMULATION ##### 31 | # MODEL BUILDING 32 | with pm.Model() as model: 33 | a = pm.Uniform("a", upper=200) 34 | b = pm.Uniform("b", upper=200) 35 | beta = pm.Beta("beta", alpha=a, beta=b, observed=sample) 36 | 37 | # MODEL RUN 38 | with model: 39 | step = pm.Metropolis() 40 | trace = pm.sample(100000, step=step) 41 | burned_trace = trace[20000:] 42 | 43 | # A - 95% CONF INTERVAL 44 | a_s = burned_trace["a"] 45 | a_est_95 = np.mean(a_s) - 2*np.std(a_s), np.mean(a_s) + 2*np.std(a_s) 46 | print("95% of sampled mus are between {:0.3f} and {:0.3f}".format(*a_est_95)) 47 | 48 | # A - 95% CONF INTERVAL 49 | b_s = burned_trace["b"] 50 | b_est_95 = np.mean(b_s) - 2*np.std(b_s), np.mean(b_s) + 2*np.std(b_s) 51 | print("95% of sampled mus are between {:0.3f} and {:0.3f}".format(*b_est_95)) 52 | 53 | #### PLOTTING ##### 54 | # SAMPLE DISTRIBUTION 55 | plt.hist(sample, bins=50,normed=True, alpha=.25) 56 | 57 | # TRUE CURVE 58 | plt.plot(np.linspace(0.3, 0.6, 100), stats.beta.pdf(np.linspace(0.3, 0.6, 100),a=A_TRUE, b=B_TRUE)) 59 | 60 | # ESTIMATED CURVE MCMC 61 | plt.plot(np.linspace(0.3, 0.6, 100), stats.beta.pdf(np.linspace(0.3, 0.6, 100),a=a_s.mean(), b=b_s.mean())) 62 | 63 | # LEGEND 64 | plt.text(x=0.4125, y=2.5, s="sample", alpha=.75, weight="bold", color="#008fd5") 65 | plt.text(x=0.475, y=8, s="true distrubtion", rotation=-55, alpha=.75, weight="bold", color="#fc4f30") 66 | plt.text(x=0.34, y=9, s="estimated distribution", rotation=55, alpha=.75, weight="bold", color="#e5ae38") 67 | 68 | # TICKS 69 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 70 | plt.axhline(y = 0.1, color = 'black', linewidth = 1.3, alpha = .7) 71 | 72 | # TITLE, SUBTITLE & 
FOOTER 73 | plt.text(x = 0.275, y = 17, s = "Beta Distribution - Parameter Estimation (MCMC)", 74 | fontsize = 26, weight = 'bold', alpha = .75) 75 | plt.text(x = 0.275, y = 15, 76 | s = 'Depicted below is the distribution of a sample (blue) drawn from a beta distribution with '+ r'$\alpha = 75$'+'\nand ' + r'$\beta = 100$ (red). Also the estimated distrubution with $\alpha \sim {:.3f} $ and $\beta \sim {:.3f} $ is shown.'.format(a_s.mean(), b_s.mean()), 77 | fontsize = 19, alpha = .85) 78 | plt.text(x = 0.275,y = -1.5, 79 | s = ' ©Joshua Görner github.com/jgoerner ', 80 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/binomial/01_general.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import numpy as np 3 | import scipy.stats as stats 4 | import matplotlib.pyplot as plt 5 | import matplotlib.style as style 6 | from IPython.core.display import HTML 7 | 8 | # PLOTTING CONFIG 9 | %matplotlib inline 10 | style.use('fivethirtyeight') 11 | plt.rcParams["figure.figsize"] = (14, 7) 12 | HTML(""" 13 | 20 | """) 21 | plt.figure(dpi=100) 22 | 23 | # PDF 24 | plt.bar(left=np.arange(20), 25 | height=(stats.binom.pmf(np.arange(20), p=.5, n=20)/np.max(stats.binom.pmf(np.arange(20), p=.5, n=20))), 26 | width=.75, 27 | alpha=0.75 28 | ) 29 | 30 | # CDF 31 | plt.plot(np.arange(20), 32 | stats.binom.cdf(np.arange(20), p=.5, n=20), 33 | color="#fc4f30", 34 | ) 35 | 36 | # LEGEND 37 | plt.text(x=4.5, y=.7, s="pmf (normed)", alpha=.75, weight="bold", color="#008fd5") 38 | plt.text(x=14.5, y=.9, s="cdf", alpha=.75, weight="bold", color="#fc4f30") 39 | 40 | # TICKS 41 | plt.xticks(range(21)[::2]) 42 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 43 | plt.axhline(y = 0.005, color = 'black', linewidth = 1.3, alpha = .7) 44 | 45 | # TITLE, SUBTITLE & FOOTER 46 | plt.text(x = -2.5, y = 1.25, s = "Binomial Distribution - 
Overview", 47 | fontsize = 26, weight = 'bold', alpha = .75) 48 | plt.text(x = -2.5, y = 1.1, 49 | s = 'Depicted below are the normed probability mass function (pmf) and the cumulative density\nfunction (cdf) of a Binomial distributed random variable $ y \sim Binom(N, p) $, given $ N = 20$ and $p =0.5 $.', 50 | fontsize = 19, alpha = .85) 51 | plt.text(x = -2.5,y = -0.125, 52 | s = ' ©Joshua Görner github.com/jgoerner ', 53 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/binomial/02_p.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import numpy as np 3 | import scipy.stats as stats 4 | import matplotlib.pyplot as plt 5 | import matplotlib.style as style 6 | from IPython.core.display import HTML 7 | 8 | # PLOTTING CONFIG 9 | %matplotlib inline 10 | style.use('fivethirtyeight') 11 | plt.rcParams["figure.figsize"] = (14, 7) 12 | HTML(""" 13 | 20 | """) 21 | plt.figure(dpi=100) 22 | 23 | # PDF P = .2 24 | plt.scatter(np.arange(21), 25 | (stats.binom.pmf(np.arange(21), p=.2, n=20)), 26 | alpha=0.75, 27 | s=100 28 | ) 29 | plt.plot(np.arange(21), 30 | (stats.binom.pmf(np.arange(21), p=.2, n=20)), 31 | alpha=0.75, 32 | ) 33 | 34 | # PDF P = .5 35 | plt.scatter(np.arange(21), 36 | (stats.binom.pmf(np.arange(21), p=.5, n=20)), 37 | alpha=0.75, 38 | s=100 39 | ) 40 | plt.plot(np.arange(21), 41 | (stats.binom.pmf(np.arange(21), p=.5, n=20)), 42 | alpha=0.75, 43 | ) 44 | 45 | # PDF P = .9 46 | plt.scatter(np.arange(21), 47 | (stats.binom.pmf(np.arange(21), p=.9, n=20)), 48 | alpha=0.75, 49 | s=100 50 | ) 51 | plt.plot(np.arange(21), 52 | (stats.binom.pmf(np.arange(21), p=.9, n=20)), 53 | alpha=0.75, 54 | ) 55 | 56 | # LEGEND 57 | plt.text(x=3.5, y=.075, s="$p = 0.2$", alpha=.75, weight="bold", color="#008fd5") 58 | plt.text(x=9.5, y=.075, s="$p = 0.5$", alpha=.75, weight="bold", color="#fc4f30") 59 | plt.text(x=17.5, 
y=.075, s="$p = 0.9$", alpha=.75, weight="bold", color="#e5ae38") 60 | 61 | # TICKS 62 | plt.xticks(range(21)[::2]) 63 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 64 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7) 65 | 66 | # TITLE, SUBTITLE & FOOTER 67 | plt.text(x = -2.5, y = .37, s = "Binomial Distribution - $p$", 68 | fontsize = 26, weight = 'bold', alpha = .75) 69 | plt.text(x = -2.5, y = .32, 70 | s = 'Depicted below are three Binomial distributed random variables with varying $p $. As one can see\nthe parameter $p$ shifts and skews the distribution.', 71 | fontsize = 19, alpha = .85) 72 | plt.text(x = -2.5,y = -0.065, 73 | s = ' ©Joshua Görner github.com/jgoerner ', 74 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/binomial/03_n.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import numpy as np 3 | import scipy.stats as stats 4 | import matplotlib.pyplot as plt 5 | import matplotlib.style as style 6 | from IPython.core.display import HTML 7 | 8 | # PLOTTING CONFIG 9 | %matplotlib inline 10 | style.use('fivethirtyeight') 11 | plt.rcParams["figure.figsize"] = (14, 7) 12 | HTML(""" 13 | 20 | """) 21 | plt.figure(dpi=100) 22 | 23 | # PDF N = 10 24 | plt.scatter(np.arange(11), 25 | (stats.binom.pmf(np.arange(11), p=.5, n=10)), 26 | alpha=0.75, 27 | s=100 28 | ) 29 | plt.plot(np.arange(11), 30 | (stats.binom.pmf(np.arange(11), p=.5, n=10)), 31 | alpha=0.75, 32 | ) 33 | 34 | # PDF N = 15 35 | plt.scatter(np.arange(16), 36 | (stats.binom.pmf(np.arange(16), p=.5, n=15)), 37 | alpha=0.75, 38 | s=100 39 | ) 40 | plt.plot(np.arange(16), 41 | (stats.binom.pmf(np.arange(16), p=.5, n=15)), 42 | alpha=0.75, 43 | ) 44 | 45 | # PDF N = 20 46 | plt.scatter(np.arange(21), 47 | (stats.binom.pmf(np.arange(21), p=.5, n=20)), 48 | alpha=0.75, 49 | s=100 50 | ) 51 | 
plt.plot(np.arange(21), 52 | (stats.binom.pmf(np.arange(21), p=.5, n=20)), 53 | alpha=0.75, 54 | ) 55 | 56 | # LEGEND 57 | plt.text(x=6, y=.225, s="$N = 10$", alpha=.75, weight="bold", color="#008fd5") 58 | plt.text(x=8.5, y=.2, s="$N = 15$", alpha=.75, weight="bold", color="#fc4f30") 59 | plt.text(x=11, y=.175, s="$N = 20$", alpha=.75, weight="bold", color="#e5ae38") 60 | 61 | # TICKS 62 | plt.xticks(range(21)[::2]) 63 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 64 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7) 65 | 66 | # TITLE, SUBTITLE & FOOTER 67 | plt.text(x = -2.5, y = .31, s = "Binomial Distribution - $N$", 68 | fontsize = 26, weight = 'bold', alpha = .75) 69 | plt.text(x = -2.5, y = .27, 70 | s = 'Depicted below are three Binomial distributed random variables with varying $N$. As one can see\nthe parameter $N$ streches the distribution (the larger $N$ the flatter the distribution).', 71 | fontsize = 19, alpha = .85) 72 | plt.text(x = -2.5,y = -0.055, 73 | s = ' ©Joshua Görner github.com/jgoerner ', 74 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/binomial/04_mcmc_estimation.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import pymc3 as pm 3 | import numpy as np 4 | from scipy.stats import binom 5 | import matplotlib.pyplot as plt 6 | import matplotlib.style as style 7 | from IPython.core.display import HTML 8 | 9 | # PLOTTING CONFIG 10 | %matplotlib inline 11 | style.use('fivethirtyeight') 12 | plt.rcParams["figure.figsize"] = (14, 7) 13 | HTML(""" 14 | 21 | """) 22 | plt.figure(dpi=100) 23 | 24 | ##### DATA GENERATION ##### 25 | # DRAW A SAMPLE 26 | np.random.seed(42) 27 | sample = stats.binom.rvs(p=0.3, n=200, size=1000) 28 | 29 | ##### SIMULATION ##### 30 | # MODEL BUILDING 31 | with pm.Model() as model: 32 | p = pm.Beta("p", 1, 1) 33 | n = 
pm.DiscreteUniform("n", lower=sample.max(), upper=10*sample.max()) 34 | binomial = pm.Binomial("binomial", p=p, n=n, observed=sample) 35 | 36 | # MODEL RUN 37 | with model: 38 | step = pm.Metropolis() 39 | trace = pm.sample(100000, step=step) 40 | burned_trace = trace[50000:] 41 | 42 | # P - 95% CONF INTERVAL 43 | ps = burned_trace["p"] 44 | ps_est_95 = ps.mean() - 2*ps.std(), ps.mean() + 2*ps.std() 45 | print("95% of sampled ps are between {:0.3f} and {:0.3f}".format(*ps_est_95)) 46 | 47 | # N - 95% CONF INTERVAL 48 | ns = burned_trace["n"] 49 | ns_est_95 = ns.mean() - 2*ns.std(), ns.mean() + 2*ns.std() 50 | print("95% of sampled Ns are between {:0.3f} and {:0.3f}".format(*ns_est_95)) 51 | 52 | ##### PLOTTING ##### 53 | # SAMPLE 54 | plt.hist(sample, 55 | bins=30, 56 | normed=True, 57 | alpha=.25, 58 | ) 59 | 60 | # TRUE CURVE 61 | plt.plot(np.arange(40, 90), 62 | stats.binom.pmf(np.arange(40, 90), 63 | p=0.3, 64 | n=200, 65 | ), 66 | ) 67 | 68 | # ESTIMATED CURVE 69 | plt.plot(np.arange(40, 90), 70 | stats.binom.pmf(np.arange(40, 90), 71 | p=burned_trace["p"].mean(), 72 | n=burned_trace["n"].mean(), 73 | ), 74 | ) 75 | 76 | # LEGEND 77 | plt.text(x=58, y=.03, s="sample", alpha=.75, weight="bold", color="#008fd5") 78 | plt.text(x=48, y=.055, s="true distrubtion", rotation=50, alpha=.75, weight="bold", color="#fc4f30") 79 | plt.text(x=68, y=.055, s="estimated distribution", rotation=-50, alpha=.75, weight="bold", color="#e5ae38") 80 | 81 | # TICKS 82 | plt.xticks(range(40, 91)[::4]) 83 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 84 | plt.axhline(y = 0.0009, color = 'black', linewidth = 1.3, alpha = .7) 85 | 86 | # TITLE, SUBTITLE & FOOTER 87 | plt.text(x = 34, y = 0.135, s = "Binomial Distribution - Parameter Estimation (MCMC)", 88 | fontsize = 26, weight = 'bold', alpha = .75) 89 | plt.text(x = 34, y = 0.12, 90 | s = 'Depicted below is the distribution of a sample drawn from a Binomial distribution with $N = 100$\nand $p = 0.3$. 
Additionally the estimated distrubution with $N \sim {:.3f}$ and $p \sim {:.2f}$ is shown.'.format(np.mean(ns), np.mean(ps)), 91 | fontsize = 19, alpha = .85) 92 | plt.text(x = 34, y = -0.02, 93 | s = ' ©Joshua Görner github.com/jgoerner ', 94 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/cauchy/01_general.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import numpy as np 3 | import scipy.stats as stats 4 | import matplotlib.pyplot as plt 5 | import matplotlib.style as style 6 | from IPython.core.display import HTML 7 | 8 | # PLOTTING CONFIG 9 | %matplotlib inline 10 | style.use('fivethirtyeight') 11 | plt.rcParams["figure.figsize"] = (14, 7) 12 | HTML(""" 13 | 20 | """) 21 | plt.figure(dpi=100) 22 | 23 | # PDF 24 | plt.plot(np.linspace(-6, 6, 100), 25 | stats.cauchy.pdf(np.linspace(-6, 6, 100))/np.max(stats.cauchy.pdf(np.linspace(-6, 6, 100))), 26 | ) 27 | plt.fill_between(np.linspace(-6, 6, 100), 28 | stats.cauchy.pdf(np.linspace(-6, 6, 100))/np.max(stats.cauchy.pdf(np.linspace(-6, 6, 100))), 29 | alpha=.15, 30 | ) 31 | # CDF 32 | plt.plot(np.linspace(-6, 6, 100), 33 | stats.cauchy.cdf(np.linspace(-6, 6, 100)), 34 | ) 35 | 36 | # LEGEND 37 | plt.text(x=2, y=.25, s="pdf", rotation=-50, alpha=.75, weight="bold", color="#008fd5") 38 | plt.text(x=-.4, y=.5, s="cdf", rotation=55, alpha=.75, weight="bold", color="#fc4f30") 39 | 40 | # TICKS 41 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 42 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7) 43 | 44 | # TITLE, SUBTITLE & FOOTER 45 | plt.text(x = -7.25, y = 1.25, s = "Cauchy - Overview", 46 | fontsize = 26, weight = 'bold', alpha = .75) 47 | plt.text(x = -7.25, y = 1.1, 48 | s = ("Depicted below are the normed probability density function (pdf) and the cumulative density \nfunction (cdf) of a cauchy distributed random variable $ 
x \sim Cauchy(\lambda , x_0)$" 49 | " given $\lambda = 1, x_0 = 0$"), 50 | fontsize = 19, alpha = .85) 51 | plt.text(x = -7.25,y = -0.2, 52 | s = ' © Hagen Mohr github.com/jgoerner ', 53 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/cauchy/02_x_0.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import numpy as np 3 | import scipy.stats as stats 4 | import matplotlib.pyplot as plt 5 | import matplotlib.style as style 6 | from IPython.core.display import HTML 7 | 8 | # PLOTTING CONFIG 9 | %matplotlib inline 10 | style.use('fivethirtyeight') 11 | plt.rcParams["figure.figsize"] = (14, 7) 12 | HTML(""" 13 | 20 | """) 21 | plt.figure(dpi=100) 22 | 23 | # PDF MU = 0 24 | plt.plot(np.linspace(-6, 6, 100), 25 | stats.cauchy.pdf(np.linspace(-6, 6, 100)), 26 | ) 27 | plt.fill_between(np.linspace(-6, 6, 100), 28 | stats.cauchy.pdf(np.linspace(-6, 6, 100)), 29 | alpha=.15, 30 | ) 31 | 32 | # PDF MU = 2 33 | plt.plot(np.linspace(-6, 6, 100), 34 | stats.cauchy.pdf(np.linspace(-6, 6, 100), loc=2), 35 | ) 36 | plt.fill_between(np.linspace(-6, 6, 100), 37 | stats.cauchy.pdf(np.linspace(-6, 6, 100),loc=2), 38 | alpha=.15, 39 | ) 40 | 41 | # PDF MU = -2 42 | plt.plot(np.linspace(-6, 6, 100), 43 | stats.cauchy.pdf(np.linspace(-6, 6, 100), loc=-2), 44 | ) 45 | plt.fill_between(np.linspace(-6, 6, 100), 46 | stats.cauchy.pdf(np.linspace(-6, 6, 100),loc=-2), 47 | alpha=.15, 48 | ) 49 | 50 | # LEGEND 51 | plt.text(x=-1, y=.25, s="$ x_0 = 0$", rotation=70, alpha=.75, weight="bold", color="#008fd5") 52 | plt.text(x=1, y=.25, s="$ x_0 = 2$", rotation=70, alpha=.75, weight="bold", color="#fc4f30") 53 | plt.text(x=-3.125, y=.25, s="$ x_0 = -2$", rotation=70, alpha=.75, weight="bold", color="#e5ae38") 54 | 55 | 56 | # TICKS 57 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 58 | plt.axhline(y = 0, color = 'black', linewidth 
= 1.3, alpha = .7) 59 | 60 | # TITLE, SUBTITLE & FOOTER 61 | plt.text(x = -7.25, y = 0.395, s = "Cauchy Distribution - $ x_0 $", 62 | fontsize = 26, weight = 'bold', alpha = .75) 63 | plt.text(x = -7.25, y = 0.35, 64 | s = 'Depicted below are three Cauchy distributed random variables with varying $ x_0 $. As one can \neasily see the parameter $x_0$ shifts the distribution along the x-axis.', 65 | fontsize = 19, alpha = .85) 66 | plt.text(x = -7.25,y = -0.05, 67 | s = ' © Hagen Mohr github.com/jgoerner ', 68 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/cauchy/03_lambda.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import numpy as np 3 | import scipy.stats as stats 4 | import matplotlib.pyplot as plt 5 | import matplotlib.style as style 6 | from IPython.core.display import HTML 7 | 8 | # PLOTTING CONFIG 9 | %matplotlib inline 10 | style.use('fivethirtyeight') 11 | plt.rcParams["figure.figsize"] = (14, 7) 12 | HTML(""" 13 | 20 | """) 21 | plt.figure(dpi=100) 22 | 23 | # PDF lambda = 1 24 | plt.plot(np.linspace(-6, 6, 100), 25 | stats.cauchy.pdf(np.linspace(-6, 6, 100)), 26 | ) 27 | plt.fill_between(np.linspace(-6, 6, 100), 28 | stats.cauchy.pdf(np.linspace(-6, 6, 100)), 29 | alpha=.15, 30 | ) 31 | 32 | # PDF lambda = 2 33 | plt.plot(np.linspace(-6, 6, 100), 34 | stats.cauchy.pdf(np.linspace(-6, 6, 100), scale=2), 35 | ) 36 | plt.fill_between(np.linspace(-6, 6, 100), 37 | stats.cauchy.pdf(np.linspace(-6, 6, 100),scale=2), 38 | alpha=.15, 39 | ) 40 | 41 | # PDF lambda = 0.5 42 | plt.plot(np.linspace(-6, 6, 100), 43 | stats.cauchy.pdf(np.linspace(-6, 6, 100), scale=0.5), 44 | ) 45 | plt.fill_between(np.linspace(-6, 6, 100), 46 | stats.cauchy.pdf(np.linspace(-6, 6, 100),scale=0.5), 47 | alpha=.15, 48 | ) 49 | 50 | # LEGEND 51 | plt.text(x=-1.25, y=.3, s="$ \lambda = 1$", rotation=51, alpha=.75, weight="bold", 
color="#008fd5") 52 | plt.text(x=-2.5, y=.13, s="$ \lambda = 2$", rotation=11, alpha=.75, weight="bold", color="#fc4f30") 53 | plt.text(x=-0.75, y=.55, s="$ \lambda = 0.5$", rotation=75, alpha=.75, weight="bold", color="#e5ae38") 54 | 55 | 56 | # TICKS 57 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 58 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7) 59 | 60 | # TITLE, SUBTITLE & FOOTER 61 | plt.text(x = -7.25, y = 0.77, s = "Cauchy Distribution - $ \lambda $", 62 | fontsize = 26, weight = 'bold', alpha = .75) 63 | plt.text(x = -7.25, y = 0.68, 64 | s = ("Depicted below are three Cauchy distributed random variables with varying $\lambda$. " + 65 | "It becomes \napparent, that $\lambda$ streches or tightens the distribution" + 66 | " (the smaller $\lambda$ the higher the peak)"), 67 | fontsize = 19, alpha = .85) 68 | plt.text(x = -7.25,y = -0.1, 69 | s = ' © Hagen Mohr github.com/jgoerner ', 70 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/cauchy/04_MCMC_estimation.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import pymc3 as pm 3 | import numpy as np 4 | from scipy import stats 5 | import matplotlib.pyplot as plt 6 | import matplotlib.style as style 7 | from IPython.core.display import HTML 8 | 9 | # PLOTTING CONFIG 10 | %matplotlib inline 11 | style.use('fivethirtyeight') 12 | plt.rcParams["figure.figsize"] = (14, 7) 13 | HTML(""" 14 | 21 | """) 22 | plt.figure(dpi=100) 23 | 24 | ##### GENERATE DATA ##### 25 | x_0_true = 10 26 | lambd_true = 1.5 27 | np.random.seed(42) 28 | sample = stats.cauchy.rvs(loc=x_0_true, scale=lambd_true, size=200) 29 | 30 | ##### SIMULATION ##### 31 | # MODEL BUILDING 32 | with pm.Model() as model: 33 | x_0 = pm.Uniform("x_0", upper=50) # technically x_0 could take on negative values - not tested here 34 | lambd = pm.Uniform("lambda", 
upper=20) # lambda is always > 0 35 | cauchy = pm.Cauchy("cauchy", alpha=x_0, beta=lambd, observed=sample) 36 | 37 | # MODEL RUN 38 | with model: 39 | trace = pm.sample(draws=100000) 40 | burned_trace = trace[20000:] 41 | 42 | # x_0 - 95% CONF INTERVAL 43 | x_0s = burned_trace["x_0"] 44 | x_0_est_95 = np.mean(x_0s) - 2*np.std(x_0s), np.mean(x_0s) + 2*np.std(x_0s) 45 | print("95% of sampled x_0s are between {:0.3f} and {:0.3f}".format(*x_0_est_95)) 46 | 47 | # Lambda - 95% CONF INTERVAL 48 | lambds = burned_trace["lambda"] 49 | lambd_est_95 = np.mean(lambds) - 2*np.std(lambds), np.mean(lambds) + 2*np.std(lambds) 50 | print("95% of sampled lambdas are between {:0.3f} and {:0.3f}".format(*lambd_est_95)) 51 | 52 | #### PLOTTING ##### 53 | # SAMPLE DISTRIBUTION 54 | plt.hist(sample, bins=50,normed=True, alpha=.25, range=[-10, 30]) 55 | 56 | # TRUE CURVE 57 | plt.plot(np.linspace(-10, 30, 50), stats.cauchy.pdf(np.linspace(-10, 30, 50),loc=x_0_true, scale=lambd_true)) 58 | 59 | # ESTIMATED CURVE MCMC 60 | plt.plot(np.linspace(-10, 30, 50), stats.cauchy.pdf(np.linspace(-10, 30, 50),loc=np.mean(x_0s), scale=np.mean(lambds))) 61 | 62 | # LEGEND 63 | plt.text(x=8.5, y=.05, s="sample", alpha=.75, weight="bold", color="#008fd5") 64 | plt.text(x=13, y=.1, s="true distrubtion", rotation=0, alpha=.75, weight="bold", color="#fc4f30") 65 | plt.text(x=-1.5, y=.1, s="estimated distribution", rotation=0, alpha=.75, weight="bold", color="#e5ae38") 66 | 67 | # TICKS 68 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 69 | plt.axhline(y = 0.001, color = 'black', linewidth = 1.3, alpha = .7) 70 | 71 | # TITLE, SUBTITLE & FOOTER 72 | plt.text(x = -15, y = 0.255, s = "Cauchy - Parameter Estimation (MCMC)", 73 | fontsize = 26, weight = 'bold', alpha = .75) 74 | plt.text(x = -15, y = 0.225, 75 | s = 'Depicted below is the distribution of a sample (blue) drawn from a cauchy distribution with ' + r'$x_0 = 10$' + '\nand ' + r'$\lambda = 1.5$ (red). 
' + r'Also the estimated distrubution with $x_0 \sim {:.3f} $ and $\lambda \sim {:.3f} $ is shown (yellow).'.format(np.mean(x_0s), np.mean(lambds)), 76 | fontsize = 19, alpha = .85) 77 | plt.text(x = -15,y = -0.025, 78 | s = ' © Hagen Mohr github.com/jgoerner ', 79 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/chi2/01_general.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import numpy as np 3 | import scipy.stats as stats 4 | import matplotlib.pyplot as plt 5 | import matplotlib.style as style 6 | from IPython.core.display import HTML 7 | 8 | # PLOTTING CONFIG 9 | %matplotlib inline 10 | style.use('fivethirtyeight') 11 | plt.rcParams["figure.figsize"] = (14, 7) 12 | HTML(""" 13 | 20 | """) 21 | plt.figure(dpi=100) 22 | 23 | # PDF 24 | plt.plot(np.linspace(0, 20, 100), 25 | stats.chi2.pdf(np.linspace(0, 20, 100), df=4) / np.max(stats.chi2.pdf(np.linspace(0, 20, 100), df=4)), 26 | ) 27 | plt.fill_between(np.linspace(0, 20, 100), 28 | stats.chi2.pdf(np.linspace(0, 20, 100), df=4) / np.max(stats.chi2.pdf(np.linspace(0, 20, 100), df=4)), 29 | alpha=.15, 30 | ) 31 | 32 | # CDF 33 | plt.plot(np.linspace(0, 20, 100), 34 | stats.chi2.cdf(np.linspace(0, 20, 100), df=4), 35 | ) 36 | 37 | # LEGEND 38 | plt.xticks(np.arange(0, 21, 2)) 39 | plt.text(x=11, y=.25, s="pdf (normed)", alpha=.75, weight="bold", color="#008fd5") 40 | plt.text(x=11, y=.85, s="cdf", alpha=.75, weight="bold", color="#fc4f30") 41 | 42 | # TICKS 43 | plt.xticks(np.arange(0, 21, 2)) 44 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 45 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7) 46 | 47 | # TITLE, SUBTITLE & FOOTER 48 | plt.text(x = -2, y = 1.25, s = r"Chi-Squared $(\chi^{2})$ Distribution - Overview", 49 | fontsize = 26, weight = 'bold', alpha = .75) 50 | plt.text(x = -2, y = 1.1, 51 | s = 'Depicted below are 
the normed probability density function (pdf) and the cumulative density\nfunction (cdf) of a Chi-Squared distributed random variable $ y \sim \chi^{2}(k) $, given $k$=4.', 52 | fontsize = 19, alpha = .85) 53 | plt.text(x = -2,y = -0.2, 54 | s = ' ©Joshua Görner github.com/jgoerner ', 55 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/chi2/02_k.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import numpy as np 3 | import scipy.stats as stats 4 | import matplotlib.pyplot as plt 5 | import matplotlib.style as style 6 | from IPython.core.display import HTML 7 | 8 | # PLOTTING CONFIG 9 | %matplotlib inline 10 | style.use('fivethirtyeight') 11 | plt.rcParams["figure.figsize"] = (14, 7) 12 | HTML(""" 13 | 20 | """) 21 | plt.figure(dpi=100) 22 | 23 | # PDF k = 1 24 | plt.plot(np.linspace(0, 15, 500), 25 | stats.chi2.pdf(np.linspace(0, 15, 500), df=1), 26 | ) 27 | plt.fill_between(np.linspace(0, 15, 500), 28 | stats.chi2.pdf(np.linspace(0, 15, 500), df=1), 29 | alpha=.15, 30 | ) 31 | 32 | # PDF k = 3 33 | plt.plot(np.linspace(0, 15, 100), 34 | stats.chi2.pdf(np.linspace(0, 15, 100), df=3), 35 | ) 36 | plt.fill_between(np.linspace(0, 15, 100), 37 | stats.chi2.pdf(np.linspace(0, 15, 100), df=3), 38 | alpha=.15, 39 | ) 40 | 41 | # PDF k = 6 42 | plt.plot(np.linspace(0, 15, 100), 43 | stats.chi2.pdf(np.linspace(0, 15, 100), df=6), 44 | ) 45 | plt.fill_between(np.linspace(0, 15, 100), 46 | stats.chi2.pdf(np.linspace(0, 15, 100), df=6), 47 | alpha=.15, 48 | ) 49 | 50 | # LEGEND 51 | plt.text(x=.5, y=.7, s="$ k = 1$", rotation=-65, alpha=.75, weight="bold", color="#008fd5") 52 | plt.text(x=1.5, y=.35, s="$ k = 3$", alpha=.75, weight="bold", color="#fc4f30") 53 | plt.text(x=5, y=.2, s="$ k = 6$", alpha=.75, weight="bold", color="#e5ae38") 54 | 55 | 56 | # TICKS 57 | plt.tick_params(axis = 'both', which = 'major', labelsize = 
18) 58 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7) 59 | 60 | # TITLE, SUBTITLE & FOOTER 61 | plt.text(x = -1.5, y = 2.8, s = "Chi-Squared Distribution - $ k $", 62 | fontsize = 26, weight = 'bold', alpha = .75) 63 | plt.text(x = -1.5, y = 2.5, 64 | s = 'Depicted below are three Chi-Squared distributed random variables with varying $ k $. As one can\nsee the parameter $k$ smoothens the distribution and softens the skewness.', 65 | fontsize = 19, alpha = .85) 66 | plt.text(x = -1.5,y = -0.4, 67 | s = ' ©Joshua Görner github.com/jgoerner ', 68 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/chi2/03_estimation.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import numpy as np 3 | import scipy.stats as stats 4 | import matplotlib.pyplot as plt 5 | import matplotlib.style as style 6 | from IPython.core.display import HTML 7 | 8 | # PLOTTING CONFIG 9 | %matplotlib inline 10 | style.use('fivethirtyeight') 11 | plt.rcParams["figure.figsize"] = (14, 7) 12 | HTML(""" 13 | 20 | """) 21 | plt.figure(dpi=100) 22 | 23 | ##### COMPUTATION ##### 24 | # DECLARING THE "TRUE" PARAMETERS UNDERLYING THE SAMPLE 25 | k_real = 2 26 | 27 | # DRAW A SAMPLE OF N=1000 28 | np.random.seed(42) 29 | sample = stats.chi2.rvs(df=k_real, size=1000) 30 | 31 | # ESTIMATE K 32 | k_est = np.mean(sample) 33 | print("Estimated k: {}".format(k_est)) 34 | 35 | ##### PLOTTING ##### 36 | # SAMPLE DISTRIBUTION 37 | plt.hist(sample, bins=50,normed=True, alpha=.25) 38 | 39 | # TRUE CURVE 40 | plt.plot(np.linspace(0, 18, 1000), stats.chi2.pdf(np.linspace(0, 18, 1000),df=k_real)) 41 | 42 | # ESTIMATED CURVE 43 | plt.plot(np.linspace(0, 18, 1000), stats.chi2.pdf(np.linspace(0, 18, 1000),df=k_est)) 44 | 45 | # LEGEND 46 | plt.text(x=.75, y=.1, s="sample", alpha=.75, weight="bold", color="#008fd5") 47 | plt.text(x=3, y=.15, s="true 
distrubtion", alpha=.75, weight="bold", color="#fc4f30") 48 | plt.text(x=1, y=.4, s="estimated distribution", alpha=.75, weight="bold", color="#e5ae38") 49 | 50 | # TICKS 51 | plt.xticks(range(0, 19)[::2]) 52 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 53 | plt.axhline(y = 0.003, color = 'black', linewidth = 1.3, alpha = .7) 54 | 55 | # TITLE, SUBTITLE & FOOTER 56 | plt.text(x = -2, y = 0.675, s = "Chi-Squared Distribution - Parameter Estimation", 57 | fontsize = 26, weight = 'bold', alpha = .75) 58 | plt.text(x = -2, y = 0.6, 59 | s = 'Depicted below is the distribution of a sample (blue) drawn from a Chi-Squared distribution with \n$k=2$ (red). Also the estimated distrubution with $k \sim {:.3f} $ is shown (yellow).'.format(np.mean(sample)), 60 | fontsize = 19, alpha = .85) 61 | plt.text(x = -2,y = -0.075, 62 | s = ' ©Joshua Görner github.com/jgoerner ', 63 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/chi2/04_MCMC_estimation.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import numpy as np 3 | import pymc3 as pm 4 | import scipy.stats as stats 5 | import matplotlib.pyplot as plt 6 | import matplotlib.style as style 7 | from IPython.core.display import HTML 8 | 9 | # PLOTTING CONFIG 10 | %matplotlib inline 11 | style.use('fivethirtyeight') 12 | plt.rcParams["figure.figsize"] = (14, 7) 13 | HTML(""" 14 | 21 | """) 22 | plt.figure(dpi=100) 23 | 24 | ##### COMPUTATION ##### 25 | # DECLARING THE "TRUE" PARAMETERS UNDERLYING THE SAMPLE 26 | k_real = 2 27 | 28 | # DRAW A SAMPLE OF N=1000 29 | np.random.seed(42) 30 | sample = stats.chi2.rvs(df=k_real, size=1000) 31 | 32 | ##### SIMULATION ##### 33 | # MODEL BUILDING 34 | with pm.Model() as model: 35 | k = pm.DiscreteUniform("k", lower=0, upper=np.mean(sample)*7) # mean + 3stds 36 | chi_2 = pm.ChiSquared("chi2", nu=k, observed=sample) 37 | 38 
| 39 | # MODEL RUN 40 | with model: 41 | trace = pm.sample(50000) 42 | burned_trace = trace[45000:] 43 | 44 | # MU - 95% CONF INTERVAL 45 | ks = burned_trace["k"] 46 | k_est_95 = np.mean(ks) - 2*np.std(ks), np.mean(ks) + 2*np.std(ks) 47 | print("95% of sampled mus are between {} and {}".format(*k_est_95)) 48 | 49 | ##### PLOTTING ##### 50 | # SAMPLE DISTRIBUTION 51 | plt.hist(sample, bins=50,normed=True, alpha=.25) 52 | 53 | # TRUE CURVE 54 | plt.plot(np.linspace(0, 18, 1000), stats.chi2.pdf(np.linspace(0, 18, 1000),df=k_real), linestyle="--") 55 | 56 | # ESTIMATED CURVE 57 | plt.plot(np.linspace(0, 18, 1000), stats.chi2.pdf(np.linspace(0, 18, 1000),df=np.mean(ks)), linestyle=":") 58 | 59 | # LEGEND 60 | plt.text(x=.75, y=.1, s="sample", alpha=.75, weight="bold", color="#008fd5") 61 | plt.text(x=3, y=.15, s="true distrubtion", alpha=.75, weight="bold", color="#fc4f30") 62 | plt.text(x=1, y=.4, s="estimated distribution", alpha=.75, weight="bold", color="#e5ae38") 63 | 64 | # TICKS 65 | plt.xticks(range(0, 19)[::2]) 66 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 67 | plt.axhline(y = 0.003, color = 'black', linewidth = 1.3, alpha = .7) 68 | 69 | # TITLE, SUBTITLE & FOOTER 70 | plt.text(x = -2, y = 0.675, s = "Chi-Squared Distribution - Parameter Estimation (MCMC)", 71 | fontsize = 26, weight = 'bold', alpha = .75) 72 | plt.text(x = -2, y = 0.6, 73 | s = 'Depicted below is the distribution of a sample (blue) drawn from a Chi-Squared distribution with \n$k=2$ (red). 
Also the estimated distrubution with $k \sim {} $ is shown (yellow).'.format(np.mean(ks)), 74 | fontsize = 19, alpha = .85) 75 | plt.text(x = -2,y = -0.075, 76 | s = ' ©Joshua Görner github.com/jgoerner ', 77 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/geometric/01_general.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import numpy as np 3 | import scipy.stats as stats 4 | import matplotlib.pyplot as plt 5 | import matplotlib.style as style 6 | from IPython.core.display import HTML 7 | 8 | # PLOTTING CONFIG 9 | %matplotlib inline 10 | style.use('fivethirtyeight') 11 | plt.rcParams["figure.figsize"] = (14, 7) 12 | HTML(""" 13 | 20 | """) 21 | plt.figure(dpi=100) 22 | 23 | # PDF 24 | plt.bar(left=np.arange(10), 25 | height=(stats.geom.pmf(np.arange(10), p=.5)/np.max(stats.geom.pmf(np.arange(10), p=.5))), 26 | width=.75, 27 | alpha=0.75 28 | ) 29 | 30 | # CDF 31 | plt.plot(np.arange(10), 32 | stats.geom.cdf(np.arange(10), p=.5), 33 | color="#fc4f30", 34 | ) 35 | 36 | # LEGEND 37 | plt.text(x=3.5, y=.3, s="pmf (normed)", alpha=.75, weight="bold", color="#008fd5") 38 | plt.text(x=2.5, y=.7, s="cdf", alpha=.75, weight="bold", color="#fc4f30") 39 | 40 | # TICKS 41 | plt.xticks(range(11)) 42 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 43 | plt.axhline(y = 0.005, color = 'black', linewidth = 1.3, alpha = .7) 44 | 45 | # TITLE, SUBTITLE & FOOTER 46 | plt.text(x = -1.5, y = 1.25, s = "Geometric Distribution - Overview", 47 | fontsize = 26, weight = 'bold', alpha = .75) 48 | plt.text(x = -1.5, y = 1.1, 49 | s = 'Depicted below are the normed probability mass function (pmf) and the cumulative density\nfunction (cdf) of a Geometric distributed random variable $ y \sim Geom(p) $, given parameter $p =0.5 $.', 50 | fontsize = 19, alpha = .85) 51 | plt.text(x = -1.5,y = -0.125, 52 | s = ' ©Joshua Görner 
github.com/jgoerner ', 53 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/geometric/02_p.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import numpy as np 3 | import scipy.stats as stats 4 | import matplotlib.pyplot as plt 5 | import matplotlib.style as style 6 | from IPython.core.display import HTML 7 | 8 | # PLOTTING CONFIG 9 | %matplotlib inline 10 | style.use('fivethirtyeight') 11 | plt.rcParams["figure.figsize"] = (14, 7) 12 | HTML(""" 13 | 20 | """) 21 | plt.figure(dpi=100) 22 | 23 | # PMF P = .2 (markers plus connecting line; scipy's geom has support k >= 1, so the value at k = 0 is 0) 24 | plt.scatter(np.arange(11), 25 | (stats.geom.pmf(np.arange(11), p=.2)), 26 | alpha=0.75, 27 | s=100 28 | ) 29 | plt.plot(np.arange(11), 30 | (stats.geom.pmf(np.arange(11), p=.2)), 31 | alpha=0.75, 32 | ) 33 | 34 | # PMF P = .5 35 | plt.scatter(np.arange(11), 36 | (stats.geom.pmf(np.arange(11), p=.5)), 37 | alpha=0.75, 38 | s=100 39 | ) 40 | plt.plot(np.arange(11), 41 | (stats.geom.pmf(np.arange(11), p=.5)), 42 | alpha=0.75, 43 | ) 44 | 45 | # PMF P = .9 46 | plt.scatter(np.arange(11), 47 | (stats.geom.pmf(np.arange(11), p=.9)), 48 | alpha=0.75, 49 | s=100 50 | ) 51 | plt.plot(np.arange(11), 52 | (stats.geom.pmf(np.arange(11), p=.9)), 53 | alpha=0.75, 54 | ) 55 | 56 | # LEGEND (text colours follow the first three colours of the active style's cycle - TODO confirm) 57 | plt.text(x=4.25, y=.15, s="$p = 0.2$", alpha=.75, weight="bold", color="#008fd5") 58 | plt.text(x=2.5, y=.25, s="$p = 0.5$", alpha=.75, weight="bold", color="#fc4f30") 59 | plt.text(x=1.5, y=.7, s="$p = 0.9$", alpha=.75, weight="bold", color="#e5ae38") 60 | 61 | # TICKS 62 | plt.xticks(range(11)) 63 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 64 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7) 65 | 66 | # TITLE, SUBTITLE & FOOTER 67 | plt.text(x = -1, y = 1.125, s = "Geometric Distribution - $p$", 68 | fontsize = 26, weight = 'bold', alpha = .75) 69 | plt.text(x = -1, y = 1, 70 | s = 'Depicted 
below are three Geometric distributed random variables with varying $p $. As one can\nsee the parameter $p$ flattens the distribution (the larger p the sharper the distribution).', 71 | fontsize = 19, alpha = .85) 72 | plt.text(x = -1,y = -0.175, 73 | s = ' ©Joshua Görner github.com/jgoerner ', 74 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/geometric/03_estimation.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | from collections import Counter 3 | import numpy as np 4 | from scipy.stats import geom 5 | import matplotlib.pyplot as plt 6 | import matplotlib.style as style 7 | from IPython.core.display import HTML 8 | 9 | # PLOTTING CONFIG 10 | %matplotlib inline 11 | style.use('fivethirtyeight') 12 | plt.rcParams["figure.figsize"] = (14, 7) 13 | HTML(""" 14 | 21 | """) 22 | plt.figure(dpi=100) 23 | 24 | ##### COMPUTATION ##### 25 | # DECLARING THE "TRUE" PARAMETERS UNDERLYING THE SAMPLE 26 | p_real = 0.3 27 | 28 | # DRAW A SAMPLE OF N=100 29 | np.random.seed(42) 30 | sample = geom.rvs(p=p_real, size=100) 31 | 32 | # ESTIMATE P 33 | p_est = 1.0/np.mean(sample) 34 | print("Estimated p: {}".format(p_est)) 35 | 36 | ##### PLOTTING ##### 37 | # SAMPLE DISTRIBUTION 38 | cnt = Counter(sample) 39 | cnt[0] = 0 # added to fit pmf 40 | _, values = zip(*sorted(cnt.items())) 41 | plt.bar(range(len(values)), values/np.sum(values), alpha=0.25); 42 | 43 | # TRUE CURVE 44 | plt.plot(range(18), geom.pmf(k=range(18), p=p_real), color="#fc4f30") 45 | 46 | # ESTIMATED CURVE 47 | plt.plot(range(18), geom.pmf(k=range(18), p=p_est), color="#e5ae38") 48 | 49 | # LEGEND 50 | plt.text(x=2, y=.06, s="sample", alpha=.75, weight="bold", color="#008fd5") 51 | plt.text(x=6.5, y=.075, s="true distrubtion", rotation=-15, alpha=.75, weight="bold", color="#fc4f30") 52 | plt.text(x=2, y=.275, s="estimated distribution", rotation=-60, alpha=.75, 
weight="bold", color="#e5ae38") 53 | 54 | # TICKS 55 | plt.xticks(range(17)[::2]) 56 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 57 | plt.axhline(y = 0.002, color = 'black', linewidth = 1.3, alpha = .7) 58 | 59 | # TITLE, SUBTITLE & FOOTER 60 | plt.text(x = -2.5, y = 0.425, s = "Geometric Distribution - Parameter Estimation", 61 | fontsize = 26, weight = 'bold', alpha = .75) 62 | plt.text(x = -2.5, y = 0.375, 63 | s = 'Depicted below is the distribution of a sample (blue) drawn from a Geometric distribution with\n$p = 0.3$ (red). Also the estimated distrubution with $p \sim {:.3f}$ is shown (yellow).'.format(np.mean(sample)), 64 | fontsize = 19, alpha = .85) 65 | plt.text(x = -2.5,y = -0.04, 66 | s = ' ©Joshua Görner github.com/jgoerner ', 67 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/geometric/04_MCMC_estimation.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | from collections import Counter 3 | import numpy as np 4 | from scipy.stats import geom 5 | import matplotlib.pyplot as plt 6 | import matplotlib.style as style 7 | from IPython.core.display import HTML 8 | import pymc3 as pm 9 | 10 | # PLOTTING CONFIG 11 | %matplotlib inline 12 | style.use('fivethirtyeight') 13 | plt.rcParams["figure.figsize"] = (14, 7) 14 | HTML(""" 15 | 22 | """) 23 | plt.figure(dpi=100) 24 | 25 | ##### COMPUTATION ##### 26 | # DECLARING THE "TRUE" PARAMETERS UNDERLYING THE SAMPLE 27 | p_real = 0.3 28 | 29 | # DRAW A SAMPLE OF N=1000 30 | np.random.seed(42) 31 | sample = geom.rvs(p=p_real, size=100) 32 | 33 | ##### SIMULATION ##### 34 | # MODEL BUILDING 35 | with pm.Model() as model: 36 | p = pm.Uniform("p") 37 | geometric = pm.Geometric("geometric", p=p, observed=sample) 38 | 39 | # MODEL RUN 40 | with model: 41 | step = pm.Metropolis() 42 | trace = pm.sample(100000, step=step) 43 | burned_trace = 
trace[50000:] 44 | 45 | # P - 95% CONF INTERVAL 46 | ps = burned_trace["p"] 47 | ps_est_95 = ps.mean() - 2*ps.std(), ps.mean() + 2*ps.std() 48 | print("95% of sampled ps are between {:0.3f} and {:0.3f}".format(*ps_est_95)) 49 | 50 | ##### PLOTTING ##### 51 | # SAMPLE DISTRIBUTION 52 | cnt = Counter(sample) 53 | cnt[0] = 0 # added to fit pmf 54 | _, values = zip(*sorted(cnt.items())) 55 | plt.bar(range(len(values)), values/np.sum(values), alpha=0.25); 56 | 57 | # TRUE CURVE 58 | plt.plot(range(18), geom.pmf(k=range(18), p=p_real), color="#fc4f30") 59 | 60 | # ESTIMATED CURVE 61 | plt.plot(range(18), geom.pmf(k=range(18), p=ps.mean()), color="#e5ae38") 62 | 63 | # LEGEND 64 | plt.text(x=2, y=.06, s="sample", alpha=.75, weight="bold", color="#008fd5") 65 | plt.text(x=6.5, y=.075, s="true distrubtion", rotation=-15, alpha=.75, weight="bold", color="#fc4f30") 66 | plt.text(x=2, y=.275, s="estimated distribution", rotation=-60, alpha=.75, weight="bold", color="#e5ae38") 67 | 68 | # TICKS 69 | plt.xticks(range(17)[::2]) 70 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 71 | plt.axhline(y = 0.002, color = 'black', linewidth = 1.3, alpha = .7) 72 | 73 | # TITLE, SUBTITLE & FOOTER 74 | plt.text(x = -2.5, y = 0.425, s = "Geometric Distribution - Parameter Estimation (MCMC)", 75 | fontsize = 26, weight = 'bold', alpha = .75) 76 | plt.text(x = -2.5, y = 0.375, 77 | s = 'Depicted below is the distribution of a sample (blue) drawn from a Geometric distribution with\n$p = 0.3$ (red). 
Also the estimated distrubution with $p \sim {:.3f}$ is shown (yellow).'.format(ps.mean()), 78 | fontsize = 19, alpha = .85) 79 | plt.text(x = -2.5,y = -0.04, 80 | s = ' ©Joshua Görner github.com/jgoerner ', 81 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/normal/01_general.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import numpy as np 3 | import scipy.stats as stats 4 | import matplotlib.pyplot as plt 5 | import matplotlib.style as style 6 | from IPython.core.display import HTML 7 | 8 | # PLOTTING CONFIG 9 | %matplotlib inline 10 | style.use('fivethirtyeight') 11 | plt.rcParams["figure.figsize"] = (14, 7) 12 | HTML(""" 13 | 20 | """) 21 | plt.figure(dpi=100) 22 | 23 | # PDF 24 | plt.plot(np.linspace(-4, 4, 100), 25 | stats.norm.pdf(np.linspace(-4, 4, 100)) / np.max(stats.norm.pdf(np.linspace(-3, 3, 100))), 26 | ) 27 | plt.fill_between(np.linspace(-4, 4, 100), 28 | stats.norm.pdf(np.linspace(-4, 4, 100)) / np.max(stats.norm.pdf(np.linspace(-3, 3, 100))), 29 | alpha=.15, 30 | ) 31 | # CDF 32 | plt.plot(np.linspace(-4, 4, 100), 33 | stats.norm.cdf(np.linspace(-4, 4, 100)), 34 | ) 35 | 36 | # LEGEND 37 | plt.text(x=-1.5, y=.7, s="pdf (normed)", rotation=65, alpha=.75, weight="bold", color="#008fd5") 38 | plt.text(x=-.4, y=.5, s="cdf", rotation=55, alpha=.75, weight="bold", color="#fc4f30") 39 | 40 | # TICKS 41 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 42 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7) 43 | 44 | # TITLE, SUBTITLE & FOOTER 45 | plt.text(x = -5, y = 1.25, s = "Normal Distribution - Overview", 46 | fontsize = 26, weight = 'bold', alpha = .75) 47 | plt.text(x = -5, y = 1.1, 48 | s = 'Depicted below are the normed probability density function (pdf) and the cumulative density\nfunction (cdf) of a normally distributed random variable $ y \sim 
\mathcal{N}(\mu,\sigma) $, given $ \mu = 0 $ and $ \sigma = 1$.', 49 | fontsize = 19, alpha = .85) 50 | plt.text(x = -5,y = -0.2, 51 | s = ' ©Joshua Görner github.com/jgoerner ', 52 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/normal/02_mu.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import numpy as np 3 | import scipy.stats as stats 4 | import matplotlib.pyplot as plt 5 | import matplotlib.style as style 6 | from IPython.core.display import HTML 7 | 8 | # PLOTTING CONFIG 9 | %matplotlib inline 10 | style.use('fivethirtyeight') 11 | plt.rcParams["figure.figsize"] = (14, 7) 12 | HTML(""" 13 | 20 | """) 21 | plt.figure(dpi=100) 22 | 23 | # PDF MU = 0 24 | plt.plot(np.linspace(-4, 4, 100), 25 | stats.norm.pdf(np.linspace(-4, 4, 100)), 26 | ) 27 | plt.fill_between(np.linspace(-4, 4, 100), 28 | stats.norm.pdf(np.linspace(-4, 4, 100)), 29 | alpha=.15, 30 | ) 31 | 32 | # PDF MU = 2 33 | plt.plot(np.linspace(-4, 4, 100), 34 | stats.norm.pdf(np.linspace(-4, 4, 100), loc=2), 35 | ) 36 | plt.fill_between(np.linspace(-4, 4, 100), 37 | stats.norm.pdf(np.linspace(-4, 4, 100),loc=2), 38 | alpha=.15, 39 | ) 40 | 41 | # PDF MU = -2 42 | plt.plot(np.linspace(-4, 4, 100), 43 | stats.norm.pdf(np.linspace(-4, 4, 100), loc=-2), 44 | ) 45 | plt.fill_between(np.linspace(-4, 4, 100), 46 | stats.norm.pdf(np.linspace(-4, 4, 100),loc=-2), 47 | alpha=.15, 48 | ) 49 | 50 | # LEGEND 51 | plt.text(x=-1, y=.35, s="$ \mu = 0$", rotation=65, alpha=.75, weight="bold", color="#008fd5") 52 | plt.text(x=1, y=.35, s="$ \mu = 2$", rotation=65, alpha=.75, weight="bold", color="#fc4f30") 53 | plt.text(x=-3, y=.35, s="$ \mu = -2$", rotation=65, alpha=.75, weight="bold", color="#e5ae38") 54 | 55 | 56 | # TICKS 57 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 58 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7) 59 
| 60 | # TITLE, SUBTITLE & FOOTER 61 | plt.text(x = -5, y = 0.51, s = "Normal Distribution - $ \mu $", 62 | fontsize = 26, weight = 'bold', alpha = .75) 63 | plt.text(x = -5, y = 0.45, 64 | s = 'Depicted below are three normally distributed random variables with varying $ \mu $. As one can easily\nsee the parameter $\mu$ shifts the distribution along the x-axis.', 65 | fontsize = 19, alpha = .85) 66 | plt.text(x = -5,y = -0.075, 67 | s = ' ©Joshua Görner github.com/jgoerner ', 68 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/normal/03_sigma.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import numpy as np 3 | import scipy.stats as stats 4 | import matplotlib.pyplot as plt 5 | import matplotlib.style as style 6 | from IPython.core.display import HTML 7 | 8 | # PLOTTING CONFIG 9 | %matplotlib inline 10 | style.use('fivethirtyeight') 11 | plt.rcParams["figure.figsize"] = (14, 7) 12 | HTML(""" 13 | 20 | """) 21 | plt.figure(dpi=100) 22 | 23 | # PDF SIGMA = 1 24 | plt.plot(np.linspace(-4, 4, 100), 25 | stats.norm.pdf(np.linspace(-4, 4, 100), scale=1), 26 | ) 27 | plt.fill_between(np.linspace(-4, 4, 100), 28 | stats.norm.pdf(np.linspace(-4, 4, 100), scale=1), 29 | alpha=.15, 30 | ) 31 | 32 | # PDF SIGMA = 2 33 | plt.plot(np.linspace(-4, 4, 100), 34 | stats.norm.pdf(np.linspace(-4, 4, 100), scale=2), 35 | ) 36 | plt.fill_between(np.linspace(-4, 4, 100), 37 | stats.norm.pdf(np.linspace(-4, 4, 100), scale=2), 38 | alpha=.15, 39 | ) 40 | 41 | # PDF SIGMA = 0.5 42 | plt.plot(np.linspace(-4, 4, 100), 43 | stats.norm.pdf(np.linspace(-4, 4, 100), scale=0.5), 44 | ) 45 | plt.fill_between(np.linspace(-4, 4, 100), 46 | stats.norm.pdf(np.linspace(-4, 4, 100), scale=0.5), 47 | alpha=.15, 48 | ) 49 | 50 | # LEGEND 51 | plt.text(x=-1.25, y=.3, s="$ \sigma = 1$", rotation=51, alpha=.75, weight="bold", color="#008fd5") 52 | 
plt.text(x=-2.5, y=.13, s="$ \sigma = 2$", rotation=11, alpha=.75, weight="bold", color="#fc4f30") 53 | plt.text(x=-0.75, y=.55, s="$ \sigma = 0.5$", rotation=75, alpha=.75, weight="bold", color="#e5ae38") 54 | 55 | 56 | # TICKS 57 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 58 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7) 59 | 60 | # TITLE, SUBTITLE & FOOTER 61 | plt.text(x = -5, y = 0.98, s = "Normal Distribution - $ \sigma $", 62 | fontsize = 26, weight = 'bold', alpha = .75) 63 | plt.text(x = -5, y = 0.87, 64 | s = 'Depicted below are three normally distributed random variables with varying $\sigma $. As one can easily\nsee the parameter $\sigma$ "sharpens" the distribution (the smaller $ \sigma $ the sharper the function).', 65 | fontsize = 19, alpha = .85) 66 | plt.text(x = -5,y = -0.15, 67 | s = ' ©Joshua Görner github.com/jgoerner ', 68 | fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey'); -------------------------------------------------------------------------------- /src/normal/04_estimation.py: -------------------------------------------------------------------------------- 1 | # IMPORTS 2 | import numpy as np 3 | import scipy.stats as stats 4 | import matplotlib.pyplot as plt 5 | import matplotlib.style as style 6 | from IPython.core.display import HTML 7 | 8 | # PLOTTING CONFIG 9 | %matplotlib inline 10 | style.use('fivethirtyeight') 11 | plt.rcParams["figure.figsize"] = (14, 7) 12 | HTML(""" 13 | 20 | """) 21 | plt.figure(dpi=100) 22 | 23 | ##### COMPUTATION ##### 24 | # DECLARING THE "TRUE" PARAMETERS UNDERLYING THE SAMPLE 25 | mu_real = 10 26 | sigma_real = 2 27 | 28 | # DRAW A SAMPLE OF N=1000 29 | np.random.seed(42) 30 | sample = stats.norm.rvs(loc=mu_real, scale=sigma_real, size=1000) 31 | 32 | # ESTIMATE MU AND SIGMA 33 | mu_est = np.mean(sample) 34 | sigma_est = np.std(sample) 35 | print("Estimated MU: {}\nEstimated SIGMA: {}".format(mu_est, sigma_est)) 36 | 37 | ##### PLOTTING ##### 38 | # 
SAMPLE DISTRIBUTION 39 | plt.hist(sample, bins=50,normed=True, alpha=.25) 40 | 41 | # TRUE CURVE 42 | plt.plot(np.linspace(2, 18, 1000), norm.pdf(np.linspace(2, 18, 1000),loc=mu_real, scale=sigma_real)) 43 | 44 | # ESTIMATED CURVE 45 | plt.plot(np.linspace(2, 18, 1000), norm.pdf(np.linspace(2, 18, 1000),loc=np.mean(sample), scale=np.std(sample))) 46 | 47 | # LEGEND 48 | plt.text(x=9.5, y=.1, s="sample", alpha=.75, weight="bold", color="#008fd5") 49 | plt.text(x=7, y=.2, s="true distrubtion", rotation=55, alpha=.75, weight="bold", color="#fc4f30") 50 | plt.text(x=5, y=.12, s="estimated distribution", rotation=55, alpha=.75, weight="bold", color="#e5ae38") 51 | 52 | # TICKS 53 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18) 54 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7) 55 | 56 | # TITLE, SUBTITLE & FOOTER 57 | plt.text(x = 0, y = 0.3, s = "Normal Distribution", 58 | fontsize = 26, weight = 'bold', alpha = .75) 59 | plt.text(x = 0, y = 0.265, 60 | s = 'Depicted below is the distribution of a sample (blue) drawn from a normal distribution with $\mu = 10$\nand $\sigma = 2$ (red). 
# NORMAL DISTRIBUTION - PARAMETER ESTIMATION (MCMC)
# Notebook-cell style script: fits mu and sigma of a normal model to `sample`
# with a Metropolis sampler and plots the result next to the true curve.
# It reads `sample`, `mu_real` and `sigma_real`, none of which are defined in
# this file; they must already exist in the session (presumably created by the
# preceding estimation cell - TODO confirm).

# IMPORTS
import pymc3 as pm
import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt
import matplotlib.style as style
from IPython.core.display import HTML

# PLOTTING CONFIG
# `%matplotlib inline` is IPython-only syntax and a SyntaxError in a plain .py
# file. The call below is its programmatic form; it is skipped when the script
# is not running inside an IPython shell.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass
style.use('fivethirtyeight')
plt.rcParams["figure.figsize"] = (14, 7)
# NOTE(review): only the opening and closing lines of this HTML(...) call were
# visible during review; restore the original payload if it had one.
HTML("""

""")
plt.figure(dpi=100)

##### SIMULATION #####
# MODEL BUILDING
# Uniform priors with explicit upper bounds (lower bounds are left at the
# library default); the observed data ties the model to `sample`.
with pm.Model() as model:
    mu = pm.Uniform("mu", upper=20)
    std = pm.Uniform("std", upper=5)
    normal = pm.Normal("normal", mu=mu, sd=std, observed=sample)

# MODEL RUN
with model:
    step = pm.Metropolis()
    trace = pm.sample(50000, step=step)
    # Discard the first 45000 draws as burn-in.
    burned_trace = trace[45000:]

# MU - CENTRAL 95% INTERVAL
# Percentiles make the printed statement literally true for the draws;
# mean +/- 2*std is only an approximation and breaks down for skewed posteriors.
mus = burned_trace["mu"]
mu_est_95 = tuple(np.percentile(mus, [2.5, 97.5]))
print("95% of sampled mus are between {:0.3f} and {:0.3f}".format(*mu_est_95))

# STD - CENTRAL 95% INTERVAL
stds = burned_trace["std"]
std_est_95 = tuple(np.percentile(stds, [2.5, 97.5]))
print("95% of sampled sigmas are between {:0.3f} and {:0.3f}".format(*std_est_95))

#### PLOTTING #####
# Posterior means used as point estimates for the fitted curve and subtitle.
mu_mcmc = np.mean(mus)
sigma_mcmc = np.mean(stds)
x_grid = np.linspace(2, 18, 1000)

# SAMPLE DISTRIBUTION
# `density=True` replaces the `normed=True` keyword removed in Matplotlib 3.1.
plt.hist(sample, bins=50, density=True, alpha=.25)

# TRUE CURVE
plt.plot(x_grid, norm.pdf(x_grid, loc=mu_real, scale=sigma_real))

# ESTIMATED CURVE MCMC
plt.plot(x_grid, norm.pdf(x_grid, loc=mu_mcmc, scale=sigma_mcmc))

# LEGEND
plt.text(x=9.5, y=.1, s="sample", alpha=.75, weight="bold", color="#008fd5")
plt.text(x=7, y=.2, s="true distrubtion", rotation=55, alpha=.75, weight="bold", color="#fc4f30")
plt.text(x=5, y=.12, s="estimated distribution", rotation=55, alpha=.75, weight="bold", color="#e5ae38")

# TICKS
plt.tick_params(axis='both', which='major', labelsize=18)
plt.axhline(y=0, color='black', linewidth=1.3, alpha=.7)

# TITLE, SUBTITLE & FOOTER
# Raw strings keep the TeX backslashes without invalid-escape warnings; the
# line break is added as a separate, non-raw "\n".
subtitle = (
    r'Depicted below is the distribution of a sample (blue) drawn from a normal distribution with $\mu = 10$'
    '\n'
    r'and $\sigma = 2$ (red). Also the estimated distrubution with $\mu \sim {:.3f} $ and $\sigma \sim {:.3f} $ is shown (yellow).'
).format(mu_mcmc, sigma_mcmc)
plt.text(x=0, y=0.3, s="Normal Distribution - Parameter Estimation (MCMC)",
         fontsize=26, weight='bold', alpha=.75)
plt.text(x=0, y=0.265, s=subtitle,
         fontsize=19, alpha=.85)
# Trailing semicolon suppresses the echoed return value in a notebook.
plt.text(x=0, y=-0.025,
         s=' ©Joshua Görner github.com/jgoerner ',
         fontsize=14, color='#f0f0f0', backgroundcolor='grey');
# POISSON DISTRIBUTION - OVERVIEW
# Notebook-cell style script: plots the peak-normalised pmf (bars) and the cdf
# (line) of a Poisson variable with lambda = 5 for k = 0..19.

# IMPORTS
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import matplotlib.style as style
from IPython.core.display import HTML

# PLOTTING CONFIG
# `%matplotlib inline` is IPython-only syntax and a SyntaxError in a plain .py
# file. The call below is its programmatic form; it is skipped when the script
# is not running inside an IPython shell.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass
style.use('fivethirtyeight')
plt.rcParams["figure.figsize"] = (14, 7)
# NOTE(review): only the opening and closing lines of this HTML(...) call were
# visible during review; restore the original payload if it had one.
HTML("""

""")
plt.figure(dpi=100)

# Support and probabilities are computed once and reused below.
k = np.arange(20)
pmf = stats.poisson.pmf(k, mu=5)

# PMF (divided by its maximum so the tallest bar has height 1)
# Positions/heights are passed positionally: the old `left=` keyword was
# renamed to `x` and is rejected by current Matplotlib releases.
plt.bar(k, pmf / np.max(pmf), width=.75, alpha=0.75)

# CDF
plt.plot(k, stats.poisson.cdf(k, mu=5), color="#fc4f30")

# LEGEND
plt.text(x=8, y=.45, s="pmf (normed)", alpha=.75, weight="bold", color="#008fd5")
plt.text(x=8.5, y=.9, s="cdf", alpha=.75, weight="bold", color="#fc4f30")

# TICKS
plt.xticks(range(21)[::2])
plt.tick_params(axis='both', which='major', labelsize=18)
plt.axhline(y=0.005, color='black', linewidth=1.3, alpha=.7)

# TITLE, SUBTITLE & FOOTER
# Raw strings keep the TeX backslashes without invalid-escape warnings; the
# line break is added as a separate, non-raw "\n".
subtitle = (
    r'Depicted below are the normed probability mass function (pmf) and the cumulative density'
    '\n'
    r'function (cdf) of a Poisson distributed random variable $ y \sim Poi(\lambda) $, given $ \lambda = 5 $.'
)
plt.text(x=-2.5, y=1.25, s="Poisson Distribution - Overview",
         fontsize=26, weight='bold', alpha=.75)
plt.text(x=-2.5, y=1.1, s=subtitle,
         fontsize=19, alpha=.85)
# Trailing semicolon suppresses the echoed return value in a notebook.
plt.text(x=-2.5, y=-0.125,
         s=' ©Joshua Görner github.com/jgoerner ',
         fontsize=14, color='#f0f0f0', backgroundcolor='grey');
# POISSON DISTRIBUTION - EFFECT OF LAMBDA
# Notebook-cell style script: draws the Poisson pmf for lambda = 1, 5 and 10
# over k = 0..19, each as markers plus a connecting line.

# IMPORTS
import numpy as np
import scipy.stats as stats
import matplotlib.pyplot as plt
import matplotlib.style as style
from IPython.core.display import HTML

# PLOTTING CONFIG
# `%matplotlib inline` is IPython-only syntax and a SyntaxError in a plain .py
# file. The call below is its programmatic form; it is skipped when the script
# is not running inside an IPython shell.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass
style.use('fivethirtyeight')
plt.rcParams["figure.figsize"] = (14, 7)
# NOTE(review): only the opening and closing lines of this HTML(...) call were
# visible during review; restore the original payload if it had one.
HTML("""

""")
plt.figure(dpi=100)

# PMF FOR LAMBDA = 1, 5, 10
# One scatter + one line per lambda, issued in the same order as the former
# copy-pasted blocks so the automatic colour assignment is unchanged.
k = np.arange(20)
for lam in (1, 5, 10):
    pmf = stats.poisson.pmf(k, mu=lam)
    plt.scatter(k, pmf, alpha=0.75, s=100)
    plt.plot(k, pmf, alpha=0.75)

# LEGEND
# (x, y, label, rotation, colour) for each curve annotation.
annotations = (
    (3, .1, r"$\lambda = 1$", -65, "#008fd5"),
    (8.25, .075, r"$\lambda = 5$", -35, "#fc4f30"),
    (14.5, .06, r"$\lambda = 10$", -20, "#e5ae38"),
)
for x_pos, y_pos, label, angle, colour in annotations:
    plt.text(x=x_pos, y=y_pos, s=label, alpha=.75, rotation=angle,
             weight="bold", color=colour)

# TICKS
plt.xticks(range(21)[::2])
plt.tick_params(axis='both', which='major', labelsize=18)
plt.axhline(y=0, color='black', linewidth=1.3, alpha=.7)

# TITLE, SUBTITLE & FOOTER
# Raw strings keep the TeX backslashes without invalid-escape warnings; the
# line break is added as a separate, non-raw "\n".
subtitle = (
    r'Depicted below are three Poisson distributed random variables with varying $\lambda $. As one can easily'
    '\n'
    r'see the parameter $\lambda$ shifts and flattens the distribution (the smaller $ \lambda $ the sharper the function).'
)
plt.text(x=-2.5, y=.475, s=r"Poisson Distribution - $\lambda$",
         fontsize=26, weight='bold', alpha=.75)
plt.text(x=-2.5, y=.425, s=subtitle,
         fontsize=19, alpha=.85)
# Trailing semicolon suppresses the echoed return value in a notebook.
plt.text(x=-2.5, y=-0.075,
         s=' ©Joshua Görner github.com/jgoerner ',
         fontsize=14, color='#f0f0f0', backgroundcolor='grey');
# POISSON DISTRIBUTION - PARAMETER ESTIMATION
# Notebook-cell style script: draws 1000 Poisson(7) values with a fixed seed,
# estimates lambda by the sample mean and plots the sample's relative
# frequencies together with the true and the estimated pmf.

# IMPORTS
from collections import Counter
import numpy as np
from scipy.stats import poisson
import matplotlib.pyplot as plt
import matplotlib.style as style
from IPython.core.display import HTML

# PLOTTING CONFIG
# `%matplotlib inline` is IPython-only syntax and a SyntaxError in a plain .py
# file. The call below is its programmatic form; it is skipped when the script
# is not running inside an IPython shell.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass
style.use('fivethirtyeight')
plt.rcParams["figure.figsize"] = (14, 7)
# NOTE(review): only the opening and closing lines of this HTML(...) call were
# visible during review; restore the original payload if it had one.
HTML("""

""")
plt.figure(dpi=100)

##### COMPUTATION #####
# DECLARING THE "TRUE" PARAMETERS UNDERLYING THE SAMPLE
lambda_real = 7

# DRAW A SAMPLE OF N=1000
np.random.seed(42)
sample = poisson.rvs(mu=lambda_real, size=1000)

# ESTIMATE LAMBDA (sample mean)
lambda_est = np.mean(sample)
print("Estimated LAMBDA: {}".format(lambda_est))

##### PLOTTING #####
# SAMPLE DISTRIBUTION
# Bars are placed at the observed counts themselves. Using 0..len-1 instead
# would shift them against the pmf curves whenever 0 (or any intermediate
# count) does not occur in the sample.
cnt = Counter(sample)
keys, values = zip(*sorted(cnt.items()))
plt.bar(keys, np.asarray(values) / np.sum(values), alpha=0.25)

# TRUE CURVE
plt.plot(range(18), poisson.pmf(k=range(18), mu=lambda_real), color="#fc4f30")

# ESTIMATED CURVE
plt.plot(range(18), poisson.pmf(k=range(18), mu=lambda_est), color="#e5ae38")

# LEGEND
plt.text(x=6, y=.06, s="sample", alpha=.75, weight="bold", color="#008fd5")
plt.text(x=3.5, y=.14, s="true distrubtion", rotation=60, alpha=.75, weight="bold", color="#fc4f30")
plt.text(x=1, y=.08, s="estimated distribution", rotation=60, alpha=.75, weight="bold", color="#e5ae38")

# TICKS
plt.xticks(range(17)[::2])
plt.tick_params(axis='both', which='major', labelsize=18)
plt.axhline(y=0.0009, color='black', linewidth=1.3, alpha=.7)

# TITLE, SUBTITLE & FOOTER
# Raw strings keep the TeX backslashes without invalid-escape warnings; the
# line break is added as a separate, non-raw "\n".
subtitle = (
    r'Depicted below is the distribution of a sample (blue) drawn from a Poisson distribution with $\lambda = 7$.'
    '\n'
    r'Also the estimated distrubution with $\lambda \sim {:.3f}$ is shown (yellow).'
).format(lambda_est)
plt.text(x=-2.5, y=0.19, s="Poisson Distribution - Parameter Estimation",
         fontsize=26, weight='bold', alpha=.75)
plt.text(x=-2.5, y=0.17, s=subtitle,
         fontsize=19, alpha=.85)
# Trailing semicolon suppresses the echoed return value in a notebook.
plt.text(x=-2.5, y=-0.02,
         s=' ©Joshua Görner github.com/jgoerner ',
         fontsize=14, color='#f0f0f0', backgroundcolor='grey');
# POISSON DISTRIBUTION - PARAMETER ESTIMATION (MCMC)
# Notebook-cell style script: fits lambda of a Poisson model to `sample` with a
# Metropolis sampler and plots the result next to the true pmf.
# It reads `sample` and `lambda_real`, neither of which is defined in this
# file; they must already exist in the session (presumably created by the
# preceding estimation cell - TODO confirm).

# IMPORTS
# `Counter` and `poisson` are used below but were never imported here.
from collections import Counter
import pymc3 as pm
import numpy as np
from scipy.stats import norm
from scipy.stats import poisson
import matplotlib.pyplot as plt
import matplotlib.style as style
from IPython.core.display import HTML

# PLOTTING CONFIG
# `%matplotlib inline` is IPython-only syntax and a SyntaxError in a plain .py
# file. The call below is its programmatic form; it is skipped when the script
# is not running inside an IPython shell.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass
style.use('fivethirtyeight')
plt.rcParams["figure.figsize"] = (14, 7)
# NOTE(review): only the opening and closing lines of this HTML(...) call were
# visible during review; restore the original payload if it had one.
HTML("""

""")
plt.figure(dpi=100)

##### SIMULATION #####
# MODEL BUILDING
# Uniform prior with an explicit upper bound (lower bound left at the library
# default). The variable name `normal` is historical; it holds the Poisson
# likelihood and is kept so existing session code referring to it still works.
with pm.Model() as model:
    lam = pm.Uniform("lambda", upper=20)
    normal = pm.Poisson("poisson", mu=lam, observed=sample)

# MODEL RUN
with model:
    step = pm.Metropolis()
    trace = pm.sample(50000, step=step)
    # Discard the first 45000 draws as burn-in.
    burned_trace = trace[45000:]

# LAMBDA - CENTRAL 95% INTERVAL
# Percentiles make the printed statement literally true for the draws;
# mean +/- 2*std is only an approximation and breaks down for skewed posteriors.
lambdas = burned_trace["lambda"]
lambda_est_95 = tuple(np.percentile(lambdas, [2.5, 97.5]))
print("95% of sampled lambdas are between {:0.3f} and {:0.3f}".format(*lambda_est_95))

##### PLOTTING #####
# Posterior mean used as point estimate for the fitted curve and subtitle.
lambda_mcmc = np.mean(lambdas)

# SAMPLE DISTRIBUTION
# Bars are placed at the observed counts themselves. Using 0..len-1 instead
# would shift them against the pmf curves whenever 0 (or any intermediate
# count) does not occur in the sample.
cnt = Counter(sample)
keys, values = zip(*sorted(cnt.items()))
plt.bar(keys, np.asarray(values) / np.sum(values), alpha=0.25)

# TRUE CURVE
plt.plot(range(18), poisson.pmf(k=range(18), mu=lambda_real), color="#fc4f30")

# ESTIMATED CURVE
plt.plot(range(18), poisson.pmf(k=range(18), mu=lambda_mcmc), color="#e5ae38")

# LEGEND
plt.text(x=6, y=.06, s="sample", alpha=.75, weight="bold", color="#008fd5")
plt.text(x=3.5, y=.14, s="true distrubtion", rotation=60, alpha=.75, weight="bold", color="#fc4f30")
plt.text(x=1, y=.08, s="estimated distribution", rotation=60, alpha=.75, weight="bold", color="#e5ae38")

# TICKS
plt.xticks(range(17)[::2])
plt.tick_params(axis='both', which='major', labelsize=18)
plt.axhline(y=0.0009, color='black', linewidth=1.3, alpha=.7)

# TITLE, SUBTITLE & FOOTER
# Raw strings keep the TeX backslashes without invalid-escape warnings; the
# line break is added as a separate, non-raw "\n".
subtitle = (
    r'Depicted below is the distribution of a sample (blue) drawn from a Poisson distribution with $\lambda = 7$.'
    '\n'
    r'Also the estimated distrubution with $\lambda \sim {:.3f}$ is shown (yellow).'
).format(lambda_mcmc)
plt.text(x=-2.5, y=0.19, s="Poisson Distribution - Parameter Estimation (MCMC)",
         fontsize=26, weight='bold', alpha=.75)
plt.text(x=-2.5, y=0.17, s=subtitle,
         fontsize=19, alpha=.85)
# Trailing semicolon suppresses the echoed return value in a notebook.
plt.text(x=-2.5, y=-0.02,
         s=' ©Joshua Görner github.com/jgoerner ',
         fontsize=14, color='#f0f0f0', backgroundcolor='grey');