├── .gitignore
├── LICENSE
├── README.md
├── notebooks
    ├── Beta Distribution.ipynb
    ├── Binomial Distribution.ipynb
    ├── Cauchy Distribution.ipynb
    ├── Chi-Squared Distribution.ipynb
    ├── Geometric Distribution.ipynb
    ├── Hypergeometric Distribution.ipynb
    ├── Normal Distribution.ipynb
    └── Poisson Distribution.ipynb
└── src
    ├── beta
        ├── 01_general.py
        ├── 02_sum.py
        ├── 03_fraction.py
        └── 04_MCMC_estimation.py
    ├── binomial
        ├── 01_general.py
        ├── 02_p.py
        ├── 03_n.py
        └── 04_mcmc_estimation.py
    ├── cauchy
        ├── 01_general.py
        ├── 02_x_0.py
        ├── 03_lambda.py
        └── 04_MCMC_estimation.py
    ├── chi2
        ├── 01_general.py
        ├── 02_k.py
        ├── 03_estimation.py
        └── 04_MCMC_estimation.py
    ├── geometric
        ├── 01_general.py
        ├── 02_p.py
        ├── 03_estimation.py
        └── 04_MCMC_estimation.py
    ├── normal
        ├── 01_general.py
        ├── 02_mu.py
        ├── 03_sigma.py
        ├── 04_estimation.py
        └── 05_MCMC_estimation.py
    └── poisson
        ├── 01_general.py
        ├── 02_lambda.py
        ├── 03_estimation.py
        └── 04_mcmc_estimation.py


/.gitignore:
--------------------------------------------------------------------------------
1 | Template.ipynb
2 | .ipynb_checkpoints/
3 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 Joshua Görner
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Distribution-cheatsheet
 2 | A lookup repo for a variety of distributions (implemented in Python).
 3 | ***
 4 | 
 5 | ## Distribution Characteristics
 6 | The characteristics of each distribution are separated into the following paragraphs:
 7 | 1. Definition & Formula
 8 | 2. Parameter
 9 | 3. Implementation in Python
10 | 4. Inference of Parameter
11 | 
12 | ## Overview
13 | The following distributions are (or will be) implemented in this repository:
14 | - [Beta Distribution](https://github.com/jgoerner/distribution-cheatsheet/blob/master/notebooks/Beta%20Distribution.ipynb)
15 | - [Binomial Distribution](https://github.com/jgoerner/distribution-cheatsheet/blob/master/notebooks/Binomial%20Distribution.ipynb)
16 | - [Cauchy Distribution](https://github.com/jgoerner/distribution-cheatsheet/blob/master/notebooks/Cauchy%20Distribution.ipynb)
17 | - [Chi-Squared Distribution](https://github.com/jgoerner/distribution-cheatsheet/blob/master/notebooks/Chi-Squared%20Distribution.ipynb)
18 | - F Distribution
19 | - Gamma Distribution
20 | - [Geometric Distribution](https://github.com/jgoerner/distribution-cheatsheet/blob/master/notebooks/Geometric%20Distribution.ipynb)
21 | - [Hypergeometric Distribution](https://github.com/jgoerner/distribution-cheatsheet/blob/master/notebooks/Hypergeometric%20Distribution.ipynb)
22 | - [Normal Distribution](https://github.com/jgoerner/distribution-cheatsheet/blob/master/notebooks/Normal%20Distribution.ipynb)
23 | - [Poisson Distribution](https://github.com/jgoerner/distribution-cheatsheet/blob/master/notebooks/Poisson%20Distribution.ipynb)
24 | - T Distribution
25 | 


--------------------------------------------------------------------------------
/notebooks/Hypergeometric Distribution.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "markdown",
 5 |    "metadata": {},
 6 |    "source": [
 7 |     "# Hypergeometric Distribution\n",
 8 |     "***\n",
 9 |     "## Definition\n",
10 |     ">The Hypergeometric distribution is a discrete probability distribution used (e.g.) \"to calculate probabilities when sampling without replacement\" $ ^{[1]}$."
11 |    ]
12 |   }
13 |  ],
14 |  "metadata": {
15 |   "kernelspec": {
16 |    "display_name": "Python 3",
17 |    "language": "python",
18 |    "name": "python3"
19 |   },
20 |   "language_info": {
21 |    "codemirror_mode": {
22 |     "name": "ipython",
23 |     "version": 3
24 |    },
25 |    "file_extension": ".py",
26 |    "mimetype": "text/x-python",
27 |    "name": "python",
28 |    "nbconvert_exporter": "python",
29 |    "pygments_lexer": "ipython3",
30 |    "version": "3.6.2"
31 |   }
32 |  },
33 |  "nbformat": 4,
34 |  "nbformat_minor": 2
35 | }
36 | 


--------------------------------------------------------------------------------
/src/beta/01_general.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')
11 | plt.rcParams["figure.figsize"] = (14, 7)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)
21 | plt.figure(dpi=100)
22 | 
23 | # PDF
24 | plt.plot(np.linspace(0, 1, 100), 
25 |          stats.beta.pdf(np.linspace(0, 1, 100),a=2,b=2) / np.max(stats.beta.pdf(np.linspace(0, 1, 100),a=2,b=2)),
26 |         )
27 | plt.fill_between(np.linspace(0, 1, 100),
28 |                  stats.beta.pdf(np.linspace(0, 1, 100),a=2,b=2) / np.max(stats.beta.pdf(np.linspace(0, 1, 100),a=2,b=2)),
29 |                  alpha=.15,
30 |                 )
31 | 
32 | # CDF
33 | plt.plot(np.linspace(0, 1, 100), 
34 |          stats.beta.cdf(np.linspace(0, 1, 100),a=2,b=2),
35 |         )
36 | 
37 | # LEGEND
38 | plt.text(x=0.1, y=.7, s="pdf (normed)", rotation=52, alpha=.75, weight="bold", color="#008fd5")
39 | plt.text(x=0.45, y=.5, s="cdf", rotation=40, alpha=.75, weight="bold", color="#fc4f30")
40 | 
41 | # TICKS
42 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
43 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7)
44 | 
45 | # TITLE, SUBTITLE & FOOTER
46 | plt.text(x = -.125, y = 1.25, s = "Beta Distribution - Overview",
47 |                fontsize = 26, weight = 'bold', alpha = .75)
48 | plt.text(x = -.125, y = 1.1, 
49 |          s = 'Depicted below are the normed probability density function (pdf) and the cumulative density\nfunction (cdf) of a beta distributed random variable ' + r'$ y \sim Beta(\alpha, \beta)$, given $ \alpha = 2 $ and $ \beta = 2$.',
50 |          fontsize = 19, alpha = .85)
51 | plt.text(x = -.125,y = -0.2,
52 |          s = '   ©Joshua Görner                                                                                                                                                 github.com/jgoerner   ',
53 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/beta/02_sum.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')
11 | plt.rcParams["figure.figsize"] = (14, 7)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)
21 | plt.figure(dpi=100)
22 | 
23 | # A = B = 1
24 | plt.plot(np.linspace(0, 1, 200), 
25 |          stats.beta.pdf(np.linspace(0, 1, 200), a=1, b=1),
26 |         )
27 | plt.fill_between(np.linspace(0, 1, 200),
28 |                  stats.beta.pdf(np.linspace(0, 1, 200), a=1, b=1),
29 |                  alpha=.15,
30 |                 )
31 | 
32 | # A = B = 10
33 | plt.plot(np.linspace(0, 1, 200), 
34 |          stats.beta.pdf(np.linspace(0, 1, 200), a=10, b=10),
35 |         )
36 | plt.fill_between(np.linspace(0, 1, 200),
37 |                  stats.beta.pdf(np.linspace(0, 1, 200), a=10, b=10),
38 |                  alpha=.15,
39 |                 )
40 | 
41 | # A = B = 100
42 | plt.plot(np.linspace(0, 1, 200), 
43 |          stats.beta.pdf(np.linspace(0, 1, 200), a=100, b=100),
44 |         )
45 | plt.fill_between(np.linspace(0, 1, 200),
46 |                  stats.beta.pdf(np.linspace(0, 1, 200), a=100, b=100),
47 |                  alpha=.15,
48 |                 )
49 | 
50 | # LEGEND
51 | plt.text(x=0.1, y=1.45, s=r"$ \alpha = 1, \beta = 1$", alpha=.75, weight="bold", color="#008fd5")
52 | plt.text(x=0.325, y=3.5, s=r"$ \alpha = 10, \beta = 10$", rotation=35, alpha=.75, weight="bold", color="#fc4f30")
53 | plt.text(x=0.4125, y=8, s=r"$ \alpha = 100, \beta = 100$", rotation=80, alpha=.75, weight="bold", color="#e5ae38")
54 | 
55 | 
56 | # TICKS
57 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
58 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7)
59 | 
60 | # TITLE, SUBTITLE & FOOTER
61 | plt.text(x = -.1, y = 13.75, s = r"Beta Distribution - constant $\frac{\alpha}{\beta}$, varying $\alpha + \beta$",
62 |                fontsize = 26, weight = 'bold', alpha = .75)
63 | plt.text(x = -.1, y = 12, 
64 |          s = 'Depicted below are three beta distributed random variables with '+ r'equal $\frac{\alpha}{\beta} $ and varying $\alpha+\beta$'+'.\nAs one can see the sum of ' + r'$\alpha + \beta$ (mainly) sharpens the distribution (the bigger the sharper).',
65 |          fontsize = 19, alpha = .85)
66 | plt.text(x = -.1,y = -2,
67 |          s = '   ©Joshua Görner                                                                                                                                             github.com/jgoerner   ',
68 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/beta/03_fraction.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')
11 | plt.rcParams["figure.figsize"] = (14, 7)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)
21 | plt.figure(dpi=100)
22 | 
23 | # A / B = 1/3
24 | plt.plot(np.linspace(0, 1, 200), 
25 |          stats.beta.pdf(np.linspace(0, 1, 200), a=25, b=75),
26 |         )
27 | plt.fill_between(np.linspace(0, 1, 200),
28 |                  stats.beta.pdf(np.linspace(0, 1, 200), a=25, b=75),
29 |                  alpha=.15,
30 |                 )
31 | 
32 | # A / B = 1
33 | plt.plot(np.linspace(0, 1, 200), 
34 |          stats.beta.pdf(np.linspace(0, 1, 200), a=50, b=50),
35 |         )
36 | plt.fill_between(np.linspace(0, 1, 200),
37 |                  stats.beta.pdf(np.linspace(0, 1, 200), a=50, b=50),
38 |                  alpha=.15,
39 |                 )
40 | 
41 | # A / B = 3
42 | plt.plot(np.linspace(0, 1, 200), 
43 |          stats.beta.pdf(np.linspace(0, 1, 200), a=75, b=25),
44 |         )
45 | plt.fill_between(np.linspace(0, 1, 200),
46 |                  stats.beta.pdf(np.linspace(0, 1, 200), a=75, b=25),
47 |                  alpha=.15,
48 |                 )
49 | 
50 | # LEGEND
51 | plt.text(x=0.15, y=5, s=r"$ \alpha = 25, \beta = 75$", rotation=80, alpha=.75, weight="bold", color="#008fd5")
52 | plt.text(x=0.39, y=5, s=r"$ \alpha = 50, \beta = 50$", rotation=80, alpha=.75, weight="bold", color="#fc4f30")
53 | plt.text(x=0.65, y=5, s=r"$ \alpha = 100, \beta = 100$", rotation=80, alpha=.75, weight="bold", color="#e5ae38")
54 | 
55 | 
56 | # TICKS
57 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
58 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7)
59 | 
60 | # TITLE, SUBTITLE & FOOTER
61 | plt.text(x = -.1, y = 11.75, s = r"Beta Distribution - constant $\alpha + \beta$, varying $\frac{\alpha}{\beta}$",
62 |                fontsize = 26, weight = 'bold', alpha = .75)
63 | plt.text(x = -.1, y = 10, 
64 |          s = 'Depicted below are three beta distributed random variables with '+ r'equal $\alpha+\beta$ and varying $\frac{\alpha}{\beta} $'+'.\nAs one can see the fraction of ' + r'$\frac{\alpha}{\beta} $ (mainly) shifts the distribution ' + r'($\alpha$ towards 1, $\beta$ towards 0).',
65 |          fontsize = 19, alpha = .85)
66 | plt.text(x = -.1,y = -2,
67 |          s = '   ©Joshua Görner                                                                                                                                             github.com/jgoerner   ',
68 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/beta/04_MCMC_estimation.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import pymc3 as pm
 3 | import numpy as np
 4 | from scipy import stats
 5 | import matplotlib.pyplot as plt
 6 | import matplotlib.style as style
 7 | from IPython.core.display import HTML
 8 | 
 9 | # PLOTTING CONFIG
10 | %matplotlib inline
11 | style.use('fivethirtyeight')
12 | plt.rcParams["figure.figsize"] = (14, 7)
13 | HTML("""
14 | <style>
15 | .output_png {
16 |     display: table-cell;
17 |     text-align: center;
18 |     vertical-align: center;
19 | }
20 | </style>
21 | """)
22 | plt.figure(dpi=100)
23 | 
24 | ##### GENERATE DATA #####
25 | A_TRUE = 75
26 | B_TRUE = 100
27 | np.random.seed(42)
28 | sample = stats.beta.rvs(a=A_TRUE, b=B_TRUE, size=200)
29 | 
30 | ##### SIMULATION #####
31 | # MODEL BUILDING
32 | with pm.Model() as model:
33 |     a = pm.Uniform("a", upper=200)
34 |     b = pm.Uniform("b", upper=200)
35 |     beta = pm.Beta("beta", alpha=a, beta=b, observed=sample)
36 |     
37 | # MODEL RUN
38 | with model:
39 |     step = pm.Metropolis()
40 |     trace = pm.sample(100000, step=step)
41 |     burned_trace = trace[20000:]
42 | 
43 | # A - 95% CONF INTERVAL
44 | a_s = burned_trace["a"]
45 | a_est_95 = np.mean(a_s) - 2*np.std(a_s), np.mean(a_s) + 2*np.std(a_s)
46 | print("95% of sampled mus are between {:0.3f} and {:0.3f}".format(*a_est_95))
47 | 
48 | # A - 95% CONF INTERVAL
49 | b_s = burned_trace["b"]
50 | b_est_95 = np.mean(b_s) - 2*np.std(b_s), np.mean(b_s) + 2*np.std(b_s)
51 | print("95% of sampled mus are between {:0.3f} and {:0.3f}".format(*b_est_95))
52 | 
53 | #### PLOTTING #####
54 | # SAMPLE DISTRIBUTION
55 | plt.hist(sample, bins=50,normed=True, alpha=.25)
56 | 
57 | # TRUE CURVE
58 | plt.plot(np.linspace(0.3, 0.6, 100), stats.beta.pdf(np.linspace(0.3, 0.6, 100),a=A_TRUE, b=B_TRUE))
59 | 
60 | # ESTIMATED CURVE MCMC
61 | plt.plot(np.linspace(0.3, 0.6, 100), stats.beta.pdf(np.linspace(0.3, 0.6, 100),a=a_s.mean(), b=b_s.mean()))
62 | 
63 | # LEGEND
64 | plt.text(x=0.4125, y=2.5, s="sample", alpha=.75, weight="bold", color="#008fd5")
65 | plt.text(x=0.475, y=8, s="true distrubtion", rotation=-55, alpha=.75, weight="bold", color="#fc4f30")
66 | plt.text(x=0.34, y=9, s="estimated distribution", rotation=55, alpha=.75, weight="bold", color="#e5ae38")
67 | 
68 | # TICKS
69 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
70 | plt.axhline(y = 0.1, color = 'black', linewidth = 1.3, alpha = .7)
71 | 
72 | # TITLE, SUBTITLE & FOOTER
73 | plt.text(x = 0.275, y = 17, s = "Beta Distribution - Parameter Estimation (MCMC)",
74 |                fontsize = 26, weight = 'bold', alpha = .75)
75 | plt.text(x = 0.275, y = 15, 
76 |          s = 'Depicted below is the distribution of a sample (blue) drawn from a beta distribution with '+ r'$\alpha = 75$'+'\nand ' + r'$\beta = 100$ (red). Also the estimated distrubution with $\alpha \sim {:.3f} $ and $\beta \sim {:.3f} $ is shown.'.format(a_s.mean(), b_s.mean()),
77 |          fontsize = 19, alpha = .85)
78 | plt.text(x = 0.275,y = -1.5,
79 |          s = '   ©Joshua Görner                                                                                                                                           github.com/jgoerner   ',
80 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/binomial/01_general.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')
11 | plt.rcParams["figure.figsize"] = (14, 7)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)
21 | plt.figure(dpi=100)
22 | 
23 | # PDF
24 | plt.bar(left=np.arange(20), 
25 |         height=(stats.binom.pmf(np.arange(20), p=.5, n=20)/np.max(stats.binom.pmf(np.arange(20), p=.5, n=20))), 
26 |         width=.75,
27 |         alpha=0.75
28 |        )
29 | 
30 | # CDF
31 | plt.plot(np.arange(20),
32 |          stats.binom.cdf(np.arange(20), p=.5, n=20),
33 |          color="#fc4f30",
34 |         )
35 | 
36 | # LEGEND
37 | plt.text(x=4.5, y=.7, s="pmf (normed)", alpha=.75, weight="bold", color="#008fd5")
38 | plt.text(x=14.5, y=.9, s="cdf", alpha=.75, weight="bold", color="#fc4f30")
39 | 
40 | # TICKS
41 | plt.xticks(range(21)[::2])
42 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
43 | plt.axhline(y = 0.005, color = 'black', linewidth = 1.3, alpha = .7)
44 | 
45 | # TITLE, SUBTITLE & FOOTER
46 | plt.text(x = -2.5, y = 1.25, s = "Binomial Distribution - Overview",
47 |                fontsize = 26, weight = 'bold', alpha = .75)
48 | plt.text(x = -2.5, y = 1.1, 
49 |          s = 'Depicted below are the normed probability mass function (pmf) and the cumulative density\nfunction (cdf) of a Binomial distributed random variable $ y \sim Binom(N, p) $, given $ N = 20$ and $p =0.5 $.',
50 |          fontsize = 19, alpha = .85)
51 | plt.text(x = -2.5,y = -0.125,
52 |          s = '   ©Joshua Görner                                                                                                                                              github.com/jgoerner   ',
53 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/binomial/02_p.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')
11 | plt.rcParams["figure.figsize"] = (14, 7)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)
21 | plt.figure(dpi=100)
22 | 
23 | # PDF P = .2
24 | plt.scatter(np.arange(21),
25 |             (stats.binom.pmf(np.arange(21), p=.2, n=20)),
26 |             alpha=0.75,
27 |             s=100
28 |        )
29 | plt.plot(np.arange(21),
30 |          (stats.binom.pmf(np.arange(21), p=.2, n=20)),
31 |          alpha=0.75,
32 |         )
33 | 
34 | # PDF P = .5
35 | plt.scatter(np.arange(21),
36 |             (stats.binom.pmf(np.arange(21), p=.5, n=20)),
37 |             alpha=0.75,
38 |             s=100
39 |        )
40 | plt.plot(np.arange(21),
41 |          (stats.binom.pmf(np.arange(21), p=.5, n=20)),
42 |          alpha=0.75,
43 |         )
44 | 
45 | # PDF P = .9
46 | plt.scatter(np.arange(21),
47 |             (stats.binom.pmf(np.arange(21), p=.9, n=20)),
48 |             alpha=0.75,
49 |             s=100
50 |        )
51 | plt.plot(np.arange(21),
52 |          (stats.binom.pmf(np.arange(21), p=.9, n=20)),
53 |          alpha=0.75,
54 |         )
55 | 
56 | # LEGEND
57 | plt.text(x=3.5, y=.075, s="$p = 0.2$", alpha=.75, weight="bold", color="#008fd5")
58 | plt.text(x=9.5, y=.075, s="$p = 0.5$", alpha=.75, weight="bold", color="#fc4f30")
59 | plt.text(x=17.5, y=.075, s="$p = 0.9$", alpha=.75, weight="bold", color="#e5ae38")
60 | 
61 | # TICKS
62 | plt.xticks(range(21)[::2])
63 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
64 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7)
65 | 
66 | # TITLE, SUBTITLE & FOOTER
67 | plt.text(x = -2.5, y = .37, s = "Binomial Distribution - $p$",
68 |                fontsize = 26, weight = 'bold', alpha = .75)
69 | plt.text(x = -2.5, y = .32, 
70 |          s = 'Depicted below are three Binomial distributed random variables with varying $p $. As one can see\nthe parameter $p$ shifts and skews the distribution.',
71 |          fontsize = 19, alpha = .85)
72 | plt.text(x = -2.5,y = -0.065,
73 |          s = '   ©Joshua Görner                                                                                                                                                 github.com/jgoerner   ',
74 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/binomial/03_n.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')
11 | plt.rcParams["figure.figsize"] = (14, 7)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)
21 | plt.figure(dpi=100)
22 | 
23 | # PDF N = 10
24 | plt.scatter(np.arange(11),
25 |             (stats.binom.pmf(np.arange(11), p=.5, n=10)),
26 |             alpha=0.75,
27 |             s=100
28 |        )
29 | plt.plot(np.arange(11),
30 |          (stats.binom.pmf(np.arange(11), p=.5, n=10)),
31 |          alpha=0.75,
32 |         )
33 | 
34 | # PDF N = 15
35 | plt.scatter(np.arange(16),
36 |             (stats.binom.pmf(np.arange(16), p=.5, n=15)),
37 |             alpha=0.75,
38 |             s=100
39 |        )
40 | plt.plot(np.arange(16),
41 |          (stats.binom.pmf(np.arange(16), p=.5, n=15)),
42 |          alpha=0.75,
43 |         )
44 | 
45 | # PDF N = 20
46 | plt.scatter(np.arange(21),
47 |             (stats.binom.pmf(np.arange(21), p=.5, n=20)),
48 |             alpha=0.75,
49 |             s=100
50 |        )
51 | plt.plot(np.arange(21),
52 |          (stats.binom.pmf(np.arange(21), p=.5, n=20)),
53 |          alpha=0.75,
54 |         )
55 | 
56 | # LEGEND
57 | plt.text(x=6, y=.225, s="$N = 10$", alpha=.75, weight="bold", color="#008fd5")
58 | plt.text(x=8.5, y=.2, s="$N = 15$", alpha=.75, weight="bold", color="#fc4f30")
59 | plt.text(x=11, y=.175, s="$N = 20$", alpha=.75, weight="bold", color="#e5ae38")
60 | 
61 | # TICKS
62 | plt.xticks(range(21)[::2])
63 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
64 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7)
65 | 
66 | # TITLE, SUBTITLE & FOOTER
67 | plt.text(x = -2.5, y = .31, s = "Binomial Distribution - $N$",
68 |                fontsize = 26, weight = 'bold', alpha = .75)
69 | plt.text(x = -2.5, y = .27, 
70 |          s = 'Depicted below are three Binomial distributed random variables with varying $N$. As one can see\nthe parameter $N$ streches the distribution (the larger $N$ the flatter the distribution).',
71 |          fontsize = 19, alpha = .85)
72 | plt.text(x = -2.5,y = -0.055,
73 |          s = '   ©Joshua Görner                                                                                                                                                 github.com/jgoerner   ',
74 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/binomial/04_mcmc_estimation.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import pymc3 as pm
 3 | import numpy as np
 4 | from scipy.stats import binom
 5 | import matplotlib.pyplot as plt
 6 | import matplotlib.style as style
 7 | from IPython.core.display import HTML
 8 | 
 9 | # PLOTTING CONFIG
10 | %matplotlib inline
11 | style.use('fivethirtyeight')
12 | plt.rcParams["figure.figsize"] = (14, 7)
13 | HTML("""
14 | <style>
15 | .output_png {
16 |     display: table-cell;
17 |     text-align: center;
18 |     vertical-align: center;
19 | }
20 | </style>
21 | """)
22 | plt.figure(dpi=100)
23 | 
24 | ##### DATA GENERATION #####
25 | # DRAW A SAMPLE
26 | np.random.seed(42)
27 | sample = stats.binom.rvs(p=0.3, n=200, size=1000)
28 | 
29 | ##### SIMULATION #####
30 | # MODEL BUILDING
31 | with pm.Model() as model:
32 |     p = pm.Beta("p", 1, 1)
33 |     n = pm.DiscreteUniform("n", lower=sample.max(), upper=10*sample.max())
34 |     binomial = pm.Binomial("binomial", p=p, n=n, observed=sample)
35 |     
36 | # MODEL RUN
37 | with model:
38 |     step = pm.Metropolis()
39 |     trace = pm.sample(100000, step=step)
40 |     burned_trace = trace[50000:]
41 | 
42 | # P - 95% CONF INTERVAL
43 | ps = burned_trace["p"]
44 | ps_est_95 = ps.mean() - 2*ps.std(), ps.mean() + 2*ps.std()
45 | print("95% of sampled ps are between {:0.3f} and {:0.3f}".format(*ps_est_95))
46 | 
47 | # N - 95% CONF INTERVAL
48 | ns = burned_trace["n"]
49 | ns_est_95 = ns.mean() - 2*ns.std(), ns.mean() + 2*ns.std()
50 | print("95% of sampled Ns are between {:0.3f} and {:0.3f}".format(*ns_est_95))
51 | 
52 | ##### PLOTTING #####
53 | # SAMPLE
54 | plt.hist(sample, 
55 |          bins=30, 
56 |          normed=True,
57 |          alpha=.25,
58 |         )
59 | 
60 | # TRUE CURVE
61 | plt.plot(np.arange(40, 90), 
62 |          stats.binom.pmf(np.arange(40, 90), 
63 |                          p=0.3, 
64 |                          n=200,
65 |                         ),
66 |         )
67 | 
68 | # ESTIMATED CURVE
69 | plt.plot(np.arange(40, 90), 
70 |          stats.binom.pmf(np.arange(40, 90), 
71 |                          p=burned_trace["p"].mean(), 
72 |                          n=burned_trace["n"].mean(),
73 |                         ),
74 |         )
75 | 
76 | # LEGEND
77 | plt.text(x=58, y=.03, s="sample", alpha=.75, weight="bold", color="#008fd5")
78 | plt.text(x=48, y=.055, s="true distrubtion", rotation=50, alpha=.75, weight="bold", color="#fc4f30")
79 | plt.text(x=68, y=.055, s="estimated distribution", rotation=-50, alpha=.75, weight="bold", color="#e5ae38")
80 | 
81 | # TICKS
82 | plt.xticks(range(40, 91)[::4])
83 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
84 | plt.axhline(y = 0.0009, color = 'black', linewidth = 1.3, alpha = .7)
85 | 
86 | # TITLE, SUBTITLE & FOOTER
87 | plt.text(x = 34, y = 0.135, s = "Binomial Distribution - Parameter Estimation (MCMC)",
88 |                fontsize = 26, weight = 'bold', alpha = .75)
89 | plt.text(x = 34, y = 0.12, 
90 |          s = 'Depicted below is the distribution of a sample drawn from a Binomial distribution with $N = 100$\nand $p = 0.3$. Additionally the estimated distrubution with $N \sim {:.3f}$ and $p \sim {:.2f}$ is shown.'.format(np.mean(ns), np.mean(ps)),
91 |          fontsize = 19, alpha = .85)
92 | plt.text(x = 34, y = -0.02,
93 |          s = '   ©Joshua Görner                                                                                                                                                   github.com/jgoerner   ',
94 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/cauchy/01_general.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')
11 | plt.rcParams["figure.figsize"] = (14, 7)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)
21 | plt.figure(dpi=100)
22 | 
23 | # PDF
24 | plt.plot(np.linspace(-6, 6, 100),
25 |          stats.cauchy.pdf(np.linspace(-6, 6, 100))/np.max(stats.cauchy.pdf(np.linspace(-6, 6, 100))),
26 |         )
27 | plt.fill_between(np.linspace(-6, 6, 100),
28 |                  stats.cauchy.pdf(np.linspace(-6, 6, 100))/np.max(stats.cauchy.pdf(np.linspace(-6, 6, 100))),
29 |                  alpha=.15,
30 |                 )
31 | # CDF
32 | plt.plot(np.linspace(-6, 6, 100),
33 |          stats.cauchy.cdf(np.linspace(-6, 6, 100)),
34 |         )
35 | 
36 | # LEGEND
37 | plt.text(x=2, y=.25, s="pdf", rotation=-50, alpha=.75, weight="bold", color="#008fd5")
38 | plt.text(x=-.4, y=.5, s="cdf", rotation=55, alpha=.75, weight="bold", color="#fc4f30")
39 | 
40 | # TICKS
41 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
42 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7)
43 | 
44 | # TITLE, SUBTITLE & FOOTER
45 | plt.text(x = -7.25, y = 1.25, s = "Cauchy - Overview",
46 |                fontsize = 26, weight = 'bold', alpha = .75)
47 | plt.text(x = -7.25, y = 1.1,
48 |          s = ("Depicted below are the normed probability density function (pdf) and the cumulative density \nfunction (cdf) of a cauchy distributed random variable $ x \sim Cauchy(\lambda , x_0)$"
49 |               " given $\lambda = 1,  x_0 = 0$"),
50 |          fontsize = 19, alpha = .85)
51 | plt.text(x = -7.25,y = -0.2,
52 |          s = '   © Hagen Mohr                                                                                                                                               github.com/jgoerner   ',
53 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/cauchy/02_x_0.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')
11 | plt.rcParams["figure.figsize"] = (14, 7)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)
21 | plt.figure(dpi=100)
22 | 
23 | # PDF MU = 0
24 | plt.plot(np.linspace(-6, 6, 100),
25 |          stats.cauchy.pdf(np.linspace(-6, 6, 100)),
26 |         )
27 | plt.fill_between(np.linspace(-6, 6, 100),
28 |                  stats.cauchy.pdf(np.linspace(-6, 6, 100)),
29 |                  alpha=.15,
30 |                 )
31 | 
32 | # PDF MU = 2
33 | plt.plot(np.linspace(-6, 6, 100),
34 |          stats.cauchy.pdf(np.linspace(-6, 6, 100), loc=2),
35 |         )
36 | plt.fill_between(np.linspace(-6, 6, 100),
37 |                  stats.cauchy.pdf(np.linspace(-6, 6, 100),loc=2),
38 |                  alpha=.15,
39 |                 )
40 | 
41 | # PDF MU = -2
42 | plt.plot(np.linspace(-6, 6, 100),
43 |          stats.cauchy.pdf(np.linspace(-6, 6, 100), loc=-2),
44 |         )
45 | plt.fill_between(np.linspace(-6, 6, 100),
46 |                  stats.cauchy.pdf(np.linspace(-6, 6, 100),loc=-2),
47 |                  alpha=.15,
48 |                 )
49 | 
50 | # LEGEND
51 | plt.text(x=-1, y=.25, s="$ x_0 = 0$", rotation=70, alpha=.75, weight="bold", color="#008fd5")
52 | plt.text(x=1, y=.25, s="$ x_0 = 2$", rotation=70, alpha=.75, weight="bold", color="#fc4f30")
53 | plt.text(x=-3.125, y=.25, s="$ x_0 = -2$", rotation=70, alpha=.75, weight="bold", color="#e5ae38")
54 | 
55 | 
56 | # TICKS
57 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
58 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7)
59 | 
60 | # TITLE, SUBTITLE & FOOTER
61 | plt.text(x = -7.25, y = 0.395, s = "Cauchy Distribution - $ x_0 $",
62 |                fontsize = 26, weight = 'bold', alpha = .75)
63 | plt.text(x = -7.25, y = 0.35,
64 |          s = 'Depicted below are three Cauchy distributed random variables with varying $ x_0 $. As one can \neasily see the parameter $x_0$ shifts the distribution along the x-axis.',
65 |          fontsize = 19, alpha = .85)
66 | plt.text(x = -7.25,y = -0.05,
67 |          s = '   © Hagen Mohr                                                                                                                                               github.com/jgoerner   ',
68 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/cauchy/03_lambda.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')
11 | plt.rcParams["figure.figsize"] = (14, 7)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)
21 | plt.figure(dpi=100)
22 | 
23 | # PDF lambda = 1
24 | plt.plot(np.linspace(-6, 6, 100),
25 |          stats.cauchy.pdf(np.linspace(-6, 6, 100)),
26 |         )
27 | plt.fill_between(np.linspace(-6, 6, 100),
28 |                  stats.cauchy.pdf(np.linspace(-6, 6, 100)),
29 |                  alpha=.15,
30 |                 )
31 | 
32 | # PDF lambda = 2
33 | plt.plot(np.linspace(-6, 6, 100),
34 |          stats.cauchy.pdf(np.linspace(-6, 6, 100), scale=2),
35 |         )
36 | plt.fill_between(np.linspace(-6, 6, 100),
37 |                  stats.cauchy.pdf(np.linspace(-6, 6, 100),scale=2),
38 |                  alpha=.15,
39 |                 )
40 | 
41 | # PDF lambda = 0.5
42 | plt.plot(np.linspace(-6, 6, 100),
43 |          stats.cauchy.pdf(np.linspace(-6, 6, 100), scale=0.5),
44 |         )
45 | plt.fill_between(np.linspace(-6, 6, 100),
46 |                  stats.cauchy.pdf(np.linspace(-6, 6, 100),scale=0.5),
47 |                  alpha=.15,
48 |                 )
49 | 
50 | # LEGEND
51 | plt.text(x=-1.25, y=.3, s="$ \lambda = 1$", rotation=51, alpha=.75, weight="bold", color="#008fd5")
52 | plt.text(x=-2.5, y=.13, s="$ \lambda = 2$", rotation=11, alpha=.75, weight="bold", color="#fc4f30")
53 | plt.text(x=-0.75, y=.55, s="$ \lambda = 0.5$", rotation=75, alpha=.75, weight="bold", color="#e5ae38")
54 | 
55 | 
56 | # TICKS
57 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
58 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7)
59 | 
60 | # TITLE, SUBTITLE & FOOTER
61 | plt.text(x = -7.25, y = 0.77, s = "Cauchy Distribution - $ \lambda $",
62 |                fontsize = 26, weight = 'bold', alpha = .75)
63 | plt.text(x = -7.25, y = 0.68,
64 |          s = ("Depicted below are three Cauchy distributed random variables with varying $\lambda$. " +
65 |              "It becomes \napparent, that $\lambda$ streches or tightens the distribution" +
66 |              " (the smaller $\lambda$ the higher the peak)"),
67 |          fontsize = 19, alpha = .85)
68 | plt.text(x = -7.25,y = -0.1,
69 |          s = '   © Hagen Mohr                                                                                                                                               github.com/jgoerner   ',
70 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/cauchy/04_MCMC_estimation.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import pymc3 as pm
 3 | import numpy as np
 4 | from scipy import stats
 5 | import matplotlib.pyplot as plt
 6 | import matplotlib.style as style
 7 | from IPython.core.display import HTML
 8 | 
 9 | # PLOTTING CONFIG
10 | %matplotlib inline
11 | style.use('fivethirtyeight')
12 | plt.rcParams["figure.figsize"] = (14, 7)
13 | HTML("""
14 | <style>
15 | .output_png {
16 |     display: table-cell;
17 |     text-align: center;
18 |     vertical-align: center;
19 | }
20 | </style>
21 | """)
22 | plt.figure(dpi=100)
23 | 
24 | ##### GENERATE DATA #####
25 | x_0_true = 10
26 | lambd_true = 1.5
27 | np.random.seed(42)
28 | sample = stats.cauchy.rvs(loc=x_0_true, scale=lambd_true, size=200)
29 | 
30 | ##### SIMULATION #####
31 | # MODEL BUILDING
32 | with pm.Model() as model:
33 |     x_0 = pm.Uniform("x_0", upper=50) # technically x_0 could take on negative values - not tested here
34 |     lambd = pm.Uniform("lambda", upper=20) # lambda is always > 0
35 |     cauchy = pm.Cauchy("cauchy", alpha=x_0, beta=lambd, observed=sample)
36 | 
37 | # MODEL RUN
38 | with model:
39 |     trace = pm.sample(draws=100000)
40 |     burned_trace = trace[20000:]
41 | 
42 | # x_0 - 95% CONF INTERVAL
43 | x_0s = burned_trace["x_0"]
44 | x_0_est_95 = np.mean(x_0s) - 2*np.std(x_0s), np.mean(x_0s) + 2*np.std(x_0s)
45 | print("95% of sampled x_0s are between {:0.3f} and {:0.3f}".format(*x_0_est_95))
46 | 
47 | # Lambda - 95% CONF INTERVAL
48 | lambds = burned_trace["lambda"]
49 | lambd_est_95 = np.mean(lambds) - 2*np.std(lambds), np.mean(lambds) + 2*np.std(lambds)
50 | print("95% of sampled lambdas are between {:0.3f} and {:0.3f}".format(*lambd_est_95))
51 | 
52 | #### PLOTTING #####
53 | # SAMPLE DISTRIBUTION
54 | plt.hist(sample, bins=50,normed=True, alpha=.25, range=[-10, 30])
55 | 
56 | # TRUE CURVE
57 | plt.plot(np.linspace(-10, 30, 50), stats.cauchy.pdf(np.linspace(-10, 30, 50),loc=x_0_true, scale=lambd_true))
58 | 
59 | # ESTIMATED CURVE MCMC
60 | plt.plot(np.linspace(-10, 30, 50), stats.cauchy.pdf(np.linspace(-10, 30, 50),loc=np.mean(x_0s), scale=np.mean(lambds)))
61 | 
62 | # LEGEND
63 | plt.text(x=8.5, y=.05, s="sample", alpha=.75, weight="bold", color="#008fd5")
64 | plt.text(x=13, y=.1, s="true distrubtion", rotation=0, alpha=.75, weight="bold", color="#fc4f30")
65 | plt.text(x=-1.5, y=.1, s="estimated distribution", rotation=0, alpha=.75, weight="bold", color="#e5ae38")
66 | 
67 | # TICKS
68 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
69 | plt.axhline(y = 0.001, color = 'black', linewidth = 1.3, alpha = .7)
70 | 
71 | # TITLE, SUBTITLE & FOOTER
72 | plt.text(x = -15, y = 0.255, s = "Cauchy - Parameter Estimation (MCMC)",
73 |                fontsize = 26, weight = 'bold', alpha = .75)
74 | plt.text(x = -15, y = 0.225,
75 |          s = 'Depicted below is the distribution of a sample (blue) drawn from a cauchy distribution with ' + r'$x_0 = 10$' + '\nand ' + r'$\lambda = 1.5$ (red). ' + r'Also the estimated distrubution with $x_0 \sim {:.3f} $ and $\lambda \sim {:.3f} $ is shown (yellow).'.format(np.mean(x_0s), np.mean(lambds)),
76 |          fontsize = 19, alpha = .85)
77 | plt.text(x = -15,y = -0.025,
78 |          s = '   © Hagen Mohr                                                                                                                                                     github.com/jgoerner   ',
79 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/chi2/01_general.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')
11 | plt.rcParams["figure.figsize"] = (14, 7)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)
21 | plt.figure(dpi=100)
22 | 
23 | # PDF
24 | plt.plot(np.linspace(0, 20, 100), 
25 |          stats.chi2.pdf(np.linspace(0, 20, 100), df=4) / np.max(stats.chi2.pdf(np.linspace(0, 20, 100), df=4)),
26 |         )
27 | plt.fill_between(np.linspace(0, 20, 100),
28 |                  stats.chi2.pdf(np.linspace(0, 20, 100), df=4) / np.max(stats.chi2.pdf(np.linspace(0, 20, 100), df=4)),
29 |                  alpha=.15,
30 |                 )
31 | 
32 | # CDF
33 | plt.plot(np.linspace(0, 20, 100), 
34 |          stats.chi2.cdf(np.linspace(0, 20, 100), df=4),
35 |         )
36 | 
37 | # LEGEND
38 | plt.xticks(np.arange(0, 21, 2))
39 | plt.text(x=11, y=.25, s="pdf (normed)", alpha=.75, weight="bold", color="#008fd5")
40 | plt.text(x=11, y=.85, s="cdf", alpha=.75, weight="bold", color="#fc4f30")
41 | 
42 | # TICKS
43 | plt.xticks(np.arange(0, 21, 2))
44 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
45 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7)
46 | 
47 | # TITLE, SUBTITLE & FOOTER
48 | plt.text(x = -2, y = 1.25, s = r"Chi-Squared $(\chi^{2})$ Distribution - Overview",
49 |                fontsize = 26, weight = 'bold', alpha = .75)
50 | plt.text(x = -2, y = 1.1, 
51 |          s = 'Depicted below are the normed probability density function (pdf) and the cumulative density\nfunction (cdf) of a Chi-Squared distributed random variable $ y \sim \chi^{2}(k) $, given $k$=4.',
52 |          fontsize = 19, alpha = .85)
53 | plt.text(x = -2,y = -0.2,
54 |          s = '   ©Joshua Görner                                                                                                                                             github.com/jgoerner   ',
55 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/chi2/02_k.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')
11 | plt.rcParams["figure.figsize"] = (14, 7)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)
21 | plt.figure(dpi=100)
22 | 
23 | # PDF k = 1
24 | plt.plot(np.linspace(0, 15, 500), 
25 |          stats.chi2.pdf(np.linspace(0, 15, 500), df=1),
26 |         )
27 | plt.fill_between(np.linspace(0, 15, 500),
28 |                  stats.chi2.pdf(np.linspace(0, 15, 500), df=1),
29 |                  alpha=.15,
30 |                 )
31 | 
32 | # PDF k = 3
33 | plt.plot(np.linspace(0, 15, 100), 
34 |          stats.chi2.pdf(np.linspace(0, 15, 100), df=3),
35 |         )
36 | plt.fill_between(np.linspace(0, 15, 100),
37 |                  stats.chi2.pdf(np.linspace(0, 15, 100), df=3),
38 |                  alpha=.15,
39 |                 )
40 | 
41 | # PDF k = 6
42 | plt.plot(np.linspace(0, 15, 100), 
43 |          stats.chi2.pdf(np.linspace(0, 15, 100), df=6),
44 |         )
45 | plt.fill_between(np.linspace(0, 15, 100),
46 |                  stats.chi2.pdf(np.linspace(0, 15, 100), df=6),
47 |                  alpha=.15,
48 |                 )
49 | 
50 | # LEGEND
51 | plt.text(x=.5, y=.7, s="$ k = 1$", rotation=-65, alpha=.75, weight="bold", color="#008fd5")
52 | plt.text(x=1.5, y=.35, s="$ k = 3$", alpha=.75, weight="bold", color="#fc4f30")
53 | plt.text(x=5, y=.2, s="$ k = 6$", alpha=.75, weight="bold", color="#e5ae38")
54 | 
55 | 
56 | # TICKS
57 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
58 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7)
59 | 
60 | # TITLE, SUBTITLE & FOOTER
61 | plt.text(x = -1.5, y = 2.8, s = "Chi-Squared Distribution - $ k $",
62 |                fontsize = 26, weight = 'bold', alpha = .75)
63 | plt.text(x = -1.5, y = 2.5, 
64 |          s = 'Depicted below are three Chi-Squared distributed random variables with varying $ k $. As one can\nsee the parameter $k$ smoothens the distribution and softens the skewness.',
65 |          fontsize = 19, alpha = .85)
66 | plt.text(x = -1.5,y = -0.4,
67 |          s = '   ©Joshua Görner                                                                                                                                                 github.com/jgoerner   ',
68 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/chi2/03_estimation.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')
11 | plt.rcParams["figure.figsize"] = (14, 7)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)
21 | plt.figure(dpi=100)
22 | 
23 | ##### COMPUTATION #####
24 | # DECLARING THE "TRUE" PARAMETERS UNDERLYING THE SAMPLE
25 | k_real = 2
26 | 
27 | # DRAW A SAMPLE OF N=1000
28 | np.random.seed(42)
29 | sample = stats.chi2.rvs(df=k_real, size=1000)
30 | 
31 | # ESTIMATE K
32 | k_est = np.mean(sample)
33 | print("Estimated k: {}".format(k_est))
34 | 
35 | ##### PLOTTING #####
36 | # SAMPLE DISTRIBUTION
37 | plt.hist(sample, bins=50,normed=True, alpha=.25)
38 | 
39 | # TRUE CURVE
40 | plt.plot(np.linspace(0, 18, 1000), stats.chi2.pdf(np.linspace(0, 18, 1000),df=k_real))
41 | 
42 | # ESTIMATED CURVE
43 | plt.plot(np.linspace(0, 18, 1000), stats.chi2.pdf(np.linspace(0, 18, 1000),df=k_est))
44 | 
45 | # LEGEND
46 | plt.text(x=.75, y=.1, s="sample", alpha=.75, weight="bold", color="#008fd5")
47 | plt.text(x=3, y=.15, s="true distrubtion", alpha=.75, weight="bold", color="#fc4f30")
48 | plt.text(x=1, y=.4, s="estimated distribution", alpha=.75, weight="bold", color="#e5ae38")
49 | 
50 | # TICKS
51 | plt.xticks(range(0, 19)[::2])
52 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
53 | plt.axhline(y = 0.003, color = 'black', linewidth = 1.3, alpha = .7)
54 | 
55 | # TITLE, SUBTITLE & FOOTER
56 | plt.text(x = -2, y = 0.675, s = "Chi-Squared Distribution - Parameter Estimation",
57 |                fontsize = 26, weight = 'bold', alpha = .75)
58 | plt.text(x = -2, y = 0.6, 
59 |          s = 'Depicted below is the distribution of a sample (blue) drawn from a Chi-Squared distribution with \n$k=2$ (red). Also the estimated distrubution with $k \sim {:.3f} $ is shown (yellow).'.format(np.mean(sample)),
60 |          fontsize = 19, alpha = .85)
61 | plt.text(x = -2,y = -0.075,
62 |          s = '   ©Joshua Görner                                                                                                                                               github.com/jgoerner   ',
63 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/chi2/04_MCMC_estimation.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import pymc3 as pm
 4 | import scipy.stats as stats
 5 | import matplotlib.pyplot as plt
 6 | import matplotlib.style as style
 7 | from IPython.core.display import HTML
 8 | 
 9 | # PLOTTING CONFIG
10 | %matplotlib inline
11 | style.use('fivethirtyeight')
12 | plt.rcParams["figure.figsize"] = (14, 7)
13 | HTML("""
14 | <style>
15 | .output_png {
16 |     display: table-cell;
17 |     text-align: center;
18 |     vertical-align: center;
19 | }
20 | </style>
21 | """)
22 | plt.figure(dpi=100)
23 | 
24 | ##### COMPUTATION #####
25 | # DECLARING THE "TRUE" PARAMETERS UNDERLYING THE SAMPLE
26 | k_real = 2
27 | 
28 | # DRAW A SAMPLE OF N=1000
29 | np.random.seed(42)
30 | sample = stats.chi2.rvs(df=k_real, size=1000)
31 | 
32 | ##### SIMULATION #####
33 | # MODEL BUILDING
34 | with pm.Model() as model:
35 |     k = pm.DiscreteUniform("k", lower=0, upper=np.mean(sample)*7) # mean + 3stds
36 |     chi_2 = pm.ChiSquared("chi2", nu=k, observed=sample)
37 |     
38 | 
39 | # MODEL RUN
40 | with model:
41 |     trace = pm.sample(50000)
42 |     burned_trace = trace[45000:]
43 | 
44 | # MU - 95% CONF INTERVAL
45 | ks = burned_trace["k"]
46 | k_est_95 = np.mean(ks) - 2*np.std(ks), np.mean(ks) + 2*np.std(ks)
47 | print("95% of sampled mus are between {} and {}".format(*k_est_95))
48 | 
49 | ##### PLOTTING #####
50 | # SAMPLE DISTRIBUTION
51 | plt.hist(sample, bins=50,normed=True, alpha=.25)
52 | 
53 | # TRUE CURVE
54 | plt.plot(np.linspace(0, 18, 1000), stats.chi2.pdf(np.linspace(0, 18, 1000),df=k_real), linestyle="--")
55 | 
56 | # ESTIMATED CURVE
57 | plt.plot(np.linspace(0, 18, 1000), stats.chi2.pdf(np.linspace(0, 18, 1000),df=np.mean(ks)), linestyle=":")
58 | 
59 | # LEGEND
60 | plt.text(x=.75, y=.1, s="sample", alpha=.75, weight="bold", color="#008fd5")
61 | plt.text(x=3, y=.15, s="true distrubtion", alpha=.75, weight="bold", color="#fc4f30")
62 | plt.text(x=1, y=.4, s="estimated distribution", alpha=.75, weight="bold", color="#e5ae38")
63 | 
64 | # TICKS
65 | plt.xticks(range(0, 19)[::2])
66 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
67 | plt.axhline(y = 0.003, color = 'black', linewidth = 1.3, alpha = .7)
68 | 
69 | # TITLE, SUBTITLE & FOOTER
70 | plt.text(x = -2, y = 0.675, s = "Chi-Squared Distribution - Parameter Estimation (MCMC)",
71 |                fontsize = 26, weight = 'bold', alpha = .75)
72 | plt.text(x = -2, y = 0.6, 
73 |          s = 'Depicted below is the distribution of a sample (blue) drawn from a Chi-Squared distribution with \n$k=2$ (red). Also the estimated distrubution with $k \sim {} $ is shown (yellow).'.format(np.mean(ks)),
74 |          fontsize = 19, alpha = .85)
75 | plt.text(x = -2,y = -0.075,
76 |          s = '   ©Joshua Görner                                                                                                                                               github.com/jgoerner   ',
77 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/geometric/01_general.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')
11 | plt.rcParams["figure.figsize"] = (14, 7)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)
21 | plt.figure(dpi=100)
22 | 
23 | # PDF
24 | plt.bar(left=np.arange(10), 
25 |         height=(stats.geom.pmf(np.arange(10), p=.5)/np.max(stats.geom.pmf(np.arange(10), p=.5))), 
26 |         width=.75,
27 |         alpha=0.75
28 |        )
29 | 
30 | # CDF
31 | plt.plot(np.arange(10),
32 |          stats.geom.cdf(np.arange(10), p=.5),
33 |          color="#fc4f30",
34 |         )
35 | 
36 | # LEGEND
37 | plt.text(x=3.5, y=.3, s="pmf (normed)", alpha=.75, weight="bold", color="#008fd5")
38 | plt.text(x=2.5, y=.7, s="cdf", alpha=.75, weight="bold", color="#fc4f30")
39 | 
40 | # TICKS
41 | plt.xticks(range(11))
42 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
43 | plt.axhline(y = 0.005, color = 'black', linewidth = 1.3, alpha = .7)
44 | 
45 | # TITLE, SUBTITLE & FOOTER
46 | plt.text(x = -1.5, y = 1.25, s = "Geometric Distribution - Overview",
47 |                fontsize = 26, weight = 'bold', alpha = .75)
48 | plt.text(x = -1.5, y = 1.1, 
49 |          s = 'Depicted below are the normed probability mass function (pmf) and the cumulative density\nfunction (cdf) of a Geometric distributed random variable $ y \sim Geom(p) $, given parameter $p =0.5 $.',
50 |          fontsize = 19, alpha = .85)
51 | plt.text(x = -1.5,y = -0.125,
52 |          s = '   ©Joshua Görner                                                                                                                                                   github.com/jgoerner   ',
53 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/geometric/02_p.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')
11 | plt.rcParams["figure.figsize"] = (14, 7)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)
21 | plt.figure(dpi=100)
22 | 
23 | # PDF P = .2
24 | plt.scatter(np.arange(11),
25 |             (stats.geom.pmf(np.arange(11), p=.2)),
26 |             alpha=0.75,
27 |             s=100
28 |        )
29 | plt.plot(np.arange(11),
30 |          (stats.geom.pmf(np.arange(11), p=.2)),
31 |          alpha=0.75,
32 |         )
33 | 
34 | # PDF P = .5
35 | plt.scatter(np.arange(11),
36 |             (stats.geom.pmf(np.arange(11), p=.5)),
37 |             alpha=0.75,
38 |             s=100
39 |        )
40 | plt.plot(np.arange(11),
41 |          (stats.geom.pmf(np.arange(11), p=.5)),
42 |          alpha=0.75,
43 |         )
44 | 
45 | # PDF P = .9
46 | plt.scatter(np.arange(11),
47 |             (stats.geom.pmf(np.arange(11), p=.9)),
48 |             alpha=0.75,
49 |             s=100
50 |        )
51 | plt.plot(np.arange(11),
52 |          (stats.geom.pmf(np.arange(11), p=.9)),
53 |          alpha=0.75,
54 |         )
55 | 
56 | # LEGEND
57 | plt.text(x=4.25, y=.15, s="$p = 0.2$", alpha=.75, weight="bold", color="#008fd5")
58 | plt.text(x=2.5, y=.25, s="$p = 0.5$", alpha=.75, weight="bold", color="#fc4f30")
59 | plt.text(x=1.5, y=.7, s="$p = 0.9$", alpha=.75, weight="bold", color="#e5ae38")
60 | 
61 | # TICKS
62 | plt.xticks(range(11))
63 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
64 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7)
65 | 
66 | # TITLE, SUBTITLE & FOOTER
67 | plt.text(x = -1, y = 1.125, s = "Geometric Distribution - $p$",
68 |                fontsize = 26, weight = 'bold', alpha = .75)
69 | plt.text(x = -1, y = 1, 
70 |          s = 'Depicted below are three Geometric distributed random variables with varying $p $. As one can\nsee the parameter $p$ flattens the distribution (the larger p the sharper the distribution).',
71 |          fontsize = 19, alpha = .85)
72 | plt.text(x = -1,y = -0.175,
73 |          s = '   ©Joshua Görner                                                                                                                                             github.com/jgoerner   ',
74 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/geometric/03_estimation.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | from collections import Counter
 3 | import numpy as np
 4 | from scipy.stats import geom
 5 | import matplotlib.pyplot as plt
 6 | import matplotlib.style as style
 7 | from IPython.core.display import HTML
 8 | 
 9 | # PLOTTING CONFIG
10 | %matplotlib inline
11 | style.use('fivethirtyeight')
12 | plt.rcParams["figure.figsize"] = (14, 7)
13 | HTML("""
14 | <style>
15 | .output_png {
16 |     display: table-cell;
17 |     text-align: center;
18 |     vertical-align: center;
19 | }
20 | </style>
21 | """)
22 | plt.figure(dpi=100)
23 | 
24 | ##### COMPUTATION #####
25 | # DECLARING THE "TRUE" PARAMETERS UNDERLYING THE SAMPLE
26 | p_real = 0.3
27 | 
28 | # DRAW A SAMPLE OF N=100
29 | np.random.seed(42)
30 | sample = geom.rvs(p=p_real, size=100)
31 | 
32 | # ESTIMATE P
33 | p_est = 1.0/np.mean(sample)
34 | print("Estimated p: {}".format(p_est))
35 | 
36 | ##### PLOTTING #####
37 | # SAMPLE DISTRIBUTION
38 | cnt = Counter(sample)
39 | cnt[0] = 0 # added to fit pmf
40 | _, values = zip(*sorted(cnt.items()))
41 | plt.bar(range(len(values)), values/np.sum(values), alpha=0.25);
42 | 
43 | # TRUE CURVE
44 | plt.plot(range(18), geom.pmf(k=range(18), p=p_real), color="#fc4f30")
45 | 
46 | # ESTIMATED CURVE
47 | plt.plot(range(18), geom.pmf(k=range(18), p=p_est), color="#e5ae38")
48 | 
49 | # LEGEND
50 | plt.text(x=2, y=.06, s="sample", alpha=.75, weight="bold", color="#008fd5")
51 | plt.text(x=6.5, y=.075, s="true distrubtion", rotation=-15, alpha=.75, weight="bold", color="#fc4f30")
52 | plt.text(x=2, y=.275, s="estimated distribution", rotation=-60, alpha=.75, weight="bold", color="#e5ae38")
53 | 
54 | # TICKS
55 | plt.xticks(range(17)[::2])
56 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
57 | plt.axhline(y = 0.002, color = 'black', linewidth = 1.3, alpha = .7)
58 | 
59 | # TITLE, SUBTITLE & FOOTER
60 | plt.text(x = -2.5, y = 0.425, s = "Geometric Distribution - Parameter Estimation",
61 |                fontsize = 26, weight = 'bold', alpha = .75)
62 | plt.text(x = -2.5, y = 0.375, 
63 |          s = 'Depicted below is the distribution of a sample (blue) drawn from a Geometric distribution with\n$p = 0.3$ (red). Also the estimated distrubution with $p \sim {:.3f}$ is shown (yellow).'.format(np.mean(sample)),
64 |          fontsize = 19, alpha = .85)
65 | plt.text(x = -2.5,y = -0.04,
66 |          s = '   ©Joshua Görner                                                                                                                                                github.com/jgoerner   ',
67 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/geometric/04_MCMC_estimation.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | from collections import Counter
 3 | import numpy as np
 4 | from scipy.stats import geom
 5 | import matplotlib.pyplot as plt
 6 | import matplotlib.style as style
 7 | from IPython.core.display import HTML
 8 | import pymc3 as pm
 9 | 
10 | # PLOTTING CONFIG
11 | %matplotlib inline
12 | style.use('fivethirtyeight')
13 | plt.rcParams["figure.figsize"] = (14, 7)
14 | HTML("""
15 | <style>
16 | .output_png {
17 |     display: table-cell;
18 |     text-align: center;
19 |     vertical-align: center;
20 | }
21 | </style>
22 | """)
23 | plt.figure(dpi=100)
24 | 
25 | ##### COMPUTATION #####
26 | # DECLARING THE "TRUE" PARAMETERS UNDERLYING THE SAMPLE
27 | p_real = 0.3
28 | 
29 | # DRAW A SAMPLE OF N=100 (NumPy's global RNG is seeded with 42 just before the draw)
30 | np.random.seed(42)
31 | sample = geom.rvs(p=p_real, size=100)
32 | 
33 | ##### SIMULATION #####
34 | # MODEL BUILDING
35 | with pm.Model() as model:
36 |     p = pm.Uniform("p")
37 |     geometric = pm.Geometric("geometric", p=p, observed=sample)
38 |     
39 | # MODEL RUN
40 | with model:
41 |     step = pm.Metropolis()
42 |     trace = pm.sample(100000, step=step)
43 |     burned_trace = trace[50000:]
44 | 
45 | # P - 95% CONF INTERVAL
46 | ps = burned_trace["p"]
47 | ps_est_95 = ps.mean() - 2*ps.std(), ps.mean() + 2*ps.std()
48 | print("95% of sampled ps are between {:0.3f} and {:0.3f}".format(*ps_est_95))
49 | 
50 | ##### PLOTTING #####
51 | # SAMPLE DISTRIBUTION
52 | cnt = Counter(sample)
53 | cnt[0] = 0 # added to fit pmf
54 | _, values = zip(*sorted(cnt.items()))
55 | plt.bar(range(len(values)), values/np.sum(values), alpha=0.25);
56 | 
57 | # TRUE CURVE
58 | plt.plot(range(18), geom.pmf(k=range(18), p=p_real), color="#fc4f30")
59 | 
60 | # ESTIMATED CURVE
61 | plt.plot(range(18), geom.pmf(k=range(18), p=ps.mean()), color="#e5ae38")
62 | 
63 | # LEGEND
64 | plt.text(x=2, y=.06, s="sample", alpha=.75, weight="bold", color="#008fd5")
65 | plt.text(x=6.5, y=.075, s="true distrubtion", rotation=-15, alpha=.75, weight="bold", color="#fc4f30")
66 | plt.text(x=2, y=.275, s="estimated distribution", rotation=-60, alpha=.75, weight="bold", color="#e5ae38")
67 | 
68 | # TICKS
69 | plt.xticks(range(17)[::2])
70 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
71 | plt.axhline(y = 0.002, color = 'black', linewidth = 1.3, alpha = .7)
72 | 
73 | # TITLE, SUBTITLE & FOOTER
74 | plt.text(x = -2.5, y = 0.425, s = "Geometric Distribution - Parameter Estimation (MCMC)",
75 |                fontsize = 26, weight = 'bold', alpha = .75)
76 | plt.text(x = -2.5, y = 0.375, 
77 |          s = 'Depicted below is the distribution of a sample (blue) drawn from a Geometric distribution with\n$p = 0.3$ (red). Also the estimated distrubution with $p \sim {:.3f}$ is shown (yellow).'.format(ps.mean()),
78 |          fontsize = 19, alpha = .85)
79 | plt.text(x = -2.5,y = -0.04,
80 |          s = '   ©Joshua Görner                                                                                                                                                github.com/jgoerner   ',
81 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/normal/01_general.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG (inline backend, theme, default figure size, output CSS)
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')  # switch to matplotlib's 'fivethirtyeight' style sheet
11 | plt.rcParams["figure.figsize"] = (14, 7)  # default figure size as (width, height)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)  # NOTE(review): the HTML object is built but never passed to display() - confirm the CSS is applied
21 | plt.figure(dpi=100)  # open the figure that the plt.* calls below draw into
22 | 
23 | # PDF
24 | plt.plot(np.linspace(-4, 4, 100), 
25 |          stats.norm.pdf(np.linspace(-4, 4, 100)) / np.max(stats.norm.pdf(np.linspace(-3, 3, 100))),
26 |         )
27 | plt.fill_between(np.linspace(-4, 4, 100),
28 |                  stats.norm.pdf(np.linspace(-4, 4, 100)) / np.max(stats.norm.pdf(np.linspace(-3, 3, 100))),
29 |                  alpha=.15,
30 |                 )
31 | # CDF
32 | plt.plot(np.linspace(-4, 4, 100), 
33 |          stats.norm.cdf(np.linspace(-4, 4, 100)),
34 |         )
35 | 
36 | # LEGEND
37 | plt.text(x=-1.5, y=.7, s="pdf (normed)", rotation=65, alpha=.75, weight="bold", color="#008fd5")
38 | plt.text(x=-.4, y=.5, s="cdf", rotation=55, alpha=.75, weight="bold", color="#fc4f30")
39 | 
40 | # TICKS
41 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
42 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7)
43 | 
44 | # TITLE, SUBTITLE & FOOTER
45 | plt.text(x = -5, y = 1.25, s = "Normal Distribution - Overview",
46 |                fontsize = 26, weight = 'bold', alpha = .75)
47 | plt.text(x = -5, y = 1.1, 
48 |          s = 'Depicted below are the normed probability density function (pdf) and the cumulative density\nfunction (cdf) of a normally distributed random variable $ y \sim \mathcal{N}(\mu,\sigma) $, given $ \mu = 0 $ and $ \sigma = 1$.',
49 |          fontsize = 19, alpha = .85)
50 | plt.text(x = -5,y = -0.2,
51 |          s = '   ©Joshua Görner                                                                                                                                                 github.com/jgoerner   ',
52 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/normal/02_mu.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG (inline backend, theme, default figure size, output CSS)
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')  # switch to matplotlib's 'fivethirtyeight' style sheet
11 | plt.rcParams["figure.figsize"] = (14, 7)  # default figure size as (width, height)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)  # NOTE(review): the HTML object is built but never passed to display() - confirm the CSS is applied
21 | plt.figure(dpi=100)  # open the figure that the plt.* calls below draw into
22 | 
23 | # PDF MU = 0
24 | plt.plot(np.linspace(-4, 4, 100), 
25 |          stats.norm.pdf(np.linspace(-4, 4, 100)),
26 |         )
27 | plt.fill_between(np.linspace(-4, 4, 100),
28 |                  stats.norm.pdf(np.linspace(-4, 4, 100)),
29 |                  alpha=.15,
30 |                 )
31 | 
32 | # PDF MU = 2
33 | plt.plot(np.linspace(-4, 4, 100), 
34 |          stats.norm.pdf(np.linspace(-4, 4, 100), loc=2),
35 |         )
36 | plt.fill_between(np.linspace(-4, 4, 100),
37 |                  stats.norm.pdf(np.linspace(-4, 4, 100),loc=2),
38 |                  alpha=.15,
39 |                 )
40 | 
41 | # PDF MU = -2
42 | plt.plot(np.linspace(-4, 4, 100), 
43 |          stats.norm.pdf(np.linspace(-4, 4, 100), loc=-2),
44 |         )
45 | plt.fill_between(np.linspace(-4, 4, 100),
46 |                  stats.norm.pdf(np.linspace(-4, 4, 100),loc=-2),
47 |                  alpha=.15,
48 |                 )
49 | 
50 | # LEGEND
51 | plt.text(x=-1, y=.35, s="$ \mu = 0$", rotation=65, alpha=.75, weight="bold", color="#008fd5")
52 | plt.text(x=1, y=.35, s="$ \mu = 2$", rotation=65, alpha=.75, weight="bold", color="#fc4f30")
53 | plt.text(x=-3, y=.35, s="$ \mu = -2$", rotation=65, alpha=.75, weight="bold", color="#e5ae38")
54 | 
55 | 
56 | # TICKS
57 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
58 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7)
59 | 
60 | # TITLE, SUBTITLE & FOOTER
61 | plt.text(x = -5, y = 0.51, s = "Normal Distribution - $ \mu $",
62 |                fontsize = 26, weight = 'bold', alpha = .75)
63 | plt.text(x = -5, y = 0.45, 
64 |          s = 'Depicted below are three normally distributed random variables with varying $ \mu $. As one can easily\nsee the parameter $\mu$ shifts the distribution along the x-axis.',
65 |          fontsize = 19, alpha = .85)
66 | plt.text(x = -5,y = -0.075,
67 |          s = '   ©Joshua Görner                                                                                                                                                 github.com/jgoerner   ',
68 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/normal/03_sigma.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG (inline backend, theme, default figure size, output CSS)
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')  # switch to matplotlib's 'fivethirtyeight' style sheet
11 | plt.rcParams["figure.figsize"] = (14, 7)  # default figure size as (width, height)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)  # NOTE(review): the HTML object is built but never passed to display() - confirm the CSS is applied
21 | plt.figure(dpi=100)  # open the figure that the plt.* calls below draw into
22 | 
23 | # PDF SIGMA = 1
24 | plt.plot(np.linspace(-4, 4, 100), 
25 |          stats.norm.pdf(np.linspace(-4, 4, 100), scale=1),
26 |         )
27 | plt.fill_between(np.linspace(-4, 4, 100),
28 |                  stats.norm.pdf(np.linspace(-4, 4, 100), scale=1),
29 |                  alpha=.15,
30 |                 )
31 | 
32 | # PDF SIGMA = 2
33 | plt.plot(np.linspace(-4, 4, 100), 
34 |          stats.norm.pdf(np.linspace(-4, 4, 100), scale=2),
35 |         )
36 | plt.fill_between(np.linspace(-4, 4, 100),
37 |                  stats.norm.pdf(np.linspace(-4, 4, 100), scale=2),
38 |                  alpha=.15,
39 |                 )
40 | 
41 | # PDF SIGMA = 0.5
42 | plt.plot(np.linspace(-4, 4, 100), 
43 |          stats.norm.pdf(np.linspace(-4, 4, 100), scale=0.5),
44 |         )
45 | plt.fill_between(np.linspace(-4, 4, 100),
46 |                  stats.norm.pdf(np.linspace(-4, 4, 100), scale=0.5),
47 |                  alpha=.15,
48 |                 )
49 | 
50 | # LEGEND
51 | plt.text(x=-1.25, y=.3, s="$ \sigma = 1$", rotation=51, alpha=.75, weight="bold", color="#008fd5")
52 | plt.text(x=-2.5, y=.13, s="$ \sigma = 2$", rotation=11, alpha=.75, weight="bold", color="#fc4f30")
53 | plt.text(x=-0.75, y=.55, s="$ \sigma = 0.5$", rotation=75, alpha=.75, weight="bold", color="#e5ae38")
54 | 
55 | 
56 | # TICKS
57 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
58 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7)
59 | 
60 | # TITLE, SUBTITLE & FOOTER
61 | plt.text(x = -5, y = 0.98, s = "Normal Distribution - $ \sigma $",
62 |                fontsize = 26, weight = 'bold', alpha = .75)
63 | plt.text(x = -5, y = 0.87, 
64 |          s = 'Depicted below are three normally distributed random variables with varying $\sigma $. As one can easily\nsee the parameter $\sigma$ "sharpens" the distribution (the smaller $ \sigma $ the sharper the function).',
65 |          fontsize = 19, alpha = .85)
66 | plt.text(x = -5,y = -0.15,
67 |          s = '   ©Joshua Görner                                                                                                                                                 github.com/jgoerner   ',
68 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/normal/04_estimation.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG (inline backend, theme, default figure size, output CSS)
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')  # switch to matplotlib's 'fivethirtyeight' style sheet
11 | plt.rcParams["figure.figsize"] = (14, 7)  # default figure size as (width, height)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)  # NOTE(review): the HTML object is built but never passed to display() - confirm the CSS is applied
21 | plt.figure(dpi=100)  # open the figure that the plt.* calls below draw into
22 | 
23 | ##### COMPUTATION #####
24 | # DECLARING THE "TRUE" PARAMETERS UNDERLYING THE SAMPLE
25 | mu_real = 10
26 | sigma_real = 2
27 | 
28 | # DRAW A SAMPLE OF N=1000
29 | np.random.seed(42)
30 | sample = stats.norm.rvs(loc=mu_real, scale=sigma_real, size=1000)
31 | 
32 | # ESTIMATE MU AND SIGMA
33 | mu_est = np.mean(sample)
34 | sigma_est = np.std(sample)
35 | print("Estimated MU: {}\nEstimated SIGMA: {}".format(mu_est, sigma_est))
36 | 
37 | ##### PLOTTING #####
38 | # SAMPLE DISTRIBUTION
39 | plt.hist(sample, bins=50,normed=True, alpha=.25)
40 | 
41 | # TRUE CURVE
42 | plt.plot(np.linspace(2, 18, 1000), norm.pdf(np.linspace(2, 18, 1000),loc=mu_real, scale=sigma_real))
43 | 
44 | # ESTIMATED CURVE
45 | plt.plot(np.linspace(2, 18, 1000), norm.pdf(np.linspace(2, 18, 1000),loc=np.mean(sample), scale=np.std(sample)))
46 | 
47 | # LEGEND
48 | plt.text(x=9.5, y=.1, s="sample", alpha=.75, weight="bold", color="#008fd5")
49 | plt.text(x=7, y=.2, s="true distrubtion", rotation=55, alpha=.75, weight="bold", color="#fc4f30")
50 | plt.text(x=5, y=.12, s="estimated distribution", rotation=55, alpha=.75, weight="bold", color="#e5ae38")
51 | 
52 | # TICKS
53 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
54 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7)
55 | 
56 | # TITLE, SUBTITLE & FOOTER
57 | plt.text(x = 0, y = 0.3, s = "Normal Distribution",
58 |                fontsize = 26, weight = 'bold', alpha = .75)
59 | plt.text(x = 0, y = 0.265, 
60 |          s = 'Depicted below is the distribution of a sample (blue) drawn from a normal distribution with $\mu = 10$\nand $\sigma = 2$ (red). Also the estimated distrubution with $\mu \sim {:.3f} $ and $\sigma \sim {:.3f} $ is shown (yellow).'.format(np.mean(sample), np.std(sample)),
61 |          fontsize = 19, alpha = .85)
62 | plt.text(x = 0,y = -0.025,
63 |          s = '   ©Joshua Görner                                                                                                                                                   github.com/jgoerner   ',
64 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/normal/05_MCMC_estimation.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import pymc3 as pm
 3 | import numpy as np
 4 | from scipy.stats import norm
 5 | import matplotlib.pyplot as plt
 6 | import matplotlib.style as style
 7 | from IPython.core.display import HTML
 8 | 
 9 | # PLOTTING CONFIG (inline backend, theme, default figure size, output CSS)
10 | %matplotlib inline
11 | style.use('fivethirtyeight')  # switch to matplotlib's 'fivethirtyeight' style sheet
12 | plt.rcParams["figure.figsize"] = (14, 7)  # default figure size as (width, height)
13 | HTML("""
14 | <style>
15 | .output_png {
16 |     display: table-cell;
17 |     text-align: center;
18 |     vertical-align: center;
19 | }
20 | </style>
21 | """)  # NOTE(review): the HTML object is built but never passed to display() - confirm the CSS is applied
22 | plt.figure(dpi=100)  # open the figure that the plt.* calls below draw into
23 | 
24 | ##### SIMULATION #####
25 | # MODEL BUILDING: uniform priors on mean and standard deviation, normal likelihood on the observed data
26 | with pm.Model() as model:
27 |     mu = pm.Uniform("mu", upper=20)  # only the upper bound is given; the lower bound is the library default
28 |     std = pm.Uniform("std", upper=5)
29 |     normal = pm.Normal("normal", mu=mu, sd=std, observed=sample)  # NOTE(review): `sample` is not defined in this file - presumably left in the notebook namespace by 04_estimation; confirm
30 |     
31 | # MODEL RUN: sample with a Metropolis step, then keep only the draws from index 45000 onwards
32 | with model:
33 |     step = pm.Metropolis()
34 |     trace = pm.sample(50000, step=step)
35 |     burned_trace = trace[45000:]
36 |     
37 | # MU - 95% CONF INTERVAL (computed as mean +/- 2 standard deviations of the retained draws)
38 | mus = burned_trace["mu"]
39 | mu_est_95 = np.mean(mus) - 2*np.std(mus), np.mean(mus) + 2*np.std(mus)
40 | print("95% of sampled mus are between {:0.3f} and {:0.3f}".format(*mu_est_95))
41 | 
42 | # STD - 95% CONF INTERVAL (same mean +/- 2 standard deviations rule)
43 | stds = burned_trace["std"]
44 | std_est_95 = np.mean(stds) - 2*np.std(stds), np.mean(stds) + 2*np.std(stds)
45 | print("95% of sampled sigmas are between {:0.3f} and {:0.3f}".format(*std_est_95))
46 | 
47 | #### PLOTTING #####
48 | # SAMPLE DISTRIBUTION
49 | plt.hist(sample, bins=50,normed=True, alpha=.25)
50 | 
51 | # TRUE CURVE
52 | plt.plot(np.linspace(2, 18, 1000), norm.pdf(np.linspace(2, 18, 1000),loc=mu_real, scale=sigma_real))
53 | 
54 | # ESTIMATED CURVE MCMC
55 | plt.plot(np.linspace(2, 18, 1000), norm.pdf(np.linspace(2, 18, 1000),loc=np.mean(mus), scale=np.mean(stds)))
56 | 
57 | # LEGEND
58 | plt.text(x=9.5, y=.1, s="sample", alpha=.75, weight="bold", color="#008fd5")
59 | plt.text(x=7, y=.2, s="true distrubtion", rotation=55, alpha=.75, weight="bold", color="#fc4f30")
60 | plt.text(x=5, y=.12, s="estimated distribution", rotation=55, alpha=.75, weight="bold", color="#e5ae38")
61 | 
62 | # TICKS
63 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
64 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7)
65 | 
66 | # TITLE, SUBTITLE & FOOTER
67 | plt.text(x = 0, y = 0.3, s = "Normal Distribution - Parameter Estimation (MCMC)",
68 |                fontsize = 26, weight = 'bold', alpha = .75)
69 | plt.text(x = 0, y = 0.265, 
70 |          s = 'Depicted below is the distribution of a sample (blue) drawn from a normal distribution with $\mu = 10$\nand $\sigma = 2$ (red). Also the estimated distrubution with $\mu \sim {:.3f} $ and $\sigma \sim {:.3f} $ is shown (yellow).'.format(np.mean(mus), np.mean(stds)),
71 |          fontsize = 19, alpha = .85)
72 | plt.text(x = 0,y = -0.025,
73 |          s = '   ©Joshua Görner                                                                                                                                                   github.com/jgoerner   ',
74 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/poisson/01_general.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG (inline backend, theme, default figure size, output CSS)
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')  # switch to matplotlib's 'fivethirtyeight' style sheet
11 | plt.rcParams["figure.figsize"] = (14, 7)  # default figure size as (width, height)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)  # NOTE(review): the HTML object is built but never passed to display() - confirm the CSS is applied
21 | plt.figure(dpi=100)  # open the figure that the plt.* calls below draw into
22 | 
23 | # PDF
24 | plt.bar(left=np.arange(20), 
25 |         height=(stats.poisson.pmf(np.arange(20), mu=5)/np.max(stats.poisson.pmf(np.arange(20), mu=5))), 
26 |         width=.75,
27 |         alpha=0.75
28 |        )
29 | 
30 | # CDF
31 | plt.plot(np.arange(20), 
32 |          stats.poisson.cdf(np.arange(20), mu=5),
33 |          color="#fc4f30",
34 |         )
35 | 
36 | # LEGEND
37 | plt.text(x=8, y=.45, s="pmf (normed)", alpha=.75, weight="bold", color="#008fd5")
38 | plt.text(x=8.5, y=.9, s="cdf", alpha=.75, weight="bold", color="#fc4f30")
39 | 
40 | # TICKS
41 | plt.xticks(range(21)[::2])
42 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
43 | plt.axhline(y = 0.005, color = 'black', linewidth = 1.3, alpha = .7)
44 | 
45 | # TITLE, SUBTITLE & FOOTER
46 | plt.text(x = -2.5, y = 1.25, s = "Poisson Distribution - Overview",
47 |                fontsize = 26, weight = 'bold', alpha = .75)
48 | plt.text(x = -2.5, y = 1.1, 
49 |          s = 'Depicted below are the normed probability mass function (pmf) and the cumulative density\nfunction (cdf) of a Poisson distributed random variable $ y \sim Poi(\lambda) $, given $ \lambda = 5 $.',
50 |          fontsize = 19, alpha = .85)
51 | plt.text(x = -2.5,y = -0.125,
52 |          s = '   ©Joshua Görner                                                                                                                                              github.com/jgoerner   ',
53 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/poisson/02_lambda.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import numpy as np
 3 | import scipy.stats as stats
 4 | import matplotlib.pyplot as plt
 5 | import matplotlib.style as style
 6 | from IPython.core.display import HTML
 7 | 
 8 | # PLOTTING CONFIG (inline backend, theme, default figure size, output CSS)
 9 | %matplotlib inline
10 | style.use('fivethirtyeight')  # switch to matplotlib's 'fivethirtyeight' style sheet
11 | plt.rcParams["figure.figsize"] = (14, 7)  # default figure size as (width, height)
12 | HTML("""
13 | <style>
14 | .output_png {
15 |     display: table-cell;
16 |     text-align: center;
17 |     vertical-align: center;
18 | }
19 | </style>
20 | """)  # NOTE(review): the HTML object is built but never passed to display() - confirm the CSS is applied
21 | plt.figure(dpi=100)  # open the figure that the plt.* calls below draw into
22 | 
23 | # PDF LAM = 1
24 | plt.scatter(np.arange(20),
25 |             (stats.poisson.pmf(np.arange(20), mu=1)),#/np.max(stats.poisson.pmf(np.arange(20), mu=1))),
26 |             alpha=0.75,
27 |             s=100
28 |        )
29 | plt.plot(np.arange(20),
30 |          (stats.poisson.pmf(np.arange(20), mu=1)),#/np.max(stats.poisson.pmf(np.arange(20), mu=1))),
31 |          alpha=0.75,
32 |         )
33 | 
34 | # PDF LAM = 5
35 | plt.scatter(np.arange(20),
36 |             (stats.poisson.pmf(np.arange(20), mu=5)),
37 |             alpha=0.75,
38 |             s=100
39 |        )
40 | plt.plot(np.arange(20),
41 |          (stats.poisson.pmf(np.arange(20), mu=5)),
42 |          alpha=0.75,
43 |         )
44 | 
45 | # PDF LAM = 10
46 | plt.scatter(np.arange(20),
47 |             (stats.poisson.pmf(np.arange(20), mu=10)),
48 |             alpha=0.75,
49 |             s=100
50 |        )
51 | plt.plot(np.arange(20),
52 |          (stats.poisson.pmf(np.arange(20), mu=10)),
53 |          alpha=0.75,
54 |         )
55 | 
56 | # LEGEND
57 | plt.text(x=3, y=.1, s="$\lambda = 1$", alpha=.75, rotation=-65, weight="bold", color="#008fd5")
58 | plt.text(x=8.25, y=.075, s="$\lambda = 5$", alpha=.75, rotation=-35, weight="bold", color="#fc4f30")
59 | plt.text(x=14.5, y=.06, s="$\lambda = 10$", alpha=.75, rotation=-20, weight="bold", color="#e5ae38")
60 | 
61 | # TICKS
62 | plt.xticks(range(21)[::2])
63 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
64 | plt.axhline(y = 0, color = 'black', linewidth = 1.3, alpha = .7)
65 | 
66 | # TITLE, SUBTITLE & FOOTER
67 | plt.text(x = -2.5, y = .475, s = "Poisson Distribution - $\lambda$",
68 |                fontsize = 26, weight = 'bold', alpha = .75)
69 | plt.text(x = -2.5, y = .425, 
70 |          s = 'Depicted below are three Poisson distributed random variables with varying $\lambda $. As one can easily\nsee the parameter $\lambda$ shifts and flattens the distribution (the smaller $ \lambda $ the sharper the function).',
71 |          fontsize = 19, alpha = .85)
72 | plt.text(x = -2.5,y = -0.075,
73 |          s = '   ©Joshua Görner                                                                                                                                                 github.com/jgoerner   ',
74 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/poisson/03_estimation.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | from collections import Counter
 3 | import numpy as np
 4 | from scipy.stats import poisson
 5 | import matplotlib.pyplot as plt
 6 | import matplotlib.style as style
 7 | from IPython.core.display import HTML
 8 | 
 9 | # PLOTTING CONFIG (inline backend, theme, default figure size, output CSS)
10 | %matplotlib inline
11 | style.use('fivethirtyeight')  # switch to matplotlib's 'fivethirtyeight' style sheet
12 | plt.rcParams["figure.figsize"] = (14, 7)  # default figure size as (width, height)
13 | HTML("""
14 | <style>
15 | .output_png {
16 |     display: table-cell;
17 |     text-align: center;
18 |     vertical-align: center;
19 | }
20 | </style>
21 | """)  # NOTE(review): the HTML object is built but never passed to display() - confirm the CSS is applied
22 | plt.figure(dpi=100)  # open the figure that the plt.* calls below draw into
23 | 
24 | ##### COMPUTATION #####
25 | # DECLARING THE "TRUE" PARAMETERS UNDERLYING THE SAMPLE
26 | lambda_real = 7
27 | 
28 | # DRAW A SAMPLE OF N=1000
29 | np.random.seed(42)
30 | sample = poisson.rvs(mu=lambda_real, size=1000)
31 | 
32 | # ESTIMATE MU AND SIGMA
33 | lambda_est = np.mean(sample)
34 | print("Estimated LAMBDA: {}".format(lambda_est))
35 | 
36 | ##### PLOTTING #####
37 | # SAMPLE DISTRIBUTION
38 | cnt = Counter(sample)
39 | _, values = zip(*sorted(cnt.items()))
40 | plt.bar(range(len(values)), values/np.sum(values), alpha=0.25);
41 | 
42 | # TRUE CURVE
43 | plt.plot(range(18), poisson.pmf(k=range(18), mu=lambda_real), color="#fc4f30")
44 | 
45 | # ESTIMATED CURVE
46 | plt.plot(range(18), poisson.pmf(k=range(18), mu=lambda_est), color="#e5ae38")
47 | 
48 | # LEGEND
49 | plt.text(x=6, y=.06, s="sample", alpha=.75, weight="bold", color="#008fd5")
50 | plt.text(x=3.5, y=.14, s="true distrubtion", rotation=60, alpha=.75, weight="bold", color="#fc4f30")
51 | plt.text(x=1, y=.08, s="estimated distribution", rotation=60, alpha=.75, weight="bold", color="#e5ae38")
52 | 
53 | # TICKS
54 | plt.xticks(range(17)[::2])
55 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
56 | plt.axhline(y = 0.0009, color = 'black', linewidth = 1.3, alpha = .7)
57 | 
58 | # TITLE, SUBTITLE & FOOTER
59 | plt.text(x = -2.5, y = 0.19, s = "Poisson Distribution - Parameter Estimation",
60 |                fontsize = 26, weight = 'bold', alpha = .75)
61 | plt.text(x = -2.5, y = 0.17, 
62 |          s = 'Depicted below is the distribution of a sample (blue) drawn from a Poisson distribution with $\lambda = 7$.\nAlso the estimated distrubution with $\lambda \sim {:.3f}$ is shown (yellow).'.format(np.mean(sample)),
63 |          fontsize = 19, alpha = .85)
64 | plt.text(x = -2.5,y = -0.02,
65 |          s = '   ©Joshua Görner                                                                                                                                                   github.com/jgoerner   ',
66 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------
/src/poisson/04_mcmc_estimation.py:
--------------------------------------------------------------------------------
 1 | # IMPORTS
 2 | import pymc3 as pm
 3 | import numpy as np
 4 | from scipy.stats import norm, poisson
 5 | import matplotlib.pyplot as plt
 6 | import matplotlib.style as style
 7 | from IPython.core.display import HTML
 8 | 
 9 | # PLOTTING CONFIG (inline backend, theme, default figure size, output CSS)
10 | %matplotlib inline
11 | style.use('fivethirtyeight')  # switch to matplotlib's 'fivethirtyeight' style sheet
12 | plt.rcParams["figure.figsize"] = (14, 7)  # default figure size as (width, height)
13 | HTML("""
14 | <style>
15 | .output_png {
16 |     display: table-cell;
17 |     text-align: center;
18 |     vertical-align: center;
19 | }
20 | </style>
21 | """)  # NOTE(review): the HTML object is built but never passed to display() - confirm the CSS is applied
22 | plt.figure(dpi=100)  # open the figure that the plt.* calls below draw into
23 | 
24 | ##### SIMULATION #####
25 | # MODEL BUILDING: uniform prior on lambda, Poisson likelihood on the observed data
26 | with pm.Model() as model:
27 |     lam = pm.Uniform("lambda", upper=20)  # only the upper bound is given; the lower bound is the library default
28 |     normal = pm.Poisson("poisson", mu=lam, observed=sample)  # NOTE(review): bound to the name `normal` although it is the Poisson likelihood; `sample` is not defined in this file - presumably from 03_estimation
29 |     
30 | # MODEL RUN: sample with a Metropolis step, then keep only the draws from index 45000 onwards
31 | with model:
32 |     step = pm.Metropolis()
33 |     trace = pm.sample(50000, step=step)
34 |     burned_trace = trace[45000:]
35 | 
36 | # LAMBDA - 95% CONF INTERVAL (computed as mean +/- 2 standard deviations of the retained draws)
37 | lambdas = burned_trace["lambda"]
38 | lambda_est_95 = np.mean(lambdas) - 2*np.std(lambdas), np.mean(lambdas) + 2*np.std(lambdas)
39 | print("95% of sampled lambdas are between {:0.3f} and {:0.3f}".format(*lambda_est_95))
40 | 
41 | # SAMPLE DISTRIBUTION
42 | cnt = Counter(sample)
43 | _, values = zip(*sorted(cnt.items()))
44 | plt.bar(range(len(values)), values/np.sum(values), alpha=0.25);
45 | 
46 | # TRUE CURVE
47 | plt.plot(range(18), poisson.pmf(k=range(18), mu=lambda_real), color="#fc4f30")
48 | 
49 | # ESTIMATED CURVE
50 | plt.plot(range(18), poisson.pmf(k=range(18), mu=np.mean(lambdas)), color="#e5ae38")
51 | 
52 | # LEGEND
53 | plt.text(x=6, y=.06, s="sample", alpha=.75, weight="bold", color="#008fd5")
54 | plt.text(x=3.5, y=.14, s="true distrubtion", rotation=60, alpha=.75, weight="bold", color="#fc4f30")
55 | plt.text(x=1, y=.08, s="estimated distribution", rotation=60, alpha=.75, weight="bold", color="#e5ae38")
56 | 
57 | # TICKS
58 | plt.xticks(range(17)[::2])
59 | plt.tick_params(axis = 'both', which = 'major', labelsize = 18)
60 | plt.axhline(y = 0.0009, color = 'black', linewidth = 1.3, alpha = .7)
61 | 
62 | # TITLE, SUBTITLE & FOOTER
63 | plt.text(x = -2.5, y = 0.19, s = "Poisson Distribution - Parameter Estimation (MCMC)",
64 |                fontsize = 26, weight = 'bold', alpha = .75)
65 | plt.text(x = -2.5, y = 0.17, 
66 |          s = 'Depicted below is the distribution of a sample (blue) drawn from a Poisson distribution with $\lambda = 7$.\nAlso the estimated distrubution with $\lambda \sim {:.3f}$ is shown (yellow).'.format(np.mean(lambdas)),
67 |          fontsize = 19, alpha = .85)
68 | plt.text(x = -2.5,y = -0.02,
69 |          s = '   ©Joshua Görner                                                                                                                                                   github.com/jgoerner   ',
70 |          fontsize = 14, color = '#f0f0f0', backgroundcolor = 'grey');


--------------------------------------------------------------------------------