├── .github
    └── workflows
    │   ├── ci.yml
    │   └── deploy-jupyter-book.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── Makefile
├── README.md
├── binder
    ├── requirements.txt
    └── trigger_binder.sh
├── book
    ├── _config.yml
    ├── _toc.yml
    ├── assets
    │   └── logo.png
    ├── introduction.md
    ├── notebooks
    │   ├── HEP
    │   │   └── Extended-Likelihood.ipynb
    │   ├── Introductory
    │   │   ├── Chi-Squared-Distribution.ipynb
    │   │   ├── Error-on-means.ipynb
    │   │   ├── Gaussian-Distribution.ipynb
    │   │   ├── Gaussian-Sampling.ipynb
    │   │   ├── Likelihood-Function.ipynb
    │   │   └── probability-integral-transform.ipynb
    │   └── simulation
    │   │   └── Rejection-Sampling-MC.ipynb
    └── requirements.txt
├── dev-requirements.txt
├── pyproject.toml
└── tests
    └── test_notebooks.py


/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: Python testing
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |     - main
 7 |   pull_request:
 8 |   workflow_dispatch:
 9 | 
10 | jobs:
11 |   test:
12 | 
13 |     strategy:
14 |       matrix:
15 |         platform: [ubuntu-latest, macos-latest]
16 |         python-version: ['3.9']
17 |     runs-on: ${{ matrix.platform }}
18 | 
19 |     steps:
20 |     - uses: actions/checkout@v2
21 |     - name: Set up Python ${{ matrix.python-version }}
22 |       uses: actions/setup-python@v2
23 |       with:
24 |         version: ${{ matrix.python-version }}
25 | 
26 |     - name: Install Python dependencies
27 |       run: |
28 |         python -m pip install --upgrade pip setuptools wheel
29 |         python -m pip install --requirement binder/requirements.txt
30 |         python -m pip install --upgrade --requirement dev-requirements.txt
31 | 
32 |     - name: List installed dependencies
33 |       run: python -m pip list
34 | 
35 |     - name: Lint with flake8
36 |       run: |
37 |         python -m pip install flake8
38 |         # stop the build if there are Python syntax errors or undefined names
39 |         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
40 |         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
41 |         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
42 | 
43 |     - name: Test with pytest
44 |       run: |
45 |         pytest
46 | 


--------------------------------------------------------------------------------
/.github/workflows/deploy-jupyter-book.yml:
--------------------------------------------------------------------------------
 1 | name: Deploy Jupyter Book
 2 | 
 3 | on:
 4 |   push:
 5 |     branches:
 6 |     - main
 7 |   pull_request:
 8 |   workflow_dispatch:
 9 | 
10 | jobs:
11 | 
12 |   deploy-book:
13 |     runs-on: ubuntu-latest
14 | 
15 |     steps:
16 |     - uses: actions/checkout@v2
17 | 
18 |     - name: Set up Python 3.9
19 |       uses: actions/setup-python@v2
20 |       with:
21 |         python-version: '3.9'
22 | 
23 |     - name: Install Python dependencies
24 |       run: |
25 |         python -m pip install --upgrade pip setuptools wheel
26 |         python -m pip install --requirement binder/requirements.txt
27 |         python -m pip install --requirement book/requirements.txt
28 | 
29 |     - name: List installed dependencies
30 |       run: python -m pip list
31 | 
32 |     - name: Build the book
33 |       run: |
34 |         python -c "import matplotlib.pyplot as plt; plt" # Generate fonts
35 |         jupyter-book build book/
36 | 
37 |     - name: Deploy Jupyter book to GitHub pages
38 |       if: success() && github.event_name == 'push' && github.ref == 'refs/heads/main'
39 |       uses: peaceiris/actions-gh-pages@v3
40 |       with:
41 |         github_token: ${{ secrets.GITHUB_TOKEN }}
42 |         publish_dir: book/_build/html
43 |         force_orphan: true
44 |         user_name: 'github-actions[bot]'
45 |         user_email: 'github-actions[bot]@users.noreply.github.com'
46 |         commit_message: Deploy to GitHub pages
47 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | MANIFEST
 2 | build
 3 | dist
 4 | _build
 5 | docs/man/*.gz
 6 | docs/source/api/generated
 7 | docs/source/config.rst
 8 | docs/gh-pages
 9 | notebook/static/components
10 | notebook/static/style/*.min.css*
11 | notebook/static/*/js/built/
12 | notebook/static/*/built/
13 | notebook/static/built/
14 | notebook/static/*/js/main.min.js*
15 | notebook/static/lab/*bundle.js
16 | node_modules
17 | *.py[co]
18 | __pycache__
19 | *.egg-info
20 | *~
21 | *.bak
22 | .ipynb_checkpoints
23 | .tox
24 | .DS_Store
25 | \#*#
26 | .#*
27 | .coverage
28 | src
29 | 
30 | *.swp
31 | *.map
32 | .idea/
33 | Read the Docs
34 | config.rst
35 | 
36 | .pytest_cache
37 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | -   repo: https://github.com/pre-commit/pre-commit-hooks
 3 |     rev: v4.0.1
 4 |     hooks:
 5 |     - id: check-added-large-files
 6 |     - id: check-case-conflict
 7 |     - id: check-merge-conflict
 8 |     - id: check-symlinks
 9 |     - id: check-json
10 |     - id: check-yaml
11 |     - id: check-toml
12 |     - id: check-xml
13 |     - id: debug-statements
14 |     - id: end-of-file-fixer
15 |     - id: mixed-line-ending
16 |     - id: trailing-whitespace
17 | 
18 | -   repo: https://github.com/psf/black
19 |     rev: 21.12b0
20 |     hooks:
21 |     - id: black-jupyter
22 | 
23 | -   repo: https://github.com/asottile/pyupgrade
24 |     rev: v2.29.1
25 |     hooks:
26 |     - id: pyupgrade
27 |       args: ["--py39-plus"]
28 | 
29 | -   repo: https://github.com/nbQA-dev/nbQA
30 |     rev: 1.2.2
31 |     hooks:
32 |     - id: nbqa-pyupgrade
33 |       additional_dependencies: [pyupgrade==2.29.1]
34 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Matthew Feickert
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | all: build
 2 | 
 3 | defualt: build
 4 | 
 5 | build:
 6 | 	jupyter-book build book/
 7 | 
 8 | clean: book/_build
 9 | 	rm -rf book/_build
10 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Statistics Notes
 2 | 
 3 | Personal notes on statistics with a focus on applications to experimental high energy physics.
 4 | 
 5 | Most of the notes are in the form of [Jupyter](http://jupyter.org/) notebooks, which are organized in the `Notebooks` directory.
 6 | 
 7 | [![license](https://img.shields.io/github/license/matthewfeickert/Statistics-Notes.svg)]()
 8 | [![DOI](https://zenodo.org/badge/91207877.svg)](https://zenodo.org/badge/latestdoi/91207877)
 9 | [![Deploy Jupyter Book](https://github.com/matthewfeickert/Statistics-Notes/workflows/Deploy%20Jupyter%20Book/badge.svg?branch=main)](https://matthewfeickert.github.io/Statistics-Notes/)
10 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/matthewfeickert/Statistics-Notes/main)
11 | [![nbviewer](https://img.shields.io/badge/view%20on-nbviewer-brightgreen.svg)](http://nbviewer.jupyter.org/github/matthewfeickert/Statistics-Notes/tree/main/book/notebooks/)
12 | 
13 | ## References
14 | 
15 | - [_Data Analysis in High Energy Physics: A Practical Guide to Statistical Methods_](http://eu.wiley.com/WileyCDA/WileyTitle/productCd-3527410589.html), Behnke et al., (2013)
16 | - [_Statistical Data Analysis_](http://www.pp.rhul.ac.uk/~cowan/sda/), Glen Cowan, (1998)
17 | 
18 | ## Contributing
19 | 
20 | If you find an error please open an issue. If you find a typo please either open an issue, or, if you're feeling generous, open a pull request with your fix.
21 | 
22 | ## Authors
23 | 
24 | Primary Author: [Matthew Feickert](http://www.matthewfeickert.com/)
25 | 
26 | ## Acknowledgments
27 | 
28 | - All badges made by [shields.io](http://shields.io/)
29 | 


--------------------------------------------------------------------------------
/binder/requirements.txt:
--------------------------------------------------------------------------------
1 | ipywidgets
2 | matplotlib==3.5.0
3 | scipy==1.7.3
4 | sympy==1.9
5 | 


--------------------------------------------------------------------------------
/binder/trigger_binder.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | function trigger_binder() {
 4 |     local URL="${1}"
 5 | 
 6 |     curl -L --connect-timeout 10 --max-time 30 "${URL}"
 7 |     curl_return=$?
 8 | 
 9 |     # Return code 28 is when the --max-time is reached
10 |     if [ "${curl_return}" -eq 0 ] || [ "${curl_return}" -eq 28 ]; then
11 |         if [[ "${curl_return}" -eq 28 ]]; then
12 |             printf "\nBinder build started.\nCheck back soon.\n"
13 |         fi
14 |     else
15 |         return "${curl_return}"
16 |     fi
17 | 
18 |     return 0
19 | }
20 | 
21 | function main() {
22 |     # 1: the Binder build API URL to curl
23 |     trigger_binder $1
24 | }
25 | 
26 | main "$@" || exit 1
27 | 


--------------------------------------------------------------------------------
/book/_config.yml:
--------------------------------------------------------------------------------
 1 | # https://jupyterbook.org/customize/config.html
 2 | # Book settings
 3 | title: Statistics Notes
 4 | author: Matthew Feickert
 5 | logo: assets/logo.png
 6 | 
 7 | # HTML-specific settings
 8 | html:
 9 |   use_repository_button: true
10 |   navbar_number_sections: true
11 |   use_issues_button: true
12 |   use_edit_page_button: true
13 | 
14 | # Information about where the book exists on the web
15 | repository:
16 |   url: https://github.com/matthewfeickert/Statistics-Notes
17 |   path_to_book: book
18 |   branch: main
19 | 
20 | # Launch button settings
21 | launch_buttons:
22 |   notebook_interface: jupyterlab
23 |   binderhub_url: https://mybinder.org
24 |   colab_url: https://colab.research.google.com
25 | 
26 | latex:
27 |   latex_documents:
28 |     targetname: book.tex
29 | 


--------------------------------------------------------------------------------
/book/_toc.yml:
--------------------------------------------------------------------------------
 1 | # https://jupyterbook.org/customize/toc.html
 2 | format: jb-book
 3 | root: introduction
 4 | parts:
 5 | - caption: Introduction
 6 |   chapters:
 7 |   - file: notebooks/Introductory/Likelihood-Function
 8 |     title: The Likelihood Function
 9 |   - file: notebooks/Introductory/Error-on-means.ipynb
10 |     title: Uncertaninty on Means
11 |   - file: notebooks/Introductory/Gaussian-Sampling.ipynb
12 |     title: Gaussian Sampling
13 | 
14 | - caption: Probability Distributions
15 |   chapters:
16 |   - file: notebooks/Introductory/Gaussian-Distribution.ipynb
17 |     title: The Normal Distribution
18 |   - file: notebooks/Introductory/Chi-Squared-Distribution.ipynb
19 |     title: Chi-Squared Distribution
20 | 
21 | - caption: Results of Probability Theory
22 |   chapters:
23 |   - file: notebooks/Introductory/probability-integral-transform.ipynb
24 |     title: Probability Integral Transform
25 | 
26 | - caption: Simulation
27 |   chapters:
28 |   - file: notebooks/simulation/Rejection-Sampling-MC.ipynb
29 |     title: Monte Carlo Simulation via Rejection Sampling
30 | 
31 | - caption: Concepts in HEP
32 |   chapters:
33 |   - file: notebooks/HEP/Extended-Likelihood.ipynb
34 |     title: Extended Likelihood
35 | 


--------------------------------------------------------------------------------
/book/assets/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/matthewfeickert/Statistics-Notes/088181920b0f560fdd2ed593d3653f67baa56190/book/assets/logo.png


--------------------------------------------------------------------------------
/book/introduction.md:
--------------------------------------------------------------------------------
 1 | # Statistics Notes
 2 | 
 3 | Personal notes on statistics with a focus on applications to experimental high energy physics.
 4 | 
 5 | [![DOI](https://zenodo.org/badge/91207877.svg)](https://zenodo.org/badge/latestdoi/91207877)
 6 | [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/matthewfeickert/Statistics-Notes/main)
 7 | 
 8 | ## References
 9 | 
10 | - [_Data Analysis in High Energy Physics: A Practical Guide to Statistical Methods_](http://eu.wiley.com/WileyCDA/WileyTitle/productCd-3527410589.html), Behnke et al., (2013)
11 | - [_Statistical Data Analysis_](http://www.pp.rhul.ac.uk/~cowan/sda/), Glen Cowan, (1998)
12 | 


--------------------------------------------------------------------------------
/book/notebooks/HEP/Extended-Likelihood.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Extended Likelihood"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Unbinned Extended Likelihood"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "Let $x$ be a random variable distributed according to a p.d.f. $~f\\left(x\\,\\middle|\\,\\vec{\\theta}\\right)$,"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "metadata": {},
 27 |    "source": [
 28 |     "$$\n",
 29 |     "\\begin{equation*}\n",
 30 |     "x \\sim f\\left(x\\,\\middle|\\,\\vec{\\theta}\\right),\n",
 31 |     "\\end{equation*}\n",
 32 |     "$$"
 33 |    ]
 34 |   },
 35 |   {
 36 |    "cell_type": "markdown",
 37 |    "metadata": {},
 38 |    "source": [
 39 |     "with $n$ observations, $\\vec{x} = \\left(x_1, \\cdots, x_n\\right)$, and $m$ unknown parameters, $\\vec{\\theta} = \\left(\\theta_1, \\cdots, \\theta_m\\right)$. The likelihood would normally then be"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "$$\n",
 47 |     "L\\left(\\vec{\\theta}\\right) = \\prod_{i=1}^{n} f\\left(x_i; \\vec{\\theta}\\right).\n",
 48 |     "$$"
 49 |    ]
 50 |   },
 51 |   {
 52 |    "cell_type": "markdown",
 53 |    "metadata": {},
 54 |    "source": [
 55 |     "However, if $n$ itself is a Poisson random variable with mean $\\nu$,"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "markdown",
 60 |    "metadata": {},
 61 |    "source": [
 62 |     "$$\n",
 63 |     "n \\sim \\text{Pois}\\left(n \\,\\middle|\\, \\nu\\right),\n",
 64 |     "$$"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "markdown",
 69 |    "metadata": {},
 70 |    "source": [
 71 |     "then it follows that"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "markdown",
 76 |    "metadata": {},
 77 |    "source": [
 78 |     "$$\n",
 79 |     "\\begin{align}\n",
 80 |     "L\\left(\\nu; \\vec{\\theta}\\right) &= \\text{Pois}\\left(n; \\nu\\right) \\prod_{i=1}^{n} f\\left(x_i; \\vec{\\theta}\\right) \\notag\\\\\n",
 81 |     "    &= \\frac{\\nu^{n}\\,e^{-\\nu}}{n!} \\prod_{i=1}^{n} f\\left(x_i; \\vec{\\theta}\\right) \\notag\\\\\n",
 82 |     "    &= \\frac{e^{-\\nu}}{n!} \\prod_{i=1}^{n} \\nu\\, f\\left(x_i; \\vec{\\theta}\\right).\n",
 83 |     "%\\label{eq_extended-likelihood}\n",
 84 |     "\\end{align}\n",
 85 |     "$$"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "markdown",
 90 |    "metadata": {},
 91 |    "source": [
 92 |     "This equation is known as the \"extended likelihood function\", as we have \"extended\" the information encoded in the likelihood to include the expected number of events &mdash; a quantity of great importance to physicists. It can be see from inspection though that the extended likelihood still follows the form of a likelihood, so no different treatment is required in finding its MLE estimators."
 93 |    ]
 94 |   },
 95 |   {
 96 |    "cell_type": "markdown",
 97 |    "metadata": {},
 98 |    "source": [
 99 |     "### $\\nu$ is dependent on $\\vec{\\theta}$"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "markdown",
104 |    "metadata": {},
105 |    "source": [
106 |     "In the instance that $\\nu$ is a function of $\\vec{\\theta}$, $\\nu = \\nu\\left(\\vec{\\theta}\\right)$, then"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "markdown",
111 |    "metadata": {},
112 |    "source": [
113 |     "$$\n",
114 |     "L\\left(\\vec{\\theta}\\right) = \\frac{e^{-\\nu\\left(\\vec{\\theta}\\right)}}{n!} \\prod_{i=1}^{n} \\nu\\left(\\vec{\\theta}\\right)\\, f\\left(x_i; \\vec{\\theta}\\right),\n",
115 |     "$$"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "markdown",
120 |    "metadata": {},
121 |    "source": [
122 |     "such that"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "markdown",
127 |    "metadata": {},
128 |    "source": [
129 |     "$$\n",
130 |     "\\ln L\\left(\\vec{\\theta}\\right) = - \\nu\\left(\\vec{\\theta}\\right) - \\ln n! + \\sum_{i=1}^{n} \\ln\\left(\\nu\\left(\\vec{\\theta}\\right)\\, f\\left(x_i; \\vec{\\theta}\\right)\\right),\n",
131 |     "$$"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "markdown",
136 |    "metadata": {},
137 |    "source": [
138 |     "where $n$ is a constant of the data, and so will have no effect on finding the estimators of any parameters, leading it to be safely ignored. Thus,"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "markdown",
143 |    "metadata": {},
144 |    "source": [
145 |     "$$\n",
146 |     "\\begin{equation}\n",
147 |     "\\boxed{-\\ln L\\left(\\vec{\\theta}\\right) = \\nu\\left(\\vec{\\theta}\\right) -\\sum_{i=1}^{n} \\ln\\left(\\nu\\left(\\vec{\\theta}\\right)\\, f\\left(x_i; \\vec{\\theta}\\right)\\right)}\\,.\n",
148 |     "\\end{equation}\n",
149 |     "$$"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "markdown",
154 |    "metadata": {},
155 |    "source": [
156 |     "Note that as the resultant estimators, $\\hat{\\vec{\\theta}}$, exploit information from both $n$ and $x$ this should generally lead to smaller variations for $\\hat{\\vec{\\theta}}$."
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "markdown",
161 |    "metadata": {},
162 |    "source": [
163 |     "### $\\nu$ is independent of $\\vec{\\theta}$"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "markdown",
168 |    "metadata": {},
169 |    "source": [
170 |     "In the instance that $\\nu$ is independent of $\\vec{\\theta}$,"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "markdown",
175 |    "metadata": {},
176 |    "source": [
177 |     "$$\n",
178 |     "L\\left(\\nu; \\vec{\\theta}\\right) = \\frac{e^{-\\nu}}{n!} \\prod_{i=1}^{n} \\nu\\, f\\left(x_i; \\vec{\\theta}\\right),\n",
179 |     "$$"
180 |    ]
181 |   },
182 |   {
183 |    "cell_type": "markdown",
184 |    "metadata": {},
185 |    "source": [
186 |     "then"
187 |    ]
188 |   },
189 |   {
190 |    "cell_type": "markdown",
191 |    "metadata": {},
192 |    "source": [
193 |     "$$\n",
194 |     "\\begin{split}\n",
195 |     "\\ln L\\left(\\nu; \\vec{\\theta}\\right) &= - \\nu - \\ln n! + \\sum_{i=1}^{n} \\ln\\left(\\nu\\, f\\left(x_i; \\vec{\\theta}\\right)\\right)\\\\\n",
196 |     "    &= - \\nu  + \\sum_{i=1}^{n} \\left(\\ln\\nu + \\ln f\\left(x_i; \\vec{\\theta}\\right)\\right) - \\ln n! \\\\\n",
197 |     "    &= - \\nu + n \\ln\\nu  + \\sum_{i=1}^{n} \\ln f\\left(x_i; \\vec{\\theta}\\right) - \\ln n!\\,,\n",
198 |     "\\end{split}\n",
199 |     "$$"
200 |    ]
201 |   },
202 |   {
203 |    "cell_type": "markdown",
204 |    "metadata": {},
205 |    "source": [
206 |     "such that"
207 |    ]
208 |   },
209 |   {
210 |    "cell_type": "markdown",
211 |    "metadata": {},
212 |    "source": [
213 |     "$$\n",
214 |     "\\begin{equation}\n",
215 |     "\\boxed{-\\ln L\\left(\\nu; \\vec{\\theta}\\right) = \\nu - n \\ln\\nu  - \\sum_{i=1}^{n} \\ln f\\left(x_i; \\vec{\\theta}\\right)}\\,.\n",
216 |     "\\end{equation}\n",
217 |     "$$"
218 |    ]
219 |   },
220 |   {
221 |    "cell_type": "markdown",
222 |    "metadata": {},
223 |    "source": [
224 |     "As $L$ is maximized with respect to a variable $\\alpha$ when $−\\ln ⁡L$ is minimized,"
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "markdown",
229 |    "metadata": {},
230 |    "source": [
231 |     "$$\n",
232 |     "\\frac{\\partial \\left(-\\ln L\\right)}{\\partial \\alpha} = 0,\n",
233 |     "$$"
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "markdown",
238 |    "metadata": {},
239 |    "source": [
240 |     "then it is seen from"
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "markdown",
245 |    "metadata": {},
246 |    "source": [
247 |     "$$\n",
248 |     "\\frac{\\partial \\left(-\\ln L\\left(\\nu; \\vec{\\theta}\\right)\\right)}{\\partial \\nu} = 1 - \\frac{n}{\\nu} = 0,\n",
249 |     "$$"
250 |    ]
251 |   },
252 |   {
253 |    "cell_type": "markdown",
254 |    "metadata": {},
255 |    "source": [
256 |     "that the maximum likelihood estimator for $\\nu$ is\n",
257 |     "\\begin{equation}\n",
258 |     "\\hat{\\nu} = n\\,,\n",
259 |     "\\end{equation}"
260 |    ]
261 |   },
262 |   {
263 |    "cell_type": "markdown",
264 |    "metadata": {},
265 |    "source": [
266 |     "and that"
267 |    ]
268 |   },
269 |   {
270 |    "cell_type": "markdown",
271 |    "metadata": {},
272 |    "source": [
273 |     "$$\n",
274 |     "\\frac{\\partial \\left(-\\ln L\\left(\\nu; \\vec{\\theta}\\right)\\right)}{\\partial \\theta_j} = 0,\n",
275 |     "$$"
276 |    ]
277 |   },
278 |   {
279 |    "cell_type": "markdown",
280 |    "metadata": {},
281 |    "source": [
282 |     "results in the the same estimators $\\hat{\\vec{\\theta}}$ as in the \"usual\" maximum likelihood case."
283 |    ]
284 |   },
285 |   {
286 |    "cell_type": "markdown",
287 |    "metadata": {},
288 |    "source": [
289 |     "If the p.d.f. is of the form of a mixture model,"
290 |    ]
291 |   },
292 |   {
293 |    "cell_type": "markdown",
294 |    "metadata": {},
295 |    "source": [
296 |     "$$\n",
297 |     "f\\left(x; \\vec{\\theta}\\right) = \\sum_{i=1}^{m} \\theta_i\\, f_i\\left(x\\right),\n",
298 |     "$$"
299 |    ]
300 |   },
301 |   {
302 |    "cell_type": "markdown",
303 |    "metadata": {},
304 |    "source": [
305 |     "and an estimate of the weights is of interest, then as the parameters are not fully independent, given the constraint"
306 |    ]
307 |   },
308 |   {
309 |    "cell_type": "markdown",
310 |    "metadata": {},
311 |    "source": [
312 |     "$$\n",
313 |     "\\sum_{i=1}^{m} \\theta_i = 1,\n",
314 |     "$$"
315 |    ]
316 |   },
317 |   {
318 |    "cell_type": "markdown",
319 |    "metadata": {},
320 |    "source": [
321 |     "then one of the $m$ parameters can be replaced with"
322 |    ]
323 |   },
324 |   {
325 |    "cell_type": "markdown",
326 |    "metadata": {},
327 |    "source": [
328 |     "$$\n",
329 |     "1 - \\sum_{i=1}^{m-1} \\theta_i,\n",
330 |     "$$"
331 |    ]
332 |   },
333 |   {
334 |    "cell_type": "markdown",
335 |    "metadata": {},
336 |    "source": [
337 |     "so that the p.d.f. only constrains $m-1$ parameters. This then allows the the likelihood to be constructed that allows to find the estimator for the unconstrained parameter."
338 |    ]
339 |   },
340 |   {
341 |    "cell_type": "markdown",
342 |    "metadata": {},
343 |    "source": [
344 |     "Equivalently, the extended likelihood function can be used, as"
345 |    ]
346 |   },
347 |   {
348 |    "cell_type": "markdown",
349 |    "metadata": {},
350 |    "source": [
351 |     "$$\n",
352 |     "\\begin{split}\n",
353 |     "\\ln L\\left(\\nu; \\vec{\\theta}\\right) &= - \\nu + n \\ln\\nu  + \\sum_{i=1}^{n} \\ln f\\left(x_i; \\vec{\\theta}\\right) \\\\\n",
354 |     "    &= - \\nu  + \\sum_{i=1}^{n} \\ln \\left(\\nu\\,f\\left(x_i; \\vec{\\theta}\\right)\\right)\\\\\n",
355 |     "    &= - \\nu  + \\sum_{i=1}^{n} \\ln \\left(\\sum_{j=1}^{m} \\nu\\,\\theta_j\\, f_j\\left(x_i\\right)\\right).\n",
356 |     "\\end{split}\n",
357 |     "$$"
358 |    ]
359 |   },
360 |   {
361 |    "cell_type": "markdown",
362 |    "metadata": {},
363 |    "source": [
364 |     "Letting $\\mu_i$, the expected number of events of type $i$, be $\\mu_i \\equiv \\theta_i \\nu$, for $\\vec{\\mu} = \\left(\\mu_1, \\cdots, \\mu_m\\right)$, then"
365 |    ]
366 |   },
367 |   {
368 |    "cell_type": "markdown",
369 |    "metadata": {},
370 |    "source": [
371 |     "$$\n",
372 |     "\\ln L\\left(\\vec{\\mu}\\right) = - \\sum_{j=1}^{m} \\mu_j  + \\sum_{i=1}^{n} \\ln \\left(\\sum_{j=1}^{m} \\mu_j\\, f_j\\left(x_i\\right)\\right).\n",
373 |     "$$"
374 |    ]
375 |   },
376 |   {
377 |    "cell_type": "markdown",
378 |    "metadata": {},
379 |    "source": [
380 |     "Here, $\\vec{\\mu}$ are unconstrained and all parameters are treated symmetrically, such that $\\hat{\\mu_i}$ give the maximum likelihood estimator means of number of events of type $i$."
381 |    ]
382 |   },
383 |   {
384 |    "cell_type": "markdown",
385 |    "metadata": {},
386 |    "source": [
387 |     "#### [Toy Example](http://www.physi.uni-heidelberg.de/~menzemer/Stat0708/statistik_vorlesung_7.pdf#page=10)"
388 |    ]
389 |   },
390 |   {
391 |    "cell_type": "code",
392 |    "execution_count": null,
393 |    "metadata": {},
394 |    "outputs": [],
395 |    "source": [
396 |     "import numpy as np\n",
397 |     "from scipy.optimize import minimize"
398 |    ]
399 |   },
400 |   {
401 |    "cell_type": "code",
402 |    "execution_count": null,
403 |    "metadata": {},
404 |    "outputs": [],
405 |    "source": [
406 |     "def NLL(x, n, S, B):\n",
407 |     "    nll = sum(\n",
408 |     "        (x[0] * S[meas] + B[meas]) - (n[meas] * np.log(x[0] * S[meas] + B[meas]))\n",
409 |     "        for meas in np.arange(0, len(n))\n",
410 |     "    )\n",
411 |     "    return nll"
412 |    ]
413 |   },
414 |   {
415 |    "cell_type": "code",
416 |    "execution_count": null,
417 |    "metadata": {},
418 |    "outputs": [],
419 |    "source": [
420 |     "n_observed = [6, 24]\n",
421 |     "f = np.array([1.0])\n",
422 |     "S = [0.9, 4.0]\n",
423 |     "B = [0.2, 24.0]\n",
424 |     "\n",
425 |     "model = minimize(NLL, f, args=(n_observed, S, B), method=\"L-BFGS-B\", bounds=[(0, 10)])\n",
426 |     "print(f\"The MLE estimate for f: {model.x[0]}\")"
427 |    ]
428 |   },
429 |   {
430 |    "cell_type": "markdown",
431 |    "metadata": {},
432 |    "source": [
433 |     "#### HistFactory Example"
434 |    ]
435 |   },
436 |   {
437 |    "cell_type": "markdown",
438 |    "metadata": {},
439 |    "source": [
440 |     "Consider a single channel with one signal and one bakcagound contribution (and no systematics). For $n$ events, signal model $f_{S}(x_e)$, background model $f_{B}(x_e)$, $S$ expected signal events, $B$ expected backagound events, and signal fraciton $\\mu$, a \"marked Poisson model\" [2] may be constructed, which treating the data as fixed results in the likelihood of"
441 |    ]
442 |   },
443 |   {
444 |    "cell_type": "markdown",
445 |    "metadata": {},
446 |    "source": [
447 |     "$$\n",
448 |     "\\begin{split}\n",
449 |     "L\\left(\\mu\\right) &= \\text{Pois}\\left(n \\,\\middle|\\, \\mu S + B\\right) \\prod_{e=1}^{n} \\frac{\\mu S\\, f_{S}\\left(x_e\\right) + B\\, f_{B}\\left(x_e\\right)}{\\mu S + B}\\\\\n",
450 |     "    &= \\frac{\\left(\\mu S + B\\right)^{n} e^{-\\left(\\mu S + B\\right)}}{n!} \\prod_{e=1}^{n} \\frac{\\mu S\\, f_{S}\\left(x_e\\right) + B\\, f_{B}\\left(x_e\\right)}{\\mu S + B}\\\\\n",
451 |     "    &= \\frac{e^{-\\left(\\mu S + B\\right)}}{n!} \\prod_{e=1}^{n} \\left(\\,\\mu S\\, f_{S}\\left(x_e\\right) + B\\, f_{B}\\left(x_e\\right)\\right),\n",
452 |     "\\end{split}\n",
453 |     "$$"
454 |    ]
455 |   },
456 |   {
457 |    "cell_type": "markdown",
458 |    "metadata": {},
459 |    "source": [
460 |     "and so"
461 |    ]
462 |   },
463 |   {
464 |    "cell_type": "markdown",
465 |    "metadata": {},
466 |    "source": [
467 |     "$$\n",
468 |     "-\\ln L\\left(\\mu\\right) = \\left(\\mu S + B\\right) - \\sum_{e=1}^{n} \\ln \\left(\\,\\mu S\\, f_{S}\\left(x_e\\right) + B\\, f_{B}\\left(x_e\\right)\\right) + \\underbrace{\\ln n!}_{\\text{constant}}.\n",
469 |     "$$"
470 |    ]
471 |   },
472 |   {
473 |    "cell_type": "markdown",
474 |    "metadata": {},
475 |    "source": [
476 |     "## Binned Extended Likelihood"
477 |    ]
478 |   },
479 |   {
480 |    "cell_type": "markdown",
481 |    "metadata": {},
482 |    "source": [
483 |     "## References and Acknowledgements\n",
484 |     "1. [Statistical Data Analysis](http://www.pp.rhul.ac.uk/~cowan/sda/), Glen Cowan, 1998\n",
485 |     "2. ROOT collaboration, K. Cranmer, G. Lewis, L. Moneta, A. Shibata and W. Verkerke, [_HistFactory: A tool for creating statistical models for use with RooFit and RooStats_](http://inspirehep.net/record/1236448), 2012.\n",
486 |     "3. [Vince Croft](https://www.nikhef.nl/~vcroft/), Discussions with the author at CERN, July 2017"
487 |    ]
488 |   }
489 |  ],
490 |  "metadata": {
491 |   "kernelspec": {
492 |    "display_name": "Python 3",
493 |    "language": "python",
494 |    "name": "python3"
495 |   },
496 |   "language_info": {
497 |    "codemirror_mode": {
498 |     "name": "ipython",
499 |     "version": 3
500 |    },
501 |    "file_extension": ".py",
502 |    "mimetype": "text/x-python",
503 |    "name": "python",
504 |    "nbconvert_exporter": "python",
505 |    "pygments_lexer": "ipython3",
506 |    "version": "3.7.5"
507 |   }
508 |  },
509 |  "nbformat": 4,
510 |  "nbformat_minor": 4
511 | }
512 | 


--------------------------------------------------------------------------------
/book/notebooks/Introductory/Chi-Squared-Distribution.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# The $\\chi^2$ Distribution"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## $\\chi^2$ Test Statistic"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "If we make $n$ ranom samples (observations) from Gaussian (Normal) distributions with known means, $\\mu_i$, and known variances, $\\sigma_i^2$, it is seen that the total squared deviation,"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "metadata": {},
 27 |    "source": [
 28 |     "$$\n",
 29 |     "\\chi^2 = \\sum_{i=1}^{n} \\left(\\frac{x_i - \\mu_i}{\\sigma_i}\\right)^2\\,,\n",
 30 |     "$$"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "markdown",
 35 |    "metadata": {},
 36 |    "source": [
 37 |     "follows a $\\chi^2$ distribution with $n$ degrees of freedom."
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "markdown",
 42 |    "metadata": {},
 43 |    "source": [
 44 |     "## Probability Distribution Function"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "markdown",
 49 |    "metadata": {},
 50 |    "source": [
 51 |     "The $\\chi^2$ probability distribution function for $k$ degrees of freedom (the number of parameters that are allowed to vary) is given by"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "markdown",
 56 |    "metadata": {},
 57 |    "source": [
 58 |     "$$\n",
 59 |     "f\\left(\\chi^2\\,;k\\right) = \\frac{\\displaystyle 1}{\\displaystyle 2^{k/2} \\,\\Gamma\\left(k\\,/2\\right)}\\, \\chi^{k-2}\\,e^{-\\chi^2/2}\\,,\n",
 60 |     "$$"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "markdown",
 65 |    "metadata": {},
 66 |    "source": [
 67 |     "where if there are no constrained variables the number of degrees of freedom, $k$, is equal to the number of observations, $k=n$. The p.d.f. is often abbreviated in notation from $f\\left(\\chi^2\\,;k\\right)$ to $\\chi^2_k$."
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "markdown",
 72 |    "metadata": {},
 73 |    "source": [
 74 |     "A reminder that for integer values of $k$, the Gamma function is $\\Gamma\\left(k\\right) = \\left(k-1\\right)!$, and that $\\Gamma\\left(x+1\\right) = x\\Gamma\\left(x\\right)$, and $\\Gamma\\left(1/2\\right) = \\sqrt{\\pi}$."
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "markdown",
 79 |    "metadata": {},
 80 |    "source": [
 81 |     "## Mean"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "markdown",
 86 |    "metadata": {},
 87 |    "source": [
 88 |     "Letting $\\chi^2=z$, and noting that the form of the Gamma function is"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "$$\n",
 96 |     "\\Gamma\\left(z\\right) = \\int\\limits_{0}^{\\infty} x^{z-1}\\,e^{-x}\\,dx,\n",
 97 |     "$$"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "markdown",
102 |    "metadata": {},
103 |    "source": [
104 |     "it is seen that the mean of the $\\chi^2$ distribution $f\\left(\\chi^2 ; k\\right)$ is"
105 |    ]
106 |   },
107 |   {
108 |    "cell_type": "markdown",
109 |    "metadata": {},
110 |    "source": [
111 |     "$$\n",
112 |     "\\begin{align}\n",
113 |     "\\mu &= \\textrm{E}\\left[z\\right] = \\displaystyle\\int\\limits_{0}^{\\infty} z\\, \\frac{\\displaystyle 1}{\\displaystyle 2^{k/2} \\,\\Gamma\\left(k\\,/2\\right)}\\, z^{k/2-1}\\,e^{-z\\,/2}\\,dz \\\\\n",
114 |     "    &= \\displaystyle \\frac{\\displaystyle 1}{\\displaystyle \\Gamma\\left(k\\,/2\\right)} \\int\\limits_{0}^{\\infty} \\left(\\frac{z}{2}\\right)^{k/2}\\,e^{-z\\,/2}\\,dz = \\displaystyle \\frac{\\displaystyle 1}{\\displaystyle \\Gamma\\left(k\\,/2\\right)} \\int\\limits_{0}^{\\infty} x^{k/2}\\,e^{-x}\\,2 \\,dx \\\\\n",
115 |     "    &= \\displaystyle \\frac{\\displaystyle 2 \\,\\Gamma\\left(k\\,/2 + 1\\right)}{\\displaystyle \\Gamma\\left(k\\,/2\\right)} \\\\\n",
116 |     "    &= \\displaystyle 2 \\frac{k}{2} \\frac{\\displaystyle \\Gamma\\left(k\\,/2\\right)}{\\displaystyle \\Gamma\\left(k\\,/2\\right)} \\\\\n",
117 |     "    &= k.\n",
118 |     "\\end{align}\n",
119 |     "$$"
120 |    ]
121 |   },
122 |   {
123 |    "cell_type": "markdown",
124 |    "metadata": {},
125 |    "source": [
126 |     "## Variance"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "markdown",
131 |    "metadata": {},
132 |    "source": [
133 |     "Likewise, the variance is"
134 |    ]
135 |   },
136 |   {
137 |    "cell_type": "markdown",
138 |    "metadata": {},
139 |    "source": [
140 |     "$$\n",
141 |     "\\begin{align}\n",
142 |     "\\textrm{Var}\\left[z\\right] &= \\textrm{E}\\left[\\left(z-\\textrm{E}\\left[z\\right]\\right)^2\\right] = \\displaystyle\\int\\limits_{0}^{\\infty} \\left(z - k\\right)^2\\, \\frac{\\displaystyle 1}{\\displaystyle 2^{k/2} \\,\\Gamma\\left(k\\,/2\\right)}\\, z^{k/2-1}\\,e^{-z\\,/2}\\,dz \\\\\n",
143 |     "    &= \\displaystyle\\int\\limits_{0}^{\\infty} z^2\\, f\\left(z \\,; k\\right)\\,dz - 2k\\int\\limits_{0}^{\\infty} z\\,\\,f\\left(z \\,; k\\right)\\,dz + k^2\\int\\limits_{0}^{\\infty} f\\left(z \\,; k\\right)\\,dz \\\\\n",
144 |     "    &= \\displaystyle\\int\\limits_{0}^{\\infty} z^2 \\frac{\\displaystyle 1}{\\displaystyle 2^{k/2} \\,\\Gamma\\left(k\\,/2\\right)}\\, z^{k/2-1}\\,e^{-z\\,/2}\\,dz - 2k^2 + k^2\\\\\n",
145 |     "    &= \\displaystyle\\int\\limits_{0}^{\\infty} \\frac{\\displaystyle 1}{\\displaystyle 2^{k/2} \\,\\Gamma\\left(k\\,/2\\right)}\\, z^{k/2+1}\\,e^{-z\\,/2}\\,dz - k^2\\\\\n",
146 |     "    &= \\frac{\\displaystyle 2}{\\displaystyle \\Gamma\\left(k\\,/2\\right)} \\displaystyle\\int\\limits_{0}^{\\infty} \\left(\\frac{z}{2}\\right)^{k/2+1}\\,e^{-z\\,/2}\\,dz - k^2 = \\frac{\\displaystyle 2}{\\displaystyle \\Gamma\\left(k\\,/2\\right)} \\displaystyle\\int\\limits_{0}^{\\infty} x^{k/2+1}\\,e^{-x}\\,2\\,dx - k^2 \\\\\n",
147 |     "    &= \\displaystyle \\frac{\\displaystyle 4 \\,\\Gamma\\left(k\\,/2 + 2\\right)}{\\displaystyle \\Gamma\\left(k\\,/2\\right)} - k^2 \\\\\n",
148 |     "    &= \\displaystyle 4 \\left(\\frac{k}{2} + 1\\right) \\frac{\\displaystyle \\Gamma\\left(k\\,/2 + 1\\right)}{\\displaystyle \\Gamma\\left(k\\,/2\\right)} - k^2 \\\\\n",
149 |     "    &= \\displaystyle 4 \\left(\\frac{k}{2} + 1\\right) \\frac{k}{2} - k^2 \\\\\n",
150 |     "    &= k^2 + 2k - k^2 \\\\\n",
151 |     "    &= 2k,\n",
152 |     "\\end{align}\n",
153 |     "$$"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "markdown",
158 |    "metadata": {},
159 |    "source": [
160 |     "such that the standard deviation is"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "markdown",
165 |    "metadata": {},
166 |    "source": [
167 |     "$$\n",
168 |     "\\sigma = \\sqrt{2k}\\,.\n",
169 |     "$$"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "markdown",
174 |    "metadata": {},
175 |    "source": [
176 |     "Given this information we now plot the $\\chi^2$ p.d.f. with various numbers of degrees of freedom to visualize how the distribution's behaviour"
177 |    ]
178 |   },
179 |   {
180 |    "cell_type": "code",
181 |    "execution_count": null,
182 |    "metadata": {},
183 |    "outputs": [],
184 |    "source": [
185 |     "import numpy as np\n",
186 |     "import scipy.stats as stats\n",
187 |     "\n",
188 |     "import matplotlib.pyplot as plt"
189 |    ]
190 |   },
191 |   {
192 |    "cell_type": "code",
193 |    "execution_count": null,
194 |    "metadata": {},
195 |    "outputs": [],
196 |    "source": [
197 |     "# Plot the chi^2 distribution\n",
198 |     "x = np.linspace(0.0, 10.0, num=1000)\n",
199 |     "\n",
200 |     "[plt.plot(x, stats.chi2.pdf(x, df=ndf), label=fr\"$k = ${ndf}\") for ndf in range(1, 7)]\n",
201 |     "\n",
202 |     "plt.ylim(-0.01, 0.5)\n",
203 |     "\n",
204 |     "plt.xlabel(r\"$x=\\chi^2$\")\n",
205 |     "plt.ylabel(r\"$f\\left(x;k\\right)$\")\n",
206 |     "plt.title(r\"$\\chi^2$ distribution for various degrees of freedom\")\n",
207 |     "\n",
208 |     "plt.legend(loc=\"best\")\n",
209 |     "\n",
210 |     "plt.show();"
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "markdown",
215 |    "metadata": {},
216 |    "source": [
217 |     "## Cumulative Distribution Function"
218 |    ]
219 |   },
220 |   {
221 |    "cell_type": "markdown",
222 |    "metadata": {},
223 |    "source": [
224 |     "The cumulative distribution function (CDF) for the $\\chi^2$ distribution is (letting $z=\\chi^2$)"
225 |    ]
226 |   },
227 |   {
228 |    "cell_type": "markdown",
229 |    "metadata": {},
230 |    "source": [
231 |     "$$\n",
232 |     "\\begin{split}\n",
233 |     "F_{\\chi^2}\\left(x\\,; k\\right) &= \\int\\limits_{0}^{x} f_{\\chi^2}\\left(z\\,; k\\right) \\,dz \\\\\n",
234 |     "    &= \\int\\limits_{0}^{x} \\frac{\\displaystyle 1}{\\displaystyle 2^{k/2} \\,\\Gamma\\left(k\\,/2\\right)}\\, z^{k/2-1}\\,e^{-z/2} \\,dz \\\\\n",
235 |     "    &= \\int\\limits_{0}^{x} \\frac{\\displaystyle 1}{\\displaystyle 2 \\,\\Gamma\\left(k\\,/2\\right)}\\, \\left(\\frac{z}{2}\\right)^{k/2-1}\\,e^{-z/2} \\,dz = \\frac{1}{\\displaystyle 2 \\,\\Gamma\\left(k\\,/2\\right)}\\int\\limits_{0}^{x/2} t^{k/2-1}\\,e^{-t} \\,2\\,dt \\\\\n",
236 |     "    &= \\frac{1}{\\displaystyle \\Gamma\\left(k\\,/2\\right)}\\int\\limits_{0}^{x/2} t^{k/2-1}\\,e^{-t} \\,dt\n",
237 |     "\\end{split}\n",
238 |     "$$"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "markdown",
243 |    "metadata": {},
244 |    "source": [
245 |     "Noting the form of the [lower incomplete gamma function](https://en.wikipedia.org/wiki/Incomplete_gamma_function) is"
246 |    ]
247 |   },
248 |   {
249 |    "cell_type": "markdown",
250 |    "metadata": {},
251 |    "source": [
252 |     "$$\n",
253 |     "\\gamma\\left(s,x\\right) = \\int\\limits_{0}^{x} t^{s-1}\\,e^{-t} \\,dt\\,,\n",
254 |     "$$"
255 |    ]
256 |   },
257 |   {
258 |    "cell_type": "markdown",
259 |    "metadata": {},
260 |    "source": [
261 |     "and the form of the [regularized Gamma function](https://en.wikipedia.org/wiki/Incomplete_gamma_function#Regularized_Gamma_functions_and_Poisson_random_variables) is"
262 |    ]
263 |   },
264 |   {
265 |    "cell_type": "markdown",
266 |    "metadata": {},
267 |    "source": [
268 |     "$$\n",
269 |     "P\\left(s,x\\right) = \\frac{\\gamma\\left(s,x\\right)}{\\Gamma\\left(s\\right)}\\,,\n",
270 |     "$$"
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "markdown",
275 |    "metadata": {},
276 |    "source": [
277 |     "it is seen that"
278 |    ]
279 |   },
280 |   {
281 |    "cell_type": "markdown",
282 |    "metadata": {},
283 |    "source": [
284 |     "$$\n",
285 |     "\\begin{split}\n",
286 |     "F_{\\chi^2}\\left(x\\,; k\\right) &= \\frac{1}{\\displaystyle \\Gamma\\left(k\\,/2\\right)}\\int\\limits_{0}^{x/2} t^{k/2-1}\\,e^{-t} \\,dt \\\\\n",
287 |     "    &= \\frac{\\displaystyle \\gamma\\left(\\frac{k}{2},\\frac{x}{2}\\right)}{\\displaystyle \\Gamma\\left(\\frac{k}{2}\\right)} \\\\\n",
288 |     "    &= P\\left(\\frac{k}{2},\\frac{x}{2}\\right)\\,.\n",
289 |     "\\end{split}\n",
290 |     "$$"
291 |    ]
292 |   },
293 |   {
294 |    "cell_type": "markdown",
295 |    "metadata": {},
296 |    "source": [
297 |     "Thus, it is seen that the compliment to the CDF (the complementary cumulative distribution function (CCDF)),"
298 |    ]
299 |   },
300 |   {
301 |    "cell_type": "markdown",
302 |    "metadata": {},
303 |    "source": [
304 |     "$$\n",
305 |     "\\bar{F}_{\\chi^2}\\left(x\\,; k\\right) = 1-F_{\\chi^2}\\left(x\\,; k\\right),\n",
306 |     "$$"
307 |    ]
308 |   },
309 |   {
310 |    "cell_type": "markdown",
311 |    "metadata": {},
312 |    "source": [
313 |     "represents a one-sided (one-tailed) $p$-value for observing a $\\chi^2$ given a model &mdash; that is, the probability to observe a $\\chi^2$ value greater than or equal to that which was observed."
314 |    ]
315 |   },
316 |   {
317 |    "cell_type": "code",
318 |    "execution_count": null,
319 |    "metadata": {},
320 |    "outputs": [],
321 |    "source": [
322 |     "def chi2_ccdf(x, df):\n",
323 |     "    \"\"\"The complementary cumulative distribution function\n",
324 |     "\n",
325 |     "    Args:\n",
326 |     "        x: the value of chi^2\n",
327 |     "        df: the number of degrees of freedom\n",
328 |     "\n",
329 |     "    Returns:\n",
330 |     "        1 - the cumulative distribution function\n",
331 |     "    \"\"\"\n",
332 |     "    return 1.0 - stats.chi2.cdf(x=x, df=df)"
333 |    ]
334 |   },
335 |   {
336 |    "cell_type": "code",
337 |    "execution_count": null,
338 |    "metadata": {},
339 |    "outputs": [],
340 |    "source": [
341 |     "x = np.linspace(0.0, 10.0, num=1000)\n",
342 |     "fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 4.5))\n",
343 |     "\n",
344 |     "for ndf in range(1, 7):\n",
345 |     "    axes[0].plot(x, stats.chi2.cdf(x, df=ndf), label=fr\"$k = ${ndf}\")\n",
346 |     "    axes[1].plot(x, chi2_ccdf(x, df=ndf), label=fr\"$k = ${ndf}\")\n",
347 |     "\n",
348 |     "axes[0].set_xlabel(r\"$x=\\chi^2$\")\n",
349 |     "axes[0].set_ylabel(r\"$F\\left(x;k\\right)$\")\n",
350 |     "axes[0].set_title(r\"$\\chi^2$ CDF for various degrees of freedom\")\n",
351 |     "\n",
352 |     "axes[0].legend(loc=\"best\")\n",
353 |     "\n",
354 |     "axes[1].set_xlabel(r\"$x=\\chi^2$\")\n",
355 |     "axes[1].set_ylabel(r\"$\\bar{F}\\left(x;k\\right) = p$-value\")\n",
356 |     "axes[1].set_title(r\"$\\chi^2$ CCDF ($p$-value) for various degrees of freedom\")\n",
357 |     "\n",
358 |     "axes[1].legend(loc=\"best\")\n",
359 |     "\n",
360 |     "plt.show();"
361 |    ]
362 |   },
363 |   {
364 |    "cell_type": "markdown",
365 |    "metadata": {},
366 |    "source": [
367 |     "## Binned $\\chi^2$ per Degree of Freedom"
368 |    ]
369 |   },
370 |   {
371 |    "cell_type": "markdown",
372 |    "metadata": {},
373 |    "source": [
374 |     "TODO"
375 |    ]
376 |   },
377 |   {
378 |    "cell_type": "markdown",
379 |    "metadata": {},
380 |    "source": [
381 |     "## References"
382 |    ]
383 |   },
384 |   {
385 |    "cell_type": "markdown",
386 |    "metadata": {},
387 |    "source": [
388 |     "- \\[1\\] G. Cowan, _Statistical Data Analysis_, Oxford University Press, 1998\n",
389 |     "- \\[2\\] G. Cowan, \"Goodness of fit and Wilk's theorem\", Notes, 2013"
390 |    ]
391 |   }
392 |  ],
393 |  "metadata": {
394 |   "kernelspec": {
395 |    "display_name": "Python 3",
396 |    "language": "python",
397 |    "name": "python3"
398 |   },
399 |   "language_info": {
400 |    "codemirror_mode": {
401 |     "name": "ipython",
402 |     "version": 3
403 |    },
404 |    "file_extension": ".py",
405 |    "mimetype": "text/x-python",
406 |    "name": "python",
407 |    "nbconvert_exporter": "python",
408 |    "pygments_lexer": "ipython3",
409 |    "version": "3.7.5"
410 |   }
411 |  },
412 |  "nbformat": 4,
413 |  "nbformat_minor": 4
414 | }
415 | 


--------------------------------------------------------------------------------
/book/notebooks/Introductory/Error-on-means.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Unweighted and Weighted Means"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": null,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import numpy as np\n",
 17 |     "import scipy.stats as stats\n",
 18 |     "\n",
 19 |     "import matplotlib.pyplot as plt\n",
 20 |     "import matplotlib.patches as mpathces"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "markdown",
 25 |    "metadata": {},
 26 |    "source": [
 27 |     "## Maximum Likelihood Estimator motivated \"derivations\""
 28 |    ]
 29 |   },
 30 |   {
 31 |    "cell_type": "markdown",
 32 |    "metadata": {},
 33 |    "source": [
 34 |     "### Unweighted Means"
 35 |    ]
 36 |   },
 37 |   {
 38 |    "cell_type": "markdown",
 39 |    "metadata": {},
 40 |    "source": [
 41 |     "If we make $n$ identical statistically independent (isi) measurements of a random variable $x$, such that the measurements collected form data $\\vec{x} = \\left\\{x_i, \\cdots, x_n\\right\\}$, from a Gaussian (Normal) distribution,"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "markdown",
 46 |    "metadata": {},
 47 |    "source": [
 48 |     "$$\n",
 49 |     "\\begin{equation}\n",
 50 |     "L\\left(\\vec{x}; \\vec{\\theta}\\right) = \\prod_{i=1}^{n} f(x_i; \\mu, \\sigma) = \\frac{1}{(2\\pi)^{n/2} \\sigma^{n}} \\exp\\left(-\\frac{1}{2\\sigma^2} \\sum_{i=1}^{n} \\left(x_i - \\mu\\right)^2 \\right)\n",
 51 |     "\\end{equation}\n",
 52 |     "$$"
 53 |    ]
 54 |   },
 55 |   {
 56 |    "cell_type": "markdown",
 57 |    "metadata": {},
 58 |    "source": [
 59 |     "then"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "markdown",
 64 |    "metadata": {},
 65 |    "source": [
 66 |     "$$\n",
 67 |     "\\begin{equation}\n",
 68 |     "-\\ln L = \\frac{n}{2} \\ln\\left(2\\pi\\right) + n \\ln \\sigma + \\frac{1}{2\\sigma^2} \\sum_{i=1}^{n}\\left(x_i - \\mu\\right)^2\n",
 69 |     "\\end{equation}\n",
 70 |     "$$"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "markdown",
 75 |    "metadata": {},
 76 |    "source": [
 77 |     "and so $L$ is maximized with respect to a variable $\\alpha$ when $-\\ln L$ is minimized,"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "markdown",
 82 |    "metadata": {},
 83 |    "source": [
 84 |     "$$\n",
 85 |     "\\begin{equation*}\n",
 86 |     "\\frac{\\partial \\left(-\\ln L\\right)}{\\partial \\alpha} = 0.\n",
 87 |     "\\end{equation*}\n",
 88 |     "$$"
 89 |    ]
 90 |   },
 91 |   {
 92 |    "cell_type": "markdown",
 93 |    "metadata": {},
 94 |    "source": [
 95 |     "Thus, $L$ is maximized when"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "markdown",
100 |    "metadata": {},
101 |    "source": [
102 |     "$$\n",
103 |     "\\begin{equation*}\n",
104 |     "\\frac{\\partial \\left(-\\ln L\\right)}{\\partial \\mu} = -\\frac{1}{\\sigma^2} \\sum_{i=1}^{n}\\left(x_i - \\mu\\right) = 0,\n",
105 |     "\\end{equation*}\n",
106 |     "$$"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "markdown",
111 |    "metadata": {},
112 |    "source": [
113 |     "which occurs for"
114 |    ]
115 |   },
116 |   {
117 |    "cell_type": "markdown",
118 |    "metadata": {},
119 |    "source": [
120 |     "$$\n",
121 |     "\\begin{equation*}\n",
122 |     "\\sum_{i=1}^{n} x_i = n \\mu,\n",
123 |     "\\end{equation*}\n",
124 |     "$$"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "markdown",
129 |    "metadata": {},
130 |    "source": [
131 |     "such that the best estimate for true parameter $\\mu$ is"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "markdown",
136 |    "metadata": {},
137 |    "source": [
138 |     "$$\n",
139 |     "\\begin{equation}\n",
140 |     "\\boxed{\\hat{\\mu} = \\frac{1}{n} \\sum_{i=1}^{n} x_i = \\bar{x}\\,}\\,,\n",
141 |     "\\end{equation}\n",
142 |     "$$"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "markdown",
147 |    "metadata": {},
148 |    "source": [
149 |     "and $L$ is maximized when"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "markdown",
154 |    "metadata": {},
155 |    "source": [
156 |     "$$\n",
157 |     "\\begin{equation*}\n",
158 |     "\\frac{\\partial \\left(-\\ln L\\right)}{\\partial \\sigma} = \\frac{n}{\\sigma} - \\frac{1}{\\sigma^3} \\sum_{i=1}^{n} \\left(x_i - \\mu\\right) = 0,\n",
159 |     "\\end{equation*}\n",
160 |     "$$"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "markdown",
165 |    "metadata": {},
166 |    "source": [
167 |     "which occurs for"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "markdown",
172 |    "metadata": {},
173 |    "source": [
174 |     "$$\n",
175 |     "\\begin{equation*}\n",
176 |     "n\\sigma^2 = \\sum_{i=1}^{n} \\left(x_i - \\mu\\right)^2,\n",
177 |     "\\end{equation*}\n",
178 |     "$$"
179 |    ]
180 |   },
181 |   {
182 |    "cell_type": "markdown",
183 |    "metadata": {},
184 |    "source": [
185 |     "which is"
186 |    ]
187 |   },
188 |   {
189 |    "cell_type": "markdown",
190 |    "metadata": {},
191 |    "source": [
192 |     "$$\n",
193 |     "\\begin{equation*}\n",
194 |     "\\sigma = \\sqrt{\\frac{1}{n}\\sum_{i=1}^{n} \\left(x_i - \\mu\\right)^2}.\n",
195 |     "\\end{equation*}\n",
196 |     "$$"
197 |    ]
198 |   },
199 |   {
200 |    "cell_type": "markdown",
201 |    "metadata": {},
202 |    "source": [
203 |     "However, $\\mu$ is an unknown true parameter, and the best estimate of it is $\\hat{\\mu}$, which is in no\n",
204 |     "manner required to be equal to $\\mu$. Thus, the best estimate of $\\sigma$ is"
205 |    ]
206 |   },
207 |   {
208 |    "cell_type": "markdown",
209 |    "metadata": {},
210 |    "source": [
211 |     "$$\n",
212 |     "\\begin{equation}\n",
213 |     "\\boxed{\\hat{\\sigma}_{\\hat{\\mu}} = \\sqrt{\\frac{1}{n}\\sum_{i=1}^{n} \\left(x_i - \\hat{\\mu}\\right)^2} = \\sqrt{\\frac{1}{n}\\sum_{i=1}^{n} \\left(x_i - \\bar{x}\\,\\right)^2}\\,}\\,.\n",
214 |     "\\end{equation}\n",
215 |     "$$"
216 |    ]
217 |   },
218 |   {
219 |    "cell_type": "markdown",
220 |    "metadata": {},
221 |    "source": [
222 |     "If the separation from the mean of each observation, $\\left(x_i - \\bar{x}\\right) = \\delta x = \\text{constant}$, are the same then the uncertainty on the mean is found to be"
223 |    ]
224 |   },
225 |   {
226 |    "cell_type": "markdown",
227 |    "metadata": {},
228 |    "source": [
229 |     "$$\n",
230 |     "\\begin{equation*}\n",
231 |     "\\sigma_{\\hat{\\mu}} = \\frac{\\delta x}{\\sqrt{n}},\n",
232 |     "\\end{equation*}\n",
233 |     "$$"
234 |    ]
235 |   },
236 |   {
237 |    "cell_type": "markdown",
238 |    "metadata": {},
239 |    "source": [
240 |     "which is often referred to as the \"standard error\"."
241 |    ]
242 |   },
243 |   {
244 |    "cell_type": "markdown",
245 |    "metadata": {},
246 |    "source": [
247 |     "---\n",
248 |     "So, for a population of measurements sampled from a distribution, it can be said that the sample mean is"
249 |    ]
250 |   },
251 |   {
252 |    "cell_type": "markdown",
253 |    "metadata": {},
254 |    "source": [
255 |     "$$\\mu = \\frac{1}{n} \\sum_{i=1}^{n} x_i = \\bar{x},$$"
256 |    ]
257 |   },
258 |   {
259 |    "cell_type": "markdown",
260 |    "metadata": {},
261 |    "source": [
262 |     "and the standard deviation of the sample is"
263 |    ]
264 |   },
265 |   {
266 |    "cell_type": "markdown",
267 |    "metadata": {},
268 |    "source": [
269 |     "$$\n",
270 |     "\\begin{equation*}\n",
271 |     "\\sigma = \\sqrt{\\frac{1}{n}\\sum_{i=1}^{n} \\left(x_i - \\bar{x}\\,\\right)^2}.\n",
272 |     "\\end{equation*}\n",
273 |     "$$"
274 |    ]
275 |   },
276 |   {
277 |    "cell_type": "markdown",
278 |    "metadata": {},
279 |    "source": [
280 |     "---"
281 |    ]
282 |   },
283 |   {
284 |    "cell_type": "markdown",
285 |    "metadata": {},
286 |    "source": [
287 |     "### Weighted Means"
288 |    ]
289 |   },
290 |   {
291 |    "cell_type": "markdown",
292 |    "metadata": {},
293 |    "source": [
294 |     "Assume that $n$ individual measurements $x_i$ are spread around (unknown) true value $\\theta$ according to a Gaussian distribution, each with known width $\\sigma_i$."
295 |    ]
296 |   },
297 |   {
298 |    "cell_type": "markdown",
299 |    "metadata": {},
300 |    "source": [
301 |     "This then leads to the likelihood function"
302 |    ]
303 |   },
304 |   {
305 |    "cell_type": "markdown",
306 |    "metadata": {},
307 |    "source": [
308 |     "$$\n",
309 |     "\\begin{equation*}\n",
310 |     "L(\\theta) = \\prod_{i=1}^{n} \\frac{1}{\\sqrt{2\\pi}\\sigma_i} \\exp\\left(-\\frac{\\left(x_i - \\theta\\right)^2}{2\\sigma_i^2} \\right)\n",
311 |     "\\end{equation*}\n",
312 |     "$$"
313 |    ]
314 |   },
315 |   {
316 |    "cell_type": "markdown",
317 |    "metadata": {},
318 |    "source": [
319 |     "and so negative log-likelihood"
320 |    ]
321 |   },
322 |   {
323 |    "cell_type": "markdown",
324 |    "metadata": {},
325 |    "source": [
326 |     "$$\n",
327 |     "\\begin{equation}\n",
328 |     "-\\ln L = \\frac{1}{2} \\ln\\left(2\\pi\\right) + \\ln \\sigma_i + \\frac{1}{2\\sigma_i^2} \\sum_{i=1}^{n}\\left(x_i - \\theta\\right)^2.\n",
329 |     "\\end{equation}\n",
330 |     "$$"
331 |    ]
332 |   },
333 |   {
334 |    "cell_type": "markdown",
335 |    "metadata": {},
336 |    "source": [
337 |     "As before, $L$ is maximized with respect to a variable $\\alpha$ when $-\\ln L$ is minimized,"
338 |    ]
339 |   },
340 |   {
341 |    "cell_type": "markdown",
342 |    "metadata": {},
343 |    "source": [
344 |     "$$\n",
345 |     "\\begin{equation*}\n",
346 |     "\\frac{\\partial \\left(-\\ln L\\right)}{\\partial \\alpha} = 0,\n",
347 |     "\\end{equation*}\n",
348 |     "$$"
349 |    ]
350 |   },
351 |   {
352 |    "cell_type": "markdown",
353 |    "metadata": {},
354 |    "source": [
355 |     "and so $L$ is maximized with respect to $\\theta$ when"
356 |    ]
357 |   },
358 |   {
359 |    "cell_type": "markdown",
360 |    "metadata": {},
361 |    "source": [
362 |     "$$\n",
363 |     "\\begin{equation*}\n",
364 |     "\\frac{\\partial \\left(-\\ln L\\right)}{\\partial \\theta} = -\\sum_{i=1}^{n} \\frac{x_i - \\theta}{\\sigma_i^2} = 0,\n",
365 |     "\\end{equation*}\n",
366 |     "$$"
367 |    ]
368 |   },
369 |   {
370 |    "cell_type": "markdown",
371 |    "metadata": {},
372 |    "source": [
373 |     "which occurs for"
374 |    ]
375 |   },
376 |   {
377 |    "cell_type": "markdown",
378 |    "metadata": {},
379 |    "source": [
380 |     "$$\n",
381 |     "\\begin{equation*}\n",
382 |     "\\sum_{i=1}^{n} \\frac{x_i}{\\sigma_i^2} = \\theta \\sum_{i=1}^{n} \\frac{1}{\\sigma_i^2},\n",
383 |     "\\end{equation*}\n",
384 |     "$$"
385 |    ]
386 |   },
387 |   {
388 |    "cell_type": "markdown",
389 |    "metadata": {},
390 |    "source": [
391 |     "which is"
392 |    ]
393 |   },
394 |   {
395 |    "cell_type": "markdown",
396 |    "metadata": {},
397 |    "source": [
398 |     "$$\n",
399 |     "\\begin{equation}\n",
400 |     "\\hat{\\theta} = \\frac{\\displaystyle\\sum_{i=1}^{n} \\frac{x_i}{\\sigma_i^2}}{\\displaystyle\\sum_{i=1}^{n}\\frac{1}{\\sigma_i^2}}.\n",
401 |     "\\end{equation}\n",
402 |     "$$"
403 |    ]
404 |   },
405 |   {
406 |    "cell_type": "markdown",
407 |    "metadata": {},
408 |    "source": [
409 |     "Note that by defining \"weights\" to be"
410 |    ]
411 |   },
412 |   {
413 |    "cell_type": "markdown",
414 |    "metadata": {},
415 |    "source": [
416 |     "$$\n",
417 |     "\\begin{equation*}\n",
418 |     "w_i = \\frac{1}{\\sigma_1^2},\n",
419 |     "\\end{equation*}\n",
420 |     "$$"
421 |    ]
422 |   },
423 |   {
424 |    "cell_type": "markdown",
425 |    "metadata": {},
426 |    "source": [
427 |     "this can be expressed as"
428 |    ]
429 |   },
430 |   {
431 |    "cell_type": "markdown",
432 |    "metadata": {},
433 |    "source": [
434 |     "$$\n",
435 |     "\\begin{equation}\n",
436 |     "\\boxed{\\hat{\\theta} = \\frac{\\displaystyle\\sum_{i=1}^{n} w_i\\, x_i}{\\displaystyle\\sum_{i=1}^{n}w_i}}\\,,\n",
437 |     "\\end{equation}\n",
438 |     "$$"
439 |    ]
440 |   },
441 |   {
442 |    "cell_type": "markdown",
443 |    "metadata": {},
444 |    "source": [
445 |     "making the term \"weighted mean\" very transparent."
446 |    ]
447 |   },
448 |   {
449 |    "cell_type": "markdown",
450 |    "metadata": {},
451 |    "source": [
452 |     "To find the standard deviation on the weighted mean, we first look to the variance, $\\sigma^2$. [4]"
453 |    ]
454 |   },
455 |   {
456 |    "cell_type": "markdown",
457 |    "metadata": {},
458 |    "source": [
459 |     "$$\n",
460 |     "\\begin{align*}\n",
461 |     "\\sigma^2 &= \\text{E}\\left[\\left(\\hat{\\theta} - \\text{E}\\left[\\hat{\\theta}\\right]\\right)^2\\right] \\\\\n",
462 |     "    &= \\text{E}\\left[\\left(\\frac{\\displaystyle\\sum_{i=1}^{n} w_i\\, x_i}{\\displaystyle\\sum_{i=1}^{n}w_i} - \\text{E}\\left[\\frac{\\displaystyle\\sum_{i=1}^{n} w_i\\, x_i}{\\displaystyle\\sum_{i=1}^{n}w_i}\\right]\\,\\right)^2\\right] \\\\\n",
463 |     "    &= \\frac{1}{\\displaystyle\\left(\\sum_{i=1}^{n} w_i\\right)^2} \\text{E} \\left[ \\displaystyle\\left(\\sum_{i=1}^{n} w_i\\,x_i\\right)^2 - 2 \\displaystyle\\left(\\sum_{i=1}^{n} w_i\\,x_i\\right) \\displaystyle\\left(\\sum_{i=j}^{n} w_j\\, \\text{E}\\left[x_j\\right]\\right) + \\displaystyle\\left(\\sum_{i=1}^{n} w_i\\, \\text{E}\\left[x_i\\right]\\right)^2 \\right] \\\\\n",
464 |     "    &= \\frac{1}{\\displaystyle\\left(\\sum_{i=1}^{n} w_i\\right)^2} \\text{E} \\left[ \\sum_{i,j}^{n} w_i\\, x_i w_j\\, x_j - 2 \\sum_{i,j}^{n} w_i\\, x_i w_j\\, \\text{E}\\left[x_j\\right] + \\sum_{i,j}^{n} w_i\\, \\text{E}\\left[x_i\\right] w_j\\, \\text{E}\\left[x_j\\right] \\right] \\\\\n",
465 |     "    &= \\frac{1}{\\displaystyle\\left(\\sum_{i=1}^{n} w_i\\right)^2} \\sum_{i,j}^{n} w_i w_j \\left( \\text{E}\\left[ x_i x_j \\right] - 2 \\text{E}\\left[ x_i \\right]\\text{E}\\left[ x_j \\right] + \\text{E}\\left[ x_i \\right]\\text{E}\\left[ x_j \\right] \\right) \\\\\n",
466 |     "    &= \\frac{1}{\\displaystyle\\left(\\sum_{i=1}^{n} w_i\\right)^2} \\sum_{i,j}^{n} w_i w_j \\left( \\text{E}\\left[ x_i x_j \\right] - \\text{E}\\left[ x_i \\right]\\text{E}\\left[ x_j \\right] \\right) \\\\\n",
467 |     "    &= \\frac{1}{\\displaystyle\\left(\\sum_{i=1}^{n} w_i\\right)^2} \\sum_{i,j}^{n} w_i w_j \\,\\text{Cov}\\left( x_i, x_j \\right) = \\left\\{\n",
468 |     "\\begin{array}{ll}\n",
469 |     "\\frac{\\displaystyle1}{\\displaystyle\\left(\\sum_{i=1}^{n} w_i\\right)^2} \\displaystyle\\sum_{i}^{n} \\left( w_i \\sigma_i \\right)^2\\,, & x_i \\text{ and } x_j \\text{ statistically independent}, \\\\\n",
470 |     "0\\,, &\\text{ otherwise},\n",
471 |     "\\end{array}\n",
472 |     "\\right. \\\\\n",
473 |     "    &= \\frac{\\displaystyle\\sum_{i}^{n} \\left( \\sigma_i^{-2} \\sigma_i \\right)^2}{\\displaystyle\\left(\\sum_{i=1}^{n} w_i\\right)^2} = \\frac{\\displaystyle\\sum_{i}^{n} w_i}{\\displaystyle\\left(\\sum_{i=1}^{n} w_i\\right)^2} \\\\\n",
474 |     "    &= \\frac{\\displaystyle 1}{\\displaystyle\\sum_{i=1}^{n} w_i}\n",
475 |     "\\end{align*}\n",
476 |     "$$"
477 |    ]
478 |   },
479 |   {
480 |    "cell_type": "markdown",
481 |    "metadata": {},
482 |    "source": [
483 |     "Thus, it is seen that the standard deviation on the weighted mean is"
484 |    ]
485 |   },
486 |   {
487 |    "cell_type": "markdown",
488 |    "metadata": {},
489 |    "source": [
490 |     "$$\n",
491 |     "\\begin{equation}\n",
492 |     "\\boxed{\\sigma_{\\hat{\\theta}} = \\sqrt{\\frac{\\displaystyle 1}{\\displaystyle\\sum_{i=1}^{n} w_i}} = \\left(\\displaystyle\\sum_{i=1}^{n} \\frac{1}{\\sigma_i^2}\\right)^{-1/2}}\\,.\n",
493 |     "\\end{equation}\n",
494 |     "$$"
495 |    ]
496 |   },
497 |   {
498 |    "cell_type": "markdown",
499 |    "metadata": {},
500 |    "source": [
501 |     "Notice that in the event that the uncertainties are uniform for each observation, $\\sigma_i = \\delta x$, the above yields the same result as the unweighted mean. $\\checkmark$"
502 |    ]
503 |   },
504 |   {
505 |    "cell_type": "markdown",
506 |    "metadata": {},
507 |    "source": [
508 |     "After this aside it is worth pointing out that [1] have a very elegant demonstration that"
509 |    ]
510 |   },
511 |   {
512 |    "cell_type": "markdown",
513 |    "metadata": {},
514 |    "source": [
515 |     "$$\n",
516 |     "\\begin{equation*}\n",
517 |     "\\sigma_{\\hat{\\theta}} = \\left(\\frac{\\partial^2\\left(- \\ln L\\right)}{\\partial\\, \\theta^2}\\right)^{-1/2} = \\left(\\displaystyle\\sum_{i=1}^{n} \\frac{1}{\\sigma_i^2}\\right)^{-1/2}.\n",
518 |     "\\end{equation*}\n",
519 |     "$$"
520 |    ]
521 |   },
522 |   {
523 |    "cell_type": "markdown",
524 |    "metadata": {},
525 |    "source": [
526 |     "---\n",
527 |     "So, the average of $n$ measurements of quantity $\\theta$, with individual measurements, $x_i$, Gaussianly distributed about (unknown) true value $\\theta$ with known width $\\sigma_i$, is the weighted mean"
528 |    ]
529 |   },
530 |   {
531 |    "cell_type": "markdown",
532 |    "metadata": {},
533 |    "source": [
534 |     "$$\n",
535 |     "\\begin{equation*}\n",
536 |     "\\hat{\\theta} = \\frac{\\displaystyle\\sum_{i=1}^{n} w_i\\, x_i}{\\displaystyle\\sum_{i=1}^{n}w_i},\n",
537 |     "\\end{equation*}\n",
538 |     "$$"
539 |    ]
540 |   },
541 |   {
542 |    "cell_type": "markdown",
543 |    "metadata": {},
544 |    "source": [
545 |     "with weights $w_i = \\sigma_i^{-2}$, with standard deviation on the weighted mean"
546 |    ]
547 |   },
548 |   {
549 |    "cell_type": "markdown",
550 |    "metadata": {},
551 |    "source": [
552 |     "$$\n",
553 |     "\\begin{equation*}\n",
554 |     "\\sigma_{\\hat{\\theta}} = \\sqrt{\\frac{\\displaystyle 1}{\\displaystyle\\sum_{i=1}^{n} w_i}} = \\left(\\displaystyle\\sum_{i=1}^{n} \\frac{1}{\\sigma_i^2}\\right)^{-1/2}.\n",
555 |     "\\end{equation*}\n",
556 |     "$$"
557 |    ]
558 |   },
559 |   {
560 |    "cell_type": "markdown",
561 |    "metadata": {},
562 |    "source": [
563 |     "---"
564 |    ]
565 |   },
566 |   {
567 |    "cell_type": "markdown",
568 |    "metadata": {},
569 |    "source": [
570 |     "## Specific Examples"
571 |    ]
572 |   },
573 |   {
574 |    "cell_type": "markdown",
575 |    "metadata": {},
576 |    "source": [
577 |     "Given the measurements"
578 |    ]
579 |   },
580 |   {
581 |    "cell_type": "markdown",
582 |    "metadata": {},
583 |    "source": [
584 |     "$$\n",
585 |     "\\vec{x} = \\left\\{10, 9, 11\\right\\}\n",
586 |     "$$"
587 |    ]
588 |   },
589 |   {
590 |    "cell_type": "markdown",
591 |    "metadata": {},
592 |    "source": [
593 |     "with uncertanties"
594 |    ]
595 |   },
596 |   {
597 |    "cell_type": "markdown",
598 |    "metadata": {},
599 |    "source": [
600 |     "$$\\vec{\\sigma_x} = \\left\\{1, 2, 3\\right\\}$$"
601 |    ]
602 |   },
603 |   {
604 |    "cell_type": "code",
605 |    "execution_count": null,
606 |    "metadata": {},
607 |    "outputs": [],
608 |    "source": [
609 |     "x_data = [10, 9, 11]\n",
610 |     "x_uncertainty = [1, 2, 3]"
611 |    ]
612 |   },
613 |   {
614 |    "cell_type": "code",
615 |    "execution_count": null,
616 |    "metadata": {},
617 |    "outputs": [],
618 |    "source": [
619 |     "numerator = sum(x / (sigma_x ** 2) for x, sigma_x in zip(x_data, x_uncertainty))\n",
620 |     "denominator = sum(1 / (sigma_x ** 2) for sigma_x in x_uncertainty)\n",
621 |     "\n",
622 |     "print(f\"hand calculated weighted mean: {numerator / denominator}\")"
623 |    ]
624 |   },
625 |   {
626 |    "cell_type": "markdown",
627 |    "metadata": {},
628 |    "source": [
629 |     "Using [NumPy's `average` method](https://docs.scipy.org/doc/numpy/reference/generated/numpy.average.html)"
630 |    ]
631 |   },
632 |   {
633 |    "cell_type": "code",
634 |    "execution_count": null,
635 |    "metadata": {},
636 |    "outputs": [],
637 |    "source": [
638 |     "# unweighted mean\n",
639 |     "np.average(x_data)"
640 |    ]
641 |   },
642 |   {
643 |    "cell_type": "code",
644 |    "execution_count": null,
645 |    "metadata": {},
646 |    "outputs": [],
647 |    "source": [
648 |     "x_weights = [1 / (uncert ** 2) for uncert in x_uncertainty]\n",
649 |     "# weighted mean\n",
650 |     "weighted_mean = np.average(x_data, weights=x_weights)\n",
651 |     "print(weighted_mean)"
652 |    ]
653 |   },
654 |   {
655 |    "cell_type": "code",
656 |    "execution_count": null,
657 |    "metadata": {},
658 |    "outputs": [],
659 |    "source": [
660 |     "# no method to do this in NumPy!?\n",
661 |     "sigma = np.sqrt(1 / np.sum(x_weights))\n",
662 |     "print(f\"hand calculated uncertaintiy on weighted mean: {sigma}\")"
663 |    ]
664 |   },
665 |   {
666 |    "cell_type": "code",
667 |    "execution_count": null,
668 |    "metadata": {},
669 |    "outputs": [],
670 |    "source": [
671 |     "# A second way to find the uncertainty on the weighted mean\n",
672 |     "summand = sum((x * w) for x, w in zip(x_data, x_weights))\n",
673 |     "np.sqrt(np.average(x_data, weights=x_weights) / summand)"
674 |    ]
675 |   },
676 |   {
677 |    "cell_type": "markdown",
678 |    "metadata": {},
679 |    "source": [
680 |     "Let's plot the data now and take a look at the results"
681 |    ]
682 |   },
683 |   {
684 |    "cell_type": "code",
685 |    "execution_count": null,
686 |    "metadata": {},
687 |    "outputs": [],
688 |    "source": [
689 |     "def draw_weighted_mean(data, errors, w_mean, w_uncert):\n",
690 |     "    plt.figure(1)\n",
691 |     "\n",
692 |     "    # the data to be plotted\n",
693 |     "    x = [i + 1 for i in range(len(data))]\n",
694 |     "\n",
695 |     "    x_min = x[x.index(min(x))]\n",
696 |     "    x_max = x[x.index(max(x))]\n",
697 |     "\n",
698 |     "    y = data\n",
699 |     "    y_min = y[y.index(min(y))]\n",
700 |     "    y_max = y[y.index(max(y))]\n",
701 |     "\n",
702 |     "    err_max = errors[errors.index(max(errors))]\n",
703 |     "\n",
704 |     "    # plot data\n",
705 |     "    plt.errorbar(x, y, xerr=0, yerr=errors, fmt=\"o\", color=\"black\")\n",
706 |     "    # plot weighted mean\n",
707 |     "    plt.plot((x_min, x_max), (w_mean, w_mean), color=\"blue\")\n",
708 |     "    # plot uncertainty on weighted mean\n",
709 |     "    plt.plot(\n",
710 |     "        (x_min, x_max),\n",
711 |     "        (w_mean - w_uncert, w_mean - w_uncert),\n",
712 |     "        color=\"gray\",\n",
713 |     "        linestyle=\"--\",\n",
714 |     "    )\n",
715 |     "    plt.plot(\n",
716 |     "        (x_min, x_max),\n",
717 |     "        (w_mean + w_uncert, w_mean + w_uncert),\n",
718 |     "        color=\"gray\",\n",
719 |     "        linestyle=\"--\",\n",
720 |     "    )\n",
721 |     "\n",
722 |     "    # Axes\n",
723 |     "    plt.xlabel(\"Individual measurements\")\n",
724 |     "    plt.ylabel(\"Value of measruement\")\n",
725 |     "    # view range\n",
726 |     "    epsilon = 0.1\n",
727 |     "    plt.xlim(x_min - epsilon, x_max + epsilon)\n",
728 |     "    plt.ylim([y_min - err_max, 1.5 * y_max + err_max])\n",
729 |     "\n",
730 |     "    # ax = figure().gca()\n",
731 |     "    # ax.xaxis.set_major_locator(MaxNLocator(integer=True))\n",
732 |     "\n",
733 |     "    # Legends\n",
734 |     "    wmean_patch = mpathces.Patch(\n",
735 |     "        color=\"blue\", label=fr\"Weighted mean: $\\mu={w_mean:0.3f}$\"\n",
736 |     "    )\n",
737 |     "    uncert_patch = mpathces.Patch(\n",
738 |     "        color=\"gray\",\n",
739 |     "        label=fr\"Uncertainty on the weighted mean: $\\pm{w_uncert:0.3f}$\",\n",
740 |     "    )\n",
741 |     "    plt.legend(handles=[wmean_patch, uncert_patch])\n",
742 |     "\n",
743 |     "    plt.show()"
744 |    ]
745 |   },
746 |   {
747 |    "cell_type": "code",
748 |    "execution_count": null,
749 |    "metadata": {},
750 |    "outputs": [],
751 |    "source": [
752 |     "draw_weighted_mean(x_data, x_uncertainty, weighted_mean, sigma)"
753 |    ]
754 |   },
755 |   {
756 |    "cell_type": "markdown",
757 |    "metadata": {},
758 |    "source": [
759 |     "Now let's do this again but with data that are Normally distributed about a mean value"
760 |    ]
761 |   },
762 |   {
763 |    "cell_type": "code",
764 |    "execution_count": null,
765 |    "metadata": {},
766 |    "outputs": [],
767 |    "source": [
768 |     "true_mu = np.random.uniform(3, 9)\n",
769 |     "true_sigma = np.random.uniform(0.1, 2.0)\n",
770 |     "n_samples = 20\n",
771 |     "\n",
772 |     "samples = np.random.normal(true_mu, true_sigma, n_samples).tolist()\n",
773 |     "gauss_errs = np.random.normal(2, 0.4, n_samples).tolist()\n",
774 |     "\n",
775 |     "weights = [1 / (uncert ** 2) for uncert in gauss_errs]\n",
776 |     "\n",
777 |     "draw_weighted_mean(\n",
778 |     "    samples,\n",
779 |     "    gauss_errs,\n",
780 |     "    np.average(samples, weights=weights),\n",
781 |     "    np.sqrt(1 / np.sum(weights)),\n",
782 |     ")"
783 |    ]
784 |   },
785 |   {
786 |    "cell_type": "markdown",
787 |    "metadata": {},
788 |    "source": [
789 |     "## References"
790 |    ]
791 |   },
792 |   {
793 |    "cell_type": "markdown",
794 |    "metadata": {},
795 |    "source": [
796 |     "1. [_Data Analysis in High Energy Physics_](http://eu.wiley.com/WileyCDA/WileyTitle/productCd-3527410589.html), Behnke et al., 2013, $\\S$ 2.3.3.1\n",
797 |     "2. [_Statistical Data Analysis_](http://www.pp.rhul.ac.uk/~cowan/sda/), Glen Cowan, 1998\n",
798 |     "3. University of Marlyand, Physics 261, [Notes on Error Propagation](http://www.physics.umd.edu/courses/Phys261/F06/ErrorPropagation.pdf)\n",
799 |     "4. Physics Stack Exchange, [_How do you find the uncertainty of a weighted average?_](https://physics.stackexchange.com/questions/15197/how-do-you-find-the-uncertainty-of-a-weighted-average)"
800 |    ]
801 |   }
802 |  ],
803 |  "metadata": {
804 |   "kernelspec": {
805 |    "display_name": "Python 3",
806 |    "language": "python",
807 |    "name": "python3"
808 |   },
809 |   "language_info": {
810 |    "codemirror_mode": {
811 |     "name": "ipython",
812 |     "version": 3
813 |    },
814 |    "file_extension": ".py",
815 |    "mimetype": "text/x-python",
816 |    "name": "python",
817 |    "nbconvert_exporter": "python",
818 |    "pygments_lexer": "ipython3",
819 |    "version": "3.7.5"
820 |   }
821 |  },
822 |  "nbformat": 4,
823 |  "nbformat_minor": 4
824 | }
825 | 


--------------------------------------------------------------------------------
/book/notebooks/Introductory/Gaussian-Distribution.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# The Gaussian (Normal) Distribution"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "## Probability Distribution Function"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "The Normal (Gaussian) distribution probability distribution function is"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "metadata": {},
 27 |    "source": [
 28 |     "$$\n",
 29 |     "f\\left(x; \\mu, \\sigma\\right) = \\frac{1}{\\sqrt{2\\pi}\\sigma}\\,e^{-\\left(x-\\mu\\right)^2/2\\sigma^2},\n",
 30 |     "$$"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "markdown",
 35 |    "metadata": {},
 36 |    "source": [
 37 |     "normalized to unity,"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "markdown",
 42 |    "metadata": {},
 43 |    "source": [
 44 |     "$$\n",
 45 |     "\\int\\limits_{-\\infty}^{\\infty} f\\left(x; \\mu,\\sigma\\right)\\,dx = 1,\n",
 46 |     "$$"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "markdown",
 51 |    "metadata": {},
 52 |    "source": [
 53 |     "is symmetrically distributed about its mean, $\\mu$, with width $\\sigma$."
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "markdown",
 58 |    "metadata": {},
 59 |    "source": [
 60 |     "## Full Width at Half Maximum (FWHM)"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "markdown",
 65 |    "metadata": {},
 66 |    "source": [
 67 |     "The full width at half maximum (FWHM) is the distance between points on a curve at which the function reaches half its maximum value. The FWHM is often used to describe the \"width\" of a distribution."
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "markdown",
 72 |    "metadata": {},
 73 |    "source": [
 74 |     "For a 1-dimensional Gaussian, it is seen that as the maximum value occurs as $x = \\mu$ (by definition), half of the maximum value is"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "markdown",
 79 |    "metadata": {},
 80 |    "source": [
 81 |     "$$\n",
 82 |     "\\begin{align*}\n",
 83 |     "\\left.\\frac{1}{\\sqrt{2\\pi}\\sigma}\\,e^{-\\left(x-\\mu\\right)^2/2\\sigma^2}\\right|_{x = x \\text{ of (max/2)}} &= \\frac{1}{2} f\\left(x_{\\text{max}}\\right)\\\\\n",
 84 |     "    &= \\frac{1}{2} f\\left(\\mu\\right) = \\frac{1}{2} \\frac{1}{\\sqrt{2\\pi}\\sigma},\n",
 85 |     "\\end{align*}\n",
 86 |     "$$"
 87 |    ]
 88 |   },
 89 |   {
 90 |    "cell_type": "markdown",
 91 |    "metadata": {},
 92 |    "source": [
 93 |     "resulting in the equality"
 94 |    ]
 95 |   },
 96 |   {
 97 |    "cell_type": "markdown",
 98 |    "metadata": {},
 99 |    "source": [
100 |     "$$\n",
101 |     "e^{-\\left(x-\\mu\\right)^2/2\\sigma^2} = \\frac{1}{2},\n",
102 |     "$$"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "markdown",
107 |    "metadata": {},
108 |    "source": [
109 |     "which is (taking the log)"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "markdown",
114 |    "metadata": {},
115 |    "source": [
116 |     "$$\n",
117 |     "-\\frac{\\left(x-\\mu\\right)^2}{2\\sigma^2} = -\\ln2.\n",
118 |     "$$"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "markdown",
123 |    "metadata": {},
124 |    "source": [
125 |     "Thus, solving the equality,"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "markdown",
130 |    "metadata": {},
131 |    "source": [
132 |     "$$\n",
133 |     "\\left(x-\\mu\\right)^2 = 2\\sigma^2 \\ln2,\n",
134 |     "$$"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "markdown",
139 |    "metadata": {},
140 |    "source": [
141 |     "yields"
142 |    ]
143 |   },
144 |   {
145 |    "cell_type": "markdown",
146 |    "metadata": {},
147 |    "source": [
148 |     "$$\n",
149 |     "x_{\\pm} = \\pm \\sigma \\sqrt{2 \\ln 2} + \\mu.\n",
150 |     "$$"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "markdown",
155 |    "metadata": {},
156 |    "source": [
157 |     "Thus, the FWHM is"
158 |    ]
159 |   },
160 |   {
161 |    "cell_type": "markdown",
162 |    "metadata": {},
163 |    "source": [
164 |     "$$\n",
165 |     "\\begin{align*}\n",
166 |     "\\text{FWHM} &= x_{+} - x_{-}\\\\\n",
167 |     "    &= \\left(\\sigma \\sqrt{2 \\ln 2} + \\mu\\right) - \\left(-\\sigma \\sqrt{2 \\ln 2} + \\mu\\right)\\\\\n",
168 |     "    &= \\boxed{2\\sqrt{2\\ln2}\\sigma}\\,.\n",
169 |     "\\end{align*}\n",
170 |     "$$"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "markdown",
175 |    "metadata": {},
176 |    "source": [
177 |     "**N.B.:** It is seen that the FWHM for a Gaussian is _independent_ of both the normalization constant and the mean, and is only dependent on the standard devaiation of the Gaussian."
178 |    ]
179 |   },
180 |   {
181 |    "cell_type": "markdown",
182 |    "metadata": {},
183 |    "source": [
184 |     "## Probability and the Error Function"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "markdown",
189 |    "metadata": {},
190 |    "source": [
191 |     "The probability that a Normally distributed random variable will lie in a range of values symmetrically integrated over is given by"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "markdown",
196 |    "metadata": {},
197 |    "source": [
198 |     "$$\n",
199 |     "\\begin{align*}\n",
200 |     "\\text{Pr}\\left(\\mu - y \\leq x \\leq \\mu + y\\right) &= \\int\\limits_{\\mu - y}^{\\mu + y}\\frac{1}{\\sqrt{2\\pi}\\sigma}\\,e^{-\\left(x-\\mu\\right)^2/2\\sigma^2}\\,dx\\\\\n",
201 |     "    &= \\int\\limits_{\\mu}^{\\mu + y}\\frac{2}{\\sqrt{2\\pi}\\sigma}\\,e^{-\\left(x-\\mu\\right)^2/2\\sigma^2}\\,dx.\n",
202 |     "\\end{align*}\n",
203 |     "$$"
204 |    ]
205 |   },
206 |   {
207 |    "cell_type": "markdown",
208 |    "metadata": {},
209 |    "source": [
210 |     "Making the substitution"
211 |    ]
212 |   },
213 |   {
214 |    "cell_type": "markdown",
215 |    "metadata": {},
216 |    "source": [
217 |     "$$\n",
218 |     "t = \\frac{\\left(x-\\mu\\right)}{\\sqrt{2}\\sigma},\n",
219 |     "$$"
220 |    ]
221 |   },
222 |   {
223 |    "cell_type": "markdown",
224 |    "metadata": {},
225 |    "source": [
226 |     "then"
227 |    ]
228 |   },
229 |   {
230 |    "cell_type": "markdown",
231 |    "metadata": {},
232 |    "source": [
233 |     "$$\n",
234 |     "\\begin{align*}\n",
235 |     "\\text{Pr}\\left(\\mu - y \\leq x \\leq \\mu + y\\right) &= \\int\\limits_{\\mu}^{\\mu + y}\\frac{2}{\\sqrt{2\\pi}\\sigma}\\,e^{-\\left(x-\\mu\\right)^2/2\\sigma^2}\\,dx\\\\\n",
236 |     "    &= \\boxed{\\frac{2}{\\sqrt{\\pi}} \\int\\limits_{0}^{y/\\sqrt{2}\\sigma}e^{-t^2}\\,dt \\equiv \\text{erf}\\left(\\frac{y}{\\sqrt{2}\\sigma}\\right)}\\,.\n",
237 |     "\\end{align*}\n",
238 |     "$$"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "markdown",
243 |    "metadata": {},
244 |    "source": [
245 |     "## Cumulative Distribution Function (cdf)"
246 |    ]
247 |   },
248 |   {
249 |    "cell_type": "markdown",
250 |    "metadata": {},
251 |    "source": [
252 |     "For the cumulative distribution function (cdf),"
253 |    ]
254 |   },
255 |   {
256 |    "cell_type": "markdown",
257 |    "metadata": {},
258 |    "source": [
259 |     "$$\n",
260 |     "\\Phi\\left(x\\right) = \\int\\limits_{-\\infty}^{x}f\\left(t;\\mu,\\sigma\\right)\\,dt,\n",
261 |     "$$"
262 |    ]
263 |   },
264 |   {
265 |    "cell_type": "markdown",
266 |    "metadata": {},
267 |    "source": [
268 |     "it is seen, noting from the form of the error function, that"
269 |    ]
270 |   },
271 |   {
272 |    "cell_type": "markdown",
273 |    "metadata": {},
274 |    "source": [
275 |     "$$\n",
276 |     "\\begin{equation*}\n",
277 |     "\\frac{2}{\\sqrt{\\pi}} \\int\\limits_{0}^{y/\\sqrt{2}\\sigma}e^{-t^2}\\,dt = \\frac{2}{\\sqrt{2 \\pi}} \\int\\limits_{0}^{y/\\sigma}e^{-t^2/2}\\,dt = \\text{erf}\\left(\\frac{y}{\\sqrt{2}\\sigma}\\right),\n",
278 |     "\\end{equation*}\n",
279 |     "$$"
280 |    ]
281 |   },
282 |   {
283 |    "cell_type": "code",
284 |    "execution_count": null,
285 |    "metadata": {},
286 |    "outputs": [],
287 |    "source": [
288 |     "import sympy as sym\n",
289 |     "\n",
290 |     "sym.init_printing(use_unicode=True, wrap_line=False, no_global=True)\n",
291 |     "from sympy.abc import sigma"
292 |    ]
293 |   },
294 |   {
295 |    "cell_type": "code",
296 |    "execution_count": null,
297 |    "metadata": {},
298 |    "outputs": [],
299 |    "source": [
300 |     "(\n",
301 |     "    t,\n",
302 |     "    y,\n",
303 |     ") = sym.symbols(\"t y\")\n",
304 |     "sym.integrate(\n",
305 |     "    (2 / sym.sqrt(sym.pi)) * sym.exp(-(t ** 2)), (t, 0, y / (sym.sqrt(2) * sigma))\n",
306 |     ")"
307 |    ]
308 |   },
309 |   {
310 |    "cell_type": "code",
311 |    "execution_count": null,
312 |    "metadata": {},
313 |    "outputs": [],
314 |    "source": [
315 |     "sym.integrate((2 / sym.sqrt(2 * sym.pi)) * sym.exp(-(t ** 2) / 2), (t, 0, y / sigma))"
316 |    ]
317 |   },
318 |   {
319 |    "cell_type": "markdown",
320 |    "metadata": {},
321 |    "source": [
322 |     "then for the standard Gaussian ($\\mu = 0$, $\\sigma=1$)"
323 |    ]
324 |   },
325 |   {
326 |    "cell_type": "markdown",
327 |    "metadata": {},
328 |    "source": [
329 |     "$$\n",
330 |     "\\begin{align*}\n",
331 |     "\\Phi\\left(x\\right) &= \\frac{1}{\\sqrt{2\\pi}}\\int\\limits_{-\\infty}^{x} e^{-t^2/2}\\,dt\\\\\n",
332 |     "    &= \\frac{1}{\\sqrt{2\\pi}}\\int\\limits_{-\\infty}^{0} e^{-t^2/2}\\,dt + \\frac{1}{\\sqrt{2\\pi}}\\int\\limits_{0}^{x} e^{-t^2/2}\\,dt\\\\\n",
333 |     "    &= \\frac{1}{2} + \\frac{1}{2} \\text{erf}\\left(\\frac{x}{\\sqrt{2}}\\right)\\\\\n",
334 |     "    &= \\frac{1}{2} \\left(1 + \\text{erf}\\left(\\frac{x}{\\sqrt{2}}\\right)\\right)\n",
335 |     "\\end{align*}\n",
336 |     "$$"
337 |    ]
338 |   },
339 |   {
340 |    "cell_type": "markdown",
341 |    "metadata": {},
342 |    "source": [
343 |     "so it is likewise seen that"
344 |    ]
345 |   },
346 |   {
347 |    "cell_type": "markdown",
348 |    "metadata": {},
349 |    "source": [
350 |     "$$\n",
351 |     "\\begin{align*}\n",
352 |     "\\text{Pr}\\left(\\mu - n \\sigma \\leq x \\leq \\mu + n\\sigma\\right) &= \\Phi(n) - \\Phi(-n)\\\\\n",
353 |     "    &= \\Phi(n) - \\left(1-\\Phi(n)\\right)\\\\\n",
354 |     "    &= \\frac{1}{2} \\left(1 + \\text{erf}\\left(\\frac{n}{\\sqrt{2}}\\right)\\right) - \\left[1-\\frac{1}{2} \\left(1 + \\text{erf}\\left(\\frac{n}{\\sqrt{2}}\\right)\\right)\\right]\\\\\n",
355 |     "    &= \\text{erf}\\left(\\frac{n}{\\sqrt{2}}\\right).\n",
356 |     "\\end{align*}\n",
357 |     "$$"
358 |    ]
359 |   },
360 |   {
361 |    "cell_type": "markdown",
362 |    "metadata": {},
363 |    "source": [
364 |     "**Show this for:** generic normal distribution $f$ with mean $\\mu$ and std $\\sigma$"
365 |    ]
366 |   },
367 |   {
368 |    "cell_type": "markdown",
369 |    "metadata": {},
370 |    "source": [
371 |     "It is noted that in the case that $\\left|y\\right|=n\\sigma$,"
372 |    ]
373 |   },
374 |   {
375 |    "cell_type": "markdown",
376 |    "metadata": {},
377 |    "source": [
378 |     "$$\n",
379 |     "\\text{Pr}\\left(\\mu - y \\leq x \\leq \\mu + y\\right) = \\text{Pr}\\left(\\mu - n\\sigma \\leq x \\leq \\mu + n\\sigma\\right) = \\text{erf}\\left(\\frac{n}{\\sqrt{2}}\\right).\n",
380 |     "$$"
381 |    ]
382 |   },
383 |   {
384 |    "cell_type": "markdown",
385 |    "metadata": {},
386 |    "source": [
387 |     "So, for $n=1$,"
388 |    ]
389 |   },
390 |   {
391 |    "cell_type": "markdown",
392 |    "metadata": {},
393 |    "source": [
394 |     "$$\n",
395 |     "\\text{Pr}\\left(\\mu - \\sigma \\leq x \\leq \\mu + \\sigma\\right) = \\text{erf}\\left(\\frac{1}{\\sqrt{2}}\\right)\n",
396 |     "$$"
397 |    ]
398 |   },
399 |   {
400 |    "cell_type": "markdown",
401 |    "metadata": {},
402 |    "source": [
403 |     "However, at this point we are at an impass analytically, as the integral of a Gaussian function over a finite range has no analytical solution, and must be evaluated numerically."
404 |    ]
405 |   },
406 |   {
407 |    "cell_type": "code",
408 |    "execution_count": null,
409 |    "metadata": {},
410 |    "outputs": [],
411 |    "source": [
412 |     "import math\n",
413 |     "from scipy import special as special"
414 |    ]
415 |   },
416 |   {
417 |    "cell_type": "code",
418 |    "execution_count": null,
419 |    "metadata": {},
420 |    "outputs": [],
421 |    "source": [
422 |     "def prob_n_sigma(n):\n",
423 |     "    return special.erf(n / math.sqrt(2.0))"
424 |    ]
425 |   },
426 |   {
427 |    "cell_type": "code",
428 |    "execution_count": null,
429 |    "metadata": {},
430 |    "outputs": [],
431 |    "source": [
432 |     "prob_n_sigma(1)"
433 |    ]
434 |   },
435 |   {
436 |    "cell_type": "markdown",
437 |    "metadata": {},
438 |    "source": [
439 |     "## $p$-values"
440 |    ]
441 |   },
442 |   {
443 |    "cell_type": "markdown",
444 |    "metadata": {},
445 |    "source": [
446 |     "### Two-Tailed  $p$-value"
447 |    ]
448 |   },
449 |   {
450 |    "cell_type": "markdown",
451 |    "metadata": {},
452 |    "source": [
453 |     "### One-Tailed  $p$-value"
454 |    ]
455 |   }
456 |  ],
457 |  "metadata": {
458 |   "kernelspec": {
459 |    "display_name": "Python 3",
460 |    "language": "python",
461 |    "name": "python3"
462 |   },
463 |   "language_info": {
464 |    "codemirror_mode": {
465 |     "name": "ipython",
466 |     "version": 3
467 |    },
468 |    "file_extension": ".py",
469 |    "mimetype": "text/x-python",
470 |    "name": "python",
471 |    "nbconvert_exporter": "python",
472 |    "pygments_lexer": "ipython3",
473 |    "version": "3.7.5"
474 |   }
475 |  },
476 |  "nbformat": 4,
477 |  "nbformat_minor": 4
478 | }
479 | 


--------------------------------------------------------------------------------
/book/notebooks/Introductory/probability-integral-transform.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "[Matthew Feickert](http://www.matthewfeickert.com/), October 2016"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": null,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import numpy as np\n",
 17 |     "import scipy.stats as stats\n",
 18 |     "import scipy.optimize as optimize\n",
 19 |     "\n",
 20 |     "import matplotlib.patches as mpatches\n",
 21 |     "import matplotlib.pyplot as plt"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "metadata": {},
 27 |    "source": [
 28 |     "# Probability integral transform"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "markdown",
 33 |    "metadata": {},
 34 |    "source": [
 35 |     "The probability integral transform states that if $X$ is a continuous random variable with cumulative distribution function $F_{X}$, then the random variable $\\displaystyle Y=F_{X}(X)$ has a uniform distribution on $[0, 1]$. The inverse of this is the \"[inverse probability integral transform](https://en.wikipedia.org/wiki/Inverse_transform_sampling).'' [[1]](#Ref:Wikipedia_Probability-integral-transform)"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "markdown",
 40 |    "metadata": {},
 41 |    "source": [
 42 |     "## \"Proof\""
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "markdown",
 47 |    "metadata": {},
 48 |    "source": [
 49 |     "Let a random variable, $Y$, be defined by $Y = \\displaystyle F_{X}(X)$ where $X$ is another random variable. Then,"
 50 |    ]
 51 |   },
 52 |   {
 53 |    "cell_type": "markdown",
 54 |    "metadata": {},
 55 |    "source": [
 56 |     "$$\n",
 57 |     "\\begin{split}\n",
 58 |     "F_{Y}(y) &= \\text{P}(Y \\leq y)\\\\\n",
 59 |     " &= \\text{P}(F_{X}(X) \\leq y) \\\\\n",
 60 |     " &= \\text{P}(F_{X}(X) \\leq F_{X}^{-1}(y)) \\\\\n",
 61 |     " &= F_{X}(F_{X}^{-1}(y)) \\\\\n",
 62 |     " &= y.\n",
 63 |     "\\end{split}\n",
 64 |     "$$"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "markdown",
 69 |    "metadata": {},
 70 |    "source": [
 71 |     "$\\displaystyle F_{Y}(y) = y$ describes the cumulative distribution function of a random variable, i.e., $Y = \\displaystyle F_{X}(X)$, uniformly distributed on $[0,1]$. [[1]](#Ref:Wikipedia_Probability-integral-transform)"
 72 |    ]
 73 |   },
 74 |   {
 75 |    "cell_type": "markdown",
 76 |    "metadata": {},
 77 |    "source": [
 78 |     "### Alternative View: A Change of variables"
 79 |    ]
 80 |   },
 81 |   {
 82 |    "cell_type": "markdown",
 83 |    "metadata": {},
 84 |    "source": [
 85 |     "Consider the variable transformation"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "markdown",
 90 |    "metadata": {},
 91 |    "source": [
 92 |     "$$\n",
 93 |     "\\begin{equation*}\n",
 94 |     "y(x) = F(x) = \\int\\limits_{-\\infty}^{x} f(x')\\,dx'.\n",
 95 |     "\\end{equation*}\n",
 96 |     "$$"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "markdown",
101 |    "metadata": {},
102 |    "source": [
103 |     "So the transofrmation from the distribution $f(x)$ to the distribution $f(y)$ required a Jacobian (determinant),"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "markdown",
108 |    "metadata": {},
109 |    "source": [
110 |     "$$\n",
111 |     "\\begin{split}\n",
112 |     "\\frac{dy}{dx} &= \\frac{d}{dx} \\int\\limits_{-\\infty}^{x} f(x')\\,dx' \\\\\n",
113 |     " &= f(x),\n",
114 |     "\\end{split}\n",
115 |     "$$"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "markdown",
120 |    "metadata": {},
121 |    "source": [
122 |     "such that the distribution of $y$ is"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "markdown",
127 |    "metadata": {},
128 |    "source": [
129 |     "$$\n",
130 |     "\\begin{split}\n",
131 |     "f(y) &= \\left(\\frac{1}{\\left|\\frac{dy}{dx}\\right|}\\right) \\,f(x) \\\\\n",
132 |     " &= \\frac{f(x)}{f(x)}\\\\\n",
133 |     " &= 1,\n",
134 |     "\\end{split}\n",
135 |     "$$"
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "markdown",
140 |    "metadata": {},
141 |    "source": [
142 |     "which is the probability density function for the Uniform distribution from $[0,1]$. [[2]](#Ref:Cranmer)"
143 |    ]
144 |   },
145 |   {
146 |    "cell_type": "markdown",
147 |    "metadata": {},
148 |    "source": [
149 |     "## Example:"
150 |    ]
151 |   },
152 |   {
153 |    "cell_type": "markdown",
154 |    "metadata": {},
155 |    "source": [
156 |     "Let's *uniformly* sample $n$ measurements from a Gaussian distribution with true mean $\\mu$ and true standard deviation $\\sigma$."
157 |    ]
158 |   },
159 |   {
160 |    "cell_type": "code",
161 |    "execution_count": null,
162 |    "metadata": {},
163 |    "outputs": [],
164 |    "source": [
165 |     "x = np.linspace(-5.0, 5.0, num=10000)\n",
166 |     "# mean and standard deviation\n",
167 |     "mu = 0\n",
168 |     "sigma = 1\n",
169 |     "\n",
170 |     "# sample the distribution\n",
171 |     "number_of_samples = 5000\n",
172 |     "samples = np.random.normal(mu, sigma, number_of_samples)\n",
173 |     "samples.sort()"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "code",
178 |    "execution_count": null,
179 |    "metadata": {},
180 |    "outputs": [],
181 |    "source": [
182 |     "# get sample parameters\n",
183 |     "sample_mean = np.mean(samples)\n",
184 |     "sample_std = np.std(samples)"
185 |    ]
186 |   },
187 |   {
188 |    "cell_type": "code",
189 |    "execution_count": null,
190 |    "metadata": {},
191 |    "outputs": [],
192 |    "source": [
193 |     "true_distribution = stats.norm.pdf(x, mu, sigma)"
194 |    ]
195 |   },
196 |   {
197 |    "cell_type": "markdown",
198 |    "metadata": {},
199 |    "source": [
200 |     "Histogram the distribution"
201 |    ]
202 |   },
203 |   {
204 |    "cell_type": "code",
205 |    "execution_count": null,
206 |    "metadata": {},
207 |    "outputs": [],
208 |    "source": [
209 |     "n_bins = 1\n",
210 |     "if number_of_samples < 50:\n",
211 |     "    n_bins = number_of_samples\n",
212 |     "else:\n",
213 |     "    n_bins = 50\n",
214 |     "\n",
215 |     "# Plots\n",
216 |     "plt.figure(1)\n",
217 |     "\n",
218 |     "# Plot histogram of samples\n",
219 |     "hist_count, bins, _ = plt.hist(\n",
220 |     "    samples, n_bins, density=True\n",
221 |     ")  # Norm to keep distribution in view\n",
222 |     "# Plot distribution using sample parameters\n",
223 |     "plt.plot(x, true_distribution, linewidth=2, color=\"black\")\n",
224 |     "\n",
225 |     "# Axes\n",
226 |     "plt.title(\"Plot of distribution from samples\")\n",
227 |     "plt.xlabel(\"$x$\")\n",
228 |     "plt.ylabel(\"count (normalized to unit area)\")\n",
229 |     "sample_window_w = sample_std * 1.5\n",
230 |     "# plt.xlim([sample_mean - sample_window_w, sample_mean + sample_window_w])\n",
231 |     "plt.xlim([-4, 4])\n",
232 |     "plt.ylim([0, hist_count.max() * 1.6])\n",
233 |     "\n",
234 |     "# Legends\n",
235 |     "sample_patch = mpatches.Patch(\n",
236 |     "    color=\"black\", label=fr\"distribution: $f(x;\\mu={mu},\\sigma={sigma})$\"\n",
237 |     ")\n",
238 |     "data_patch = mpatches.Patch(\n",
239 |     "    color=\"blue\",\n",
240 |     "    label=f\"Histogram of {number_of_samples} samples of the distribution\",\n",
241 |     ")\n",
242 |     "\n",
243 |     "plt.legend(handles=[data_patch, sample_patch])\n",
244 |     "\n",
245 |     "plt.show()\n",
246 |     "# print(samples)"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "markdown",
251 |    "metadata": {},
252 |    "source": [
253 |     "Now let's feed our samples through the cumulative distribution function"
254 |    ]
255 |   },
256 |   {
257 |    "cell_type": "code",
258 |    "execution_count": null,
259 |    "metadata": {},
260 |    "outputs": [],
261 |    "source": [
262 |     "# Plots\n",
263 |     "plt.figure(1)\n",
264 |     "\n",
265 |     "# Plot distribution using sample parameters\n",
266 |     "plt.plot(x, stats.norm.cdf(x), linewidth=2, color=\"black\")\n",
267 |     "\n",
268 |     "# Axes\n",
269 |     "plt.title(\"cumulative distribution function for the Gaussian\")\n",
270 |     "plt.xlabel(\"$x$\")\n",
271 |     "plt.ylabel(\"$F(x)$\")\n",
272 |     "\n",
273 |     "plt.xlim([-5, 5])\n",
274 |     "plt.ylim([0, 1.1])\n",
275 |     "\n",
276 |     "plt.show()"
277 |    ]
278 |   },
279 |   {
280 |    "cell_type": "code",
281 |    "execution_count": null,
282 |    "metadata": {},
283 |    "outputs": [],
284 |    "source": [
285 |     "output = stats.norm.cdf(samples)\n",
286 |     "# print(output)"
287 |    ]
288 |   },
289 |   {
290 |    "cell_type": "markdown",
291 |    "metadata": {},
292 |    "source": [
293 |     "Now let's plot the output and compare it to the uniform distribution."
294 |    ]
295 |   },
296 |   {
297 |    "cell_type": "code",
298 |    "execution_count": null,
299 |    "metadata": {},
300 |    "outputs": [],
301 |    "source": [
302 |     "# uniform distribution\n",
303 |     "uniform_distribution = stats.uniform.pdf(x)"
304 |    ]
305 |   },
306 |   {
307 |    "cell_type": "code",
308 |    "execution_count": null,
309 |    "metadata": {
310 |     "scrolled": true
311 |    },
312 |    "outputs": [],
313 |    "source": [
314 |     "# Plots\n",
315 |     "plt.figure(1)\n",
316 |     "\n",
317 |     "# Plot histogram of samples\n",
318 |     "hist_count, bins, _ = plt.hist(\n",
319 |     "    output, n_bins, density=True\n",
320 |     ")  # Norm to keep distribution in view\n",
321 |     "# Plot distribution using sample parameters\n",
322 |     "plt.plot(x, uniform_distribution, linewidth=2, color=\"black\")\n",
323 |     "# Axes\n",
324 |     "plt.title(\"Samples from the Gaussian transformed\")\n",
325 |     "plt.xlabel(\"$y$\")\n",
326 |     "plt.ylabel(\"count (normalized to unit area)\")\n",
327 |     "\n",
328 |     "plt.xlim([-0.25, 1.25])\n",
329 |     "plt.ylim([0, hist_count.max() * 1.4])\n",
330 |     "\n",
331 |     "# Legends\n",
332 |     "sample_patch = mpatches.Patch(\n",
333 |     "    color=\"black\", label=f\"Uniform distribution on $[{0},{1}]$\"\n",
334 |     ")\n",
335 |     "data_patch = mpatches.Patch(color=\"blue\", label=\"Histogram of transform of the samples\")\n",
336 |     "\n",
337 |     "plt.legend(handles=[data_patch, sample_patch])\n",
338 |     "\n",
339 |     "plt.show()"
340 |    ]
341 |   },
342 |   {
343 |    "cell_type": "markdown",
344 |    "metadata": {},
345 |    "source": [
346 |     "Now let's use the quantile function to recover the original Gaussian distribution from the Uniform distribution."
347 |    ]
348 |   },
349 |   {
350 |    "cell_type": "code",
351 |    "execution_count": null,
352 |    "metadata": {},
353 |    "outputs": [],
354 |    "source": [
355 |     "recovered = stats.norm.ppf(output)"
356 |    ]
357 |   },
358 |   {
359 |    "cell_type": "code",
360 |    "execution_count": null,
361 |    "metadata": {},
362 |    "outputs": [],
363 |    "source": [
364 |     "# Plots\n",
365 |     "plt.figure(1)\n",
366 |     "\n",
367 |     "# Plot histogram of samples\n",
368 |     "hist_count, bins, _ = plt.hist(\n",
369 |     "    recovered, n_bins, density=True\n",
370 |     ")  # Norm to keep distribution in view\n",
371 |     "# Plot distribution using sample parameters\n",
372 |     "plt.plot(x, true_distribution, linewidth=2, color=\"black\")\n",
373 |     "# Axes\n",
374 |     "plt.title(\"Samples transformed from the Uniform\")\n",
375 |     "plt.xlabel(\"$x$\")\n",
376 |     "plt.ylabel(\"count (normalized to unit area)\")\n",
377 |     "\n",
378 |     "plt.xlim([-4, 4])\n",
379 |     "plt.ylim([0, hist_count.max() * 1.6])\n",
380 |     "\n",
381 |     "# Legends\n",
382 |     "sample_patch = mpatches.Patch(\n",
383 |     "    color=\"black\", label=fr\"distribution: $f(x;\\mu={mu},\\sigma={sigma})$\"\n",
384 |     ")\n",
385 |     "data_patch = mpatches.Patch(color=\"blue\", label=\"Histogram of transform of the samples\")\n",
386 |     "\n",
387 |     "plt.legend(handles=[data_patch, sample_patch])\n",
388 |     "\n",
389 |     "plt.show()"
390 |    ]
391 |   },
392 |   {
393 |    "cell_type": "markdown",
394 |    "metadata": {},
395 |    "source": [
396 |     "## Summary"
397 |    ]
398 |   },
399 |   {
400 |    "cell_type": "markdown",
401 |    "metadata": {},
402 |    "source": [
403 |     "Thus, we have seen that taking a distribution (pdf) $X$, and transforming it through the distribution's cdf will result in an uniform distribution, $U$, and that transforming $U$ through the quantile function of $X$ will result in the pdf of $X$. Transformations back and forth between $X$ and $U$ by careful use of the properties of the cdf and quantile functions is a useful thing to think about. [[3]](#Ref:Hetherly)"
404 |    ]
405 |   },
406 |   {
407 |    "cell_type": "markdown",
408 |    "metadata": {},
409 |    "source": [
410 |     "# References\n",
411 |     " 1. <a id='Ref:Wikipedia_Probability-integral-transform'></a> Wikipedia, [Probability integral transform](https://en.wikipedia.org/wiki/Probability_integral_transform)\n",
412 |     " 2. <a id='Ref:Cranmer'></a> K. Cranmer, [\"How do distributions transform under a change of variables?\"](http://nbviewer.jupyter.org/github/cranmer/intro-exp-phys-II/blob/master/change-of-variables.ipynb)\n",
413 |     " 3. <a id='Ref:Hetherly'></a> J. Hetherly, Discussions with the author while at CERN, October 2016"
414 |    ]
415 |   }
416 |  ],
417 |  "metadata": {
418 |   "anaconda-cloud": {},
419 |   "kernelspec": {
420 |    "display_name": "Python 3",
421 |    "language": "python",
422 |    "name": "python3"
423 |   },
424 |   "language_info": {
425 |    "codemirror_mode": {
426 |     "name": "ipython",
427 |     "version": 3
428 |    },
429 |    "file_extension": ".py",
430 |    "mimetype": "text/x-python",
431 |    "name": "python",
432 |    "nbconvert_exporter": "python",
433 |    "pygments_lexer": "ipython3",
434 |    "version": "3.7.5"
435 |   }
436 |  },
437 |  "nbformat": 4,
438 |  "nbformat_minor": 4
439 | }
440 | 


--------------------------------------------------------------------------------
/book/notebooks/simulation/Rejection-Sampling-MC.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Rejection Sampling"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "markdown",
 12 |    "metadata": {},
 13 |    "source": [
 14 |     "Rejection sampling, or \"accept-reject Monte Carlo\" is a Monte Carlo method used to generate obsrvations from distributions. As it is a Monte Carlo it can also be used for numerical integration."
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "markdown",
 19 |    "metadata": {},
 20 |    "source": [
 21 |     "## Monte Carlo Integration"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "markdown",
 26 |    "metadata": {},
 27 |    "source": [
 28 |     "### Example: Approximation of $\\pi$"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "markdown",
 33 |    "metadata": {},
 34 |    "source": [
 35 |     "Enclose a quadrant of a circle of radius $1$ in a square of side length $1$. Then uniformly sample points inside the bounds of the square in Cartesian coordinates. If the point lies inside the circle quadrant record this information. At the ends of many throws the ratio of points inside the circle to all points thrown will approximate the ratio of the area of the cricle quadrant to the area of the square"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "markdown",
 40 |    "metadata": {},
 41 |    "source": [
 42 |     "$$\n",
 43 |     "\\frac{\\text{points inside circle}}{\\text{all points thrown}} \\approx \\frac{\\text{area of circle quadrant}}{\\text{area of square}} = \\frac{\\pi r^2}{4\\, l^2} = \\frac{\\pi}{4},\n",
 44 |     "$$"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "markdown",
 49 |    "metadata": {},
 50 |    "source": [
 51 |     "thus, an approximation of $\\pi$ can be found to be"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "markdown",
 56 |    "metadata": {},
 57 |    "source": [
 58 |     "$$\n",
 59 |     "\\pi \\approx 4 \\cdot \\frac{\\text{points inside circle}}{\\text{all points thrown}}.\n",
 60 |     "$$"
 61 |    ]
 62 |   },
 63 |   {
 64 |    "cell_type": "code",
 65 |    "execution_count": null,
 66 |    "metadata": {},
 67 |    "outputs": [],
 68 |    "source": [
 69 |     "import numpy as np\n",
 70 |     "import matplotlib.pyplot as plt"
 71 |    ]
 72 |   },
 73 |   {
 74 |    "cell_type": "code",
 75 |    "execution_count": null,
 76 |    "metadata": {},
 77 |    "outputs": [],
 78 |    "source": [
 79 |     "def approximate_pi(n_throws=10000, draw=True):\n",
 80 |     "    n_circle_points = 0\n",
 81 |     "\n",
 82 |     "    x_coord = np.random.uniform(0, 1, n_throws)\n",
 83 |     "    y_coord = np.random.uniform(0, 1, n_throws)\n",
 84 |     "\n",
 85 |     "    circle_x = []\n",
 86 |     "    circle_y = []\n",
 87 |     "    outside_x = []\n",
 88 |     "    outside_y = []\n",
 89 |     "\n",
 90 |     "    for x, y in zip(x_coord, y_coord):\n",
 91 |     "        radius = np.sqrt(x ** 2 + y ** 2)\n",
 92 |     "        if 1 > radius:\n",
 93 |     "            n_circle_points += 1\n",
 94 |     "            circle_x.append(x)\n",
 95 |     "            circle_y.append(y)\n",
 96 |     "        else:\n",
 97 |     "            outside_x.append(x)\n",
 98 |     "            outside_y.append(y)\n",
 99 |     "\n",
100 |     "    approx_pi = 4 * (n_circle_points / n_throws)\n",
101 |     "    print(f\"The approximation of pi after {n_throws} throws is: {approx_pi}\")\n",
102 |     "\n",
103 |     "    if draw:\n",
104 |     "        plt.plot(circle_x, circle_y, \"ro\")\n",
105 |     "        plt.plot(outside_x, outside_y, \"bo\")\n",
106 |     "        plt.xlabel(r\"$x$\")\n",
107 |     "        plt.ylabel(r\"$y$\")\n",
108 |     "        plt.show()"
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": null,
114 |    "metadata": {},
115 |    "outputs": [],
116 |    "source": [
117 |     "approximate_pi()"
118 |    ]
119 |   },
120 |   {
121 |    "cell_type": "markdown",
122 |    "metadata": {},
123 |    "source": [
124 |     "## Sampling Distributions"
125 |    ]
126 |   },
127 |   {
128 |    "cell_type": "markdown",
129 |    "metadata": {},
130 |    "source": [
131 |     "To approximate a statistical distribution one can also use accept-reject Monte Carlo to approximate the distribution."
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "markdown",
136 |    "metadata": {},
137 |    "source": [
138 |     "### Example: Approximation of Gaussian Distribution"
139 |    ]
140 |   },
141 |   {
142 |    "cell_type": "code",
143 |    "execution_count": null,
144 |    "metadata": {},
145 |    "outputs": [],
146 |    "source": [
147 |     "import scipy.stats as stats"
148 |    ]
149 |   },
150 |   {
151 |    "cell_type": "markdown",
152 |    "metadata": {},
153 |    "source": [
154 |     "The Gaussian has a known analytic form"
155 |    ]
156 |   },
157 |   {
158 |    "cell_type": "markdown",
159 |    "metadata": {},
160 |    "source": [
161 |     "$$\n",
162 |     "f\\left(\\vec{x}\\,\\middle|\\,\\mu, \\sigma\\right) = \\frac{1}{\\sqrt{2\\pi}\\, \\sigma} e^{-\\left(x-\\mu\\right)^2/2\\sigma^2}\n",
163 |     "$$"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "code",
168 |    "execution_count": null,
169 |    "metadata": {},
170 |    "outputs": [],
171 |    "source": [
172 |     "x = np.linspace(-5.0, 5.0, num=10000)\n",
173 |     "plt.plot(x, stats.norm.pdf(x, 0, 1), linewidth=2, color=\"black\")\n",
174 |     "\n",
175 |     "# Axes\n",
176 |     "# plt.title('Plot of  $f(x;\\mu,\\sigma)$')\n",
177 |     "plt.xlabel(r\"$x$\")\n",
178 |     "plt.ylabel(r\"$f(\\vec{x}|\\mu,\\sigma)$\")\n",
179 |     "# dist_window_w = sigma * 2\n",
180 |     "plt.xlim([-5, 5])\n",
181 |     "plt.show()"
182 |    ]
183 |   },
184 |   {
185 |    "cell_type": "markdown",
186 |    "metadata": {},
187 |    "source": [
188 |     "Given this it is seen that the Gaussian's maximum is at its mean. For the standard Gaussian this is at $\\mu = 0$, and so it has a maximum at $1/\\sqrt{2\\pi}\\,\\sigma \\approx 0.39$. Thus, this can be the maximum height of a rectangle that we need to throw our points in."
189 |    ]
190 |   },
191 |   {
192 |    "cell_type": "code",
193 |    "execution_count": null,
194 |    "metadata": {},
195 |    "outputs": [],
196 |    "source": [
197 |     "def approximate_Guassian(n_throws=10000, x_range=[-5, 5], draw=True):\n",
198 |     "    n_accept = 0\n",
199 |     "\n",
200 |     "    x_coord = np.random.uniform(x_range[0], x_range[1], n_throws)\n",
201 |     "    y_coord = np.random.uniform(0, stats.norm.pdf(0, 0, 1), n_throws)\n",
202 |     "    # Use Freedman–Diaconis rule\n",
203 |     "    # https://en.wikipedia.org/wiki/Freedman%E2%80%93Diaconis_rule\n",
204 |     "    h = 2 * stats.iqr(x_coord) / np.cbrt([n_throws])\n",
205 |     "    n_bins = int((x_range[1] - x_range[0]) / h)\n",
206 |     "\n",
207 |     "    accept_x = []\n",
208 |     "    accept_y = []\n",
209 |     "    reject_x = []\n",
210 |     "    reject_y = []\n",
211 |     "\n",
212 |     "    for x, y in zip(x_coord, y_coord):\n",
213 |     "        if stats.norm.pdf(x, 0, 1) > y:\n",
214 |     "            n_accept += 1\n",
215 |     "            accept_x.append(x)\n",
216 |     "            accept_y.append(y)\n",
217 |     "        else:\n",
218 |     "            reject_x.append(x)\n",
219 |     "            reject_y.append(y)\n",
220 |     "\n",
221 |     "    if draw:\n",
222 |     "        fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(1.2 * 14, 1.2 * 4.5))\n",
223 |     "\n",
224 |     "        x_space = np.linspace(x_range[0], x_range[1], num=10000)\n",
225 |     "        axes[0].plot(accept_x, accept_y, \"ro\")\n",
226 |     "        axes[0].plot(reject_x, reject_y, \"bo\")\n",
227 |     "        axes[0].plot(x_space, stats.norm.pdf(x_space, 0, 1), linewidth=2, color=\"black\")\n",
228 |     "        axes[0].set_xlabel(r\"$x$\")\n",
229 |     "        axes[0].set_ylabel(r\"$y$\")\n",
230 |     "        axes[0].set_title(r\"Sampled space of $f(\\vec{x}|\\mu,\\sigma)$\")\n",
231 |     "\n",
232 |     "        hist_count, bins, _ = axes[1].hist(accept_x, n_bins, density=True)\n",
233 |     "        axes[1].set_xlabel(r\"$x$\")\n",
234 |     "        axes[1].set_ylabel(\"Arbitrary normalized units\")\n",
235 |     "        axes[1].set_title(r\"Normalized binned distribution of accepted toys\")\n",
236 |     "\n",
237 |     "        plt.xlim(x_range)\n",
238 |     "        plt.show()"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "code",
243 |    "execution_count": null,
244 |    "metadata": {},
245 |    "outputs": [],
246 |    "source": [
247 |     "approximate_Guassian()"
248 |    ]
249 |   },
250 |   {
251 |    "cell_type": "markdown",
252 |    "metadata": {},
253 |    "source": [
254 |     "This exercise is trivial but for more complex functional forms with more difficult integrals it can be a powerful numerical technique."
255 |    ]
256 |   }
257 |  ],
258 |  "metadata": {
259 |   "kernelspec": {
260 |    "display_name": "Python 3",
261 |    "language": "python",
262 |    "name": "python3"
263 |   },
264 |   "language_info": {
265 |    "codemirror_mode": {
266 |     "name": "ipython",
267 |     "version": 3
268 |    },
269 |    "file_extension": ".py",
270 |    "mimetype": "text/x-python",
271 |    "name": "python",
272 |    "nbconvert_exporter": "python",
273 |    "pygments_lexer": "ipython3",
274 |    "version": "3.7.5"
275 |   }
276 |  },
277 |  "nbformat": 4,
278 |  "nbformat_minor": 4
279 | }
280 | 


--------------------------------------------------------------------------------
/book/requirements.txt:
--------------------------------------------------------------------------------
1 | jupyter-book==0.12.1
2 | 


--------------------------------------------------------------------------------
/dev-requirements.txt:
--------------------------------------------------------------------------------
1 | # lint
2 | pre-commit
3 | pyupgrade
4 | nbqa
5 | black
6 | # test
7 | papermill~=2.0
8 | pytest
9 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.black]
 2 | line-length = 88
 3 | include = '\.pyi?$'
 4 | exclude = '''
 5 | /(
 6 |     \.git
 7 |   | .eggs
 8 |   | build
 9 | )/
10 | '''
11 | 
12 | [tool.nbqa.config]
13 | black = "pyproject.toml"
14 | 
15 | [tool.nbqa.mutate]
16 | black = 1
17 | pyupgrade = 1
18 | 
19 | [tool.nbqa.addopts]
20 | pyupgrade = ["--py36-plus"]
21 | 


--------------------------------------------------------------------------------
/tests/test_notebooks.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import papermill as pm
 3 | import pytest
 4 | 
 5 | 
 6 | @pytest.fixture()
 7 | def common_kwargs(tmpdir):
 8 |     outputnb = tmpdir.join("output.ipynb")
 9 |     return {
10 |         "output_path": str(outputnb),
11 |         "kernel_name": f"python{sys.version_info.major}",
12 |     }
13 | 
14 | 
15 | def test_likelihood_function(common_kwargs):
16 |     pm.execute_notebook(
17 |         "book/notebooks/Introductory/Likelihood-Function.ipynb", **common_kwargs
18 |     )
19 | 
20 | 
21 | def test_error_on_means(common_kwargs):
22 |     pm.execute_notebook(
23 |         "book/notebooks/Introductory/Error-on-means.ipynb", **common_kwargs
24 |     )
25 | 
26 | 
27 | def test_gaussian_sampling(common_kwargs):
28 |     pm.execute_notebook(
29 |         "book/notebooks/Introductory/Gaussian-Sampling.ipynb", **common_kwargs
30 |     )
31 | 
32 | 
33 | def test_normal_dist(common_kwargs):
34 |     pm.execute_notebook(
35 |         "book/notebooks/Introductory/Gaussian-Distribution.ipynb", **common_kwargs
36 |     )
37 | 
38 | 
39 | def test_chi_square_dist(common_kwargs):
40 |     pm.execute_notebook(
41 |         "book/notebooks/Introductory/Chi-Squared-Distribution.ipynb", **common_kwargs
42 |     )
43 | 
44 | 
45 | def test_probability_integral_transform(common_kwargs):
46 |     pm.execute_notebook(
47 |         "book/notebooks/Introductory/probability-integral-transform.ipynb",
48 |         **common_kwargs,
49 |     )
50 | 
51 | 
52 | def test_rejection_sampling(common_kwargs):
53 |     pm.execute_notebook(
54 |         "book/notebooks/simulation/Rejection-Sampling-MC.ipynb", **common_kwargs
55 |     )
56 | 
57 | 
58 | def test_extended_likelihood(common_kwargs):
59 |     pm.execute_notebook("book/notebooks/HEP/Extended-Likelihood.ipynb", **common_kwargs)
60 | 


--------------------------------------------------------------------------------