├── .github └── workflows │ └── tests.yml ├── .gitignore ├── .markdownlint.yaml ├── .pre-commit-config.yaml ├── .spell.utf-8.add ├── LICENSE ├── README.md ├── intro-toc.png ├── notebooks ├── T1 - DA & EnKF.ipynb ├── T2 - Gaussian distribution.ipynb ├── T3 - Bayesian inference.ipynb ├── T4 - Time series filtering.ipynb ├── T5 - Multivariate Kalman filter.ipynb ├── T6 - Geostats & Kriging [optional].ipynb ├── T7 - Chaos & Lorenz [optional].ipynb ├── T8 - Monte-Carlo & ensembles.ipynb ├── T9 - Writing your own EnKF.ipynb ├── dpr_config.yaml ├── resources │ ├── DA_bridges.jpg │ ├── HMM.svg │ ├── HMM.tex │ ├── __init__.py │ ├── answers.py │ ├── colab_bootstrap.sh │ ├── darc_envisat_analyses.mp4 │ ├── exc-2.4-iii.png │ ├── exc-2.5-iv.png │ ├── exc-2.5.png │ ├── illust_EnKF │ │ ├── illust_EnKF.py │ │ ├── illust_EnKF_0.png │ │ ├── illust_EnKF_1.png │ │ ├── illust_EnKF_2.png │ │ ├── illust_EnKF_3.png │ │ ├── illust_EnKF_4.png │ │ ├── illust_EnKF_5.png │ │ ├── illust_EnKF_6.png │ │ └── illust_EnKF_7.png │ └── macros.py └── scripts │ ├── T1 - DA & EnKF.md │ ├── T1 - DA & EnKF.py │ ├── T2 - Gaussian distribution.md │ ├── T2 - Gaussian distribution.py │ ├── T3 - Bayesian inference.md │ ├── T3 - Bayesian inference.py │ ├── T4 - Time series filtering.md │ ├── T4 - Time series filtering.py │ ├── T5 - Multivariate Kalman filter.md │ ├── T5 - Multivariate Kalman filter.py │ ├── T6 - Geostats & Kriging [optional].md │ ├── T6 - Geostats & Kriging [optional].py │ ├── T7 - Chaos & Lorenz [optional].md │ ├── T7 - Chaos & Lorenz [optional].py │ ├── T8 - Monte-Carlo & ensembles.md │ ├── T8 - Monte-Carlo & ensembles.py │ ├── T9 - Writing your own EnKF.md │ └── T9 - Writing your own EnKF.py ├── requirements-dev.txt ├── requirements.txt └── tests └── test_all.py /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master", "dev" ] 8 | schedule: 9 | - cron: '0 9 * * MON' 10 | 11 | permissions: 12 | contents: read 13 | 14 | jobs: 15 | main: 16 | 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | - name: Set up Python 3.12 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: "3.12" # try to keep similar to Colab 25 | 26 | # Takes too long to run (>40min) 27 | # Could use docker instead? 
https://github.com/wsvn53/docker-colab-runtime-local 28 | # - name: Setup environment similar to Colab 29 | # run: | 30 | # python -m pip install --upgrade pip 31 | # wget https://raw.githubusercontent.com/googlecolab/backend-info/main/pip-freeze.txt -O colab-freeze.txt 32 | # cat colab-freeze.txt | grep -v '^#' | xargs -n 1 pip install # 1-at-a-time ⇒ ignore errors 33 | 34 | - name: Install dependencies 35 | run: | 36 | python -m pip install --upgrade pip 37 | pip install -r requirements-dev.txt 38 | - name: Run tests 39 | run: | 40 | tests/test_all.py 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See more at https://www.gitignore.io 2 | 3 | ############################## 4 | ### macOS ### 5 | .DS_Store 6 | ._* 7 | 8 | ############################## 9 | ### Windows ### 10 | ############################## 11 | Thumbs.db 12 | [Dd]esktop.ini 13 | 14 | ############################## 15 | ### Python ### 16 | ############################## 17 | # Byte-compiled / optimized / DLL files 18 | __pycache__/ 19 | *.py[cod] 20 | *$py.class 21 | 22 | # C extensions 23 | *.so 24 | 25 | # Distribution / packaging 26 | .Python 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | pip-wheel-metadata/ 40 | share/python-wheels/ 41 | *.egg-info/ 42 | .installed.cfg 43 | *.egg 44 | MANIFEST 45 | 46 | # Sphinx documentation 47 | docs/_build/ 48 | 49 | # Jupyter Notebook 50 | .ipynb_checkpoints 51 | 52 | 53 | ########## 54 | # Custom # 55 | ########## 56 | tests/T1*.py 57 | tests/T2*.py 58 | tests/T3*.py 59 | tests/T4*.py 60 | tests/T5*.py 61 | tests/T6*.py 62 | tests/T7*.py 63 | tests/T8*.py 64 | tests/T9*.py 65 | notebooks/resources/illust_EnKF/from_Matlab 66 | -------------------------------------------------------------------------------- /.markdownlint.yaml: -------------------------------------------------------------------------------- 1 | # markdownlint (js library) config 2 | heading-increment: false 3 | no-inline-html: false 4 | emphasis-style: false 5 | first-line-h1: false 6 | line-length: false 7 | # MD029: 8 | # style: "ordered" 9 | # MD007: false 10 | # "MD007": { "indent": 4 } 11 | # Checks anchor links, but insists on de-capitalization but that breaks links (tested in chrome) 12 | link-fragments: false 13 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Use `pre-commit autoupdate --bleeding-edge` to set to most recent version 2 | repos: 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: v3.4.0 5 | hooks: 6 | - id: check-yaml 7 | # - id: check-added-large-files 8 | - id: detect-private-key 9 | - id: check-merge-conflict 10 | - id: debug-statements 11 | # Want to custom organize requirements 12 | # - id: requirements-txt-fixer 13 | - id: end-of-file-fixer 14 | - id: mixed-line-ending 15 | args: ['--fix=no'] 16 | - id: trailing-whitespace 17 | exclude: | 18 | (?x)( # make whitespace in this regex insignificant and allow comments 19 | ^README.md| # I use double-space line-endings a lot in my MD. 20 | ^notebooks/resources/answers.py| # MD also used in answers. 21 | ^notebooks/scripts/.*.py| # MD also used in answers. 
22 | ) 23 | 24 | - repo: https://github.com/patnr/nbhooks.git 25 | rev: v1.4.1 26 | hooks: 27 | - id: nb-ensure-clean 28 | # Optional WHITELIST of metadata keys (you can use regex) 29 | args: [--meta, pin_output, --meta, lines_to_next_cell, --meta, lines_to_end_of_cell_marker] 30 | 31 | - repo: https://github.com/mwouts/jupytext 32 | rev: v1.17.2 # ensure equal to main venv 33 | hooks: 34 | - id: jupytext 35 | args: [--sync] 36 | -------------------------------------------------------------------------------- /.spell.utf-8.add: -------------------------------------------------------------------------------- 1 | T1 2 | T2 3 | T3 4 | T4 5 | T5 6 | T6 7 | T7 8 | T8 9 | T9 10 | Theoria Motus Corporum Coelestium in Sectionibus Conicis Solem Ambientium 11 | checkmarks 12 | Lighthill-Whitham-Richards 13 | Exc 14 | Théorie Analytique des Probabilités 15 | TrueSkill 16 | Glicko 17 | Michaelis-Menten 18 | pdf's 19 | Lklhd 20 | BR2 21 | G1 22 | AR1 23 | DynMod 24 | ObsMod 25 | dropdown 26 | C1 27 | C2 28 | K1 29 | numpy's 30 | LG1 31 | Pötscher 32 | Preinerstorfer 33 | Benedikt 34 | Nodet 35 | Asch 36 | Wikle 37 | Wikle, C. K. 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Nansen Environmental and Remote Sensing Center 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intro to data assimilation (DA) and the EnKF 2 | 3 | An interactive (Jupyter notebook) tutorial. 4 | Jump right in (no installation!) by clicking 5 | the button of one of these cloud computing providers: 6 | 7 | - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://colab.research.google.com/github/nansencenter/DA-tutorials) 8 | (requires Google login) 9 | - [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nansencenter/DA-tutorials/master) 10 | (no login but can be slow to start) 11 | 12 | *Prerequisites*: basics of calculus, matrices (e.g. inverses), 13 | random variables, Python (numpy). 14 | 15 | ![ToC](./intro-toc.png) 16 | 17 | ### Instructions for working locally 18 | 19 | If you prefer, you can also run these notebooks on your own (Linux/Windows/Mac) computer. 
20 | This is a bit snappier than running them online. 21 | 22 | 1. **Prerequisite**: Python 3.12. 23 | If you're an expert, setup a python environment however you like. 24 | Otherwise: 25 | Install [Anaconda](https://www.anaconda.com/download), then 26 | open the [Anaconda terminal](https://docs.conda.io/projects/conda/en/latest/user-guide/getting-started.html#starting-conda) 27 | and run the following commands: 28 | 29 | ```bash 30 | conda create --yes --name my-env python=3.12 31 | conda activate my-env 32 | python --version 33 | ``` 34 | 35 | Ensure the printed version is 3.12. 36 | *Keep using the same terminal for the commands below.* 37 | 38 | 2. **Install**: 39 | 40 | - Download and unzip (or `git clone`) 41 | this repository (see the green button up top) 42 | - Move the resulting folder wherever you like 43 | - `cd` into the folder 44 | - Install requirements: 45 | `pip install -r path/to/requirements.txt` 46 | 47 | 3. **Launch the Jupyter notebooks**: 48 | 49 | - Launch the "notebook server" by executing: 50 | `jupyter-notebook` 51 | This will open up a page in your web browser that is a file navigator. 52 | - Enter the folder `DA-tutorials/notebooks`, and click on a tutorial (`T1... .ipynb`). 53 | 54 | 55 | 56 | 57 | ## Developer notes 58 | 59 | *Please don't hesitate to submit issues or pull requests!* 60 | 61 | [![GitHub CI](https://github.com/nansencenter/DA-tutorials/actions/workflows/tests.yml/badge.svg)](https://github.com/nansencenter/DA-tutorials/actions) 62 | 63 | #### Why `scripts/` dir? 64 | 65 | - Easier to read git diffs 66 | - Enable importing from notebook (script mirrors) 67 | 68 | ## Target audience 69 | 70 | Students in their 3rd year or above in university. 71 | 72 | Prerequisites: some experience with of numpy (Python) as well as 1st-year, university-level statistics and linear algebra. 
73 | 74 | ## Citing 75 | 76 | Please cite as 77 | 78 | ```bib 79 | @phdthesis{raanes2016thesis, 80 | author = {Raanes, Patrick N.}, 81 | title = {Improvements to Ensemble Methods for Data Assimilation in the Geosciences}, 82 | school = {University of Oxford}, 83 | year = {2016}, 84 | month = {January} 85 | } 86 | ``` 87 | -------------------------------------------------------------------------------- /intro-toc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/intro-toc.png -------------------------------------------------------------------------------- /notebooks/dpr_config.yaml: -------------------------------------------------------------------------------- 1 | liveplotting: no 2 | store_u: yes 3 | -------------------------------------------------------------------------------- /notebooks/resources/DA_bridges.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/DA_bridges.jpg -------------------------------------------------------------------------------- /notebooks/resources/HMM.tex: -------------------------------------------------------------------------------- 1 | % Convert to svg using pdf2svg or https://pdftoimage.com/pdf-to-svg 2 | 3 | \documentclass{standalone} 4 | \usepackage{tikz} 5 | \usetikzlibrary{positioning, arrows.meta} 6 | 7 | \usepackage{bm} 8 | \newcommand{\mat}[1]{{\mathbf{{#1}}}} 9 | % \newcommand{\vect}[1]{{\mathbf{#1}}} 10 | \newcommand{\vect}[1]{{\bm{#1}}} 11 | \newcommand{\x}[0]{\vect{x}} 12 | \newcommand{\y}[0]{\vect{y}} 13 | 14 | \begin{document} 15 | \begin{tikzpicture}[ 16 | very thick, 17 | font=\large, 18 | state/.style={circle, draw, minimum size=1cm, text centered, inner sep=0pt}, 19 | obs/.style={circle, draw, minimum size=1cm, text centered, inner sep=0pt}, 20 | arrow/.style={-latex}, 21 | node distance=1.9cm 22 | ] 23 | % States 24 | \node[state] (x0) {$\x_0$}; 25 | \node[state, right=of x0] (x1) {$\x_1$}; 26 | \node[right=3mm of x1] (x2dots) {\Huge \hspace{-2mm}$\dots$}; 27 | \node[state, right=of x2dots] (xk) {$\x_k$}; 28 | \node[right=3mm of xk] (xkdots) {\Huge \hspace{-2mm} $\dots$}; 29 | \node[state, right=of xkdots] (xK) {$\x_K$}; 30 | 31 | % Observations 32 | % \node[obs, below=of x0] (y0) {$\y_0$}; 33 | \node[obs, below=of x1] (y1) {$\y_1$}; 34 | \node[obs, below=of xk] (yk) {$\y_k$}; 35 | \node[obs, below=of xK] (yK) {$\y_K$}; 36 | 37 | % Dynamical model 38 | \draw[arrow] (x0) to[bend left=15] node[midway, above] {$p(\x_1 | \x_{0})$} (x1); 39 | \draw[arrow] (x2dots) to[bend left=15] node[midway, above] {$p(\x_k | \x_{k-1})$} (xk); 40 | \draw[arrow] (xkdots) to[bend left=15] node[midway, above] {$p(\x_K | \x_{K-1})$} (xK); 41 | 42 | % Observation model 43 | % \draw[arrow] (x0) to[bend left=15] node[pos=0.35, left] {$\mathscr{H}_0$} (y0); 44 | \draw[arrow] (x1) to[bend left=15] node[pos=0.35, right] {$p(\y_1 | \x_1)$} (y1); 45 | \draw[arrow] (xk) to[bend left=15] node[pos=0.35, right] {$p(\y_k | \x_k)$} (yk); 46 | \draw[arrow] (xK) to[bend left=15] node[pos=0.35, left, xshift=1mm] {$p(\y_K | \x_K)$} (yK); 47 | 48 | % Horizontal line and labels 49 | \draw[dashed, draw=gray!90, line width=0.7pt] (x0.west |- 0,-1.8) -- (xK.east |- 0,-1.8); 50 | \node[anchor=south west, align=left, fill=yellow!15, xshift=1mm] at (x0.west |- 0,-1.7) {\normalsize Markov 
chain\\\normalsize(hidden states)}; 51 | \node[anchor=north west, align=left, fill=yellow!15, xshift=1mm] at (x0.west |- 0,-1.9) {\normalsize Measurements\\\normalsize(observed)}; 52 | 53 | 54 | \end{tikzpicture} 55 | \end{document} 56 | -------------------------------------------------------------------------------- /notebooks/resources/__init__.py: -------------------------------------------------------------------------------- 1 | """Additional styles and resources for tutorials. 2 | 3 | Our didactic goal is to put as little as possible in here. 4 | """ 5 | import os 6 | from pathlib import Path 7 | 8 | import numpy as np 9 | import matplotlib as mpl 10 | import mpl_tools 11 | 12 | 13 | import matplotlib.pyplot as plt 14 | plt.style.use("seaborn-v0_8") 15 | 16 | 17 | # Should PRECEDE plt.ion() 18 | try: 19 | # Note: Colab only supports `%matplotlib inline` ⇒ no point loading other. 20 | # NOTE: Colab: must use plt.show() to avoid duplicate figures. 21 | import google.colab # type: ignore 22 | # Colab only supports mpl inline backend 23 | 24 | # Make figures and fonts larger. 25 | mpl.rcParams.update({'font.size': 15}) 26 | mpl.rcParams.update({'figure.figsize': [10,6]}) 27 | except ImportError: 28 | if mpl_tools.is_notebook_or_qt: 29 | # NB: `nbAgg` steals focus from interactive sliders, 30 | # and re-generates entire figure (not just canvas). 31 | # mpl.use('nbAgg') # = %matplotlib notebook 32 | pass # all notebooks use `%matplotlib inline` anyway 33 | else: 34 | # Regular python (or ipython) session 35 | pass 36 | 37 | # Must NOT be in 1st cell of the notebook, 38 | # because Colab does %matplotlib inline at startup (I think), resetting rcParams. 39 | mpl.rcParams.update({'lines.linewidth': 2.5}) 40 | 41 | # Load answers 42 | from .answers import show_answer 43 | 44 | # Load widgets 45 | from ipywidgets import Image, interactive, HBox, VBox, IntSlider, SelectMultiple 46 | from IPython.display import display 47 | 48 | 49 | def interact(top=None, right=None, bottom=None, left=None, **kwargs): 50 | """Like `ipywidgets.interact(**kwargs)` but with layout shortcuts. 51 | 52 | Also provides `disable` function to help importing notebooks. 53 | 54 | Set `bottom` or any other `side` argument to `True` to place all controls there, 55 | relative to the central output (typically figure). 56 | Otherwise, use a list (or comma-separated string) to select which controls to place there. 57 | Use *nested* lists to re-group/order them. 58 | The underlying mechanism is CSS flex box (typically without "wrap"). 59 | 60 | If the last element of a `side` is a dict, then it will be written as attributes 61 | to the CSS `layout` attribute, ref [1]. 62 | Support for the `style` attribute [2] is not yet implemented. 63 | 64 | Similarly, if the last element of any `kwargs` is a dict, then it will be written as attributes 65 | (e.g. `description (str)`, 'readout (bool)', `continuous_update (bool)`, `orientation (str)`) 66 | to the widget, ref [3]. 67 | 68 | Only tested with "inline" backend (Colab and locally). 69 | Also see `~/P/HistoryMatching/tools/plotting.py` 70 | 71 | [1]: https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Layout.html 72 | [2]: https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Styling.html 73 | [3]: https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20List.html# 74 | 75 | Example: 76 | 77 | >>> v = dict(orientation="vertical", layout=dict(height="80%")) 78 | ... @interact(a=(1., 6., v), 79 | ... b=(1., 7.), 80 | ... bottom=True, # put rest here 81 | ... 
top='b,c', 82 | ... right=[['a', dict(height="100%", align_items="center")],['e']]) 83 | ... def f(a=3.0, b=4, c=True, d=5, e=6): 84 | ... plt.figure(figsize=(4, 5)) 85 | ... xx = np.linspace(0, 3, 21) 86 | ... if c: plt.plot(xx, e*d/a + xx**b) 87 | ... else: plt.plot(xx, b + xx) 88 | ... plt.show() 89 | """ 90 | 91 | def get_dict(iterable): 92 | if iterable and isinstance(iterable[-1], dict): 93 | return iterable[-1] 94 | else: 95 | return {} 96 | 97 | def boxit(ww, horizontal=True): 98 | """Apply box to lists, recursively (alternating between `HBox` and `VBox`).""" 99 | if (layout := get_dict(ww)): 100 | ww = ww[:-1] 101 | 102 | for i, w in enumerate(ww): 103 | if hasattr(w, '__iter__'): 104 | ww[i] = boxit(w, not horizontal) 105 | 106 | box = HBox if horizontal else VBox 107 | return box(ww, layout=layout) 108 | 109 | def pop_widgets(ww, labels): 110 | """Replace items in nested list `labels` by matching elements from `ww`. 111 | 112 | Essentially `[ww.pop(i) for i, w in enumerate(ww) if w.description == lbl]` 113 | but if `w` is a list, then recurse. 114 | """ 115 | # Validate 116 | if not labels: 117 | return [] 118 | elif labels == True: 119 | cp = ww.copy() 120 | ww.clear() 121 | return cp 122 | elif isinstance(labels, str): 123 | labels = labels.split(',') 124 | 125 | # Main 126 | ww2 = [] 127 | for lbl in labels: 128 | if isinstance(lbl, dict): 129 | # Forward as is 130 | w = lbl 131 | elif isinstance(lbl, list): 132 | # Recurse 133 | w = pop_widgets(ww, lbl) 134 | else: 135 | # Pop 136 | i = [i for i, w in enumerate(ww) if w.description == lbl] 137 | try: 138 | i = i[0] 139 | except IndexError: 140 | raise IndexError(f'Did you specify {lbl} twice in the layout?') 141 | w = ww.pop(i) 142 | ww2.append(w) 143 | return ww2 144 | 145 | sides = dict(top=top, right=right, bottom=bottom, left=left) 146 | 147 | # Pop attributes (if any) for controls 148 | attrs = {} 149 | for key, iterable in kwargs.items(): 150 | if (dct := get_dict(iterable)): 151 | attrs[key] = dct 152 | kwargs[key] = type(iterable)(iterable[:-1]) # preserve list or tuple 153 | 154 | def decorator(fun): 155 | # Auto-parse kwargs, add 'observers' 156 | linked = interactive(fun, **kwargs) 157 | *ww, out = linked.children 158 | # display(HBox([out, VBox(ww)])) 159 | 160 | # Styling of individual control widgets 161 | for w in ww: 162 | for attr, val in attrs.get(w.description, {}).items(): 163 | setattr(w, attr, val) 164 | # Defaults 165 | try: 166 | # Disable continuous_update on Colab 167 | import google.colab # type: ignore 168 | w.continuous_update = False 169 | except ImportError: 170 | pass 171 | w.style.description_width = "max-content" 172 | if getattr(w, 'orientation', '') == "vertical": 173 | w.layout.width = "2em" 174 | 175 | on = {side: pop_widgets(ww, labels) for side, labels in sides.items()} 176 | on['right'] = ww + on['right'] # put any remainder on the right (before any dict) 177 | 178 | # Dashbord composition 179 | # I considered AppLayout, but was more comfortable with combining boxes 180 | left = boxit(on['left'], False) 181 | right = boxit(on['right'], False) 182 | top = boxit(on['top'], True) 183 | bottom = boxit(on['bottom'], True) 184 | 185 | dashboard = VBox([top, HBox([left, out, right]), bottom]) 186 | 187 | display(dashboard); 188 | linked.update() # necessary on Colab 189 | 190 | if interact.disabled: 191 | # Used with hacky `import_from_nb` 192 | return (lambda fun: (lambda _: None)) 193 | elif not mpl_tools.is_notebook_or_qt: 194 | # Return dummy (to plot without interactivity) 195 | return 
(lambda fun: fun()) 196 | else: 197 | return decorator 198 | 199 | interact.disabled = False 200 | 201 | 202 | def cInterval(mu, sigma2, flat=True): 203 | """Compute +/- 1-sigma (std.dev.) confidence/credible intervals (CI).""" 204 | s1 = np.sqrt(sigma2) 205 | a = mu - s1 206 | b = mu + s1 207 | if flat: 208 | return a.flatten(), b.flatten() 209 | else: 210 | return a, b 211 | 212 | 213 | def axes_with_marginals(): 214 | from matplotlib import pyplot as plt 215 | fig, ((ax, yax), (xax, _)) = plt.subplots( 216 | 2, 2, sharex='col', sharey='row', 217 | figsize=(6, 6), 218 | gridspec_kw={'height_ratios':[5,1], 219 | 'width_ratios' :[5,1], 220 | 'wspace': .1, 221 | 'hspace': .1}) 222 | _.set_visible(False) 223 | ax.set_aspect('equal') 224 | return fig, (ax, yax, xax) 225 | 226 | 227 | def get_jointplotter(grid1d): 228 | fig, (ax, yax, xax) = axes_with_marginals() 229 | dx = grid1d[1] - grid1d[0] 230 | def plotter(Z, colors=None, alpha=.3, linewidths=1, **kwargs): 231 | Z = Z / Z.sum() / dx**2 232 | lvls = np.logspace(-3, 3, 21) 233 | # h = ax.contourf(grid1d, grid1d, Z, colors=colors, levels=lvls, alpha=alpha) 234 | # _ = ax.contour(grid1d, grid1d, Z, colors='black', levels=lvls, linewidths=.7, alpha=alpha) 235 | h = ax.contour(grid1d, grid1d, Z, colors=colors, levels=lvls, linewidths=linewidths, **kwargs) 236 | 237 | margx = dx * Z.sum(0) 238 | margy = dx * Z.sum(1) 239 | xax.fill_between(grid1d, margx, color=colors, alpha=alpha) 240 | yax.fill_betweenx(grid1d, 0, margy, color=colors, alpha=alpha) 241 | 242 | return h.legend_elements()[0][0] 243 | return ax, plotter 244 | 245 | 246 | def frame(data, ax, zoom=1): 247 | """Do `ax.set_{x/y/z}lim()` based on `data`, using given `zoom` (power of 10).""" 248 | zoom = 10**(zoom - 1) 249 | for ens, dim in zip(data.T, 'xyz'): 250 | a = ens.min() 251 | b = ens.max() 252 | m = (a + b)/2 253 | w = b - a 254 | setter = getattr(ax, f'set_{dim}lim') 255 | setter([m - w/2/zoom, 256 | m + w/2/zoom]) 257 | 258 | 259 | def envisat_video(): 260 | caption = """Illustration of DA for the ozone layer in 2002. 261 |

262 | LEFT: Satellite data (i.e. all that is observed). 263 | RIGHT: Simulation model with assimilated data. 264 |

265 | Could you have perceived the splitting of the ozone hole only from the satellite data? 266 |

267 | Attribution: William A. Lahoz, DARC. 268 | """ 269 | 270 | import io 271 | import base64 272 | from IPython.display import HTML 273 | 274 | video = io.open(Path(__file__).parent / 'darc_envisat_analyses.mp4', 'r+b').read() 275 | encoded = base64.b64encode(video) 276 | vid = HTML(data=''' 277 |
278 | 281 |
{1}
282 |
283 | '''.format(encoded.decode('ascii'),caption)) 284 | return vid 285 | 286 | 287 | def EnKF_animation(): 288 | # Initialize 289 | path_ = str(Path(__file__).parent / "illust_EnKF/illust_EnKF_") 290 | image = Image( 291 | value=open(path_ + "1.png", "rb").read(), 292 | format='png', 293 | width=800, 294 | height=600, 295 | ) 296 | 297 | def update_image(i=1): 298 | image.value=open(path_ + str(i) + ".png", "rb").read() 299 | 300 | slider = interactive(update_image, i=(1, 7, 1)) 301 | return VBox([slider, image]) 302 | 303 | 304 | def import_from_nb(name: str, objs: list): 305 | """Import `objs` from `notebooks/name*.py` (1st match). 306 | 307 | This is of course a terrible hack: 308 | 309 | - Necessitates that imported notebook contain only light computations 310 | (unless controlled by interact.disabled) 311 | - Does not include any changes made by students. This is mainly a benefit, 312 | but could be said to break the principle of least surprise. 313 | - Students might benefit from a little repetition anyway. 314 | 315 | But notebooks are learning materials -- not production code -- 316 | and this helps tie together different tutorials of the course. 317 | """ 318 | NBDIR = Path(__file__).parents[1] 319 | notebk = next(NBDIR.glob(name + "*.ipynb")) 320 | script = (NBDIR / "scripts" / notebk.relative_to(NBDIR)).with_suffix('.py') 321 | import_from_nb.nesting_level += 1 322 | 323 | interact.disabled = True 324 | try: 325 | name = str(script.relative_to(NBDIR).with_suffix("")).replace(os.sep, ".") 326 | module = __import__(name) 327 | script = getattr(module, script.stem) # works despite weird chars 328 | finally: 329 | # Dont re-enable if nested 330 | if not import_from_nb.nesting_level >= 2: 331 | interact.disabled = False 332 | import_from_nb.nesting_level -= 1 333 | return [getattr(script, x) for x in objs] 334 | 335 | import_from_nb.nesting_level = 0 336 | -------------------------------------------------------------------------------- /notebooks/resources/colab_bootstrap.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Colab doesn't provide 4 | # - Auto-installing requirements.txt 5 | # - Pre-loading data/modules (aside from the notebook itself) 6 | # This script takes care of the above by cloning the full (shallow) repo. 7 | 8 | # Install requirements 9 | main () { 10 | set -e 11 | 12 | # Clear any existing REPO for a fresh git clone 13 | rm -rf REPO 14 | 15 | # Download repo 16 | URL=https://github.com/nansencenter/DA-tutorials.git 17 | if [[ ! -d REPO ]]; then git clone --depth=1 $URL REPO; fi 18 | 19 | # https://pythonspeed.com/articles/upgrade-pip/ 20 | pip install --upgrade pip 21 | 22 | # Install requirements 23 | pip install -r REPO/requirements.txt 24 | 25 | # Put notebook/ (including hidden files) in PWD 26 | shopt -s dotglob 27 | cp -r REPO/notebooks/* ./ 28 | } 29 | 30 | # Only run if we're on colab 31 | if python -c "import google.colab" 2>/dev/null; then 32 | 33 | # Use `bash -s -- --debug` to get verbose output 34 | if echo $@ | grep -E -- '(--debug|-v)' > /dev/null ; then 35 | main 36 | else 37 | # Quiet 38 | main > /dev/null 2>&1 39 | fi 40 | 41 | echo "Initialization for Colab done." 42 | else 43 | echo "Not running on Colab => Didn't do anything." 
44 | fi 45 | -------------------------------------------------------------------------------- /notebooks/resources/darc_envisat_analyses.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/darc_envisat_analyses.mp4 -------------------------------------------------------------------------------- /notebooks/resources/exc-2.4-iii.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/exc-2.4-iii.png -------------------------------------------------------------------------------- /notebooks/resources/exc-2.5-iv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/exc-2.5-iv.png -------------------------------------------------------------------------------- /notebooks/resources/exc-2.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/exc-2.5.png -------------------------------------------------------------------------------- /notebooks/resources/illust_EnKF/illust_EnKF.py: -------------------------------------------------------------------------------- 1 | """Script to patch png figures 2 | from Matlab script DATUM/illust_EnKF_1.m 3 | together with text titles, as given below. 4 | """ 5 | 6 | from pathlib import Path 7 | 8 | import matplotlib as mpl 9 | 10 | mpl.rcParams["text.latex.preamble"] = r"\usepackage{mathrsfs}" 11 | 12 | from matplotlib.image import imread 13 | import matplotlib.pyplot as plt 14 | 15 | plt.ion() 16 | 17 | txts = [] 18 | txts += [ 19 | "We consider a single cycle of the EnKF, " 20 | "starting with the analysis state\n" 21 | "at time $(k-1)$. " 22 | "The contours are level curves of " 23 | "$\|\mathbf{x}-\mathbf{\\bar{x}}^{a}_{k-1}\|_{\mathbf{\\bar{P}}^{a}_{k-1}}$..." 24 | ] 25 | txts += [ 26 | "We consider a single cycle of the EnKF, " 27 | "starting with the analysis ensemble\n" 28 | "at time $(k-1)$, $\{\mathbf{x}_n^{a}\}_{n=1..N}$, " 29 | "with mean and cov. estimates $\mathbf{\\bar{x}}^{a}_{k-1}$ and $\mathbf{\\bar{P}}^{a}_{k-1}$.\n" 30 | "The contours are level curves of " 31 | "$\|\mathbf{x}-\mathbf{\\bar{x}}^{a}_{k-1}\|_{\mathbf{\\bar{P}}^{a}_{k-1}}$.", 32 | ] 33 | txts += [ 34 | "The ensemble is forecasted from time $(k-1)$ to $k$ " 35 | "by the dyn. " 36 | "model $\mathscr{M}$.\n We now denote it using the superscript $f$." 37 | ] 38 | txts += [ 39 | "Now we consider the analysis at time $k$. The ensemble \emph{could} be used\n" 40 | "to compute the estimates $\mathbf{\\bar{x}}^{f}_k$ and $\mathbf{\\bar{P}}^{f}_k$, " 41 | "hence the new contour curves." 42 | ] 43 | txts += ["Whereupon an obs. likelihood..."] 44 | txts += ["...\emph{would} yield a posterior by Bayes' rule."] 45 | txts += [ 46 | "What we \emph{equivalently} do instead,\nis to compute the Kalman gain " 47 | "using the estimate $\mathbf{\\bar{P}}^{f}_k$." 48 | ] 49 | txts += [ 50 | "The Kalman gain is then used to shift the ensemble.\n" 51 | "We know that it gets shifted to where the (implicit) posterior lies.\n" 52 | "The cycle can then begin again, from $k$ to $k+1$." 
53 | ] 54 | 55 | # Hack to keep line-spacing constant with/out TeX 56 | placeholder = "\phantom{$\{x_n^f\}_{n=1}^N$}" 57 | placeholder += "." # phantom w/o anything causes stuff to disappear 58 | for i, t in enumerate(txts): 59 | t = t.split("\n") 60 | t = [placeholder] * (2 - len(t)) + t # ensure 2 lines 61 | # t = [ln+LE for ln in t] 62 | txts[i] = "\n".join(t) 63 | 64 | 65 | def crop(img): 66 | "Crop Matlab-outputted image" 67 | top = int(0.15 * img.shape[0]) 68 | btm = int((1 - 0.20) * img.shape[0]) 69 | lft = int(0.10 * img.shape[1]) 70 | rgt = int((1 - 0.09) * img.shape[1]) 71 | return img[top:btm, lft:rgt] 72 | 73 | 74 | PWD = Path(__file__).parent 75 | 76 | 77 | def illust_EnKF(i): 78 | plt.close(1) 79 | plt.figure(1, figsize=(8, 6)) 80 | axI = plt.subplot(111) 81 | axI.set_axis_off() 82 | name = "illust_EnKF_prez_" + str(i + 8) + ".png" 83 | name = PWD / "from_Matlab" / name 84 | img = imread(name) 85 | img = crop(img) 86 | axI.imshow(img) 87 | axI.set_title(txts[i], loc="left", usetex=True, size=15) 88 | 89 | 90 | for i, txt in enumerate(txts): 91 | illust_EnKF(i) 92 | plt.pause(0.2) 93 | name = "illust_EnKF_" + str(i) + ".png" 94 | print("Saving", PWD / name) 95 | plt.savefig(PWD / name) 96 | -------------------------------------------------------------------------------- /notebooks/resources/illust_EnKF/illust_EnKF_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/illust_EnKF/illust_EnKF_0.png -------------------------------------------------------------------------------- /notebooks/resources/illust_EnKF/illust_EnKF_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/illust_EnKF/illust_EnKF_1.png -------------------------------------------------------------------------------- /notebooks/resources/illust_EnKF/illust_EnKF_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/illust_EnKF/illust_EnKF_2.png -------------------------------------------------------------------------------- /notebooks/resources/illust_EnKF/illust_EnKF_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/illust_EnKF/illust_EnKF_3.png -------------------------------------------------------------------------------- /notebooks/resources/illust_EnKF/illust_EnKF_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/illust_EnKF/illust_EnKF_4.png -------------------------------------------------------------------------------- /notebooks/resources/illust_EnKF/illust_EnKF_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/illust_EnKF/illust_EnKF_5.png -------------------------------------------------------------------------------- /notebooks/resources/illust_EnKF/illust_EnKF_6.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/illust_EnKF/illust_EnKF_6.png -------------------------------------------------------------------------------- /notebooks/resources/illust_EnKF/illust_EnKF_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/illust_EnKF/illust_EnKF_7.png -------------------------------------------------------------------------------- /notebooks/resources/macros.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Macros must be included in 4 | # - every notebook. 5 | # - every answer that uses them. 6 | 7 | 8 | from pathlib import Path 9 | import re 10 | import sys 11 | 12 | import nbformat 13 | 14 | 15 | HEADER = r'''% ######################################## Loading TeX (MathJax)... Please wait ########################################''' 16 | macros=r''' 17 | \newcommand{\Reals}{\mathbb{R}} 18 | \newcommand{\Expect}[0]{\mathbb{E}} 19 | \newcommand{\NormDist}{\mathscr{N}} 20 | 21 | \newcommand{\DynMod}[0]{\mathscr{M}} 22 | \newcommand{\ObsMod}[0]{\mathscr{H}} 23 | 24 | \newcommand{\mat}[1]{{\mathbf{{#1}}}} % ALWAYS 25 | %\newcommand{\mat}[1]{{\pmb{\mathsf{#1}}}} 26 | \newcommand{\bvec}[1]{{\mathbf{#1}}} % ALWAYS 27 | 28 | \newcommand{\trsign}{{\mathsf{T}}} % ALWAYS 29 | \newcommand{\tr}{^{\trsign}} % ALWAYS 30 | \newcommand{\ceq}[0]{\mathrel{≔}} 31 | \newcommand{\xDim}[0]{D} 32 | \newcommand{\supa}[0]{^\text{a}} 33 | \newcommand{\supf}[0]{^\text{f}} 34 | 35 | \newcommand{\I}[0]{\mat{I}} % ALWAYS 36 | \newcommand{\K}[0]{\mat{K}} 37 | \newcommand{\bP}[0]{\mat{P}} 38 | \newcommand{\bH}[0]{\mat{H}} 39 | \newcommand{\bF}[0]{\mat{F}} 40 | \newcommand{\R}[0]{\mat{R}} 41 | \newcommand{\Q}[0]{\mat{Q}} 42 | \newcommand{\B}[0]{\mat{B}} 43 | \newcommand{\C}[0]{\mat{C}} 44 | \newcommand{\Ri}[0]{\R^{-1}} 45 | \newcommand{\Bi}[0]{\B^{-1}} 46 | \newcommand{\X}[0]{\mat{X}} 47 | \newcommand{\A}[0]{\mat{A}} 48 | \newcommand{\Y}[0]{\mat{Y}} 49 | \newcommand{\E}[0]{\mat{E}} 50 | \newcommand{\U}[0]{\mat{U}} 51 | \newcommand{\V}[0]{\mat{V}} 52 | 53 | \newcommand{\x}[0]{\bvec{x}} 54 | \newcommand{\y}[0]{\bvec{y}} 55 | \newcommand{\z}[0]{\bvec{z}} 56 | \newcommand{\q}[0]{\bvec{q}} 57 | \newcommand{\r}[0]{\bvec{r}} 58 | \newcommand{\bb}[0]{\bvec{b}} 59 | 60 | \newcommand{\bx}[0]{\bvec{\bar{x}}} 61 | \newcommand{\by}[0]{\bvec{\bar{y}}} 62 | \newcommand{\barB}[0]{\mat{\bar{B}}} 63 | \newcommand{\barP}[0]{\mat{\bar{P}}} 64 | \newcommand{\barC}[0]{\mat{\bar{C}}} 65 | \newcommand{\barK}[0]{\mat{\bar{K}}} 66 | 67 | \newcommand{\D}[0]{\mat{D}} 68 | \newcommand{\Dobs}[0]{\mat{D}_{\text{obs}}} 69 | \newcommand{\Dmod}[0]{\mat{D}_{\text{obs}}} 70 | 71 | \newcommand{\ones}[0]{\bvec{1}} % ALWAYS 72 | \newcommand{\AN}[0]{\big( \I_N - \ones \ones\tr / N \big)} 73 | ''' 74 | macros = [ln for ln in macros.splitlines() if ln and not ln.startswith('%')] 75 | always = [i for i, ln in enumerate(macros) if "ALWAYS" in ln] 76 | macros = [m.replace("% ALWAYS","").rstrip() for m in macros] 77 | 78 | # Convert to {macro_name: macro_lineno} 79 | declaration = re.compile(r'''^\\newcommand{(.+?)}''') 80 | lineno_by_name = {} 81 | for i, ln in enumerate(macros): 82 | match = declaration.match(ln) 83 | if match: 
lineno_by_name[match.group(1)] = i 84 | 85 | # Regex for macro, for ex. \mat, including \mat_, but not \mathbf: 86 | no_escape = lambda s: s.replace("\\",r"\\") 87 | delimit = lambda m: re.compile( no_escape(m) + r'(_|\b)' ) 88 | 89 | 90 | def include_macros(content): 91 | """Include macros in answers. Only those that are required.""" 92 | # Find macros present in content 93 | necessary = [i for macro, i in lineno_by_name.items() if delimit(macro).search(content)] 94 | # Include in content 95 | if necessary: 96 | mm = [macros[i] for i in necessary] 97 | # PRE-pend those that should always be there 98 | mm = [macros[i] for i in always if (macros[i] not in mm)] + mm 99 | # Escape underscore coz md2html sometimes interprets it as . 100 | mm = [m.replace("_","\\_") for m in mm] 101 | # Include surrounding dollar signs 102 | mm = ["$"] + mm + ["$"] 103 | # Avoid accidental $$ 104 | space = " " if content.startswith("$") else "" 105 | # Collect 106 | content = "\n".join(mm) + space + content 107 | return content 108 | 109 | 110 | def update_1nbscript(f: Path): 111 | """Update the macros of a notebook script (synced with `jupytext`).""" 112 | print(f.name.ljust(40), end=": ") 113 | lines = f.read_text().splitlines() 114 | mLine = "# " + " ".join(macros) 115 | 116 | try: 117 | iHeader = lines.index("# " + HEADER) 118 | except (ValueError, AssertionError): 119 | print("Could not locate pre-existing macros") 120 | return 121 | 122 | if not (lines[iHeader-1] == "# $" and 123 | lines[iHeader+2] == "# $"): 124 | print("Could not parse macros") 125 | 126 | # elif lines[iHeader+1] == mLine: 127 | # print("Macros already up to date.") 128 | 129 | else: 130 | # lines[iHeader] = "# % ##### NEW HEADER ######" 131 | lines[iHeader+1] = mLine 132 | f.write_text("\n".join(lines)) 133 | print("Macros updated!") 134 | 135 | 136 | if __name__ == "__main__" and any("update" in arg for arg in sys.argv): 137 | for f in sorted((Path(__file__).parents[1] / "scripts").glob("T*.py")): 138 | update_1nbscript(f) 139 | -------------------------------------------------------------------------------- /notebooks/scripts/T2 - Gaussian distribution.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupyter: 3 | jupytext: 4 | formats: ipynb,scripts//py:light,scripts//md 5 | text_representation: 6 | extension: .md 7 | format_name: markdown 8 | format_version: '1.3' 9 | jupytext_version: 1.17.2 10 | kernelspec: 11 | display_name: Python 3 (ipykernel) 12 | language: python 13 | name: python3 14 | --- 15 | 16 | ```python 17 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials" 18 | !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s 19 | ``` 20 | 21 | ```python 22 | from resources import show_answer, interact 23 | %matplotlib inline 24 | import numpy as np 25 | import scipy as sp 26 | import matplotlib.pyplot as plt 27 | plt.ion(); 28 | ``` 29 | 30 | # T2 - The Gaussian (Normal) distribution 31 | 32 | We begin by reviewing the most useful of probability distributions. 33 | But first, let's refresh some basic theory. 
34 | $ 35 | \newcommand{\Reals}{\mathbb{R}} 36 | \newcommand{\Expect}[0]{\mathbb{E}} 37 | \newcommand{\NormDist}{\mathscr{N}} 38 | \newcommand{\mat}[1]{{\mathbf{{#1}}}} 39 | \newcommand{\bvec}[1]{{\mathbf{#1}}} 40 | \newcommand{\trsign}{{\mathsf{T}}} 41 | \newcommand{\tr}{^{\trsign}} 42 | \newcommand{\xDim}[0]{D} 43 | \newcommand{\x}[0]{\bvec{x}} 44 | \newcommand{\X}[0]{\mat{X}} 45 | $ 46 | 47 | ## Probability essentials 48 | 49 | As stated by James Bernoulli (1713) and elucidated by [Laplace (1812)](#References): 50 | 51 | > The Probability for an event is the ratio of the number of cases favorable to it, to the number of all 52 | > cases possible when nothing leads us to expect that any one of these cases should occur more than any other, 53 | > which renders them, for us, equally possible: 54 | 55 | $$ \mathbb{P}(\text{event}) = \frac{\text{number of} \textit{ favorable } \text{outcomes}}{\text{number of} \textit{ possible } \text{outcomes}} $$ 56 | 57 | A **random variable** is a *quantity* taking random values, described in terms of **distributions**. 58 | 59 | - A *discrete* random variable, $X$, has a probability *mass* function (**pmf**) defined by $p(x) = \mathbb{P}(X{=}x)$. 60 | Sometimes we write $p_X(x)$ to distinguish it from $p_Y(y)$. 61 | - The *joint* probability of two random variables $X$ and $Y$ is defined by their intersection: 62 | $p(x, y) = \mathbb{P}(X{=}x \cap Y{=}y)$. 63 | - The *marginal* $p(x)$ is obtained by summing over all $y$, and vice versa. 64 | - The *conditional* probability of $X$ *given* $y$ is $p(x|y) = \frac{p(x,y)}{p(y)}$. 65 | - *Independence* means $p(x,y) = p(x) \, p(y)$ for all $x, y$. 66 | - The cumulative distribution function (**cdf**) is defined as $F(x) = \mathbb{P}(X \le x)$. 67 | 68 | We will mainly be concerned with *continuous* random variables. 69 | Their probability *density* function (**pdf**) can be defined as $p(x) = F'(x)$ or, equivalently, 70 | 71 | $$p(x) = \lim_{h \to 0} \frac{\mathbb{P}(X \in [x,\, x{+} h])}{h} \,.$$ 72 | 73 | The **sample average** of draws from a random variable $X$ 74 | is denoted with an overhead bar: 75 | $$ \bar{x} := \frac{1}{N} \sum_{n=1}^{N} x_n \,. $$ 76 | By the *law of large numbers (LLN)*, the sample average converges as $N \to \infty$ to the **expected value** (sometimes called the **mean**): 77 | $$ \Expect[X] ≔ \int x \, p(x) \, d x \,, $$ 78 | where the (omitted) domain of integration is *all values of $x$*. 79 | Two important properties follow immediately: 80 | 81 | - *Linearity*: $\Expect[aX + Y] = a \Expect[X] + \Expect[Y]$. 82 | - *Total expectation*: $\Expect[\Expect[X|Y]] = \Expect[X]$. 83 | 84 | ## The univariate (a.k.a. 1-dimensional, scalar) Gaussian 85 | 86 | If $X$ is Gaussian (also known as "Normal"), we write 87 | $X \sim \NormDist(\mu, \sigma^2)$, or $p(x) = \NormDist(x \mid \mu, \sigma^2)$, 88 | where the parameters $\mu$ and $\sigma^2$ are called the mean and variance 89 | (for reasons that will become clear below). 90 | The Gaussian pdf, for $x \in (-\infty, +\infty)$, is 91 | $$ \large \NormDist(x \mid \mu, \sigma^2) = (2 \pi \sigma^2)^{-1/2} e^{-(x-\mu)^2/2 \sigma^2} \, . \tag{G1} $$ 92 | 93 | Run the cell below to define a function to compute the pdf (G1) using the `scipy` library. 
94 | 95 | ```python 96 | def pdf_G1(x, mu, sigma2): 97 | "Univariate Gaussian pdf" 98 | pdf_values = sp.stats.norm.pdf(x, loc=mu, scale=np.sqrt(sigma2)) 99 | return pdf_values 100 | ``` 101 | 102 | Computers typically represent functions *numerically* by their values at a set of grid points (nodes), 103 | an approach called ***discretisation***. 104 | 105 | ```python 106 | bounds = -20, 20 107 | N = 201 # num of grid points 108 | grid1d = np.linspace(*bounds,N) # grid 109 | dx = grid1d[1] - grid1d[0] # grid spacing 110 | ``` 111 | 112 | Feel free to return here later and change the grid resolution to see how 113 | it affects the cells below (after re-running them). 114 | 115 | The following code plots the Gaussian pdf. 116 | 117 | ```python 118 | hist = [] 119 | @interact(mu=bounds, sigma=(.1, 10, 1)) 120 | def plot_pdf(mu=0, sigma=5): 121 | plt.figure(figsize=(6, 2)) 122 | colors = plt.get_cmap('hsv')([(k-len(hist))%9/9 for k in range(9)]) 123 | plt.xlim(*bounds) 124 | plt.ylim(0, .2) 125 | hist.insert(0, pdf_G1(grid1d, mu, sigma**2)) 126 | for density_values, color in zip(hist, colors): 127 | plt.plot(grid1d, density_values, c=color) 128 | plt.show() 129 | ``` 130 | 131 | #### Exc -- parameter influence 132 | 133 | Experiment with `mu` and `sigma` to answer these questions: 134 | 135 | - How does the pdf curve change when `mu` changes? (Several options may be correct or incorrect) 136 | 137 | 1. It changes the curve into a uniform distribution. 138 | 1. It changes the width of the curve. 139 | 1. It shifts the peak of the curve to the left or right. 140 | 1. It changes the height of the curve. 141 | 1. It transforms the curve into a binomial distribution. 142 | 1. It makes the curve wider or narrower. 143 | 1. It modifies the skewness (asymmetry) of the curve. 144 | 1. It causes the curve to expand vertically while keeping the width the same. 145 | 1. It translates the curve horizontally. 146 | 1. It alters the kurtosis (peakedness) of the curve. 147 | 1. It rotates the curve around the origin. 148 | 1. It makes the curve a straight line. 149 | - How does the pdf curve change when you increase `sigma`? 150 | Refer to the same options as the previous question. 151 | - In a few words, describe the shape of the Gaussian pdf curve. 152 | Does this remind you of anything? *Hint: it should be clear as a bell!* 153 | 154 | **Exc -- Implementation:** Change the implementation of `pdf_G1` so that it does not use `scipy`, but instead uses your own code (with `numpy` only). Re-run all of the above cells and check that you get the same plots as before. 155 | *Hint: `**` is the exponentiation/power operator, but $e^x$ is more efficiently computed with `np.exp(x)`* 156 | 157 | ```python 158 | # show_answer('pdf_G1') 159 | ``` 160 | 161 | **Exc -- Derivatives:** Recall $p(x) = \NormDist(x \mid \mu, \sigma^2)$ from eqn. (G1). 162 | Use pen, paper, and calculus to answer the following questions, 163 | which will help you remember some key properties of the distribution. 164 | 165 | - (i) Find $x$ such that $p(x) = 0$. 166 | - (ii) Where is the location of the **mode (maximum)** of the density? 167 | I.e. find $x$ such that $\frac{d p}{d x}(x) = 0$. 168 | *Hint: begin by writing $p(x)$ as $c e^{- J(x)}$ for some $J(x)$.* 169 | - (iii) Where is the **inflection point**? I.e. where $\frac{d^2 p}{d x^2}(x) = 0$. 170 | - (iv) *Optional*: Some forms of *sensitivity analysis* (typically for non-Gaussian $p$) consist in estimating/approximating the Hessian, i.e. $\frac{d^2 \log p}{d x^2}$. 
Explain what this has to do with *uncertainty quantification*. 171 | 172 | 173 | 174 | #### Exc (optional) -- Change of variables 175 | 176 | Let $Z = \phi(X)$ for some monotonic function $\phi$, 177 | and let $p_x$ and $p_z$ be their probability density functions (pdf). 178 | 179 | - (a): Show that $p_z(z) = p_x\big(\phi^{-1}(z)\big) \frac{1}{|\phi'(z)|}$, 180 | - (b): Show that you don't need to derive the density of $z$ in order to compute its expectation, i.e. that 181 | $$ \Expect[Z] = \int \phi(x) \, p_x(x) \, d x ≕ \Expect[\phi(x)] \,,$$ 182 | *Hint: while the proof is convoluted, the result itself is [pretty intuitive](https://en.wikipedia.org/wiki/Law_of_the_unconscious_statistician).* 183 | 184 | ```python 185 | # show_answer('CVar in proba') 186 | ``` 187 | 188 | 189 | 190 | #### Exc (optional) -- Integrals 191 | 192 | Recall $p(x) = \NormDist(x \mid \mu, \sigma^2)$ from eqn. (G1). Abbreviate it as $c = (2 \pi \sigma^2)^{-1/2}$. 193 | Use pen, paper, and calculus to show that 194 | 195 | - (i) the first parameter, $\mu$, indicates its **mean**, i.e. that $$\mu = \Expect[X] \,.$$ 196 | *Hint: you can rely on the result of (iii)* 197 | - (ii) the second parameter, $\sigma^2>0$, indicates its **variance**, 198 | i.e. that $$\sigma^2 = \mathbb{Var}(X) \mathrel{≔} \Expect[(X-\mu)^2] \,.$$ 199 | *Hint: use $x^2 = x x$ to enable integration by parts.* 200 | - (iii) $E[1] = 1$, 201 | thus proving that (G1) indeed uses the right normalising constant. 202 | *Hint: Neither Bernoulli and Laplace managed this, 203 | until [Gauss (1809)](#References) did by first deriving $(E[1])^2$. 204 | For more (visual) help, watch [3Blue1Brown](https://www.youtube.com/watch?v=cy8r7WSuT1I&t=3m52s).* 205 | 206 | ```python 207 | # show_answer('Gauss integrals') 208 | ``` 209 | 210 | **Exc (optional) -- Riemann sums**: 211 | Recall that integrals compute the "area under the curve". 212 | On a discrete grid, they can be approximated using the [Trapezoidal rule](https://en.wikipedia.org/wiki/Riemann_sum#Trapezoidal_rule). 213 | 214 | - (a) Replace the prefab code below with your own implementation, using `sum()`, 215 | to compute the mean and variance of a pdf represented on a grid. 216 | - (b) Use `np.trapezoid` to compute the probability that a scalar Gaussian $X$ lies within $1$ standard deviation of its mean. 217 | *Hint: the numerical answer you should find is $\mathbb{P}(X \in [\mu {-} \sigma, \mu {+} \sigma]) \approx 68\%$.* 218 | 219 | ```python 220 | def mean_and_var(pdf_values, grid): 221 | f, x = pdf_values, grid 222 | mu = np.trapezoid(f*x, x) 223 | s2 = np.trapezoid(f*(x-mu)**2, x) 224 | return mu, s2 225 | 226 | mu, sigma = 0, 2 # example 227 | pdf_vals = pdf_G1(grid1d, mu=mu, sigma2=sigma**2) 228 | 'Should equal mu and sigma2: %f, %f' % mean_and_var(pdf_vals, grid1d) 229 | ``` 230 | 231 | ```python 232 | # show_answer('Riemann sums', 'a') 233 | ``` 234 | 235 | **Exc -- The uniform pdf**: 236 | Below is the pdf of the [uniform/flat/box distribution](https://en.wikipedia.org/wiki/Uniform_distribution_(continuous)) 237 | for a given mean and variance. 238 | 239 | - Use `mean_and_var()` to verify `pdf_U1` (as is). 240 | - Replace `_G1` with `_U1` in the code generating the above interactive plot. 241 | - Why are the walls (ever so slightly) inclined? 242 | - Write your own implementation below, and check that it reproduces the `scipy` version already in place. 
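*Aside (added here for reference, not part of the exercise):* the endpoints $a$ and $b$ used in the prefab `scipy` version below follow from matching the mean and variance of a uniform distribution on $[a, b]$:

$$ \frac{a+b}{2} = \mu \,, \qquad \frac{(b-a)^2}{12} = \sigma^2
\qquad\Longrightarrow\qquad
a = \mu - \sqrt{3\sigma^2} \,, \quad b = \mu + \sqrt{3\sigma^2} \,. $$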
243 | 244 | ```python 245 | def pdf_U1(x, mu, sigma2): 246 | a = mu - np.sqrt(3*sigma2) 247 | b = mu + np.sqrt(3*sigma2) 248 | pdf_values = sp.stats.uniform(loc=a, scale=(b-a)).pdf(x) 249 | # Your own implementation: 250 | # height = ... 251 | # pdf_values = height * np.ones_like(x) 252 | # pdf_values[x<a] = ... 253 | # pdf_values[x>b] = ... 254 | return pdf_values 255 | ``` 256 | 257 | ```python 258 | # show_answer('pdf_U1') 259 | ``` 260 | 261 | ## The multivariate (i.e. vector) Gaussian 262 | 263 | A *multivariate* random variable, i.e. a **vector**, is simply a collection of scalar variables (on the same probability space). 264 | Its distribution is the *joint* distribution of its components. 265 | The pdf of the multivariate Gaussian (for any dimension $\ge 1$) is 266 | 267 | $$\large \NormDist(\x \mid \mathbf{\mu}, \mathbf{\Sigma}) = 268 | |2 \pi \mathbf{\Sigma}|^{-1/2} \, \exp\Big(-\frac{1}{2}\|\x-\mathbf{\mu}\|^2_\mathbf{\Sigma} \Big) \,, \tag{GM} $$ 269 | where $|.|$ represents the matrix determinant, 270 | and $\|.\|_\mathbf{W}$ represents a weighted 2-norm: $\|\x\|^2_\mathbf{W} = \x^T \mathbf{W}^{-1} \x$. 271 | 272 |
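The following cell is a quick numerical sanity check of eqn. (GM), added here for illustration (it is not one of the tutorial's exercises): it evaluates the formula "by hand" at a single point and cross-checks against `scipy.stats.multivariate_normal` (a standard `scipy` function not otherwise used in this notebook). The mean, covariance, and evaluation point are arbitrary example values.

```python
# Added illustration: evaluate eqn. (GM) "by hand" at one point and
# cross-check against scipy's reference implementation.
import numpy as np
from numpy.linalg import det, inv
from scipy.stats import multivariate_normal

mu2 = np.array([0.0, 0.0])              # example mean
Sigma2 = np.array([[2.0, 0.6],
                   [0.6, 1.0]])         # example (SPD) covariance
x_pt = np.array([1.0, -0.5])            # point at which to evaluate the pdf

# Weighted 2-norm ||x - mu||^2_Sigma = (x - mu)^T Sigma^{-1} (x - mu)
norm2 = (x_pt - mu2) @ inv(Sigma2) @ (x_pt - mu2)
pdf_manual = np.exp(-0.5 * norm2) / np.sqrt(det(2 * np.pi * Sigma2))

pdf_scipy = multivariate_normal(mean=mu2, cov=Sigma2).pdf(x_pt)
print(pdf_manual, pdf_scipy)            # should agree to machine precision
```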
273 | 274 | $\mathbf{W}$ must be symmetric-positive-definite (SPD) because ... (optional reading 🔍) 275 | 276 | 277 | - The norm (a quadratic form) is invariant to any asymmetry in the weight matrix. 278 | - The density (GM) would not be integrable (over $\Reals^{\xDim}$) unless $\x\tr \mathbf{\Sigma}^{-1} \x > 0$ for all $\x \neq \bvec{0}$. 279 | 280 | - - - 281 |
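The two points of the optional aside above can be illustrated numerically. The small check below is an addition to the notebook (it assumes only `numpy`; the matrices and vector are arbitrary examples): a quadratic form is blind to the antisymmetric part of its matrix, and SPD-ness (hence integrability of (GM)) amounts to all eigenvalues being positive.

```python
# Added illustration of the two bullet points above.
import numpy as np

rng = np.random.default_rng(1)
M = rng.standard_normal((3, 3))        # a deliberately asymmetric matrix
M_sym = (M + M.T) / 2                  # its symmetric part
v = rng.standard_normal(3)

# (i) The antisymmetric part of M contributes nothing to the quadratic form:
print(v @ M @ v, v @ M_sym @ v)        # identical (up to rounding)

# (ii) SPD <=> all eigenvalues positive, checked here for an example covariance:
Sigma_example = np.array([[2.0, 0.6],
                          [0.6, 1.0]])
print(np.linalg.eigvalsh(Sigma_example))   # all > 0, so x^T Sigma^{-1} x > 0 for x != 0
```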
282 | 283 | It is important to recognize how similar eqn. (GM) is to the univariate (scalar) case (G1). 284 | Moreover, [as above](#Exc-(optional)----Integrals), it can be shown that 285 | 286 | - $\mathbf{\mu} = \Expect[\X]$, 287 | - $\mathbf{\Sigma} = \Expect[(\X-\mu)(\X-\mu)\tr]$, 288 | 289 | That is, the elements of $\mathbf{\Sigma}$ are the individual covariances: 290 | $\Sigma_{i,j} = \Expect[(X_i-\mu_i)(X_j-\mu_j)] =: \mathbb{Cov}(X_i, X_j)$. 291 | On the diagonal ($i=j$), they are variances: $\Sigma_{i,i} = \mathbb{Var}(X_i)$. 292 | Therefore $\mathbf{\Sigma}$ is called the *covariance matrix*. 293 | 294 | The following implements the pdf (GM). Take a moment to digest the code, but don't worry if you don't understand it all. Hints: 295 | 296 | - `@` produces matrix multiplication (`*` in `Matlab`); 297 | - `*` produces array multiplication (`.*` in `Matlab`); 298 | - `axis=-1` makes `np.sum()` work along the last dimension of an ND-array. 299 | 300 | ```python 301 | from numpy.linalg import det, inv 302 | 303 | def weighted_norm22(points, Wi): 304 | "Computes the weighted norm of each vector (row in `points`)." 305 | return np.sum( (points @ inv(Wi)) * points, axis=-1) 306 | 307 | def pdf_GM(points, mu, Sigma): 308 | "pdf -- Gaussian, Multivariate: N(x | mu, Sigma) for each x in `points`." 309 | c = np.sqrt(det(2*np.pi*Sigma)) 310 | return 1/c * np.exp(-0.5*weighted_norm22(points - mu, Sigma)) 311 | ``` 312 | 313 | The following code plots the pdf as contour (level) curves. 314 | 315 | ```python 316 | grid2d = np.dstack(np.meshgrid(grid1d, grid1d)) 317 | 318 | @interact(corr=(-1, 1, .001), std_x=(1e-5, 10, 1)) 319 | def plot_pdf_G2(corr=0.7, std_x=1): 320 | # Form covariance matrix (C) from input and some constants 321 | var_x = std_x**2 322 | var_y = 1 323 | cv_xy = np.sqrt(var_x * var_y) * corr 324 | C = 25 * np.array([[var_x, cv_xy], 325 | [cv_xy, var_y]]) 326 | # Evaluate (compute) 327 | density_values = pdf_GM(grid2d, mu=0, Sigma=C) 328 | # Plot 329 | plt.figure(figsize=(4, 4)) 330 | height = 1/np.sqrt(det(2*np.pi*C)) 331 | plt.contour(grid1d, grid1d, density_values, 332 | levels=np.linspace(1e-4, height, 11), cmap="plasma") 333 | plt.axis('equal'); 334 | plt.show() 335 | ``` 336 | 337 | The code defines the covariance `cv_xy` from the input ***correlation*** `corr`. 338 | This is a coefficient (number), defined for any two random variables $x$ and $y$ (not necessarily Gaussian) by 339 | $$ \rho[X,Y]=\frac{\mathbb{Cov}[X,Y]}{\sigma_x \sigma_y} \,.$$ 340 | This correlation quantifies (defines) the ***linear dependence*** between $X$ and $Y$. Indeed, 341 | 342 | - $-1\leq \rho \leq 1$ (by Cauchy-Swartz) 343 | - **If** $X$ and $Y$ are *independent*, then $\rho[X,Y]=0$. 344 | 345 | **Exc -- Correlation influence:** How do the contours look? Try to understand why. Cases: 346 | 347 | - (a) correlation=0. 348 | - (b) correlation=0.99. 349 | - (c) correlation=0.5. (Note that we've used `plt.axis('equal')`). 350 | - (d) correlation=0.5, but with non-equal variances. 351 | 352 | Finally (optional): why does the code "crash" when `corr = +/- 1`? Is this a good or a bad thing? 353 | 354 | **Exc Correlation game:** [Play](http://guessthecorrelation.com/) until you get a score (gold coins) of 5 or more. 355 | 356 | **Exc -- Correlation disambiguation:** 357 | 358 | - What's the difference between correlation and covariance (in words)? 359 | - What's the difference between non-zero (C) correlation (or covariance) and (D) dependence? 
360 | *Hint: consider this [image](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#/media/File:Correlation_examples2.svg).* 361 | - Does $C \Rightarrow D$ or the converse? 362 | - What about the negation, $\neg D \Rightarrow \neg C$, or its converse?* 363 | - What about the (jointly) Gaussian case? 364 | - Does correlation (or dependence) imply causation? 365 | - Suppose $x$ and $y$ have non-zero correlation, but neither one causes the other. 366 | Does information about $y$ give you information about $x$? 367 | 368 | **Exc (optional) -- Gaussian ubiquity:** Why are we so fond of the Gaussian assumption? 369 | 370 | ```python 371 | # show_answer('Why Gaussian') 372 | ``` 373 | 374 | ## Summary 375 | 376 | The Normal/Gaussian distribution is bell-shaped. 377 | Its parameters are the mean and the variance. 378 | In the multivariate case, the mean is a vector, 379 | while the second parameter becomes a covariance *matrix*, 380 | whose off-diagonal elements represent scaled correlation factors, 381 | which measure *linear* dependence. 382 | 383 | ### Next: [T3 - Bayesian inference](T3%20-%20Bayesian%20inference.ipynb) 384 | 385 | 386 | 387 | ### References 388 | 389 | - **Laplace (1812)**: P. S. Laplace, "Théorie Analytique des Probabilités", 1812. 390 | - **Gauss (1809)**: Gauss, C. F. (1809). *Theoria Motus Corporum Coelestium in Sectionibus Conicis Solem Ambientium*. Specifically, Book II, Section 3, Art. 177-179, where he presents the method of least squares (which will be very relevant to us) and its probabilistic justification based on the normal distribution of errors. 391 | -------------------------------------------------------------------------------- /notebooks/scripts/T2 - Gaussian distribution.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,scripts//py:light,scripts//md 5 | # text_representation: 6 | # extension: .py 7 | # format_name: light 8 | # format_version: '1.5' 9 | # jupytext_version: 1.17.2 10 | # kernelspec: 11 | # display_name: Python 3 (ipykernel) 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials" 17 | # !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s 18 | 19 | from resources import show_answer, interact 20 | # %matplotlib inline 21 | import numpy as np 22 | import scipy as sp 23 | import matplotlib.pyplot as plt 24 | plt.ion(); 25 | 26 | 27 | # # T2 - The Gaussian (Normal) distribution 28 | # 29 | # We begin by reviewing the most useful of probability distributions. 30 | # But first, let's refresh some basic theory. 
31 | # $ 32 | # \newcommand{\Reals}{\mathbb{R}} 33 | # \newcommand{\Expect}[0]{\mathbb{E}} 34 | # \newcommand{\NormDist}{\mathscr{N}} 35 | # \newcommand{\mat}[1]{{\mathbf{{#1}}}} 36 | # \newcommand{\bvec}[1]{{\mathbf{#1}}} 37 | # \newcommand{\trsign}{{\mathsf{T}}} 38 | # \newcommand{\tr}{^{\trsign}} 39 | # \newcommand{\xDim}[0]{D} 40 | # \newcommand{\x}[0]{\bvec{x}} 41 | # \newcommand{\X}[0]{\mat{X}} 42 | # $ 43 | # 44 | # ## Probability essentials 45 | # 46 | # As stated by James Bernoulli (1713) and elucidated by [Laplace (1812)](#References): 47 | # 48 | # > The Probability for an event is the ratio of the number of cases favorable to it, to the number of all 49 | # > cases possible when nothing leads us to expect that any one of these cases should occur more than any other, 50 | # > which renders them, for us, equally possible: 51 | # 52 | # $$ \mathbb{P}(\text{event}) = \frac{\text{number of} \textit{ favorable } \text{outcomes}}{\text{number of} \textit{ possible } \text{outcomes}} $$ 53 | # 54 | # A **random variable** is a *quantity* taking random values, described in terms of **distributions**. 55 | # 56 | # - A *discrete* random variable, $X$, has a probability *mass* function (**pmf**) defined by $p(x) = \mathbb{P}(X{=}x)$. 57 | # Sometimes we write $p_X(x)$ to distinguish it from $p_Y(y)$. 58 | # - The *joint* probability of two random variables $X$ and $Y$ is defined by their intersection: 59 | # $p(x, y) = \mathbb{P}(X{=}x \cap Y{=}y)$. 60 | # - The *marginal* $p(x)$ is obtained by summing over all $y$, and vice versa. 61 | # - The *conditional* probability of $X$ *given* $y$ is $p(x|y) = \frac{p(x,y)}{p(y)}$. 62 | # - *Independence* means $p(x,y) = p(x) \, p(y)$ for all $x, y$. 63 | # - The cumulative distribution function (**cdf**) is defined as $F(x) = \mathbb{P}(X \le x)$. 64 | # 65 | # We will mainly be concerned with *continuous* random variables. 66 | # Their probability *density* function (**pdf**) can be defined as $p(x) = F'(x)$ or, equivalently, 67 | # 68 | # $$p(x) = \lim_{h \to 0} \frac{\mathbb{P}(X \in [x,\, x{+} h])}{h} \,.$$ 69 | # 70 | # The **sample average** of draws from a random variable $X$ 71 | # is denoted with an overhead bar: 72 | # $$ \bar{x} := \frac{1}{N} \sum_{n=1}^{N} x_n \,. $$ 73 | # By the *law of large numbers (LLN)*, the sample average converges as $N \to \infty$ to the **expected value** (sometimes called the **mean**): 74 | # $$ \Expect[X] ≔ \int x \, p(x) \, d x \,, $$ 75 | # where the (omitted) domain of integration is *all values of $x$*. 76 | # Two important properties follow immediately: 77 | # 78 | # - *Linearity*: $\Expect[aX + Y] = a \Expect[X] + \Expect[Y]$. 79 | # - *Total expectation*: $\Expect[\Expect[X|Y]] = \Expect[X]$. 80 | # 81 | # ## The univariate (a.k.a. 1-dimensional, scalar) Gaussian 82 | # 83 | # If $X$ is Gaussian (also known as "Normal"), we write 84 | # $X \sim \NormDist(\mu, \sigma^2)$, or $p(x) = \NormDist(x \mid \mu, \sigma^2)$, 85 | # where the parameters $\mu$ and $\sigma^2$ are called the mean and variance 86 | # (for reasons that will become clear below). 87 | # The Gaussian pdf, for $x \in (-\infty, +\infty)$, is 88 | # $$ \large \NormDist(x \mid \mu, \sigma^2) = (2 \pi \sigma^2)^{-1/2} e^{-(x-\mu)^2/2 \sigma^2} \, . \tag{G1} $$ 89 | # 90 | # Run the cell below to define a function to compute the pdf (G1) using the `scipy` library. 
91 | 92 | def pdf_G1(x, mu, sigma2): 93 | "Univariate Gaussian pdf" 94 | pdf_values = sp.stats.norm.pdf(x, loc=mu, scale=np.sqrt(sigma2)) 95 | return pdf_values 96 | 97 | 98 | # Computers typically represent functions *numerically* by their values at a set of grid points (nodes), 99 | # an approach called ***discretisation***. 100 | 101 | bounds = -20, 20 102 | N = 201 # num of grid points 103 | grid1d = np.linspace(*bounds,N) # grid 104 | dx = grid1d[1] - grid1d[0] # grid spacing 105 | 106 | # Feel free to return here later and change the grid resolution to see how 107 | # it affects the cells below (after re-running them). 108 | # 109 | # The following code plots the Gaussian pdf. 110 | 111 | hist = [] 112 | @interact(mu=bounds, sigma=(.1, 10, 1)) 113 | def plot_pdf(mu=0, sigma=5): 114 | plt.figure(figsize=(6, 2)) 115 | colors = plt.get_cmap('hsv')([(k-len(hist))%9/9 for k in range(9)]) 116 | plt.xlim(*bounds) 117 | plt.ylim(0, .2) 118 | hist.insert(0, pdf_G1(grid1d, mu, sigma**2)) 119 | for density_values, color in zip(hist, colors): 120 | plt.plot(grid1d, density_values, c=color) 121 | plt.show() 122 | 123 | 124 | # #### Exc -- parameter influence 125 | # 126 | # Experiment with `mu` and `sigma` to answer these questions: 127 | # 128 | # - How does the pdf curve change when `mu` changes? (Several options may be correct or incorrect) 129 | # 130 | # 1. It changes the curve into a uniform distribution. 131 | # 1. It changes the width of the curve. 132 | # 1. It shifts the peak of the curve to the left or right. 133 | # 1. It changes the height of the curve. 134 | # 1. It transforms the curve into a binomial distribution. 135 | # 1. It makes the curve wider or narrower. 136 | # 1. It modifies the skewness (asymmetry) of the curve. 137 | # 1. It causes the curve to expand vertically while keeping the width the same. 138 | # 1. It translates the curve horizontally. 139 | # 1. It alters the kurtosis (peakedness) of the curve. 140 | # 1. It rotates the curve around the origin. 141 | # 1. It makes the curve a straight line. 142 | # - How does the pdf curve change when you increase `sigma`? 143 | # Refer to the same options as the previous question. 144 | # - In a few words, describe the shape of the Gaussian pdf curve. 145 | # Does this remind you of anything? *Hint: it should be clear as a bell!* 146 | # 147 | # **Exc -- Implementation:** Change the implementation of `pdf_G1` so that it does not use `scipy`, but instead uses your own code (with `numpy` only). Re-run all of the above cells and check that you get the same plots as before. 148 | # *Hint: `**` is the exponentiation/power operator, but $e^x$ is more efficiently computed with `np.exp(x)`* 149 | 150 | # + 151 | # show_answer('pdf_G1') 152 | # - 153 | 154 | # **Exc -- Derivatives:** Recall $p(x) = \NormDist(x \mid \mu, \sigma^2)$ from eqn. (G1). 155 | # Use pen, paper, and calculus to answer the following questions, 156 | # which will help you remember some key properties of the distribution. 157 | # 158 | # - (i) Find $x$ such that $p(x) = 0$. 159 | # - (ii) Where is the location of the **mode (maximum)** of the density? 160 | # I.e. find $x$ such that $\frac{d p}{d x}(x) = 0$. 161 | # *Hint: begin by writing $p(x)$ as $c e^{- J(x)}$ for some $J(x)$.* 162 | # - (iii) Where is the **inflection point**? I.e. where $\frac{d^2 p}{d x^2}(x) = 0$. 163 | # - (iv) *Optional*: Some forms of *sensitivity analysis* (typically for non-Gaussian $p$) consist in estimating/approximating the Hessian, i.e. $\frac{d^2 \log p}{d x^2}$. 
Explain what this has to do with *uncertainty quantification*. 164 | # 165 | # 166 | # 167 | # #### Exc (optional) -- Change of variables 168 | # 169 | # Let $Z = \phi(X)$ for some monotonic function $\phi$, 170 | # and let $p_x$ and $p_z$ be their probability density functions (pdf). 171 | # 172 | # - (a): Show that $p_z(z) = p_x\big(\phi^{-1}(z)\big) \frac{1}{|\phi'(z)|}$, 173 | # - (b): Show that you don't need to derive the density of $z$ in order to compute its expectation, i.e. that 174 | # $$ \Expect[Z] = \int \phi(x) \, p_x(x) \, d x ≕ \Expect[\phi(x)] \,,$$ 175 | # *Hint: while the proof is convoluted, the result itself is [pretty intuitive](https://en.wikipedia.org/wiki/Law_of_the_unconscious_statistician).* 176 | 177 | # + 178 | # show_answer('CVar in proba') 179 | # - 180 | 181 | # 182 | # 183 | # #### Exc (optional) -- Integrals 184 | # 185 | # Recall $p(x) = \NormDist(x \mid \mu, \sigma^2)$ from eqn. (G1). Abbreviate it as $c = (2 \pi \sigma^2)^{-1/2}$. 186 | # Use pen, paper, and calculus to show that 187 | # 188 | # - (i) the first parameter, $\mu$, indicates its **mean**, i.e. that $$\mu = \Expect[X] \,.$$ 189 | # *Hint: you can rely on the result of (iii)* 190 | # - (ii) the second parameter, $\sigma^2>0$, indicates its **variance**, 191 | # i.e. that $$\sigma^2 = \mathbb{Var}(X) \mathrel{≔} \Expect[(X-\mu)^2] \,.$$ 192 | # *Hint: use $x^2 = x x$ to enable integration by parts.* 193 | # - (iii) $E[1] = 1$, 194 | # thus proving that (G1) indeed uses the right normalising constant. 195 | # *Hint: Neither Bernoulli and Laplace managed this, 196 | # until [Gauss (1809)](#References) did by first deriving $(E[1])^2$. 197 | # For more (visual) help, watch [3Blue1Brown](https://www.youtube.com/watch?v=cy8r7WSuT1I&t=3m52s).* 198 | 199 | # + 200 | # show_answer('Gauss integrals') 201 | # - 202 | 203 | # **Exc (optional) -- Riemann sums**: 204 | # Recall that integrals compute the "area under the curve". 205 | # On a discrete grid, they can be approximated using the [Trapezoidal rule](https://en.wikipedia.org/wiki/Riemann_sum#Trapezoidal_rule). 206 | # 207 | # - (a) Replace the prefab code below with your own implementation, using `sum()`, 208 | # to compute the mean and variance of a pdf represented on a grid. 209 | # - (b) Use `np.trapezoid` to compute the probability that a scalar Gaussian $X$ lies within $1$ standard deviation of its mean. 210 | # *Hint: the numerical answer you should find is $\mathbb{P}(X \in [\mu {-} \sigma, \mu {+} \sigma]) \approx 68\%$.* 211 | 212 | # + 213 | def mean_and_var(pdf_values, grid): 214 | f, x = pdf_values, grid 215 | mu = np.trapezoid(f*x, x) 216 | s2 = np.trapezoid(f*(x-mu)**2, x) 217 | return mu, s2 218 | 219 | mu, sigma = 0, 2 # example 220 | pdf_vals = pdf_G1(grid1d, mu=mu, sigma2=sigma**2) 221 | 'Should equal mu and sigma2: %f, %f' % mean_and_var(pdf_vals, grid1d) 222 | 223 | 224 | # + 225 | # show_answer('Riemann sums', 'a') 226 | # - 227 | 228 | # **Exc -- The uniform pdf**: 229 | # Below is the pdf of the [uniform/flat/box distribution](https://en.wikipedia.org/wiki/Uniform_distribution_(continuous)) 230 | # for a given mean and variance. 231 | # 232 | # - Use `mean_and_var()` to verify `pdf_U1` (as is). 233 | # - Replace `_G1` with `_U1` in the code generating the above interactive plot. 234 | # - Why are the walls (ever so slightly) inclined? 235 | # - Write your own implementation below, and check that it reproduces the `scipy` version already in place. 
236 | 237 | def pdf_U1(x, mu, sigma2): 238 | a = mu - np.sqrt(3*sigma2) 239 | b = mu + np.sqrt(3*sigma2) 240 | pdf_values = sp.stats.uniform(loc=a, scale=(b-a)).pdf(x) 241 | # Your own implementation: 242 | # height = ... 243 | # pdf_values = height * np.ones_like(x) 244 | # pdf_values[xb] = ... 246 | return pdf_values 247 | 248 | 249 | # + 250 | # show_answer('pdf_U1') 251 | # - 252 | 253 | # ## The multivariate (i.e. vector) Gaussian 254 | # 255 | # A *multivariate* random variable, i.e. a **vector**, is simply a collection of scalar variables (on the same probability space). 256 | # Its distribution is the *joint* distribution of its components. 257 | # The pdf of the multivariate Gaussian (for any dimension $\ge 1$) is 258 | # 259 | # $$\large \NormDist(\x \mid \mathbf{\mu}, \mathbf{\Sigma}) = 260 | # |2 \pi \mathbf{\Sigma}|^{-1/2} \, \exp\Big(-\frac{1}{2}\|\x-\mathbf{\mu}\|^2_\mathbf{\Sigma} \Big) \,, \tag{GM} $$ 261 | # where $|.|$ represents the matrix determinant, 262 | # and $\|.\|_\mathbf{W}$ represents a weighted 2-norm: $\|\x\|^2_\mathbf{W} = \x^T \mathbf{W}^{-1} \x$. 263 | # 264 | #
265 | # 266 | # $\mathbf{W}$ must be symmetric-positive-definite (SPD) because ... (optional reading 🔍) 267 | # 268 | # 269 | # - The norm (a quadratic form) is invariant to any asymmetry in the weight matrix. 270 | # - The density (GM) would not be integrable (over $\Reals^{\xDim}$) if $\x\tr \mathbf{\Sigma} \x > 0$. 271 | # 272 | # - - - 273 | #
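#
# As an optional, quick numerical check of the first bullet in the aside above
# (the matrix `A` and vector `xx` below are just arbitrary, illustrative examples):
# a quadratic form only "sees" the symmetric part of its matrix,
# since the scalar $\x\tr \mat{A} \x$ equals its own transpose.

A = np.random.randn(3, 3)       # an arbitrary (asymmetric) matrix
xx = np.random.randn(3)         # an arbitrary vector
A_sym = (A + A.T) / 2           # symmetric part of A
print(np.isclose(xx @ A @ xx, xx @ A_sym @ xx))  # ⇒ True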
274 | # 275 | # It is important to recognize how similar eqn. (GM) is to the univariate (scalar) case (G1). 276 | # Moreover, [as above](#Exc-(optional)----Integrals), it can be shown that 277 | # 278 | # - $\mathbf{\mu} = \Expect[\X]$, 279 | # - $\mathbf{\Sigma} = \Expect[(\X-\mu)(\X-\mu)\tr]$, 280 | # 281 | # That is, the elements of $\mathbf{\Sigma}$ are the individual covariances: 282 | # $\Sigma_{i,j} = \Expect[(X_i-\mu_i)(X_j-\mu_j)] =: \mathbb{Cov}(X_i, X_j)$. 283 | # On the diagonal ($i=j$), they are variances: $\Sigma_{i,i} = \mathbb{Var}(X_i)$. 284 | # Therefore $\mathbf{\Sigma}$ is called the *covariance matrix*. 285 | # 286 | # The following implements the pdf (GM). Take a moment to digest the code, but don't worry if you don't understand it all. Hints: 287 | # 288 | # - `@` produces matrix multiplication (`*` in `Matlab`); 289 | # - `*` produces array multiplication (`.*` in `Matlab`); 290 | # - `axis=-1` makes `np.sum()` work along the last dimension of an ND-array. 291 | 292 | # + 293 | from numpy.linalg import det, inv 294 | 295 | def weighted_norm22(points, Wi): 296 | "Computes the weighted norm of each vector (row in `points`)." 297 | return np.sum( (points @ inv(Wi)) * points, axis=-1) 298 | 299 | def pdf_GM(points, mu, Sigma): 300 | "pdf -- Gaussian, Multivariate: N(x | mu, Sigma) for each x in `points`." 301 | c = np.sqrt(det(2*np.pi*Sigma)) 302 | return 1/c * np.exp(-0.5*weighted_norm22(points - mu, Sigma)) 303 | 304 | 305 | # - 306 | 307 | # The following code plots the pdf as contour (level) curves. 308 | 309 | # + 310 | grid2d = np.dstack(np.meshgrid(grid1d, grid1d)) 311 | 312 | @interact(corr=(-1, 1, .001), std_x=(1e-5, 10, 1)) 313 | def plot_pdf_G2(corr=0.7, std_x=1): 314 | # Form covariance matrix (C) from input and some constants 315 | var_x = std_x**2 316 | var_y = 1 317 | cv_xy = np.sqrt(var_x * var_y) * corr 318 | C = 25 * np.array([[var_x, cv_xy], 319 | [cv_xy, var_y]]) 320 | # Evaluate (compute) 321 | density_values = pdf_GM(grid2d, mu=0, Sigma=C) 322 | # Plot 323 | plt.figure(figsize=(4, 4)) 324 | height = 1/np.sqrt(det(2*np.pi*C)) 325 | plt.contour(grid1d, grid1d, density_values, 326 | levels=np.linspace(1e-4, height, 11), cmap="plasma") 327 | plt.axis('equal'); 328 | plt.show() 329 | # - 330 | 331 | # The code defines the covariance `cv_xy` from the input ***correlation*** `corr`. 332 | # This is a coefficient (number), defined for any two random variables $x$ and $y$ (not necessarily Gaussian) by 333 | # $$ \rho[X,Y]=\frac{\mathbb{Cov}[X,Y]}{\sigma_x \sigma_y} \,.$$ 334 | # This correlation quantifies (defines) the ***linear dependence*** between $X$ and $Y$. Indeed, 335 | # 336 | # - $-1\leq \rho \leq 1$ (by Cauchy-Swartz) 337 | # - **If** $X$ and $Y$ are *independent*, then $\rho[X,Y]=0$. 338 | # 339 | # **Exc -- Correlation influence:** How do the contours look? Try to understand why. Cases: 340 | # 341 | # - (a) correlation=0. 342 | # - (b) correlation=0.99. 343 | # - (c) correlation=0.5. (Note that we've used `plt.axis('equal')`). 344 | # - (d) correlation=0.5, but with non-equal variances. 345 | # 346 | # Finally (optional): why does the code "crash" when `corr = +/- 1`? Is this a good or a bad thing? 347 | # 348 | # **Exc Correlation game:** [Play](http://guessthecorrelation.com/) until you get a score (gold coins) of 5 or more. 349 | # 350 | # **Exc -- Correlation disambiguation:** 351 | # 352 | # - What's the difference between correlation and covariance (in words)? 
353 | # - What's the difference between non-zero (C) correlation (or covariance) and (D) dependence? 354 | # *Hint: consider this [image](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#/media/File:Correlation_examples2.svg).* 355 | # - Does $C \Rightarrow D$ or the converse? 356 | # - What about the negation, $\neg D \Rightarrow \neg C$, or its converse?* 357 | # - What about the (jointly) Gaussian case? 358 | # - Does correlation (or dependence) imply causation? 359 | # - Suppose $x$ and $y$ have non-zero correlation, but neither one causes the other. 360 | # Does information about $y$ give you information about $x$? 361 | # 362 | # **Exc (optional) -- Gaussian ubiquity:** Why are we so fond of the Gaussian assumption? 363 | 364 | # + 365 | # show_answer('Why Gaussian') 366 | # - 367 | 368 | # ## Summary 369 | # 370 | # The Normal/Gaussian distribution is bell-shaped. 371 | # Its parameters are the mean and the variance. 372 | # In the multivariate case, the mean is a vector, 373 | # while the second parameter becomes a covariance *matrix*, 374 | # whose off-diagonal elements represent scaled correlation factors, 375 | # which measure *linear* dependence. 376 | # 377 | # ### Next: [T3 - Bayesian inference](T3%20-%20Bayesian%20inference.ipynb) 378 | # 379 | # 380 | # 381 | # ### References 382 | # 383 | # - **Laplace (1812)**: P. S. Laplace, "Théorie Analytique des Probabilités", 1812. 384 | # - **Gauss (1809)**: Gauss, C. F. (1809). *Theoria Motus Corporum Coelestium in Sectionibus Conicis Solem Ambientium*. Specifically, Book II, Section 3, Art. 177-179, where he presents the method of least squares (which will be very relevant to us) and its probabilistic justification based on the normal distribution of errors. 385 | -------------------------------------------------------------------------------- /notebooks/scripts/T4 - Time series filtering.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupyter: 3 | jupytext: 4 | formats: ipynb,scripts//py:light,scripts//md 5 | text_representation: 6 | extension: .md 7 | format_name: markdown 8 | format_version: '1.3' 9 | jupytext_version: 1.17.2 10 | kernelspec: 11 | display_name: Python 3 (ipykernel) 12 | language: python 13 | name: python3 14 | --- 15 | 16 | ```python 17 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials" 18 | !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s 19 | ``` 20 | 21 | ```python 22 | from resources import show_answer, interact, cInterval 23 | %matplotlib inline 24 | import numpy as np 25 | import numpy.random as rnd 26 | import matplotlib.pyplot as plt 27 | plt.ion(); 28 | ``` 29 | 30 | # T4 - Time series filtering 31 | 32 | Before exploring the full (multivariate) Kalman filter (KF), 33 | let's first consider scalar but time-dependent (temporal/sequential) problems. 34 | $ 35 | \newcommand{\Expect}[0]{\mathbb{E}} 36 | \newcommand{\NormDist}{\mathscr{N}} 37 | \newcommand{\DynMod}[0]{\mathscr{M}} 38 | \newcommand{\ObsMod}[0]{\mathscr{H}} 39 | \newcommand{\mat}[1]{{\mathbf{{#1}}}} 40 | \newcommand{\bvec}[1]{{\mathbf{#1}}} 41 | \newcommand{\supa}[0]{^\text{a}} 42 | \newcommand{\supf}[0]{^\text{f}} 43 | $ 44 | 45 | Consider the scalar, stochastic process $\{x_k\}$, 46 | generated for sequentially increasing time index $k$ by 47 | 48 | $$ x_{k+1} = \DynMod_k x_k + q_k \,. \tag{DynMod} $$ 49 | 50 | For our present purposes, the **dynamical "model"** $\DynMod_k$ is simply a known number. 
51 | Suppose we get observations $\{y_k\}$ as in: 52 | 53 | $$ y_k = \ObsMod_k x_k + r_k \,, \tag{ObsMod} $$ 54 | 55 | The noises and $x_0$ are assumed to be independent of each other and across time 56 | (i.e., $\varepsilon_k$ is independent of $\varepsilon_l$ for $k \neq l$), 57 | and Gaussian with known parameters: 58 | $$x_0 \sim \NormDist(x\supa_0, P\supa_0),\quad 59 | q_k \sim \NormDist(0, Q_k),\quad 60 | r_k \sim \NormDist(0, R_k) \,.$$ 61 | 62 | 63 | 64 | ## Example problem: AR(1) 65 | 66 | For simplicity (though the KF does not require these assumptions), 67 | suppose that $\DynMod_k = \DynMod$, i.e., it is constant in time. 68 | Then $\{x_k\}$ forms a so-called order-1 auto-regressive process [[Wikipedia](https://en.wikipedia.org/wiki/Autoregressive_model#Example:_An_AR(1)_process)]. 69 | Similarly, we drop the time dependence (subscript $k$) from $\ObsMod_k, Q_k, R_k$. 70 | The code below simulates a random realization of this process. 71 | 72 | ```python 73 | # Use H=1 so that it makes sense to plot data on the same axes as the state. 74 | H = 1 75 | 76 | # Initial estimate 77 | xa = 0 # mean 78 | Pa = 10 # variance 79 | 80 | def simulate(nTime, xa, Pa, M, H, Q, R): 81 | """Simulate synthetic truth (x) and observations (y).""" 82 | x = xa + np.sqrt(Pa)*rnd.randn() # Draw initial condition 83 | truths = np.zeros(nTime) # Allocate 84 | obsrvs = np.zeros(nTime) # Allocate 85 | for k in range(nTime): # Loop in time 86 | x = M * x + np.sqrt(Q)*rnd.randn() # Dynamics 87 | y = H * x + np.sqrt(R)*rnd.randn() # Measurement 88 | truths[k] = x # Assign 89 | obsrvs[k] = y # Assign 90 | return truths, obsrvs 91 | ``` 92 | 93 | The following code plots the process. *You don't need to read or understand it*. 94 | 95 | ```python 96 | @interact(seed=(1, 12), M=(0, 1.03, .01), nTime=(0, 100), 97 | logR=(-9, 9), logR_bias=(-9, 9), 98 | logQ=(-9, 9), logQ_bias=(-9, 9)) 99 | def exprmt(seed=4, nTime=50, M=0.97, logR=1, logQ=1, analyses_only=False, logR_bias=0, logQ_bias=0): 100 | R, Q, Q_bias, R_bias = 4.0**np.array([logR, logQ, logQ_bias, logR_bias]) 101 | 102 | rnd.seed(seed) 103 | truths, obsrvs = simulate(nTime, xa, Pa, M, H, Q, R) 104 | 105 | plt.figure(figsize=(9, 6)) 106 | kk = 1 + np.arange(nTime) 107 | plt.plot(kk, truths, 'k' , label='True state ($x$)') 108 | plt.plot(kk, obsrvs, 'g*', label='Noisy obs ($y$)', ms=9) 109 | 110 | try: 111 | estimates, variances = KF(nTime, xa, Pa, M, H, Q*Q_bias, R*R_bias, obsrvs) 112 | if analyses_only: 113 | plt.plot(kk, estimates[:, 1], label=r'Kalman$^a$ ± 1$\sigma$') 114 | plt.fill_between(kk, *cInterval(estimates[:, 1], variances[:, 1]), alpha=.2) 115 | else: 116 | kk2 = kk.repeat(2) 117 | plt.plot(kk2, estimates.flatten(), label=r'Kalman ± 1$\sigma$') 118 | plt.fill_between(kk2, *cInterval(estimates, variances), alpha=.2) 119 | except NameError: 120 | pass 121 | 122 | sigproc = {} 123 | ### INSERT ANSWER TO EXC "signal processing" HERE ### 124 | # sigproc['some method'] = ... 125 | for method, estimate in sigproc.items(): 126 | plt.plot(kk[:len(estimate)], estimate, label=method) 127 | 128 | plt.xlabel('Time index (k)') 129 | plt.legend(loc='upper left') 130 | plt.axhline(0, c='k', lw=1, ls='--') 131 | plt.show() 132 | ``` 133 | 134 | **Exc -- AR1 properties:** Answer the following. 135 | 136 | - What does `seed` control? 137 | - Explain what happens when `M=0`. Also consider $Q \rightarrow 0$. 138 | Can you give a name to this `truth` process, 139 | i.e. a link to the relevant Wikipedia page? 140 | What about when `M=1`? 
141 | Describe the general nature of the process as `M` changes from 0 to 1. 142 | What about when `M>1`? 143 | - What happens when $R \rightarrow 0$ ? 144 | - What happens when $R \rightarrow \infty$ ? 145 | 146 | ```python 147 | # show_answer('AR1') 148 | ``` 149 | 150 | 151 | 152 | ## The (univariate) Kalman filter (KF) 153 | 154 | Now we have a random variable that evolves in time, that we can *pretend* is unknown, 155 | in order to estimate (or "track") it. 156 | From above, 157 | $p(x_0) = \NormDist(x_0 | x\supa_0, P\supa_0)$ with given parameters. 158 | We also know that $x_k$ evolves according to eqn. (DynMod). 159 | Therefore, as shown in the following exercise, 160 | $p(x_1) = \NormDist(x_1 | x\supf_1, P\supf_1)$, with 161 | $$ 162 | \begin{align} 163 | x\supf_k &= \DynMod \, x\supa_{k-1} \tag{5} \\ 164 | P\supf_k &= \DynMod^2 \, P\supa_{k-1} + Q \tag{6} 165 | \end{align} 166 | $$ 167 | 168 | Formulae (5) and (6) are called the **forecast step** of the KF. 169 | But when $y_1$ becomes available (according to eqn. (ObsMod)), 170 | we can update/condition our estimate of $x_1$, i.e., compute the posterior, 171 | $p(x_1 | y_1) = \NormDist(x_1 \mid x\supa_1, P\supa_1)$, 172 | using the formulae we developed for Bayes' rule with 173 | [Gaussian distributions](T3%20-%20Bayesian%20inference.ipynb#Linear-Gaussian-Bayes'-rule-(1D)). 174 | 175 | $$ 176 | \begin{align} 177 | P\supa_k &= 1/(1/P\supf_k + \ObsMod^2/R) \,, \tag{7} \\\ 178 | x\supa_k &= P\supa_k (x\supf/P\supf_k + \ObsMod y_k/R) \,. \tag{8} 179 | \end{align} 180 | $$ 181 | 182 | This is called the **analysis step** of the KF. 183 | We call this the **analysis step** of the KF. 184 | We can subsequently apply the same two steps again 185 | to produce forecast and analysis estimates for the next time index, $k+1$. 186 | Note that if $k$ is a date index, then "yesterday's forecast becomes today's prior". 187 | 188 | #### Exc -- linear algebra of Gaussian random variables 189 | 190 | - (a) Show the linearity of the expectation operator: 191 | $\Expect [ \DynMod x + b ] = \DynMod \Expect[x] + b$, for some constant $b$. 192 | - (b) Thereby, show that $\mathbb{Var}[ \DynMod x + b ] = \DynMod^2 \mathbb{Var} [x]$. 193 | - (c) *Optional*: Now let $z = x + q$, with $x$ and $q$ independent and Gaussian. 194 | Then the pdf of this sum of random variables, $p_z(z)$, is given by convolution 195 | (hopefully this makes intuitive sense, at least in the discrete case): 196 | $$ p_z(z) = \int p_x(x) \, p_q(z - x) \, d x \,.$$ 197 | Show that $z$ is also Gaussian, 198 | whose mean and variance are the sum of the means and variances (respectively). 199 | *Hint: you will need the result on [completing the square](T3%20-%20Bayesian%20inference.ipynb#Exc----BR-LG1), 200 | specifically the part that we did not make use of for Bayes' rule. 201 | If you get stuck, you can also view the excellent [`3blue1brown`](https://www.youtube.com/watch?v=d_qvLDhkg00&t=266s&ab_channel=3Blue1Brown) on the topic.* 202 | 203 | ```python 204 | # show_answer('Sum of Gaussians', 'a') 205 | ``` 206 | 207 | #### The (general) Bayesian filtering recursions 208 | 209 | In the case of linearity and Gaussianity, 210 | the KF of eqns. (5)-(8) computes the *exact* Bayesian pdfs for $x_k$. 211 | But even without these assumptions, 212 | a general (abstract) Bayesian **recursive** procedure can still be formulated, 213 | relying only on the remaining ("hidden Markov model") assumptions. 
214 | 215 | - The analysis "assimilates" $y_k$ to compute $p(x_k | y_{1:k})$, 216 | where $y_{1:k} = y_1, \ldots, y_k$ is shorthand notation. 217 | $$ 218 | p(x_k | y_{1:k}) \propto p(y_k | x_k) \, p(x_k | x_{1:k-1}) 219 | $$ 220 | - The forecast "propagates" the estimate with its uncertainty 221 | to produce $p(x_{k+1}| y_{1:k})$. 222 | $$ 223 | p(x_{k+1} | y_{1:k}) = \int p(x_{k+1} | x_k) \, p(x_k | y_{1:k}) \, d x_k 224 | $$ 225 | 226 | It is important to appreciate the benefits of the recursive form of these computations: 227 | It reflects the recursiveness (Markov property) of nature: 228 | Both in the problem and our solution, time $k+1$ *builds on* time $k$, 229 | so we do not need to re-do the entire problem for each $k$. 230 | At every time $k$, we only deal with functions of one or two variables: $x_k$ and $x_{k+1}$, 231 | which is a much smaller space (for quantifying our densities or covariances) 232 | than that of the joint pdf $p(x_{1:k} | y_{1:k})$. 233 | 234 | Note, however, that our recursive procedure, called ***filtering***, 235 | does *not* compute $p(x_l | y_{1:k})$ for any $l < k$. 236 | In other words, any filtering estimate only contains *past* information. 237 | Updating estimates of the state at previous times is called ***smoothing***. 238 | However, for prediction/forecasting, filtering is all we need: 239 | accurate initial conditions (estimates of the present moment). 240 | 241 | #### Exc -- Implementation 242 | 243 | Below is a very rudimentary sequential estimator (not the KF!), which essentially just does "persistence" forecasts and sets the analysis estimates to the value of the observations (*which is only generally possible in this linear, scalar case*). Run its cell to define it, and then re-run the above interactive animation cell. Then: 244 | 245 | - Implement the KF properly by replacing the forecast and analysis steps below. *Re-run the cell.* 246 | - Try implementing the analysis step both in the "precision" and "gain" forms. 247 | 248 | ```python 249 | def KF(nTime, xa, Pa, M, H, Q, R, obsrvs): 250 | """Kalman filter. PS: (xa, Pa) should be input with *initial* values.""" 251 | ############################ 252 | # TEMPORARY IMPLEMENTATION # 253 | ############################ 254 | estimates = np.zeros((nTime, 2)) 255 | variances = np.zeros((nTime, 2)) 256 | for k in range(nTime): 257 | # Forecast step 258 | xf = xa 259 | Pf = Pa 260 | # Analysis update step 261 | Pa = R / H**2 262 | xa = obsrvs[k] / H 263 | # Assign 264 | estimates[k] = xf, xa 265 | variances[k] = Pf, Pa 266 | return estimates, variances 267 | ``` 268 | 269 | ```python 270 | # show_answer('KF1 code') 271 | ``` 272 | 273 | #### Exc -- KF behaviour 274 | 275 | - Set `logQ` to its minimum, and `M=1`. 276 | We established in Exc "AR1" that the true states are now constant in time (but unknown). 277 | How does the KF fare in estimating it? 278 | Does its uncertainty variance ever reach 0? 279 | - What is the KF uncertainty variance in the case of `M=0`? 280 | 281 | ```python 282 | # show_answer('KF behaviour') 283 | ``` 284 | 285 | 286 | 287 | #### Exc -- Temporal convergence 288 | 289 | In general, $\DynMod$, $\ObsMod$, $Q$, and $R$ depend on time, $k$ 290 | (often to parameterize exogenous/outside factors/forces/conditions), 291 | and there are no limit values that the KF parameters converge to. 292 | But, we assumed that they are all stationary. 293 | In addition, suppose $Q=0$ and $\ObsMod = 1$. 
294 | Show that 295 | 296 | - (a) $1/P\supa_k = 1/(\DynMod^2 P\supa_{k-1}) + 1/R$, 297 | by combining the forecast and analysis equations for the variance. 298 | - (b) $1/P\supa_k = 1/P\supa_0 + k/R$, if $\DynMod = 1$. 299 | - (c) $P\supa_{\infty} = 0$, if $\DynMod = 1$. 300 | - (d) $P\supa_{\infty} = 0$, if $\DynMod < 1$. 301 | - (e) $P\supa_{\infty} = R (1-1/\DynMod^2)$, if $\DynMod > 1$. 302 | *Hint: Look for the fixed point of the recursion of part (a).* 303 | 304 | ```python 305 | # show_answer('Asymptotic Riccati', 'a') 306 | ``` 307 | 308 | **Exc (optional) -- Temporal CV, part 2:** 309 | Now we don't assume that $Q$ is zero. Instead 310 | 311 | - (a) Suppose $\DynMod = 0$. What does $P\supa_k$ equal? 312 | - (b) Suppose $\DynMod = 1$. Show that $P\supa_\infty$ 313 | satisfies the quadratic equation: $0 = P^2 + Q P - Q R$. 314 | Thereby, without solving the quadratic equation, show that 315 | - (c) $P\supa_\infty \rightarrow R$ (from below) if $Q \rightarrow +\infty$. 316 | - (d) $P\supa_\infty \rightarrow \sqrt{ Q R}$ (from above) if $Q \rightarrow 0^+$. 317 | 318 | ```python 319 | # show_answer('Asymptotes when Q>0') 320 | ``` 321 | 322 | #### Exc (optional) -- Analytic simplification in the case of an unknown constant 323 | 324 | - Note that in case $Q = 0$, 325 | then $x_{k+1} = \DynMod^k x_0$. 326 | - So if $\DynMod = 1$, then $x_k = x_0$, so we are estimating an unknown *constant*, 327 | and can drop its time index subscript. 328 | - For simplicity, assume $\ObsMod = 1$, and $P^a_0 \rightarrow +\infty$. 329 | - Then $p(x | y_{1:k}) \propto \exp \big\{- \sum_l \| y_l - x \|^2_R / 2 \big\} 330 | = \NormDist(x | \bar{y}, R/k )$, which again follows by completing the square. 331 | - In words, the (accumulated) posterior mean is the sample average, 332 | $\bar{y} = \frac{1}{k}\sum_l y_l$, 333 | and the variance is that of a single observation divided by $k$. 334 | 335 | Show that this is the same posterior that the KF recursions produce. 336 | *Hint: while this is straightforward for the variance, 337 | you will probably want to prove the mean using induction.* 338 | 339 | #### Exc -- Impact of biases 340 | 341 | Re-run the above interactive animation to set the default control values. Answer the following 342 | 343 | - `logR_bias`/`logQ_bias` control the (multiplicative) bias in $R$/$Q$ that is fed to the KF. 344 | What happens when the KF "thinks" the measurement/dynamical error 345 | is (much) smaller than it actually is? 346 | What about larger? 347 | - Re-run the animation to get default values. 348 | Set `logQ` to 0, which will make the following behaviour easier to describe. 349 | In the code, add 20 to the initial `xa` **given to the KF**. 350 | How long does it take for it to recover from this initial bias? 351 | - Multiply `Pa` **given to the KF** by 0.01. What about now? 352 | - Remove the previous biases. 353 | Instead, multiply `M` **given to the KF** by 2, and observe what happens. 354 | Try the same, but dividing `M` by 2. 355 | 356 | ```python 357 | # show_answer('KF with bias') 358 | ``` 359 | 360 | ## Alternative methods 361 | 362 | When it comes to (especially univariate) time series analysis, 363 | the Kalman filter (KF) is not the only option. 364 | For example, **signal processing** offers several alternative filters. 365 | Indeed, the word "filter" in the KF comes from that domain, 366 | where it originally referred to removing high-frequency noise, 367 | since this often leads to a better estimate of the signal. 
368 | We will not review signal processing theory here, 369 | but challenge you to make use of what `scipy` already has to offer. 370 | 371 | #### Exc (optional) -- signal processing 372 | 373 | Run the following cell to import and define some more tools. 374 | 375 | ```python 376 | import scipy as sp 377 | import scipy.signal as sig 378 | def nrmlz(x): 379 | return x / x.sum() 380 | def trunc(x, n): 381 | return np.pad(x[:n], (0, len(x)-n)) 382 | ``` 383 | 384 | Now try to "filter" the `obsrvs` to produce estimates of `truth`. 385 | For each method, add your estimate ("filtered signal" in signal processing parlance) 386 | to the `sigproc` dictionary in the interactive animation cell, 387 | using an appropriate name/key (this will automatically include it in the plot). 388 | Use 389 | 390 | - (a) [`sig.wiener`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.wiener.html). 391 | *PS: this is a direct ancestor of the KF*. 392 | - (b) a moving average, for example [`sig.windows.hamming`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.windows.hamming.html). 393 | *Hint: you may also want to use [`sig.convolve`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.convolve.html#scipy.signal.convolve)*. 394 | - (c) a low-pass filter using [`np.fft`](https://docs.scipy.org/doc/scipy/reference/fft.html#). 395 | *Hint: you may also want to use the above `trunc` function.* 396 | - (d) The [`sig.butter`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.butter.html) filter. 397 | *Hint: apply with [`sig.filtfilt`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.filtfilt.html).* 398 | - (e) not really a signal processing method: [`sp.interpolate.UniveriateSpline`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.UnivariateSpline.html) 399 | 400 | The answers should be considered examples, not the uniquely right way. 401 | 402 | ```python 403 | # show_answer('signal processing', 'a') 404 | ``` 405 | 406 | But for the above problem (which is linear-Gaussian!), 407 | the KF is guaranteed (on average, in the long run, in terms of mean square error) 408 | to outperform any other method. 409 | We will see cases later (in full-blown state estimation) 410 | where the difference is much clearer, 411 | and indeed it might not even be clear how to apply signal processing methods. 412 | However, the KF has an unfair advantage: we are giving it a lot of information 413 | about the problem (`M, H, R, Q`) that the signal processing methods do not have. 414 | Therefore, those methods typically require a good deal of tuning 415 | (but in practice, so does the KF, since `Q` and `R` are rarely well determined). 416 | 417 | ## Summary 418 | 419 | The Kalman filter (KF) can be derived by applying linear-Gaussian assumptions 420 | to a sequential inference problem. 421 | Generally, the uncertainty never converges to 0, 422 | and the performance of the filter depends entirely on 423 | accurate system parameters (models and error covariance matrices). 424 | 425 | As a subset of state estimation (i.e., the KF), we can do classical time series estimation 426 | [(wherein state-estimation is called the state-space approach)](https://www.google.co.uk/search?q=%22We+now+demonstrate+how+to+put+these+models+into+state+space+form%22&btnG=Search+Books&tbm=bks). 427 | Moreover, DA methods produce uncertainty quantification, which is usually more obscure with time series analysis methods. 
428 | 429 | ### Next: [T5 - Multivariate Kalman filter](T5%20-%20Multivariate%20Kalman%20filter.ipynb) 430 | 431 | 432 | 433 | ### References 434 | -------------------------------------------------------------------------------- /notebooks/scripts/T6 - Geostats & Kriging [optional].md: -------------------------------------------------------------------------------- 1 | --- 2 | jupyter: 3 | jupytext: 4 | cell_metadata_filter: -all 5 | formats: ipynb,scripts//py:light,scripts//md 6 | text_representation: 7 | extension: .md 8 | format_name: markdown 9 | format_version: '1.3' 10 | jupytext_version: 1.17.2 11 | kernelspec: 12 | display_name: Python 3 (ipykernel) 13 | language: python 14 | name: python3 15 | --- 16 | 17 | ```python 18 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials" 19 | !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s 20 | ``` 21 | 22 | ```python 23 | from resources import show_answer, interact 24 | %matplotlib inline 25 | import numpy as np 26 | import matplotlib.pyplot as plt 27 | import numpy.random as rnd 28 | import scipy.linalg as sla 29 | from mpl_tools.misc import nRowCol 30 | from mpl_tools.place import freshfig 31 | plt.ion(); 32 | ``` 33 | 34 | # T6 - Spatial statistics ("geostatistics") & Kriging 35 | 36 | Covariances between two (or a few) variables is very well, 37 | but if you have not seen it before, the connection between covariances 38 | and geophysical (spatial) fields may not be obvious. 39 | The purpose of this tutorial is to familiarise you with random (spatial) fields 40 | and their estimation. 41 | $ 42 | \newcommand{\mat}[1]{{\mathbf{{#1}}}} 43 | \newcommand{\bvec}[1]{{\mathbf{#1}}} 44 | $ 45 | 46 | Set some parameters 47 | 48 | ```python 49 | rnd.seed(3000) 50 | grid1D = np.linspace(0, 1, 21) 51 | N = 15 # ensemble size 52 | ``` 53 | 54 | ## Variograms 55 | 56 | The "Variogram" of a field is essentially `1 - autocovariance`. Thus, it describes the spatial dependence of the field. The mean (1st moment) of a field is usually estimated and described/parametrized with trend lines/surfaces, while higher moments are usually not worth modelling. 57 | 58 | ```python 59 | def variogram(dists, Range=1, kind="Gauss", nugget=0): 60 | """Compute variogram for distance points `dists`.""" 61 | dists = dists / Range 62 | if kind == "Spheric": 63 | gamma = 1.5 * dists - .5 * dists**3 64 | gamma[dists >= 1] = 1 65 | elif kind == "Expo": 66 | dists *= 3 # by convention 67 | gamma = 1 - np.exp(-dists) 68 | else: # "Gauss" 69 | dists *= 3 # by convention 70 | gamma = 1 - np.exp(-(dists)**2) 71 | # Include nugget (discontinuity at 0) 72 | gamma *= (1-nugget) 73 | gamma[dists != 0] += nugget 74 | return gamma 75 | ``` 76 | 77 | #### Plot 78 | 79 | ```python 80 | @interact(Range=(.01, 4), nugget=(0.0, 1, .1)) 81 | def plot_variogram(Range=1, nugget=0): 82 | fig, ax = plt.subplots(figsize=(6, 3)) 83 | ax.set_xlim(0, 1) 84 | ax.set_ylim(0, 1) 85 | for i, kind in enumerate(["Spheric", "Expo", "Gauss"]): 86 | gamma = variogram(grid1D, Range, kind, nugget=nugget) 87 | ax.plot(grid1D, gamma, lw=2, color=f"C{i}", label=kind) 88 | ax.legend(loc="upper left") 89 | plt.show() 90 | ``` 91 | 92 | In order to apply the variogram, we must first compute distances. 93 | The following is a fairly efficient implementation. 
94 | 95 | ```python 96 | def dist_euclid(A, B): 97 | """Compute the l2-norm between each point (row) of A and B""" 98 | diff = A[:, None, :] - B 99 | d2 = np.sum(diff**2, axis=-1) 100 | return np.sqrt(d2) 101 | ``` 102 | 103 | Now the full covariance (matrix) between any sets of points can be defined by the following. 104 | 105 | ```python 106 | def covar(coords, **vg_params): 107 | dists = dist_euclid(coords, coords) 108 | return 1 - variogram(dists, **vg_params) 109 | ``` 110 | 111 | ```python 112 | fig, ax = freshfig("1D covar") 113 | C = covar(grid1D[:, None], Range=1, kind="Gauss", nugget=1e-3) 114 | ax.matshow(C, cmap="RdBu"); 115 | ``` 116 | 117 | ## Random fields (1D) 118 | 119 | Gaussian random variables (vectors) are fully specified by their mean and covariance. 120 | Once in possession of a covariance matrix, we can use it to sample random variables 121 | by multiplying its Cholesky factor (square root) onto standard normal variables. 122 | 123 | ```python 124 | def gaussian_fields(coords, **vg_params): 125 | """Gen. random (Gaussian) fields at `coords` (no structure/ordering required).""" 126 | C = covar(coords, **vg_params) 127 | L = sla.cholesky(C) 128 | fields = L.T @ rnd.randn(len(L.T), N) 129 | return fields 130 | ``` 131 | 132 | #### Exc 133 | 134 | Use the plotting functionality below to 135 | explain the effect of `Range` and `nugget` 136 | 137 | ```python 138 | fig, ax = freshfig("1D random fields") 139 | fields = gaussian_fields(grid1D[:, None], Range=1, kind="Gauss", nugget=1e-3) 140 | ax.plot(grid1D, fields, lw=2); 141 | ``` 142 | 143 | ## Random fields (2D) 144 | 145 | The following sets up a 2d grid. 146 | 147 | ```python 148 | grid2x, grid2y = np.meshgrid(grid1D, grid1D) 149 | grid2x.shape 150 | ``` 151 | 152 | where `grid2y` has the same shape. However, in the following we will "flatten" (a.k.a."(un)ravel", "vectorize", or "string out") this explicitly 2D grid of nodes into a simple list of points in 2D. 153 | 154 | ```python 155 | grid2D = np.dstack([grid2x, grid2y]).reshape((-1, 2)) 156 | grid2D.shape 157 | ``` 158 | 159 | Importantly, none of the following methods actually assume any structure to the list. So we could also work with a completely irregularly spaced set of points. For example, `gaussian_fields` is immediately applicable also to this 2D case. 160 | 161 | ```python 162 | vg_params = dict(Range=1, kind="Gauss", nugget=1e-4) 163 | fields = gaussian_fields(grid2D, **vg_params) 164 | ``` 165 | 166 | Of course, for plotting purposes, we undo the flattening. 167 | 168 | ```python 169 | def contour_plot(ax, field, cmap="nipy_spectral", levels=12, has_obs=True): 170 | field = field.reshape(grid2x.shape) # undo flattening 171 | if has_obs: 172 | ax.plot(*obs_coo.T, "ko", ms=4) 173 | ax.plot(*obs_coo.T, "yo", ms=1) 174 | ax.set(aspect="equal", xticks=[0, 1], yticks=[0, 1]) 175 | return ax.contourf(field, levels=levels, extent=(0, 1, 0, 1), 176 | cmap=cmap, vmin=vmin, vmax=vmax) 177 | 178 | # Fix the color scale for all subsequent `contour_plot`. 179 | # Use `None` to re-compute the color scale for each subplot. 180 | vmin = fields.min() 181 | vmax = fields.max() 182 | ``` 183 | 184 | ```python 185 | fig, axs = freshfig(num="2D random fields", figsize=(5, 4), 186 | nrows=3, ncols=4, sharex=True, sharey=True) 187 | 188 | for ax, field in zip(axs.ravel(), fields.T): 189 | contour_plot(ax, field, has_obs=False) 190 | ``` 191 | 192 | It might be interesting to inspect the covariance matrix in this 2D case. 
193 | 194 | ```python 195 | C = covar(grid2D, **vg_params) 196 | fig, ax = freshfig("2D covar") 197 | ax.matshow(C, cmap="RdBu", vmin=0, vmax=1); 198 | ax.grid(False) 199 | ``` 200 | 201 | ## Estimation problem 202 | 203 | For our estimation target we will use one of the above generated random fields. 204 | 205 | ```python 206 | truth = fields.T[0] 207 | ``` 208 | 209 | For the observations, we pick some random grid locations for simplicity 210 | (even though the methods work also with observations not on grid nodes). 211 | 212 | ```python 213 | nObs = 10 214 | obs_idx = rnd.randint(0, len(grid2D), nObs) 215 | obs_coo = grid2D[obs_idx] 216 | observations = truth[obs_idx] 217 | ``` 218 | 219 | ## Spatial interpolant methods 220 | 221 | ```python 222 | # Pre-compute re-used objects 223 | dists_yy = dist_euclid(obs_coo, obs_coo) 224 | dists_xy = dist_euclid(grid2D, obs_coo) 225 | ``` 226 | 227 | ```python 228 | estims = dict(Truth=truth) 229 | vmin=truth.min() 230 | vmax=truth.max() 231 | ``` 232 | 233 | The cells below contain snippets of different spatial interpolation methods, 234 | followed by a cell that plots the interpolants. 235 | Complete the code snippets. 236 | 237 | #### Exc: Nearest neighbour interpolation 238 | 239 | Implement the method [(wikipedia)](https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation). 240 | 241 | ```python 242 | nearest_obs = np.zeros_like(truth, dtype=int) ### FIX THIS ### 243 | estims["Nearest-n."] = observations[nearest_obs] 244 | ``` 245 | 246 | ```python 247 | # show_answer('nearest neighbour interp') 248 | ``` 249 | 250 | #### Exc: Inverse distance weighting 251 | 252 | Implement the method [(wikipedia)](https://en.wikipedia.org/wiki/Inverse_distance_weighting). 253 | *Hint: You can ignore the `errstate` line below. It is just used to "silence warnings" resulting from division by 0 (whose special case is treated in a cell further down).* 254 | 255 | ```python 256 | exponent = 3 257 | with np.errstate(invalid='ignore', divide='ignore'): 258 | weights = np.zeros_like(dists_xy) ### FIX THIS ### 259 | ``` 260 | 261 | ```python 262 | # show_answer('inv-dist weight interp') 263 | ``` 264 | 265 | ```python 266 | # Apply weights 267 | estims["Inv-dist."] = weights @ observations 268 | ``` 269 | 270 | ```python 271 | # Fix singularities 272 | estims["Inv-dist."][obs_idx] = observations 273 | ``` 274 | 275 | #### Exc: Simple Kriging 276 | 277 | Implement the method [(wikipedia)](https://en.wikipedia.org/wiki/Kriging#Simple_kriging). 278 | 279 | *Hint: use `sla.solve` or `sla.inv` (less recommended)* 280 | 281 | ```python 282 | ### ANSWER HERE ### 283 | covar_yy = ... 284 | cross_xy = ... 
285 | regression_coefficients = weights ### FIX THIS ### -- should be cross_xy / covar_yy 286 | ``` 287 | 288 | ```python 289 | # show_answer('Kriging code') 290 | ``` 291 | 292 | ```python 293 | estims["Kriging"] = regression_coefficients @ observations 294 | ``` 295 | 296 | ### Plot truth, estimates, error 297 | 298 | ```python 299 | fig, axs = freshfig(num="Estimation problem", figsize=(8, 4), squeeze=False, 300 | nrows=2, ncols=len(estims), sharex=True, sharey=True) 301 | 302 | for name, ax1, ax2 in zip(estims, *axs): 303 | ax1.set_title(name) 304 | c1 = contour_plot(ax1, estims[name]) 305 | c2 = contour_plot(ax2, estims[name] - truth, cmap="RdBu") 306 | fig.tight_layout() 307 | fig.subplots_adjust(right=0.85) 308 | cbar = fig.colorbar(c1, cax=fig.add_axes([0.9, 0.15, 0.03, 0.7])) 309 | axs[1, 0].set_ylabel("Errors"); 310 | ``` 311 | 312 | #### Exc: Try different values of `Range` 313 | 314 | - Run code to re-compute Kriging estimate. 315 | - What does setting it to `0.1` cause? What about `100`? 316 | 317 | ```python 318 | @interact(Range=(.01, 40)) 319 | def plot_krieged(Range=1): 320 | vg_params['Range'] = Range 321 | covar_yy = 1 - variogram(dists_yy, **vg_params) 322 | cross_xy = 1 - variogram(dists_xy, **vg_params) 323 | regression_coefficients = sla.solve(covar_yy, cross_xy.T).T 324 | 325 | fig, ax = freshfig(num="Kriging estimates") 326 | c1 = contour_plot(ax, regression_coefficients @ observations) 327 | fig.colorbar(c1); 328 | plt.show() 329 | ``` 330 | 331 | #### Generalizations 332 | 333 | - Unknown mean (Ordinary Kriging) 334 | - Co-Kriging (vector-valued fields) 335 | - Trend surfaces (non-stationarity assumptions) 336 | 337 | ## Summary 338 | 339 | The covariances of random fields can sometimes be described by the autocorrelation function, 340 | or equivalently, the (semi-)variogram. 341 | Covariances form the basis of a family of (geo-)spatial interpolation and approximation 342 | methods known as Kriging, which can also be called/interpreted as 343 | **Radial basis function (RBF) interpolation**, 344 | **Gaussian process regression** (GP) regression. 345 | 346 | - Kriging is derived by minimizing the variance of linear and unbiased estimators. 347 | - RBF interpolation is derived by the explicit desire to fit 348 | N functions to N data points (observations). 349 | - GP regression is derived by conditioning (applying Bayes rule) 350 | to the (supposedly) Gaussian distribution of the random field. 
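
To make that last connection concrete, here is a small, optional sketch (reusing `covar`, `variogram`, `dist_euclid`, `vg_params`, `grid2D`, `obs_coo`, and `observations` from above; the variable names below are only illustrative): conditioning a zero-mean, unit-variance Gaussian field on the (noise-free) observations yields a posterior mean identical to the simple-Kriging estimate, plus a pointwise posterior variance.

```python
Sigma_yy = covar(obs_coo, **vg_params)                               # covariances among observed locations
Sigma_xy = 1 - variogram(dist_euclid(grid2D, obs_coo), **vg_params)  # grid-to-observation covariances
gain = sla.solve(Sigma_yy, Sigma_xy.T).T                             # GP "gain", i.e. the simple-Kriging weights
post_mean = gain @ observations                                      # coincides with the Kriging estimate above
post_var = 1 - np.sum(gain * Sigma_xy, axis=-1)                      # posterior variance at each grid node
```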
351 | 352 | ### Next: [T7 - Chaos & Lorenz](T7%20-%20Chaos%20%26%20Lorenz%20[optional].ipynb) 353 | -------------------------------------------------------------------------------- /notebooks/scripts/T6 - Geostats & Kriging [optional].py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # cell_metadata_filter: -all 5 | # formats: ipynb,scripts//py:light,scripts//md 6 | # text_representation: 7 | # extension: .py 8 | # format_name: light 9 | # format_version: '1.5' 10 | # jupytext_version: 1.17.2 11 | # kernelspec: 12 | # display_name: Python 3 (ipykernel) 13 | # language: python 14 | # name: python3 15 | # --- 16 | 17 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials" 18 | # !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s 19 | 20 | from resources import show_answer, interact 21 | # %matplotlib inline 22 | import numpy as np 23 | import matplotlib.pyplot as plt 24 | import numpy.random as rnd 25 | import scipy.linalg as sla 26 | from mpl_tools.misc import nRowCol 27 | from mpl_tools.place import freshfig 28 | plt.ion(); 29 | 30 | # # T6 - Spatial statistics ("geostatistics") & Kriging 31 | # 32 | # Covariances between two (or a few) variables is very well, 33 | # but if you have not seen it before, the connection between covariances 34 | # and geophysical (spatial) fields may not be obvious. 35 | # The purpose of this tutorial is to familiarise you with random (spatial) fields 36 | # and their estimation. 37 | # $ 38 | # \newcommand{\mat}[1]{{\mathbf{{#1}}}} 39 | # \newcommand{\bvec}[1]{{\mathbf{#1}}} 40 | # $ 41 | # 42 | # Set some parameters 43 | 44 | rnd.seed(3000) 45 | grid1D = np.linspace(0, 1, 21) 46 | N = 15 # ensemble size 47 | 48 | 49 | # ## Variograms 50 | # 51 | # The "Variogram" of a field is essentially `1 - autocovariance`. Thus, it describes the spatial dependence of the field. The mean (1st moment) of a field is usually estimated and described/parametrized with trend lines/surfaces, while higher moments are usually not worth modelling. 52 | 53 | def variogram(dists, Range=1, kind="Gauss", nugget=0): 54 | """Compute variogram for distance points `dists`.""" 55 | dists = dists / Range 56 | if kind == "Spheric": 57 | gamma = 1.5 * dists - .5 * dists**3 58 | gamma[dists >= 1] = 1 59 | elif kind == "Expo": 60 | dists *= 3 # by convention 61 | gamma = 1 - np.exp(-dists) 62 | else: # "Gauss" 63 | dists *= 3 # by convention 64 | gamma = 1 - np.exp(-(dists)**2) 65 | # Include nugget (discontinuity at 0) 66 | gamma *= (1-nugget) 67 | gamma[dists != 0] += nugget 68 | return gamma 69 | 70 | 71 | # #### Plot 72 | 73 | @interact(Range=(.01, 4), nugget=(0.0, 1, .1)) 74 | def plot_variogram(Range=1, nugget=0): 75 | fig, ax = plt.subplots(figsize=(6, 3)) 76 | ax.set_xlim(0, 1) 77 | ax.set_ylim(0, 1) 78 | for i, kind in enumerate(["Spheric", "Expo", "Gauss"]): 79 | gamma = variogram(grid1D, Range, kind, nugget=nugget) 80 | ax.plot(grid1D, gamma, lw=2, color=f"C{i}", label=kind) 81 | ax.legend(loc="upper left") 82 | plt.show() 83 | 84 | 85 | # In order to apply the variogram, we must first compute distances. 86 | # The following is a fairly efficient implementation. 87 | 88 | def dist_euclid(A, B): 89 | """Compute the l2-norm between each point (row) of A and B""" 90 | diff = A[:, None, :] - B 91 | d2 = np.sum(diff**2, axis=-1) 92 | return np.sqrt(d2) 93 | 94 | 95 | # Now the full covariance (matrix) between any sets of points can be defined by the following. 
96 | 97 | def covar(coords, **vg_params): 98 | dists = dist_euclid(coords, coords) 99 | return 1 - variogram(dists, **vg_params) 100 | 101 | 102 | fig, ax = freshfig("1D covar") 103 | C = covar(grid1D[:, None], Range=1, kind="Gauss", nugget=1e-3) 104 | ax.matshow(C, cmap="RdBu"); 105 | 106 | 107 | # ## Random fields (1D) 108 | # 109 | # Gaussian random variables (vectors) are fully specified by their mean and covariance. 110 | # Once in possession of a covariance matrix, we can use it to sample random variables 111 | # by multiplying its Cholesky factor (square root) onto standard normal variables. 112 | 113 | def gaussian_fields(coords, **vg_params): 114 | """Gen. random (Gaussian) fields at `coords` (no structure/ordering required).""" 115 | C = covar(coords, **vg_params) 116 | L = sla.cholesky(C) 117 | fields = L.T @ rnd.randn(len(L.T), N) 118 | return fields 119 | 120 | 121 | # #### Exc 122 | # 123 | # Use the plotting functionality below to 124 | # explain the effect of `Range` and `nugget` 125 | 126 | fig, ax = freshfig("1D random fields") 127 | fields = gaussian_fields(grid1D[:, None], Range=1, kind="Gauss", nugget=1e-3) 128 | ax.plot(grid1D, fields, lw=2); 129 | 130 | # ## Random fields (2D) 131 | # 132 | # The following sets up a 2d grid. 133 | 134 | grid2x, grid2y = np.meshgrid(grid1D, grid1D) 135 | grid2x.shape 136 | 137 | # where `grid2y` has the same shape. However, in the following we will "flatten" (a.k.a."(un)ravel", "vectorize", or "string out") this explicitly 2D grid of nodes into a simple list of points in 2D. 138 | 139 | grid2D = np.dstack([grid2x, grid2y]).reshape((-1, 2)) 140 | grid2D.shape 141 | 142 | # Importantly, none of the following methods actually assume any structure to the list. So we could also work with a completely irregularly spaced set of points. For example, `gaussian_fields` is immediately applicable also to this 2D case. 143 | 144 | vg_params = dict(Range=1, kind="Gauss", nugget=1e-4) 145 | fields = gaussian_fields(grid2D, **vg_params) 146 | 147 | 148 | # Of course, for plotting purposes, we undo the flattening. 149 | 150 | # + 151 | def contour_plot(ax, field, cmap="nipy_spectral", levels=12, has_obs=True): 152 | field = field.reshape(grid2x.shape) # undo flattening 153 | if has_obs: 154 | ax.plot(*obs_coo.T, "ko", ms=4) 155 | ax.plot(*obs_coo.T, "yo", ms=1) 156 | ax.set(aspect="equal", xticks=[0, 1], yticks=[0, 1]) 157 | return ax.contourf(field, levels=levels, extent=(0, 1, 0, 1), 158 | cmap=cmap, vmin=vmin, vmax=vmax) 159 | 160 | # Fix the color scale for all subsequent `contour_plot`. 161 | # Use `None` to re-compute the color scale for each subplot. 162 | vmin = fields.min() 163 | vmax = fields.max() 164 | 165 | # + 166 | fig, axs = freshfig(num="2D random fields", figsize=(5, 4), 167 | nrows=3, ncols=4, sharex=True, sharey=True) 168 | 169 | for ax, field in zip(axs.ravel(), fields.T): 170 | contour_plot(ax, field, has_obs=False) 171 | # - 172 | 173 | # It might be interesting to inspect the covariance matrix in this 2D case. 174 | 175 | C = covar(grid2D, **vg_params) 176 | fig, ax = freshfig("2D covar") 177 | ax.matshow(C, cmap="RdBu", vmin=0, vmax=1); 178 | ax.grid(False) 179 | 180 | # ## Estimation problem 181 | # 182 | # For our estimation target we will use one of the above generated random fields. 183 | 184 | truth = fields.T[0] 185 | 186 | # For the observations, we pick some random grid locations for simplicity 187 | # (even though the methods work also with observations not on grid nodes). 
188 | 189 | nObs = 10 190 | obs_idx = rnd.randint(0, len(grid2D), nObs) 191 | obs_coo = grid2D[obs_idx] 192 | observations = truth[obs_idx] 193 | 194 | # ## Spatial interpolant methods 195 | 196 | # Pre-compute re-used objects 197 | dists_yy = dist_euclid(obs_coo, obs_coo) 198 | dists_xy = dist_euclid(grid2D, obs_coo) 199 | 200 | estims = dict(Truth=truth) 201 | vmin=truth.min() 202 | vmax=truth.max() 203 | 204 | # The cells below contain snippets of different spatial interpolation methods, 205 | # followed by a cell that plots the interpolants. 206 | # Complete the code snippets. 207 | # 208 | # #### Exc: Nearest neighbour interpolation 209 | # 210 | # Implement the method [(wikipedia)](https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation). 211 | 212 | nearest_obs = np.zeros_like(truth, dtype=int) ### FIX THIS ### 213 | estims["Nearest-n."] = observations[nearest_obs] 214 | 215 | # + 216 | # show_answer('nearest neighbour interp') 217 | # - 218 | 219 | # #### Exc: Inverse distance weighting 220 | # 221 | # Implement the method [(wikipedia)](https://en.wikipedia.org/wiki/Inverse_distance_weighting). 222 | # *Hint: You can ignore the `errstate` line below. It is just used to "silence warnings" resulting from division by 0 (whose special case is treated in a cell further down).* 223 | 224 | exponent = 3 225 | with np.errstate(invalid='ignore', divide='ignore'): 226 | weights = np.zeros_like(dists_xy) ### FIX THIS ### 227 | 228 | # + 229 | # show_answer('inv-dist weight interp') 230 | # - 231 | 232 | # Apply weights 233 | estims["Inv-dist."] = weights @ observations 234 | 235 | # Fix singularities 236 | estims["Inv-dist."][obs_idx] = observations 237 | 238 | # #### Exc: Simple Kriging 239 | # 240 | # Implement the method [(wikipedia)](https://en.wikipedia.org/wiki/Kriging#Simple_kriging). 241 | # 242 | # *Hint: use `sla.solve` or `sla.inv` (less recommended)* 243 | 244 | ### ANSWER HERE ### 245 | covar_yy = ... 246 | cross_xy = ... 247 | regression_coefficients = weights ### FIX THIS ### -- should be cross_xy / covar_yy 248 | 249 | # + 250 | # show_answer('Kriging code') 251 | # - 252 | 253 | estims["Kriging"] = regression_coefficients @ observations 254 | 255 | # ### Plot truth, estimates, error 256 | 257 | # + 258 | fig, axs = freshfig(num="Estimation problem", figsize=(8, 4), squeeze=False, 259 | nrows=2, ncols=len(estims), sharex=True, sharey=True) 260 | 261 | for name, ax1, ax2 in zip(estims, *axs): 262 | ax1.set_title(name) 263 | c1 = contour_plot(ax1, estims[name]) 264 | c2 = contour_plot(ax2, estims[name] - truth, cmap="RdBu") 265 | fig.tight_layout() 266 | fig.subplots_adjust(right=0.85) 267 | cbar = fig.colorbar(c1, cax=fig.add_axes([0.9, 0.15, 0.03, 0.7])) 268 | axs[1, 0].set_ylabel("Errors"); 269 | 270 | 271 | # - 272 | 273 | # #### Exc: Try different values of `Range` 274 | # 275 | # - Run code to re-compute Kriging estimate. 276 | # - What does setting it to `0.1` cause? What about `100`? 
277 | 278 | @interact(Range=(.01, 40)) 279 | def plot_krieged(Range=1): 280 | vg_params['Range'] = Range 281 | covar_yy = 1 - variogram(dists_yy, **vg_params) 282 | cross_xy = 1 - variogram(dists_xy, **vg_params) 283 | regression_coefficients = sla.solve(covar_yy, cross_xy.T).T 284 | 285 | fig, ax = freshfig(num="Kriging estimates") 286 | c1 = contour_plot(ax, regression_coefficients @ observations) 287 | fig.colorbar(c1); 288 | plt.show() 289 | 290 | # #### Generalizations 291 | # 292 | # - Unknown mean (Ordinary Kriging) 293 | # - Co-Kriging (vector-valued fields) 294 | # - Trend surfaces (non-stationarity assumptions) 295 | # 296 | # ## Summary 297 | # 298 | # The covariances of random fields can sometimes be described by the autocorrelation function, 299 | # or equivalently, the (semi-)variogram. 300 | # Covariances form the basis of a family of (geo-)spatial interpolation and approximation 301 | # methods known as Kriging, which can also be called/interpreted as 302 | # **Radial basis function (RBF) interpolation**, 303 | # **Gaussian process regression** (GP) regression. 304 | # 305 | # - Kriging is derived by minimizing the variance of linear and unbiased estimators. 306 | # - RBF interpolation is derived by the explicit desire to fit 307 | # N functions to N data points (observations). 308 | # - GP regression is derived by conditioning (applying Bayes rule) 309 | # to the (supposedly) Gaussian distribution of the random field. 310 | # 311 | # ### Next: [T7 - Chaos & Lorenz](T7%20-%20Chaos%20%26%20Lorenz%20[optional].ipynb) 312 | -------------------------------------------------------------------------------- /notebooks/scripts/T7 - Chaos & Lorenz [optional].md: -------------------------------------------------------------------------------- 1 | --- 2 | jupyter: 3 | jupytext: 4 | formats: ipynb,scripts//py:light,scripts//md 5 | text_representation: 6 | extension: .md 7 | format_name: markdown 8 | format_version: '1.3' 9 | jupytext_version: 1.17.2 10 | kernelspec: 11 | display_name: Python 3 (ipykernel) 12 | language: python 13 | name: python3 14 | --- 15 | 16 | ```python 17 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials" 18 | !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s 19 | ``` 20 | 21 | ```python 22 | from resources import show_answer, interact, frame 23 | %matplotlib inline 24 | import numpy as np 25 | import numpy.random as rnd 26 | import matplotlib.pyplot as plt 27 | plt.ion(); 28 | ``` 29 | 30 | # T7 - Chaos & Lorenz 31 | 32 | ***Chaos*** 33 | is also known as the butterfly effect: "a butterfly that flaps its wings in Brazil can 'cause' a hurricane in Texas". 34 | As opposed to the opinions of Descartes/Newton/Laplace, chaos effectively means that even in a deterministic (non-stochastic) universe, we can only predict "so far" into the future. This will be illustrated below using two toy-model dynamical systems made by ***Edward Lorenz***. 35 | $ 36 | \newcommand{\mat}[1]{{\mathbf{{#1}}}} 37 | \newcommand{\bvec}[1]{{\mathbf{#1}}} 38 | \newcommand{\xDim}[0]{D} 39 | \newcommand{\x}[0]{\bvec{x}} 40 | $ 41 | 42 | ## Dynamical systems 43 | 44 | Dynamical system are systems (sets of equations) whose variables evolve in time (the equations contains time derivatives). As a branch of mathematics, its theory is mainly concerned with understanding the *behaviour* of solutions (trajectories) of the systems. 45 | 46 | Below is a function to numerically **integrate** 47 | (i.e. 
step-wise evolve the system forward in time) a set of coupled ODEs. 48 | It relies on `scipy`, but adds some conveniences, 49 | notably taking advantage of Python's `**kwargs` (key-word argument) feature, 50 | to define an internal `dxdt` whose only two arguments are 51 | `x` for the current state, and `t` for time. 52 | 53 | ```python 54 | from scipy.integrate import odeint 55 | from dapper.mods.integration import rk4 56 | dt = 0.01 57 | 58 | def integrate(dxdt, initial_states, final_time, **params): 59 | # Output shape: `(len(initial_states), nTime, len(x))` 60 | dxdt_fixed = lambda x, t: dxdt(x, t, **params) # Fix params 61 | time_steps = np.linspace(0, final_time, 1+int(final_time / dt)) 62 | integrated = [] 63 | ### Replace the following (in the next exercise) ### 64 | for x0 in initial_states: 65 | trajectory = odeint(dxdt_fixed, x0, time_steps) 66 | integrated.append(trajectory) 67 | return np.array(integrated), time_steps 68 | ``` 69 | 70 | In addition, it takes care of looping over `initial_states`, 71 | computing a solution ("phase space trajectory") for each one, 72 | so that we can ask it to compute multiple trajectories at once, 73 | which we call Monte-Carlo simulation, or **ensemble forecasting**. 74 | But *loops are generally slow in Python*. 75 | Fortunately, for simple systems, 76 | we can write our code such that the dynamics get independently (but simultaneously) computed for rows of a *matrix* (rather than a single vector), meaning that each row in the input produces a corresponding row in the output. This in effect leaves `numpy` to do the looping (which it does much quicker than pure Python). 77 | Alternatively, since each simulation is completely independent of another realisation, 78 | they are **"embarrassingly parallelizable"**, which is a good option if the system is very costly to simulate. 79 | The exercise below challenges you to implement the first approach, resulting in much faster visualisation further below. 80 | 81 | #### Exc (optional) -- speed-up by vectorisation & parallelisation 82 | 83 | Replace `odeint` in the code above by `rk4` (which does not care about the size/shape of the input, thereby allowing for matrices, i.e. ensembles). Note that the call signature of `rk4` is similar to `odeint`, except that `time_steps` must be replaced by `t` and `dt`. I.e. it only computes a single time step, `t + dt`, so you must loop over `time_steps` yourself. *Hint: `dxdt(x, t, ...)` generally expect axis-0 (i.e. rows) of `x` to be the dimensions of the state vector -- not independent realisations of the states.* 84 | 85 | ```python 86 | # show_answer('rk4') 87 | ``` 88 | 89 | ## The Lorenz (1963) attractor 90 | 91 | The [Lorenz-63 dynamical system](https://en.wikipedia.org/wiki/Lorenz_system) can be derived as an extreme simplification of *Rayleigh-Bénard convection*: fluid circulation in a shallow layer of fluid uniformly heated (cooled) from below (above). 92 | This produces the following 3 *coupled, nonlinear* ordinary differential equations (ODE): 93 | 94 | $$ 95 | \begin{aligned} 96 | \dot{x} & = \sigma(y-x) \\ 97 | \dot{y} & = \rho x - y - xz \\ 98 | \dot{z} & = -\beta z + xy 99 | \end{aligned} 100 | \tag{1} 101 | $$ 102 | 103 | where the "dot" represents the time derivative, $\frac{d}{dt}$. The state vector is $\x = (x,y,z)$, and the parameters are typically set to $\sigma = 10, \beta=8/3, \rho=28$. The ODEs can be coded as follows (yes, Python supports Unicode, but it might be cumbersome to type out!) 
104 | 105 | ```python 106 | def dxdt63(state, time, σ, β, ρ): 107 | x, y, z = state 108 | return np.asarray([σ * (y - x), 109 | x * (ρ - z) - y, 110 | x * y - β * z]) 111 | ``` 112 | 113 | The following illustrated the system. 114 | 115 | ```python 116 | store = ['placeholder'] 117 | @interact( σ=(0.,200), β=(0.,5), ρ=(0.,50), N=(1,100), ε=(0.01,10), Time=(0.,100), zoom=(.1, 4)) 118 | def plot_lorenz63(σ=10, β=8/3, ρ=28 , in3D=True, N=2, ε=0.01, Time=2.0, zoom=1): 119 | rnd.seed(23) 120 | initial_states = [-6.1, 1.2, 32.5] + ε*rnd.randn(N, 3) 121 | trajectories, times = integrate(dxdt63, initial_states, Time, σ=σ, β=β, ρ=ρ) 122 | store[0] = trajectories 123 | if in3D: 124 | ax = plt.figure().add_subplot(111, projection='3d') 125 | for orbit in trajectories: 126 | line, = ax.plot(*(orbit.T), lw=1, alpha=.5) 127 | ax.scatter3D(*orbit[-1], s=40, color=line.get_color()) 128 | ax.axis('off') 129 | frame(trajectories, ax, zoom) 130 | else: 131 | fig, axs = plt.subplots(3, sharex=True, figsize=(5, 4)) 132 | for dim, ax, orbits in zip('xyz', axs, trajectories.T): 133 | start = int(10/dt/zoom) 134 | ax.plot(times[-start:], orbits[-start:], lw=1, alpha=.5) 135 | ax.set_ylabel(dim) 136 | ax.set_xlabel('Time') 137 | plt.show() 138 | ``` 139 | 140 | #### Exc -- Bifurcation hunting 141 | 142 | Classic linear stability analysis involves setting eqn. (1) to zero and considering the eigenvalues (and vectors) of its Jacobian matrix. Here we will go about it mainly by visually inspecting the numerical results of simulations. 143 | Answer the following (to an approximate degree of precision) by gradually increasing $\rho$. 144 | Leave the other model parameters at their defaults, but use `ε`, `N`, `Time` and `zoom` to your advantage. 145 | 146 | - (a) What is the only fixed point for $\rho = 0$? 147 | - (b) At what (larger) value of $\rho$ does this change? 148 | What do you think happened to the original fixed point? 149 | - (c) At what (larger) value of $\rho$ do we see an oscillating (spiraling) motion? 150 | What do you think this entails for the aforementioned eigenvalues? 151 | - (d) Describe the difference in character of the trajectories between $\rho=10$ and $\rho=20$. 152 | - (e) At what (larger) values of $\rho$ do we get chaos? 153 | In other words, when do the trajectories no longer converge to fixed points (or limit cycles)? 154 | - (f) Also try $\rho=144$ (edit the code). What is the nature of the trajectories now? 155 | - (g) *Optional*: Use pen and paper to show that the fixed points of the Lorenz system (1) are 156 | indeed the origin as well as the roots of $x^2=\beta z$ with $y=x$, 157 | but that the latter two only exist for $\rho > 1$. 158 | 159 | In conclusion, while a dynamical system naturally depends on its parameter values (almost by definition), the way in which its behaviour/character depend on it could come as a surprise. 160 | 161 | ```python 162 | # show_answer("Bifurcations63") 163 | ``` 164 | 165 | #### Exc -- Doubling time 166 | 167 | Re-run the animation cell to get default parameter values. 168 | Visually investigate the system's (i.e. the trajectories') **sensitivity to initial conditions** by moving `Time`, `N` and `ε`. What do you reckon is the "doubling time" of the perturbations? I.e. how long do you think it takes (on average) for two trajectories to grow twice as far apart as they started (alternatives: 0.03, 0.3, 3, 30)? What are the implications for any prediction/forecasting we might attempt? 
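If you want to back up the visual inspection with numbers, here is a minimal supplementary sketch (not part of the original exercise): assuming the interactive cell above has been run with `N` ≥ 2 and `ε` > 0, it plots the distance between the first two stored trajectories on a log scale, where exponential divergence shows up as a roughly straight line whose slope determines the doubling time.

```python
# Supplementary sketch: separation between the first two trajectories in `store[0]`
# (shape (N, nTime, 3), filled by the interactive cell above).
trajs = store[0]
separation = np.linalg.norm(trajs[1] - trajs[0], axis=-1)  # distance at each time step
times_sep = dt * np.arange(len(separation))
plt.figure()
plt.semilogy(times_sep, separation)  # exponential growth ⇒ (roughly) straight line
plt.xlabel("Time")
plt.ylabel("Separation");
```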
169 | 170 | ```python 171 | # show_answer('Guesstimate 63') 172 | ``` 173 | 174 | ### Averages 175 | 176 | The result actually depends on where in "phase space" the particles started. For example, predictability in the Lorenz system is much shorter when the state is near the center, where the trajectories diverge into the two wings of the butterfly. So to get a universal answer one must average these experiments for many different initial conditions. 177 | Alternatively, since the above system is [ergodic](https://en.wikipedia.org/wiki/Ergodic_theory#Ergodic_theorems), we could also average a single experiment over a very, very long time, obtaining the same statistics (assuming they have converged). Though not strictly implied, ergodicity is closely related to chaos. It means that 178 | 179 | - A trajectory/orbit never quite repeats (the orbit is aperiodic). 180 | - The tracks of the orbits are sufficiently "dense" that they define a manifold 181 | (something that looks like a surface, such as the butterfly wings above, 182 | and for which we can speak of properties like derivatives and fractal dimension). 183 | - Every part (of positive measure) of the manifold can be reached from any other. 184 | - There is a probability density for the manifold, 185 | quantifying the relative amount of time (of an infinite amount) 186 | that the system spends in that neighbourhood. 187 | 188 | Set `N` and `Time` in the above interactive animation to their upper bounds (might take long to run!). 189 | Execute the code cell below. 190 | Do you think the samples behind the histograms are drawn from the same distribution? 191 | In other words, is the Lorenz system ergodic? 192 | 193 | ```python 194 | @interact() 195 | def histograms(): 196 | fig, axs = plt.subplots(ncols=3, sharey=True, figsize=(9, 3)) 197 | def hist(ax, sample, lbl): 198 | ax.hist(sample, density=1, bins=20, label=lbl, alpha=.5) 199 | 200 | trajectories63 = store[0] 201 | for i, (ax, lbl) in enumerate(zip(axs, "xyz")): 202 | hist(ax, trajectories63[:, -1, i], "at final time") 203 | hist(ax, trajectories63[-1, ::int(.2/dt), i], "of final member") 204 | ax.set_title(f"Component {lbl}") 205 | plt.legend(); 206 | ``` 207 | 208 | The long-run distribution of a system may be called its **climatology**. 209 | A somewhat rudimentary weather forecasting initialisation (i.e. DA) technique, 210 | called **optimal interpolation**, 211 | consists in using the climatology as the prior (as opposed to yesterday's forecast) 212 | when applying Bayes' rule (in its [Gaussian guise](T3%20-%20Bayesian%20inference.ipynb#Linear-Gaussian-Bayes'-rule-(1D))) to the observations of the day. 213 | 214 | ## The Lorenz-96 model 215 | 216 | Lorenz-96 is a "spatially 1D" dynamical system of an astoundingly simple design that resemble atmospheric convection, 217 | including nonlinear terms and chaoticity. 218 | Each state variable $\x_i$ can be considered some atmospheric quantity at grid point at a fixed latitude of Earth. The system 219 | is given by the coupled set of ODEs, 220 | $$ 221 | \frac{d \x_i}{dt} = (\x_{i+1} − \x_{i-2}) \x_{i-1} − \x_i + F 222 | \,, 223 | \quad \quad i \in \{1,\ldots,\xDim\} 224 | \,, 225 | $$ 226 | where the subscript indices apply periodically. 227 | 228 | This model is not derived from physics but has similar characteristics, such as 229 |
230 | - there is external forcing, determined by a parameter $F$;
231 | - there is internal dissipation, emulated by the linear term;
232 | - there is energy-conserving advection, emulated by quadratic terms.
233 | 
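As a quick numerical sanity check of the last bullet point (not part of the original notebook): the advection term is orthogonal to $\x$, so it cannot contribute to $\frac{d}{dt} \sum_i \x_i^2 = 2 \sum_i \x_i \dot{\x}_i$. The same fact is the subject of the (analytical) energy exercise further below.

```python
# The quadratic (advection) term of Lorenz-96, evaluated at a random state x,
# is orthogonal to x -- hence it conserves the "total energy" sum(x_i**2).
x = rnd.randn(40)
advection = (np.roll(x, -1) - np.roll(x, 2)) * np.roll(x, 1)  # (x_{i+1} - x_{i-2}) * x_{i-1}
print(np.dot(x, advection))  # ≈ 0 (up to round-off)
```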
234 | 235 | [Further description in the very readable original article](https://www.ecmwf.int/sites/default/files/elibrary/1995/75462-predictability-problem-partly-solved_0.pdf). 236 | 237 | **Exc (optional) -- Conservation of energy:** Show that the "total energy" $\sum_{i=1}^{\xDim} \x_i^2$ is preserved by the quadratic terms in the ODE. 238 | *Hint: consider its time derivative.* 239 | 240 | ```python 241 | # show_answer("Lorenz energy") 242 | ``` 243 | 244 | The model is animated below. 245 | 246 | ```python 247 | def s(vector, n): 248 | return np.roll(vector, -n) 249 | 250 | def dxdt96(x, time, Force): 251 | return (s(x, 1) - s(x, -2)) * s(x, -1) - x + Force 252 | 253 | ylims = -10, 20 254 | ``` 255 | 256 | ```python 257 | store = ["placeholder"] 258 | @interact( xDim=(4,60,1), N=(1,30), Force=(0,15.), ε=(0.01,3,0.1), Time=(0.05,90,0.04)) 259 | def plot_lorenz96(xDim=40, N=2, Force=8, ε=0.01, Time=3): 260 | rnd.seed(23) 261 | initial_states = np.zeros((N, xDim)) 262 | initial_states[:, 0] = ε*(10 + rnd.randn(N)) 263 | trajectories, times = integrate(dxdt96, initial_states, Time, Force=Force) 264 | store[0] = trajectories 265 | 266 | plt.figure(figsize=(7, 4)) 267 | plt.plot(np.arange(xDim), trajectories[:, -1].T) 268 | plt.ylim(-10, 20) 269 | plt.show() 270 | ``` 271 | 272 | #### Exc -- Bifurcation hunting 96 273 | 274 | Investigate by moving the sliders (but keep `xDim=40`): Under which settings of the force `F` 275 | 276 | - Do the solutions tend to the steady state $\x_i = F$ for all $i$ ? 277 | - Are the solutions periodic? 278 | - Is the system chaotic (i.e., the solutions are extremely sensitive to initial conditions, 279 | meaning that the predictability horizon is finite) ? 280 | 281 | *PS: another way to visualise spatially 1D systems (or cross-sections) over time is the [Hovmöller diagram](https://en.wikipedia.org/wiki/Hovm%C3%B6ller_diagram), here represented for 1 realisation of the simulations.* 282 | 283 | ```python 284 | @interact() 285 | def Hovmoller(): 286 | plt.contourf(store[0][0], cmap="viridis", vmin=ylims[0], vmax=ylims[1]) 287 | plt.colorbar(); 288 | plt.show() 289 | ``` 290 | 291 | ```python 292 | # show_answer('Bifurcations96', 'a') 293 | ``` 294 | 295 | #### Exc (optional) -- Doubling time 296 | 297 | Maximise `N` (for a large sample), minimise `ε` (to approach linear conditions) and set `Time=1` (a reasonable first guess). Compute a rough estimate of the doubling time in the cell below from the data in `store[0]`, which holds the trajectories, and has shape `(N, len(times))`. 298 | *Hint: The theory for these questions will be described in further detail in the following section.* 299 | 300 | ```python 301 | # show_answer("doubling time") 302 | ``` 303 | 304 | ## The double pendulum 305 | 306 | The [double pendulum](https://en.wikipedia.org/wiki/Double_pendulum) is another classic example of a chaotic system. 307 | It is a little longer to implement, so we'll just load it from [DAPPER](https://github.com/nansencenter/DAPPER/blob/master/dapper/mods/DoublePendulum/__init__.py). 308 | Unlike the Lorenz systems, the divergence of its "$f$" flow field is 0, 309 | so it is conservative, and all of the trajectories preserve their initial energy 310 | (except for what friction our numerical integration causes). 311 | Therefore it does not strictly speaking possess an attractor 312 | nor is it ergodic (but some things might be said upon restriction to the set of initial conditions with equal energy levels?) 
313 | 314 | ```python 315 | from numpy import cos, sin, pi 316 | from dapper.mods.DoublePendulum import L1, L2, x0, dxdt 317 | def x012(x): return (0 , L1*sin(x[0]) , L1*sin(x[0]) + L2*sin(x[2])) 318 | def y012(x): return (0, -L1*cos(x[0]), -L1*cos(x[0]) - L2*cos(x[2])) 319 | 320 | x0 = [.9*pi, 0, 0, 0] # Angular pos1, vel1, pos2, vel2 321 | initial_states = x0 + 0.01*np.random.randn(20, 4) 322 | trajectories, times = integrate(lambda x, t: dxdt(x), initial_states, 10) 323 | 324 | @interact(k=(0, len(times)-1, 4), N=(1, len(initial_states))) 325 | def plot_pendulum2(k=1, N=2): 326 | fig, ax = plt.subplots() 327 | ax.set(xlim=(-2, 2), ylim=(-2, 2), aspect="equal") 328 | for x in trajectories[:N, k]: 329 | ax.plot(x012(x), y012(x), '-o') 330 | plt.show() 331 | ``` 332 | 333 | ## Error/perturbation dynamics 334 | 335 | **Exc (optional) -- Perturbation ODE:** Suppose $x(t)$ and $z(t)$ are "twins": they evolve according to the same law $f$: 336 | $$ 337 | \begin{align} 338 | \frac{dx}{dt} &= f(x) \\ 339 | \frac{dz}{dt} &= f(z) \,. 340 | \end{align} 341 | $$ 342 | 343 | Define the "error": $\varepsilon(t) = x(t) - z(t)$. 344 | Suppose $z(0)$ is close to $x(0)$. 345 | Let $F = \frac{df}{dx}(x(t))$. 346 | 347 | - (a) Show that the error evolves according to the ordinary differential equation (ODE) 348 | $$\frac{d \varepsilon}{dt} \approx F \varepsilon \,.$$ 349 | 350 | ```python 351 | # show_answer("error evolution") 352 | ``` 353 | 354 | - (b) Suppose $F$ is constant. Show that the error grows exponentially: $\varepsilon(t) = \varepsilon(0) e^{F t} $. 355 | 356 | ```python 357 | # show_answer("anti-deriv") 358 | ``` 359 | 360 | - (c) 361 | - (1) Suppose $F<0$. 362 | What happens to the error? 363 | What does this mean for predictability? 364 | - (2) Now suppose $F>0$. 365 | Given that all observations are uncertain (i.e. $R_t>0$, if only ever so slightly), 366 | can we ever hope to estimate $x(t)$ with 0 uncertainty? 367 | 368 | ```python 369 | # show_answer("predictability cases") 370 | ``` 371 | 372 | - (d) What is the doubling time of the error? 373 | 374 | ```python 375 | # show_answer("doubling time, Lyapunov") 376 | ``` 377 | 378 | - (e) Consider the ODE derived above. 379 | How might we change it in order to model (i.e. emulate) a saturation of the error at some level? 380 | Can you solve this equation? 381 | 382 | ```python 383 | # show_answer("saturation term") 384 | ``` 385 | 386 | - (f) Now suppose $z(t)$ evolves according to $\frac{dz}{dt} = g(z)$, with $g \neq f$. 387 | What is now the differential equation governing the evolution of the error, $\varepsilon$? 388 | 389 | ```python 390 | # show_answer("linear growth") 391 | ``` 392 | 393 | ## Summary 394 | 395 | Prediction (forecasting) with these systems is challenging because they are chaotic: 396 | small errors grow exponentially. 397 | Therefore there is a limit to how far into the future we can make predictions (skillfully). 398 | Therefore it is crucial to minimize the initial error as much as possible. 399 | This is a task of DA (filtering). 400 | 401 | Also see this [book on chaos and predictability](https://kuiper2000.github.io/chaos_and_predictability/intro.html). 
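To make the last point concrete: if the error grows as $\varepsilon(t) = \varepsilon_0 \, 2^{t/T_d}$ with doubling time $T_d$, then the forecast horizon (the time until the error reaches some tolerance) is $T_d \log_2(\text{tol}/\varepsilon_0)$, i.e. it grows only *logarithmically* as the initial error shrinks. The sketch below uses hypothetical numbers.

```python
# Back-of-the-envelope illustration (hypothetical numbers): each halving of the
# initial error buys only one extra doubling time of forecast horizon.
Td = 0.9          # assumed doubling time of the error
tolerance = 10.0  # error level at which the forecast is deemed useless
for eps0 in [1e0, 1e-1, 1e-2, 1e-3]:
    horizon = Td * np.log2(tolerance / eps0)
    print(f"initial error {eps0:8.3f}  ->  horizon ≈ {horizon:4.1f} time units")
```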
402 | 403 | ### Next: [T8 - Monte-Carlo & ensembles](T8%20-%20Monte-Carlo%20%26%20ensembles.ipynb) 404 | -------------------------------------------------------------------------------- /notebooks/scripts/T7 - Chaos & Lorenz [optional].py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,scripts//py:light,scripts//md 5 | # text_representation: 6 | # extension: .py 7 | # format_name: light 8 | # format_version: '1.5' 9 | # jupytext_version: 1.17.2 10 | # kernelspec: 11 | # display_name: Python 3 (ipykernel) 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials" 17 | # !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s 18 | 19 | from resources import show_answer, interact, frame 20 | # %matplotlib inline 21 | import numpy as np 22 | import numpy.random as rnd 23 | import matplotlib.pyplot as plt 24 | plt.ion(); 25 | 26 | # # T7 - Chaos & Lorenz 27 | # 28 | # ***Chaos*** 29 | # is also known as the butterfly effect: "a butterfly that flaps its wings in Brazil can 'cause' a hurricane in Texas". 30 | # As opposed to the opinions of Descartes/Newton/Laplace, chaos effectively means that even in a deterministic (non-stochastic) universe, we can only predict "so far" into the future. This will be illustrated below using two toy-model dynamical systems made by ***Edward Lorenz***. 31 | # $ 32 | # \newcommand{\mat}[1]{{\mathbf{{#1}}}} 33 | # \newcommand{\bvec}[1]{{\mathbf{#1}}} 34 | # \newcommand{\xDim}[0]{D} 35 | # \newcommand{\x}[0]{\bvec{x}} 36 | # $ 37 | # 38 | # ## Dynamical systems 39 | # 40 | # Dynamical system are systems (sets of equations) whose variables evolve in time (the equations contains time derivatives). As a branch of mathematics, its theory is mainly concerned with understanding the *behaviour* of solutions (trajectories) of the systems. 41 | # 42 | # Below is a function to numerically **integrate** 43 | # (i.e. step-wise evolve the system forward in time) a set of coupled ODEs. 44 | # It relies on `scipy`, but adds some conveniences, 45 | # notably taking advantage of Python's `**kwargs` (key-word argument) feature, 46 | # to define an internal `dxdt` whose only two arguments are 47 | # `x` for the current state, and `t` for time. 48 | 49 | # + 50 | from scipy.integrate import odeint 51 | from dapper.mods.integration import rk4 52 | dt = 0.01 53 | 54 | def integrate(dxdt, initial_states, final_time, **params): 55 | # Output shape: `(len(initial_states), nTime, len(x))` 56 | dxdt_fixed = lambda x, t: dxdt(x, t, **params) # Fix params 57 | time_steps = np.linspace(0, final_time, 1+int(final_time / dt)) 58 | integrated = [] 59 | ### Replace the following (in the next exercise) ### 60 | for x0 in initial_states: 61 | trajectory = odeint(dxdt_fixed, x0, time_steps) 62 | integrated.append(trajectory) 63 | return np.array(integrated), time_steps 64 | 65 | 66 | # - 67 | 68 | # In addition, it takes care of looping over `initial_states`, 69 | # computing a solution ("phase space trajectory") for each one, 70 | # so that we can ask it to compute multiple trajectories at once, 71 | # which we call Monte-Carlo simulation, or **ensemble forecasting**. 72 | # But *loops are generally slow in Python*. 
73 | # Fortunately, for simple systems, 74 | # we can write our code such that the dynamics get independently (but simultaneously) computed for rows of a *matrix* (rather than a single vector), meaning that each row in the input produces a corresponding row in the output. This in effect leaves `numpy` to do the looping (which it does much quicker than pure Python). 75 | # Alternatively, since each simulation is completely independent of another realisation, 76 | # they are **"embarrassingly parallelizable"**, which is a good option if the system is very costly to simulate. 77 | # The exercise below challenges you to implement the first approach, resulting in much faster visualisation further below. 78 | # 79 | # #### Exc (optional) -- speed-up by vectorisation & parallelisation 80 | # 81 | # Replace `odeint` in the code above by `rk4` (which does not care about the size/shape of the input, thereby allowing for matrices, i.e. ensembles). Note that the call signature of `rk4` is similar to `odeint`, except that `time_steps` must be replaced by `t` and `dt`. I.e. it only computes a single time step, `t + dt`, so you must loop over `time_steps` yourself. *Hint: `dxdt(x, t, ...)` generally expect axis-0 (i.e. rows) of `x` to be the dimensions of the state vector -- not independent realisations of the states.* 82 | 83 | # + 84 | # show_answer('rk4') 85 | # - 86 | 87 | # ## The Lorenz (1963) attractor 88 | # 89 | # The [Lorenz-63 dynamical system](https://en.wikipedia.org/wiki/Lorenz_system) can be derived as an extreme simplification of *Rayleigh-Bénard convection*: fluid circulation in a shallow layer of fluid uniformly heated (cooled) from below (above). 90 | # This produces the following 3 *coupled, nonlinear* ordinary differential equations (ODE): 91 | # 92 | # $$ 93 | # \begin{aligned} 94 | # \dot{x} & = \sigma(y-x) \\ 95 | # \dot{y} & = \rho x - y - xz \\ 96 | # \dot{z} & = -\beta z + xy 97 | # \end{aligned} 98 | # \tag{1} 99 | # $$ 100 | # 101 | # where the "dot" represents the time derivative, $\frac{d}{dt}$. The state vector is $\x = (x,y,z)$, and the parameters are typically set to $\sigma = 10, \beta=8/3, \rho=28$. The ODEs can be coded as follows (yes, Python supports Unicode, but it might be cumbersome to type out!) 102 | 103 | def dxdt63(state, time, σ, β, ρ): 104 | x, y, z = state 105 | return np.asarray([σ * (y - x), 106 | x * (ρ - z) - y, 107 | x * y - β * z]) 108 | 109 | 110 | # The following illustrated the system. 
111 | 112 | store = ['placeholder'] 113 | @interact( σ=(0.,200), β=(0.,5), ρ=(0.,50), N=(1,100), ε=(0.01,10), Time=(0.,100), zoom=(.1, 4)) 114 | def plot_lorenz63(σ=10, β=8/3, ρ=28 , in3D=True, N=2, ε=0.01, Time=2.0, zoom=1): 115 | rnd.seed(23) 116 | initial_states = [-6.1, 1.2, 32.5] + ε*rnd.randn(N, 3) 117 | trajectories, times = integrate(dxdt63, initial_states, Time, σ=σ, β=β, ρ=ρ) 118 | store[0] = trajectories 119 | if in3D: 120 | ax = plt.figure().add_subplot(111, projection='3d') 121 | for orbit in trajectories: 122 | line, = ax.plot(*(orbit.T), lw=1, alpha=.5) 123 | ax.scatter3D(*orbit[-1], s=40, color=line.get_color()) 124 | ax.axis('off') 125 | frame(trajectories, ax, zoom) 126 | else: 127 | fig, axs = plt.subplots(3, sharex=True, figsize=(5, 4)) 128 | for dim, ax, orbits in zip('xyz', axs, trajectories.T): 129 | start = int(10/dt/zoom) 130 | ax.plot(times[-start:], orbits[-start:], lw=1, alpha=.5) 131 | ax.set_ylabel(dim) 132 | ax.set_xlabel('Time') 133 | plt.show() 134 | 135 | 136 | # #### Exc -- Bifurcation hunting 137 | # 138 | # Classic linear stability analysis involves setting eqn. (1) to zero and considering the eigenvalues (and vectors) of its Jacobian matrix. Here we will go about it mainly by visually inspecting the numerical results of simulations. 139 | # Answer the following (to an approximate degree of precision) by gradually increasing $\rho$. 140 | # Leave the other model parameters at their defaults, but use `ε`, `N`, `Time` and `zoom` to your advantage. 141 | # 142 | # - (a) What is the only fixed point for $\rho = 0$? 143 | # - (b) At what (larger) value of $\rho$ does this change? 144 | # What do you think happened to the original fixed point? 145 | # - (c) At what (larger) value of $\rho$ do we see an oscillating (spiraling) motion? 146 | # What do you think this entails for the aforementioned eigenvalues? 147 | # - (d) Describe the difference in character of the trajectories between $\rho=10$ and $\rho=20$. 148 | # - (e) At what (larger) values of $\rho$ do we get chaos? 149 | # In other words, when do the trajectories no longer converge to fixed points (or limit cycles)? 150 | # - (f) Also try $\rho=144$ (edit the code). What is the nature of the trajectories now? 151 | # - (g) *Optional*: Use pen and paper to show that the fixed points of the Lorenz system (1) are 152 | # indeed the origin as well as the roots of $x^2=\beta z$ with $y=x$, 153 | # but that the latter two only exist for $\rho > 1$. 154 | # 155 | # In conclusion, while a dynamical system naturally depends on its parameter values (almost by definition), the way in which its behaviour/character depend on it could come as a surprise. 156 | 157 | # + 158 | # show_answer("Bifurcations63") 159 | # - 160 | 161 | # #### Exc -- Doubling time 162 | # 163 | # Re-run the animation cell to get default parameter values. 164 | # Visually investigate the system's (i.e. the trajectories') **sensitivity to initial conditions** by moving `Time`, `N` and `ε`. What do you reckon is the "doubling time" of the perturbations? I.e. how long do you think it takes (on average) for two trajectories to grow twice as far apart as they started (alternatives: 0.03, 0.3, 3, 30)? What are the implications for any prediction/forecasting we might attempt? 165 | 166 | # + 167 | # show_answer('Guesstimate 63') 168 | # - 169 | 170 | # ### Averages 171 | # 172 | # The result actually depends on where in "phase space" the particles started. 
For example, predictability in the Lorenz system is much shorter when the state is near the center, where the trajectories diverge into the two wings of the butterfly. So to get a universal answer one must average these experiments for many different initial conditions. 173 | # Alternatively, since the above system is [ergodic](https://en.wikipedia.org/wiki/Ergodic_theory#Ergodic_theorems), we could also average a single experiment over a very, very long time, obtaining the same statistics (assuming they have converged). Though not strictly implied, ergodicity is closely related to chaos. It means that 174 | # 175 | # - A trajectory/orbit never quite repeats (the orbit is aperiodic). 176 | # - The tracks of the orbits are sufficiently "dense" that they define a manifold 177 | # (something that looks like a surface, such as the butterfly wings above, 178 | # and for which we can speak of properties like derivatives and fractal dimension). 179 | # - Every part (of positive measure) of the manifold can be reached from any other. 180 | # - There is a probability density for the manifold, 181 | # quantifying the relative amount of time (of an infinite amount) 182 | # that the system spends in that neighbourhood. 183 | # 184 | # Set `N` and `Time` in the above interactive animation to their upper bounds (might take long to run!). 185 | # Execute the code cell below. 186 | # Do you think the samples behind the histograms are drawn from the same distribution? 187 | # In other words, is the Lorenz system ergodic? 188 | 189 | @interact() 190 | def histograms(): 191 | fig, axs = plt.subplots(ncols=3, sharey=True, figsize=(9, 3)) 192 | def hist(ax, sample, lbl): 193 | ax.hist(sample, density=1, bins=20, label=lbl, alpha=.5) 194 | 195 | trajectories63 = store[0] 196 | for i, (ax, lbl) in enumerate(zip(axs, "xyz")): 197 | hist(ax, trajectories63[:, -1, i], "at final time") 198 | hist(ax, trajectories63[-1, ::int(.2/dt), i], "of final member") 199 | ax.set_title(f"Component {lbl}") 200 | plt.legend(); 201 | 202 | 203 | # The long-run distribution of a system may be called its **climatology**. 204 | # A somewhat rudimentary weather forecasting initialisation (i.e. DA) technique, 205 | # called **optimal interpolation**, 206 | # consists in using the climatology as the prior (as opposed to yesterday's forecast) 207 | # when applying Bayes' rule (in its [Gaussian guise](T3%20-%20Bayesian%20inference.ipynb#Linear-Gaussian-Bayes'-rule-(1D))) to the observations of the day. 208 | # 209 | # ## The Lorenz-96 model 210 | # 211 | # Lorenz-96 is a "spatially 1D" dynamical system of an astoundingly simple design that resemble atmospheric convection, 212 | # including nonlinear terms and chaoticity. 213 | # Each state variable $\x_i$ can be considered some atmospheric quantity at grid point at a fixed latitude of Earth. The system 214 | # is given by the coupled set of ODEs, 215 | # $$ 216 | # \frac{d \x_i}{dt} = (\x_{i+1} − \x_{i-2}) \x_{i-1} − \x_i + F 217 | # \,, 218 | # \quad \quad i \in \{1,\ldots,\xDim\} 219 | # \,, 220 | # $$ 221 | # where the subscript indices apply periodically. 222 | # 223 | # This model is not derived from physics but has similar characteristics, such as 224 | #
225 | # - there is external forcing, determined by a parameter $F$;
226 | # - there is internal dissipation, emulated by the linear term;
227 | # - there is energy-conserving advection, emulated by quadratic terms.
228 | #
229 | # 230 | # [Further description in the very readable original article](https://www.ecmwf.int/sites/default/files/elibrary/1995/75462-predictability-problem-partly-solved_0.pdf). 231 | # 232 | # **Exc (optional) -- Conservation of energy:** Show that the "total energy" $\sum_{i=1}^{\xDim} \x_i^2$ is preserved by the quadratic terms in the ODE. 233 | # *Hint: consider its time derivative.* 234 | 235 | # + 236 | # show_answer("Lorenz energy") 237 | # - 238 | 239 | # The model is animated below. 240 | 241 | # + 242 | def s(vector, n): 243 | return np.roll(vector, -n) 244 | 245 | def dxdt96(x, time, Force): 246 | return (s(x, 1) - s(x, -2)) * s(x, -1) - x + Force 247 | 248 | ylims = -10, 20 249 | # - 250 | 251 | store = ["placeholder"] 252 | @interact( xDim=(4,60,1), N=(1,30), Force=(0,15.), ε=(0.01,3,0.1), Time=(0.05,90,0.04)) 253 | def plot_lorenz96(xDim=40, N=2, Force=8, ε=0.01, Time=3): 254 | rnd.seed(23) 255 | initial_states = np.zeros((N, xDim)) 256 | initial_states[:, 0] = ε*(10 + rnd.randn(N)) 257 | trajectories, times = integrate(dxdt96, initial_states, Time, Force=Force) 258 | store[0] = trajectories 259 | 260 | plt.figure(figsize=(7, 4)) 261 | plt.plot(np.arange(xDim), trajectories[:, -1].T) 262 | plt.ylim(-10, 20) 263 | plt.show() 264 | 265 | 266 | # #### Exc -- Bifurcation hunting 96 267 | # 268 | # Investigate by moving the sliders (but keep `xDim=40`): Under which settings of the force `F` 269 | # 270 | # - Do the solutions tend to the steady state $\x_i = F$ for all $i$ ? 271 | # - Are the solutions periodic? 272 | # - Is the system chaotic (i.e., the solutions are extremely sensitive to initial conditions, 273 | # meaning that the predictability horizon is finite) ? 274 | # 275 | # *PS: another way to visualise spatially 1D systems (or cross-sections) over time is the [Hovmöller diagram](https://en.wikipedia.org/wiki/Hovm%C3%B6ller_diagram), here represented for 1 realisation of the simulations.* 276 | 277 | @interact() 278 | def Hovmoller(): 279 | plt.contourf(store[0][0], cmap="viridis", vmin=ylims[0], vmax=ylims[1]) 280 | plt.colorbar(); 281 | plt.show() 282 | 283 | 284 | # + 285 | # show_answer('Bifurcations96', 'a') 286 | # - 287 | 288 | # #### Exc (optional) -- Doubling time 289 | # 290 | # Maximise `N` (for a large sample), minimise `ε` (to approach linear conditions) and set `Time=1` (a reasonable first guess). Compute a rough estimate of the doubling time in the cell below from the data in `store[0]`, which holds the trajectories, and has shape `(N, len(times))`. 291 | # *Hint: The theory for these questions will be described in further detail in the following section.* 292 | 293 | # + 294 | # show_answer("doubling time") 295 | # - 296 | 297 | # ## The double pendulum 298 | # 299 | # The [double pendulum](https://en.wikipedia.org/wiki/Double_pendulum) is another classic example of a chaotic system. 300 | # It is a little longer to implement, so we'll just load it from [DAPPER](https://github.com/nansencenter/DAPPER/blob/master/dapper/mods/DoublePendulum/__init__.py). 301 | # Unlike the Lorenz systems, the divergence of its "$f$" flow field is 0, 302 | # so it is conservative, and all of the trajectories preserve their initial energy 303 | # (except for what friction our numerical integration causes). 304 | # Therefore it does not strictly speaking possess an attractor 305 | # nor is it ergodic (but some things might be said upon restriction to the set of initial conditions with equal energy levels?) 
306 | 307 | # + 308 | from numpy import cos, sin, pi 309 | from dapper.mods.DoublePendulum import L1, L2, x0, dxdt 310 | def x012(x): return (0 , L1*sin(x[0]) , L1*sin(x[0]) + L2*sin(x[2])) 311 | def y012(x): return (0, -L1*cos(x[0]), -L1*cos(x[0]) - L2*cos(x[2])) 312 | 313 | x0 = [.9*pi, 0, 0, 0] # Angular pos1, vel1, pos2, vel2 314 | initial_states = x0 + 0.01*np.random.randn(20, 4) 315 | trajectories, times = integrate(lambda x, t: dxdt(x), initial_states, 10) 316 | 317 | @interact(k=(0, len(times)-1, 4), N=(1, len(initial_states))) 318 | def plot_pendulum2(k=1, N=2): 319 | fig, ax = plt.subplots() 320 | ax.set(xlim=(-2, 2), ylim=(-2, 2), aspect="equal") 321 | for x in trajectories[:N, k]: 322 | ax.plot(x012(x), y012(x), '-o') 323 | plt.show() 324 | # - 325 | 326 | # ## Error/perturbation dynamics 327 | # 328 | # **Exc (optional) -- Perturbation ODE:** Suppose $x(t)$ and $z(t)$ are "twins": they evolve according to the same law $f$: 329 | # $$ 330 | # \begin{align} 331 | # \frac{dx}{dt} &= f(x) \\ 332 | # \frac{dz}{dt} &= f(z) \,. 333 | # \end{align} 334 | # $$ 335 | # 336 | # Define the "error": $\varepsilon(t) = x(t) - z(t)$. 337 | # Suppose $z(0)$ is close to $x(0)$. 338 | # Let $F = \frac{df}{dx}(x(t))$. 339 | # 340 | # - (a) Show that the error evolves according to the ordinary differential equation (ODE) 341 | # $$\frac{d \varepsilon}{dt} \approx F \varepsilon \,.$$ 342 | 343 | # + 344 | # show_answer("error evolution") 345 | # - 346 | 347 | # - (b) Suppose $F$ is constant. Show that the error grows exponentially: $\varepsilon(t) = \varepsilon(0) e^{F t} $. 348 | 349 | # + 350 | # show_answer("anti-deriv") 351 | # - 352 | 353 | # - (c) 354 | # - (1) Suppose $F<0$. 355 | # What happens to the error? 356 | # What does this mean for predictability? 357 | # - (2) Now suppose $F>0$. 358 | # Given that all observations are uncertain (i.e. $R_t>0$, if only ever so slightly), 359 | # can we ever hope to estimate $x(t)$ with 0 uncertainty? 360 | 361 | # + 362 | # show_answer("predictability cases") 363 | # - 364 | 365 | # - (d) What is the doubling time of the error? 366 | 367 | # + 368 | # show_answer("doubling time, Lyapunov") 369 | # - 370 | 371 | # - (e) Consider the ODE derived above. 372 | # How might we change it in order to model (i.e. emulate) a saturation of the error at some level? 373 | # Can you solve this equation? 374 | 375 | # + 376 | # show_answer("saturation term") 377 | # - 378 | 379 | # - (f) Now suppose $z(t)$ evolves according to $\frac{dz}{dt} = g(z)$, with $g \neq f$. 380 | # What is now the differential equation governing the evolution of the error, $\varepsilon$? 381 | 382 | # + 383 | # show_answer("linear growth") 384 | # - 385 | 386 | # ## Summary 387 | # 388 | # Prediction (forecasting) with these systems is challenging because they are chaotic: 389 | # small errors grow exponentially. 390 | # Therefore there is a limit to how far into the future we can make predictions (skillfully). 391 | # Therefore it is crucial to minimize the initial error as much as possible. 392 | # This is a task of DA (filtering). 393 | # 394 | # Also see this [book on chaos and predictability](https://kuiper2000.github.io/chaos_and_predictability/intro.html). 
395 | # 396 | # ### Next: [T8 - Monte-Carlo & ensembles](T8%20-%20Monte-Carlo%20%26%20ensembles.ipynb) 397 | -------------------------------------------------------------------------------- /notebooks/scripts/T8 - Monte-Carlo & ensembles.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupyter: 3 | jupytext: 4 | formats: ipynb,scripts//py:light,scripts//md 5 | text_representation: 6 | extension: .md 7 | format_name: markdown 8 | format_version: '1.3' 9 | jupytext_version: 1.17.2 10 | kernelspec: 11 | display_name: Python 3 (ipykernel) 12 | language: python 13 | name: python3 14 | --- 15 | 16 | ```python 17 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials" 18 | !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s 19 | ``` 20 | 21 | ```python 22 | from resources import show_answer, interact, import_from_nb 23 | %matplotlib inline 24 | import numpy as np 25 | import matplotlib as mpl 26 | import scipy.stats as ss 27 | import numpy.random as rnd 28 | import matplotlib.pyplot as plt 29 | from scipy.stats import gaussian_kde 30 | plt.ion(); 31 | ``` 32 | 33 | ```python 34 | (pdf_G1, grid1d) = import_from_nb("T2", ("pdf_G1", "grid1d")) 35 | ``` 36 | 37 | # T8 - The ensemble (Monte-Carlo) approach 38 | **Monte-Carlo methods** are a class of computational algorithms that rely on random/stochastic sampling. They generally trade off higher (though random!) error for lower technical complexity [[1]](#Footnote-1:). Examples from optimisation include randomly choosing search directions, swarms, evolutionary mutations, or perturbations for gradient approximation. Another application area is the computation of (deterministic) integrals via sample averages, which is rooted in the fact that any integral can be formulated as expectations, as well as the law of large numbers (LLN). This is actually a surprisingly large class of problems, including for example a way to [approximate the value of $\pi$](https://en.wikipedia.org/wiki/Monte_Carlo_method#Overview). Moreover, many integrals of interest are inherently expectations, but over probability distributions that are not tractable, as they arise from a complicated random or uncertain process [[2]](#Footnote-2:), whereas a Monte-Carlo sample thereof can be obtained simply by simulating the process. 39 | $ 40 | \newcommand{\Expect}[0]{\mathbb{E}} 41 | \newcommand{\NormDist}{\mathscr{N}} 42 | \newcommand{\mat}[1]{{\mathbf{{#1}}}} 43 | \newcommand{\bvec}[1]{{\mathbf{#1}}} 44 | \newcommand{\trsign}{{\mathsf{T}}} 45 | \newcommand{\tr}{^{\trsign}} 46 | \newcommand{\ceq}[0]{\mathrel{≔}} 47 | \newcommand{\xDim}[0]{D} 48 | \newcommand{\I}[0]{\mat{I}} 49 | \newcommand{\X}[0]{\mat{X}} 50 | \newcommand{\Y}[0]{\mat{Y}} 51 | \newcommand{\E}[0]{\mat{E}} 52 | \newcommand{\x}[0]{\bvec{x}} 53 | \newcommand{\y}[0]{\bvec{y}} 54 | \newcommand{\z}[0]{\bvec{z}} 55 | \newcommand{\bx}[0]{\bvec{\bar{x}}} 56 | \newcommand{\by}[0]{\bvec{\bar{y}}} 57 | \newcommand{\barC}[0]{\mat{\bar{C}}} 58 | \newcommand{\ones}[0]{\bvec{1}} 59 | \newcommand{\AN}[0]{\big( \I_N - \ones \ones\tr / N \big)} 60 | $ 61 | 62 | **An ensemble** is an *i.i.d.* sample. I.e. a set of "members" ("particles", "realizations", or "sample points") that have been drawn ("sampled") independently from the same distribution. With the EnKF, these assumptions are generally tenuous, but pragmatic. 
63 | In particular, an ensemble can be used to characterize uncertainty: either by using it to compute (estimate) *statistics* thereof, such as the mean, median, variance, covariance, skewness, confidence intervals, etc (any function of the ensemble can be seen as a "statistic"), or by using it to reconstruct the distribution/density from which it is sampled. The latter is illustrated by the plot below. 64 | 65 | Take a moment to digest its code. Note: 66 | 67 | - The sample/ensemble is plotted as thin narrow lines. 68 | Note that it is generated via `randn`, which samples from $\NormDist(0, 1)$. 69 | - The "Parametric" density estimate is defined by estimating the mean and the variance, 70 | and using those estimates to define a Gaussian density (with those parameters). 71 | - We will not detail the KDE method, but it can be considered as a "continuous" version of a histogram. 72 | 73 | ```python 74 | mu = 0 75 | sigma2 = 25 76 | N = 80 77 | 78 | @interact( seed=(1, 10), nbins=(2, 60), bw=(0.1, 1)) 79 | def pdf_reconstructions(seed=5, nbins=10, bw=.3): 80 | rnd.seed(seed) 81 | E = mu + np.sqrt(sigma2)*rnd.randn(N) 82 | 83 | fig, ax = plt.subplots() 84 | ax.plot(grid1d, pdf_G1(grid1d, mu, sigma2), lw=5, label="True") 85 | ax.plot(E, np.zeros(N), '|k', ms=100, mew=.4, label="_raw ens") 86 | ax.hist(E, nbins, density=1, alpha=.7, color="C5", label="Histogram") 87 | ax.plot(grid1d, pdf_G1(grid1d, np.mean(E), np.var(E)), lw=5, label="Parametric") 88 | ax.plot(grid1d, gaussian_kde(E.ravel(), bw**2).evaluate(grid1d), lw=5, label="KDE") 89 | ax.set_ylim(top=(3*sigma2)**-.5) 90 | ax.legend() 91 | plt.show() 92 | ``` 93 | 94 | **Exc -- A matter of taste?:** 95 | - Which approximation to the true pdf looks better? 96 | - Which approximation starts with more information? 97 | What is the downside of making such assumptions? 98 | - What value of `bw` causes the "KDE" method to most closely 99 | reproduce/recover the "Parametric" method? 100 | What about the "Histogram" method? 101 | *PS: we might say that the KDE method "bridges" the other two.*. 102 | 103 | 104 | The widget above illustrated how to estimate or reconstruct a distribution on the basis of a sample. But for the EnKF, we also need to know how to go the other way: drawing a sample from a (multivariate) Gaussian distribution... 105 | 106 | **Exc -- Multivariate Gaussian sampling:** 107 | Suppose $\z$ is a standard Gaussian, 108 | i.e. $p(\z) = \NormDist(\z \mid \bvec{0},\I_{\xDim})$, 109 | where $\I_{\xDim}$ is the $\xDim$-dimensional identity matrix. 110 | Let $\x = \mat{L}\z + \mu$. 111 | 112 | * (a -- optional). Refer to the exercise on [change of variables](T2%20-%20Gaussian%20distribution.ipynb#Exc-(optional)----Change-of-variables) to show that $p(\x) = \NormDist(\x \mid \mu, \mat{C})$, where $\mat{C} = \mat{L}^{}\mat{L}^T$. 113 | * (b). The code below samples $N = 100$ realizations of $\x$ 114 | and collects them in an ${\xDim}$-by-$N$ "ensemble matrix" $\E$. 115 | But `for` loops are slow in plain Python (and Matlab). 116 | Replace it with something akin to `E = mu + L@Z`. 
117 | *Hint: this code snippet fails because it's trying to add a vector to a matrix.* 118 | 119 | ```python 120 | mu = np.array([1, 100, 5]) 121 | xDim = len(mu) 122 | L = np.diag(1+np.arange(xDim)) 123 | C = L @ L.T 124 | Z = rnd.randn(xDim, N) 125 | 126 | # Using a loop ("slow") 127 | E = np.zeros((xDim, N)) 128 | for n in range(N): 129 | E[:, n] = mu + L@Z[:, n] 130 | ``` 131 | 132 | ```python 133 | # show_answer('Gaussian sampling', 'b') 134 | ``` 135 | 136 | The following prints some numbers that can be used to ascertain if you got it right. 137 | Note that the estimates will never be exact: 138 | they contain some amount of random error, a.k.a. ***sampling error***. 139 | 140 | ```python 141 | with np.printoptions(precision=1, suppress=True): 142 | print("Estimated mean =", np.mean(E, axis=1)) 143 | print("Estimated cov =", np.cov(E), sep="\n") 144 | ``` 145 | 146 | **Exc -- Moment estimation code:** Above, we used numpy's (`np`) functions to compute the sample-estimated mean and covariance matrix, 147 | $\bx$ and $\barC$, 148 | from the ensemble matrix $\E$. 149 | Now, instead, implement these estimators yourself: 150 | $$\begin{align}\bx &\ceq \frac{1}{N} \sum_{n=1}^N \x_n \,, \\ 151 | \barC &\ceq \frac{1}{N-1} \sum_{n=1}^N (\x_n - \bx) (\x_n - \bx)^T \,. \end{align}$$ 152 | 153 | ```python 154 | # Don't use numpy's mean, cov, but feel free to use a `for` loop. 155 | def estimate_mean_and_cov(E): 156 | xDim, N = E.shape 157 | 158 | ### FIX THIS ### 159 | x_bar = np.zeros(xDim) 160 | C_bar = np.zeros((xDim, xDim)) 161 | 162 | return x_bar, C_bar 163 | 164 | x_bar, C_bar = estimate_mean_and_cov(E) 165 | with np.printoptions(precision=1): 166 | print("Mean =", x_bar) 167 | print("Covar =", C_bar, sep="\n") 168 | ``` 169 | 170 | ```python 171 | # show_answer('ensemble moments, loop') 172 | ``` 173 | 174 | **Exc -- An obsession?:** Why do we normalize by $(N-1)$ for the covariance computation? 175 | 176 | ```python 177 | # show_answer('Why (N-1)') 178 | ``` 179 | 180 | It can be shown that the above estimators for the mean and the covariance are *consistent and unbiased*. 181 | ***Consistent*** means that if we let $N \rightarrow \infty$, their sampling error will vanish ("almost surely"). 182 | ***Unbiased*** means that if we repeat the estimation experiment many times (but use a fixed, finite $N$), 183 | then the average of sampling errors will also vanish. 184 | Under relatively mild regularity conditions, the [absence of bias implies consistency](https://en.wikipedia.org/wiki/Consistent_estimator#Bias_versus_consistency). 185 | 186 | 187 | The following computes a large number ($K$) of $\barC$ and $1/\barC$, estimated with a given ensemble size ($N$). 188 | Note that the true variance is $C = 1$. 189 | The histograms of the estimates is plotted, along with vertical lines displaying the mean values. 190 | 191 | ```python 192 | K = 10000 193 | @interact(N=(2, 30), bottom=True) 194 | def var_and_precision_estimates(N=4): 195 | E = rnd.randn(K, N) 196 | estims = np.var(E, ddof=1, axis=-1) 197 | bins = np.linspace(0, 6, 40) 198 | plt.figure() 199 | plt.hist(estims, bins, alpha=.6, density=1) 200 | plt.hist(1/estims, bins, alpha=.6, density=1) 201 | plt.axvline(np.mean(estims), color="C0", label="C") 202 | plt.axvline(np.mean(1/estims), color="C1", label="1/C") 203 | plt.legend() 204 | plt.show() 205 | ``` 206 | 207 | **Exc -- There's bias, and then there's bias:** 208 | - Note that $1/\barC$ does not appear to be an unbiased estimate of $1/C = 1$. 
209 | Explain this by referring to a well-known property of the expectation, $\Expect$. 210 | In view of this, consider the role and utility of "unbiasedness" in estimation. 211 | - What, roughly, is the dependence of the mean values (vertical lines) on the ensemble size? 212 | What do they tend to as $N$ goes to $0$? 213 | What about $+\infty$ ? 214 | - Optional: What are the theoretical distributions of $\barC$ and $1/\barC$ ? 215 | 216 | ```python 217 | # show_answer('variance estimate statistics') 218 | ``` 219 | 220 | **Exc (optional) -- Error notions:** 221 | * (a). What's the difference between error and residual? 222 | * (b). What's the difference between error and bias? 223 | * (c). Show that `"mean-square-error" (RMSE^2) = Bias^2 + Var`. 224 | *Hint: Let $e = \hat{\theta} - \theta$ be the random "error" referred to above. 225 | Express each term using the expectation $\Expect$.* 226 | 227 | ```python 228 | # show_answer('errors') 229 | ``` 230 | 231 | **Exc -- Vectorization:** Python (numpy) is quicker if you "vectorize" loops (similar to Matlab and other high-level languages). 232 | This is eminently possible with computations of ensemble moments: 233 | Let $\X \ceq 234 | \begin{bmatrix} 235 | \x_1 -\bx, & \ldots & \x_N -\bx 236 | \end{bmatrix} \,.$ 237 | * (a). Show that $\X = \E \AN$, where $\ones$ is the column vector of length $N$ with all elements equal to $1$. 238 | *Hint: consider column $n$ of $\X$.* 239 | *PS: it can be shown that $\ones \ones\tr / N$ and its complement is a "projection matrix".* 240 | * (b). Show that $\barC = \X \X^T /(N-1)$. 241 | * (c). Code up this, latest, formula for $\barC$ and insert it in `estimate_mean_and_cov(E)` 242 | 243 | ```python 244 | # show_answer('ensemble moments vectorized') 245 | ``` 246 | 247 | **Exc -- Moment estimation code, part 2:** The cross-covariance between two random vectors, $\bx$ and $\by$, is given by 248 | $$\begin{align} 249 | \barC_{\x,\y} 250 | &\ceq \frac{1}{N-1} \sum_{n=1}^N 251 | (\x_n - \bx) (\y_n - \by)^T \\\ 252 | &= \X \Y^T /(N-1) 253 | \end{align}$$ 254 | where $\Y$ is, similar to $\X$, the matrix whose columns are $\y_n - \by$ for $n=1,\ldots,N$. 255 | Note that this is simply the covariance formula, but for two different variables. 256 | I.e. if $\Y = \X$, then $\barC_{\x,\y} = \barC_{\x}$ (which we have denoted $\barC$ in the above). 257 | 258 | Implement the cross-covariance estimator in the code-cell below. 259 | 260 | ```python 261 | def estimate_cross_cov(Ex, Ey): 262 | Cxy = np.zeros((len(Ex), len(Ey))) ### INSERT ANSWER ### 263 | return Cxy 264 | ``` 265 | 266 | ```python 267 | # show_answer('estimate cross') 268 | ``` 269 | 270 | ## Summary 271 | Parametric assumptions (e.g. assuming Gaussianity) can be useful in approximating distributions. 272 | Sample covariance estimates can be expressed and computed in a vectorized form. 273 | 274 | ### Next: [T9 - Writing your own EnKF](T9%20-%20Writing%20your%20own%20EnKF.ipynb) 275 | 276 | - - - 277 | 278 | - ###### Footnote 1: 279 | 280 | Essentially its (pseudo) randomness means that it is easy to avoid biases. 281 | For example, the Monte-Carlo approach is particularly useful 282 | when grid-based quadrature is difficult, as is often the case for high-dimensional problems. 283 | A common misconception in DA is that MC is somehow more efficient 284 | than deterministic quadrature in high dimensions, $D$. 
285 | The confusion arises because, from Chebyshev inequality, we know that 286 | the error of the MC approximation asymptotically converges to zero at a rate proportional to $1/\sqrt{N}$, 287 | while that of quadrature methods typically converges proportional to $1 / N^{1/D}$. 288 | But not only is the coefficient dependent on $D$ (and worse for MC), 289 | also (conjecture!) for any $D$ and $N$ you can always find a gridding strategy that has lower error. 290 | For example, quasi-random (latin hypercube, etc) are easily recommended 291 | in the pure context of hypercube integrals. 292 | - ###### Footnote 2: 293 | 294 | The derivation of the corresponding density might involve 295 | high-dimensional Jacobians for the change-of-variables formula, 296 | or its generalisation for non-bijective transformations, 297 | or to the Chapman-Kolmogorov equations in the case of interacting random variables, 298 | or its time-continuous form of Fokker-Planck. 299 | 300 | 301 | 302 | ### References 303 | -------------------------------------------------------------------------------- /notebooks/scripts/T8 - Monte-Carlo & ensembles.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,scripts//py:light,scripts//md 5 | # text_representation: 6 | # extension: .py 7 | # format_name: light 8 | # format_version: '1.5' 9 | # jupytext_version: 1.17.2 10 | # kernelspec: 11 | # display_name: Python 3 (ipykernel) 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials" 17 | # !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s 18 | 19 | from resources import show_answer, interact, import_from_nb 20 | # %matplotlib inline 21 | import numpy as np 22 | import matplotlib as mpl 23 | import scipy.stats as ss 24 | import numpy.random as rnd 25 | import matplotlib.pyplot as plt 26 | from scipy.stats import gaussian_kde 27 | plt.ion(); 28 | 29 | (pdf_G1, grid1d) = import_from_nb("T2", ("pdf_G1", "grid1d")) 30 | 31 | # # T8 - The ensemble (Monte-Carlo) approach 32 | # **Monte-Carlo methods** are a class of computational algorithms that rely on random/stochastic sampling. They generally trade off higher (though random!) error for lower technical complexity [[1]](#Footnote-1:). Examples from optimisation include randomly choosing search directions, swarms, evolutionary mutations, or perturbations for gradient approximation. Another application area is the computation of (deterministic) integrals via sample averages, which is rooted in the fact that any integral can be formulated as expectations, as well as the law of large numbers (LLN). This is actually a surprisingly large class of problems, including for example a way to [approximate the value of $\pi$](https://en.wikipedia.org/wiki/Monte_Carlo_method#Overview). Moreover, many integrals of interest are inherently expectations, but over probability distributions that are not tractable, as they arise from a complicated random or uncertain process [[2]](#Footnote-2:), whereas a Monte-Carlo sample thereof can be obtained simply by simulating the process. 
33 | # $ 34 | # \newcommand{\Expect}[0]{\mathbb{E}} 35 | # \newcommand{\NormDist}{\mathscr{N}} 36 | # \newcommand{\mat}[1]{{\mathbf{{#1}}}} 37 | # \newcommand{\bvec}[1]{{\mathbf{#1}}} 38 | # \newcommand{\trsign}{{\mathsf{T}}} 39 | # \newcommand{\tr}{^{\trsign}} 40 | # \newcommand{\ceq}[0]{\mathrel{≔}} 41 | # \newcommand{\xDim}[0]{D} 42 | # \newcommand{\I}[0]{\mat{I}} 43 | # \newcommand{\X}[0]{\mat{X}} 44 | # \newcommand{\Y}[0]{\mat{Y}} 45 | # \newcommand{\E}[0]{\mat{E}} 46 | # \newcommand{\x}[0]{\bvec{x}} 47 | # \newcommand{\y}[0]{\bvec{y}} 48 | # \newcommand{\z}[0]{\bvec{z}} 49 | # \newcommand{\bx}[0]{\bvec{\bar{x}}} 50 | # \newcommand{\by}[0]{\bvec{\bar{y}}} 51 | # \newcommand{\barC}[0]{\mat{\bar{C}}} 52 | # \newcommand{\ones}[0]{\bvec{1}} 53 | # \newcommand{\AN}[0]{\big( \I_N - \ones \ones\tr / N \big)} 54 | # $ 55 | # 56 | # **An ensemble** is an *i.i.d.* sample. I.e. a set of "members" ("particles", "realizations", or "sample points") that have been drawn ("sampled") independently from the same distribution. With the EnKF, these assumptions are generally tenuous, but pragmatic. 57 | # In particular, an ensemble can be used to characterize uncertainty: either by using it to compute (estimate) *statistics* thereof, such as the mean, median, variance, covariance, skewness, confidence intervals, etc (any function of the ensemble can be seen as a "statistic"), or by using it to reconstruct the distribution/density from which it is sampled. The latter is illustrated by the plot below. 58 | # 59 | # Take a moment to digest its code. Note: 60 | # 61 | # - The sample/ensemble is plotted as thin narrow lines. 62 | # Note that it is generated via `randn`, which samples from $\NormDist(0, 1)$. 63 | # - The "Parametric" density estimate is defined by estimating the mean and the variance, 64 | # and using those estimates to define a Gaussian density (with those parameters). 65 | # - We will not detail the KDE method, but it can be considered as a "continuous" version of a histogram. 66 | 67 | # + 68 | mu = 0 69 | sigma2 = 25 70 | N = 80 71 | 72 | @interact( seed=(1, 10), nbins=(2, 60), bw=(0.1, 1)) 73 | def pdf_reconstructions(seed=5, nbins=10, bw=.3): 74 | rnd.seed(seed) 75 | E = mu + np.sqrt(sigma2)*rnd.randn(N) 76 | 77 | fig, ax = plt.subplots() 78 | ax.plot(grid1d, pdf_G1(grid1d, mu, sigma2), lw=5, label="True") 79 | ax.plot(E, np.zeros(N), '|k', ms=100, mew=.4, label="_raw ens") 80 | ax.hist(E, nbins, density=1, alpha=.7, color="C5", label="Histogram") 81 | ax.plot(grid1d, pdf_G1(grid1d, np.mean(E), np.var(E)), lw=5, label="Parametric") 82 | ax.plot(grid1d, gaussian_kde(E.ravel(), bw**2).evaluate(grid1d), lw=5, label="KDE") 83 | ax.set_ylim(top=(3*sigma2)**-.5) 84 | ax.legend() 85 | plt.show() 86 | 87 | 88 | # - 89 | 90 | # **Exc -- A matter of taste?:** 91 | # - Which approximation to the true pdf looks better? 92 | # - Which approximation starts with more information? 93 | # What is the downside of making such assumptions? 94 | # - What value of `bw` causes the "KDE" method to most closely 95 | # reproduce/recover the "Parametric" method? 96 | # What about the "Histogram" method? 97 | # *PS: we might say that the KDE method "bridges" the other two.*. 98 | 99 | # The widget above illustrated how to estimate or reconstruct a distribution on the basis of a sample. But for the EnKF, we also need to know how to go the other way: drawing a sample from a (multivariate) Gaussian distribution... 
100 | # 101 | # **Exc -- Multivariate Gaussian sampling:** 102 | # Suppose $\z$ is a standard Gaussian, 103 | # i.e. $p(\z) = \NormDist(\z \mid \bvec{0},\I_{\xDim})$, 104 | # where $\I_{\xDim}$ is the $\xDim$-dimensional identity matrix. 105 | # Let $\x = \mat{L}\z + \mu$. 106 | # 107 | # * (a -- optional). Refer to the exercise on [change of variables](T2%20-%20Gaussian%20distribution.ipynb#Exc-(optional)----Change-of-variables) to show that $p(\x) = \NormDist(\x \mid \mu, \mat{C})$, where $\mat{C} = \mat{L}^{}\mat{L}^T$. 108 | # * (b). The code below samples $N = 100$ realizations of $\x$ 109 | # and collects them in an ${\xDim}$-by-$N$ "ensemble matrix" $\E$. 110 | # But `for` loops are slow in plain Python (and Matlab). 111 | # Replace it with something akin to `E = mu + L@Z`. 112 | # *Hint: this code snippet fails because it's trying to add a vector to a matrix.* 113 | 114 | # + 115 | mu = np.array([1, 100, 5]) 116 | xDim = len(mu) 117 | L = np.diag(1+np.arange(xDim)) 118 | C = L @ L.T 119 | Z = rnd.randn(xDim, N) 120 | 121 | # Using a loop ("slow") 122 | E = np.zeros((xDim, N)) 123 | for n in range(N): 124 | E[:, n] = mu + L@Z[:, n] 125 | 126 | # + 127 | # show_answer('Gaussian sampling', 'b') 128 | # - 129 | 130 | # The following prints some numbers that can be used to ascertain if you got it right. 131 | # Note that the estimates will never be exact: 132 | # they contain some amount of random error, a.k.a. ***sampling error***. 133 | 134 | with np.printoptions(precision=1, suppress=True): 135 | print("Estimated mean =", np.mean(E, axis=1)) 136 | print("Estimated cov =", np.cov(E), sep="\n") 137 | 138 | 139 | # **Exc -- Moment estimation code:** Above, we used numpy's (`np`) functions to compute the sample-estimated mean and covariance matrix, 140 | # $\bx$ and $\barC$, 141 | # from the ensemble matrix $\E$. 142 | # Now, instead, implement these estimators yourself: 143 | # $$\begin{align}\bx &\ceq \frac{1}{N} \sum_{n=1}^N \x_n \,, \\ 144 | # \barC &\ceq \frac{1}{N-1} \sum_{n=1}^N (\x_n - \bx) (\x_n - \bx)^T \,. \end{align}$$ 145 | 146 | # + 147 | # Don't use numpy's mean, cov, but feel free to use a `for` loop. 148 | def estimate_mean_and_cov(E): 149 | xDim, N = E.shape 150 | 151 | ### FIX THIS ### 152 | x_bar = np.zeros(xDim) 153 | C_bar = np.zeros((xDim, xDim)) 154 | 155 | return x_bar, C_bar 156 | 157 | x_bar, C_bar = estimate_mean_and_cov(E) 158 | with np.printoptions(precision=1): 159 | print("Mean =", x_bar) 160 | print("Covar =", C_bar, sep="\n") 161 | 162 | # + 163 | # show_answer('ensemble moments, loop') 164 | # - 165 | 166 | # **Exc -- An obsession?:** Why do we normalize by $(N-1)$ for the covariance computation? 167 | 168 | # + 169 | # show_answer('Why (N-1)') 170 | # - 171 | 172 | # It can be shown that the above estimators for the mean and the covariance are *consistent and unbiased*. 173 | # ***Consistent*** means that if we let $N \rightarrow \infty$, their sampling error will vanish ("almost surely"). 174 | # ***Unbiased*** means that if we repeat the estimation experiment many times (but use a fixed, finite $N$), 175 | # then the average of sampling errors will also vanish. 176 | # Under relatively mild regularity conditions, the [absence of bias implies consistency](https://en.wikipedia.org/wiki/Consistent_estimator#Bias_versus_consistency). 177 | 178 | # The following computes a large number ($K$) of $\barC$ and $1/\barC$, estimated with a given ensemble size ($N$). 179 | # Note that the true variance is $C = 1$. 
180 | # The histograms of the estimates is plotted, along with vertical lines displaying the mean values. 181 | 182 | K = 10000 183 | @interact(N=(2, 30), bottom=True) 184 | def var_and_precision_estimates(N=4): 185 | E = rnd.randn(K, N) 186 | estims = np.var(E, ddof=1, axis=-1) 187 | bins = np.linspace(0, 6, 40) 188 | plt.figure() 189 | plt.hist(estims, bins, alpha=.6, density=1) 190 | plt.hist(1/estims, bins, alpha=.6, density=1) 191 | plt.axvline(np.mean(estims), color="C0", label="C") 192 | plt.axvline(np.mean(1/estims), color="C1", label="1/C") 193 | plt.legend() 194 | plt.show() 195 | 196 | 197 | # **Exc -- There's bias, and then there's bias:** 198 | # - Note that $1/\barC$ does not appear to be an unbiased estimate of $1/C = 1$. 199 | # Explain this by referring to a well-known property of the expectation, $\Expect$. 200 | # In view of this, consider the role and utility of "unbiasedness" in estimation. 201 | # - What, roughly, is the dependence of the mean values (vertical lines) on the ensemble size? 202 | # What do they tend to as $N$ goes to $0$? 203 | # What about $+\infty$ ? 204 | # - Optional: What are the theoretical distributions of $\barC$ and $1/\barC$ ? 205 | 206 | # + 207 | # show_answer('variance estimate statistics') 208 | # - 209 | 210 | # **Exc (optional) -- Error notions:** 211 | # * (a). What's the difference between error and residual? 212 | # * (b). What's the difference between error and bias? 213 | # * (c). Show that `"mean-square-error" (RMSE^2) = Bias^2 + Var`. 214 | # *Hint: Let $e = \hat{\theta} - \theta$ be the random "error" referred to above. 215 | # Express each term using the expectation $\Expect$.* 216 | 217 | # + 218 | # show_answer('errors') 219 | # - 220 | 221 | # **Exc -- Vectorization:** Python (numpy) is quicker if you "vectorize" loops (similar to Matlab and other high-level languages). 222 | # This is eminently possible with computations of ensemble moments: 223 | # Let $\X \ceq 224 | # \begin{bmatrix} 225 | # \x_1 -\bx, & \ldots & \x_N -\bx 226 | # \end{bmatrix} \,.$ 227 | # * (a). Show that $\X = \E \AN$, where $\ones$ is the column vector of length $N$ with all elements equal to $1$. 228 | # *Hint: consider column $n$ of $\X$.* 229 | # *PS: it can be shown that $\ones \ones\tr / N$ and its complement is a "projection matrix".* 230 | # * (b). Show that $\barC = \X \X^T /(N-1)$. 231 | # * (c). Code up this, latest, formula for $\barC$ and insert it in `estimate_mean_and_cov(E)` 232 | 233 | # + 234 | # show_answer('ensemble moments vectorized') 235 | # - 236 | 237 | # **Exc -- Moment estimation code, part 2:** The cross-covariance between two random vectors, $\bx$ and $\by$, is given by 238 | # $$\begin{align} 239 | # \barC_{\x,\y} 240 | # &\ceq \frac{1}{N-1} \sum_{n=1}^N 241 | # (\x_n - \bx) (\y_n - \by)^T \\\ 242 | # &= \X \Y^T /(N-1) 243 | # \end{align}$$ 244 | # where $\Y$ is, similar to $\X$, the matrix whose columns are $\y_n - \by$ for $n=1,\ldots,N$. 245 | # Note that this is simply the covariance formula, but for two different variables. 246 | # I.e. if $\Y = \X$, then $\barC_{\x,\y} = \barC_{\x}$ (which we have denoted $\barC$ in the above). 247 | # 248 | # Implement the cross-covariance estimator in the code-cell below. 249 | 250 | def estimate_cross_cov(Ex, Ey): 251 | Cxy = np.zeros((len(Ex), len(Ey))) ### INSERT ANSWER ### 252 | return Cxy 253 | 254 | # + 255 | # show_answer('estimate cross') 256 | # - 257 | 258 | # ## Summary 259 | # Parametric assumptions (e.g. 
assuming Gaussianity) can be useful in approximating distributions. 260 | # Sample covariance estimates can be expressed and computed in a vectorized form. 261 | # 262 | # ### Next: [T9 - Writing your own EnKF](T9%20-%20Writing%20your%20own%20EnKF.ipynb) 263 | # 264 | # - - - 265 | # 266 | # - ###### Footnote 1: 267 | # 268 | # Essentially its (pseudo) randomness means that it is easy to avoid biases. 269 | # For example, the Monte-Carlo approach is particularly useful 270 | # when grid-based quadrature is difficult, as is often the case for high-dimensional problems. 271 | # A common misconception in DA is that MC is somehow more efficient 272 | # than deterministic quadrature in high dimensions, $D$. 273 | # The confusion arises because, from Chebyshev inequality, we know that 274 | # the error of the MC approximation asymptotically converges to zero at a rate proportional to $1/\sqrt{N}$, 275 | # while that of quadrature methods typically converges proportional to $1 / N^{1/D}$. 276 | # But not only is the coefficient dependent on $D$ (and worse for MC), 277 | # also (conjecture!) for any $D$ and $N$ you can always find a gridding strategy that has lower error. 278 | # For example, quasi-random (latin hypercube, etc) are easily recommended 279 | # in the pure context of hypercube integrals. 280 | # - ###### Footnote 2: 281 | # 282 | # The derivation of the corresponding density might involve 283 | # high-dimensional Jacobians for the change-of-variables formula, 284 | # or its generalisation for non-bijective transformations, 285 | # or to the Chapman-Kolmogorov equations in the case of interacting random variables, 286 | # or its time-continuous form of Fokker-Planck. 287 | # 288 | # 289 | # 290 | # ### References 291 | -------------------------------------------------------------------------------- /notebooks/scripts/T9 - Writing your own EnKF.md: -------------------------------------------------------------------------------- 1 | --- 2 | jupyter: 3 | jupytext: 4 | formats: ipynb,scripts//py:light,scripts//md 5 | text_representation: 6 | extension: .md 7 | format_name: markdown 8 | format_version: '1.3' 9 | jupytext_version: 1.17.2 10 | kernelspec: 11 | display_name: Python 3 (ipykernel) 12 | language: python 13 | name: python3 14 | --- 15 | 16 | ```python 17 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials" 18 | !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s 19 | ``` 20 | 21 | ```python 22 | from resources import show_answer, EnKF_animation 23 | import numpy as np 24 | import matplotlib as mpl 25 | import numpy.random as rnd 26 | import matplotlib.pyplot as plt 27 | from tqdm.auto import tqdm 28 | plt.ion(); 29 | ``` 30 | 31 | # T9 - Writing your own EnKF 32 | In this tutorial we're going to code an EnKF implementation using numpy. 33 | As with the KF, the EnKF consists of the recursive application of 34 | a forecast step and an analysis step. 
35 | $ 36 | \newcommand{\Reals}{\mathbb{R}} 37 | \newcommand{\Expect}[0]{\mathbb{E}} 38 | \newcommand{\NormDist}{\mathscr{N}} 39 | \newcommand{\DynMod}[0]{\mathscr{M}} 40 | \newcommand{\ObsMod}[0]{\mathscr{H}} 41 | \newcommand{\mat}[1]{{\mathbf{{#1}}}} 42 | \newcommand{\bvec}[1]{{\mathbf{#1}}} 43 | \newcommand{\trsign}{{\mathsf{T}}} 44 | \newcommand{\tr}{^{\trsign}} 45 | \newcommand{\ceq}[0]{\mathrel{≔}} 46 | \newcommand{\xDim}[0]{D} 47 | \newcommand{\supa}[0]{^\text{a}} 48 | \newcommand{\supf}[0]{^\text{f}} 49 | \newcommand{\I}[0]{\mat{I}} 50 | \newcommand{\K}[0]{\mat{K}} 51 | \newcommand{\bP}[0]{\mat{P}} 52 | \newcommand{\bH}[0]{\mat{H}} 53 | \newcommand{\R}[0]{\mat{R}} 54 | \newcommand{\Q}[0]{\mat{Q}} 55 | \newcommand{\Ri}[0]{\R^{-1}} 56 | \newcommand{\X}[0]{\mat{X}} 57 | \newcommand{\Y}[0]{\mat{Y}} 58 | \newcommand{\E}[0]{\mat{E}} 59 | \newcommand{\x}[0]{\bvec{x}} 60 | \newcommand{\y}[0]{\bvec{y}} 61 | \newcommand{\q}[0]{\bvec{q}} 62 | \newcommand{\r}[0]{\bvec{r}} 63 | \newcommand{\bx}[0]{\bvec{\bar{x}}} 64 | \newcommand{\by}[0]{\bvec{\bar{y}}} 65 | \newcommand{\barP}[0]{\mat{\bar{P}}} 66 | \newcommand{\barK}[0]{\mat{\bar{K}}} 67 | \newcommand{\D}[0]{\mat{D}} 68 | \newcommand{\Dobs}[0]{\mat{D}_{\text{obs}}} 69 | \newcommand{\ones}[0]{\bvec{1}} 70 | $ 71 | 72 | 73 | This presentation follows the traditional template, presenting the EnKF as the "the Monte Carlo version of the KF 74 | where the state covariance is estimated by the ensemble covariance". 75 | It is not obvious that this postulated method should work; 76 | indeed, it is only justified upon inspection of its properties, 77 | deferred to later. 78 | 79 | 80 | NB: 81 | Since we're going to focus on a single filtering cycle (at a time), 82 | the subscript $k$ is dropped. Moreover,
83 | the superscript $f$ indicates that $\{\x_n\supf\}_{n=1..N}$ is the forecast (prior) ensemble.
84 | The superscript $a$ indicates that $\{\x_n\supa\}_{n=1..N}$ is the analysis (posterior) ensemble. 85 |
86 | 87 | ### The forecast step 88 | Suppose $\{\x_n\supa\}_{n=1..N}$ is an iid. sample from $p(\x_{k-1} \mid \y_1,\ldots, \y_{k-1})$, which may or may not be Gaussian. 89 | 90 | The forecast step of the EnKF consists of a Monte Carlo simulation 91 | of the forecast dynamics for each $\x_n$: 92 | $$ 93 | \forall n, \quad \x\supf_n = \DynMod(\x_n\supa) + \q_n \,, \\ 94 | $$ 95 | where $\{\q_n\}_{n=1..N}$ are sampled iid. from $\NormDist(\bvec{0},\Q)$, 96 | or whatever noise model is assumed, 97 | and $\DynMod$ is the model dynamics. 98 | The dynamics could consist of *any* function, i.e. the EnKF can be applied with nonlinear models. 99 | 100 | The ensemble, $\{\x_n\supf\}_{n=1..N}$, is then an iid. sample from the forecast pdf, 101 | $p(\x_k \mid \y_1,\ldots,\y_{k-1})$. This follows from the definition of the latter, so it is a relatively trivial idea and way to obtain this pdf. However, before Monte-Carlo methods were computationally feasible, the computation of the forecast pdf required computing the [Chapman-Kolmogorov equation](https://en.wikipedia.org/wiki/Chapman%E2%80%93Kolmogorov_equation), which constituted a major hurdle for filtering methods. 102 | 103 | ### The analysis update step 104 | of the ensemble is given by: 105 | $$\begin{align} 106 | \forall n, \quad \x\supa_n &= \x_n\supf + \barK \left\{\y - \r_n - \ObsMod(\x_n\supf) \right\} 107 | \,, \\ 108 | \text{or,}\quad 109 | \E\supa &= \E\supf + \barK \left\{\y\ones\tr - \Dobs - \ObsMod(\E\supf) \right\} \,, 110 | \tag{4} 111 | \end{align} 112 | $$ 113 | where the "observation perturbations", $\r_n$, are sampled iid. from the observation noise model, e.g. $\NormDist(\bvec{0},\R)$, 114 | and form the columns of $\Dobs$, 115 | and the observation operator (again, any type of function), $\ObsMod$, is applied column-wise to $\E\supf$. 116 | 117 | The gain $\barK$ is defined by inserting the ensemble estimates for 118 | * (i) $\bP\supf \bH\tr$: the cross-covariance between $\x\supf$ and $\ObsMod(\x\supf)$, and 119 | * (ii) $\bH \bP\supf \bH\tr$: the covariance matrix of $\ObsMod(\x\supf)$, 120 | 121 | in the formula for $\K$, namely eqn. (K1) of [T5](T5%20-%20Multivariate%20Kalman%20filter.ipynb). 122 | Using the estimators from [T8](T8%20-%20Monte-Carlo%20%26%20ensembles.ipynb) yields 123 | 124 | $$\begin{align} 125 | \barK &= \X \Y\tr ( \Y \Y\tr + (N{-}1) \R )^{-1} \,, \tag{5a} 126 | \end{align} 127 | $$ 128 | 129 | where $\Y \in \Reals^{P \times N}$ 130 | is the centered, *observed* ensemble 131 | $\Y \ceq 132 | \begin{bmatrix} 133 | \y_1 -\by, & \ldots & \y_n -\by, & \ldots & \y_N -\by 134 | \end{bmatrix} \,,$ where $\y_n = \ObsMod(\x_n\supf)$. 135 | 136 | The EnKF is summarized in the animation below. 137 | 138 | ```python 139 | EnKF_animation() 140 | ``` 141 | 142 | #### Exc -- Woodbury for the ensemble subspace 143 | (a) Use the Woodbury identity (C2) of [T5](T5%20-%20Multivariate%20Kalman%20filter.ipynb) to show that eqn. (5a) can also be written 144 | $$\begin{align} 145 | \barK &= \X ( \Y\tr \Ri \Y + (N{-}1)\I_N )^{-1} \Y\tr \Ri \,. \tag{5b} 146 | \end{align} 147 | $$ 148 | (b) What is the potential benefit of (5b) vs. (5a) ? 149 | 150 | 151 | #### Exc -- KG workings 152 | The above animation assumed that the observation operator is just the identity matrix, $\I$, rather than a general observation operator, $\ObsMod()$. Meanwhile, the Kalman gain used by the EnKF, eqn. (5a), is applicable for any $\ObsMod()$. On the other hand, the formula (5a) consists solely of linear algebra. 
Therefore it cannot perfectly represent any general (nonlinear) $\ObsMod()$. So how does it actually treat the observation operator? What meaning can we assign to the resulting updates? 153 | *Hint*: consider the limit of $\R \rightarrow 0$. 154 | 155 | 156 | #### Exc -- EnKF nobias (a) 157 | Consider the ensemble averages, 158 | - $\bx\supa = \frac{1}{N}\sum_{n=1}^N \x\supa_n$, and 159 | - $\bx\supf = \frac{1}{N}\sum_{n=1}^N \x\supf_n$, 160 | 161 | and recall that the analysis step, eqn. (4), defines $\x\supa_n$ from $\x\supf_n$. 162 | 163 | 164 | (a) Show that, in case $\ObsMod$ is linear (the matrix $\bH$), 165 | $$\begin{align} 166 | \Expect \bx\supa &= \bx\supf + \barK \left\{\y\ones\tr - \bH\bx\supf \right\} \,, \tag{6} 167 | \end{align} 168 | $$ 169 | where the expectation, $\Expect$, is taken with respect to $\Dobs$ only (i.e. not the sampling of the forecast ensemble, $\E\supf$ itself). 170 | 171 | What does this mean? 172 | 173 | 174 | ```python 175 | # show_answer("EnKF_nobias_a") 176 | ``` 177 | 178 | #### Exc (optional) -- EnKF nobias (b) 179 | Consider the ensemble covariance matrices: 180 | $$\begin{align} 181 | \barP\supf &= \frac{1}{N-1} \X{\X}\tr \,, \tag{7a} \\\ 182 | \barP\supa &= \frac{1}{N-1} \X\supa{\X\supa}\tr \,. \tag{7b} 183 | \end{align}$$ 184 | 185 | Now, denote the centralized observation perturbations 186 | $\D \ceq 187 | \begin{bmatrix} 188 | \r_1 -\bar{\r}, & \ldots & \r_n -\bar{\r}, & \ldots & \r_N -\bar{\r} 189 | \end{bmatrix} $. 190 | Note that $\D \ones = \bvec{0}$ and that 191 | $$ 192 | \begin{align} 193 | \label{eqn:R_sample_cov_of_D} 194 | \frac{1}{N-1} \D \D\tr &= \R \,, \tag{9a} \\\ 195 | \label{eqn:zero_AD_cov} 196 | \X \D\tr &= \bvec{0} \tag{9b} 197 | \end{align} 198 | $$ 199 | is satisfied in the expected sense, i.e. by taking the expectation on the left-hand side. 200 | Thereby, show that 201 | 202 | $$\begin{align} 203 | \Expect \, \barP\supa &= [\I_{\xDim} - \barK \bH]\barP\supf \, . \tag{10} 204 | \end{align}$$ 205 | 206 | ```python 207 | # show_answer("EnKF_nobias_b") 208 | ``` 209 | 210 | #### Exc (optional) -- EnKF bias (c) 211 | Show that, if no observation perturbations are used in eqn. (4), then $\barP\supa$ would be too small. 212 | 213 | ```python 214 | # show_answer("EnKF_without_perturbations") 215 | ``` 216 | 217 | ## Experimental setup 218 | 219 | Before making the EnKF, we'll set up an experiment to test it with, so that you can check if you've implemented a working method or not. 220 | 221 | To that end, we'll use the Lorenz-63 model, from [T7](T7%20-%20Chaos%20%26%20Lorenz%20[optional].ipynb). The coupled ODEs are recalled here, but with some of the parameters fixed. 222 | 223 | ```python 224 | xDim = 3 225 | 226 | def dxdt(x, sig=10, rho=28, beta=8/3): 227 | x,y,z = x 228 | d = np.zeros(3) 229 | d[0] = sig*(y - x) 230 | d[1] = rho*x - y - x*z 231 | d[2] = x*y - beta*z 232 | return d 233 | ``` 234 | 235 | Next, we make the forecast model $\DynMod$ out of $\frac{d \x}{dt}$ such that $\x(t+dt) = \DynMod(\x(t),t,dt)$. We'll make use of the "4th order Runge-Kutta" integrator `rk4`. 
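For orientation, here is a sketch of a classic 4th-order Runge-Kutta step, written with the same call signature as the `rk4` used below; the actual implementation in `dapper.mods.integration` may differ in details (e.g. extra options), so treat this only as an illustration of the idea.

```python
def rk4_sketch(f, x, t, dt):
    """One classic RK4 step for dx/dt = f(x, t)."""
    k1 = dt * f(x,        t)
    k2 = dt * f(x + k1/2, t + dt/2)
    k3 = dt * f(x + k2/2, t + dt/2)
    k4 = dt * f(x + k3,   t + dt)
    return x + (k1 + 2*k2 + 2*k3 + k4)/6
```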
236 | 237 | ```python 238 | from dapper.mods.integration import rk4 239 | 240 | def Dyn(E, t0, dt): 241 | 242 | def step(x0): 243 | return rk4(lambda x, t: dxdt(x), x0, t0, dt) 244 | 245 | if E.ndim == 1: 246 | # Truth (single state vector) case 247 | E = step(E) 248 | else: 249 | # Ensemble case 250 | for n in range(E.shape[1]): 251 | E[:, n] = step(E[:, n]) 252 | 253 | return E 254 | 255 | Q12 = np.zeros((xDim, xDim)) 256 | Q = Q12 @ Q12.T 257 | ``` 258 | 259 | Notice the loop over each ensemble member. For better performance, this should be vectorized, if possible. Or, if the forecast model is computationally demanding (as is typically the case in real applications), the loop should be parallelized: i.e. the forecast simulations should be distributed to separate computers. 260 | 261 | 262 | The following are the time settings that we will use 263 | 264 | ```python 265 | dt = 0.01 # integrational time step 266 | dko = 25 # number of steps between observations 267 | dto = dko*dt # time between observations 268 | Ko = 60 # total number of observations 269 | nTime = dko*(Ko+1) # total number of time steps 270 | ``` 271 | 272 | Initial conditions 273 | 274 | ```python 275 | xa = np.array([1.509, -1.531, 25.46]) 276 | Pa12 = np.eye(3) 277 | ``` 278 | 279 | Observation model settings 280 | 281 | ```python 282 | p = 3 # ndim obs 283 | def Obs(E, t): 284 | return E[:p] if E.ndim == 1 else E[:p, :] 285 | 286 | R12 = np.sqrt(2)*np.eye(p) 287 | R = R12 @ R12.T 288 | ``` 289 | 290 | Generate synthetic truth and observations 291 | 292 | ```python 293 | # Init 294 | truths = np.zeros((nTime+1, xDim)) 295 | obsrvs = np.zeros((Ko+1, p)) 296 | truths[0] = xa + Pa12 @ rnd.randn(xDim) 297 | ``` 298 | 299 | ```python 300 | # Loop 301 | for k in range(1, nTime+1): 302 | truths[k] = Dyn(truths[k-1], (k-1)*dt, dt) 303 | truths[k] += Q12 @ rnd.randn(xDim) 304 | if k % dko == 0: 305 | Ko = k//dko-1 306 | obsrvs[Ko] = Obs(truths[k], np.nan) + R12 @ rnd.randn(p) 307 | ``` 308 | 309 | ## EnKF implementation 310 | 311 | 312 | We will make use of `estimate_mean_and_cov` and `estimate_cross_cov` from the previous section. Paste them in below. 313 | 314 | ```python 315 | # def estimate_mean_and_cov ... 316 | ``` 317 | 318 | **Exc -- EnKF implementation:** Complete the code below 319 | 320 | ```python 321 | # Useful linear algebra: compute B/A 322 | import numpy.linalg as nla 323 | 324 | ens_means = np.zeros((nTime+1, xDim)) 325 | ens_vrncs = np.zeros((nTime+1, xDim)) 326 | 327 | def my_EnKF(N): 328 | """My implementation of the EnKF.""" 329 | ### Init ### 330 | E = np.zeros((xDim, N)) 331 | for k in tqdm(range(1, nTime+1)): 332 | t = k*dt 333 | ### Forecast ## 334 | # E = ... # use model 335 | # E = ... # add noise 336 | if k % dko == 0: 337 | ### Analysis ## 338 | y = obsrvs[[k//dko-1]].T # current observation 339 | Eo = Obs(E, t) # observed ensemble 340 | # Compute ensemble moments 341 | PH = ... 342 | HPH = ... 343 | # Compute Kalman Gain 344 | KG = ... 345 | # Generate perturbations 346 | Perturb = ... 347 | # Update ensemble with KG 348 | # E = ... 349 | # Save statistics 350 | ens_means[k] = np.mean(E, axis=1) 351 | ens_vrncs[k] = np.var(E, axis=1, ddof=1) 352 | ``` 353 | 354 | Notice that we only store some stats (`ens_means`). This is because in large systems, 355 | keeping the entire ensemble (or its covariance) in memory is probably too much. 
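To get a feel for why, consider a back-of-envelope sketch (hypothetical sizes, not this tutorial's toy model):

```python
xDim_big, N_big, nSteps = 10**7, 100, 1000     # hypothetical large model, ensemble size, experiment length
bytes_per_float = 8                            # float64
full_history_GB = xDim_big * N_big * nSteps * bytes_per_float / 1e9
stats_only_GB   = xDim_big * 2     * nSteps * bytes_per_float / 1e9   # mean and variance only
print(f"Full ensemble history: {full_history_GB:,.0f} GB   vs.   stats only: {stats_only_GB:,.0f} GB")
```

Storing only a couple of summary fields per time step thus shrinks the footprint by a factor of roughly N/2.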
356 | 357 | ```python 358 | # show_answer('EnKF v1') 359 | ``` 360 | 361 | Now let's try out its capabilities 362 | 363 | ```python 364 | # Run assimilation 365 | my_EnKF(10) 366 | 367 | # Plot 368 | fig, axs = plt.subplots(nrows=3, sharex=True) 369 | for i in range(3): 370 | axs[i].plot(dt*np.arange(nTime+1), truths [:, i], 'k', label="Truth") 371 | axs[i].plot(dt*np.arange(nTime+1), ens_means[:, i], 'b', label="Estimate") 372 | if i 423 | 424 | ### References 425 | -------------------------------------------------------------------------------- /notebooks/scripts/T9 - Writing your own EnKF.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,scripts//py:light,scripts//md 5 | # text_representation: 6 | # extension: .py 7 | # format_name: light 8 | # format_version: '1.5' 9 | # jupytext_version: 1.17.2 10 | # kernelspec: 11 | # display_name: Python 3 (ipykernel) 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials" 17 | # !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s 18 | 19 | from resources import show_answer, EnKF_animation 20 | import numpy as np 21 | import matplotlib as mpl 22 | import numpy.random as rnd 23 | import matplotlib.pyplot as plt 24 | from tqdm.auto import tqdm 25 | plt.ion(); 26 | 27 | # # T9 - Writing your own EnKF 28 | # In this tutorial we're going to code an EnKF implementation using numpy. 29 | # As with the KF, the EnKF consists of the recursive application of 30 | # a forecast step and an analysis step. 31 | # $ 32 | # \newcommand{\Reals}{\mathbb{R}} 33 | # \newcommand{\Expect}[0]{\mathbb{E}} 34 | # \newcommand{\NormDist}{\mathscr{N}} 35 | # \newcommand{\DynMod}[0]{\mathscr{M}} 36 | # \newcommand{\ObsMod}[0]{\mathscr{H}} 37 | # \newcommand{\mat}[1]{{\mathbf{{#1}}}} 38 | # \newcommand{\bvec}[1]{{\mathbf{#1}}} 39 | # \newcommand{\trsign}{{\mathsf{T}}} 40 | # \newcommand{\tr}{^{\trsign}} 41 | # \newcommand{\ceq}[0]{\mathrel{≔}} 42 | # \newcommand{\xDim}[0]{D} 43 | # \newcommand{\supa}[0]{^\text{a}} 44 | # \newcommand{\supf}[0]{^\text{f}} 45 | # \newcommand{\I}[0]{\mat{I}} 46 | # \newcommand{\K}[0]{\mat{K}} 47 | # \newcommand{\bP}[0]{\mat{P}} 48 | # \newcommand{\bH}[0]{\mat{H}} 49 | # \newcommand{\R}[0]{\mat{R}} 50 | # \newcommand{\Q}[0]{\mat{Q}} 51 | # \newcommand{\Ri}[0]{\R^{-1}} 52 | # \newcommand{\X}[0]{\mat{X}} 53 | # \newcommand{\Y}[0]{\mat{Y}} 54 | # \newcommand{\E}[0]{\mat{E}} 55 | # \newcommand{\x}[0]{\bvec{x}} 56 | # \newcommand{\y}[0]{\bvec{y}} 57 | # \newcommand{\q}[0]{\bvec{q}} 58 | # \newcommand{\r}[0]{\bvec{r}} 59 | # \newcommand{\bx}[0]{\bvec{\bar{x}}} 60 | # \newcommand{\by}[0]{\bvec{\bar{y}}} 61 | # \newcommand{\barP}[0]{\mat{\bar{P}}} 62 | # \newcommand{\barK}[0]{\mat{\bar{K}}} 63 | # \newcommand{\D}[0]{\mat{D}} 64 | # \newcommand{\Dobs}[0]{\mat{D}_{\text{obs}}} 65 | # \newcommand{\ones}[0]{\bvec{1}} 66 | # $ 67 | 68 | # This presentation follows the traditional template, presenting the EnKF as the "the Monte Carlo version of the KF 69 | # where the state covariance is estimated by the ensemble covariance". 70 | # It is not obvious that this postulated method should work; 71 | # indeed, it is only justified upon inspection of its properties, 72 | # deferred to later. 73 | # 74 | # 75 | # NB: 76 | # Since we're going to focus on a single filtering cycle (at a time), 77 | # the subscript $k$ is dropped. Moreover,
78 | # the superscript $f$ indicates that $\{\x_n\supf\}_{n=1..N}$ is the forecast (prior) ensemble.
79 | # The superscript $a$ indicates that $\{\x_n\supa\}_{n=1..N}$ is the analysis (posterior) ensemble. 80 | #
81 | # 82 | # ### The forecast step 83 | # Suppose $\{\x_n\supa\}_{n=1..N}$ is an iid. sample from $p(\x_{k-1} \mid \y_1,\ldots, \y_{k-1})$, which may or may not be Gaussian. 84 | # 85 | # The forecast step of the EnKF consists of a Monte Carlo simulation 86 | # of the forecast dynamics for each $\x_n$: 87 | # $$ 88 | # \forall n, \quad \x\supf_n = \DynMod(\x_n\supa) + \q_n \,, \\ 89 | # $$ 90 | # where $\{\q_n\}_{n=1..N}$ are sampled iid. from $\NormDist(\bvec{0},\Q)$, 91 | # or whatever noise model is assumed, 92 | # and $\DynMod$ is the model dynamics. 93 | # The dynamics could consist of *any* function, i.e. the EnKF can be applied with nonlinear models. 94 | # 95 | # The ensemble, $\{\x_n\supf\}_{n=1..N}$, is then an iid. sample from the forecast pdf, 96 | # $p(\x_k \mid \y_1,\ldots,\y_{k-1})$. This follows from the definition of the latter, so it is a relatively trivial idea and way to obtain this pdf. However, before Monte-Carlo methods were computationally feasible, the computation of the forecast pdf required computing the [Chapman-Kolmogorov equation](https://en.wikipedia.org/wiki/Chapman%E2%80%93Kolmogorov_equation), which constituted a major hurdle for filtering methods. 97 | # 98 | # ### The analysis update step 99 | # of the ensemble is given by: 100 | # $$\begin{align} 101 | # \forall n, \quad \x\supa_n &= \x_n\supf + \barK \left\{\y - \r_n - \ObsMod(\x_n\supf) \right\} 102 | # \,, \\ 103 | # \text{or,}\quad 104 | # \E\supa &= \E\supf + \barK \left\{\y\ones\tr - \Dobs - \ObsMod(\E\supf) \right\} \,, 105 | # \tag{4} 106 | # \end{align} 107 | # $$ 108 | # where the "observation perturbations", $\r_n$, are sampled iid. from the observation noise model, e.g. $\NormDist(\bvec{0},\R)$, 109 | # and form the columns of $\Dobs$, 110 | # and the observation operator (again, any type of function), $\ObsMod$, is applied column-wise to $\E\supf$. 111 | # 112 | # The gain $\barK$ is defined by inserting the ensemble estimates for 113 | # * (i) $\bP\supf \bH\tr$: the cross-covariance between $\x\supf$ and $\ObsMod(\x\supf)$, and 114 | # * (ii) $\bH \bP\supf \bH\tr$: the covariance matrix of $\ObsMod(\x\supf)$, 115 | # 116 | # in the formula for $\K$, namely eqn. (K1) of [T5](T5%20-%20Multivariate%20Kalman%20filter.ipynb). 117 | # Using the estimators from [T8](T8%20-%20Monte-Carlo%20%26%20ensembles.ipynb) yields 118 | # 119 | # $$\begin{align} 120 | # \barK &= \X \Y\tr ( \Y \Y\tr + (N{-}1) \R )^{-1} \,, \tag{5a} 121 | # \end{align} 122 | # $$ 123 | # 124 | # where $\Y \in \Reals^{P \times N}$ 125 | # is the centered, *observed* ensemble 126 | # $\Y \ceq 127 | # \begin{bmatrix} 128 | # \y_1 -\by, & \ldots & \y_n -\by, & \ldots & \y_N -\by 129 | # \end{bmatrix} \,,$ where $\y_n = \ObsMod(\x_n\supf)$. 130 | # 131 | # The EnKF is summarized in the animation below. 132 | 133 | EnKF_animation() 134 | 135 | # #### Exc -- Woodbury for the ensemble subspace 136 | # (a) Use the Woodbury identity (C2) of [T5](T5%20-%20Multivariate%20Kalman%20filter.ipynb) to show that eqn. (5a) can also be written 137 | # $$\begin{align} 138 | # \barK &= \X ( \Y\tr \Ri \Y + (N{-}1)\I_N )^{-1} \Y\tr \Ri \,. \tag{5b} 139 | # \end{align} 140 | # $$ 141 | # (b) What is the potential benefit of (5b) vs. (5a) ? 142 | 143 | # #### Exc -- KG workings 144 | # The above animation assumed that the observation operator is just the identity matrix, $\I$, rather than a general observation operator, $\ObsMod()$. Meanwhile, the Kalman gain used by the EnKF, eqn. (5a), is applicable for any $\ObsMod()$. 
On the other hand, the formula (5a) consists solely of linear algebra. Therefore it cannot perfectly represent any general (nonlinear) $\ObsMod()$. So how does it actually treat the observation operator? What meaning can we assign to the resulting updates? 145 | # *Hint*: consider the limit of $\R \rightarrow 0$. 146 | 147 | # #### Exc -- EnKF nobias (a) 148 | # Consider the ensemble averages, 149 | # - $\bx\supa = \frac{1}{N}\sum_{n=1}^N \x\supa_n$, and 150 | # - $\bx\supf = \frac{1}{N}\sum_{n=1}^N \x\supf_n$, 151 | # 152 | # and recall that the analysis step, eqn. (4), defines $\x\supa_n$ from $\x\supf_n$. 153 | # 154 | # 155 | # (a) Show that, in case $\ObsMod$ is linear (the matrix $\bH$), 156 | # $$\begin{align} 157 | # \Expect \bx\supa &= \bx\supf + \barK \left\{\y\ones\tr - \bH\bx\supf \right\} \,, \tag{6} 158 | # \end{align} 159 | # $$ 160 | # where the expectation, $\Expect$, is taken with respect to $\Dobs$ only (i.e. not the sampling of the forecast ensemble, $\E\supf$ itself). 161 | # 162 | # What does this mean? 163 | 164 | # + 165 | # show_answer("EnKF_nobias_a") 166 | # - 167 | 168 | # #### Exc (optional) -- EnKF nobias (b) 169 | # Consider the ensemble covariance matrices: 170 | # $$\begin{align} 171 | # \barP\supf &= \frac{1}{N-1} \X{\X}\tr \,, \tag{7a} \\\ 172 | # \barP\supa &= \frac{1}{N-1} \X\supa{\X\supa}\tr \,. \tag{7b} 173 | # \end{align}$$ 174 | # 175 | # Now, denote the centralized observation perturbations 176 | # $\D \ceq 177 | # \begin{bmatrix} 178 | # \r_1 -\bar{\r}, & \ldots & \r_n -\bar{\r}, & \ldots & \r_N -\bar{\r} 179 | # \end{bmatrix} $. 180 | # Note that $\D \ones = \bvec{0}$ and that 181 | # $$ 182 | # \begin{align} 183 | # \label{eqn:R_sample_cov_of_D} 184 | # \frac{1}{N-1} \D \D\tr &= \R \,, \tag{9a} \\\ 185 | # \label{eqn:zero_AD_cov} 186 | # \X \D\tr &= \bvec{0} \tag{9b} 187 | # \end{align} 188 | # $$ 189 | # is satisfied in the expected sense, i.e. by taking the expectation on the left-hand side. 190 | # Thereby, show that 191 | # 192 | # $$\begin{align} 193 | # \Expect \, \barP\supa &= [\I_{\xDim} - \barK \bH]\barP\supf \, . \tag{10} 194 | # \end{align}$$ 195 | 196 | # + 197 | # show_answer("EnKF_nobias_b") 198 | # - 199 | 200 | # #### Exc (optional) -- EnKF bias (c) 201 | # Show that, if no observation perturbations are used in eqn. (4), then $\barP\supa$ would be too small. 202 | 203 | # + 204 | # show_answer("EnKF_without_perturbations") 205 | # - 206 | 207 | # ## Experimental setup 208 | # 209 | # Before making the EnKF, we'll set up an experiment to test it with, so that you can check if you've implemented a working method or not. 210 | # 211 | # To that end, we'll use the Lorenz-63 model, from [T7](T7%20-%20Chaos%20%26%20Lorenz%20[optional].ipynb). The coupled ODEs are recalled here, but with some of the parameters fixed. 212 | 213 | # + 214 | xDim = 3 215 | 216 | def dxdt(x, sig=10, rho=28, beta=8/3): 217 | x,y,z = x 218 | d = np.zeros(3) 219 | d[0] = sig*(y - x) 220 | d[1] = rho*x - y - x*z 221 | d[2] = x*y - beta*z 222 | return d 223 | 224 | 225 | # - 226 | 227 | # Next, we make the forecast model $\DynMod$ out of $\frac{d \x}{dt}$ such that $\x(t+dt) = \DynMod(\x(t),t,dt)$. We'll make use of the "4th order Runge-Kutta" integrator `rk4`. 
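As an optional cross-check (a sketch only; it assumes `scipy` is available, as elsewhere in these notebooks, and reuses the `dxdt` defined above), one can compare the fixed-step `rk4` used below against scipy's adaptive `solve_ivp` over one short interval.

```python
from scipy.integrate import solve_ivp
import numpy as np

x0 = np.array([1.509, -1.531, 25.46])
sol = solve_ivp(lambda t, x: dxdt(x), (0, 0.01), x0, rtol=1e-9, atol=1e-9)
print(sol.y[:, -1])   # state after 0.01 time units; should closely match one rk4 step of that length
```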
228 | 229 | # + 230 | from dapper.mods.integration import rk4 231 | 232 | def Dyn(E, t0, dt): 233 | 234 | def step(x0): 235 | return rk4(lambda x, t: dxdt(x), x0, t0, dt) 236 | 237 | if E.ndim == 1: 238 | # Truth (single state vector) case 239 | E = step(E) 240 | else: 241 | # Ensemble case 242 | for n in range(E.shape[1]): 243 | E[:, n] = step(E[:, n]) 244 | 245 | return E 246 | 247 | Q12 = np.zeros((xDim, xDim)) 248 | Q = Q12 @ Q12.T 249 | # - 250 | 251 | # Notice the loop over each ensemble member. For better performance, this should be vectorized, if possible. Or, if the forecast model is computationally demanding (as is typically the case in real applications), the loop should be parallelized: i.e. the forecast simulations should be distributed to separate computers. 252 | 253 | # The following are the time settings that we will use 254 | 255 | dt = 0.01 # integrational time step 256 | dko = 25 # number of steps between observations 257 | dto = dko*dt # time between observations 258 | Ko = 60 # total number of observations 259 | nTime = dko*(Ko+1) # total number of time steps 260 | 261 | # Initial conditions 262 | 263 | xa = np.array([1.509, -1.531, 25.46]) 264 | Pa12 = np.eye(3) 265 | 266 | # Observation model settings 267 | 268 | # + 269 | p = 3 # ndim obs 270 | def Obs(E, t): 271 | return E[:p] if E.ndim == 1 else E[:p, :] 272 | 273 | R12 = np.sqrt(2)*np.eye(p) 274 | R = R12 @ R12.T 275 | # - 276 | 277 | # Generate synthetic truth and observations 278 | 279 | # Init 280 | truths = np.zeros((nTime+1, xDim)) 281 | obsrvs = np.zeros((Ko+1, p)) 282 | truths[0] = xa + Pa12 @ rnd.randn(xDim) 283 | 284 | # Loop 285 | for k in range(1, nTime+1): 286 | truths[k] = Dyn(truths[k-1], (k-1)*dt, dt) 287 | truths[k] += Q12 @ rnd.randn(xDim) 288 | if k % dko == 0: 289 | Ko = k//dko-1 290 | obsrvs[Ko] = Obs(truths[k], np.nan) + R12 @ rnd.randn(p) 291 | 292 | # ## EnKF implementation 293 | 294 | # We will make use of `estimate_mean_and_cov` and `estimate_cross_cov` from the previous section. Paste them in below. 295 | 296 | # + 297 | # def estimate_mean_and_cov ... 298 | # - 299 | 300 | # **Exc -- EnKF implementation:** Complete the code below 301 | 302 | # + 303 | # Useful linear algebra: compute B/A 304 | import numpy.linalg as nla 305 | 306 | ens_means = np.zeros((nTime+1, xDim)) 307 | ens_vrncs = np.zeros((nTime+1, xDim)) 308 | 309 | def my_EnKF(N): 310 | """My implementation of the EnKF.""" 311 | ### Init ### 312 | E = np.zeros((xDim, N)) 313 | for k in tqdm(range(1, nTime+1)): 314 | t = k*dt 315 | ### Forecast ## 316 | # E = ... # use model 317 | # E = ... # add noise 318 | if k % dko == 0: 319 | ### Analysis ## 320 | y = obsrvs[[k//dko-1]].T # current observation 321 | Eo = Obs(E, t) # observed ensemble 322 | # Compute ensemble moments 323 | PH = ... 324 | HPH = ... 325 | # Compute Kalman Gain 326 | KG = ... 327 | # Generate perturbations 328 | Perturb = ... 329 | # Update ensemble with KG 330 | # E = ... 331 | # Save statistics 332 | ens_means[k] = np.mean(E, axis=1) 333 | ens_vrncs[k] = np.var(E, axis=1, ddof=1) 334 | 335 | 336 | # - 337 | 338 | # Notice that we only store some stats (`ens_means`). This is because in large systems, 339 | # keeping the entire ensemble (or its covariance) in memory is probably too much. 
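Regarding the `# Useful linear algebra: compute B/A` hint in the skeleton above, here is a small, generic sketch (illustration only, not the exercise answer) of computing B @ inv(A) via a linear solve rather than an explicit inverse.

```python
import numpy as np
import numpy.linalg as nla

A = np.array([[4.0, 1.0],
              [1.0, 3.0]])                # square and invertible, e.g. playing the role of HPH + R
B = np.array([[1.0, 2.0],
              [0.0, 1.0]])
X_inv   = B @ nla.inv(A)                  # forms the inverse explicitly
X_solve = nla.solve(A.T, B.T).T           # solves X @ A = B without ever inverting A
print(np.allclose(X_inv, X_solve))        # True
```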
340 | 341 | # + 342 | # show_answer('EnKF v1') 343 | # - 344 | 345 | # Now let's try out its capabilities 346 | 347 | # + 348 | # Run assimilation 349 | my_EnKF(10) 350 | 351 | # Plot 352 | fig, axs = plt.subplots(nrows=3, sharex=True) 353 | for i in range(3): 354 | axs[i].plot(dt*np.arange(nTime+1), truths [:, i], 'k', label="Truth") 355 | axs[i].plot(dt*np.arange(nTime+1), ens_means[:, i], 'b', label="Estimate") 356 | if i 408 | # 409 | # ### References 410 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | jupyter_nbextensions_configurator<0.6.4 # compatible with "notebook<6.5" 3 | jupytext 4 | pre-commit 5 | requests 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # DAPPER is maintained for compatibility with Colab. 2 | # https://github.com/nansencenter/DAPPER/issues/41#issuecomment-1381616971 3 | # -e ${HOME}/path/DAPPER 4 | dapper==1.7.3 5 | 6 | ipywidgets 7 | # Fix error "zmq message arrived on closed channel" ... "assert 0 < size <= self._size" 8 | # https://github.com/jupyter/notebook/issues/6721#issuecomment-1662440259 9 | jupyter_client<8 10 | tornado<6.2 11 | 12 | markdown 13 | -------------------------------------------------------------------------------- /tests/test_all.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Test that all notebooks run without error. 3 | 4 | Also test stuff relating to `show_answer`. 5 | 6 | These tests are not for use with pytest (does not use asserts, orchestrates itself). 7 | Simply run the script as any regular Python script. 8 | Why: Mainly because it did not seem necessary. Also I find debugging with pytest somewhat hard. 9 | """ 10 | 11 | from pathlib import Path 12 | import os 13 | import subprocess 14 | import sys 15 | import requests 16 | from urllib.parse import unquote 17 | 18 | from markdown import markdown as md2html 19 | 20 | 21 | UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3' 22 | ROOT = Path(__file__).parents[1] 23 | 24 | 25 | def _report_error(msg): 26 | # raise AssertionError(msg) # for post-portem debugging 27 | print(msg) 28 | return True 29 | 30 | 31 | def _find_anchor(fname: Path, anchor): 32 | lines = fname.read_text().splitlines() 33 | # filter for "# ### Example heading" or "# - ### Heading in bullet point" 34 | headings = [x for x in lines if x.startswith("# #") or x.startswith("# - #")] 35 | headings = [x.lstrip("# -") for x in headings] 36 | headings = [x.replace(" ", "-") for x in headings] 37 | return anchor in headings 38 | 39 | 40 | def assert_all_links_work(lines, fname): 41 | failed = False 42 | for i, line in enumerate(lines): 43 | 44 | # Skip 45 | if not line.startswith("#"): 46 | continue 47 | if any(x in line for x in [ 48 | "www.google.com/search", # because md2html fails to parse 49 | "www.example.com"]): 50 | continue 51 | 52 | # First do a *quick* scan for links. 
53 | if "](" in line or "http" in line: 54 | # Extract link 55 | html = md2html(line) # since extracting url from md w/ regex is hard 56 | # PS: linebreaks in links ⇒ failure (as desired) 57 | link = html.split('href="')[1].split('">')[0] 58 | # fix parsing error for links ending in ')' 59 | if "))" in link: 60 | link = link.split("))")[0] + ")" 61 | 62 | # Common error message 63 | def errm(issue): 64 | return f"Issue on line {i} with {issue} link\n {link}" 65 | 66 | # Internet links 67 | if "http" in link: 68 | response = None 69 | try: 70 | response = requests.head(link, headers={'User-Agent': UA}, allow_redirects=True, timeout=10) 71 | if response.status_code in (403, 405): 72 | # Fallback to GET if HEAD is not allowed or forbidden 73 | response = requests.get(link, headers={'User-Agent': UA}, allow_redirects=True, timeout=10) 74 | # Ignore status code 429 (Too Many Requests) 75 | if response.status_code == 429: 76 | continue 77 | assert response.status_code < 400 78 | except Exception as e: 79 | # Known problematic domains 80 | skip_domains = ["stack", "wiley.com", "springer.com", "elsevier.com"] 81 | status = response.status_code if response is not None else "N/A" 82 | skip = os.getenv("GITHUB_ACTIONS") and any(domain in link for domain in skip_domains) or status == 429 83 | if not skip: 84 | failed |= True 85 | _report_error(errm("**requesting**") + 86 | f"\nStatus code: {status}\nError: {e}") 87 | 88 | # Local links 89 | else: 90 | link = unquote(link) 91 | link_fname, *link_anchor = link.split("#") 92 | 93 | # Validate filename 94 | if link_fname: 95 | if not (ROOT / "notebooks" / link_fname).is_file(): 96 | failed |= _report_error(errm("**filename** of")) 97 | 98 | # Validate anchor 99 | if link_anchor: 100 | if not link_fname: 101 | # Anchor only ⇒ same file 102 | link_fname = fname 103 | else: 104 | # Change "T4...ipynb" --> "tests/T4...py" 105 | link_fname = (ROOT / "tests" / link_fname).with_suffix(".py") 106 | 107 | if not _find_anchor(link_fname, link_anchor[0]): 108 | failed |= _report_error(errm("**anchor tag** of")) 109 | return failed 110 | 111 | 112 | def assert_show_answer(lines, _fname): 113 | """Misc checks on `show_answer`""" 114 | failed = False 115 | found_import = False 116 | for i, line in enumerate(lines): 117 | found_import |= ("show_answer" in line and "import" in line) 118 | if line.lstrip().startswith("show_answer"): 119 | print(f"`show_answer` uncommented on line {i}") 120 | failed |= True 121 | if not found_import: 122 | print("`import show_answer` not found.") 123 | failed = True 124 | return failed 125 | 126 | 127 | def uncomment_show_answer(lines): 128 | """Causes checking existance of answer when script gets run.""" 129 | for i, line in enumerate(lines): 130 | OLD = "# show_answer" 131 | NEW = "show_answer" 132 | if line.startswith(OLD): 133 | lines[i] = line.replace(OLD, NEW) 134 | return lines 135 | 136 | 137 | def make_script_runnable_by_fixing_sys_path(lines): 138 | """Makes it seem like CWD is `notebooks`.""" 139 | return ['import sys', 140 | f"""sys.path.insert(0, '{ROOT / "notebooks"}')""", 141 | ] + lines 142 | 143 | 144 | ## Convert: notebooks/T*.ipynb --> tests/T*.py 145 | print("\nConverting from notebooks/...ipynb to tests/...py") 146 | print("========================================") 147 | text = dict(capture_output=True, text=True) 148 | converted = [] 149 | ipynbs = sorted((ROOT / "notebooks").glob("T*.ipynb")) 150 | for f in ipynbs: 151 | script = (ROOT / "tests" / f.name).with_suffix('.py') 152 | # script = (ROOT / "notebooks" / 
"scripts" / f.name).with_suffix('.py') 153 | converted.append(script) 154 | cmd = ["jupytext", "--output", str(script), str(f)] 155 | print(subprocess.run(cmd, **text, check=True).stdout) 156 | 157 | 158 | ## Static checks. Also: modify scripts 159 | erred = [] 160 | for script in converted: 161 | print("\nStatic analysis for", script.stem) 162 | print("========================================") 163 | lines = script.read_text().splitlines() 164 | failed = False 165 | 166 | # Validatation checks 167 | failed |= assert_all_links_work(lines, script) 168 | failed |= assert_show_answer(lines, script) 169 | 170 | # Modify script in preparation of running it 171 | lines = uncomment_show_answer(lines) 172 | lines = make_script_runnable_by_fixing_sys_path(lines) 173 | 174 | if failed: 175 | erred.append(script) 176 | script.write_text("\n".join(lines)) 177 | 178 | 179 | print("\nStatic analysis for", "answers.py") 180 | print("========================================") 181 | sys.path.insert(0, f"{ROOT / 'notebooks'}") 182 | import resources.answers # type: ignore # noqa 183 | for key, answer in resources.answers.answers.items(): 184 | lines = ["# " + line for line in answer[1].splitlines()] 185 | fname = Path(resources.answers.__file__ + ":" + key) 186 | if assert_all_links_work(lines, fname): 187 | erred.append(fname) 188 | 189 | 190 | ## Run ipynbs as python scripts 191 | for script in converted: 192 | print("\nRunning", script.name) 193 | print("========================================") 194 | run = subprocess.run(["python", str(script)], **text, check=False) 195 | # print(run.stdout) 196 | if run.returncode: 197 | erred.append(script) 198 | print(run.stderr, file=sys.stderr) 199 | 200 | # Provide return code 201 | if erred: 202 | print("========================================") 203 | print("FOUND ISSUES") 204 | print("========================================") 205 | print(*["- " + str(f) for f in erred], file=sys.stderr) 206 | print("See above for individual tracebacks.") 207 | sys.exit(1) 208 | --------------------------------------------------------------------------------