├── .github └── workflows │ └── tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── intro-toc.png ├── notebooks ├── T1 - DA & EnKF.ipynb ├── T2 - Gaussian distribution.ipynb ├── T3 - Bayesian inference.ipynb ├── T4 - Time series filtering.ipynb ├── T5 - Multivariate Kalman filter.ipynb ├── T6 - Geostats & Kriging (optional).ipynb ├── T7 - Chaos & Lorenz (optional).ipynb ├── T8 - Monte-Carlo & ensembles.ipynb ├── T9 - Writing your own EnKF.ipynb ├── dpr_config.yaml ├── resources │ ├── DA_bridges.jpg │ ├── HMM.svg │ ├── HMM.tex │ ├── __init__.py │ ├── answers.py │ ├── colab_bootstrap.sh │ ├── darc_envisat_analyses.mp4 │ ├── exc-2.4-iii.png │ ├── exc-2.5-iv.png │ ├── exc-2.5.png │ ├── illust_EnKF │ │ ├── illust_EnKF.py │ │ ├── illust_EnKF_0.png │ │ ├── illust_EnKF_1.png │ │ ├── illust_EnKF_2.png │ │ ├── illust_EnKF_3.png │ │ ├── illust_EnKF_4.png │ │ ├── illust_EnKF_5.png │ │ ├── illust_EnKF_6.png │ │ └── illust_EnKF_7.png │ ├── macros.py │ └── spellfile.utf-8.add └── scripts │ ├── T1 - DA & EnKF.py │ ├── T2 - Gaussian distribution.py │ ├── T3 - Bayesian inference.py │ ├── T4 - Time series filtering.py │ ├── T5 - Multivariate Kalman filter.py │ ├── T6 - Geostats & Kriging (optional).py │ ├── T7 - Chaos & Lorenz (optional).py │ ├── T8 - Monte-Carlo & ensembles.py │ └── T9 - Writing your own EnKF.py ├── requirements-dev.txt ├── requirements.txt └── tests └── test_all.py /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [ "master" ] 6 | pull_request: 7 | branches: [ "master", "dev" ] 8 | schedule: 9 | - cron: '0 9 * * MON' 10 | 11 | permissions: 12 | contents: read 13 | 14 | jobs: 15 | main: 16 | 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | - name: Set up Python 3.10 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: "3.10" # try to keep similar to Colab 25 | 26 | # Takes too long to run (>40min) 27 | # Could use docker instead? 
https://github.com/wsvn53/docker-colab-runtime-local 28 | # - name: Setup environment similar to Colab 29 | # run: | 30 | # python -m pip install --upgrade pip 31 | # wget https://raw.githubusercontent.com/googlecolab/backend-info/main/pip-freeze.txt -O colab-freeze.txt 32 | # cat colab-freeze.txt | grep -v '^#' | xargs -n 1 pip install # 1-at-a-time ⇒ ignore errors 33 | 34 | - name: Install dependencies 35 | run: | 36 | python -m pip install --upgrade pip 37 | pip install -r requirements-dev.txt 38 | - name: Run tests 39 | run: | 40 | tests/test_all.py 41 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See more at https://www.gitignore.io 2 | 3 | ############################## 4 | ### macOS ### 5 | .DS_Store 6 | ._* 7 | 8 | ############################## 9 | ### Windows ### 10 | ############################## 11 | Thumbs.db 12 | [Dd]esktop.ini 13 | 14 | ############################## 15 | ### Python ### 16 | ############################## 17 | # Byte-compiled / optimized / DLL files 18 | __pycache__/ 19 | *.py[cod] 20 | *$py.class 21 | 22 | # C extensions 23 | *.so 24 | 25 | # Distribution / packaging 26 | .Python 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | pip-wheel-metadata/ 40 | share/python-wheels/ 41 | *.egg-info/ 42 | .installed.cfg 43 | *.egg 44 | MANIFEST 45 | 46 | # Sphinx documentation 47 | docs/_build/ 48 | 49 | # Jupyter Notebook 50 | .ipynb_checkpoints 51 | 52 | 53 | ########## 54 | # Custom # 55 | ########## 56 | tests/T1*.py 57 | tests/T2*.py 58 | tests/T3*.py 59 | tests/T4*.py 60 | tests/T5*.py 61 | tests/T6*.py 62 | tests/T7*.py 63 | tests/T8*.py 64 | tests/T9*.py 65 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Use `pre-commit autoupdate --bleeding-edge` to set to most recent version 2 | repos: 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: v3.4.0 5 | hooks: 6 | - id: check-yaml 7 | - id: check-added-large-files 8 | - id: detect-private-key 9 | - id: check-merge-conflict 10 | - id: debug-statements 11 | - id: requirements-txt-fixer 12 | - id: end-of-file-fixer 13 | - id: mixed-line-ending 14 | args: ['--fix=no'] 15 | - id: trailing-whitespace 16 | exclude: | 17 | (?x)( # make whitespace in this regex insignificant and allow comments 18 | ^README.md| # I use double-space line-endings a lot in my MD. 19 | ^notebooks/resources/answers.py| # MD also used in answers. 20 | ^notebooks/scripts/.*.py| # MD also used in answers. 
21 | ) 22 | 23 | - repo: https://github.com/patnr/nbhooks.git 24 | rev: v1.4.1 25 | hooks: 26 | - id: nb-ensure-clean 27 | # Optional WHITELIST of metadata keys (you can use regex) 28 | args: [--meta, pin_output, --meta, lines_to_next_cell, --meta, lines_to_end_of_cell_marker] 29 | 30 | - repo: https://github.com/mwouts/jupytext 31 | rev: v1.15.1 32 | hooks: 33 | - id: jupytext 34 | args: [--sync] 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Nansen Environmental and Remote Sensing Center 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intro to data assimilation (DA) and the EnKF 2 | 3 | An interactive (Jupyter notebook) tutorial. 4 | Jump right in (no installation!) by clicking 5 | the button of one of these cloud computing providers: 6 | 7 | - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://colab.research.google.com/github/nansencenter/DA-tutorials) 8 | (requires Google login) 9 | - [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nansencenter/DA-tutorials/master) 10 | (no login but can be slow to start) 11 | 12 | *Prerequisites*: basics of calculus, matrices (e.g. inverses), 13 | random variables, Python (numpy). 14 | 15 | ![ToC](./intro-toc.png) 16 | 17 | ### Instructions for working locally 18 | 19 | If you prefer, you can also run these notebooks on your own (Linux/Windows/Mac) computer. 20 | This is a bit snappier than running them online. 21 | 22 | 1. **Prerequisite**: Python 3.9. 23 | If you're an expert, setup a python environment however you like. 24 | Otherwise: 25 | Install [Anaconda](https://www.anaconda.com/download), then 26 | open the [Anaconda terminal](https://docs.conda.io/projects/conda/en/latest/user-guide/getting-started.html#starting-conda) 27 | and run the following commands: 28 | 29 | ```bash 30 | conda create --yes --name my-env python=3.9 31 | conda activate my-env 32 | python --version 33 | ``` 34 | 35 | Ensure the printed version is 3.9. 36 | *Keep using the same terminal for the commands below.* 37 | 38 | 2. 
**Install**: 39 | 40 | - Download and unzip (or `git clone`) 41 | this repository (see the green button up top) 42 | - Move the resulting folder wherever you like 43 | - `cd` into the folder 44 | - Install requirements: 45 | `pip install -r path/to/requirements.txt` 46 | 47 | 3. **Launch the Jupyter notebooks**: 48 | 49 | - Launch the "notebook server" by executing: 50 | `jupyter-notebook` 51 | This will open up a page in your web browser that is a file navigator. 52 | - Enter the folder `DA-tutorials/notebooks`, and click on a tutorial (`T1... .ipynb`). 53 | 54 | 55 | 56 | 57 | ## Developer notes 58 | 59 | *Please don't hesitate to submit issues or pull requests!* 60 | 61 | [![GitHub CI](https://github.com/nansencenter/DA-tutorials/actions/workflows/tests.yml/badge.svg)](https://github.com/nansencenter/DA-tutorials/actions) 62 | 63 | #### Why `scripts/` dir? 64 | 65 | - Easier to read git diffs 66 | - Enable importing from notebook (script mirrors) 67 | -------------------------------------------------------------------------------- /intro-toc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/1cff8de93d8c07eb14926f02b7f519757f3ea80b/intro-toc.png -------------------------------------------------------------------------------- /notebooks/T1 - DA & EnKF.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "ad962db6", 6 | "metadata": {}, 7 | "source": [ 8 | "# T1 - Data assimilation (DA) & the ensemble Kalman filter (EnKF)\n", 9 | "*Copyright (c) 2020, Patrick N. Raanes\n", 10 | "$\n", 11 | "% ######################################## Loading TeX (MathJax)... 
Please wait ########################################\n", 12 | "\\newcommand{\\Reals}{\\mathbb{R}} \\newcommand{\\Expect}[0]{\\mathbb{E}} \\newcommand{\\NormDist}{\\mathscr{N}} \\newcommand{\\DynMod}[0]{\\mathscr{M}} \\newcommand{\\ObsMod}[0]{\\mathscr{H}} \\newcommand{\\mat}[1]{{\\mathbf{{#1}}}} \\newcommand{\\bvec}[1]{{\\mathbf{#1}}} \\newcommand{\\trsign}{{\\mathsf{T}}} \\newcommand{\\tr}{^{\\trsign}} \\newcommand{\\ceq}[0]{\\mathrel{≔}} \\newcommand{\\xDim}[0]{D} \\newcommand{\\supa}[0]{^\\text{a}} \\newcommand{\\supf}[0]{^\\text{f}} \\newcommand{\\I}[0]{\\mat{I}} \\newcommand{\\K}[0]{\\mat{K}} \\newcommand{\\bP}[0]{\\mat{P}} \\newcommand{\\bH}[0]{\\mat{H}} \\newcommand{\\bF}[0]{\\mat{F}} \\newcommand{\\R}[0]{\\mat{R}} \\newcommand{\\Q}[0]{\\mat{Q}} \\newcommand{\\B}[0]{\\mat{B}} \\newcommand{\\C}[0]{\\mat{C}} \\newcommand{\\Ri}[0]{\\R^{-1}} \\newcommand{\\Bi}[0]{\\B^{-1}} \\newcommand{\\X}[0]{\\mat{X}} \\newcommand{\\A}[0]{\\mat{A}} \\newcommand{\\Y}[0]{\\mat{Y}} \\newcommand{\\E}[0]{\\mat{E}} \\newcommand{\\U}[0]{\\mat{U}} \\newcommand{\\V}[0]{\\mat{V}} \\newcommand{\\x}[0]{\\bvec{x}} \\newcommand{\\y}[0]{\\bvec{y}} \\newcommand{\\z}[0]{\\bvec{z}} \\newcommand{\\q}[0]{\\bvec{q}} \\newcommand{\\br}[0]{\\bvec{r}} \\newcommand{\\bb}[0]{\\bvec{b}} \\newcommand{\\bx}[0]{\\bvec{\\bar{x}}} \\newcommand{\\by}[0]{\\bvec{\\bar{y}}} \\newcommand{\\barB}[0]{\\mat{\\bar{B}}} \\newcommand{\\barP}[0]{\\mat{\\bar{P}}} \\newcommand{\\barC}[0]{\\mat{\\bar{C}}} \\newcommand{\\barK}[0]{\\mat{\\bar{K}}} \\newcommand{\\D}[0]{\\mat{D}} \\newcommand{\\Dobs}[0]{\\mat{D}_{\\text{obs}}} \\newcommand{\\Dmod}[0]{\\mat{D}_{\\text{obs}}} \\newcommand{\\ones}[0]{\\bvec{1}} \\newcommand{\\AN}[0]{\\big( \\I_N - \\ones \\ones\\tr / N \\big)}\n", 13 | "$" 14 | ] 15 | }, 16 | { 17 | "cell_type": "markdown", 18 | "id": "2fc12b06", 19 | "metadata": {}, 20 | "source": [ 21 | "### Jupyter\n", 22 | "The \"document\" you're currently reading is a *Jupyter notebook*.\n", 23 | "As you can see, it consists of a sequence of **cells**,\n", 24 | "which can be code (Python) or text (markdown).\n", 25 | "For example, try editing the cell below (double-click it)\n", 26 | "to insert your name, and running it." 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "id": "5abffac7", 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "name = \"Batman\"\n", 37 | "print(\"Hello world! I'm \" + name)\n", 38 | "for i, c in enumerate(name):\n", 39 | " print(i, c)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "id": "8bada75c", 45 | "metadata": {}, 46 | "source": [ 47 | "You will likely be more efficient if you know these **keyboard shortcuts**:\n", 48 | "\n", 49 | "| Navigate | Edit | Exit | Run | Run & go to next |\n", 50 | "|-------------------------------|-------------------|----------------|----------------------------------|-----------------------------------|\n", 51 | "| and | Enter | Esc | Ctrl+Enter | Shift+Enter |\n", 52 | "\n", 53 | "Actually, a notebook connects to a background **session (kernel/runtime/interpreter)** of Python, and all of the code cells (in a given notebook) are connected, meaning that they share variables, functions, and classes. You can start afresh by clicking `restart` somewhere in the top menu bar. The **order** in which you run the cells matters, and from now on,\n", 54 | "\n", 55 | " the 1st code cell in each tutorial will be the following, which you must run before others. 
But if you're on Windows, then you must first delete the line starting with `!wget` (actually it's only needed when running on Google Colab).\n", 56 | "" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "id": "d4828c0e", 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "remote = \"https://raw.githubusercontent.com/nansencenter/DA-tutorials\"\n", 67 | "!wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s\n", 68 | "from resources import show_answer, envisat_video" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "id": "f7544379", 74 | "metadata": {}, 75 | "source": [ 76 | "### Python\n", 77 | "\n", 78 | "There is a huge amount of libraries available in **Python**, including the popular `scipy` and `matplotlib` packages, both with the essential `numpy` library at their core. They're usually abbreviated `sp`, `mpl` (and `plt`), and `np`. Try them out by running the following cell." 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "id": "e20e5037", 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "import numpy as np\n", 89 | "import matplotlib.pyplot as plt\n", 90 | "plt.ion();\n", 91 | "\n", 92 | "# Use numpy's arrays for vectors and matrices. Example constructions:\n", 93 | "a = np.arange(10) # Alternatively: np.array([0,1,2,3,4,5,6,7,8,9])\n", 94 | "I = 2*np.eye(10) # Alternatively: np.diag(2*np.ones(10))\n", 95 | "\n", 96 | "print(\"Indexing examples:\")\n", 97 | "print(\"a =\", a)\n", 98 | "print(\"a[3] =\", a[3])\n", 99 | "print(\"a[0:3] =\", a[0:3])\n", 100 | "print(\"a[:3] =\", a[:3])\n", 101 | "print(\"a[3:] =\", a[3:])\n", 102 | "print(\"a[-1] =\", a[-1])\n", 103 | "print(\"I[:3,:3] =\", I[:3,:3], sep=\"\\n\")\n", 104 | "\n", 105 | "print(\"\\nLinear algebra examples:\")\n", 106 | "print(\"100+a =\", 100+a)\n", 107 | "print(\"I@a =\", I@a)\n", 108 | "print(\"I*a =\", I*a, sep=\"\\n\")\n", 109 | "\n", 110 | "plt.title(\"Plotting example\")\n", 111 | "plt.ylabel(\"i $x^2$\")\n", 112 | "for i in range(4):\n", 113 | " plt.plot(i * a**2, label=\"i = %d\"%i)\n", 114 | "plt.legend();" 115 | ] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "id": "56bca4ea", 120 | "metadata": {}, 121 | "source": [ 122 | "These tutorials require that you are able to understand the above code, but not much beyond that.\n", 123 | "Some exercises will ask you to do some programming, but understanding the pre-written code is also important.\n", 124 | "The interesting parts of the code can all be found in the notebooks themselves\n", 125 | "(as opposed to being hidden away via imports).\n", 126 | "Beware, however, that it is not generally production-ready.\n", 127 | "For example, it overuses global variables, and is lacking in vectorisation,\n", 128 | "generally for the benefit of terseness and simplicity." 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "id": "a6ca0c01", 134 | "metadata": {}, 135 | "source": [ 136 | "### Data assimilation (DA)\n", 137 | "\n", 138 | "**State estimation** (a.k.a. **sequential inference**)\n", 139 | "is the estimation of unknown/uncertain quantities of **dynamical systems**\n", 140 | "based on imprecise (noisy) data/observations. 
This is similar to time series estimation and signal processing,\n", 141 | "but focuses on the case where we have a good (skillful) predictive model of the dynamical system,\n", 142 | "so that we can relate information (estimates) of its *state* at one time to another.\n", 143 | "\n", 144 | "For example, in guidance systems, the *state variable* (vector) consists of at least 6 elements: 3 for the current position and 3 for velocity, whose trajectories we wish to track in time. More sophisticated systems can also include acceleration and/or angular quantities. The *dynamical model* then consists of the fact that displacement is the time integral of the velocity, while the velocity is the integral of acceleration. The noisy *observations* can come from altimetry, sextants, speedometers, compass readings, accelerometers, gyroscopes, or fuel-gauges. The essential point is that we have an *observational model* predicting the observations from the state. For example, the altimeter model is simply the function that selects the $z$ coordinate from the state vector, while the force experienced by an accelerometer can be modelled by Newton's second law of motion, $F = m a$.\n", 145 | "\n", 146 | "In the context of large dynamical systems, especially in geoscience\n", 147 | "(climate, ocean, hydrology, petroleum),\n", 148 | "state estimation is known as **data assimilation** (DA),\n", 149 | "and is thought of as a \"bridge\" between data and models,\n", 150 | "as illustrated on the right (source: https://aics.riken.jp/en)\n", 151 | "DA \"bridges\" data and models.\n", 152 | "For example, in weather applications, the dynamical model is an atmospheric fluid-mechanical simulator, the state variable consists of the fields of pressure, humidity, and wind quantities discretized on a grid,\n", 153 | "and the observations may come from satellites or weather stations.\n", 154 | "\n", 155 | "The most famous state estimation technique is the ***Kalman filter (KF)***, which was developed to steer the Apollo mission rockets to the moon. The KF also has applications outside of control systems, such as speech recognition, video tracking, and finance. But when it was first proposed to apply the KF to DA (specifically, weather forecasting), the idea sounded ludicrous because of some severe **technical challenges in DA (vs. \"classic\" state estimation)**:\n", 156 | " * size of data and models;\n", 157 | " * nonlinearity of models;\n", 158 | " * sparsity and inhomogeneity of data.\n", 159 | "\n", 160 | "Some of these challenges may be recognized in the video below. Can you spot them?"
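An illustrative aside (a minimal sketch, not from the notebook): the guidance-system example above in a few lines of NumPy. The state vector stacks position and velocity, and each observation model is just a function that predicts an observation from the state (or related quantities). All numbers and names here are made up for illustration.

```python
import numpy as np

# Illustrative 6-element state: position (x, y, z) and velocity (vx, vy, vz)
state = np.array([10.0, -4.0, 250.0, 1.0, 0.0, -3.0])

def altimeter(state):
    """Observation model: simply select the z coordinate of the state."""
    return state[2]

def accelerometer(force, mass):
    """Predicted acceleration reading, via Newton's second law F = m a."""
    return force / mass

print("Predicted altimeter reading:", altimeter(state))
print("Predicted accelerometer reading:", accelerometer(force=6.0, mass=2.0))
```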
161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "id": "3a42cd08", 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "envisat_video()" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "id": "10632e39", 176 | "metadata": {}, 177 | "source": [ 178 | "### The EnKF\n", 179 | "The EnKF an ensemble (Monte-Carlo) formulation of the KF\n", 180 | "that manages (fairly well) to deal with the above challenges in DA.\n", 181 | "\n", 182 | "For those familiar with the method of 4D-Var, **further advantages of the EnKF** include it being:\n", 183 | " * Non-invasive: the models are treated as black boxes, and no explicit Jacobian is required.\n", 184 | " * Bayesian:\n", 185 | " * provides ensemble of possible realities;\n", 186 | " - arguably the most practical form of \"uncertainty quantification\";\n", 187 | " - ideal way to initialize \"ensemble forecasts\";\n", 188 | " * uses \"flow-dependent\" background covariances in the analysis.\n", 189 | " * Embarrassingly parallelizable:\n", 190 | " * distributed across realizations for model forecasting;\n", 191 | " * distributed across local domains for observation analysis.\n", 192 | "\n", 193 | "The rest of this tutorial provides an EnKF-centric presentation of DA." 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "id": "6555b987", 199 | "metadata": {}, 200 | "source": [ 201 | "### DAPPER example\n", 202 | "This tutorial builds on the underlying package, DAPPER, made for academic research in DA and its dissemination. For example, the code below is taken from `DAPPER/example_1.py`. It illustrates DA on a small toy problem. At the end of these tutorials, you should be able to reproduce (from the ground up) this type of experiment.\n", 203 | "\n", 204 | "Run the cells in order and try to interpret the output.\n", 205 | "\n", 206 | "Don't worry if you can't understand what's going on -- we will discuss it later throughout the tutorials.\n", 207 | "\n" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "id": "eada171d", 214 | "metadata": {}, 215 | "outputs": [], 216 | "source": [ 217 | "import dapper as dpr\n", 218 | "import dapper.da_methods as da\n", 219 | "\n", 220 | "# Load experiment setup: the hidden Markov model (HMM)\n", 221 | "from dapper.mods.Lorenz63.sakov2012 import HMM\n", 222 | "HMM.tseq.T = 30 # shorten experiment\n", 223 | "\n", 224 | "# Simulate synthetic truth (xx) and noisy obs (yy)\n", 225 | "xx, yy = HMM.simulate()\n", 226 | "\n", 227 | "# Specify a DA method configuration (\"xp\" is short for \"experiment\")\n", 228 | "# xp = da.OptInterp()\n", 229 | "# xp = da.Var3D()\n", 230 | "# xp = da.ExtKF(infl=90)\n", 231 | "xp = da.EnKF('Sqrt', N=10, infl=1.02, rot=True)\n", 232 | "# xp = da.PartFilt(N=100, reg=2.4, NER=0.3)\n", 233 | "\n", 234 | "# Assimilate yy, knowing the HMM; xx is used to assess the performance\n", 235 | "xp.assimilate(HMM, xx, yy)\n", 236 | "\n", 237 | "# #### Average the time series of various statistics\n", 238 | "# print(xp.stats) # ⇒ long printout\n", 239 | "xp.stats.average_in_time()\n", 240 | "\n", 241 | "print(xp.avrgs.tabulate(['rmse.a', 'rmv.a']))" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": null, 247 | "id": "65df0a6d", 248 | "metadata": {}, 249 | "outputs": [], 250 | "source": [ 251 | "xp.stats.replay()" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": null, 257 | "id": "16807f64", 258 | "metadata": {}, 259 | "outputs": [], 260 | 
"source": [ 261 | "# Some more diagnostics\n", 262 | "if False:\n", 263 | " import dapper.tools.viz as viz\n", 264 | " viz.plot_rank_histogram(xp.stats)\n", 265 | " viz.plot_err_components(xp.stats)\n", 266 | " viz.plot_hovmoller(xx)" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "id": "615e4fa2", 272 | "metadata": {}, 273 | "source": [ 274 | "### Vocabulary exercises\n", 275 | "**Exc -- Word association:**\n", 276 | "Fill in the `x`'s in the table to group the words with similar meaning.\n", 277 | "\n", 278 | "`Sample, Random, Measurements, Forecast initialisation, Monte-Carlo, Observations, Set of draws`\n", 279 | "\n", 280 | "- Ensemble, x, x\n", 281 | "- Stochastic, x, x\n", 282 | "- Data, x, x\n", 283 | "- Filtering, x\n" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": null, 289 | "id": "d4d80e88", 290 | "metadata": {}, 291 | "outputs": [], 292 | "source": [ 293 | "# show_answer('thesaurus 1')" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "id": "b78a8dd7", 299 | "metadata": {}, 300 | "source": [ 301 | "* \"The answer\" is given from the perspective of DA. Do you agree with it?\n", 302 | "* Can you describe the (important!) nuances between the similar words?" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "id": "e9fa6219", 308 | "metadata": {}, 309 | "source": [ 310 | "**Exc (optional) -- Word association 2:**\n", 311 | "Also group these words:\n", 312 | "\n", 313 | "`Inverse problems, Operator, Sample point, Transform(ation), Knowledge, Relation, Probability, Mapping, Particle, Sequential, Inversion, Realization, Relative frequency, Information, Iterative, Estimate, Estimation, Single draw, Serial, Regression, Model, Fitting, Uncertainty`\n", 314 | "\n", 315 | "- Statistical inference, x, x, x, x, x\n", 316 | "- Ensemble member, x, x, x, x\n", 317 | "- Quantitative belief, x, x, x, x, x, x\n", 318 | "- Recursive, x, x, x\n", 319 | "- Function, x, x, x, x, x" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": null, 325 | "id": "d6431a12", 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "# show_answer('thesaurus 2')" 330 | ] 331 | }, 332 | { 333 | "cell_type": "markdown", 334 | "id": "37d394ce", 335 | "metadata": {}, 336 | "source": [ 337 | "**Exc (optional) -- intro discussion:** Prepare to discuss the following questions. Use any tool at your disposal.\n", 338 | "* (a) What is a \"dynamical system\"?\n", 339 | "* (b) What are \"state variables\"? How do they differ from parameters?\n", 340 | "* (c) What are \"prognostic\" variables? How do they differ from \"diagnostic\" variables?\n", 341 | "* (d) What is DA?\n", 342 | "* (e) Is DA a science, an engineering art, or a dark art?\n", 343 | "* (f) What is the point of \"Hidden Markov Models\"?" 
344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "id": "a87b3af4", 350 | "metadata": {}, 351 | "outputs": [], 352 | "source": [ 353 | "# show_answer('Discussion topics 1')" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "id": "38f98fca", 359 | "metadata": {}, 360 | "source": [ 361 | "### Next: [T2 - Gaussian distribution](T2%20-%20Gaussian%20distribution.ipynb)" 362 | ] 363 | } 364 | ], 365 | "metadata": { 366 | "jupytext": { 367 | "formats": "ipynb,scripts//py:light" 368 | }, 369 | "kernelspec": { 370 | "display_name": "Python 3 (ipykernel)", 371 | "language": "python", 372 | "name": "python3" 373 | }, 374 | "language_info": { 375 | "codemirror_mode": { 376 | "name": "ipython", 377 | "version": 3 378 | }, 379 | "file_extension": ".py", 380 | "mimetype": "text/x-python", 381 | "name": "python", 382 | "nbconvert_exporter": "python", 383 | "pygments_lexer": "ipython3", 384 | "version": "3.9.16" 385 | } 386 | }, 387 | "nbformat": 4, 388 | "nbformat_minor": 5 389 | } 390 | -------------------------------------------------------------------------------- /notebooks/T2 - Gaussian distribution.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "96f94593", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "remote = \"https://raw.githubusercontent.com/nansencenter/DA-tutorials\"\n", 11 | "!wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "id": "6cb7f04e", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "from resources import show_answer, interact\n", 22 | "%matplotlib inline\n", 23 | "import numpy as np\n", 24 | "import scipy as sp\n", 25 | "import matplotlib.pyplot as plt\n", 26 | "plt.ion();" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "id": "1ff70496", 32 | "metadata": {}, 33 | "source": [ 34 | "Before discussing sequential, time-dependent inference,\n", 35 | "we need to know how to estimate unknowns based on a single data/observations (vector).\n", 36 | "But before discussing *Bayes' rule*,\n", 37 | "we should review the most useful of probability distributions.\n", 38 | "# T2 - The Gaussian (Normal) distribution\n", 39 | "$\n", 40 | "% ######################################## Loading TeX (MathJax)... 
Please wait ########################################\n", 41 | "\\newcommand{\\Reals}{\\mathbb{R}} \\newcommand{\\Expect}[0]{\\mathbb{E}} \\newcommand{\\NormDist}{\\mathscr{N}} \\newcommand{\\DynMod}[0]{\\mathscr{M}} \\newcommand{\\ObsMod}[0]{\\mathscr{H}} \\newcommand{\\mat}[1]{{\\mathbf{{#1}}}} \\newcommand{\\bvec}[1]{{\\mathbf{#1}}} \\newcommand{\\trsign}{{\\mathsf{T}}} \\newcommand{\\tr}{^{\\trsign}} \\newcommand{\\ceq}[0]{\\mathrel{≔}} \\newcommand{\\xDim}[0]{D} \\newcommand{\\supa}[0]{^\\text{a}} \\newcommand{\\supf}[0]{^\\text{f}} \\newcommand{\\I}[0]{\\mat{I}} \\newcommand{\\K}[0]{\\mat{K}} \\newcommand{\\bP}[0]{\\mat{P}} \\newcommand{\\bH}[0]{\\mat{H}} \\newcommand{\\bF}[0]{\\mat{F}} \\newcommand{\\R}[0]{\\mat{R}} \\newcommand{\\Q}[0]{\\mat{Q}} \\newcommand{\\B}[0]{\\mat{B}} \\newcommand{\\C}[0]{\\mat{C}} \\newcommand{\\Ri}[0]{\\R^{-1}} \\newcommand{\\Bi}[0]{\\B^{-1}} \\newcommand{\\X}[0]{\\mat{X}} \\newcommand{\\A}[0]{\\mat{A}} \\newcommand{\\Y}[0]{\\mat{Y}} \\newcommand{\\E}[0]{\\mat{E}} \\newcommand{\\U}[0]{\\mat{U}} \\newcommand{\\V}[0]{\\mat{V}} \\newcommand{\\x}[0]{\\bvec{x}} \\newcommand{\\y}[0]{\\bvec{y}} \\newcommand{\\z}[0]{\\bvec{z}} \\newcommand{\\q}[0]{\\bvec{q}} \\newcommand{\\br}[0]{\\bvec{r}} \\newcommand{\\bb}[0]{\\bvec{b}} \\newcommand{\\bx}[0]{\\bvec{\\bar{x}}} \\newcommand{\\by}[0]{\\bvec{\\bar{y}}} \\newcommand{\\barB}[0]{\\mat{\\bar{B}}} \\newcommand{\\barP}[0]{\\mat{\\bar{P}}} \\newcommand{\\barC}[0]{\\mat{\\bar{C}}} \\newcommand{\\barK}[0]{\\mat{\\bar{K}}} \\newcommand{\\D}[0]{\\mat{D}} \\newcommand{\\Dobs}[0]{\\mat{D}_{\\text{obs}}} \\newcommand{\\Dmod}[0]{\\mat{D}_{\\text{obs}}} \\newcommand{\\ones}[0]{\\bvec{1}} \\newcommand{\\AN}[0]{\\big( \\I_N - \\ones \\ones\\tr / N \\big)}\n", 42 | "$\n", 43 | "Computers generally represent functions *numerically* by their values on a grid\n", 44 | "of points (nodes), an approach called ***discretisation***.\n", 45 | "Don't hesitate to change the grid resolution as you go along!" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "id": "80586aed", 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "bounds = -20, 20\n", 56 | "N = 201 # num of grid points\n", 57 | "grid1d = np.linspace(*bounds,N) # grid\n", 58 | "dx = grid1d[1] - grid1d[0] # grid spacing" 59 | ] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "id": "0e40b3ce", 64 | "metadata": {}, 65 | "source": [ 66 | "## The univariate (a.k.a. 1-dimensional, scalar) case\n", 67 | "Consider the Gaussian random variable $x \\sim \\NormDist(\\mu, \\sigma^2)$. \n", 68 | "Its probability density function (**pdf**),\n", 69 | "$\n", 70 | "p(x) = \\NormDist(x \\mid \\mu, \\sigma^2)\n", 71 | "$ for $x \\in (-\\infty, +\\infty)$,\n", 72 | "is given by\n", 73 | "$$\\begin{align}\n", 74 | "\\NormDist(x \\mid \\mu, \\sigma^2) = (2 \\pi \\sigma^2)^{-1/2} e^{-(x-\\mu)^2/2 \\sigma^2} \\,. \\tag{G1}\n", 75 | "\\end{align}$$\n", 76 | "\n", 77 | "Run the cell below to define a function to compute the pdf (G1) using the `scipy` library." 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "id": "b3be2918", 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "def pdf_G1(x, mu, sigma2):\n", 88 | " \"Univariate Gaussian pdf\"\n", 89 | " pdf_values = sp.stats.norm.pdf(x, loc=mu, scale=np.sqrt(sigma2))\n", 90 | " return pdf_values" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "id": "069911f0", 96 | "metadata": {}, 97 | "source": [ 98 | "The following code plots the Gaussian pdf." 
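Before the plot, a quick sanity check of the discretisation (an illustrative sketch, assuming the `pdf_G1`, `grid1d`, and `dx` defined above): the grid values of the pdf, weighted by the grid spacing, should sum to approximately 1.

```python
import numpy as np

# Riemann-sum approximation of the integral of the pdf over the grid.
integral = np.sum(pdf_G1(grid1d, mu=0, sigma2=25)) * dx
print("Integral over the grid ≈", integral)  # should be close to 1
```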
99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": null, 104 | "id": "0442e503", 105 | "metadata": { 106 | "lines_to_next_cell": 1 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "hist = []\n", 111 | "@interact(mu=bounds, sigma=(.1, 10, 1))\n", 112 | "def plot_pdf(mu=0, sigma=5):\n", 113 | " plt.figure(figsize=(6, 2))\n", 114 | " colors = plt.get_cmap('hsv')([(k-len(hist))%9/9 for k in range(9)])\n", 115 | " plt.xlim(*bounds)\n", 116 | " plt.ylim(0, .2)\n", 117 | " hist.insert(0, pdf_G1(grid1d, mu, sigma**2))\n", 118 | " for density_values, color in zip(hist, colors):\n", 119 | " plt.plot(grid1d, density_values, c=color)\n", 120 | " plt.show()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "id": "8e0059c2", 126 | "metadata": {}, 127 | "source": [ 128 | "#### Exc -- parameter influence\n", 129 | "Play around with `mu` and `sigma` to answer these questions:\n", 130 | " * How does the pdf curve change when `mu` changes?\n", 131 | " * How does the pdf curve change when you increase `sigma`?\n", 132 | " * In a few words, describe the shape of the Gaussian pdf curve.\n", 133 | " Does this ring a bell? *Hint: it should be clear as a bell!*" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "id": "d266ac66", 139 | "metadata": {}, 140 | "source": [ 141 | "**Exc -- Implementation:** Change the implementation of `pdf_G1` so as to not use `scipy`, but your own code (using `numpy` only). Re-run all of the above cells and check that you get the same plots as before. \n", 142 | "*Hint: `**` is the exponentiation/power operator, but $e^x$ is more efficiently computed with `np.exp(x)`*" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "id": "b3e1106d", 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "# show_answer('pdf_G1')" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "id": "94b6d541", 158 | "metadata": {}, 159 | "source": [ 160 | "**Exc -- Derivatives:** Recall $p(x) = \\NormDist(x \\mid \\mu, \\sigma^2)$ from eqn (G1). \n", 161 | "Use pen, paper, and calculus to answer the following questions, \n", 162 | "which derive some helpful mnemonics about the distribution.\n", 163 | "\n", 164 | " * (i) Find $x$ such that $p(x) = 0$.\n", 165 | " * (ii) Where is the location of the **mode (maximum)** of the density? \n", 166 | " I.e. find $x$ such that $\\frac{d p}{d x}(x) = 0$.\n", 167 | " *Hint: begin by writing $p(x)$ as $c e^{- J(x)}$ for some $J(x)$.*\n", 168 | " * (iii) Where is the **inflection point**? I.e. where $\\frac{d^2 p}{d x^2}(x) = 0$.\n", 169 | " * (iv) *Optional*: Some forms of *sensitivity analysis* (typically for non-Gaussian $p$) consist in estimating/approximating the Hessian, i.e. $\\frac{d^2 \\log p}{d x^2}$. Explain what this has to do with *uncertainty quantification*." 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "id": "90e3d08a", 175 | "metadata": {}, 176 | "source": [ 177 | "#### Exc (optional) -- Probability and Change of variables\n", 178 | "Let $z = \\phi(x)$ for some monotonic function $\\phi$,\n", 179 | "and $p_x$ and $p_z$ be their probability density functions (pdf).\n", 180 | "- (a): Show that $p_z(z) = p_x\\big(\\phi^{-1}(z)\\big) \\frac{1}{|\\phi'(z)|}$,\n", 181 | "- (b): Recall the definition of the expectation, $ \\Expect[x] ≔ \\int x \\, p_x(x) \\, d x $, where ***the integral is over the domain***\n", 182 | " (i.e. 
from $-\\infty$ to $+\\infty$ in the case of Gaussian distributions).\n", 183 | " Show that you don't need to derive the density of $z$ in order to compute its expectation, i.e. that\n", 184 | " $$ \\Expect[z] = \\int \\phi(x) \\, p_x(x) \\, d x ≕ \\Expect[\\phi(x)] \\,,$$\n", 185 | " *Hint: while the proof is convoluted, the result itself is [pretty intuitive](https://en.wikipedia.org/wiki/Law_of_the_unconscious_statistician).*" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "id": "92777530", 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "# show_answer('CVar in proba')" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "id": "8311df4c", 201 | "metadata": {}, 202 | "source": [ 203 | "#### Exc (optional) -- Integrals\n", 204 | "Recall $p(x) = \\NormDist(x \\mid \\mu, \\sigma^2)$ from eqn (G1). Abbreviate it using $c = (2 \\pi \\sigma^2)^{-1/2}$. \n", 205 | "Use pen, paper, and calculus to show that\n", 206 | " - (i) the first parameter, $\\mu$, indicates its **mean**, i.e. that $$\\mu = \\Expect[x] \\,.$$\n", 207 | " *Hint: you can rely on the result of (iii)*\n", 208 | " - (ii) the second parameter, $\\sigma^2>0$, indicates its **variance**,\n", 209 | " i.e. that $$\\sigma^2 = \\mathbb{Var}(x) \\mathrel{≔} \\Expect[(x-\\mu)^2] \\,.$$\n", 210 | " *Hint: use $x^2 = x x$ to enable integration by parts.*\n", 211 | " - (iii) $E[1] = 1$, \n", 212 | " thus proving that (G1) indeed uses the right normalising constant. \n", 213 | " *Hint: Neither Bernoulli nor Laplace managed this,\n", 214 | " until Gauss did by first deriving $(E[1])^2$. \n", 215 | " For more (visual) help, watch [3Blue1Brown](https://www.youtube.com/watch?v=cy8r7WSuT1I&t=3m52s).*" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "id": "5470edc9", 222 | "metadata": {}, 223 | "outputs": [], 224 | "source": [ 225 | "# show_answer('Gauss integrals')" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "id": "ce40fa45", 231 | "metadata": {}, 232 | "source": [ 233 | "**Exc -- The uniform pdf**:\n", 234 | "Below is the pdf of the [uniform/flat/box distribution](https://en.wikipedia.org/wiki/Uniform_distribution_(continuous))\n", 235 | "for a given mean and variance.\n", 236 | "- Replace `_G1` by `_U1` in the code generating the above interactive plot.\n", 237 | "- Why are the walls (ever so slightly) inclined?\n", 238 | "- Write your own implementation below, and check that it reproduces the `scipy` version already in place." 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "id": "35c1e382", 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "def pdf_U1(x, mu, sigma2):\n", 249 | " a = mu - np.sqrt(3*sigma2)\n", 250 | " b = mu + np.sqrt(3*sigma2)\n", 251 | " pdf_values = sp.stats.uniform(loc=a, scale=(b-a)).pdf(x)\n", 252 | " # Your own implementation:\n", 253 | " # height = ...\n", 254 | " # pdf_values = height * np.ones_like(x)\n", 255 | " # pdf_values[x<a] = ...\n", 256 | " # pdf_values[x>b] = ...\n", 257 | " return pdf_values" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": null, 263 | "id": "94e09bbf", 264 | "metadata": {}, 265 | "outputs": [], 266 | "source": [ 267 | "# show_answer('pdf_U1')" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "id": "b28223c0", 273 | "metadata": {}, 274 | "source": [ 275 | "## The multivariate (i.e.
vector) case\n", 276 | "Here's the pdf of the *multivariate* Gaussian (for any dimension $\\ge 1$):\n", 277 | "$$\\begin{align}\n", 278 | "\\NormDist(\\x \\mid \\mathbf{\\mu}, \\mathbf{\\Sigma})\n", 279 | "&=\n", 280 | "|2 \\pi \\mathbf{\\Sigma}|^{-1/2} \\, \\exp\\Big(-\\frac{1}{2}\\|\\x-\\mathbf{\\mu}\\|^2_\\mathbf{\\Sigma} \\Big) \\,, \\tag{GM}\n", 281 | "\\end{align}$$\n", 282 | "where $|.|$ represents the matrix determinant, \n", 283 | "and $\\|.\\|_\\mathbf{W}$ represents a weighted 2-norm: $\\|\\x\\|^2_\\mathbf{W} = \\x^T \\mathbf{W}^{-1} \\x$. \n", 284 | "*PS: The norm (quadratic form) is invariant to antisymmetry in the weight matrix,\n", 285 | "so we take $\\mathbf{\\Sigma}$ to be symmetric.\n", 286 | "Further, the density (GM) is only integrable over $\\Reals^{\\xDim}$ if $\\mathbf{\\Sigma}$ is positive-definite.*\n", 287 | "\n", 288 | "It is important to recognize how similar eqn. (GM) is to the univariate (scalar) case (G1).\n", 289 | "Moreover, [as above](#Exc-(optional)----Integrals) it can be shown that\n", 290 | "- $\\mathbf{\\mu} = \\Expect[\\x]$,\n", 291 | "- $\\mathbf{\\Sigma} = \\Expect[(\\x-\\mu)(\\x-\\mu)\\tr]$.\n", 292 | "\n", 293 | "Note that that the elements of $\\mathbf{\\Sigma}$ are individual covariances,\n", 294 | "$\\Sigma_{i,j} = \\Expect[(x_i-\\mu_i)(x_j-\\mu_j)] = \\mathbb{Cov}(x_i, x_j)$.\n", 295 | "Therefore $\\mathbf{\\Sigma}$ is called the *covariance (matrix)*.\n", 296 | "and its diagonal entries are simply variances, $\\Sigma_{i,i} = \\mathbb{Var}(x_i)$.\n", 297 | "\n", 298 | "The following implements the pdf (GM). Take a moment to digest the code, but don't worry if you don't understand it all. Hints:\n", 299 | " * `@` produces matrix multiplication (`*` in `Matlab`);\n", 300 | " * `*` produces array multiplication (`.*` in `Matlab`);\n", 301 | " * `axis=-1` makes `np.sum()` work along the last dimension of an ND-array." 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "id": "6f75d1bd", 308 | "metadata": {}, 309 | "outputs": [], 310 | "source": [ 311 | "from numpy.linalg import det, inv\n", 312 | "\n", 313 | "def weighted_norm22(points, Wi):\n", 314 | " \"Computes the weighted norm of each vector (row in `points`).\"\n", 315 | " return np.sum( (points @ inv(Wi)) * points, axis=-1)\n", 316 | "\n", 317 | "def pdf_GM(points, mu, Sigma):\n", 318 | " \"pdf -- Gaussian, Multivariate: N(x | mu, Sigma) for each x in `points`.\"\n", 319 | " c = np.sqrt(det(2*np.pi*Sigma))\n", 320 | " return 1/c * np.exp(-0.5*weighted_norm22(points - mu, Sigma))" 321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "id": "52569aaf", 326 | "metadata": {}, 327 | "source": [ 328 | "The following code plots the pdf as contour (iso-density) curves." 
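Before plotting, `pdf_GM` can be sanity-checked against `scipy.stats.multivariate_normal` (an illustrative sketch; the test mean and covariance below are arbitrary values made up for the check, and `pdf_GM` is assumed from the cell above).

```python
import numpy as np
from scipy.stats import multivariate_normal

test_points = np.random.randn(5, 2)     # 5 arbitrary points in 2-D
test_mu = np.array([0.5, -1.0])
test_Sigma = np.array([[2.0, 0.3],
                       [0.3, 1.0]])     # symmetric, positive-definite

ours = pdf_GM(test_points, test_mu, test_Sigma)
reference = multivariate_normal(mean=test_mu, cov=test_Sigma).pdf(test_points)
print("Max abs. difference:", np.abs(ours - reference).max())  # ~ machine precision
```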
329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": null, 334 | "id": "5dc581fa", 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "grid2d = np.dstack(np.meshgrid(grid1d, grid1d))\n", 339 | "\n", 340 | "@interact(corr=(-1, 1, .001), std_x=(1e-5, 10, 1))\n", 341 | "def plot_pdf_G2(corr=0.7, std_x=1):\n", 342 | " # Form covariance matrix (C) from input and some constants\n", 343 | " var_x = std_x**2\n", 344 | " var_y = 1\n", 345 | " cv_xy = np.sqrt(var_x * var_y) * corr\n", 346 | " C = 25 * np.array([[var_x, cv_xy],\n", 347 | " [cv_xy, var_y]])\n", 348 | " # Evaluate (compute)\n", 349 | " density_values = pdf_GM(grid2d, mu=0, Sigma=C)\n", 350 | " # Plot\n", 351 | " plt.figure(figsize=(4, 4))\n", 352 | " height = 1/np.sqrt(det(2*np.pi*C))\n", 353 | " plt.contour(grid1d, grid1d, density_values,\n", 354 | " levels=np.linspace(1e-4, height, 11), cmap=\"plasma\")\n", 355 | " plt.axis('equal');\n", 356 | " plt.show()" 357 | ] 358 | }, 359 | { 360 | "cell_type": "markdown", 361 | "id": "34bfbdaa", 362 | "metadata": {}, 363 | "source": [ 364 | "**Exc -- Correlation influence:** How do the contours look? Try to understand why. Cases:\n", 365 | " * (a) correlation=0.\n", 366 | " * (b) correlation=0.99.\n", 367 | " * (c) correlation=0.5. (Note that we've used `plt.axis('equal')`).\n", 368 | " * (d) correlation=0.5, but with non-equal variances.\n", 369 | "\n", 370 | "Finally (optional): why does the code \"crash\" when `corr = +/- 1` ? Is this a good or a bad thing? \n", 371 | "*Hint: do you like playing with fire?*" 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "id": "ae90de7d", 377 | "metadata": {}, 378 | "source": [ 379 | "**Exc Correlation game:** Play [here](http://guessthecorrelation.com/) until you get a score (gold coins) of 5 or more. \n", 380 | "*PS: you can probably tell that the samples are not drawn from Gaussian distributions. However, the quantitiy $\\mathbb{Cov}(x_i, x_i)$ is well defined and can be estimated from the samples.*" 381 | ] 382 | }, 383 | { 384 | "cell_type": "markdown", 385 | "id": "c6f56365", 386 | "metadata": {}, 387 | "source": [ 388 | "**Exc -- Correlation disambiguation:**\n", 389 | "* What's the difference between correlation and covariance?\n", 390 | "* What's the difference between non-zero (C) correlation (or covariance) and (D) dependence?\n", 391 | " *Hint: consider this [image](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#/media/File:Correlation_examples2.svg).* \n", 392 | " - Does $C \\Rightarrow D$ or the converse? \n", 393 | " - What about the negation, $\\neg D \\Rightarrow \\neg C$, or its converse?* \n", 394 | " - What about the the (jointly) Gaussian case?\n", 395 | "* Does correlation (or dependence) imply causation?\n", 396 | "* Suppose $x$ and $y$ have non-zero correlation, but neither one causes the other.\n", 397 | " Does information about $y$ give you information about $x$?" 398 | ] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | "id": "a7fcfd1a", 403 | "metadata": {}, 404 | "source": [ 405 | "**Exc (optional) -- Gaussian ubuiqity:** Why are we so fond of the Gaussian assumption?" 
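One commonly cited reason (an illustrative aside; see the answer cell below for the notebook's own discussion) is the central limit theorem: averages of many independent draws look approximately Gaussian, whatever the underlying distribution. A minimal demonstration, with made-up sample sizes:

```python
import numpy as np
import matplotlib.pyplot as plt

rng = np.random.default_rng(1)
# Each sample is the average of 30 independent uniform draws.
averages = rng.uniform(size=(10_000, 30)).mean(axis=1)

plt.hist(averages, bins=50, density=True)
plt.title("Averages of 30 uniform draws: approximately Gaussian (CLT)")
plt.show()
```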
406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": null, 411 | "id": "e43ac120", 412 | "metadata": {}, 413 | "outputs": [], 414 | "source": [ 415 | "# show_answer('Why Gaussian')" 416 | ] 417 | }, 418 | { 419 | "cell_type": "markdown", 420 | "id": "ace12e0d", 421 | "metadata": {}, 422 | "source": [ 423 | "## Summary\n", 424 | "The Normal/Gaussian distribution is bell-shaped.\n", 425 | "Its parameters are the mean and the variance.\n", 426 | "In the multivariate case, the mean is a vector,\n", 427 | "while the second parameter becomes a covariance *matrix*,\n", 428 | "whose off-diagonal elements represent scaled correlation factors,\n", 429 | "which measure *linear* dependence.\n", 430 | "\n", 431 | "### Next: [T3 - Bayesian inference](T3%20-%20Bayesian%20inference.ipynb)" 432 | ] 433 | } 434 | ], 435 | "metadata": { 436 | "anaconda-cloud": {}, 437 | "jupytext": { 438 | "formats": "ipynb,scripts//py" 439 | }, 440 | "kernelspec": { 441 | "display_name": "Python 3 (ipykernel)", 442 | "language": "python", 443 | "name": "python3" 444 | }, 445 | "language_info": { 446 | "codemirror_mode": { 447 | "name": "ipython", 448 | "version": 3 449 | }, 450 | "file_extension": ".py", 451 | "mimetype": "text/x-python", 452 | "name": "python", 453 | "nbconvert_exporter": "python", 454 | "pygments_lexer": "ipython3", 455 | "version": "3.9.16" 456 | } 457 | }, 458 | "nbformat": 4, 459 | "nbformat_minor": 5 460 | } 461 | -------------------------------------------------------------------------------- /notebooks/T8 - Monte-Carlo & ensembles.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "3703145a", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "remote = \"https://raw.githubusercontent.com/nansencenter/DA-tutorials\"\n", 11 | "!wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "id": "9ea12014", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "from resources import show_answer, interact, import_from_nb\n", 22 | "%matplotlib inline\n", 23 | "import numpy as np\n", 24 | "import matplotlib as mpl\n", 25 | "import scipy.stats as ss\n", 26 | "import numpy.random as rnd\n", 27 | "import matplotlib.pyplot as plt\n", 28 | "from scipy.stats import gaussian_kde\n", 29 | "plt.ion();" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "id": "dad1ac32", 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "(pdf_G1, grid1d) = import_from_nb(\"T2\", (\"pdf_G1\", \"grid1d\"))" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "id": "d0e48e86", 45 | "metadata": {}, 46 | "source": [ 47 | "# T8 - The ensemble (Monte-Carlo) approach\n", 48 | "is an approximate method for doing Bayesian inference.\n", 49 | "Instead of computing the full (gridvalues, or parameters, of the) posterior distributions,\n", 50 | "we instead try to generate ensembles from them.\n", 51 | "An ensemble is an *iid* sample. I.e. a set of \"members\" (\"particles\", \"realizations\", or \"sample points\") that have been drawn (\"sampled\") independently from the same distribution. With the EnKF, these assumptions are generally tenuous, but pragmatic.\n", 52 | "$\n", 53 | "% ######################################## Loading TeX (MathJax)... 
Please wait ########################################\n", 54 | "\\newcommand{\\Reals}{\\mathbb{R}} \\newcommand{\\Expect}[0]{\\mathbb{E}} \\newcommand{\\NormDist}{\\mathscr{N}} \\newcommand{\\DynMod}[0]{\\mathscr{M}} \\newcommand{\\ObsMod}[0]{\\mathscr{H}} \\newcommand{\\mat}[1]{{\\mathbf{{#1}}}} \\newcommand{\\bvec}[1]{{\\mathbf{#1}}} \\newcommand{\\trsign}{{\\mathsf{T}}} \\newcommand{\\tr}{^{\\trsign}} \\newcommand{\\ceq}[0]{\\mathrel{≔}} \\newcommand{\\xDim}[0]{D} \\newcommand{\\supa}[0]{^\\text{a}} \\newcommand{\\supf}[0]{^\\text{f}} \\newcommand{\\I}[0]{\\mat{I}} \\newcommand{\\K}[0]{\\mat{K}} \\newcommand{\\bP}[0]{\\mat{P}} \\newcommand{\\bH}[0]{\\mat{H}} \\newcommand{\\bF}[0]{\\mat{F}} \\newcommand{\\R}[0]{\\mat{R}} \\newcommand{\\Q}[0]{\\mat{Q}} \\newcommand{\\B}[0]{\\mat{B}} \\newcommand{\\C}[0]{\\mat{C}} \\newcommand{\\Ri}[0]{\\R^{-1}} \\newcommand{\\Bi}[0]{\\B^{-1}} \\newcommand{\\X}[0]{\\mat{X}} \\newcommand{\\A}[0]{\\mat{A}} \\newcommand{\\Y}[0]{\\mat{Y}} \\newcommand{\\E}[0]{\\mat{E}} \\newcommand{\\U}[0]{\\mat{U}} \\newcommand{\\V}[0]{\\mat{V}} \\newcommand{\\x}[0]{\\bvec{x}} \\newcommand{\\y}[0]{\\bvec{y}} \\newcommand{\\z}[0]{\\bvec{z}} \\newcommand{\\q}[0]{\\bvec{q}} \\newcommand{\\br}[0]{\\bvec{r}} \\newcommand{\\bb}[0]{\\bvec{b}} \\newcommand{\\bx}[0]{\\bvec{\\bar{x}}} \\newcommand{\\by}[0]{\\bvec{\\bar{y}}} \\newcommand{\\barB}[0]{\\mat{\\bar{B}}} \\newcommand{\\barP}[0]{\\mat{\\bar{P}}} \\newcommand{\\barC}[0]{\\mat{\\bar{C}}} \\newcommand{\\barK}[0]{\\mat{\\bar{K}}} \\newcommand{\\D}[0]{\\mat{D}} \\newcommand{\\Dobs}[0]{\\mat{D}_{\\text{obs}}} \\newcommand{\\Dmod}[0]{\\mat{D}_{\\text{obs}}} \\newcommand{\\ones}[0]{\\bvec{1}} \\newcommand{\\AN}[0]{\\big( \\I_N - \\ones \\ones\\tr / N \\big)}\n", 55 | "$\n", 56 | "\n", 57 | "Ensembles can be used to characterize uncertainty: either by using it to compute (estimate) *statistics* thereof, such as the mean, median, variance, covariance, skewness, confidence intervals, etc (any function of the ensemble can be seen as a \"statistic\"), or by using it to reconstruct the distribution/density from which it is sampled. The latter is illustrated by the plot below. Take a moment to digest its code, and then answer the following exercises." 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "id": "fb570908", 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "mu = 0\n", 68 | "sigma2 = 25\n", 69 | "N = 80\n", 70 | "\n", 71 | "@interact( seed=(1, 10), nbins=(2, 60), bw=(0.1, 1))\n", 72 | "def pdf_reconstructions(seed=5, nbins=10, bw=.3):\n", 73 | " rnd.seed(seed)\n", 74 | " E = mu + np.sqrt(sigma2)*rnd.randn(N)\n", 75 | "\n", 76 | " fig, ax = plt.subplots()\n", 77 | " ax.plot(grid1d, pdf_G1(grid1d, mu, sigma2), lw=5, label=\"True\")\n", 78 | " ax.plot(E, np.zeros(N), '|k', ms=100, mew=.4, label=\"_raw ens\")\n", 79 | " ax.hist(E, nbins, density=1, alpha=.7, color=\"C5\", label=\"Histogram\")\n", 80 | " ax.plot(grid1d, pdf_G1(grid1d, np.mean(E), np.var(E)), lw=5, label=\"Parametric\")\n", 81 | " ax.plot(grid1d, gaussian_kde(E.ravel(), bw**2).evaluate(grid1d), lw=5, label=\"KDE\")\n", 82 | " ax.set_ylim(top=(3*sigma2)**-.5)\n", 83 | " ax.legend()\n", 84 | " plt.show()" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "id": "972fc3c6", 90 | "metadata": {}, 91 | "source": [ 92 | "**Exc -- A matter of taste?:**\n", 93 | "- Which approximation to the true pdf looks better?\n", 94 | "- Which approximation starts with more information? 
\n", 95 | " What is the downside of making such assumptions?\n", 96 | "- What value of `bw` causes the \"KDE\" method to most closely\n", 97 | " reproduce/recover the \"Parametric\" method?\n", 98 | " What about the \"Histogram\" method? \n", 99 | " *PS: we might say that the KDE method \"bridges\" the other two.*." 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "id": "4ba33d56", 105 | "metadata": {}, 106 | "source": [ 107 | "Being able to sample a multivariate Gaussian distribution is a building block of the EnKF.\n", 108 | "That is the objective of the following exercise.\n", 109 | "\n", 110 | "**Exc -- Multivariate Gaussian sampling:**\n", 111 | "Suppose $\\z$ is a standard Gaussian,\n", 112 | "i.e. $p(\\z) = \\NormDist(\\z \\mid \\bvec{0},\\I_{\\xDim})$,\n", 113 | "where $\\I_{\\xDim}$ is the $\\xDim$-dimensional identity matrix. \n", 114 | "Let $\\x = \\mat{L}\\z + \\mu$.\n", 115 | "\n", 116 | " * (a -- optional). Refer to the exercise on [change of variables](T2%20-%20Gaussian%20distribution.ipynb#Exc-(optional)----Probability-and-Change-of-variables) to show that $p(\\x) = \\mathcal{N}(\\x \\mid \\mu, \\mat{C})$, where $\\mat{C} = \\mat{L}^{}\\mat{L}^T$.\n", 117 | " * (b). The code below samples $N = 100$ realizations of $\\x$\n", 118 | " and collects them in an ${\\xDim}$-by-$N$ \"ensemble matrix\" $\\E$.\n", 119 | " But `for` loops are slow in plain Python (and Matlab).\n", 120 | " Replace it with something akin to `E = mu + L@Z`.\n", 121 | " *Hint: this code snippet fails because it's trying to add a vector to a matrix.*" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "id": "84bec352", 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "mu = np.array([1, 100, 5])\n", 132 | "xDim = len(mu)\n", 133 | "L = np.diag(1+np.arange(xDim))\n", 134 | "C = L @ L.T\n", 135 | "Z = rnd.randn(xDim, N)\n", 136 | "\n", 137 | "# Using a loop (\"slow\")\n", 138 | "E = np.zeros((xDim, N))\n", 139 | "for n in range(N):\n", 140 | " E[:, n] = mu + L@Z[:, n]" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "id": "4d254ff9", 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "# show_answer('Gaussian sampling', 'b')" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "id": "3f46e7f6", 156 | "metadata": {}, 157 | "source": [ 158 | "The following prints some numbers that can be used to ascertain if you got it right.\n", 159 | "Note that the estimates will never be exact:\n", 160 | "they contain some amount of random error, a.k.a. ***sampling error***." 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "id": "311ffb67", 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "with np.printoptions(precision=1):\n", 171 | " print(\"Estimated mean =\", np.mean(E, axis=1))\n", 172 | " print(\"Estimated cov =\", np.cov(E), sep=\"\\n\")" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "id": "809f7a8a", 178 | "metadata": {}, 179 | "source": [ 180 | "**Exc -- Moment estimation code:** Above, we used numpy's (`np`) functions to compute the sample-estimated mean and covariance matrix,\n", 181 | "$\\bx$ and $\\barC$,\n", 182 | "from the ensemble matrix $\\E$.\n", 183 | "Now, instead, implement these estimators yourself:\n", 184 | "$$\\begin{align}\\bx &\\ceq \\frac{1}{N} \\sum_{n=1}^N \\x_n \\,, \\\\\n", 185 | " \\barC &\\ceq \\frac{1}{N-1} \\sum_{n=1}^N (\\x_n - \\bx) (\\x_n - \\bx)^T \\,. 
\\end{align}$$" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "id": "b7227350", 192 | "metadata": {}, 193 | "outputs": [], 194 | "source": [ 195 | "# Don't use numpy's mean, cov, but rather a `for` loop.\n", 196 | "def estimate_mean_and_cov(E):\n", 197 | " xDim, N = E.shape\n", 198 | "\n", 199 | " ### FIX THIS ###\n", 200 | " x_bar = np.zeros(xDim)\n", 201 | " C_bar = np.zeros((xDim, xDim))\n", 202 | "\n", 203 | " return x_bar, C_bar\n", 204 | "\n", 205 | "x_bar, C_bar = estimate_mean_and_cov(E)\n", 206 | "with np.printoptions(precision=1):\n", 207 | " print(\"Mean =\", x_bar)\n", 208 | " print(\"Covar =\", C_bar, sep=\"\\n\")" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "id": "a8b3983c", 215 | "metadata": {}, 216 | "outputs": [], 217 | "source": [ 218 | "# show_answer('ensemble moments, loop')" 219 | ] 220 | }, 221 | { 222 | "cell_type": "markdown", 223 | "id": "bcd7960a", 224 | "metadata": {}, 225 | "source": [ 226 | "**Exc -- An obsession?:** Why do we normalize by $(N-1)$ for the covariance computation?" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": null, 232 | "id": "994e9487", 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [ 236 | "# show_answer('Why (N-1)')" 237 | ] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "id": "471bc6ef", 242 | "metadata": {}, 243 | "source": [ 244 | "It can be shown that the above estimators are ***consistent and unbiased***.\n", 245 | "Thus, if we let $N \\rightarrow \\infty$, their sampling error will vanish (\"almost surely\"),\n", 246 | "and we therefor say that our estimators are *consistent*.\n", 247 | "Meanwhile, if we repeat the estimation experiment many times (but use a fixed, finite $N$),\n", 248 | "then the average of sampling errors will also vanish, since our estimators are also *unbiased*.\n", 249 | "Under relatively mild assumptions, the [absence of bias implies concistency](https://en.wikipedia.org/wiki/Consistent_estimator#Bias_versus_consistency)." 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "id": "279989f1", 255 | "metadata": {}, 256 | "source": [ 257 | "The following computes a large number ($K$) of $\\barC$ and $1/\\barC$, estimated with a given ensemble size ($N$).\n", 258 | "Note that the true variance is $C = 1$.\n", 259 | "The histograms of the estimates is plotted, along with vertical lines displaying the mean values." 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": null, 265 | "id": "d57e0d3c", 266 | "metadata": {}, 267 | "outputs": [], 268 | "source": [ 269 | "K = 10000\n", 270 | "@interact(N=(2, 30), bottom=True)\n", 271 | "def var_and_precision_estimates(N=4):\n", 272 | " E = rnd.randn(K, N)\n", 273 | " estims = np.var(E, ddof=1, axis=-1)\n", 274 | " bins = np.linspace(0, 6, 40)\n", 275 | " plt.figure()\n", 276 | " plt.hist(estims, bins, alpha=.6, density=1)\n", 277 | " plt.hist(1/estims, bins, alpha=.6, density=1)\n", 278 | " plt.axvline(np.mean(estims), color=\"C0\", label=\"C\")\n", 279 | " plt.axvline(np.mean(1/estims), color=\"C1\", label=\"1/C\")\n", 280 | " plt.legend()\n", 281 | " plt.show()" 282 | ] 283 | }, 284 | { 285 | "cell_type": "markdown", 286 | "id": "39a66ff5", 287 | "metadata": {}, 288 | "source": [ 289 | "**Exc -- There's bias, and then there's bias:**\n", 290 | "- Note that $1/\\barC$ does not appear to be an unbiased estimate of $1/C = 1$. 
\n", 291 | " Explain this by referring to a well-known property of the expectation, $\\Expect$. \n", 292 | " In view of this, consider the role and utility of \"unbiasedness\" in estimation.\n", 293 | "- What, roughly, is the dependence of the mean values (vertical lines) on the ensemble size? \n", 294 | " What do they tend to as $N$ goes to $0$? \n", 295 | " What about $+\\infty$ ?\n", 296 | "- Optional: What are the theoretical distributions of $\\barC$ and $1/\\barC$ ?" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": null, 302 | "id": "4cb10b89", 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "# show_answer('variance estimate statistics')" 307 | ] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "id": "67b387e8", 312 | "metadata": {}, 313 | "source": [ 314 | "**Exc (optional) -- Error notions:**\n", 315 | " * (a). What's the difference between error and residual?\n", 316 | " * (b). What's the difference between error and bias?\n", 317 | " * (c). Show that `\"mean-square-error\" (RMSE^2) = Bias^2 + Var`. \n", 318 | " *Hint: Let $e = \\hat{\\theta} - \\theta$ be the random \"error\" referred to above.\n", 319 | " Express each term using the expectation $\\Expect$.*" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": null, 325 | "id": "a38ee124", 326 | "metadata": {}, 327 | "outputs": [], 328 | "source": [ 329 | "# show_answer('errors')" 330 | ] 331 | }, 332 | { 333 | "cell_type": "markdown", 334 | "id": "e35f757e", 335 | "metadata": {}, 336 | "source": [ 337 | "**Exc -- Vectorization:** Like Matlab, Python (numpy) is quicker if you \"vectorize\" loops.\n", 338 | "This is eminently possible with computations of ensemble moments. \n", 339 | "Let $\\X \\ceq\n", 340 | "\\begin{bmatrix}\n", 341 | "\t\t\\x_1 -\\bx, & \\ldots & \\x_N -\\bx\n", 342 | "\t\\end{bmatrix} \\,.$\n", 343 | " * (a). Show that $\\X = \\E \\AN$, where $\\ones$ is the column vector of length $N$ with all elements equal to $1$. \n", 344 | " *Hint: consider column $n$ of $\\X$.* \n", 345 | " *PS: it can be shown that $\\ones \\ones\\tr / N$ and its complement is a \"projection matrix\".*\n", 346 | " * (b). Show that $\\barC = \\X \\X^T /(N-1)$.\n", 347 | " * (c). Code up this, latest, formula for $\\barC$ and insert it in `estimate_mean_and_cov(E)`" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": null, 353 | "id": "b0e2f553", 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [ 357 | "# show_answer('ensemble moments vectorized')" 358 | ] 359 | }, 360 | { 361 | "cell_type": "markdown", 362 | "id": "584a9ac3", 363 | "metadata": {}, 364 | "source": [ 365 | "**Exc -- Moment estimation code, part 2:** The cross-covariance between two random vectors, $\\bx$ and $\\by$, is given by\n", 366 | "$$\\begin{align}\n", 367 | "\\barC_{\\x,\\y}\n", 368 | "&\\ceq \\frac{1}{N-1} \\sum_{n=1}^N\n", 369 | "(\\x_n - \\bx) (\\y_n - \\by)^T \\\\\\\n", 370 | "&= \\X \\Y^T /(N-1)\n", 371 | "\\end{align}$$\n", 372 | "where $\\Y$ is, similar to $\\X$, the matrix whose columns are $\\y_n - \\by$ for $n=1,\\ldots,N$. \n", 373 | "Note that this is simply the covariance formula, but for two different variables. \n", 374 | "I.e. if $\\Y = \\X$, then $\\barC_{\\x,\\y} = \\barC_{\\x}$ (which we have denoted $\\barC$ in the above).\n", 375 | "\n", 376 | "Implement the cross-covariance estimator in the code-cell below." 
377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": null, 382 | "id": "bd9eb4ed", 383 | "metadata": {}, 384 | "outputs": [], 385 | "source": [ 386 | "def estimate_cross_cov(Ex, Ey):\n", 387 | " Cxy = np.zeros((len(Ex), len(Ey))) ### INSERT ANSWER ###\n", 388 | " return Cxy" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": null, 394 | "id": "45524f4e", 395 | "metadata": {}, 396 | "outputs": [], 397 | "source": [ 398 | "# show_answer('estimate cross')" 399 | ] 400 | }, 401 | { 402 | "cell_type": "markdown", 403 | "id": "75527f06", 404 | "metadata": {}, 405 | "source": [ 406 | "## Summary\n", 407 | "Parametric assumptions (e.g. assuming Gaussianity) can be useful in approximating distributions.\n", 408 | "Sample covariance estimates can be expressed and computed in a vectorized form.\n", 409 | "\n", 410 | "### Next: [T9 - Writing your own EnKF](T9%20-%20Writing%20your%20own%20EnKF.ipynb)" 411 | ] 412 | } 413 | ], 414 | "metadata": { 415 | "anaconda-cloud": {}, 416 | "jupytext": { 417 | "formats": "ipynb,scripts//py" 418 | }, 419 | "kernelspec": { 420 | "display_name": "Python 3 (ipykernel)", 421 | "language": "python", 422 | "name": "python3" 423 | }, 424 | "language_info": { 425 | "codemirror_mode": { 426 | "name": "ipython", 427 | "version": 3 428 | }, 429 | "file_extension": ".py", 430 | "mimetype": "text/x-python", 431 | "name": "python", 432 | "nbconvert_exporter": "python", 433 | "pygments_lexer": "ipython3", 434 | "version": "3.9.16" 435 | } 436 | }, 437 | "nbformat": 4, 438 | "nbformat_minor": 5 439 | } 440 | -------------------------------------------------------------------------------- /notebooks/dpr_config.yaml: -------------------------------------------------------------------------------- 1 | liveplotting: no 2 | store_u: yes 3 | -------------------------------------------------------------------------------- /notebooks/resources/DA_bridges.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/1cff8de93d8c07eb14926f02b7f519757f3ea80b/notebooks/resources/DA_bridges.jpg -------------------------------------------------------------------------------- /notebooks/resources/HMM.tex: -------------------------------------------------------------------------------- 1 | % Convert to svg using pdf2svg or https://pdftoimage.com/pdf-to-svg 2 | 3 | \documentclass{standalone} 4 | \usepackage{tikz} 5 | \usetikzlibrary{positioning, arrows.meta} 6 | 7 | \usepackage{bm} 8 | \newcommand{\mat}[1]{{\mathbf{{#1}}}} 9 | % \newcommand{\vect}[1]{{\mathbf{#1}}} 10 | \newcommand{\vect}[1]{{\bm{#1}}} 11 | \newcommand{\x}[0]{\vect{x}} 12 | \newcommand{\y}[0]{\vect{y}} 13 | 14 | \begin{document} 15 | \begin{tikzpicture}[ 16 | very thick, 17 | font=\large, 18 | state/.style={circle, draw, minimum size=1cm, text centered, inner sep=0pt}, 19 | obs/.style={circle, draw, minimum size=1cm, text centered, inner sep=0pt}, 20 | arrow/.style={-latex}, 21 | node distance=1.9cm 22 | ] 23 | % States 24 | \node[state] (x0) {$\x_0$}; 25 | \node[state, right=of x0] (x1) {$\x_1$}; 26 | \node[right=3mm of x1] (x2dots) {\Huge \hspace{-2mm}$\dots$}; 27 | \node[state, right=of x2dots] (xk) {$\x_k$}; 28 | \node[right=3mm of xk] (xkdots) {\Huge \hspace{-2mm} $\dots$}; 29 | \node[state, right=of xkdots] (xK) {$\x_K$}; 30 | 31 | % Observations 32 | % \node[obs, below=of x0] (y0) {$\y_0$}; 33 | \node[obs, below=of x1] (y1) {$\y_1$}; 34 | \node[obs, below=of xk] (yk) {$\y_k$}; 35 | \node[obs, 
below=of xK] (yK) {$\y_K$}; 36 | 37 | % Dynamical model 38 | \draw[arrow] (x0) to[bend left=15] node[midway, above] {$p(\x_1 | \x_{0})$} (x1); 39 | \draw[arrow] (x2dots) to[bend left=15] node[midway, above] {$p(\x_k | \x_{k-1})$} (xk); 40 | \draw[arrow] (xkdots) to[bend left=15] node[midway, above] {$p(\x_K | \x_{K-1})$} (xK); 41 | 42 | % Observation model 43 | % \draw[arrow] (x0) to[bend left=15] node[pos=0.35, left] {$\mathscr{H}_0$} (y0); 44 | \draw[arrow] (x1) to[bend left=15] node[pos=0.35, right] {$p(\y_1 | \x_1)$} (y1); 45 | \draw[arrow] (xk) to[bend left=15] node[pos=0.35, right] {$p(\y_k | \x_k)$} (yk); 46 | \draw[arrow] (xK) to[bend left=15] node[pos=0.35, left, xshift=1mm] {$p(\y_K | \x_K)$} (yK); 47 | 48 | % Horizontal line and labels 49 | \draw[dashed, draw=gray!90, line width=0.7pt] (x0.west |- 0,-1.8) -- (xK.east |- 0,-1.8); 50 | \node[anchor=south west, align=left, fill=yellow!15, xshift=1mm] at (x0.west |- 0,-1.7) {\normalsize Markov chain\\\normalsize(hidden states)}; 51 | \node[anchor=north west, align=left, fill=yellow!15, xshift=1mm] at (x0.west |- 0,-1.9) {\normalsize Measurements\\\normalsize(observed)}; 52 | 53 | 54 | \end{tikzpicture} 55 | \end{document} 56 | -------------------------------------------------------------------------------- /notebooks/resources/__init__.py: -------------------------------------------------------------------------------- 1 | """Additional styles and resources for tutorials. 2 | 3 | Our didactic goal is to put as little as possible in here. 4 | """ 5 | import os 6 | from pathlib import Path 7 | 8 | import numpy as np 9 | import matplotlib as mpl 10 | import mpl_tools 11 | 12 | 13 | import matplotlib.pyplot as plt 14 | plt.style.use("seaborn-v0_8") 15 | 16 | 17 | # Should PRECEDE plt.ion() 18 | try: 19 | # Note: Colab only supports `%matplotlib inline` ⇒ no point loading other. 20 | # NOTE: Colab: must use plt.show() to avoid duplicate figures. 21 | import google.colab # type: ignore 22 | # Colab only supports mpl inline backend 23 | 24 | # Make figures and fonts larger. 25 | mpl.rcParams.update({'font.size': 15}) 26 | mpl.rcParams.update({'figure.figsize': [10,6]}) 27 | except ImportError: 28 | if mpl_tools.is_notebook_or_qt: 29 | # NB: `nbAgg` steals focus from interactive sliders, 30 | # and re-generates entire figure (not just canvas). 31 | # mpl.use('nbAgg') # = %matplotlib notebook 32 | pass # all notebooks use `%matplotlib inline` anyway 33 | else: 34 | # Regular python (or ipython) session 35 | pass 36 | 37 | # Must NOT be in 1st cell of the notebook, 38 | # because Colab does %matplotlib inline at startup (I think), resetting rcParams. 39 | mpl.rcParams.update({'lines.linewidth': 2.5}) 40 | 41 | # Load answers 42 | from .answers import show_answer 43 | 44 | # Load widgets 45 | from ipywidgets import Image, interactive, HBox, VBox, IntSlider, SelectMultiple 46 | from IPython.display import display 47 | 48 | 49 | def interact(top=None, right=None, bottom=None, left=None, **kwargs): 50 | """Like `ipywidgets.interact(**kwargs)` but with layout shortcuts. 51 | 52 | Also provides `disable` function to help importing notebooks. 53 | 54 | Set `bottom` or any other `side` argument to `True` to place all controls there, 55 | relative to the central output (typically figure). 56 | Otherwise, use a list (or comma-separated string) to select which controls to place there. 57 | Use *nested* lists to re-group/order them. 58 | The underlying mechanism is CSS flex box (typically without "wrap"). 
59 | 60 | If the last element of a `side` is a dict, then it will be written as attributes 61 | to the CSS `layout` attribute, ref [1]. 62 | Support for the `style` attribute [2] is not yet implemented. 63 | 64 | Similarly, if the last element of any `kwargs` is a dict, then it will be written as attributes 65 | (e.g. `description (str)`, 'readout (bool)', `continuous_update (bool)`, `orientation (str)`) 66 | to the widget, ref [3]. 67 | 68 | Only tested with "inline" backend (Colab and locally). 69 | Also see `~/P/HistoryMatching/tools/plotting.py` 70 | 71 | [1]: https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Layout.html 72 | [2]: https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Styling.html 73 | [3]: https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20List.html# 74 | 75 | Example: 76 | 77 | >>> v = dict(orientation="vertical", layout=dict(height="80%")) 78 | ... @interact(a=(1., 6., v), 79 | ... b=(1., 7.), 80 | ... bottom=True, # put rest here 81 | ... top='b,c', 82 | ... right=[['a', dict(height="100%", align_items="center")],['e']]) 83 | ... def f(a=3.0, b=4, c=True, d=5, e=6): 84 | ... plt.figure(figsize=(4, 5)) 85 | ... xx = np.linspace(0, 3, 21) 86 | ... if c: plt.plot(xx, e*d/a + xx**b) 87 | ... else: plt.plot(xx, b + xx) 88 | ... plt.show() 89 | """ 90 | 91 | def get_dict(iterable): 92 | if iterable and isinstance(iterable[-1], dict): 93 | return iterable[-1] 94 | else: 95 | return {} 96 | 97 | def boxit(ww, horizontal=True): 98 | """Apply box to lists, recursively (alternating between `HBox` and `VBox`).""" 99 | if (layout := get_dict(ww)): 100 | ww = ww[:-1] 101 | 102 | for i, w in enumerate(ww): 103 | if hasattr(w, '__iter__'): 104 | ww[i] = boxit(w, not horizontal) 105 | 106 | box = HBox if horizontal else VBox 107 | return box(ww, layout=layout) 108 | 109 | def pop_widgets(ww, labels): 110 | """Replace items in nested list `labels` by matching elements from `ww`. 111 | 112 | Essentially `[ww.pop(i) for i, w in enumerate(ww) if w.description == lbl]` 113 | but if `w` is a list, then recurse. 
114 | """ 115 | # Validate 116 | if not labels: 117 | return [] 118 | elif labels == True: 119 | cp = ww.copy() 120 | ww.clear() 121 | return cp 122 | elif isinstance(labels, str): 123 | labels = labels.split(',') 124 | 125 | # Main 126 | ww2 = [] 127 | for lbl in labels: 128 | if isinstance(lbl, dict): 129 | # Forward as is 130 | w = lbl 131 | elif isinstance(lbl, list): 132 | # Recurse 133 | w = pop_widgets(ww, lbl) 134 | else: 135 | # Pop 136 | i = [i for i, w in enumerate(ww) if w.description == lbl] 137 | try: 138 | i = i[0] 139 | except IndexError: 140 | raise IndexError(f'Did you specify {lbl} twice in the layout?') 141 | w = ww.pop(i) 142 | ww2.append(w) 143 | return ww2 144 | 145 | sides = dict(top=top, right=right, bottom=bottom, left=left) 146 | 147 | # Pop attributes (if any) for controls 148 | attrs = {} 149 | for key, iterable in kwargs.items(): 150 | if (dct := get_dict(iterable)): 151 | attrs[key] = dct 152 | kwargs[key] = type(iterable)(iterable[:-1]) # preserve list or tuple 153 | 154 | def decorator(fun): 155 | # Auto-parse kwargs, add 'observers' 156 | linked = interactive(fun, **kwargs) 157 | *ww, out = linked.children 158 | # display(HBox([out, VBox(ww)])) 159 | 160 | # Styling of individual control widgets 161 | for w in ww: 162 | for attr, val in attrs.get(w.description, {}).items(): 163 | setattr(w, attr, val) 164 | # Defaults 165 | try: 166 | # Disable continuous_update on Colab 167 | import google.colab # type: ignore 168 | w.continuous_update = False 169 | except ImportError: 170 | pass 171 | w.style.description_width = "max-content" 172 | if getattr(w, 'orientation', '') == "vertical": 173 | w.layout.width = "2em" 174 | 175 | on = {side: pop_widgets(ww, labels) for side, labels in sides.items()} 176 | on['right'] = ww + on['right'] # put any remainder on the right (before any dict) 177 | 178 | # Dashbord composition 179 | # I considered AppLayout, but was more comfortable with combining boxes 180 | left = boxit(on['left'], False) 181 | right = boxit(on['right'], False) 182 | top = boxit(on['top'], True) 183 | bottom = boxit(on['bottom'], True) 184 | 185 | dashboard = VBox([top, HBox([left, out, right]), bottom]) 186 | 187 | display(dashboard); 188 | linked.update() # necessary on Colab 189 | 190 | if interact.disabled: 191 | # Used with hacky `import_from_nb` 192 | return (lambda fun: (lambda _: None)) 193 | elif not mpl_tools.is_notebook_or_qt: 194 | # Return dummy (to plot without interactivity) 195 | return (lambda fun: fun()) 196 | else: 197 | return decorator 198 | 199 | interact.disabled = False 200 | 201 | 202 | def cInterval(mu, sigma2, flat=True): 203 | """Compute +/- 1-sigma (std.dev.) 
confidence/credible intervals (CI).""" 204 | s1 = np.sqrt(sigma2) 205 | a = mu - s1 206 | b = mu + s1 207 | if flat: 208 | return a.flatten(), b.flatten() 209 | else: 210 | return a, b 211 | 212 | 213 | def axes_with_marginals(): 214 | from matplotlib import pyplot as plt 215 | fig, ((ax, yax), (xax, _)) = plt.subplots( 216 | 2, 2, sharex='col', sharey='row', 217 | figsize=(6, 6), 218 | gridspec_kw={'height_ratios':[5,1], 219 | 'width_ratios' :[5,1], 220 | 'wspace': .1, 221 | 'hspace': .1}) 222 | _.set_visible(False) 223 | ax.set_aspect('equal') 224 | return fig, (ax, yax, xax) 225 | 226 | 227 | def get_jointplotter(grid1d): 228 | fig, (ax, yax, xax) = axes_with_marginals() 229 | dx = grid1d[1] - grid1d[0] 230 | def plotter(Z, colors=None, alpha=.3, linewidths=1, **kwargs): 231 | Z = Z / Z.sum() / dx**2 232 | lvls = np.logspace(-3, 3, 21) 233 | # h = ax.contourf(grid1d, grid1d, Z, colors=colors, levels=lvls, alpha=alpha) 234 | # _ = ax.contour(grid1d, grid1d, Z, colors='black', levels=lvls, linewidths=.7, alpha=alpha) 235 | h = ax.contour(grid1d, grid1d, Z, colors=colors, levels=lvls, linewidths=linewidths, **kwargs) 236 | 237 | margx = dx * Z.sum(0) 238 | margy = dx * Z.sum(1) 239 | xax.fill_between(grid1d, margx, color=colors, alpha=alpha) 240 | yax.fill_betweenx(grid1d, 0, margy, color=colors, alpha=alpha) 241 | 242 | return h.legend_elements()[0][0] 243 | return ax, plotter 244 | 245 | 246 | def frame(data, ax, zoom=1): 247 | """Do `ax.set_{x/y/z}lim()` based on `data`, using given `zoom` (power of 10).""" 248 | zoom = 10**(zoom - 1) 249 | for ens, dim in zip(data.T, 'xyz'): 250 | a = ens.min() 251 | b = ens.max() 252 | m = (a + b)/2 253 | w = b - a 254 | setter = getattr(ax, f'set_{dim}lim') 255 | setter([m - w/2/zoom, 256 | m + w/2/zoom]) 257 | 258 | 259 | def envisat_video(): 260 | caption = """Illustration of DA for the ozone layer in 2002. 261 |

262 | LEFT: Satellite data (i.e. all that is observed). 263 | RIGHT: Simulation model with assimilated data. 264 |

265 | Could you have perceived the splitting of the ozone hole, only from the satellite data? 

267 | Attribution: William A. Lahoz, DARC. 268 | """ 269 | 270 | import io 271 | import base64 272 | from IPython.display import HTML 273 | 274 | video = io.open(Path(__file__).parent / 'darc_envisat_analyses.mp4', 'r+b').read() 275 | encoded = base64.b64encode(video) 276 | vid = HTML(data=''' 277 |
278 | 281 |
{1}
282 |
283 | '''.format(encoded.decode('ascii'),caption)) 284 | return vid 285 | 286 | 287 | def EnKF_animation(): 288 | # Initialize 289 | path_ = str(Path(__file__).parent / "illust_EnKF/illust_EnKF_") 290 | image = Image( 291 | value=open(path_ + "0.png", "rb").read(), 292 | format='png', 293 | width=800, 294 | height=600, 295 | ) 296 | 297 | def update_image(i=0): 298 | image.value=open(path_ + str(i) + ".png", "rb").read() 299 | 300 | slider = interactive(update_image, i=(0, 7, 1)) 301 | return VBox([slider, image]) 302 | 303 | 304 | def import_from_nb(name: str, objs: list): 305 | """Import `objs` from `notebooks/name*.py` (1st match). 306 | 307 | Might not want to do this because it uses `sys.path` manipulation, 308 | imposes that the notebook contain 309 | 310 | - only light computations (unless controlled by interact.disabled) 311 | - the version we want (as opposed to it being implemented by students) 312 | 313 | and because a little repetition never hurt nobody. 314 | """ 315 | NBDIR = Path(__file__).parents[1] 316 | notebk = next(NBDIR.glob(name + "*.ipynb")) 317 | script = (NBDIR / "scripts" / notebk.relative_to(NBDIR)).with_suffix('.py') 318 | 319 | interact.disabled = True 320 | try: 321 | name = str(script.relative_to(NBDIR).with_suffix("")).replace(os.sep, ".") 322 | script = getattr(__import__(name), script.stem) # works despite weird chars 323 | finally: 324 | interact.disabled = False 325 | return [getattr(script, x) for x in objs] 326 | -------------------------------------------------------------------------------- /notebooks/resources/colab_bootstrap.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Colab doesn't provide 4 | # - Auto-installing requirements.txt 5 | # - Pre-loading data/modules (aside from the notebook itself) 6 | # This script takes care of the above by cloning the full (shallow) repo. 7 | 8 | # Install requirements 9 | main () { 10 | set -e 11 | 12 | # Clear any existing REPO for a fresh git clone 13 | rm -rf REPO 14 | 15 | # Download repo 16 | URL=https://github.com/nansencenter/DA-tutorials.git 17 | if [[ ! -d REPO ]]; then git clone --depth=1 $URL REPO; fi 18 | 19 | # https://pythonspeed.com/articles/upgrade-pip/ 20 | pip install --upgrade pip 21 | 22 | # Install requirements 23 | pip install -r REPO/requirements.txt 24 | 25 | # Put notebook/ (including hidden files) in PWD 26 | shopt -s dotglob 27 | cp -r REPO/notebooks/* ./ 28 | } 29 | 30 | # Only run if we're on colab 31 | if python -c "import google.colab" 2>/dev/null; then 32 | 33 | # Use `bash -s -- --debug` to get verbose output 34 | if echo $@ | grep -E -- '(--debug|-v)' > /dev/null ; then 35 | main 36 | else 37 | # Quiet 38 | main > /dev/null 2>&1 39 | fi 40 | 41 | echo "Initialization for Colab done." 42 | else 43 | echo "Not running on Colab => Didn't do anything." 
44 | fi 45 | -------------------------------------------------------------------------------- /notebooks/resources/darc_envisat_analyses.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/1cff8de93d8c07eb14926f02b7f519757f3ea80b/notebooks/resources/darc_envisat_analyses.mp4 -------------------------------------------------------------------------------- /notebooks/resources/exc-2.4-iii.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/1cff8de93d8c07eb14926f02b7f519757f3ea80b/notebooks/resources/exc-2.4-iii.png -------------------------------------------------------------------------------- /notebooks/resources/exc-2.5-iv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/1cff8de93d8c07eb14926f02b7f519757f3ea80b/notebooks/resources/exc-2.5-iv.png -------------------------------------------------------------------------------- /notebooks/resources/exc-2.5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/1cff8de93d8c07eb14926f02b7f519757f3ea80b/notebooks/resources/exc-2.5.png -------------------------------------------------------------------------------- /notebooks/resources/illust_EnKF/illust_EnKF.py: -------------------------------------------------------------------------------- 1 | """Script to patch png figures 2 | from Matlab script DATUM/illust_EnKF_1.m 3 | together with text titles, as given below. 4 | """ 5 | 6 | from matplotlib.image import imread 7 | import matplotlib.pyplot as plt 8 | plt.ion() 9 | 10 | txts = [] 11 | txts += ['We consider a single cycle of the EnKF,' 12 | 'starting with the analysis state\n' 13 | 'at time $(k-1)$.' 14 | 'The contours are "iso-density" curves of ' 15 | '$\|\mathbf{x}-\mathbf{x}^{\text{a}}_{k-1}\|_{\bP^{\text{a}}_{k-1}}$.'] 16 | txts += ['The ensemble $\{\mathbf{x}_n^{\text{a}}\}_{n=1..N}$ is (assumed) sampled from ' 17 | 'this distribution.'] 18 | txts += ['The ensemble is forecasted from time $(k-1)$ to $k$ ' 19 | 'using the dynamical\n' 20 | 'model $\mathscr{M}$. We now denote it using the superscript $f$.'] 21 | txts += ['Now we consider the analysis at time $k$. The ensemble is used\n' 22 | 'to compute the estimates $\mathbf{\\bar{b}}_k$ and $\mathbf{\\bar{B}}_k$, ' 23 | 'hence the new contour curves.'] 24 | txts += ['The obs. likelihood is taken into account...'] 25 | txts += ["...which (implicitly) yields this posterior (Bayes' rule)."] 26 | txts += ['What we actually do, however,\n' 27 | 'is to compute the Kalman gain from ' 28 | '$\\bar{\mathbf{x}}^\text{f}_k$ and $\\bar{\bP}^\text{f}_k$.'] 29 | txts += ['The Kalman gain is then used to shift the ensemble such that ' 30 | 'it represents\n' 31 | 'the (implicit) posterior. The cycle can then begin again, ' 32 | 'from $k$ to $k+1$.'] 33 | 34 | # Hack to keep line-spacing constant with/out TeX 35 | placeholder = '\phantom{$\{x_n^\text{f}\}_{n=1}^N$}' 36 | placeholder += "." 
# phantom w/o anything causes stuff to disappear 37 | for i,t in enumerate(txts): 38 | t = t.split("\n") 39 | t = [placeholder]*(2-len(t)) + t # ensure 2 lines 40 | # t = [ln+LE for ln in t] 41 | txts[i] = "\n".join(t) 42 | 43 | 44 | def crop(img): 45 | "Crop Matlab-outputted image" 46 | top = int( 0.15*img.shape[0]) 47 | btm = int((1-0.20)*img.shape[0]) 48 | lft = int( 0.10*img.shape[1]) 49 | rgt = int((1-0.09)*img.shape[1]) 50 | return img[top:btm,lft:rgt] 51 | 52 | from pathlib import Path 53 | PWD = Path(__file__).parent 54 | 55 | def illust_EnKF(i): 56 | plt.close(1) 57 | plt.figure(1,figsize=(8,6)) 58 | axI = plt.subplot(111) 59 | axI.set_axis_off() 60 | name = 'illust_EnKF_prez_'+str(i+8)+'.png' 61 | name = PWD/"from_Matlab"/name 62 | img = imread(name) 63 | img = crop(img) 64 | axI.imshow(img) 65 | axI.set_title(txts[i],loc='left',usetex=True,size=15) 66 | 67 | for i, txt in enumerate(txts): 68 | illust_EnKF(i) 69 | plt.pause(.2) 70 | name = "illust_EnKF_"+str(i)+".png" 71 | print("Saving", PWD/name) 72 | plt.savefig(PWD/name) 73 | -------------------------------------------------------------------------------- /notebooks/resources/illust_EnKF/illust_EnKF_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/1cff8de93d8c07eb14926f02b7f519757f3ea80b/notebooks/resources/illust_EnKF/illust_EnKF_0.png -------------------------------------------------------------------------------- /notebooks/resources/illust_EnKF/illust_EnKF_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/1cff8de93d8c07eb14926f02b7f519757f3ea80b/notebooks/resources/illust_EnKF/illust_EnKF_1.png -------------------------------------------------------------------------------- /notebooks/resources/illust_EnKF/illust_EnKF_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/1cff8de93d8c07eb14926f02b7f519757f3ea80b/notebooks/resources/illust_EnKF/illust_EnKF_2.png -------------------------------------------------------------------------------- /notebooks/resources/illust_EnKF/illust_EnKF_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/1cff8de93d8c07eb14926f02b7f519757f3ea80b/notebooks/resources/illust_EnKF/illust_EnKF_3.png -------------------------------------------------------------------------------- /notebooks/resources/illust_EnKF/illust_EnKF_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/1cff8de93d8c07eb14926f02b7f519757f3ea80b/notebooks/resources/illust_EnKF/illust_EnKF_4.png -------------------------------------------------------------------------------- /notebooks/resources/illust_EnKF/illust_EnKF_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/1cff8de93d8c07eb14926f02b7f519757f3ea80b/notebooks/resources/illust_EnKF/illust_EnKF_5.png -------------------------------------------------------------------------------- /notebooks/resources/illust_EnKF/illust_EnKF_6.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/nansencenter/DA-tutorials/1cff8de93d8c07eb14926f02b7f519757f3ea80b/notebooks/resources/illust_EnKF/illust_EnKF_6.png -------------------------------------------------------------------------------- /notebooks/resources/illust_EnKF/illust_EnKF_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nansencenter/DA-tutorials/1cff8de93d8c07eb14926f02b7f519757f3ea80b/notebooks/resources/illust_EnKF/illust_EnKF_7.png -------------------------------------------------------------------------------- /notebooks/resources/macros.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Macros must be included in 4 | # - every notebook. 5 | # - every answer that uses them. 6 | 7 | 8 | from pathlib import Path 9 | import re 10 | import sys 11 | 12 | import nbformat 13 | 14 | 15 | HEADER = r'''% ######################################## Loading TeX (MathJax)... Please wait ########################################''' 16 | macros=r''' 17 | \newcommand{\Reals}{\mathbb{R}} 18 | \newcommand{\Expect}[0]{\mathbb{E}} 19 | \newcommand{\NormDist}{\mathscr{N}} 20 | 21 | \newcommand{\DynMod}[0]{\mathscr{M}} 22 | \newcommand{\ObsMod}[0]{\mathscr{H}} 23 | 24 | \newcommand{\mat}[1]{{\mathbf{{#1}}}} % ALWAYS 25 | %\newcommand{\mat}[1]{{\pmb{\mathsf{#1}}}} 26 | \newcommand{\bvec}[1]{{\mathbf{#1}}} % ALWAYS 27 | 28 | \newcommand{\trsign}{{\mathsf{T}}} % ALWAYS 29 | \newcommand{\tr}{^{\trsign}} % ALWAYS 30 | \newcommand{\ceq}[0]{\mathrel{≔}} 31 | \newcommand{\xDim}[0]{D} 32 | \newcommand{\supa}[0]{^\text{a}} 33 | \newcommand{\supf}[0]{^\text{f}} 34 | 35 | \newcommand{\I}[0]{\mat{I}} % ALWAYS 36 | \newcommand{\K}[0]{\mat{K}} 37 | \newcommand{\bP}[0]{\mat{P}} 38 | \newcommand{\bH}[0]{\mat{H}} 39 | \newcommand{\bF}[0]{\mat{F}} 40 | \newcommand{\R}[0]{\mat{R}} 41 | \newcommand{\Q}[0]{\mat{Q}} 42 | \newcommand{\B}[0]{\mat{B}} 43 | \newcommand{\C}[0]{\mat{C}} 44 | \newcommand{\Ri}[0]{\R^{-1}} 45 | \newcommand{\Bi}[0]{\B^{-1}} 46 | \newcommand{\X}[0]{\mat{X}} 47 | \newcommand{\A}[0]{\mat{A}} 48 | \newcommand{\Y}[0]{\mat{Y}} 49 | \newcommand{\E}[0]{\mat{E}} 50 | \newcommand{\U}[0]{\mat{U}} 51 | \newcommand{\V}[0]{\mat{V}} 52 | 53 | \newcommand{\x}[0]{\bvec{x}} 54 | \newcommand{\y}[0]{\bvec{y}} 55 | \newcommand{\z}[0]{\bvec{z}} 56 | \newcommand{\q}[0]{\bvec{q}} 57 | \newcommand{\br}[0]{\bvec{r}} 58 | \newcommand{\bb}[0]{\bvec{b}} 59 | 60 | \newcommand{\bx}[0]{\bvec{\bar{x}}} 61 | \newcommand{\by}[0]{\bvec{\bar{y}}} 62 | \newcommand{\barB}[0]{\mat{\bar{B}}} 63 | \newcommand{\barP}[0]{\mat{\bar{P}}} 64 | \newcommand{\barC}[0]{\mat{\bar{C}}} 65 | \newcommand{\barK}[0]{\mat{\bar{K}}} 66 | 67 | \newcommand{\D}[0]{\mat{D}} 68 | \newcommand{\Dobs}[0]{\mat{D}_{\text{obs}}} 69 | \newcommand{\Dmod}[0]{\mat{D}_{\text{obs}}} 70 | 71 | \newcommand{\ones}[0]{\bvec{1}} % ALWAYS 72 | \newcommand{\AN}[0]{\big( \I_N - \ones \ones\tr / N \big)} 73 | ''' 74 | macros = [ln for ln in macros.splitlines() if ln and not ln.startswith('%')] 75 | always = [i for i, ln in enumerate(macros) if "ALWAYS" in ln] 76 | macros = [m.replace("% ALWAYS","").rstrip() for m in macros] 77 | 78 | # Convert to {macro_name: macro_lineno} 79 | declaration = re.compile(r'''^\\newcommand{(.+?)}''') 80 | lineno_by_name = {} 81 | for i, ln in enumerate(macros): 82 | match = declaration.match(ln) 83 | if match: lineno_by_name[match.group(1)] = i 84 | 85 | # Regex for macro, for ex. 
\mat, including \mat_, but not \mathbf: 86 | no_escape = lambda s: s.replace("\\",r"\\") 87 | delimit = lambda m: re.compile( no_escape(m) + r'(_|\b)' ) 88 | 89 | 90 | def include_macros(content): 91 | """Include macros in answers. Only those that are required.""" 92 | # Find macros present in content 93 | necessary = [i for macro, i in lineno_by_name.items() if delimit(macro).search(content)] 94 | # Include in content 95 | if necessary: 96 | mm = [macros[i] for i in necessary] 97 | # PRE-pend those that should always be there 98 | mm = [macros[i] for i in always if (macros[i] not in mm)] + mm 99 | # Escape underscore coz md2html sometimes interprets it as . 100 | mm = [m.replace("_","\\_") for m in mm] 101 | # Include surrounding dollar signs 102 | mm = ["$"] + mm + ["$"] 103 | # Avoid accidental $$ 104 | space = " " if content.startswith("$") else "" 105 | # Collect 106 | content = "\n".join(mm) + space + content 107 | return content 108 | 109 | 110 | def update_1nbscript(f: Path): 111 | """Update the macros of a notebook script (synced with `jupytext`).""" 112 | print(f.name.ljust(40), end=": ") 113 | lines = f.read_text().splitlines() 114 | mLine = "# " + " ".join(macros) 115 | 116 | try: 117 | iHeader = lines.index("# " + HEADER) 118 | except (ValueError, AssertionError): 119 | print("Could not locate pre-existing macros") 120 | return 121 | 122 | if not (lines[iHeader-1] == "# $" and 123 | lines[iHeader+2] == "# $"): 124 | print("Could not parse macros") 125 | 126 | # elif lines[iHeader+1] == mLine: 127 | # print("Macros already up to date.") 128 | 129 | else: 130 | # lines[iHeader] = "# % ##### NEW HEADER ######" 131 | lines[iHeader+1] = mLine 132 | f.write_text("\n".join(lines)) 133 | print("Macros updated!") 134 | 135 | 136 | if __name__ == "__main__" and any("update" in arg for arg in sys.argv): 137 | for f in sorted((Path(__file__).parents[1] / "scripts").glob("T*.py")): 138 | update_1nbscript(f) 139 | -------------------------------------------------------------------------------- /notebooks/resources/spellfile.utf-8.add: -------------------------------------------------------------------------------- 1 | T1 2 | T2 3 | T3 4 | T4 5 | T5 6 | T6 7 | T7 8 | T8 9 | T9 10 | T10 11 | arange 12 | numpy's 13 | np 14 | diag 15 | sep 16 | plt 17 | ylabel 18 | href 19 | Exc 20 | figlist 21 | rmv 22 | rmse 23 | avrgs 24 | config 25 | yy 26 | py 27 | G1 28 | num 29 | linspace 30 | dx 31 | figsize 32 | mathcal 33 | eqn 34 | frac 35 | infty 36 | NormDist 37 | linalg 38 | det 39 | inv 40 | norm22 41 | ww 42 | meshgrid 43 | dstack 44 | var1 45 | var2 46 | cov12 47 | Cov 48 | pdf's 49 | wikipedia 50 | lklhd_values 51 | gridpoint 52 | vals 53 | lklhd 54 | postr 55 | xhat 56 | vals2 57 | ylim 58 | ymax 59 | rightarrow 60 | propto 61 | U1 62 | circ 63 | #nivarite 64 | exc 65 | eqns 66 | cov 67 | mathcal 68 | randn 69 | IntSlider 70 | kk 71 | lin 72 | pw 73 | bb 74 | xxhat 75 | kf 76 | ka 77 | xlim 78 | xlabel 79 | loc 80 | ldots 81 | LinReg 82 | deriv 83 | DynMod 84 | Dyn 85 | leq 86 | forall 87 | ipynb 88 | bP 89 | bH 90 | br 91 | qquad 92 | bvec 93 | Ri 94 | ul 95 | li 96 | C1 97 | C2 98 | K1 99 | K2 100 | ExtKF 101 | ExtRTS 102 | sp 103 | hstack 104 | len 105 | th 106 | ESig 107 | gaussian 108 | filtfilt 109 | padlen 110 | UnivariateSpline 111 | SelectMultiple 112 | dko 113 | GaussRV 114 | MyMethod 115 | Var3D 116 | OptInterp 117 | PartFilt 118 | Bénard 119 | dt 120 | dxdt 121 | varepsilon 122 | neq 123 | eps 124 | gridvalues 125 | iid 126 | B12 127 | ss 128 | nbins 129 | mpl 130 | kde 131 | ndim 
132 | cholesky 133 | printoptions 134 | matshow 135 | cmap 136 | colorbar 137 | barB 138 | ceq 139 | bmatrix 140 | barC 141 | barK 142 | ObsMod 143 | nobias 144 | barP 145 | sig 146 | Kutta 147 | rk4 148 | t0 149 | x0 150 | chol 151 | dto 152 | Ko 153 | mu0 154 | P0 155 | Init 156 | Ko 157 | nla 158 | Eo 159 | HBH 160 | v1 161 | axs 162 | cd 163 | Lorenz63 164 | sak12 165 | PertObs 166 | infl 167 | da 168 | MYSTAT 169 | iEnKF 170 | NER 171 | iMax 172 | EnKF's 173 | Lorenz96 174 | sak08 175 | jupyter 176 | liveplotting 177 | assimilator 178 | TwinSetup 179 | -------------------------------------------------------------------------------- /notebooks/scripts/T1 - DA & EnKF.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,scripts//py:light 5 | # text_representation: 6 | # extension: .py 7 | # format_name: light 8 | # format_version: '1.5' 9 | # jupytext_version: 1.15.1 10 | # kernelspec: 11 | # display_name: Python 3 (ipykernel) 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | # # T1 - Data assimilation (DA) & the ensemble Kalman filter (EnKF) 17 | # *Copyright (c) 2020, Patrick N. Raanes 18 | # $ 19 | # % ######################################## Loading TeX (MathJax)... Please wait ######################################## 20 | # \newcommand{\Reals}{\mathbb{R}} \newcommand{\Expect}[0]{\mathbb{E}} \newcommand{\NormDist}{\mathscr{N}} \newcommand{\DynMod}[0]{\mathscr{M}} \newcommand{\ObsMod}[0]{\mathscr{H}} \newcommand{\mat}[1]{{\mathbf{{#1}}}} \newcommand{\bvec}[1]{{\mathbf{#1}}} \newcommand{\trsign}{{\mathsf{T}}} \newcommand{\tr}{^{\trsign}} \newcommand{\ceq}[0]{\mathrel{≔}} \newcommand{\xDim}[0]{D} \newcommand{\supa}[0]{^\text{a}} \newcommand{\supf}[0]{^\text{f}} \newcommand{\I}[0]{\mat{I}} \newcommand{\K}[0]{\mat{K}} \newcommand{\bP}[0]{\mat{P}} \newcommand{\bH}[0]{\mat{H}} \newcommand{\bF}[0]{\mat{F}} \newcommand{\R}[0]{\mat{R}} \newcommand{\Q}[0]{\mat{Q}} \newcommand{\B}[0]{\mat{B}} \newcommand{\C}[0]{\mat{C}} \newcommand{\Ri}[0]{\R^{-1}} \newcommand{\Bi}[0]{\B^{-1}} \newcommand{\X}[0]{\mat{X}} \newcommand{\A}[0]{\mat{A}} \newcommand{\Y}[0]{\mat{Y}} \newcommand{\E}[0]{\mat{E}} \newcommand{\U}[0]{\mat{U}} \newcommand{\V}[0]{\mat{V}} \newcommand{\x}[0]{\bvec{x}} \newcommand{\y}[0]{\bvec{y}} \newcommand{\z}[0]{\bvec{z}} \newcommand{\q}[0]{\bvec{q}} \newcommand{\br}[0]{\bvec{r}} \newcommand{\bb}[0]{\bvec{b}} \newcommand{\bx}[0]{\bvec{\bar{x}}} \newcommand{\by}[0]{\bvec{\bar{y}}} \newcommand{\barB}[0]{\mat{\bar{B}}} \newcommand{\barP}[0]{\mat{\bar{P}}} \newcommand{\barC}[0]{\mat{\bar{C}}} \newcommand{\barK}[0]{\mat{\bar{K}}} \newcommand{\D}[0]{\mat{D}} \newcommand{\Dobs}[0]{\mat{D}_{\text{obs}}} \newcommand{\Dmod}[0]{\mat{D}_{\text{obs}}} \newcommand{\ones}[0]{\bvec{1}} \newcommand{\AN}[0]{\big( \I_N - \ones \ones\tr / N \big)} 21 | # $ 22 | 23 | # ### Jupyter 24 | # The "document" you're currently reading is a *Jupyter notebook*. 25 | # As you can see, it consists of a sequence of **cells**, 26 | # which can be code (Python) or text (markdown). 27 | # For example, try editing the cell below (double-click it) 28 | # to insert your name, and running it. 29 | 30 | name = "Batman" 31 | print("Hello world! 
I'm " + name) 32 | for i, c in enumerate(name): 33 | print(i, c) 34 | 35 | # You will likely be more efficient if you know these **keyboard shortcuts**: 36 | # 37 | # | Navigate | Edit | Exit | Run | Run & go to next | 38 | # |-------------------------------|-------------------|----------------|----------------------------------|-----------------------------------| 39 | # | and | Enter | Esc | Ctrl+Enter | Shift+Enter | 40 | # 41 | # Actually, a notebook connects to a background **session (kernel/runtime/interpreter)** of Python, and all of the code cells (in a given notebook) are connected, meaning that they share variables, functions, and classes. You can start afresh by clicking `restart` somewhere in the top menu bar. The **order** in which you run the cells matters, and from now on, 42 | # 43 | # the 1st code cell in each tutorial will be the following, which you must run before others. But if you're on Windows, then you must first delete the line starting with `!wget` (actually it's only needed when running on Google Colab). 44 | # 45 | 46 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials" 47 | # !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s 48 | from resources import show_answer, envisat_video 49 | 50 | # ### Python 51 | # 52 | # There is a huge amount of libraries available in **Python**, including the popular `scipy` and `matplotlib` packages, both with the essential `numpy` library at their core. They're usually abbreviated `sp`, `mpl` (and `plt`), and `np`. Try them out by running the following cell. 53 | 54 | # + 55 | import numpy as np 56 | import matplotlib.pyplot as plt 57 | plt.ion(); 58 | 59 | # Use numpy's arrays for vectors and matrices. Example constructions: 60 | a = np.arange(10) # Alternatively: np.array([0,1,2,3,4,5,6,7,8,9]) 61 | I = 2*np.eye(10) # Alternatively: np.diag(2*np.ones(10)) 62 | 63 | print("Indexing examples:") 64 | print("a =", a) 65 | print("a[3] =", a[3]) 66 | print("a[0:3] =", a[0:3]) 67 | print("a[:3] =", a[:3]) 68 | print("a[3:] =", a[3:]) 69 | print("a[-1] =", a[-1]) 70 | print("I[:3,:3] =", I[:3,:3], sep="\n") 71 | 72 | print("\nLinear algebra examples:") 73 | print("100+a =", 100+a) 74 | print("I@a =", I@a) 75 | print("I*a =", I*a, sep="\n") 76 | 77 | plt.title("Plotting example") 78 | plt.ylabel("i $x^2$") 79 | for i in range(4): 80 | plt.plot(i * a**2, label="i = %d"%i) 81 | plt.legend(); 82 | # - 83 | 84 | # These tutorials require that you are able to understand the above code, but not much beyond that. 85 | # Some exercises will ask you to do some programming, but understanding the pre-written code is also important. 86 | # The interesting parts of the code can all be found in the notebooks themselves 87 | # (as opposed to being hidden away via imports). 88 | # Beware, however, that it is not generally production-ready. 89 | # For example, it overuses global variables, and is lacking in vectorisation, 90 | # generally for the benefit of terseness and simplicity. 91 | 92 | # ### Data assimilation (DA) 93 | # 94 | # **State estimation** (a.k.a. **sequential inference**) 95 | # is the estimation of unknown/uncertain quantities of **dynamical systems** 96 | # based on imprecise (noisy) data/observations. This is similar to time series estimation and signal processing, 97 | # but focuse on the case where we have a good (skillful) predictive model of the dynamical system, 98 | # so that we can relate information (estimates) of its *state* at one time to another. 
99 | # 100 | # For example, in guidance systems, the *state variable* (vector) consists of at least 6 elements: 3 for the current position and 3 for velocity, whose trajectories we wish to track in time. More sophisticated systems can also include acceleration and/or angular quantities. The *dynamical model* then consists of the fact that displacement is the time integral of the velocity, while the velocity is the integral of acceleration. The noisy *observations* can come from altimetry, sextants, speedometers, compass readings, accelerometers, gyroscopes, or fuel-gauges. The essential point is that we have an *observational model* predicting the observations from the state. For example, the altimeter model is simply the function that selects the $z$ coordinate from the state vector, while the force experienced by an accelerometer can be modelled by Newton's second law of motion, $F = m a$. 101 | # 102 | # In the context of large dynamical systems, especially in geoscience 103 | # (climate, ocean, hydrology, petroleum) 104 | # state estimation is known as **data assimilation** (DA), 105 | # and is thought of as a "bridge" between data and models, 106 | # as illustrated on the right (source: https://aics.riken.jp/en) 107 | # DA "bridges" data and models. 108 | # For example, in weather applications, the dynamical model is an atmospheric fluid-mechanical simulator, the state variable consists of the fields of pressure, humidity, and wind quantities discretized on a grid, 109 | # and the observations may come from satellite or weather stations. 110 | # 111 | # The most famous state estimation technique is the ***Kalman filter (KF)***, which was developed to steer the Apollo mission rockets to the moon. The KF also has applications outside of control systems, such as speech recognition, video tracking, and finance. But when it was first proposed to apply the KF to DA (specifically, weather forecasting), the idea sounded ludicrous because of some severe **technical challenges in DA (vs. "classic" state estimation)**: 112 | # * size of data and models; 113 | # * nonlinearity of models; 114 | # * sparsity and inhomogeneous-ness of data. 115 | # 116 | # Some of these challenges may be recognized in the video below. Can you spot them? 117 | 118 | envisat_video() 119 | 120 | # ### The EnKF 121 | # The EnKF is an ensemble (Monte-Carlo) formulation of the KF 122 | # that manages (fairly well) to deal with the above challenges in DA. 123 | # 124 | # For those familiar with the method of 4D-Var, **further advantages of the EnKF** include it being: 125 | # * Non-invasive: the models are treated as black boxes, and no explicit Jacobian is required. 126 | # * Bayesian: 127 | # * provides an ensemble of possible realities; 128 | # - arguably the most practical form of "uncertainty quantification"; 129 | # - ideal way to initialize "ensemble forecasts"; 130 | # * uses "flow-dependent" background covariances in the analysis. 131 | # * Embarrassingly parallelizable: 132 | # * distributed across realizations for model forecasting; 133 | # * distributed across local domains for observation analysis. 134 | # 135 | # The rest of this tutorial provides an EnKF-centric presentation of DA. 136 | 137 | # ### DAPPER example 138 | # This tutorial builds on the underlying package, DAPPER, made for academic research in DA and its dissemination. For example, the code below is taken from `DAPPER/example_1.py`. It illustrates DA on a small toy problem. 
At the end of these tutorials, you should be able to reproduce (from the ground up) this type of experiment. 139 | # 140 | # Run the cells in order and try to interpret the output. 141 | # 142 | # Don't worry if you can't understand what's going on -- we will discuss it later throughout the tutorials. 143 | # 144 | # 145 | 146 | # + 147 | import dapper as dpr 148 | import dapper.da_methods as da 149 | 150 | # Load experiment setup: the hidden Markov model (HMM) 151 | from dapper.mods.Lorenz63.sakov2012 import HMM 152 | HMM.tseq.T = 30 # shorten experiment 153 | 154 | # Simulate synthetic truth (xx) and noisy obs (yy) 155 | xx, yy = HMM.simulate() 156 | 157 | # Specify a DA method configuration ("xp" is short for "experiment") 158 | # xp = da.OptInterp() 159 | # xp = da.Var3D() 160 | # xp = da.ExtKF(infl=90) 161 | xp = da.EnKF('Sqrt', N=10, infl=1.02, rot=True) 162 | # xp = da.PartFilt(N=100, reg=2.4, NER=0.3) 163 | 164 | # Assimilate yy, knowing the HMM; xx is used to assess the performance 165 | xp.assimilate(HMM, xx, yy) 166 | 167 | # #### Average the time series of various statistics 168 | # print(xp.stats) # ⇒ long printout 169 | xp.stats.average_in_time() 170 | 171 | print(xp.avrgs.tabulate(['rmse.a', 'rmv.a'])) 172 | # - 173 | 174 | xp.stats.replay() 175 | 176 | # Some more diagnostics 177 | if False: 178 | import dapper.tools.viz as viz 179 | viz.plot_rank_histogram(xp.stats) 180 | viz.plot_err_components(xp.stats) 181 | viz.plot_hovmoller(xx) 182 | 183 | # ### Vocabulary exercises 184 | # **Exc -- Word association:** 185 | # Fill in the `x`'s in the table to group the words with similar meaning. 186 | # 187 | # `Sample, Random, Measurements, Forecast initialisation, Monte-Carlo, Observations, Set of draws` 188 | # 189 | # - Ensemble, x, x 190 | # - Stochastic, x, x 191 | # - Data, x, x 192 | # - Filtering, x 193 | # 194 | 195 | # + 196 | # show_answer('thesaurus 1') 197 | # - 198 | 199 | # * "The answer" is given from the perspective of DA. Do you agree with it? 200 | # * Can you describe the (important!) nuances between the similar words? 201 | 202 | # **Exc (optional) -- Word association 2:** 203 | # Also group these words: 204 | # 205 | # `Inverse problems, Operator, Sample point, Transform(ation), Knowledge, Relation, Probability, Mapping, Particle, Sequential, Inversion, Realization, Relative frequency, Information, Iterative, Estimate, Estimation, Single draw, Serial, Regression, Model, Fitting, Uncertainty` 206 | # 207 | # - Statistical inference, x, x, x, x, x 208 | # - Ensemble member, x, x, x, x 209 | # - Quantitative belief, x, x, x, x, x, x 210 | # - Recursive, x, x, x 211 | # - Function, x, x, x, x, x 212 | 213 | # + 214 | # show_answer('thesaurus 2') 215 | # - 216 | 217 | # **Exc (optional) -- intro discussion:** Prepare to discuss the following questions. Use any tool at your disposal. 218 | # * (a) What is a "dynamical system"? 219 | # * (b) What are "state variables"? How do they differ from parameters? 220 | # * (c) What are "prognostic" variables? How do they differ from "diagnostic" variables? 221 | # * (d) What is DA? 222 | # * (e) Is DA a science, an engineering art, or a dark art? 223 | # * (f) What is the point of "Hidden Markov Models"? 
224 | 225 | # + 226 | # show_answer('Discussion topics 1') 227 | # - 228 | 229 | # ### Next: [T2 - Gaussian distribution](T2%20-%20Gaussian%20distribution.ipynb) 230 | -------------------------------------------------------------------------------- /notebooks/scripts/T2 - Gaussian distribution.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,scripts//py 5 | # text_representation: 6 | # extension: .py 7 | # format_name: light 8 | # format_version: '1.5' 9 | # jupytext_version: 1.15.1 10 | # kernelspec: 11 | # display_name: Python 3 (ipykernel) 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials" 17 | # !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s 18 | 19 | from resources import show_answer, interact 20 | # %matplotlib inline 21 | import numpy as np 22 | import scipy as sp 23 | import matplotlib.pyplot as plt 24 | plt.ion(); 25 | 26 | # Before discussing sequential, time-dependent inference, 27 | # we need to know how to estimate unknowns based on a single data/observations (vector). 28 | # But before discussing *Bayes' rule*, 29 | # we should review the most useful of probability distributions. 30 | # # T2 - The Gaussian (Normal) distribution 31 | # $ 32 | # % ######################################## Loading TeX (MathJax)... Please wait ######################################## 33 | # \newcommand{\Reals}{\mathbb{R}} \newcommand{\Expect}[0]{\mathbb{E}} \newcommand{\NormDist}{\mathscr{N}} \newcommand{\DynMod}[0]{\mathscr{M}} \newcommand{\ObsMod}[0]{\mathscr{H}} \newcommand{\mat}[1]{{\mathbf{{#1}}}} \newcommand{\bvec}[1]{{\mathbf{#1}}} \newcommand{\trsign}{{\mathsf{T}}} \newcommand{\tr}{^{\trsign}} \newcommand{\ceq}[0]{\mathrel{≔}} \newcommand{\xDim}[0]{D} \newcommand{\supa}[0]{^\text{a}} \newcommand{\supf}[0]{^\text{f}} \newcommand{\I}[0]{\mat{I}} \newcommand{\K}[0]{\mat{K}} \newcommand{\bP}[0]{\mat{P}} \newcommand{\bH}[0]{\mat{H}} \newcommand{\bF}[0]{\mat{F}} \newcommand{\R}[0]{\mat{R}} \newcommand{\Q}[0]{\mat{Q}} \newcommand{\B}[0]{\mat{B}} \newcommand{\C}[0]{\mat{C}} \newcommand{\Ri}[0]{\R^{-1}} \newcommand{\Bi}[0]{\B^{-1}} \newcommand{\X}[0]{\mat{X}} \newcommand{\A}[0]{\mat{A}} \newcommand{\Y}[0]{\mat{Y}} \newcommand{\E}[0]{\mat{E}} \newcommand{\U}[0]{\mat{U}} \newcommand{\V}[0]{\mat{V}} \newcommand{\x}[0]{\bvec{x}} \newcommand{\y}[0]{\bvec{y}} \newcommand{\z}[0]{\bvec{z}} \newcommand{\q}[0]{\bvec{q}} \newcommand{\br}[0]{\bvec{r}} \newcommand{\bb}[0]{\bvec{b}} \newcommand{\bx}[0]{\bvec{\bar{x}}} \newcommand{\by}[0]{\bvec{\bar{y}}} \newcommand{\barB}[0]{\mat{\bar{B}}} \newcommand{\barP}[0]{\mat{\bar{P}}} \newcommand{\barC}[0]{\mat{\bar{C}}} \newcommand{\barK}[0]{\mat{\bar{K}}} \newcommand{\D}[0]{\mat{D}} \newcommand{\Dobs}[0]{\mat{D}_{\text{obs}}} \newcommand{\Dmod}[0]{\mat{D}_{\text{obs}}} \newcommand{\ones}[0]{\bvec{1}} \newcommand{\AN}[0]{\big( \I_N - \ones \ones\tr / N \big)} 34 | # $ 35 | # Computers generally represent functions *numerically* by their values on a grid 36 | # of points (nodes), an approach called ***discretisation***. 37 | # Don't hesitate to change the grid resolution as you go along! 38 | 39 | bounds = -20, 20 40 | N = 201 # num of grid points 41 | grid1d = np.linspace(*bounds,N) # grid 42 | dx = grid1d[1] - grid1d[0] # grid spacing 43 | 44 | 45 | # ## The univariate (a.k.a. 
1-dimensional, scalar) case 46 | # Consider the Gaussian random variable $x \sim \NormDist(\mu, \sigma^2)$. 47 | # Its probability density function (**pdf**), 48 | # $ 49 | # p(x) = \NormDist(x \mid \mu, \sigma^2) 50 | # $ for $x \in (-\infty, +\infty)$, 51 | # is given by 52 | # $$\begin{align} 53 | # \NormDist(x \mid \mu, \sigma^2) = (2 \pi \sigma^2)^{-1/2} e^{-(x-\mu)^2/2 \sigma^2} \,. \tag{G1} 54 | # \end{align}$$ 55 | # 56 | # Run the cell below to define a function to compute the pdf (G1) using the `scipy` library. 57 | 58 | def pdf_G1(x, mu, sigma2): 59 | "Univariate Gaussian pdf" 60 | pdf_values = sp.stats.norm.pdf(x, loc=mu, scale=np.sqrt(sigma2)) 61 | return pdf_values 62 | 63 | 64 | # The following code plots the Gaussian pdf. 65 | 66 | hist = [] 67 | @interact(mu=bounds, sigma=(.1, 10, 1)) 68 | def plot_pdf(mu=0, sigma=5): 69 | plt.figure(figsize=(6, 2)) 70 | colors = plt.get_cmap('hsv')([(k-len(hist))%9/9 for k in range(9)]) 71 | plt.xlim(*bounds) 72 | plt.ylim(0, .2) 73 | hist.insert(0, pdf_G1(grid1d, mu, sigma**2)) 74 | for density_values, color in zip(hist, colors): 75 | plt.plot(grid1d, density_values, c=color) 76 | plt.show() 77 | 78 | # #### Exc -- parameter influence 79 | # Play around with `mu` and `sigma` to answer these questions: 80 | # * How does the pdf curve change when `mu` changes? 81 | # * How does the pdf curve change when you increase `sigma`? 82 | # * In a few words, describe the shape of the Gaussian pdf curve. 83 | # Does this ring a bell? *Hint: it should be clear as a bell!* 84 | 85 | # **Exc -- Implementation:** Change the implementation of `pdf_G1` so as to not use `scipy`, but your own code (using `numpy` only). Re-run all of the above cells and check that you get the same plots as before. 86 | # *Hint: `**` is the exponentiation/power operator, but $e^x$ is more efficiently computed with `np.exp(x)`* 87 | 88 | # + 89 | # show_answer('pdf_G1') 90 | # - 91 | 92 | # **Exc -- Derivatives:** Recall $p(x) = \NormDist(x \mid \mu, \sigma^2)$ from eqn (G1). 93 | # Use pen, paper, and calculus to answer the following questions, 94 | # which derive some helpful mnemonics about the distribution. 95 | # 96 | # * (i) Find $x$ such that $p(x) = 0$. 97 | # * (ii) Where is the location of the **mode (maximum)** of the density? 98 | # I.e. find $x$ such that $\frac{d p}{d x}(x) = 0$. 99 | # *Hint: begin by writing $p(x)$ as $c e^{- J(x)}$ for some $J(x)$.* 100 | # * (iii) Where is the **inflection point**? I.e. where $\frac{d^2 p}{d x^2}(x) = 0$. 101 | # * (iv) *Optional*: Some forms of *sensitivity analysis* (typically for non-Gaussian $p$) consist in estimating/approximating the Hessian, i.e. $\frac{d^2 \log p}{d x^2}$. Explain what this has to do with *uncertainty quantification*. 102 | 103 | # #### Exc (optional) -- Probability and Change of variables 104 | # Let $z = \phi(x)$ for some monotonic function $\phi$, 105 | # and $p_x$ and $p_z$ be their probability density functions (pdf). 106 | # - (a): Show that $p_z(z) = p_x\big(\phi^{-1}(z)\big) \frac{1}{|\phi'(z)|}$, 107 | # - (b): Recall the definition of the expectation, $ \Expect[x] ≔ \int x \, p_x(x) \, d x $, where ***the integral is over the domain*** 108 | # (i.e. from $-\infty$ to $+\infty$ in the case of Gaussian distributions). 109 | # Show that you don't need to derive the density of $z$ in order to compute its expectation, i.e. 
that 110 | # $$ \Expect[z] = \int \phi(x) \, p_x(x) \, d x ≕ \Expect[\phi(x)] \,,$$ 111 | # *Hint: while the proof is convoluted, the result itself is [pretty intuitive](https://en.wikipedia.org/wiki/Law_of_the_unconscious_statistician).* 112 | 113 | # + 114 | # show_answer('CVar in proba') 115 | # - 116 | 117 | # #### Exc (optional) -- Integrals 118 | # Recall $p(x) = \NormDist(x \mid \mu, \sigma^2)$ from eqn (G1). Abbreviate it using $c = (2 \pi \sigma^2)^{-1/2}$. 119 | # Use pen, paper, and calculus to show that 120 | # - (i) the first parameter, $\mu$, indicates its **mean**, i.e. that $$\mu = \Expect[x] \,.$$ 121 | # *Hint: you can rely on the result of (iii)* 122 | # - (ii) the second parameter, $\sigma^2>0$, indicates its **variance**, 123 | # i.e. that $$\sigma^2 = \mathbb{Var}(x) \mathrel{≔} \Expect[(x-\mu)^2] \,.$$ 124 | # *Hint: use $x^2 = x x$ to enable integration by parts.* 125 | # - (iii) $E[1] = 1$, 126 | # thus proving that (G1) indeed uses the right normalising constant. 127 | # *Hint: Neither Bernouilli and Laplace managed this, 128 | # until Gauss did by first deriving $(E[1])^2$. 129 | # For more (visual) help, watch [3Blue1Brown](https://www.youtube.com/watch?v=cy8r7WSuT1I&t=3m52s).* 130 | 131 | # + 132 | # show_answer('Gauss integrals') 133 | # - 134 | 135 | # **Exc -- The uniform pdf**: 136 | # Below is the pdf of the [uniform/flat/box distribution](https://en.wikipedia.org/wiki/Uniform_distribution_(continuous)) 137 | # for a given mean and variance. 138 | # - Replace `_G1` by `_U1` in the code generating the above interactive plot. 139 | # - Why are the walls (ever so slightly) inclined? 140 | # - Write your own implementation below, and check that it reproduces the `scipy` version already in place. 141 | 142 | def pdf_U1(x, mu, sigma2): 143 | a = mu - np.sqrt(3*sigma2) 144 | b = mu + np.sqrt(3*sigma2) 145 | pdf_values = sp.stats.uniform(loc=a, scale=(b-a)).pdf(x) 146 | # Your own implementation: 147 | # height = ... 148 | # pdf_values = height * np.ones_like(x) 149 | # pdf_values[xb] = ... 151 | return pdf_values 152 | 153 | 154 | # + 155 | # show_answer('pdf_U1') 156 | # - 157 | 158 | # ## The multivariate (i.e. vector) case 159 | # Here's the pdf of the *multivariate* Gaussian (for any dimension $\ge 1$): 160 | # $$\begin{align} 161 | # \NormDist(\x \mid \mathbf{\mu}, \mathbf{\Sigma}) 162 | # &= 163 | # |2 \pi \mathbf{\Sigma}|^{-1/2} \, \exp\Big(-\frac{1}{2}\|\x-\mathbf{\mu}\|^2_\mathbf{\Sigma} \Big) \,, \tag{GM} 164 | # \end{align}$$ 165 | # where $|.|$ represents the matrix determinant, 166 | # and $\|.\|_\mathbf{W}$ represents a weighted 2-norm: $\|\x\|^2_\mathbf{W} = \x^T \mathbf{W}^{-1} \x$. 167 | # *PS: The norm (quadratic form) is invariant to antisymmetry in the weight matrix, 168 | # so we take $\mathbf{\Sigma}$ to be symmetric. 169 | # Further, the density (GM) is only integrable over $\Reals^{\xDim}$ if $\mathbf{\Sigma}$ is positive-definite.* 170 | # 171 | # It is important to recognize how similar eqn. (GM) is to the univariate (scalar) case (G1). 172 | # Moreover, [as above](#Exc-(optional)----Integrals) it can be shown that 173 | # - $\mathbf{\mu} = \Expect[\x]$, 174 | # - $\mathbf{\Sigma} = \Expect[(\x-\mu)(\x-\mu)\tr]$. 175 | # 176 | # Note that that the elements of $\mathbf{\Sigma}$ are individual covariances, 177 | # $\Sigma_{i,j} = \Expect[(x_i-\mu_i)(x_j-\mu_j)] = \mathbb{Cov}(x_i, x_j)$. 178 | # Therefore $\mathbf{\Sigma}$ is called the *covariance (matrix)*. 
179 | # and its diagonal entries are simply variances, $\Sigma_{i,i} = \mathbb{Var}(x_i)$. 180 | # 181 | # The following implements the pdf (GM). Take a moment to digest the code, but don't worry if you don't understand it all. Hints: 182 | # * `@` produces matrix multiplication (`*` in `Matlab`); 183 | # * `*` produces array multiplication (`.*` in `Matlab`); 184 | # * `axis=-1` makes `np.sum()` work along the last dimension of an ND-array. 185 | 186 | # + 187 | from numpy.linalg import det, inv 188 | 189 | def weighted_norm22(points, Wi): 190 | "Computes the weighted norm of each vector (row in `points`)." 191 | return np.sum( (points @ inv(Wi)) * points, axis=-1) 192 | 193 | def pdf_GM(points, mu, Sigma): 194 | "pdf -- Gaussian, Multivariate: N(x | mu, Sigma) for each x in `points`." 195 | c = np.sqrt(det(2*np.pi*Sigma)) 196 | return 1/c * np.exp(-0.5*weighted_norm22(points - mu, Sigma)) 197 | 198 | 199 | # - 200 | 201 | # The following code plots the pdf as contour (iso-density) curves. 202 | 203 | # + 204 | grid2d = np.dstack(np.meshgrid(grid1d, grid1d)) 205 | 206 | @interact(corr=(-1, 1, .001), std_x=(1e-5, 10, 1)) 207 | def plot_pdf_G2(corr=0.7, std_x=1): 208 | # Form covariance matrix (C) from input and some constants 209 | var_x = std_x**2 210 | var_y = 1 211 | cv_xy = np.sqrt(var_x * var_y) * corr 212 | C = 25 * np.array([[var_x, cv_xy], 213 | [cv_xy, var_y]]) 214 | # Evaluate (compute) 215 | density_values = pdf_GM(grid2d, mu=0, Sigma=C) 216 | # Plot 217 | plt.figure(figsize=(4, 4)) 218 | height = 1/np.sqrt(det(2*np.pi*C)) 219 | plt.contour(grid1d, grid1d, density_values, 220 | levels=np.linspace(1e-4, height, 11), cmap="plasma") 221 | plt.axis('equal'); 222 | plt.show() 223 | # - 224 | 225 | # **Exc -- Correlation influence:** How do the contours look? Try to understand why. Cases: 226 | # * (a) correlation=0. 227 | # * (b) correlation=0.99. 228 | # * (c) correlation=0.5. (Note that we've used `plt.axis('equal')`). 229 | # * (d) correlation=0.5, but with non-equal variances. 230 | # 231 | # Finally (optional): why does the code "crash" when `corr = +/- 1` ? Is this a good or a bad thing? 232 | # *Hint: do you like playing with fire?* 233 | 234 | # **Exc Correlation game:** Play [here](http://guessthecorrelation.com/) until you get a score (gold coins) of 5 or more. 235 | # *PS: you can probably tell that the samples are not drawn from Gaussian distributions. However, the quantitiy $\mathbb{Cov}(x_i, x_i)$ is well defined and can be estimated from the samples.* 236 | 237 | # **Exc -- Correlation disambiguation:** 238 | # * What's the difference between correlation and covariance? 239 | # * What's the difference between non-zero (C) correlation (or covariance) and (D) dependence? 240 | # *Hint: consider this [image](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#/media/File:Correlation_examples2.svg).* 241 | # - Does $C \Rightarrow D$ or the converse? 242 | # - What about the negation, $\neg D \Rightarrow \neg C$, or its converse?* 243 | # - What about the the (jointly) Gaussian case? 244 | # * Does correlation (or dependence) imply causation? 245 | # * Suppose $x$ and $y$ have non-zero correlation, but neither one causes the other. 246 | # Does information about $y$ give you information about $x$? 247 | 248 | # **Exc (optional) -- Gaussian ubuiqity:** Why are we so fond of the Gaussian assumption? 249 | 250 | # + 251 | # show_answer('Why Gaussian') 252 | # - 253 | 254 | # ## Summary 255 | # The Normal/Gaussian distribution is bell-shaped. 
256 | # Its parameters are the mean and the variance. 257 | # In the multivariate case, the mean is a vector, 258 | # while the second parameter becomes a covariance *matrix*, 259 | # whose off-diagonal elements represent scaled correlation factors, 260 | # which measure *linear* dependence. 261 | # 262 | # ### Next: [T3 - Bayesian inference](T3%20-%20Bayesian%20inference.ipynb) 263 | -------------------------------------------------------------------------------- /notebooks/scripts/T4 - Time series filtering.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,scripts//py 5 | # text_representation: 6 | # extension: .py 7 | # format_name: light 8 | # format_version: '1.5' 9 | # jupytext_version: 1.15.1 10 | # kernelspec: 11 | # display_name: Python 3 (ipykernel) 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials" 17 | # !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s 18 | 19 | from resources import show_answer, interact, cInterval 20 | # %matplotlib inline 21 | import numpy as np 22 | import numpy.random as rnd 23 | import matplotlib.pyplot as plt 24 | plt.ion(); 25 | 26 | # # T4 - Time series filtering 27 | # Before we look at the full (multivariate) Kalman filter, 28 | # let's get more familiar with time-dependent (temporal/sequential) problems. 29 | # $ 30 | # % ######################################## Loading TeX (MathJax)... Please wait ######################################## 31 | # \newcommand{\Reals}{\mathbb{R}} \newcommand{\Expect}[0]{\mathbb{E}} \newcommand{\NormDist}{\mathscr{N}} \newcommand{\DynMod}[0]{\mathscr{M}} \newcommand{\ObsMod}[0]{\mathscr{H}} \newcommand{\mat}[1]{{\mathbf{{#1}}}} \newcommand{\bvec}[1]{{\mathbf{#1}}} \newcommand{\trsign}{{\mathsf{T}}} \newcommand{\tr}{^{\trsign}} \newcommand{\ceq}[0]{\mathrel{≔}} \newcommand{\xDim}[0]{D} \newcommand{\supa}[0]{^\text{a}} \newcommand{\supf}[0]{^\text{f}} \newcommand{\I}[0]{\mat{I}} \newcommand{\K}[0]{\mat{K}} \newcommand{\bP}[0]{\mat{P}} \newcommand{\bH}[0]{\mat{H}} \newcommand{\bF}[0]{\mat{F}} \newcommand{\R}[0]{\mat{R}} \newcommand{\Q}[0]{\mat{Q}} \newcommand{\B}[0]{\mat{B}} \newcommand{\C}[0]{\mat{C}} \newcommand{\Ri}[0]{\R^{-1}} \newcommand{\Bi}[0]{\B^{-1}} \newcommand{\X}[0]{\mat{X}} \newcommand{\A}[0]{\mat{A}} \newcommand{\Y}[0]{\mat{Y}} \newcommand{\E}[0]{\mat{E}} \newcommand{\U}[0]{\mat{U}} \newcommand{\V}[0]{\mat{V}} \newcommand{\x}[0]{\bvec{x}} \newcommand{\y}[0]{\bvec{y}} \newcommand{\z}[0]{\bvec{z}} \newcommand{\q}[0]{\bvec{q}} \newcommand{\br}[0]{\bvec{r}} \newcommand{\bb}[0]{\bvec{b}} \newcommand{\bx}[0]{\bvec{\bar{x}}} \newcommand{\by}[0]{\bvec{\bar{y}}} \newcommand{\barB}[0]{\mat{\bar{B}}} \newcommand{\barP}[0]{\mat{\bar{P}}} \newcommand{\barC}[0]{\mat{\bar{C}}} \newcommand{\barK}[0]{\mat{\bar{K}}} \newcommand{\D}[0]{\mat{D}} \newcommand{\Dobs}[0]{\mat{D}_{\text{obs}}} \newcommand{\Dmod}[0]{\mat{D}_{\text{obs}}} \newcommand{\ones}[0]{\bvec{1}} \newcommand{\AN}[0]{\big( \I_N - \ones \ones\tr / N \big)} 32 | # $ 33 | 34 | # ## Example problem: AR(1) 35 | # Consider the scalar, stochastic process $\{x_k\}$ generated by 36 | # $$ x_{x+1} = \DynMod_k x_k + q_k \,, \tag{Dyn} $$ 37 | # for sequentially increasing time index $k$, 38 | # where $q_k$ is white noise ($q_k$ independent of $q_l$ for $k \neq l$). 
39 | # For our present purposes, the dynamical "model", 40 | # $\DynMod_k$ is just some number that we know. 41 | # Merely to alleviate the burden of bookkeeping, 42 | # we henceforth assume that it is constant in time. 43 | # Then $\{x_k\}$ is a so-called order-1 auto-regressive process, 44 | # i.e. [AR(1)](https://en.wikipedia.org/wiki/Autoregressive_model#Example:_An_AR(1)_process). 45 | # Suppose we get observations, $\{y_k\}$, corrupted by noise, as in 46 | # $$ y_k = \ObsMod_k x_k + r_k \,, \tag{Obs} $$ 47 | # where the noise, $r_k$, is again independent of everything. 48 | # Moreover, for simplicity, 49 | # assume that the measurement model, $\ObsMod$, is independent of $k$, 50 | # and for each $k$, let 51 | # - $q_k \sim \NormDist(0, Q)$, 52 | # - $r_k \sim \NormDist(0, R)$. 53 | # 54 | # Also assume $x_0 \sim \NormDist(x\supa_0, P\supa_0)$. 55 | # The code below simulates a random realisation of this process. 56 | 57 | # + 58 | # Use H=1 so that it makes sense to plot data on same axes as state. 59 | H = 1 60 | 61 | # Initial estimate 62 | xa = 0 # mean 63 | Pa = 10 # variance 64 | 65 | def simulate(nTime, xa, Pa, M, H, Q, R): 66 | """Simulate synthetic truth (x) and observations (y).""" 67 | x = xa + np.sqrt(Pa)*rnd.randn() # Draw initial condition 68 | truths = np.zeros(nTime) # Allocate 69 | obsrvs = np.zeros(nTime) # Allocate 70 | for k in range(nTime): # Loop in time 71 | x = M * x + np.sqrt(Q)*rnd.randn() # Dynamics 72 | y = H * x + np.sqrt(R)*rnd.randn() # Measurement 73 | truths[k] = x # Assign 74 | obsrvs[k] = y # Assign 75 | return truths, obsrvs 76 | 77 | 78 | # - 79 | 80 | # The following plots the process. *You don't need to read & understand it*, 81 | # but if you find that there are not enough sliders to play around with, 82 | # feel free to alter the code to suit your needs 83 | # (for example, you can comment out the line plotting observations, or `cInterval`). 84 | # *PS: Some of the sliders get activated later.* 85 | 86 | @interact(seed=(1, 12), M=(0, 1.03, .01), nTime=(0, 100), 87 | logR=(-9, 9), logR_bias=(-9, 9), 88 | logQ=(-9, 9), logQ_bias=(-9, 9)) 89 | def exprmt(seed=4, nTime=50, M=0.97, logR=1, logQ=1, analyses_only=False, logR_bias=0, logQ_bias=0): 90 | R, Q, Q_bias, R_bias = 4.0**np.array([logR, logQ, logQ_bias, logR_bias]) 91 | 92 | rnd.seed(seed) 93 | truths, obsrvs = simulate(nTime, xa, Pa, M, H, Q, R) 94 | 95 | plt.figure(figsize=(9, 6)) 96 | kk = 1 + np.arange(nTime) 97 | plt.plot(kk, truths, 'k' , label='True state ($x$)') 98 | plt.plot(kk, obsrvs, 'g*', label='Noisy obs ($y$)', ms=9) 99 | 100 | try: 101 | estimates, variances = KF(nTime, xa, Pa, M, H, Q*Q_bias, R*R_bias, obsrvs) 102 | if analyses_only: 103 | plt.plot(kk, estimates[:, 1], label='Kalman$^a$ ± 1$\sigma$') 104 | plt.fill_between(kk, *cInterval(estimates[:, 1], variances[:, 1]), alpha=.2) 105 | else: 106 | kk2 = kk.repeat(2) 107 | plt.plot(kk2, estimates.flatten(), label='Kalman ± 1$\sigma$') 108 | plt.fill_between(kk2, *cInterval(estimates, variances), alpha=.2) 109 | except NameError: 110 | pass 111 | 112 | sigproc = {} 113 | ### INSERT ANSWER TO EXC "signal processing" HERE ### 114 | # sigproc['some method'] = ... 115 | for method, estimate in sigproc.items(): 116 | plt.plot(kk[:len(estimate)], estimate, label=method) 117 | 118 | plt.xlabel('Time index (k)') 119 | plt.legend(loc='upper left') 120 | plt.axhline(0, c='k', lw=1, ls='--') 121 | plt.show() 122 | 123 | 124 | # **Exc -- AR1 properties:** Answer the following. 125 | # - What does `seed` control? 
126 | # - Explain what happens when `M=0`. Also consider $Q \rightarrow 0$. 127 | # Can you give a name to this `truth` process, 128 | # i.e. a link to the relevant Wikipedia page? 129 | # What about when `M=1`? 130 | # Describe the general nature of the process as `M` changes from 0 to 1. 131 | # What about when `M>1`? 132 | # - What happens when $R \rightarrow 0$ ? 133 | # - What happens when $R \rightarrow \infty$ ? 134 | 135 | # + 136 | # show_answer('AR1') 137 | # - 138 | 139 | # ## The (univariate) Kalman filter (KF) 140 | 141 | # Now we have a random variable that evolves in time, that we can pretend is unknown, 142 | # in order to estimate (or "track") it. 143 | # More specifically, while $x_0$ is not strictly known, 144 | # it is assumed that $p(x_0) = \NormDist(x_0 | x\supa_0, P\supa_0)$ with known parameters. 145 | # We also know that $x_k$ evolves according to eqn. (Dyn). 146 | # Therefore, as shown in the following exercise, 147 | # $p(x_1) = \NormDist(x_1 | x\supf_1, P\supf_1)$, with 148 | # $$\begin{align} 149 | # x\supf_k &= \DynMod \, x\supa_{k-1} \tag{5} \\ 150 | # P\supf_k &= \DynMod^2 \, P\supa_{k-1} + Q \tag{6} 151 | # \end{align}$$ 152 | # 153 | # Formulae (5) and (6) are called the **forecast step** of the KF. 154 | # But when $y_1$ becomes available, according to eqn. (Obs), 155 | # then we can update/condition our estimate of $x_1$, i.e. compute the posterior, 156 | # $p(x_1 | y_1) = \NormDist(x_1 \mid x\supa_1, P\supa_1) \,,$ 157 | # using the formulae we developed for Bayes' rule with 158 | # [Gaussian distributions](T3%20-%20Bayesian%20inference.ipynb#Gaussian-Gaussian-Bayes'-rule-(1D)). 159 | # 160 | # $$\begin{align} 161 | # P\supa_k&= 1/(1/P\supf_k + \ObsMod^2/R) \,, \tag{7} \\\ 162 | # x\supa_k &= P\supa_k (x\supf/P\supf_k + \ObsMod y_k/R) \,. \tag{8} 163 | # \end{align}$$ 164 | # 165 | # We call this the **analysis step** of the KF. 166 | # We can subsequently apply the same two steps again 167 | # to produce forecast and analysis estimates for the next time index, $k+1$. 168 | # Note that if $k$ is a date index, then we can say that "yesterday's forecast becomes today's prior". 169 | # 170 | # #### Exc -- linear algebra of Gaussian random variables 171 | # - (a) Show the linearity of the expectation operator: 172 | # $\Expect [ \DynMod x + b ] = \DynMod \Expect[x] + b$, for some constant $b$. 173 | # - (b) Thereby, show that $\mathbb{Var}[ \DynMod x + b ] = \DynMod^2 \mathbb{Var} [x]$. 174 | # - (c) *Optional*: Now let $z = x + q$, with $x$ and $q$ independent and Guassian. 175 | # Then the pdf of this sum of random variables, $p_z(z)$, is given by convolution 176 | # (hopefully this makes intuitive sense, at least in the discrete case): 177 | # $$ p_z(z) = \int p_x(x) \, p_q(z - x) \, d x \,.$$ 178 | # Show that $z$ is also Gaussian, 179 | # whose mean and variance are the sum of the means and variances (respectively). 180 | # *Hint: you will need the result on [completing the square](T3%20-%20Bayesian%20inference.ipynb#Exc----GG-Bayes), 181 | # specifically the part that we did not make use of for Bayes' rule. 182 | # If you get stuck, you can also view the excellent [3blue1brown](https://www.youtube.com/watch?v=d_qvLDhkg00&t=266s&ab_channel=3Blue1Brown) on the topic.* 183 | 184 | # + 185 | # show_answer('Sum of Gaussians', 'a') 186 | # - 187 | 188 | # #### Exc (optional) -- The Bayesian filtering recursion 189 | 190 | # In the particular case of linearity and Gaussianity as assumed above, 191 | # the KF computes the *exact* Bayesian pdf's for $x_k$. 
192 | # But even without them, a general/abstract Bayesian **recursive** procedure can still be formulated, 193 | # relying solely on the remaining ("hidden Markov model") assumptions. 194 | # Prove the following formulae: 195 | # 196 | # - The analysis "assimilates" $y_k$ to compute $p(x_k | y_{1:k})$, 197 | # where $y_{1:k} = y_1, \ldots, y_k$ is shorthand notation. 198 | # $$ 199 | # p(x_k | y_{1:k}) \propto p(y_k | x_k) \, p(x_k | x_{1:k-1}) 200 | # $$ 201 | # - The forecast "propagates" the estimate with its uncertainty 202 | # to produce $p(x_{k+1}| y_{1:k})$. 203 | # $$ 204 | # p(x_{k+1} | y_{1:k}) = \int p(x_{k+1} | x_k) \, p(x_k | y_{1:k}) \, d x_k 205 | # $$ 206 | # 207 | # It is important to appreciate two benefits of the recursion. 208 | # 209 | # - The recursiveness of the procedure reflects the recursiveness (Markov property) of nature: 210 | # Both in the problem and our solution, time $k+1$ *builds on* time $k$. 211 | # It means that we do not have to re-do the entire problem for each $k$. 212 | # - At every time $k$ we only deal with functions of 1 or 2 variables: $x_k$ and $x_{k+1}$. 213 | # This is a significantly smaller domain 214 | # (in which to quanitify our densities or covariances) than that of the joint pdf $p(x_{1:k} | y_{1:k})$. 215 | # Ref. [curse of dimensionality](T3%20-%20Bayesian%20inference.ipynb#Exc-(optional)----Curse-of-dimensionality,-part-1). 216 | # 217 | # Note, however, that our recursive procedure, called ***filtering***, 218 | # does *not* compute $p(x_l | y_{1:k})$ for any $l 1$. 278 | # *Hint: Look for the fixed point of the recursion of part (a).* 279 | 280 | # + 281 | # show_answer('Asymptotic Riccati', 'a') 282 | # - 283 | 284 | # **Exc (optional) -- Temporal CV, part 2:** 285 | # Now we don't assume that $Q$ is zero. Instead 286 | # - (a) Suppose $\DynMod = 0$. What does $P\supa_k$ equal? 287 | # - (b) Suppose $\DynMod = 1$. Show that $P\supa_\infty$ 288 | # satisfies the quadratic equation: $0 = P^2 + Q P - Q R$. 289 | # Thereby, without solving the quadratic equation, show that 290 | # - (c) $P\supa_\infty \rightarrow R$ (from below) if $Q \rightarrow +\infty$. 291 | # - (d) $P\supa_\infty \rightarrow \sqrt{ Q R}$ (from above) if $Q \rightarrow 0^+$. 292 | 293 | # + 294 | # show_answer('Asymptotes when Q>0') 295 | # - 296 | 297 | # #### Exc (optional) -- Analytic simplification in the case of an unknown constant 298 | # 299 | # - Note that in case $Q = 0$, 300 | # then $x_{k+1} = \DynMod^k x_0$. 301 | # - So if $\DynMod = 1$, then $x_k = x_0$, so we are estimating an unknown *constant*, 302 | # and can drop its time index subscript. 303 | # - For simplicity, assume $\ObsMod = 1$, and $P^a_0 \rightarrow +\infty$. 304 | # - Then $p(x | y_{1:k}) \propto \exp \big\{- \sum_l \| y_l - x \|^2_R / 2 \big\} 305 | # = \NormDist(x | \bar{y}, R/k )$, which again follows by completing the square. 306 | # - In words, the (accumulated) posterior mean is the sample average, 307 | # $\bar{y} = \frac{1}{k}\sum_l y_l$, 308 | # and the variance is that of a single observation divided by $k$. 309 | # 310 | # Show that this is the same posterior that the KF recursions produce. 311 | # *Hint: while this is straightforward for the variance, 312 | # you will probably want to prove the mean using induction.* 313 | 314 | # #### Exc -- Impact of biases 315 | # Re-run the above interative animation to set the default control values. Answer the following 316 | # 317 | # - `logR_bias`/`logQ_bias` control the (multiplicative) bias in $R$/$Q$ that is fed to the KF. 
318 | # What happens when the KF "thinks" the measurement/dynamical error 319 | # is (much) smaller than it actually is? 320 | # What about larger? 321 | # - Re-run the animation to get default values. 322 | # Set `logQ` to 0, which will make the following behaviour easier to describe. 323 | # In the code, add 20 to the initial `xa` **given to the KF**. 324 | # How long does it take for it to recover from this initial bias? 325 | # - Multiply `Pa` **given to the KF** by 0.01. What about now? 326 | # - Remove the previous biases. 327 | # Instead, multiply `M` **given to the KF** by 2, and observe what happens. 328 | # Try the same, but dividing `M` by 2. 329 | 330 | # + 331 | # show_answer('KF with bias') 332 | # - 333 | 334 | # ## Alternative methods 335 | # 336 | # When it comes to (especially univariate) time series analysis, 337 | # the Kalman filter (KF) is not the only game in town. 338 | # For example, **signal processing** offers several alternative filters. 339 | # Indeed, the word "filter" in the KF originates in that domain, 340 | # where it originally referred to the removal of high-frequency noise, 341 | # since this tends to coincide with an improved estimate of the signal. 342 | # We will not review any signal processing theory here, 343 | # but challenge you to make use of what `scipy` already has to offer. 344 | # 345 | # #### Exc (optional) -- signal processing 346 | # Run the following cell to import and define some more tools. 347 | 348 | import scipy as sp 349 | import scipy.signal as sig 350 | def nrmlz(x): 351 | return x / x.sum() 352 | def trunc(x, n): 353 | return np.pad(x[:n], (0, len(x)-n)) 354 | 355 | # Now try to "filter" the `obsrvs` to produce estimates of `truth`. 356 | # In each case, add your estimate ("filtered signal" in that domain's parlance) 357 | # to the `sigproc` dictionnary in the interactive animation cell, 358 | # with an appropriate name/key (this will automatically include it in the plotting). 359 | # Use 360 | # - (a) [`sig.wiener`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.wiener.html). 361 | # *PS: this is a direct ancestor of the KF*. 362 | # - (b) a moving average, for example [`sig.windows.hamming`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.windows.hamming.html). 363 | # *Hint: you may also want to use [`sig.convolve`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.convolve.html#scipy.signal.convolve)*. 364 | # - (c) a low-pass filter using [`np.fft`](https://docs.scipy.org/doc/scipy/reference/fft.html#). 365 | # *Hint: you may also want to use the above `trunc` function.* 366 | # - (d) The [`sig.butter`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.butter.html) filter. 367 | # *Hint: apply with [`sig.filtfilt`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.filtfilt.html).* 368 | # - (e) not really a signal processing method: [`sp.interpolate.UniveriateSpline`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.UnivariateSpline.html) 369 | # 370 | # The answers should be considered examples, not the uniquely right way. 371 | 372 | # + 373 | # show_answer('signal processing', 'a') 374 | # - 375 | 376 | # But for the above problem (which is linear-Gaussian!), 377 | # the KF is guaranteed (on average, in the long run, in terms of mean square error) 378 | # to outperform any other method. 
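# One way to make this claim concrete is to compare empirical mean-square errors.
# The following is only a sketch (not part of the exercises): it assumes you paste it
# at the end of the `exprmt` function above, where `truths`, `estimates`, `sigproc` and
# `nTime` are in scope, and that you have added at least one method to `sigproc`.

# +
# mse = {"KF (analyses)": np.mean((estimates[:, 1] - truths)**2)}
# for method, estimate in sigproc.items():
#     n = min(len(estimate), nTime)
#     mse[method] = np.mean((np.asarray(estimate)[:n] - truths[:n])**2)
# print(mse)
# -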
379 | # We will see cases later (of full-blown state estimation) 380 | # where the difference is much clearer, 381 | # and indeed it might not even be clear how to apply signal processing methods. 382 | # However, the KF has an unfair advantage: we are giving it a ton of information 383 | # about the problem (`M, H, R, Q`) that the signal processing methods do not get. 384 | # Therefore, they typically also require a good deal of tuning 385 | # (in practice, so does the KF, since `Q` and `R` are rarely well determined). 386 | 387 | # ## Summary 388 | # The Kalman filter (KF) can be derived by applying linear-Gaussian assumptions 389 | # to a sequential inference problem. 390 | # Generally, the uncertainty never converges to 0, 391 | # and the performance of the filter is wholly contingent on 392 | # accurate system parameters (models and error covariance matrices). 393 | # 394 | # As a subset of state estimation (i.e. the KF) we can do time series estimation 395 | # [(wherein state-estimation is called state-space approach)](https://www.google.com/search?q="We+now+demonstrate+how+to+put+these+models+into+state+space+form"). 396 | # Moreover, DA methods produce uncertainty quantification, something which is usually more obscure with time series analysis methods. 397 | # 398 | # ### Next: [T5 - Multivariate Kalman filter](T5%20-%20Multivariate%20Kalman%20filter.ipynb) 399 | -------------------------------------------------------------------------------- /notebooks/scripts/T5 - Multivariate Kalman filter.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,scripts//py:light 5 | # text_representation: 6 | # extension: .py 7 | # format_name: light 8 | # format_version: '1.5' 9 | # jupytext_version: 1.15.1 10 | # kernelspec: 11 | # display_name: Python 3 (ipykernel) 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials" 17 | # !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s 18 | 19 | from resources import show_answer, interact, cInterval 20 | # %matplotlib inline 21 | import numpy as np 22 | import numpy.random as rnd 23 | from scipy.linalg import inv 24 | import matplotlib.pyplot as plt 25 | plt.ion(); 26 | 27 | 28 | # # T5 - The Kalman filter (KF) -- multivariate 29 | # Dealing with vectors and matrices is a lot like plain numbers. But some things get more complicated. 30 | # $ 31 | # % ######################################## Loading TeX (MathJax)... 
Please wait ######################################## 32 | # \newcommand{\Reals}{\mathbb{R}} \newcommand{\Expect}[0]{\mathbb{E}} \newcommand{\NormDist}{\mathscr{N}} \newcommand{\DynMod}[0]{\mathscr{M}} \newcommand{\ObsMod}[0]{\mathscr{H}} \newcommand{\mat}[1]{{\mathbf{{#1}}}} \newcommand{\bvec}[1]{{\mathbf{#1}}} \newcommand{\trsign}{{\mathsf{T}}} \newcommand{\tr}{^{\trsign}} \newcommand{\ceq}[0]{\mathrel{≔}} \newcommand{\xDim}[0]{D} \newcommand{\supa}[0]{^\text{a}} \newcommand{\supf}[0]{^\text{f}} \newcommand{\I}[0]{\mat{I}} \newcommand{\K}[0]{\mat{K}} \newcommand{\bP}[0]{\mat{P}} \newcommand{\bH}[0]{\mat{H}} \newcommand{\bF}[0]{\mat{F}} \newcommand{\R}[0]{\mat{R}} \newcommand{\Q}[0]{\mat{Q}} \newcommand{\B}[0]{\mat{B}} \newcommand{\C}[0]{\mat{C}} \newcommand{\Ri}[0]{\R^{-1}} \newcommand{\Bi}[0]{\B^{-1}} \newcommand{\X}[0]{\mat{X}} \newcommand{\A}[0]{\mat{A}} \newcommand{\Y}[0]{\mat{Y}} \newcommand{\E}[0]{\mat{E}} \newcommand{\U}[0]{\mat{U}} \newcommand{\V}[0]{\mat{V}} \newcommand{\x}[0]{\bvec{x}} \newcommand{\y}[0]{\bvec{y}} \newcommand{\z}[0]{\bvec{z}} \newcommand{\q}[0]{\bvec{q}} \newcommand{\br}[0]{\bvec{r}} \newcommand{\bb}[0]{\bvec{b}} \newcommand{\bx}[0]{\bvec{\bar{x}}} \newcommand{\by}[0]{\bvec{\bar{y}}} \newcommand{\barB}[0]{\mat{\bar{B}}} \newcommand{\barP}[0]{\mat{\bar{P}}} \newcommand{\barC}[0]{\mat{\bar{C}}} \newcommand{\barK}[0]{\mat{\bar{K}}} \newcommand{\D}[0]{\mat{D}} \newcommand{\Dobs}[0]{\mat{D}_{\text{obs}}} \newcommand{\Dmod}[0]{\mat{D}_{\text{obs}}} \newcommand{\ones}[0]{\bvec{1}} \newcommand{\AN}[0]{\big( \I_N - \ones \ones\tr / N \big)} 33 | # $ 34 | 35 | # ## Another time series problem, now multivariate 36 | # 37 | # Recall the AR(1) process from the previous tutorial: $x_{k+1} = \DynMod x_k + q_k$. 38 | # - It could result from discretizing [exponential decay](https://en.wikipedia.org/wiki/Exponential_decay): 39 | # $\frac{d x}{d t} = - \beta x \,,$ for some $\beta \geq 0$, and 40 | # adding some white noise, $\frac{d q}{d t}$. 41 | # - Discretisation 42 | # - using explicit-Euler produces $\DynMod = (1 - \beta\, \Delta t)$, 43 | # - using implicit-Euler produces $\DynMod = 1/(1 + \beta\, \Delta t)$. 44 | # - such that $x_{k+1}$ equals the analytic solution requires $\DynMod = e^{- \beta\, \Delta t}$. 45 | # - *PS: note that the 1-st order Taylor expansion of each scheme is the same.* 46 | # - Recall that $\{x_k\}$ became a (noisy) constant (horizontal) line when $\DynMod = 1$, 47 | # which makes sense since then $\beta = 0$. 48 | # Similarly, a straight (sloping) line would result from 49 | # $\frac{d^2 x}{d t^2} = 0 \,.$ 50 | # 51 | # To make matters more interesting we're now going to consider the $\xDim$-th order model: 52 | # $\displaystyle \frac{d^{\xDim} x}{d t^\xDim} = 0 \,.$ 53 | # - This can be rewritten as a 1-st order *vector* (i.e. coupled system of) ODE: 54 | # $\frac{d x_i}{d t} = x_{i+1} \,,$ and $x_{{\xDim}+1} = 0$ 55 | # where the subscript $i$ is now instead the *index* of the state vector element. 56 | # - Again we include noise, $\frac{d q_i}{d t}$, 57 | # and damping (exponential decay), $- \beta x_i$, to each component. 58 | # - In total, $ \frac{d x_i}{d t} = x_{i+1} - \beta x_i + \frac{d q_i}{d t} \, .$ 59 | # - Discretizing with time step $\Delta t=1$ produces 60 | # $ x_{k+1, i} = x_{k, i+1} + 0.9 x_{k, i} + q_{k, i}\,,$ 61 | # i.e. $\beta = 0.1$ or $\beta = -\log(0.9)$ depending on which scheme was used. 62 | # 63 | # Thus, $\x_{k+1} = \DynMod \x_k + \q_k$, with $\DynMod$ the matrix specified below. 
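# *PS: as an optional sanity check of the claim above that the three discretisation
# schemes agree to first order, the snippet below (not needed for anything that follows,
# and with purely illustrative values) compares them for a small $\beta\, \Delta t$.*

# +
# Compare the three discretisations of exponential decay for beta*dt = 0.1
beta, Dt = 0.1, 1.0
M_explicit = 1 - beta*Dt            # explicit (forward) Euler
M_implicit = 1/(1 + beta*Dt)        # implicit (backward) Euler
M_exact    = np.exp(-beta*Dt)       # exact solution factor
print(M_explicit, M_implicit, M_exact)  # ≈ 0.900, 0.909, 0.905
# -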
64 | 65 | # + 66 | xDim = 4 # state (x) length, also model order 67 | M = 0.9*np.eye(xDim) + np.diag(np.ones(xDim-1), 1) 68 | print("M =", M, sep="\n") 69 | 70 | nTime = 100 71 | Q = 0.01**2 * np.diag(1+np.arange(xDim)) 72 | # - 73 | 74 | # #### Observing system 75 | # The above will generate a $\xDim$-dimensional time series. 76 | # But we will only observe the 1st (`0`th in Python) element/component of the state vector. 77 | # We say that the other components are **hidden**. 78 | 79 | # + 80 | H = np.zeros((1, xDim)) 81 | H[0, 0] = 1.0 82 | print("H =", H) 83 | 84 | R = 30**2 * np.identity(1) 85 | # - 86 | 87 | # #### Simulation 88 | # The following simulates a synthetic truth (x) time series and observations (y). 89 | # In particular, note the use of `@` for matrix/vector algebra, in place of `*` as in the [scalar case of the previous tutorial](T4%20-%20Time%20series%20filtering.ipynb#Example-problem:-AR(1)). 90 | 91 | # + 92 | rnd.seed(4) 93 | 94 | # Initial condition 95 | xa = np.zeros(xDim) 96 | Pa = 0.1**2 * np.diag(np.arange(xDim)) 97 | x = xa + np.sqrt(Pa) @ rnd.randn(xDim) 98 | 99 | truths = np.zeros((nTime, xDim)) 100 | obsrvs = np.zeros((nTime, len(H))) 101 | for k in range(nTime): 102 | x = M @ x + np.sqrt(Q) @ rnd.randn(xDim) 103 | y = H @ x + np.sqrt(R) @ rnd.randn(1) 104 | truths[k] = x 105 | obsrvs[k] = y 106 | 107 | for i, x in enumerate(truths.T): 108 | magnification = (i+1)**4 # for illustration purposes 109 | plt.plot(magnification*x, label=fr"${magnification}\,x_{i}$") 110 | plt.legend(); 111 | # - 112 | # ## The KF forecast step 113 | # 114 | # The forecast step (and its derivation) remains essentially unchanged from the [univariate case](T4%20-%20Time%20series%20filtering.ipynb#The-(univariate)-Kalman-filter-(KF)). 115 | # The only difference is that $\DynMod$ is now a *matrix*, as well as the use of the transpose ${}^T$ in the covariance equation: 116 | # $\begin{align} 117 | # \x\supf_k 118 | # &= \DynMod_{k-1} \x\supa_{k-1} \,, \tag{1a} \\\ 119 | # \bP\supf_k 120 | # &= \DynMod_{k-1} \bP\supa_{k-1} \DynMod_{k-1}^T + \Q_{k-1} \,. \tag{1b} 121 | # \end{align}$ 122 | # 123 | # ## The KF analysis step 124 | # 125 | # It may be shown that the prior $p(\x) = \NormDist(\x \mid \x\supf,\bP\supf)$ 126 | # and likelihood $p(\y|\x) = \NormDist(\y \mid \ObsMod \x,\R)$, 127 | # yield the posterior: 128 | # \begin{align} 129 | # p(\x|\y) 130 | # &= \NormDist(\x \mid \x\supa, \bP\supa) \tag{4} 131 | # \,, 132 | # \end{align} 133 | # where the posterior/analysis mean (vector) and covariance (matrix) are given by: 134 | # \begin{align} 135 | # \bP\supa &= \big(\ObsMod\tr \Ri \ObsMod + (\bP\supf)^{-1}\big)^{-1} \,, \tag{5} \\ 136 | # \x\supa &= \bP\supa\left[\ObsMod\tr \Ri \y + (\bP\supf)^{-1} \x\supf\right] \tag{6} \,, 137 | # \end{align} 138 | # *PS: all of the objects in the analysis equations could also be subscripted by the time index ($k$), but that seems unnecessary (since it is the same one for all of the objects involved).* 139 | # 140 | # **Exc (optional) -- The 'precision' form of the KF:** Prove eqns (4-6). 
141 | # *Hint: similar to the [univariate case](T3%20-%20Bayesian%20inference.ipynb#Exc----GG-Bayes), the main part lies in "completing the square" in $\x$.* 142 | 143 | # + 144 | # show_answer('KF precision') 145 | # - 146 | 147 | # ## Implementation & illustration 148 | 149 | estims = np.zeros((nTime, 2, xDim)) 150 | covars = np.zeros((nTime, 2, xDim, xDim)) 151 | for k in range(nTime): 152 | # Forecast step 153 | xf = M@xa 154 | Pf = M@Pa@M.T + Q 155 | # Analysis update step 156 | y = obsrvs[k] 157 | Pa = inv( inv(Pf) + H.T@inv(R)@H ) 158 | xa = Pa @ ( inv(Pf)@xf + H.T@inv(R)@y ) 159 | # Assign 160 | estims[k] = xf, xa 161 | covars[k] = Pf, Pa 162 | 163 | # Using `inv` is very bad practice, since it is not numerically stable. 164 | # You generally want to use `scipy.linalg.solve` instead, or a more fine-grained matrix decomposition routine. 165 | # But that is not possible here, since we have no "right hand side" to solve for in the formula for `Pa`. 166 | # We'll address this point later. 167 | # 168 | # 169 | # *Caution!: Because of our haphazard use of global variables, re-running the KF (without re-running the truth-generating cell) will take as initial condition the endpoint of the previous run.* 170 | # 171 | # 172 | # Use the following to plot the result. 173 | 174 | fig, axs = plt.subplots(figsize=(10, 6), nrows=xDim, sharex=True) 175 | for i, (ax, truth, estim) in enumerate(zip(axs, truths.T, estims.T)): 176 | kk = 1 + np.arange(nTime) 177 | kk2 = kk.repeat(2) 178 | ax.plot(kk, truth, c='k') 179 | ax.plot(kk2, estim.T.flatten()) 180 | ax.fill_between(kk2, *cInterval(estim.T, covars[..., i, i]), alpha=.2) 181 | if i == 0 and H[0, 0] == 1 and np.sum(np.abs(H)) == 1: 182 | ax.plot(kk, obsrvs, '.') 183 | ax.set_ylabel(f"$x_{i}$") 184 | ax.set_xlim([0, nTime]) 185 | 186 | 187 | # Note that the other, *unobserved* components also get updated. As you can tell from eqn. (5), the KF will update such *hidden* components as long as $\bP\supf$ is not diagonal (i.e. as long as there are correlations between the state components). Let us inspect this correlation matrix. Run the cell below, and note that 188 | # - It converges in time to a fixed value, as we might expect from [T4](T4%20-%20Time%20series%20filtering.ipynb#Exc----Temporal-convergence). 189 | # - There are no negative correlations in this case, which is perhaps a bit boring. 190 | 191 | @interact(k=(1, nTime)) 192 | def plot_correlation_matrix(k=1, analysis=True): 193 | Pf, Pa = covars[k-1] 194 | covmat = Pa if analysis else Pf 195 | stds = np.sqrt(np.diag(covmat)) 196 | corrmat = covmat / np.outer(stds, stds) 197 | plt.matshow(corrmat, cmap='coolwarm', vmin=-1, vmax=+1) 198 | plt.grid(False) 199 | plt.colorbar(shrink=0.5) 200 | plt.show() 201 | 202 | # ## Woodbury and the Kalman gain 203 | # The KF formulae, as specified above, can be pretty expensive... 204 | # 205 | # #### Exc (optional) -- flops and MBs 206 | # Suppose the length of $\x$ is $\xDim$ and denote its covariance matrix by $\bP$. 207 | # * (a) What's the size of $\bP$? 208 | # * (b) To leading order, how many "flops" (elementary additions and multiplications) are required 209 | # to compute the "precision form" of the KF update equation, eqn (5) ? 210 | # *Hint: Assume the computationally demanding part is the [Cholesky decomposition](https://en.wikipedia.org/wiki/Cholesky_decomposition#Computation).* 211 | # * (c) How much memory (bytes) is required to hold its covariance matrix $\bP$ ? 
212 | # * (d) How many megabytes (MB) is that if $\xDim$ is a million, 213 | # as in our [$1^\circ$ (110km) resolution Earth atmosphere model](T3%20-%20Bayesian%20inference.ipynb#Exc-(optional)----Curse-of-dimensionality,-part-1). 214 | # * (e) How many times more MB or flops are needed if you double the resolution (in all 3 dimensions) ? 215 | 216 | # + 217 | # show_answer('nD-covars are big') 218 | # - 219 | 220 | # This is one of the principal reasons why basic extended KF is infeasible for DA. In the following we derive the "gain" form of the KF analysis update, which should help at least a little bit. 221 | # 222 | # #### Exc -- The "Woodbury" matrix inversion identity 223 | # The following is known as the Sherman-Morrison-Woodbury lemma/identity, 224 | # $$\begin{align} 225 | # \bP = \left( \B^{-1} + \V\tr \R^{-1} \U \right)^{-1} 226 | # = 227 | # \B - \B \V\tr \left( \R + \U \B \V\tr \right)^{-1} \U \B \,, 228 | # \tag{W} 229 | # \end{align}$$ 230 | # which holds for any (suitably shaped matrices) 231 | # $\B$, $\R$, $\V,\U$ *such that the above exists*. 232 | # 233 | # Prove the identity. *Hint: don't derive it, just prove it!* 234 | 235 | # + 236 | # show_answer('Woodbury general') 237 | # - 238 | 239 | # #### Exc (optional) -- Matrix shape compatibility 240 | # - Show that $\B$ and $\R$ must be square. 241 | # - Show that $\U$ and $\V$ are not necessarily square, but must have the same dimensions. 242 | # - Show that $\B$ and $\R$ are not necessarily of equal size. 243 | # 244 | 245 | # The above exercise makes it clear that the Woodbury identity may be used to compute $\bP$ by inverting matrices of the size of $\R$ rather than the size of $\B$. 246 | # Of course, if $\R$ is bigger than $\B$, then the identity is useful the other way around. 247 | 248 | # #### Exc (optional) -- Corollary 1 249 | # Prove that, for any symmetric, positive-definite 250 | # ([SPD](https://en.wikipedia.org/wiki/Definiteness_of_a_matrix#Properties)) 251 | # matrices $\R$ and $\B$, and any matrix $\ObsMod$, 252 | # $$\begin{align} 253 | # \left(\ObsMod\tr \R^{-1} \ObsMod + \B^{-1}\right)^{-1} 254 | # &= 255 | # \B - \B \ObsMod\tr \left( \R + \ObsMod \B \ObsMod\tr \right)^{-1} \ObsMod \B \tag{C1} 256 | # \,. 257 | # \end{align}$$ 258 | 259 | # + 260 | # show_answer('inv(SPD + SPD)') 261 | # - 262 | 263 | # #### Exc (optional) -- Corollary 2 264 | # Prove that, for the same matrices as for Corollary C1, 265 | # $$\begin{align} 266 | # \left(\ObsMod\tr \R^{-1} \ObsMod + \B^{-1}\right)^{-1}\ObsMod\tr \R^{-1} 267 | # &= \B \ObsMod\tr \left( \R + \ObsMod \B \ObsMod\tr \right)^{-1} 268 | # \tag{C2} 269 | # \, . 270 | # \end{align}$$ 271 | 272 | # + 273 | # show_answer('Woodbury C2') 274 | # - 275 | 276 | # #### Exc -- The "Gain" form of the KF 277 | # Now, let's go back to the KF, eqns (5) and (6). Since $\bP\supf$ and $\R$ are covariance matrices, they are symmetric-positive. In addition, we will assume that they are full-rank, making them SPD and invertible. 278 | # 279 | # Define the Kalman gain by: 280 | # $$\begin{align} 281 | # \K &= \bP\supf \ObsMod\tr \big(\ObsMod \bP\supf \ObsMod\tr + \R\big)^{-1} \,. \tag{K1} 282 | # \end{align}$$ 283 | # * (a) Apply (C1) to eqn (5) to obtain the Kalman gain form of analysis/posterior covariance matrix: 284 | # $$\begin{align} 285 | # \bP\supa &= [\I_{\xDim} - \K \ObsMod]\bP\supf \,. \tag{8} 286 | # \end{align}$$ 287 | # 288 | # * (b) Apply (C2) to (5) to obtain the identity 289 | # $$\begin{align} 290 | # \K &= \bP\supa \ObsMod\tr \R^{-1} \,. 
\tag{K2} 291 | # \end{align}$$ 292 | # 293 | # * (c) Show that $\bP\supa (\bP\supf)^{-1} = [\I_{\xDim} - \K \ObsMod]$. 294 | # * (d) Use (b) and (c) to obtain the Kalman gain form of analysis/posterior covariance 295 | # $$\begin{align} 296 | # \x\supa &= \x\supf + \K\left[\y - \ObsMod \x\supf\right] \, . \tag{9} 297 | # \end{align}$$ 298 | # Together, eqns (8) and (9) define the Kalman gain form of the KF update. 299 | # Note that the inversion (eqn 7) involved is of the size of $\R$, while in eqn (5) it is of the size of $\bP\supf$. 300 | # 301 | # #### Exc -- KF implemented with gain 302 | # Implement the Kalman gain form in place of the precision form of the KF, including 303 | # - Use `scipy.linalg.solve`. 304 | # - Re-run all cells. 305 | # - Verify that you get the same result as before. 306 | 307 | # ## Summary 308 | # We have derived two forms of the multivariate KF analysis update step: the 309 | # "precision matrix" form, and the "Kalman gain" form. The latter is especially 310 | # practical when the number of observations is smaller than the length of the 311 | # state vector. Still, the best is yet to come: the ability to handle very 312 | # large and chaotic systems 313 | # (which are more fun than stochastically driven signals such as above). 314 | # 315 | # ### Next: [T6 - Spatial statistics ("geostatistics") & Kriging](T6%20-%20Geostats%20%26%20Kriging%20(optional).ipynb) 316 | -------------------------------------------------------------------------------- /notebooks/scripts/T6 - Geostats & Kriging (optional).py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # cell_metadata_filter: -all 5 | # formats: ipynb,scripts//py 6 | # text_representation: 7 | # extension: .py 8 | # format_name: light 9 | # format_version: '1.5' 10 | # jupytext_version: 1.15.1 11 | # kernelspec: 12 | # display_name: Python 3 (ipykernel) 13 | # language: python 14 | # name: python3 15 | # --- 16 | 17 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials" 18 | # !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s 19 | 20 | from resources import show_answer, interact 21 | # %matplotlib inline 22 | import numpy as np 23 | import matplotlib.pyplot as plt 24 | import numpy.random as rnd 25 | import scipy.linalg as sla 26 | from mpl_tools.misc import nRowCol 27 | from mpl_tools.place import freshfig 28 | plt.ion(); 29 | 30 | # # T6 - Spatial statistics ("geostatistics") & Kriging 31 | # 32 | # Covariances between two (or a few) variables is very well, 33 | # but if you have not seen it before, the connection between covariances 34 | # and geophysical (spatial) fields may not be obvious. 35 | # The purpose of this tutorial is to familiarise you with random (spatial) fields 36 | # and their estimation. 37 | # $ 38 | # % ######################################## Loading TeX (MathJax)... 
Please wait ######################################## 39 | # \newcommand{\Reals}{\mathbb{R}} \newcommand{\Expect}[0]{\mathbb{E}} \newcommand{\NormDist}{\mathscr{N}} \newcommand{\DynMod}[0]{\mathscr{M}} \newcommand{\ObsMod}[0]{\mathscr{H}} \newcommand{\mat}[1]{{\mathbf{{#1}}}} \newcommand{\bvec}[1]{{\mathbf{#1}}} \newcommand{\trsign}{{\mathsf{T}}} \newcommand{\tr}{^{\trsign}} \newcommand{\ceq}[0]{\mathrel{≔}} \newcommand{\xDim}[0]{D} \newcommand{\supa}[0]{^\text{a}} \newcommand{\supf}[0]{^\text{f}} \newcommand{\I}[0]{\mat{I}} \newcommand{\K}[0]{\mat{K}} \newcommand{\bP}[0]{\mat{P}} \newcommand{\bH}[0]{\mat{H}} \newcommand{\bF}[0]{\mat{F}} \newcommand{\R}[0]{\mat{R}} \newcommand{\Q}[0]{\mat{Q}} \newcommand{\B}[0]{\mat{B}} \newcommand{\C}[0]{\mat{C}} \newcommand{\Ri}[0]{\R^{-1}} \newcommand{\Bi}[0]{\B^{-1}} \newcommand{\X}[0]{\mat{X}} \newcommand{\A}[0]{\mat{A}} \newcommand{\Y}[0]{\mat{Y}} \newcommand{\E}[0]{\mat{E}} \newcommand{\U}[0]{\mat{U}} \newcommand{\V}[0]{\mat{V}} \newcommand{\x}[0]{\bvec{x}} \newcommand{\y}[0]{\bvec{y}} \newcommand{\z}[0]{\bvec{z}} \newcommand{\q}[0]{\bvec{q}} \newcommand{\br}[0]{\bvec{r}} \newcommand{\bb}[0]{\bvec{b}} \newcommand{\bx}[0]{\bvec{\bar{x}}} \newcommand{\by}[0]{\bvec{\bar{y}}} \newcommand{\barB}[0]{\mat{\bar{B}}} \newcommand{\barP}[0]{\mat{\bar{P}}} \newcommand{\barC}[0]{\mat{\bar{C}}} \newcommand{\barK}[0]{\mat{\bar{K}}} \newcommand{\D}[0]{\mat{D}} \newcommand{\Dobs}[0]{\mat{D}_{\text{obs}}} \newcommand{\Dmod}[0]{\mat{D}_{\text{obs}}} \newcommand{\ones}[0]{\bvec{1}} \newcommand{\AN}[0]{\big( \I_N - \ones \ones\tr / N \big)} 40 | # $ 41 | 42 | # Set some parameters 43 | 44 | rnd.seed(3000) 45 | grid1D = np.linspace(0, 1, 21) 46 | N = 15 # ensemble size 47 | 48 | # ## Variograms 49 | # The "Variogram" of a field is essentially `1 - autocovariance`. Thus, it describes the spatial dependence of the field. The mean (1st moment) of a field is usually estimated and described/parametrized with trend lines/surfaces, while higher moments are usually not worth modelling. 50 | 51 | def variogram(dists, Range=1, kind="Gauss", nugget=0): 52 | """Compute variogram for distance points `dists`.""" 53 | dists = dists / Range 54 | if kind == "Spheric": 55 | gamma = 1.5 * dists - .5 * dists**3 56 | gamma[dists >= 1] = 1 57 | elif kind == "Expo": 58 | dists *= 3 # by convention 59 | gamma = 1 - np.exp(-dists) 60 | else: # "Gauss" 61 | dists *= 3 # by convention 62 | gamma = 1 - np.exp(-(dists)**2) 63 | # Include nugget (discontinuity at 0) 64 | gamma *= (1-nugget) 65 | gamma[dists != 0] += nugget 66 | return gamma 67 | 68 | 69 | # #### Plot 70 | 71 | @interact(Range=(.01, 4), nugget=(0.0, 1, .1)) 72 | def plot_variogram(Range=1, nugget=0): 73 | fig, ax = plt.subplots(figsize=(6, 3)) 74 | ax.set_xlim(0, 1) 75 | ax.set_ylim(0, 1) 76 | for i, kind in enumerate(["Spheric", "Expo", "Gauss"]): 77 | gamma = variogram(grid1D, Range, kind, nugget=nugget) 78 | ax.plot(grid1D, gamma, lw=2, color=f"C{i}", label=kind) 79 | ax.legend(loc="upper left") 80 | plt.show() 81 | 82 | 83 | # In order to apply the variogram, we must first compute distances. 84 | # The following is a fairly efficient implementation. 85 | 86 | def dist_euclid(A, B): 87 | """Compute the l2-norm between each point (row) of A and B""" 88 | diff = A[:, None, :] - B 89 | d2 = np.sum(diff**2, axis=-1) 90 | return np.sqrt(d2) 91 | 92 | # Now the full covariance (matrix) between any sets of points can be defined by the following. 
93 | 94 | def covar(coords, **vg_params): 95 | dists = dist_euclid(coords, coords) 96 | return 1 - variogram(dists, **vg_params) 97 | 98 | 99 | fig, ax = freshfig("1D covar") 100 | C = covar(grid1D[:, None], Range=1, kind="Gauss", nugget=1e-3) 101 | ax.matshow(C, cmap="RdBu"); 102 | 103 | 104 | # ## Random fields (1D) 105 | 106 | # Gaussian random variables (vectors) are fully specified by their mean and covariance. 107 | # Once in posession of a covariance matrix, we can use it to sample random variables 108 | # by multiplying its cholesky factor (square root) onto standard normal variables. 109 | 110 | def gaussian_fields(coords, **vg_params): 111 | """Gen. random (Gaussian) fields at `coords` (no structure/ordering required).""" 112 | C = covar(coords, **vg_params) 113 | L = sla.cholesky(C) 114 | fields = L.T @ rnd.randn(len(L.T), N) 115 | return fields 116 | 117 | # #### Exc 118 | # Use the plotting functionality below to 119 | # explain the effect of `Range` and `nugget` 120 | 121 | 122 | fig, ax = freshfig("1D random fields") 123 | fields = gaussian_fields(grid1D[:, None], Range=1, kind="Gauss", nugget=1e-3) 124 | ax.plot(grid1D, fields, lw=2); 125 | 126 | # ## Random fields (2D) 127 | # The following sets up a 2d grid. 128 | 129 | grid2x, grid2y = np.meshgrid(grid1D, grid1D) 130 | grid2x.shape 131 | 132 | # where `grid2y` has the same shape. 133 | # 134 | # However, in the following we will "flatten" (a.k.a."(un)ravel", "vectorize", or "string out") this explicitly 2D grid of nodes into a simple list of points in 2D. Importantly, none of the following methods actually assume any structure to the list. So we could also work with a completely irregularly spaced set of points. 135 | 136 | grid2D = np.dstack([grid2x, grid2y]).reshape((-1, 2)) 137 | grid2D.shape 138 | 139 | # For example, `gaussian_fields` is immediately applicable also to this 2D case. 140 | 141 | vg_params = dict(Range=1, kind="Gauss", nugget=1e-4) 142 | fields = gaussian_fields(grid2D, **vg_params) 143 | 144 | 145 | # Of course, for plotting purposes, we undo the flattening. 146 | 147 | # + 148 | def contour_plot(ax, field, cmap="nipy_spectral", levels=12, has_obs=True): 149 | field = field.reshape(grid2x.shape) # undo flattening 150 | if has_obs: 151 | ax.plot(*obs_coo.T, "ko", ms=4) 152 | ax.plot(*obs_coo.T, "yo", ms=1) 153 | ax.set(aspect="equal", xticks=[0, 1], yticks=[0, 1]) 154 | return ax.contourf(field, levels=levels, extent=(0, 1, 0, 1), 155 | cmap=cmap, vmin=vmin, vmax=vmax) 156 | 157 | # Fix the color scale for all subsequent `contour_plot`. 158 | # Use `None` to re-compute the color scale for each subplot. 159 | vmin = fields.min() 160 | vmax = fields.max() 161 | 162 | # + 163 | fig, axs = freshfig(num="2D random fields", figsize=(5, 4), 164 | nrows=3, ncols=4, sharex=True, sharey=True) 165 | 166 | for ax, field in zip(axs.ravel(), fields.T): 167 | contour_plot(ax, field, has_obs=False) 168 | # - 169 | 170 | # It might be interesting to inspect the covariance matrix in this 2D case. 171 | 172 | C = covar(grid2D, **vg_params) 173 | fig, ax = freshfig("2D covar") 174 | ax.matshow(C, cmap="RdBu", vmin=0, vmax=1); 175 | ax.grid(False) 176 | 177 | # ## Estimation problem 178 | 179 | # For our estimation target we will use one of the above generated random fields. 180 | 181 | truth = fields.T[0] 182 | 183 | # For the observations, we pick some random grid locations for simplicity 184 | # (even though the methods work also with observations not on grid nodes). 
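# *PS: note that `rnd.randint` below may draw the same grid node more than once.
# Duplicated observation locations would make the observation covariance matrix
# (`covar_yy` further down) exactly singular, since two of its rows would coincide.
# If you run into this, a simple workaround (an alternative sketch, not used here)
# is to sample without replacement:*

# +
# obs_idx = rnd.choice(len(grid2D), nObs, replace=False)
# -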
185 | 186 | nObs = 10 187 | obs_idx = rnd.randint(0, len(grid2D), nObs) 188 | obs_coo = grid2D[obs_idx] 189 | observations = truth[obs_idx] 190 | 191 | # ## Spatial interpolant methods 192 | 193 | # Pre-compute re-used objects 194 | dists_yy = dist_euclid(obs_coo, obs_coo) 195 | dists_xy = dist_euclid(grid2D, obs_coo) 196 | 197 | estims = dict(Truth=truth) 198 | vmin=truth.min() 199 | vmax=truth.max() 200 | 201 | 202 | # The cells below contain snippets of different spatial interpolation methods, 203 | # followed by a cell that plots the interpolants. 204 | # Complete the code snippets. 205 | 206 | # #### Exc: Nearest neighbour interpolation 207 | # Implement the method [(wikipedia)](https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation). 208 | 209 | nearest_obs = np.zeros_like(truth, dtype=int) ### FIX THIS ### 210 | estims["Nearest-n."] = observations[nearest_obs] 211 | 212 | # + 213 | # show_answer('nearest neighbour interp') 214 | # - 215 | 216 | # #### Exc: Inverse distance weighting 217 | # Implement the method [(wikipedia)](https://en.wikipedia.org/wiki/Inverse_distance_weighting). 218 | # *Hint*: You can ignore the `errstate` line below. It is just used to "silence warnings" resulting from division by 0 (whose special case is treated in a cell further down). 219 | 220 | exponent = 3 221 | with np.errstate(invalid='ignore', divide='ignore'): 222 | weights = np.zeros_like(dists_xy) ### FIX THIS ### 223 | 224 | # + 225 | # show_answer('inv-dist weight interp') 226 | # - 227 | 228 | # Apply weights 229 | estims["Inv-dist."] = weights @ observations 230 | 231 | # Fix singularities 232 | estims["Inv-dist."][obs_idx] = observations 233 | 234 | 235 | # #### Exc: Simple Kriging 236 | # Hint: use `sla.solve` or `sla.inv` (less recommended) 237 | 238 | ### ANSWER HERE ### 239 | covar_yy = ... 240 | cross_xy = ... 241 | regression_coefficients = weights ### FIX THIS ### -- should be cross_xy / covar_yy 242 | 243 | # + 244 | # show_answer('Kriging code') 245 | # - 246 | 247 | estims["Kriging"] = regression_coefficients @ observations 248 | 249 | 250 | # ### Plot truth, estimates, error 251 | 252 | # + 253 | fig, axs = freshfig(num="Estimation problem", figsize=(8, 4), squeeze=False, 254 | nrows=2, ncols=len(estims), sharex=True, sharey=True) 255 | 256 | for name, ax1, ax2 in zip(estims, *axs): 257 | ax1.set_title(name) 258 | c1 = contour_plot(ax1, estims[name]) 259 | c2 = contour_plot(ax2, estims[name] - truth, cmap="RdBu") 260 | fig.tight_layout() 261 | fig.subplots_adjust(right=0.85) 262 | cbar = fig.colorbar(c1, cax=fig.add_axes([0.9, 0.15, 0.03, 0.7])) 263 | 264 | 265 | # - 266 | 267 | # #### Exc: Try different values of `Range`. 268 | # - Run code to re-compute Kriging estimate. 269 | # - What does setting it to `0.1` cause? What about `100`? 
270 | 271 | @interact(Range=(.01, 40)) 272 | def plot_krieged(Range=1): 273 | vg_params['Range'] = Range 274 | covar_yy = 1 - variogram(dists_yy, **vg_params) 275 | cross_xy = 1 - variogram(dists_xy, **vg_params) 276 | regression_coefficients = sla.solve(covar_yy, cross_xy.T).T 277 | 278 | fig, ax = freshfig(num="Kriging estimates") 279 | c1 = contour_plot(ax, regression_coefficients @ observations) 280 | fig.colorbar(c1); 281 | plt.show() 282 | 283 | # #### Generalizations 284 | # 285 | # - Unknown mean (Ordinary Kriging) 286 | # - Co-Kriging (vector-valued fields) 287 | # - Trend surfaces (non-stationarity assumptions) 288 | 289 | 290 | # ## Summary 291 | # The covariances of random fields can sometimes be described by the autocorrelation function, 292 | # or equivalently, the (semi-)variogram. 293 | # Covariances form the basis of a family of (geo-)spatial interpolation and approximation 294 | # methods known as Kriging, which can also be called/interpreted as 295 | # **Radial basis function (RBF) interpolation**, 296 | # **Gaussian process regression** (GP) regression. 297 | # 298 | # - Kriging is derived by minimizing the variance of linear and unbiased estimators. 299 | # - RBF interpolation is derived by the explicit desire to fit 300 | # N functions to N data points (observations). 301 | # - GP regression is derived by conditioning (applying Bayes rule) 302 | # to the (supposedly) Gaussian distribution of the random field. 303 | # 304 | # ### Next: [T7 - Chaos & Lorenz](T7%20-%20Chaos%20%26%20Lorenz%20(optional).ipynb) 305 | -------------------------------------------------------------------------------- /notebooks/scripts/T7 - Chaos & Lorenz (optional).py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,scripts//py 5 | # text_representation: 6 | # extension: .py 7 | # format_name: light 8 | # format_version: '1.5' 9 | # jupytext_version: 1.15.1 10 | # kernelspec: 11 | # display_name: Python 3 (ipykernel) 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials" 17 | # !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s 18 | 19 | from resources import show_answer, interact, frame 20 | # %matplotlib inline 21 | import numpy as np 22 | import numpy.random as rnd 23 | import matplotlib.pyplot as plt 24 | plt.ion(); 25 | 26 | # # T7 - Chaos & Lorenz 27 | # ***Chaos*** 28 | # is also known as the butterfly effect: "a butterfly that flaps its wings in Brazil can 'cause' a hurricane in Texas". 29 | # As opposed to the opinions of Descartes/Newton/Laplace, chaos effectively means that even in a deterministic (non-stochastic) universe, we can only predict "so far" into the future. This will be illustrated below using two toy-model dynamical systems made by ***Edward Lorenz***. 30 | # $ 31 | # % ######################################## Loading TeX (MathJax)... 
Please wait ######################################## 32 | # \newcommand{\Reals}{\mathbb{R}} \newcommand{\Expect}[0]{\mathbb{E}} \newcommand{\NormDist}{\mathscr{N}} \newcommand{\DynMod}[0]{\mathscr{M}} \newcommand{\ObsMod}[0]{\mathscr{H}} \newcommand{\mat}[1]{{\mathbf{{#1}}}} \newcommand{\bvec}[1]{{\mathbf{#1}}} \newcommand{\trsign}{{\mathsf{T}}} \newcommand{\tr}{^{\trsign}} \newcommand{\ceq}[0]{\mathrel{≔}} \newcommand{\xDim}[0]{D} \newcommand{\supa}[0]{^\text{a}} \newcommand{\supf}[0]{^\text{f}} \newcommand{\I}[0]{\mat{I}} \newcommand{\K}[0]{\mat{K}} \newcommand{\bP}[0]{\mat{P}} \newcommand{\bH}[0]{\mat{H}} \newcommand{\bF}[0]{\mat{F}} \newcommand{\R}[0]{\mat{R}} \newcommand{\Q}[0]{\mat{Q}} \newcommand{\B}[0]{\mat{B}} \newcommand{\C}[0]{\mat{C}} \newcommand{\Ri}[0]{\R^{-1}} \newcommand{\Bi}[0]{\B^{-1}} \newcommand{\X}[0]{\mat{X}} \newcommand{\A}[0]{\mat{A}} \newcommand{\Y}[0]{\mat{Y}} \newcommand{\E}[0]{\mat{E}} \newcommand{\U}[0]{\mat{U}} \newcommand{\V}[0]{\mat{V}} \newcommand{\x}[0]{\bvec{x}} \newcommand{\y}[0]{\bvec{y}} \newcommand{\z}[0]{\bvec{z}} \newcommand{\q}[0]{\bvec{q}} \newcommand{\br}[0]{\bvec{r}} \newcommand{\bb}[0]{\bvec{b}} \newcommand{\bx}[0]{\bvec{\bar{x}}} \newcommand{\by}[0]{\bvec{\bar{y}}} \newcommand{\barB}[0]{\mat{\bar{B}}} \newcommand{\barP}[0]{\mat{\bar{P}}} \newcommand{\barC}[0]{\mat{\bar{C}}} \newcommand{\barK}[0]{\mat{\bar{K}}} \newcommand{\D}[0]{\mat{D}} \newcommand{\Dobs}[0]{\mat{D}_{\text{obs}}} \newcommand{\Dmod}[0]{\mat{D}_{\text{obs}}} \newcommand{\ones}[0]{\bvec{1}} \newcommand{\AN}[0]{\big( \I_N - \ones \ones\tr / N \big)} 33 | # $ 34 | 35 | # ## Dynamical systems 36 | # Dynamical system are systems (sets of equations) whose variables evolve in time (the equations contains time derivatives). As a branch of mathematics, its theory is mainly concerned with understanding the *behaviour* of solutions (trajectories) of the systems. 37 | # 38 | # Below is a function to numerically **integrate** 39 | # (i.e. step-wise evolve the system forward in time) a set of coupled ODEs. 40 | # It relies on `scipy`, but adds some conveniences, 41 | # notably taking advantage of Python's `**kwargs` (key-word argument) feature, 42 | # to define an internal `dxdt` whose only two arguments are 43 | # `x` for the current state, and `t` for time. 44 | 45 | # + 46 | from scipy.integrate import odeint 47 | from dapper.mods.integration import rk4 48 | dt = 0.01 49 | 50 | def integrate(dxdt, initial_states, final_time, **params): 51 | # Output shape: `(len(initial_states), nTime, len(x))` 52 | dxdt_fixed = lambda x, t: dxdt(x, t, **params) # Fix params 53 | time_steps = np.linspace(0, final_time, 1+int(final_time / dt)) 54 | integrated = [] 55 | ### Replace the following (in the next exercise) ### 56 | for x0 in initial_states: 57 | trajectory = odeint(dxdt_fixed, x0, time_steps) 58 | integrated.append(trajectory) 59 | return np.array(integrated), time_steps 60 | 61 | 62 | # - 63 | 64 | # In addition, it takes care of looping over `initial_states`, 65 | # computing a solution ("phase space trajectory") for each one, 66 | # so that we can ask it to compute multiple trajectories at once, 67 | # which we call Monte-Carlo simulation, or **ensemble forecasting**. 68 | # But *loops are generally slow in Python*. 69 | # Fortunately, for simple systems, 70 | # we can write our code such that the dynamics get independently (but simultaneously) computed for rows of a *matrix* (rather than a single vector), meaning that each row in the input produces a corresponding row in the output. 
This in effect leaves `numpy` to do the looping (which it does much quicker than pure Python). 71 | # Alternatively, since each simulation is completely independent of another realisation, 72 | # they can "embarrasingly" easily be parallelized, which is a good option if the system is very costly to simulate. 73 | # The exercise below challenges you to implement the first approach, resulting in much faster visualisation further below. 74 | # 75 | # #### Exc (optional) -- speed-up by vectorisation & parallelisation 76 | # Replace `odeint` in the code above by `rk4` (which does not care about the size/shape of the input, thereby allowing for matrices, i.e. ensembles). Note that the call signature of `rk4` is similar to `odeint`, except that `time_steps` must be replaced by `t` and `dt`. I.e. it only computes a single time step, `t + dt`, so you must loop over `time_steps` yourself. *Hint: `dxdt(x, t, ...)` generally expect axis-0 (i.e. rows) of `x` to be the dimensions of the state vector -- not independent realisations of the states.* 77 | 78 | # + 79 | # show_answer('rk4') 80 | # - 81 | 82 | # ## The Lorenz (1963) attractor 83 | # 84 | # The [Lorenz-63 dynamical system](https://en.wikipedia.org/wiki/Lorenz_system) can be derived as an extreme simplification of *Rayleigh-Bénard convection*: fluid circulation in a shallow layer of fluid uniformly heated (cooled) from below (above). 85 | # This produces the following 3 *coupled, nonlinear* ordinary differential equations (ODE): 86 | # 87 | # $$ 88 | # \begin{aligned} 89 | # \dot{x} & = \sigma(y-x) \\ 90 | # \dot{y} & = \rho x - y - xz \\ 91 | # \dot{z} & = -\beta z + xy 92 | # \end{aligned} 93 | # \tag{1} 94 | # $$ 95 | # 96 | # where the "dot" represents the time derivative, $\frac{d}{dt}$. The state vector is $\x = (x,y,z)$, and the parameters are typically set to $\sigma = 10, \beta=8/3, \rho=28$. The ODEs can be coded as follows (yes, Python supports Unicode, but it might be cumbersome to type out!) 97 | 98 | def dxdt63(state, time, σ, β, ρ): 99 | x, y, z = state 100 | return np.asarray([σ * (y - x), 101 | x * (ρ - z) - y, 102 | x * y - β * z]) 103 | 104 | 105 | # The following illustrated the system. 106 | 107 | store = ['placeholder'] 108 | @interact( σ=(0.,200), β=(0.,5), ρ=(0.,50), N=(1,100), ε=(0.01,10), Time=(0.,100), zoom=(.1, 4)) 109 | def plot_lorenz63(σ=10, β=8/3, ρ=28 , in3D=True, N=2, ε=0.01, Time=2.0, zoom=1): 110 | rnd.seed(23) 111 | initial_states = [-6.1, 1.2, 32.5] + ε*rnd.randn(N, 3) 112 | trajectories, times = integrate(dxdt63, initial_states, Time, σ=σ, β=β, ρ=ρ) 113 | store[0] = trajectories 114 | if in3D: 115 | ax = plt.figure().add_subplot(111, projection='3d') 116 | for orbit in trajectories: 117 | line, = ax.plot(*(orbit.T), lw=1, alpha=.5) 118 | ax.scatter3D(*orbit[-1], s=40, color=line.get_color()) 119 | ax.axis('off') 120 | frame(trajectories, ax, zoom) 121 | else: 122 | fig, axs = plt.subplots(3, sharex=True, figsize=(5, 4)) 123 | for dim, ax, orbits in zip('xyz', axs, trajectories.T): 124 | start = int(10/dt/zoom) 125 | ax.plot(times[-start:], orbits[-start:], lw=1, alpha=.5) 126 | ax.set_ylabel(dim) 127 | ax.set_xlabel('Time') 128 | plt.show() 129 | 130 | 131 | # #### Exc -- Bifurcation hunting 132 | # Classic linear stability analysis involves setting eqn. (1) to zero and considering the eigenvalues (and vectors) of its Jacobian matrix. Here we will go about it mainly by visually inspecting the numerical results of simulations. 
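# If you want to cross-check your visual findings numerically, the following is a minimal
# sketch (entirely optional, and not needed for the exercise) of such a linear stability
# analysis: it approximates the Jacobian of `dxdt63` by finite differences at a point of
# your choosing, so that you can inspect the eigenvalues there
# (negative real parts ⇒ attraction; non-zero imaginary parts ⇒ spiralling).

# +
def jacobian(f, x, t=0, eps=1e-6, **params):
    """Finite-difference approximation of df/dx at state `x` (sketch)."""
    x = np.asarray(x, dtype=float)
    J = np.zeros((len(x), len(x)))
    for j in range(len(x)):
        dx = np.zeros(len(x))
        dx[j] = eps
        J[:, j] = (f(x + dx, t, **params) - f(x - dx, t, **params)) / (2*eps)
    return J

# Example usage (parameter values are just an illustration):
# np.linalg.eigvals(jacobian(dxdt63, [0, 0, 0], σ=10, β=8/3, ρ=28))
# -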
133 | # Answer the following (to an approximate degree of precision) by gradually increasing $\rho$. 134 | # Leave the other model parameters at their defaults, but use `ε`, `N`, `Time` and `zoom` to your advantage. 135 | # - (a) What is the only fixed point for $\rho = 0$? 136 | # - (b) At what (larger) value of $\rho$ does this change? 137 | # What do you think happened to the original fixed point? 138 | # - (c) At what (larger) value of $\rho$ do we see an oscillating (spiraling) motion? 139 | # What do you think this entails for the aforementioned eigenvalues? 140 | # - (d) Describe the difference in character of the trajectories between $\rho=10$ and $\rho=20$. 141 | # - (e) At what (larger) values of $\rho$ do we get chaos? 142 | # In other words, when do the trajectories no longer converge to fixed points (or limit cycles)? 143 | # - (f) Also try $\rho=144$ (edit the code). What is the nature of the trajectories now? 144 | # - (g) *Optional*: Use pen and paper to show that the fixed points of the Lorenz system (1) are 145 | # indeed the origin as well as the roots of $x^2=\beta z$ with $y=x$, 146 | # but that the latter two only exist for $\rho > 1$. 147 | # 148 | # In conclusion, while a dynamical system naturally depends on its parameter values (almost by definition), the way in which its behaviour/character depends on them could come as a surprise. 149 | 150 | # + 151 | # show_answer("Bifurcations63") 152 | # - 153 | 154 | # #### Exc -- Doubling time 155 | # Re-run the animation cell to get default parameter values. 156 | # Visually investigate the system's (i.e. the trajectories') **sensitivity to initial conditions** by moving `Time`, `N` and `ε`. What do you reckon is the "doubling time" of the perturbations? I.e. how long do you think it takes (on average) for two trajectories to grow twice as far apart as they started (alternatives: 0.03, 0.3, 3, 30)? What are the implications for any prediction/forecasting we might attempt? 157 | 158 | # + 159 | # show_answer('Guesstimate 63') 160 | # - 161 | 162 | # ### Averages 163 | # 164 | # The result actually depends on where in "phase space" the particles started. For example, predictability in the Lorenz system is much shorter when the state is near the center, where the trajectories diverge into the two wings of the butterfly. So to get a universal answer one must average these experiments for many different initial conditions. 165 | # Alternatively, since the above system is [ergodic](https://en.wikipedia.org/wiki/Ergodic_theory#Ergodic_theorems), we could also average a single experiment over a very, very long time, obtaining the same statistics (assuming they have converged). Though not strictly implied, ergodicity is closely related to chaos. It means that 166 | # 167 | # - A trajectory/orbit never quite repeats (the orbit is aperiodic). 168 | # - The tracks of the orbits are sufficiently "dense" that they define a manifold 169 | # (something that looks like a surface, such as the butterfly wings above, 170 | # and for which we can speak of properties like derivatives and fractal dimension). 171 | # - Every part (of positive measure) of the manifold can be reached from any other. 172 | # - There is a probability density for the manifold, 173 | # quantifying the relative amount of time (of an infinite amount) 174 | # that the system spends in that neighbourhood. 175 | # 176 | # Set `N` and `Time` in the above interactive animation to their upper bounds (might take long to run!). 177 | # Execute the code cell below.
178 | # Do you think the samples behind the histograms are drawn from the same distribution? 179 | # In other words, is the Lorenz system ergodic? 180 | 181 | @interact() 182 | def histograms(): 183 | fig, axs = plt.subplots(ncols=3, sharey=True, figsize=(9, 3)) 184 | def hist(ax, sample, lbl): 185 | ax.hist(sample, density=1, bins=20, label=lbl, alpha=.5) 186 | 187 | trajectories63 = store[0] 188 | for i, (ax, lbl) in enumerate(zip(axs, "xyz")): 189 | hist(ax, trajectories63[:, -1, i], "at final time") 190 | hist(ax, trajectories63[-1, ::int(.2/dt), i], "of final member") 191 | ax.set_title(f"Component {lbl}") 192 | plt.legend(); 193 | 194 | 195 | # The long-run distribution of a system may be called its **climatology**. 196 | # A somewhat rudimentary weather forecasting initialisation (i.e. DA) technique, 197 | # called **optimal interpolation**, 198 | # consists in using the climatology as the prior (as opposed to yesterday's forecast) 199 | # when applying Bayes' rule (in its [Gaussian guise](T3%20-%20Bayesian%20inference.ipynb#Gaussian-Gaussian-Bayes'-rule-(1D))) to the observations of the day. 200 | 201 | # ## The Lorenz-96 model 202 | # 203 | # Lorenz-96 is a "spatially 1D" dynamical system of an astoundingly simple design that resembles atmospheric convection, 204 | # including nonlinear terms and chaoticity. 205 | # Each state variable $\x_i$ can be considered some atmospheric quantity at a grid point at a fixed latitude of Earth. The system 206 | # is given by the coupled set of ODEs, 207 | # $$ 208 | # \frac{d \x_i}{dt} = (\x_{i+1} − \x_{i-2}) \x_{i-1} − \x_i + F 209 | # \,, 210 | # \quad \quad i \in \{1,\ldots,\xDim\} 211 | # \,, 212 | # $$ 213 | # where the subscript indices apply periodically. 214 | # 215 | # This model is not derived from physics but has similar characteristics, such as 216 | #
217 | # - there is external forcing, determined by a parameter $F$;
218 | # - there is internal dissipation, emulated by the linear term;
219 | # - there is energy-conserving advection, emulated by quadratic terms.
220 | #
221 | # 222 | # [Further description in the very readable original article](https://www.ecmwf.int/sites/default/files/elibrary/1995/75462-predictability-problem-partly-solved_0.pdf). 223 | 224 | # **Exc (optional) -- Conservation of energy:** Show that the "total energy" $\sum_{i=1}^{\xDim} \x_i^2$ is preserved by the quadratic terms in the ODE. 225 | # *Hint: consider its time derivative.* 226 | 227 | # + 228 | # show_answer("Lorenz energy") 229 | # - 230 | 231 | # The model is animated below. 232 | 233 | # + 234 | def s(vector, n): 235 | return np.roll(vector, -n) 236 | 237 | def dxdt96(x, time, Force): 238 | return (s(x, 1) - s(x, -2)) * s(x, -1) - x + Force 239 | 240 | ylims = -10, 20 241 | # - 242 | 243 | store = ["placeholder"] 244 | @interact( xDim=(4,60,1), N=(1,30), Force=(0,15.), ε=(0.01,3,0.1), Time=(0.05,90,0.04)) 245 | def plot_lorenz96(xDim=40, N=2, Force=8, ε=0.01, Time=3): 246 | rnd.seed(23) 247 | initial_states = np.zeros((N, xDim)) 248 | initial_states[:, 0] = ε*(10 + rnd.randn(N)) 249 | trajectories, times = integrate(dxdt96, initial_states, Time, Force=Force) 250 | store[0] = trajectories 251 | 252 | plt.figure(figsize=(7, 4)) 253 | plt.plot(np.arange(xDim), trajectories[:, -1].T) 254 | plt.ylim(-10, 20) 255 | plt.show() 256 | 257 | # #### Exc -- Bifurcation hunting 96 258 | # Investigate by moving the sliders (but keep `xDim=40`): Under which settings of the force `F` 259 | # 260 | # - Do the solutions tend to the steady state $\x_i = F$ for all $i$ ? 261 | # - Are the solutions periodic? 262 | # - Is the system chaotic (i.e., the solutions are extremely sensitive to initial conditions, 263 | # meaning that the predictability horizon is finite) ? 264 | # 265 | # *PS: another way to visualise spatially 1D systems (or cross-sections) over time is the [Hovmöller diagram](https://en.wikipedia.org/wiki/Hovm%C3%B6ller_diagram), here represented for 1 realisation of the simulations.* 266 | 267 | @interact() 268 | def Hovmoller(): 269 | plt.contourf(store[0][0], cmap="viridis", vmin=ylims[0], vmax=ylims[1]) 270 | plt.colorbar(); 271 | plt.show() 272 | 273 | 274 | # + 275 | # show_answer('Bifurcations96', 'a') 276 | # - 277 | 278 | # #### Exc (optional) -- Doubling time 279 | # Maximise `N` (for a large sample), minimise `ε` (to approach linear conditions) and set `Time=1` (a reasonable first guess). Compute a rough estimate of the doubling time in the cell below from the data in `store[0]`, which holds the trajectories, and has shape `(N, len(times))`. 280 | # *Hint: The theory for these questions will be described in further detail in the following section.* 281 | 282 | # + 283 | # show_answer("doubling time") 284 | # - 285 | 286 | # ## The double pendulum 287 | 288 | # The [double pendulum](https://en.wikipedia.org/wiki/Double_pendulum) is another classic example of a chaotic system. 289 | # It is a little longer to implement, so we'll just load it from [DAPPER](https://github.com/nansencenter/DAPPER/blob/master/dapper/mods/DoublePendulum/__init__.py). 290 | # Unlike the Lorenz systems, the divergence of its "$f$" flow field is 0, 291 | # so it is conservative, and all of the trajectories preserve their initial energy 292 | # (except for what friction our numerical integration causes). 293 | # Therefore it does not strictly speaking posess an attractor 294 | # nor is it ergodic (but some things might be said upon restriction to the set of initial conditions with equal energy levels?) 
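# *PS (added side note): for comparison with the conservative double pendulum, the divergence of the Lorenz-63 flow field (1) is constant and negative,*
# $$
# \nabla \cdot f
# = \frac{\partial \dot{x}}{\partial x}
# + \frac{\partial \dot{y}}{\partial y}
# + \frac{\partial \dot{z}}{\partial z}
# = -\sigma - 1 - \beta \approx -13.7 \,,
# $$
# *so its phase-space volumes contract, which is why it possesses an attractor.*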
295 | 296 | # + 297 | from numpy import cos, sin, pi 298 | from dapper.mods.DoublePendulum import L1, L2, x0, dxdt 299 | def x012(x): return (0 , L1*sin(x[0]) , L1*sin(x[0]) + L2*sin(x[2])) 300 | def y012(x): return (0, -L1*cos(x[0]), -L1*cos(x[0]) - L2*cos(x[2])) 301 | 302 | x0 = [.9*pi, 0, 0, 0] # Angular pos1, vel1, pos2, vel2 303 | initial_states = x0 + 0.01*np.random.randn(20, 4) 304 | trajectories, times = integrate(lambda x, t: dxdt(x), initial_states, 10) 305 | 306 | @interact(k=(0, len(times)-1, 4), N=(1, len(initial_states))) 307 | def plot_pendulum2(k=1, N=2): 308 | fig, ax = plt.subplots() 309 | ax.set(xlim=(-2, 2), ylim=(-2, 2), aspect="equal") 310 | for x in trajectories[:N, k]: 311 | ax.plot(x012(x), y012(x), '-o') 312 | plt.show() 313 | # - 314 | 315 | # ## Error/perturbation dynamics 316 | 317 | # **Exc (optional) -- Perturbation ODE:** Suppose $x(t)$ and $z(t)$ are "twins": they evolve according to the same law $f$: 318 | # $$ 319 | # \begin{align} 320 | # \frac{dx}{dt} &= f(x) \\ 321 | # \frac{dz}{dt} &= f(z) \,. 322 | # \end{align} 323 | # $$ 324 | # 325 | # Define the "error": $\varepsilon(t) = x(t) - z(t)$. 326 | # Suppose $z(0)$ is close to $x(0)$. 327 | # Let $F = \frac{df}{dx}(x(t))$. 328 | # 329 | # * (a) Show that the error evolves according to the ordinary differential equation (ODE) 330 | # $$\frac{d \varepsilon}{dt} \approx F \varepsilon \,.$$ 331 | 332 | # + 333 | # show_answer("error evolution") 334 | # - 335 | 336 | # * (b) Suppose $F$ is constant. Show that the error grows exponentially: $\varepsilon(t) = \varepsilon(0) e^{F t} $. 337 | 338 | # + 339 | # show_answer("anti-deriv") 340 | # - 341 | 342 | # * (c) 343 | # * (1) Suppose $F<0$. 344 | # What happens to the error? 345 | # What does this mean for predictability? 346 | # * (2) Now suppose $F>0$. 347 | # Given that all observations are uncertain (i.e. $R_t>0$, if only ever so slightly), 348 | # can we ever hope to estimate $x(t)$ with 0 uncertainty? 349 | 350 | # + 351 | # show_answer("predictability cases") 352 | # - 353 | 354 | # - (d) What is the doubling time of the error? 355 | 356 | # + 357 | # show_answer("doubling time, Lyapunov") 358 | # - 359 | 360 | # * (e) Consider the ODE derived above. 361 | # How might we change it in order to model (i.e. emulate) a saturation of the error at some level? 362 | # Can you solve this equation? 363 | 364 | # + 365 | # show_answer("saturation term") 366 | # - 367 | 368 | # * (f) Now suppose $z(t)$ evolves according to $\frac{dz}{dt} = g(z)$, with $g \neq f$. 369 | # What is now the differential equation governing the evolution of the error, $\varepsilon$? 370 | 371 | # + 372 | # show_answer("linear growth") 373 | # - 374 | 375 | # ## Summary 376 | # Prediction (forecasting) with these systems is challenging because they are chaotic: 377 | # small errors grow exponentially. 378 | # Therefore there is a limit to how far into the future we can make predictions (skillfully). 379 | # Therefore it is crucial to minimize the initial error as much as possible. 380 | # This is a task of DA (filtering). 381 | # 382 | # Also see this [book on chaos and predictability](https://kuiper2000.github.io/chaos_and_predictability/intro.html). 
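# *PS (added, optional illustration): the sketch below re-uses `integrate` and `dxdt63` from above to make the summary concrete, plotting the distance between two nearly identical Lorenz-63 trajectories. On the log scale, the initial growth is roughly linear (i.e. exponential in the error), before saturating at the size of the attractor.*

# +
two_states = np.array([[-6.1, 1.2, 32.5],
                       [-6.1, 1.2, 32.5 + 1e-5]])
trajs, ts = integrate(dxdt63, two_states, 40, σ=10, β=8/3, ρ=28)
distance = np.linalg.norm(trajs[1] - trajs[0], axis=-1)
plt.figure()
plt.semilogy(ts, distance)
plt.xlabel("Time")
plt.ylabel("Distance between twin trajectories")
plt.show()
# -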
383 | # 384 | # ### Next: [T8 - Monte-Carlo & ensembles](T8%20-%20Monte-Carlo%20%26%20ensembles.ipynb) 385 | -------------------------------------------------------------------------------- /notebooks/scripts/T8 - Monte-Carlo & ensembles.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,scripts//py 5 | # text_representation: 6 | # extension: .py 7 | # format_name: light 8 | # format_version: '1.5' 9 | # jupytext_version: 1.15.1 10 | # kernelspec: 11 | # display_name: Python 3 (ipykernel) 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials" 17 | # !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s 18 | 19 | from resources import show_answer, interact, import_from_nb 20 | # %matplotlib inline 21 | import numpy as np 22 | import matplotlib as mpl 23 | import scipy.stats as ss 24 | import numpy.random as rnd 25 | import matplotlib.pyplot as plt 26 | from scipy.stats import gaussian_kde 27 | plt.ion(); 28 | 29 | (pdf_G1, grid1d) = import_from_nb("T2", ("pdf_G1", "grid1d")) 30 | 31 | # # T8 - The ensemble (Monte-Carlo) approach 32 | # is an approximate method for doing Bayesian inference. 33 | # Instead of computing the full (gridvalues, or parameters, of the) posterior distributions, 34 | # we instead try to generate ensembles from them. 35 | # An ensemble is an *iid* sample. I.e. a set of "members" ("particles", "realizations", or "sample points") that have been drawn ("sampled") independently from the same distribution. With the EnKF, these assumptions are generally tenuous, but pragmatic. 36 | # $ 37 | # % ######################################## Loading TeX (MathJax)... 
Please wait ######################################## 38 | # \newcommand{\Reals}{\mathbb{R}} \newcommand{\Expect}[0]{\mathbb{E}} \newcommand{\NormDist}{\mathscr{N}} \newcommand{\DynMod}[0]{\mathscr{M}} \newcommand{\ObsMod}[0]{\mathscr{H}} \newcommand{\mat}[1]{{\mathbf{{#1}}}} \newcommand{\bvec}[1]{{\mathbf{#1}}} \newcommand{\trsign}{{\mathsf{T}}} \newcommand{\tr}{^{\trsign}} \newcommand{\ceq}[0]{\mathrel{≔}} \newcommand{\xDim}[0]{D} \newcommand{\supa}[0]{^\text{a}} \newcommand{\supf}[0]{^\text{f}} \newcommand{\I}[0]{\mat{I}} \newcommand{\K}[0]{\mat{K}} \newcommand{\bP}[0]{\mat{P}} \newcommand{\bH}[0]{\mat{H}} \newcommand{\bF}[0]{\mat{F}} \newcommand{\R}[0]{\mat{R}} \newcommand{\Q}[0]{\mat{Q}} \newcommand{\B}[0]{\mat{B}} \newcommand{\C}[0]{\mat{C}} \newcommand{\Ri}[0]{\R^{-1}} \newcommand{\Bi}[0]{\B^{-1}} \newcommand{\X}[0]{\mat{X}} \newcommand{\A}[0]{\mat{A}} \newcommand{\Y}[0]{\mat{Y}} \newcommand{\E}[0]{\mat{E}} \newcommand{\U}[0]{\mat{U}} \newcommand{\V}[0]{\mat{V}} \newcommand{\x}[0]{\bvec{x}} \newcommand{\y}[0]{\bvec{y}} \newcommand{\z}[0]{\bvec{z}} \newcommand{\q}[0]{\bvec{q}} \newcommand{\br}[0]{\bvec{r}} \newcommand{\bb}[0]{\bvec{b}} \newcommand{\bx}[0]{\bvec{\bar{x}}} \newcommand{\by}[0]{\bvec{\bar{y}}} \newcommand{\barB}[0]{\mat{\bar{B}}} \newcommand{\barP}[0]{\mat{\bar{P}}} \newcommand{\barC}[0]{\mat{\bar{C}}} \newcommand{\barK}[0]{\mat{\bar{K}}} \newcommand{\D}[0]{\mat{D}} \newcommand{\Dobs}[0]{\mat{D}_{\text{obs}}} \newcommand{\Dmod}[0]{\mat{D}_{\text{obs}}} \newcommand{\ones}[0]{\bvec{1}} \newcommand{\AN}[0]{\big( \I_N - \ones \ones\tr / N \big)} 39 | # $ 40 | # 41 | # Ensembles can be used to characterize uncertainty: either by using it to compute (estimate) *statistics* thereof, such as the mean, median, variance, covariance, skewness, confidence intervals, etc (any function of the ensemble can be seen as a "statistic"), or by using it to reconstruct the distribution/density from which it is sampled. The latter is illustrated by the plot below. Take a moment to digest its code, and then answer the following exercises. 42 | 43 | # + 44 | mu = 0 45 | sigma2 = 25 46 | N = 80 47 | 48 | @interact( seed=(1, 10), nbins=(2, 60), bw=(0.1, 1)) 49 | def pdf_reconstructions(seed=5, nbins=10, bw=.3): 50 | rnd.seed(seed) 51 | E = mu + np.sqrt(sigma2)*rnd.randn(N) 52 | 53 | fig, ax = plt.subplots() 54 | ax.plot(grid1d, pdf_G1(grid1d, mu, sigma2), lw=5, label="True") 55 | ax.plot(E, np.zeros(N), '|k', ms=100, mew=.4, label="_raw ens") 56 | ax.hist(E, nbins, density=1, alpha=.7, color="C5", label="Histogram") 57 | ax.plot(grid1d, pdf_G1(grid1d, np.mean(E), np.var(E)), lw=5, label="Parametric") 58 | ax.plot(grid1d, gaussian_kde(E.ravel(), bw**2).evaluate(grid1d), lw=5, label="KDE") 59 | ax.set_ylim(top=(3*sigma2)**-.5) 60 | ax.legend() 61 | plt.show() 62 | 63 | 64 | # - 65 | 66 | # **Exc -- A matter of taste?:** 67 | # - Which approximation to the true pdf looks better? 68 | # - Which approximation starts with more information? 69 | # What is the downside of making such assumptions? 70 | # - What value of `bw` causes the "KDE" method to most closely 71 | # reproduce/recover the "Parametric" method? 72 | # What about the "Histogram" method? 73 | # *PS: we might say that the KDE method "bridges" the other two.*. 74 | 75 | # Being able to sample a multivariate Gaussian distribution is a building block of the EnKF. 76 | # That is the objective of the following exercise. 77 | # 78 | # **Exc -- Multivariate Gaussian sampling:** 79 | # Suppose $\z$ is a standard Gaussian, 80 | # i.e. 
$p(\z) = \NormDist(\z \mid \bvec{0},\I_{\xDim})$, 81 | # where $\I_{\xDim}$ is the $\xDim$-dimensional identity matrix. 82 | # Let $\x = \mat{L}\z + \mu$. 83 | # 84 | # * (a -- optional). Refer to the exercise on [change of variables](T2%20-%20Gaussian%20distribution.ipynb#Exc-(optional)----Probability-and-Change-of-variables) to show that $p(\x) = \mathcal{N}(\x \mid \mu, \mat{C})$, where $\mat{C} = \mat{L}^{}\mat{L}^T$. 85 | # * (b). The code below samples $N = 80$ realizations of $\x$ 86 | # and collects them in an ${\xDim}$-by-$N$ "ensemble matrix" $\E$. 87 | # But `for` loops are slow in plain Python (and Matlab). 88 | # Replace it with something akin to `E = mu + L@Z`. 89 | # *Hint: this code snippet fails because it's trying to add a vector to a matrix.* 90 | 91 | # + 92 | mu = np.array([1, 100, 5]) 93 | xDim = len(mu) 94 | L = np.diag(1+np.arange(xDim)) 95 | C = L @ L.T 96 | Z = rnd.randn(xDim, N) 97 | 98 | # Using a loop ("slow") 99 | E = np.zeros((xDim, N)) 100 | for n in range(N): 101 | E[:, n] = mu + L@Z[:, n] 102 | 103 | # + 104 | # show_answer('Gaussian sampling', 'b') 105 | # - 106 | 107 | # The following prints some numbers that can be used to ascertain if you got it right. 108 | # Note that the estimates will never be exact: 109 | # they contain some amount of random error, a.k.a. ***sampling error***. 110 | 111 | with np.printoptions(precision=1): 112 | print("Estimated mean =", np.mean(E, axis=1)) 113 | print("Estimated cov =", np.cov(E), sep="\n") 114 | 115 | 116 | # **Exc -- Moment estimation code:** Above, we used numpy's (`np`) functions to compute the sample-estimated mean and covariance matrix, 117 | # $\bx$ and $\barC$, 118 | # from the ensemble matrix $\E$. 119 | # Now, instead, implement these estimators yourself: 120 | # $$\begin{align}\bx &\ceq \frac{1}{N} \sum_{n=1}^N \x_n \,, \\ 121 | # \barC &\ceq \frac{1}{N-1} \sum_{n=1}^N (\x_n - \bx) (\x_n - \bx)^T \,. \end{align}$$ 122 | 123 | # + 124 | # Don't use numpy's mean, cov, but rather a `for` loop. 125 | def estimate_mean_and_cov(E): 126 | xDim, N = E.shape 127 | 128 | ### FIX THIS ### 129 | x_bar = np.zeros(xDim) 130 | C_bar = np.zeros((xDim, xDim)) 131 | 132 | return x_bar, C_bar 133 | 134 | x_bar, C_bar = estimate_mean_and_cov(E) 135 | with np.printoptions(precision=1): 136 | print("Mean =", x_bar) 137 | print("Covar =", C_bar, sep="\n") 138 | 139 | # + 140 | # show_answer('ensemble moments, loop') 141 | # - 142 | 143 | # **Exc -- An obsession?:** Why do we normalize by $(N-1)$ for the covariance computation? 144 | 145 | # + 146 | # show_answer('Why (N-1)') 147 | # - 148 | 149 | # It can be shown that the above estimators are ***consistent and unbiased***. 150 | # Thus, if we let $N \rightarrow \infty$, their sampling error will vanish ("almost surely"), 151 | # and we therefore say that our estimators are *consistent*. 152 | # Meanwhile, if we repeat the estimation experiment many times (but use a fixed, finite $N$), 153 | # then the average of sampling errors will also vanish, since our estimators are also *unbiased*. 154 | # Under relatively mild assumptions, the [absence of bias implies consistency](https://en.wikipedia.org/wiki/Consistent_estimator#Bias_versus_consistency). 155 | 156 | # The following computes a large number ($K$) of $\barC$ and $1/\barC$, estimated with a given ensemble size ($N$). 157 | # Note that the true variance is $C = 1$. 158 | # The histograms of the estimates are plotted, along with vertical lines displaying the mean values.
159 | 160 | K = 10000 161 | @interact(N=(2, 30), bottom=True) 162 | def var_and_precision_estimates(N=4): 163 | E = rnd.randn(K, N) 164 | estims = np.var(E, ddof=1, axis=-1) 165 | bins = np.linspace(0, 6, 40) 166 | plt.figure() 167 | plt.hist(estims, bins, alpha=.6, density=1) 168 | plt.hist(1/estims, bins, alpha=.6, density=1) 169 | plt.axvline(np.mean(estims), color="C0", label="C") 170 | plt.axvline(np.mean(1/estims), color="C1", label="1/C") 171 | plt.legend() 172 | plt.show() 173 | 174 | 175 | # **Exc -- There's bias, and then there's bias:** 176 | # - Note that $1/\barC$ does not appear to be an unbiased estimate of $1/C = 1$. 177 | # Explain this by referring to a well-known property of the expectation, $\Expect$. 178 | # In view of this, consider the role and utility of "unbiasedness" in estimation. 179 | # - What, roughly, is the dependence of the mean values (vertical lines) on the ensemble size? 180 | # What do they tend to as $N$ goes to $0$? 181 | # What about $+\infty$ ? 182 | # - Optional: What are the theoretical distributions of $\barC$ and $1/\barC$ ? 183 | 184 | # + 185 | # show_answer('variance estimate statistics') 186 | # - 187 | 188 | # **Exc (optional) -- Error notions:** 189 | # * (a). What's the difference between error and residual? 190 | # * (b). What's the difference between error and bias? 191 | # * (c). Show that `"mean-square-error" (RMSE^2) = Bias^2 + Var`. 192 | # *Hint: Let $e = \hat{\theta} - \theta$ be the random "error" referred to above. 193 | # Express each term using the expectation $\Expect$.* 194 | 195 | # + 196 | # show_answer('errors') 197 | # - 198 | 199 | # **Exc -- Vectorization:** Like Matlab, Python (numpy) is quicker if you "vectorize" loops. 200 | # This is eminently possible with computations of ensemble moments. 201 | # Let $\X \ceq 202 | # \begin{bmatrix} 203 | # \x_1 -\bx, & \ldots & \x_N -\bx 204 | # \end{bmatrix} \,.$ 205 | # * (a). Show that $\X = \E \AN$, where $\ones$ is the column vector of length $N$ with all elements equal to $1$. 206 | # *Hint: consider column $n$ of $\X$.* 207 | # *PS: it can be shown that $\ones \ones\tr / N$ and its complement is a "projection matrix".* 208 | # * (b). Show that $\barC = \X \X^T /(N-1)$. 209 | # * (c). Code up this, latest, formula for $\barC$ and insert it in `estimate_mean_and_cov(E)` 210 | 211 | # + 212 | # show_answer('ensemble moments vectorized') 213 | # - 214 | 215 | # **Exc -- Moment estimation code, part 2:** The cross-covariance between two random vectors, $\bx$ and $\by$, is given by 216 | # $$\begin{align} 217 | # \barC_{\x,\y} 218 | # &\ceq \frac{1}{N-1} \sum_{n=1}^N 219 | # (\x_n - \bx) (\y_n - \by)^T \\\ 220 | # &= \X \Y^T /(N-1) 221 | # \end{align}$$ 222 | # where $\Y$ is, similar to $\X$, the matrix whose columns are $\y_n - \by$ for $n=1,\ldots,N$. 223 | # Note that this is simply the covariance formula, but for two different variables. 224 | # I.e. if $\Y = \X$, then $\barC_{\x,\y} = \barC_{\x}$ (which we have denoted $\barC$ in the above). 225 | # 226 | # Implement the cross-covariance estimator in the code-cell below. 227 | 228 | def estimate_cross_cov(Ex, Ey): 229 | Cxy = np.zeros((len(Ex), len(Ey))) ### INSERT ANSWER ### 230 | return Cxy 231 | 232 | # + 233 | # show_answer('estimate cross') 234 | # - 235 | 236 | # ## Summary 237 | # Parametric assumptions (e.g. assuming Gaussianity) can be useful in approximating distributions. 238 | # Sample covariance estimates can be expressed and computed in a vectorized form. 
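# *PS (added sanity check): the vectorized formula from the exercise above can be verified against numpy's built-in estimator, re-using the ensemble matrix `E` (of shape $\xDim$-by-$N$) sampled earlier.*

# +
X = E - np.mean(E, axis=1, keepdims=True)  # anomalies (each column minus the ensemble mean)
C_vectorized = X @ X.T / (N - 1)
print("Max abs. deviation from np.cov:", np.abs(C_vectorized - np.cov(E)).max())
# -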
239 | # 240 | # ### Next: [T9 - Writing your own EnKF](T9%20-%20Writing%20your%20own%20EnKF.ipynb) 241 | -------------------------------------------------------------------------------- /notebooks/scripts/T9 - Writing your own EnKF.py: -------------------------------------------------------------------------------- 1 | # --- 2 | # jupyter: 3 | # jupytext: 4 | # formats: ipynb,scripts//py 5 | # text_representation: 6 | # extension: .py 7 | # format_name: light 8 | # format_version: '1.5' 9 | # jupytext_version: 1.15.1 10 | # kernelspec: 11 | # display_name: Python 3 (ipykernel) 12 | # language: python 13 | # name: python3 14 | # --- 15 | 16 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials" 17 | # !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s 18 | 19 | from resources import show_answer, EnKF_animation 20 | import numpy as np 21 | import matplotlib as mpl 22 | import numpy.random as rnd 23 | import matplotlib.pyplot as plt 24 | from tqdm.auto import tqdm 25 | plt.ion(); 26 | 27 | # # T9 - Writing your own EnKF 28 | # In this tutorial we're going to code an EnKF implementation using numpy. 29 | # As with the KF, the EnKF consists of the recursive application of 30 | # a forecast step and an analysis step. 31 | # $ 32 | # % ######################################## Loading TeX (MathJax)... Please wait ######################################## 33 | # \newcommand{\Reals}{\mathbb{R}} \newcommand{\Expect}[0]{\mathbb{E}} \newcommand{\NormDist}{\mathscr{N}} \newcommand{\DynMod}[0]{\mathscr{M}} \newcommand{\ObsMod}[0]{\mathscr{H}} \newcommand{\mat}[1]{{\mathbf{{#1}}}} \newcommand{\bvec}[1]{{\mathbf{#1}}} \newcommand{\trsign}{{\mathsf{T}}} \newcommand{\tr}{^{\trsign}} \newcommand{\ceq}[0]{\mathrel{≔}} \newcommand{\xDim}[0]{D} \newcommand{\supa}[0]{^\text{a}} \newcommand{\supf}[0]{^\text{f}} \newcommand{\I}[0]{\mat{I}} \newcommand{\K}[0]{\mat{K}} \newcommand{\bP}[0]{\mat{P}} \newcommand{\bH}[0]{\mat{H}} \newcommand{\bF}[0]{\mat{F}} \newcommand{\R}[0]{\mat{R}} \newcommand{\Q}[0]{\mat{Q}} \newcommand{\B}[0]{\mat{B}} \newcommand{\C}[0]{\mat{C}} \newcommand{\Ri}[0]{\R^{-1}} \newcommand{\Bi}[0]{\B^{-1}} \newcommand{\X}[0]{\mat{X}} \newcommand{\A}[0]{\mat{A}} \newcommand{\Y}[0]{\mat{Y}} \newcommand{\E}[0]{\mat{E}} \newcommand{\U}[0]{\mat{U}} \newcommand{\V}[0]{\mat{V}} \newcommand{\x}[0]{\bvec{x}} \newcommand{\y}[0]{\bvec{y}} \newcommand{\z}[0]{\bvec{z}} \newcommand{\q}[0]{\bvec{q}} \newcommand{\br}[0]{\bvec{r}} \newcommand{\bb}[0]{\bvec{b}} \newcommand{\bx}[0]{\bvec{\bar{x}}} \newcommand{\by}[0]{\bvec{\bar{y}}} \newcommand{\barB}[0]{\mat{\bar{B}}} \newcommand{\barP}[0]{\mat{\bar{P}}} \newcommand{\barC}[0]{\mat{\bar{C}}} \newcommand{\barK}[0]{\mat{\bar{K}}} \newcommand{\D}[0]{\mat{D}} \newcommand{\Dobs}[0]{\mat{D}_{\text{obs}}} \newcommand{\Dmod}[0]{\mat{D}_{\text{obs}}} \newcommand{\ones}[0]{\bvec{1}} \newcommand{\AN}[0]{\big( \I_N - \ones \ones\tr / N \big)} 34 | # $ 35 | 36 | # This presentation follows the traditional template, presenting the EnKF as the "the Monte Carlo version of the KF 37 | # where the state covariance is estimated by the ensemble covariance". 38 | # It is not obvious that this postulated method should work; 39 | # indeed, it is only justified upon inspection of its properties, 40 | # deferred to later. 41 | # 42 | # 43 | # NB: 44 | # Since we're going to focus on a single filtering cycle (at a time), 45 | # the subscript $k$ is dropped. Moreover,
46 | # The superscript $f$ indicates that $\{\x_n\supf\}_{n=1..N}$ is the forecast (prior) ensemble.
47 | # The superscript $a$ indicates that $\{\x_n\supa\}_{n=1..N}$ is the analysis (posterior) ensemble. 48 | #
49 | # 50 | # ### The forecast step 51 | # Suppose $\{\x_n\supa\}_{n=1..N}$ is an iid. sample from $p(\x_{k-1} \mid \y_1,\ldots, \y_{k-1})$, which may or may not be Gaussian. 52 | # 53 | # The forecast step of the EnKF consists of a Monte Carlo simulation 54 | # of the forecast dynamics for each $\x_n\supa$: 55 | # $$ 56 | # \forall n, \quad \x\supf_n = \DynMod(\x_n\supa) + \q_n \,, \\ 57 | # $$ 58 | # where $\{\q_n\}_{n=1..N}$ are sampled iid. from $\NormDist(\bvec{0},\Q)$, 59 | # or whatever noise model is assumed, 60 | # and $\DynMod$ is the model dynamics. 61 | # The dynamics could consist of *any* function, i.e. the EnKF can be applied with nonlinear models. 62 | # 63 | # The ensemble, $\{\x_n\supf\}_{n=1..N}$, is then an iid. sample from the forecast pdf, 64 | # $p(\x_k \mid \y_1,\ldots,\y_{k-1})$. This follows from the definition of the latter, so it is a relatively trivial idea and way to obtain this pdf. However, before Monte-Carlo methods were computationally feasible, the computation of the forecast pdf required computing the [Chapman-Kolmogorov equation](https://en.wikipedia.org/wiki/Chapman%E2%80%93Kolmogorov_equation), which constituted a major hurdle for filtering methods. 65 | # 66 | # ### The analysis update step 67 | # of the ensemble is given by: 68 | # $$\begin{align} 69 | # \forall n, \quad \x\supa_n &= \x_n\supf + \barK \left\{\y - \br_n - \ObsMod(\x_n\supf) \right\} 70 | # \,, \\ 71 | # \text{or,}\quad 72 | # \E\supa &= \E\supf + \barK \left\{\y\ones\tr - \Dobs - \ObsMod(\E\supf) \right\} \,, 73 | # \tag{4} 74 | # \end{align} 75 | # $$ 76 | # where the "observation perturbations", $\br_n$, are sampled iid. from the observation noise model, e.g. $\NormDist(\bvec{0},\R)$, 77 | # and form the columns of $\Dobs$, 78 | # and the observation operator (again, any type of function) $\ObsMod$ is applied column-wise to $\E\supf$. 79 | # 80 | # The gain $\barK$ is defined by inserting the ensemble estimates for 81 | # * (i) $\B \bH\tr$: the cross-covariance between $\x\supf$ and $\ObsMod(\x\supf)$, and 82 | # * (ii) $\bH \B \bH\tr$: the covariance matrix of $\ObsMod(\x\supf)$, 83 | # 84 | # in the formula for $\K$, namely eqn. (K1) of [T5](T5%20-%20Multivariate%20Kalman%20filter.ipynb). 85 | # Using the estimators from [T8](T8%20-%20Monte-Carlo%20%26%20ensembles.ipynb) yields 86 | # $$\begin{align} 87 | # \barK &= \X \Y\tr ( \Y \Y\tr + (N{-}1) \R )^{-1} \,, \tag{5a} 88 | # \end{align} 89 | # $$ 90 | # where $\Y \in \Reals^{P \times N}$ 91 | # is the centered, *observed* ensemble 92 | # $\Y \ceq 93 | # \begin{bmatrix} 94 | # \y_1 -\by, & \ldots & \y_n -\by, & \ldots & \y_N -\by 95 | # \end{bmatrix} \,,$ where $\y_n = \ObsMod(\x_n\supf)$. 96 | # 97 | # The EnKF is summarized in the animation below. 98 | 99 | EnKF_animation() 100 | 101 | # #### Exc -- Woodbury for the ensemble subspace 102 | # (a) Use the Woodbury identity (C2) of [T5](T5%20-%20Multivariate%20Kalman%20filter.ipynb) to show that eqn. (5) can also be written 103 | # $$\begin{align} 104 | # \barK &= \X ( \Y\tr \Ri \Y + (N{-}1)\I_N )^{-1} \Y\tr \Ri \,. \tag{5b} 105 | # \end{align} 106 | # $$ 107 | # (b) What is the potential benefit? 108 | 109 | # #### Exc -- KG workings 110 | # The above animation assumed that the observation operator is just the identity matrix, $\I$, rather than a general observation operator, $\ObsMod()$. Meanwhile, the Kalman gain used by the EnKF, eqn. (5a), is applicable for any $\ObsMod()$. On the other hand, the formula (5a) consists solely of linear algebra. 
Therefore it cannot perfectly represent any general (nonlinear) $\ObsMod()$. So how does it actually treat the observation operator? What meaning can we assign to the resulting updates? 111 | # *Hint*: consider the limit of $\R \rightarrow 0$. 112 | 113 | # #### Exc -- EnKF nobias (a) 114 | # Consider the ensemble averages, 115 | # - $\bx\supa = \frac{1}{N}\sum_{n=1}^N \x\supa_n$, and 116 | # - $\bx\supf = \frac{1}{N}\sum_{n=1}^N \x\supf_n$, 117 | # 118 | # and recall that the analysis step, eqn. (4), defines $\x\supa_n$ from $\x\supf_n$. 119 | # 120 | # 121 | # (a) Show that, in case $\ObsMod$ is linear (the matrix $\bH$), 122 | # $$\begin{align} 123 | # \Expect \bx\supa &= \bx\supf + \barK \left\{\y\ones\tr - \bH\bx\supf \right\} \,, \tag{6} 124 | # \end{align} 125 | # $$ 126 | # where the expectation, $\Expect$, is taken with respect to $\Dobs$ only (i.e. not the sampling of the forecast ensemble, $\E\supf$ itself). 127 | # 128 | # What does this mean? 129 | 130 | # + 131 | # show_answer("EnKF_nobias_a") 132 | # - 133 | 134 | # #### Exc (optional) -- EnKF nobias (b) 135 | # Consider the ensemble covariance matrices: 136 | # $$\begin{align} 137 | # \barB &= \frac{1}{N-1} \X{\X}\tr \,, \tag{7a} \\\ 138 | # \barP &= \frac{1}{N-1} \X\supa{\X\supa}\tr \,. \tag{7b} 139 | # \end{align}$$ 140 | # 141 | # Now, denote the centralized observation perturbations: 142 | # $$\begin{align} 143 | # \D &= \Dobs - \bar{\br}\ones\tr \\\ 144 | # &= \Dobs\AN \,. \tag{8} 145 | # \end{align}$$ 146 | # Note that $\D \ones = \bvec{0}$ and, with expectation over $\Dobs$, 147 | # $$ 148 | # \begin{align} 149 | # \label{eqn:R_sample_cov_of_D} 150 | # \frac{1}{N-1}\D \D\tr = \R \,, \tag{9a} \\\ 151 | # \label{eqn:zero_AD_cov} 152 | # \X \D\tr = \bvec{0} \,. \tag{9b} 153 | # \end{align} 154 | # $$ 155 | # Assuming eqns (8) and (9) hold true, show that 156 | # $$\begin{align} 157 | # \barP &= [\I_{\xDim} - \barK \bH]\barB \, . \tag{10} 158 | # \end{align}$$ 159 | 160 | # + 161 | # show_answer("EnKF_nobias_b") 162 | # - 163 | 164 | # #### Exc (optional) -- EnKF bias (c) 165 | # Show that, if no observation perturbations are used in eqn. (4), then $\barP$ would be too small. 166 | 167 | # + 168 | # show_answer("EnKF_without_perturbations") 169 | # - 170 | 171 | # ## Experimental setup 172 | # 173 | # Before making the EnKF, we'll set up an experiment to test it with, so that you can check if you've implemented a working method or not. 174 | # 175 | # To that end, we'll use the Lorenz-63 model, from [T7](T7%20-%20Chaos%20%26%20Lorenz%20(optional).ipynb). The coupled ODEs are recalled here, but with some of the parameters fixed. 176 | 177 | # + 178 | xDim = 3 179 | 180 | def dxdt(x, sig=10, rho=28, beta=8/3): 181 | x,y,z = x 182 | d = np.zeros(3) 183 | d[0] = sig*(y - x) 184 | d[1] = rho*x - y - x*z 185 | d[2] = x*y - beta*z 186 | return d 187 | 188 | 189 | # - 190 | 191 | # Next, we make the forecast model $\DynMod$ out of $\frac{d \x}{dt}$ such that $\x(t+dt) = \DynMod(\x(t),t,dt)$. We'll make use of the "4th order Runge-Kutta" integrator `rk4`. 
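# *PS (added aside): the notebook imports `rk4` from DAPPER just below, but in case you are curious, a standard 4th-order Runge-Kutta step can be written in a few lines. This is only an illustrative sketch; the details of DAPPER's `rk4` may differ.*

# +
def rk4_sketch(f, x, t, dt):
    """One Runge-Kutta-4 step of dx/dt = f(x, t), from t to t + dt."""
    k1 = dt * f(x, t)
    k2 = dt * f(x + k1/2, t + dt/2)
    k3 = dt * f(x + k2/2, t + dt/2)
    k4 = dt * f(x + k3, t + dt)
    return x + (k1 + 2*k2 + 2*k3 + k4)/6
# -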
192 | 193 | # + 194 | from dapper.mods.integration import rk4 195 | 196 | def Dyn(E, t0, dt): 197 | 198 | def step(x0): 199 | return rk4(lambda x, t: dxdt(x), x0, t0, dt) 200 | 201 | if E.ndim == 1: 202 | # Truth (single state vector) case 203 | E = step(E) 204 | else: 205 | # Ensemble case 206 | for n in range(E.shape[1]): 207 | E[:, n] = step(E[:, n]) 208 | 209 | return E 210 | 211 | Q12 = np.zeros((xDim, xDim)) 212 | Q = Q12 @ Q12.T 213 | # - 214 | 215 | # Notice the loop over each ensemble member. For better performance, this should be vectorized, if possible. Or, if the forecast model is computationally demanding (as is typically the case in real applications), the loop should be parallelized: i.e. the forecast simulations should be distributed to separate computers. 216 | 217 | # The following are the time settings that we will use 218 | 219 | dt = 0.01 # integrational time step 220 | dko = 25 # number of steps between observations 221 | dto = dko*dt # time between observations 222 | Ko = 60 # total number of observations 223 | nTime = dko*(Ko+1) # total number of time steps 224 | 225 | # Initial conditions 226 | 227 | xa = np.array([1.509, -1.531, 25.46]) 228 | Pa12 = np.eye(3) 229 | 230 | # Observation model settings 231 | 232 | # + 233 | p = 3 # ndim obs 234 | def Obs(E, t): 235 | return E[:p] if E.ndim == 1 else E[:p, :] 236 | 237 | R12 = np.sqrt(2)*np.eye(p) 238 | R = R12 @ R12.T 239 | # - 240 | 241 | # Generate synthetic truth and observations 242 | 243 | # Init 244 | truths = np.zeros((nTime+1, xDim)) 245 | obsrvs = np.zeros((Ko+1, p)) 246 | truths[0] = xa + Pa12 @ rnd.randn(xDim) 247 | 248 | # Loop 249 | for k in range(1, nTime+1): 250 | truths[k] = Dyn(truths[k-1], (k-1)*dt, dt) 251 | truths[k] += Q12 @ rnd.randn(xDim) 252 | if k % dko == 0: 253 | Ko = k//dko-1 254 | obsrvs[Ko] = Obs(truths[k], np.nan) + R12 @ rnd.randn(p) 255 | 256 | # ## EnKF implementation 257 | 258 | # We will make use of `estimate_mean_and_cov` and `estimate_cross_cov` from the previous section. Paste them in below. 259 | 260 | # + 261 | # def estimate_mean_and_cov ... 262 | # - 263 | 264 | # **Exc -- EnKF implementation:** Complete the code below 265 | 266 | # + 267 | # Useful linear algebra: compute B/A 268 | import numpy.linalg as nla 269 | 270 | ens_means = np.zeros((nTime+1, xDim)) 271 | ens_vrncs = np.zeros((nTime+1, xDim)) 272 | 273 | def my_EnKF(N): 274 | """My implementation of the EnKF.""" 275 | ### Init ### 276 | E = np.zeros((xDim, N)) 277 | for k in tqdm(range(1, nTime+1)): 278 | t = k*dt 279 | ### Forecast ## 280 | # E = ... # use model 281 | # E = ... # add noise 282 | if k % dko == 0: 283 | ### Analysis ## 284 | y = obsrvs[[k//dko-1]].T # current observation 285 | Eo = Obs(E, t) # observed ensemble 286 | # Compute ensemble moments 287 | PH = ... 288 | HPH = ... 289 | # Compute Kalman Gain 290 | KG = ... 291 | # Generate perturbations 292 | Perturb = ... 293 | # Update ensemble with KG 294 | # E = ... 295 | # Save statistics 296 | ens_means[k] = np.mean(E, axis=1) 297 | ens_vrncs[k] = np.var(E, axis=1, ddof=1) 298 | # - 299 | 300 | # Notice that we only store some stats (`ens_means`). This is because in large systems, 301 | # keeping the entire ensemble (or its covariance) in memory is probably too much. 
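# *PS (added aside) on the "compute B/A" hint in the code above: a product with a matrix inverse, `B @ inv(A)`, is usually best computed by solving a linear system rather than forming the inverse explicitly. A small demonstration with hypothetical matrices (not used by the EnKF itself):*

# +
A_demo = np.array([[2., 0.], [1., 3.]])
B_demo = np.array([[1., 2.], [4., 5.]])
X_demo = nla.solve(A_demo.T, B_demo.T).T              # solves X @ A = B, i.e. X = B @ inv(A)
print(np.allclose(X_demo, B_demo @ nla.inv(A_demo)))  # ⇒ True
# -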
302 | 303 | # + 304 | # show_answer('EnKF v1') 305 | # - 306 | 307 | # Now let's try out its capabilities 308 | 309 | # + 310 | # Run assimilation 311 | my_EnKF(10) 312 | 313 | # Plot 314 | fig, axs = plt.subplots(nrows=3, sharex=True) 315 | for i in range(3): 316 | axs[i].plot(dt*np.arange(nTime+1), truths [:, i], 'k', label="Truth") 317 | axs[i].plot(dt*np.arange(nTime+1), ens_means[:, i], 'b', label="Estimate") 318 | if i')[0] 56 | # fix parsing error for links ending in ')' 57 | if "))" in link: 58 | link = link.split("))")[0] + ")" 59 | 60 | # Common error message 61 | def errm(issue): 62 | return f"Issue on line {i} with {issue} link\n {link}" 63 | 64 | # Internet links 65 | if "http" in link: 66 | try: 67 | response = requests.head(link, headers={'User-Agent': UA}) 68 | # https://developer.mozilla.org/en-US/docs/Web/HTTP/Status 69 | assert response.status_code < 400 70 | except Exception: 71 | # Stackoverflow does not like GitHub CI IPs? 72 | # https://meta.stackexchange.com/questions/443 73 | skip = "stack" in link and response.status_code == 403 74 | if not skip: 75 | failed |= True 76 | _report_error(errm("**requesting**") + 77 | f"\nStatus code: {response.status_code}") 78 | 79 | # Local links 80 | else: 81 | link = unquote(link) 82 | link_fname, *link_anchor = link.split("#") 83 | 84 | # Validate filename 85 | if link_fname: 86 | if not (ROOT / "notebooks" / link_fname).is_file(): 87 | failed |= _report_error(errm("**filename** of")) 88 | 89 | # Validate anchor 90 | if link_anchor: 91 | if not link_fname: 92 | # Anchor only ⇒ same file 93 | link_fname = fname 94 | else: 95 | # Change "T4...ipynb" --> "tests/T4...py" 96 | link_fname = (ROOT / "tests" / link_fname).with_suffix(".py") 97 | 98 | if not _find_anchor(link_fname, link_anchor[0]): 99 | failed |= _report_error(errm("**anchor tag** of")) 100 | return failed 101 | 102 | 103 | def assert_show_answer(lines, _fname): 104 | """Misc checks on `show_answer`""" 105 | failed = False 106 | found_import = False 107 | for i, line in enumerate(lines): 108 | found_import |= ("show_answer" in line and "import" in line) 109 | if line.lstrip().startswith("show_answer"): 110 | print(f"`show_answer` uncommented on line {i}") 111 | failed |= True 112 | if not found_import: 113 | print("`import show_answer` not found.") 114 | failed = True 115 | return failed 116 | 117 | 118 | def uncomment_show_answer(lines): 119 | """Causes checking existance of answer when script gets run.""" 120 | for i, line in enumerate(lines): 121 | OLD = "# show_answer" 122 | NEW = "show_answer" 123 | if line.startswith(OLD): 124 | lines[i] = line.replace(OLD, NEW) 125 | return lines 126 | 127 | 128 | def make_script_runnable_by_fixing_sys_path(lines): 129 | """Makes it seem like CWD is `notebooks`.""" 130 | return ['import sys', 131 | f"""sys.path.insert(0, '{ROOT / "notebooks"}')""", 132 | ] + lines 133 | 134 | 135 | ## Convert: notebooks/T*.ipynb --> tests/T*.py 136 | print("\nConverting from notebooks/...ipynb to tests/...py") 137 | print("========================================") 138 | text = dict(capture_output=True, text=True) 139 | converted = [] 140 | ipynbs = sorted((ROOT / "notebooks").glob("T*.ipynb")) 141 | for f in ipynbs: 142 | script = (ROOT / "tests" / f.name).with_suffix('.py') 143 | converted.append(script) 144 | cmd = ["jupytext", "--output", str(script), str(f)] 145 | print(subprocess.run(cmd, **text, check=True).stdout) 146 | 147 | 148 | ## Static checks. 
Also: modify scripts 149 | erred = [] 150 | for script in converted: 151 | print("\nStatic analysis for", script.stem) 152 | print("========================================") 153 | lines = script.read_text().splitlines() 154 | failed = False 155 | 156 | # Validatation checks 157 | failed |= assert_all_links_work(lines, script) 158 | failed |= assert_show_answer(lines, script) 159 | 160 | # Modify script in preparation of running it 161 | lines = uncomment_show_answer(lines) 162 | lines = make_script_runnable_by_fixing_sys_path(lines) 163 | 164 | if failed: 165 | erred.append(script) 166 | script.write_text("\n".join(lines)) 167 | 168 | 169 | print("\nStatic analysis for", "answers.py") 170 | print("========================================") 171 | sys.path.insert(0, f"{ROOT / 'notebooks'}") 172 | import resources.answers # type: ignore # noqa 173 | for key, answer in resources.answers.answers.items(): 174 | lines = ["# " + line for line in answer[1].splitlines()] 175 | fname = Path(resources.answers.__file__ + ":" + key) 176 | if assert_all_links_work(lines, fname): 177 | erred.append(fname) 178 | 179 | 180 | ## Run ipynbs as python scripts 181 | for script in converted: 182 | print("\nRunning", script.name) 183 | print("========================================") 184 | run = subprocess.run(["python", str(script)], **text, check=False) 185 | # print(run.stdout) 186 | if run.returncode: 187 | erred.append(script) 188 | print(run.stderr, file=sys.stderr) 189 | 190 | # Provide return code 191 | if erred: 192 | print("========================================") 193 | print("FOUND ISSUES") 194 | print("========================================") 195 | print(*["- " + str(f) for f in erred], file=sys.stderr) 196 | print("See above for individual tracebacks.") 197 | sys.exit(1) 198 | --------------------------------------------------------------------------------