267 | Attribution: William A. Lahoz, DARC.
268 | """
269 |
270 |     import io
271 |     import base64
272 |     from IPython.display import HTML
273 |
274 |     video = io.open(Path(__file__).parent / 'darc_envisat_analyses.mp4', 'rb').read()
275 |     encoded = base64.b64encode(video)
276 |     vid = HTML(data='''
277 |     <video controls>
278 |     <source src="data:video/mp4;base64,{0}" type="video/mp4" />
279 |     </video>
280 |
281 |     {1}
282 |
283 |     '''.format(encoded.decode('ascii'), caption))
284 |     return vid
285 |
286 |
287 | def EnKF_animation():
288 | # Initialize
289 | path_ = str(Path(__file__).parent / "illust_EnKF/illust_EnKF_")
290 | image = Image(
291 | value=open(path_ + "1.png", "rb").read(),
292 | format='png',
293 | width=800,
294 | height=600,
295 | )
296 |
297 | def update_image(i=1):
298 | image.value=open(path_ + str(i) + ".png", "rb").read()
299 |
300 | slider = interactive(update_image, i=(1, 7, 1))
301 | return VBox([slider, image])
302 |
303 |
304 | def import_from_nb(name: str, objs: list):
305 | """Import `objs` from `notebooks/name*.py` (1st match).
306 |
307 | This is of course a terrible hack:
308 |
309 | - Necessitates that imported notebook contain only light computations
310 | (unless controlled by interact.disabled)
311 | - Does not include any changes made by students. This is mainly a benefit,
312 | but could be said to break the principle of least surprise.
313 | - Students might benefit from a little repetition anyway.
314 |
315 | But notebooks are learning materials -- not production code --
316 | and this helps tie together different tutorials of the course.
317 | """
318 | NBDIR = Path(__file__).parents[1]
319 | notebk = next(NBDIR.glob(name + "*.ipynb"))
320 | script = (NBDIR / "scripts" / notebk.relative_to(NBDIR)).with_suffix('.py')
321 | import_from_nb.nesting_level += 1
322 |
323 | interact.disabled = True
324 | try:
325 | name = str(script.relative_to(NBDIR).with_suffix("")).replace(os.sep, ".")
326 | module = __import__(name)
327 | script = getattr(module, script.stem) # works despite weird chars
328 | finally:
329 |         # Don't re-enable if nested
330 |         if import_from_nb.nesting_level < 2:
331 | interact.disabled = False
332 | import_from_nb.nesting_level -= 1
333 | return [getattr(script, x) for x in objs]
334 |
335 | import_from_nb.nesting_level = 0
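# Example usage (illustrative, not from the original file): fetch the Gaussian pdf
# defined in tutorial T2. The returned list preserves the order of `objs`:
#
#     (pdf_G1,) = import_from_nb("T2", ["pdf_G1"])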
336 |
--------------------------------------------------------------------------------
/notebooks/resources/colab_bootstrap.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # Colab doesn't provide
4 | # - Auto-installing requirements.txt
5 | # - Pre-loading data/modules (aside from the notebook itself)
6 | # This script takes care of the above by cloning the full (shallow) repo.
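# Typical usage, as done in the first cell of each tutorial notebook
# (the URL below is the one the notebooks themselves use):
#   wget -qO- https://raw.githubusercontent.com/nansencenter/DA-tutorials/master/notebooks/resources/colab_bootstrap.sh | bash -s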
7 |
8 | # Install requirements
9 | main () {
10 | set -e
11 |
12 | # Clear any existing REPO for a fresh git clone
13 | rm -rf REPO
14 |
15 | # Download repo
16 | URL=https://github.com/nansencenter/DA-tutorials.git
17 |   git clone --depth=1 "$URL" REPO
18 |
19 | # https://pythonspeed.com/articles/upgrade-pip/
20 | pip install --upgrade pip
21 |
22 | # Install requirements
23 | pip install -r REPO/requirements.txt
24 |
25 | # Put notebook/ (including hidden files) in PWD
26 | shopt -s dotglob
27 | cp -r REPO/notebooks/* ./
28 | }
29 |
30 | # Only run if we're on colab
31 | if python -c "import google.colab" 2>/dev/null; then
32 |
33 | # Use `bash -s -- --debug` to get verbose output
34 | if echo $@ | grep -E -- '(--debug|-v)' > /dev/null ; then
35 | main
36 | else
37 | # Quiet
38 | main > /dev/null 2>&1
39 | fi
40 |
41 | echo "Initialization for Colab done."
42 | else
43 | echo "Not running on Colab => Didn't do anything."
44 | fi
45 |
--------------------------------------------------------------------------------
/notebooks/resources/darc_envisat_analyses.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/darc_envisat_analyses.mp4
--------------------------------------------------------------------------------
/notebooks/resources/exc-2.4-iii.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/exc-2.4-iii.png
--------------------------------------------------------------------------------
/notebooks/resources/exc-2.5-iv.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/exc-2.5-iv.png
--------------------------------------------------------------------------------
/notebooks/resources/exc-2.5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/exc-2.5.png
--------------------------------------------------------------------------------
/notebooks/resources/illust_EnKF/illust_EnKF.py:
--------------------------------------------------------------------------------
1 | """Script to patch png figures
2 | from Matlab script DATUM/illust_EnKF_1.m
3 | together with text titles, as given below.
4 | """
5 |
6 | from pathlib import Path
7 |
8 | import matplotlib as mpl
9 |
10 | mpl.rcParams["text.latex.preamble"] = r"\usepackage{mathrsfs}"
11 |
12 | from matplotlib.image import imread
13 | import matplotlib.pyplot as plt
14 |
15 | plt.ion()
16 |
17 | txts = []
18 | txts += [
19 | "We consider a single cycle of the EnKF, "
20 | "starting with the analysis state\n"
21 | "at time $(k-1)$. "
22 | "The contours are level curves of "
23 | "$\|\mathbf{x}-\mathbf{\\bar{x}}^{a}_{k-1}\|_{\mathbf{\\bar{P}}^{a}_{k-1}}$..."
24 | ]
25 | txts += [
26 | "We consider a single cycle of the EnKF, "
27 | "starting with the analysis ensemble\n"
28 | "at time $(k-1)$, $\{\mathbf{x}_n^{a}\}_{n=1..N}$, "
29 | "with mean and cov. estimates $\mathbf{\\bar{x}}^{a}_{k-1}$ and $\mathbf{\\bar{P}}^{a}_{k-1}$.\n"
30 | "The contours are level curves of "
31 | "$\|\mathbf{x}-\mathbf{\\bar{x}}^{a}_{k-1}\|_{\mathbf{\\bar{P}}^{a}_{k-1}}$.",
32 | ]
33 | txts += [
34 | "The ensemble is forecasted from time $(k-1)$ to $k$ "
35 | "by the dyn. "
36 | "model $\mathscr{M}$.\n We now denote it using the superscript $f$."
37 | ]
38 | txts += [
39 | "Now we consider the analysis at time $k$. The ensemble \emph{could} be used\n"
40 | "to compute the estimates $\mathbf{\\bar{x}}^{f}_k$ and $\mathbf{\\bar{P}}^{f}_k$, "
41 | "hence the new contour curves."
42 | ]
43 | txts += ["Whereupon an obs. likelihood..."]
44 | txts += ["...\emph{would} yield a posterior by Bayes' rule."]
45 | txts += [
46 | "What we \emph{equivalently} do instead,\nis to compute the Kalman gain "
47 | "using the estimate $\mathbf{\\bar{P}}^{f}_k$."
48 | ]
49 | txts += [
50 | "The Kalman gain is then used to shift the ensemble.\n"
51 | "We know that it gets shifted to where the (implicit) posterior lies.\n"
52 | "The cycle can then begin again, from $k$ to $k+1$."
53 | ]
54 |
55 | # Hack to keep line-spacing constant with/out TeX
56 | placeholder = "\phantom{$\{x_n^f\}_{n=1}^N$}"
57 | placeholder += "." # phantom w/o anything causes stuff to disappear
58 | for i, t in enumerate(txts):
59 | t = t.split("\n")
60 | t = [placeholder] * (2 - len(t)) + t # ensure 2 lines
61 | # t = [ln+LE for ln in t]
62 | txts[i] = "\n".join(t)
63 |
64 |
65 | def crop(img):
66 | "Crop Matlab-outputted image"
67 | top = int(0.15 * img.shape[0])
68 | btm = int((1 - 0.20) * img.shape[0])
69 | lft = int(0.10 * img.shape[1])
70 | rgt = int((1 - 0.09) * img.shape[1])
71 | return img[top:btm, lft:rgt]
72 |
73 |
74 | PWD = Path(__file__).parent
75 |
76 |
77 | def illust_EnKF(i):
78 | plt.close(1)
79 | plt.figure(1, figsize=(8, 6))
80 | axI = plt.subplot(111)
81 | axI.set_axis_off()
82 | name = "illust_EnKF_prez_" + str(i + 8) + ".png"
83 | name = PWD / "from_Matlab" / name
84 | img = imread(name)
85 | img = crop(img)
86 | axI.imshow(img)
87 | axI.set_title(txts[i], loc="left", usetex=True, size=15)
88 |
89 |
90 | for i, txt in enumerate(txts):
91 | illust_EnKF(i)
92 | plt.pause(0.2)
93 | name = "illust_EnKF_" + str(i) + ".png"
94 | print("Saving", PWD / name)
95 | plt.savefig(PWD / name)
96 |
--------------------------------------------------------------------------------
/notebooks/resources/illust_EnKF/illust_EnKF_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/illust_EnKF/illust_EnKF_0.png
--------------------------------------------------------------------------------
/notebooks/resources/illust_EnKF/illust_EnKF_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/illust_EnKF/illust_EnKF_1.png
--------------------------------------------------------------------------------
/notebooks/resources/illust_EnKF/illust_EnKF_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/illust_EnKF/illust_EnKF_2.png
--------------------------------------------------------------------------------
/notebooks/resources/illust_EnKF/illust_EnKF_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/illust_EnKF/illust_EnKF_3.png
--------------------------------------------------------------------------------
/notebooks/resources/illust_EnKF/illust_EnKF_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/illust_EnKF/illust_EnKF_4.png
--------------------------------------------------------------------------------
/notebooks/resources/illust_EnKF/illust_EnKF_5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/illust_EnKF/illust_EnKF_5.png
--------------------------------------------------------------------------------
/notebooks/resources/illust_EnKF/illust_EnKF_6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/illust_EnKF/illust_EnKF_6.png
--------------------------------------------------------------------------------
/notebooks/resources/illust_EnKF/illust_EnKF_7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nansencenter/DA-tutorials/0f88c6da7a1a09d9c4794f72dc7b6e5f0304f79c/notebooks/resources/illust_EnKF/illust_EnKF_7.png
--------------------------------------------------------------------------------
/notebooks/resources/macros.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | # Macros must be included in
4 | # - every notebook.
5 | # - every answer that uses them.
6 |
7 |
8 | from pathlib import Path
9 | import re
10 | import sys
11 |
12 | import nbformat
13 |
14 |
15 | HEADER = r'''% ######################################## Loading TeX (MathJax)... Please wait ########################################'''
16 | macros=r'''
17 | \newcommand{\Reals}{\mathbb{R}}
18 | \newcommand{\Expect}[0]{\mathbb{E}}
19 | \newcommand{\NormDist}{\mathscr{N}}
20 |
21 | \newcommand{\DynMod}[0]{\mathscr{M}}
22 | \newcommand{\ObsMod}[0]{\mathscr{H}}
23 |
24 | \newcommand{\mat}[1]{{\mathbf{{#1}}}} % ALWAYS
25 | %\newcommand{\mat}[1]{{\pmb{\mathsf{#1}}}}
26 | \newcommand{\bvec}[1]{{\mathbf{#1}}} % ALWAYS
27 |
28 | \newcommand{\trsign}{{\mathsf{T}}} % ALWAYS
29 | \newcommand{\tr}{^{\trsign}} % ALWAYS
30 | \newcommand{\ceq}[0]{\mathrel{≔}}
31 | \newcommand{\xDim}[0]{D}
32 | \newcommand{\supa}[0]{^\text{a}}
33 | \newcommand{\supf}[0]{^\text{f}}
34 |
35 | \newcommand{\I}[0]{\mat{I}} % ALWAYS
36 | \newcommand{\K}[0]{\mat{K}}
37 | \newcommand{\bP}[0]{\mat{P}}
38 | \newcommand{\bH}[0]{\mat{H}}
39 | \newcommand{\bF}[0]{\mat{F}}
40 | \newcommand{\R}[0]{\mat{R}}
41 | \newcommand{\Q}[0]{\mat{Q}}
42 | \newcommand{\B}[0]{\mat{B}}
43 | \newcommand{\C}[0]{\mat{C}}
44 | \newcommand{\Ri}[0]{\R^{-1}}
45 | \newcommand{\Bi}[0]{\B^{-1}}
46 | \newcommand{\X}[0]{\mat{X}}
47 | \newcommand{\A}[0]{\mat{A}}
48 | \newcommand{\Y}[0]{\mat{Y}}
49 | \newcommand{\E}[0]{\mat{E}}
50 | \newcommand{\U}[0]{\mat{U}}
51 | \newcommand{\V}[0]{\mat{V}}
52 |
53 | \newcommand{\x}[0]{\bvec{x}}
54 | \newcommand{\y}[0]{\bvec{y}}
55 | \newcommand{\z}[0]{\bvec{z}}
56 | \newcommand{\q}[0]{\bvec{q}}
57 | \newcommand{\r}[0]{\bvec{r}}
58 | \newcommand{\bb}[0]{\bvec{b}}
59 |
60 | \newcommand{\bx}[0]{\bvec{\bar{x}}}
61 | \newcommand{\by}[0]{\bvec{\bar{y}}}
62 | \newcommand{\barB}[0]{\mat{\bar{B}}}
63 | \newcommand{\barP}[0]{\mat{\bar{P}}}
64 | \newcommand{\barC}[0]{\mat{\bar{C}}}
65 | \newcommand{\barK}[0]{\mat{\bar{K}}}
66 |
67 | \newcommand{\D}[0]{\mat{D}}
68 | \newcommand{\Dobs}[0]{\mat{D}_{\text{obs}}}
69 | \newcommand{\Dmod}[0]{\mat{D}_{\text{mod}}}
70 |
71 | \newcommand{\ones}[0]{\bvec{1}} % ALWAYS
72 | \newcommand{\AN}[0]{\big( \I_N - \ones \ones\tr / N \big)}
73 | '''
74 | macros = [ln for ln in macros.splitlines() if ln and not ln.startswith('%')]
75 | always = [i for i, ln in enumerate(macros) if "ALWAYS" in ln]
76 | macros = [m.replace("% ALWAYS","").rstrip() for m in macros]
77 |
78 | # Convert to {macro_name: macro_lineno}
79 | declaration = re.compile(r'''^\\newcommand{(.+?)}''')
80 | lineno_by_name = {}
81 | for i, ln in enumerate(macros):
82 | match = declaration.match(ln)
83 | if match: lineno_by_name[match.group(1)] = i
84 |
85 | # Regex for macro, for ex. \mat, including \mat_, but not \mathbf:
86 | no_escape = lambda s: s.replace("\\",r"\\")
87 | delimit = lambda m: re.compile( no_escape(m) + r'(_|\b)' )
88 |
89 |
90 | def include_macros(content):
91 | """Include macros in answers. Only those that are required."""
92 | # Find macros present in content
93 | necessary = [i for macro, i in lineno_by_name.items() if delimit(macro).search(content)]
94 | # Include in content
95 | if necessary:
96 | mm = [macros[i] for i in necessary]
97 | # PRE-pend those that should always be there
98 | mm = [macros[i] for i in always if (macros[i] not in mm)] + mm
99 |         # Escape underscores because md2html sometimes interprets them as emphasis markers
100 | mm = [m.replace("_","\\_") for m in mm]
101 | # Include surrounding dollar signs
102 | mm = ["$"] + mm + ["$"]
103 | # Avoid accidental $$
104 | space = " " if content.startswith("$") else ""
105 | # Collect
106 | content = "\n".join(mm) + space + content
107 | return content
108 |
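# For example (illustrative call, not from the original file): an answer containing
# `\K` gets the corresponding `\newcommand` line -- plus the "ALWAYS" macros --
# prepended, wrapped in `$` signs so that MathJax can render them:
#
#     include_macros(r"The Kalman gain \K weighs the observations.")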
109 |
110 | def update_1nbscript(f: Path):
111 | """Update the macros of a notebook script (synced with `jupytext`)."""
112 | print(f.name.ljust(40), end=": ")
113 | lines = f.read_text().splitlines()
114 | mLine = "# " + " ".join(macros)
115 |
116 | try:
117 | iHeader = lines.index("# " + HEADER)
118 | except (ValueError, AssertionError):
119 | print("Could not locate pre-existing macros")
120 | return
121 |
122 | if not (lines[iHeader-1] == "# $" and
123 | lines[iHeader+2] == "# $"):
124 | print("Could not parse macros")
125 |
126 | # elif lines[iHeader+1] == mLine:
127 | # print("Macros already up to date.")
128 |
129 | else:
130 | # lines[iHeader] = "# % ##### NEW HEADER ######"
131 | lines[iHeader+1] = mLine
132 | f.write_text("\n".join(lines))
133 | print("Macros updated!")
134 |
135 |
136 | if __name__ == "__main__" and any("update" in arg for arg in sys.argv):
137 | for f in sorted((Path(__file__).parents[1] / "scripts").glob("T*.py")):
138 | update_1nbscript(f)
139 |
--------------------------------------------------------------------------------
/notebooks/scripts/T2 - Gaussian distribution.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupyter:
3 | jupytext:
4 | formats: ipynb,scripts//py:light,scripts//md
5 | text_representation:
6 | extension: .md
7 | format_name: markdown
8 | format_version: '1.3'
9 | jupytext_version: 1.17.2
10 | kernelspec:
11 | display_name: Python 3 (ipykernel)
12 | language: python
13 | name: python3
14 | ---
15 |
16 | ```python
17 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials"
18 | !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s
19 | ```
20 |
21 | ```python
22 | from resources import show_answer, interact
23 | %matplotlib inline
24 | import numpy as np
25 | import scipy as sp
26 | import matplotlib.pyplot as plt
27 | plt.ion();
28 | ```
29 |
30 | # T2 - The Gaussian (Normal) distribution
31 |
32 | We begin by reviewing the most useful of probability distributions.
33 | But first, let's refresh some basic theory.
34 | $
35 | \newcommand{\Reals}{\mathbb{R}}
36 | \newcommand{\Expect}[0]{\mathbb{E}}
37 | \newcommand{\NormDist}{\mathscr{N}}
38 | \newcommand{\mat}[1]{{\mathbf{{#1}}}}
39 | \newcommand{\bvec}[1]{{\mathbf{#1}}}
40 | \newcommand{\trsign}{{\mathsf{T}}}
41 | \newcommand{\tr}{^{\trsign}}
42 | \newcommand{\xDim}[0]{D}
43 | \newcommand{\x}[0]{\bvec{x}}
44 | \newcommand{\X}[0]{\mat{X}}
45 | $
46 |
47 | ## Probability essentials
48 |
49 | As stated by James Bernoulli (1713) and elucidated by [Laplace (1812)](#References):
50 |
51 | > The Probability for an event is the ratio of the number of cases favorable to it, to the number of all
52 | > cases possible when nothing leads us to expect that any one of these cases should occur more than any other,
53 | > which renders them, for us, equally possible:
54 |
55 | $$ \mathbb{P}(\text{event}) = \frac{\text{number of} \textit{ favorable } \text{outcomes}}{\text{number of} \textit{ possible } \text{outcomes}} $$
56 |
57 | A **random variable** is a *quantity* taking random values, described in terms of **distributions**.
58 |
59 | - A *discrete* random variable, $X$, has a probability *mass* function (**pmf**) defined by $p(x) = \mathbb{P}(X{=}x)$.
60 | Sometimes we write $p_X(x)$ to distinguish it from $p_Y(y)$.
61 | - The *joint* probability of two random variables $X$ and $Y$ is defined by their intersection:
62 | $p(x, y) = \mathbb{P}(X{=}x \cap Y{=}y)$.
63 | - The *marginal* $p(x)$ is obtained by summing over all $y$, and vice versa.
64 | - The *conditional* probability of $X$ *given* $y$ is $p(x|y) = \frac{p(x,y)}{p(y)}$.
65 | - *Independence* means $p(x,y) = p(x) \, p(y)$ for all $x, y$.
66 | - The cumulative distribution function (**cdf**) is defined as $F(x) = \mathbb{P}(X \le x)$.
67 |
68 | We will mainly be concerned with *continuous* random variables.
69 | Their probability *density* function (**pdf**) can be defined as $p(x) = F'(x)$ or, equivalently,
70 |
71 | $$p(x) = \lim_{h \to 0} \frac{\mathbb{P}(X \in [x,\, x{+} h])}{h} \,.$$
72 |
73 | The **sample average** of draws from a random variable $X$
74 | is denoted with an overhead bar:
75 | $$ \bar{x} := \frac{1}{N} \sum_{n=1}^{N} x_n \,. $$
76 | By the *law of large numbers (LLN)*, the sample average converges as $N \to \infty$ to the **expected value** (sometimes called the **mean**):
77 | $$ \Expect[X] ≔ \int x \, p(x) \, d x \,, $$
78 | where the (omitted) domain of integration is *all values of $x$*.
79 | Two important properties follow immediately:
80 |
81 | - *Linearity*: $\Expect[aX + Y] = a \Expect[X] + \Expect[Y]$.
82 | - *Total expectation*: $\Expect[\Expect[X|Y]] = \Expect[X]$.
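The following quick check (not part of the original exercises; the uniform
distribution and the sample sizes are just convenient choices) illustrates the LLN:
the sample average of draws approaches the expected value ($0.5$ here) as $N$ grows.

```python
rng = np.random.default_rng(seed=42)
for N in [10, 1_000, 100_000]:
    draws = rng.uniform(0, 1, size=N)  # X ~ Uniform(0, 1), so E[X] = 0.5
    print(f"N = {N:>6}:  sample average = {draws.mean():.4f}")
```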
83 |
84 | ## The univariate (a.k.a. 1-dimensional, scalar) Gaussian
85 |
86 | If $X$ is Gaussian (also known as "Normal"), we write
87 | $X \sim \NormDist(\mu, \sigma^2)$, or $p(x) = \NormDist(x \mid \mu, \sigma^2)$,
88 | where the parameters $\mu$ and $\sigma^2$ are called the mean and variance
89 | (for reasons that will become clear below).
90 | The Gaussian pdf, for $x \in (-\infty, +\infty)$, is
91 | $$ \large \NormDist(x \mid \mu, \sigma^2) = (2 \pi \sigma^2)^{-1/2} e^{-(x-\mu)^2/(2 \sigma^2)} \, . \tag{G1} $$
92 |
93 | Run the cell below to define a function to compute the pdf (G1) using the `scipy` library.
94 |
95 | ```python
96 | def pdf_G1(x, mu, sigma2):
97 | "Univariate Gaussian pdf"
98 | pdf_values = sp.stats.norm.pdf(x, loc=mu, scale=np.sqrt(sigma2))
99 | return pdf_values
100 | ```
101 |
102 | Computers typically represent functions *numerically* by their values at a set of grid points (nodes),
103 | an approach called ***discretisation***.
104 |
105 | ```python
106 | bounds = -20, 20
107 | N = 201 # num of grid points
108 | grid1d = np.linspace(*bounds,N) # grid
109 | dx = grid1d[1] - grid1d[0] # grid spacing
110 | ```
111 |
112 | Feel free to return here later and change the grid resolution to see how
113 | it affects the cells below (after re-running them).
114 |
115 | The following code plots the Gaussian pdf.
116 |
117 | ```python
118 | hist = []
119 | @interact(mu=bounds, sigma=(.1, 10, 1))
120 | def plot_pdf(mu=0, sigma=5):
121 | plt.figure(figsize=(6, 2))
122 | colors = plt.get_cmap('hsv')([(k-len(hist))%9/9 for k in range(9)])
123 | plt.xlim(*bounds)
124 | plt.ylim(0, .2)
125 | hist.insert(0, pdf_G1(grid1d, mu, sigma**2))
126 | for density_values, color in zip(hist, colors):
127 | plt.plot(grid1d, density_values, c=color)
128 | plt.show()
129 | ```
130 |
131 | #### Exc -- parameter influence
132 |
133 | Experiment with `mu` and `sigma` to answer these questions:
134 |
135 | - How does the pdf curve change when `mu` changes? (Several options may be correct or incorrect)
136 |
137 | 1. It changes the curve into a uniform distribution.
138 | 1. It changes the width of the curve.
139 | 1. It shifts the peak of the curve to the left or right.
140 | 1. It changes the height of the curve.
141 | 1. It transforms the curve into a binomial distribution.
142 | 1. It makes the curve wider or narrower.
143 | 1. It modifies the skewness (asymmetry) of the curve.
144 | 1. It causes the curve to expand vertically while keeping the width the same.
145 | 1. It translates the curve horizontally.
146 | 1. It alters the kurtosis (peakedness) of the curve.
147 | 1. It rotates the curve around the origin.
148 | 1. It makes the curve a straight line.
149 | - How does the pdf curve change when you increase `sigma`?
150 | Refer to the same options as the previous question.
151 | - In a few words, describe the shape of the Gaussian pdf curve.
152 | Does this remind you of anything? *Hint: it should be clear as a bell!*
153 |
154 | **Exc -- Implementation:** Change the implementation of `pdf_G1` so that it does not use `scipy`, but instead uses your own code (with `numpy` only). Re-run all of the above cells and check that you get the same plots as before.
155 | *Hint: `**` is the exponentiation/power operator, but $e^x$ is more efficiently computed with `np.exp(x)`*
156 |
157 | ```python
158 | # show_answer('pdf_G1')
159 | ```
160 |
161 | **Exc -- Derivatives:** Recall $p(x) = \NormDist(x \mid \mu, \sigma^2)$ from eqn. (G1).
162 | Use pen, paper, and calculus to answer the following questions,
163 | which will help you remember some key properties of the distribution.
164 |
165 | - (i) Find $x$ such that $p(x) = 0$.
166 | - (ii) Where is the location of the **mode (maximum)** of the density?
167 | I.e. find $x$ such that $\frac{d p}{d x}(x) = 0$.
168 | *Hint: begin by writing $p(x)$ as $c e^{- J(x)}$ for some $J(x)$.*
169 | - (iii) Where is the **inflection point**? I.e. where $\frac{d^2 p}{d x^2}(x) = 0$.
170 | - (iv) *Optional*: Some forms of *sensitivity analysis* (typically for non-Gaussian $p$) consist in estimating/approximating the Hessian, i.e. $\frac{d^2 \log p}{d x^2}$. Explain what this has to do with *uncertainty quantification*.
171 |
172 |
173 |
174 | #### Exc (optional) -- Change of variables
175 |
176 | Let $Z = \phi(X)$ for some monotonic function $\phi$,
177 | and let $p_x$ and $p_z$ be their probability density functions (pdf).
178 |
179 | - (a): Show that $p_z(z) = p_x\big(\phi^{-1}(z)\big) \frac{1}{|\phi'(z)|}$,
180 | - (b): Show that you don't need to derive the density of $z$ in order to compute its expectation, i.e. that
181 | $$ \Expect[Z] = \int \phi(x) \, p_x(x) \, d x ≕ \Expect[\phi(x)] \,,$$
182 | *Hint: while the proof is convoluted, the result itself is [pretty intuitive](https://en.wikipedia.org/wiki/Law_of_the_unconscious_statistician).*
183 |
184 | ```python
185 | # show_answer('CVar in proba')
186 | ```
187 |
188 |
189 |
190 | #### Exc (optional) -- Integrals
191 |
192 | Recall $p(x) = \NormDist(x \mid \mu, \sigma^2)$ from eqn. (G1). Abbreviate it as $c = (2 \pi \sigma^2)^{-1/2}$.
193 | Use pen, paper, and calculus to show that
194 |
195 | - (i) the first parameter, $\mu$, indicates its **mean**, i.e. that $$\mu = \Expect[X] \,.$$
196 | *Hint: you can rely on the result of (iii)*
197 | - (ii) the second parameter, $\sigma^2>0$, indicates its **variance**,
198 | i.e. that $$\sigma^2 = \mathbb{Var}(X) \mathrel{≔} \Expect[(X-\mu)^2] \,.$$
199 | *Hint: use $x^2 = x x$ to enable integration by parts.*
200 | - (iii) $\Expect[1] = 1$,
201 |   thus proving that (G1) indeed uses the right normalising constant.
202 |   *Hint: Neither Bernoulli nor Laplace managed this,
203 |   until [Gauss (1809)](#References) did, by first deriving $(\Expect[1])^2$.
204 | For more (visual) help, watch [3Blue1Brown](https://www.youtube.com/watch?v=cy8r7WSuT1I&t=3m52s).*
205 |
206 | ```python
207 | # show_answer('Gauss integrals')
208 | ```
209 |
210 | **Exc (optional) -- Riemann sums**:
211 | Recall that integrals compute the "area under the curve".
212 | On a discrete grid, they can be approximated using the [Trapezoidal rule](https://en.wikipedia.org/wiki/Riemann_sum#Trapezoidal_rule).
213 |
214 | - (a) Replace the prefab code below with your own implementation, using `sum()`,
215 | to compute the mean and variance of a pdf represented on a grid.
216 | - (b) Use `np.trapezoid` to compute the probability that a scalar Gaussian $X$ lies within $1$ standard deviation of its mean.
217 | *Hint: the numerical answer you should find is $\mathbb{P}(X \in [\mu {-} \sigma, \mu {+} \sigma]) \approx 68\%$.*
218 |
219 | ```python
220 | def mean_and_var(pdf_values, grid):
221 | f, x = pdf_values, grid
222 | mu = np.trapezoid(f*x, x)
223 | s2 = np.trapezoid(f*(x-mu)**2, x)
224 | return mu, s2
225 |
226 | mu, sigma = 0, 2 # example
227 | pdf_vals = pdf_G1(grid1d, mu=mu, sigma2=sigma**2)
228 | 'Should equal mu and sigma2: %f, %f' % mean_and_var(pdf_vals, grid1d)
229 | ```
230 |
231 | ```python
232 | # show_answer('Riemann sums', 'a')
233 | ```
234 |
235 | **Exc -- The uniform pdf**:
236 | Below is the pdf of the [uniform/flat/box distribution](https://en.wikipedia.org/wiki/Uniform_distribution_(continuous))
237 | for a given mean and variance.
238 |
239 | - Use `mean_and_var()` to verify `pdf_U1` (as is).
240 | - Replace `_G1` with `_U1` in the code generating the above interactive plot.
241 | - Why are the walls (ever so slightly) inclined?
242 | - Write your own implementation below, and check that it reproduces the `scipy` version already in place.
243 |
244 | ```python
245 | def pdf_U1(x, mu, sigma2):
246 | a = mu - np.sqrt(3*sigma2)
247 | b = mu + np.sqrt(3*sigma2)
248 | pdf_values = sp.stats.uniform(loc=a, scale=(b-a)).pdf(x)
249 | # Your own implementation:
250 | # height = ...
251 | # pdf_values = height * np.ones_like(x)
252 |     # pdf_values[x<a] = ...
253 |     # pdf_values[x>b] = ...
254 | return pdf_values
255 | ```
256 |
257 | ```python
258 | # show_answer('pdf_U1')
259 | ```
260 |
261 | ## The multivariate (i.e. vector) Gaussian
262 |
263 | A *multivariate* random variable, i.e. a **vector**, is simply a collection of scalar variables (on the same probability space).
264 | Its distribution is the *joint* distribution of its components.
265 | The pdf of the multivariate Gaussian (for any dimension $\ge 1$) is
266 |
267 | $$\large \NormDist(\x \mid \mathbf{\mu}, \mathbf{\Sigma}) =
268 | |2 \pi \mathbf{\Sigma}|^{-1/2} \, \exp\Big(-\frac{1}{2}\|\x-\mathbf{\mu}\|^2_\mathbf{\Sigma} \Big) \,, \tag{GM} $$
269 | where $|.|$ represents the matrix determinant,
270 | and $\|.\|_\mathbf{W}$ represents a weighted 2-norm: $\|\x\|^2_\mathbf{W} = \x^T \mathbf{W}^{-1} \x$.
271 |
272 |
273 |
274 | $\mathbf{W}$ must be symmetric-positive-definite (SPD) because ... (optional reading 🔍)
275 |
276 |
277 | - The norm (a quadratic form) is invariant to any asymmetry in the weight matrix.
278 | - The density (GM) would not be integrable (over $\Reals^{\xDim}$) unless $\x\tr \mathbf{\Sigma}^{-1} \x > 0$ for all $\x \neq \bvec{0}$, i.e. unless $\mathbf{\Sigma}$ is positive definite.
279 |
280 | - - -
281 |
282 |
283 | It is important to recognize how similar eqn. (GM) is to the univariate (scalar) case (G1).
284 | Moreover, [as above](#Exc-(optional)----Integrals), it can be shown that
285 |
286 | - $\mathbf{\mu} = \Expect[\X]$,
287 | - $\mathbf{\Sigma} = \Expect[(\X-\mu)(\X-\mu)\tr]$,
288 |
289 | That is, the elements of $\mathbf{\Sigma}$ are the individual covariances:
290 | $\Sigma_{i,j} = \Expect[(X_i-\mu_i)(X_j-\mu_j)] =: \mathbb{Cov}(X_i, X_j)$.
291 | On the diagonal ($i=j$), they are variances: $\Sigma_{i,i} = \mathbb{Var}(X_i)$.
292 | Therefore $\mathbf{\Sigma}$ is called the *covariance matrix*.
293 |
294 | The following implements the pdf (GM). Take a moment to digest the code, but don't worry if you don't understand it all. Hints:
295 |
296 | - `@` produces matrix multiplication (`*` in `Matlab`);
297 | - `*` produces array multiplication (`.*` in `Matlab`);
298 | - `axis=-1` makes `np.sum()` work along the last dimension of an ND-array.
299 |
300 | ```python
301 | from numpy.linalg import det, inv
302 |
303 | def weighted_norm22(points, Wi):
304 | "Computes the weighted norm of each vector (row in `points`)."
305 | return np.sum( (points @ inv(Wi)) * points, axis=-1)
306 |
307 | def pdf_GM(points, mu, Sigma):
308 | "pdf -- Gaussian, Multivariate: N(x | mu, Sigma) for each x in `points`."
309 | c = np.sqrt(det(2*np.pi*Sigma))
310 | return 1/c * np.exp(-0.5*weighted_norm22(points - mu, Sigma))
311 | ```
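
As an optional sanity check (not part of the original notebook; the test values below are arbitrary), `pdf_GM` can be compared against `scipy`'s reference implementation:

```python
mu_test = np.array([1.0, -1.0])
Sigma_test = np.array([[2.0, 0.5],
                       [0.5, 1.0]])
pts = np.array([[0.0, 0.0], [1.0, -1.0], [3.0, 2.0]])
ours = pdf_GM(pts, mu_test, Sigma_test)
ref = sp.stats.multivariate_normal(mean=mu_test, cov=Sigma_test).pdf(pts)
print(np.allclose(ours, ref))  # should print True
```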
312 |
313 | The following code plots the pdf as contour (level) curves.
314 |
315 | ```python
316 | grid2d = np.dstack(np.meshgrid(grid1d, grid1d))
317 |
318 | @interact(corr=(-1, 1, .001), std_x=(1e-5, 10, 1))
319 | def plot_pdf_G2(corr=0.7, std_x=1):
320 | # Form covariance matrix (C) from input and some constants
321 | var_x = std_x**2
322 | var_y = 1
323 | cv_xy = np.sqrt(var_x * var_y) * corr
324 | C = 25 * np.array([[var_x, cv_xy],
325 | [cv_xy, var_y]])
326 | # Evaluate (compute)
327 | density_values = pdf_GM(grid2d, mu=0, Sigma=C)
328 | # Plot
329 | plt.figure(figsize=(4, 4))
330 | height = 1/np.sqrt(det(2*np.pi*C))
331 | plt.contour(grid1d, grid1d, density_values,
332 | levels=np.linspace(1e-4, height, 11), cmap="plasma")
333 | plt.axis('equal');
334 | plt.show()
335 | ```
336 |
337 | The code defines the covariance `cv_xy` from the input ***correlation*** `corr`.
338 | This is a coefficient (number), defined for any two random variables $X$ and $Y$ (not necessarily Gaussian) by
339 | $$ \rho[X,Y]=\frac{\mathbb{Cov}[X,Y]}{\sigma_x \sigma_y} \,.$$
340 | This correlation quantifies (defines) the ***linear dependence*** between $X$ and $Y$. Indeed,
341 |
342 | - $-1\leq \rho \leq 1$ (by the Cauchy-Schwarz inequality)
343 | - **If** $X$ and $Y$ are *independent*, then $\rho[X,Y]=0$.
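
Here is a small numerical illustration (not part of the original notebook; the sample size and target correlation are arbitrary): draw from a bivariate Gaussian with a prescribed correlation and check that the *sample* correlation comes out close to it.

```python
rng = np.random.default_rng(3)
rho = 0.7
C_demo = np.array([[1.0, rho],
                   [rho, 1.0]])
xy = rng.multivariate_normal(mean=[0, 0], cov=C_demo, size=10_000)
print("sample correlation:", np.corrcoef(xy.T)[0, 1])  # should be close to 0.7
```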
344 |
345 | **Exc -- Correlation influence:** How do the contours look? Try to understand why. Cases:
346 |
347 | - (a) correlation=0.
348 | - (b) correlation=0.99.
349 | - (c) correlation=0.5. (Note that we've used `plt.axis('equal')`).
350 | - (d) correlation=0.5, but with non-equal variances.
351 |
352 | Finally (optional): why does the code "crash" when `corr = +/- 1`? Is this a good or a bad thing?
353 |
354 | **Exc Correlation game:** [Play](http://guessthecorrelation.com/) until you get a score (gold coins) of 5 or more.
355 |
356 | **Exc -- Correlation disambiguation:**
357 |
358 | - What's the difference between correlation and covariance (in words)?
359 | - What's the difference between non-zero (C) correlation (or covariance) and (D) dependence?
360 | *Hint: consider this [image](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#/media/File:Correlation_examples2.svg).*
361 | - Does $C \Rightarrow D$ or the converse?
362 |     - What about the negation, $\neg D \Rightarrow \neg C$, or its converse?
363 | - What about the (jointly) Gaussian case?
364 | - Does correlation (or dependence) imply causation?
365 | - Suppose $x$ and $y$ have non-zero correlation, but neither one causes the other.
366 | Does information about $y$ give you information about $x$?
367 |
368 | **Exc (optional) -- Gaussian ubiquity:** Why are we so fond of the Gaussian assumption?
369 |
370 | ```python
371 | # show_answer('Why Gaussian')
372 | ```
373 |
374 | ## Summary
375 |
376 | The Normal/Gaussian distribution is bell-shaped.
377 | Its parameters are the mean and the variance.
378 | In the multivariate case, the mean is a vector,
379 | while the second parameter becomes a covariance *matrix*,
380 | whose off-diagonal elements represent scaled correlation factors,
381 | which measure *linear* dependence.
382 |
383 | ### Next: [T3 - Bayesian inference](T3%20-%20Bayesian%20inference.ipynb)
384 |
385 |
386 |
387 | ### References
388 |
389 | - **Laplace (1812)**: P. S. Laplace, "Théorie Analytique des Probabilités", 1812.
390 | - **Gauss (1809)**: Gauss, C. F. (1809). *Theoria Motus Corporum Coelestium in Sectionibus Conicis Solem Ambientium*. Specifically, Book II, Section 3, Art. 177-179, where he presents the method of least squares (which will be very relevant to us) and its probabilistic justification based on the normal distribution of errors.
391 |
--------------------------------------------------------------------------------
/notebooks/scripts/T2 - Gaussian distribution.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # jupyter:
3 | # jupytext:
4 | # formats: ipynb,scripts//py:light,scripts//md
5 | # text_representation:
6 | # extension: .py
7 | # format_name: light
8 | # format_version: '1.5'
9 | # jupytext_version: 1.17.2
10 | # kernelspec:
11 | # display_name: Python 3 (ipykernel)
12 | # language: python
13 | # name: python3
14 | # ---
15 |
16 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials"
17 | # !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s
18 |
19 | from resources import show_answer, interact
20 | # %matplotlib inline
21 | import numpy as np
22 | import scipy as sp
23 | import matplotlib.pyplot as plt
24 | plt.ion();
25 |
26 |
27 | # # T2 - The Gaussian (Normal) distribution
28 | #
29 | # We begin by reviewing the most useful of probability distributions.
30 | # But first, let's refresh some basic theory.
31 | # $
32 | # \newcommand{\Reals}{\mathbb{R}}
33 | # \newcommand{\Expect}[0]{\mathbb{E}}
34 | # \newcommand{\NormDist}{\mathscr{N}}
35 | # \newcommand{\mat}[1]{{\mathbf{{#1}}}}
36 | # \newcommand{\bvec}[1]{{\mathbf{#1}}}
37 | # \newcommand{\trsign}{{\mathsf{T}}}
38 | # \newcommand{\tr}{^{\trsign}}
39 | # \newcommand{\xDim}[0]{D}
40 | # \newcommand{\x}[0]{\bvec{x}}
41 | # \newcommand{\X}[0]{\mat{X}}
42 | # $
43 | #
44 | # ## Probability essentials
45 | #
46 | # As stated by James Bernoulli (1713) and elucidated by [Laplace (1812)](#References):
47 | #
48 | # > The Probability for an event is the ratio of the number of cases favorable to it, to the number of all
49 | # > cases possible when nothing leads us to expect that any one of these cases should occur more than any other,
50 | # > which renders them, for us, equally possible:
51 | #
52 | # $$ \mathbb{P}(\text{event}) = \frac{\text{number of} \textit{ favorable } \text{outcomes}}{\text{number of} \textit{ possible } \text{outcomes}} $$
53 | #
54 | # A **random variable** is a *quantity* taking random values, described in terms of **distributions**.
55 | #
56 | # - A *discrete* random variable, $X$, has a probability *mass* function (**pmf**) defined by $p(x) = \mathbb{P}(X{=}x)$.
57 | # Sometimes we write $p_X(x)$ to distinguish it from $p_Y(y)$.
58 | # - The *joint* probability of two random variables $X$ and $Y$ is defined by their intersection:
59 | # $p(x, y) = \mathbb{P}(X{=}x \cap Y{=}y)$.
60 | # - The *marginal* $p(x)$ is obtained by summing over all $y$, and vice versa.
61 | # - The *conditional* probability of $X$ *given* $y$ is $p(x|y) = \frac{p(x,y)}{p(y)}$.
62 | # - *Independence* means $p(x,y) = p(x) \, p(y)$ for all $x, y$.
63 | # - The cumulative distribution function (**cdf**) is defined as $F(x) = \mathbb{P}(X \le x)$.
64 | #
65 | # We will mainly be concerned with *continuous* random variables.
66 | # Their probability *density* function (**pdf**) can be defined as $p(x) = F'(x)$ or, equivalently,
67 | #
68 | # $$p(x) = \lim_{h \to 0} \frac{\mathbb{P}(X \in [x,\, x{+} h])}{h} \,.$$
69 | #
70 | # The **sample average** of draws from a random variable $X$
71 | # is denoted with an overhead bar:
72 | # $$ \bar{x} := \frac{1}{N} \sum_{n=1}^{N} x_n \,. $$
73 | # By the *law of large numbers (LLN)*, the sample average converges as $N \to \infty$ to the **expected value** (sometimes called the **mean**):
74 | # $$ \Expect[X] ≔ \int x \, p(x) \, d x \,, $$
75 | # where the (omitted) domain of integration is *all values of $x$*.
76 | # Two important properties follow immediately:
77 | #
78 | # - *Linearity*: $\Expect[aX + Y] = a \Expect[X] + \Expect[Y]$.
79 | # - *Total expectation*: $\Expect[\Expect[X|Y]] = \Expect[X]$.
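# The following quick check (not part of the original exercises; the uniform
# distribution and the sample sizes are just convenient choices) illustrates the LLN:
# the sample average of draws approaches the expected value (0.5 here) as N grows.

# +
rng = np.random.default_rng(seed=42)
for N in [10, 1_000, 100_000]:
    draws = rng.uniform(0, 1, size=N)  # X ~ Uniform(0, 1), so E[X] = 0.5
    print(f"N = {N:>6}:  sample average = {draws.mean():.4f}")
# -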
80 | #
81 | # ## The univariate (a.k.a. 1-dimensional, scalar) Gaussian
82 | #
83 | # If $X$ is Gaussian (also known as "Normal"), we write
84 | # $X \sim \NormDist(\mu, \sigma^2)$, or $p(x) = \NormDist(x \mid \mu, \sigma^2)$,
85 | # where the parameters $\mu$ and $\sigma^2$ are called the mean and variance
86 | # (for reasons that will become clear below).
87 | # The Gaussian pdf, for $x \in (-\infty, +\infty)$, is
88 | # $$ \large \NormDist(x \mid \mu, \sigma^2) = (2 \pi \sigma^2)^{-1/2} e^{-(x-\mu)^2/(2 \sigma^2)} \, . \tag{G1} $$
89 | #
90 | # Run the cell below to define a function to compute the pdf (G1) using the `scipy` library.
91 |
92 | def pdf_G1(x, mu, sigma2):
93 | "Univariate Gaussian pdf"
94 | pdf_values = sp.stats.norm.pdf(x, loc=mu, scale=np.sqrt(sigma2))
95 | return pdf_values
96 |
97 |
98 | # Computers typically represent functions *numerically* by their values at a set of grid points (nodes),
99 | # an approach called ***discretisation***.
100 |
101 | bounds = -20, 20
102 | N = 201 # num of grid points
103 | grid1d = np.linspace(*bounds,N) # grid
104 | dx = grid1d[1] - grid1d[0] # grid spacing
105 |
106 | # Feel free to return here later and change the grid resolution to see how
107 | # it affects the cells below (after re-running them).
108 | #
109 | # The following code plots the Gaussian pdf.
110 |
111 | hist = []
112 | @interact(mu=bounds, sigma=(.1, 10, 1))
113 | def plot_pdf(mu=0, sigma=5):
114 | plt.figure(figsize=(6, 2))
115 | colors = plt.get_cmap('hsv')([(k-len(hist))%9/9 for k in range(9)])
116 | plt.xlim(*bounds)
117 | plt.ylim(0, .2)
118 | hist.insert(0, pdf_G1(grid1d, mu, sigma**2))
119 | for density_values, color in zip(hist, colors):
120 | plt.plot(grid1d, density_values, c=color)
121 | plt.show()
122 |
123 |
124 | # #### Exc -- parameter influence
125 | #
126 | # Experiment with `mu` and `sigma` to answer these questions:
127 | #
128 | # - How does the pdf curve change when `mu` changes? (Several options may be correct or incorrect)
129 | #
130 | # 1. It changes the curve into a uniform distribution.
131 | # 1. It changes the width of the curve.
132 | # 1. It shifts the peak of the curve to the left or right.
133 | # 1. It changes the height of the curve.
134 | # 1. It transforms the curve into a binomial distribution.
135 | # 1. It makes the curve wider or narrower.
136 | # 1. It modifies the skewness (asymmetry) of the curve.
137 | # 1. It causes the curve to expand vertically while keeping the width the same.
138 | # 1. It translates the curve horizontally.
139 | # 1. It alters the kurtosis (peakedness) of the curve.
140 | # 1. It rotates the curve around the origin.
141 | # 1. It makes the curve a straight line.
142 | # - How does the pdf curve change when you increase `sigma`?
143 | # Refer to the same options as the previous question.
144 | # - In a few words, describe the shape of the Gaussian pdf curve.
145 | # Does this remind you of anything? *Hint: it should be clear as a bell!*
146 | #
147 | # **Exc -- Implementation:** Change the implementation of `pdf_G1` so that it does not use `scipy`, but instead uses your own code (with `numpy` only). Re-run all of the above cells and check that you get the same plots as before.
148 | # *Hint: `**` is the exponentiation/power operator, but $e^x$ is more efficiently computed with `np.exp(x)`*
149 |
150 | # +
151 | # show_answer('pdf_G1')
152 | # -
153 |
154 | # **Exc -- Derivatives:** Recall $p(x) = \NormDist(x \mid \mu, \sigma^2)$ from eqn. (G1).
155 | # Use pen, paper, and calculus to answer the following questions,
156 | # which will help you remember some key properties of the distribution.
157 | #
158 | # - (i) Find $x$ such that $p(x) = 0$.
159 | # - (ii) Where is the location of the **mode (maximum)** of the density?
160 | # I.e. find $x$ such that $\frac{d p}{d x}(x) = 0$.
161 | # *Hint: begin by writing $p(x)$ as $c e^{- J(x)}$ for some $J(x)$.*
162 | # - (iii) Where is the **inflection point**? I.e. where $\frac{d^2 p}{d x^2}(x) = 0$.
163 | # - (iv) *Optional*: Some forms of *sensitivity analysis* (typically for non-Gaussian $p$) consist in estimating/approximating the Hessian, i.e. $\frac{d^2 \log p}{d x^2}$. Explain what this has to do with *uncertainty quantification*.
164 | #
165 | #
166 | #
167 | # #### Exc (optional) -- Change of variables
168 | #
169 | # Let $Z = \phi(X)$ for some monotonic function $\phi$,
170 | # and let $p_x$ and $p_z$ be their probability density functions (pdf).
171 | #
172 | # - (a): Show that $p_z(z) = p_x\big(\phi^{-1}(z)\big) \frac{1}{|\phi'(z)|}$,
173 | # - (b): Show that you don't need to derive the density of $z$ in order to compute its expectation, i.e. that
174 | # $$ \Expect[Z] = \int \phi(x) \, p_x(x) \, d x ≕ \Expect[\phi(x)] \,,$$
175 | # *Hint: while the proof is convoluted, the result itself is [pretty intuitive](https://en.wikipedia.org/wiki/Law_of_the_unconscious_statistician).*
176 |
177 | # +
178 | # show_answer('CVar in proba')
179 | # -
180 |
181 | #
182 | #
183 | # #### Exc (optional) -- Integrals
184 | #
185 | # Recall $p(x) = \NormDist(x \mid \mu, \sigma^2)$ from eqn. (G1). Abbreviate it as $c = (2 \pi \sigma^2)^{-1/2}$.
186 | # Use pen, paper, and calculus to show that
187 | #
188 | # - (i) the first parameter, $\mu$, indicates its **mean**, i.e. that $$\mu = \Expect[X] \,.$$
189 | # *Hint: you can rely on the result of (iii)*
190 | # - (ii) the second parameter, $\sigma^2>0$, indicates its **variance**,
191 | # i.e. that $$\sigma^2 = \mathbb{Var}(X) \mathrel{≔} \Expect[(X-\mu)^2] \,.$$
192 | # *Hint: use $x^2 = x x$ to enable integration by parts.*
193 | # - (iii) $\Expect[1] = 1$,
194 | #   thus proving that (G1) indeed uses the right normalising constant.
195 | #   *Hint: Neither Bernoulli nor Laplace managed this,
196 | #   until [Gauss (1809)](#References) did, by first deriving $(\Expect[1])^2$.
197 | # For more (visual) help, watch [3Blue1Brown](https://www.youtube.com/watch?v=cy8r7WSuT1I&t=3m52s).*
198 |
199 | # +
200 | # show_answer('Gauss integrals')
201 | # -
202 |
203 | # **Exc (optional) -- Riemann sums**:
204 | # Recall that integrals compute the "area under the curve".
205 | # On a discrete grid, they can be approximated using the [Trapezoidal rule](https://en.wikipedia.org/wiki/Riemann_sum#Trapezoidal_rule).
206 | #
207 | # - (a) Replace the prefab code below with your own implementation, using `sum()`,
208 | # to compute the mean and variance of a pdf represented on a grid.
209 | # - (b) Use `np.trapezoid` to compute the probability that a scalar Gaussian $X$ lies within $1$ standard deviation of its mean.
210 | # *Hint: the numerical answer you should find is $\mathbb{P}(X \in [\mu {-} \sigma, \mu {+} \sigma]) \approx 68\%$.*
211 |
212 | # +
213 | def mean_and_var(pdf_values, grid):
214 | f, x = pdf_values, grid
215 | mu = np.trapezoid(f*x, x)
216 | s2 = np.trapezoid(f*(x-mu)**2, x)
217 | return mu, s2
218 |
219 | mu, sigma = 0, 2 # example
220 | pdf_vals = pdf_G1(grid1d, mu=mu, sigma2=sigma**2)
221 | 'Should equal mu and sigma2: %f, %f' % mean_and_var(pdf_vals, grid1d)
222 |
223 |
224 | # +
225 | # show_answer('Riemann sums', 'a')
226 | # -
227 |
228 | # **Exc -- The uniform pdf**:
229 | # Below is the pdf of the [uniform/flat/box distribution](https://en.wikipedia.org/wiki/Uniform_distribution_(continuous))
230 | # for a given mean and variance.
231 | #
232 | # - Use `mean_and_var()` to verify `pdf_U1` (as is).
233 | # - Replace `_G1` with `_U1` in the code generating the above interactive plot.
234 | # - Why are the walls (ever so slightly) inclined?
235 | # - Write your own implementation below, and check that it reproduces the `scipy` version already in place.
236 |
237 | def pdf_U1(x, mu, sigma2):
238 | a = mu - np.sqrt(3*sigma2)
239 | b = mu + np.sqrt(3*sigma2)
240 | pdf_values = sp.stats.uniform(loc=a, scale=(b-a)).pdf(x)
241 | # Your own implementation:
242 | # height = ...
243 | # pdf_values = height * np.ones_like(x)
244 |     # pdf_values[x<a] = ...
245 |     # pdf_values[x>b] = ...
246 | return pdf_values
247 |
248 |
249 | # +
250 | # show_answer('pdf_U1')
251 | # -
252 |
253 | # ## The multivariate (i.e. vector) Gaussian
254 | #
255 | # A *multivariate* random variable, i.e. a **vector**, is simply a collection of scalar variables (on the same probability space).
256 | # Its distribution is the *joint* distribution of its components.
257 | # The pdf of the multivariate Gaussian (for any dimension $\ge 1$) is
258 | #
259 | # $$\large \NormDist(\x \mid \mathbf{\mu}, \mathbf{\Sigma}) =
260 | # |2 \pi \mathbf{\Sigma}|^{-1/2} \, \exp\Big(-\frac{1}{2}\|\x-\mathbf{\mu}\|^2_\mathbf{\Sigma} \Big) \,, \tag{GM} $$
261 | # where $|.|$ represents the matrix determinant,
262 | # and $\|.\|_\mathbf{W}$ represents a weighted 2-norm: $\|\x\|^2_\mathbf{W} = \x^T \mathbf{W}^{-1} \x$.
263 | #
264 | #
265 | #
266 | # $\mathbf{W}$ must be symmetric-positive-definite (SPD) because ... (optional reading 🔍)
267 | #
268 | #
269 | # - The norm (a quadratic form) is invariant to any asymmetry in the weight matrix.
270 | # - The density (GM) would not be integrable (over $\Reals^{\xDim}$) unless $\x\tr \mathbf{\Sigma}^{-1} \x > 0$ for all $\x \neq \bvec{0}$, i.e. unless $\mathbf{\Sigma}$ is positive definite.
271 | #
272 | # - - -
273 | #
274 | #
275 | # It is important to recognize how similar eqn. (GM) is to the univariate (scalar) case (G1).
276 | # Moreover, [as above](#Exc-(optional)----Integrals), it can be shown that
277 | #
278 | # - $\mathbf{\mu} = \Expect[\X]$,
279 | # - $\mathbf{\Sigma} = \Expect[(\X-\mu)(\X-\mu)\tr]$,
280 | #
281 | # That is, the elements of $\mathbf{\Sigma}$ are the individual covariances:
282 | # $\Sigma_{i,j} = \Expect[(X_i-\mu_i)(X_j-\mu_j)] =: \mathbb{Cov}(X_i, X_j)$.
283 | # On the diagonal ($i=j$), they are variances: $\Sigma_{i,i} = \mathbb{Var}(X_i)$.
284 | # Therefore $\mathbf{\Sigma}$ is called the *covariance matrix*.
285 | #
286 | # The following implements the pdf (GM). Take a moment to digest the code, but don't worry if you don't understand it all. Hints:
287 | #
288 | # - `@` produces matrix multiplication (`*` in `Matlab`);
289 | # - `*` produces array multiplication (`.*` in `Matlab`);
290 | # - `axis=-1` makes `np.sum()` work along the last dimension of an ND-array.
291 |
292 | # +
293 | from numpy.linalg import det, inv
294 |
295 | def weighted_norm22(points, Wi):
296 | "Computes the weighted norm of each vector (row in `points`)."
297 | return np.sum( (points @ inv(Wi)) * points, axis=-1)
298 |
299 | def pdf_GM(points, mu, Sigma):
300 | "pdf -- Gaussian, Multivariate: N(x | mu, Sigma) for each x in `points`."
301 | c = np.sqrt(det(2*np.pi*Sigma))
302 | return 1/c * np.exp(-0.5*weighted_norm22(points - mu, Sigma))
303 |
304 |
305 | # -
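# As an optional sanity check (not part of the original notebook; the test values
# below are arbitrary), `pdf_GM` can be compared against scipy's reference implementation:

# +
mu_test = np.array([1.0, -1.0])
Sigma_test = np.array([[2.0, 0.5],
                       [0.5, 1.0]])
pts = np.array([[0.0, 0.0], [1.0, -1.0], [3.0, 2.0]])
ours = pdf_GM(pts, mu_test, Sigma_test)
ref = sp.stats.multivariate_normal(mean=mu_test, cov=Sigma_test).pdf(pts)
print(np.allclose(ours, ref))  # should print True
# -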
306 |
307 | # The following code plots the pdf as contour (level) curves.
308 |
309 | # +
310 | grid2d = np.dstack(np.meshgrid(grid1d, grid1d))
311 |
312 | @interact(corr=(-1, 1, .001), std_x=(1e-5, 10, 1))
313 | def plot_pdf_G2(corr=0.7, std_x=1):
314 | # Form covariance matrix (C) from input and some constants
315 | var_x = std_x**2
316 | var_y = 1
317 | cv_xy = np.sqrt(var_x * var_y) * corr
318 | C = 25 * np.array([[var_x, cv_xy],
319 | [cv_xy, var_y]])
320 | # Evaluate (compute)
321 | density_values = pdf_GM(grid2d, mu=0, Sigma=C)
322 | # Plot
323 | plt.figure(figsize=(4, 4))
324 | height = 1/np.sqrt(det(2*np.pi*C))
325 | plt.contour(grid1d, grid1d, density_values,
326 | levels=np.linspace(1e-4, height, 11), cmap="plasma")
327 | plt.axis('equal');
328 | plt.show()
329 | # -
330 |
331 | # The code defines the covariance `cv_xy` from the input ***correlation*** `corr`.
332 | # This is a coefficient (number), defined for any two random variables $X$ and $Y$ (not necessarily Gaussian) by
333 | # $$ \rho[X,Y]=\frac{\mathbb{Cov}[X,Y]}{\sigma_x \sigma_y} \,.$$
334 | # This correlation quantifies (defines) the ***linear dependence*** between $X$ and $Y$. Indeed,
335 | #
336 | # - $-1\leq \rho \leq 1$ (by the Cauchy-Schwarz inequality)
337 | # - **If** $X$ and $Y$ are *independent*, then $\rho[X,Y]=0$.
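# Here is a small numerical illustration (not part of the original notebook; the
# sample size and target correlation are arbitrary): draw from a bivariate Gaussian
# with a prescribed correlation and check that the *sample* correlation comes out close.

# +
rng = np.random.default_rng(3)
rho = 0.7
C_demo = np.array([[1.0, rho],
                   [rho, 1.0]])
xy = rng.multivariate_normal(mean=[0, 0], cov=C_demo, size=10_000)
print("sample correlation:", np.corrcoef(xy.T)[0, 1])  # should be close to 0.7
# -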
338 | #
339 | # **Exc -- Correlation influence:** How do the contours look? Try to understand why. Cases:
340 | #
341 | # - (a) correlation=0.
342 | # - (b) correlation=0.99.
343 | # - (c) correlation=0.5. (Note that we've used `plt.axis('equal')`).
344 | # - (d) correlation=0.5, but with non-equal variances.
345 | #
346 | # Finally (optional): why does the code "crash" when `corr = +/- 1`? Is this a good or a bad thing?
347 | #
348 | # **Exc Correlation game:** [Play](http://guessthecorrelation.com/) until you get a score (gold coins) of 5 or more.
349 | #
350 | # **Exc -- Correlation disambiguation:**
351 | #
352 | # - What's the difference between correlation and covariance (in words)?
353 | # - What's the difference between non-zero (C) correlation (or covariance) and (D) dependence?
354 | # *Hint: consider this [image](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#/media/File:Correlation_examples2.svg).*
355 | # - Does $C \Rightarrow D$ or the converse?
356 | #     - What about the negation, $\neg D \Rightarrow \neg C$, or its converse?
357 | # - What about the (jointly) Gaussian case?
358 | # - Does correlation (or dependence) imply causation?
359 | # - Suppose $x$ and $y$ have non-zero correlation, but neither one causes the other.
360 | # Does information about $y$ give you information about $x$?
361 | #
362 | # **Exc (optional) -- Gaussian ubiquity:** Why are we so fond of the Gaussian assumption?
363 |
364 | # +
365 | # show_answer('Why Gaussian')
366 | # -
367 |
368 | # ## Summary
369 | #
370 | # The Normal/Gaussian distribution is bell-shaped.
371 | # Its parameters are the mean and the variance.
372 | # In the multivariate case, the mean is a vector,
373 | # while the second parameter becomes a covariance *matrix*,
374 | # whose off-diagonal elements represent scaled correlation factors,
375 | # which measure *linear* dependence.
376 | #
377 | # ### Next: [T3 - Bayesian inference](T3%20-%20Bayesian%20inference.ipynb)
378 | #
379 | #
380 | #
381 | # ### References
382 | #
383 | # - **Laplace (1812)**: P. S. Laplace, "Théorie Analytique des Probabilités", 1812.
384 | # - **Gauss (1809)**: Gauss, C. F. (1809). *Theoria Motus Corporum Coelestium in Sectionibus Conicis Solem Ambientium*. Specifically, Book II, Section 3, Art. 177-179, where he presents the method of least squares (which will be very relevant to us) and its probabilistic justification based on the normal distribution of errors.
385 |
--------------------------------------------------------------------------------
/notebooks/scripts/T4 - Time series filtering.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupyter:
3 | jupytext:
4 | formats: ipynb,scripts//py:light,scripts//md
5 | text_representation:
6 | extension: .md
7 | format_name: markdown
8 | format_version: '1.3'
9 | jupytext_version: 1.17.2
10 | kernelspec:
11 | display_name: Python 3 (ipykernel)
12 | language: python
13 | name: python3
14 | ---
15 |
16 | ```python
17 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials"
18 | !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s
19 | ```
20 |
21 | ```python
22 | from resources import show_answer, interact, cInterval
23 | %matplotlib inline
24 | import numpy as np
25 | import numpy.random as rnd
26 | import matplotlib.pyplot as plt
27 | plt.ion();
28 | ```
29 |
30 | # T4 - Time series filtering
31 |
32 | Before exploring the full (multivariate) Kalman filter (KF),
33 | let's first consider scalar but time-dependent (temporal/sequential) problems.
34 | $
35 | \newcommand{\Expect}[0]{\mathbb{E}}
36 | \newcommand{\NormDist}{\mathscr{N}}
37 | \newcommand{\DynMod}[0]{\mathscr{M}}
38 | \newcommand{\ObsMod}[0]{\mathscr{H}}
39 | \newcommand{\mat}[1]{{\mathbf{{#1}}}}
40 | \newcommand{\bvec}[1]{{\mathbf{#1}}}
41 | \newcommand{\supa}[0]{^\text{a}}
42 | \newcommand{\supf}[0]{^\text{f}}
43 | $
44 |
45 | Consider the scalar, stochastic process $\{x_k\}$,
46 | generated for sequentially increasing time index $k$ by
47 |
48 | $$ x_{k+1} = \DynMod_k x_k + q_k \,. \tag{DynMod} $$
49 |
50 | For our present purposes, the **dynamical "model"** $\DynMod_k$ is simply a known number.
51 | Suppose we get observations $\{y_k\}$ as in:
52 |
53 | $$ y_k = \ObsMod_k x_k + r_k \,, \tag{ObsMod} $$
54 |
55 | The noises and $x_0$ are assumed to be independent of each other and across time
56 | (i.e., $q_k$ and $r_k$ are independent of $q_l$ and $r_l$ for $k \neq l$),
57 | and Gaussian with known parameters:
58 | $$x_0 \sim \NormDist(x\supa_0, P\supa_0),\quad
59 | q_k \sim \NormDist(0, Q_k),\quad
60 | r_k \sim \NormDist(0, R_k) \,.$$
61 |
62 |
63 |
64 | ## Example problem: AR(1)
65 |
66 | For simplicity (though the KF does not require these assumptions),
67 | suppose that $\DynMod_k = \DynMod$, i.e., it is constant in time.
68 | Then $\{x_k\}$ forms a so-called order-1 auto-regressive process [[Wikipedia](https://en.wikipedia.org/wiki/Autoregressive_model#Example:_An_AR(1)_process)].
69 | Similarly, we drop the time dependence (subscript $k$) from $\ObsMod_k, Q_k, R_k$.
70 | The code below simulates a random realization of this process.
71 |
72 | ```python
73 | # Use H=1 so that it makes sense to plot data on the same axes as the state.
74 | H = 1
75 |
76 | # Initial estimate
77 | xa = 0 # mean
78 | Pa = 10 # variance
79 |
80 | def simulate(nTime, xa, Pa, M, H, Q, R):
81 | """Simulate synthetic truth (x) and observations (y)."""
82 | x = xa + np.sqrt(Pa)*rnd.randn() # Draw initial condition
83 | truths = np.zeros(nTime) # Allocate
84 | obsrvs = np.zeros(nTime) # Allocate
85 | for k in range(nTime): # Loop in time
86 | x = M * x + np.sqrt(Q)*rnd.randn() # Dynamics
87 | y = H * x + np.sqrt(R)*rnd.randn() # Measurement
88 | truths[k] = x # Assign
89 | obsrvs[k] = y # Assign
90 | return truths, obsrvs
91 | ```
92 |
93 | The following code plots the process. *You don't need to read or understand it*.
94 |
95 | ```python
96 | @interact(seed=(1, 12), M=(0, 1.03, .01), nTime=(0, 100),
97 | logR=(-9, 9), logR_bias=(-9, 9),
98 | logQ=(-9, 9), logQ_bias=(-9, 9))
99 | def exprmt(seed=4, nTime=50, M=0.97, logR=1, logQ=1, analyses_only=False, logR_bias=0, logQ_bias=0):
100 | R, Q, Q_bias, R_bias = 4.0**np.array([logR, logQ, logQ_bias, logR_bias])
101 |
102 | rnd.seed(seed)
103 | truths, obsrvs = simulate(nTime, xa, Pa, M, H, Q, R)
104 |
105 | plt.figure(figsize=(9, 6))
106 | kk = 1 + np.arange(nTime)
107 | plt.plot(kk, truths, 'k' , label='True state ($x$)')
108 | plt.plot(kk, obsrvs, 'g*', label='Noisy obs ($y$)', ms=9)
109 |
110 | try:
111 | estimates, variances = KF(nTime, xa, Pa, M, H, Q*Q_bias, R*R_bias, obsrvs)
112 | if analyses_only:
113 | plt.plot(kk, estimates[:, 1], label=r'Kalman$^a$ ± 1$\sigma$')
114 | plt.fill_between(kk, *cInterval(estimates[:, 1], variances[:, 1]), alpha=.2)
115 | else:
116 | kk2 = kk.repeat(2)
117 | plt.plot(kk2, estimates.flatten(), label=r'Kalman ± 1$\sigma$')
118 | plt.fill_between(kk2, *cInterval(estimates, variances), alpha=.2)
119 | except NameError:
120 | pass
121 |
122 | sigproc = {}
123 | ### INSERT ANSWER TO EXC "signal processing" HERE ###
124 | # sigproc['some method'] = ...
125 | for method, estimate in sigproc.items():
126 | plt.plot(kk[:len(estimate)], estimate, label=method)
127 |
128 | plt.xlabel('Time index (k)')
129 | plt.legend(loc='upper left')
130 | plt.axhline(0, c='k', lw=1, ls='--')
131 | plt.show()
132 | ```
133 |
134 | **Exc -- AR1 properties:** Answer the following.
135 |
136 | - What does `seed` control?
137 | - Explain what happens when `M=0`. Also consider $Q \rightarrow 0$.
138 | Can you give a name to this `truth` process,
139 | i.e. a link to the relevant Wikipedia page?
140 | What about when `M=1`?
141 | Describe the general nature of the process as `M` changes from 0 to 1.
142 | What about when `M>1`?
143 | - What happens when $R \rightarrow 0$ ?
144 | - What happens when $R \rightarrow \infty$ ?
145 |
146 | ```python
147 | # show_answer('AR1')
148 | ```
149 |
150 |
151 |
152 | ## The (univariate) Kalman filter (KF)
153 |
154 | Now we have a random variable that evolves in time, which we can *pretend* is unknown,
155 | in order to estimate (or "track") it.
156 | From above,
157 | $p(x_0) = \NormDist(x_0 | x\supa_0, P\supa_0)$ with given parameters.
158 | We also know that $x_k$ evolves according to eqn. (DynMod).
159 | Therefore, as shown in the following exercise,
160 | $p(x_1) = \NormDist(x_1 | x\supf_1, P\supf_1)$, with
161 | $$
162 | \begin{align}
163 | x\supf_k &= \DynMod \, x\supa_{k-1} \tag{5} \\
164 | P\supf_k &= \DynMod^2 \, P\supa_{k-1} + Q \tag{6}
165 | \end{align}
166 | $$
167 |
168 | Formulae (5) and (6) are called the **forecast step** of the KF.
169 | But when $y_1$ becomes available (according to eqn. (ObsMod)),
170 | we can update/condition our estimate of $x_1$, i.e., compute the posterior,
171 | $p(x_1 | y_1) = \NormDist(x_1 \mid x\supa_1, P\supa_1)$,
172 | using the formulae we developed for Bayes' rule with
173 | [Gaussian distributions](T3%20-%20Bayesian%20inference.ipynb#Linear-Gaussian-Bayes'-rule-(1D)).
174 |
175 | $$
176 | \begin{align}
177 | P\supa_k &= 1/(1/P\supf_k + \ObsMod^2/R) \,, \tag{7} \\
178 | x\supa_k &= P\supa_k (x\supf_k/P\supf_k + \ObsMod y_k/R) \,. \tag{8}
179 | \end{align}
180 | $$
181 |
182 | This is called the **analysis step** of the KF.
184 | We can subsequently apply the same two steps again
185 | to produce forecast and analysis estimates for the next time index, $k+1$.
186 | Note that if $k$ is a date index, then "yesterday's forecast becomes today's prior".
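
For reference, the analysis step (7)-(8) can equivalently be written in the standard "gain" form
(not derived here, but referred to in Exc "Implementation" further below),
using the scalar Kalman gain $K_k$:

$$
\begin{align}
K_k &= P\supf_k \ObsMod / (\ObsMod^2 P\supf_k + R) \,, \\
x\supa_k &= x\supf_k + K_k (y_k - \ObsMod x\supf_k) \,, \\
P\supa_k &= (1 - K_k \ObsMod) \, P\supf_k \,.
\end{align}
$$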
187 |
188 | #### Exc -- linear algebra of Gaussian random variables
189 |
190 | - (a) Show the linearity of the expectation operator:
191 | $\Expect [ \DynMod x + b ] = \DynMod \Expect[x] + b$, for some constant $b$.
192 | - (b) Thereby, show that $\mathbb{Var}[ \DynMod x + b ] = \DynMod^2 \mathbb{Var} [x]$.
193 | - (c) *Optional*: Now let $z = x + q$, with $x$ and $q$ independent and Gaussian.
194 | Then the pdf of this sum of random variables, $p_z(z)$, is given by convolution
195 | (hopefully this makes intuitive sense, at least in the discrete case):
196 | $$ p_z(z) = \int p_x(x) \, p_q(z - x) \, d x \,.$$
197 | Show that $z$ is also Gaussian,
198 | whose mean and variance are the sum of the means and variances (respectively).
199 | *Hint: you will need the result on [completing the square](T3%20-%20Bayesian%20inference.ipynb#Exc----BR-LG1),
200 | specifically the part that we did not make use of for Bayes' rule.
201 | If you get stuck, you can also view the excellent [`3blue1brown`](https://www.youtube.com/watch?v=d_qvLDhkg00&t=266s&ab_channel=3Blue1Brown) on the topic.*
202 |
203 | ```python
204 | # show_answer('Sum of Gaussians', 'a')
205 | ```
206 |
207 | #### The (general) Bayesian filtering recursions
208 |
209 | In the case of linearity and Gaussianity,
210 | the KF of eqns. (5)-(8) computes the *exact* Bayesian pdfs for $x_k$.
211 | But even without these assumptions,
212 | a general (abstract) Bayesian **recursive** procedure can still be formulated,
213 | relying only on the remaining ("hidden Markov model") assumptions.
214 |
215 | - The analysis "assimilates" $y_k$ to compute $p(x_k | y_{1:k})$,
216 | where $y_{1:k} = y_1, \ldots, y_k$ is shorthand notation.
217 | $$
218 | p(x_k | y_{1:k}) \propto p(y_k | x_k) \, p(x_k | y_{1:k-1})
219 | $$
220 | - The forecast "propagates" the estimate with its uncertainty
221 | to produce $p(x_{k+1}| y_{1:k})$.
222 | $$
223 | p(x_{k+1} | y_{1:k}) = \int p(x_{k+1} | x_k) \, p(x_k | y_{1:k}) \, d x_k
224 | $$
225 |
226 | It is important to appreciate the benefits of the recursive form of these computations:
227 | It reflects the recursiveness (Markov property) of nature:
228 | Both in the problem and our solution, time $k+1$ *builds on* time $k$,
229 | so we do not need to re-do the entire problem for each $k$.
230 | At every time $k$, we only deal with functions of one or two variables: $x_k$ and $x_{k+1}$,
231 | which is a much smaller space (for quantifying our densities or covariances)
232 | than that of the joint pdf $p(x_{1:k} | y_{1:k})$.
233 |
234 | Note, however, that our recursive procedure, called ***filtering***,
235 | does *not* compute $p(x_l | y_{1:k})$ for any $l < k$.
236 | In other words, any filtering estimate only contains *past* information.
237 | Updating estimates of the state at previous times is called ***smoothing***.
238 | However, for prediction/forecasting, filtering is all we need:
239 | accurate initial conditions (estimates of the present moment).
240 |
241 | #### Exc -- Implementation
242 |
243 | Below is a very rudimentary sequential estimator (not the KF!): it essentially just makes "persistence" forecasts and sets the analysis estimates to the value of the observations (*which is generally only possible in this linear, scalar case*). Run its cell to define it, and then re-run the above interactive animation cell. Then:
244 |
245 | - Implement the KF properly by replacing the forecast and analysis steps below. *Re-run the cell.*
246 | - Try implementing the analysis step both in the "precision" and "gain" forms.
247 |
248 | ```python
249 | def KF(nTime, xa, Pa, M, H, Q, R, obsrvs):
250 | """Kalman filter. PS: (xa, Pa) should be input with *initial* values."""
251 | ############################
252 | # TEMPORARY IMPLEMENTATION #
253 | ############################
254 | estimates = np.zeros((nTime, 2))
255 | variances = np.zeros((nTime, 2))
256 | for k in range(nTime):
257 | # Forecast step
258 | xf = xa
259 | Pf = Pa
260 | # Analysis update step
261 | Pa = R / H**2
262 | xa = obsrvs[k] / H
263 | # Assign
264 | estimates[k] = xf, xa
265 | variances[k] = Pf, Pa
266 | return estimates, variances
267 | ```
268 |
269 | ```python
270 | # show_answer('KF1 code')
271 | ```
272 |
273 | #### Exc -- KF behaviour
274 |
275 | - Set `logQ` to its minimum, and `M=1`.
276 | We established in Exc "AR1" that the true states are now constant in time (but unknown).
277 | How does the KF fare in estimating it?
278 | Does its uncertainty variance ever reach 0?
279 | - What is the KF uncertainty variance in the case of `M=0`?
280 |
281 | ```python
282 | # show_answer('KF behaviour')
283 | ```
284 |
285 |
286 |
287 | #### Exc -- Temporal convergence
288 |
289 | In general, $\DynMod$, $\ObsMod$, $Q$, and $R$ depend on time, $k$
290 | (often to parameterize exogenous/outside factors/forces/conditions),
291 | so the KF variances do not generally converge to any limiting values.
292 | Here, however, we have assumed that they are all constant in time (stationary).
293 | In addition, suppose $Q=0$ and $\ObsMod = 1$.
294 | Show that
295 |
296 | - (a) $1/P\supa_k = 1/(\DynMod^2 P\supa_{k-1}) + 1/R$,
297 | by combining the forecast and analysis equations for the variance.
298 | - (b) $1/P\supa_k = 1/P\supa_0 + k/R$, if $\DynMod = 1$.
299 | - (c) $P\supa_{\infty} = 0$, if $\DynMod = 1$.
300 | - (d) $P\supa_{\infty} = 0$, if $\DynMod < 1$.
301 | - (e) $P\supa_{\infty} = R (1-1/\DynMod^2)$, if $\DynMod > 1$.
302 | *Hint: Look for the fixed point of the recursion of part (a).*
303 |
304 | ```python
305 | # show_answer('Asymptotic Riccati', 'a')
306 | ```
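
As a quick numerical complement (a sketch; it does not replace the derivations asked for above),
the limits claimed in (c)-(e) can be checked by simply iterating the recursion of part (a).
The underscored names below are used to avoid overwriting the notebook's globals (e.g. `Pa`).

```python
# Iterate 1/Pa_k = 1/(M^2 Pa_{k-1}) + 1/R  (i.e. Q=0, H=1), and compare with the claimed limits.
# NB: for M=1 the convergence towards 0 is slow (like R/k), as per part (b).
R_ = 4.0
for M_ in [0.9, 1.0, 1.1]:
    Pa_ = 10.0
    for _ in range(400):
        Pa_ = 1 / (1/(M_**2 * Pa_) + 1/R_)
    limit = 0 if M_ <= 1 else R_ * (1 - 1/M_**2)
    print(f"M={M_}: Pa after 400 iterations = {Pa_:.2e}, claimed limit = {limit:.2e}")
```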
307 |
308 | **Exc (optional) -- Temporal CV, part 2:**
309 | Now we don't assume that $Q$ is zero. Instead
310 |
311 | - (a) Suppose $\DynMod = 0$. What does $P\supa_k$ equal?
312 | - (b) Suppose $\DynMod = 1$. Show that $P\supa_\infty$
313 | satisfies the quadratic equation: $0 = P^2 + Q P - Q R$.
314 | Thereby, without solving the quadratic equation, show that
315 | - (c) $P\supa_\infty \rightarrow R$ (from below) if $Q \rightarrow +\infty$.
316 | - (d) $P\supa_\infty \rightarrow \sqrt{ Q R}$ (from above) if $Q \rightarrow 0^+$.
317 |
318 | ```python
319 | # show_answer('Asymptotes when Q>0')
320 | ```
321 |
322 | #### Exc (optional) -- Analytic simplification in the case of an unknown constant
323 |
324 | - Note that in case $Q = 0$,
325 |   then $x_k = \DynMod^k x_0$.
326 | - So if $\DynMod = 1$, then $x_k = x_0$, so we are estimating an unknown *constant*,
327 | and can drop its time index subscript.
328 | - For simplicity, assume $\ObsMod = 1$, and $P^a_0 \rightarrow +\infty$.
329 | - Then $p(x | y_{1:k}) \propto \exp \big\{- \sum_l \| y_l - x \|^2_R / 2 \big\}
330 |   \propto \NormDist(x | \bar{y}, R/k )$, which again follows by completing the square.
331 | - In words, the (accumulated) posterior mean is the sample average,
332 | $\bar{y} = \frac{1}{k}\sum_l y_l$,
333 | and the variance is that of a single observation divided by $k$.
334 |
335 | Show that this is the same posterior that the KF recursions produce.
336 | *Hint: while this is straightforward for the variance,
337 | you will probably want to prove the mean using induction.*
338 |
339 | #### Exc -- Impact of biases
340 |
341 | Re-run the above interactive animation to set the default control values. Answer the following:
342 |
343 | - `logR_bias`/`logQ_bias` control the (multiplicative) bias in $R$/$Q$ that is fed to the KF.
344 | What happens when the KF "thinks" the measurement/dynamical error
345 | is (much) smaller than it actually is?
346 | What about larger?
347 | - Re-run the animation to get default values.
348 | Set `logQ` to 0, which will make the following behaviour easier to describe.
349 | In the code, add 20 to the initial `xa` **given to the KF**.
350 | How long does it take for it to recover from this initial bias?
351 | - Multiply `Pa` **given to the KF** by 0.01. What about now?
352 | - Remove the previous biases.
353 | Instead, multiply `M` **given to the KF** by 2, and observe what happens.
354 | Try the same, but dividing `M` by 2.
355 |
356 | ```python
357 | # show_answer('KF with bias')
358 | ```
359 |
360 | ## Alternative methods
361 |
362 | When it comes to (especially univariate) time series analysis,
363 | the Kalman filter (KF) is not the only option.
364 | For example, **signal processing** offers several alternative filters.
365 | Indeed, the word "filter" in the KF comes from that domain,
366 | where it originally referred to removing high-frequency noise,
367 | since this often leads to a better estimate of the signal.
368 | We will not review signal processing theory here,
369 | but challenge you to make use of what `scipy` already has to offer.
370 |
371 | #### Exc (optional) -- signal processing
372 |
373 | Run the following cell to import and define some more tools.
374 |
375 | ```python
376 | import scipy as sp
377 | import scipy.signal as sig
378 | def nrmlz(x):
379 | return x / x.sum()
380 | def trunc(x, n):
381 | return np.pad(x[:n], (0, len(x)-n))
382 | ```
383 |
384 | Now try to "filter" the `obsrvs` to produce estimates of `truth`.
385 | For each method, add your estimate ("filtered signal" in signal processing parlance)
386 | to the `sigproc` dictionary in the interactive animation cell,
387 | using an appropriate name/key (this will automatically include it in the plot).
388 | Use
389 |
390 | - (a) [`sig.wiener`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.wiener.html).
391 | *PS: this is a direct ancestor of the KF*.
392 | - (b) a moving average, for example [`sig.windows.hamming`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.windows.hamming.html).
393 | *Hint: you may also want to use [`sig.convolve`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.convolve.html#scipy.signal.convolve)*.
394 | - (c) a low-pass filter using [`np.fft`](https://docs.scipy.org/doc/scipy/reference/fft.html#).
395 | *Hint: you may also want to use the above `trunc` function.*
396 | - (d) The [`sig.butter`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.butter.html) filter.
397 | *Hint: apply with [`sig.filtfilt`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.signal.filtfilt.html).*
398 | - (e) not really a signal processing method: [`sp.interpolate.UnivariateSpline`](https://docs.scipy.org/doc/scipy/reference/generated/scipy.interpolate.UnivariateSpline.html)
399 |
400 | The answers should be considered examples, not the only right way.
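
For instance, here is a minimal sketch of the expected format, using a crude recursive exponential
smoother (deliberately not one of the suggested methods (a)-(e), so as not to give the answers away):

```python
def exp_moving_average(y, alpha=0.3):
    """Recursively smooth `y` (incidentally, a crude relative of the asymptotic KF)."""
    est = np.zeros_like(y)
    est[0] = y[0]
    for k in range(1, len(y)):
        est[k] = alpha*y[k] + (1-alpha)*est[k-1]
    return est

# To include it in the plot, paste the following (uncommented) at the marked line
# in the interactive animation cell above:
# sigproc['Exp. moving avg.'] = exp_moving_average(obsrvs)
```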
401 |
402 | ```python
403 | # show_answer('signal processing', 'a')
404 | ```
405 |
406 | But for the above problem (which is linear-Gaussian!),
407 | the KF is guaranteed (on average, in the long run, in terms of mean square error)
408 | not to be outperformed by any other method.
409 | We will see cases later (in full-blown state estimation)
410 | where the difference is much clearer,
411 | and indeed it might not even be clear how to apply signal processing methods.
412 | However, the KF has an unfair advantage: we are giving it a lot of information
413 | about the problem (`M, H, R, Q`) that the signal processing methods do not have.
414 | Therefore, those methods typically require a good deal of tuning
415 | (but in practice, so does the KF, since `Q` and `R` are rarely well determined).
416 |
417 | ## Summary
418 |
419 | The Kalman filter (KF) can be derived by applying linear-Gaussian assumptions
420 | to a sequential inference problem.
421 | Generally, the uncertainty never converges to 0,
422 | and the performance of the filter depends entirely on
423 | accurate system parameters (models and error covariance matrices).
424 |
425 | Classical time series estimation can be performed as a special case of state estimation (i.e., of the KF)
426 | [(in that literature, the state-estimation formulation is known as the state-space approach)](https://www.google.co.uk/search?q=%22We+now+demonstrate+how+to+put+these+models+into+state+space+form%22&btnG=Search+Books&tbm=bks).
427 | Moreover, DA methods produce uncertainty quantification, which is usually less explicit in time series analysis methods.
428 |
429 | ### Next: [T5 - Multivariate Kalman filter](T5%20-%20Multivariate%20Kalman%20filter.ipynb)
430 |
431 |
432 |
433 | ### References
434 |
--------------------------------------------------------------------------------
/notebooks/scripts/T6 - Geostats & Kriging [optional].md:
--------------------------------------------------------------------------------
1 | ---
2 | jupyter:
3 | jupytext:
4 | cell_metadata_filter: -all
5 | formats: ipynb,scripts//py:light,scripts//md
6 | text_representation:
7 | extension: .md
8 | format_name: markdown
9 | format_version: '1.3'
10 | jupytext_version: 1.17.2
11 | kernelspec:
12 | display_name: Python 3 (ipykernel)
13 | language: python
14 | name: python3
15 | ---
16 |
17 | ```python
18 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials"
19 | !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s
20 | ```
21 |
22 | ```python
23 | from resources import show_answer, interact
24 | %matplotlib inline
25 | import numpy as np
26 | import matplotlib.pyplot as plt
27 | import numpy.random as rnd
28 | import scipy.linalg as sla
29 | from mpl_tools.misc import nRowCol
30 | from mpl_tools.place import freshfig
31 | plt.ion();
32 | ```
33 |
34 | # T6 - Spatial statistics ("geostatistics") & Kriging
35 |
36 | Covariances between two (or a few) variables are all very well,
37 | but if you have not seen it before, the connection between covariances
38 | and geophysical (spatial) fields may not be obvious.
39 | The purpose of this tutorial is to familiarise you with random (spatial) fields
40 | and their estimation.
41 | $
42 | \newcommand{\mat}[1]{{\mathbf{{#1}}}}
43 | \newcommand{\bvec}[1]{{\mathbf{#1}}}
44 | $
45 |
46 | Set some parameters
47 |
48 | ```python
49 | rnd.seed(3000)
50 | grid1D = np.linspace(0, 1, 21)
51 | N = 15 # ensemble size
52 | ```
53 |
54 | ## Variograms
55 |
56 | The "Variogram" of a field is essentially `1 - autocovariance`. Thus, it describes the spatial dependence of the field. The mean (1st moment) of a field is usually estimated and described/parametrized with trend lines/surfaces, while higher moments are usually not worth modelling.
57 |
58 | ```python
59 | def variogram(dists, Range=1, kind="Gauss", nugget=0):
60 | """Compute variogram for distance points `dists`."""
61 | dists = dists / Range
62 | if kind == "Spheric":
63 | gamma = 1.5 * dists - .5 * dists**3
64 | gamma[dists >= 1] = 1
65 | elif kind == "Expo":
66 | dists *= 3 # by convention
67 | gamma = 1 - np.exp(-dists)
68 | else: # "Gauss"
69 | dists *= 3 # by convention
70 | gamma = 1 - np.exp(-(dists)**2)
71 | # Include nugget (discontinuity at 0)
72 | gamma *= (1-nugget)
73 | gamma[dists != 0] += nugget
74 | return gamma
75 | ```
76 |
77 | #### Plot
78 |
79 | ```python
80 | @interact(Range=(.01, 4), nugget=(0.0, 1, .1))
81 | def plot_variogram(Range=1, nugget=0):
82 | fig, ax = plt.subplots(figsize=(6, 3))
83 | ax.set_xlim(0, 1)
84 | ax.set_ylim(0, 1)
85 | for i, kind in enumerate(["Spheric", "Expo", "Gauss"]):
86 | gamma = variogram(grid1D, Range, kind, nugget=nugget)
87 | ax.plot(grid1D, gamma, lw=2, color=f"C{i}", label=kind)
88 | ax.legend(loc="upper left")
89 | plt.show()
90 | ```
91 |
92 | In order to apply the variogram, we must first compute distances.
93 | The following is a fairly efficient implementation.
94 |
95 | ```python
96 | def dist_euclid(A, B):
97 | """Compute the l2-norm between each point (row) of A and B"""
98 | diff = A[:, None, :] - B
99 | d2 = np.sum(diff**2, axis=-1)
100 | return np.sqrt(d2)
101 | ```
102 |
103 | Now the full covariance (matrix) between any sets of points can be defined by the following.
104 |
105 | ```python
106 | def covar(coords, **vg_params):
107 | dists = dist_euclid(coords, coords)
108 | return 1 - variogram(dists, **vg_params)
109 | ```
110 |
111 | ```python
112 | fig, ax = freshfig("1D covar")
113 | C = covar(grid1D[:, None], Range=1, kind="Gauss", nugget=1e-3)
114 | ax.matshow(C, cmap="RdBu");
115 | ```
116 |
117 | ## Random fields (1D)
118 |
119 | Gaussian random variables (vectors) are fully specified by their mean and covariance.
120 | Once in possession of a covariance matrix, we can use it to sample random variables
121 | by multiplying its Cholesky factor (square root) onto standard normal variables.
122 |
123 | ```python
124 | def gaussian_fields(coords, **vg_params):
125 | """Gen. random (Gaussian) fields at `coords` (no structure/ordering required)."""
126 | C = covar(coords, **vg_params)
127 | L = sla.cholesky(C)
128 | fields = L.T @ rnd.randn(len(L.T), N)
129 | return fields
130 | ```
131 |
132 | #### Exc
133 |
134 | Use the plotting functionality below to
135 | explain the effect of `Range` and `nugget`
136 |
137 | ```python
138 | fig, ax = freshfig("1D random fields")
139 | fields = gaussian_fields(grid1D[:, None], Range=1, kind="Gauss", nugget=1e-3)
140 | ax.plot(grid1D, fields, lw=2);
141 | ```
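
As a sanity check (a sketch, not part of the exercise), the sample covariance of a large number of
such fields should approach the model covariance that generated them:

```python
# Sample "manually" with a larger ensemble than the global `N`, without overwriting `fields`.
C1 = covar(grid1D[:, None], Range=1, kind="Gauss", nugget=1e-3)
big_sample = sla.cholesky(C1).T @ rnd.randn(len(grid1D), 10_000)
print("Max abs. covariance error:", np.abs(np.cov(big_sample) - C1).max())
```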
142 |
143 | ## Random fields (2D)
144 |
145 | The following sets up a 2d grid.
146 |
147 | ```python
148 | grid2x, grid2y = np.meshgrid(grid1D, grid1D)
149 | grid2x.shape
150 | ```
151 |
152 | where `grid2y` has the same shape. However, in the following we will "flatten" (a.k.a."(un)ravel", "vectorize", or "string out") this explicitly 2D grid of nodes into a simple list of points in 2D.
153 |
154 | ```python
155 | grid2D = np.dstack([grid2x, grid2y]).reshape((-1, 2))
156 | grid2D.shape
157 | ```
158 |
159 | Importantly, none of the following methods actually assume any structure to the list. So we could also work with a completely irregularly spaced set of points. For example, `gaussian_fields` is immediately applicable also to this 2D case.
160 |
161 | ```python
162 | vg_params = dict(Range=1, kind="Gauss", nugget=1e-4)
163 | fields = gaussian_fields(grid2D, **vg_params)
164 | ```
165 |
166 | Of course, for plotting purposes, we undo the flattening.
167 |
168 | ```python
169 | def contour_plot(ax, field, cmap="nipy_spectral", levels=12, has_obs=True):
170 | field = field.reshape(grid2x.shape) # undo flattening
171 | if has_obs:
172 | ax.plot(*obs_coo.T, "ko", ms=4)
173 | ax.plot(*obs_coo.T, "yo", ms=1)
174 | ax.set(aspect="equal", xticks=[0, 1], yticks=[0, 1])
175 | return ax.contourf(field, levels=levels, extent=(0, 1, 0, 1),
176 | cmap=cmap, vmin=vmin, vmax=vmax)
177 |
178 | # Fix the color scale for all subsequent `contour_plot`.
179 | # Use `None` to re-compute the color scale for each subplot.
180 | vmin = fields.min()
181 | vmax = fields.max()
182 | ```
183 |
184 | ```python
185 | fig, axs = freshfig(num="2D random fields", figsize=(5, 4),
186 | nrows=3, ncols=4, sharex=True, sharey=True)
187 |
188 | for ax, field in zip(axs.ravel(), fields.T):
189 | contour_plot(ax, field, has_obs=False)
190 | ```
191 |
192 | It might be interesting to inspect the covariance matrix in this 2D case.
193 |
194 | ```python
195 | C = covar(grid2D, **vg_params)
196 | fig, ax = freshfig("2D covar")
197 | ax.matshow(C, cmap="RdBu", vmin=0, vmax=1);
198 | ax.grid(False)
199 | ```
200 |
201 | ## Estimation problem
202 |
203 | For our estimation target we will use one of the above generated random fields.
204 |
205 | ```python
206 | truth = fields.T[0]
207 | ```
208 |
209 | For the observations, we pick some random grid locations for simplicity
210 | (even though the methods work also with observations not on grid nodes).
211 |
212 | ```python
213 | nObs = 10
214 | obs_idx = rnd.randint(0, len(grid2D), nObs)
215 | obs_coo = grid2D[obs_idx]
216 | observations = truth[obs_idx]
217 | ```
218 |
219 | ## Spatial interpolant methods
220 |
221 | ```python
222 | # Pre-compute re-used objects
223 | dists_yy = dist_euclid(obs_coo, obs_coo)
224 | dists_xy = dist_euclid(grid2D, obs_coo)
225 | ```
226 |
227 | ```python
228 | estims = dict(Truth=truth)
229 | vmin=truth.min()
230 | vmax=truth.max()
231 | ```
232 |
233 | The cells below contain snippets of different spatial interpolation methods,
234 | followed by a cell that plots the interpolants.
235 | Complete the code snippets.
236 |
237 | #### Exc: Nearest neighbour interpolation
238 |
239 | Implement the method [(wikipedia)](https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation).
240 |
241 | ```python
242 | nearest_obs = np.zeros_like(truth, dtype=int) ### FIX THIS ###
243 | estims["Nearest-n."] = observations[nearest_obs]
244 | ```
245 |
246 | ```python
247 | # show_answer('nearest neighbour interp')
248 | ```
249 |
250 | #### Exc: Inverse distance weighting
251 |
252 | Implement the method [(wikipedia)](https://en.wikipedia.org/wiki/Inverse_distance_weighting).
253 | *Hint: You can ignore the `errstate` line below. It is just used to "silence warnings" resulting from division by 0 (whose special case is treated in a cell further down).*
254 |
255 | ```python
256 | exponent = 3
257 | with np.errstate(invalid='ignore', divide='ignore'):
258 | weights = np.zeros_like(dists_xy) ### FIX THIS ###
259 | ```
260 |
261 | ```python
262 | # show_answer('inv-dist weight interp')
263 | ```
264 |
265 | ```python
266 | # Apply weights
267 | estims["Inv-dist."] = weights @ observations
268 | ```
269 |
270 | ```python
271 | # Fix singularities
272 | estims["Inv-dist."][obs_idx] = observations
273 | ```
274 |
275 | #### Exc: Simple Kriging
276 |
277 | Implement the method [(wikipedia)](https://en.wikipedia.org/wiki/Kriging#Simple_kriging).
278 |
279 | *Hint: use `sla.solve` or `sla.inv` (less recommended)*
280 |
281 | ```python
282 | ### ANSWER HERE ###
283 | covar_yy = ...
284 | cross_xy = ...
285 | regression_coefficients = weights ### FIX THIS ### -- should be cross_xy / covar_yy
286 | ```
287 |
288 | ```python
289 | # show_answer('Kriging code')
290 | ```
291 |
292 | ```python
293 | estims["Kriging"] = regression_coefficients @ observations
294 | ```
295 |
296 | ### Plot truth, estimates, error
297 |
298 | ```python
299 | fig, axs = freshfig(num="Estimation problem", figsize=(8, 4), squeeze=False,
300 | nrows=2, ncols=len(estims), sharex=True, sharey=True)
301 |
302 | for name, ax1, ax2 in zip(estims, *axs):
303 | ax1.set_title(name)
304 | c1 = contour_plot(ax1, estims[name])
305 | c2 = contour_plot(ax2, estims[name] - truth, cmap="RdBu")
306 | fig.tight_layout()
307 | fig.subplots_adjust(right=0.85)
308 | cbar = fig.colorbar(c1, cax=fig.add_axes([0.9, 0.15, 0.03, 0.7]))
309 | axs[1, 0].set_ylabel("Errors");
310 | ```
311 |
312 | #### Exc: Try different values of `Range`
313 |
314 | - Run code to re-compute Kriging estimate.
315 | - What does setting it to `0.1` cause? What about `100`?
316 |
317 | ```python
318 | @interact(Range=(.01, 40))
319 | def plot_krieged(Range=1):
320 | vg_params['Range'] = Range
321 | covar_yy = 1 - variogram(dists_yy, **vg_params)
322 | cross_xy = 1 - variogram(dists_xy, **vg_params)
323 | regression_coefficients = sla.solve(covar_yy, cross_xy.T).T
324 |
325 | fig, ax = freshfig(num="Kriging estimates")
326 | c1 = contour_plot(ax, regression_coefficients @ observations)
327 | fig.colorbar(c1);
328 | plt.show()
329 | ```
330 |
331 | #### Generalizations
332 |
333 | - Unknown mean (Ordinary Kriging)
334 | - Co-Kriging (vector-valued fields)
335 | - Trend surfaces (non-stationarity assumptions)
336 |
337 | ## Summary
338 |
339 | The covariances of random fields can sometimes be described by the autocorrelation function,
340 | or equivalently, the (semi-)variogram.
341 | Covariances form the basis of a family of (geo-)spatial interpolation and approximation
342 | methods known as Kriging, which can also be called/interpreted as
343 | **Radial basis function (RBF) interpolation**, or
344 | **Gaussian process (GP) regression**.
345 |
346 | - Kriging is derived by minimizing the variance of linear and unbiased estimators.
347 | - RBF interpolation is derived by the explicit desire to fit
348 | N functions to N data points (observations).
349 | - GP regression is derived by conditioning (applying Bayes rule)
350 | to the (supposedly) Gaussian distribution of the random field.
351 |
352 | ### Next: [T7 - Chaos & Lorenz](T7%20-%20Chaos%20%26%20Lorenz%20[optional].ipynb)
353 |
--------------------------------------------------------------------------------
/notebooks/scripts/T6 - Geostats & Kriging [optional].py:
--------------------------------------------------------------------------------
1 | # ---
2 | # jupyter:
3 | # jupytext:
4 | # cell_metadata_filter: -all
5 | # formats: ipynb,scripts//py:light,scripts//md
6 | # text_representation:
7 | # extension: .py
8 | # format_name: light
9 | # format_version: '1.5'
10 | # jupytext_version: 1.17.2
11 | # kernelspec:
12 | # display_name: Python 3 (ipykernel)
13 | # language: python
14 | # name: python3
15 | # ---
16 |
17 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials"
18 | # !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s
19 |
20 | from resources import show_answer, interact
21 | # %matplotlib inline
22 | import numpy as np
23 | import matplotlib.pyplot as plt
24 | import numpy.random as rnd
25 | import scipy.linalg as sla
26 | from mpl_tools.misc import nRowCol
27 | from mpl_tools.place import freshfig
28 | plt.ion();
29 |
30 | # # T6 - Spatial statistics ("geostatistics") & Kriging
31 | #
32 | # Covariances between two (or a few) variables are all very well,
33 | # but if you have not seen it before, the connection between covariances
34 | # and geophysical (spatial) fields may not be obvious.
35 | # The purpose of this tutorial is to familiarise you with random (spatial) fields
36 | # and their estimation.
37 | # $
38 | # \newcommand{\mat}[1]{{\mathbf{{#1}}}}
39 | # \newcommand{\bvec}[1]{{\mathbf{#1}}}
40 | # $
41 | #
42 | # Set some parameters
43 |
44 | rnd.seed(3000)
45 | grid1D = np.linspace(0, 1, 21)
46 | N = 15 # ensemble size
47 |
48 |
49 | # ## Variograms
50 | #
51 | # The "Variogram" of a field is essentially `1 - autocovariance`. Thus, it describes the spatial dependence of the field. The mean (1st moment) of a field is usually estimated and described/parametrized with trend lines/surfaces, while higher moments are usually not worth modelling.
52 |
53 | def variogram(dists, Range=1, kind="Gauss", nugget=0):
54 | """Compute variogram for distance points `dists`."""
55 | dists = dists / Range
56 | if kind == "Spheric":
57 | gamma = 1.5 * dists - .5 * dists**3
58 | gamma[dists >= 1] = 1
59 | elif kind == "Expo":
60 | dists *= 3 # by convention
61 | gamma = 1 - np.exp(-dists)
62 | else: # "Gauss"
63 | dists *= 3 # by convention
64 | gamma = 1 - np.exp(-(dists)**2)
65 | # Include nugget (discontinuity at 0)
66 | gamma *= (1-nugget)
67 | gamma[dists != 0] += nugget
68 | return gamma
69 |
70 |
71 | # #### Plot
72 |
73 | @interact(Range=(.01, 4), nugget=(0.0, 1, .1))
74 | def plot_variogram(Range=1, nugget=0):
75 | fig, ax = plt.subplots(figsize=(6, 3))
76 | ax.set_xlim(0, 1)
77 | ax.set_ylim(0, 1)
78 | for i, kind in enumerate(["Spheric", "Expo", "Gauss"]):
79 | gamma = variogram(grid1D, Range, kind, nugget=nugget)
80 | ax.plot(grid1D, gamma, lw=2, color=f"C{i}", label=kind)
81 | ax.legend(loc="upper left")
82 | plt.show()
83 |
84 |
85 | # In order to apply the variogram, we must first compute distances.
86 | # The following is a fairly efficient implementation.
87 |
88 | def dist_euclid(A, B):
89 | """Compute the l2-norm between each point (row) of A and B"""
90 | diff = A[:, None, :] - B
91 | d2 = np.sum(diff**2, axis=-1)
92 | return np.sqrt(d2)
93 |
94 |
95 | # Now the full covariance (matrix) between any sets of points can be defined by the following.
96 |
97 | def covar(coords, **vg_params):
98 | dists = dist_euclid(coords, coords)
99 | return 1 - variogram(dists, **vg_params)
100 |
101 |
102 | fig, ax = freshfig("1D covar")
103 | C = covar(grid1D[:, None], Range=1, kind="Gauss", nugget=1e-3)
104 | ax.matshow(C, cmap="RdBu");
105 |
106 |
107 | # ## Random fields (1D)
108 | #
109 | # Gaussian random variables (vectors) are fully specified by their mean and covariance.
110 | # Once in possession of a covariance matrix, we can use it to sample random variables
111 | # by multiplying its Cholesky factor (square root) onto standard normal variables.
112 |
113 | def gaussian_fields(coords, **vg_params):
114 | """Gen. random (Gaussian) fields at `coords` (no structure/ordering required)."""
115 | C = covar(coords, **vg_params)
116 | L = sla.cholesky(C)
117 | fields = L.T @ rnd.randn(len(L.T), N)
118 | return fields
119 |
120 |
121 | # #### Exc
122 | #
123 | # Use the plotting functionality below to
124 | # explain the effect of `Range` and `nugget`
125 |
126 | fig, ax = freshfig("1D random fields")
127 | fields = gaussian_fields(grid1D[:, None], Range=1, kind="Gauss", nugget=1e-3)
128 | ax.plot(grid1D, fields, lw=2);
129 |
130 | # ## Random fields (2D)
131 | #
132 | # The following sets up a 2d grid.
133 |
134 | grid2x, grid2y = np.meshgrid(grid1D, grid1D)
135 | grid2x.shape
136 |
137 | # where `grid2y` has the same shape. However, in the following we will "flatten" (a.k.a."(un)ravel", "vectorize", or "string out") this explicitly 2D grid of nodes into a simple list of points in 2D.
138 |
139 | grid2D = np.dstack([grid2x, grid2y]).reshape((-1, 2))
140 | grid2D.shape
141 |
142 | # Importantly, none of the following methods actually assume any structure to the list. So we could also work with a completely irregularly spaced set of points. For example, `gaussian_fields` is immediately applicable also to this 2D case.
143 |
144 | vg_params = dict(Range=1, kind="Gauss", nugget=1e-4)
145 | fields = gaussian_fields(grid2D, **vg_params)
146 |
147 |
148 | # Of course, for plotting purposes, we undo the flattening.
149 |
150 | # +
151 | def contour_plot(ax, field, cmap="nipy_spectral", levels=12, has_obs=True):
152 | field = field.reshape(grid2x.shape) # undo flattening
153 | if has_obs:
154 | ax.plot(*obs_coo.T, "ko", ms=4)
155 | ax.plot(*obs_coo.T, "yo", ms=1)
156 | ax.set(aspect="equal", xticks=[0, 1], yticks=[0, 1])
157 | return ax.contourf(field, levels=levels, extent=(0, 1, 0, 1),
158 | cmap=cmap, vmin=vmin, vmax=vmax)
159 |
160 | # Fix the color scale for all subsequent `contour_plot`.
161 | # Use `None` to re-compute the color scale for each subplot.
162 | vmin = fields.min()
163 | vmax = fields.max()
164 |
165 | # +
166 | fig, axs = freshfig(num="2D random fields", figsize=(5, 4),
167 | nrows=3, ncols=4, sharex=True, sharey=True)
168 |
169 | for ax, field in zip(axs.ravel(), fields.T):
170 | contour_plot(ax, field, has_obs=False)
171 | # -
172 |
173 | # It might be interesting to inspect the covariance matrix in this 2D case.
174 |
175 | C = covar(grid2D, **vg_params)
176 | fig, ax = freshfig("2D covar")
177 | ax.matshow(C, cmap="RdBu", vmin=0, vmax=1);
178 | ax.grid(False)
179 |
180 | # ## Estimation problem
181 | #
182 | # For our estimation target we will use one of the above generated random fields.
183 |
184 | truth = fields.T[0]
185 |
186 | # For the observations, we pick some random grid locations for simplicity
187 | # (even though the methods work also with observations not on grid nodes).
188 |
189 | nObs = 10
190 | obs_idx = rnd.randint(0, len(grid2D), nObs)
191 | obs_coo = grid2D[obs_idx]
192 | observations = truth[obs_idx]
193 |
194 | # ## Spatial interpolant methods
195 |
196 | # Pre-compute re-used objects
197 | dists_yy = dist_euclid(obs_coo, obs_coo)
198 | dists_xy = dist_euclid(grid2D, obs_coo)
199 |
200 | estims = dict(Truth=truth)
201 | vmin=truth.min()
202 | vmax=truth.max()
203 |
204 | # The cells below contain snippets of different spatial interpolation methods,
205 | # followed by a cell that plots the interpolants.
206 | # Complete the code snippets.
207 | #
208 | # #### Exc: Nearest neighbour interpolation
209 | #
210 | # Implement the method [(wikipedia)](https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation).
211 |
212 | nearest_obs = np.zeros_like(truth, dtype=int) ### FIX THIS ###
213 | estims["Nearest-n."] = observations[nearest_obs]
214 |
215 | # +
216 | # show_answer('nearest neighbour interp')
217 | # -
218 |
219 | # #### Exc: Inverse distance weighting
220 | #
221 | # Implement the method [(wikipedia)](https://en.wikipedia.org/wiki/Inverse_distance_weighting).
222 | # *Hint: You can ignore the `errstate` line below. It is just used to "silence warnings" resulting from division by 0 (whose special case is treated in a cell further down).*
223 |
224 | exponent = 3
225 | with np.errstate(invalid='ignore', divide='ignore'):
226 | weights = np.zeros_like(dists_xy) ### FIX THIS ###
227 |
228 | # +
229 | # show_answer('inv-dist weight interp')
230 | # -
231 |
232 | # Apply weights
233 | estims["Inv-dist."] = weights @ observations
234 |
235 | # Fix singularities
236 | estims["Inv-dist."][obs_idx] = observations
237 |
238 | # #### Exc: Simple Kriging
239 | #
240 | # Implement the method [(wikipedia)](https://en.wikipedia.org/wiki/Kriging#Simple_kriging).
241 | #
242 | # *Hint: use `sla.solve` or `sla.inv` (less recommended)*
243 |
244 | ### ANSWER HERE ###
245 | covar_yy = ...
246 | cross_xy = ...
247 | regression_coefficients = weights ### FIX THIS ### -- should be cross_xy / covar_yy
248 |
249 | # +
250 | # show_answer('Kriging code')
251 | # -
252 |
253 | estims["Kriging"] = regression_coefficients @ observations
254 |
255 | # ### Plot truth, estimates, error
256 |
257 | # +
258 | fig, axs = freshfig(num="Estimation problem", figsize=(8, 4), squeeze=False,
259 | nrows=2, ncols=len(estims), sharex=True, sharey=True)
260 |
261 | for name, ax1, ax2 in zip(estims, *axs):
262 | ax1.set_title(name)
263 | c1 = contour_plot(ax1, estims[name])
264 | c2 = contour_plot(ax2, estims[name] - truth, cmap="RdBu")
265 | fig.tight_layout()
266 | fig.subplots_adjust(right=0.85)
267 | cbar = fig.colorbar(c1, cax=fig.add_axes([0.9, 0.15, 0.03, 0.7]))
268 | axs[1, 0].set_ylabel("Errors");
269 |
270 |
271 | # -
272 |
273 | # #### Exc: Try different values of `Range`
274 | #
275 | # - Run code to re-compute Kriging estimate.
276 | # - What does setting it to `0.1` cause? What about `100`?
277 |
278 | @interact(Range=(.01, 40))
279 | def plot_krieged(Range=1):
280 | vg_params['Range'] = Range
281 | covar_yy = 1 - variogram(dists_yy, **vg_params)
282 | cross_xy = 1 - variogram(dists_xy, **vg_params)
283 | regression_coefficients = sla.solve(covar_yy, cross_xy.T).T
284 |
285 | fig, ax = freshfig(num="Kriging estimates")
286 | c1 = contour_plot(ax, regression_coefficients @ observations)
287 | fig.colorbar(c1);
288 | plt.show()
289 |
290 | # #### Generalizations
291 | #
292 | # - Unknown mean (Ordinary Kriging)
293 | # - Co-Kriging (vector-valued fields)
294 | # - Trend surfaces (non-stationarity assumptions)
295 | #
296 | # ## Summary
297 | #
298 | # The covariances of random fields can sometimes be described by the autocorrelation function,
299 | # or equivalently, the (semi-)variogram.
300 | # Covariances form the basis of a family of (geo-)spatial interpolation and approximation
301 | # methods known as Kriging, which can also be called/interpreted as
302 | # **Radial basis function (RBF) interpolation**, or
303 | # **Gaussian process (GP) regression**.
304 | #
305 | # - Kriging is derived by minimizing the variance of linear and unbiased estimators.
306 | # - RBF interpolation is derived by the explicit desire to fit
307 | # N functions to N data points (observations).
308 | # - GP regression is derived by conditioning (applying Bayes rule)
309 | # to the (supposedly) Gaussian distribution of the random field.
310 | #
311 | # ### Next: [T7 - Chaos & Lorenz](T7%20-%20Chaos%20%26%20Lorenz%20[optional].ipynb)
312 |
--------------------------------------------------------------------------------
/notebooks/scripts/T7 - Chaos & Lorenz [optional].md:
--------------------------------------------------------------------------------
1 | ---
2 | jupyter:
3 | jupytext:
4 | formats: ipynb,scripts//py:light,scripts//md
5 | text_representation:
6 | extension: .md
7 | format_name: markdown
8 | format_version: '1.3'
9 | jupytext_version: 1.17.2
10 | kernelspec:
11 | display_name: Python 3 (ipykernel)
12 | language: python
13 | name: python3
14 | ---
15 |
16 | ```python
17 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials"
18 | !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s
19 | ```
20 |
21 | ```python
22 | from resources import show_answer, interact, frame
23 | %matplotlib inline
24 | import numpy as np
25 | import numpy.random as rnd
26 | import matplotlib.pyplot as plt
27 | plt.ion();
28 | ```
29 |
30 | # T7 - Chaos & Lorenz
31 |
32 | ***Chaos***
33 | is also known as the butterfly effect: "a butterfly that flaps its wings in Brazil can 'cause' a hurricane in Texas".
34 | As opposed to the opinions of Descartes/Newton/Laplace, chaos effectively means that even in a deterministic (non-stochastic) universe, we can only predict "so far" into the future. This will be illustrated below using two toy-model dynamical systems made by ***Edward Lorenz***.
35 | $
36 | \newcommand{\mat}[1]{{\mathbf{{#1}}}}
37 | \newcommand{\bvec}[1]{{\mathbf{#1}}}
38 | \newcommand{\xDim}[0]{D}
39 | \newcommand{\x}[0]{\bvec{x}}
40 | $
41 |
42 | ## Dynamical systems
43 |
44 | Dynamical systems are systems (sets of equations) whose variables evolve in time (the equations contain time derivatives). As a branch of mathematics, the theory of dynamical systems is mainly concerned with understanding the *behaviour* of solutions (trajectories) of the systems.
45 |
46 | Below is a function to numerically **integrate**
47 | (i.e. step-wise evolve the system forward in time) a set of coupled ODEs.
48 | It relies on `scipy`, but adds some conveniences,
49 | notably taking advantage of Python's `**kwargs` (key-word argument) feature,
50 | to define an internal `dxdt` whose only two arguments are
51 | `x` for the current state, and `t` for time.
52 |
53 | ```python
54 | from scipy.integrate import odeint
55 | from dapper.mods.integration import rk4
56 | dt = 0.01
57 |
58 | def integrate(dxdt, initial_states, final_time, **params):
59 | # Output shape: `(len(initial_states), nTime, len(x))`
60 | dxdt_fixed = lambda x, t: dxdt(x, t, **params) # Fix params
61 | time_steps = np.linspace(0, final_time, 1+int(final_time / dt))
62 | integrated = []
63 | ### Replace the following (in the next exercise) ###
64 | for x0 in initial_states:
65 | trajectory = odeint(dxdt_fixed, x0, time_steps)
66 | integrated.append(trajectory)
67 | return np.array(integrated), time_steps
68 | ```
69 |
70 | In addition, it takes care of looping over `initial_states`,
71 | computing a solution ("phase space trajectory") for each one,
72 | so that we can ask it to compute multiple trajectories at once,
73 | which we call Monte-Carlo simulation, or **ensemble forecasting**.
74 | But *loops are generally slow in Python*.
75 | Fortunately, for simple systems,
76 | we can write our code such that the dynamics get independently (but simultaneously) computed for rows of a *matrix* (rather than a single vector), meaning that each row in the input produces a corresponding row in the output. This in effect leaves `numpy` to do the looping (which it does much quicker than pure Python).
77 | Alternatively, since each simulation is completely independent of another realisation,
78 | they are **"embarrassingly parallelizable"**, which is a good option if the system is very costly to simulate.
79 | The exercise below challenges you to implement the first approach, resulting in much faster visualisation further below.
80 |
81 | #### Exc (optional) -- speed-up by vectorisation & parallelisation
82 |
83 | Replace `odeint` in the code above by `rk4` (which does not care about the size/shape of the input, thereby allowing for matrices, i.e. ensembles). Note that the call signature of `rk4` is similar to `odeint`, except that `time_steps` must be replaced by `t` and `dt`. I.e. it only computes a single time step, `t + dt`, so you must loop over `time_steps` yourself. *Hint: `dxdt(x, t, ...)` generally expect axis-0 (i.e. rows) of `x` to be the dimensions of the state vector -- not independent realisations of the states.*
84 |
85 | ```python
86 | # show_answer('rk4')
87 | ```
88 |
89 | ## The Lorenz (1963) attractor
90 |
91 | The [Lorenz-63 dynamical system](https://en.wikipedia.org/wiki/Lorenz_system) can be derived as an extreme simplification of *Rayleigh-Bénard convection*: fluid circulation in a shallow layer of fluid uniformly heated (cooled) from below (above).
92 | This produces the following 3 *coupled, nonlinear* ordinary differential equations (ODE):
93 |
94 | $$
95 | \begin{aligned}
96 | \dot{x} & = \sigma(y-x) \\
97 | \dot{y} & = \rho x - y - xz \\
98 | \dot{z} & = -\beta z + xy
99 | \end{aligned}
100 | \tag{1}
101 | $$
102 |
103 | where the "dot" represents the time derivative, $\frac{d}{dt}$. The state vector is $\x = (x,y,z)$, and the parameters are typically set to $\sigma = 10, \beta=8/3, \rho=28$. The ODEs can be coded as follows (yes, Python supports Unicode, but it might be cumbersome to type out!)
104 |
105 | ```python
106 | def dxdt63(state, time, σ, β, ρ):
107 | x, y, z = state
108 | return np.asarray([σ * (y - x),
109 | x * (ρ - z) - y,
110 | x * y - β * z])
111 | ```
112 |
113 | The following illustrates the system.
114 |
115 | ```python
116 | store = ['placeholder']
117 | @interact( σ=(0.,200), β=(0.,5), ρ=(0.,50), N=(1,100), ε=(0.01,10), Time=(0.,100), zoom=(.1, 4))
118 | def plot_lorenz63(σ=10, β=8/3, ρ=28 , in3D=True, N=2, ε=0.01, Time=2.0, zoom=1):
119 | rnd.seed(23)
120 | initial_states = [-6.1, 1.2, 32.5] + ε*rnd.randn(N, 3)
121 | trajectories, times = integrate(dxdt63, initial_states, Time, σ=σ, β=β, ρ=ρ)
122 | store[0] = trajectories
123 | if in3D:
124 | ax = plt.figure().add_subplot(111, projection='3d')
125 | for orbit in trajectories:
126 | line, = ax.plot(*(orbit.T), lw=1, alpha=.5)
127 | ax.scatter3D(*orbit[-1], s=40, color=line.get_color())
128 | ax.axis('off')
129 | frame(trajectories, ax, zoom)
130 | else:
131 | fig, axs = plt.subplots(3, sharex=True, figsize=(5, 4))
132 | for dim, ax, orbits in zip('xyz', axs, trajectories.T):
133 | start = int(10/dt/zoom)
134 | ax.plot(times[-start:], orbits[-start:], lw=1, alpha=.5)
135 | ax.set_ylabel(dim)
136 | ax.set_xlabel('Time')
137 | plt.show()
138 | ```
139 |
140 | #### Exc -- Bifurcation hunting
141 |
142 | Classic linear stability analysis involves setting eqn. (1) to zero and considering the eigenvalues (and vectors) of its Jacobian matrix. Here we will go about it mainly by visually inspecting the numerical results of simulations.
143 | Answer the following (to an approximate degree of precision) by gradually increasing $\rho$.
144 | Leave the other model parameters at their defaults, but use `ε`, `N`, `Time` and `zoom` to your advantage.
145 |
146 | - (a) What is the only fixed point for $\rho = 0$?
147 | - (b) At what (larger) value of $\rho$ does this change?
148 | What do you think happened to the original fixed point?
149 | - (c) At what (larger) value of $\rho$ do we see an oscillating (spiraling) motion?
150 | What do you think this entails for the aforementioned eigenvalues?
151 | - (d) Describe the difference in character of the trajectories between $\rho=10$ and $\rho=20$.
152 | - (e) At what (larger) values of $\rho$ do we get chaos?
153 | In other words, when do the trajectories no longer converge to fixed points (or limit cycles)?
154 | - (f) Also try $\rho=144$ (edit the code). What is the nature of the trajectories now?
155 | - (g) *Optional*: Use pen and paper to show that the fixed points of the Lorenz system (1) are
156 | indeed the origin as well as the roots of $x^2=\beta z$ with $y=x$,
157 | but that the latter two only exist for $\rho > 1$.
158 |
159 | In conclusion, while a dynamical system naturally depends on its parameter values (almost by definition), the way in which its behaviour/character depends on them can come as a surprise.
160 |
161 | ```python
162 | # show_answer("Bifurcations63")
163 | ```
164 |
165 | #### Exc -- Doubling time
166 |
167 | Re-run the animation cell to get default parameter values.
168 | Visually investigate the system's (i.e. the trajectories') **sensitivity to initial conditions** by moving `Time`, `N` and `ε`. What do you reckon is the "doubling time" of the perturbations? I.e. how long do you think it takes (on average) for two trajectories to grow twice as far apart as they started (alternatives: 0.03, 0.3, 3, 30)? What are the implications for any prediction/forecasting we might attempt?
169 |
170 | ```python
171 | # show_answer('Guesstimate 63')
172 | ```
173 |
174 | ### Averages
175 |
176 | The result actually depends on where in "phase space" the particles started. For example, predictability in the Lorenz system is much shorter when the state is near the center, where the trajectories diverge into the two wings of the butterfly. So to get a universal answer one must average these experiments for many different initial conditions.
177 | Alternatively, since the above system is [ergodic](https://en.wikipedia.org/wiki/Ergodic_theory#Ergodic_theorems), we could also average a single experiment over a very, very long time, obtaining the same statistics (assuming they have converged). Though not strictly implied, ergodicity is closely related to chaos. It means that
178 |
179 | - A trajectory/orbit never quite repeats (the orbit is aperiodic).
180 | - The tracks of the orbits are sufficiently "dense" that they define a manifold
181 | (something that looks like a surface, such as the butterfly wings above,
182 | and for which we can speak of properties like derivatives and fractal dimension).
183 | - Every part (of positive measure) of the manifold can be reached from any other.
184 | - There is a probability density for the manifold,
185 | quantifying the relative amount of time (of an infinite amount)
186 | that the system spends in that neighbourhood.
187 |
188 | Set `N` and `Time` in the above interactive animation to their upper bounds (might take long to run!).
189 | Execute the code cell below.
190 | Do you think the samples behind the histograms are drawn from the same distribution?
191 | In other words, is the Lorenz system ergodic?
192 |
193 | ```python
194 | @interact()
195 | def histograms():
196 | fig, axs = plt.subplots(ncols=3, sharey=True, figsize=(9, 3))
197 | def hist(ax, sample, lbl):
198 | ax.hist(sample, density=1, bins=20, label=lbl, alpha=.5)
199 |
200 | trajectories63 = store[0]
201 | for i, (ax, lbl) in enumerate(zip(axs, "xyz")):
202 | hist(ax, trajectories63[:, -1, i], "at final time")
203 | hist(ax, trajectories63[-1, ::int(.2/dt), i], "of final member")
204 | ax.set_title(f"Component {lbl}")
205 | plt.legend();
206 | ```
207 |
208 | The long-run distribution of a system may be called its **climatology**.
209 | A somewhat rudimentary weather forecasting initialisation (i.e. DA) technique,
210 | called **optimal interpolation**,
211 | consists in using the climatology as the prior (as opposed to yesterday's forecast)
212 | when applying Bayes' rule (in its [Gaussian guise](T3%20-%20Bayesian%20inference.ipynb#Linear-Gaussian-Bayes'-rule-(1D))) to the observations of the day.
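
For instance, a rough (Gaussian) climatology of Lorenz-63 could be estimated from a single long run,
relying on the ergodicity discussed above. The following sketch does so with ad-hoc choices of run
length and spin-up period:

```python
long_run, _ = integrate(dxdt63, [[-6.1, 1.2, 32.5]], 100, σ=10, β=8/3, ρ=28)
sample = long_run[0][int(10/dt):]  # discard spin-up (the first 10 time units)
print("Climatological mean:", sample.mean(axis=0).round(2))
print("Climatological std: ", sample.std(axis=0).round(2))
```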
213 |
214 | ## The Lorenz-96 model
215 |
216 | Lorenz-96 is a "spatially 1D" dynamical system of an astoundingly simple design that resembles atmospheric convection,
217 | including nonlinear terms and chaoticity.
218 | Each state variable $\x_i$ can be considered some atmospheric quantity at a grid point along a fixed latitude of Earth. The system
219 | is given by the coupled set of ODEs,
220 | $$
221 | \frac{d \x_i}{dt} = (\x_{i+1} − \x_{i-2}) \x_{i-1} − \x_i + F
222 | \,,
223 | \quad \quad i \in \{1,\ldots,\xDim\}
224 | \,,
225 | $$
226 | where the subscript indices apply periodically.
227 |
228 | This model is not derived from physics but has similar characteristics, such as
229 |
230 | - there is external forcing, determined by a parameter $F$;
231 | - there is internal dissipation, emulated by the linear term;
232 | - there is energy-conserving advection, emulated by quadratic terms.
233 |
234 |
235 | [Further description in the very readable original article](https://www.ecmwf.int/sites/default/files/elibrary/1995/75462-predictability-problem-partly-solved_0.pdf).
236 |
237 | **Exc (optional) -- Conservation of energy:** Show that the "total energy" $\sum_{i=1}^{\xDim} \x_i^2$ is preserved by the quadratic terms in the ODE.
238 | *Hint: consider its time derivative.*
239 |
240 | ```python
241 | # show_answer("Lorenz energy")
242 | ```
243 |
244 | The model is animated below.
245 |
246 | ```python
247 | def s(vector, n):
248 | return np.roll(vector, -n)
249 |
250 | def dxdt96(x, time, Force):
251 | return (s(x, 1) - s(x, -2)) * s(x, -1) - x + Force
252 |
253 | ylims = -10, 20
254 | ```
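
As a quick numerical complement to the energy-conservation exercise above (a sketch; it does not
replace the pen-and-paper proof): since $\frac{d}{dt}\sum_i \x_i^2 = 2 \sum_i \x_i \frac{d\x_i}{dt}$,
the contribution of the quadratic (advection) terms is $2 \sum_i \x_i (\x_{i+1} - \x_{i-2}) \x_{i-1}$,
which should vanish for any state vector.

```python
x_test = rnd.randn(40)                                      # an arbitrary state vector
advection = (s(x_test, 1) - s(x_test, -2)) * s(x_test, -1)  # the quadratic terms only
print("x · advection =", x_test @ advection)                # ≈ 0, up to round-off
```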
255 |
256 | ```python
257 | store = ["placeholder"]
258 | @interact( xDim=(4,60,1), N=(1,30), Force=(0,15.), ε=(0.01,3,0.1), Time=(0.05,90,0.04))
259 | def plot_lorenz96(xDim=40, N=2, Force=8, ε=0.01, Time=3):
260 | rnd.seed(23)
261 | initial_states = np.zeros((N, xDim))
262 | initial_states[:, 0] = ε*(10 + rnd.randn(N))
263 | trajectories, times = integrate(dxdt96, initial_states, Time, Force=Force)
264 | store[0] = trajectories
265 |
266 | plt.figure(figsize=(7, 4))
267 | plt.plot(np.arange(xDim), trajectories[:, -1].T)
268 | plt.ylim(-10, 20)
269 | plt.show()
270 | ```
271 |
272 | #### Exc -- Bifurcation hunting 96
273 |
274 | Investigate by moving the sliders (but keep `xDim=40`): Under which settings of the force `F`
275 |
276 | - Do the solutions tend to the steady state $\x_i = F$ for all $i$ ?
277 | - Are the solutions periodic?
278 | - Is the system chaotic (i.e., the solutions are extremely sensitive to initial conditions,
279 | meaning that the predictability horizon is finite) ?
280 |
281 | *PS: another way to visualise spatially 1D systems (or cross-sections) over time is the [Hovmöller diagram](https://en.wikipedia.org/wiki/Hovm%C3%B6ller_diagram), here represented for 1 realisation of the simulations.*
282 |
283 | ```python
284 | @interact()
285 | def Hovmoller():
286 | plt.contourf(store[0][0], cmap="viridis", vmin=ylims[0], vmax=ylims[1])
287 | plt.colorbar();
288 | plt.show()
289 | ```
290 |
291 | ```python
292 | # show_answer('Bifurcations96', 'a')
293 | ```
294 |
295 | #### Exc (optional) -- Doubling time
296 |
297 | Maximise `N` (for a large sample), minimise `ε` (to approach linear conditions) and set `Time=1` (a reasonable first guess). Compute a rough estimate of the doubling time in the cell below from the data in `store[0]`, which holds the trajectories, and has shape `(N, len(times), xDim)`.
298 | *Hint: The theory for these questions will be described in further detail in the following section.*
299 |
300 | ```python
301 | # show_answer("doubling time")
302 | ```
303 |
304 | ## The double pendulum
305 |
306 | The [double pendulum](https://en.wikipedia.org/wiki/Double_pendulum) is another classic example of a chaotic system.
307 | It is a little longer to implement, so we'll just load it from [DAPPER](https://github.com/nansencenter/DAPPER/blob/master/dapper/mods/DoublePendulum/__init__.py).
308 | Unlike the Lorenz systems, the divergence of its "$f$" flow field is 0,
309 | so it is conservative, and all of the trajectories preserve their initial energy
310 | (except for what friction our numerical integration causes).
311 | Therefore it does not strictly speaking possess an attractor
312 | nor is it ergodic (but some things might be said upon restriction to the set of initial conditions with equal energy levels?)
313 |
314 | ```python
315 | from numpy import cos, sin, pi
316 | from dapper.mods.DoublePendulum import L1, L2, x0, dxdt
317 | def x012(x): return (0 , L1*sin(x[0]) , L1*sin(x[0]) + L2*sin(x[2]))
318 | def y012(x): return (0, -L1*cos(x[0]), -L1*cos(x[0]) - L2*cos(x[2]))
319 |
320 | x0 = [.9*pi, 0, 0, 0] # Angular pos1, vel1, pos2, vel2
321 | initial_states = x0 + 0.01*np.random.randn(20, 4)
322 | trajectories, times = integrate(lambda x, t: dxdt(x), initial_states, 10)
323 |
324 | @interact(k=(0, len(times)-1, 4), N=(1, len(initial_states)))
325 | def plot_pendulum2(k=1, N=2):
326 | fig, ax = plt.subplots()
327 | ax.set(xlim=(-2, 2), ylim=(-2, 2), aspect="equal")
328 | for x in trajectories[:N, k]:
329 | ax.plot(x012(x), y012(x), '-o')
330 | plt.show()
331 | ```
332 |
333 | ## Error/perturbation dynamics
334 |
335 | **Exc (optional) -- Perturbation ODE:** Suppose $x(t)$ and $z(t)$ are "twins": they evolve according to the same law $f$:
336 | $$
337 | \begin{align}
338 | \frac{dx}{dt} &= f(x) \\
339 | \frac{dz}{dt} &= f(z) \,.
340 | \end{align}
341 | $$
342 |
343 | Define the "error": $\varepsilon(t) = x(t) - z(t)$.
344 | Suppose $z(0)$ is close to $x(0)$.
345 | Let $F = \frac{df}{dx}(x(t))$.
346 |
347 | - (a) Show that the error evolves according to the ordinary differential equation (ODE)
348 | $$\frac{d \varepsilon}{dt} \approx F \varepsilon \,.$$
349 |
350 | ```python
351 | # show_answer("error evolution")
352 | ```
353 |
354 | - (b) Suppose $F$ is constant. Show that the error grows exponentially: $\varepsilon(t) = \varepsilon(0) e^{F t} $.
355 |
356 | ```python
357 | # show_answer("anti-deriv")
358 | ```
359 |
360 | - (c)
361 | - (1) Suppose $F<0$.
362 | What happens to the error?
363 | What does this mean for predictability?
364 | - (2) Now suppose $F>0$.
365 | Given that all observations are uncertain (i.e. $R_t>0$, if only ever so slightly),
366 | can we ever hope to estimate $x(t)$ with 0 uncertainty?
367 |
368 | ```python
369 | # show_answer("predictability cases")
370 | ```
371 |
372 | - (d) What is the doubling time of the error?
373 |
374 | ```python
375 | # show_answer("doubling time, Lyapunov")
376 | ```
377 |
378 | - (e) Consider the ODE derived above.
379 | How might we change it in order to model (i.e. emulate) a saturation of the error at some level?
380 | Can you solve this equation?
381 |
382 | ```python
383 | # show_answer("saturation term")
384 | ```
385 |
386 | - (f) Now suppose $z(t)$ evolves according to $\frac{dz}{dt} = g(z)$, with $g \neq f$.
387 | What is now the differential equation governing the evolution of the error, $\varepsilon$?
388 |
389 | ```python
390 | # show_answer("linear growth")
391 | ```
392 |
393 | ## Summary
394 |
395 | Prediction (forecasting) with these systems is challenging because they are chaotic:
396 | small errors grow exponentially.
397 | Therefore there is a limit to how far into the future we can make (skillful) predictions,
398 | and so it is crucial to minimize the initial error as much as possible.
399 | This is a task for DA (filtering).
400 |
401 | Also see this [book on chaos and predictability](https://kuiper2000.github.io/chaos_and_predictability/intro.html).
402 |
403 | ### Next: [T8 - Monte-Carlo & ensembles](T8%20-%20Monte-Carlo%20%26%20ensembles.ipynb)
404 |
--------------------------------------------------------------------------------
/notebooks/scripts/T7 - Chaos & Lorenz [optional].py:
--------------------------------------------------------------------------------
1 | # ---
2 | # jupyter:
3 | # jupytext:
4 | # formats: ipynb,scripts//py:light,scripts//md
5 | # text_representation:
6 | # extension: .py
7 | # format_name: light
8 | # format_version: '1.5'
9 | # jupytext_version: 1.17.2
10 | # kernelspec:
11 | # display_name: Python 3 (ipykernel)
12 | # language: python
13 | # name: python3
14 | # ---
15 |
16 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials"
17 | # !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s
18 |
19 | from resources import show_answer, interact, frame
20 | # %matplotlib inline
21 | import numpy as np
22 | import numpy.random as rnd
23 | import matplotlib.pyplot as plt
24 | plt.ion();
25 |
26 | # # T7 - Chaos & Lorenz
27 | #
28 | # ***Chaos***
29 | # is also known as the butterfly effect: "a butterfly that flaps its wings in Brazil can 'cause' a hurricane in Texas".
30 | # Contrary to the worldview of Descartes/Newton/Laplace, chaos effectively means that even in a deterministic (non-stochastic) universe, we can only predict "so far" into the future. This will be illustrated below using two toy-model dynamical systems made by ***Edward Lorenz***.
31 | # $
32 | # \newcommand{\mat}[1]{{\mathbf{{#1}}}}
33 | # \newcommand{\bvec}[1]{{\mathbf{#1}}}
34 | # \newcommand{\xDim}[0]{D}
35 | # \newcommand{\x}[0]{\bvec{x}}
36 | # $
37 | #
38 | # ## Dynamical systems
39 | #
40 | # Dynamical systems are systems (sets of equations) whose variables evolve in time (the equations contain time derivatives). As a branch of mathematics, their theory is mainly concerned with understanding the *behaviour* of solutions (trajectories) of the systems.
41 | #
42 | # Below is a function to numerically **integrate**
43 | # (i.e. step-wise evolve the system forward in time) a set of coupled ODEs.
44 | # It relies on `scipy`, but adds some conveniences,
45 | # notably taking advantage of Python's `**kwargs` (key-word argument) feature,
46 | # to define an internal `dxdt` whose only two arguments are
47 | # `x` for the current state, and `t` for time.
48 |
49 | # +
50 | from scipy.integrate import odeint
51 | from dapper.mods.integration import rk4
52 | dt = 0.01
53 |
54 | def integrate(dxdt, initial_states, final_time, **params):
55 | # Output shape: `(len(initial_states), nTime, len(x))`
56 | dxdt_fixed = lambda x, t: dxdt(x, t, **params) # Fix params
57 | time_steps = np.linspace(0, final_time, 1+int(final_time / dt))
58 | integrated = []
59 | ### Replace the following (in the next exercise) ###
60 | for x0 in initial_states:
61 | trajectory = odeint(dxdt_fixed, x0, time_steps)
62 | integrated.append(trajectory)
63 | return np.array(integrated), time_steps
64 |
65 |
66 | # -
67 |
68 | # In addition, it takes care of looping over `initial_states`,
69 | # computing a solution ("phase space trajectory") for each one,
70 | # so that we can ask it to compute multiple trajectories at once,
71 | # which we call Monte-Carlo simulation, or **ensemble forecasting**.
72 | # But *loops are generally slow in Python*.
73 | # Fortunately, for simple systems,
74 | # we can write our code such that the dynamics get independently (but simultaneously) computed for rows of a *matrix* (rather than a single vector), meaning that each row in the input produces a corresponding row in the output. This in effect leaves `numpy` to do the looping (which it does much quicker than pure Python).
75 | # Alternatively, since each simulation is completely independent of another realisation,
76 | # they are **"embarrassingly parallelizable"**, which is a good option if the system is very costly to simulate.
77 | # The exercise below challenges you to implement the first approach, resulting in much faster visualisation further below.
78 | #
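# To get a feel for what "letting `numpy` do the looping" buys, here is a minimal sketch
# (the toy "dynamics" $x \mapsto x - 0.1 x^3$ and the variable names below are just for illustration):

toy_states = rnd.randn(1000, 3)                           # 1000 states of dimension 3
out_loop = np.array([x - 0.1*x**3 for x in toy_states])   # explicit Python loop over the states
out_vect = toy_states - 0.1*toy_states**3                 # numpy broadcasts over all rows at once
print(np.allclose(out_loop, out_vect))                    # same result, but much faster without the loop
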
79 | # #### Exc (optional) -- speed-up by vectorisation & parallelisation
80 | #
81 | # Replace `odeint` in the code above by `rk4` (which does not care about the size/shape of the input, thereby allowing for matrices, i.e. ensembles). Note that the call signature of `rk4` is similar to `odeint`, except that `time_steps` must be replaced by `t` and `dt`. I.e. it only computes a single time step, `t + dt`, so you must loop over `time_steps` yourself. *Hint: `dxdt(x, t, ...)` generally expect axis-0 (i.e. rows) of `x` to be the dimensions of the state vector -- not independent realisations of the states.*
82 |
83 | # +
84 | # show_answer('rk4')
85 | # -
86 |
87 | # ## The Lorenz (1963) attractor
88 | #
89 | # The [Lorenz-63 dynamical system](https://en.wikipedia.org/wiki/Lorenz_system) can be derived as an extreme simplification of *Rayleigh-Bénard convection*: fluid circulation in a shallow layer of fluid uniformly heated (cooled) from below (above).
90 | # This produces the following 3 *coupled, nonlinear* ordinary differential equations (ODE):
91 | #
92 | # $$
93 | # \begin{aligned}
94 | # \dot{x} & = \sigma(y-x) \\
95 | # \dot{y} & = \rho x - y - xz \\
96 | # \dot{z} & = -\beta z + xy
97 | # \end{aligned}
98 | # \tag{1}
99 | # $$
100 | #
101 | # where the "dot" represents the time derivative, $\frac{d}{dt}$. The state vector is $\x = (x,y,z)$, and the parameters are typically set to $\sigma = 10, \beta=8/3, \rho=28$. The ODEs can be coded as follows (yes, Python supports Unicode, but it might be cumbersome to type out!)
102 |
103 | def dxdt63(state, time, σ, β, ρ):
104 | x, y, z = state
105 | return np.asarray([σ * (y - x),
106 | x * (ρ - z) - y,
107 | x * y - β * z])
108 |
109 |
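# Note that, because the arithmetic above is plain `numpy`, `dxdt63` already broadcasts over an
# ensemble stored as *columns*. A quick sanity check (a sketch; `E_test` is just a hypothetical
# batch of 5 perturbed states):

E_test = np.array([-6.1, 1.2, 32.5])[:, None] + 0.1*rnd.randn(3, 5)
print(dxdt63(E_test, 0, σ=10, β=8/3, ρ=28).shape)  # (3, 5): one tendency column per member
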
110 | # The following illustrates the system.
111 |
112 | store = ['placeholder']
113 | @interact( σ=(0.,200), β=(0.,5), ρ=(0.,50), N=(1,100), ε=(0.01,10), Time=(0.,100), zoom=(.1, 4))
114 | def plot_lorenz63(σ=10, β=8/3, ρ=28 , in3D=True, N=2, ε=0.01, Time=2.0, zoom=1):
115 | rnd.seed(23)
116 | initial_states = [-6.1, 1.2, 32.5] + ε*rnd.randn(N, 3)
117 | trajectories, times = integrate(dxdt63, initial_states, Time, σ=σ, β=β, ρ=ρ)
118 | store[0] = trajectories
119 | if in3D:
120 | ax = plt.figure().add_subplot(111, projection='3d')
121 | for orbit in trajectories:
122 | line, = ax.plot(*(orbit.T), lw=1, alpha=.5)
123 | ax.scatter3D(*orbit[-1], s=40, color=line.get_color())
124 | ax.axis('off')
125 | frame(trajectories, ax, zoom)
126 | else:
127 | fig, axs = plt.subplots(3, sharex=True, figsize=(5, 4))
128 | for dim, ax, orbits in zip('xyz', axs, trajectories.T):
129 | start = int(10/dt/zoom)
130 | ax.plot(times[-start:], orbits[-start:], lw=1, alpha=.5)
131 | ax.set_ylabel(dim)
132 | ax.set_xlabel('Time')
133 | plt.show()
134 |
135 |
136 | # #### Exc -- Bifurcation hunting
137 | #
138 | # Classic linear stability analysis involves setting the right-hand side of eqn. (1) to zero (to find the fixed points) and considering the eigenvalues (and eigenvectors) of its Jacobian matrix there. Here we will go about it mainly by visually inspecting the numerical results of simulations.
139 | # Answer the following (to an approximate degree of precision) by gradually increasing $\rho$.
140 | # Leave the other model parameters at their defaults, but use `ε`, `N`, `Time` and `zoom` to your advantage.
141 | #
142 | # - (a) What is the only fixed point for $\rho = 0$?
143 | # - (b) At what (larger) value of $\rho$ does this change?
144 | # What do you think happened to the original fixed point?
145 | # - (c) At what (larger) value of $\rho$ do we see an oscillating (spiraling) motion?
146 | # What do you think this entails for the aforementioned eigenvalues?
147 | # - (d) Describe the difference in character of the trajectories between $\rho=10$ and $\rho=20$.
148 | # - (e) At what (larger) values of $\rho$ do we get chaos?
149 | # In other words, when do the trajectories no longer converge to fixed points (or limit cycles)?
150 | # - (f) Also try $\rho=144$ (edit the code). What is the nature of the trajectories now?
151 | # - (g) *Optional*: Use pen and paper to show that the fixed points of the Lorenz system (1) are
152 | # indeed the origin as well as the roots of $x^2=\beta z$ with $y=x$,
153 | # but that the latter two only exist for $\rho > 1$.
154 | #
155 | # In conclusion, while a dynamical system naturally depends on its parameter values (almost by definition), the way in which its behaviour/character depends on them can come as a surprise.
156 |
157 | # +
158 | # show_answer("Bifurcations63")
159 | # -
160 |
161 | # #### Exc -- Doubling time
162 | #
163 | # Re-run the animation cell to get default parameter values.
164 | # Visually investigate the system's (i.e. the trajectories') **sensitivity to initial conditions** by moving `Time`, `N` and `ε`. What do you reckon is the "doubling time" of the perturbations? I.e. how long do you think it takes (on average) for two trajectories to grow twice as far apart as they started (alternatives: 0.03, 0.3, 3, 30)? What are the implications for any prediction/forecasting we might attempt?
165 |
166 | # +
167 | # show_answer('Guesstimate 63')
168 | # -
169 |
170 | # ### Averages
171 | #
172 | # The result actually depends on where in "phase space" the particles started. For example, predictability in the Lorenz system is much shorter when the state is near the center, where the trajectories diverge into the two wings of the butterfly. So to get a universal answer one must average these experiments for many different initial conditions.
173 | # Alternatively, since the above system is [ergodic](https://en.wikipedia.org/wiki/Ergodic_theory#Ergodic_theorems), we could also average a single experiment over a very, very long time, obtaining the same statistics (assuming they have converged). Though not strictly implied, ergodicity is closely related to chaos. It means that
174 | #
175 | # - A trajectory/orbit never quite repeats (the orbit is aperiodic).
176 | # - The tracks of the orbits are sufficiently "dense" that they define a manifold
177 | # (something that looks like a surface, such as the butterfly wings above,
178 | # and for which we can speak of properties like derivatives and fractal dimension).
179 | # - Every part (of positive measure) of the manifold can be reached from any other.
180 | # - There is a probability density for the manifold,
181 | # quantifying the relative amount of time (of an infinite amount)
182 | # that the system spends in that neighbourhood.
183 | #
184 | # Set `N` and `Time` in the above interactive animation to their upper bounds (might take long to run!).
185 | # Execute the code cell below.
186 | # Do you think the samples behind the histograms are drawn from the same distribution?
187 | # In other words, is the Lorenz system ergodic?
188 |
189 | @interact()
190 | def histograms():
191 | fig, axs = plt.subplots(ncols=3, sharey=True, figsize=(9, 3))
192 | def hist(ax, sample, lbl):
193 | ax.hist(sample, density=1, bins=20, label=lbl, alpha=.5)
194 |
195 | trajectories63 = store[0]
196 | for i, (ax, lbl) in enumerate(zip(axs, "xyz")):
197 | hist(ax, trajectories63[:, -1, i], "at final time")
198 | hist(ax, trajectories63[-1, ::int(.2/dt), i], "of final member")
199 | ax.set_title(f"Component {lbl}")
200 | plt.legend();
201 |
202 |
203 | # The long-run distribution of a system may be called its **climatology**.
204 | # A somewhat rudimentary weather forecasting initialisation (i.e. DA) technique,
205 | # called **optimal interpolation**,
206 | # consists in using the climatology as the prior (as opposed to yesterday's forecast)
207 | # when applying Bayes' rule (in its [Gaussian guise](T3%20-%20Bayesian%20inference.ipynb#Linear-Gaussian-Bayes'-rule-(1D))) to the observations of the day.
208 | #
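# As a minimal 1D sketch of this idea (assuming the interactive Lorenz-63 cell above has been
# run, so that `store[0]` holds its trajectories; the observation value and error variance
# below are made up for illustration): take the climatological mean and variance of the $x$
# component as the prior, and update it with the linear-Gaussian Bayes' rule.

x_clim = store[0][:, :, 0].ravel()                    # x-component, pooled over members and times
prior_mean, prior_var = x_clim.mean(), x_clim.var()   # climatological (prior) moments
obs, obs_var = 5.0, 4.0                               # a made-up observation and its error variance
gain = prior_var / (prior_var + obs_var)              # weight given to the observation
print("prior mean:", prior_mean, "  posterior mean:", prior_mean + gain*(obs - prior_mean))
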
209 | # ## The Lorenz-96 model
210 | #
211 | # Lorenz-96 is a "spatially 1D" dynamical system of an astoundingly simple design that resembles atmospheric convection,
212 | # including nonlinear terms and chaoticity.
213 | # Each state variable $\x_i$ can be considered some atmospheric quantity at a grid point along a fixed latitude of Earth. The system
214 | # is given by the coupled set of ODEs,
215 | # $$
216 | # \frac{d \x_i}{dt} = (\x_{i+1} − \x_{i-2}) \x_{i-1} − \x_i + F
217 | # \,,
218 | # \quad \quad i \in \{1,\ldots,\xDim\}
219 | # \,,
220 | # $$
221 | # where the subscript indices apply periodically.
222 | #
223 | # This model is not derived from physics but has similar characteristics, such as
224 | #
225 | # - there is external forcing, determined by a parameter $F$;
226 | # - there is internal dissipation, emulated by the linear term;
227 | # - there is energy-conserving advection, emulated by quadratic terms.
228 | #
229 | #
230 | # [Further description in the very readable original article](https://www.ecmwf.int/sites/default/files/elibrary/1995/75462-predictability-problem-partly-solved_0.pdf).
231 | #
232 | # **Exc (optional) -- Conservation of energy:** Show that the "total energy" $\sum_{i=1}^{\xDim} \x_i^2$ is preserved by the quadratic terms in the ODE.
233 | # *Hint: consider its time derivative.*
234 |
235 | # +
236 | # show_answer("Lorenz energy")
237 | # -
238 |
239 | # The model is animated below.
240 |
241 | # +
242 | def s(vector, n):
243 | return np.roll(vector, -n)
244 |
245 | def dxdt96(x, time, Force):
246 | return (s(x, 1) - s(x, -2)) * s(x, -1) - x + Force
247 |
248 | ylims = -10, 20
249 | # -
250 |
251 | store = ["placeholder"]
252 | @interact( xDim=(4,60,1), N=(1,30), Force=(0,15.), ε=(0.01,3,0.1), Time=(0.05,90,0.04))
253 | def plot_lorenz96(xDim=40, N=2, Force=8, ε=0.01, Time=3):
254 | rnd.seed(23)
255 | initial_states = np.zeros((N, xDim))
256 | initial_states[:, 0] = ε*(10 + rnd.randn(N))
257 | trajectories, times = integrate(dxdt96, initial_states, Time, Force=Force)
258 | store[0] = trajectories
259 |
260 | plt.figure(figsize=(7, 4))
261 | plt.plot(np.arange(xDim), trajectories[:, -1].T)
262 | plt.ylim(-10, 20)
263 | plt.show()
264 |
265 |
266 | # #### Exc -- Bifurcation hunting 96
267 | #
268 | # Investigate by moving the sliders (but keep `xDim=40`): Under which settings of the forcing `Force`
269 | #
270 | # - Do the solutions tend to the steady state $\x_i = F$ for all $i$ ?
271 | # - Are the solutions periodic?
272 | # - Is the system chaotic (i.e., the solutions are extremely sensitive to initial conditions,
273 | # meaning that the predictability horizon is finite) ?
274 | #
275 | # *PS: another way to visualise spatially 1D systems (or cross-sections) over time is the [Hovmöller diagram](https://en.wikipedia.org/wiki/Hovm%C3%B6ller_diagram), here represented for 1 realisation of the simulations.*
276 |
277 | @interact()
278 | def Hovmoller():
279 | plt.contourf(store[0][0], cmap="viridis", vmin=ylims[0], vmax=ylims[1])
280 | plt.colorbar();
281 | plt.show()
282 |
283 |
284 | # +
285 | # show_answer('Bifurcations96', 'a')
286 | # -
287 |
288 | # #### Exc (optional) -- Doubling time
289 | #
290 | # Maximise `N` (for a large sample), minimise `ε` (to approach linear conditions) and set `Time=1` (a reasonable first guess). Compute a rough estimate of the doubling time in the cell below from the data in `store[0]`, which holds the trajectories and has shape `(N, len(times), xDim)`.
291 | # *Hint: The theory for these questions will be described in further detail in the following section.*
292 |
293 | # +
294 | # show_answer("doubling time")
295 | # -
296 |
297 | # ## The double pendulum
298 | #
299 | # The [double pendulum](https://en.wikipedia.org/wiki/Double_pendulum) is another classic example of a chaotic system.
300 | # It is a little longer to implement, so we'll just load it from [DAPPER](https://github.com/nansencenter/DAPPER/blob/master/dapper/mods/DoublePendulum/__init__.py).
301 | # Unlike the Lorenz systems, the divergence of its "$f$" flow field is 0,
302 | # so it is conservative: each trajectory preserves its initial energy
303 | # (apart from the spurious friction introduced by our numerical integration).
304 | # Therefore it does not, strictly speaking, possess an attractor,
305 | # nor is it ergodic (although something may be said upon restriction to the set of initial conditions with a given energy level).
306 |
307 | # +
308 | from numpy import cos, sin, pi
309 | from dapper.mods.DoublePendulum import L1, L2, x0, dxdt
310 | def x012(x): return (0 , L1*sin(x[0]) , L1*sin(x[0]) + L2*sin(x[2]))
311 | def y012(x): return (0, -L1*cos(x[0]), -L1*cos(x[0]) - L2*cos(x[2]))
312 |
313 | x0 = [.9*pi, 0, 0, 0] # Angular pos1, vel1, pos2, vel2
314 | initial_states = x0 + 0.01*np.random.randn(20, 4)
315 | trajectories, times = integrate(lambda x, t: dxdt(x), initial_states, 10)
316 |
317 | @interact(k=(0, len(times)-1, 4), N=(1, len(initial_states)))
318 | def plot_pendulum2(k=1, N=2):
319 | fig, ax = plt.subplots()
320 | ax.set(xlim=(-2, 2), ylim=(-2, 2), aspect="equal")
321 | for x in trajectories[:N, k]:
322 | ax.plot(x012(x), y012(x), '-o')
323 | plt.show()
324 | # -
325 |
326 | # ## Error/perturbation dynamics
327 | #
328 | # **Exc (optional) -- Perturbation ODE:** Suppose $x(t)$ and $z(t)$ are "twins": they evolve according to the same law $f$:
329 | # $$
330 | # \begin{align}
331 | # \frac{dx}{dt} &= f(x) \\
332 | # \frac{dz}{dt} &= f(z) \,.
333 | # \end{align}
334 | # $$
335 | #
336 | # Define the "error": $\varepsilon(t) = x(t) - z(t)$.
337 | # Suppose $z(0)$ is close to $x(0)$.
338 | # Let $F = \frac{df}{dx}(x(t))$.
339 | #
340 | # - (a) Show that the error evolves according to the ordinary differential equation (ODE)
341 | # $$\frac{d \varepsilon}{dt} \approx F \varepsilon \,.$$
342 |
343 | # +
344 | # show_answer("error evolution")
345 | # -
346 |
347 | # - (b) Suppose $F$ is constant. Show that the error evolves exponentially: $\varepsilon(t) = \varepsilon(0) e^{F t}$.
348 |
349 | # +
350 | # show_answer("anti-deriv")
351 | # -
352 |
353 | # - (c)
354 | # - (1) Suppose $F<0$.
355 | # What happens to the error?
356 | # What does this mean for predictability?
357 | # - (2) Now suppose $F>0$.
358 | # Given that all observations are uncertain (i.e. $R_t>0$, if only ever so slightly),
359 | # can we ever hope to estimate $x(t)$ with 0 uncertainty?
360 |
361 | # +
362 | # show_answer("predictability cases")
363 | # -
364 |
365 | # - (d) What is the doubling time of the error?
366 |
367 | # +
368 | # show_answer("doubling time, Lyapunov")
369 | # -
370 |
371 | # - (e) Consider the ODE derived above.
372 | # How might we change it in order to model (i.e. emulate) a saturation of the error at some level?
373 | # Can you solve this equation?
374 |
375 | # +
376 | # show_answer("saturation term")
377 | # -
378 |
379 | # - (f) Now suppose $z(t)$ evolves according to $\frac{dz}{dt} = g(z)$, with $g \neq f$.
380 | # What is now the differential equation governing the evolution of the error, $\varepsilon$?
381 |
382 | # +
383 | # show_answer("linear growth")
384 | # -
385 |
386 | # ## Summary
387 | #
388 | # Prediction (forecasting) with these systems is challenging because they are chaotic:
389 | # small errors grow exponentially.
390 | # Therefore there is a limit to how far into the future we can make (skillful) predictions,
391 | # and so it is crucial to minimize the initial error as much as possible.
392 | # This is a task for DA (filtering).
393 | #
394 | # Also see this [book on chaos and predictability](https://kuiper2000.github.io/chaos_and_predictability/intro.html).
395 | #
396 | # ### Next: [T8 - Monte-Carlo & ensembles](T8%20-%20Monte-Carlo%20%26%20ensembles.ipynb)
397 |
--------------------------------------------------------------------------------
/notebooks/scripts/T8 - Monte-Carlo & ensembles.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupyter:
3 | jupytext:
4 | formats: ipynb,scripts//py:light,scripts//md
5 | text_representation:
6 | extension: .md
7 | format_name: markdown
8 | format_version: '1.3'
9 | jupytext_version: 1.17.2
10 | kernelspec:
11 | display_name: Python 3 (ipykernel)
12 | language: python
13 | name: python3
14 | ---
15 |
16 | ```python
17 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials"
18 | !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s
19 | ```
20 |
21 | ```python
22 | from resources import show_answer, interact, import_from_nb
23 | %matplotlib inline
24 | import numpy as np
25 | import matplotlib as mpl
26 | import scipy.stats as ss
27 | import numpy.random as rnd
28 | import matplotlib.pyplot as plt
29 | from scipy.stats import gaussian_kde
30 | plt.ion();
31 | ```
32 |
33 | ```python
34 | (pdf_G1, grid1d) = import_from_nb("T2", ("pdf_G1", "grid1d"))
35 | ```
36 |
37 | # T8 - The ensemble (Monte-Carlo) approach
38 | **Monte-Carlo methods** are a class of computational algorithms that rely on random/stochastic sampling. They generally trade off higher (though random!) error for lower technical complexity [[1]](#Footnote-1:). Examples from optimisation include randomly choosing search directions, swarms, evolutionary mutations, or perturbations for gradient approximation. Another application area is the computation of (deterministic) integrals via sample averages, which is rooted in the fact that any integral can be formulated as an expectation, as well as in the law of large numbers (LLN). This is actually a surprisingly large class of problems, including for example a way to [approximate the value of $\pi$](https://en.wikipedia.org/wiki/Monte_Carlo_method#Overview). Moreover, many integrals of interest are inherently expectations, but over probability distributions that are not tractable, as they arise from a complicated random or uncertain process [[2]](#Footnote-2:), whereas a Monte-Carlo sample thereof can be obtained simply by simulating the process.
39 | $
40 | \newcommand{\Expect}[0]{\mathbb{E}}
41 | \newcommand{\NormDist}{\mathscr{N}}
42 | \newcommand{\mat}[1]{{\mathbf{{#1}}}}
43 | \newcommand{\bvec}[1]{{\mathbf{#1}}}
44 | \newcommand{\trsign}{{\mathsf{T}}}
45 | \newcommand{\tr}{^{\trsign}}
46 | \newcommand{\ceq}[0]{\mathrel{≔}}
47 | \newcommand{\xDim}[0]{D}
48 | \newcommand{\I}[0]{\mat{I}}
49 | \newcommand{\X}[0]{\mat{X}}
50 | \newcommand{\Y}[0]{\mat{Y}}
51 | \newcommand{\E}[0]{\mat{E}}
52 | \newcommand{\x}[0]{\bvec{x}}
53 | \newcommand{\y}[0]{\bvec{y}}
54 | \newcommand{\z}[0]{\bvec{z}}
55 | \newcommand{\bx}[0]{\bvec{\bar{x}}}
56 | \newcommand{\by}[0]{\bvec{\bar{y}}}
57 | \newcommand{\barC}[0]{\mat{\bar{C}}}
58 | \newcommand{\ones}[0]{\bvec{1}}
59 | \newcommand{\AN}[0]{\big( \I_N - \ones \ones\tr / N \big)}
60 | $
61 |
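As a concrete sketch of the Monte-Carlo integration idea above: the fraction of uniform
samples from the unit square that land inside the quarter unit disc approximates $\pi/4$
(the names below are just for illustration).

```python
nSamples = 10**6
pts = rnd.rand(nSamples, 2)            # uniform samples in the unit square
inside = (pts**2).sum(axis=1) < 1      # which samples fall inside the quarter disc
print("pi ≈", 4*inside.mean())         # sampling error shrinks like 1/sqrt(nSamples)
```
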
62 | **An ensemble** is an *i.i.d.* sample. I.e. a set of "members" ("particles", "realizations", or "sample points") that have been drawn ("sampled") independently from the same distribution. With the EnKF, these assumptions are generally tenuous, but pragmatic.
63 | In particular, an ensemble can be used to characterize uncertainty: either by using it to compute (estimate) *statistics* thereof, such as the mean, median, variance, covariance, skewness, confidence intervals, etc (any function of the ensemble can be seen as a "statistic"), or by using it to reconstruct the distribution/density from which it is sampled. The latter is illustrated by the plot below.
64 |
65 | Take a moment to digest its code. Note:
66 |
67 | - The sample/ensemble is plotted as thin narrow lines.
68 | Note that it is generated via `randn`, which samples from $\NormDist(0, 1)$.
69 | - The "Parametric" density estimate is defined by estimating the mean and the variance,
70 | and using those estimates to define a Gaussian density (with those parameters).
71 | - We will not detail the KDE method, but it can be considered as a "continuous" version of a histogram.
72 |
73 | ```python
74 | mu = 0
75 | sigma2 = 25
76 | N = 80
77 |
78 | @interact( seed=(1, 10), nbins=(2, 60), bw=(0.1, 1))
79 | def pdf_reconstructions(seed=5, nbins=10, bw=.3):
80 | rnd.seed(seed)
81 | E = mu + np.sqrt(sigma2)*rnd.randn(N)
82 |
83 | fig, ax = plt.subplots()
84 | ax.plot(grid1d, pdf_G1(grid1d, mu, sigma2), lw=5, label="True")
85 | ax.plot(E, np.zeros(N), '|k', ms=100, mew=.4, label="_raw ens")
86 | ax.hist(E, nbins, density=1, alpha=.7, color="C5", label="Histogram")
87 | ax.plot(grid1d, pdf_G1(grid1d, np.mean(E), np.var(E)), lw=5, label="Parametric")
88 | ax.plot(grid1d, gaussian_kde(E.ravel(), bw**2).evaluate(grid1d), lw=5, label="KDE")
89 | ax.set_ylim(top=(3*sigma2)**-.5)
90 | ax.legend()
91 | plt.show()
92 | ```
93 |
94 | **Exc -- A matter of taste?:**
95 | - Which approximation to the true pdf looks better?
96 | - Which approximation starts with more information?
97 | What is the downside of making such assumptions?
98 | - What value of `bw` causes the "KDE" method to most closely
99 | reproduce/recover the "Parametric" method?
100 | What about the "Histogram" method?
101 |   *PS: we might say that the KDE method "bridges" the other two.*
102 |
103 |
104 | The widget above illustrated how to estimate or reconstruct a distribution on the basis of a sample. But for the EnKF, we also need to know how to go the other way: drawing a sample from a (multivariate) Gaussian distribution...
105 |
106 | **Exc -- Multivariate Gaussian sampling:**
107 | Suppose $\z$ is a standard Gaussian,
108 | i.e. $p(\z) = \NormDist(\z \mid \bvec{0},\I_{\xDim})$,
109 | where $\I_{\xDim}$ is the $\xDim$-dimensional identity matrix.
110 | Let $\x = \mat{L}\z + \mu$.
111 |
112 | * (a -- optional). Refer to the exercise on [change of variables](T2%20-%20Gaussian%20distribution.ipynb#Exc-(optional)----Change-of-variables) to show that $p(\x) = \NormDist(\x \mid \mu, \mat{C})$, where $\mat{C} = \mat{L}^{}\mat{L}^T$.
113 | * (b). The code below samples $N$ realizations of $\x$
114 | and collects them in an ${\xDim}$-by-$N$ "ensemble matrix" $\E$.
115 | But `for` loops are slow in plain Python (and Matlab).
116 | Replace it with something akin to `E = mu + L@Z`.
117 | *Hint: this code snippet fails because it's trying to add a vector to a matrix.*
118 |
119 | ```python
120 | mu = np.array([1, 100, 5])
121 | xDim = len(mu)
122 | L = np.diag(1+np.arange(xDim))
123 | C = L @ L.T
124 | Z = rnd.randn(xDim, N)
125 |
126 | # Using a loop ("slow")
127 | E = np.zeros((xDim, N))
128 | for n in range(N):
129 | E[:, n] = mu + L@Z[:, n]
130 | ```
131 |
132 | ```python
133 | # show_answer('Gaussian sampling', 'b')
134 | ```
135 |
136 | The following prints some numbers that can be used to ascertain if you got it right.
137 | Note that the estimates will never be exact:
138 | they contain some amount of random error, a.k.a. ***sampling error***.
139 |
140 | ```python
141 | with np.printoptions(precision=1, suppress=True):
142 | print("Estimated mean =", np.mean(E, axis=1))
143 | print("Estimated cov =", np.cov(E), sep="\n")
144 | ```
145 |
146 | **Exc -- Moment estimation code:** Above, we used numpy's (`np`) functions to compute the sample-estimated mean and covariance matrix,
147 | $\bx$ and $\barC$,
148 | from the ensemble matrix $\E$.
149 | Now, instead, implement these estimators yourself:
150 | $$\begin{align}\bx &\ceq \frac{1}{N} \sum_{n=1}^N \x_n \,, \\
151 | \barC &\ceq \frac{1}{N-1} \sum_{n=1}^N (\x_n - \bx) (\x_n - \bx)^T \,. \end{align}$$
152 |
153 | ```python
154 | # Don't use numpy's mean, cov, but feel free to use a `for` loop.
155 | def estimate_mean_and_cov(E):
156 | xDim, N = E.shape
157 |
158 | ### FIX THIS ###
159 | x_bar = np.zeros(xDim)
160 | C_bar = np.zeros((xDim, xDim))
161 |
162 | return x_bar, C_bar
163 |
164 | x_bar, C_bar = estimate_mean_and_cov(E)
165 | with np.printoptions(precision=1):
166 | print("Mean =", x_bar)
167 | print("Covar =", C_bar, sep="\n")
168 | ```
169 |
170 | ```python
171 | # show_answer('ensemble moments, loop')
172 | ```
173 |
174 | **Exc -- An obsession?:** Why do we normalize by $(N-1)$ for the covariance computation?
175 |
176 | ```python
177 | # show_answer('Why (N-1)')
178 | ```
179 |
180 | It can be shown that the above estimators for the mean and the covariance are *consistent and unbiased*.
181 | ***Consistent*** means that if we let $N \rightarrow \infty$, their sampling error will vanish ("almost surely").
182 | ***Unbiased*** means that if we repeat the estimation experiment many times (but use a fixed, finite $N$),
183 | then the average of sampling errors will also vanish.
184 | Under relatively mild regularity conditions, the [absence of bias implies consistency](https://en.wikipedia.org/wiki/Consistent_estimator#Bias_versus_consistency).
185 |
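As a quick numerical sketch of consistency (the variable names here are just for illustration),
watch the sampling error of the mean estimator shrink, on average like $1/\sqrt{N}$:

```python
rnd.seed(3000)
for N_ in [10**2, 10**4, 10**6]:
    sample = rnd.randn(N_)
    print(f"N = {N_:>7}:  |error of mean estimate| = {abs(sample.mean()):.5f}")
```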
186 |
187 | The following computes a large number ($K$) of $\barC$ and $1/\barC$, estimated with a given ensemble size ($N$).
188 | Note that the true variance is $C = 1$.
189 | The histograms of the estimates are plotted, along with vertical lines displaying their mean values.
190 |
191 | ```python
192 | K = 10000
193 | @interact(N=(2, 30), bottom=True)
194 | def var_and_precision_estimates(N=4):
195 | E = rnd.randn(K, N)
196 | estims = np.var(E, ddof=1, axis=-1)
197 | bins = np.linspace(0, 6, 40)
198 | plt.figure()
199 | plt.hist(estims, bins, alpha=.6, density=1)
200 | plt.hist(1/estims, bins, alpha=.6, density=1)
201 | plt.axvline(np.mean(estims), color="C0", label="C")
202 | plt.axvline(np.mean(1/estims), color="C1", label="1/C")
203 | plt.legend()
204 | plt.show()
205 | ```
206 |
207 | **Exc -- There's bias, and then there's bias:**
208 | - Note that $1/\barC$ does not appear to be an unbiased estimate of $1/C = 1$.
209 | Explain this by referring to a well-known property of the expectation, $\Expect$.
210 | In view of this, consider the role and utility of "unbiasedness" in estimation.
211 | - What, roughly, is the dependence of the mean values (vertical lines) on the ensemble size?
212 | What do they tend to as $N$ goes to $0$?
213 | What about $+\infty$ ?
214 | - Optional: What are the theoretical distributions of $\barC$ and $1/\barC$ ?
215 |
216 | ```python
217 | # show_answer('variance estimate statistics')
218 | ```
219 |
220 | **Exc (optional) -- Error notions:**
221 | * (a). What's the difference between error and residual?
222 | * (b). What's the difference between error and bias?
223 | * (c). Show that `"mean-square-error" (RMSE^2) = Bias^2 + Var`.
224 | *Hint: Let $e = \hat{\theta} - \theta$ be the random "error" referred to above.
225 | Express each term using the expectation $\Expect$.*
226 |
227 | ```python
228 | # show_answer('errors')
229 | ```
230 |
231 | **Exc -- Vectorization:** Python (numpy) is quicker if you "vectorize" loops (similar to Matlab and other high-level languages).
232 | This is eminently possible with computations of ensemble moments:
233 | Let $\X \ceq
234 | \begin{bmatrix}
235 | \x_1 -\bx, & \ldots & \x_N -\bx
236 | \end{bmatrix} \,.$
237 | * (a). Show that $\X = \E \AN$, where $\ones$ is the column vector of length $N$ with all elements equal to $1$.
238 | *Hint: consider column $n$ of $\X$.*
239 |   *PS: it can be shown that $\ones \ones\tr / N$ and its complement are both "projection matrices".*
240 | * (b). Show that $\barC = \X \X^T /(N-1)$.
241 | * (c). Code up this latest formula for $\barC$ and insert it in `estimate_mean_and_cov(E)`.
242 |
243 | ```python
244 | # show_answer('ensemble moments vectorized')
245 | ```
246 |
247 | **Exc -- Moment estimation code, part 2:** The (sample estimate of the) cross-covariance between two random vectors, $\x$ and $\y$, is given by
248 | $$\begin{align}
249 | \barC_{\x,\y}
250 | &\ceq \frac{1}{N-1} \sum_{n=1}^N
251 | (\x_n - \bx) (\y_n - \by)^T \\\
252 | &= \X \Y^T /(N-1)
253 | \end{align}$$
254 | where $\Y$ is, similar to $\X$, the matrix whose columns are $\y_n - \by$ for $n=1,\ldots,N$.
255 | Note that this is simply the covariance formula, but for two different variables.
256 | I.e. if $\Y = \X$, then $\barC_{\x,\y} = \barC_{\x}$ (which we have denoted $\barC$ in the above).
257 |
258 | Implement the cross-covariance estimator in the code-cell below.
259 |
260 | ```python
261 | def estimate_cross_cov(Ex, Ey):
262 | Cxy = np.zeros((len(Ex), len(Ey))) ### INSERT ANSWER ###
263 | return Cxy
264 | ```
265 |
266 | ```python
267 | # show_answer('estimate cross')
268 | ```
269 |
270 | ## Summary
271 | Parametric assumptions (e.g. assuming Gaussianity) can be useful in approximating distributions.
272 | Sample covariance estimates can be expressed and computed in a vectorized form.
273 |
274 | ### Next: [T9 - Writing your own EnKF](T9%20-%20Writing%20your%20own%20EnKF.ipynb)
275 |
276 | - - -
277 |
278 | - ###### Footnote 1:
279 |
280 | Essentially its (pseudo) randomness means that it is easy to avoid biases.
281 | For example, the Monte-Carlo approach is particularly useful
282 | when grid-based quadrature is difficult, as is often the case for high-dimensional problems.
283 | A common misconception in DA is that MC is somehow more efficient
284 | than deterministic quadrature in high dimensions, $D$.
285 | The confusion arises because, from Chebyshev inequality, we know that
286 | the error of the MC approximation asymptotically converges to zero at a rate proportional to $1/\sqrt{N}$,
287 | while that of quadrature methods typically converges proportional to $1 / N^{1/D}$.
288 | But not only is the coefficient dependent on $D$ (and worse for MC),
289 | also (conjecture!) for any $D$ and $N$ you can always find a gridding strategy that has lower error.
290 |     For example, quasi-random designs (Latin hypercube, etc.) are an easy recommendation
291 |     in the pure context of hypercube integrals.
292 | - ###### Footnote 2:
293 |
294 | The derivation of the corresponding density might involve
295 | high-dimensional Jacobians for the change-of-variables formula,
296 | or its generalisation for non-bijective transformations,
297 | or to the Chapman-Kolmogorov equations in the case of interacting random variables,
298 | or its time-continuous form of Fokker-Planck.
299 |
300 |
301 |
302 | ### References
303 |
--------------------------------------------------------------------------------
/notebooks/scripts/T8 - Monte-Carlo & ensembles.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # jupyter:
3 | # jupytext:
4 | # formats: ipynb,scripts//py:light,scripts//md
5 | # text_representation:
6 | # extension: .py
7 | # format_name: light
8 | # format_version: '1.5'
9 | # jupytext_version: 1.17.2
10 | # kernelspec:
11 | # display_name: Python 3 (ipykernel)
12 | # language: python
13 | # name: python3
14 | # ---
15 |
16 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials"
17 | # !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s
18 |
19 | from resources import show_answer, interact, import_from_nb
20 | # %matplotlib inline
21 | import numpy as np
22 | import matplotlib as mpl
23 | import scipy.stats as ss
24 | import numpy.random as rnd
25 | import matplotlib.pyplot as plt
26 | from scipy.stats import gaussian_kde
27 | plt.ion();
28 |
29 | (pdf_G1, grid1d) = import_from_nb("T2", ("pdf_G1", "grid1d"))
30 |
31 | # # T8 - The ensemble (Monte-Carlo) approach
32 | # **Monte-Carlo methods** are a class of computational algorithms that rely on random/stochastic sampling. They generally trade off higher (though random!) error for lower technical complexity [[1]](#Footnote-1:). Examples from optimisation include randomly choosing search directions, swarms, evolutionary mutations, or perturbations for gradient approximation. Another application area is the computation of (deterministic) integrals via sample averages, which is rooted in the fact that any integral can be formulated as an expectation, as well as in the law of large numbers (LLN). This is actually a surprisingly large class of problems, including for example a way to [approximate the value of $\pi$](https://en.wikipedia.org/wiki/Monte_Carlo_method#Overview). Moreover, many integrals of interest are inherently expectations, but over probability distributions that are not tractable, as they arise from a complicated random or uncertain process [[2]](#Footnote-2:), whereas a Monte-Carlo sample thereof can be obtained simply by simulating the process.
33 | # $
34 | # \newcommand{\Expect}[0]{\mathbb{E}}
35 | # \newcommand{\NormDist}{\mathscr{N}}
36 | # \newcommand{\mat}[1]{{\mathbf{{#1}}}}
37 | # \newcommand{\bvec}[1]{{\mathbf{#1}}}
38 | # \newcommand{\trsign}{{\mathsf{T}}}
39 | # \newcommand{\tr}{^{\trsign}}
40 | # \newcommand{\ceq}[0]{\mathrel{≔}}
41 | # \newcommand{\xDim}[0]{D}
42 | # \newcommand{\I}[0]{\mat{I}}
43 | # \newcommand{\X}[0]{\mat{X}}
44 | # \newcommand{\Y}[0]{\mat{Y}}
45 | # \newcommand{\E}[0]{\mat{E}}
46 | # \newcommand{\x}[0]{\bvec{x}}
47 | # \newcommand{\y}[0]{\bvec{y}}
48 | # \newcommand{\z}[0]{\bvec{z}}
49 | # \newcommand{\bx}[0]{\bvec{\bar{x}}}
50 | # \newcommand{\by}[0]{\bvec{\bar{y}}}
51 | # \newcommand{\barC}[0]{\mat{\bar{C}}}
52 | # \newcommand{\ones}[0]{\bvec{1}}
53 | # \newcommand{\AN}[0]{\big( \I_N - \ones \ones\tr / N \big)}
54 | # $
55 | #
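#
# As a concrete sketch of the Monte-Carlo integration idea above: the fraction of uniform
# samples from the unit square that land inside the quarter unit disc approximates $\pi/4$
# (the names below are just for illustration).

nSamples = 10**6
pts = rnd.rand(nSamples, 2)            # uniform samples in the unit square
inside = (pts**2).sum(axis=1) < 1      # which samples fall inside the quarter disc
print("pi ≈", 4*inside.mean())         # sampling error shrinks like 1/sqrt(nSamples)
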
56 | # **An ensemble** is an *i.i.d.* sample. I.e. a set of "members" ("particles", "realizations", or "sample points") that have been drawn ("sampled") independently from the same distribution. With the EnKF, these assumptions are generally tenuous, but pragmatic.
57 | # In particular, an ensemble can be used to characterize uncertainty: either by using it to compute (estimate) *statistics* thereof, such as the mean, median, variance, covariance, skewness, confidence intervals, etc (any function of the ensemble can be seen as a "statistic"), or by using it to reconstruct the distribution/density from which it is sampled. The latter is illustrated by the plot below.
58 | #
59 | # Take a moment to digest its code. Note:
60 | #
61 | # - The sample/ensemble is plotted as thin narrow lines.
62 | # Note that it is generated via `randn`, which samples from $\NormDist(0, 1)$.
63 | # - The "Parametric" density estimate is defined by estimating the mean and the variance,
64 | # and using those estimates to define a Gaussian density (with those parameters).
65 | # - We will not detail the KDE method, but it can be considered as a "continuous" version of a histogram.
66 |
67 | # +
68 | mu = 0
69 | sigma2 = 25
70 | N = 80
71 |
72 | @interact( seed=(1, 10), nbins=(2, 60), bw=(0.1, 1))
73 | def pdf_reconstructions(seed=5, nbins=10, bw=.3):
74 | rnd.seed(seed)
75 | E = mu + np.sqrt(sigma2)*rnd.randn(N)
76 |
77 | fig, ax = plt.subplots()
78 | ax.plot(grid1d, pdf_G1(grid1d, mu, sigma2), lw=5, label="True")
79 | ax.plot(E, np.zeros(N), '|k', ms=100, mew=.4, label="_raw ens")
80 | ax.hist(E, nbins, density=1, alpha=.7, color="C5", label="Histogram")
81 | ax.plot(grid1d, pdf_G1(grid1d, np.mean(E), np.var(E)), lw=5, label="Parametric")
82 | ax.plot(grid1d, gaussian_kde(E.ravel(), bw**2).evaluate(grid1d), lw=5, label="KDE")
83 | ax.set_ylim(top=(3*sigma2)**-.5)
84 | ax.legend()
85 | plt.show()
86 |
87 |
88 | # -
89 |
90 | # **Exc -- A matter of taste?:**
91 | # - Which approximation to the true pdf looks better?
92 | # - Which approximation starts with more information?
93 | # What is the downside of making such assumptions?
94 | # - What value of `bw` causes the "KDE" method to most closely
95 | # reproduce/recover the "Parametric" method?
96 | # What about the "Histogram" method?
97 | #   *PS: we might say that the KDE method "bridges" the other two.*
98 |
99 | # The widget above illustrated how to estimate or reconstruct a distribution on the basis of a sample. But for the EnKF, we also need to know how to go the other way: drawing a sample from a (multivariate) Gaussian distribution...
100 | #
101 | # **Exc -- Multivariate Gaussian sampling:**
102 | # Suppose $\z$ is a standard Gaussian,
103 | # i.e. $p(\z) = \NormDist(\z \mid \bvec{0},\I_{\xDim})$,
104 | # where $\I_{\xDim}$ is the $\xDim$-dimensional identity matrix.
105 | # Let $\x = \mat{L}\z + \mu$.
106 | #
107 | # * (a -- optional). Refer to the exercise on [change of variables](T2%20-%20Gaussian%20distribution.ipynb#Exc-(optional)----Change-of-variables) to show that $p(\x) = \NormDist(\x \mid \mu, \mat{C})$, where $\mat{C} = \mat{L}^{}\mat{L}^T$.
108 | # * (b). The code below samples $N$ realizations of $\x$
109 | # and collects them in an ${\xDim}$-by-$N$ "ensemble matrix" $\E$.
110 | # But `for` loops are slow in plain Python (and Matlab).
111 | # Replace it with something akin to `E = mu + L@Z`.
112 | # *Hint: this code snippet fails because it's trying to add a vector to a matrix.*
113 |
114 | # +
115 | mu = np.array([1, 100, 5])
116 | xDim = len(mu)
117 | L = np.diag(1+np.arange(xDim))
118 | C = L @ L.T
119 | Z = rnd.randn(xDim, N)
120 |
121 | # Using a loop ("slow")
122 | E = np.zeros((xDim, N))
123 | for n in range(N):
124 | E[:, n] = mu + L@Z[:, n]
125 |
126 | # +
127 | # show_answer('Gaussian sampling', 'b')
128 | # -
129 |
130 | # The following prints some numbers that can be used to ascertain if you got it right.
131 | # Note that the estimates will never be exact:
132 | # they contain some amount of random error, a.k.a. ***sampling error***.
133 |
134 | with np.printoptions(precision=1, suppress=True):
135 | print("Estimated mean =", np.mean(E, axis=1))
136 | print("Estimated cov =", np.cov(E), sep="\n")
137 |
138 |
139 | # **Exc -- Moment estimation code:** Above, we used numpy's (`np`) functions to compute the sample-estimated mean and covariance matrix,
140 | # $\bx$ and $\barC$,
141 | # from the ensemble matrix $\E$.
142 | # Now, instead, implement these estimators yourself:
143 | # $$\begin{align}\bx &\ceq \frac{1}{N} \sum_{n=1}^N \x_n \,, \\
144 | # \barC &\ceq \frac{1}{N-1} \sum_{n=1}^N (\x_n - \bx) (\x_n - \bx)^T \,. \end{align}$$
145 |
146 | # +
147 | # Don't use numpy's mean, cov, but feel free to use a `for` loop.
148 | def estimate_mean_and_cov(E):
149 | xDim, N = E.shape
150 |
151 | ### FIX THIS ###
152 | x_bar = np.zeros(xDim)
153 | C_bar = np.zeros((xDim, xDim))
154 |
155 | return x_bar, C_bar
156 |
157 | x_bar, C_bar = estimate_mean_and_cov(E)
158 | with np.printoptions(precision=1):
159 | print("Mean =", x_bar)
160 | print("Covar =", C_bar, sep="\n")
161 |
162 | # +
163 | # show_answer('ensemble moments, loop')
164 | # -
165 |
166 | # **Exc -- An obsession?:** Why do we normalize by $(N-1)$ for the covariance computation?
167 |
168 | # +
169 | # show_answer('Why (N-1)')
170 | # -
171 |
172 | # It can be shown that the above estimators for the mean and the covariance are *consistent and unbiased*.
173 | # ***Consistent*** means that if we let $N \rightarrow \infty$, their sampling error will vanish ("almost surely").
174 | # ***Unbiased*** means that if we repeat the estimation experiment many times (but use a fixed, finite $N$),
175 | # then the average of sampling errors will also vanish.
176 | # Under relatively mild regularity conditions, the [absence of bias implies consistency](https://en.wikipedia.org/wiki/Consistent_estimator#Bias_versus_consistency).
177 |
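# As a quick numerical sketch of consistency (the variable names here are just for illustration),
# watch the sampling error of the mean estimator shrink, on average like $1/\sqrt{N}$:

rnd.seed(3000)
for N_ in [10**2, 10**4, 10**6]:
    sample = rnd.randn(N_)
    print(f"N = {N_:>7}:  |error of mean estimate| = {abs(sample.mean()):.5f}")
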
178 | # The following computes a large number ($K$) of $\barC$ and $1/\barC$, estimated with a given ensemble size ($N$).
179 | # Note that the true variance is $C = 1$.
180 | # The histograms of the estimates are plotted, along with vertical lines displaying their mean values.
181 |
182 | K = 10000
183 | @interact(N=(2, 30), bottom=True)
184 | def var_and_precision_estimates(N=4):
185 | E = rnd.randn(K, N)
186 | estims = np.var(E, ddof=1, axis=-1)
187 | bins = np.linspace(0, 6, 40)
188 | plt.figure()
189 | plt.hist(estims, bins, alpha=.6, density=1)
190 | plt.hist(1/estims, bins, alpha=.6, density=1)
191 | plt.axvline(np.mean(estims), color="C0", label="C")
192 | plt.axvline(np.mean(1/estims), color="C1", label="1/C")
193 | plt.legend()
194 | plt.show()
195 |
196 |
197 | # **Exc -- There's bias, and then there's bias:**
198 | # - Note that $1/\barC$ does not appear to be an unbiased estimate of $1/C = 1$.
199 | # Explain this by referring to a well-known property of the expectation, $\Expect$.
200 | # In view of this, consider the role and utility of "unbiasedness" in estimation.
201 | # - What, roughly, is the dependence of the mean values (vertical lines) on the ensemble size?
202 | # What do they tend to as $N$ goes to $0$?
203 | # What about $+\infty$ ?
204 | # - Optional: What are the theoretical distributions of $\barC$ and $1/\barC$ ?
205 |
206 | # +
207 | # show_answer('variance estimate statistics')
208 | # -
209 |
210 | # **Exc (optional) -- Error notions:**
211 | # * (a). What's the difference between error and residual?
212 | # * (b). What's the difference between error and bias?
213 | # * (c). Show that `"mean-square-error" (RMSE^2) = Bias^2 + Var`.
214 | # *Hint: Let $e = \hat{\theta} - \theta$ be the random "error" referred to above.
215 | # Express each term using the expectation $\Expect$.*
216 |
217 | # +
218 | # show_answer('errors')
219 | # -
220 |
221 | # **Exc -- Vectorization:** Python (numpy) is quicker if you "vectorize" loops (similar to Matlab and other high-level languages).
222 | # This is eminently possible with computations of ensemble moments:
223 | # Let $\X \ceq
224 | # \begin{bmatrix}
225 | # \x_1 -\bx, & \ldots & \x_N -\bx
226 | # \end{bmatrix} \,.$
227 | # * (a). Show that $\X = \E \AN$, where $\ones$ is the column vector of length $N$ with all elements equal to $1$.
228 | # *Hint: consider column $n$ of $\X$.*
229 | #   *PS: it can be shown that $\ones \ones\tr / N$ and its complement are both "projection matrices".*
230 | # * (b). Show that $\barC = \X \X^T /(N-1)$.
231 | # * (c). Code up this latest formula for $\barC$ and insert it in `estimate_mean_and_cov(E)`.
232 |
233 | # +
234 | # show_answer('ensemble moments vectorized')
235 | # -
236 |
237 | # **Exc -- Moment estimation code, part 2:** The (sample estimate of the) cross-covariance between two random vectors, $\x$ and $\y$, is given by
238 | # $$\begin{align}
239 | # \barC_{\x,\y}
240 | # &\ceq \frac{1}{N-1} \sum_{n=1}^N
241 | # (\x_n - \bx) (\y_n - \by)^T \\\
242 | # &= \X \Y^T /(N-1)
243 | # \end{align}$$
244 | # where $\Y$ is, similar to $\X$, the matrix whose columns are $\y_n - \by$ for $n=1,\ldots,N$.
245 | # Note that this is simply the covariance formula, but for two different variables.
246 | # I.e. if $\Y = \X$, then $\barC_{\x,\y} = \barC_{\x}$ (which we have denoted $\barC$ in the above).
247 | #
248 | # Implement the cross-covariance estimator in the code-cell below.
249 |
250 | def estimate_cross_cov(Ex, Ey):
251 | Cxy = np.zeros((len(Ex), len(Ey))) ### INSERT ANSWER ###
252 | return Cxy
253 |
254 | # +
255 | # show_answer('estimate cross')
256 | # -
257 |
258 | # ## Summary
259 | # Parametric assumptions (e.g. assuming Gaussianity) can be useful in approximating distributions.
260 | # Sample covariance estimates can be expressed and computed in a vectorized form.
261 | #
262 | # ### Next: [T9 - Writing your own EnKF](T9%20-%20Writing%20your%20own%20EnKF.ipynb)
263 | #
264 | # - - -
265 | #
266 | # - ###### Footnote 1:
267 | #
268 | # Essentially its (pseudo) randomness means that it is easy to avoid biases.
269 | # For example, the Monte-Carlo approach is particularly useful
270 | # when grid-based quadrature is difficult, as is often the case for high-dimensional problems.
271 | # A common misconception in DA is that MC is somehow more efficient
272 | # than deterministic quadrature in high dimensions, $D$.
273 | # The confusion arises because, from Chebyshev inequality, we know that
274 | # the error of the MC approximation asymptotically converges to zero at a rate proportional to $1/\sqrt{N}$,
275 | # while that of quadrature methods typically converges proportional to $1 / N^{1/D}$.
276 | # But not only is the coefficient dependent on $D$ (and worse for MC),
277 | # also (conjecture!) for any $D$ and $N$ you can always find a gridding strategy that has lower error.
278 | # For example, quasi-random designs (Latin hypercube, etc.) are an easy recommendation
279 | # in the pure context of hypercube integrals.
280 | # - ###### Footnote 2:
281 | #
282 | # The derivation of the corresponding density might involve
283 | # high-dimensional Jacobians for the change-of-variables formula,
284 | # or its generalisation for non-bijective transformations,
285 | # or to the Chapman-Kolmogorov equations in the case of interacting random variables,
286 | # or its time-continuous form of Fokker-Planck.
287 | #
288 | #
289 | #
290 | # ### References
291 |
--------------------------------------------------------------------------------
/notebooks/scripts/T9 - Writing your own EnKF.md:
--------------------------------------------------------------------------------
1 | ---
2 | jupyter:
3 | jupytext:
4 | formats: ipynb,scripts//py:light,scripts//md
5 | text_representation:
6 | extension: .md
7 | format_name: markdown
8 | format_version: '1.3'
9 | jupytext_version: 1.17.2
10 | kernelspec:
11 | display_name: Python 3 (ipykernel)
12 | language: python
13 | name: python3
14 | ---
15 |
16 | ```python
17 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials"
18 | !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s
19 | ```
20 |
21 | ```python
22 | from resources import show_answer, EnKF_animation
23 | import numpy as np
24 | import matplotlib as mpl
25 | import numpy.random as rnd
26 | import matplotlib.pyplot as plt
27 | from tqdm.auto import tqdm
28 | plt.ion();
29 | ```
30 |
31 | # T9 - Writing your own EnKF
32 | In this tutorial we're going to code an EnKF implementation using numpy.
33 | As with the KF, the EnKF consists of the recursive application of
34 | a forecast step and an analysis step.
35 | $
36 | \newcommand{\Reals}{\mathbb{R}}
37 | \newcommand{\Expect}[0]{\mathbb{E}}
38 | \newcommand{\NormDist}{\mathscr{N}}
39 | \newcommand{\DynMod}[0]{\mathscr{M}}
40 | \newcommand{\ObsMod}[0]{\mathscr{H}}
41 | \newcommand{\mat}[1]{{\mathbf{{#1}}}}
42 | \newcommand{\bvec}[1]{{\mathbf{#1}}}
43 | \newcommand{\trsign}{{\mathsf{T}}}
44 | \newcommand{\tr}{^{\trsign}}
45 | \newcommand{\ceq}[0]{\mathrel{≔}}
46 | \newcommand{\xDim}[0]{D}
47 | \newcommand{\supa}[0]{^\text{a}}
48 | \newcommand{\supf}[0]{^\text{f}}
49 | \newcommand{\I}[0]{\mat{I}}
50 | \newcommand{\K}[0]{\mat{K}}
51 | \newcommand{\bP}[0]{\mat{P}}
52 | \newcommand{\bH}[0]{\mat{H}}
53 | \newcommand{\R}[0]{\mat{R}}
54 | \newcommand{\Q}[0]{\mat{Q}}
55 | \newcommand{\Ri}[0]{\R^{-1}}
56 | \newcommand{\X}[0]{\mat{X}}
57 | \newcommand{\Y}[0]{\mat{Y}}
58 | \newcommand{\E}[0]{\mat{E}}
59 | \newcommand{\x}[0]{\bvec{x}}
60 | \newcommand{\y}[0]{\bvec{y}}
61 | \newcommand{\q}[0]{\bvec{q}}
62 | \newcommand{\r}[0]{\bvec{r}}
63 | \newcommand{\bx}[0]{\bvec{\bar{x}}}
64 | \newcommand{\by}[0]{\bvec{\bar{y}}}
65 | \newcommand{\barP}[0]{\mat{\bar{P}}}
66 | \newcommand{\barK}[0]{\mat{\bar{K}}}
67 | \newcommand{\D}[0]{\mat{D}}
68 | \newcommand{\Dobs}[0]{\mat{D}_{\text{obs}}}
69 | \newcommand{\ones}[0]{\bvec{1}}
70 | $
71 |
72 |
73 | This presentation follows the traditional template, presenting the EnKF as "the Monte Carlo version of the KF
74 | where the state covariance is estimated by the ensemble covariance".
75 | It is not obvious that this postulated method should work;
76 | indeed, it is only justified upon inspection of its properties,
77 | deferred to later.
78 |
79 |
80 | NB:
81 | Since we're going to focus on a single filtering cycle (at a time),
82 | the subscript $k$ is dropped. Moreover,
83 | the superscript $f$ indicates that $\{\x_n\supf\}_{n=1..N}$ is the forecast (prior) ensemble,
84 | while the superscript $a$ indicates that $\{\x_n\supa\}_{n=1..N}$ is the analysis (posterior) ensemble.
85 |
86 |
87 | ### The forecast step
88 | Suppose $\{\x_n\supa\}_{n=1..N}$ is an iid. sample from $p(\x_{k-1} \mid \y_1,\ldots, \y_{k-1})$, which may or may not be Gaussian.
89 |
90 | The forecast step of the EnKF consists of a Monte Carlo simulation
91 | of the forecast dynamics for each $\x_n$:
92 | $$
93 | \forall n, \quad \x\supf_n = \DynMod(\x_n\supa) + \q_n \,, \\
94 | $$
95 | where $\{\q_n\}_{n=1..N}$ are sampled iid. from $\NormDist(\bvec{0},\Q)$,
96 | or whatever noise model is assumed,
97 | and $\DynMod$ is the model dynamics.
98 | The dynamics could consist of *any* function, i.e. the EnKF can be applied with nonlinear models.
99 |
100 | The ensemble, $\{\x_n\supf\}_{n=1..N}$, is then an iid. sample from the forecast pdf,
101 | $p(\x_k \mid \y_1,\ldots,\y_{k-1})$. This follows from the definition of the latter, so it is a conceptually simple way to obtain (a sample from) this pdf. However, before Monte-Carlo methods were computationally feasible, obtaining the forecast pdf required evaluating the [Chapman-Kolmogorov equation](https://en.wikipedia.org/wiki/Chapman%E2%80%93Kolmogorov_equation), which constituted a major hurdle for filtering methods.
102 |
103 | ### The analysis update step
104 | of the ensemble is given by:
105 | $$\begin{align}
106 | \forall n, \quad \x\supa_n &= \x_n\supf + \barK \left\{\y - \r_n - \ObsMod(\x_n\supf) \right\}
107 | \,, \\
108 | \text{or,}\quad
109 | \E\supa &= \E\supf + \barK \left\{\y\ones\tr - \Dobs - \ObsMod(\E\supf) \right\} \,,
110 | \tag{4}
111 | \end{align}
112 | $$
113 | where the "observation perturbations", $\r_n$, are sampled iid. from the observation noise model, e.g. $\NormDist(\bvec{0},\R)$,
114 | and form the columns of $\Dobs$,
115 | and the observation operator (again, any type of function), $\ObsMod$, is applied column-wise to $\E\supf$.
116 |
117 | The gain $\barK$ is defined by inserting the ensemble estimates for
118 | * (i) $\bP\supf \bH\tr$: the cross-covariance between $\x\supf$ and $\ObsMod(\x\supf)$, and
119 | * (ii) $\bH \bP\supf \bH\tr$: the covariance matrix of $\ObsMod(\x\supf)$,
120 |
121 | in the formula for $\K$, namely eqn. (K1) of [T5](T5%20-%20Multivariate%20Kalman%20filter.ipynb).
122 | Using the estimators from [T8](T8%20-%20Monte-Carlo%20%26%20ensembles.ipynb) yields
123 |
124 | $$\begin{align}
125 | \barK &= \X \Y\tr ( \Y \Y\tr + (N{-}1) \R )^{-1} \,, \tag{5a}
126 | \end{align}
127 | $$
128 |
129 | where $\Y \in \Reals^{P \times N}$
130 | is the centered, *observed* ensemble
131 | $\Y \ceq
132 | \begin{bmatrix}
133 | \y_1 -\by, & \ldots & \y_n -\by, & \ldots & \y_N -\by
134 | \end{bmatrix} \,,$ where $\y_n = \ObsMod(\x_n\supf)$.
135 |
136 | The EnKF is summarized in the animation below.
137 |
138 | ```python
139 | EnKF_animation()
140 | ```
141 |
142 | #### Exc -- Woodbury for the ensemble subspace
143 | (a) Use the Woodbury identity (C2) of [T5](T5%20-%20Multivariate%20Kalman%20filter.ipynb) to show that eqn. (5a) can also be written
144 | $$\begin{align}
145 | \barK &= \X ( \Y\tr \Ri \Y + (N{-}1)\I_N )^{-1} \Y\tr \Ri \,. \tag{5b}
146 | \end{align}
147 | $$
148 | (b) What is the potential benefit of (5b) vs. (5a) ?
149 |
150 |
151 | #### Exc -- KG workings
152 | The above animation assumed that the observation operator is just the identity matrix, $\I$, rather than a general observation operator, $\ObsMod()$. Meanwhile, the Kalman gain used by the EnKF, eqn. (5a), is applicable for any $\ObsMod()$. On the other hand, the formula (5a) consists solely of linear algebra. Therefore it cannot perfectly represent any general (nonlinear) $\ObsMod()$. So how does it actually treat the observation operator? What meaning can we assign to the resulting updates?
153 | *Hint*: consider the limit of $\R \rightarrow 0$.
154 |
155 |
156 | #### Exc -- EnKF nobias (a)
157 | Consider the ensemble averages,
158 | - $\bx\supa = \frac{1}{N}\sum_{n=1}^N \x\supa_n$, and
159 | - $\bx\supf = \frac{1}{N}\sum_{n=1}^N \x\supf_n$,
160 |
161 | and recall that the analysis step, eqn. (4), defines $\x\supa_n$ from $\x\supf_n$.
162 |
163 |
164 | (a) Show that, in case $\ObsMod$ is linear (the matrix $\bH$),
165 | $$\begin{align}
166 | \Expect \bx\supa &= \bx\supf + \barK \left\{\y\ones\tr - \bH\bx\supf \right\} \,, \tag{6}
167 | \end{align}
168 | $$
169 | where the expectation, $\Expect$, is taken with respect to $\Dobs$ only (i.e. not the sampling of the forecast ensemble, $\E\supf$ itself).
170 |
171 | What does this mean?
172 |
173 |
174 | ```python
175 | # show_answer("EnKF_nobias_a")
176 | ```
177 |
178 | #### Exc (optional) -- EnKF nobias (b)
179 | Consider the ensemble covariance matrices:
180 | $$\begin{align}
181 | \barP\supf &= \frac{1}{N-1} \X{\X}\tr \,, \tag{7a} \\\
182 | \barP\supa &= \frac{1}{N-1} \X\supa{\X\supa}\tr \,. \tag{7b}
183 | \end{align}$$
184 |
185 | Now, denote the centralized observation perturbations
186 | $\D \ceq
187 | \begin{bmatrix}
188 | \r_1 -\bar{\r}, & \ldots & \r_n -\bar{\r}, & \ldots & \r_N -\bar{\r}
189 | \end{bmatrix} $.
190 | Note that $\D \ones = \bvec{0}$ and that
191 | $$
192 | \begin{align}
193 | \label{eqn:R_sample_cov_of_D}
194 | \frac{1}{N-1} \D \D\tr &= \R \,, \tag{9a} \\\
195 | \label{eqn:zero_AD_cov}
196 | \X \D\tr &= \bvec{0} \tag{9b}
197 | \end{align}
198 | $$
199 | are satisfied in the expected sense, i.e. upon taking the expectation of their left-hand sides.
200 | Thereby, show that
201 |
202 | $$\begin{align}
203 | \Expect \, \barP\supa &= [\I_{\xDim} - \barK \bH]\barP\supf \, . \tag{10}
204 | \end{align}$$
205 |
206 | ```python
207 | # show_answer("EnKF_nobias_b")
208 | ```
209 |
210 | #### Exc (optional) -- EnKF bias (c)
211 | Show that, if no observation perturbations are used in eqn. (4), then $\barP\supa$ would be too small.
212 |
213 | ```python
214 | # show_answer("EnKF_without_perturbations")
215 | ```
216 |
217 | ## Experimental setup
218 |
219 | Before writing the EnKF, we'll set up an experiment to test it with, so that you can check whether your implementation actually works.
220 |
221 | To that end, we'll use the Lorenz-63 model, from [T7](T7%20-%20Chaos%20%26%20Lorenz%20[optional].ipynb). The coupled ODEs are recalled here, but with some of the parameters fixed.
222 |
223 | ```python
224 | xDim = 3
225 |
226 | def dxdt(x, sig=10, rho=28, beta=8/3):
227 | x,y,z = x
228 | d = np.zeros(3)
229 | d[0] = sig*(y - x)
230 | d[1] = rho*x - y - x*z
231 | d[2] = x*y - beta*z
232 | return d
233 | ```
234 |
235 | Next, we make the forecast model $\DynMod$ out of $\frac{d \x}{dt}$ such that $\x(t+dt) = \DynMod(\x(t),t,dt)$. We'll make use of the "4th order Runge-Kutta" integrator `rk4`.
236 |
237 | ```python
238 | from dapper.mods.integration import rk4
239 |
240 | def Dyn(E, t0, dt):
241 |
242 | def step(x0):
243 | return rk4(lambda x, t: dxdt(x), x0, t0, dt)
244 |
245 | if E.ndim == 1:
246 | # Truth (single state vector) case
247 | E = step(E)
248 | else:
249 | # Ensemble case
250 | for n in range(E.shape[1]):
251 | E[:, n] = step(E[:, n])
252 |
253 | return E
254 |
255 | Q12 = np.zeros((xDim, xDim))
256 | Q = Q12 @ Q12.T
257 | ```
258 |
259 | Notice the loop over each ensemble member. For better performance, this should be vectorized, if possible. Alternatively, if the forecast model is computationally demanding (as is typically the case in real applications), the loop should be parallelized, i.e. the forecast simulations distributed across separate processors or computers.
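
For this small model, vectorization is straightforward, since the right-hand side of the ODE only involves elementwise arithmetic. The following sketch (the names `dxdt_vect` and `Dyn_vect` are ours, and it assumes that `rk4` accepts array-valued states, which holds for a plain Runge-Kutta implementation) propagates all members in a single call.

```python
def dxdt_vect(x, sig=10, rho=28, beta=8/3):
    """Like `dxdt`, but accepts arrays of shape (3,) or (3, N)."""
    x1, x2, x3 = x  # rows
    return np.array([sig*(x2 - x1),
                     rho*x1 - x2 - x1*x3,
                     x1*x2 - beta*x3])

def Dyn_vect(E, t0, dt):
    """Propagate a single state or a whole ensemble in one call."""
    return rk4(lambda x, t: dxdt_vect(x), E, t0, dt)
```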
260 |
261 |
262 | The following are the time settings that we will use
263 |
264 | ```python
265 | dt = 0.01 # integrational time step
266 | dko = 25 # number of steps between observations
267 | dto = dko*dt # time between observations
268 | Ko = 60 # total number of observations
269 | nTime = dko*(Ko+1) # total number of time steps
270 | ```
271 |
272 | Initial conditions
273 |
274 | ```python
275 | xa = np.array([1.509, -1.531, 25.46])
276 | Pa12 = np.eye(3)
277 | ```
278 |
279 | Observation model settings
280 |
281 | ```python
282 | p = 3 # ndim obs
283 | def Obs(E, t):
284 | return E[:p] if E.ndim == 1 else E[:p, :]
285 |
286 | R12 = np.sqrt(2)*np.eye(p)
287 | R = R12 @ R12.T
288 | ```
289 |
290 | Generate synthetic truth and observations
291 |
292 | ```python
293 | # Init
294 | truths = np.zeros((nTime+1, xDim))
295 | obsrvs = np.zeros((Ko+1, p))
296 | truths[0] = xa + Pa12 @ rnd.randn(xDim)
297 | ```
298 |
299 | ```python
300 | # Loop
301 | for k in range(1, nTime+1):
302 | truths[k] = Dyn(truths[k-1], (k-1)*dt, dt)
303 | truths[k] += Q12 @ rnd.randn(xDim)
304 | if k % dko == 0:
305 |         ko = k//dko - 1  # observation index (a new name, so the constant `Ko` is not overwritten)
306 |         obsrvs[ko] = Obs(truths[k], np.nan) + R12 @ rnd.randn(p)
307 | ```
308 |
309 | ## EnKF implementation
310 |
311 |
312 | We will make use of `estimate_mean_and_cov` and `estimate_cross_cov` from the previous section. Paste them in below.
313 |
314 | ```python
315 | # def estimate_mean_and_cov ...
316 | ```
317 |
318 | **Exc -- EnKF implementation:** Complete the code below
319 |
320 | ```python
321 | # Useful linear algebra: B/A (= B @ inv(A)) can be computed as nla.solve(A.T, B.T).T
322 | import numpy.linalg as nla
323 |
324 | ens_means = np.zeros((nTime+1, xDim))
325 | ens_vrncs = np.zeros((nTime+1, xDim))
326 |
327 | def my_EnKF(N):
328 | """My implementation of the EnKF."""
329 | ### Init ###
330 | E = np.zeros((xDim, N))
331 | for k in tqdm(range(1, nTime+1)):
332 | t = k*dt
333 | ### Forecast ##
334 | # E = ... # use model
335 | # E = ... # add noise
336 | if k % dko == 0:
337 | ### Analysis ##
338 | y = obsrvs[[k//dko-1]].T # current observation
339 | Eo = Obs(E, t) # observed ensemble
340 | # Compute ensemble moments
341 | PH = ...
342 | HPH = ...
343 | # Compute Kalman Gain
344 | KG = ...
345 | # Generate perturbations
346 | Perturb = ...
347 | # Update ensemble with KG
348 | # E = ...
349 | # Save statistics
350 | ens_means[k] = np.mean(E, axis=1)
351 | ens_vrncs[k] = np.var(E, axis=1, ddof=1)
352 | ```
353 |
354 | Notice that we only store some stats (`ens_means`). This is because, in large systems,
355 | storing the entire ensemble (let alone its covariance) for every time step would require too much memory.
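
For instance (a hypothetical back-of-envelope, not a property of this experiment): a state of dimension $10^7$ with $N = 100$ members stored in double precision already occupies about 8 GB per time step.

```python
# Hypothetical sizes, purely for illustration
xDim_big, N_big = 10**7, 100
print(xDim_big * N_big * 8 / 1e9, "GB per stored ensemble")  # => 8.0 GB
```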
356 |
357 | ```python
358 | # show_answer('EnKF v1')
359 | ```
360 |
361 | Now let's try out its capabilities
362 |
363 | ```python
364 | # Run assimilation
365 | my_EnKF(10)
366 |
367 | # Plot
368 | fig, axs = plt.subplots(nrows=3, sharex=True)
369 | for i in range(3):
370 | axs[i].plot(dt*np.arange(nTime+1), truths [:, i], 'k', label="Truth")
371 | axs[i].plot(dt*np.arange(nTime+1), ens_means[:, i], 'b', label="Estimate")
372 | if i
423 |
424 | ### References
425 |
--------------------------------------------------------------------------------
/notebooks/scripts/T9 - Writing your own EnKF.py:
--------------------------------------------------------------------------------
1 | # ---
2 | # jupyter:
3 | # jupytext:
4 | # formats: ipynb,scripts//py:light,scripts//md
5 | # text_representation:
6 | # extension: .py
7 | # format_name: light
8 | # format_version: '1.5'
9 | # jupytext_version: 1.17.2
10 | # kernelspec:
11 | # display_name: Python 3 (ipykernel)
12 | # language: python
13 | # name: python3
14 | # ---
15 |
16 | remote = "https://raw.githubusercontent.com/nansencenter/DA-tutorials"
17 | # !wget -qO- {remote}/master/notebooks/resources/colab_bootstrap.sh | bash -s
18 |
19 | from resources import show_answer, EnKF_animation
20 | import numpy as np
21 | import matplotlib as mpl
22 | import numpy.random as rnd
23 | import matplotlib.pyplot as plt
24 | from tqdm.auto import tqdm
25 | plt.ion();
26 |
27 | # # T9 - Writing your own EnKF
28 | # In this tutorial we're going to code an EnKF implementation using numpy.
29 | # As with the KF, the EnKF consists of the recursive application of
30 | # a forecast step and an analysis step.
31 | # $
32 | # \newcommand{\Reals}{\mathbb{R}}
33 | # \newcommand{\Expect}[0]{\mathbb{E}}
34 | # \newcommand{\NormDist}{\mathscr{N}}
35 | # \newcommand{\DynMod}[0]{\mathscr{M}}
36 | # \newcommand{\ObsMod}[0]{\mathscr{H}}
37 | # \newcommand{\mat}[1]{{\mathbf{{#1}}}}
38 | # \newcommand{\bvec}[1]{{\mathbf{#1}}}
39 | # \newcommand{\trsign}{{\mathsf{T}}}
40 | # \newcommand{\tr}{^{\trsign}}
41 | # \newcommand{\ceq}[0]{\mathrel{≔}}
42 | # \newcommand{\xDim}[0]{D}
43 | # \newcommand{\supa}[0]{^\text{a}}
44 | # \newcommand{\supf}[0]{^\text{f}}
45 | # \newcommand{\I}[0]{\mat{I}}
46 | # \newcommand{\K}[0]{\mat{K}}
47 | # \newcommand{\bP}[0]{\mat{P}}
48 | # \newcommand{\bH}[0]{\mat{H}}
49 | # \newcommand{\R}[0]{\mat{R}}
50 | # \newcommand{\Q}[0]{\mat{Q}}
51 | # \newcommand{\Ri}[0]{\R^{-1}}
52 | # \newcommand{\X}[0]{\mat{X}}
53 | # \newcommand{\Y}[0]{\mat{Y}}
54 | # \newcommand{\E}[0]{\mat{E}}
55 | # \newcommand{\x}[0]{\bvec{x}}
56 | # \newcommand{\y}[0]{\bvec{y}}
57 | # \newcommand{\q}[0]{\bvec{q}}
58 | # \newcommand{\r}[0]{\bvec{r}}
59 | # \newcommand{\bx}[0]{\bvec{\bar{x}}}
60 | # \newcommand{\by}[0]{\bvec{\bar{y}}}
61 | # \newcommand{\barP}[0]{\mat{\bar{P}}}
62 | # \newcommand{\barK}[0]{\mat{\bar{K}}}
63 | # \newcommand{\D}[0]{\mat{D}}
64 | # \newcommand{\Dobs}[0]{\mat{D}_{\text{obs}}}
65 | # \newcommand{\ones}[0]{\bvec{1}}
66 | # $
67 |
68 | # This presentation follows the traditional template, presenting the EnKF as the "the Monte Carlo version of the KF
69 | # where the state covariance is estimated by the ensemble covariance".
70 | # It is not obvious that this postulated method should work;
71 | # indeed, it is only justified upon inspection of its properties,
72 | # deferred to later.
73 | #
74 | #
75 | # NB:
76 | # Since we're going to focus on a single filtering cycle (at a time),
77 | # the subscript $k$ is dropped. Moreover,
78 | # The superscript $f$ indicates that $\{\x_n\supf\}_{n=1..N}$ is the forecast (prior) ensemble.
79 | # The superscript $a$ indicates that $\{\x_n\supa\}_{n=1..N}$ is the analysis (posterior) ensemble.
80 | #
81 | #
82 | # ### The forecast step
83 | # Suppose $\{\x_n\supa\}_{n=1..N}$ is an iid. sample from $p(\x_{k-1} \mid \y_1,\ldots, \y_{k-1})$, which may or may not be Gaussian.
84 | #
85 | # The forecast step of the EnKF consists of a Monte Carlo simulation
86 | # of the forecast dynamics for each $\x_n$:
87 | # $$
88 | # \forall n, \quad \x\supf_n = \DynMod(\x_n\supa) + \q_n \,, \\
89 | # $$
90 | # where $\{\q_n\}_{n=1..N}$ are sampled iid. from $\NormDist(\bvec{0},\Q)$,
91 | # or whatever noise model is assumed,
92 | # and $\DynMod$ is the model dynamics.
93 | # The dynamics could consist of *any* function, i.e. the EnKF can be applied with nonlinear models.
94 | #
95 | # The ensemble, $\{\x_n\supf\}_{n=1..N}$, is then an iid. sample from the forecast pdf,
96 | # $p(\x_k \mid \y_1,\ldots,\y_{k-1})$. This follows from the definition of the latter, so it is a relatively trivial idea and way to obtain this pdf. However, before Monte-Carlo methods were computationally feasible, the computation of the forecast pdf required computing the [Chapman-Kolmogorov equation](https://en.wikipedia.org/wiki/Chapman%E2%80%93Kolmogorov_equation), which constituted a major hurdle for filtering methods.
97 | #
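# As a minimal sketch (with placeholder names: `DynMod_func` is any column-wise
# model propagator and `Q12_` any square root of the model noise covariance;
# neither is defined here), the forecast step in code amounts to:

# +
def enkf_forecast(E, DynMod_func, Q12_):
    """Sketch of the EnKF forecast step: propagate each member, then add noise."""
    E = DynMod_func(E)                      # apply the model column-wise
    return E + Q12_ @ rnd.randn(*E.shape)   # add q_n ~ N(0, Q), where Q = Q12_ @ Q12_.T
# -
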
98 | # ### The analysis update step
99 | # of the ensemble is given by:
100 | # $$\begin{align}
101 | # \forall n, \quad \x\supa_n &= \x_n\supf + \barK \left\{\y - \r_n - \ObsMod(\x_n\supf) \right\}
102 | # \,, \\
103 | # \text{or,}\quad
104 | # \E\supa &= \E\supf + \barK \left\{\y\ones\tr - \Dobs - \ObsMod(\E\supf) \right\} \,,
105 | # \tag{4}
106 | # \end{align}
107 | # $$
108 | # where the "observation perturbations", $\r_n$, are sampled iid. from the observation noise model, e.g. $\NormDist(\bvec{0},\R)$,
109 | # and form the columns of $\Dobs$,
110 | # and the observation operator (again, any type of function), $\ObsMod$, is applied column-wise to $\E\supf$.
111 | #
112 | # The gain $\barK$ is defined by inserting the ensemble estimates for
113 | # * (i) $\bP\supf \bH\tr$: the cross-covariance between $\x\supf$ and $\ObsMod(\x\supf)$, and
114 | # * (ii) $\bH \bP\supf \bH\tr$: the covariance matrix of $\ObsMod(\x\supf)$,
115 | #
116 | # in the formula for $\K$, namely eqn. (K1) of [T5](T5%20-%20Multivariate%20Kalman%20filter.ipynb).
117 | # Using the estimators from [T8](T8%20-%20Monte-Carlo%20%26%20ensembles.ipynb) yields
118 | #
119 | # $$\begin{align}
120 | # \barK &= \X \Y\tr ( \Y \Y\tr + (N{-}1) \R )^{-1} \,, \tag{5a}
121 | # \end{align}
122 | # $$
123 | #
124 | # where $\Y \in \Reals^{P \times N}$
125 | # is the centered, *observed* ensemble
126 | # $\Y \ceq
127 | # \begin{bmatrix}
128 | # \y_1 -\by, & \ldots & \y_n -\by, & \ldots & \y_N -\by
129 | # \end{bmatrix} \,,$ where $\y_n = \ObsMod(\x_n\supf)$.
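#
# To make eqns. (4) and (5a) concrete, here is a minimal sketch of a single
# analysis step. It is only an illustration: `E`, `ObsMod`, `R` and `y` are
# placeholders (an ensemble of shape (xDim, N), a column-wise observation
# operator, the observation noise covariance, and an observation vector),
# not objects defined at this point.

# +
def enkf_analysis(E, ObsMod, R, y):
    """Sketch of one EnKF analysis step, i.e. eqns. (4) and (5a)."""
    N = E.shape[1]
    Eo = ObsMod(E)                                   # observed ensemble
    X = E - E.mean(axis=1, keepdims=True)            # centered forecast ensemble
    Y = Eo - Eo.mean(axis=1, keepdims=True)          # centered, observed ensemble
    K = X @ Y.T @ np.linalg.inv(Y @ Y.T + (N-1)*R)   # eqn. (5a)
    D_obs = rnd.multivariate_normal(np.zeros(len(y)), R, N).T  # obs. perturbations
    return E + K @ (y[:, None] - D_obs - Eo)         # eqn. (4)
# -
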
130 | #
131 | # The EnKF is summarized in the animation below.
132 |
133 | EnKF_animation()
134 |
135 | # #### Exc -- Woodbury for the ensemble subspace
136 | # (a) Use the Woodbury identity (C2) of [T5](T5%20-%20Multivariate%20Kalman%20filter.ipynb) to show that eqn. (5a) can also be written
137 | # $$\begin{align}
138 | # \barK &= \X ( \Y\tr \Ri \Y + (N{-}1)\I_N )^{-1} \Y\tr \Ri \,. \tag{5b}
139 | # \end{align}
140 | # $$
141 | # (b) What is the potential benefit of (5b) vs. (5a) ?
142 |
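# As an optional numerical sanity check (not a substitute for the derivation),
# one can verify with random matrices that (5a) and (5b) coincide. The sizes
# and values below are arbitrary, made up purely for illustration.

# +
rng = np.random.default_rng(42)
xDim_, P_, N_ = 4, 3, 10                     # arbitrary sizes
X_ = rng.standard_normal((xDim_, N_))
Y_ = rng.standard_normal((P_, N_))
R_ = np.diag(rng.uniform(0.5, 2.0, P_))
Ri_ = np.linalg.inv(R_)
K5a = X_ @ Y_.T @ np.linalg.inv(Y_ @ Y_.T + (N_-1)*R_)
K5b = X_ @ np.linalg.inv(Y_.T @ Ri_ @ Y_ + (N_-1)*np.eye(N_)) @ Y_.T @ Ri_
assert np.allclose(K5a, K5b)
# -
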
143 | # #### Exc -- KG workings
144 | # The animation above assumed that the observation operator is simply the identity matrix, $\I$, rather than a general observation operator, $\ObsMod()$. The Kalman gain used by the EnKF, eqn. (5a), is applicable for any $\ObsMod()$. However, since formula (5a) consists solely of linear algebra, it cannot perfectly represent a general (nonlinear) $\ObsMod()$. So how does it actually treat the observation operator, and what meaning can we assign to the resulting updates?
145 | # *Hint*: consider the limit of $\R \rightarrow 0$.
146 |
147 | # #### Exc -- EnKF nobias (a)
148 | # Consider the ensemble averages,
149 | # - $\bx\supa = \frac{1}{N}\sum_{n=1}^N \x\supa_n$, and
150 | # - $\bx\supf = \frac{1}{N}\sum_{n=1}^N \x\supf_n$,
151 | #
152 | # and recall that the analysis step, eqn. (4), defines $\x\supa_n$ from $\x\supf_n$.
153 | #
154 | #
155 | # (a) Show that, in case $\ObsMod$ is linear (the matrix $\bH$),
156 | # $$\begin{align}
157 | # \Expect \bx\supa &= \bx\supf + \barK \left\{\y - \bH\bx\supf \right\} \,, \tag{6}
158 | # \end{align}
159 | # $$
160 | # where the expectation, $\Expect$, is taken with respect to $\Dobs$ only (i.e. not the sampling of the forecast ensemble, $\E\supf$ itself).
161 | #
162 | # What does this mean?
163 |
164 | # +
165 | # show_answer("EnKF_nobias_a")
166 | # -
167 |
168 | # #### Exc (optional) -- EnKF nobias (b)
169 | # Consider the ensemble covariance matrices:
170 | # $$\begin{align}
171 | # \barP\supf &= \frac{1}{N-1} \X{\X}\tr \,, \tag{7a} \\\
172 | # \barP\supa &= \frac{1}{N-1} \X\supa{\X\supa}\tr \,. \tag{7b}
173 | # \end{align}$$
174 | #
175 | # Now, denote the centralized observation perturbations
176 | # $\D \ceq
177 | # \begin{bmatrix}
178 | # \r_1 -\bar{\r}, & \ldots & \r_n -\bar{\r}, & \ldots & \r_N -\bar{\r}
179 | # \end{bmatrix} $.
180 | # Note that $\D \ones = \bvec{0}$ and that
181 | # $$
182 | # \begin{align}
183 | # \label{eqn:R_sample_cov_of_D}
184 | # \frac{1}{N-1} \D \D\tr &= \R \,, \tag{9a} \\\
185 | # \label{eqn:zero_AD_cov}
186 | # \X \D\tr &= \bvec{0} \tag{9b}
187 | # \end{align}
188 | # $$
189 | # are satisfied in the expected sense, i.e. upon taking the expectation of their left-hand sides.
190 | # Thereby, show that
191 | #
192 | # $$\begin{align}
193 | # \Expect \, \barP\supa &= [\I_{\xDim} - \barK \bH]\barP\supf \, . \tag{10}
194 | # \end{align}$$
195 |
196 | # +
197 | # show_answer("EnKF_nobias_b")
198 | # -
199 |
200 | # #### Exc (optional) -- EnKF bias (c)
201 | # Show that, if no observation perturbations are used in eqn. (4), then $\barP\supa$ would be too small.
202 |
203 | # +
204 | # show_answer("EnKF_without_perturbations")
205 | # -
206 |
207 | # ## Experimental setup
208 | #
209 | # Before writing the EnKF, we'll set up an experiment to test it with, so that you can check whether your implementation actually works.
210 | #
211 | # To that end, we'll use the Lorenz-63 model, from [T7](T7%20-%20Chaos%20%26%20Lorenz%20[optional].ipynb). The coupled ODEs are recalled here, but with some of the parameters fixed.
212 |
213 | # +
214 | xDim = 3
215 |
216 | def dxdt(x, sig=10, rho=28, beta=8/3):
217 | x,y,z = x
218 | d = np.zeros(3)
219 | d[0] = sig*(y - x)
220 | d[1] = rho*x - y - x*z
221 | d[2] = x*y - beta*z
222 | return d
223 |
224 |
225 | # -
226 |
227 | # Next, we make the forecast model $\DynMod$ out of $\frac{d \x}{dt}$ such that $\x(t+dt) = \DynMod(\x(t),t,dt)$. We'll make use of the "4th order Runge-Kutta" integrator `rk4`.
228 |
229 | # +
230 | from dapper.mods.integration import rk4
231 |
232 | def Dyn(E, t0, dt):
233 |
234 | def step(x0):
235 | return rk4(lambda x, t: dxdt(x), x0, t0, dt)
236 |
237 | if E.ndim == 1:
238 | # Truth (single state vector) case
239 | E = step(E)
240 | else:
241 | # Ensemble case
242 | for n in range(E.shape[1]):
243 | E[:, n] = step(E[:, n])
244 |
245 | return E
246 |
247 | Q12 = np.zeros((xDim, xDim))
248 | Q = Q12 @ Q12.T
249 | # -
250 |
251 | # Notice the loop over each ensemble member. For better performance, this should be vectorized, if possible. Alternatively, if the forecast model is computationally demanding (as is typically the case in real applications), the loop should be parallelized, i.e. the forecast simulations distributed across separate processors or computers.
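#
# For this small model, vectorization is straightforward, since the right-hand
# side of the ODE only involves elementwise arithmetic. The following sketch
# (the names `dxdt_vect` and `Dyn_vect` are ours, and it assumes that `rk4`
# accepts array-valued states, which holds for a plain Runge-Kutta
# implementation) propagates all members in a single call.

# +
def dxdt_vect(x, sig=10, rho=28, beta=8/3):
    """Like `dxdt`, but accepts arrays of shape (3,) or (3, N)."""
    x1, x2, x3 = x  # rows
    return np.array([sig*(x2 - x1),
                     rho*x1 - x2 - x1*x3,
                     x1*x2 - beta*x3])

def Dyn_vect(E, t0, dt):
    """Propagate a single state or a whole ensemble in one call."""
    return rk4(lambda x, t: dxdt_vect(x), E, t0, dt)
# -
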
252 |
253 | # The following are the time settings that we will use
254 |
255 | dt = 0.01 # integrational time step
256 | dko = 25 # number of steps between observations
257 | dto = dko*dt # time between observations
258 | Ko = 60 # total number of observations
259 | nTime = dko*(Ko+1) # total number of time steps
260 |
261 | # Initial conditions
262 |
263 | xa = np.array([1.509, -1.531, 25.46])
264 | Pa12 = np.eye(3)
265 |
266 | # Observation model settings
267 |
268 | # +
269 | p = 3 # ndim obs
270 | def Obs(E, t):
271 | return E[:p] if E.ndim == 1 else E[:p, :]
272 |
273 | R12 = np.sqrt(2)*np.eye(p)
274 | R = R12 @ R12.T
275 | # -
276 |
277 | # Generate synthetic truth and observations
278 |
279 | # Init
280 | truths = np.zeros((nTime+1, xDim))
281 | obsrvs = np.zeros((Ko+1, p))
282 | truths[0] = xa + Pa12 @ rnd.randn(xDim)
283 |
284 | # Loop
285 | for k in range(1, nTime+1):
286 | truths[k] = Dyn(truths[k-1], (k-1)*dt, dt)
287 | truths[k] += Q12 @ rnd.randn(xDim)
288 | if k % dko == 0:
289 |         ko = k//dko - 1  # observation index (a new name, so the constant `Ko` is not overwritten)
290 |         obsrvs[ko] = Obs(truths[k], np.nan) + R12 @ rnd.randn(p)
291 |
292 | # ## EnKF implementation
293 |
294 | # We will make use of `estimate_mean_and_cov` and `estimate_cross_cov` from the previous section. Paste them in below.
295 |
296 | # +
297 | # def estimate_mean_and_cov ...
298 | # -
299 |
300 | # **Exc -- EnKF implementation:** Complete the code below
301 |
302 | # +
303 | # Useful linear algebra: B/A (= B @ inv(A)) can be computed as nla.solve(A.T, B.T).T
304 | import numpy.linalg as nla
305 |
306 | ens_means = np.zeros((nTime+1, xDim))
307 | ens_vrncs = np.zeros((nTime+1, xDim))
308 |
309 | def my_EnKF(N):
310 | """My implementation of the EnKF."""
311 | ### Init ###
312 | E = np.zeros((xDim, N))
313 | for k in tqdm(range(1, nTime+1)):
314 | t = k*dt
315 | ### Forecast ##
316 | # E = ... # use model
317 | # E = ... # add noise
318 | if k % dko == 0:
319 | ### Analysis ##
320 | y = obsrvs[[k//dko-1]].T # current observation
321 | Eo = Obs(E, t) # observed ensemble
322 | # Compute ensemble moments
323 | PH = ...
324 | HPH = ...
325 | # Compute Kalman Gain
326 | KG = ...
327 | # Generate perturbations
328 | Perturb = ...
329 | # Update ensemble with KG
330 | # E = ...
331 | # Save statistics
332 | ens_means[k] = np.mean(E, axis=1)
333 | ens_vrncs[k] = np.var(E, axis=1, ddof=1)
334 |
335 |
336 | # -
337 |
338 | # Notice that we only store some stats (`ens_means`). This is because, in large systems,
339 | # storing the entire ensemble (let alone its covariance) for every time step would require too much memory.
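#
# For instance (a hypothetical back-of-envelope, not a property of this
# experiment): a state of dimension $10^7$ with $N = 100$ members stored in
# double precision already occupies about 8 GB per time step.

# +
# Hypothetical sizes, purely for illustration
xDim_big, N_big = 10**7, 100
print(xDim_big * N_big * 8 / 1e9, "GB per stored ensemble")  # => 8.0 GB
# -
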
340 |
341 | # +
342 | # show_answer('EnKF v1')
343 | # -
344 |
345 | # Now let's try out its capabilities
346 |
347 | # +
348 | # Run assimilation
349 | my_EnKF(10)
350 |
351 | # Plot
352 | fig, axs = plt.subplots(nrows=3, sharex=True)
353 | for i in range(3):
354 | axs[i].plot(dt*np.arange(nTime+1), truths [:, i], 'k', label="Truth")
355 | axs[i].plot(dt*np.arange(nTime+1), ens_means[:, i], 'b', label="Estimate")
356 | if i
408 | #
409 | # ### References
410 |
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 | jupyter_nbextensions_configurator<0.6.4 # compatible with "notebook<6.5"
3 | jupytext
4 | pre-commit
5 | requests
6 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | # DAPPER is maintained for compatibility with Colab.
2 | # https://github.com/nansencenter/DAPPER/issues/41#issuecomment-1381616971
3 | # -e ${HOME}/path/DAPPER
4 | dapper==1.7.3
5 |
6 | ipywidgets
7 | # Fix error "zmq message arrived on closed channel" ... "assert 0 < size <= self._size"
8 | # https://github.com/jupyter/notebook/issues/6721#issuecomment-1662440259
9 | jupyter_client<8
10 | tornado<6.2
11 |
12 | markdown
13 |
--------------------------------------------------------------------------------
/tests/test_all.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | """Test that all notebooks run without error.
3 |
4 | Also test stuff relating to `show_answer`.
5 |
6 | These tests are not meant for pytest (they do not use asserts, and the script orchestrates itself).
7 | Simply run the script as any regular Python script.
8 | Why: Mainly because it did not seem necessary. Also I find debugging with pytest somewhat hard.
9 | """
10 |
11 | from pathlib import Path
12 | import os
13 | import subprocess
14 | import sys
15 | import requests
16 | from urllib.parse import unquote
17 |
18 | from markdown import markdown as md2html
19 |
20 |
21 | UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
22 | ROOT = Path(__file__).parents[1]
23 |
24 |
25 | def _report_error(msg):
26 |     # raise AssertionError(msg)  # for post-mortem debugging
27 | print(msg)
28 | return True
29 |
30 |
31 | def _find_anchor(fname: Path, anchor):
32 | lines = fname.read_text().splitlines()
33 | # filter for "# ### Example heading" or "# - ### Heading in bullet point"
34 | headings = [x for x in lines if x.startswith("# #") or x.startswith("# - #")]
35 | headings = [x.lstrip("# -") for x in headings]
36 | headings = [x.replace(" ", "-") for x in headings]
37 | return anchor in headings
38 |
39 |
40 | def assert_all_links_work(lines, fname):
41 | failed = False
42 | for i, line in enumerate(lines):
43 |
44 | # Skip
45 | if not line.startswith("#"):
46 | continue
47 | if any(x in line for x in [
48 | "www.google.com/search", # because md2html fails to parse
49 | "www.example.com"]):
50 | continue
51 |
52 | # First do a *quick* scan for links.
53 | if "](" in line or "http" in line:
54 | # Extract link
55 | html = md2html(line) # since extracting url from md w/ regex is hard
56 | # PS: linebreaks in links ⇒ failure (as desired)
57 | link = html.split('href="')[1].split('">')[0]
58 | # fix parsing error for links ending in ')'
59 | if "))" in link:
60 | link = link.split("))")[0] + ")"
61 |
62 | # Common error message
63 | def errm(issue):
64 | return f"Issue on line {i} with {issue} link\n {link}"
65 |
66 | # Internet links
67 | if "http" in link:
68 | response = None
69 | try:
70 | response = requests.head(link, headers={'User-Agent': UA}, allow_redirects=True, timeout=10)
71 | if response.status_code in (403, 405):
72 | # Fallback to GET if HEAD is not allowed or forbidden
73 | response = requests.get(link, headers={'User-Agent': UA}, allow_redirects=True, timeout=10)
74 | # Ignore status code 429 (Too Many Requests)
75 | if response.status_code == 429:
76 | continue
77 | assert response.status_code < 400
78 | except Exception as e:
79 | # Known problematic domains
80 | skip_domains = ["stack", "wiley.com", "springer.com", "elsevier.com"]
81 | status = response.status_code if response is not None else "N/A"
82 |                     skip = (os.getenv("GITHUB_ACTIONS") and any(domain in link for domain in skip_domains)) or status == 429
83 | if not skip:
84 | failed |= True
85 | _report_error(errm("**requesting**") +
86 | f"\nStatus code: {status}\nError: {e}")
87 |
88 | # Local links
89 | else:
90 | link = unquote(link)
91 | link_fname, *link_anchor = link.split("#")
92 |
93 | # Validate filename
94 | if link_fname:
95 | if not (ROOT / "notebooks" / link_fname).is_file():
96 | failed |= _report_error(errm("**filename** of"))
97 |
98 | # Validate anchor
99 | if link_anchor:
100 | if not link_fname:
101 | # Anchor only ⇒ same file
102 | link_fname = fname
103 | else:
104 | # Change "T4...ipynb" --> "tests/T4...py"
105 | link_fname = (ROOT / "tests" / link_fname).with_suffix(".py")
106 |
107 | if not _find_anchor(link_fname, link_anchor[0]):
108 | failed |= _report_error(errm("**anchor tag** of"))
109 | return failed
110 |
111 |
112 | def assert_show_answer(lines, _fname):
113 | """Misc checks on `show_answer`"""
114 | failed = False
115 | found_import = False
116 | for i, line in enumerate(lines):
117 | found_import |= ("show_answer" in line and "import" in line)
118 | if line.lstrip().startswith("show_answer"):
119 | print(f"`show_answer` uncommented on line {i}")
120 | failed |= True
121 | if not found_import:
122 | print("`import show_answer` not found.")
123 | failed = True
124 | return failed
125 |
126 |
127 | def uncomment_show_answer(lines):
128 |     """Uncomment `show_answer` so that the existence of answers is checked when the script runs."""
129 | for i, line in enumerate(lines):
130 | OLD = "# show_answer"
131 | NEW = "show_answer"
132 | if line.startswith(OLD):
133 | lines[i] = line.replace(OLD, NEW)
134 | return lines
135 |
136 |
137 | def make_script_runnable_by_fixing_sys_path(lines):
138 | """Makes it seem like CWD is `notebooks`."""
139 | return ['import sys',
140 | f"""sys.path.insert(0, '{ROOT / "notebooks"}')""",
141 | ] + lines
142 |
143 |
144 | ## Convert: notebooks/T*.ipynb --> tests/T*.py
145 | print("\nConverting from notebooks/...ipynb to tests/...py")
146 | print("========================================")
147 | text = dict(capture_output=True, text=True)
148 | converted = []
149 | ipynbs = sorted((ROOT / "notebooks").glob("T*.ipynb"))
150 | for f in ipynbs:
151 | script = (ROOT / "tests" / f.name).with_suffix('.py')
152 | # script = (ROOT / "notebooks" / "scripts" / f.name).with_suffix('.py')
153 | converted.append(script)
154 | cmd = ["jupytext", "--output", str(script), str(f)]
155 | print(subprocess.run(cmd, **text, check=True).stdout)
156 |
157 |
158 | ## Static checks. Also: modify scripts
159 | erred = []
160 | for script in converted:
161 | print("\nStatic analysis for", script.stem)
162 | print("========================================")
163 | lines = script.read_text().splitlines()
164 | failed = False
165 |
166 |     # Validation checks
167 | failed |= assert_all_links_work(lines, script)
168 | failed |= assert_show_answer(lines, script)
169 |
170 | # Modify script in preparation of running it
171 | lines = uncomment_show_answer(lines)
172 | lines = make_script_runnable_by_fixing_sys_path(lines)
173 |
174 | if failed:
175 | erred.append(script)
176 | script.write_text("\n".join(lines))
177 |
178 |
179 | print("\nStatic analysis for", "answers.py")
180 | print("========================================")
181 | sys.path.insert(0, f"{ROOT / 'notebooks'}")
182 | import resources.answers # type: ignore # noqa
183 | for key, answer in resources.answers.answers.items():
184 | lines = ["# " + line for line in answer[1].splitlines()]
185 | fname = Path(resources.answers.__file__ + ":" + key)
186 | if assert_all_links_work(lines, fname):
187 | erred.append(fname)
188 |
189 |
190 | ## Run ipynbs as python scripts
191 | for script in converted:
192 | print("\nRunning", script.name)
193 | print("========================================")
194 | run = subprocess.run(["python", str(script)], **text, check=False)
195 | # print(run.stdout)
196 | if run.returncode:
197 | erred.append(script)
198 | print(run.stderr, file=sys.stderr)
199 |
200 | # Provide return code
201 | if erred:
202 | print("========================================")
203 | print("FOUND ISSUES")
204 | print("========================================")
205 | print(*["- " + str(f) for f in erred], file=sys.stderr)
206 | print("See above for individual tracebacks.")
207 | sys.exit(1)
208 |
--------------------------------------------------------------------------------