├── .gitignore
└── notes
├── circuit.png
├── strang.jpg
├── Demo-Mar18.pdf
├── 1806overview.pdf
├── svd-practice.pdf
├── 1806overview.pptx
├── cyclic-springs.png
├── jordan-vectors.pdf
├── where-to-go-after.pdf
├── cookie-strang-book.jpg
├── Notice of Video Recording 18_06 S23.docx
├── Introduction to Linear Algebra 6th edition and A = CR_04.pdf
├── eigshow.jl
├── Sample Variance division by n-1.ipynb
├── Demo-Mar18.ipynb
├── SVD-eigenproblem.ipynb
├── Machine-Learning-with-Gaussian-elimination.ipynb
├── QR Factorization Examples in Julia.ipynb
├── Perron-Frobenius.ipynb
├── Singular.ipynb
├── Gram-Schmidt.ipynb
├── rank-r and full svds.ipynb
├── Gauss-Jordan.ipynb
├── Markov.ipynb
├── Linear Transformations.ipynb
└── QR in Julia.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | *.log
3 | *.aux
4 | .ipynb_checkpoints
5 |
--------------------------------------------------------------------------------
/notes/circuit.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/circuit.png
--------------------------------------------------------------------------------
/notes/strang.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/strang.jpg
--------------------------------------------------------------------------------
/notes/Demo-Mar18.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/Demo-Mar18.pdf
--------------------------------------------------------------------------------
/notes/1806overview.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/1806overview.pdf
--------------------------------------------------------------------------------
/notes/svd-practice.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/svd-practice.pdf
--------------------------------------------------------------------------------
/notes/1806overview.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/1806overview.pptx
--------------------------------------------------------------------------------
/notes/cyclic-springs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/cyclic-springs.png
--------------------------------------------------------------------------------
/notes/jordan-vectors.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/jordan-vectors.pdf
--------------------------------------------------------------------------------
/notes/where-to-go-after.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/where-to-go-after.pdf
--------------------------------------------------------------------------------
/notes/cookie-strang-book.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/cookie-strang-book.jpg
--------------------------------------------------------------------------------
/notes/Notice of Video Recording 18_06 S23.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/Notice of Video Recording 18_06 S23.docx
--------------------------------------------------------------------------------
/notes/Introduction to Linear Algebra 6th edition and A = CR_04.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/Introduction to Linear Algebra 6th edition and A = CR_04.pdf
--------------------------------------------------------------------------------
/notes/eigshow.jl:
--------------------------------------------------------------------------------
# eigshow.jl — interactive eigenvector demo on the old GLVisualize / Julia 0.6
# stack (eye, linspace, atan2, eigfact and Reactive signals are all pre-1.0
# APIs).  A slider animates the matrix t*I + (1-t)*A from the identity (t = 1)
# down to a fixed 2x2 matrix A (t = 0); the view shows an image warped by that
# matrix, its entries and determinant, and segments along the eigenvector
# directions of A, clipped to the warped square.
# NOTE(review): the leading "N | " markers are archival line numbers from the
# dump this file was extracted from, not Julia syntax.
1 | using GLVisualize, Colors, Reactive, GLWindow, GeometryTypes
2 | using Images, StaticArrays, GLAbstraction, Iterators
3 | import GeometryTypes: intersects
4 | import GLVisualize: mm
# Open the GL window and keep its render loop running in the background.
5 | window = glscreen(); @async GLWindow.waiting_renderloop(window)
# Linear interpolation between the identity and A: t = 1 -> I, t = 0 -> A.
6 | animate(t, A) = t*eye(A) + (1-t)*A
7 |
8 | iconsize = 8mm
# Split the window into a fixed-width control strip (left) and the view (right).
9 | editarea, viewarea = x_partition_abs(window.area, round(Int, 8.2 * iconsize))
10 | edit_screen = Screen(
11 | window, area = editarea,
12 | stroke = (1f0, RGBA{Float32}(0.9f0, 0.9f0, 0.9f0, 1f0))
13 | )
14 | viewscreen = Screen(
15 | window, area = viewarea,
16 | )
17 |
18 | img = loadasset("doge.png")
# The fixed 2x2 target matrix (integer entries scaled by 1/4).
19 | A = Mat{2}(1, 3, 4, 2) ./ 4
# Slider signal t: 100 steps from 1.0 (identity) down to 0.0 (A itself).
20 | sv, t = GLVisualize.labeled_slider(linspace(1.0,0.0,100), edit_screen)
# Reactive signal holding the currently displayed (interpolated) matrix.
21 | matrix = map(t-> animate(t, A), t)
# Format the 2x2 entries as a fixed-width, two-line string for the menu.
22 | matrix_str = map(matrix) do m
23 | str = ""
24 | for i=1:2
25 | for j=1:2
26 | str *= string(@sprintf("%15.4f", m[i, j]), " ")
27 | end
28 | str *= "\n"
29 | end
30 | str
31 | end
32 |
# Determinant of the current matrix, formatted to 4 decimal places.
33 | det_str = map(matrix) do m
34 | @sprintf("%15.4f", det(m))
35 | end
36 |
# Labeled widgets rendered in the control strip.
37 | menu = Pair[
38 | "slider:" => sv,
39 | "matrix:" => matrix_str,
40 | "determinant:" => det_str
41 | ]
42 | _view(visualize(
43 | menu,
44 | text_scale = 4mm,
45 | width = 8iconsize
46 | ), edit_screen, camera = :fixed_pixel)
47 |
# A 500x500 square centered at the origin; its mesh is warped by the matrix.
48 | prim_rect = SimpleRectangle(-250, -250, 500, 500)
49 | mesh = GLUVMesh(prim_rect)
50 | prim = map(t) do t
51 | points = decompose(Point2f0, prim_rect)
# Multiply every corner by the interpolated matrix (broadcast over the points).
52 | points .= (*).((animate(t, A),), points)
# Write the transformed 2D corners back into the shared mesh (mutated in
# place) as z = 0 3D points.
53 | mesh.vertices[:] = map(x-> Point3f0(x[1], x[2], 0), points)
54 | mesh
55 | end
# Draw the image textured onto the animated mesh.
56 | _view(visualize(img, fxaa = true, primitive = prim, boundingbox = nothing), viewscreen)
57 |
58 |
59 | origin = Point2f0(0)
60 | lines = Point2f0[]
61 |
# Signed angle at `origin` between the directions to a and b
# (atan2 of the 2D cross product and the dot product).
62 | function vec_angle(origin, a, b)
63 | diff0 = a - origin
64 | diff1 = b - origin
65 | d = dot(diff0, diff1)
66 | det = cross(diff0, diff1)
67 | atan2(det, d)
68 | end
# Sort the rectangle's vertices by angle around their centroid so they form a
# simple (non-self-intersecting) polygon; mutates and returns `points`.
69 | function sort_rectangle!(points)
70 | middle = mean(points)
71 | p1 = first(points)
72 | sort!(points, by = p-> vec_angle(middle, p, p1))
73 | end
74 |
# For each slider step: (warped square as a polygon mesh, the three points
# [eigvec2 exit, origin, eigvec1 exit] used to draw the eigenvector segments).
75 | eigvectpoly = map(t) do t
76 | # bring vertices in correct order and close rectangle
77 | points = sort_rectangle!(map(Point2f0, vertices(value(prim))))
78 | push!(points, points[1]) # close points
79 |
# Eigen-decomposition of the *target* matrix A (not the interpolated one).
80 | a = eigfact(Array(A))
81 | eigvectors = map(1:size(a.vectors, 1)) do i
82 | normalize(Vec2f0(a.vectors[:, i]...))
83 | end
# Scale far beyond the square so each segment always crosses its boundary.
84 | v1 = eigvectors[1] * 1000f0
85 | v2 = eigvectors[2] * 1000f0
86 | m = animate(t, A)
87 | eigseg1 = LineSegment(origin, Point2f0(m*v1))
88 | eigseg2 = LineSegment(origin, Point2f0(m*v2))
# (edge index, intersection point); stays at the origin when nothing is hit.
89 | seg1cut = seg2cut = (0, origin)
# partition(points, 2, 1) yields consecutive point pairs, i.e. polygon edges.
90 | for (i, (a, b)) in enumerate(partition(points, 2, 1))
91 | seg = LineSegment(a,b)
92 | intersected, p = intersects(eigseg1, seg)
93 | intersected && (seg1cut = (i, p))
94 | intersected, p = intersects(eigseg2, seg)
95 | intersected && (seg2cut = (i, p))
96 | end
97 | pop!(points) #remove closing point
98 | GLPlainMesh(points), Point2f0[seg2cut[2], origin, seg1cut[2]]
99 | end
100 | # _view(visualize(
101 | # map(first, eigvectpoly),
102 | # color = RGBA(1f0, 1f0, 1f0, 0.6f0),
103 | # ), camera = :orthographic_pixel)
104 |
# Draw the two eigenvector segments: indices [2, 1, 2, 3] connect the origin
# (point 2) to each clipped endpoint (points 1 and 3).
105 | _view(visualize(
106 | map(last, eigvectpoly), :linesegment,
107 | indices = [2, 1, 2, 3],
108 | thickness = 3f0,
109 | color = RGBA(0.60, 0.3f0, 0.4f0, 1f0),
110 | ), viewscreen, camera = :orthographic_pixel)
111 |
# Mark the warped square's corners with small circles.
112 | _view(visualize(
113 | (Circle(Point2f0(0), 5f0), map(x-> map(Point2f0, vertices(x)), prim)),
114 | color = RGBA(0.7f0, 0.2f0, 0.9f0, 1f0),
115 | ), viewscreen, camera = :orthographic_pixel)
# Fit the orthographic camera around the view's content.
116 | center!(viewscreen, :orthographic_pixel, border = 10f0)
117 |
--------------------------------------------------------------------------------
/notes/Sample Variance division by n-1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "The formula for [sample variance](https://en.wikipedia.org/wiki/Variance#Sample_variance):\n",
8 | " $$s_n^2 = \\frac{1}{n-1}\\sum (x_i-\\bar{x})^2$$\n",
9 | " has that funny $n-1$ in the denominator.\n",
10 | " \n",
11 | "The n-1 is referred to as [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction).\n",
12 | "The usual explanation involves vague terms such as [degrees of freedom](https://en.wikipedia.org/wiki/Degrees_of_freedom_(statistics%29) which always sounded flaky to me."
13 | ]
14 | },
15 | {
16 | "cell_type": "markdown",
17 | "metadata": {},
18 | "source": [
19 | "## 1. Let us first check the n-1 by experiment"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 23,
25 | "metadata": {},
26 | "outputs": [
27 | {
28 | "data": {
29 | "text/plain": [
30 | "f (generic function with 1 method)"
31 | ]
32 | },
33 | "execution_count": 23,
34 | "metadata": {},
35 | "output_type": "execute_result"
36 | }
37 | ],
38 | "source": [
39 | "function f(n)\n",
40 | " x = randn(n)\n",
41 | " norm(x-mean(x))^2\n",
42 | "end"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 30,
48 | "metadata": {},
49 | "outputs": [
50 | {
51 | "data": {
52 | "text/plain": [
53 | "10.00378620254928"
54 | ]
55 | },
56 | "execution_count": 30,
57 | "metadata": {},
58 | "output_type": "execute_result"
59 | }
60 | ],
61 | "source": [
62 | "n=11\n",
63 | "mean([f(n) for i=1:1_000_000])"
64 | ]
65 | },
66 | {
67 | "cell_type": "code",
68 | "execution_count": 28,
69 | "metadata": {},
70 | "outputs": [
71 | {
72 | "data": {
73 | "text/plain": [
74 | "3.9965121482424095"
75 | ]
76 | },
77 | "execution_count": 28,
78 | "metadata": {},
79 | "output_type": "execute_result"
80 | }
81 | ],
82 | "source": [
83 | "n=5\n",
84 | "mean([f(n) for i=1:1_000_000])"
85 | ]
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {},
90 | "source": [
91 | "## 2. A few facts about randn"
92 | ]
93 | },
94 | {
95 | "cell_type": "markdown",
96 | "metadata": {},
97 | "source": [
98 | "randn(n) is an n-vector of independent standard normals.\n",
99 | "\n",
100 | "If Q is any orthogonal matrix, $Q*$randn(n) is also an n-vector of independent standard normals.\n",
101 | "There is no mathematical way to distinguish randn(n) from $Q*$randn(n). This is because the\n",
102 | "probability function is proportional to $e^{-\\|x\\|^2/2}$, i.e., it only depends on the length of x, not\n",
103 | "the direction.\n",
104 | "\n",
105 | "Also the expected value of randn(1)^2 is 1."
106 | ]
107 | },
108 | {
109 | "cell_type": "markdown",
110 | "metadata": {},
111 | "source": [
112 | "## 3. Linear Algebra makes n-1 easy to understand"
113 | ]
114 | },
115 | {
116 | "cell_type": "markdown",
117 | "metadata": {},
118 | "source": [
119 | "Consider the projection matrix $P=I-1/n$. The matrix-vector product $Px$ computes x-mean(x)."
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": 56,
125 | "metadata": {},
126 | "outputs": [
127 | {
128 | "data": {
129 | "text/plain": [
130 | "4×4 Array{Rational{Int64},2}:\n",
131 | " 3//4 -1//4 -1//4 -1//4\n",
132 | " -1//4 3//4 -1//4 -1//4\n",
133 | " -1//4 -1//4 3//4 -1//4\n",
134 | " -1//4 -1//4 -1//4 3//4"
135 | ]
136 | },
137 | "execution_count": 56,
138 | "metadata": {},
139 | "output_type": "execute_result"
140 | }
141 | ],
142 | "source": [
143 | "# example \n",
144 | "n = 4\n",
145 | "P = eye(Int,n) - 1//n"
146 | ]
147 | },
148 | {
149 | "cell_type": "markdown",
150 | "metadata": {},
151 | "source": [
152 | "If we write the eigendecomposition $P=Q\\Lambda Q'$, then $\\Lambda$ has one diagonal entry (say the first) $0$ and the\n",
153 | "rest $1$.\n",
154 | "\n",
155 | "Therefore if x=randn(n) so is Qx as a random variable, and $$\\|PQx\\|^2 = \\|Q\\Lambda x\\|^2 = \\|\\Lambda x\\|^2=x_2^2 +\\ldots+x_n^2 $$ which is obviously n-1 in expectation."
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": null,
161 | "metadata": {},
162 | "outputs": [],
163 | "source": []
164 | }
165 | ],
166 | "metadata": {
167 | "kernelspec": {
168 | "display_name": "Julia 0.6.2",
169 | "language": "julia",
170 | "name": "julia-0.6"
171 | },
172 | "language_info": {
173 | "file_extension": ".jl",
174 | "mimetype": "application/julia",
175 | "name": "julia",
176 | "version": "0.6.2"
177 | }
178 | },
179 | "nbformat": 4,
180 | "nbformat_minor": 2
181 | }
182 |
--------------------------------------------------------------------------------
/notes/Demo-Mar18.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "data": {
10 | "text/plain": [
11 | "3×3 Array{Float64,2}:\n",
12 | " 0.262707 0.722601 0.227894\n",
13 | " 0.194994 0.545517 0.595722\n",
14 | " 0.465314 0.686887 0.644692"
15 | ]
16 | },
17 | "execution_count": 2,
18 | "metadata": {},
19 | "output_type": "execute_result"
20 | }
21 | ],
22 | "source": [
23 | "using LinearAlgebra\n",
24 | "A = rand(3,3)"
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 3,
30 | "metadata": {},
31 | "outputs": [
32 | {
33 | "data": {
34 | "text/plain": [
35 | "3×3 Array{Float64,2}:\n",
36 | " 0.31596 0.740561 0.63726 \n",
37 | " 0.434797 0.847685 0.753471\n",
38 | " 0.556164 1.15377 0.930863"
39 | ]
40 | },
41 | "execution_count": 3,
42 | "metadata": {},
43 | "output_type": "execute_result"
44 | }
45 | ],
46 | "source": [
47 | "A2 = A^2"
48 | ]
49 | },
50 | {
51 | "cell_type": "code",
52 | "execution_count": 4,
53 | "metadata": {},
54 | "outputs": [
55 | {
56 | "data": {
57 | "text/plain": [
58 | "3×3 Array{Float64,2}:\n",
59 | " 0.001 0.002 0.003\n",
60 | " 0.004 0.005 0.006\n",
61 | " 0.007 0.008 0.009"
62 | ]
63 | },
64 | "execution_count": 4,
65 | "metadata": {},
66 | "output_type": "execute_result"
67 | }
68 | ],
69 | "source": [
70 | "dA = [0.001 0.002 0.003 ; 0.004 0.005 0.006 ; 0.007 0.008 0.009]"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 5,
76 | "metadata": {},
77 | "outputs": [
78 | {
79 | "data": {
80 | "text/plain": [
81 | "3×3 Array{Float64,2}:\n",
82 | " 0.00682701 0.00987187 0.0105702\n",
83 | " 0.0114308 0.0177037 0.0170739\n",
84 | " 0.0154144 0.0252529 0.0236327"
85 | ]
86 | },
87 | "execution_count": 5,
88 | "metadata": {},
89 | "output_type": "execute_result"
90 | }
91 | ],
92 | "source": [
93 | "(A + dA)^2 - A2"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": 6,
99 | "metadata": {},
100 | "outputs": [
101 | {
102 | "data": {
103 | "text/plain": [
104 | "3×3 Array{Float64,2}:\n",
105 | " 0.00679701 0.00983587 0.0105282\n",
106 | " 0.0113648 0.0176227 0.0169779\n",
107 | " 0.0153124 0.0251269 0.0234827"
108 | ]
109 | },
110 | "execution_count": 6,
111 | "metadata": {},
112 | "output_type": "execute_result"
113 | }
114 | ],
115 | "source": [
116 | "A*dA + dA*A"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": 7,
122 | "metadata": {},
123 | "outputs": [
124 | {
125 | "data": {
126 | "text/plain": [
127 | "3×3 Array{Float64,2}:\n",
128 | " 0.00949674 0.0119231 0.0143495\n",
129 | " 0.0130942 0.0157667 0.0184392\n",
130 | " 0.0154514 0.0190452 0.022639 "
131 | ]
132 | },
133 | "execution_count": 7,
134 | "metadata": {},
135 | "output_type": "execute_result"
136 | }
137 | ],
138 | "source": [
139 | "2*A*dA"
140 | ]
141 | },
142 | {
143 | "cell_type": "code",
144 | "execution_count": 8,
145 | "metadata": {},
146 | "outputs": [
147 | {
148 | "data": {
149 | "text/plain": [
150 | "3×3 Array{Float64,2}:\n",
151 | " 0.00409727 0.00774859 0.00670683\n",
152 | " 0.00963536 0.0194786 0.0155167 \n",
153 | " 0.0151735 0.0312086 0.0243265 "
154 | ]
155 | },
156 | "execution_count": 8,
157 | "metadata": {},
158 | "output_type": "execute_result"
159 | }
160 | ],
161 | "source": [
162 | "2*dA*A"
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "execution_count": 9,
168 | "metadata": {},
169 | "outputs": [
170 | {
171 | "data": {
172 | "text/plain": [
173 | "3×3 Array{Float64,2}:\n",
174 | " -0.85783 -4.61434 4.56708 \n",
175 | " 2.25985 0.944638 -1.67173 \n",
176 | " -1.78861 2.32399 0.0359288"
177 | ]
178 | },
179 | "execution_count": 9,
180 | "metadata": {},
181 | "output_type": "execute_result"
182 | }
183 | ],
184 | "source": [
185 | "AI = inv(A)"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": 10,
191 | "metadata": {},
192 | "outputs": [
193 | {
194 | "data": {
195 | "text/plain": [
196 | "3×3 Array{Float64,2}:\n",
197 | " 0.00361557 0.0219253 -0.0380485\n",
198 | " -0.000140811 -0.0161177 0.0188606\n",
199 | " 0.00369236 0.0071466 -0.0214998"
200 | ]
201 | },
202 | "execution_count": 10,
203 | "metadata": {},
204 | "output_type": "execute_result"
205 | }
206 | ],
207 | "source": [
208 | "inv(A+dA)-inv(A)"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": 11,
214 | "metadata": {},
215 | "outputs": [
216 | {
217 | "data": {
218 | "text/plain": [
219 | "3×3 Array{Float64,2}:\n",
220 | " 0.0036997 0.0220909 -0.0385415\n",
221 | " -0.0001703 -0.0161938 0.0190541\n",
222 | " 0.0037521 0.00724607 -0.0218293"
223 | ]
224 | },
225 | "execution_count": 11,
226 | "metadata": {},
227 | "output_type": "execute_result"
228 | }
229 | ],
230 | "source": [
231 | "-AI*dA*AI"
232 | ]
233 | },
234 | {
235 | "cell_type": "code",
236 | "execution_count": null,
237 | "metadata": {},
238 | "outputs": [],
239 | "source": []
240 | }
241 | ],
242 | "metadata": {
243 | "kernelspec": {
244 | "display_name": "Julia 1.1.0",
245 | "language": "julia",
246 | "name": "julia-1.1"
247 | },
248 | "language_info": {
249 | "file_extension": ".jl",
250 | "mimetype": "application/julia",
251 | "name": "julia",
252 | "version": "1.1.0"
253 | }
254 | },
255 | "nbformat": 4,
256 | "nbformat_minor": 2
257 | }
258 |
--------------------------------------------------------------------------------
/notes/SVD-eigenproblem.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# The SVD as an eigenproblem\n",
8 | "\n",
9 | "Notice that if $A = U\\Sigma V^H$, then\n",
10 | "\n",
11 | "$$\n",
12 | "A^H A = V \\Sigma U^H U \\Sigma V^H = V \\Sigma^2 V^H\n",
13 | "$$\n",
14 | "\n",
15 | "That is, to multiply $A^H A x$, you (1) compute $V^H x$ (the $V$ components of $x$), then (2) multiply each component by $\\sigma^2$, and finally (3) multiply the coefficients by $V$ and add up. It follows that:\n",
16 | "\n",
17 | "* The squared singular values $\\sigma^2$ are the **nonzero eigenvalues** of $A^H A$ and the corresponding **eigenvectors are the right singular vectors** $V$.\n",
18 | "\n",
19 | "Similarly,\n",
20 | "\n",
21 | "$$\n",
22 | "A A^H = U \\Sigma V^H V \\Sigma U^H = U \\Sigma^2 U^H\n",
23 | "$$\n",
24 | "\n",
25 | "so\n",
26 | "\n",
27 | "* The squared singular values $\\sigma^2$ are the **nonzero eigenvalues** of $A A^H$ and the corresponding **eigenvectors are the left singular vectors** $U$.\n",
28 | "\n",
29 | "We can easily check this:"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 1,
35 | "metadata": {},
36 | "outputs": [
37 | {
38 | "data": {
39 | "text/plain": [
40 | "5×3 Array{Float64,2}:\n",
41 | " 0.202935 -0.810741 0.379812 \n",
42 | " 0.317852 1.17222 0.0789665\n",
43 | " -1.58283 -0.524304 0.949145 \n",
44 | " 0.122448 1.57466 0.693527 \n",
45 | " -0.496476 1.13621 0.511883 "
46 | ]
47 | },
48 | "execution_count": 1,
49 | "metadata": {},
50 | "output_type": "execute_result"
51 | }
52 | ],
53 | "source": [
54 | "A = randn(5,3)"
55 | ]
56 | },
57 | {
58 | "cell_type": "markdown",
59 | "metadata": {},
60 | "source": [
61 | "Note that in this case, $A$ is a $5 \\times 3$ matrix of rank 3."
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 2,
67 | "metadata": {},
68 | "outputs": [
69 | {
70 | "data": {
71 | "text/plain": [
72 | "3-element Array{Float64,1}:\n",
73 | " 6.31957 \n",
74 | " 4.01707 \n",
75 | " 0.443596"
76 | ]
77 | },
78 | "execution_count": 2,
79 | "metadata": {},
80 | "output_type": "execute_result"
81 | }
82 | ],
83 | "source": [
84 | "U, σ, V = svd(A)\n",
85 | "\n",
86 | "σ.^2 # the σ² values"
87 | ]
88 | },
89 | {
90 | "cell_type": "markdown",
91 | "metadata": {},
92 | "source": [
93 | "$A^H A$ is a $3 \\times 3$ matrix of rank 3 with three nonzero eigenvalues that equal the singular values squared:"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "execution_count": 3,
99 | "metadata": {},
100 | "outputs": [
101 | {
102 | "data": {
103 | "text/plain": [
104 | "3-element Array{Float64,1}:\n",
105 | " 0.443596\n",
106 | " 4.01707 \n",
107 | " 6.31957 "
108 | ]
109 | },
110 | "execution_count": 3,
111 | "metadata": {},
112 | "output_type": "execute_result"
113 | }
114 | ],
115 | "source": [
116 | "eigvals(A'*A) # AᴴA has the same eigenvals!"
117 | ]
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "metadata": {},
122 | "source": [
123 | "$AA^H$ is a $5 \\times 5$ matrix of rank 3 (recall that the ranks of $A$, $AA^H$, and $A^H A$ are all equal!). It has 3 nonzero eigenvalues that equal the $\\sigma^2$ values, and 2 zero eigenvalues corresponding to the **two-dimensional** nullspace\n",
124 | "$$\n",
125 | "N(AA^H) = N(A^H) = C(A)^\\perp = C(U)^\\perp\n",
126 | "$$\n",
127 | "That is, the zero eigenvectors are those perpendicular to the left singular vectors $U$."
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 4,
133 | "metadata": {},
134 | "outputs": [
135 | {
136 | "data": {
137 | "text/plain": [
138 | "5-element Array{Float64,1}:\n",
139 | " -1.71137e-16\n",
140 | " 8.87578e-16\n",
141 | " 0.443596 \n",
142 | " 4.01707 \n",
143 | " 6.31957 "
144 | ]
145 | },
146 | "execution_count": 4,
147 | "metadata": {},
148 | "output_type": "execute_result"
149 | }
150 | ],
151 | "source": [
152 | "eigvals(A*A') # the same *nonzero* eigenvalues"
153 | ]
154 | },
155 | {
156 | "cell_type": "markdown",
157 | "metadata": {},
158 | "source": [
159 | "We can also check the eigenvectors, e.g. the eigenvectors of $A^H A$ should match V:"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": 5,
165 | "metadata": {},
166 | "outputs": [
167 | {
168 | "data": {
169 | "text/plain": [
170 | "3×3 Array{Float64,2}:\n",
171 | " 0.564298 -0.817673 0.113922\n",
172 | " -0.203239 -0.00384465 0.979122\n",
173 | " 0.800163 0.57567 0.168353"
174 | ]
175 | },
176 | "execution_count": 5,
177 | "metadata": {},
178 | "output_type": "execute_result"
179 | }
180 | ],
181 | "source": [
182 | "eigvecs(A'A)"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": 6,
188 | "metadata": {},
189 | "outputs": [
190 | {
191 | "data": {
192 | "text/plain": [
193 | "3×3 Array{Float64,2}:\n",
194 | " -0.113922 -0.817673 -0.564298\n",
195 | " -0.979122 -0.00384465 0.203239\n",
196 | " -0.168353 0.57567 -0.800163"
197 | ]
198 | },
199 | "execution_count": 6,
200 | "metadata": {},
201 | "output_type": "execute_result"
202 | }
203 | ],
204 | "source": [
205 | "V"
206 | ]
207 | },
208 | {
209 | "cell_type": "markdown",
210 | "metadata": {},
211 | "source": [
212 | "Yes, they match, up to an overall sign flip.\n",
213 | "\n",
214 | "(Note that the columns are in reverse order, because `svdvals` are by default sorted in *descending* order in Julia, whereas `eigvals` of Hermitian matrices are sorted in *ascending* order.)"
215 | ]
216 | },
217 | {
218 | "cell_type": "markdown",
219 | "metadata": {},
220 | "source": [
221 | "## Remarks\n",
222 | "\n",
223 | "* This, in principle, finally gives us a way to compute the SVD of a matrix: just find the eigenvectors and eigenvalues of $A^H A$. (Note that $AV = U\\Sigma$, so that once you have $V$ and $\\Sigma$ you can get $U$.)\n",
224 | "\n",
225 | "* In practice, computers use a different way to compute the SVD. (The most famous practical method is called \"Golub-Kahan bidiagonalization.\") In 18.06, we are content to let the `svd` function be a \"black box\", much like `eig`.\n",
226 | "\n",
227 | "* The fact that the singular values/vectors are related to eigenvalues of $A^H A$ and $A A^H$ has lots of important applications. Perhaps most famously, it means that the SVD diagonalizes the \"covariance matrix\" in statistics, which gives rise to the statistical method of [principal component analysis (PCA)](https://en.wikipedia.org/wiki/Principal_component_analysis)."
228 | ]
229 | }
230 | ],
231 | "metadata": {
232 | "kernelspec": {
233 | "display_name": "Julia 0.6.3",
234 | "language": "julia",
235 | "name": "julia-0.6"
236 | },
237 | "language_info": {
238 | "file_extension": ".jl",
239 | "mimetype": "application/julia",
240 | "name": "julia",
241 | "version": "0.6.3"
242 | }
243 | },
244 | "nbformat": 4,
245 | "nbformat_minor": 2
246 | }
247 |
--------------------------------------------------------------------------------
/notes/Machine-Learning-with-Gaussian-elimination.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## A Machine Learning Example where we compare Gaussian Elimination with the commonly used method of today\n",
8 | "\n",
9 | "We show that a simple linear neuron can be \"learned\" with Gaussian elimination, and indeed is much\n",
10 | "faster and more accurate upon doing so. (Much of machine learning is non-linear.)\n",
11 | "\n",
12 | "Our model of the universe is that we have an unknown 3-vector\n",
13 | "\n",
14 | "$w = \\left[ \\begin{array}{c} w_1 \\\\ w_2 \\\\ w_3 \\end{array} \\right]$\n",
15 | "\n",
16 | "that we wish to learn. We have three 3-vectors $x_1,x_2,x_3$ and the corresponding scalar values\n",
17 | "$y_1 = w \\cdot x_1$, $\\ y_2 = w \\cdot x_2$, $\\ y_3 = w \\cdot x_3$. (Caution: The $x_i$ are 3-vectors,\n",
18 | "not components.) We will show that Gaussian elimination learns $w$ very quickly, while standard deep learning\n",
19 | "approaches (which use a version of gradient descent currently considered the best, known as [ADAM](https://arxiv.org/abs/1412.6980)) can require many steps, may be inaccurate, and inconsistent.\n",
20 | "\n",
21 | "One of the issues is how to organize the \"x\" data and the \"y\" data. The \"x\"s can be the columns or rows of a matrix, or can be a vector of vectors. Many applications prefer the matrix approach. The \"y\"s can be bundled into a vector similarly."
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 76,
27 | "metadata": {},
28 | "outputs": [
29 | {
30 | "data": {
31 | "text/plain": [
32 | "3-element Array{Float64,1}:\n",
33 | " 0.982331\n",
34 | " 0.1774 \n",
35 | " 0.212845"
36 | ]
37 | },
38 | "execution_count": 76,
39 | "metadata": {},
40 | "output_type": "execute_result"
41 | }
42 | ],
43 | "source": [
44 | "w = rand(3) ## We are setting up a w. We will know it, but the learning algorithm will only have X and y data below."
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 84,
50 | "metadata": {},
51 | "outputs": [
52 | {
53 | "data": {
54 | "text/plain": [
55 | "3-element Array{Float64,1}:\n",
56 | " 0.881336\n",
57 | " 1.0557 \n",
58 | " 0.485883"
59 | ]
60 | },
61 | "execution_count": 84,
62 | "metadata": {},
63 | "output_type": "execute_result"
64 | }
65 | ],
66 | "source": [
67 | "# Here is the data. Each \"x\" is a 3-vector. Each \"y\" is a number.\n",
68 | "n = 3\n",
69 | "x1 = rand(3); y1=w ⋅ x1 # We are using the dot product (type \\cdot+tab)\n",
70 | "x2 = rand(3); y2=w ⋅ x2\n",
71 | "x3 = rand(3); y3=w ⋅ x3\n",
72 | "# Gather the \"x\" data into the rows of a matrix and \"y\" into a vector\n",
73 | "X=[x1 x2 x3]'\n",
74 | "y=[y1; y2; y3]"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 81,
80 | "metadata": {},
81 | "outputs": [
82 | {
83 | "data": {
84 | "text/plain": [
85 | "3-element Array{Float64,1}:\n",
86 | " 0.0\n",
87 | " 0.0\n",
88 | " 0.0"
89 | ]
90 | },
91 | "execution_count": 81,
92 | "metadata": {},
93 | "output_type": "execute_result"
94 | }
95 | ],
96 | "source": [
97 | "# We check that the linear system for the \"unknown\" w is X*w = y\n",
98 | "X*w-y"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": 83,
104 | "metadata": {},
105 | "outputs": [
106 | {
107 | "data": {
108 | "text/plain": [
109 | "3-element Array{Float64,1}:\n",
110 | " 0.982331\n",
111 | " 0.1774 \n",
112 | " 0.212845"
113 | ]
114 | },
115 | "execution_count": 83,
116 | "metadata": {},
117 | "output_type": "execute_result"
118 | }
119 | ],
120 | "source": [
121 | "## Recover w with Gaussian Elimination\n",
122 | "X\\y"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": 85,
128 | "metadata": {},
129 | "outputs": [
130 | {
131 | "data": {
132 | "text/plain": [
133 | "3-element Array{Float64,1}:\n",
134 | " 0.982331\n",
135 | " 0.1774 \n",
136 | " 0.212845"
137 | ]
138 | },
139 | "execution_count": 85,
140 | "metadata": {},
141 | "output_type": "execute_result"
142 | }
143 | ],
144 | "source": [
145 | "w"
146 | ]
147 | },
148 | {
149 | "cell_type": "code",
150 | "execution_count": 115,
151 | "metadata": {
152 | "collapsed": true
153 | },
154 | "outputs": [],
155 | "source": [
156 | "## Recover w with a machine learning package -- 18.06 students might just want to execute as a black box\n",
157 | "using Flux"
158 | ]
159 | },
160 | {
161 | "cell_type": "markdown",
162 | "metadata": {
163 | "collapsed": true
164 | },
165 | "source": [
166 | "We show how the same problem is commonly done with machine learning. Many learning cycles seem to be needed."
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": 101,
172 | "metadata": {},
173 | "outputs": [
174 | {
175 | "name": "stdout",
176 | "output_type": "stream",
177 | "text": [
178 | "[0.982331 0.1774 0.212845] : <== estimate after training\n"
179 | ]
180 | }
181 | ],
182 | "source": [
183 | "# t ... a model to be learned to fit the data\n",
184 | "t = Dense(3,1)\n",
185 | "loss(x,y) = Flux.mse(t(x),y)\n",
186 | "opt = ADAM(Flux.params(t)[1:1])\n",
187 | "Flux.train!(loss, Iterators.repeated( (X',y'), 20000), opt) # 20000 steps of training\n",
188 | "println((t.W).data, \" : <== estimate after training\")"
189 | ]
190 | },
191 | {
192 | "cell_type": "code",
193 | "execution_count": 102,
194 | "metadata": {
195 | "collapsed": true
196 | },
197 | "outputs": [],
198 | "source": [
199 | "## Adding more data does not help a whole lot"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 120,
205 | "metadata": {},
206 | "outputs": [
207 | {
208 | "name": "stdout",
209 | "output_type": "stream",
210 | "text": [
211 | "[0.948837 0.17883 0.218774] : <== estimate after training\n"
212 | ]
213 | }
214 | ],
215 | "source": [
216 | "n = 3000\n",
217 | "X = randn(n,3)\n",
218 | "y = X*w\n",
219 | "t = Dense(3,1)\n",
220 | "loss(x,y) = Flux.mse(t(x),y)\n",
221 | "opt = ADAM(Flux.params(t)[1:1])\n",
222 | "Flux.train!(loss, Iterators.repeated( (X',y'), 2000), opt) # 2000 steps of training\n",
223 | "println((t.W).data, \" : <== estimate after training\")"
224 | ]
225 | }
226 | ],
227 | "metadata": {
228 | "anaconda-cloud": {},
229 | "kernelspec": {
230 | "display_name": "Julia 0.6.0",
231 | "language": "julia",
232 | "name": "julia-0.6"
233 | },
234 | "language_info": {
235 | "file_extension": ".jl",
236 | "mimetype": "application/julia",
237 | "name": "julia",
238 | "version": "0.6.0"
239 | },
240 | "widgets": {
241 | "state": {
242 | "294167a6-1234-43dc-aef6-951949f1fac6": {
243 | "views": [
244 | {
245 | "cell_index": 26
246 | }
247 | ]
248 | },
249 | "41f7367b-0ad3-43e3-bd43-c6e4a1618e8d": {
250 | "views": [
251 | {
252 | "cell_index": 19
253 | }
254 | ]
255 | },
256 | "6e3620ec-4915-4734-8d3a-3332fdc63970": {
257 | "views": [
258 | {
259 | "cell_index": 16
260 | }
261 | ]
262 | },
263 | "ce72699c-d8cc-4a03-902b-a490178223e5": {
264 | "views": [
265 | {
266 | "cell_index": 17
267 | }
268 | ]
269 | },
270 | "db2d9825-08d3-4028-8072-1e865d1a0c4f": {
271 | "views": [
272 | {
273 | "cell_index": 23
274 | }
275 | ]
276 | }
277 | },
278 | "version": "1.2.0"
279 | }
280 | },
281 | "nbformat": 4,
282 | "nbformat_minor": 2
283 | }
284 |
--------------------------------------------------------------------------------
/notes/QR Factorization Examples in Julia.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "using LinearAlgebra"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "## Square Case first"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 3,
22 | "metadata": {},
23 | "outputs": [
24 | {
25 | "data": {
26 | "text/plain": [
27 | "4×4 Array{Float64,2}:\n",
28 | " 0.330869 0.843845 0.0623888 0.398208\n",
29 | " 0.8661 0.204488 0.138221 0.218923\n",
30 | " 0.741009 0.775278 0.576722 0.9775 \n",
31 | " 0.87276 0.139498 0.072938 0.983904"
32 | ]
33 | },
34 | "execution_count": 3,
35 | "metadata": {},
36 | "output_type": "execute_result"
37 | }
38 | ],
39 | "source": [
40 | "A = rand(4,4)"
41 | ]
42 | },
43 | {
44 | "cell_type": "code",
45 | "execution_count": 4,
46 | "metadata": {},
47 | "outputs": [
48 | {
49 | "data": {
50 | "text/plain": [
51 | "LinearAlgebra.QRCompactWY{Float64,Array{Float64,2}}\n",
52 | "Q factor:\n",
53 | "4×4 LinearAlgebra.QRCompactWYQ{Float64,Array{Float64,2}}:\n",
54 | " -0.224588 0.76522 0.603058 -0.017861 \n",
55 | " -0.587892 -0.292547 0.130273 -0.742852 \n",
56 | " -0.502982 0.437253 -0.739298 0.0962124\n",
57 | " -0.592413 -0.371031 0.269792 0.662266 \n",
58 | "R factor:\n",
59 | "4×4 Array{Float64,2}:\n",
60 | " -1.47323 -0.782326 -0.428561 -1.29268 \n",
61 | " 0.0 0.873139 0.232416 0.303028\n",
62 | " 0.0 0.0 -0.351061 -0.188552\n",
63 | " 0.0 0.0 0.0 0.575913"
64 | ]
65 | },
66 | "execution_count": 4,
67 | "metadata": {},
68 | "output_type": "execute_result"
69 | }
70 | ],
71 | "source": [
72 | "Q,R = qr(A)"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 6,
78 | "metadata": {},
79 | "outputs": [
80 | {
81 | "data": {
82 | "text/plain": [
83 | "4×4 Array{Float64,2}:\n",
84 | " 1.0 0.0 0.0 -0.0\n",
85 | " 0.0 1.0 -0.0 -0.0\n",
86 | " 0.0 -0.0 1.0 0.0\n",
87 | " -0.0 -0.0 0.0 1.0"
88 | ]
89 | },
90 | "execution_count": 6,
91 | "metadata": {},
92 | "output_type": "execute_result"
93 | }
94 | ],
95 | "source": [
96 | "round.(Q'Q, digits=0)"
97 | ]
98 | },
99 | {
100 | "cell_type": "code",
101 | "execution_count": 8,
102 | "metadata": {},
103 | "outputs": [
104 | {
105 | "data": {
106 | "text/plain": [
107 | "4×4 Array{Float64,2}:\n",
108 | " 1.0 0.0 -0.0 -0.0\n",
109 | " 0.0 1.0 0.0 -0.0\n",
110 | " -0.0 0.0 1.0 0.0\n",
111 | " -0.0 -0.0 0.0 1.0"
112 | ]
113 | },
114 | "execution_count": 8,
115 | "metadata": {},
116 | "output_type": "execute_result"
117 | }
118 | ],
119 | "source": [
120 | "round.(Q*Q', digits=0)"
121 | ]
122 | },
123 | {
124 | "cell_type": "markdown",
125 | "metadata": {},
126 | "source": [
127 | "## I'm happy, I see Q is orthogonal"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 9,
133 | "metadata": {},
134 | "outputs": [
135 | {
136 | "data": {
137 | "text/plain": [
138 | "true"
139 | ]
140 | },
141 | "execution_count": 9,
142 | "metadata": {},
143 | "output_type": "execute_result"
144 | }
145 | ],
146 | "source": [
147 | "A ≈ Q * R"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": 10,
153 | "metadata": {},
154 | "outputs": [
155 | {
156 | "data": {
157 | "text/plain": [
158 | "4×4 Array{Float64,2}:\n",
159 | " 0.330869 0.843845 0.0623888 0.398208\n",
160 | " 0.8661 0.204488 0.138221 0.218923\n",
161 | " 0.741009 0.775278 0.576722 0.9775 \n",
162 | " 0.87276 0.139498 0.072938 0.983904"
163 | ]
164 | },
165 | "execution_count": 10,
166 | "metadata": {},
167 | "output_type": "execute_result"
168 | }
169 | ],
170 | "source": [
171 | "Q * R"
172 | ]
173 | },
174 | {
175 | "cell_type": "code",
176 | "execution_count": 11,
177 | "metadata": {},
178 | "outputs": [
179 | {
180 | "data": {
181 | "text/plain": [
182 | "4×4 Array{Float64,2}:\n",
183 | " 0.330869 0.843845 0.0623888 0.398208\n",
184 | " 0.8661 0.204488 0.138221 0.218923\n",
185 | " 0.741009 0.775278 0.576722 0.9775 \n",
186 | " 0.87276 0.139498 0.072938 0.983904"
187 | ]
188 | },
189 | "execution_count": 11,
190 | "metadata": {},
191 | "output_type": "execute_result"
192 | }
193 | ],
194 | "source": [
195 | "A"
196 | ]
197 | },
198 | {
199 | "cell_type": "markdown",
200 | "metadata": {},
201 | "source": [
202 | "## Now a tall skinny example"
203 | ]
204 | },
205 | {
206 | "cell_type": "code",
207 | "execution_count": 12,
208 | "metadata": {},
209 | "outputs": [
210 | {
211 | "data": {
212 | "text/plain": [
213 | "5×3 Array{Float64,2}:\n",
214 | " 0.657045 0.214426 0.0462177\n",
215 | " 0.268263 0.208357 0.269215 \n",
216 | " 0.410459 0.948475 0.756601 \n",
217 | " 0.391947 0.683485 0.28925 \n",
218 | " 0.949405 0.999374 0.570501 "
219 | ]
220 | },
221 | "execution_count": 12,
222 | "metadata": {},
223 | "output_type": "execute_result"
224 | }
225 | ],
226 | "source": [
227 | "A = rand(5,3)"
228 | ]
229 | },
230 | {
231 | "cell_type": "code",
232 | "execution_count": 15,
233 | "metadata": {},
234 | "outputs": [
235 | {
236 | "data": {
237 | "text/plain": [
238 | "5×3 Array{Float64,2}:\n",
239 | " -0.499955 -0.62267 0.00434913\n",
240 | " -0.204125 -0.0946497 -0.628421 \n",
241 | " -0.312324 0.686918 -0.424575 \n",
242 | " -0.298239 0.362414 0.645285 \n",
243 | " -0.722416 0.0110753 0.0917176 "
244 | ]
245 | },
246 | "execution_count": 15,
247 | "metadata": {},
248 | "output_type": "execute_result"
249 | }
250 | ],
251 | "source": [
252 | "Q,R = qr(A)\n",
253 | "Q = Q[:,1:3] # make sure we have the first three columns"
254 | ]
255 | },
256 | {
257 | "cell_type": "code",
258 | "execution_count": 16,
259 | "metadata": {},
260 | "outputs": [
261 | {
262 | "data": {
263 | "text/plain": [
264 | "5×3 Array{Float64,2}:\n",
265 | " -0.499955 -0.62267 0.00434913\n",
266 | " -0.204125 -0.0946497 -0.628421 \n",
267 | " -0.312324 0.686918 -0.424575 \n",
268 | " -0.298239 0.362414 0.645285 \n",
269 | " -0.722416 0.0110753 0.0917176 "
270 | ]
271 | },
272 | "execution_count": 16,
273 | "metadata": {},
274 | "output_type": "execute_result"
275 | }
276 | ],
277 | "source": [
278 | "Q"
279 | ]
280 | },
281 | {
282 | "cell_type": "code",
283 | "execution_count": 17,
284 | "metadata": {},
285 | "outputs": [
286 | {
287 | "data": {
288 | "text/plain": [
289 | "3×3 Array{Float64,2}:\n",
290 | " -1.31421 -1.37177 -0.81277 \n",
291 | " 0.0 0.75706 0.57661 \n",
292 | " 0.0 0.0 -0.251239"
293 | ]
294 | },
295 | "execution_count": 17,
296 | "metadata": {},
297 | "output_type": "execute_result"
298 | }
299 | ],
300 | "source": [
301 | "R"
302 | ]
303 | },
304 | {
305 | "cell_type": "code",
306 | "execution_count": 18,
307 | "metadata": {},
308 | "outputs": [
309 | {
310 | "data": {
311 | "text/plain": [
312 | "5×3 Array{Float64,2}:\n",
313 | " 0.657045 0.214426 0.0462177\n",
314 | " 0.268263 0.208357 0.269215 \n",
315 | " 0.410459 0.948475 0.756601 \n",
316 | " 0.391947 0.683485 0.28925 \n",
317 | " 0.949405 0.999374 0.570501 "
318 | ]
319 | },
320 | "execution_count": 18,
321 | "metadata": {},
322 | "output_type": "execute_result"
323 | }
324 | ],
325 | "source": [
326 | "Q * R"
327 | ]
328 | },
329 | {
330 | "cell_type": "code",
331 | "execution_count": 19,
332 | "metadata": {},
333 | "outputs": [
334 | {
335 | "data": {
336 | "text/plain": [
337 | "5×3 Array{Float64,2}:\n",
338 | " 0.657045 0.214426 0.0462177\n",
339 | " 0.268263 0.208357 0.269215 \n",
340 | " 0.410459 0.948475 0.756601 \n",
341 | " 0.391947 0.683485 0.28925 \n",
342 | " 0.949405 0.999374 0.570501 "
343 | ]
344 | },
345 | "execution_count": 19,
346 | "metadata": {},
347 | "output_type": "execute_result"
348 | }
349 | ],
350 | "source": [
351 | "A"
352 | ]
353 | },
354 | {
355 | "cell_type": "code",
356 | "execution_count": 20,
357 | "metadata": {},
358 | "outputs": [
359 | {
360 | "data": {
361 | "text/plain": [
362 | "true"
363 | ]
364 | },
365 | "execution_count": 20,
366 | "metadata": {},
367 | "output_type": "execute_result"
368 | }
369 | ],
370 | "source": [
371 | "A ≈ Q * R"
372 | ]
373 | },
374 | {
375 | "cell_type": "code",
376 | "execution_count": 22,
377 | "metadata": {},
378 | "outputs": [
379 | {
380 | "data": {
381 | "text/plain": [
382 | "3×3 Array{Float64,2}:\n",
383 | " 1.0 -0.0 -0.0\n",
384 | " -0.0 1.0 -0.0\n",
385 | " -0.0 -0.0 1.0"
386 | ]
387 | },
388 | "execution_count": 22,
389 | "metadata": {},
390 | "output_type": "execute_result"
391 | }
392 | ],
393 | "source": [
394 | "round.(Q'Q, digits=0)"
395 | ]
396 | },
397 | {
398 | "cell_type": "code",
399 | "execution_count": 23,
400 | "metadata": {},
401 | "outputs": [
402 | {
403 | "data": {
404 | "text/plain": [
405 | "5×5 Array{Float64,2}:\n",
406 | " 0.637693 0.158256 -0.273422 -0.0737523 0.354678 \n",
407 | " 0.158256 0.445538 0.265548 -0.378935 0.0887777\n",
408 | " -0.273422 0.265548 0.749666 0.0681243 0.194295 \n",
409 | " -0.0737523 -0.378935 0.0681243 0.636683 0.27865 \n",
410 | " 0.354678 0.0887777 0.194295 0.27865 0.53042 "
411 | ]
412 | },
413 | "execution_count": 23,
414 | "metadata": {},
415 | "output_type": "execute_result"
416 | }
417 | ],
418 | "source": [
419 | "Q * Q'"
420 | ]
421 | },
422 | {
423 | "cell_type": "code",
424 | "execution_count": null,
425 | "metadata": {},
426 | "outputs": [],
427 | "source": []
428 | }
429 | ],
430 | "metadata": {
431 | "kernelspec": {
432 | "display_name": "Julia 1.3.1",
433 | "language": "julia",
434 | "name": "julia-1.3"
435 | },
436 | "language_info": {
437 | "file_extension": ".jl",
438 | "mimetype": "application/julia",
439 | "name": "julia",
440 | "version": "1.3.1"
441 | }
442 | },
443 | "nbformat": 4,
444 | "nbformat_minor": 2
445 | }
446 |
--------------------------------------------------------------------------------
/notes/Perron-Frobenius.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "The proof of the [Perron-Frobenius](https://en.wikipedia.org/wiki/Perron%E2%80%93Frobenius_theorem) theorem can seem very abstract, but if you play with some examples it is easier to understand.\n",
8 | "This notebook presents the proof with computational examples.<br>\n",
9 | "\n",
10 | "Step 4 below uses JuMP to turn Perron-Frobenius into a computational algorithm.\n",
11 | "<br>\n",
12 | "\n",
13 | "There are a few variations on the theorem some with more and some with less information\n",
14 | "but the basic version says that if A has all positive entries, then the maximum\n",
15 | "absolute eigenvalue is real and positive and there is a corresponding real positive eigenvector."
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "Step #1. Assume all(x.>0) and all(y.>0) and define τ as the minimum of y./x"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 93,
28 | "metadata": {},
29 | "outputs": [
30 | {
31 | "data": {
32 | "text/plain": [
33 | "τ (generic function with 1 method)"
34 | ]
35 | },
36 | "execution_count": 93,
37 | "metadata": {},
38 | "output_type": "execute_result"
39 | }
40 | ],
41 | "source": [
42 | "# Define τ(y,x) on vectors\n",
43 | "\n",
44 | "τ(y::Vector, x::Vector) = minimum(y./x)"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "metadata": {},
50 | "source": [
51 | "Notice that for 0 ≤ t ≤ τ(y,x) we have all(y .≥ t*x)"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": 11,
57 | "metadata": {},
58 | "outputs": [
59 | {
60 | "data": {
61 | "text/plain": [
62 | "2.0"
63 | ]
64 | },
65 | "execution_count": 11,
66 | "metadata": {},
67 | "output_type": "execute_result"
68 | }
69 | ],
70 | "source": [
71 | "# Example\n",
72 | "y = [10,5,6,9]\n",
73 | "x = [1,2,3,4]\n",
74 | "τ(y,x)"
75 | ]
76 | },
77 | {
78 | "cell_type": "code",
79 | "execution_count": 12,
80 | "metadata": {},
81 | "outputs": [
82 | {
83 | "data": {
84 | "text/plain": [
85 | "(true, true, false)"
86 | ]
87 | },
88 | "execution_count": 12,
89 | "metadata": {},
90 | "output_type": "execute_result"
91 | }
92 | ],
93 | "source": [
94 | "all(y.≥2x), all(y.≥1.99x),all(y.≥2.01x) # check these by hand"
95 | ]
96 | },
97 | {
98 | "cell_type": "markdown",
99 | "metadata": {},
100 | "source": [
101 | "Step #2. If all(A.>0) and all(y.≥0) and y is not the zero vector then all(A*y.>0) (strictly greater)"
102 | ]
103 | },
104 | {
105 | "cell_type": "code",
106 | "execution_count": 16,
107 | "metadata": {},
108 | "outputs": [
109 | {
110 | "data": {
111 | "text/plain": [
112 | "3-element Array{Float64,1}:\n",
113 | " 0.2\n",
114 | " 0.5\n",
115 | " 0.8"
116 | ]
117 | },
118 | "execution_count": 16,
119 | "metadata": {},
120 | "output_type": "execute_result"
121 | }
122 | ],
123 | "source": [
124 | "# Example\n",
125 | "A= [ 1 2 3;4 5 6; 7 8 9]\n",
126 | "y = [0, .1, .0]\n",
127 | "A * y # any one positive entry multiplies an entire positive column of A"
128 | ]
129 | },
130 | {
131 | "cell_type": "markdown",
132 | "metadata": {},
133 | "source": [
134 | "Step #3: <br> τ(Ax,x)=τ(A²x,Ax) if x is an eigenvector with all(x.≥0).<br>\n",
135 | "τ(Ax,x) < τ(A²x,Ax) if x is not an eigenvector.\n",
136 | "\n",
137 | "<br>\n",
138 | "Proof: If x is an eigenvector, then τ(Ax,x)=τ(A²x,Ax)= the corresponding eigenvalue.<br>\n",
139 | "If x is not an eigenvector, then letting y\n",
140 | "= Ax - τ(Ax,x) *x, then all(y.≥0) and y is not the 0 vector.<br>\n",
141 | "From Step 2, all(A*y.>0) or equivalently all(A²x .> τ(Ax,x) *Ax) from which we see\n",
142 | "τ(A²x,Ax) > τ(Ax,x)."
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 98,
148 | "metadata": {},
149 | "outputs": [
150 | {
151 | "data": {
152 | "text/plain": [
153 | "7-element Array{Float64,1}:\n",
154 | " 1.34884\n",
155 | " 2.40402\n",
156 | " 2.68214\n",
157 | " 2.75781\n",
158 | " 2.77293\n",
159 | " 2.77552\n",
160 | " 2.77666"
161 | ]
162 | },
163 | "execution_count": 98,
164 | "metadata": {},
165 | "output_type": "execute_result"
166 | }
167 | ],
168 | "source": [
169 | "# An example\n",
170 | "n = 6\n",
171 | "A = rand(n,n)\n",
172 | "x = rand(n)\n",
173 | "[τ(A^k*x, A^(k-1)*x) for k=1:7] # This sequence will be increasing, but to an eig limit."
174 | ]
175 | },
176 | {
177 | "cell_type": "markdown",
178 | "metadata": {},
179 | "source": [
180 | "Step #4. Let tmax be the maximum of τ(Ax,x) for all non-zero x. We will prove mathematically that x is a positive eigenvector and τ(Ax,x) is the eigenvalue. Before we do it mathematically, let's see it computationally:"
181 | ]
182 | },
183 | {
184 | "cell_type": "markdown",
185 | "metadata": {},
186 | "source": [
187 | "One way to form this maximum problem is write this as a constrained optimization:\n",
188 | "\n",
189 | "$\max t$ subject to<br>\n",
190 | "$x_i \ge 0 $<br>\n",
191 | "$y=Ax$<br>\n",
192 | "$y[i]/x[i] \ge t$<br>\n",
193 | "$sum(x)=1$\n",
194 | " "
195 | ]
196 | },
197 | {
198 | "cell_type": "markdown",
199 | "metadata": {},
200 | "source": [
201 | "We will use the highly popular Julia [JuMP package](https://github.com/JuliaOpt/JuMP.jl) created at MIT (though not in math!), and used widely for operations research and in business schools:"
202 | ]
203 | },
204 | {
205 | "cell_type": "code",
206 | "execution_count": 4,
207 | "metadata": {},
208 | "outputs": [],
209 | "source": [
210 | "# Pkg.add(\"JuMP\")\n",
211 | "using JuMP\n",
212 | "# Pkg.add(\"Ipopt\") (On my mac, this worked with 0.6.2 but not 0.6.0)\n",
213 | "using Ipopt"
214 | ]
215 | },
216 | {
217 | "cell_type": "code",
218 | "execution_count": 74,
219 | "metadata": {},
220 | "outputs": [
221 | {
222 | "data": {
223 | "text/plain": [
224 | "7×7 Array{Float64,2}:\n",
225 | " 0.971603 0.325743 0.863038 0.0234046 0.962918 0.496618 0.799348\n",
226 | " 0.62474 0.140906 0.448296 0.505187 0.0646877 0.149136 0.205624\n",
227 | " 0.448767 0.58146 0.47302 0.443701 0.303789 0.114217 0.892493\n",
228 | " 0.808785 0.588347 0.839119 0.883789 0.920193 0.515088 0.22442 \n",
229 | " 0.0089511 0.242133 0.783681 0.420531 0.965035 0.544011 0.334241\n",
230 | " 0.300799 0.990369 0.401669 0.427284 0.207415 0.309122 0.329326\n",
231 | " 0.0314547 0.723179 0.476076 0.445037 0.249261 0.404243 0.502455"
232 | ]
233 | },
234 | "execution_count": 74,
235 | "metadata": {},
236 | "output_type": "execute_result"
237 | }
238 | ],
239 | "source": [
240 | "A = rand(7,7)"
241 | ]
242 | },
243 | {
244 | "cell_type": "code",
245 | "execution_count": 88,
246 | "metadata": {},
247 | "outputs": [
248 | {
249 | "data": {
250 | "text/plain": [
251 | "3.3686909584508244"
252 | ]
253 | },
254 | "execution_count": 88,
255 | "metadata": {},
256 | "output_type": "execute_result"
257 | }
258 | ],
259 | "source": [
260 | "n=size(A,1)\n",
261 | "\n",
262 | "m = Model(solver=IpoptSolver(print_level=2))\n",
263 | "@variable(m, t); @objective(m, Max, t)\n",
264 | "\n",
265 | "@variable(m, x[1:n]>=0); @constraint(m, sum(x)==1)\n",
266 | "@variable(m, y[1:n]); @constraint(m, y .== A*x)\n",
267 | "\n",
268 | "@NLconstraint(m, [i=1:n], t <= y[i]/x[i]) # nonlinear constraint\n",
269 | "\n",
270 | "status = solve(m)\n",
271 | "x = getvalue.(x)\n",
272 | "t = getobjectivevalue(m)"
273 | ]
274 | },
275 | {
276 | "cell_type": "code",
277 | "execution_count": 89,
278 | "metadata": {},
279 | "outputs": [
280 | {
281 | "data": {
282 | "text/plain": [
283 | "2.4817758919083086e-6"
284 | ]
285 | },
286 | "execution_count": 89,
287 | "metadata": {},
288 | "output_type": "execute_result"
289 | }
290 | ],
291 | "source": [
292 | "norm(A*x-t*x) # demonstrate we have found an eigenpair through optimization"
293 | ]
294 | },
295 | {
296 | "cell_type": "markdown",
297 | "metadata": {},
298 | "source": [
299 | "Step 5: Demonstrate that if x above were not an eigenvector, then the t could not have been the solution to the optimum problem."
300 | ]
301 | },
302 | {
303 | "cell_type": "markdown",
304 | "metadata": {},
305 | "source": [
306 | "As we saw in step 3, if x had not been an eigenvector, then τ(Ax,x) < τ(A²x,Ax), so τ(Ax,x) was not the maximum."
307 | ]
308 | },
309 | {
310 | "cell_type": "markdown",
311 | "metadata": {},
312 | "source": [
313 | "Step 6: Any complex eigenvector, eigenvalue pair has absolute eigenvalue <= tmax:"
314 | ]
315 | },
316 | {
317 | "cell_type": "markdown",
318 | "metadata": {},
319 | "source": [
320 | "If Ax = λx then all( A*abs.(x) .≥ abs(λ)*abs.(x)) by the triangle inequality. Thus abs(λ) <= tmax."
321 | ]
322 | },
323 | {
324 | "cell_type": "markdown",
325 | "metadata": {},
326 | "source": [
327 | "For example:"
328 | ]
329 | },
330 | {
331 | "cell_type": "code",
332 | "execution_count": 101,
333 | "metadata": {},
334 | "outputs": [
335 | {
336 | "data": {
337 | "text/plain": [
338 | "5×5 Array{Float64,2}:\n",
339 | " 0.996711 0.656579 0.453247 0.61344 0.50697 \n",
340 | " 0.591166 0.616613 0.987583 0.246784 0.442663 \n",
341 | " 0.949881 0.454748 0.831274 0.708647 0.458239 \n",
342 | " 0.069995 0.108182 0.0296905 0.434673 0.0322304\n",
343 | " 0.105186 0.918176 0.831151 0.126704 0.0709903"
344 | ]
345 | },
346 | "execution_count": 101,
347 | "metadata": {},
348 | "output_type": "execute_result"
349 | }
350 | ],
351 | "source": [
352 | "A = rand(5,5)"
353 | ]
354 | },
355 | {
356 | "cell_type": "code",
357 | "execution_count": 102,
358 | "metadata": {},
359 | "outputs": [
360 | {
361 | "data": {
362 | "text/plain": [
363 | "5-element Array{Complex{Float64},1}:\n",
364 | " 2.58617+0.0im \n",
365 | " 0.125586+0.34277im\n",
366 | " 0.125586-0.34277im\n",
367 | " 0.351225+0.0im \n",
368 | " -0.238306+0.0im "
369 | ]
370 | },
371 | "execution_count": 102,
372 | "metadata": {},
373 | "output_type": "execute_result"
374 | }
375 | ],
376 | "source": [
377 | "eigvals(A)"
378 | ]
379 | },
380 | {
381 | "cell_type": "code",
382 | "execution_count": 108,
383 | "metadata": {},
384 | "outputs": [
385 | {
386 | "data": {
387 | "text/plain": [
388 | "0.1255862966495158 + 0.3427698069874712im"
389 | ]
390 | },
391 | "execution_count": 108,
392 | "metadata": {},
393 | "output_type": "execute_result"
394 | }
395 | ],
396 | "source": [
397 | "Λ,X=eig(A);x=X[:,2];λ=Λ[2]"
398 | ]
399 | },
400 | {
401 | "cell_type": "code",
402 | "execution_count": 109,
403 | "metadata": {},
404 | "outputs": [
405 | {
406 | "data": {
407 | "text/plain": [
408 | "8.355107029738416e-16"
409 | ]
410 | },
411 | "execution_count": 109,
412 | "metadata": {},
413 | "output_type": "execute_result"
414 | }
415 | ],
416 | "source": [
417 | "norm(A*x-λ*x)"
418 | ]
419 | },
420 | {
421 | "cell_type": "code",
422 | "execution_count": 112,
423 | "metadata": {},
424 | "outputs": [
425 | {
426 | "data": {
427 | "text/plain": [
428 | "0.6784932085048402"
429 | ]
430 | },
431 | "execution_count": 112,
432 | "metadata": {},
433 | "output_type": "execute_result"
434 | }
435 | ],
436 | "source": [
437 | "τ(A*abs.(x),abs.(x)) - abs(λ) # This is non-negative"
438 | ]
439 | },
440 | {
441 | "cell_type": "code",
442 | "execution_count": null,
443 | "metadata": {},
444 | "outputs": [],
445 | "source": []
446 | }
447 | ],
448 | "metadata": {
449 | "kernelspec": {
450 | "display_name": "Julia 0.6.2",
451 | "language": "julia",
452 | "name": "julia-0.6"
453 | },
454 | "language_info": {
455 | "file_extension": ".jl",
456 | "mimetype": "application/julia",
457 | "name": "julia",
458 | "version": "0.6.2"
459 | }
460 | },
461 | "nbformat": 4,
462 | "nbformat_minor": 2
463 | }
464 |
--------------------------------------------------------------------------------
/notes/Singular.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Singular matrices: A first look\n",
8 | "\n",
9 | "If we encounter a zero pivot (or even just a small pivot, on a computer) during Gaussian elimination, we normally swap rows to bring a nonzero pivot up from a subsequent row. However, what if there are *no* nonzero values below the pivot in that column? This is called a [singular matrix](https://en.wikipedia.org/wiki/Invertible_matrix): we can still proceed with Gaussian elimination, but **we can't get rid of the zero pivot**.\n",
10 | "\n",
11 | "If you have $Ax=b$ where $A$ is singular, then there will typically (for most right-hand sides $b$) be **no solutions**, but there will occasionally (for very special $b$) be **infinitely many solutions**. (For $2 \\times 2$ matrices, solving $Ax=b$ corresponds to finding the intersection of two lines, and a singular case corresponds to two parallel lines — either there are no intersections, or they intersect everywhere.)\n",
12 | "\n",
13 | "For example, consider the following $4 \\times 4$ matrix $A=LU$:\n",
14 | "\n",
15 | "$$\n",
16 | "\\underbrace{\\begin{pmatrix} \n",
17 | " 2 & -1 & 0 & 3 \\\\\n",
18 | " 4 & -1 & 1 & 8 \\\\\n",
19 | " 6 & 1 & 4 & 15 \\\\\n",
20 | " 2 & -1 & 0 & 0 \\\\\n",
21 | " \\end{pmatrix}}_A =\n",
22 | "\\underbrace{\\begin{pmatrix} \n",
23 | " 1 & 0 & 0 & 0 \\\\\n",
24 | " 2 & 1 & 0 & 0 \\\\\n",
25 | " 3 & 4 & 1 & 0 \\\\\n",
26 | " 1 & 0 & 2 & 1 \\\\\n",
27 | " \\end{pmatrix}}_L\n",
28 | "\\underbrace{\\begin{pmatrix} \n",
29 | " \\color{blue}{2} & -1 & 0 & 3 \\\\\n",
30 | " 0 & \\color{blue}{1} & 1 & 2 \\\\\n",
31 | " 0 & 0 & \\color{red}{0} & \\color{blue}{-2} \\\\\n",
32 | " 0 & 0 & 0 & 1 \\\\\n",
33 | " \\end{pmatrix}}_U\n",
34 | "$$\n",
35 | "\n",
36 | "In the **third column, we got zeros** where we were hoping for a pivot. So, we **only have three pivots (blue)** in this case. Now, suppose we want to solve $Ax=b$. We first solve $Lc=b$ to apply the elimination steps to $b$. This is no problem since $L$ has 1's along the diagonal. Suppose we get $c = (c_1, c_2, c_3, c_4)$. Then we proceed by backsubstitution to solve $Ux = c$, starting with the last row of $U$:\n",
37 | "\n",
38 | "$$\n",
39 | "1 \\times x_4 = c_4 \\implies x_4 = c_4 \\\\\n",
40 | "\\color{red}{0 \\times x_3} - 2 \\times x_4 = c_3 \\implies \\mbox{no solution unless } -2 x_4 = -2 c_4 = c_3\n",
41 | "$$\n",
42 | "For very special right-hand sides, where $c_3 = 2c_4$, we can plug in *any* $x_3$ and get a solution (infinitely many solutions). Otherwise, we get *no* solutions."
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 1,
48 | "metadata": {},
49 | "outputs": [
50 | {
51 | "data": {
52 | "text/plain": [
53 | "4×4 Matrix{Int64}:\n",
54 | " 2 -1 0 3\n",
55 | " 4 -1 1 8\n",
56 | " 6 1 4 15\n",
57 | " 2 -1 0 0"
58 | ]
59 | },
60 | "execution_count": 1,
61 | "metadata": {},
62 | "output_type": "execute_result"
63 | }
64 | ],
65 | "source": [
66 | "[1 0 0 0\n",
67 | " 2 1 0 0\n",
68 | " 3 4 1 0\n",
69 | " 1 0 2 1 ] *\n",
70 | "[2 -1 0 3\n",
71 | " 0 1 1 2\n",
72 | " 0 0 0 -2\n",
73 | " 0 0 0 1 ]"
74 | ]
75 | },
76 | {
77 | "cell_type": "markdown",
78 | "metadata": {},
79 | "source": [
80 | "You may think that singular cases are not very interesting. In reality, **exactly singular square matrices never occur by accident**. There is always some *deep structure of the underlying problem* that causes the singularity, and understanding this structure is *always* interesting.\n",
81 | "\n",
82 | "On the other hand, **nearly singular** matrices (where the pivots are nonzero but very small) *can* occur by accident, and dealing with them is often a delicate problem because they are very sensitive to roundoff errors. (We call these matrices [ill-conditioned](https://en.wikipedia.org/wiki/Condition_number).) But that's mostly not a topic for 18.06.\n",
83 | "\n",
84 | "Singular **non-square** systems, where you have **more equations than unknowns** are *very* common and important, and lead to *fitting* problems where one *minimizes the error* in the solution. We will talk more about this soon in 18.06."
85 | ]
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {},
90 | "source": [
91 | "Some matrices are **more singular than others**. For example, they can have **two pivots**:\n",
92 | "\n",
93 | "$$\n",
94 | "\\underbrace{\\begin{pmatrix} \n",
95 | " 2 & -1 & 0 & 3 \\\\\n",
96 | " 4 & -2 & 1 & 8 \\\\\n",
97 | " 6 & 3 & 4 & 17 \\\\\n",
98 | " 2 & -1 & 0 & 3 \\\\\n",
99 | " \\end{pmatrix}}_A =\n",
100 | "\\underbrace{\\begin{pmatrix} \n",
101 | " 1 & 0 & 0 & 0 \\\\\n",
102 | " 2 & 1 & 0 & 0 \\\\\n",
103 | " 3 & 4 & 1 & 0 \\\\\n",
104 | " 1 & 0 & 2 & 1 \\\\\n",
105 | " \\end{pmatrix}}_L\n",
106 | "\\underbrace{\\begin{pmatrix} \n",
107 | " \\color{blue}{2} & -1 & 0 & 3 \\\\\n",
108 | " 0 & 0 & \\color{blue}{1} & 2 \\\\\n",
109 | " 0 & 0 & \\color{red}{0} & \\color{red}{0} \\\\\n",
110 | " 0 & 0 & 0 & \\color{red}{0} \\\\\n",
111 | " \\end{pmatrix}}_U\n",
112 | "$$\n",
113 | "\n",
114 | "or **one pivot**:\n",
115 | "\n",
116 | "$$\n",
117 | "\\underbrace{\\begin{pmatrix} \n",
118 | " 2 & -1 & 0 & 3 \\\\\n",
119 | " 4 & -2 & 0 & 6 \\\\\n",
120 | " 6 & 3 & 0 & 9 \\\\\n",
121 | " 2 & -1 & 0 & 3 \\\\\n",
122 | " \\end{pmatrix}}_A =\n",
123 | "\\underbrace{\\begin{pmatrix} \n",
124 | " 1 & 0 & 0 & 0 \\\\\n",
125 | " 2 & 1 & 0 & 0 \\\\\n",
126 | " 3 & 4 & 1 & 0 \\\\\n",
127 | " 1 & 0 & 2 & 1 \\\\\n",
128 | " \\end{pmatrix}}_L\n",
129 | "\\underbrace{\\begin{pmatrix} \n",
130 | " \\color{blue}{2} & -1 & 0 & 3 \\\\\n",
131 | " 0 & 0 & 0 & 0 \\\\\n",
132 | " 0 & 0 & 0 & 0 \\\\\n",
133 | " 0 & 0 & 0 & 0 \\\\\n",
134 | " \\end{pmatrix}}_U\n",
135 | "$$\n",
136 | "\n",
137 | "or **zero pivots**:\n",
138 | "\n",
139 | "$$\n",
140 | "\\underbrace{\\begin{pmatrix} \n",
141 | " 0 & 0 & 0 & 0 \\\\\n",
142 | " 0 & 0 & 0 & 0 \\\\\n",
143 | " 0 & 0 & 0 & 0 \\\\\n",
144 | " 0 & 0 & 0 & 0 \\\\\n",
145 | " \\end{pmatrix}}_A =\n",
146 | "\\underbrace{\\begin{pmatrix} \n",
147 | " 1 & 0 & 0 & 0 \\\\\n",
148 | " 2 & 1 & 0 & 0 \\\\\n",
149 | " 3 & 4 & 1 & 0 \\\\\n",
150 | " 1 & 0 & 2 & 1 \\\\\n",
151 | " \\end{pmatrix}}_L\n",
152 | "\\underbrace{\\begin{pmatrix} \n",
153 | " 0 & 0 & 0 & 0 \\\\\n",
154 | " 0 & 0 & 0 & 0 \\\\\n",
155 | " 0 & 0 & 0 & 0 \\\\\n",
156 | " 0 & 0 & 0 & 0 \\\\\n",
157 | " \\end{pmatrix}}_U\n",
158 | "$$\n",
159 | "\n",
160 | "If $A$ is the zero matrix, then $Ax=b$ only has solutions when $b=0$, and then *any* $x$ is a solution!\n",
161 | "\n",
162 | "Intuitively, having fewer pivots seems \"more singular\", and requires \"more coincidences\" in the right-hand side to have a solution, and has a \"bigger infinity\" of solutions when there *is* a solution. We will quantify these intuitions in 18.06, starting with the notion of the [rank](https://en.wikipedia.org/wiki/Rank_(linear_algebra)) of a matrix.\n",
163 | "\n",
164 | "* The **rank = r** of the matrix is the **number of (nonzero) pivots** obtained by elimination (with row swaps if needed) for an $m \\times n$ matrix $A$. \n",
165 | "\n",
166 | "* $r \\le m$ and $r \\le n$ because you can't have more pivots than you have rows or columns.\n",
167 | "\n",
168 | "The smaller the rank is compared to the size of the matrix, the \"more singular\" it is. Pretty soon we will understand this better."
169 | ]
170 | },
171 | {
172 | "cell_type": "code",
173 | "execution_count": 2,
174 | "metadata": {},
175 | "outputs": [
176 | {
177 | "data": {
178 | "text/plain": [
179 | "4×4 Matrix{Int64}:\n",
180 | " 2 -1 0 3\n",
181 | " 4 -2 1 8\n",
182 | " 6 -3 4 17\n",
183 | " 2 -1 0 3"
184 | ]
185 | },
186 | "execution_count": 2,
187 | "metadata": {},
188 | "output_type": "execute_result"
189 | }
190 | ],
191 | "source": [
192 | "[1 0 0 0\n",
193 | " 2 1 0 0\n",
194 | " 3 4 1 0\n",
195 | " 1 0 2 1 ] *\n",
196 | "[2 -1 0 3\n",
197 | " 0 0 1 2\n",
198 | " 0 0 0 0\n",
199 | " 0 0 0 0 ]"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 3,
205 | "metadata": {},
206 | "outputs": [
207 | {
208 | "data": {
209 | "text/plain": [
210 | "4×4 Matrix{Int64}:\n",
211 | " 2 -1 0 3\n",
212 | " 4 -2 0 6\n",
213 | " 6 -3 0 9\n",
214 | " 2 -1 0 3"
215 | ]
216 | },
217 | "execution_count": 3,
218 | "metadata": {},
219 | "output_type": "execute_result"
220 | }
221 | ],
222 | "source": [
223 | "[1 0 0 0\n",
224 | " 2 1 0 0\n",
225 | " 3 4 1 0\n",
226 | " 1 0 2 1 ] *\n",
227 | "[2 -1 0 3\n",
228 | " 0 0 0 0\n",
229 | " 0 0 0 0\n",
230 | " 0 0 0 0 ]"
231 | ]
232 | },
233 | {
234 | "cell_type": "markdown",
235 | "metadata": {},
236 | "source": [
237 | "Note that if we encounter zeros in a column where we were hoping for a pivot, and we can't get a nonzero element by swapping rows, we skip to the next column. The following example is **rank 2**, not rank 0:\n",
238 | "\n",
239 | "$$\n",
240 | "\\underbrace{\\begin{pmatrix} \n",
241 | " 0 & -1 & 0 & 3 \\\\\n",
242 | " 0 & -2 & 0 & 8 \\\\\n",
243 | " 0 & 3 & 0 & 17 \\\\\n",
244 | " 0 & -1 & 0 & 3 \\\\\n",
245 | " \\end{pmatrix}}_A =\n",
246 | "\\underbrace{\\begin{pmatrix} \n",
247 | " 1 & 0 & 0 & 0 \\\\\n",
248 | " 2 & 1 & 0 & 0 \\\\\n",
249 | " 3 & 4 & 1 & 0 \\\\\n",
250 | " 1 & 0 & 2 & 1 \\\\\n",
251 | " \\end{pmatrix}}_L\n",
252 | "\\underbrace{\\begin{pmatrix} \n",
253 | " 0 & \\color{blue}{-1} & 0 & 3 \\\\\n",
254 | " 0 & 0 & 0 & \\color{blue}{2} \\\\\n",
255 | " 0 & 0 & 0 & 0 \\\\\n",
256 | " 0 & 0 & 0 & 0 \\\\\n",
257 | " \\end{pmatrix}}_U\n",
258 | "$$\n",
259 | "\n",
260 | "That is, if we encounter *all zeros* in a column where we were hoping for a pivot, we skip to the next column for our pivot and continue eliminating below the pivots."
261 | ]
262 | },
263 | {
264 | "cell_type": "code",
265 | "execution_count": 4,
266 | "metadata": {},
267 | "outputs": [
268 | {
269 | "data": {
270 | "text/plain": [
271 | "4×4 Matrix{Int64}:\n",
272 | " 0 -1 0 3\n",
273 | " 0 -2 0 8\n",
274 | " 0 -3 0 17\n",
275 | " 0 -1 0 3"
276 | ]
277 | },
278 | "execution_count": 4,
279 | "metadata": {},
280 | "output_type": "execute_result"
281 | }
282 | ],
283 | "source": [
284 | "[1 0 0 0\n",
285 | " 2 1 0 0\n",
286 | " 3 4 1 0\n",
287 | " 1 0 2 1 ] *\n",
288 | "[0 -1 0 3\n",
289 | " 0 0 0 2\n",
290 | " 0 0 0 0\n",
291 | " 0 0 0 0 ]"
292 | ]
293 | },
294 | {
295 | "cell_type": "markdown",
296 | "metadata": {},
297 | "source": [
298 | "# More to come\n",
299 | "\n",
300 | "Much of the material in the second part of 18.06 (somewhat in exam 1, but especially in exam 2) will be focused on how we understand **singular and non-square** systems of equations.\n",
301 | "\n",
302 | "It turns out that there are lots of interesting things to say and do about systems of equations that may not have solutions. We don't just give up!"
303 | ]
304 | }
305 | ],
306 | "metadata": {
307 | "@webio": {
308 | "lastCommId": null,
309 | "lastKernelId": null
310 | },
311 | "kernelspec": {
312 | "display_name": "Julia 1.7.1",
313 | "language": "julia",
314 | "name": "julia-1.7"
315 | },
316 | "language_info": {
317 | "file_extension": ".jl",
318 | "mimetype": "application/julia",
319 | "name": "julia",
320 | "version": "1.7.1"
321 | }
322 | },
323 | "nbformat": 4,
324 | "nbformat_minor": 2
325 | }
326 |
--------------------------------------------------------------------------------
/notes/Gram-Schmidt.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Gram–Schmidt orthogonalization\n",
8 | "\n",
9 | "Chapter 4.4 illustrates a hand technique for computing orthonormal vectors q₁,q₂,… from arbitrary vectors a,b,… with the property that the first k vectors in the original set span the same subspace as the orthonormal set, and this is true for k=1,2,3,...\n",
10 | "\n",
11 | "We will move this hand technique to the computer in this notebook. Some of you will notice that on the computer one can combine operations in a simpler block fashion. "
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": 1,
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "using LinearAlgebra"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 2,
26 | "metadata": {},
27 | "outputs": [
28 | {
29 | "data": {
30 | "text/plain": [
31 | "6×4 Matrix{Int64}:\n",
32 | " 5 10 6 8\n",
33 | " 10 6 5 1\n",
34 | " 2 1 7 3\n",
35 | " 7 5 4 8\n",
36 | " 1 3 8 3\n",
37 | " 1 8 9 4"
38 | ]
39 | },
40 | "execution_count": 2,
41 | "metadata": {},
42 | "output_type": "execute_result"
43 | }
44 | ],
45 | "source": [
46 | "# start with four arbitrary independent vectors in ℝᵐ\n",
47 | "# with random entries from 1 to 10.\n",
48 | "m = 6\n",
49 | "a₁ = rand(1:10,m)\n",
50 | "a₂ = rand(1:10,m)\n",
51 | "a₃ = rand(1:10,m)\n",
52 | "a₄ = rand(1:10,m)\n",
53 | "A = [a₁ a₂ a₃ a₄] # show them as the columns of a 6×4 matrix A"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": 3,
59 | "metadata": {},
60 | "outputs": [
61 | {
62 | "data": {
63 | "text/plain": [
64 | "6×4 Matrix{Float64}:\n",
65 | " 5.0 5.61111 -3.16215 1.37162\n",
66 | " 10.0 -2.77778 -0.0979465 -3.70534\n",
67 | " 2.0 -0.755556 6.16936 1.22464\n",
68 | " 7.0 -1.14444 -0.324354 4.20193\n",
69 | " 1.0 2.12222 5.22283 0.0756904\n",
70 | " 1.0 7.12222 1.49913 -1.74319"
71 | ]
72 | },
73 | "execution_count": 3,
74 | "metadata": {},
75 | "output_type": "execute_result"
76 | }
77 | ],
78 | "source": [
79 | "# The vₖ are vectors, but they are all orthogonal and\n",
80 | "#span([v₁]) = span([a₁])\n",
81 | "#span([v₁ v₂]) = span([a₁ a₂])\n",
82 | "#span([v₁ v₂ v₃]) = span([a₁ a₂ a₃] )\n",
83 | "#span([v₁ v₂ v₃ v₄]) = span([a₁ a₂ a₃ a₄])\n",
84 | "v₁ = a₁\n",
85 | "v₂ = a₂ - v₁*(v₁'a₂)/(v₁'v₁)\n",
86 | "v₃ = a₃ - v₁*(v₁'a₃)/(v₁'v₁) - v₂*(v₂'a₃)/(v₂'v₂)\n",
87 | "v₄ = a₄ - v₁*(v₁'a₄)/(v₁'v₁) - v₂*(v₂'a₄)/(v₂'v₂) - v₃*(v₃'a₄)/(v₃'v₃)\n",
88 | "\n",
89 | "# gather into a matrix V with orthogonal but *not* orthonormal columns\n",
90 | "V = [v₁ v₂ v₃ v₄]"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": 4,
96 | "metadata": {},
97 | "outputs": [
98 | {
99 | "data": {
100 | "text/plain": [
101 | "6×4 Matrix{Float64}:\n",
102 | " 0.372678 0.571756 -0.358733 0.223061\n",
103 | " 0.745356 -0.283047 -0.0111116 -0.602583\n",
104 | " 0.149071 -0.0769889 0.699888 0.199157\n",
105 | " 0.521749 -0.116616 -0.0367966 0.683342\n",
106 | " 0.0745356 0.216248 0.592508 0.0123092\n",
107 | " 0.0745356 0.725734 0.170071 -0.283488"
108 | ]
109 | },
110 | "execution_count": 4,
111 | "metadata": {},
112 | "output_type": "execute_result"
113 | }
114 | ],
115 | "source": [
116 | "# now we normalize\n",
117 | "q₁ = normalize(v₁)\n",
118 | "q₂ = normalize(v₂)\n",
119 | "q₃ = normalize(v₃)\n",
120 | "q₄ = normalize(v₄);\n",
121 | "\n",
122 | "# Gather into a matrix Q with orthonormal columns\n",
123 | "Q = [q₁ q₂ q₃ q₄]"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": 5,
129 | "metadata": {},
130 | "outputs": [
131 | {
132 | "data": {
133 | "text/plain": [
134 | "4×4 Matrix{Float64}:\n",
135 | " 1.0 -9.21828e-17 8.99478e-17 1.35877e-16\n",
136 | " -9.21828e-17 1.0 1.0142e-17 1.42552e-16\n",
137 | " 8.99478e-17 1.0142e-17 1.0 -2.09229e-16\n",
138 | " 1.35877e-16 1.42552e-16 -2.09229e-16 1.0"
139 | ]
140 | },
141 | "execution_count": 5,
142 | "metadata": {},
143 | "output_type": "execute_result"
144 | }
145 | ],
146 | "source": [
147 | "#check that Q has orthonormal columns\n",
148 | "Q'Q"
149 | ]
150 | },
151 | {
152 | "cell_type": "code",
153 | "execution_count": 6,
154 | "metadata": {},
155 | "outputs": [
156 | {
157 | "data": {
158 | "text/plain": [
159 | "4×4 Matrix{Float64}:\n",
160 | " 0.0 -9.21828e-17 8.99478e-17 1.35877e-16\n",
161 | " -9.21828e-17 0.0 1.0142e-17 1.42552e-16\n",
162 | " 8.99478e-17 1.0142e-17 0.0 -2.09229e-16\n",
163 | " 1.35877e-16 1.42552e-16 -2.09229e-16 2.22045e-16"
164 | ]
165 | },
166 | "execution_count": 6,
167 | "metadata": {},
168 | "output_type": "execute_result"
169 | }
170 | ],
171 | "source": [
172 | "Q'Q - I"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": 7,
178 | "metadata": {},
179 | "outputs": [
180 | {
181 | "data": {
182 | "text/plain": [
183 | "true"
184 | ]
185 | },
186 | "execution_count": 7,
187 | "metadata": {},
188 | "output_type": "execute_result"
189 | }
190 | ],
191 | "source": [
192 | "Q'Q ≈ I"
193 | ]
194 | },
195 | {
196 | "cell_type": "code",
197 | "execution_count": 8,
198 | "metadata": {},
199 | "outputs": [
200 | {
201 | "data": {
202 | "text/plain": [
203 | "4×4 Matrix{Float64}:\n",
204 | " 180.0 -1.42109e-14 1.33227e-14 1.28786e-14\n",
205 | " -1.42109e-14 96.3111 9.32625e-15 6.94211e-15\n",
206 | " 1.33227e-14 9.32625e-15 77.7003 -1.16249e-14\n",
207 | " 1.28786e-14 6.94211e-15 -1.16249e-14 37.8113"
208 | ]
209 | },
210 | "execution_count": 8,
211 | "metadata": {},
212 | "output_type": "execute_result"
213 | }
214 | ],
215 | "source": [
216 | "# compare to what happens if we didn't normalize:\n",
217 | "V'V # = diagonal matrix (orthogonal columns, but not orthonormal)"
218 | ]
219 | },
220 | {
221 | "cell_type": "code",
222 | "execution_count": 9,
223 | "metadata": {},
224 | "outputs": [
225 | {
226 | "data": {
227 | "text/plain": [
228 | "4×4 Matrix{Float64}:\n",
229 | " 13.4164 11.7766 10.3605 8.86974\n",
230 | " -0.0 9.81382 9.2715 6.67879\n",
231 | " 0.0 0.0 8.81478 1.38213\n",
232 | " 0.0 0.0 0.0 6.14909"
233 | ]
234 | },
235 | "execution_count": 9,
236 | "metadata": {},
237 | "output_type": "execute_result"
238 | }
239 | ],
240 | "source": [
241 | "# What does this triangular structure say?\n",
242 | "round.(Q'A, digits=5)"
243 | ]
244 | },
245 | {
246 | "cell_type": "markdown",
247 | "metadata": {},
248 | "source": [
249 | "## QR factorization\n",
250 | "\n",
251 | "How do we do all this at once on a computer? We ask the computer to factor the matrix as $QR$ (orthonormal columns times upper triangular)."
252 | ]
253 | },
254 | {
255 | "cell_type": "code",
256 | "execution_count": 10,
257 | "metadata": {
258 | "scrolled": true
259 | },
260 | "outputs": [
261 | {
262 | "data": {
263 | "text/plain": [
264 | "LinearAlgebra.QRCompactWY{Float64, Matrix{Float64}}\n",
265 | "Q factor:\n",
266 | "6×6 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}}:\n",
267 | " -0.372678 0.571756 0.358733 -0.223061 -0.0840586 -0.590504\n",
268 | " -0.745356 -0.283047 0.0111116 0.602583 0.0189054 -0.0272178\n",
269 | " -0.149071 -0.0769889 -0.699888 -0.199157 -0.619405 -0.242243\n",
270 | " -0.521749 -0.116616 0.0367966 -0.683342 0.131189 0.478182\n",
271 | " -0.0745356 0.216248 -0.592508 -0.0123092 0.74464 -0.204877\n",
272 | " -0.0745356 0.725734 -0.170071 0.283488 -0.192913 0.566789\n",
273 | "R factor:\n",
274 | "4×4 Matrix{Float64}:\n",
275 | " -13.4164 -11.7766 -10.3604 -8.86974\n",
276 | " 0.0 9.81382 9.2715 6.67879\n",
277 | " 0.0 0.0 -8.81478 -1.38213\n",
278 | " 0.0 0.0 0.0 -6.14909"
279 | ]
280 | },
281 | "execution_count": 10,
282 | "metadata": {},
283 | "output_type": "execute_result"
284 | }
285 | ],
286 | "source": [
287 | "F = qr(A) # returns a \"factorization object\" that stores both Q (implicitly) and R"
288 | ]
289 | },
290 | {
291 | "cell_type": "code",
292 | "execution_count": 11,
293 | "metadata": {},
294 | "outputs": [
295 | {
296 | "data": {
297 | "text/plain": [
298 | "4×4 Matrix{Float64}:\n",
299 | " -13.4164 -11.7766 -10.3604 -8.86974\n",
300 | " 0.0 9.81382 9.2715 6.67879\n",
301 | " 0.0 0.0 -8.81478 -1.38213\n",
302 | " 0.0 0.0 0.0 -6.14909"
303 | ]
304 | },
305 | "execution_count": 11,
306 | "metadata": {},
307 | "output_type": "execute_result"
308 | }
309 | ],
310 | "source": [
311 | "R = F.R"
312 | ]
313 | },
314 | {
315 | "cell_type": "code",
316 | "execution_count": 12,
317 | "metadata": {},
318 | "outputs": [
319 | {
320 | "data": {
321 | "text/plain": [
322 | "6×4 Matrix{Float64}:\n",
323 | " -0.372678 0.571756 0.358733 -0.223061\n",
324 | " -0.745356 -0.283047 0.0111116 0.602583\n",
325 | " -0.149071 -0.0769889 -0.699888 -0.199157\n",
326 | " -0.521749 -0.116616 0.0367966 -0.683342\n",
327 | " -0.0745356 0.216248 -0.592508 -0.0123092\n",
328 | " -0.0745356 0.725734 -0.170071 0.283488"
329 | ]
330 | },
331 | "execution_count": 12,
332 | "metadata": {},
333 | "output_type": "execute_result"
334 | }
335 | ],
336 | "source": [
337 | "Q2 = Matrix(F.Q) # extract the \"thin\" QR factor you would get from Gram–Schmidt"
338 | ]
339 | },
340 | {
341 | "cell_type": "code",
342 | "execution_count": 13,
343 | "metadata": {},
344 | "outputs": [
345 | {
346 | "data": {
347 | "text/plain": [
348 | "4×4 Matrix{Float64}:\n",
349 | " -1.0 -0.0 -0.0 0.0\n",
350 | " 0.0 1.0 0.0 -0.0\n",
351 | " -0.0 0.0 -1.0 0.0\n",
352 | " -0.0 -0.0 -0.0 -1.0"
353 | ]
354 | },
355 | "execution_count": 13,
356 | "metadata": {},
357 | "output_type": "execute_result"
358 | }
359 | ],
360 | "source": [
361 | "round.(Q'Q2, digits=5) # almost I, up to signs"
362 | ]
363 | },
364 | {
365 | "cell_type": "code",
366 | "execution_count": 14,
367 | "metadata": {},
368 | "outputs": [
369 | {
370 | "data": {
371 | "text/plain": [
372 | "4×4 Matrix{Float64}:\n",
373 | " -13.4164 -11.7766 -10.3604 -8.86974\n",
374 | " 0.0 9.81382 9.2715 6.67879\n",
375 | " 0.0 0.0 -8.81478 -1.38213\n",
376 | " 0.0 0.0 0.0 -6.14909"
377 | ]
378 | },
379 | "execution_count": 14,
380 | "metadata": {},
381 | "output_type": "execute_result"
382 | }
383 | ],
384 | "source": [
385 | "R # Recognize this matrix?"
386 | ]
387 | },
388 | {
389 | "cell_type": "code",
390 | "execution_count": 15,
391 | "metadata": {},
392 | "outputs": [
393 | {
394 | "data": {
395 | "text/plain": [
396 | "true"
397 | ]
398 | },
399 | "execution_count": 15,
400 | "metadata": {},
401 | "output_type": "execute_result"
402 | }
403 | ],
404 | "source": [
405 | "Q2*R ≈ A"
406 | ]
407 | },
408 | {
409 | "cell_type": "code",
410 | "execution_count": 16,
411 | "metadata": {},
412 | "outputs": [
413 | {
414 | "data": {
415 | "text/plain": [
416 | "6-element Vector{Float64}:\n",
417 | " 0.5450097629781777\n",
418 | " 0.8599801580391012\n",
419 | " 0.7036387925825908\n",
420 | " 0.7553540639899048\n",
421 | " 0.7234080262946185\n",
422 | " 0.14725528162868073"
423 | ]
424 | },
425 | "execution_count": 16,
426 | "metadata": {},
427 | "output_type": "execute_result"
428 | }
429 | ],
430 | "source": [
431 | "b = rand(6)"
432 | ]
433 | },
434 | {
435 | "cell_type": "code",
436 | "execution_count": 17,
437 | "metadata": {},
438 | "outputs": [
439 | {
440 | "data": {
441 | "text/plain": [
442 | "4-element Vector{Float64}:\n",
443 | " 0.08995466028300682\n",
444 | " -0.0768023071730735\n",
445 | " 0.07513430689992019\n",
446 | " 0.03688677949256471"
447 | ]
448 | },
449 | "execution_count": 17,
450 | "metadata": {},
451 | "output_type": "execute_result"
452 | }
453 | ],
454 | "source": [
455 | "A \\ b"
456 | ]
457 | },
458 | {
459 | "cell_type": "code",
460 | "execution_count": 18,
461 | "metadata": {},
462 | "outputs": [
463 | {
464 | "data": {
465 | "text/plain": [
466 | "4-element Vector{Float64}:\n",
467 | " 0.08995466028300675\n",
468 | " -0.07680230717307351\n",
469 | " 0.07513430689992023\n",
470 | " 0.03688677949256475"
471 | ]
472 | },
473 | "execution_count": 18,
474 | "metadata": {},
475 | "output_type": "execute_result"
476 | }
477 | ],
478 | "source": [
479 | "inv(A'A) * A'b"
480 | ]
481 | },
482 | {
483 | "cell_type": "code",
484 | "execution_count": 19,
485 | "metadata": {},
486 | "outputs": [
487 | {
488 | "data": {
489 | "text/plain": [
490 | "4-element Vector{Float64}:\n",
491 | " 0.0899546602830068\n",
492 | " -0.07680230717307351\n",
493 | " 0.07513430689992022\n",
494 | " 0.03688677949256465"
495 | ]
496 | },
497 | "execution_count": 19,
498 | "metadata": {},
499 | "output_type": "execute_result"
500 | }
501 | ],
502 | "source": [
503 | "R \\ (Q2'b)[1:4]"
504 | ]
505 | },
506 | {
507 | "cell_type": "code",
508 | "execution_count": 20,
509 | "metadata": {},
510 | "outputs": [
511 | {
512 | "data": {
513 | "text/plain": [
514 | "4-element Vector{Float64}:\n",
515 | " 0.08995466028300678\n",
516 | " -0.07680230717307349\n",
517 | " 0.07513430689992023\n",
518 | " 0.03688677949256459"
519 | ]
520 | },
521 | "execution_count": 20,
522 | "metadata": {},
523 | "output_type": "execute_result"
524 | }
525 | ],
526 | "source": [
527 | "F \\ b # the factorization object F can be used directly for a least-square solve"
528 | ]
529 | },
530 | {
531 | "cell_type": "code",
532 | "execution_count": null,
533 | "metadata": {},
534 | "outputs": [],
535 | "source": []
536 | }
537 | ],
538 | "metadata": {
539 | "@webio": {
540 | "lastCommId": null,
541 | "lastKernelId": null
542 | },
543 | "kernelspec": {
544 | "display_name": "Julia 1.7.1",
545 | "language": "julia",
546 | "name": "julia-1.7"
547 | },
548 | "language_info": {
549 | "file_extension": ".jl",
550 | "mimetype": "application/julia",
551 | "name": "julia",
552 | "version": "1.7.1"
553 | }
554 | },
555 | "nbformat": 4,
556 | "nbformat_minor": 2
557 | }
558 |
--------------------------------------------------------------------------------
/notes/rank-r and full svds.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 15,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "using LinearAlgebra"
10 | ]
11 | },
12 | {
13 | "cell_type": "code",
14 | "execution_count": 16,
15 | "metadata": {},
16 | "outputs": [
17 | {
18 | "data": {
19 | "text/plain": [
20 | "rankrsvd (generic function with 1 method)"
21 | ]
22 | },
23 | "execution_count": 16,
24 | "metadata": {},
25 | "output_type": "execute_result"
26 | }
27 | ],
28 | "source": [
29 | "function fullsvd(A) \n",
30 | " U,s,V = svd(A, full = true) # compute svd\n",
31 | " Σ = zeros(size(A)) # container for Σ \n",
32 | " for i=1:length(s)\n",
33 | " Σ[i,i] = s[i] # place singular values in Σ\n",
34 | " end # a practical svd would never store all these zeros\n",
35 | " display(U);display(Σ);display(V) # display the answer\n",
36 | " return(U,Σ,V) # return the answer\n",
37 | "end\n",
38 | "\n",
39 | "\n",
40 | "function rankrsvd(A) \n",
41 | " U,s,V = svd(A, full = true) # compute svd\n",
42 | " r = sum(s.>1e-8) # rank = how many positive?\n",
43 | " U₁ = U[:,1:r]\n",
44 | " Σᵣ = Diagonal(s[1:r]) # Diagonal matrix of singular values\n",
45 | " V₁ = V[:,1:r]\n",
46 | " display(U₁);display(Σᵣ);display(V₁) # display the answer\n",
47 | " return(U₁,Σᵣ,V₁) # return the answer\n",
48 | "end"
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {},
54 | "source": [
55 | "## 1. random 2x2 matrix"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": 17,
61 | "metadata": {},
62 | "outputs": [
63 | {
64 | "data": {
65 | "text/plain": [
66 | "2×2 Array{Float64,2}:\n",
67 | " 0.259439 0.075927\n",
68 | " 0.898109 0.918728"
69 | ]
70 | },
71 | "execution_count": 17,
72 | "metadata": {},
73 | "output_type": "execute_result"
74 | }
75 | ],
76 | "source": [
77 | "A = rand(2,2)"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "execution_count": 18,
83 | "metadata": {},
84 | "outputs": [
85 | {
86 | "data": {
87 | "text/plain": [
88 | "2×2 Array{Float64,2}:\n",
89 | " -0.18222 -0.983258\n",
90 | " -0.983258 0.18222 "
91 | ]
92 | },
93 | "metadata": {},
94 | "output_type": "display_data"
95 | },
96 | {
97 | "data": {
98 | "text/plain": [
99 | "2×2 Array{Float64,2}:\n",
100 | " 1.30643 0.0 \n",
101 | " 0.0 0.13025"
102 | ]
103 | },
104 | "metadata": {},
105 | "output_type": "display_data"
106 | },
107 | {
108 | "data": {
109 | "text/plain": [
110 | "2×2 Adjoint{Float64,Array{Float64,2}}:\n",
111 | " -0.712128 -0.70205 \n",
112 | " -0.70205 0.712128"
113 | ]
114 | },
115 | "metadata": {},
116 | "output_type": "display_data"
117 | }
118 | ],
119 | "source": [
120 | "fullsvd(A);"
121 | ]
122 | },
123 | {
124 | "cell_type": "code",
125 | "execution_count": 19,
126 | "metadata": {},
127 | "outputs": [
128 | {
129 | "data": {
130 | "text/plain": [
131 | "2×2 Array{Float64,2}:\n",
132 | " -0.18222 -0.983258\n",
133 | " -0.983258 0.18222 "
134 | ]
135 | },
136 | "metadata": {},
137 | "output_type": "display_data"
138 | },
139 | {
140 | "data": {
141 | "text/plain": [
142 | "2×2 Diagonal{Float64,Array{Float64,1}}:\n",
143 | " 1.30643 ⋅ \n",
144 | " ⋅ 0.13025"
145 | ]
146 | },
147 | "metadata": {},
148 | "output_type": "display_data"
149 | },
150 | {
151 | "data": {
152 | "text/plain": [
153 | "2×2 Array{Float64,2}:\n",
154 | " -0.712128 -0.70205 \n",
155 | " -0.70205 0.712128"
156 | ]
157 | },
158 | "metadata": {},
159 | "output_type": "display_data"
160 | }
161 | ],
162 | "source": [
163 | "rankrsvd(A);"
164 | ]
165 | },
166 | {
167 | "cell_type": "markdown",
168 | "metadata": {},
169 | "source": [
170 | "## 2. random 3x2 matrix"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": 20,
176 | "metadata": {},
177 | "outputs": [
178 | {
179 | "data": {
180 | "text/plain": [
181 | "3×2 Array{Float64,2}:\n",
182 | " 0.464581 0.051883\n",
183 | " 0.9702 0.533329\n",
184 | " 0.601868 0.413574"
185 | ]
186 | },
187 | "execution_count": 20,
188 | "metadata": {},
189 | "output_type": "execute_result"
190 | }
191 | ],
192 | "source": [
193 | "A = rand(3,2)"
194 | ]
195 | },
196 | {
197 | "cell_type": "code",
198 | "execution_count": 21,
199 | "metadata": {},
200 | "outputs": [
201 | {
202 | "data": {
203 | "text/plain": [
204 | "3×3 Array{Float64,2}:\n",
205 | " -0.311792 0.90137 0.300529\n",
206 | " -0.794641 -0.0739732 -0.602555\n",
207 | " -0.520894 -0.426685 0.73933 "
208 | ]
209 | },
210 | "metadata": {},
211 | "output_type": "display_data"
212 | },
213 | {
214 | "data": {
215 | "text/plain": [
216 | "3×2 Array{Float64,2}:\n",
217 | " 1.39313 0.0 \n",
218 | " 0.0 0.191691\n",
219 | " 0.0 0.0 "
220 | ]
221 | },
222 | "metadata": {},
223 | "output_type": "display_data"
224 | },
225 | {
226 | "data": {
227 | "text/plain": [
228 | "2×2 Adjoint{Float64,Array{Float64,2}}:\n",
229 | " -0.882421 0.47046 \n",
230 | " -0.47046 -0.882421"
231 | ]
232 | },
233 | "metadata": {},
234 | "output_type": "display_data"
235 | }
236 | ],
237 | "source": [
238 | "fullsvd(A);"
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": 22,
244 | "metadata": {},
245 | "outputs": [
246 | {
247 | "data": {
248 | "text/plain": [
249 | "3×2 Array{Float64,2}:\n",
250 | " -0.311792 0.90137 \n",
251 | " -0.794641 -0.0739732\n",
252 | " -0.520894 -0.426685 "
253 | ]
254 | },
255 | "metadata": {},
256 | "output_type": "display_data"
257 | },
258 | {
259 | "data": {
260 | "text/plain": [
261 | "2×2 Diagonal{Float64,Array{Float64,1}}:\n",
262 | " 1.39313 ⋅ \n",
263 | " ⋅ 0.191691"
264 | ]
265 | },
266 | "metadata": {},
267 | "output_type": "display_data"
268 | },
269 | {
270 | "data": {
271 | "text/plain": [
272 | "2×2 Array{Float64,2}:\n",
273 | " -0.882421 0.47046 \n",
274 | " -0.47046 -0.882421"
275 | ]
276 | },
277 | "metadata": {},
278 | "output_type": "display_data"
279 | }
280 | ],
281 | "source": [
282 | "rankrsvd(A);"
283 | ]
284 | },
285 | {
286 | "cell_type": "markdown",
287 | "metadata": {},
288 | "source": [
289 | "## 3. random 2x3 matrix"
290 | ]
291 | },
292 | {
293 | "cell_type": "code",
294 | "execution_count": 23,
295 | "metadata": {},
296 | "outputs": [
297 | {
298 | "data": {
299 | "text/plain": [
300 | "2×3 Array{Float64,2}:\n",
301 | " 0.0451975 0.242917 0.405185\n",
302 | " 0.477637 0.8663 0.725397"
303 | ]
304 | },
305 | "execution_count": 23,
306 | "metadata": {},
307 | "output_type": "execute_result"
308 | }
309 | ],
310 | "source": [
311 | "A = rand(2,3)"
312 | ]
313 | },
314 | {
315 | "cell_type": "code",
316 | "execution_count": 24,
317 | "metadata": {},
318 | "outputs": [
319 | {
320 | "data": {
321 | "text/plain": [
322 | "2×2 Array{Float64,2}:\n",
323 | " -0.337277 -0.941405\n",
324 | " -0.941405 0.337277"
325 | ]
326 | },
327 | "metadata": {},
328 | "output_type": "display_data"
329 | },
330 | {
331 | "data": {
332 | "text/plain": [
333 | "2×3 Array{Float64,2}:\n",
334 | " 1.30125 0.0 0.0\n",
335 | " 0.0 0.191821 0.0"
336 | ]
337 | },
338 | "metadata": {},
339 | "output_type": "display_data"
340 | },
341 | {
342 | "data": {
343 | "text/plain": [
344 | "3×3 Adjoint{Float64,Array{Float64,2}}:\n",
345 | " -0.357268 0.618008 -0.700304\n",
346 | " -0.689699 0.331038 0.643994\n",
347 | " -0.629821 -0.713078 -0.30797 "
348 | ]
349 | },
350 | "metadata": {},
351 | "output_type": "display_data"
352 | }
353 | ],
354 | "source": [
355 | "fullsvd(A);"
356 | ]
357 | },
358 | {
359 | "cell_type": "code",
360 | "execution_count": 25,
361 | "metadata": {},
362 | "outputs": [
363 | {
364 | "data": {
365 | "text/plain": [
366 | "2×2 Array{Float64,2}:\n",
367 | " -0.337277 -0.941405\n",
368 | " -0.941405 0.337277"
369 | ]
370 | },
371 | "metadata": {},
372 | "output_type": "display_data"
373 | },
374 | {
375 | "data": {
376 | "text/plain": [
377 | "2×2 Diagonal{Float64,Array{Float64,1}}:\n",
378 | " 1.30125 ⋅ \n",
379 | " ⋅ 0.191821"
380 | ]
381 | },
382 | "metadata": {},
383 | "output_type": "display_data"
384 | },
385 | {
386 | "data": {
387 | "text/plain": [
388 | "3×2 Array{Float64,2}:\n",
389 | " -0.357268 0.618008\n",
390 | " -0.689699 0.331038\n",
391 | " -0.629821 -0.713078"
392 | ]
393 | },
394 | "metadata": {},
395 | "output_type": "display_data"
396 | }
397 | ],
398 | "source": [
399 | "rankrsvd(A);"
400 | ]
401 | },
402 | {
403 | "cell_type": "markdown",
404 | "metadata": {},
405 | "source": [
406 | "## 4. rank 3, 7x10 matrix"
407 | ]
408 | },
409 | {
410 | "cell_type": "code",
411 | "execution_count": 26,
412 | "metadata": {},
413 | "outputs": [
414 | {
415 | "data": {
416 | "text/plain": [
417 | "7×10 Array{Float64,2}:\n",
418 | " 0.409585 0.550866 0.729917 0.396048 … 0.491844 0.870667 0.327258\n",
419 | " 0.824315 0.457974 0.887333 0.932895 0.34797 1.19361 0.266938\n",
420 | " 0.897034 0.338666 0.678985 0.943025 0.418739 1.0996 0.258406\n",
421 | " 0.862495 0.661816 0.926009 0.816466 0.771865 1.33408 0.463019\n",
422 | " 0.878864 0.887087 1.42046 0.99785 0.602456 1.66596 0.472172\n",
423 | " 0.765583 0.423397 0.731421 0.800129 … 0.446203 1.06758 0.287248\n",
424 | " 0.710849 0.713791 1.12556 0.793167 0.511149 1.33563 0.388595"
425 | ]
426 | },
427 | "execution_count": 26,
428 | "metadata": {},
429 | "output_type": "execute_result"
430 | }
431 | ],
432 | "source": [
433 | "A = rand(7,3)*rand(3,10) # this should be rank 3"
434 | ]
435 | },
436 | {
437 | "cell_type": "code",
438 | "execution_count": 27,
439 | "metadata": {},
440 | "outputs": [
441 | {
442 | "data": {
443 | "text/plain": [
444 | "7×7 Array{Float64,2}:\n",
445 | " -0.277937 -0.522311 -0.0744039 … 0.466151 -0.560653 0.264395 \n",
446 | " -0.34423 0.518096 -0.0720404 -0.227442 -0.460716 -0.319634 \n",
447 | " -0.335511 0.428918 0.464644 0.604044 0.0486161 -0.0705107 \n",
448 | " -0.436358 -0.497343 0.506287 -0.350516 0.117976 -0.410459 \n",
449 | " -0.487624 0.0167782 -0.553854 -0.204899 -0.105282 -0.000703896\n",
450 | " -0.327734 0.157075 0.264594 … -0.34661 0.116364 0.806582 \n",
451 | " -0.395088 -0.0499189 -0.374552 0.284817 0.657649 -0.0625039 "
452 | ]
453 | },
454 | "metadata": {},
455 | "output_type": "display_data"
456 | },
457 | {
458 | "data": {
459 | "text/plain": [
460 | "7×10 Array{Float64,2}:\n",
461 | " 6.92226 0.0 0.0 0.0 … 0.0 0.0 0.0 0.0\n",
462 | " 0.0 0.755198 0.0 0.0 0.0 0.0 0.0 0.0\n",
463 | " 0.0 0.0 0.683053 0.0 0.0 0.0 0.0 0.0\n",
464 | " 0.0 0.0 0.0 5.91088e-16 0.0 0.0 0.0 0.0\n",
465 | " 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n",
466 | " 0.0 0.0 0.0 0.0 … 0.0 0.0 0.0 0.0\n",
467 | " 0.0 0.0 0.0 0.0 8.21229e-17 0.0 0.0 0.0"
468 | ]
469 | },
470 | "metadata": {},
471 | "output_type": "display_data"
472 | },
473 | {
474 | "data": {
475 | "text/plain": [
476 | "10×10 Adjoint{Float64,Array{Float64,2}}:\n",
477 | " -0.294011 0.35548 0.312083 … 0.0465244 0.0514862 0.039223 \n",
478 | " -0.2263 -0.249712 -0.334077 0.181558 0.595424 0.0233641\n",
479 | " -0.363646 -0.0109916 -0.510499 -0.0359443 -0.220601 0.140677 \n",
480 | " -0.312911 0.500153 0.171036 -0.359502 -0.207792 0.0736223\n",
481 | " -0.237955 -0.64866 0.255552 -0.601864 -0.131402 -0.104849 \n",
482 | " -0.251608 0.0397458 -0.437401 … 0.164673 -0.485916 -0.221939 \n",
483 | " -0.477004 -0.0809271 0.436915 0.478804 0.100636 0.0399163\n",
484 | " -0.198741 -0.299539 0.17074 0.373694 -0.353637 -0.236875 \n",
485 | " -0.475836 0.133415 -0.153594 -0.275708 0.389516 -0.22132 \n",
486 | " -0.137166 -0.156822 -0.0295032 0.00811763 -0.103379 0.897647 "
487 | ]
488 | },
489 | "metadata": {},
490 | "output_type": "display_data"
491 | }
492 | ],
493 | "source": [
494 | "fullsvd(A);"
495 | ]
496 | },
497 | {
498 | "cell_type": "code",
499 | "execution_count": 28,
500 | "metadata": {},
501 | "outputs": [
502 | {
503 | "data": {
504 | "text/plain": [
505 | "7×3 Array{Float64,2}:\n",
506 | " -0.277937 -0.522311 -0.0744039\n",
507 | " -0.34423 0.518096 -0.0720404\n",
508 | " -0.335511 0.428918 0.464644 \n",
509 | " -0.436358 -0.497343 0.506287 \n",
510 | " -0.487624 0.0167782 -0.553854 \n",
511 | " -0.327734 0.157075 0.264594 \n",
512 | " -0.395088 -0.0499189 -0.374552 "
513 | ]
514 | },
515 | "metadata": {},
516 | "output_type": "display_data"
517 | },
518 | {
519 | "data": {
520 | "text/plain": [
521 | "3×3 Diagonal{Float64,Array{Float64,1}}:\n",
522 | " 6.92226 ⋅ ⋅ \n",
523 | " ⋅ 0.755198 ⋅ \n",
524 | " ⋅ ⋅ 0.683053"
525 | ]
526 | },
527 | "metadata": {},
528 | "output_type": "display_data"
529 | },
530 | {
531 | "data": {
532 | "text/plain": [
533 | "10×3 Array{Float64,2}:\n",
534 | " -0.294011 0.35548 0.312083 \n",
535 | " -0.2263 -0.249712 -0.334077 \n",
536 | " -0.363646 -0.0109916 -0.510499 \n",
537 | " -0.312911 0.500153 0.171036 \n",
538 | " -0.237955 -0.64866 0.255552 \n",
539 | " -0.251608 0.0397458 -0.437401 \n",
540 | " -0.477004 -0.0809271 0.436915 \n",
541 | " -0.198741 -0.299539 0.17074 \n",
542 | " -0.475836 0.133415 -0.153594 \n",
543 | " -0.137166 -0.156822 -0.0295032"
544 | ]
545 | },
546 | "metadata": {},
547 | "output_type": "display_data"
548 | }
549 | ],
550 | "source": [
551 | "rankrsvd(A);"
552 | ]
553 | },
554 | {
555 | "cell_type": "code",
556 | "execution_count": null,
557 | "metadata": {},
558 | "outputs": [],
559 | "source": []
560 | },
561 | {
562 | "cell_type": "code",
563 | "execution_count": null,
564 | "metadata": {},
565 | "outputs": [],
566 | "source": []
567 | }
568 | ],
569 | "metadata": {
570 | "@webio": {
571 | "lastCommId": null,
572 | "lastKernelId": null
573 | },
574 | "kernelspec": {
575 | "display_name": "Julia 1.3.0",
576 | "language": "julia",
577 | "name": "julia-1.3"
578 | },
579 | "language_info": {
580 | "file_extension": ".jl",
581 | "mimetype": "application/julia",
582 | "name": "julia",
583 | "version": "1.3.0"
584 | }
585 | },
586 | "nbformat": 4,
587 | "nbformat_minor": 2
588 | }
589 |
--------------------------------------------------------------------------------
/notes/Gauss-Jordan.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Gauss–Jordan and computing A⁻¹\n",
8 | "\n",
9 | "The Gauss–Jordan algorithm is a technique for hand-calculation of the inverse. Nowadays, you should hardly ever compute a matrix inverse, even on a computer, but Gauss–Jordan is still useful to go over:\n",
10 | "\n",
11 | "* It helps us to understand when and why an inverse matrix exists.\n",
12 | "\n",
13 | "* It gives us yet another example to help us understand the *structure* of elimination operations"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 1,
19 | "metadata": {},
20 | "outputs": [],
21 | "source": [
22 | "using LinearAlgebra # as usual, we'll load this package"
23 | ]
24 | },
25 | {
26 | "cell_type": "markdown",
27 | "metadata": {},
28 | "source": [
29 | "## Review: Inverses\n",
30 | "\n",
31 |     "The inverse of a linear operator $A$ is the operator that \"undoes\" the action of $A$:\n",
32 | "\n",
33 | "$$\n",
34 | "\\boxed{A^{-1}(Ax) = x} .\n",
35 | "$$\n",
36 | "\n",
37 | "for *any* $x$. Equivalently, $\\boxed{Ax=b \\implies x = A^{-1} b}$. This means that\n",
38 | "\n",
39 | "* **A⁻¹ only exists for (m×m) square matrices with m (nonzero) pivots**\n",
40 | "\n",
41 | "since for non-square matrices or matrices with one or more \"zero pivots\" we can't always solve $Ax=b$ (we'd divide by zero during backsubstitution). It is also easy to see that $\\boxed{(A^{-1})^{-1} = A}$, i.e. that $A$ undoes the action of $A^{-1}$.\n",
42 | "\n",
43 | "Equivalently,\n",
44 | "$$\n",
45 | "\\boxed{AA^{-1} = A^{-1} A = I}\n",
46 | "$$\n",
47 | "where $I$ is the m×m identity matrix — in linear algebra, we typically *infer* the size of $I$ from context, but if it is ambiguous we might write $I_m$.\n",
48 | "\n",
49 | "### Inverses of products: (AB)⁻¹ = B⁻¹A⁻¹\n",
50 | "\n",
51 |     "It is easy to see that the inverse of a product $AB$ is the product of the inverses in *reverse order*: $\\boxed{(AB)^{-1} = B^{-1} A^{-1}}$. Intuitively, when you reverse a sequence of operations, you always need to retrace your steps in backwards order. Explicitly:\n",
52 | "$$\n",
53 | "(AB)^{-1} AB = B^{-1} \\underbrace{A^{-1} A}_I B = B^{-1} B = I \\, .\n",
54 | "$$\n",
55 | "\n",
56 | "For example, we saw that Gaussian elimination corresponded to the factorization $A = LU$, where $U$ is the result of elimination and $L$ is simply a record of the elimination steps. Then\n",
57 | "$$\n",
58 | "Ax = b \\implies x = A^{-1} b = (LU)^{-1} b = \\underbrace{U^{-1} \\underbrace{ L^{-1} b }_\\mbox{forward substitution}}_\\mbox{backsubstitution} \\, .\n",
59 | "$$\n",
60 | "\n",
61 | "### Rarely compute inverses!\n",
62 | "\n",
63 | "In general **rarely if ever** compute inverses explicitly:\n",
64 | "\n",
65 | "* **Read \"x = A⁻¹b\" as \"solve Ax=b for x\" the best way you can**, and invariably there are better ways to solve for x than inverting a matrix.\n",
66 | "\n",
67 | "More on this below. Instead, **inverses are mostly a *conceptual* tool** to move operators/matrices around in equations. Once we have the equations in the form that we want, we then carry out the computations in some other way.\n",
68 | "\n",
69 | "### Notation:\n",
70 | "\n",
71 | "Inverses allow us to \"divide by matrices\", but we always have to be clear about whether we are dividing **on the left or on the right**. The following notations can be convenient, and are used in computer software like Julia and Matlab and elsewhere for square invertible matrices $A$:\n",
72 | "\n",
73 | "$$ B / A = BA^{-1}, \\\\ A \\backslash B = A^{-1} B$$"
74 | ]
75 | },
76 | {
77 | "cell_type": "markdown",
78 | "metadata": {},
79 | "source": [
80 | "## Inverses by linear equations\n",
81 | "\n",
82 | "The equation $A A^{-1} = I$ actually gives us the algorithm to compute $A^{-1}$.\n",
83 | "\n",
84 | "Suppose we denote the *columns* of $A^{-1} = \\begin{pmatrix} x_1 & x_2 & \\cdots & x_m \\end{pmatrix}$, and the columns of $I = \\begin{pmatrix} e_1 & e_2 & \\cdots & e_m \\end{pmatrix}$.\n",
85 | "\n",
86 | "Then \n",
87 | "$$\n",
88 | "A \\underbrace{\\begin{pmatrix} x_1 & x_2 & \\cdots & x_m \\end{pmatrix}}_{A^{-1}} = \n",
 89 | "\\begin{pmatrix} A x_1 & A x_2 & \\cdots & A x_m \\end{pmatrix} = \\underbrace{\\begin{pmatrix} e_1 & e_2 & \\cdots & e_m \\end{pmatrix}}_I.\n",
90 | "$$\n",
91 | "(The key fact here is that **multiplying A by a matrix on the right** is equivalent to **multiplying A by each column of that matrix**, which you can easily see by writing out the computation.)\n",
92 | "\n",
93 | "In consequence $A x_k = e_k$, which is a **linear equation for the k-th column of A⁻¹**. Equivalently, to find A⁻¹ for an m×m matrix A, we must **solve Ax=b for m right-hand sides** equal to the columns of I.\n",
94 | "\n",
95 | "* Put another way, for *any* matrix $B$, $Be_k = k\\mbox{-th column of }B$. So the k-th column of $A^{-1}$ is $x_k = A^{-1} e_k$, i.e. the solution to $Ax_k = e_k$.\n",
96 | "\n",
97 | "\n",
98 | "* Ideally, we do Gaussian elimination $A=LU$ *once*, then compute $x_k = U^{-1} L^{-1} e_k$ by forward+back-substitution for each column of $I$. (This is essentially what the computer does.)"
99 | ]
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "metadata": {},
104 | "source": [
105 | "## Example: computing L⁻¹ = E\n",
106 | "\n",
107 | "For example, how might we compute the inverse of the L matrix we got from Gaussian elimination in the last lecture, which should give us $L^{-1} = E$? We solve\n",
108 | "\n",
109 | "$$\n",
110 | "\\underbrace{\\begin{pmatrix} 1 & & \\\\ 1 & 1 & \\\\ 3 & -1 & 1 \\end{pmatrix}}_L x_k = e_k\n",
111 | "$$\n",
112 | "\n",
113 | "for $e_1,e_2,e_3$ (the columns of the 3×3 identity I).\n",
114 | "\n",
115 | "Let's do it for $e_1$, to find the **first column** $x_1$ of $L^{-1} = E$:\n",
116 | "$$\n",
117 | "\\underbrace{\\begin{pmatrix} 1 & & \\\\ 1 & 1 & \\\\ 3 & -1 & 1 \\end{pmatrix}}_L \\underbrace{\\begin{pmatrix} a \\\\ b \\\\ c \\end{pmatrix}}_{x_1} = \\underbrace{\\begin{pmatrix} 1 \\\\ 0 \\\\ 0 \\end{pmatrix}}_{e_1}\n",
118 | "$$\n",
119 | "By forward substitution (from top to bottom), we get $a = 1$, $1a + 1b = 0 \\implies b = -1$, $3a - 1b + 1c = 0 \\implies c = -4$, so $\\boxed{x_1 = [1, -1, -4]}$. Let's check:"
120 | ]
121 | },
122 | {
123 | "cell_type": "code",
124 | "execution_count": 2,
125 | "metadata": {},
126 | "outputs": [
127 | {
128 | "data": {
129 | "text/plain": [
130 | "3×3 Matrix{Int64}:\n",
131 | " 1 0 0\n",
132 | " 1 1 0\n",
133 | " 3 -1 1"
134 | ]
135 | },
136 | "execution_count": 2,
137 | "metadata": {},
138 | "output_type": "execute_result"
139 | }
140 | ],
141 | "source": [
142 | "L = [1 0 0\n",
143 | " 1 1 0\n",
144 | " 3 -1 1]"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": 3,
150 | "metadata": {},
151 | "outputs": [
152 | {
153 | "data": {
154 | "text/plain": [
155 | "3×3 Matrix{Float64}:\n",
156 | " 1.0 0.0 0.0\n",
157 | " -1.0 1.0 0.0\n",
158 | " -4.0 1.0 1.0"
159 | ]
160 | },
161 | "execution_count": 3,
162 | "metadata": {},
163 | "output_type": "execute_result"
164 | }
165 | ],
166 | "source": [
167 | "E = L^-1"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": 4,
173 | "metadata": {},
174 | "outputs": [
175 | {
176 | "data": {
177 | "text/plain": [
178 | "3-element Vector{Float64}:\n",
179 | " 1.0\n",
180 | " -1.0\n",
181 | " -4.0"
182 | ]
183 | },
184 | "execution_count": 4,
185 | "metadata": {},
186 | "output_type": "execute_result"
187 | }
188 | ],
189 | "source": [
190 | "E[:,1] # first column"
191 | ]
192 | },
193 | {
194 | "cell_type": "markdown",
195 | "metadata": {},
196 | "source": [
197 | "Yup, the first column is `[1, -1, -4]`. We could easily get the other two columns as well (left as an exercise).\n",
198 | "\n",
199 | "**Important note**: there is **no simple formula** for the inverse of a triangular matrix like L or U! You can invert *individual* elimination steps $E_k$ by flipping signs, but the *product* of the elimination steps is not so easy to invert.\n",
200 | "\n",
201 | "(A lot of students get confused by this because Strang's lectures and textbook start by inverting individual elimination steps, which is easier.)"
202 | ]
203 | },
204 | {
205 | "cell_type": "markdown",
206 | "metadata": {},
207 | "source": [
208 | "Another way to write this is `L \\ I`, which *conceptually* means \"multiply $I$ by $L^{-1}$ on the *left*\", but *actually* in Julia is computed without inverting any matrix explicitly, by instead solving with 3 right-hand sides:"
209 | ]
210 | },
211 | {
212 | "cell_type": "code",
213 | "execution_count": 5,
214 | "metadata": {},
215 | "outputs": [
216 | {
217 | "data": {
218 | "text/plain": [
219 | "3×3 Matrix{Float64}:\n",
220 | " 1.0 0.0 0.0\n",
221 | " -1.0 1.0 0.0\n",
222 | " -4.0 1.0 1.0"
223 | ]
224 | },
225 | "execution_count": 5,
226 | "metadata": {},
227 | "output_type": "execute_result"
228 | }
229 | ],
230 | "source": [
231 | "L \\ I"
232 | ]
233 | },
234 | {
235 | "cell_type": "markdown",
236 | "metadata": {},
237 | "source": [
238 | "Note that `I` is a special object defined by Julia's `LinearAlgebra` package which essentially means **an identity matrix whose size is inferred from context**.\n",
239 | "\n",
240 | "If we want an $m \\times m$ identity matrix, we can use `I(m)`:"
241 | ]
242 | },
243 | {
244 | "cell_type": "code",
245 | "execution_count": 6,
246 | "metadata": {},
247 | "outputs": [
248 | {
249 | "data": {
250 | "text/plain": [
251 | "3×3 Diagonal{Bool, Vector{Bool}}:\n",
252 | " 1 ⋅ ⋅\n",
253 | " ⋅ 1 ⋅\n",
254 | " ⋅ ⋅ 1"
255 | ]
256 | },
257 | "execution_count": 6,
258 | "metadata": {},
259 | "output_type": "execute_result"
260 | }
261 | ],
262 | "source": [
263 | "I(3)"
264 | ]
265 | },
266 | {
267 | "cell_type": "markdown",
268 | "metadata": {},
269 | "source": [
270 | "## The Gauss–Jordan algorithm.\n",
271 | "\n",
272 | "Gauss–Jordan could be viewed as just a trick (primarily for hand calculation) to organize solving $A x_k = e_k$. But it's also nice to think about algebraically — it is a nice application of our \"matrix viewpoint\" of Gaussian elimination.\n",
273 | "\n",
274 | "The Gauss–Jordan idea, in a nutshell is: **if we do some row operations on A to obtain I, then doing the *same* row operations on I gives A⁻¹**. Why?\n",
275 | "\n",
276 | "* Row operations correspond to multiplying $A$ by some matrix $E=\\cdots E_2 E_1$ on the *left*.\n",
277 | "\n",
278 | "* So, doing row operations that turn $A$ into $I$ means that $EA = I$, and hence $E = A^{-1}$.\n",
279 | "\n",
280 | "* Doing the *same* row operations on $I$ is equivalent to multiplying $I$ on the *left* by the *same* matrix $E$, giving $EI$. But $EI = E$, and $E = A^{-1}$, so this gives $A^{-1}$!\n",
281 | "\n",
282 | "As usual for Gaussian elimination, to do the *same* row operations on both $A$ and $I$ we **augment A** with $I$. That is, we do:\n",
283 | "\n",
284 | "$$\n",
285 | "\\boxed{\n",
286 | "\\left(\\begin{array}{c|c}A & I\\end{array}\\right) \\underset{\\mbox{row ops}}{\\longrightarrow} \\left(\\begin{array}{c|c}I & A^{-1}\\end{array}\\right)\n",
287 | "}\n",
288 | "$$\n",
289 | "\n",
290 | "### Elimination $A \\to I$\n",
291 | "\n",
292 | "How do we do row operations to turn $A$ into $I$? Simple:\n",
293 | "\n",
294 | "1. First, do ordinary Gaussian elimination \"downwards\" to turn $A$ into $U$ (an **upper-triangular** matrix).\n",
295 | "\n",
296 | "2. Then, do Gaussian elimination \"upwards\" on $U$ to eliminate entries *above* the diagonal, turning $U$ into a **diagonal** matrix $D$\n",
297 | "\n",
298 | "3. Finally, divide each row of $D$ by the diagonal entry to turn it into $I$."
299 | ]
300 | },
301 | {
302 | "cell_type": "markdown",
303 | "metadata": {},
304 | "source": [
305 | "# Gauss–Jordan example\n",
306 | "\n",
307 | "Let's perform these $A \\to I$ elimination steps on $3 \\times 3$ matrix $A$: first eliminate down to make $U$, then eliminate up to make $D$, then divide by the diagonals to make $I$:\n",
308 | "\n",
309 | "$$\n",
310 | "\\underbrace{\\begin{pmatrix} \\boxed{1} & 4 & 1 \\\\ 1 & 2 & -1 \\\\ 3 & 14 & 6 \\end{pmatrix}}_A\n",
311 | "\\longrightarrow\n",
312 | "\\begin{pmatrix} \\boxed{1} & 4 & 1 \\\\ 0 & \\boxed{-2} & -2 \\\\ 0 & 2 & 3 \\end{pmatrix}\n",
313 | "\\longrightarrow\n",
314 | "\\underbrace{\\begin{pmatrix} \\boxed{1} & 4 & 1 \\\\ 0 & \\boxed{-2} & -2 \\\\ 0 & 0 & \\boxed{1} \\end{pmatrix}}_U\n",
315 | "\\\\\n",
316 | "\\longrightarrow\n",
317 | "\\begin{pmatrix} 1 & 0 & -3 \\\\ 0 & \\boxed{-2} & -2 \\\\ 0 & 0 & 1 \\end{pmatrix}\n",
318 | "\\longrightarrow\n",
319 | "\\underbrace{\\begin{pmatrix} 1 & 0 & 0 \\\\ 0 & -2 & 0 \\\\ 0 & 0 & \\boxed{1} \\end{pmatrix}}_D\n",
320 | "\\longrightarrow\n",
321 | "\\underbrace{\\begin{pmatrix} 1 & 0 & 0 \\\\ 0 & 1 & 0 \\\\ 0 & 0 & 1 \\end{pmatrix}}_I\n",
322 | "$$\n",
323 | "\n",
324 | "No problem! It is easy to see that this will work **whenever A has all of its pivots** (i.e. it is non-singular)."
325 | ]
326 | },
327 | {
328 | "cell_type": "markdown",
329 | "metadata": {},
330 | "source": [
331 | "To get the inverse, we needed to augment this with $I$ so that we perform the same elimination steps on both.\n",
332 | "\n",
333 | "$$\n",
334 | "\\left(\\begin{array}{rrr|rrr}\n",
335 | " \\boxed{1} & 4 & 1 & 1 & 0 & 0 \\\\\n",
336 | " 1 & 2 & -1 & 0 & 1 & 0 \\\\\n",
337 | " 3 & 14 & 6 & 0 & 0 & 1 \\end{array}\\right)\n",
338 | "\\longrightarrow\n",
339 | "\\left(\\begin{array}{rrr|rrr}\n",
340 | " \\boxed{1} & 4 & 1 & 1 & 0 & 0 \\\\\n",
341 | " 0 & \\boxed{-2} & -2 & -1 & 1 & 0 \\\\\n",
342 | " 0 & 2 & 3 & -3 & 0 & 1 \\end{array}\\right) \\\\\n",
343 | "\\longrightarrow\n",
344 | "\\left(\\begin{array}{rrr|rrr}\n",
345 | " \\boxed{1} & 4 & 1 & 1 & 0 & 0 \\\\\n",
346 | " 0 & \\boxed{-2} & -2 & -1 & 1 & 0 \\\\\n",
347 | " 0 & 0 & \\boxed{1} & -4 & 1 & 1 \\end{array}\\right)\n",
348 | "\\longrightarrow\n",
349 | "\\left(\\begin{array}{rrr|rrr}\n",
350 | " 1 & 0 & -3 & -1 & 2 & 0 \\\\\n",
351 | " 0 & \\boxed{-2} & -2 & -1 & 1 & 0 \\\\\n",
352 | " 0 & 0 & 1 & -4 & 1 & 1 \\end{array}\\right) \\\\\n",
353 | "\\longrightarrow\n",
354 | "\\left(\\begin{array}{rrr|rrr}\n",
355 | " 1 & 0 & 0 & -13 & 5 & 3 \\\\\n",
356 | " 0 & -2 & 0 & -9 & 3 & 2 \\\\\n",
357 | " 0 & 0 & \\boxed{1} & -4 & 1 & 1 \\end{array}\\right)\n",
358 | "\\longrightarrow\n",
359 | "\\left(\\begin{array}{rrr|rrr}\n",
360 | " 1 & 0 & 0 & -13 & 5 & 3 \\\\\n",
361 | " 0 & 1 & 0 & 4.5 & -1.5 & -1 \\\\\n",
362 | " 0 & 0 & 1 & -4 & 1 & 1 \\end{array}\\right)\n",
363 | "$$\n",
364 | "\n",
365 | "Whew, this was a lot of work! Did we get the right answer?"
366 | ]
367 | },
368 | {
369 | "cell_type": "code",
370 | "execution_count": 7,
371 | "metadata": {},
372 | "outputs": [
373 | {
374 | "data": {
375 | "text/plain": [
376 | "3×3 Matrix{Int64}:\n",
377 | " 1 4 1\n",
378 | " 1 2 -1\n",
379 | " 3 14 6"
380 | ]
381 | },
382 | "execution_count": 7,
383 | "metadata": {},
384 | "output_type": "execute_result"
385 | }
386 | ],
387 | "source": [
388 | "A = [1 4 1\n",
389 | " 1 2 -1\n",
390 | " 3 14 6]"
391 | ]
392 | },
393 | {
394 | "cell_type": "code",
395 | "execution_count": 8,
396 | "metadata": {},
397 | "outputs": [
398 | {
399 | "data": {
400 | "text/plain": [
401 | "3×3 Matrix{Float64}:\n",
402 | " -13.0 5.0 3.0\n",
403 | " 4.5 -1.5 -1.0\n",
404 | " -4.0 1.0 1.0"
405 | ]
406 | },
407 | "execution_count": 8,
408 | "metadata": {},
409 | "output_type": "execute_result"
410 | }
411 | ],
412 | "source": [
413 | "A^-1"
414 | ]
415 | },
416 | {
417 | "cell_type": "markdown",
418 | "metadata": {},
419 | "source": [
420 | "Hooray!\n",
421 | "\n",
422 | "(It is *really* easy to make a mistake during this process.)"
423 | ]
424 | },
425 | {
426 | "cell_type": "markdown",
427 | "metadata": {},
428 | "source": [
429 | "# (Almost) Never Compute Inverses!\n",
430 | "\n",
431 | "Matrix inverses are funny, however:\n",
432 | "\n",
433 | "* Inverse matrices are very convenient in *analytical* manipulations, because they allow you to move matrices from one side to the other of equations easily.\n",
434 | "\n",
435 | "* Inverse matrices are **almost never computed** in \"serious\" numerical calculations. Whenever you see $A^{-1} B$ (or $A^{-1} b$), when you go to *implement* it on a computer you should *read* $A^{-1} B$ as \"solve $AX = B$ by some method.\" e.g. solve it by `A \\ B` or by first computing the LU factorization of $A$ and then using it to solve $AX = B$.\n",
436 | "\n",
437 | "One reason that you don't usually compute inverse matrices is that it is wasteful: once you have $A=LU$ (later we will generalize this to \"$PA = LU$\"), you can solve $AX=B$ directly without bothering to find $A^{-1}$, and computing $A^{-1}$ requires much more work if you only have to solve a few right-hand sides.\n",
438 | "\n",
439 | "Another reason is that for many special matrices, there are ways to solve $AX=B$ *much* more quickly than you can find $A^{-1}$. For example, many large matrices in practice are [sparse](https://en.wikipedia.org/wiki/Sparse_matrix) (mostly zero), and often for sparse matrices you can arrange for $L$ and $U$ to be sparse too. Sparse matrices are much more efficient to work with than general \"dense\" matrices because you don't have to multiply (or even store) the zeros. Even if $A$ is sparse, however, $A^{-1}$ is usually non-sparse, so you lose the special efficiency of sparsity if you compute the inverse matrix. \n",
440 | "\n",
441 | "For example:\n",
442 | "\n",
443 | "* If you see $U^{-1} b$ where $U$ is *upper* triangular, don't compute $U^{-1}$ explicitly! Just solve $Ux = b$ by *back-substitution* (from the bottom row up).\n",
444 | "\n",
445 | "* If you see $L^{-1} b$ where $L$ is *lower* triangular, don't compute $L^{-1}$ explicitly! Just solve $Lx = b$ by *forward-substitution* (from the top row down)."
446 | ]
447 | }
448 | ],
449 | "metadata": {
450 | "@webio": {
451 | "lastCommId": null,
452 | "lastKernelId": null
453 | },
454 | "kernelspec": {
455 | "display_name": "Julia 1.7.1",
456 | "language": "julia",
457 | "name": "julia-1.7"
458 | },
459 | "language_info": {
460 | "file_extension": ".jl",
461 | "mimetype": "application/julia",
462 | "name": "julia",
463 | "version": "1.7.1"
464 | }
465 | },
466 | "nbformat": 4,
467 | "nbformat_minor": 2
468 | }
469 |
--------------------------------------------------------------------------------
/notes/Markov.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 28,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "using LinearAlgebra"
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "# Markov matrices\n",
17 | "\n",
18 | "A matrix $A$ is a **Markov matrix** if\n",
19 | "\n",
20 | "* Its entries are all $\\ge 0$\n",
21 | "* Each **column**'s entries **sum to 1**\n",
22 | "\n",
 23 | "Typically, a Markov matrix's entries represent **transition probabilities** from one state to another.\n",
24 | "\n",
25 | "For example, consider the $2 \\times 2$ Markov matrix:"
26 | ]
27 | },
28 | {
29 | "cell_type": "code",
30 | "execution_count": 29,
31 | "metadata": {},
32 | "outputs": [
33 | {
34 | "data": {
35 | "text/plain": [
36 | "2×2 Matrix{Float64}:\n",
37 | " 0.9 0.2\n",
38 | " 0.1 0.8"
39 | ]
40 | },
41 | "execution_count": 29,
42 | "metadata": {},
43 | "output_type": "execute_result"
44 | }
45 | ],
46 | "source": [
47 | "A = [0.9 0.2\n",
48 | " 0.1 0.8]"
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {},
54 | "source": [
55 | "Let us suppose that this represents the fraction of people switching majors each year between math and English literature.\n",
56 | "\n",
57 | "Let\n",
58 | "$$\n",
59 | "x = \\begin{pmatrix} m \\\\ e \\end{pmatrix}\n",
60 | "$$\n",
61 | "\n",
62 | "represent the number of math majors $m$ and English majors $e$. Suppose that each year, 10% of math majors and 20% of English majors switch majors. After one year, the new number of math and English majors is:\n",
63 | "\n",
64 | "$$\n",
65 | "m' = 0.9 m + 0.2 e \\\\\n",
66 | "e' = 0.1 m + 0.8 e\n",
67 | "$$\n",
68 | "\n",
69 | "But this is equivalent to a matrix multiplication! i.e. the numbers $x'$ of majors after one year is\n",
70 | "\n",
71 | "$$\n",
72 | "x' = A x \\,\n",
73 | "$$\n",
74 | "\n",
75 | "Note that the two Markov properties are critical: we never have negative numbers of majors (or negative probabilities), and the probabilities must sum to 1 (the net number of majors is not changing: we're not including new students or people that graduate in this silly model)."
76 | ]
77 | },
78 | {
79 | "cell_type": "markdown",
80 | "metadata": {},
81 | "source": [
82 | "## Eigenvalues of Markov matrices\n",
83 | "\n",
84 | "There are two key questions about Markov matrices that can be answered by analysis of their eigenvalues:\n",
85 | "\n",
86 | "* Is there a **steady state**?\n",
87 | " - i.e. is there an $x_0 \\ne 0$ such that $A x_0 = x_0$?\n",
 88 | "   - i.e. is there a $\\lambda = 1$ eigenvector $x_0$?\n",
89 | "\n",
90 | "* Does the system **tend toward a steady state?**\n",
91 | " - i.e. does $A^n x \\to \\mbox{multiple of } x_0$ as $n \\to \\infty$?\n",
92 | " - i.e. is $\\lambda = 1$ the **largest** $|\\lambda|$?\n",
93 | " \n",
 94 | "The answer to the first question is **YES** for **any Markov** matrix $A$, and the answer to the second is **YES** for any *positive* Markov matrix (a Markov matrix with entries $> 0$, not just $\\ge 0$). For *any* Markov matrix, all of the λ satisfy $|\\lambda| \\le 1$, but if there are zero entries in the matrix we *may* have multiple $|\\lambda|=1$ eigenvalues (though this doesn't happen often in practical Markov problems)."
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": 30,
100 | "metadata": {},
101 | "outputs": [
102 | {
103 | "data": {
104 | "text/plain": [
105 | "2-element Vector{Float64}:\n",
106 | " 0.7\n",
107 | " 1.0"
108 | ]
109 | },
110 | "execution_count": 30,
111 | "metadata": {},
112 | "output_type": "execute_result"
113 | }
114 | ],
115 | "source": [
116 | "eigvals(A)"
117 | ]
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "metadata": {},
122 | "source": [
123 | "Let's try just multiplying it many times by a \"random\" vector and see whether it is converging to a steady state:"
124 | ]
125 | },
126 | {
127 | "cell_type": "code",
128 | "execution_count": 31,
129 | "metadata": {},
130 | "outputs": [
131 | {
132 | "data": {
133 | "text/plain": [
134 | "2-element Vector{Float64}:\n",
135 | " 14.000000000000089\n",
136 | " 7.000000000000044"
137 | ]
138 | },
139 | "execution_count": 31,
140 | "metadata": {},
141 | "output_type": "execute_result"
142 | }
143 | ],
144 | "source": [
145 | "A^100 * [17, 4]"
146 | ]
147 | },
148 | {
149 | "cell_type": "markdown",
150 | "metadata": {},
151 | "source": [
152 | "Yes, it seems to be giving a vector that is not changing, which should be a multiple $c x_0$ of a steady-state eigenvector:"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 32,
158 | "metadata": {},
159 | "outputs": [
160 | {
161 | "data": {
162 | "text/plain": [
163 | "2-element Vector{Float64}:\n",
164 | " 14.000000000000874\n",
165 | " 7.000000000000437"
166 | ]
167 | },
168 | "execution_count": 32,
169 | "metadata": {},
170 | "output_type": "execute_result"
171 | }
172 | ],
173 | "source": [
174 | "cx₀ = A^1000 * [17, 4]"
175 | ]
176 | },
177 | {
178 | "cell_type": "markdown",
179 | "metadata": {},
180 | "source": [
181 | "Let's check that this is an eigenvector of $A$ with eigenvalue $\\lambda=1$:"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": 33,
187 | "metadata": {},
188 | "outputs": [
189 | {
190 | "data": {
191 | "text/plain": [
192 | "2-element Vector{Float64}:\n",
193 | " 14.000000000000874\n",
194 | " 7.000000000000437"
195 | ]
196 | },
197 | "execution_count": 33,
198 | "metadata": {},
199 | "output_type": "execute_result"
200 | }
201 | ],
202 | "source": [
203 | "A * cx₀"
204 | ]
205 | },
206 | {
207 | "cell_type": "markdown",
208 | "metadata": {},
209 | "source": [
210 | "To see why, the key idea is to write the columns-sum-to-one property of Markov matrices in linear-algebra terms. It is equivalent to the statement:\n",
211 | "\n",
212 | "$$\n",
213 | "\\underbrace{\\begin{pmatrix} 1 & 1 & \\cdots & 1 & 1 \\end{pmatrix}}_{o^T} A = o^T\n",
214 | "$$\n",
215 | "\n",
216 | "since this is just the operation that sums all of the rows of $A$. Equivalently, if we transpose both sides:\n",
217 | "\n",
218 | "$$\n",
219 | "A^T o = o\n",
220 | "$$\n",
221 | "\n",
222 | "i.e. $o$ is an eigenvector of $A^T$ (called a **left eigenvector of A**) with eigenvalue $\\lambda = 1$.\n",
223 | "\n",
224 | "But since $A$ and $A^T$ have the **same eigenvalues** (they have the same characteristic polynomial $\\det (A - \\lambda I) = \\det (A^T - \\lambda I)$ because transposing doesn't change the determinant), this means that $A$ **also has an eigenvalue 1** but with a **different eigenvector**."
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "execution_count": 34,
230 | "metadata": {},
231 | "outputs": [
232 | {
233 | "data": {
234 | "text/plain": [
235 | "1×2 adjoint(::Vector{Float64}) with eltype Float64:\n",
236 | " 1.0 1.0"
237 | ]
238 | },
239 | "execution_count": 34,
240 | "metadata": {},
241 | "output_type": "execute_result"
242 | }
243 | ],
244 | "source": [
245 | "o = [1,1]\n",
246 | "o'A"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": 35,
252 | "metadata": {},
253 | "outputs": [
254 | {
255 | "data": {
256 | "text/plain": [
257 | "2-element Vector{Float64}:\n",
258 | " 1.0\n",
259 | " 1.0"
260 | ]
261 | },
262 | "execution_count": 35,
263 | "metadata": {},
264 | "output_type": "execute_result"
265 | }
266 | ],
267 | "source": [
268 | "A' * o"
269 | ]
270 | },
271 | {
272 | "cell_type": "markdown",
273 | "metadata": {},
274 | "source": [
275 | "An eigenvector of $A$ with eigenvalue $1$ must be a basis for $N(A - I)$:"
276 | ]
277 | },
278 | {
279 | "cell_type": "code",
280 | "execution_count": 36,
281 | "metadata": {},
282 | "outputs": [
283 | {
284 | "data": {
285 | "text/plain": [
286 | "2×2 Matrix{Float64}:\n",
287 | " -0.1 0.2\n",
288 | " 0.1 -0.2"
289 | ]
290 | },
291 | "execution_count": 36,
292 | "metadata": {},
293 | "output_type": "execute_result"
294 | }
295 | ],
296 | "source": [
297 | "A - 1*I"
298 | ]
299 | },
300 | {
301 | "cell_type": "markdown",
302 | "metadata": {},
303 | "source": [
304 | "By inspection, $A - I$ is singular here: the second column is -2 times the first. So, $x_0 = (2,1)$ is a basis for its nullspace, and is a steady state:"
305 | ]
306 | },
307 | {
308 | "cell_type": "code",
309 | "execution_count": 37,
310 | "metadata": {},
311 | "outputs": [
312 | {
313 | "data": {
314 | "text/plain": [
315 | "2-element Vector{Float64}:\n",
316 | " 5.551115123125783e-17\n",
317 | " 5.551115123125783e-17"
318 | ]
319 | },
320 | "execution_count": 37,
321 | "metadata": {},
322 | "output_type": "execute_result"
323 | }
324 | ],
325 | "source": [
326 | "(A - I) * [2,1]"
327 | ]
328 | },
329 | {
330 | "cell_type": "markdown",
331 | "metadata": {},
332 | "source": [
333 | "Let's check if some arbitrary starting vector $(3,0)$ tends towards this steady state:"
334 | ]
335 | },
336 | {
337 | "cell_type": "code",
338 | "execution_count": 38,
339 | "metadata": {},
340 | "outputs": [
341 | {
342 | "data": {
343 | "text/latex": [
344 | "$\n",
345 | "\\begin{pmatrix} 3.0\\\\0.0\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.7\\\\0.3\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.49\\\\0.51\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.343\\\\0.657\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.24\\\\0.76\\end{pmatrix} \\longrightarrow \\\\\n",
346 | "\\begin{pmatrix} 2.24\\\\0.76\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.168\\\\0.832\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.118\\\\0.882\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.082\\\\0.918\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.058\\\\0.942\\end{pmatrix} \\longrightarrow \\\\\n",
347 | "\\begin{pmatrix} 2.058\\\\0.942\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.04\\\\0.96\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.028\\\\0.972\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.02\\\\0.98\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.014\\\\0.986\\end{pmatrix} \\longrightarrow \\\\\n",
348 | "\\begin{pmatrix} 2.014\\\\0.986\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.01\\\\0.99\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.007\\\\0.993\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.005\\\\0.995\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.003\\\\0.997\\end{pmatrix} \\longrightarrow \\\\\n",
349 | "\\begin{pmatrix} 2.003\\\\0.997\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.002\\\\0.998\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.002\\\\0.998\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.001\\\\0.999\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.001\\\\0.999\\end{pmatrix} \\longrightarrow \\\\\n",
350 | "\\begin{pmatrix} 2.001\\\\0.999\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.001\\\\0.999\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.0\\\\1.0\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.0\\\\1.0\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.0\\\\1.0\\end{pmatrix} \\longrightarrow \\\\\n",
351 | "$\n"
352 | ]
353 | },
354 | "metadata": {},
355 | "output_type": "display_data"
356 | }
357 | ],
358 | "source": [
359 | "# The following code prints a sequence of Aⁿx values\n",
360 | "# for n=0,1,2,… nicely formatted with LaTeX.\n",
361 | "\n",
362 | "x = [3, 0]\n",
363 | "pmatrix(x) = string(\"\\\\begin{pmatrix} \", round(x[1],digits=3), \"\\\\\\\\\", round(x[2],digits=3), \"\\\\end{pmatrix}\")\n",
364 | "buf = IOBuffer()\n",
365 | "println(buf, \"\\$\")\n",
366 | "for k = 1:6\n",
367 | " print(buf, pmatrix(x), \" \\\\longrightarrow \")\n",
368 | " for i = 1:4\n",
369 | " x = A*x\n",
370 | " print(buf, pmatrix(x), \" \\\\longrightarrow \")\n",
371 | " end\n",
372 | " println(buf, \"\\\\\\\\\")\n",
373 | "end\n",
374 | "println(buf, \"\\$\")\n",
375 | "display(\"text/latex\", String(take!(buf)))"
376 | ]
377 | },
378 | {
379 | "cell_type": "markdown",
380 | "metadata": {},
381 | "source": [
382 | "Yes! In fact, it tends to exactly $(2,1)$, because the other eigenvalue is $< 1$ (and hence that eigenvector component decays exponentially fast).\n",
383 | "\n",
384 | "An interesting property is that the **sum of the vector components is conserved** when we multiply by a Markov matrix. Given a vector $x$, $o^T x$ is the sum of its components. But $o^T A = o^T$, so:\n",
385 | "\n",
386 | "$$\n",
387 | "o^T A x = o^T x = o^T A^n x\n",
388 | "$$\n",
389 | "\n",
390 | "for any $n$! This is why $(3,0)$ must tend to $(2,1)$, and not to any other multiple of $(2,1)$, because both of them sum to 3. (The \"number of majors\" is conserved in this problem.)"
391 | ]
392 | },
393 | {
394 | "cell_type": "markdown",
395 | "metadata": {},
396 | "source": [
397 | "## Why no eigenvalues > 1?\n",
398 | "\n",
399 | "Why are all $|\\lambda| \\le 1$ for a Markov matrix?\n",
400 | "\n",
401 | "The key fact is that the **product AB of two Markov matrices A and B is also Markov**. Reasons:\n",
402 | "\n",
403 | "* If $A$ and $B$ have nonnegative entries, $AB$ does as well: matrix multiplication uses only $\\times$ and $+$, and can't introduce a minus sign.\n",
404 | "\n",
405 | "* If $o^T A = o^T$ and $o^T B = o^T$ (both have columns summing to 1), then $o^T AB = o^T B = o^T$: the columns of $AB$ sum to 1.\n",
406 | "\n",
407 | "For example, $A^n$ is a Markov matrix for any $n$ if $A$ is Markov.\n",
408 | "\n",
409 | "Now, if there were an eigenvalue $|\\lambda| > 1$, the matrix $A^n$ would have to *blow up exponentially* as $n\\to \\infty$ (since the matrix times that eigenvector, or any vector with a nonzero component of that eigenvector, would blow up). But since $A^n$ is Markov, all of its entries must be between 0 and 1. It can't blow up! So we must have all $|\\lambda| \\le 1$."
410 | ]
411 | },
412 | {
413 | "cell_type": "code",
414 | "execution_count": 39,
415 | "metadata": {},
416 | "outputs": [
417 | {
418 | "data": {
419 | "text/plain": [
420 | "2×2 Matrix{Float64}:\n",
421 | " 0.666667 0.666667\n",
422 | " 0.333333 0.333333"
423 | ]
424 | },
425 | "execution_count": 39,
426 | "metadata": {},
427 | "output_type": "execute_result"
428 | }
429 | ],
430 | "source": [
431 | "A^100"
432 | ]
433 | },
434 | {
435 | "cell_type": "markdown",
436 | "metadata": {},
437 | "source": [
438 | "(In fact, $A^n$ is pretty boring for large $n$: it just takes in any vector and redistributes it to the steady state.)\n",
439 | "\n",
440 | "Another way of thinking about $A^{100}$ is\n",
441 | "$$\n",
442 | "A^{100} = A^{100} \\begin{pmatrix} 1 & 0 \\\\ 0 & 1 \\end{pmatrix} =\n",
443 | "\\begin{pmatrix}\n",
444 | " A^{100} \\begin{pmatrix} 1 \\\\ 0 \\end{pmatrix} &\n",
445 | " A^{100} \\begin{pmatrix} 0 \\\\ 1 \\end{pmatrix}\n",
446 | "\\end{pmatrix}\n",
447 | "$$\n",
448 | "i.e. it multiplies $A^{100}$ by each column of the identity matrix (= different possible \"starting populations\"). Because of this, each column of $A^{100}$ tends towards an eigenvector with the biggest $|\\lambda|$."
449 | ]
450 | },
451 | {
452 | "cell_type": "markdown",
453 | "metadata": {},
454 | "source": [
455 | "## Can there be more than one steady state?\n",
456 | "\n",
457 | "We have just showed that we have *at least one* eigenvalue $\\lambda = 1$, and that *all* eigenvalues satisfy $|\\lambda| \\le 1$. But can there be *more than one* independent eigenvector with $\\lambda = 1$?\n",
458 | "\n",
459 | "**Yes!** For example, the **identity matrix** $I$ is a Markov matrix, and *all* of its eigenvectors have eigenvalue $1$. Since $Ix = x$ for *any* $x$, *every vector is a steady state* for $I$!\n",
460 | "\n",
461 | "But this does not usually happen for *interesting* Markov matrices coming from real problems. In fact, there is a theorem:\n",
462 | "\n",
463 | "* If all the entries of a Markov matrix are $> 0$ (not just $\\ge 0$), then *exactly one* of its eigenvalues $\\lambda = 1$ (that eigenvalue has \"multiplicity 1\": $N(A-I)$ is one-dimensional), and **all other eigenvalues have** $|\\lambda| < 1$. There is a *unique steady state* (up to an overall scale factor).\n",
464 | "\n",
465 | "I'm not going to prove this in 18.06, however."
466 | ]
467 | },
468 | {
469 | "cell_type": "markdown",
470 | "metadata": {},
471 | "source": [
472 | "## Can the solutions oscillate?\n",
473 | "\n",
474 | "If you have a Markov matrix with zero entries, then there might be more than one eigenvalue with $|\\lambda| = 1$, but these additional solutions might be *oscillating* solutions rather than steady states.\n",
475 | "\n",
476 | "For example, consider the permutation matrix\n",
477 | "$$\n",
478 | "P = \\begin{pmatrix} 0 & 1 \\\\ 1 & 0 \\end{pmatrix}\n",
479 | "$$\n",
480 | "that simply swaps the first and second entries of any 2-component vector.\n",
481 | "\n",
482 | "If $x = (1,0)$, then $P^n x$ will oscillate forever, never reaching a steady state! It simply oscillates between $(1,0)$ (for even $n$) and $(0,1)$ (for odd $n$):"
483 | ]
484 | },
485 | {
486 | "cell_type": "code",
487 | "execution_count": 40,
488 | "metadata": {},
489 | "outputs": [
490 | {
491 | "data": {
492 | "text/plain": [
493 | "2×2 Matrix{Int64}:\n",
494 | " 0 1\n",
495 | " 1 0"
496 | ]
497 | },
498 | "execution_count": 40,
499 | "metadata": {},
500 | "output_type": "execute_result"
501 | }
502 | ],
503 | "source": [
504 | "P = [0 1\n",
505 | " 1 0]"
506 | ]
507 | },
508 | {
509 | "cell_type": "code",
510 | "execution_count": 41,
511 | "metadata": {},
512 | "outputs": [
513 | {
514 | "data": {
515 | "text/plain": [
516 | "6-element Vector{Vector{Int64}}:\n",
517 | " [1, 0]\n",
518 | " [0, 1]\n",
519 | " [1, 0]\n",
520 | " [0, 1]\n",
521 | " [1, 0]\n",
522 | " [0, 1]"
523 | ]
524 | },
525 | "execution_count": 41,
526 | "metadata": {},
527 | "output_type": "execute_result"
528 | }
529 | ],
530 | "source": [
531 | "[P^n * [1,0] for n = 0:5]"
532 | ]
533 | },
534 | {
535 | "cell_type": "markdown",
536 | "metadata": {},
537 | "source": [
538 | "But this is a Markov matrix, so all $|\\lambda|$ are $\\le 1$:"
539 | ]
540 | },
541 | {
542 | "cell_type": "code",
543 | "execution_count": 42,
544 | "metadata": {},
545 | "outputs": [
546 | {
547 | "data": {
548 | "text/plain": [
549 | "2-element Vector{Float64}:\n",
550 | " -1.0\n",
551 | " 1.0"
552 | ]
553 | },
554 | "execution_count": 42,
555 | "metadata": {},
556 | "output_type": "execute_result"
557 | }
558 | ],
559 | "source": [
560 | "eigvals(P)"
561 | ]
562 | },
563 | {
564 | "cell_type": "markdown",
565 | "metadata": {},
566 | "source": [
567 | "The problem is that the $\\lambda = -1$ eigenvalue corresponds to an oscillating solution:\n",
568 | "\n",
569 | "$$\n",
570 | "P^n \\begin{pmatrix} 1 \\\\ -1 \\end{pmatrix} = (-1)^n \\begin{pmatrix} 1 \\\\ -1 \\end{pmatrix}\n",
571 | "$$\n",
572 | "\n",
573 | "for the eigenvector $(1,-1)$.\n",
574 | "\n",
575 | "The steady state still exists, corresponding to the eigenvector $(1,1)$:\n",
576 | "\n",
577 | "$$\n",
578 | "P^n \\begin{pmatrix} 1 \\\\ 1 \\end{pmatrix} = \\begin{pmatrix} 1 \\\\ 1 \\end{pmatrix}\n",
579 | "$$"
580 | ]
581 | },
582 | {
583 | "cell_type": "code",
584 | "execution_count": 43,
585 | "metadata": {},
586 | "outputs": [
587 | {
588 | "data": {
589 | "text/plain": [
590 | "2×2 Matrix{Float64}:\n",
591 | " -0.707107 0.707107\n",
592 | " 0.707107 0.707107"
593 | ]
594 | },
595 | "execution_count": 43,
596 | "metadata": {},
597 | "output_type": "execute_result"
598 | }
599 | ],
600 | "source": [
601 | "X = eigvecs(P) # the eigenvectors"
602 | ]
603 | },
604 | {
605 | "cell_type": "code",
606 | "execution_count": 44,
607 | "metadata": {},
608 | "outputs": [
609 | {
610 | "data": {
611 | "text/plain": [
612 | "2×2 Matrix{Float64}:\n",
613 | " 1.0 1.0\n",
614 | " -1.0 1.0"
615 | ]
616 | },
617 | "execution_count": 44,
618 | "metadata": {},
619 | "output_type": "execute_result"
620 | }
621 | ],
622 | "source": [
623 | "X ./ X[1,:]' # normalize the first row to be 1, to resemble our hand solutions"
624 | ]
625 | },
626 | {
627 | "cell_type": "markdown",
628 | "metadata": {},
629 | "source": [
630 | "Since $(1,0) = [(1,1) + (1,-1)]/2$, we have:\n",
631 | "\n",
632 | "$$\n",
633 | "P^n \\begin{pmatrix} 1 \\\\ 0 \\end{pmatrix} = \\frac{1}{2} \\left[ \\begin{pmatrix} 1 \\\\ 1 \\end{pmatrix} + \n",
634 | "(-1)^n \\begin{pmatrix} 1 \\\\ -1 \\end{pmatrix} \\right]\n",
635 | "$$\n",
636 | "\n",
637 | "which alternates between $(1,0)$ and $(0,1)$."
638 | ]
639 | },
640 | {
641 | "cell_type": "markdown",
642 | "metadata": {},
643 | "source": [
644 | "## Another example\n",
645 | "\n",
646 | "Let's generate a random 5x5 Markov matrix:"
647 | ]
648 | },
649 | {
650 | "cell_type": "code",
651 | "execution_count": 45,
652 | "metadata": {},
653 | "outputs": [
654 | {
655 | "data": {
656 | "text/plain": [
657 | "5×5 Matrix{Float64}:\n",
658 | " 0.410618 0.306837 0.410031 0.707623 0.290909\n",
659 | " 0.307687 0.22414 0.676996 0.0455438 0.904309\n",
660 | " 0.999213 0.714056 0.357485 0.913338 0.715352\n",
661 | " 0.647026 0.995701 0.789245 0.577309 0.391341\n",
662 | " 0.73899 0.967951 0.914835 0.565266 0.447786"
663 | ]
664 | },
665 | "execution_count": 45,
666 | "metadata": {},
667 | "output_type": "execute_result"
668 | }
669 | ],
670 | "source": [
671 | "M = rand(5,5) # random entries in [0,1]"
672 | ]
673 | },
674 | {
675 | "cell_type": "code",
676 | "execution_count": 46,
677 | "metadata": {},
678 | "outputs": [
679 | {
680 | "data": {
681 | "text/plain": [
682 | "1×5 Matrix{Float64}:\n",
683 | " 3.10353 3.20869 3.14859 2.80908 2.7497"
684 | ]
685 | },
686 | "execution_count": 46,
687 | "metadata": {},
688 | "output_type": "execute_result"
689 | }
690 | ],
691 | "source": [
692 | "sum(M,dims=1) # not Markov yet"
693 | ]
694 | },
695 | {
696 | "cell_type": "code",
697 | "execution_count": 47,
698 | "metadata": {},
699 | "outputs": [
700 | {
701 | "data": {
702 | "text/plain": [
703 | "5×5 Matrix{Float64}:\n",
704 | " 0.132307 0.0956271 0.130227 0.251906 0.105797\n",
705 | " 0.0991408 0.0698541 0.215016 0.0162131 0.328876\n",
706 | " 0.32196 0.222539 0.113538 0.325138 0.260157\n",
707 | " 0.20848 0.310314 0.250666 0.205515 0.142322\n",
708 | " 0.238113 0.301666 0.290554 0.201228 0.162849"
709 | ]
710 | },
711 | "execution_count": 47,
712 | "metadata": {},
713 | "output_type": "execute_result"
714 | }
715 | ],
716 | "source": [
717 | "M = M ./ sum(M,dims=1)"
718 | ]
719 | },
720 | {
721 | "cell_type": "code",
722 | "execution_count": 48,
723 | "metadata": {},
724 | "outputs": [
725 | {
726 | "data": {
727 | "text/plain": [
728 | "1×5 Matrix{Float64}:\n",
729 | " 1.0 1.0 1.0 1.0 1.0"
730 | ]
731 | },
732 | "execution_count": 48,
733 | "metadata": {},
734 | "output_type": "execute_result"
735 | }
736 | ],
737 | "source": [
738 | "sum(M,dims=1)"
739 | ]
740 | },
741 | {
742 | "cell_type": "code",
743 | "execution_count": 49,
744 | "metadata": {},
745 | "outputs": [
746 | {
747 | "data": {
748 | "text/plain": [
749 | "5-element Vector{ComplexF64}:\n",
750 | " -0.17776058595953462 + 0.0im\n",
751 | " -0.1352931760939033 + 0.0im\n",
752 | " -0.0014416561523480091 - 0.07745841517651891im\n",
753 | " -0.0014416561523480091 + 0.07745841517651891im\n",
754 | " 1.0000000000000004 + 0.0im"
755 | ]
756 | },
757 | "execution_count": 49,
758 | "metadata": {},
759 | "output_type": "execute_result"
760 | }
761 | ],
762 | "source": [
763 | "eigvals(M)"
764 | ]
765 | },
766 | {
767 | "cell_type": "code",
768 | "execution_count": 50,
769 | "metadata": {},
770 | "outputs": [
771 | {
772 | "data": {
773 | "text/plain": [
774 | "5-element Vector{Float64}:\n",
775 | " 0.17776058595953462\n",
776 | " 0.1352931760939033\n",
777 | " 0.07747183006822272\n",
778 | " 0.07747183006822272\n",
779 | " 1.0000000000000004"
780 | ]
781 | },
782 | "execution_count": 50,
783 | "metadata": {},
784 | "output_type": "execute_result"
785 | }
786 | ],
787 | "source": [
788 | "abs.(eigvals(M))"
789 | ]
790 | },
791 | {
792 | "cell_type": "code",
793 | "execution_count": 51,
794 | "metadata": {},
795 | "outputs": [
796 | {
797 | "data": {
798 | "text/plain": [
799 | "5-element Vector{Float64}:\n",
800 | " 0.05662283043686728\n",
801 | " 0.13759834468209436\n",
802 | " 0.3955727782747076\n",
803 | " 0.09136601599437516\n",
804 | " 0.3188400306119557"
805 | ]
806 | },
807 | "execution_count": 51,
808 | "metadata": {},
809 | "output_type": "execute_result"
810 | }
811 | ],
812 | "source": [
813 | "x = rand(5)\n",
814 | "x = x / sum(x) # normalize x to have sum = 1"
815 | ]
816 | },
817 | {
818 | "cell_type": "code",
819 | "execution_count": 52,
820 | "metadata": {},
821 | "outputs": [
822 | {
823 | "data": {
824 | "text/plain": [
825 | "5-element Vector{Float64}:\n",
826 | " 0.14590618751218656\n",
827 | " 0.15842031877820567\n",
828 | " 0.24194675375641556\n",
829 | " 0.2186167846876927\n",
830 | " 0.23510995526549555"
831 | ]
832 | },
833 | "execution_count": 52,
834 | "metadata": {},
835 | "output_type": "execute_result"
836 | }
837 | ],
838 | "source": [
839 | "M^100 * x"
840 | ]
841 | },
842 | {
843 | "cell_type": "code",
844 | "execution_count": 53,
845 | "metadata": {},
846 | "outputs": [
847 | {
848 | "data": {
849 | "text/plain": [
850 | "(1.0, 0.9999999999999962)"
851 | ]
852 | },
853 | "execution_count": 53,
854 | "metadata": {},
855 | "output_type": "execute_result"
856 | }
857 | ],
858 | "source": [
859 | "sum(x), sum(M^100 * x) # still = 1"
860 | ]
861 | },
862 | {
863 | "cell_type": "code",
864 | "execution_count": 54,
865 | "metadata": {},
866 | "outputs": [
867 | {
868 | "data": {
869 | "text/plain": [
870 | "5-element Vector{ComplexF64}:\n",
871 | " 0.1459061875121874 + 0.0im\n",
872 | " 0.15842031877820623 + 0.0im\n",
873 | " 0.24194675375641644 + 0.0im\n",
874 | " 0.21861678468769355 + 0.0im\n",
875 | " 0.23510995526549633 + 0.0im"
876 | ]
877 | },
878 | "execution_count": 54,
879 | "metadata": {},
880 | "output_type": "execute_result"
881 | }
882 | ],
883 | "source": [
884 | "λ, X = eigen(M)\n",
885 | "X[:,end] / sum(X[:,end]) # eigenvector for λ=1, normalized to sum=1"
886 | ]
887 | },
888 | {
889 | "cell_type": "markdown",
890 | "metadata": {},
891 | "source": [
892 | "Again, $M^n x$ is approaching a steady-state ($\\lambda = 1$) eigenvector of $M$ as $n$ grows large."
893 | ]
894 | }
895 | ],
896 | "metadata": {
897 | "@webio": {
898 | "lastCommId": null,
899 | "lastKernelId": null
900 | },
901 | "kernelspec": {
902 | "display_name": "Julia 1.8.0",
903 | "language": "julia",
904 | "name": "julia-1.8"
905 | },
906 | "language_info": {
907 | "file_extension": ".jl",
908 | "mimetype": "application/julia",
909 | "name": "julia",
910 | "version": "1.8.2"
911 | },
912 | "widgets": {
913 | "state": {
914 | "e53e5f7b-c65e-4676-a564-3f8ee40c11c0": {
915 | "views": [
916 | {
917 | "cell_index": 13
918 | }
919 | ]
920 | }
921 | },
922 | "version": "1.2.0"
923 | }
924 | },
925 | "nbformat": 4,
926 | "nbformat_minor": 1
927 | }
928 |
--------------------------------------------------------------------------------
/notes/Linear Transformations.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 11,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "data": {
10 | "text/plain": [
11 | "zero (generic function with 18 methods)"
12 | ]
13 | },
14 | "execution_count": 11,
15 | "metadata": {},
16 | "output_type": "execute_result"
17 | }
18 | ],
19 | "source": [
20 | "# Preliminaries: Teach Julia that functions form a vector space\n",
21 | "import Base.+,Base.*,Base.zero\n",
22 | "+(f::Function,g::Function) = x -> f(x)+g(x)\n",
23 | "*(c::Number,f::Function) = x -> c*f(x)\n",
24 | "*(f::Function,c::Number) = x -> c*f(x)\n",
25 | "zero(Function) = x -> 0"
26 | ]
27 | },
28 | {
29 | "cell_type": "markdown",
30 | "metadata": {},
31 | "source": [
32 | "## Examples of Linear Transformations \n",
33 | "\n",
34 | "Some operations are fairly obviously linear. No basis is needed to see this. It is\n",
35 | "efficient theoretically to treat these operations in one fell swoop in a unified way.\n",
36 | "\n",
37 | "For example the derivative of functions $f(x)$ is obviously linear. Derivatives of sums are sums of derivatives: (f+g)'=f'+g'. Derivatives of constant multiples are constant multiples of derivatives (cf)'=cf'.\n",
38 | "Another function transformation example that is obviously linear is the shift by a constant a: $f(x) \\rightarrow f(x+a):$\n",
39 | "\n"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 21,
45 | "metadata": {},
46 | "outputs": [
47 | {
48 | "data": {
49 | "text/plain": [
50 | "T (generic function with 1 method)"
51 | ]
52 | },
53 | "execution_count": 21,
54 | "metadata": {},
55 | "output_type": "execute_result"
56 | }
57 | ],
58 | "source": [
59 | "function T(f::Function)\n",
60 | " return x->f(x+1)\n",
61 | "end\n",
62 | " "
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 22,
68 | "metadata": {},
69 | "outputs": [
70 | {
71 | "data": {
72 | "text/plain": [
73 | "1×2 Array{Float64,2}:\n",
74 | " -0.279415 -0.279415"
75 | ]
76 | },
77 | "execution_count": 22,
78 | "metadata": {},
79 | "output_type": "execute_result"
80 | }
81 | ],
82 | "source": [
83 | "[T(sin)(5) sin(6)]"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": 25,
89 | "metadata": {},
90 | "outputs": [
91 | {
92 | "data": {
93 | "text/plain": [
94 | "1×2 Array{Float64,2}:\n",
95 | " 2.32168 2.32168"
96 | ]
97 | },
98 | "execution_count": 25,
99 | "metadata": {},
100 | "output_type": "execute_result"
101 | }
102 | ],
103 | "source": [
104 | "# An example check that shifting is linear\n",
105 | "# we check at x=5 that 2*T(sin)+3*T(cos) = T(2*sin+3*cos), where T denotes shift by one\n",
106 | "[( 2*T(sin) + 3*T(cos) )(5) T( 2*sin + 3*cos )(5)]"
107 | ]
108 | },
109 | {
110 | "cell_type": "markdown",
111 | "metadata": {},
112 | "source": [
113 | "Another example considers the vector space of $m_1 \\times n_1$ matrices $X$. If we take a constant\n",
114 | "$m_2 \\times m_1$ matrix $B$ and a constant $n_2 \\times n_1$ matrix $A$ then the map $X \\rightarrow BXA^T$ is\n",
115 | "obviously a linear map from a vector space of dimension $m_1n_1$ to a vector space of dimension $m_2n_2$.\n",
116 | "(Check: $ B(c_1 X+c_2 Y)A^T= c_1 BXA^T + c_2 BYA^T$.)"
117 | ]
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "metadata": {},
122 | "source": [
123 | "## Example 1: Derivatives of (a cos x + b sin x) \n",
124 | "Consider the 2 dimensional vector space consisting of linear combinations of \"sine\" and \"cosine\". How can we take the derivative of a function in this vector space?"
125 | ]
126 | },
127 | {
128 | "cell_type": "markdown",
129 | "metadata": {},
130 | "source": [
131 | "### Derivatives: Method 1, symbolically. Matches the paper and pencil method closely."
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": 28,
137 | "metadata": {},
138 | "outputs": [],
139 | "source": [
140 | "using SymPy"
141 | ]
142 | },
143 | {
144 | "cell_type": "code",
145 | "execution_count": 29,
146 | "metadata": {},
147 | "outputs": [],
148 | "source": [
149 | "x,a,b = Sym.([\"x\",\"a\",\"b\"]);"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": 42,
155 | "metadata": {},
156 | "outputs": [
157 | {
158 | "data": {
159 | "text/latex": [
160 | "$$- a \\sin{\\left (x \\right )} + b \\cos{\\left (x \\right )}$$"
161 | ],
162 | "text/plain": [
163 | "-a*sin(x) + b*cos(x)"
164 | ]
165 | },
166 | "execution_count": 42,
167 | "metadata": {},
168 | "output_type": "execute_result"
169 | }
170 | ],
171 | "source": [
172 | "diff( a*cos(x) + b*sin(x) ,x)"
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {},
178 | "source": [
179 | "### Method 2, matrix-vector. Emphasizes the linear nature of derivatives. (Easy to imagine a numerical implementation.)\n",
180 | "\n",
181 | "$\\begin{pmatrix} a' \\\\b' \\end{pmatrix} = \n",
182 | "\\begin{pmatrix} 0 & 1 \\\\-1 & 0 \\end{pmatrix}\n",
183 | "\\begin{pmatrix} a \\\\b \\end{pmatrix}$\n",
184 | "\n",
185 | "Understanding: $\\begin{pmatrix} a' \\\\b' \\end{pmatrix}$ is shorthand for\n",
186 | "$a\\cos x + b\\sin x$."
187 | ]
188 | },
189 | {
190 | "cell_type": "markdown",
191 | "metadata": {},
192 | "source": [
193 | "Example: Differentiate $f(x)=5\\cos x + 2 \\sin x$:\n",
194 | "1. Encode f(x) for computation as the vector $\\begin{pmatrix} 5 \\\\ 2 \\end{pmatrix}.$\n",
195 | "2. Apply $\\begin{pmatrix} 0 & 1 \\\\-1 & 0 \\end{pmatrix}$\n",
196 | "to $\\begin{pmatrix} 5 \\\\ 2 \\end{pmatrix}$\n",
197 | "yielding $\\begin{pmatrix} 2 \\\\ -5 \\end{pmatrix}.$\n",
198 | "3. Decode $\\begin{pmatrix} 2 \\\\ -5 \\end{pmatrix}$ as\n",
199 | "$2 \\cos x -5 \\sin x.$"
200 | ]
201 | },
202 | {
203 | "cell_type": "markdown",
204 | "metadata": {},
205 | "source": [
206 | "### Method 3: no shorthand. Combines method 1 and method 2."
207 | ]
208 | },
209 | {
210 | "cell_type": "markdown",
211 | "metadata": {},
212 | "source": [
213 | "$\\frac{d}{dx} \\begin{pmatrix} \\sin x& \\cos x \\end{pmatrix}\n",
214 | "\\begin{pmatrix} a \\\\b \\end{pmatrix}\n",
215 | "=\n",
216 | "\\begin{pmatrix} \\sin x& \\cos x \\end{pmatrix}\n",
217 | "\\begin{pmatrix} 0 & -1 \\\\1 & 0 \\end{pmatrix}\n",
218 | "\\begin{pmatrix} a \\\\b \\end{pmatrix}\n",
219 | "$"
220 | ]
221 | },
222 | {
223 | "cell_type": "markdown",
224 | "metadata": {},
225 | "source": [
226 | "Method 2 is purely numerical. The interpretation is imposed by\n",
227 | "the human. Method 3 can be interpreted as method 2 (matrix times vector) with the labels. \n",
228 | "\n",
229 | "If one associates differently, Method 3 can be interpreted as knowing\n",
230 | "the derivative on the basis functions is sufficient for knowing\n",
231 | "this linear transformation everywhere:\n",
232 | "\n",
233 | "$\\frac{d}{dx} \\begin{pmatrix} \\sin x& \\cos x \\end{pmatrix}\n",
234 | "=\n",
235 | "\\begin{pmatrix} \\sin x& \\cos x \\end{pmatrix}\n",
236 | "\\begin{pmatrix} 0 & -1 \\\\1 & 0 \\end{pmatrix}\n",
237 | "$"
238 | ]
239 | },
240 | {
241 | "cell_type": "markdown",
242 | "metadata": {},
243 | "source": [
244 | "### Observation: \n",
245 | "Method 1 is straightforward but in the end\n",
246 | "bulky. Method 2 shows that the linear transformation defined\n",
247 | "by differentiating can be encoded as a simple matrix times vector,\n",
248 | "which is very efficient on a computer, and also gets to the algebraic heart of the operation. Method 3 organizes the symbolic with the matrices in a way that points to the generalization.\n",
249 | "
\n",
250 | "Most students of calculus learn that differentiation is linear.\n",
251 | "Derivatives of sums are sums of derivatives, (f+g)'=f'+g'. Derivatives of\n",
252 | "constant multiples are constant multiples of derivatives (cf)'=cf'."
253 | ]
254 | },
255 | {
256 | "cell_type": "markdown",
257 | "metadata": {},
258 | "source": [
259 | "### With code:"
260 | ]
261 | },
262 | {
263 | "cell_type": "code",
264 | "execution_count": 76,
265 | "metadata": {},
266 | "outputs": [
267 | {
268 | "data": {
269 | "text/plain": [
270 | "1×2 Array{Float64,2}:\n",
271 | " -1.29521 -1.29521"
272 | ]
273 | },
274 | "execution_count": 76,
275 | "metadata": {},
276 | "output_type": "execute_result"
277 | }
278 | ],
279 | "source": [
280 | "f=([sin cos]*[0 1;-1 0]*[5,2])[1]\n",
281 | "x=rand()\n",
282 | "[f(x) 2sin(x)-5cos(x)] ## Check that it gives the right function"
283 | ]
284 | },
285 | {
286 | "cell_type": "markdown",
287 | "metadata": {},
288 | "source": [
289 | "## In general\n",
290 | "\n",
291 | "If $v_1,\\ldots,v_n$ is a basis for a vector space $V$, \n",
292 | "and
$w_1,\\ldots,w_m$ is a basis for a vector space $W$,\n",
293 | "and $T$ is some linear transformation,\n",
294 | "we can write\n",
295 | "\n",
296 | "$$ T[v_1,\\ldots,v_n]\n",
297 | "\\begin{pmatrix} c_1 \\\\ \\vdots \\\\ c_n \\end{pmatrix}\n",
298 | "=\n",
299 | "[w_1,\\ldots,w_m] * A* \\begin{pmatrix} c_1 \\\\ \\vdots \\\\ c_n \\end{pmatrix}$$\n",
300 | "for some $m \\times n$ matrix $A$.\n",
301 | "\n",
302 | "One can associate the equation above concentrating\n",
303 | "on\n",
304 | "$$ T[v_1,\\ldots,v_n]\n",
305 | "=\n",
306 | "[w_1,\\ldots,w_m] * A$$\n",
307 | "\n",
308 | "\n",
309 | "to think of\n",
310 | "$T$ as applied to every basis vector of $V$ to get\n",
311 | "some linear combination of the basis vectors of $W$,\n",
312 | "or one can do \"Method 2\" and think of\n",
313 | "$\\begin{pmatrix} c_1 \\\\ \\vdots \\\\ c_n \\end{pmatrix}$\n",
314 | "as the coefficients in the basis for $V$, and \n",
315 | "$A\\begin{pmatrix} c_1 \\\\ \\vdots \\\\ c_n \\end{pmatrix}$\n",
316 | "as the coefficients of $Tv$ in the basis for $W$."
317 | ]
318 | },
319 | {
320 | "cell_type": "markdown",
321 | "metadata": {},
322 | "source": [
323 | "## Example 2: Shifting (a cos x + b sin x)\n",
324 | "\n",
325 | "Convince yourself without matrices that\n",
326 | "$Tf$ defined by $ (Tf)(x)=f(x+\\theta)$ is linear for\n",
327 | "any constant $\\theta$.\n",
328 | "\n",
329 | "With matrices we have\n",
330 | "\n",
331 | "$T \\begin{pmatrix} \\sin x& \\cos x \\end{pmatrix}\n",
332 | "=\n",
333 | "\\begin{pmatrix} \\sin x& \\cos x \\end{pmatrix}\n",
334 | "\\begin{pmatrix} \\cos \\theta & -\\sin \\theta \\\\ \\sin \\theta & \\cos \\theta \\end{pmatrix}\n",
335 | "$\n",
336 | "or\n",
337 | "$T \\begin{pmatrix} \\sin x& \\cos x \\end{pmatrix}\n",
338 | "\\begin{pmatrix} a \\\\b \\end{pmatrix}\n",
339 | "=\n",
340 | "\\begin{pmatrix} \\sin x& \\cos x \\end{pmatrix}\n",
341 | "\\begin{pmatrix} \\cos \\theta & -\\sin \\theta \\\\ \\sin \\theta & \\cos \\theta \\end{pmatrix}\n",
342 | "\\begin{pmatrix} a \\\\b \\end{pmatrix}\n",
343 | "$\n"
344 | ]
345 | },
346 | {
347 | "cell_type": "markdown",
348 | "metadata": {},
349 | "source": [
350 | "which can be done symbolically but gets a little messier looking. The linear algebra is just tidier."
351 | ]
352 | },
353 | {
354 | "cell_type": "code",
355 | "execution_count": 87,
356 | "metadata": {},
357 | "outputs": [
358 | {
359 | "data": {
360 | "text/latex": [
361 | "$$a \\sin{\\left (\\theta + x \\right )} + b \\cos{\\left (\\theta + x \\right )}$$"
362 | ],
363 | "text/plain": [
364 | "a*sin(theta + x) + b*cos(theta + x)"
365 | ]
366 | },
367 | "execution_count": 87,
368 | "metadata": {},
369 | "output_type": "execute_result"
370 | }
371 | ],
372 | "source": [
373 | "x = Sym(\"x\")\n",
374 | "f = a*sin(x) + b*cos(x)\n",
375 | "Tf = subs(f,x,x+Sym(\"theta\"))"
376 | ]
377 | },
378 | {
379 | "cell_type": "code",
380 | "execution_count": 90,
381 | "metadata": {},
382 | "outputs": [
383 | {
384 | "data": {
385 | "text/latex": [
386 | "$$a \\left(\\sin{\\left (\\theta \\right )} \\cos{\\left (x \\right )} + \\sin{\\left (x \\right )} \\cos{\\left (\\theta \\right )}\\right) + b \\left(- \\sin{\\left (\\theta \\right )} \\sin{\\left (x \\right )} + \\cos{\\left (\\theta \\right )} \\cos{\\left (x \\right )}\\right)$$"
387 | ],
388 | "text/plain": [
389 | "a*(sin(theta)*cos(x) + sin(x)*cos(theta)) + b*(-sin(theta)*sin(x) + cos(theta)\n",
390 | "*cos(x))"
391 | ]
392 | },
393 | "execution_count": 90,
394 | "metadata": {},
395 | "output_type": "execute_result"
396 | }
397 | ],
398 | "source": [
399 | "expand_trig(Tf)"
400 | ]
401 | },
402 | {
403 | "cell_type": "markdown",
404 | "metadata": {},
405 | "source": [
406 | "Of course Example 1 is a special case of Example 2 because\n",
407 | "the derivative is the same as shifting by $\\pi/2$ on this very special vector space."
408 | ]
409 | },
410 | {
411 | "cell_type": "markdown",
412 | "metadata": {},
413 | "source": [
414 | "## Example 3. Change of basis for polynomials"
415 | ]
416 | },
417 | {
418 | "cell_type": "markdown",
419 | "metadata": {},
420 | "source": [
421 | "Suppose one wants to work with Laguerre polynomials.\n",
422 | "Wikipedia can supply the first few of these for us:\n",
423 | " [Laguerre up to degree 6](https://en.wikipedia.org/wiki/Laguerre_polynomials#The_first_few_polynomials)."
424 | ]
425 | },
426 | {
427 | "cell_type": "code",
428 | "execution_count": 85,
429 | "metadata": {},
430 | "outputs": [
431 | {
432 | "data": {
433 | "text/plain": [
434 | "5×5 Array{Rational{Int64},2}:\n",
435 | " 1//1 1//1 1//1 1//1 1//1 \n",
436 | " 0//1 -1//1 -2//1 -3//1 -4//1 \n",
437 | " 0//1 0//1 1//2 3//2 3//1 \n",
438 | " 0//1 0//1 0//1 -1//6 -2//3 \n",
439 | " 0//1 0//1 0//1 0//1 1//24"
440 | ]
441 | },
442 | "execution_count": 85,
443 | "metadata": {},
444 | "output_type": "execute_result"
445 | }
446 | ],
447 | "source": [
448 | "A = Rational.([\n",
449 | " 1 1 2 6 24\n",
450 | " 0 -1 -4 -18 -96\n",
451 | " 0 0 1 9 72\n",
452 | " 0 0 0 -1 -16\n",
453 | " 0 0 0 0 1])./[1 1 2 6 24]"
454 | ]
455 | },
456 | {
457 | "cell_type": "markdown",
458 | "metadata": {},
459 | "source": [
460 | "Check from the Wikipedia article that\n",
461 | "$[L_0 \\ L_1 \\ L_2 \\ L_3 \\ L_4]=[1 \\ x \\ x^2 \\ x^3 \\ x^4] * A$"
462 | ]
463 | },
464 | {
465 | "cell_type": "code",
466 | "execution_count": 86,
467 | "metadata": {},
468 | "outputs": [
469 | {
470 | "data": {
471 | "text/latex": [
472 | "\\begin{bmatrix}1&- x + 1&\\frac{x^{2}}{2} - 2 x + 1&- \\frac{x^{3}}{6} + \\frac{3 x^{2}}{2} - 3 x + 1&\\frac{x^{4}}{24} - \\frac{2 x^{3}}{3} + 3 x^{2} - 4 x + 1\\end{bmatrix}"
473 | ],
474 | "text/plain": [
475 | "1×5 Array{SymPy.Sym,2}:\n",
476 | " 1 -x + 1 x^2/2 - 2*x + 1 … x^4/24 - 2*x^3/3 + 3*x^2 - 4*x + 1"
477 | ]
478 | },
479 | "execution_count": 86,
480 | "metadata": {},
481 | "output_type": "execute_result"
482 | }
483 | ],
484 | "source": [
485 | "[1 x x^2 x^3 x^4]*A"
486 | ]
487 | },
488 | {
489 | "cell_type": "markdown",
490 | "metadata": {},
491 | "source": [
492 | "Convince yourself that to obtain\n",
493 | "the coefficients of \n",
494 | "$c_0 L_0 + c_1 L_1 + c_2 L_2 + c_3 L_3$\n",
495 | "in the standard basis $1,x,x^2,x^3$\n",
496 | "one must simply compute\n",
497 | "$A * \\begin{pmatrix} c_0 \\\\ c_1 \\\\ c_2 \\\\ c_3 \\end{pmatrix}.$\n",
498 | "\n",
499 | "
\n",
500 | "\n",
501 | "Notationally, we are saying that
\n",
502 | "$[L_0 \\ L_1 \\ L_2 \\ L_3]*\\begin{pmatrix} c_0 \\\\ c_1 \\\\ c_2 \\\\ c_3 \\end{pmatrix}=[1 \\ x \\ x^2 \\ x^3] * A*\\begin{pmatrix} c_0 \\\\ c_1 \\\\ c_2 \\\\ c_3 \\end{pmatrix}$"
503 | ]
504 | },
505 | {
506 | "cell_type": "markdown",
507 | "metadata": {},
508 | "source": [
509 | "Of course inv(A) lets us go the other way\n",
510 | "(from monomial coefficients to Laguerre coefficients)"
511 | ]
512 | },
513 | {
514 | "cell_type": "code",
515 | "execution_count": 103,
516 | "metadata": {},
517 | "outputs": [
518 | {
519 | "data": {
520 | "text/plain": [
521 | "5×5 Array{Int64,2}:\n",
522 | " 1 1 2 6 24\n",
523 | " 0 -1 -4 -18 -96\n",
524 | " 0 0 2 18 144\n",
525 | " 0 0 0 -6 -96\n",
526 | " 0 0 0 0 24"
527 | ]
528 | },
529 | "execution_count": 103,
530 | "metadata": {},
531 | "output_type": "execute_result"
532 | }
533 | ],
534 | "source": [
535 | "Int.(inv(A))"
536 | ]
537 | },
538 | {
539 | "cell_type": "markdown",
540 | "metadata": {},
541 | "source": [
542 | "Thus for example
\n",
543 | "$x^3 = 6(L_0 - 3 L_1 + 3 L_2 - 1 L_3).$"
544 | ]
545 | },
546 | {
547 | "cell_type": "markdown",
548 | "metadata": {},
549 | "source": [
550 | "Note: the numbers are Pascal's triangle times factorials"
551 | ]
552 | },
553 | {
554 | "cell_type": "markdown",
555 | "metadata": {},
556 | "source": [
557 | "What if we want to differentiate quartics written in a Laguerre polynomial basis but we only know how to differentiate in a monomial basis?\n",
558 | "
\n",
559 | "In the standard basis $1,x,x^2,x^3,x^4$ the derivative is this matrix:"
560 | ]
561 | },
562 | {
563 | "cell_type": "code",
564 | "execution_count": 89,
565 | "metadata": {},
566 | "outputs": [
567 | {
568 | "data": {
569 | "text/plain": [
570 | "5×5 Array{Int64,2}:\n",
571 | " 0 1 0 0 0\n",
572 | " 0 0 2 0 0\n",
573 | " 0 0 0 3 0\n",
574 | " 0 0 0 0 4\n",
575 | " 0 0 0 0 0"
576 | ]
577 | },
578 | "execution_count": 89,
579 | "metadata": {},
580 | "output_type": "execute_result"
581 | }
582 | ],
583 | "source": [
584 | "D=[0 1 0 0 0\n",
585 | " 0 0 2 0 0\n",
586 | " 0 0 0 3 0\n",
587 | " 0 0 0 0 4\n",
588 | " 0 0 0 0 0]"
589 | ]
590 | },
591 | {
592 | "cell_type": "markdown",
593 | "metadata": {},
594 | "source": [
595 | " We claim\n",
596 | "$\\frac{d}{dx} [1 \\ x \\ x^2 \\ x^3 \\ x^4] = [1 \\ x \\ x^2 \\ x^3 \\ x^4]*D$"
597 | ]
598 | },
599 | {
600 | "cell_type": "code",
601 | "execution_count": 90,
602 | "metadata": {},
603 | "outputs": [
604 | {
605 | "data": {
606 | "text/latex": [
607 | "\\begin{bmatrix}1&x&x^{2}&x^{3}&x^{4}\\end{bmatrix}"
608 | ],
609 | "text/plain": [
610 | "1×5 Array{SymPy.Sym,2}:\n",
611 | " 1 x x^2 x^3 x^4"
612 | ]
613 | },
614 | "execution_count": 90,
615 | "metadata": {},
616 | "output_type": "execute_result"
617 | }
618 | ],
619 | "source": [
620 | "[1 x x^2 x^3 x^4]"
621 | ]
622 | },
623 | {
624 | "cell_type": "code",
625 | "execution_count": 91,
626 | "metadata": {},
627 | "outputs": [
628 | {
629 | "data": {
630 | "text/latex": [
631 | "\\begin{bmatrix}0&1&2 x&3 x^{2}&4 x^{3}\\end{bmatrix}"
632 | ],
633 | "text/plain": [
634 | "1×5 Array{SymPy.Sym,2}:\n",
635 | " 0 1 2*x 3*x^2 4*x^3"
636 | ]
637 | },
638 | "execution_count": 91,
639 | "metadata": {},
640 | "output_type": "execute_result"
641 | }
642 | ],
643 | "source": [
644 | "[1 x x^2 x^3 x^4]*D"
645 | ]
646 | },
647 | {
648 | "cell_type": "code",
649 | "execution_count": 94,
650 | "metadata": {},
651 | "outputs": [
652 | {
653 | "data": {
654 | "text/plain": [
655 | "5×5 Array{Int64,2}:\n",
656 | " 0 -1 -1 -1 -1\n",
657 | " 0 0 -1 -1 -1\n",
658 | " 0 0 0 -1 -1\n",
659 | " 0 0 0 0 -1\n",
660 | " 0 0 0 0 0"
661 | ]
662 | },
663 | "execution_count": 94,
664 | "metadata": {},
665 | "output_type": "execute_result"
666 | }
667 | ],
668 | "source": [
669 | "## Now the derivative in a Laguerre Basis\n",
670 | "Int.(A\\D*A)"
671 | ]
672 | },
673 | {
674 | "cell_type": "markdown",
675 | "metadata": {},
676 | "source": [
677 | "That's interesting. The pattern seems to suggest that\n",
678 | "
\n",
679 | "$\\frac{d}{dx}L_k(x) = -\\sum_{j=0}^{k-1}L_j(x)$\n",
680 | "which is a true identity.\n",
681 | "\n",
682 | "[The wikipedia article](https://en.wikipedia.org/wiki/Laguerre_polynomials) states\n",
683 | "right after the words Sheffer sequence that\n",
684 | "$$\\frac{d}{dx} L_n = -L_{n-1} + \\frac{d}{dx} L_{n-1}$$\n",
685 | "which you should recognize states that the $n$th column\n",
686 | "of the matrix $A^{-1}DA$ is the same as the $n-1$st column,\n",
687 | "with an extra $-1$ in the $(n-1)$st entry."
688 | ]
689 | },
690 | {
691 | "cell_type": "code",
692 | "execution_count": 120,
693 | "metadata": {},
694 | "outputs": [],
695 | "source": [
696 | "# Not working yet\n",
697 | "# SymPy.mpmath[:laguerre](4,0,Sym(\"x\")) "
698 | ]
699 | },
700 | {
701 | "cell_type": "markdown",
702 | "metadata": {},
703 | "source": [
704 | "# Change of Basis\n",
705 | "The above example shows how the derivative matrix can look when changing basis."
706 | ]
707 | },
708 | {
709 | "cell_type": "code",
710 | "execution_count": 106,
711 | "metadata": {},
712 | "outputs": [
713 | {
714 | "data": {
715 | "text/plain": [
716 | "5×5 Array{Int64,2}:\n",
717 | " 0 1 0 0 0\n",
718 | " 0 0 2 0 0\n",
719 | " 0 0 0 3 0\n",
720 | " 0 0 0 0 4\n",
721 | " 0 0 0 0 0"
722 | ]
723 | },
724 | "execution_count": 106,
725 | "metadata": {},
726 | "output_type": "execute_result"
727 | }
728 | ],
729 | "source": [
730 | "# Derivative in standard basis\n",
731 | "D"
732 | ]
733 | },
734 | {
735 | "cell_type": "code",
736 | "execution_count": 107,
737 | "metadata": {},
738 | "outputs": [
739 | {
740 | "data": {
741 | "text/plain": [
742 | "5×5 Array{Int64,2}:\n",
743 | " 0 -1 -1 -1 -1\n",
744 | " 0 0 -1 -1 -1\n",
745 | " 0 0 0 -1 -1\n",
746 | " 0 0 0 0 -1\n",
747 | " 0 0 0 0 0"
748 | ]
749 | },
750 | "execution_count": 107,
751 | "metadata": {},
752 | "output_type": "execute_result"
753 | }
754 | ],
755 | "source": [
756 | "# Derivative in Laguerre basis\n",
757 | "Int.(A\\D*A)"
758 | ]
759 | },
760 | {
761 | "cell_type": "markdown",
762 | "metadata": {},
763 | "source": [
764 | "Conclusion: Similar matrices represent the same linear transformation in a different basis"
765 | ]
766 | },
767 | {
768 | "cell_type": "markdown",
769 | "metadata": {},
770 | "source": [
771 | "## Example 3: Kronecker Products"
772 | ]
773 | },
774 | {
775 | "cell_type": "markdown",
776 | "metadata": {},
777 | "source": [
778 | "Let's check that the [Kronecker Product](https://en.wikipedia.org/wiki/Kronecker_product) is the matrix\n",
779 | "for the transformation $X\\rightarrow BXA^T$"
780 | ]
781 | },
782 | {
783 | "cell_type": "code",
784 | "execution_count": 108,
785 | "metadata": {},
786 | "outputs": [
787 | {
788 | "data": {
789 | "text/plain": [
790 | "4×4 Array{Int64,2}:\n",
791 | " 2 3 2 4\n",
792 | " 8 7 1 4\n",
793 | " 5 9 7 3\n",
794 | " 1 1 2 6"
795 | ]
796 | },
797 | "execution_count": 108,
798 | "metadata": {},
799 | "output_type": "execute_result"
800 | }
801 | ],
802 | "source": [
803 | "A = rand(1:9,4,4)"
804 | ]
805 | },
806 | {
807 | "cell_type": "code",
808 | "execution_count": 109,
809 | "metadata": {},
810 | "outputs": [
811 | {
812 | "data": {
813 | "text/plain": [
814 | "4×4 Array{Int64,2}:\n",
815 | " 2 9 8 6\n",
816 | " 8 2 2 9\n",
817 | " 2 8 6 6\n",
818 | " 4 1 4 9"
819 | ]
820 | },
821 | "execution_count": 109,
822 | "metadata": {},
823 | "output_type": "execute_result"
824 | }
825 | ],
826 | "source": [
827 | "B = rand(1:9,4,4)"
828 | ]
829 | },
830 | {
831 | "cell_type": "code",
832 | "execution_count": 110,
833 | "metadata": {},
834 | "outputs": [
835 | {
836 | "data": {
837 | "text/plain": [
838 | "16×16 Array{Int64,2}:\n",
839 | " 4 18 16 12 6 27 24 18 4 18 16 12 8 36 32 24\n",
840 | " 16 4 4 18 24 6 6 27 16 4 4 18 32 8 8 36\n",
841 | " 4 16 12 12 6 24 18 18 4 16 12 12 8 32 24 24\n",
842 | " 8 2 8 18 12 3 12 27 8 2 8 18 16 4 16 36\n",
843 | " 16 72 64 48 14 63 56 42 2 9 8 6 8 36 32 24\n",
844 | " 64 16 16 72 56 14 14 63 8 2 2 9 32 8 8 36\n",
845 | " 16 64 48 48 14 56 42 42 2 8 6 6 8 32 24 24\n",
846 | " 32 8 32 72 28 7 28 63 4 1 4 9 16 4 16 36\n",
847 | " 10 45 40 30 18 81 72 54 14 63 56 42 6 27 24 18\n",
848 | " 40 10 10 45 72 18 18 81 56 14 14 63 24 6 6 27\n",
849 | " 10 40 30 30 18 72 54 54 14 56 42 42 6 24 18 18\n",
850 | " 20 5 20 45 36 9 36 81 28 7 28 63 12 3 12 27\n",
851 | " 2 9 8 6 2 9 8 6 4 18 16 12 12 54 48 36\n",
852 | " 8 2 2 9 8 2 2 9 16 4 4 18 48 12 12 54\n",
853 | " 2 8 6 6 2 8 6 6 4 16 12 12 12 48 36 36\n",
854 | " 4 1 4 9 4 1 4 9 8 2 8 18 24 6 24 54"
855 | ]
856 | },
857 | "execution_count": 110,
858 | "metadata": {},
859 | "output_type": "execute_result"
860 | }
861 | ],
862 | "source": [
863 | "kron(A,B)"
864 | ]
865 | },
866 | {
867 | "cell_type": "code",
868 | "execution_count": 112,
869 | "metadata": {},
870 | "outputs": [
871 | {
872 | "data": {
873 | "text/plain": [
874 | "4×4 Array{Int64,2}:\n",
875 | " 6 2 7 2\n",
876 | " 5 2 8 4\n",
877 | " 6 1 4 5\n",
878 | " 4 4 5 3"
879 | ]
880 | },
881 | "execution_count": 112,
882 | "metadata": {},
883 | "output_type": "execute_result"
884 | }
885 | ],
886 | "source": [
887 | "X = rand(1:9,4,4)"
888 | ]
889 | },
890 | {
891 | "cell_type": "code",
892 | "execution_count": 115,
893 | "metadata": {},
894 | "outputs": [
895 | {
896 | "data": {
897 | "text/plain": [
898 | "16×2 Array{Int64,2}:\n",
899 | " 1108 1108\n",
900 | " 880 880\n",
901 | " 974 974\n",
902 | " 758 758\n",
903 | " 1950 1950\n",
904 | " 1623 1623\n",
905 | " 1714 1714\n",
906 | " 1395 1395\n",
907 | " 2461 2461\n",
908 | " 2110 2110\n",
909 | " 2186 2186\n",
910 | " 1751 1751\n",
911 | " 1067 1067\n",
912 | " 780 780\n",
913 | " 930 930\n",
914 | " 687 687"
915 | ]
916 | },
917 | "execution_count": 115,
918 | "metadata": {},
919 | "output_type": "execute_result"
920 | }
921 | ],
922 | "source": [
923 | "# The vec operator strings a matrix column wise into one long column\n",
924 | "[ kron(A,B)*vec(X) vec(B*X*A')]"
925 | ]
926 | },
927 | {
928 | "cell_type": "markdown",
929 | "metadata": {},
930 | "source": [
931 | "You might check that kron(A,B) is the matrix of the linear transformation $X \\rightarrow BXA^T$\n",
932 | "in the following basis:"
933 | ]
934 | },
935 | {
936 | "cell_type": "code",
937 | "execution_count": 119,
938 | "metadata": {},
939 | "outputs": [
940 | {
941 | "data": {
942 | "text/plain": [
943 | "4×4 Array{Int64,2}:\n",
944 | " 1 0 0 0\n",
945 | " 0 0 0 0\n",
946 | " 0 0 0 0\n",
947 | " 0 0 0 0"
948 | ]
949 | },
950 | "metadata": {},
951 | "output_type": "display_data"
952 | },
953 | {
954 | "data": {
955 | "text/plain": [
956 | "4×4 Array{Int64,2}:\n",
957 | " 0 0 0 0\n",
958 | " 1 0 0 0\n",
959 | " 0 0 0 0\n",
960 | " 0 0 0 0"
961 | ]
962 | },
963 | "metadata": {},
964 | "output_type": "display_data"
965 | },
966 | {
967 | "data": {
968 | "text/plain": [
969 | "4×4 Array{Int64,2}:\n",
970 | " 0 0 0 0\n",
971 | " 0 0 0 0\n",
972 | " 1 0 0 0\n",
973 | " 0 0 0 0"
974 | ]
975 | },
976 | "metadata": {},
977 | "output_type": "display_data"
978 | },
979 | {
980 | "data": {
981 | "text/plain": [
982 | "4×4 Array{Int64,2}:\n",
983 | " 0 0 0 0\n",
984 | " 0 0 0 0\n",
985 | " 0 0 0 0\n",
986 | " 1 0 0 0"
987 | ]
988 | },
989 | "metadata": {},
990 | "output_type": "display_data"
991 | },
992 | {
993 | "data": {
994 | "text/plain": [
995 | "4×4 Array{Int64,2}:\n",
996 | " 0 1 0 0\n",
997 | " 0 0 0 0\n",
998 | " 0 0 0 0\n",
999 | " 0 0 0 0"
1000 | ]
1001 | },
1002 | "metadata": {},
1003 | "output_type": "display_data"
1004 | },
1005 | {
1006 | "data": {
1007 | "text/plain": [
1008 | "4×4 Array{Int64,2}:\n",
1009 | " 0 0 0 0\n",
1010 | " 0 1 0 0\n",
1011 | " 0 0 0 0\n",
1012 | " 0 0 0 0"
1013 | ]
1014 | },
1015 | "metadata": {},
1016 | "output_type": "display_data"
1017 | },
1018 | {
1019 | "data": {
1020 | "text/plain": [
1021 | "4×4 Array{Int64,2}:\n",
1022 | " 0 0 0 0\n",
1023 | " 0 0 0 0\n",
1024 | " 0 1 0 0\n",
1025 | " 0 0 0 0"
1026 | ]
1027 | },
1028 | "metadata": {},
1029 | "output_type": "display_data"
1030 | },
1031 | {
1032 | "data": {
1033 | "text/plain": [
1034 | "4×4 Array{Int64,2}:\n",
1035 | " 0 0 0 0\n",
1036 | " 0 0 0 0\n",
1037 | " 0 0 0 0\n",
1038 | " 0 1 0 0"
1039 | ]
1040 | },
1041 | "metadata": {},
1042 | "output_type": "display_data"
1043 | },
1044 | {
1045 | "data": {
1046 | "text/plain": [
1047 | "4×4 Array{Int64,2}:\n",
1048 | " 0 0 1 0\n",
1049 | " 0 0 0 0\n",
1050 | " 0 0 0 0\n",
1051 | " 0 0 0 0"
1052 | ]
1053 | },
1054 | "metadata": {},
1055 | "output_type": "display_data"
1056 | },
1057 | {
1058 | "data": {
1059 | "text/plain": [
1060 | "4×4 Array{Int64,2}:\n",
1061 | " 0 0 0 0\n",
1062 | " 0 0 1 0\n",
1063 | " 0 0 0 0\n",
1064 | " 0 0 0 0"
1065 | ]
1066 | },
1067 | "metadata": {},
1068 | "output_type": "display_data"
1069 | },
1070 | {
1071 | "data": {
1072 | "text/plain": [
1073 | "4×4 Array{Int64,2}:\n",
1074 | " 0 0 0 0\n",
1075 | " 0 0 0 0\n",
1076 | " 0 0 1 0\n",
1077 | " 0 0 0 0"
1078 | ]
1079 | },
1080 | "metadata": {},
1081 | "output_type": "display_data"
1082 | },
1083 | {
1084 | "data": {
1085 | "text/plain": [
1086 | "4×4 Array{Int64,2}:\n",
1087 | " 0 0 0 0\n",
1088 | " 0 0 0 0\n",
1089 | " 0 0 0 0\n",
1090 | " 0 0 1 0"
1091 | ]
1092 | },
1093 | "metadata": {},
1094 | "output_type": "display_data"
1095 | },
1096 | {
1097 | "data": {
1098 | "text/plain": [
1099 | "4×4 Array{Int64,2}:\n",
1100 | " 0 0 0 1\n",
1101 | " 0 0 0 0\n",
1102 | " 0 0 0 0\n",
1103 | " 0 0 0 0"
1104 | ]
1105 | },
1106 | "metadata": {},
1107 | "output_type": "display_data"
1108 | },
1109 | {
1110 | "data": {
1111 | "text/plain": [
1112 | "4×4 Array{Int64,2}:\n",
1113 | " 0 0 0 0\n",
1114 | " 0 0 0 1\n",
1115 | " 0 0 0 0\n",
1116 | " 0 0 0 0"
1117 | ]
1118 | },
1119 | "metadata": {},
1120 | "output_type": "display_data"
1121 | },
1122 | {
1123 | "data": {
1124 | "text/plain": [
1125 | "4×4 Array{Int64,2}:\n",
1126 | " 0 0 0 0\n",
1127 | " 0 0 0 0\n",
1128 | " 0 0 0 1\n",
1129 | " 0 0 0 0"
1130 | ]
1131 | },
1132 | "metadata": {},
1133 | "output_type": "display_data"
1134 | },
1135 | {
1136 | "data": {
1137 | "text/plain": [
1138 | "4×4 Array{Int64,2}:\n",
1139 | " 0 0 0 0\n",
1140 | " 0 0 0 0\n",
1141 | " 0 0 0 0\n",
1142 | " 0 0 0 1"
1143 | ]
1144 | },
1145 | "metadata": {},
1146 | "output_type": "display_data"
1147 | }
1148 | ],
1149 | "source": [
1150 | "for j=1:4, i=1:4\n",
1151 | " V=zeros(Int,4,4)\n",
1152 | " V[i,j]=1\n",
1153 | " display(V)\n",
1154 | "end"
1155 | ]
1156 | },
1157 | {
1158 | "cell_type": "code",
1159 | "execution_count": null,
1160 | "metadata": {},
1161 | "outputs": [],
1162 | "source": []
1163 | }
1164 | ],
1165 | "metadata": {
1166 | "kernelspec": {
1167 | "display_name": "Julia 0.6.2",
1168 | "language": "julia",
1169 | "name": "julia-0.6"
1170 | },
1171 | "language_info": {
1172 | "file_extension": ".jl",
1173 | "mimetype": "application/julia",
1174 | "name": "julia",
1175 | "version": "0.6.2"
1176 | }
1177 | },
1178 | "nbformat": 4,
1179 | "nbformat_minor": 2
1180 | }
1181 |
--------------------------------------------------------------------------------
/notes/QR in Julia.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Is Q m×n or n×n? It is both! This is useful!"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 13,
13 | "metadata": {},
14 | "outputs": [
15 | {
16 | "data": {
17 | "text/plain": [
18 | "4×4 LinearAlgebra.QRCompactWYQ{Float64,Array{Float64,2}}:\n",
19 | " -0.36166 0.916348 -0.135025 -0.106193 \n",
20 | " -0.875369 -0.396235 -0.176529 -0.21346 \n",
21 | " -0.216 0.0556349 0.974526 -0.0234073\n",
22 | " -0.237226 0.0144527 -0.0300856 0.970881 "
23 | ]
24 | },
25 | "execution_count": 13,
26 | "metadata": {},
27 | "output_type": "execute_result"
28 | }
29 | ],
30 | "source": [
31 | "using LinearAlgebra\n",
32 | "A = rand(4,2)\n",
33 | "Q,R = qr(A)\n",
34 | "Q"
35 | ]
36 | },
37 | {
38 | "cell_type": "markdown",
39 | "metadata": {},
40 | "source": [
41 | "## It looks 4×4!"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 14,
47 | "metadata": {},
48 | "outputs": [
49 | {
50 | "data": {
51 | "text/plain": [
52 | "(4, 4)"
53 | ]
54 | },
55 | "execution_count": 14,
56 | "metadata": {},
57 | "output_type": "execute_result"
58 | }
59 | ],
60 | "source": [
61 | "size(Q)"
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "metadata": {},
67 | "source": [
68 | "## but wait! I can multiply by a vector of size 2"
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "execution_count": 15,
74 | "metadata": {},
75 | "outputs": [
76 | {
77 | "data": {
78 | "text/plain": [
79 | "4-element Array{Float64,1}:\n",
80 | " 1.4710364530489386 \n",
81 | " -1.6678387032016588 \n",
82 | " -0.10473038358204545\n",
83 | " -0.2083203318415276 "
84 | ]
85 | },
86 | "execution_count": 15,
87 | "metadata": {},
88 | "output_type": "execute_result"
89 | }
90 | ],
91 | "source": [
92 | "Q * [1,2]"
93 | ]
94 | },
95 | {
96 | "cell_type": "markdown",
97 | "metadata": {},
98 | "source": [
99 | "## what about size 3? !! (answer: no)"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": 16,
105 | "metadata": {},
106 | "outputs": [
107 | {
108 | "ename": "DimensionMismatch",
109 | "evalue": "DimensionMismatch(\"vector must have length either 4 or 2\")",
110 | "output_type": "error",
111 | "traceback": [
112 | "DimensionMismatch(\"vector must have length either 4 or 2\")",
113 | "",
114 | "Stacktrace:",
115 | " [1] *(::LinearAlgebra.QRCompactWYQ{Float64,Array{Float64,2}}, ::Array{Int64,1}) at /Users/sabae/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.3/LinearAlgebra/src/qr.jl:563",
116 | " [2] top-level scope at In[16]:1"
117 | ]
118 | }
119 | ],
120 | "source": [
121 | "Q * [1,2,3]"
122 | ]
123 | },
124 | {
125 | "cell_type": "markdown",
126 | "metadata": {},
127 | "source": [
128 | "## what about size 4? (answer: yes)"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": 20,
134 | "metadata": {},
135 | "outputs": [
136 | {
137 | "data": {
138 | "text/plain": [
139 | "4-element Array{Float64,1}:\n",
140 | " 0.6411891565050274\n",
141 | " -3.051264992118778 \n",
142 | " 2.7252182423170517\n",
143 | " 3.5849468538868363"
144 | ]
145 | },
146 | "execution_count": 20,
147 | "metadata": {},
148 | "output_type": "execute_result"
149 | }
150 | ],
151 | "source": [
152 | "Q * [1,2,3,4]"
153 | ]
154 | },
155 | {
156 | "cell_type": "markdown",
157 | "metadata": {},
158 | "source": [
159 | "## What's going on??"
160 | ]
161 | },
162 | {
163 | "cell_type": "markdown",
164 | "metadata": {},
165 | "source": [
166 | "Q is not stored as elements, it is stored in a more compact form known as a [WY representation](https://www.researchgate.net/publication/23844885_A_Storage-Efficient_WY_Representation_for_Products_of_Householder_Transformations) which we do not cover in 18.06.\n",
167 | "This form not only saves memory, but allows us to complete the tall-skinny m×n Q into a full square orthogonal Q."
168 | ]
169 | },
170 | {
171 | "cell_type": "markdown",
172 | "metadata": {},
173 | "source": [
174 | "The \"extra\" vectors are an orthonormal set of vectors that are orthogonal to the column space of A. This is associated with the left nullspace of A."
175 | ]
176 | },
177 | {
178 | "cell_type": "code",
179 | "execution_count": 1,
180 | "metadata": {},
181 | "outputs": [
182 | {
183 | "data": {
184 | "text/plain": [
185 | "4×4 Array{Float64,2}:\n",
186 | " 0.0486726 0.422254 0.0384858 0.324084\n",
187 | " 0.700173 0.479445 0.570882 0.340941\n",
188 | " 0.868803 0.791772 0.5067 0.294249\n",
189 | " 0.550927 0.680756 0.133569 0.127481"
190 | ]
191 | },
192 | "execution_count": 1,
193 | "metadata": {},
194 | "output_type": "execute_result"
195 | }
196 | ],
197 | "source": [
198 | "A = rand(4,4)"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": 2,
204 | "metadata": {},
205 | "outputs": [
206 | {
207 | "data": {
208 | "text/plain": [
209 | "4×3 Array{Float64,2}:\n",
210 | " 0.422254 0.0384858 0.324084\n",
211 | " 0.479445 0.570882 0.340941\n",
212 | " 0.791772 0.5067 0.294249\n",
213 | " 0.680756 0.133569 0.127481"
214 | ]
215 | },
216 | "execution_count": 2,
217 | "metadata": {},
218 | "output_type": "execute_result"
219 | }
220 | ],
221 | "source": [
222 | "A[:,2:end]"
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": 3,
228 | "metadata": {},
229 | "outputs": [
230 | {
231 | "data": {
232 | "text/plain": [
233 | "4×3 Array{Float64,2}:\n",
234 | " 0.422254 0.0384858 0.324084\n",
235 | " 0.479445 0.570882 0.340941\n",
236 | " 0.791772 0.5067 0.294249\n",
237 | " 0.680756 0.133569 0.127481"
238 | ]
239 | },
240 | "execution_count": 3,
241 | "metadata": {},
242 | "output_type": "execute_result"
243 | }
244 | ],
245 | "source": [
246 | "A[:,2:4]"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "execution_count": 4,
252 | "metadata": {},
253 | "outputs": [
254 | {
255 | "data": {
256 | "text/plain": [
257 | "4×3 Array{Float64,2}:\n",
258 | " 0.422254 0.0384858 0.324084\n",
259 | " 0.479445 0.570882 0.340941\n",
260 | " 0.791772 0.5067 0.294249\n",
261 | " 0.680756 0.133569 0.127481"
262 | ]
263 | },
264 | "execution_count": 4,
265 | "metadata": {},
266 | "output_type": "execute_result"
267 | }
268 | ],
269 | "source": [
270 | "A[:,[2,3,4]]"
271 | ]
272 | },
273 | {
274 | "cell_type": "markdown",
275 | "metadata": {},
276 | "source": [
277 | "## Six Cases (kind of)\n",
278 | "1. Square (rank=n rank