├── .gitignore └── notes ├── circuit.png ├── strang.jpg ├── Demo-Mar18.pdf ├── 1806overview.pdf ├── svd-practice.pdf ├── 1806overview.pptx ├── cyclic-springs.png ├── jordan-vectors.pdf ├── where-to-go-after.pdf ├── cookie-strang-book.jpg ├── Notice of Video Recording 18_06 S23.docx ├── Introduction to Linear Algebra 6th edition and A = CR_04.pdf ├── eigshow.jl ├── Sample Variance division by n-1.ipynb ├── Demo-Mar18.ipynb ├── SVD-eigenproblem.ipynb ├── Machine-Learning-with-Gaussian-elimination.ipynb ├── QR Factorization Examples in Julia.ipynb ├── Perron-Frobenius.ipynb ├── Singular.ipynb ├── Gram-Schmidt.ipynb ├── rank-r and full svds.ipynb ├── Gauss-Jordan.ipynb ├── Markov.ipynb ├── Linear Transformations.ipynb └── QR in Julia.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.log 3 | *.aux 4 | .ipynb_checkpoints 5 | -------------------------------------------------------------------------------- /notes/circuit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/circuit.png -------------------------------------------------------------------------------- /notes/strang.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/strang.jpg -------------------------------------------------------------------------------- /notes/Demo-Mar18.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/Demo-Mar18.pdf -------------------------------------------------------------------------------- /notes/1806overview.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/1806overview.pdf 
-------------------------------------------------------------------------------- /notes/svd-practice.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/svd-practice.pdf -------------------------------------------------------------------------------- /notes/1806overview.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/1806overview.pptx -------------------------------------------------------------------------------- /notes/cyclic-springs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/cyclic-springs.png -------------------------------------------------------------------------------- /notes/jordan-vectors.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/jordan-vectors.pdf -------------------------------------------------------------------------------- /notes/where-to-go-after.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/where-to-go-after.pdf -------------------------------------------------------------------------------- /notes/cookie-strang-book.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/cookie-strang-book.jpg -------------------------------------------------------------------------------- /notes/Notice of Video Recording 18_06 S23.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/Notice of Video Recording 18_06 S23.docx -------------------------------------------------------------------------------- 
/notes/Introduction to Linear Algebra 6th edition and A = CR_04.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mitmath/1806/HEAD/notes/Introduction to Linear Algebra 6th edition and A = CR_04.pdf -------------------------------------------------------------------------------- /notes/eigshow.jl: -------------------------------------------------------------------------------- 1 | using GLVisualize, Colors, Reactive, GLWindow, GeometryTypes 2 | using Images, StaticArrays, GLAbstraction, Iterators 3 | import GeometryTypes: intersects 4 | import GLVisualize: mm 5 | window = glscreen(); @async GLWindow.waiting_renderloop(window) 6 | animate(t, A) = t*eye(A) + (1-t)*A 7 | 8 | iconsize = 8mm 9 | editarea, viewarea = x_partition_abs(window.area, round(Int, 8.2 * iconsize)) 10 | edit_screen = Screen( 11 | window, area = editarea, 12 | stroke = (1f0, RGBA{Float32}(0.9f0, 0.9f0, 0.9f0, 1f0)) 13 | ) 14 | viewscreen = Screen( 15 | window, area = viewarea, 16 | ) 17 | 18 | img = loadasset("doge.png") 19 | A = Mat{2}(1, 3, 4, 2) ./ 4 20 | sv, t = GLVisualize.labeled_slider(linspace(1.0,0.0,100), edit_screen) 21 | matrix = map(t-> animate(t, A), t) 22 | matrix_str = map(matrix) do m 23 | str = "" 24 | for i=1:2 25 | for j=1:2 26 | str *= string(@sprintf("%15.4f", m[i, j]), " ") 27 | end 28 | str *= "\n" 29 | end 30 | str 31 | end 32 | 33 | det_str = map(matrix) do m 34 | @sprintf("%15.4f", det(m)) 35 | end 36 | 37 | menu = Pair[ 38 | "slider:" => sv, 39 | "matrix:" => matrix_str, 40 | "determinant:" => det_str 41 | ] 42 | _view(visualize( 43 | menu, 44 | text_scale = 4mm, 45 | width = 8iconsize 46 | ), edit_screen, camera = :fixed_pixel) 47 | 48 | prim_rect = SimpleRectangle(-250, -250, 500, 500) 49 | mesh = GLUVMesh(prim_rect) 50 | prim = map(t) do t 51 | points = decompose(Point2f0, prim_rect) 52 | points .= (*).((animate(t, A),), points) 53 | mesh.vertices[:] = map(x-> Point3f0(x[1], x[2], 0), points) 54 | 
mesh 55 | end 56 | _view(visualize(img, fxaa = true, primitive = prim, boundingbox = nothing), viewscreen) 57 | 58 | 59 | origin = Point2f0(0) 60 | lines = Point2f0[] 61 | 62 | function vec_angle(origin, a, b) 63 | diff0 = a - origin 64 | diff1 = b - origin 65 | d = dot(diff0, diff1) 66 | det = cross(diff0, diff1) 67 | atan2(det, d) 68 | end 69 | function sort_rectangle!(points) 70 | middle = mean(points) 71 | p1 = first(points) 72 | sort!(points, by = p-> vec_angle(middle, p, p1)) 73 | end 74 | 75 | eigvectpoly = map(t) do t 76 | # bring vertices in correct order and close rectangle 77 | points = sort_rectangle!(map(Point2f0, vertices(value(prim)))) 78 | push!(points, points[1]) # close points 79 | 80 | a = eigfact(Array(A)) 81 | eigvectors = map(1:size(a.vectors, 1)) do i 82 | normalize(Vec2f0(a.vectors[:, i]...)) 83 | end 84 | v1 = eigvectors[1] * 1000f0 85 | v2 = eigvectors[2] * 1000f0 86 | m = animate(t, A) 87 | eigseg1 = LineSegment(origin, Point2f0(m*v1)) 88 | eigseg2 = LineSegment(origin, Point2f0(m*v2)) 89 | seg1cut = seg2cut = (0, origin) 90 | for (i, (a, b)) in enumerate(partition(points, 2, 1)) 91 | seg = LineSegment(a,b) 92 | intersected, p = intersects(eigseg1, seg) 93 | intersected && (seg1cut = (i, p)) 94 | intersected, p = intersects(eigseg2, seg) 95 | intersected && (seg2cut = (i, p)) 96 | end 97 | pop!(points) #remove closing point 98 | GLPlainMesh(points), Point2f0[seg2cut[2], origin, seg1cut[2]] 99 | end 100 | # _view(visualize( 101 | # map(first, eigvectpoly), 102 | # color = RGBA(1f0, 1f0, 1f0, 0.6f0), 103 | # ), camera = :orthographic_pixel) 104 | 105 | _view(visualize( 106 | map(last, eigvectpoly), :linesegment, 107 | indices = [2, 1, 2, 3], 108 | thickness = 3f0, 109 | color = RGBA(0.60, 0.3f0, 0.4f0, 1f0), 110 | ), viewscreen, camera = :orthographic_pixel) 111 | 112 | _view(visualize( 113 | (Circle(Point2f0(0), 5f0), map(x-> map(Point2f0, vertices(x)), prim)), 114 | color = RGBA(0.7f0, 0.2f0, 0.9f0, 1f0), 115 | ), viewscreen, camera = 
:orthographic_pixel) 116 | center!(viewscreen, :orthographic_pixel, border = 10f0) 117 | -------------------------------------------------------------------------------- /notes/Sample Variance division by n-1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "The formula for [sample variance](https://en.wikipedia.org/wiki/Variance#Sample_variance):\n", 8 | " $$s_n^2 = \\frac{1}{n-1}\\sum (x_i-\\bar{x})^2$$\n", 9 | " has that funny $n-1$ in the denominator.\n", 10 | " \n", 11 | "The n-1 is referred to as [Bessel's correction](https://en.wikipedia.org/wiki/Bessel%27s_correction).\n", 12 | "The usual explanation involves vague terms such as [degrees of freedom](https://en.wikipedia.org/wiki/Degrees_of_freedom_(statistics%29) which always sounded flaky to me." 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "## 1. 
Let us first check the n-1 by experiment" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 23, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/plain": [ 30 | "f (generic function with 1 method)" 31 | ] 32 | }, 33 | "execution_count": 23, 34 | "metadata": {}, 35 | "output_type": "execute_result" 36 | } 37 | ], 38 | "source": [ 39 | "function f(n)\n", 40 | " x = randn(n)\n", 41 | " norm(x-mean(x))^2\n", 42 | "end" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 30, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "text/plain": [ 53 | "10.00378620254928" 54 | ] 55 | }, 56 | "execution_count": 30, 57 | "metadata": {}, 58 | "output_type": "execute_result" 59 | } 60 | ], 61 | "source": [ 62 | "n=11\n", 63 | "mean([f(n) for i=1:1_000_000])" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 28, 69 | "metadata": {}, 70 | "outputs": [ 71 | { 72 | "data": { 73 | "text/plain": [ 74 | "3.9965121482424095" 75 | ] 76 | }, 77 | "execution_count": 28, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": [ 83 | "n=5\n", 84 | "mean([f(n) for i=1:1_000_000])" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "## 2. A few facts about randn" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "metadata": {}, 97 | "source": [ 98 | "randn(n) is an n-vector of independent standard normals.\n", 99 | "\n", 100 | "If Q is any orthgonal matrix, $Q*$randn(n) is also an n-vector of independent standard normals.\n", 101 | "There is no mathematical way to distinguish randn(n) from $Q*$randn(n). This is because the\n", 102 | "probability function is proportional to $e^{-\\|x\\|^2/2}$, i.e., it only depends on the length of x, not\n", 103 | "the direction.\n", 104 | "\n", 105 | "Also the expected value of randn(1)^2 is 1." 
106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "## 3. Linear Algebra makes n-1 easy to understand" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "Consider the projection matrix $P=I-1/n$. The matrix-vector product $Px$ computes x-mean(x)." 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 56, 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "data": { 129 | "text/plain": [ 130 | "4×4 Array{Rational{Int64},2}:\n", 131 | " 3//4 -1//4 -1//4 -1//4\n", 132 | " -1//4 3//4 -1//4 -1//4\n", 133 | " -1//4 -1//4 3//4 -1//4\n", 134 | " -1//4 -1//4 -1//4 3//4" 135 | ] 136 | }, 137 | "execution_count": 56, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": [ 143 | "# example \n", 144 | "n = 4\n", 145 | "P = eye(Int,n) - 1//n" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "If we write the eigendecomposition $P=Q\\Lambda Q'$, then $\\Lambda$ has one diagonal entry (say the first) $0$ and the\n", 153 | "rest $1$.\n", 154 | "
\n", 155 | "Therefore if x=randn(n) so is Qx as a random variable, and $$\\|PQx\\|^2 = \\|Q\\Lambda x\\|^2 = \\|\\Lambda x\\|^2=x_2^2 +\\ldots+x_n^2 $$ which is obviously n-1 in expectation." 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [] 164 | } 165 | ], 166 | "metadata": { 167 | "kernelspec": { 168 | "display_name": "Julia 0.6.2", 169 | "language": "julia", 170 | "name": "julia-0.6" 171 | }, 172 | "language_info": { 173 | "file_extension": ".jl", 174 | "mimetype": "application/julia", 175 | "name": "julia", 176 | "version": "0.6.2" 177 | } 178 | }, 179 | "nbformat": 4, 180 | "nbformat_minor": 2 181 | } 182 | -------------------------------------------------------------------------------- /notes/Demo-Mar18.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "3×3 Array{Float64,2}:\n", 12 | " 0.262707 0.722601 0.227894\n", 13 | " 0.194994 0.545517 0.595722\n", 14 | " 0.465314 0.686887 0.644692" 15 | ] 16 | }, 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "output_type": "execute_result" 20 | } 21 | ], 22 | "source": [ 23 | "using LinearAlgebra\n", 24 | "A = rand(3,3)" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "data": { 34 | "text/plain": [ 35 | "3×3 Array{Float64,2}:\n", 36 | " 0.31596 0.740561 0.63726 \n", 37 | " 0.434797 0.847685 0.753471\n", 38 | " 0.556164 1.15377 0.930863" 39 | ] 40 | }, 41 | "execution_count": 3, 42 | "metadata": {}, 43 | "output_type": "execute_result" 44 | } 45 | ], 46 | "source": [ 47 | "A2 = A^2" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": 4, 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "data": { 57 | "text/plain": [ 58 | "3×3 
Array{Float64,2}:\n", 59 | " 0.001 0.002 0.003\n", 60 | " 0.004 0.005 0.006\n", 61 | " 0.007 0.008 0.009" 62 | ] 63 | }, 64 | "execution_count": 4, 65 | "metadata": {}, 66 | "output_type": "execute_result" 67 | } 68 | ], 69 | "source": [ 70 | "dA = [0.001 0.002 0.003 ; 0.004 0.005 0.006 ; 0.007 0.008 0.009]" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 5, 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "data": { 80 | "text/plain": [ 81 | "3×3 Array{Float64,2}:\n", 82 | " 0.00682701 0.00987187 0.0105702\n", 83 | " 0.0114308 0.0177037 0.0170739\n", 84 | " 0.0154144 0.0252529 0.0236327" 85 | ] 86 | }, 87 | "execution_count": 5, 88 | "metadata": {}, 89 | "output_type": "execute_result" 90 | } 91 | ], 92 | "source": [ 93 | "(A + dA)^2 - A2" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 6, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "data": { 103 | "text/plain": [ 104 | "3×3 Array{Float64,2}:\n", 105 | " 0.00679701 0.00983587 0.0105282\n", 106 | " 0.0113648 0.0176227 0.0169779\n", 107 | " 0.0153124 0.0251269 0.0234827" 108 | ] 109 | }, 110 | "execution_count": 6, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "A*dA + dA*A" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": 7, 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "data": { 126 | "text/plain": [ 127 | "3×3 Array{Float64,2}:\n", 128 | " 0.00949674 0.0119231 0.0143495\n", 129 | " 0.0130942 0.0157667 0.0184392\n", 130 | " 0.0154514 0.0190452 0.022639 " 131 | ] 132 | }, 133 | "execution_count": 7, 134 | "metadata": {}, 135 | "output_type": "execute_result" 136 | } 137 | ], 138 | "source": [ 139 | "2*A*dA" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 8, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "data": { 149 | "text/plain": [ 150 | "3×3 Array{Float64,2}:\n", 151 | " 0.00409727 0.00774859 0.00670683\n", 152 | " 0.00963536 
0.0194786 0.0155167 \n", 153 | " 0.0151735 0.0312086 0.0243265 " 154 | ] 155 | }, 156 | "execution_count": 8, 157 | "metadata": {}, 158 | "output_type": "execute_result" 159 | } 160 | ], 161 | "source": [ 162 | "2*dA*A" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": 9, 168 | "metadata": {}, 169 | "outputs": [ 170 | { 171 | "data": { 172 | "text/plain": [ 173 | "3×3 Array{Float64,2}:\n", 174 | " -0.85783 -4.61434 4.56708 \n", 175 | " 2.25985 0.944638 -1.67173 \n", 176 | " -1.78861 2.32399 0.0359288" 177 | ] 178 | }, 179 | "execution_count": 9, 180 | "metadata": {}, 181 | "output_type": "execute_result" 182 | } 183 | ], 184 | "source": [ 185 | "AI = inv(A)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 10, 191 | "metadata": {}, 192 | "outputs": [ 193 | { 194 | "data": { 195 | "text/plain": [ 196 | "3×3 Array{Float64,2}:\n", 197 | " 0.00361557 0.0219253 -0.0380485\n", 198 | " -0.000140811 -0.0161177 0.0188606\n", 199 | " 0.00369236 0.0071466 -0.0214998" 200 | ] 201 | }, 202 | "execution_count": 10, 203 | "metadata": {}, 204 | "output_type": "execute_result" 205 | } 206 | ], 207 | "source": [ 208 | "inv(A+dA)-inv(A)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 11, 214 | "metadata": {}, 215 | "outputs": [ 216 | { 217 | "data": { 218 | "text/plain": [ 219 | "3×3 Array{Float64,2}:\n", 220 | " 0.0036997 0.0220909 -0.0385415\n", 221 | " -0.0001703 -0.0161938 0.0190541\n", 222 | " 0.0037521 0.00724607 -0.0218293" 223 | ] 224 | }, 225 | "execution_count": 11, 226 | "metadata": {}, 227 | "output_type": "execute_result" 228 | } 229 | ], 230 | "source": [ 231 | "-AI*dA*AI" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": {}, 238 | "outputs": [], 239 | "source": [] 240 | } 241 | ], 242 | "metadata": { 243 | "kernelspec": { 244 | "display_name": "Julia 1.1.0", 245 | "language": "julia", 246 | "name": "julia-1.1" 247 | }, 248 | 
"language_info": { 249 | "file_extension": ".jl", 250 | "mimetype": "application/julia", 251 | "name": "julia", 252 | "version": "1.1.0" 253 | } 254 | }, 255 | "nbformat": 4, 256 | "nbformat_minor": 2 257 | } 258 | -------------------------------------------------------------------------------- /notes/SVD-eigenproblem.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# The SVD as an eigenproblem\n", 8 | "\n", 9 | "Notice that if $A = U\\Sigma V^H$, then\n", 10 | "\n", 11 | "$$\n", 12 | "A^H A = V \\Sigma U^H U \\Sigma V^H = V \\Sigma^2 V^H\n", 13 | "$$\n", 14 | "\n", 15 | "That is, to multiply $A^H A x$, you (1) compute $V^H x$ (the $V$ components of $x$), then (2) multiply each component by $\\sigma^2$, and finally (3) multiply the coefficients by $V$ and add up. It follows that:\n", 16 | "\n", 17 | "* The singular values $\\sigma^2$ are the **nonzero eigenvalues** of $A^H A$ and the corresponding **eigenvectors are the right singular vectors** $V$.\n", 18 | "\n", 19 | "Similarly,\n", 20 | "\n", 21 | "$$\n", 22 | "A A^H = U \\Sigma V^H V \\Sigma U^H = U \\Sigma^2 U^H\n", 23 | "$$\n", 24 | "\n", 25 | "so\n", 26 | "\n", 27 | "* The singular values $\\sigma^2$ are the **nonzero eigenvalues** of $A A^H$ and the corresponding **eigenvectors are the left singular vectors** $U$.\n", 28 | "\n", 29 | "We can easily check this:" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 1, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/plain": [ 40 | "5×3 Array{Float64,2}:\n", 41 | " 0.202935 -0.810741 0.379812 \n", 42 | " 0.317852 1.17222 0.0789665\n", 43 | " -1.58283 -0.524304 0.949145 \n", 44 | " 0.122448 1.57466 0.693527 \n", 45 | " -0.496476 1.13621 0.511883 " 46 | ] 47 | }, 48 | "execution_count": 1, 49 | "metadata": {}, 50 | "output_type": "execute_result" 51 | } 52 | ], 53 | "source": [ 54 | "A = 
randn(5,3)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "metadata": {}, 60 | "source": [ 61 | "Note that in this case, $A$ is a $5 \\times 3$ matrix of rank 3." 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 2, 67 | "metadata": {}, 68 | "outputs": [ 69 | { 70 | "data": { 71 | "text/plain": [ 72 | "3-element Array{Float64,1}:\n", 73 | " 6.31957 \n", 74 | " 4.01707 \n", 75 | " 0.443596" 76 | ] 77 | }, 78 | "execution_count": 2, 79 | "metadata": {}, 80 | "output_type": "execute_result" 81 | } 82 | ], 83 | "source": [ 84 | "U, σ, V = svd(A)\n", 85 | "\n", 86 | "σ.^2 # the σ² values" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "$A^H A$ is a $3 \\times 3$ matrix of rank 3 with three nonzero eigenvalues that equal the singular values squared:" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 3, 99 | "metadata": {}, 100 | "outputs": [ 101 | { 102 | "data": { 103 | "text/plain": [ 104 | "3-element Array{Float64,1}:\n", 105 | " 0.443596\n", 106 | " 4.01707 \n", 107 | " 6.31957 " 108 | ] 109 | }, 110 | "execution_count": 3, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "eigvals(A'*A) # AᴴA has the same eigenvals!" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "$AA^H$ is a $5 \\times 5$ matrix of rank 3 (recall that the ranks of $A$, $AA^H$, and $A^H A$ are all equal!). It has 3 nonzero eigenvalues that equal the $\\sigma^2$ values, and 2 zero eigenvalues corresponding to the **two-dimensional** nullspace\n", 124 | "$$\n", 125 | "N(AA^H) = N(A^H) = C(A)^\\perp = C(U)^\\perp\n", 126 | "$$\n", 127 | "That is, the zero eigenvectors are those perpendicuar to the left singular vectors $U$." 
128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 4, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "5-element Array{Float64,1}:\n", 139 | " -1.71137e-16\n", 140 | " 8.87578e-16\n", 141 | " 0.443596 \n", 142 | " 4.01707 \n", 143 | " 6.31957 " 144 | ] 145 | }, 146 | "execution_count": 4, 147 | "metadata": {}, 148 | "output_type": "execute_result" 149 | } 150 | ], 151 | "source": [ 152 | "eigvals(A*A') # the same *nonzero* eigenvalues" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "We can also check the eigenvectors, e.g. the eigenvectors of $A^H A$ should match V:" 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "execution_count": 5, 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "data": { 169 | "text/plain": [ 170 | "3×3 Array{Float64,2}:\n", 171 | " 0.564298 -0.817673 0.113922\n", 172 | " -0.203239 -0.00384465 0.979122\n", 173 | " 0.800163 0.57567 0.168353" 174 | ] 175 | }, 176 | "execution_count": 5, 177 | "metadata": {}, 178 | "output_type": "execute_result" 179 | } 180 | ], 181 | "source": [ 182 | "eigvecs(A'A)" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 6, 188 | "metadata": {}, 189 | "outputs": [ 190 | { 191 | "data": { 192 | "text/plain": [ 193 | "3×3 Array{Float64,2}:\n", 194 | " -0.113922 -0.817673 -0.564298\n", 195 | " -0.979122 -0.00384465 0.203239\n", 196 | " -0.168353 0.57567 -0.800163" 197 | ] 198 | }, 199 | "execution_count": 6, 200 | "metadata": {}, 201 | "output_type": "execute_result" 202 | } 203 | ], 204 | "source": [ 205 | "V" 206 | ] 207 | }, 208 | { 209 | "cell_type": "markdown", 210 | "metadata": {}, 211 | "source": [ 212 | "Yes, they match, up to an overall sign flip.\n", 213 | "\n", 214 | "(Note that the columns are in reverse order, because `svdvals` are by default sorted in *descending* order in Julia, whereas `eigvals` of Hermitian matrices are sorted in 
*ascending* order.)" 215 | ] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": {}, 220 | "source": [ 221 | "## Remarks\n", 222 | "\n", 223 | "* This, in principle, finally gives us a way to compute the SVD of a matrix: just find the eigenvectors and eigenvalues of $A^H A$. (Note that $AV = U\\Sigma$, so that once you have $V$ and $\\Sigma$ you can get $U$.)\n", 224 | "\n", 225 | "* In practice, computers use a different way to compute the SVD. (The most famous practical method is called \"Golub-Kahan bidiagonalization.\") In 18.06, we are content to let the `svd` function be a \"black box\", much like `eig`.\n", 226 | "\n", 227 | "* The fact that the singular values/vectors are related to eigenvalues of $A^H A$ and $A A^H$ has lots of important applications. Perhaps most famously, it means that the SVD diagonalizes the \"covariance matrix\" in statistics, which gives rise to the statistical method of [principal component analysis (PCA)](https://en.wikipedia.org/wiki/Principal_component_analysis)." 
228 | ] 229 | } 230 | ], 231 | "metadata": { 232 | "kernelspec": { 233 | "display_name": "Julia 0.6.3", 234 | "language": "julia", 235 | "name": "julia-0.6" 236 | }, 237 | "language_info": { 238 | "file_extension": ".jl", 239 | "mimetype": "application/julia", 240 | "name": "julia", 241 | "version": "0.6.3" 242 | } 243 | }, 244 | "nbformat": 4, 245 | "nbformat_minor": 2 246 | } 247 | -------------------------------------------------------------------------------- /notes/Machine-Learning-with-Gaussian-elimination.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## A Machine Learning Example where we compare Gaussian Elimination with the commonly used method of today\n", 8 | "\n", 9 | "We show that a simple linear neuron can be \"learned\" with Gaussian elimination, and indeed is much\n", 10 | "faster and more accurate upon doing so. (Much of machine learning is non-linear.)\n", 11 | "\n", 12 | "Our model of the universe is that we have an unknow 3-vector\n", 13 | "\n", 14 | "$w = \\left[ \\begin{array}{c} w_1 \\\\ w_2 \\\\ w_3 \\end{array} \\right]$\n", 15 | "\n", 16 | "that we wish to learn. We have three 3-vectors $x_1,x_2,x_3$ and the corresponding scalar values\n", 17 | "$y_1 = w \\cdot x_1$, $\\ y_2 = w \\cdot x_2$, $\\ y_3 = w \\cdot x_3$. (Caution: The $x_i$ are 3-vectors,\n", 18 | "not components.) We will show that Gauassian elimination learns $w$ very quickly, while standard deep learning\n", 19 | "approaches (which use a version of gradient descent currently considered the best known as [ADAM](https://arxiv.org/abs/1412.6980) can require many steps, may be inaccurate, and inconsistent.\n", 20 | "\n", 21 | "One of the issues is how to organize the \"x\" data and the \"y\" data. The \"x\"s can be the columns or rows of a matrix, or can be a vector of vectors. Many applications prefer the matrix approach. 
The \"y\"s can be bundled into a vector similarly." 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 76, 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "data": { 31 | "text/plain": [ 32 | "3-element Array{Float64,1}:\n", 33 | " 0.982331\n", 34 | " 0.1774 \n", 35 | " 0.212845" 36 | ] 37 | }, 38 | "execution_count": 76, 39 | "metadata": {}, 40 | "output_type": "execute_result" 41 | } 42 | ], 43 | "source": [ 44 | "w = rand(3) ## We are setting up a w. We will know it, but the learning algorithm will only have X and y data below." 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 84, 50 | "metadata": {}, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/plain": [ 55 | "3-element Array{Float64,1}:\n", 56 | " 0.881336\n", 57 | " 1.0557 \n", 58 | " 0.485883" 59 | ] 60 | }, 61 | "execution_count": 84, 62 | "metadata": {}, 63 | "output_type": "execute_result" 64 | } 65 | ], 66 | "source": [ 67 | "# Here is the data. Each \"x\" is a 3-vector. Each \"y\" is a number.\n", 68 | "n = 3\n", 69 | "x1 = rand(3); y1=w ⋅ x1 # We are using the dot product (type \\cdot+tab)\n", 70 | "x2 = rand(3); y2=w ⋅ x2\n", 71 | "x3 = rand(3); y3=w ⋅ x3\n", 72 | "# Gather the \"x\" data into the rows of a matrix and \"y\" into a vector\n", 73 | "X=[x1 x2 x3]'\n", 74 | "y=[y1; y2; y3]" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 81, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/plain": [ 85 | "3-element Array{Float64,1}:\n", 86 | " 0.0\n", 87 | " 0.0\n", 88 | " 0.0" 89 | ] 90 | }, 91 | "execution_count": 81, 92 | "metadata": {}, 93 | "output_type": "execute_result" 94 | } 95 | ], 96 | "source": [ 97 | "# We check that the linear system for the \"unknown\" w is X*w = y\n", 98 | "X*w-y" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 83, 104 | "metadata": {}, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/plain": [ 109 | "3-element Array{Float64,1}:\n", 110 | " 
0.982331\n", 111 | " 0.1774 \n", 112 | " 0.212845" 113 | ] 114 | }, 115 | "execution_count": 83, 116 | "metadata": {}, 117 | "output_type": "execute_result" 118 | } 119 | ], 120 | "source": [ 121 | "## Recover w with Gaussian Elimination\n", 122 | "X\\y" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 85, 128 | "metadata": {}, 129 | "outputs": [ 130 | { 131 | "data": { 132 | "text/plain": [ 133 | "3-element Array{Float64,1}:\n", 134 | " 0.982331\n", 135 | " 0.1774 \n", 136 | " 0.212845" 137 | ] 138 | }, 139 | "execution_count": 85, 140 | "metadata": {}, 141 | "output_type": "execute_result" 142 | } 143 | ], 144 | "source": [ 145 | "w" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 115, 151 | "metadata": { 152 | "collapsed": true 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "## Recover w with a machine learning package -- 18.06 students might just want to execute as a black box\n", 157 | "using Flux" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "metadata": { 163 | "collapsed": true 164 | }, 165 | "source": [ 166 | "We show how the same problem is commonly done with machine learning. Many learning cycles seem to be needed." 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 101, 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "name": "stdout", 176 | "output_type": "stream", 177 | "text": [ 178 | "[0.982331 0.1774 0.212845] : <== estimate after training\n" 179 | ] 180 | } 181 | ], 182 | "source": [ 183 | "# t ... 
a model to be learned to fit the data\n", 184 | "t = Dense(3,1)\n", 185 | "loss(x,y) = Flux.mse(t(x),y)\n", 186 | "opt = ADAM(Flux.params(t)[1:1])\n", 187 | "Flux.train!(loss, Iterators.repeated( (X',y'), 20000), opt) # 20000 steps of training\n", 188 | "println((t.W).data, \" : <== estimate after training\")" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 102, 194 | "metadata": { 195 | "collapsed": true 196 | }, 197 | "outputs": [], 198 | "source": [ 199 | "## Adding more data does not help a whole lot" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 120, 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "name": "stdout", 209 | "output_type": "stream", 210 | "text": [ 211 | "[0.948837 0.17883 0.218774] : <== estimate after training\n" 212 | ] 213 | } 214 | ], 215 | "source": [ 216 | "n = 3000\n", 217 | "X = randn(n,3)\n", 218 | "y = X*w\n", 219 | "t = Dense(3,1)\n", 220 | "loss(x,y) = Flux.mse(t(x),y)\n", 221 | "opt = ADAM(Flux.params(t)[1:1])\n", 222 | "Flux.train!(loss, Iterators.repeated( (X',y'), 2000), opt) # 2000 steps of training\n", 223 | "println((t.W).data, \" : <== estimate after training\")" 224 | ] 225 | } 226 | ], 227 | "metadata": { 228 | "anaconda-cloud": {}, 229 | "kernelspec": { 230 | "display_name": "Julia 0.6.0", 231 | "language": "julia", 232 | "name": "julia-0.6" 233 | }, 234 | "language_info": { 235 | "file_extension": ".jl", 236 | "mimetype": "application/julia", 237 | "name": "julia", 238 | "version": "0.6.0" 239 | }, 240 | "widgets": { 241 | "state": { 242 | "294167a6-1234-43dc-aef6-951949f1fac6": { 243 | "views": [ 244 | { 245 | "cell_index": 26 246 | } 247 | ] 248 | }, 249 | "41f7367b-0ad3-43e3-bd43-c6e4a1618e8d": { 250 | "views": [ 251 | { 252 | "cell_index": 19 253 | } 254 | ] 255 | }, 256 | "6e3620ec-4915-4734-8d3a-3332fdc63970": { 257 | "views": [ 258 | { 259 | "cell_index": 16 260 | } 261 | ] 262 | }, 263 | "ce72699c-d8cc-4a03-902b-a490178223e5": { 264 | "views": [ 265 
| { 266 | "cell_index": 17 267 | } 268 | ] 269 | }, 270 | "db2d9825-08d3-4028-8072-1e865d1a0c4f": { 271 | "views": [ 272 | { 273 | "cell_index": 23 274 | } 275 | ] 276 | } 277 | }, 278 | "version": "1.2.0" 279 | } 280 | }, 281 | "nbformat": 4, 282 | "nbformat_minor": 2 283 | } 284 | -------------------------------------------------------------------------------- /notes/QR Factorization Examples in Julia.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "using LinearAlgebra" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Square Case first" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 3, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/plain": [ 27 | "4×4 Array{Float64,2}:\n", 28 | " 0.330869 0.843845 0.0623888 0.398208\n", 29 | " 0.8661 0.204488 0.138221 0.218923\n", 30 | " 0.741009 0.775278 0.576722 0.9775 \n", 31 | " 0.87276 0.139498 0.072938 0.983904" 32 | ] 33 | }, 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "output_type": "execute_result" 37 | } 38 | ], 39 | "source": [ 40 | "A = rand(4,4)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 4, 46 | "metadata": {}, 47 | "outputs": [ 48 | { 49 | "data": { 50 | "text/plain": [ 51 | "LinearAlgebra.QRCompactWY{Float64,Array{Float64,2}}\n", 52 | "Q factor:\n", 53 | "4×4 LinearAlgebra.QRCompactWYQ{Float64,Array{Float64,2}}:\n", 54 | " -0.224588 0.76522 0.603058 -0.017861 \n", 55 | " -0.587892 -0.292547 0.130273 -0.742852 \n", 56 | " -0.502982 0.437253 -0.739298 0.0962124\n", 57 | " -0.592413 -0.371031 0.269792 0.662266 \n", 58 | "R factor:\n", 59 | "4×4 Array{Float64,2}:\n", 60 | " -1.47323 -0.782326 -0.428561 -1.29268 \n", 61 | " 0.0 0.873139 0.232416 0.303028\n", 62 | " 0.0 0.0 -0.351061 -0.188552\n", 63 | " 0.0 
0.0 0.0 0.575913" 64 | ] 65 | }, 66 | "execution_count": 4, 67 | "metadata": {}, 68 | "output_type": "execute_result" 69 | } 70 | ], 71 | "source": [ 72 | "Q,R = qr(A)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 6, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/plain": [ 83 | "4×4 Array{Float64,2}:\n", 84 | " 1.0 0.0 0.0 -0.0\n", 85 | " 0.0 1.0 -0.0 -0.0\n", 86 | " 0.0 -0.0 1.0 0.0\n", 87 | " -0.0 -0.0 0.0 1.0" 88 | ] 89 | }, 90 | "execution_count": 6, 91 | "metadata": {}, 92 | "output_type": "execute_result" 93 | } 94 | ], 95 | "source": [ 96 | "round.(Q'Q, digits=0)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 8, 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "data": { 106 | "text/plain": [ 107 | "4×4 Array{Float64,2}:\n", 108 | " 1.0 0.0 -0.0 -0.0\n", 109 | " 0.0 1.0 0.0 -0.0\n", 110 | " -0.0 0.0 1.0 0.0\n", 111 | " -0.0 -0.0 0.0 1.0" 112 | ] 113 | }, 114 | "execution_count": 8, 115 | "metadata": {}, 116 | "output_type": "execute_result" 117 | } 118 | ], 119 | "source": [ 120 | "round.(Q*Q', digits=0)" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "## I'm happy, I see Q is orthogonal" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 9, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "true" 139 | ] 140 | }, 141 | "execution_count": 9, 142 | "metadata": {}, 143 | "output_type": "execute_result" 144 | } 145 | ], 146 | "source": [ 147 | "A ≈ Q * R" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 10, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "data": { 157 | "text/plain": [ 158 | "4×4 Array{Float64,2}:\n", 159 | " 0.330869 0.843845 0.0623888 0.398208\n", 160 | " 0.8661 0.204488 0.138221 0.218923\n", 161 | " 0.741009 0.775278 0.576722 0.9775 \n", 162 | " 0.87276 0.139498 0.072938 0.983904" 163 | ] 164 | }, 165 
| "execution_count": 10, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | } 169 | ], 170 | "source": [ 171 | "Q * R" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": 11, 177 | "metadata": {}, 178 | "outputs": [ 179 | { 180 | "data": { 181 | "text/plain": [ 182 | "4×4 Array{Float64,2}:\n", 183 | " 0.330869 0.843845 0.0623888 0.398208\n", 184 | " 0.8661 0.204488 0.138221 0.218923\n", 185 | " 0.741009 0.775278 0.576722 0.9775 \n", 186 | " 0.87276 0.139498 0.072938 0.983904" 187 | ] 188 | }, 189 | "execution_count": 11, 190 | "metadata": {}, 191 | "output_type": "execute_result" 192 | } 193 | ], 194 | "source": [ 195 | "A" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "## Now a tall skinny example" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 12, 208 | "metadata": {}, 209 | "outputs": [ 210 | { 211 | "data": { 212 | "text/plain": [ 213 | "5×3 Array{Float64,2}:\n", 214 | " 0.657045 0.214426 0.0462177\n", 215 | " 0.268263 0.208357 0.269215 \n", 216 | " 0.410459 0.948475 0.756601 \n", 217 | " 0.391947 0.683485 0.28925 \n", 218 | " 0.949405 0.999374 0.570501 " 219 | ] 220 | }, 221 | "execution_count": 12, 222 | "metadata": {}, 223 | "output_type": "execute_result" 224 | } 225 | ], 226 | "source": [ 227 | "A = rand(5,3)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": 15, 233 | "metadata": {}, 234 | "outputs": [ 235 | { 236 | "data": { 237 | "text/plain": [ 238 | "5×3 Array{Float64,2}:\n", 239 | " -0.499955 -0.62267 0.00434913\n", 240 | " -0.204125 -0.0946497 -0.628421 \n", 241 | " -0.312324 0.686918 -0.424575 \n", 242 | " -0.298239 0.362414 0.645285 \n", 243 | " -0.722416 0.0110753 0.0917176 " 244 | ] 245 | }, 246 | "execution_count": 15, 247 | "metadata": {}, 248 | "output_type": "execute_result" 249 | } 250 | ], 251 | "source": [ 252 | "Q,R = qr(A)\n", 253 | "Q = Q[:,1:3] # make sure we have the first three 
columns" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 16, 259 | "metadata": {}, 260 | "outputs": [ 261 | { 262 | "data": { 263 | "text/plain": [ 264 | "5×3 Array{Float64,2}:\n", 265 | " -0.499955 -0.62267 0.00434913\n", 266 | " -0.204125 -0.0946497 -0.628421 \n", 267 | " -0.312324 0.686918 -0.424575 \n", 268 | " -0.298239 0.362414 0.645285 \n", 269 | " -0.722416 0.0110753 0.0917176 " 270 | ] 271 | }, 272 | "execution_count": 16, 273 | "metadata": {}, 274 | "output_type": "execute_result" 275 | } 276 | ], 277 | "source": [ 278 | "Q" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": 17, 284 | "metadata": {}, 285 | "outputs": [ 286 | { 287 | "data": { 288 | "text/plain": [ 289 | "3×3 Array{Float64,2}:\n", 290 | " -1.31421 -1.37177 -0.81277 \n", 291 | " 0.0 0.75706 0.57661 \n", 292 | " 0.0 0.0 -0.251239" 293 | ] 294 | }, 295 | "execution_count": 17, 296 | "metadata": {}, 297 | "output_type": "execute_result" 298 | } 299 | ], 300 | "source": [ 301 | "R" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": 18, 307 | "metadata": {}, 308 | "outputs": [ 309 | { 310 | "data": { 311 | "text/plain": [ 312 | "5×3 Array{Float64,2}:\n", 313 | " 0.657045 0.214426 0.0462177\n", 314 | " 0.268263 0.208357 0.269215 \n", 315 | " 0.410459 0.948475 0.756601 \n", 316 | " 0.391947 0.683485 0.28925 \n", 317 | " 0.949405 0.999374 0.570501 " 318 | ] 319 | }, 320 | "execution_count": 18, 321 | "metadata": {}, 322 | "output_type": "execute_result" 323 | } 324 | ], 325 | "source": [ 326 | "Q * R" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 19, 332 | "metadata": {}, 333 | "outputs": [ 334 | { 335 | "data": { 336 | "text/plain": [ 337 | "5×3 Array{Float64,2}:\n", 338 | " 0.657045 0.214426 0.0462177\n", 339 | " 0.268263 0.208357 0.269215 \n", 340 | " 0.410459 0.948475 0.756601 \n", 341 | " 0.391947 0.683485 0.28925 \n", 342 | " 0.949405 0.999374 0.570501 " 343 | ] 344 | }, 345 | 
"execution_count": 19, 346 | "metadata": {}, 347 | "output_type": "execute_result" 348 | } 349 | ], 350 | "source": [ 351 | "A" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 20, 357 | "metadata": {}, 358 | "outputs": [ 359 | { 360 | "data": { 361 | "text/plain": [ 362 | "true" 363 | ] 364 | }, 365 | "execution_count": 20, 366 | "metadata": {}, 367 | "output_type": "execute_result" 368 | } 369 | ], 370 | "source": [ 371 | "A ≈ Q * R" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": 22, 377 | "metadata": {}, 378 | "outputs": [ 379 | { 380 | "data": { 381 | "text/plain": [ 382 | "3×3 Array{Float64,2}:\n", 383 | " 1.0 -0.0 -0.0\n", 384 | " -0.0 1.0 -0.0\n", 385 | " -0.0 -0.0 1.0" 386 | ] 387 | }, 388 | "execution_count": 22, 389 | "metadata": {}, 390 | "output_type": "execute_result" 391 | } 392 | ], 393 | "source": [ 394 | "round.(Q'Q, digits=0)" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "execution_count": 23, 400 | "metadata": {}, 401 | "outputs": [ 402 | { 403 | "data": { 404 | "text/plain": [ 405 | "5×5 Array{Float64,2}:\n", 406 | " 0.637693 0.158256 -0.273422 -0.0737523 0.354678 \n", 407 | " 0.158256 0.445538 0.265548 -0.378935 0.0887777\n", 408 | " -0.273422 0.265548 0.749666 0.0681243 0.194295 \n", 409 | " -0.0737523 -0.378935 0.0681243 0.636683 0.27865 \n", 410 | " 0.354678 0.0887777 0.194295 0.27865 0.53042 " 411 | ] 412 | }, 413 | "execution_count": 23, 414 | "metadata": {}, 415 | "output_type": "execute_result" 416 | } 417 | ], 418 | "source": [ 419 | "Q * Q'" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": null, 425 | "metadata": {}, 426 | "outputs": [], 427 | "source": [] 428 | } 429 | ], 430 | "metadata": { 431 | "kernelspec": { 432 | "display_name": "Julia 1.3.1", 433 | "language": "julia", 434 | "name": "julia-1.3" 435 | }, 436 | "language_info": { 437 | "file_extension": ".jl", 438 | "mimetype": "application/julia", 439 | "name": "julia", 440 | 
"version": "1.3.1" 441 | } 442 | }, 443 | "nbformat": 4, 444 | "nbformat_minor": 2 445 | } 446 | -------------------------------------------------------------------------------- /notes/Perron-Frobenius.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "The proof of the [Perron-Frobenius](https://en.wikipedia.org/wiki/Perron%E2%80%93Frobenius_theorem) theorem can seem very abstract, but if you play with some examples it is easier to understand.\n", 8 | "This notebook presents the proof with computational examples.
\n", 9 | "\n", 10 | "Step 4 below uses JuMP to turn Perron-Frobenius into a computational algorithm.\n", 11 | "
\n", 12 | "\n", 13 | "There are a few variations on the theorem some with more and some with less information\n", 14 | "but the basic version says that if A has all positive entries, then the maximum\n", 15 | "absolute eigenvalue is real and positive and there is a corresponding real positive eigenvector." 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "Step #1. Assume all(x.>0) and all(y.>0) and define τ as the minimum of y./x" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 93, 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "data": { 32 | "text/plain": [ 33 | "τ (generic function with 1 method)" 34 | ] 35 | }, 36 | "execution_count": 93, 37 | "metadata": {}, 38 | "output_type": "execute_result" 39 | } 40 | ], 41 | "source": [ 42 | "# Define τ(y,x) on vectors\n", 43 | "\n", 44 | "τ(y::Vector, x::Vector) = minimum(y./x)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "Notice that for 0 ≤ t ≤ τ(y,x) we have all(y .≥ t*x)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 11, 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/plain": [ 62 | "2.0" 63 | ] 64 | }, 65 | "execution_count": 11, 66 | "metadata": {}, 67 | "output_type": "execute_result" 68 | } 69 | ], 70 | "source": [ 71 | "# Example\n", 72 | "y = [10,5,6,9]\n", 73 | "x = [1,2,3,4]\n", 74 | "τ(y,x)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 12, 80 | "metadata": {}, 81 | "outputs": [ 82 | { 83 | "data": { 84 | "text/plain": [ 85 | "(true, true, false)" 86 | ] 87 | }, 88 | "execution_count": 12, 89 | "metadata": {}, 90 | "output_type": "execute_result" 91 | } 92 | ], 93 | "source": [ 94 | "all(y.≥2x), all(y.≥1.99x),all(y.≥2.01x) # check these by hand" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "Step #2. 
If all(A.>0) and all(y.≥0) and y is not the zero vector then all(A*y.>0) (strictly greater)" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 16, 107 | "metadata": {}, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "text/plain": [ 112 | "3-element Array{Float64,1}:\n", 113 | " 0.2\n", 114 | " 0.5\n", 115 | " 0.8" 116 | ] 117 | }, 118 | "execution_count": 16, 119 | "metadata": {}, 120 | "output_type": "execute_result" 121 | } 122 | ], 123 | "source": [ 124 | "# Example\n", 125 | "A= [ 1 2 3;4 5 6; 7 8 9]\n", 126 | "y = [0, .1, .0]\n", 127 | "A * y # any one positive entry multiplies an entire positive column of A" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "Step #3:
τ(Ax,x)=τ(A²x,Ax) if x is an eigenvector with all(x.≥0).
\n", 135 | "τ(Ax,x) < τ(A²x,Ax) if x is not an eigenvector.\n", 136 | "\n", 137 | "
\n", 138 | "Proof: If x is an eigenvector, then τ(Ax,x)=τ(A²x,Ax)= the corresponding eigenvalue.
\n", 139 | "If x is not an eigenvector, then letting y\n", 140 | "= Ax - τ(Ax,x) *x, then all(y.≥0) and y is not the 0 vector.
\n", 141 | "From Step 2, all(A*y.>0) or equivalently all(A²x .> τ(Ax,x) *Ax) from which we see\n", 142 | "τ(A²x,Ax) > τ(Ax,x)." 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 98, 148 | "metadata": {}, 149 | "outputs": [ 150 | { 151 | "data": { 152 | "text/plain": [ 153 | "7-element Array{Float64,1}:\n", 154 | " 1.34884\n", 155 | " 2.40402\n", 156 | " 2.68214\n", 157 | " 2.75781\n", 158 | " 2.77293\n", 159 | " 2.77552\n", 160 | " 2.77666" 161 | ] 162 | }, 163 | "execution_count": 98, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "# An example\n", 170 | "n = 6\n", 171 | "A = rand(n,n)\n", 172 | "x = rand(n)\n", 173 | "[τ(A^k*x, A^(k-1)*x) for k=1:7] # This sequence will be increasing, but to an eig limit." 174 | ] 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "metadata": {}, 179 | "source": [ 180 | "Step #4. Let tmax be the maximum of τ(Ax,x) for all non-zero x. We will prove mathematically that x is a positive eigenvector and τ(Ax,x) is the eigenvalue. Before we do it mathematically, let's see it computationally:" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "One way to form this maximum problem is write this as a constrained optimization:\n", 188 | "\n", 189 | "$\\max t$ subject to

\n", 190 | "$x_i \\ge 0 $
\n", 191 | "$y=Ax$
\n", 192 | "$y[i]/x[i] \\ge t$
\n", 193 | "$sum(x)=1$\n", 194 | " " 195 | ] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "metadata": {}, 200 | "source": [ 201 | "We will use the highly popular Julia [Jump Package](https://github.com/JuliaOpt/JuMP.jl) created at MIT (though not in math!), and used widely for operations research and in business schools:" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 4, 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "# Pkg.add(\"JuMP\")\n", 211 | "using JuMP\n", 212 | "# Pkg.add(\"Ipopt\") (On my mac, this worked with 0.6.2 but not 0.6.0)\n", 213 | "using Ipopt" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 74, 219 | "metadata": {}, 220 | "outputs": [ 221 | { 222 | "data": { 223 | "text/plain": [ 224 | "7×7 Array{Float64,2}:\n", 225 | " 0.971603 0.325743 0.863038 0.0234046 0.962918 0.496618 0.799348\n", 226 | " 0.62474 0.140906 0.448296 0.505187 0.0646877 0.149136 0.205624\n", 227 | " 0.448767 0.58146 0.47302 0.443701 0.303789 0.114217 0.892493\n", 228 | " 0.808785 0.588347 0.839119 0.883789 0.920193 0.515088 0.22442 \n", 229 | " 0.0089511 0.242133 0.783681 0.420531 0.965035 0.544011 0.334241\n", 230 | " 0.300799 0.990369 0.401669 0.427284 0.207415 0.309122 0.329326\n", 231 | " 0.0314547 0.723179 0.476076 0.445037 0.249261 0.404243 0.502455" 232 | ] 233 | }, 234 | "execution_count": 74, 235 | "metadata": {}, 236 | "output_type": "execute_result" 237 | } 238 | ], 239 | "source": [ 240 | "A = rand(7,7)" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 88, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "3.3686909584508244" 252 | ] 253 | }, 254 | "execution_count": 88, 255 | "metadata": {}, 256 | "output_type": "execute_result" 257 | } 258 | ], 259 | "source": [ 260 | "n=size(A,1)\n", 261 | "\n", 262 | "m = Model(solver=IpoptSolver(print_level=2))\n", 263 | "@variable(m, t); @objective(m, Max, t)\n", 264 | 
"\n", 265 | "@variable(m, x[1:n]>=0); @constraint(m, sum(x)==1)\n", 266 | "@variable(m, y[1:n]); @constraint(m, y .== A*x)\n", 267 | "\n", 268 | "@NLconstraint(m, [i=1:n], t <= y[i]/x[i]) # nonlinear constraint\n", 269 | "\n", 270 | "status = solve(m)\n", 271 | "x = getvalue.(x)\n", 272 | "t = getobjectivevalue(m)" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": 89, 278 | "metadata": {}, 279 | "outputs": [ 280 | { 281 | "data": { 282 | "text/plain": [ 283 | "2.4817758919083086e-6" 284 | ] 285 | }, 286 | "execution_count": 89, 287 | "metadata": {}, 288 | "output_type": "execute_result" 289 | } 290 | ], 291 | "source": [ 292 | "norm(A*x-t*x) # demonstrate we have found an eigenpair through optimization" 293 | ] 294 | }, 295 | { 296 | "cell_type": "markdown", 297 | "metadata": {}, 298 | "source": [ 299 | "Step 5: Demonstrate that if x above were not an eigenvector, then the t could not have been the solution to the optimum problem." 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": {}, 305 | "source": [ 306 | "As we saw in step 3, if x had not been an eigenvector, then τ(Ax,x) < τ(A²x,Ax), so τ(Ax,x) was not the maximum." 307 | ] 308 | }, 309 | { 310 | "cell_type": "markdown", 311 | "metadata": {}, 312 | "source": [ 313 | "Step 6: Any complex eigenvector, eigenvalue pair has absolute eigenvalue <= tmax:" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "If Ax = λx then all( A*abs.(x) .≥ abs(λ)*abs.(x)) by the triangle inequality. Thus abs(λ) <= tmax." 
321 | ] 322 | }, 323 | { 324 | "cell_type": "markdown", 325 | "metadata": {}, 326 | "source": [ 327 | "For example:" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 101, 333 | "metadata": {}, 334 | "outputs": [ 335 | { 336 | "data": { 337 | "text/plain": [ 338 | "5×5 Array{Float64,2}:\n", 339 | " 0.996711 0.656579 0.453247 0.61344 0.50697 \n", 340 | " 0.591166 0.616613 0.987583 0.246784 0.442663 \n", 341 | " 0.949881 0.454748 0.831274 0.708647 0.458239 \n", 342 | " 0.069995 0.108182 0.0296905 0.434673 0.0322304\n", 343 | " 0.105186 0.918176 0.831151 0.126704 0.0709903" 344 | ] 345 | }, 346 | "execution_count": 101, 347 | "metadata": {}, 348 | "output_type": "execute_result" 349 | } 350 | ], 351 | "source": [ 352 | "A = rand(5,5)" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": 102, 358 | "metadata": {}, 359 | "outputs": [ 360 | { 361 | "data": { 362 | "text/plain": [ 363 | "5-element Array{Complex{Float64},1}:\n", 364 | " 2.58617+0.0im \n", 365 | " 0.125586+0.34277im\n", 366 | " 0.125586-0.34277im\n", 367 | " 0.351225+0.0im \n", 368 | " -0.238306+0.0im " 369 | ] 370 | }, 371 | "execution_count": 102, 372 | "metadata": {}, 373 | "output_type": "execute_result" 374 | } 375 | ], 376 | "source": [ 377 | "eigvals(A)" 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "execution_count": 108, 383 | "metadata": {}, 384 | "outputs": [ 385 | { 386 | "data": { 387 | "text/plain": [ 388 | "0.1255862966495158 + 0.3427698069874712im" 389 | ] 390 | }, 391 | "execution_count": 108, 392 | "metadata": {}, 393 | "output_type": "execute_result" 394 | } 395 | ], 396 | "source": [ 397 | "Λ,X=eig(A);x=X[:,2];λ=Λ[2]" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": 109, 403 | "metadata": {}, 404 | "outputs": [ 405 | { 406 | "data": { 407 | "text/plain": [ 408 | "8.355107029738416e-16" 409 | ] 410 | }, 411 | "execution_count": 109, 412 | "metadata": {}, 413 | "output_type": "execute_result" 414 | } 
415 | ], 416 | "source": [ 417 | "norm(A*x-λ*x)" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": 112, 423 | "metadata": {}, 424 | "outputs": [ 425 | { 426 | "data": { 427 | "text/plain": [ 428 | "0.6784932085048402" 429 | ] 430 | }, 431 | "execution_count": 112, 432 | "metadata": {}, 433 | "output_type": "execute_result" 434 | } 435 | ], 436 | "source": [ 437 | "τ(A*abs.(x),abs.(x)) - abs(λ) # This is non-negative" 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "execution_count": null, 443 | "metadata": {}, 444 | "outputs": [], 445 | "source": [] 446 | } 447 | ], 448 | "metadata": { 449 | "kernelspec": { 450 | "display_name": "Julia 0.6.2", 451 | "language": "julia", 452 | "name": "julia-0.6" 453 | }, 454 | "language_info": { 455 | "file_extension": ".jl", 456 | "mimetype": "application/julia", 457 | "name": "julia", 458 | "version": "0.6.2" 459 | } 460 | }, 461 | "nbformat": 4, 462 | "nbformat_minor": 2 463 | } 464 | -------------------------------------------------------------------------------- /notes/Singular.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Singular matrices: A first look\n", 8 | "\n", 9 | "If we encounter a zero pivot (or even just a small pivot, on a computer) during Gaussian elimination, we normally swap rows to bring a nonzero pivot up from a subsequent row. However, what if there are *no* nonzero values below the pivot in that column? This is called a [singular matrix](https://en.wikipedia.org/wiki/Invertible_matrix): we can still proceed with Gaussian elimination, but **we can't get rid of the zero pivot**.\n", 10 | "\n", 11 | "If you have $Ax=b$ where $A$ is singular, then there will typically (for most right-hand sides $b$) be **no solutions**, but there will occasionally (for very special $b$) be **infinitely many solutions**. 
(For $2 \\times 2$ matrices, solving $Ax=b$ corresponds to finding the intersection of two lines, and a singular case corresponds to two parallel lines — either there are no intersections, or they intersect everywhere.)\n", 12 | "\n", 13 | "For example, consider the following $4 \\times 4$ matrix $A=LU$:\n", 14 | "\n", 15 | "$$\n", 16 | "\\underbrace{\\begin{pmatrix} \n", 17 | " 2 & -1 & 0 & 3 \\\\\n", 18 | " 4 & -1 & 1 & 8 \\\\\n", 19 | " 6 & 1 & 4 & 15 \\\\\n", 20 | " 2 & -1 & 0 & 0 \\\\\n", 21 | " \\end{pmatrix}}_A =\n", 22 | "\\underbrace{\\begin{pmatrix} \n", 23 | " 1 & 0 & 0 & 0 \\\\\n", 24 | " 2 & 1 & 0 & 0 \\\\\n", 25 | " 3 & 4 & 1 & 0 \\\\\n", 26 | " 1 & 0 & 2 & 1 \\\\\n", 27 | " \\end{pmatrix}}_L\n", 28 | "\\underbrace{\\begin{pmatrix} \n", 29 | " \\color{blue}{2} & -1 & 0 & 3 \\\\\n", 30 | " 0 & \\color{blue}{1} & 1 & 2 \\\\\n", 31 | " 0 & 0 & \\color{red}{0} & \\color{blue}{-2} \\\\\n", 32 | " 0 & 0 & 0 & 1 \\\\\n", 33 | " \\end{pmatrix}}_U\n", 34 | "$$\n", 35 | "\n", 36 | "In the **third column, we got zeros** where we were hoping for a pivot. So, we **only have three pivots (blue)** in this case. Now, suppose we want to solve $Ax=b$. We first solve $Lc=b$ to apply the elimination steps to $b$. This is no problem since $L$ has 1's along the diagonal. Suppose we get $c = (c_1, c_2, c_3, c_4)$. Then we proceed by backsubstitution to solve $Ux = c$, starting with the last row of $U$:\n", 37 | "\n", 38 | "$$\n", 39 | "1 \\times x_4 = c_4 \\implies x_4 = c_4 \\\\\n", 40 | "\\color{red}{0 \\times x_3} - 2 \\times x_4 = c_3 \\implies \\mbox{no solution unless } -2 x_4 = -2 c_4 = c_3\n", 41 | "$$\n", 42 | "For very special right-hand sides, where $c_3 = 2c_4$, we can plug in *any* $x_3$ and get a solution (infinitely many solutions). Otherwise, we get *no* solutions." 
43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 1, 48 | "metadata": {}, 49 | "outputs": [ 50 | { 51 | "data": { 52 | "text/plain": [ 53 | "4×4 Matrix{Int64}:\n", 54 | " 2 -1 0 3\n", 55 | " 4 -1 1 8\n", 56 | " 6 1 4 15\n", 57 | " 2 -1 0 0" 58 | ] 59 | }, 60 | "execution_count": 1, 61 | "metadata": {}, 62 | "output_type": "execute_result" 63 | } 64 | ], 65 | "source": [ 66 | "[1 0 0 0\n", 67 | " 2 1 0 0\n", 68 | " 3 4 1 0\n", 69 | " 1 0 2 1 ] *\n", 70 | "[2 -1 0 3\n", 71 | " 0 1 1 2\n", 72 | " 0 0 0 -2\n", 73 | " 0 0 0 1 ]" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "You may think that singular cases are not very interesting. In reality, **exactly singular square matrices never occur by accident**. There is always some *deep structure of the underlying problem* that causes the singularity, and understanding this structure is *always* interesting.\n", 81 | "\n", 82 | "On the other hand, **nearly singular** matrices (where the pivots are nonzero but very small) *can* occur by accident, and dealing with them is often a delicate problem because they are very sensitive to roundoff errors. (We call these matrices [ill-conditioned](https://en.wikipedia.org/wiki/Condition_number).) But that's mostly not a topic for 18.06.\n", 83 | "\n", 84 | "Singular **non-square** systems, where you have **more equations than unknowns** are *very* common and important, and lead to *fitting* problems where one *minimizes the error* in the solution. We will talk more about this soon in 18.06." 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "metadata": {}, 90 | "source": [ 91 | "Some matrices are **more singular than others**. 
For example, they can have **two pivots**:\n", 92 | "\n", 93 | "$$\n", 94 | "\underbrace{\begin{pmatrix} \n", 95 | "  2 & -1 & 0 & 3 \\\n", 96 | "  4 & -2 & 1 & 8 \\\n", 97 | "  6 & -3 & 4 & 17 \\\n", 98 | "  2 & -1 & 0 & 3 \\\n", 99 | "  \end{pmatrix}}_A =\n", 100 | "\underbrace{\begin{pmatrix} \n", 101 | "  1 & 0 & 0 & 0 \\\n", 102 | "  2 & 1 & 0 & 0 \\\n", 103 | "  3 & 4 & 1 & 0 \\\n", 104 | "  1 & 0 & 2 & 1 \\\n", 105 | "  \end{pmatrix}}_L\n", 106 | "\underbrace{\begin{pmatrix} \n", 107 | "  \color{blue}{2} & -1 & 0 & 3 \\\n", 108 | "  0 & 0 & \color{blue}{1} & 2 \\\n", 109 | "  0 & 0 & \color{red}{0} & \color{red}{0} \\\n", 110 | "  0 & 0 & 0 & \color{red}{0} \\\n", 111 | "  \end{pmatrix}}_U\n", 112 | "$$\n", 113 | "\n", 114 | "or **one pivot**:\n", 115 | "\n", 116 | "$$\n", 117 | "\underbrace{\begin{pmatrix} \n", 118 | "  2 & -1 & 0 & 3 \\\n", 119 | "  4 & -2 & 0 & 6 \\\n", 120 | "  6 & -3 & 0 & 9 \\\n", 121 | "  2 & -1 & 0 & 3 \\\n", 122 | "  \end{pmatrix}}_A =\n", 123 | "\underbrace{\begin{pmatrix} \n", 124 | "  1 & 0 & 0 & 0 \\\n", 125 | "  2 & 1 & 0 & 0 \\\n", 126 | "  3 & 4 & 1 & 0 \\\n", 127 | "  1 & 0 & 2 & 1 \\\n", 128 | "  \end{pmatrix}}_L\n", 129 | "\underbrace{\begin{pmatrix} \n", 130 | "  \color{blue}{2} & -1 & 0 & 3 \\\n", 131 | "  0 & 0 & 0 & 0 \\\n", 132 | "  0 & 0 & 0 & 0 \\\n", 133 | "  0 & 0 & 0 & 0 \\\n", 134 | "  \end{pmatrix}}_U\n", 135 | "$$\n", 136 | "\n", 137 | "or **zero pivots**:\n", 138 | "\n", 139 | "$$\n", 140 | "\underbrace{\begin{pmatrix} \n", 141 | "  0 & 0 & 0 & 0 \\\n", 142 | "  0 & 0 & 0 & 0 \\\n", 143 | "  0 & 0 & 0 & 0 \\\n", 144 | "  0 & 0 & 0 & 0 \\\n", 145 | "  \end{pmatrix}}_A =\n", 146 | "\underbrace{\begin{pmatrix} \n", 147 | "  1 & 0 & 0 & 0 \\\n", 148 | "  2 & 1 & 0 & 0 \\\n", 149 | "  3 & 4 & 1 & 0 \\\n", 150 | "  1 & 0 & 2 & 1 \\\n", 151 | "  \end{pmatrix}}_L\n", 152 | "\underbrace{\begin{pmatrix} \n", 153 | "  0 & 0 & 0 & 0 \\\n", 154 | "  0 & 0 & 0 & 0 
\\\n", 155 | "  0 & 0 & 0 & 0 \\\n", 156 | "  0 & 0 & 0 & 0 \\\n", 157 | "  \end{pmatrix}}_U\n", 158 | "$$\n", 159 | "\n", 160 | "If $A$ is the zero matrix, then $Ax=b$ only has solutions when $b=0$, and then *any* $x$ is a solution!\n", 161 | "\n", 162 | "Intuitively, having fewer pivots seems \"more singular\", and requires \"more coincidences\" in the right-hand side to have a solution, and has a \"bigger infinity\" of solutions when there *is* a solution. We will quantify these intuitions in 18.06, starting with the notion of the [rank](https://en.wikipedia.org/wiki/Rank_(linear_algebra)) of a matrix.\n", 163 | "\n", 164 | "* The **rank = r** of the matrix is the **number of (nonzero) pivots** obtained by elimination (with row swaps if needed) for an $m \times n$ matrix $A$. \n", 165 | "\n", 166 | "* $r \le m$ and $r \le n$ because you can't have more pivots than you have rows or columns.\n", 167 | "\n", 168 | "The smaller the rank is compared to the size of the matrix, the \"more singular\" it is. Pretty soon we will understand this better." 
169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 2, 174 | "metadata": {}, 175 | "outputs": [ 176 | { 177 | "data": { 178 | "text/plain": [ 179 | "4×4 Matrix{Int64}:\n", 180 | " 2 -1 0 3\n", 181 | " 4 -2 1 8\n", 182 | " 6 -3 4 17\n", 183 | " 2 -1 0 3" 184 | ] 185 | }, 186 | "execution_count": 2, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "[1 0 0 0\n", 193 | " 2 1 0 0\n", 194 | " 3 4 1 0\n", 195 | " 1 0 2 1 ] *\n", 196 | "[2 -1 0 3\n", 197 | " 0 0 1 2\n", 198 | " 0 0 0 0\n", 199 | " 0 0 0 0 ]" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 3, 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "data": { 209 | "text/plain": [ 210 | "4×4 Matrix{Int64}:\n", 211 | " 2 -1 0 3\n", 212 | " 4 -2 0 6\n", 213 | " 6 -3 0 9\n", 214 | " 2 -1 0 3" 215 | ] 216 | }, 217 | "execution_count": 3, 218 | "metadata": {}, 219 | "output_type": "execute_result" 220 | } 221 | ], 222 | "source": [ 223 | "[1 0 0 0\n", 224 | " 2 1 0 0\n", 225 | " 3 4 1 0\n", 226 | " 1 0 2 1 ] *\n", 227 | "[2 -1 0 3\n", 228 | " 0 0 0 0\n", 229 | " 0 0 0 0\n", 230 | " 0 0 0 0 ]" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "Note that if we encounter zeros in a column where we were hoping for a pivot, and we can't get a nonzero element by swapping rows, we skip to the next column. 
The following example is **rank 2**, not rank 0:\n", 238 | "\n", 239 | "$$\n", 240 | "\underbrace{\begin{pmatrix} \n", 241 | "  0 & -1 & 0 & 3 \\\n", 242 | "  0 & -2 & 0 & 8 \\\n", 243 | "  0 & -3 & 0 & 17 \\\n", 244 | "  0 & -1 & 0 & 3 \\\n", 245 | "  \end{pmatrix}}_A =\n", 246 | "\underbrace{\begin{pmatrix} \n", 247 | "  1 & 0 & 0 & 0 \\\n", 248 | "  2 & 1 & 0 & 0 \\\n", 249 | "  3 & 4 & 1 & 0 \\\n", 250 | "  1 & 0 & 2 & 1 \\\n", 251 | "  \end{pmatrix}}_L\n", 252 | "\underbrace{\begin{pmatrix} \n", 253 | "  0 & \color{blue}{-1} & 0 & 3 \\\n", 254 | "  0 & 0 & 0 & \color{blue}{2} \\\n", 255 | "  0 & 0 & 0 & 0 \\\n", 256 | "  0 & 0 & 0 & 0 \\\n", 257 | "  \end{pmatrix}}_U\n", 258 | "$$\n", 259 | "\n", 260 | "That is, if we encounter *all zeros* in a column where we were hoping for a pivot, we skip to the next column for our pivot and continue eliminating below the pivots." 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 4, 266 | "metadata": {}, 267 | "outputs": [ 268 | { 269 | "data": { 270 | "text/plain": [ 271 | "4×4 Matrix{Int64}:\n", 272 | " 0 -1 0 3\n", 273 | " 0 -2 0 8\n", 274 | " 0 -3 0 17\n", 275 | " 0 -1 0 3" 276 | ] 277 | }, 278 | "execution_count": 4, 279 | "metadata": {}, 280 | "output_type": "execute_result" 281 | } 282 | ], 283 | "source": [ 284 | "[1 0 0 0\n", 285 | " 2 1 0 0\n", 286 | " 3 4 1 0\n", 287 | " 1 0 2 1 ] *\n", 288 | "[0 -1 0 3\n", 289 | " 0 0 0 2\n", 290 | " 0 0 0 0\n", 291 | " 0 0 0 0 ]" 292 | ] 293 | }, 294 | { 295 | "cell_type": "markdown", 296 | "metadata": {}, 297 | "source": [ 298 | "# More to come\n", 299 | "\n", 300 | "Much of the material in the second part of 18.06 (somewhat in exam 1, but especially in exam 2) will be focused on how we understand **singular and non-square** systems of equations.\n", 301 | "\n", 302 | "It turns out that there are lots of interesting things to say and do about systems of equations that may not have solutions. We don't just give up!" 
303 | ] 304 | } 305 | ], 306 | "metadata": { 307 | "@webio": { 308 | "lastCommId": null, 309 | "lastKernelId": null 310 | }, 311 | "kernelspec": { 312 | "display_name": "Julia 1.7.1", 313 | "language": "julia", 314 | "name": "julia-1.7" 315 | }, 316 | "language_info": { 317 | "file_extension": ".jl", 318 | "mimetype": "application/julia", 319 | "name": "julia", 320 | "version": "1.7.1" 321 | } 322 | }, 323 | "nbformat": 4, 324 | "nbformat_minor": 2 325 | } 326 | -------------------------------------------------------------------------------- /notes/Gram-Schmidt.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Gram–Schmidt orthogonalization\n", 8 | "\n", 9 | "Chapter 4.4 illustrates a hand technique for computing orthonormal vectors q₁,q₂,… from arbitrary vectors a,b,… with the property that the first k vectors in the original set span the same subspace as the orthonormal set, and this is true for k=1,2,3,...\n", 10 | "\n", 11 | "We will move this hand technique to the computer in this notebook. Some of you will notice that on the computer one can combine operations in a simpler block fashion. 
" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "using LinearAlgebra" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 2, 26 | "metadata": {}, 27 | "outputs": [ 28 | { 29 | "data": { 30 | "text/plain": [ 31 | "6×4 Matrix{Int64}:\n", 32 | " 5 10 6 8\n", 33 | " 10 6 5 1\n", 34 | " 2 1 7 3\n", 35 | " 7 5 4 8\n", 36 | " 1 3 8 3\n", 37 | " 1 8 9 4" 38 | ] 39 | }, 40 | "execution_count": 2, 41 | "metadata": {}, 42 | "output_type": "execute_result" 43 | } 44 | ], 45 | "source": [ 46 | "# start with four arbitrary independent vectors in ℝᵐ\n", 47 | "# with random entries from 1 to 10.\n", 48 | "m = 6\n", 49 | "a₁ = rand(1:10,m)\n", 50 | "a₂ = rand(1:10,m)\n", 51 | "a₃ = rand(1:10,m)\n", 52 | "a₄ = rand(1:10,m)\n", 53 | "A = [a₁ a₂ a₃ a₄] # show them as the columns of a 6×4 matrix A" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 3, 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "data": { 63 | "text/plain": [ 64 | "6×4 Matrix{Float64}:\n", 65 | " 5.0 5.61111 -3.16215 1.37162\n", 66 | " 10.0 -2.77778 -0.0979465 -3.70534\n", 67 | " 2.0 -0.755556 6.16936 1.22464\n", 68 | " 7.0 -1.14444 -0.324354 4.20193\n", 69 | " 1.0 2.12222 5.22283 0.0756904\n", 70 | " 1.0 7.12222 1.49913 -1.74319" 71 | ] 72 | }, 73 | "execution_count": 3, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "# The vₖ are vectors, but they are all orthogonal and\n", 80 | "#span([v₁]) = span([a₁])\n", 81 | "#span([v₁ v₂]) = span([a₁ a₂])\n", 82 | "#span([v₁ v₂ v₃]) = span([a₁ a₂ a₃] )\n", 83 | "#span([v₁ v₂ v₃ v₄]) = span([a₁ a₂ a₃ a₄])\n", 84 | "v₁ = a₁\n", 85 | "v₂ = a₂ - v₁*(v₁'a₂)/(v₁'v₁)\n", 86 | "v₃ = a₃ - v₁*(v₁'a₃)/(v₁'v₁) - v₂*(v₂'a₃)/(v₂'v₂)\n", 87 | "v₄ = a₄ - v₁*(v₁'a₄)/(v₁'v₁) - v₂*(v₂'a₄)/(v₂'v₂) - v₃*(v₃'a₄)/(v₃'v₃)\n", 88 | "\n", 89 | "# gather into a matrix V with orthogonal but *not* orthonormal columns\n", 
90 | "V = [v₁ v₂ v₃ v₄]" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 4, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "data": { 100 | "text/plain": [ 101 | "6×4 Matrix{Float64}:\n", 102 | " 0.372678 0.571756 -0.358733 0.223061\n", 103 | " 0.745356 -0.283047 -0.0111116 -0.602583\n", 104 | " 0.149071 -0.0769889 0.699888 0.199157\n", 105 | " 0.521749 -0.116616 -0.0367966 0.683342\n", 106 | " 0.0745356 0.216248 0.592508 0.0123092\n", 107 | " 0.0745356 0.725734 0.170071 -0.283488" 108 | ] 109 | }, 110 | "execution_count": 4, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "# now we normalize\n", 117 | "q₁ = normalize(v₁)\n", 118 | "q₂ = normalize(v₂)\n", 119 | "q₃ = normalize(v₃)\n", 120 | "q₄ = normalize(v₄);\n", 121 | "\n", 122 | "# Gather into a matrix Q with orthonormal columns\n", 123 | "Q = [q₁ q₂ q₃ q₄]" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 5, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "4×4 Matrix{Float64}:\n", 135 | " 1.0 -9.21828e-17 8.99478e-17 1.35877e-16\n", 136 | " -9.21828e-17 1.0 1.0142e-17 1.42552e-16\n", 137 | " 8.99478e-17 1.0142e-17 1.0 -2.09229e-16\n", 138 | " 1.35877e-16 1.42552e-16 -2.09229e-16 1.0" 139 | ] 140 | }, 141 | "execution_count": 5, 142 | "metadata": {}, 143 | "output_type": "execute_result" 144 | } 145 | ], 146 | "source": [ 147 | "#check that Q has orthonormal columns\n", 148 | "Q'Q" 149 | ] 150 | }, 151 | { 152 | "cell_type": "code", 153 | "execution_count": 6, 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "4×4 Matrix{Float64}:\n", 160 | " 0.0 -9.21828e-17 8.99478e-17 1.35877e-16\n", 161 | " -9.21828e-17 0.0 1.0142e-17 1.42552e-16\n", 162 | " 8.99478e-17 1.0142e-17 0.0 -2.09229e-16\n", 163 | " 1.35877e-16 1.42552e-16 -2.09229e-16 2.22045e-16" 164 | ] 165 | }, 166 | "execution_count": 6, 167 | "metadata": {}, 168 | 
"output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "Q'Q - I" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 7, 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/plain": [ 183 | "true" 184 | ] 185 | }, 186 | "execution_count": 7, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "Q'Q ≈ I" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 8, 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/plain": [ 203 | "4×4 Matrix{Float64}:\n", 204 | " 180.0 -1.42109e-14 1.33227e-14 1.28786e-14\n", 205 | " -1.42109e-14 96.3111 9.32625e-15 6.94211e-15\n", 206 | " 1.33227e-14 9.32625e-15 77.7003 -1.16249e-14\n", 207 | " 1.28786e-14 6.94211e-15 -1.16249e-14 37.8113" 208 | ] 209 | }, 210 | "execution_count": 8, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "# compare to what happens if we didn't normalize:\n", 217 | "V'V # = diagonal matrix (orthogonal columns, but not orthonormal)" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 9, 223 | "metadata": {}, 224 | "outputs": [ 225 | { 226 | "data": { 227 | "text/plain": [ 228 | "4×4 Matrix{Float64}:\n", 229 | " 13.4164 11.7766 10.3605 8.86974\n", 230 | " -0.0 9.81382 9.2715 6.67879\n", 231 | " 0.0 0.0 8.81478 1.38213\n", 232 | " 0.0 0.0 0.0 6.14909" 233 | ] 234 | }, 235 | "execution_count": 9, 236 | "metadata": {}, 237 | "output_type": "execute_result" 238 | } 239 | ], 240 | "source": [ 241 | "# What does this triangular structure say?\n", 242 | "round.(Q'A, digits=5)" 243 | ] 244 | }, 245 | { 246 | "cell_type": "markdown", 247 | "metadata": {}, 248 | "source": [ 249 | "## QR factorization\n", 250 | "\n", 251 | "How do we do all this at once on a computer? We ask the computer to factor the matrix as $QR$ (orthonormal columns times upper triangular)." 
252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 10, 257 | "metadata": { 258 | "scrolled": true 259 | }, 260 | "outputs": [ 261 | { 262 | "data": { 263 | "text/plain": [ 264 | "LinearAlgebra.QRCompactWY{Float64, Matrix{Float64}}\n", 265 | "Q factor:\n", 266 | "6×6 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}}:\n", 267 | " -0.372678 0.571756 0.358733 -0.223061 -0.0840586 -0.590504\n", 268 | " -0.745356 -0.283047 0.0111116 0.602583 0.0189054 -0.0272178\n", 269 | " -0.149071 -0.0769889 -0.699888 -0.199157 -0.619405 -0.242243\n", 270 | " -0.521749 -0.116616 0.0367966 -0.683342 0.131189 0.478182\n", 271 | " -0.0745356 0.216248 -0.592508 -0.0123092 0.74464 -0.204877\n", 272 | " -0.0745356 0.725734 -0.170071 0.283488 -0.192913 0.566789\n", 273 | "R factor:\n", 274 | "4×4 Matrix{Float64}:\n", 275 | " -13.4164 -11.7766 -10.3604 -8.86974\n", 276 | " 0.0 9.81382 9.2715 6.67879\n", 277 | " 0.0 0.0 -8.81478 -1.38213\n", 278 | " 0.0 0.0 0.0 -6.14909" 279 | ] 280 | }, 281 | "execution_count": 10, 282 | "metadata": {}, 283 | "output_type": "execute_result" 284 | } 285 | ], 286 | "source": [ 287 | "F = qr(A) # returns a \"factorization object\" that stores both Q (implicitly) and R" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 11, 293 | "metadata": {}, 294 | "outputs": [ 295 | { 296 | "data": { 297 | "text/plain": [ 298 | "4×4 Matrix{Float64}:\n", 299 | " -13.4164 -11.7766 -10.3604 -8.86974\n", 300 | " 0.0 9.81382 9.2715 6.67879\n", 301 | " 0.0 0.0 -8.81478 -1.38213\n", 302 | " 0.0 0.0 0.0 -6.14909" 303 | ] 304 | }, 305 | "execution_count": 11, 306 | "metadata": {}, 307 | "output_type": "execute_result" 308 | } 309 | ], 310 | "source": [ 311 | "R = F.R" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 12, 317 | "metadata": {}, 318 | "outputs": [ 319 | { 320 | "data": { 321 | "text/plain": [ 322 | "6×4 Matrix{Float64}:\n", 323 | " -0.372678 0.571756 0.358733 -0.223061\n", 324 | " -0.745356 
-0.283047 0.0111116 0.602583\n", 325 | " -0.149071 -0.0769889 -0.699888 -0.199157\n", 326 | " -0.521749 -0.116616 0.0367966 -0.683342\n", 327 | " -0.0745356 0.216248 -0.592508 -0.0123092\n", 328 | " -0.0745356 0.725734 -0.170071 0.283488" 329 | ] 330 | }, 331 | "execution_count": 12, 332 | "metadata": {}, 333 | "output_type": "execute_result" 334 | } 335 | ], 336 | "source": [ 337 | "Q2 = Matrix(F.Q) # extract the \"thin\" QR factor you would get from Gram–Schmidt" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": 13, 343 | "metadata": {}, 344 | "outputs": [ 345 | { 346 | "data": { 347 | "text/plain": [ 348 | "4×4 Matrix{Float64}:\n", 349 | " -1.0 -0.0 -0.0 0.0\n", 350 | " 0.0 1.0 0.0 -0.0\n", 351 | " -0.0 0.0 -1.0 0.0\n", 352 | " -0.0 -0.0 -0.0 -1.0" 353 | ] 354 | }, 355 | "execution_count": 13, 356 | "metadata": {}, 357 | "output_type": "execute_result" 358 | } 359 | ], 360 | "source": [ 361 | "round.(Q'Q2, digits=5) # almost I, up to signs" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 14, 367 | "metadata": {}, 368 | "outputs": [ 369 | { 370 | "data": { 371 | "text/plain": [ 372 | "4×4 Matrix{Float64}:\n", 373 | " -13.4164 -11.7766 -10.3604 -8.86974\n", 374 | " 0.0 9.81382 9.2715 6.67879\n", 375 | " 0.0 0.0 -8.81478 -1.38213\n", 376 | " 0.0 0.0 0.0 -6.14909" 377 | ] 378 | }, 379 | "execution_count": 14, 380 | "metadata": {}, 381 | "output_type": "execute_result" 382 | } 383 | ], 384 | "source": [ 385 | "R # Recognize this matrix?" 
386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": 15, 391 | "metadata": {}, 392 | "outputs": [ 393 | { 394 | "data": { 395 | "text/plain": [ 396 | "true" 397 | ] 398 | }, 399 | "execution_count": 15, 400 | "metadata": {}, 401 | "output_type": "execute_result" 402 | } 403 | ], 404 | "source": [ 405 | "Q2*R ≈ A" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 16, 411 | "metadata": {}, 412 | "outputs": [ 413 | { 414 | "data": { 415 | "text/plain": [ 416 | "6-element Vector{Float64}:\n", 417 | " 0.5450097629781777\n", 418 | " 0.8599801580391012\n", 419 | " 0.7036387925825908\n", 420 | " 0.7553540639899048\n", 421 | " 0.7234080262946185\n", 422 | " 0.14725528162868073" 423 | ] 424 | }, 425 | "execution_count": 16, 426 | "metadata": {}, 427 | "output_type": "execute_result" 428 | } 429 | ], 430 | "source": [ 431 | "b = rand(6)" 432 | ] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": 17, 437 | "metadata": {}, 438 | "outputs": [ 439 | { 440 | "data": { 441 | "text/plain": [ 442 | "4-element Vector{Float64}:\n", 443 | " 0.08995466028300682\n", 444 | " -0.0768023071730735\n", 445 | " 0.07513430689992019\n", 446 | " 0.03688677949256471" 447 | ] 448 | }, 449 | "execution_count": 17, 450 | "metadata": {}, 451 | "output_type": "execute_result" 452 | } 453 | ], 454 | "source": [ 455 | "A \\ b" 456 | ] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": 18, 461 | "metadata": {}, 462 | "outputs": [ 463 | { 464 | "data": { 465 | "text/plain": [ 466 | "4-element Vector{Float64}:\n", 467 | " 0.08995466028300675\n", 468 | " -0.07680230717307351\n", 469 | " 0.07513430689992023\n", 470 | " 0.03688677949256475" 471 | ] 472 | }, 473 | "execution_count": 18, 474 | "metadata": {}, 475 | "output_type": "execute_result" 476 | } 477 | ], 478 | "source": [ 479 | "inv(A'A) * A'b" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 19, 485 | "metadata": {}, 486 | "outputs": [ 487 
| { 488 | "data": { 489 | "text/plain": [ 490 | "4-element Vector{Float64}:\n", 491 | " 0.0899546602830068\n", 492 | " -0.07680230717307351\n", 493 | " 0.07513430689992022\n", 494 | " 0.03688677949256465" 495 | ] 496 | }, 497 | "execution_count": 19, 498 | "metadata": {}, 499 | "output_type": "execute_result" 500 | } 501 | ], 502 | "source": [ 503 | "R \\ (Q2'b)[1:4]" 504 | ] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "execution_count": 20, 509 | "metadata": {}, 510 | "outputs": [ 511 | { 512 | "data": { 513 | "text/plain": [ 514 | "4-element Vector{Float64}:\n", 515 | " 0.08995466028300678\n", 516 | " -0.07680230717307349\n", 517 | " 0.07513430689992023\n", 518 | " 0.03688677949256459" 519 | ] 520 | }, 521 | "execution_count": 20, 522 | "metadata": {}, 523 | "output_type": "execute_result" 524 | } 525 | ], 526 | "source": [ 527 | "F \\ b # the factorization object F can be used directly for a least-square solve" 528 | ] 529 | }, 530 | { 531 | "cell_type": "code", 532 | "execution_count": null, 533 | "metadata": {}, 534 | "outputs": [], 535 | "source": [] 536 | } 537 | ], 538 | "metadata": { 539 | "@webio": { 540 | "lastCommId": null, 541 | "lastKernelId": null 542 | }, 543 | "kernelspec": { 544 | "display_name": "Julia 1.7.1", 545 | "language": "julia", 546 | "name": "julia-1.7" 547 | }, 548 | "language_info": { 549 | "file_extension": ".jl", 550 | "mimetype": "application/julia", 551 | "name": "julia", 552 | "version": "1.7.1" 553 | } 554 | }, 555 | "nbformat": 4, 556 | "nbformat_minor": 2 557 | } 558 | -------------------------------------------------------------------------------- /notes/rank-r and full svds.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 15, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "using LinearAlgebra" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 16, 15 | "metadata": {}, 16 | 
"outputs": [ 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "rankrsvd (generic function with 1 method)" 21 | ] 22 | }, 23 | "execution_count": 16, 24 | "metadata": {}, 25 | "output_type": "execute_result" 26 | } 27 | ], 28 | "source": [ 29 | "function fullsvd(A) \n", 30 | " U,s,V = svd(A, full = true) # compute svd\n", 31 | " Σ = zeros(size(A)) # container for Σ \n", 32 | " for i=1:length(s)\n", 33 | " Σ[i,i] = s[i] # place singular values in Σ\n", 34 | " end # a practical svd would never store all these zeros\n", 35 | " display(U);display(Σ);display(V) # display the answer\n", 36 | " return(U,Σ,V) # return the answer\n", 37 | "end\n", 38 | "\n", 39 | "\n", 40 | "function rankrsvd(A) \n", 41 | " U,s,V = svd(A, full = true) # compute svd\n", 42 | " r = sum(s.>1e-8) # rank = how many positive?\n", 43 | " U₁ = U[:,1:r]\n", 44 | " Σᵣ = Diagonal(s[1:r]) # Diagonal matrix of singular values\n", 45 | " V₁ = V[:,1:r]\n", 46 | " display(U₁);display(Σᵣ);display(V₁) # display the answer\n", 47 | " return(U₁,Σᵣ,V₁) # return the answer\n", 48 | "end" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "## 1. 
random 2x2 matrix" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 17, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "2×2 Array{Float64,2}:\n", 67 | " 0.259439 0.075927\n", 68 | " 0.898109 0.918728" 69 | ] 70 | }, 71 | "execution_count": 17, 72 | "metadata": {}, 73 | "output_type": "execute_result" 74 | } 75 | ], 76 | "source": [ 77 | "A = rand(2,2)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 18, 83 | "metadata": {}, 84 | "outputs": [ 85 | { 86 | "data": { 87 | "text/plain": [ 88 | "2×2 Array{Float64,2}:\n", 89 | " -0.18222 -0.983258\n", 90 | " -0.983258 0.18222 " 91 | ] 92 | }, 93 | "metadata": {}, 94 | "output_type": "display_data" 95 | }, 96 | { 97 | "data": { 98 | "text/plain": [ 99 | "2×2 Array{Float64,2}:\n", 100 | " 1.30643 0.0 \n", 101 | " 0.0 0.13025" 102 | ] 103 | }, 104 | "metadata": {}, 105 | "output_type": "display_data" 106 | }, 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "2×2 Adjoint{Float64,Array{Float64,2}}:\n", 111 | " -0.712128 -0.70205 \n", 112 | " -0.70205 0.712128" 113 | ] 114 | }, 115 | "metadata": {}, 116 | "output_type": "display_data" 117 | } 118 | ], 119 | "source": [ 120 | "fullsvd(A);" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 19, 126 | "metadata": {}, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "text/plain": [ 131 | "2×2 Array{Float64,2}:\n", 132 | " -0.18222 -0.983258\n", 133 | " -0.983258 0.18222 " 134 | ] 135 | }, 136 | "metadata": {}, 137 | "output_type": "display_data" 138 | }, 139 | { 140 | "data": { 141 | "text/plain": [ 142 | "2×2 Diagonal{Float64,Array{Float64,1}}:\n", 143 | " 1.30643 ⋅ \n", 144 | " ⋅ 0.13025" 145 | ] 146 | }, 147 | "metadata": {}, 148 | "output_type": "display_data" 149 | }, 150 | { 151 | "data": { 152 | "text/plain": [ 153 | "2×2 Array{Float64,2}:\n", 154 | " -0.712128 -0.70205 \n", 155 | " -0.70205 0.712128" 156 | ] 157 | }, 158 | "metadata": {}, 159 | "output_type": 
"display_data" 160 | } 161 | ], 162 | "source": [ 163 | "rankrsvd(A);" 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "metadata": {}, 169 | "source": [ 170 | "## 2. random 3x2 matrix" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 20, 176 | "metadata": {}, 177 | "outputs": [ 178 | { 179 | "data": { 180 | "text/plain": [ 181 | "3×2 Array{Float64,2}:\n", 182 | " 0.464581 0.051883\n", 183 | " 0.9702 0.533329\n", 184 | " 0.601868 0.413574" 185 | ] 186 | }, 187 | "execution_count": 20, 188 | "metadata": {}, 189 | "output_type": "execute_result" 190 | } 191 | ], 192 | "source": [ 193 | "A = rand(3,2)" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 21, 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "3×3 Array{Float64,2}:\n", 205 | " -0.311792 0.90137 0.300529\n", 206 | " -0.794641 -0.0739732 -0.602555\n", 207 | " -0.520894 -0.426685 0.73933 " 208 | ] 209 | }, 210 | "metadata": {}, 211 | "output_type": "display_data" 212 | }, 213 | { 214 | "data": { 215 | "text/plain": [ 216 | "3×2 Array{Float64,2}:\n", 217 | " 1.39313 0.0 \n", 218 | " 0.0 0.191691\n", 219 | " 0.0 0.0 " 220 | ] 221 | }, 222 | "metadata": {}, 223 | "output_type": "display_data" 224 | }, 225 | { 226 | "data": { 227 | "text/plain": [ 228 | "2×2 Adjoint{Float64,Array{Float64,2}}:\n", 229 | " -0.882421 0.47046 \n", 230 | " -0.47046 -0.882421" 231 | ] 232 | }, 233 | "metadata": {}, 234 | "output_type": "display_data" 235 | } 236 | ], 237 | "source": [ 238 | "fullsvd(A);" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 22, 244 | "metadata": {}, 245 | "outputs": [ 246 | { 247 | "data": { 248 | "text/plain": [ 249 | "3×2 Array{Float64,2}:\n", 250 | " -0.311792 0.90137 \n", 251 | " -0.794641 -0.0739732\n", 252 | " -0.520894 -0.426685 " 253 | ] 254 | }, 255 | "metadata": {}, 256 | "output_type": "display_data" 257 | }, 258 | { 259 | "data": { 260 | "text/plain": [ 261 | 
"2×2 Diagonal{Float64,Array{Float64,1}}:\n", 262 | " 1.39313 ⋅ \n", 263 | " ⋅ 0.191691" 264 | ] 265 | }, 266 | "metadata": {}, 267 | "output_type": "display_data" 268 | }, 269 | { 270 | "data": { 271 | "text/plain": [ 272 | "2×2 Array{Float64,2}:\n", 273 | " -0.882421 0.47046 \n", 274 | " -0.47046 -0.882421" 275 | ] 276 | }, 277 | "metadata": {}, 278 | "output_type": "display_data" 279 | } 280 | ], 281 | "source": [ 282 | "rankrsvd(A);" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "## 3. random 2x3 matrix" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": 23, 295 | "metadata": {}, 296 | "outputs": [ 297 | { 298 | "data": { 299 | "text/plain": [ 300 | "2×3 Array{Float64,2}:\n", 301 | " 0.0451975 0.242917 0.405185\n", 302 | " 0.477637 0.8663 0.725397" 303 | ] 304 | }, 305 | "execution_count": 23, 306 | "metadata": {}, 307 | "output_type": "execute_result" 308 | } 309 | ], 310 | "source": [ 311 | "A = rand(2,3)" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 24, 317 | "metadata": {}, 318 | "outputs": [ 319 | { 320 | "data": { 321 | "text/plain": [ 322 | "2×2 Array{Float64,2}:\n", 323 | " -0.337277 -0.941405\n", 324 | " -0.941405 0.337277" 325 | ] 326 | }, 327 | "metadata": {}, 328 | "output_type": "display_data" 329 | }, 330 | { 331 | "data": { 332 | "text/plain": [ 333 | "2×3 Array{Float64,2}:\n", 334 | " 1.30125 0.0 0.0\n", 335 | " 0.0 0.191821 0.0" 336 | ] 337 | }, 338 | "metadata": {}, 339 | "output_type": "display_data" 340 | }, 341 | { 342 | "data": { 343 | "text/plain": [ 344 | "3×3 Adjoint{Float64,Array{Float64,2}}:\n", 345 | " -0.357268 0.618008 -0.700304\n", 346 | " -0.689699 0.331038 0.643994\n", 347 | " -0.629821 -0.713078 -0.30797 " 348 | ] 349 | }, 350 | "metadata": {}, 351 | "output_type": "display_data" 352 | } 353 | ], 354 | "source": [ 355 | "fullsvd(A);" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 25, 361 
| "metadata": {}, 362 | "outputs": [ 363 | { 364 | "data": { 365 | "text/plain": [ 366 | "2×2 Array{Float64,2}:\n", 367 | " -0.337277 -0.941405\n", 368 | " -0.941405 0.337277" 369 | ] 370 | }, 371 | "metadata": {}, 372 | "output_type": "display_data" 373 | }, 374 | { 375 | "data": { 376 | "text/plain": [ 377 | "2×2 Diagonal{Float64,Array{Float64,1}}:\n", 378 | " 1.30125 ⋅ \n", 379 | " ⋅ 0.191821" 380 | ] 381 | }, 382 | "metadata": {}, 383 | "output_type": "display_data" 384 | }, 385 | { 386 | "data": { 387 | "text/plain": [ 388 | "3×2 Array{Float64,2}:\n", 389 | " -0.357268 0.618008\n", 390 | " -0.689699 0.331038\n", 391 | " -0.629821 -0.713078" 392 | ] 393 | }, 394 | "metadata": {}, 395 | "output_type": "display_data" 396 | } 397 | ], 398 | "source": [ 399 | "rankrsvd(A);" 400 | ] 401 | }, 402 | { 403 | "cell_type": "markdown", 404 | "metadata": {}, 405 | "source": [ 406 | "## 4. rank 3, 7x10 matrix" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": 26, 412 | "metadata": {}, 413 | "outputs": [ 414 | { 415 | "data": { 416 | "text/plain": [ 417 | "7×10 Array{Float64,2}:\n", 418 | " 0.409585 0.550866 0.729917 0.396048 … 0.491844 0.870667 0.327258\n", 419 | " 0.824315 0.457974 0.887333 0.932895 0.34797 1.19361 0.266938\n", 420 | " 0.897034 0.338666 0.678985 0.943025 0.418739 1.0996 0.258406\n", 421 | " 0.862495 0.661816 0.926009 0.816466 0.771865 1.33408 0.463019\n", 422 | " 0.878864 0.887087 1.42046 0.99785 0.602456 1.66596 0.472172\n", 423 | " 0.765583 0.423397 0.731421 0.800129 … 0.446203 1.06758 0.287248\n", 424 | " 0.710849 0.713791 1.12556 0.793167 0.511149 1.33563 0.388595" 425 | ] 426 | }, 427 | "execution_count": 26, 428 | "metadata": {}, 429 | "output_type": "execute_result" 430 | } 431 | ], 432 | "source": [ 433 | "A = rand(7,3)*rand(3,10) # this should be rank 3" 434 | ] 435 | }, 436 | { 437 | "cell_type": "code", 438 | "execution_count": 27, 439 | "metadata": {}, 440 | "outputs": [ 441 | { 442 | "data": { 443 | "text/plain": [ 
444 | "7×7 Array{Float64,2}:\n", 445 | " -0.277937 -0.522311 -0.0744039 … 0.466151 -0.560653 0.264395 \n", 446 | " -0.34423 0.518096 -0.0720404 -0.227442 -0.460716 -0.319634 \n", 447 | " -0.335511 0.428918 0.464644 0.604044 0.0486161 -0.0705107 \n", 448 | " -0.436358 -0.497343 0.506287 -0.350516 0.117976 -0.410459 \n", 449 | " -0.487624 0.0167782 -0.553854 -0.204899 -0.105282 -0.000703896\n", 450 | " -0.327734 0.157075 0.264594 … -0.34661 0.116364 0.806582 \n", 451 | " -0.395088 -0.0499189 -0.374552 0.284817 0.657649 -0.0625039 " 452 | ] 453 | }, 454 | "metadata": {}, 455 | "output_type": "display_data" 456 | }, 457 | { 458 | "data": { 459 | "text/plain": [ 460 | "7×10 Array{Float64,2}:\n", 461 | " 6.92226 0.0 0.0 0.0 … 0.0 0.0 0.0 0.0\n", 462 | " 0.0 0.755198 0.0 0.0 0.0 0.0 0.0 0.0\n", 463 | " 0.0 0.0 0.683053 0.0 0.0 0.0 0.0 0.0\n", 464 | " 0.0 0.0 0.0 5.91088e-16 0.0 0.0 0.0 0.0\n", 465 | " 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0\n", 466 | " 0.0 0.0 0.0 0.0 … 0.0 0.0 0.0 0.0\n", 467 | " 0.0 0.0 0.0 0.0 8.21229e-17 0.0 0.0 0.0" 468 | ] 469 | }, 470 | "metadata": {}, 471 | "output_type": "display_data" 472 | }, 473 | { 474 | "data": { 475 | "text/plain": [ 476 | "10×10 Adjoint{Float64,Array{Float64,2}}:\n", 477 | " -0.294011 0.35548 0.312083 … 0.0465244 0.0514862 0.039223 \n", 478 | " -0.2263 -0.249712 -0.334077 0.181558 0.595424 0.0233641\n", 479 | " -0.363646 -0.0109916 -0.510499 -0.0359443 -0.220601 0.140677 \n", 480 | " -0.312911 0.500153 0.171036 -0.359502 -0.207792 0.0736223\n", 481 | " -0.237955 -0.64866 0.255552 -0.601864 -0.131402 -0.104849 \n", 482 | " -0.251608 0.0397458 -0.437401 … 0.164673 -0.485916 -0.221939 \n", 483 | " -0.477004 -0.0809271 0.436915 0.478804 0.100636 0.0399163\n", 484 | " -0.198741 -0.299539 0.17074 0.373694 -0.353637 -0.236875 \n", 485 | " -0.475836 0.133415 -0.153594 -0.275708 0.389516 -0.22132 \n", 486 | " -0.137166 -0.156822 -0.0295032 0.00811763 -0.103379 0.897647 " 487 | ] 488 | }, 489 | "metadata": {}, 490 | "output_type": 
"display_data" 491 | } 492 | ], 493 | "source": [ 494 | "fullsvd(A);" 495 | ] 496 | }, 497 | { 498 | "cell_type": "code", 499 | "execution_count": 28, 500 | "metadata": {}, 501 | "outputs": [ 502 | { 503 | "data": { 504 | "text/plain": [ 505 | "7×3 Array{Float64,2}:\n", 506 | " -0.277937 -0.522311 -0.0744039\n", 507 | " -0.34423 0.518096 -0.0720404\n", 508 | " -0.335511 0.428918 0.464644 \n", 509 | " -0.436358 -0.497343 0.506287 \n", 510 | " -0.487624 0.0167782 -0.553854 \n", 511 | " -0.327734 0.157075 0.264594 \n", 512 | " -0.395088 -0.0499189 -0.374552 " 513 | ] 514 | }, 515 | "metadata": {}, 516 | "output_type": "display_data" 517 | }, 518 | { 519 | "data": { 520 | "text/plain": [ 521 | "3×3 Diagonal{Float64,Array{Float64,1}}:\n", 522 | " 6.92226 ⋅ ⋅ \n", 523 | " ⋅ 0.755198 ⋅ \n", 524 | " ⋅ ⋅ 0.683053" 525 | ] 526 | }, 527 | "metadata": {}, 528 | "output_type": "display_data" 529 | }, 530 | { 531 | "data": { 532 | "text/plain": [ 533 | "10×3 Array{Float64,2}:\n", 534 | " -0.294011 0.35548 0.312083 \n", 535 | " -0.2263 -0.249712 -0.334077 \n", 536 | " -0.363646 -0.0109916 -0.510499 \n", 537 | " -0.312911 0.500153 0.171036 \n", 538 | " -0.237955 -0.64866 0.255552 \n", 539 | " -0.251608 0.0397458 -0.437401 \n", 540 | " -0.477004 -0.0809271 0.436915 \n", 541 | " -0.198741 -0.299539 0.17074 \n", 542 | " -0.475836 0.133415 -0.153594 \n", 543 | " -0.137166 -0.156822 -0.0295032" 544 | ] 545 | }, 546 | "metadata": {}, 547 | "output_type": "display_data" 548 | } 549 | ], 550 | "source": [ 551 | "rankrsvd(A);" 552 | ] 553 | }, 554 | { 555 | "cell_type": "code", 556 | "execution_count": null, 557 | "metadata": {}, 558 | "outputs": [], 559 | "source": [] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": null, 564 | "metadata": {}, 565 | "outputs": [], 566 | "source": [] 567 | } 568 | ], 569 | "metadata": { 570 | "@webio": { 571 | "lastCommId": null, 572 | "lastKernelId": null 573 | }, 574 | "kernelspec": { 575 | "display_name": "Julia 1.3.0", 576 | 
"language": "julia", 577 | "name": "julia-1.3" 578 | }, 579 | "language_info": { 580 | "file_extension": ".jl", 581 | "mimetype": "application/julia", 582 | "name": "julia", 583 | "version": "1.3.0" 584 | } 585 | }, 586 | "nbformat": 4, 587 | "nbformat_minor": 2 588 | } 589 | -------------------------------------------------------------------------------- /notes/Gauss-Jordan.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Gauss–Jordan and computing A⁻¹\n", 8 | "\n", 9 | "The Gauss–Jordan algorithm is a technique for hand-calculation of the inverse. Nowadays, you should hardly ever compute a matrix inverse, even on a computer, but Gauss–Jordan is still useful to go over:\n", 10 | "\n", 11 | "* It helps us to understand when and why an inverse matrix exists.\n", 12 | "\n", 13 | "* It gives us yet another example to help us understand the *structure* of elimination operations" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 1, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "using LinearAlgebra # as usual, we'll load this package" 23 | ] 24 | }, 25 | { 26 | "cell_type": "markdown", 27 | "metadata": {}, 28 | "source": [ 29 | "## Review: Inverses\n", 30 | "\n", 31 | "The inverse of a linear operator $A$ is the operat that \"undoes\" the action of $A$:\n", 32 | "\n", 33 | "$$\n", 34 | "\\boxed{A^{-1}(Ax) = x} .\n", 35 | "$$\n", 36 | "\n", 37 | "for *any* $x$. Equivalently, $\\boxed{Ax=b \\implies x = A^{-1} b}$. This means that\n", 38 | "\n", 39 | "* **A⁻¹ only exists for (m×m) square matrices with m (nonzero) pivots**\n", 40 | "\n", 41 | "since for non-square matrices or matrices with one or more \"zero pivots\" we can't always solve $Ax=b$ (we'd divide by zero during backsubstitution). It is also easy to see that $\\boxed{(A^{-1})^{-1} = A}$, i.e. 
that $A$ undoes the action of $A^{-1}$.\n", 42 | "\n", 43 | "Equivalently,\n", 44 | "$$\n", 45 | "\\boxed{AA^{-1} = A^{-1} A = I}\n", 46 | "$$\n", 47 | "where $I$ is the m×m identity matrix — in linear algebra, we typically *infer* the size of $I$ from context, but if it is ambiguous we might write $I_m$.\n", 48 | "\n", 49 | "### Inverses of products: (AB)⁻¹ = B⁻¹A⁻¹\n", 50 | "\n", 51 | "It is easy to see that the inverse of a product $BA$ is the product of the inverses in *reverse order*: $\\boxed{(AB)^{-1} = B^{-1} A^{-1}}$. Intuitively, when you reverse a sequence of operations, you always need to retrace your steps in backwards order. Explicitly:\n", 52 | "$$\n", 53 | "(AB)^{-1} AB = B^{-1} \\underbrace{A^{-1} A}_I B = B^{-1} B = I \\, .\n", 54 | "$$\n", 55 | "\n", 56 | "For example, we saw that Gaussian elimination corresponded to the factorization $A = LU$, where $U$ is the result of elimination and $L$ is simply a record of the elimination steps. Then\n", 57 | "$$\n", 58 | "Ax = b \\implies x = A^{-1} b = (LU)^{-1} b = \\underbrace{U^{-1} \\underbrace{ L^{-1} b }_\\mbox{forward substitution}}_\\mbox{backsubstitution} \\, .\n", 59 | "$$\n", 60 | "\n", 61 | "### Rarely compute inverses!\n", 62 | "\n", 63 | "In general **rarely if ever** compute inverses explicitly:\n", 64 | "\n", 65 | "* **Read \"x = A⁻¹b\" as \"solve Ax=b for x\" the best way you can**, and invariably there are better ways to solve for x than inverting a matrix.\n", 66 | "\n", 67 | "More on this below. Instead, **inverses are mostly a *conceptual* tool** to move operators/matrices around in equations. Once we have the equations in the form that we want, we then carry out the computations in some other way.\n", 68 | "\n", 69 | "### Notation:\n", 70 | "\n", 71 | "Inverses allow us to \"divide by matrices\", but we always have to be clear about whether we are dividing **on the left or on the right**. 
The following notations can be convenient, and are used in computer software like Julia and Matlab and elsewhere for square invertible matrices $A$:\n", 72 | "\n", 73 | "$$ B / A = BA^{-1}, \\\\ A \\backslash B = A^{-1} B$$" 74 | ] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "metadata": {}, 79 | "source": [ 80 | "## Inverses by linear equations\n", 81 | "\n", 82 | "The equation $A A^{-1} = I$ actually gives us the algorithm to compute $A^{-1}$.\n", 83 | "\n", 84 | "Suppose we denote the *columns* of $A^{-1} = \\begin{pmatrix} x_1 & x_2 & \\cdots & x_m \\end{pmatrix}$, and the columns of $I = \\begin{pmatrix} e_1 & e_2 & \\cdots & e_m \\end{pmatrix}$.\n", 85 | "\n", 86 | "Then \n", 87 | "$$\n", 88 | "A \\underbrace{\\begin{pmatrix} x_1 & x_2 & \\cdots & x_m \\end{pmatrix}}_{A^{-1}} = \n", 89 | "\\begin{pmatrix} A x_1 & A x_2 & \\cdots & A x_n \\end{pmatrix} = \\underbrace{\\begin{pmatrix} e_1 & e_2 & \\cdots & e_m \\end{pmatrix}}_I.\n", 90 | "$$\n", 91 | "(The key fact here is that **multiplying A by a matrix on the right** is equivalent to **multiplying A by each column of that matrix**, which you can easily see by writing out the computation.)\n", 92 | "\n", 93 | "In consequence $A x_k = e_k$, which is a **linear equation for the k-th column of A⁻¹**. Equivalently, to find A⁻¹ for an m×m matrix A, we must **solve Ax=b for m right-hand sides** equal to the columns of I.\n", 94 | "\n", 95 | "* Put another way, for *any* matrix $B$, $Be_k = k\\mbox{-th column of }B$. So the k-th column of $A^{-1}$ is $x_k = A^{-1} e_k$, i.e. the solution to $Ax_k = e_k$.\n", 96 | "\n", 97 | "\n", 98 | "* Ideally, we do Gaussian elimination $A=LU$ *once*, then compute $x_k = U^{-1} L^{-1} e_k$ by forward+back-substitution for each column of $I$. 
(This is essentially what the computer does.)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "## Example: computing L⁻¹ = E\n", 106 | "\n", 107 | "For example, how might we compute the inverse of the L matrix we got from Gaussian elimination in the last lecture, which should give us $L^{-1} = E$? We solve\n", 108 | "\n", 109 | "$$\n", 110 | "\\underbrace{\\begin{pmatrix} 1 & & \\\\ 1 & 1 & \\\\ 3 & -1 & 1 \\end{pmatrix}}_L x_k = e_k\n", 111 | "$$\n", 112 | "\n", 113 | "for $e_1,e_2,e_3$ (the columns of the 3×3 identity I).\n", 114 | "\n", 115 | "Let's do it for $e_1$, to find the **first column** $x_1$ of $L^{-1} = E$:\n", 116 | "$$\n", 117 | "\\underbrace{\\begin{pmatrix} 1 & & \\\\ 1 & 1 & \\\\ 3 & -1 & 1 \\end{pmatrix}}_L \\underbrace{\\begin{pmatrix} a \\\\ b \\\\ c \\end{pmatrix}}_{x_1} = \\underbrace{\\begin{pmatrix} 1 \\\\ 0 \\\\ 0 \\end{pmatrix}}_{e_1}\n", 118 | "$$\n", 119 | "By forward substitution (from top to bottom), we get $a = 1$, $1a + 1b = 0 \\implies b = -1$, $3a - 1b + 1c = 0 \\implies c = -4$, so $\\boxed{x_1 = [1, -1, -4]}$. 
Let's check:" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 2, 125 | "metadata": {}, 126 | "outputs": [ 127 | { 128 | "data": { 129 | "text/plain": [ 130 | "3×3 Matrix{Int64}:\n", 131 | " 1 0 0\n", 132 | " 1 1 0\n", 133 | " 3 -1 1" 134 | ] 135 | }, 136 | "execution_count": 2, 137 | "metadata": {}, 138 | "output_type": "execute_result" 139 | } 140 | ], 141 | "source": [ 142 | "L = [1 0 0\n", 143 | " 1 1 0\n", 144 | " 3 -1 1]" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 3, 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "data": { 154 | "text/plain": [ 155 | "3×3 Matrix{Float64}:\n", 156 | " 1.0 0.0 0.0\n", 157 | " -1.0 1.0 0.0\n", 158 | " -4.0 1.0 1.0" 159 | ] 160 | }, 161 | "execution_count": 3, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "E = L^-1" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 4, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "data": { 177 | "text/plain": [ 178 | "3-element Vector{Float64}:\n", 179 | " 1.0\n", 180 | " -1.0\n", 181 | " -4.0" 182 | ] 183 | }, 184 | "execution_count": 4, 185 | "metadata": {}, 186 | "output_type": "execute_result" 187 | } 188 | ], 189 | "source": [ 190 | "E[:,1] # first column" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "Yup, the first column is `[1, -1, -4]`. We could easily get the other two columns as well (left as an exercise).\n", 198 | "\n", 199 | "**Important note***: there is **no simple formula** for the inverse of a triangular matrix like L or U! 
You can invert *individual* elimination steps $E_k$ by flipping signs, but the *product* of the elimination steps is not so easy to invert.\n", 200 | "\n", 201 | "(A lot of students get confused by this because Strang's lectures and textbook start by inverting individual elimination steps, which is easier.)" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "metadata": {}, 207 | "source": [ 208 | "Another way to write this is `L \\ I`, which *conceptually* means \"multiply $I$ by $L^{-1}$ on the *left*\", but *actually* in Julia is computed without inverting any matrix explicitly, by instead solving with 3 right-hand sides:" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": 5, 214 | "metadata": {}, 215 | "outputs": [ 216 | { 217 | "data": { 218 | "text/plain": [ 219 | "3×3 Matrix{Float64}:\n", 220 | " 1.0 0.0 0.0\n", 221 | " -1.0 1.0 0.0\n", 222 | " -4.0 1.0 1.0" 223 | ] 224 | }, 225 | "execution_count": 5, 226 | "metadata": {}, 227 | "output_type": "execute_result" 228 | } 229 | ], 230 | "source": [ 231 | "L \\ I" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "Note that `I` is a special object defined by Julia's `LinearAlgebra` package which essentially means **an identity matrix whose size is inferred from context**.\n", 239 | "\n", 240 | "If we want an $m \\times m$ identity matrix, we can use `I(m)`:" 241 | ] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "execution_count": 6, 246 | "metadata": {}, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "3×3 Diagonal{Bool, Vector{Bool}}:\n", 252 | " 1 ⋅ ⋅\n", 253 | " ⋅ 1 ⋅\n", 254 | " ⋅ ⋅ 1" 255 | ] 256 | }, 257 | "execution_count": 6, 258 | "metadata": {}, 259 | "output_type": "execute_result" 260 | } 261 | ], 262 | "source": [ 263 | "I(3)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "## The Gauss–Jordan algorithm.\n", 271 | "\n", 272 | 
"Gauss–Jordan could be viewed as just a trick (primarily for hand calculation) to organize solving $A x_k = e_k$. But it's also nice to think about algebraically — it is a nice application of our \"matrix viewpoint\" of Gaussian elimination.\n", 273 | "\n", 274 | "The Gauss–Jordan idea, in a nutshell is: **if we do some row operations on A to obtain I, then doing the *same* row operations on I gives A⁻¹**. Why?\n", 275 | "\n", 276 | "* Row operations correspond to multiplying $A$ by some matrix $E=\\cdots E_2 E_1$ on the *left*.\n", 277 | "\n", 278 | "* So, doing row operations that turn $A$ into $I$ means that $EA = I$, and hence $E = A^{-1}$.\n", 279 | "\n", 280 | "* Doing the *same* row operations on $I$ is equivalent to multiplying $I$ on the *left* by the *same* matrix $E$, giving $EI$. But $EI = E$, and $E = A^{-1}$, so this gives $A^{-1}$!\n", 281 | "\n", 282 | "As usual for Gaussian elimination, to do the *same* row operations on both $A$ and $I$ we **augment A** with $I$. That is, we do:\n", 283 | "\n", 284 | "$$\n", 285 | "\\boxed{\n", 286 | "\\left(\\begin{array}{c|c}A & I\\end{array}\\right) \\underset{\\mbox{row ops}}{\\longrightarrow} \\left(\\begin{array}{c|c}I & A^{-1}\\end{array}\\right)\n", 287 | "}\n", 288 | "$$\n", 289 | "\n", 290 | "### Elimination $A \\to I$\n", 291 | "\n", 292 | "How do we do row operations to turn $A$ into $I$? Simple:\n", 293 | "\n", 294 | "1. First, do ordinary Gaussian elimination \"downwards\" to turn $A$ into $U$ (an **upper-triangular** matrix).\n", 295 | "\n", 296 | "2. Then, do Gaussian elimination \"upwards\" on $U$ to eliminate entries *above* the diagonal, turning $U$ into a **diagonal** matrix $D$\n", 297 | "\n", 298 | "3. Finally, divide each row of $D$ by the diagonal entry to turn it into $I$." 
299 | ] 300 | }, 301 | { 302 | "cell_type": "markdown", 303 | "metadata": {}, 304 | "source": [ 305 | "# Gauss–Jordan example\n", 306 | "\n", 307 | "Let's perform these $A \\to I$ elimination steps on $3 \\times 3$ matrix $A$: first eliminate down to make $U$, then eliminate up to make $D$, then divide by the diagonals to make $I$:\n", 308 | "\n", 309 | "$$\n", 310 | "\\underbrace{\\begin{pmatrix} \\boxed{1} & 4 & 1 \\\\ 1 & 2 & -1 \\\\ 3 & 14 & 6 \\end{pmatrix}}_A\n", 311 | "\\longrightarrow\n", 312 | "\\begin{pmatrix} \\boxed{1} & 4 & 1 \\\\ 0 & \\boxed{-2} & -2 \\\\ 0 & 2 & 3 \\end{pmatrix}\n", 313 | "\\longrightarrow\n", 314 | "\\underbrace{\\begin{pmatrix} \\boxed{1} & 4 & 1 \\\\ 0 & \\boxed{-2} & -2 \\\\ 0 & 0 & \\boxed{1} \\end{pmatrix}}_U\n", 315 | "\\\\\n", 316 | "\\longrightarrow\n", 317 | "\\begin{pmatrix} 1 & 0 & -3 \\\\ 0 & \\boxed{-2} & -2 \\\\ 0 & 0 & 1 \\end{pmatrix}\n", 318 | "\\longrightarrow\n", 319 | "\\underbrace{\\begin{pmatrix} 1 & 0 & 0 \\\\ 0 & -2 & 0 \\\\ 0 & 0 & \\boxed{1} \\end{pmatrix}}_D\n", 320 | "\\longrightarrow\n", 321 | "\\underbrace{\\begin{pmatrix} 1 & 0 & 0 \\\\ 0 & 1 & 0 \\\\ 0 & 0 & 1 \\end{pmatrix}}_I\n", 322 | "$$\n", 323 | "\n", 324 | "No problem! It is easy to see that this will work **whenever A has all of its pivots** (i.e. it is non-singular)." 
325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "To get the inverse, we needed to augment this with $I$ so that we perform the same elimination steps on both.\n", 332 | "\n", 333 | "$$\n", 334 | "\\left(\\begin{array}{rrr|rrr}\n", 335 | " \\boxed{1} & 4 & 1 & 1 & 0 & 0 \\\\\n", 336 | " 1 & 2 & -1 & 0 & 1 & 0 \\\\\n", 337 | " 3 & 14 & 6 & 0 & 0 & 1 \\end{array}\\right)\n", 338 | "\\longrightarrow\n", 339 | "\\left(\\begin{array}{rrr|rrr}\n", 340 | " \\boxed{1} & 4 & 1 & 1 & 0 & 0 \\\\\n", 341 | " 0 & \\boxed{-2} & -2 & -1 & 1 & 0 \\\\\n", 342 | " 0 & 2 & 3 & -3 & 0 & 1 \\end{array}\\right) \\\\\n", 343 | "\\longrightarrow\n", 344 | "\\left(\\begin{array}{rrr|rrr}\n", 345 | " \\boxed{1} & 4 & 1 & 1 & 0 & 0 \\\\\n", 346 | " 0 & \\boxed{-2} & -2 & -1 & 1 & 0 \\\\\n", 347 | " 0 & 0 & \\boxed{1} & -4 & 1 & 1 \\end{array}\\right)\n", 348 | "\\longrightarrow\n", 349 | "\\left(\\begin{array}{rrr|rrr}\n", 350 | " 1 & 0 & -3 & -1 & 2 & 0 \\\\\n", 351 | " 0 & \\boxed{-2} & -2 & -1 & 1 & 0 \\\\\n", 352 | " 0 & 0 & 1 & -4 & 1 & 1 \\end{array}\\right) \\\\\n", 353 | "\\longrightarrow\n", 354 | "\\left(\\begin{array}{rrr|rrr}\n", 355 | " 1 & 0 & 0 & -13 & 5 & 3 \\\\\n", 356 | " 0 & -2 & 0 & -9 & 3 & 2 \\\\\n", 357 | " 0 & 0 & \\boxed{1} & -4 & 1 & 1 \\end{array}\\right)\n", 358 | "\\longrightarrow\n", 359 | "\\left(\\begin{array}{rrr|rrr}\n", 360 | " 1 & 0 & 0 & -13 & 5 & 3 \\\\\n", 361 | " 0 & 1 & 0 & 4.5 & -1.5 & -1 \\\\\n", 362 | " 0 & 0 & 1 & -4 & 1 & 1 \\end{array}\\right)\n", 363 | "$$\n", 364 | "\n", 365 | "Whew, this was a lot of work! Did we get the right answer?" 
366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 7, 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "data": { 375 | "text/plain": [ 376 | "3×3 Matrix{Int64}:\n", 377 | " 1 4 1\n", 378 | " 1 2 -1\n", 379 | " 3 14 6" 380 | ] 381 | }, 382 | "execution_count": 7, 383 | "metadata": {}, 384 | "output_type": "execute_result" 385 | } 386 | ], 387 | "source": [ 388 | "A = [1 4 1\n", 389 | " 1 2 -1\n", 390 | " 3 14 6]" 391 | ] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": 8, 396 | "metadata": {}, 397 | "outputs": [ 398 | { 399 | "data": { 400 | "text/plain": [ 401 | "3×3 Matrix{Float64}:\n", 402 | " -13.0 5.0 3.0\n", 403 | " 4.5 -1.5 -1.0\n", 404 | " -4.0 1.0 1.0" 405 | ] 406 | }, 407 | "execution_count": 8, 408 | "metadata": {}, 409 | "output_type": "execute_result" 410 | } 411 | ], 412 | "source": [ 413 | "A^-1" 414 | ] 415 | }, 416 | { 417 | "cell_type": "markdown", 418 | "metadata": {}, 419 | "source": [ 420 | "Hooray!\n", 421 | "\n", 422 | "(It is *really* easy to make a mistake during this process.)" 423 | ] 424 | }, 425 | { 426 | "cell_type": "markdown", 427 | "metadata": {}, 428 | "source": [ 429 | "# (Almost) Never Compute Inverses!\n", 430 | "\n", 431 | "Matrix inverses are funny, however:\n", 432 | "\n", 433 | "* Inverse matrices are very convenient in *analytical* manipulations, because they allow you to move matrices from one side to the other of equations easily.\n", 434 | "\n", 435 | "* Inverse matrices are **almost never computed** in \"serious\" numerical calculations. Whenever you see $A^{-1} B$ (or $A^{-1} b$), when you go to *implement* it on a computer you should *read* $A^{-1} B$ as \"solve $AX = B$ by some method.\" e.g. 
solve it by `A \\ B` or by first computing the LU factorization of $A$ and then using it to solve $AX = B$.\n", 436 | "\n", 437 | "One reason that you don't usually compute inverse matrices is that it is wasteful: once you have $A=LU$ (later we will generalize this to \"$PA = LU$\"), you can solve $AX=B$ directly without bothering to find $A^{-1}$, and computing $A^{-1}$ requires much more work if you only have to solve a few right-hand sides.\n", 438 | "\n", 439 | "Another reason is that for many special matrices, there are ways to solve $AX=B$ *much* more quickly than you can find $A^{-1}$. For example, many large matrices in practice are [sparse](https://en.wikipedia.org/wiki/Sparse_matrix) (mostly zero), and often for sparse matrices you can arrange for $L$ and $U$ to be sparse too. Sparse matrices are much more efficient to work with than general \"dense\" matrices because you don't have to multiply (or even store) the zeros. Even if $A$ is sparse, however, $A^{-1}$ is usually non-sparse, so you lose the special efficiency of sparsity if you compute the inverse matrix. \n", 440 | "\n", 441 | "For example:\n", 442 | "\n", 443 | "* If you see $U^{-1} b$ where $U$ is *upper* triangular, don't compute $U^{-1}$ explicitly! Just solve $Ux = b$ by *back-substitution* (from the bottom row up).\n", 444 | "\n", 445 | "* If you see $L^{-1} b$ where $L$ is *lower* triangular, don't compute $L^{-1}$ explicitly! Just solve $Lx = b$ by *forward-substitution* (from the top row down)." 
446 | ] 447 | } 448 | ], 449 | "metadata": { 450 | "@webio": { 451 | "lastCommId": null, 452 | "lastKernelId": null 453 | }, 454 | "kernelspec": { 455 | "display_name": "Julia 1.7.1", 456 | "language": "julia", 457 | "name": "julia-1.7" 458 | }, 459 | "language_info": { 460 | "file_extension": ".jl", 461 | "mimetype": "application/julia", 462 | "name": "julia", 463 | "version": "1.7.1" 464 | } 465 | }, 466 | "nbformat": 4, 467 | "nbformat_minor": 2 468 | } 469 | -------------------------------------------------------------------------------- /notes/Markov.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 28, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "using LinearAlgebra" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# Markov matrices\n", 17 | "\n", 18 | "A matrix $A$ is a **Markov matrix** if\n", 19 | "\n", 20 | "* Its entries are all $\\ge 0$\n", 21 | "* Each **column**'s entries **sum to 1**\n", 22 | "\n", 23 | "Typicaly, a Markov matrix's entries represent **transition probabilities** from one state to another.\n", 24 | "\n", 25 | "For example, consider the $2 \\times 2$ Markov matrix:" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 29, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "data": { 35 | "text/plain": [ 36 | "2×2 Matrix{Float64}:\n", 37 | " 0.9 0.2\n", 38 | " 0.1 0.8" 39 | ] 40 | }, 41 | "execution_count": 29, 42 | "metadata": {}, 43 | "output_type": "execute_result" 44 | } 45 | ], 46 | "source": [ 47 | "A = [0.9 0.2\n", 48 | " 0.1 0.8]" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "Let us suppose that this represents the fraction of people switching majors each year between math and English literature.\n", 56 | "\n", 57 | "Let\n", 58 | "$$\n", 59 | "x = \\begin{pmatrix} m \\\\ e \\end{pmatrix}\n", 
60 | "$$\n", 61 | "\n", 62 | "represent the number of math majors $m$ and English majors $e$. Suppose that each year, 10% of math majors and 20% of English majors switch majors. After one year, the new number of math and English majors is:\n", 63 | "\n", 64 | "$$\n", 65 | "m' = 0.9 m + 0.2 e \\\\\n", 66 | "e' = 0.1 m + 0.8 e\n", 67 | "$$\n", 68 | "\n", 69 | "But this is equivalent to a matrix multiplication! i.e. the numbers $x'$ of majors after one year is\n", 70 | "\n", 71 | "$$\n", 72 | "x' = A x \\,\n", 73 | "$$\n", 74 | "\n", 75 | "Note that the two Markov properties are critical: we never have negative numbers of majors (or negative probabilities), and the probabilities must sum to 1 (the net number of majors is not changing: we're not including new students or people that graduate in this silly model)." 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "## Eigenvalues of Markov matrices\n", 83 | "\n", 84 | "There are two key questions about Markov matrices that can be answered by analysis of their eigenvalues:\n", 85 | "\n", 86 | "* Is there a **steady state**?\n", 87 | " - i.e. is there an $x_0 \\ne 0$ such that $A x_0 = x_0$?\n", 88 | " - i.e. is there $\\lambda_0 = 1$ eigenvector $x_0$?\n", 89 | "\n", 90 | "* Does the system **tend toward a steady state?**\n", 91 | " - i.e. does $A^n x \\to \\mbox{multiple of } x_0$ as $n \\to \\infty$?\n", 92 | " - i.e. is $\\lambda = 1$ the **largest** $|\\lambda|$?\n", 93 | " \n", 94 | "The answers are **YES** for **any Markov** matrix $A$, and **YES** for any *positive* Markov matrix (Markov matrices with entries $> 0$, not just $\\ge 0$). For *any* Markov matrix, all of the λ satisfy $|\\lambda| \\le 1$, but if there are zero entries in the matrix we *may* have multiple $|\\lambda|=1$ eigenvalues (though this doesn't happen often in practical Markov problems)." 
95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 30, 100 | "metadata": {}, 101 | "outputs": [ 102 | { 103 | "data": { 104 | "text/plain": [ 105 | "2-element Vector{Float64}:\n", 106 | " 0.7\n", 107 | " 1.0" 108 | ] 109 | }, 110 | "execution_count": 30, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "eigvals(A)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "Let's try just multiplying it many times by a \"random\" vector and see whether it is converging to a steady state:" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 31, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "2-element Vector{Float64}:\n", 135 | " 14.000000000000089\n", 136 | " 7.000000000000044" 137 | ] 138 | }, 139 | "execution_count": 31, 140 | "metadata": {}, 141 | "output_type": "execute_result" 142 | } 143 | ], 144 | "source": [ 145 | "A^100 * [17, 4]" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "Yes, it seems to be giving a vector that is not changing, which should be a multiple $c x_0$ of a steady-state eigenvector:" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 32, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "text/plain": [ 163 | "2-element Vector{Float64}:\n", 164 | " 14.000000000000874\n", 165 | " 7.000000000000437" 166 | ] 167 | }, 168 | "execution_count": 32, 169 | "metadata": {}, 170 | "output_type": "execute_result" 171 | } 172 | ], 173 | "source": [ 174 | "cx₀ = A^1000 * [17, 4]" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "metadata": {}, 180 | "source": [ 181 | "Let's check that this is an eigenvector of $A$ with eigenvalue $\\lambda=1$:" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 33, 187 | "metadata": {}, 188 | 
"outputs": [ 189 | { 190 | "data": { 191 | "text/plain": [ 192 | "2-element Vector{Float64}:\n", 193 | " 14.000000000000874\n", 194 | " 7.000000000000437" 195 | ] 196 | }, 197 | "execution_count": 33, 198 | "metadata": {}, 199 | "output_type": "execute_result" 200 | } 201 | ], 202 | "source": [ 203 | "A * cx₀" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "To see why, the key idea is to write the columns-sum-to-one property of Markov matrices in linear-algebra terms. It is equivalent to the statement:\n", 211 | "\n", 212 | "$$\n", 213 | "\\underbrace{\\begin{pmatrix} 1 & 1 & \\cdots & 1 & 1 \\end{pmatrix}}_{o^T} A = o^T\n", 214 | "$$\n", 215 | "\n", 216 | "since this is just the operation that sums all of the rows of $A$. Equivalently, if we transpose both sides:\n", 217 | "\n", 218 | "$$\n", 219 | "A^T o = o\n", 220 | "$$\n", 221 | "\n", 222 | "i.e. $o$ is an eigenvector of $A^T$ (called a **left eigenvector of A**) with eigenvalue $\\lambda = 1$.\n", 223 | "\n", 224 | "But since $A$ and $A^T$ have the **same eigenvalues** (they have the same characteristic polynomial $\\det (A - \\lambda I) = \\det (A^T - \\lambda I)$ because transposing doesn't change determinants), this means that $A$ **also has an eigenvalue 1** but with a **different eigenvector**." 
225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 34, 230 | "metadata": {}, 231 | "outputs": [ 232 | { 233 | "data": { 234 | "text/plain": [ 235 | "1×2 adjoint(::Vector{Float64}) with eltype Float64:\n", 236 | " 1.0 1.0" 237 | ] 238 | }, 239 | "execution_count": 34, 240 | "metadata": {}, 241 | "output_type": "execute_result" 242 | } 243 | ], 244 | "source": [ 245 | "o = [1,1]\n", 246 | "o'A" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 35, 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "data": { 256 | "text/plain": [ 257 | "2-element Vector{Float64}:\n", 258 | " 1.0\n", 259 | " 1.0" 260 | ] 261 | }, 262 | "execution_count": 35, 263 | "metadata": {}, 264 | "output_type": "execute_result" 265 | } 266 | ], 267 | "source": [ 268 | "A' * o" 269 | ] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "metadata": {}, 274 | "source": [ 275 | "An eigenvector of $A$ with eigenvalue $1$ must be a basis for $N(A - I)$:" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 36, 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "data": { 285 | "text/plain": [ 286 | "2×2 Matrix{Float64}:\n", 287 | " -0.1 0.2\n", 288 | " 0.1 -0.2" 289 | ] 290 | }, 291 | "execution_count": 36, 292 | "metadata": {}, 293 | "output_type": "execute_result" 294 | } 295 | ], 296 | "source": [ 297 | "A - 1*I" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "By inspection, $A - I$ is singular here: the second column is -2 times the first. 
So, $x_0 = (2,1)$ is a basis for its nullspace, and is a steady state:" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 37, 310 | "metadata": {}, 311 | "outputs": [ 312 | { 313 | "data": { 314 | "text/plain": [ 315 | "2-element Vector{Float64}:\n", 316 | " 5.551115123125783e-17\n", 317 | " 5.551115123125783e-17" 318 | ] 319 | }, 320 | "execution_count": 37, 321 | "metadata": {}, 322 | "output_type": "execute_result" 323 | } 324 | ], 325 | "source": [ 326 | "(A - I) * [2,1]" 327 | ] 328 | }, 329 | { 330 | "cell_type": "markdown", 331 | "metadata": {}, 332 | "source": [ 333 | "Let's check if some arbitrary starting vector $(3,0)$ tends towards this steady state:" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 38, 339 | "metadata": {}, 340 | "outputs": [ 341 | { 342 | "data": { 343 | "text/latex": [ 344 | "$\n", 345 | "\\begin{pmatrix} 3.0\\\\0.0\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.7\\\\0.3\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.49\\\\0.51\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.343\\\\0.657\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.24\\\\0.76\\end{pmatrix} \\longrightarrow \\\\\n", 346 | "\\begin{pmatrix} 2.24\\\\0.76\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.168\\\\0.832\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.118\\\\0.882\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.082\\\\0.918\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.058\\\\0.942\\end{pmatrix} \\longrightarrow \\\\\n", 347 | "\\begin{pmatrix} 2.058\\\\0.942\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.04\\\\0.96\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.028\\\\0.972\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.02\\\\0.98\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.014\\\\0.986\\end{pmatrix} \\longrightarrow \\\\\n", 348 | "\\begin{pmatrix} 2.014\\\\0.986\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.01\\\\0.99\\end{pmatrix} \\longrightarrow 
\\begin{pmatrix} 2.007\\\\0.993\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.005\\\\0.995\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.003\\\\0.997\\end{pmatrix} \\longrightarrow \\\\\n", 349 | "\\begin{pmatrix} 2.003\\\\0.997\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.002\\\\0.998\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.002\\\\0.998\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.001\\\\0.999\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.001\\\\0.999\\end{pmatrix} \\longrightarrow \\\\\n", 350 | "\\begin{pmatrix} 2.001\\\\0.999\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.001\\\\0.999\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.0\\\\1.0\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.0\\\\1.0\\end{pmatrix} \\longrightarrow \\begin{pmatrix} 2.0\\\\1.0\\end{pmatrix} \\longrightarrow \\\\\n", 351 | "$\n" 352 | ] 353 | }, 354 | "metadata": {}, 355 | "output_type": "display_data" 356 | } 357 | ], 358 | "source": [ 359 | "# The following code prints a sequence of Aⁿx values\n", 360 | "# for n=0,1,2,… nicely formatted with LaTeX.\n", 361 | "\n", 362 | "x = [3, 0]\n", 363 | "pmatrix(x) = string(\"\\\\begin{pmatrix} \", round(x[1],digits=3), \"\\\\\\\\\", round(x[2],digits=3), \"\\\\end{pmatrix}\")\n", 364 | "buf = IOBuffer()\n", 365 | "println(buf, \"\\$\")\n", 366 | "for k = 1:6\n", 367 | " print(buf, pmatrix(x), \" \\\\longrightarrow \")\n", 368 | " for i = 1:4\n", 369 | " x = A*x\n", 370 | " print(buf, pmatrix(x), \" \\\\longrightarrow \")\n", 371 | " end\n", 372 | " println(buf, \"\\\\\\\\\")\n", 373 | "end\n", 374 | "println(buf, \"\\$\")\n", 375 | "display(\"text/latex\", String(take!(buf)))" 376 | ] 377 | }, 378 | { 379 | "cell_type": "markdown", 380 | "metadata": {}, 381 | "source": [ 382 | "Yes! 
In fact, it tends to exactly $(2,1)$, because the other eigenvalue is $< 1$ (and hence that eigenvector component decays exponentially fast).\n", 383 | "\n", 384 | "An interesting property is that the **sum of the vector components is conserved** when we multiply by a Markov matrix. Given a vector $x$, $o^T x$ is the sum of its components. But $o^T A = o^T$, so:\n", 385 | "\n", 386 | "$$\n", 387 | "o^T A x = o^T x = o^T A^n x\n", 388 | "$$\n", 389 | "\n", 390 | "for any $n$! This is why $(3,0)$ must tend to $(2,1)$, and not to any other multiple of $(2,1)$, because both of them sum to 3. (The \"number of majors\" is conserved in this problem.)" 391 | ] 392 | }, 393 | { 394 | "cell_type": "markdown", 395 | "metadata": {}, 396 | "source": [ 397 | "## Why no eigenvalues > 1?\n", 398 | "\n", 399 | "Why are all $|\\lambda| \\le 1$ for a Markov matrix?\n", 400 | "\n", 401 | "The key fact is that the **product AB of two Markov matrices A and B is also Markov**. Reasons:\n", 402 | "\n", 403 | "* If $A$ and $B$ have nonnegative entries, $AB$ does as well: matrix multiplication uses only $\\times$ and $+$, and can't introduce a minus sign.\n", 404 | "\n", 405 | "* If $o^T A = o^T$ and $o^T B = o^T$ (both have columns summing to 1), then $o^T AB = o^T B = o^T$: the columns of $AB$ sum to 1.\n", 406 | "\n", 407 | "For example, $A^n$ is a Markov matrix for any $n$ if $A$ is Markov.\n", 408 | "\n", 409 | "Now, if there were an eigenvalue $|\\lambda| > 1$, the matrix $A^n$ would have to *blow up exponentially* as $n\\to \\infty$ (since the matrix times that eigenvector, or any vector with a nonzero component of that eigenvector, would blow up). But since $A^n$ is Markov, all of its entries must be between 0 and 1. It can't blow up! So we must have all $|\\lambda| \\le 1$." 
410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": 39, 415 | "metadata": {}, 416 | "outputs": [ 417 | { 418 | "data": { 419 | "text/plain": [ 420 | "2×2 Matrix{Float64}:\n", 421 | " 0.666667 0.666667\n", 422 | " 0.333333 0.333333" 423 | ] 424 | }, 425 | "execution_count": 39, 426 | "metadata": {}, 427 | "output_type": "execute_result" 428 | } 429 | ], 430 | "source": [ 431 | "A^100" 432 | ] 433 | }, 434 | { 435 | "cell_type": "markdown", 436 | "metadata": {}, 437 | "source": [ 438 | "(In fact, $A^n$ is pretty boring for large $n$: it just takes in any vector and redistributes it to the steady state.)\n", 439 | "\n", 440 | "Another way of thinking about $A^{100}$ is\n", 441 | "$$\n", 442 | "A^{100} = A^{100} \\begin{pmatrix} 1 & 0 \\\\ 0 & 1 \\end{pmatrix} =\n", 443 | "\\begin{pmatrix}\n", 444 | " A^{100} \\begin{pmatrix} 1 \\\\ 0 \\end{pmatrix} &\n", 445 | " A^{100} \\begin{pmatrix} 0 \\\\ 1 \\end{pmatrix}\n", 446 | "\\end{pmatrix}\n", 447 | "$$\n", 448 | "i.e. it multiplies $A^{100}$ by each column of the identity matrix (= different possible \"starting populations\"). Because of this, each column of $A^{100}$ tends towards an eigenvector with the biggest $|\\lambda|$." 449 | ] 450 | }, 451 | { 452 | "cell_type": "markdown", 453 | "metadata": {}, 454 | "source": [ 455 | "## Can there be more than one steady state?\n", 456 | "\n", 457 | "We have just showed that we have *at least one* eigenvalue $\\lambda = 1$, and that *all* eigenvalues satisfy $|\\lambda| \\le 1$. But can there be *more than one* independent eigenvector with $\\lambda = 1$?\n", 458 | "\n", 459 | "**Yes!** For example, the **identity matrix** $I$ is a Markov matrix, and *all* of its eigenvectors have eigenvalue $1$. Since $Ix = x$ for *any* $x$, *every vector is a steady state* for $I$!\n", 460 | "\n", 461 | "But this does not usually happen for *interesting* Markov matrices coming from real problems. 
In fact, there is a theorem:\n", 462 | "\n", 463 | "* If all the entries of a Markov matrix are $> 0$ (not just $\\ge 0$), then *exactly one* of its eigenvalues $\\lambda = 1$ (that eigenvalue has \"multiplicity 1\": $N(A-I)$ is one-dimensional), and **all other eigenvalues have** $|\\lambda| < 1$. There is a *unique steady state* (up to an overall scale factor).\n", 464 | "\n", 465 | "I'm not going to prove this in 18.06, however." 466 | ] 467 | }, 468 | { 469 | "cell_type": "markdown", 470 | "metadata": {}, 471 | "source": [ 472 | "## Can the solutions oscillate?\n", 473 | "\n", 474 | "If you have a Markov matrix with zero entries, then there might be more than one eigenvalue with $|\\lambda| = 1$, but these additional solutions might be *oscillating* solutions rather than steady states.\n", 475 | "\n", 476 | "For example, consider the permutation matrix\n", 477 | "$$\n", 478 | "P = \\begin{pmatrix} 0 & 1 \\\\ 1 & 0 \\end{pmatrix}\n", 479 | "$$\n", 480 | "that simply swaps the first and second entries of any 2-component vector.\n", 481 | "\n", 482 | "If $x = (1,0)$, then $P^n x$ will oscillate forever, never reaching a steady state! 
It simply oscillates between $(1,0)$ (for even $n$) and $(0,1)$ (for odd $n$):" 483 | ] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "execution_count": 40, 488 | "metadata": {}, 489 | "outputs": [ 490 | { 491 | "data": { 492 | "text/plain": [ 493 | "2×2 Matrix{Int64}:\n", 494 | " 0 1\n", 495 | " 1 0" 496 | ] 497 | }, 498 | "execution_count": 40, 499 | "metadata": {}, 500 | "output_type": "execute_result" 501 | } 502 | ], 503 | "source": [ 504 | "P = [0 1\n", 505 | " 1 0]" 506 | ] 507 | }, 508 | { 509 | "cell_type": "code", 510 | "execution_count": 41, 511 | "metadata": {}, 512 | "outputs": [ 513 | { 514 | "data": { 515 | "text/plain": [ 516 | "6-element Vector{Vector{Int64}}:\n", 517 | " [1, 0]\n", 518 | " [0, 1]\n", 519 | " [1, 0]\n", 520 | " [0, 1]\n", 521 | " [1, 0]\n", 522 | " [0, 1]" 523 | ] 524 | }, 525 | "execution_count": 41, 526 | "metadata": {}, 527 | "output_type": "execute_result" 528 | } 529 | ], 530 | "source": [ 531 | "[P^n * [1,0] for n = 0:5]" 532 | ] 533 | }, 534 | { 535 | "cell_type": "markdown", 536 | "metadata": {}, 537 | "source": [ 538 | "But this is a Markov matrix, so all $|\\lambda|$ are $\\le 1$:" 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "execution_count": 42, 544 | "metadata": {}, 545 | "outputs": [ 546 | { 547 | "data": { 548 | "text/plain": [ 549 | "2-element Vector{Float64}:\n", 550 | " -1.0\n", 551 | " 1.0" 552 | ] 553 | }, 554 | "execution_count": 42, 555 | "metadata": {}, 556 | "output_type": "execute_result" 557 | } 558 | ], 559 | "source": [ 560 | "eigvals(P)" 561 | ] 562 | }, 563 | { 564 | "cell_type": "markdown", 565 | "metadata": {}, 566 | "source": [ 567 | "The problem is that the $\\lambda = -1$ eigenvalue corresponds to an oscillating solution:\n", 568 | "\n", 569 | "$$\n", 570 | "P^n \\begin{pmatrix} 1 \\\\ -1 \\end{pmatrix} = (-1)^n \\begin{pmatrix} 1 \\\\ -1 \\end{pmatrix}\n", 571 | "$$\n", 572 | "\n", 573 | "for the eigenvector $(1,-1)$.\n", 574 | "\n", 575 | "The steady state still exists, 
corresponding to the eigenvector $(1,1)$:\n", 576 | "\n", 577 | "$$\n", 578 | "P^n \\begin{pmatrix} 1 \\\\ 1 \\end{pmatrix} = \\begin{pmatrix} 1 \\\\ 1 \\end{pmatrix}\n", 579 | "$$" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 43, 585 | "metadata": {}, 586 | "outputs": [ 587 | { 588 | "data": { 589 | "text/plain": [ 590 | "2×2 Matrix{Float64}:\n", 591 | " -0.707107 0.707107\n", 592 | " 0.707107 0.707107" 593 | ] 594 | }, 595 | "execution_count": 43, 596 | "metadata": {}, 597 | "output_type": "execute_result" 598 | } 599 | ], 600 | "source": [ 601 | "X = eigvecs(P) # the eigenvectors" 602 | ] 603 | }, 604 | { 605 | "cell_type": "code", 606 | "execution_count": 44, 607 | "metadata": {}, 608 | "outputs": [ 609 | { 610 | "data": { 611 | "text/plain": [ 612 | "2×2 Matrix{Float64}:\n", 613 | " 1.0 1.0\n", 614 | " -1.0 1.0" 615 | ] 616 | }, 617 | "execution_count": 44, 618 | "metadata": {}, 619 | "output_type": "execute_result" 620 | } 621 | ], 622 | "source": [ 623 | "X ./ X[1,:]' # normalize the first row to be 1, to resemble our hand solutions" 624 | ] 625 | }, 626 | { 627 | "cell_type": "markdown", 628 | "metadata": {}, 629 | "source": [ 630 | "Since $(1,0) = [(1,1) + (1,-1)]/2$, we have:\n", 631 | "\n", 632 | "$$\n", 633 | "P^n \\begin{pmatrix} 1 \\\\ 0 \\end{pmatrix} = \\frac{1}{2} \\left[ \\begin{pmatrix} 1 \\\\ 1 \\end{pmatrix} + \n", 634 | "(-1)^n \\begin{pmatrix} 1 \\\\ -1 \\end{pmatrix} \\right]\n", 635 | "$$\n", 636 | "\n", 637 | "which alternates between $(1,0)$ and $(0,1)$." 
638 | ] 639 | }, 640 | { 641 | "cell_type": "markdown", 642 | "metadata": {}, 643 | "source": [ 644 | "## Another example\n", 645 | "\n", 646 | "Let's generate a random 5x5 Markov matrix:" 647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "execution_count": 45, 652 | "metadata": {}, 653 | "outputs": [ 654 | { 655 | "data": { 656 | "text/plain": [ 657 | "5×5 Matrix{Float64}:\n", 658 | " 0.410618 0.306837 0.410031 0.707623 0.290909\n", 659 | " 0.307687 0.22414 0.676996 0.0455438 0.904309\n", 660 | " 0.999213 0.714056 0.357485 0.913338 0.715352\n", 661 | " 0.647026 0.995701 0.789245 0.577309 0.391341\n", 662 | " 0.73899 0.967951 0.914835 0.565266 0.447786" 663 | ] 664 | }, 665 | "execution_count": 45, 666 | "metadata": {}, 667 | "output_type": "execute_result" 668 | } 669 | ], 670 | "source": [ 671 | "M = rand(5,5) # random entries in [0,1]" 672 | ] 673 | }, 674 | { 675 | "cell_type": "code", 676 | "execution_count": 46, 677 | "metadata": {}, 678 | "outputs": [ 679 | { 680 | "data": { 681 | "text/plain": [ 682 | "1×5 Matrix{Float64}:\n", 683 | " 3.10353 3.20869 3.14859 2.80908 2.7497" 684 | ] 685 | }, 686 | "execution_count": 46, 687 | "metadata": {}, 688 | "output_type": "execute_result" 689 | } 690 | ], 691 | "source": [ 692 | "sum(M,dims=1) # not Markov yet" 693 | ] 694 | }, 695 | { 696 | "cell_type": "code", 697 | "execution_count": 47, 698 | "metadata": {}, 699 | "outputs": [ 700 | { 701 | "data": { 702 | "text/plain": [ 703 | "5×5 Matrix{Float64}:\n", 704 | " 0.132307 0.0956271 0.130227 0.251906 0.105797\n", 705 | " 0.0991408 0.0698541 0.215016 0.0162131 0.328876\n", 706 | " 0.32196 0.222539 0.113538 0.325138 0.260157\n", 707 | " 0.20848 0.310314 0.250666 0.205515 0.142322\n", 708 | " 0.238113 0.301666 0.290554 0.201228 0.162849" 709 | ] 710 | }, 711 | "execution_count": 47, 712 | "metadata": {}, 713 | "output_type": "execute_result" 714 | } 715 | ], 716 | "source": [ 717 | "M = M ./ sum(M,dims=1)" 718 | ] 719 | }, 720 | { 721 | "cell_type": "code", 722 
| "execution_count": 48, 723 | "metadata": {}, 724 | "outputs": [ 725 | { 726 | "data": { 727 | "text/plain": [ 728 | "1×5 Matrix{Float64}:\n", 729 | " 1.0 1.0 1.0 1.0 1.0" 730 | ] 731 | }, 732 | "execution_count": 48, 733 | "metadata": {}, 734 | "output_type": "execute_result" 735 | } 736 | ], 737 | "source": [ 738 | "sum(M,dims=1)" 739 | ] 740 | }, 741 | { 742 | "cell_type": "code", 743 | "execution_count": 49, 744 | "metadata": {}, 745 | "outputs": [ 746 | { 747 | "data": { 748 | "text/plain": [ 749 | "5-element Vector{ComplexF64}:\n", 750 | " -0.17776058595953462 + 0.0im\n", 751 | " -0.1352931760939033 + 0.0im\n", 752 | " -0.0014416561523480091 - 0.07745841517651891im\n", 753 | " -0.0014416561523480091 + 0.07745841517651891im\n", 754 | " 1.0000000000000004 + 0.0im" 755 | ] 756 | }, 757 | "execution_count": 49, 758 | "metadata": {}, 759 | "output_type": "execute_result" 760 | } 761 | ], 762 | "source": [ 763 | "eigvals(M)" 764 | ] 765 | }, 766 | { 767 | "cell_type": "code", 768 | "execution_count": 50, 769 | "metadata": {}, 770 | "outputs": [ 771 | { 772 | "data": { 773 | "text/plain": [ 774 | "5-element Vector{Float64}:\n", 775 | " 0.17776058595953462\n", 776 | " 0.1352931760939033\n", 777 | " 0.07747183006822272\n", 778 | " 0.07747183006822272\n", 779 | " 1.0000000000000004" 780 | ] 781 | }, 782 | "execution_count": 50, 783 | "metadata": {}, 784 | "output_type": "execute_result" 785 | } 786 | ], 787 | "source": [ 788 | "abs.(eigvals(M))" 789 | ] 790 | }, 791 | { 792 | "cell_type": "code", 793 | "execution_count": 51, 794 | "metadata": {}, 795 | "outputs": [ 796 | { 797 | "data": { 798 | "text/plain": [ 799 | "5-element Vector{Float64}:\n", 800 | " 0.05662283043686728\n", 801 | " 0.13759834468209436\n", 802 | " 0.3955727782747076\n", 803 | " 0.09136601599437516\n", 804 | " 0.3188400306119557" 805 | ] 806 | }, 807 | "execution_count": 51, 808 | "metadata": {}, 809 | "output_type": "execute_result" 810 | } 811 | ], 812 | "source": [ 813 | "x = rand(5)\n", 814 | 
"x = x / sum(x) # normalize x to have sum = 1" 815 | ] 816 | }, 817 | { 818 | "cell_type": "code", 819 | "execution_count": 52, 820 | "metadata": {}, 821 | "outputs": [ 822 | { 823 | "data": { 824 | "text/plain": [ 825 | "5-element Vector{Float64}:\n", 826 | " 0.14590618751218656\n", 827 | " 0.15842031877820567\n", 828 | " 0.24194675375641556\n", 829 | " 0.2186167846876927\n", 830 | " 0.23510995526549555" 831 | ] 832 | }, 833 | "execution_count": 52, 834 | "metadata": {}, 835 | "output_type": "execute_result" 836 | } 837 | ], 838 | "source": [ 839 | "M^100 * x" 840 | ] 841 | }, 842 | { 843 | "cell_type": "code", 844 | "execution_count": 53, 845 | "metadata": {}, 846 | "outputs": [ 847 | { 848 | "data": { 849 | "text/plain": [ 850 | "(1.0, 0.9999999999999962)" 851 | ] 852 | }, 853 | "execution_count": 53, 854 | "metadata": {}, 855 | "output_type": "execute_result" 856 | } 857 | ], 858 | "source": [ 859 | "sum(x), sum(M^100 * x) # still = 1" 860 | ] 861 | }, 862 | { 863 | "cell_type": "code", 864 | "execution_count": 54, 865 | "metadata": {}, 866 | "outputs": [ 867 | { 868 | "data": { 869 | "text/plain": [ 870 | "5-element Vector{ComplexF64}:\n", 871 | " 0.1459061875121874 + 0.0im\n", 872 | " 0.15842031877820623 + 0.0im\n", 873 | " 0.24194675375641644 + 0.0im\n", 874 | " 0.21861678468769355 + 0.0im\n", 875 | " 0.23510995526549633 + 0.0im" 876 | ] 877 | }, 878 | "execution_count": 54, 879 | "metadata": {}, 880 | "output_type": "execute_result" 881 | } 882 | ], 883 | "source": [ 884 | "λ, X = eigen(M)\n", 885 | "X[:,end] / sum(X[:,end]) # eigenvector for λ=1, normalized to sum=1" 886 | ] 887 | }, 888 | { 889 | "cell_type": "markdown", 890 | "metadata": {}, 891 | "source": [ 892 | "Again, $M^n x$ is approaching a steady-state ($\\lambda = 1$) eigenvector of $M$ as $n$ grows large." 
893 | ] 894 | } 895 | ], 896 | "metadata": { 897 | "@webio": { 898 | "lastCommId": null, 899 | "lastKernelId": null 900 | }, 901 | "kernelspec": { 902 | "display_name": "Julia 1.8.0", 903 | "language": "julia", 904 | "name": "julia-1.8" 905 | }, 906 | "language_info": { 907 | "file_extension": ".jl", 908 | "mimetype": "application/julia", 909 | "name": "julia", 910 | "version": "1.8.2" 911 | }, 912 | "widgets": { 913 | "state": { 914 | "e53e5f7b-c65e-4676-a564-3f8ee40c11c0": { 915 | "views": [ 916 | { 917 | "cell_index": 13 918 | } 919 | ] 920 | } 921 | }, 922 | "version": "1.2.0" 923 | } 924 | }, 925 | "nbformat": 4, 926 | "nbformat_minor": 1 927 | } 928 | -------------------------------------------------------------------------------- /notes/Linear Transformations.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 11, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "data": { 10 | "text/plain": [ 11 | "zero (generic function with 18 methods)" 12 | ] 13 | }, 14 | "execution_count": 11, 15 | "metadata": {}, 16 | "output_type": "execute_result" 17 | } 18 | ], 19 | "source": [ 20 | "# Preliminaries: Teach Julia that functions form a vector space\n", 21 | "import Base.+,Base.*,Base.zero\n", 22 | "+(f::Function,g::Function) = x -> f(x)+g(x)\n", 23 | "*(c::Number,f::Function) = x -> c*f(x)\n", 24 | "*(f::Function,c::Number) = x -> c*f(x)\n", 25 | "zero(Function) = x -> 0" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "## Examples of Linear Transformations \n", 33 | "\n", 34 | "Some operations are fairly obviously linear. No basis is needed to see this. It is\n", 35 | "efficient theoretically to treat these operations in one fell swoop in a unified way.\n", 36 | "\n", 37 | "For example the derivative of functions $f(x)$ is obviously linear. Derivatives of sums are sums of derivatives: (f+g)'=f'+g'. 
Derivatives of constant multiples are constant multiples of derivatives (cf)'=cf'.\n", 38 | "Another function transformation example that is obviously linear is the shift by a constant a: $f(x) \\rightarrow f(x+a):$\n", 39 | "\n" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 21, 45 | "metadata": {}, 46 | "outputs": [ 47 | { 48 | "data": { 49 | "text/plain": [ 50 | "T (generic function with 1 method)" 51 | ] 52 | }, 53 | "execution_count": 21, 54 | "metadata": {}, 55 | "output_type": "execute_result" 56 | } 57 | ], 58 | "source": [ 59 | "function T(f::Function)\n", 60 | " return x->f(x+1)\n", 61 | "end\n", 62 | " " 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 22, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "data": { 72 | "text/plain": [ 73 | "1×2 Array{Float64,2}:\n", 74 | " -0.279415 -0.279415" 75 | ] 76 | }, 77 | "execution_count": 22, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": [ 83 | "[T(sin)(5) sin(6)]" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 25, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "data": { 93 | "text/plain": [ 94 | "1×2 Array{Float64,2}:\n", 95 | " 2.32168 2.32168" 96 | ] 97 | }, 98 | "execution_count": 25, 99 | "metadata": {}, 100 | "output_type": "execute_result" 101 | } 102 | ], 103 | "source": [ 104 | "# An example check that shifting is linear\n", 105 | "# we check at x=5 that 2*T(sin)+3*T(cos) = T(2*sin+3*cos), where T denotes shift by one\n", 106 | "[( 2*T(sin) + 3*T(cos) )(5) T( 2*sin + 3*cos )(5)]" 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "metadata": {}, 112 | "source": [ 113 | "Another example considers the vector space of $m_1 \\times n_1$ matrices $X$. 
If we take a constant\n", 114 | "$m_2 \\times m_1$ matrix $B$ and a constant $n_2 \\times n_1$ matrix $A$ then the map $X \\rightarrow BXA^T$ is\n", 115 | "obviously a linear map from a vector space of dimension $m_1n_1$ to a vector space of dimension $m_2n_2$.\n", 116 | "(Check: $ B(c_1 X+c_2 Y)A^T= c_1 BXA^T + c_2 BYA^T$.)" 117 | ] 118 | }, 119 | { 120 | "cell_type": "markdown", 121 | "metadata": {}, 122 | "source": [ 123 | "## Example 1: Derivatives of (a cos x + b sin x) \n", 124 | "Consider the 2 dimensional vector space consisting of linear combinations of \"sine\" and \"cosine\". How can we take the derivative of a function in this vector space?" 125 | ] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "### Derivatives: Method 1, symbolically. Matches the paper and pencil method closely." 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 28, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "using SymPy" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 29, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "x,a,b = Sym.([\"x\",\"a\",\"b\"]);" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 42, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "data": { 159 | "text/latex": [ 160 | "$$- a \\sin{\\left (x \\right )} + b \\cos{\\left (x \\right )}$$" 161 | ], 162 | "text/plain": [ 163 | "-a*sin(x) + b*cos(x)" 164 | ] 165 | }, 166 | "execution_count": 42, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "diff( a*cos(x) + b*sin(x) ,x)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": {}, 178 | "source": [ 179 | "### Method 2, matrix-vector. Emphasizes the linear nature of derivatives. 
(Easy to imagine a numerical implementation.)\n", 180 | "\n", 181 | "$\\begin{pmatrix} a' \\\\b' \\end{pmatrix} = \n", 182 | "\\begin{pmatrix} 0 & 1 \\\\-1 & 0 \\end{pmatrix}\n", 183 | "\\begin{pmatrix} a \\\\b \\end{pmatrix}$\n", 184 | "\n", 185 | "Understanding: $\\begin{pmatrix} a' \\\\b' \\end{pmatrix}$ is shorthand for\n", 186 | "$a\\cos x + b\\sin x$." 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "Example: Differentiate $f(x)=5\\cos x + 2 \\sin x$:\n", 194 | "1. Encode f(x) for computation as the vector $\\begin{pmatrix} 5 \\\\ 2 \\end{pmatrix}.$\n", 195 | "2. Apply $\\begin{pmatrix} 0 & 1 \\\\-1 & 0 \\end{pmatrix}$\n", 196 | "to $\\begin{pmatrix} 5 \\\\ 2 \\end{pmatrix}$\n", 197 | "yielding $\\begin{pmatrix} 2 \\\\ -5 \\end{pmatrix}.$\n", 198 | "3. Decode $\\begin{pmatrix} 2 \\\\ -5 \\end{pmatrix}$ as\n", 199 | "$2 \\cos x -5 \\sin x.$" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "metadata": {}, 205 | "source": [ 206 | "### Method 3: no shorthand. Combines method 1 and method 2." 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "$\\frac{d}{dx} \\begin{pmatrix} \\sin x& \\cos x \\end{pmatrix}\n", 214 | "\\begin{pmatrix} a \\\\b \\end{pmatrix}\n", 215 | "=\n", 216 | "\\begin{pmatrix} \\sin x& \\cos x \\end{pmatrix}\n", 217 | "\\begin{pmatrix} 0 & -1 \\\\1 & 0 \\end{pmatrix}\n", 218 | "\\begin{pmatrix} a \\\\b \\end{pmatrix}\n", 219 | "$" 220 | ] 221 | }, 222 | { 223 | "cell_type": "markdown", 224 | "metadata": {}, 225 | "source": [ 226 | "Method 2 is purely numerical. The interpretation is imposed by\n", 227 | "the human. Method 3 can be interpreted as method 2 (matrix times vector) with the labels. 
\n", 228 | "\n", 229 | "If one associates differently, Method 3 can be interpeted as knowing\n", 230 | "the derivative on the basis functions is sufficient for knowing\n", 231 | "this linear transformation everywhere:\n", 232 | "\n", 233 | "$\\frac{d}{dx} \\begin{pmatrix} \\sin x& \\cos x \\end{pmatrix}\n", 234 | "=\n", 235 | "\\begin{pmatrix} \\sin x& \\cos x \\end{pmatrix}\n", 236 | "\\begin{pmatrix} 0 & -1 \\\\1 & 0 \\end{pmatrix}\n", 237 | "$" 238 | ] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | "metadata": {}, 243 | "source": [ 244 | "### Observation: \n", 245 | "Method 1 is straightforward but in the end\n", 246 | "bulky. Method 2 shows that the linear transformation defined\n", 247 | "by differentiating can be encoded as a simple matrix times vector,\n", 248 | "which is very efficient on a computer, and also gets to the algebraic heart of the operation. Method 3 organizes the symbolic with the matrices in a way that points to the generalization.\n", 249 | "
\n", 250 | "Most students of calculus learn that differentiation is linear.\n", 251 | "Derivatives of sums are sums of derivatives ,(f+g)'=f'+g'. Derivatives of\n", 252 | "constant multiples are constant multiples of derivatives (cf)'=cf'." 253 | ] 254 | }, 255 | { 256 | "cell_type": "markdown", 257 | "metadata": {}, 258 | "source": [ 259 | "### With code:" 260 | ] 261 | }, 262 | { 263 | "cell_type": "code", 264 | "execution_count": 76, 265 | "metadata": {}, 266 | "outputs": [ 267 | { 268 | "data": { 269 | "text/plain": [ 270 | "1×2 Array{Float64,2}:\n", 271 | " -1.29521 -1.29521" 272 | ] 273 | }, 274 | "execution_count": 76, 275 | "metadata": {}, 276 | "output_type": "execute_result" 277 | } 278 | ], 279 | "source": [ 280 | "f=([sin cos]*[0 1;-1 0]*[5,2])[1]\n", 281 | "x=rand()\n", 282 | "[f(x) 2sin(x)-5cos(x)] ## Check that it gives the right function" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "## In general\n", 290 | "\n", 291 | "If $v_1,\\ldots,v_n$ is a basis for a vector space $V$, \n", 292 | "and
$w_1,\\ldots,w_m$ is a basis for a vector space $W$,\n", 293 | "and $T$ is some linear transformation,\n", 294 | "we can write\n", 295 | "\n", 296 | "$$ T[v_1,\\ldots,v_n]\n", 297 | "\\begin{pmatrix} c_1 \\\\ \\vdots \\\\ c_n \\end{pmatrix}\n", 298 | "=\n", 299 | "[w_1,\\ldots,w_m] * A* \\begin{pmatrix} c_1 \\\\ \\vdots \\\\ c_n \\end{pmatrix}$$\n", 300 | "for some $m \\times n$ matrix $A$.\n", 301 | "\n", 302 | "One can associate the equation above concentrating\n", 303 | "on\n", 304 | "$$ T[v_1,\\ldots,v_n]\n", 305 | "=\n", 306 | "[w_1,\\ldots,w_m] * A$$\n", 307 | "\n", 308 | "\n", 309 | "to think of\n", 310 | "$T$ as applied to every basis vector of $V$ to get\n", 311 | "some linear combination of the basis vectors of $W$,\n", 312 | "or one can do \"Method 2\" and think of\n", 313 | "$\\begin{pmatrix} c_1 \\\\ \\vdots \\\\ c_n \\end{pmatrix}$\n", 314 | "as the coefficients in the basis for $V$, and \n", 315 | "$A\\begin{pmatrix} c_1 \\\\ \\vdots \\\\ c_n \\end{pmatrix}$\n", 316 | "as the cooeficients of $Tv$ in the basis for $W$." 
317 | ] 318 | }, 319 | { 320 | "cell_type": "markdown", 321 | "metadata": {}, 322 | "source": [ 323 | "## Example 2: Shifting (a cos x + b sin x)\n", 324 | "\n", 325 | "Convince yourself without matrices that\n", 326 | "$Tf$ defined by $ (Tf)(x)=f(x+\\theta)$ is linear for\n", 327 | "any constant $\\theta$.\n", 328 | "\n", 329 | "With matrices we have\n", 330 | "\n", 331 | "$T \\begin{pmatrix} \\sin x& \\cos x \\end{pmatrix}\n", 332 | "=\n", 333 | "\\begin{pmatrix} \\sin x& \\cos x \\end{pmatrix}\n", 334 | "\\begin{pmatrix} \\cos \\theta & -\\sin \\theta \\\\ \\sin \\theta & \\cos \\theta \\end{pmatrix}\n", 335 | "$\n", 336 | "or\n", 337 | "$T \\begin{pmatrix} \\sin x& \\cos x \\end{pmatrix}\n", 338 | "\\begin{pmatrix} a \\\\b \\end{pmatrix}\n", 339 | "=\n", 340 | "\\begin{pmatrix} \\sin x& \\cos x \\end{pmatrix}\n", 341 | "\\begin{pmatrix} \\cos \\theta & -\\sin \\theta \\\\ \\sin \\theta & \\cos \\theta \\end{pmatrix}\n", 342 | "\\begin{pmatrix} a \\\\b \\end{pmatrix}\n", 343 | "$\n" 344 | ] 345 | }, 346 | { 347 | "cell_type": "markdown", 348 | "metadata": {}, 349 | "source": [ 350 | "which can be done symbolically but gets a little messier looking. The linear algebra is just tidier." 
351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 87, 356 | "metadata": {}, 357 | "outputs": [ 358 | { 359 | "data": { 360 | "text/latex": [ 361 | "$$a \\sin{\\left (\\theta + x \\right )} + b \\cos{\\left (\\theta + x \\right )}$$" 362 | ], 363 | "text/plain": [ 364 | "a*sin(theta + x) + b*cos(theta + x)" 365 | ] 366 | }, 367 | "execution_count": 87, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | } 371 | ], 372 | "source": [ 373 | "x = Sym(\"x\")\n", 374 | "f = a*sin(x) + b*cos(x)\n", 375 | "Tf = subs(f,x,x+Sym(\"theta\"))" 376 | ] 377 | }, 378 | { 379 | "cell_type": "code", 380 | "execution_count": 90, 381 | "metadata": {}, 382 | "outputs": [ 383 | { 384 | "data": { 385 | "text/latex": [ 386 | "$$a \\left(\\sin{\\left (\\theta \\right )} \\cos{\\left (x \\right )} + \\sin{\\left (x \\right )} \\cos{\\left (\\theta \\right )}\\right) + b \\left(- \\sin{\\left (\\theta \\right )} \\sin{\\left (x \\right )} + \\cos{\\left (\\theta \\right )} \\cos{\\left (x \\right )}\\right)$$" 387 | ], 388 | "text/plain": [ 389 | "a*(sin(theta)*cos(x) + sin(x)*cos(theta)) + b*(-sin(theta)*sin(x) + cos(theta)\n", 390 | "*cos(x))" 391 | ] 392 | }, 393 | "execution_count": 90, 394 | "metadata": {}, 395 | "output_type": "execute_result" 396 | } 397 | ], 398 | "source": [ 399 | "expand_trig(Tf)" 400 | ] 401 | }, 402 | { 403 | "cell_type": "markdown", 404 | "metadata": {}, 405 | "source": [ 406 | "Of course Example 1 is a special case of Example 2 because\n", 407 | "the derivative is the same as shifting by $\\pi/2$ on this very special vector space." 408 | ] 409 | }, 410 | { 411 | "cell_type": "markdown", 412 | "metadata": {}, 413 | "source": [ 414 | "## Example 3. 
Change of basis for polynomials" 415 | ] 416 | }, 417 | { 418 | "cell_type": "markdown", 419 | "metadata": {}, 420 | "source": [ 421 | "Suppose one wants to work with Laguerre polynomials.\n", 422 | "Wikipidia can supply the first few of these for us:\n", 423 | " [Laguerre up to degree 6](https://en.wikipedia.org/wiki/Laguerre_polynomials#The_first_few_polynomials)." 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 85, 429 | "metadata": {}, 430 | "outputs": [ 431 | { 432 | "data": { 433 | "text/plain": [ 434 | "5×5 Array{Rational{Int64},2}:\n", 435 | " 1//1 1//1 1//1 1//1 1//1 \n", 436 | " 0//1 -1//1 -2//1 -3//1 -4//1 \n", 437 | " 0//1 0//1 1//2 3//2 3//1 \n", 438 | " 0//1 0//1 0//1 -1//6 -2//3 \n", 439 | " 0//1 0//1 0//1 0//1 1//24" 440 | ] 441 | }, 442 | "execution_count": 85, 443 | "metadata": {}, 444 | "output_type": "execute_result" 445 | } 446 | ], 447 | "source": [ 448 | "A = Rational.([\n", 449 | " 1 1 2 6 24\n", 450 | " 0 -1 -4 -18 -96\n", 451 | " 0 0 1 9 72\n", 452 | " 0 0 0 -1 -16\n", 453 | " 0 0 0 0 1])./[1 1 2 6 24]" 454 | ] 455 | }, 456 | { 457 | "cell_type": "markdown", 458 | "metadata": {}, 459 | "source": [ 460 | "Check from the Wikipedia article that\n", 461 | "$[L_0 \\ L_1 \\ L_2 \\ L_3 \\ L_4]=[1 \\ x \\ x^2 \\ x^3 \\ x^4] * A$" 462 | ] 463 | }, 464 | { 465 | "cell_type": "code", 466 | "execution_count": 86, 467 | "metadata": {}, 468 | "outputs": [ 469 | { 470 | "data": { 471 | "text/latex": [ 472 | "\\begin{bmatrix}1&- x + 1&\\frac{x^{2}}{2} - 2 x + 1&- \\frac{x^{3}}{6} + \\frac{3 x^{2}}{2} - 3 x + 1&\\frac{x^{4}}{24} - \\frac{2 x^{3}}{3} + 3 x^{2} - 4 x + 1\\end{bmatrix}" 473 | ], 474 | "text/plain": [ 475 | "1×5 Array{SymPy.Sym,2}:\n", 476 | " 1 -x + 1 x^2/2 - 2*x + 1 … x^4/24 - 2*x^3/3 + 3*x^2 - 4*x + 1" 477 | ] 478 | }, 479 | "execution_count": 86, 480 | "metadata": {}, 481 | "output_type": "execute_result" 482 | } 483 | ], 484 | "source": [ 485 | "[1 x x^2 x^3 x^4]*A" 486 | ] 487 | }, 488 | { 489 | "cell_type": 
"markdown", 490 | "metadata": {}, 491 | "source": [ 492 | "Convince yourself that to obtain\n", 493 | "the coefficients of \n", 494 | "$c_0 L_0 + c_1 L_1 + c_2 L_2 + c_3 L_3$\n", 495 | "in the standard basis $1,x,x^2,x^3$\n", 496 | "one must simply compute\n", 497 | "$A * \\begin{pmatrix} c_0 \\\\ c_1 \\\\ c_2 \\\\ c_3 \\end{pmatrix}.$\n", 498 | "\n", 499 | "
\n", 500 | "\n", 501 | "Notationally, we are saying that
\n", 502 | "$[L_0 \\ L_1 \\ L_2 \\ L_3]*\\begin{pmatrix} c_0 \\\\ c_1 \\\\ c_2 \\\\ c_3 \\end{pmatrix}=[1 \\ x \\ x^2 \\ x^3] * A*\\begin{pmatrix} c_0 \\\\ c_1 \\\\ c_2 \\\\ c_3 \\end{pmatrix}$" 503 | ] 504 | }, 505 | { 506 | "cell_type": "markdown", 507 | "metadata": {}, 508 | "source": [ 509 | "Of course inv(A) let's us go the other way\n", 510 | "(from monomial coefficients to Laguerre coefficients)" 511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": 103, 516 | "metadata": {}, 517 | "outputs": [ 518 | { 519 | "data": { 520 | "text/plain": [ 521 | "5×5 Array{Int64,2}:\n", 522 | " 1 1 2 6 24\n", 523 | " 0 -1 -4 -18 -96\n", 524 | " 0 0 2 18 144\n", 525 | " 0 0 0 -6 -96\n", 526 | " 0 0 0 0 24" 527 | ] 528 | }, 529 | "execution_count": 103, 530 | "metadata": {}, 531 | "output_type": "execute_result" 532 | } 533 | ], 534 | "source": [ 535 | "Int.(inv(A))" 536 | ] 537 | }, 538 | { 539 | "cell_type": "markdown", 540 | "metadata": {}, 541 | "source": [ 542 | "Thus for example
\n", 543 | "$x^3 = 6(L_0 - 3 L_1 + 3 L_2 - 1 L_3).$" 544 | ] 545 | }, 546 | { 547 | "cell_type": "markdown", 548 | "metadata": {}, 549 | "source": [ 550 | "Note: the numbers are pascal's triangle times factorials" 551 | ] 552 | }, 553 | { 554 | "cell_type": "markdown", 555 | "metadata": {}, 556 | "source": [ 557 | "What if we want to differentiate quartics written in a Laguerre polynomial basis but we only know how to differentiate in a monomial basis?\n", 558 | "
\n", 559 | "In the standard basis $1,x,x^2,x^3,x^4$ the derivative is this matrix:" 560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "execution_count": 89, 565 | "metadata": {}, 566 | "outputs": [ 567 | { 568 | "data": { 569 | "text/plain": [ 570 | "5×5 Array{Int64,2}:\n", 571 | " 0 1 0 0 0\n", 572 | " 0 0 2 0 0\n", 573 | " 0 0 0 3 0\n", 574 | " 0 0 0 0 4\n", 575 | " 0 0 0 0 0" 576 | ] 577 | }, 578 | "execution_count": 89, 579 | "metadata": {}, 580 | "output_type": "execute_result" 581 | } 582 | ], 583 | "source": [ 584 | "D=[0 1 0 0 0\n", 585 | " 0 0 2 0 0\n", 586 | " 0 0 0 3 0\n", 587 | " 0 0 0 0 4\n", 588 | " 0 0 0 0 0]" 589 | ] 590 | }, 591 | { 592 | "cell_type": "markdown", 593 | "metadata": {}, 594 | "source": [ 595 | " We claim\n", 596 | "$\\frac{d}{dx} [1 \\ x \\ x^2 \\ x^3 \\ x^4] = [1 \\ x \\ x^2 \\ x^3 \\ x^4]*D$" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": 90, 602 | "metadata": {}, 603 | "outputs": [ 604 | { 605 | "data": { 606 | "text/latex": [ 607 | "\\begin{bmatrix}1&x&x^{2}&x^{3}&x^{4}\\end{bmatrix}" 608 | ], 609 | "text/plain": [ 610 | "1×5 Array{SymPy.Sym,2}:\n", 611 | " 1 x x^2 x^3 x^4" 612 | ] 613 | }, 614 | "execution_count": 90, 615 | "metadata": {}, 616 | "output_type": "execute_result" 617 | } 618 | ], 619 | "source": [ 620 | "[1 x x^2 x^3 x^4]" 621 | ] 622 | }, 623 | { 624 | "cell_type": "code", 625 | "execution_count": 91, 626 | "metadata": {}, 627 | "outputs": [ 628 | { 629 | "data": { 630 | "text/latex": [ 631 | "\\begin{bmatrix}0&1&2 x&3 x^{2}&4 x^{3}\\end{bmatrix}" 632 | ], 633 | "text/plain": [ 634 | "1×5 Array{SymPy.Sym,2}:\n", 635 | " 0 1 2*x 3*x^2 4*x^3" 636 | ] 637 | }, 638 | "execution_count": 91, 639 | "metadata": {}, 640 | "output_type": "execute_result" 641 | } 642 | ], 643 | "source": [ 644 | "[1 x x^2 x^3 x^4]*D" 645 | ] 646 | }, 647 | { 648 | "cell_type": "code", 649 | "execution_count": 94, 650 | "metadata": {}, 651 | "outputs": [ 652 | { 653 | "data": { 654 | "text/plain": [ 655 | 
"5×5 Array{Int64,2}:\n", 656 | " 0 -1 -1 -1 -1\n", 657 | " 0 0 -1 -1 -1\n", 658 | " 0 0 0 -1 -1\n", 659 | " 0 0 0 0 -1\n", 660 | " 0 0 0 0 0" 661 | ] 662 | }, 663 | "execution_count": 94, 664 | "metadata": {}, 665 | "output_type": "execute_result" 666 | } 667 | ], 668 | "source": [ 669 | "## Now the derivative in a Laguerre Basis\n", 670 | "Int.(A\\D*A)" 671 | ] 672 | }, 673 | { 674 | "cell_type": "markdown", 675 | "metadata": {}, 676 | "source": [ 677 | "That's interesting. The pattern seems to suggest that\n", 678 | "
\n", 679 | "$\\frac{d}{dx}L_k(x) = -\\sum_{j=0}^{k-1}L_j(x)$\n", 680 | "which is a true identity.\n", 681 | "\n", 682 | "[The wikipedia article](https://en.wikipedia.org/wiki/Laguerre_polynomials) states\n", 683 | "right after the words Sheffer sequence that\n", 684 | "$$\\frac{d}{dx} L_n = -L_{n-1} + \\frac{d}{dx} L_{n-1}$$\n", 685 | "which you should recognize states that the $n$th column\n", 686 | "of the matrix $A^{-1}DA$ is the same as the $n-1$st column,\n", 687 | "with an extra $-1$ in the $(n-1)$st entry." 688 | ] 689 | }, 690 | { 691 | "cell_type": "code", 692 | "execution_count": 120, 693 | "metadata": {}, 694 | "outputs": [], 695 | "source": [ 696 | "# Not working yet\n", 697 | "# SymPy.mpmath[:laguerre](4,0,Sym(\"x\")) " 698 | ] 699 | }, 700 | { 701 | "cell_type": "markdown", 702 | "metadata": {}, 703 | "source": [ 704 | "# Change of Basis\n", 705 | "The above example shows how the derivative matrix can look when changing basis." 706 | ] 707 | }, 708 | { 709 | "cell_type": "code", 710 | "execution_count": 106, 711 | "metadata": {}, 712 | "outputs": [ 713 | { 714 | "data": { 715 | "text/plain": [ 716 | "5×5 Array{Int64,2}:\n", 717 | " 0 1 0 0 0\n", 718 | " 0 0 2 0 0\n", 719 | " 0 0 0 3 0\n", 720 | " 0 0 0 0 4\n", 721 | " 0 0 0 0 0" 722 | ] 723 | }, 724 | "execution_count": 106, 725 | "metadata": {}, 726 | "output_type": "execute_result" 727 | } 728 | ], 729 | "source": [ 730 | "# Derivative in standard basis\n", 731 | "D" 732 | ] 733 | }, 734 | { 735 | "cell_type": "code", 736 | "execution_count": 107, 737 | "metadata": {}, 738 | "outputs": [ 739 | { 740 | "data": { 741 | "text/plain": [ 742 | "5×5 Array{Int64,2}:\n", 743 | " 0 -1 -1 -1 -1\n", 744 | " 0 0 -1 -1 -1\n", 745 | " 0 0 0 -1 -1\n", 746 | " 0 0 0 0 -1\n", 747 | " 0 0 0 0 0" 748 | ] 749 | }, 750 | "execution_count": 107, 751 | "metadata": {}, 752 | "output_type": "execute_result" 753 | } 754 | ], 755 | "source": [ 756 | "# Derivative in Laguerre basis\n", 757 | "Int.(A\\D*A)" 758 | ] 759 | }, 760 
| { 761 | "cell_type": "markdown", 762 | "metadata": {}, 763 | "source": [ 764 | "Conclusion: Similar matrices represent the same linear transformation in a different basis" 765 | ] 766 | }, 767 | { 768 | "cell_type": "markdown", 769 | "metadata": {}, 770 | "source": [ 771 | "## Example 3: Kronecker Products" 772 | ] 773 | }, 774 | { 775 | "cell_type": "markdown", 776 | "metadata": {}, 777 | "source": [ 778 | "Let's check that the [Kronecker Product](https://en.wikipedia.org/wiki/Kronecker_product) is the matrix\n", 779 | "for the transformation $X\\rightarrow BXA^T$" 780 | ] 781 | }, 782 | { 783 | "cell_type": "code", 784 | "execution_count": 108, 785 | "metadata": {}, 786 | "outputs": [ 787 | { 788 | "data": { 789 | "text/plain": [ 790 | "4×4 Array{Int64,2}:\n", 791 | " 2 3 2 4\n", 792 | " 8 7 1 4\n", 793 | " 5 9 7 3\n", 794 | " 1 1 2 6" 795 | ] 796 | }, 797 | "execution_count": 108, 798 | "metadata": {}, 799 | "output_type": "execute_result" 800 | } 801 | ], 802 | "source": [ 803 | "A = rand(1:9,4,4)" 804 | ] 805 | }, 806 | { 807 | "cell_type": "code", 808 | "execution_count": 109, 809 | "metadata": {}, 810 | "outputs": [ 811 | { 812 | "data": { 813 | "text/plain": [ 814 | "4×4 Array{Int64,2}:\n", 815 | " 2 9 8 6\n", 816 | " 8 2 2 9\n", 817 | " 2 8 6 6\n", 818 | " 4 1 4 9" 819 | ] 820 | }, 821 | "execution_count": 109, 822 | "metadata": {}, 823 | "output_type": "execute_result" 824 | } 825 | ], 826 | "source": [ 827 | "B = rand(1:9,4,4)" 828 | ] 829 | }, 830 | { 831 | "cell_type": "code", 832 | "execution_count": 110, 833 | "metadata": {}, 834 | "outputs": [ 835 | { 836 | "data": { 837 | "text/plain": [ 838 | "16×16 Array{Int64,2}:\n", 839 | " 4 18 16 12 6 27 24 18 4 18 16 12 8 36 32 24\n", 840 | " 16 4 4 18 24 6 6 27 16 4 4 18 32 8 8 36\n", 841 | " 4 16 12 12 6 24 18 18 4 16 12 12 8 32 24 24\n", 842 | " 8 2 8 18 12 3 12 27 8 2 8 18 16 4 16 36\n", 843 | " 16 72 64 48 14 63 56 42 2 9 8 6 8 36 32 24\n", 844 | " 64 16 16 72 56 14 14 63 8 2 2 9 32 8 8 36\n", 845 | " 
16 64 48 48 14 56 42 42 2 8 6 6 8 32 24 24\n", 846 | " 32 8 32 72 28 7 28 63 4 1 4 9 16 4 16 36\n", 847 | " 10 45 40 30 18 81 72 54 14 63 56 42 6 27 24 18\n", 848 | " 40 10 10 45 72 18 18 81 56 14 14 63 24 6 6 27\n", 849 | " 10 40 30 30 18 72 54 54 14 56 42 42 6 24 18 18\n", 850 | " 20 5 20 45 36 9 36 81 28 7 28 63 12 3 12 27\n", 851 | " 2 9 8 6 2 9 8 6 4 18 16 12 12 54 48 36\n", 852 | " 8 2 2 9 8 2 2 9 16 4 4 18 48 12 12 54\n", 853 | " 2 8 6 6 2 8 6 6 4 16 12 12 12 48 36 36\n", 854 | " 4 1 4 9 4 1 4 9 8 2 8 18 24 6 24 54" 855 | ] 856 | }, 857 | "execution_count": 110, 858 | "metadata": {}, 859 | "output_type": "execute_result" 860 | } 861 | ], 862 | "source": [ 863 | "kron(A,B)" 864 | ] 865 | }, 866 | { 867 | "cell_type": "code", 868 | "execution_count": 112, 869 | "metadata": {}, 870 | "outputs": [ 871 | { 872 | "data": { 873 | "text/plain": [ 874 | "4×4 Array{Int64,2}:\n", 875 | " 6 2 7 2\n", 876 | " 5 2 8 4\n", 877 | " 6 1 4 5\n", 878 | " 4 4 5 3" 879 | ] 880 | }, 881 | "execution_count": 112, 882 | "metadata": {}, 883 | "output_type": "execute_result" 884 | } 885 | ], 886 | "source": [ 887 | "X = rand(1:9,4,4)" 888 | ] 889 | }, 890 | { 891 | "cell_type": "code", 892 | "execution_count": 115, 893 | "metadata": {}, 894 | "outputs": [ 895 | { 896 | "data": { 897 | "text/plain": [ 898 | "16×2 Array{Int64,2}:\n", 899 | " 1108 1108\n", 900 | " 880 880\n", 901 | " 974 974\n", 902 | " 758 758\n", 903 | " 1950 1950\n", 904 | " 1623 1623\n", 905 | " 1714 1714\n", 906 | " 1395 1395\n", 907 | " 2461 2461\n", 908 | " 2110 2110\n", 909 | " 2186 2186\n", 910 | " 1751 1751\n", 911 | " 1067 1067\n", 912 | " 780 780\n", 913 | " 930 930\n", 914 | " 687 687" 915 | ] 916 | }, 917 | "execution_count": 115, 918 | "metadata": {}, 919 | "output_type": "execute_result" 920 | } 921 | ], 922 | "source": [ 923 | "# The vec operator strings a matrix column wise into one long column\n", 924 | "[ kron(A,B)*vec(X) vec(B*X*A')]" 925 | ] 926 | }, 927 | { 928 | "cell_type": "markdown", 929 | 
"metadata": {}, 930 | "source": [ 931 | "You might check that kron(A,B) is the matrix of the linear transformation $X \\rightarrow BXA^T$\n", 932 | "in the following basis:" 933 | ] 934 | }, 935 | { 936 | "cell_type": "code", 937 | "execution_count": 119, 938 | "metadata": {}, 939 | "outputs": [ 940 | { 941 | "data": { 942 | "text/plain": [ 943 | "4×4 Array{Int64,2}:\n", 944 | " 1 0 0 0\n", 945 | " 0 0 0 0\n", 946 | " 0 0 0 0\n", 947 | " 0 0 0 0" 948 | ] 949 | }, 950 | "metadata": {}, 951 | "output_type": "display_data" 952 | }, 953 | { 954 | "data": { 955 | "text/plain": [ 956 | "4×4 Array{Int64,2}:\n", 957 | " 0 0 0 0\n", 958 | " 1 0 0 0\n", 959 | " 0 0 0 0\n", 960 | " 0 0 0 0" 961 | ] 962 | }, 963 | "metadata": {}, 964 | "output_type": "display_data" 965 | }, 966 | { 967 | "data": { 968 | "text/plain": [ 969 | "4×4 Array{Int64,2}:\n", 970 | " 0 0 0 0\n", 971 | " 0 0 0 0\n", 972 | " 1 0 0 0\n", 973 | " 0 0 0 0" 974 | ] 975 | }, 976 | "metadata": {}, 977 | "output_type": "display_data" 978 | }, 979 | { 980 | "data": { 981 | "text/plain": [ 982 | "4×4 Array{Int64,2}:\n", 983 | " 0 0 0 0\n", 984 | " 0 0 0 0\n", 985 | " 0 0 0 0\n", 986 | " 1 0 0 0" 987 | ] 988 | }, 989 | "metadata": {}, 990 | "output_type": "display_data" 991 | }, 992 | { 993 | "data": { 994 | "text/plain": [ 995 | "4×4 Array{Int64,2}:\n", 996 | " 0 1 0 0\n", 997 | " 0 0 0 0\n", 998 | " 0 0 0 0\n", 999 | " 0 0 0 0" 1000 | ] 1001 | }, 1002 | "metadata": {}, 1003 | "output_type": "display_data" 1004 | }, 1005 | { 1006 | "data": { 1007 | "text/plain": [ 1008 | "4×4 Array{Int64,2}:\n", 1009 | " 0 0 0 0\n", 1010 | " 0 1 0 0\n", 1011 | " 0 0 0 0\n", 1012 | " 0 0 0 0" 1013 | ] 1014 | }, 1015 | "metadata": {}, 1016 | "output_type": "display_data" 1017 | }, 1018 | { 1019 | "data": { 1020 | "text/plain": [ 1021 | "4×4 Array{Int64,2}:\n", 1022 | " 0 0 0 0\n", 1023 | " 0 0 0 0\n", 1024 | " 0 1 0 0\n", 1025 | " 0 0 0 0" 1026 | ] 1027 | }, 1028 | "metadata": {}, 1029 | "output_type": "display_data" 1030 | }, 1031 
| { 1032 | "data": { 1033 | "text/plain": [ 1034 | "4×4 Array{Int64,2}:\n", 1035 | " 0 0 0 0\n", 1036 | " 0 0 0 0\n", 1037 | " 0 0 0 0\n", 1038 | " 0 1 0 0" 1039 | ] 1040 | }, 1041 | "metadata": {}, 1042 | "output_type": "display_data" 1043 | }, 1044 | { 1045 | "data": { 1046 | "text/plain": [ 1047 | "4×4 Array{Int64,2}:\n", 1048 | " 0 0 1 0\n", 1049 | " 0 0 0 0\n", 1050 | " 0 0 0 0\n", 1051 | " 0 0 0 0" 1052 | ] 1053 | }, 1054 | "metadata": {}, 1055 | "output_type": "display_data" 1056 | }, 1057 | { 1058 | "data": { 1059 | "text/plain": [ 1060 | "4×4 Array{Int64,2}:\n", 1061 | " 0 0 0 0\n", 1062 | " 0 0 1 0\n", 1063 | " 0 0 0 0\n", 1064 | " 0 0 0 0" 1065 | ] 1066 | }, 1067 | "metadata": {}, 1068 | "output_type": "display_data" 1069 | }, 1070 | { 1071 | "data": { 1072 | "text/plain": [ 1073 | "4×4 Array{Int64,2}:\n", 1074 | " 0 0 0 0\n", 1075 | " 0 0 0 0\n", 1076 | " 0 0 1 0\n", 1077 | " 0 0 0 0" 1078 | ] 1079 | }, 1080 | "metadata": {}, 1081 | "output_type": "display_data" 1082 | }, 1083 | { 1084 | "data": { 1085 | "text/plain": [ 1086 | "4×4 Array{Int64,2}:\n", 1087 | " 0 0 0 0\n", 1088 | " 0 0 0 0\n", 1089 | " 0 0 0 0\n", 1090 | " 0 0 1 0" 1091 | ] 1092 | }, 1093 | "metadata": {}, 1094 | "output_type": "display_data" 1095 | }, 1096 | { 1097 | "data": { 1098 | "text/plain": [ 1099 | "4×4 Array{Int64,2}:\n", 1100 | " 0 0 0 1\n", 1101 | " 0 0 0 0\n", 1102 | " 0 0 0 0\n", 1103 | " 0 0 0 0" 1104 | ] 1105 | }, 1106 | "metadata": {}, 1107 | "output_type": "display_data" 1108 | }, 1109 | { 1110 | "data": { 1111 | "text/plain": [ 1112 | "4×4 Array{Int64,2}:\n", 1113 | " 0 0 0 0\n", 1114 | " 0 0 0 1\n", 1115 | " 0 0 0 0\n", 1116 | " 0 0 0 0" 1117 | ] 1118 | }, 1119 | "metadata": {}, 1120 | "output_type": "display_data" 1121 | }, 1122 | { 1123 | "data": { 1124 | "text/plain": [ 1125 | "4×4 Array{Int64,2}:\n", 1126 | " 0 0 0 0\n", 1127 | " 0 0 0 0\n", 1128 | " 0 0 0 1\n", 1129 | " 0 0 0 0" 1130 | ] 1131 | }, 1132 | "metadata": {}, 1133 | "output_type": "display_data" 1134 | 
}, 1135 | { 1136 | "data": { 1137 | "text/plain": [ 1138 | "4×4 Array{Int64,2}:\n", 1139 | " 0 0 0 0\n", 1140 | " 0 0 0 0\n", 1141 | " 0 0 0 0\n", 1142 | " 0 0 0 1" 1143 | ] 1144 | }, 1145 | "metadata": {}, 1146 | "output_type": "display_data" 1147 | } 1148 | ], 1149 | "source": [ 1150 | "for j=1:4, i=1:4\n", 1151 | " V=zeros(Int,4,4)\n", 1152 | " V[i,j]=1\n", 1153 | " display(V)\n", 1154 | "end" 1155 | ] 1156 | }, 1157 | { 1158 | "cell_type": "code", 1159 | "execution_count": null, 1160 | "metadata": {}, 1161 | "outputs": [], 1162 | "source": [] 1163 | } 1164 | ], 1165 | "metadata": { 1166 | "kernelspec": { 1167 | "display_name": "Julia 0.6.2", 1168 | "language": "julia", 1169 | "name": "julia-0.6" 1170 | }, 1171 | "language_info": { 1172 | "file_extension": ".jl", 1173 | "mimetype": "application/julia", 1174 | "name": "julia", 1175 | "version": "0.6.2" 1176 | } 1177 | }, 1178 | "nbformat": 4, 1179 | "nbformat_minor": 2 1180 | } 1181 | -------------------------------------------------------------------------------- /notes/QR in Julia.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Is Q mxn or nxn? It is both! This is useful! 
" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 13, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "data": { 17 | "text/plain": [ 18 | "4×4 LinearAlgebra.QRCompactWYQ{Float64,Array{Float64,2}}:\n", 19 | " -0.36166 0.916348 -0.135025 -0.106193 \n", 20 | " -0.875369 -0.396235 -0.176529 -0.21346 \n", 21 | " -0.216 0.0556349 0.974526 -0.0234073\n", 22 | " -0.237226 0.0144527 -0.0300856 0.970881 " 23 | ] 24 | }, 25 | "execution_count": 13, 26 | "metadata": {}, 27 | "output_type": "execute_result" 28 | } 29 | ], 30 | "source": [ 31 | "using LinearAlgebra\n", 32 | "A = rand(4,2)\n", 33 | "Q,R = qr(A)\n", 34 | "Q" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "## It looks 4x4 !!" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 14, 47 | "metadata": {}, 48 | "outputs": [ 49 | { 50 | "data": { 51 | "text/plain": [ 52 | "(4, 4)" 53 | ] 54 | }, 55 | "execution_count": 14, 56 | "metadata": {}, 57 | "output_type": "execute_result" 58 | } 59 | ], 60 | "source": [ 61 | "size(Q)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "## but wait! I can multiply by a vector of size 2" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 15, 74 | "metadata": {}, 75 | "outputs": [ 76 | { 77 | "data": { 78 | "text/plain": [ 79 | "4-element Array{Float64,1}:\n", 80 | " 1.4710364530489386 \n", 81 | " -1.6678387032016588 \n", 82 | " -0.10473038358204545\n", 83 | " -0.2083203318415276 " 84 | ] 85 | }, 86 | "execution_count": 15, 87 | "metadata": {}, 88 | "output_type": "execute_result" 89 | } 90 | ], 91 | "source": [ 92 | "Q * [1,2]" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "## what about size 3? !! 
(answer: no)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 16, 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "ename": "DimensionMismatch", 109 | "evalue": "DimensionMismatch(\"vector must have length either 4 or 2\")", 110 | "output_type": "error", 111 | "traceback": [ 112 | "DimensionMismatch(\"vector must have length either 4 or 2\")", 113 | "", 114 | "Stacktrace:", 115 | " [1] *(::LinearAlgebra.QRCompactWYQ{Float64,Array{Float64,2}}, ::Array{Int64,1}) at /Users/sabae/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.3/LinearAlgebra/src/qr.jl:563", 116 | " [2] top-level scope at In[16]:1" 117 | ] 118 | } 119 | ], 120 | "source": [ 121 | "Q * [1,2,3]" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "## what about size 4? (answer: yes)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 20, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "data": { 138 | "text/plain": [ 139 | "4-element Array{Float64,1}:\n", 140 | " 0.6411891565050274\n", 141 | " -3.051264992118778 \n", 142 | " 2.7252182423170517\n", 143 | " 3.5849468538868363" 144 | ] 145 | }, 146 | "execution_count": 20, 147 | "metadata": {}, 148 | "output_type": "execute_result" 149 | } 150 | ], 151 | "source": [ 152 | "Q * [1,2,3,4]" 153 | ] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "## What's going on??" 160 | ] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "metadata": {}, 165 | "source": [ 166 | "Q is not stored as elements, it is stored in a more compact form known as a [WY representation](https://www.researchgate.net/publication/23844885_A_Storage-Efficient_WY_Representation_for_Products_of_Householder_Transformations) which we do not cover in 18.06.\n", 167 | "This form not only saves memory, but allows us to complete the tall-skinny mxn Q into a full square orthogonal Q." 
168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "The \"extra\" vectors are an orthonormal set of vectors that are orthogonal to the column space of A. This is associated with the left nullspace of A." 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 1, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "data": { 184 | "text/plain": [ 185 | "4×4 Array{Float64,2}:\n", 186 | " 0.0486726 0.422254 0.0384858 0.324084\n", 187 | " 0.700173 0.479445 0.570882 0.340941\n", 188 | " 0.868803 0.791772 0.5067 0.294249\n", 189 | " 0.550927 0.680756 0.133569 0.127481" 190 | ] 191 | }, 192 | "execution_count": 1, 193 | "metadata": {}, 194 | "output_type": "execute_result" 195 | } 196 | ], 197 | "source": [ 198 | "A = rand(4,4)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 2, 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "data": { 208 | "text/plain": [ 209 | "4×3 Array{Float64,2}:\n", 210 | " 0.422254 0.0384858 0.324084\n", 211 | " 0.479445 0.570882 0.340941\n", 212 | " 0.791772 0.5067 0.294249\n", 213 | " 0.680756 0.133569 0.127481" 214 | ] 215 | }, 216 | "execution_count": 2, 217 | "metadata": {}, 218 | "output_type": "execute_result" 219 | } 220 | ], 221 | "source": [ 222 | "A[:,2:end]" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": 3, 228 | "metadata": {}, 229 | "outputs": [ 230 | { 231 | "data": { 232 | "text/plain": [ 233 | "4×3 Array{Float64,2}:\n", 234 | " 0.422254 0.0384858 0.324084\n", 235 | " 0.479445 0.570882 0.340941\n", 236 | " 0.791772 0.5067 0.294249\n", 237 | " 0.680756 0.133569 0.127481" 238 | ] 239 | }, 240 | "execution_count": 3, 241 | "metadata": {}, 242 | "output_type": "execute_result" 243 | } 244 | ], 245 | "source": [ 246 | "A[:,2:4]" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 4, 252 | "metadata": {}, 253 | "outputs": [ 254 | { 255 | "data": { 256 | "text/plain": [ 257 | 
"4×3 Array{Float64,2}:\n", 258 | " 0.422254 0.0384858 0.324084\n", 259 | " 0.479445 0.570882 0.340941\n", 260 | " 0.791772 0.5067 0.294249\n", 261 | " 0.680756 0.133569 0.127481" 262 | ] 263 | }, 264 | "execution_count": 4, 265 | "metadata": {}, 266 | "output_type": "execute_result" 267 | } 268 | ], 269 | "source": [ 270 | "A[:,[2,3,4]]" 271 | ] 272 | }, 273 | { 274 | "cell_type": "markdown", 275 | "metadata": {}, 276 | "source": [ 277 | "## Six Cases (kind of)\n", 278 | "1. Square (rank=n rank