├── Makefile
├── matrix_forensics.pdf
├── src
    ├── imgs
    │   ├── fund_theorem_lin_alg1.png
    │   ├── fund_theorem_lin_alg2.png
    │   ├── fund_theorem_lin_alg3.png
    │   ├── fund_theorem_lin_alg4.png
    │   └── fund_theorem_lin_alg5.png
    ├── Makefile
    ├── introduction.tex
    ├── title.tex
    ├── README.md
    ├── nomenclature.tex
    ├── eigenvalues.tex
    ├── z_math_commands.tex
    ├── norms.tex
    ├── plot_gen.py
    ├── updates.tex
    ├── algorithmics.tex
    ├── matrix_forensics.tex
    ├── decompositions.tex
    ├── optimization.tex
    ├── basics.tex
    ├── derivatives.tex
    ├── refs.bib
    └── rogue_gallery.tex
├── .gitignore
└── README.md


/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | 	$(MAKE) -C src/
3 | clean:
4 | 	$(MAKE) -C src/ clean


--------------------------------------------------------------------------------
/matrix_forensics.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-barnes/MatrixForensics/HEAD/matrix_forensics.pdf


--------------------------------------------------------------------------------
/src/imgs/fund_theorem_lin_alg1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-barnes/MatrixForensics/HEAD/src/imgs/fund_theorem_lin_alg1.png


--------------------------------------------------------------------------------
/src/imgs/fund_theorem_lin_alg2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-barnes/MatrixForensics/HEAD/src/imgs/fund_theorem_lin_alg2.png


--------------------------------------------------------------------------------
/src/imgs/fund_theorem_lin_alg3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-barnes/MatrixForensics/HEAD/src/imgs/fund_theorem_lin_alg3.png


--------------------------------------------------------------------------------
/src/imgs/fund_theorem_lin_alg4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-barnes/MatrixForensics/HEAD/src/imgs/fund_theorem_lin_alg4.png


--------------------------------------------------------------------------------
/src/imgs/fund_theorem_lin_alg5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-barnes/MatrixForensics/HEAD/src/imgs/fund_theorem_lin_alg5.png


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | matrix_forensics.aux
 2 | matrix_forensics.bbl
 3 | matrix_forensics.blg
 4 | matrix_forensics.log
 5 | matrix_forensics.out
 6 | matrix_forensics.toc
 7 | src/matrix_forensics.pdf
 8 | imgs/
 9 | refs/
10 | *.idx
11 | *.ilg
12 | *.ind
13 | texput.log
14 | 
15 | 


--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------
 1 | all:
 2 | 	#./plot_gen.py
 3 | 	pdflatex --shell-escape matrix_forensics.tex
 4 | 	bibtex matrix_forensics.aux
 5 | 	makeindex matrix_forensics.idx
 6 | 	pdflatex --shell-escape matrix_forensics.tex
 7 | 	pdflatex --shell-escape matrix_forensics.tex
 8 | 	cp matrix_forensics.pdf ../
 9 | 
10 | clean:
11 | 	rm -f *.bbl *.aux *.run.xml *.bcf *.blg *.out *.fff *.log *.lot *.ttt *.toc *.pyg *.idx *.ind *.ilg *.lof *.spl *-blx.bib *.dvi *.fls *.fdb_latexmk *.listing *.aux.blg


--------------------------------------------------------------------------------
/src/introduction.tex:
--------------------------------------------------------------------------------
 1 | \chapter{Introduction}
 2 | 
 3 | \textbf{Goals:}
 4 | \begin{enumerate}
 5 | \item The primary goal of \textit{Matrix Forensics} is to \textbf{solve crimes of matrix math}.
 6 | That is, to make the sometimes mystifying manipulations of matrix math more understandable by cataloging useful identities, transformations, and facts.
 7 | 
 8 | \item \textbf{To be a community-accessible project.} Anyone can contribute to the project. The source code for the book is available on Github and the source code has been thoughtfully arranged with handy macros to help maintain an easy-to-use, aesthetic, and consistent notation and typography.
 9 | \end{enumerate}
10 | 
11 | 
12 | \textbf{Contributing:}
13 | Please contribute on Github at \url{https://github.com/r-barnes/MatrixForensics} either by opening an issue or making a pull request. If you are not comfortable with this, please send your contribution to \url{rijard.barnes@gmail.com}.
14 | 
15 | 
16 | \textbf{Contributors:}
17 | Richard Barnes
18 | 
19 | \textbf{Funding:}
20 | 
21 | The Department of Energy Computational Science Graduate Fellowship (grant DE-FG02-97ER25308). %Richard Barnes


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Click [here](matrix_forensics.pdf) for the PDF
 2 | 
 3 | Matrix Forensics
 4 | ================
 5 | 
 6 | Matrix Forensics is an extensive, yet to-the-point reference manual for matrix math. It is designed to be a quick reference for:
 7 | 
 8 | * Identifying valid transformations
 9 | * Reminding yourself of, e.g., matrix calculus identities
10 | * Finding appropriate computational shortcuts, such as perturbations
11 | * And more!
12 | 
13 | The guide is freely available as a PDF [here](matrix_forensics.pdf).
14 | 
15 | 
16 | 
17 | Building
18 | ----------------
19 | 
20 | On a Linux-based system type `make` to build the book.
21 | 
22 | 
23 | 
24 | Contributing
25 | ----------------
26 | 
27 | You can contribute to Matrix Forensics either by opening an [Issue](https://github.com/r-barnes/MatrixForensics/issues) or submitting changes yourself via a [Pull Request](https://github.com/r-barnes/MatrixForensics/pulls).
28 | 
29 | If your change involves significant reorganization, please open an [Issue](https://github.com/r-barnes/MatrixForensics/issues) first to discuss it.
30 | 
31 | A [README](src/README.md) file is available discussing the structure of the book so you can more easily get started contributing to it.
32 | 


--------------------------------------------------------------------------------
/src/title.tex:
--------------------------------------------------------------------------------
 1 | \begin{titlepage} % Suppresses displaying the page number on the title page and the subsequent page counts as page 1
 2 |   \raggedleft % Right align the title page
 3 | 
 4 |   \rule{1pt}{\textheight} % Vertical line
 5 |   \hspace{0.05\textwidth} % Whitespace between the vertical line and title page text
 6 |   \parbox[b]{0.75\textwidth}{ % Paragraph box for holding the title page text, adjust the width to move the title page left or right on the page
 7 | 
 8 |     {\Huge\bfseries Matrix Forensics \\[\baselineskip]} % Title
 9 |     {\large\textit{Solving crimes of matrix math}}\\[4\baselineskip] % Subtitle or further description
10 |     % {\large\textit{A brief guide to matrix math \\ and its efficient implementation}}\\[4\baselineskip] % Subtitle or further description
11 |     {\Large\textsc{richard barnes}} % Author name, lower case for consistent small caps
12 |     \\[4\baselineskip]
13 |     \immediate\write18{ git rev-parse HEAD | cut -b 1-10 > /tmp/matrix_forensics_version.info }
14 | 
15 |     Git Hash: \input{/tmp/matrix_forensics_version.info} \\
16 |     Compiled on: \today\ at \currenttime
17 |     \\[2\baselineskip]
18 |     \href{https://github.com/r-barnes/MatrixForensics}{github.com/r-barnes/MatrixForensics}
19 | 
20 |     \vspace{0.4\textheight} % Whitespace between the title block and the publisher
21 | 
22 |     %{\noindent The Publisher~~\plogo}\\[\baselineskip] % Publisher and logo
23 |   }
24 | 
25 | \end{titlepage}


--------------------------------------------------------------------------------
/src/README.md:
--------------------------------------------------------------------------------
 1 | Contributors Guide
 2 | ======================================
 3 | 
 4 | Files
 5 | --------------------------------------
 6 | 
 7 | This subdirectory contains several files. Most are self-explanatory and correspond to sections of the book. A few we describe here:
 8 | 
 9 | * [z_math_commands.tex](z_math_commands.tex) - All of the math commands used
10 | * [plot_gen.py](plot_gen.py) - Used to build the plots and diagrams in the book
11 | * [Makefile](Makefile) - Used to build the book
12 | * [refs.bib](refs.bib) - Bibliography
13 | 
14 | 
15 | 
16 | Contributing Equations
17 | --------------------------------------
18 | 
19 | Note that `z_math_commands.tex` contains extensive simplifying commands for writing equations.
20 | 
21 | In general equations should be typeset as follows:
22 | ```
23 | \begin{equation}
24 | \label{equ:equ_name} %Optional
25 | \eqcite{Thome2016}
26 | \mA = \mB * \mC
27 | \end{equation}
28 | ```
29 | Note that `\eqcite{Thome2016}` typesets a citation to `Thome2016` which is an entry in [refs.bib](refs.bib)
30 | 
31 | Multiple aligned equations can be typeset as follows. Note the careful alignment within the TeX source to improve readability.
32 | ```
33 | \begin{align}
34 | \label{equ:equ_name} %Optional
35 | \mA       &= \mB * \mC        \eqcite{Thome2016} \label{equ:a}  \\
36 | \mA + \mB &= \mB + \mC + \mD  \eqcite{Adam2013}  \label{equ:b}  \\
37 | \mA + \mB &= \mB * \mC + \mE  \eqcite{Jane2020}  \label{equ:c}  \\
38 | \end{align}
39 | ```
40 | 


--------------------------------------------------------------------------------
/src/nomenclature.tex:
--------------------------------------------------------------------------------
 1 | \chapter{Nomenclature}
 2 | 
 3 | \begin{tabular}{cl}
 4 | $\mA$                   & Matrix.                                                   \\
 5 | $\va$                   & (Column) vector.                                          \\
 6 | $a$                     & Scalar.                                                   \\
 7 | $\lambda$               & An eigenvalue of a matrix.                                \\
 8 | & \\
 9 | $\mA_{ij}$              & Matrix indexed. Returns $i$th row and $j$th column.       \\
10 | $\mA\circ \mB$          & Hadamard (element-wise) product of matrices A and B.      \\
11 | $\ns(\mA)$              & Nullspace of the matrix $\mA$.                            \\
12 | $\range(\mA)$           & Range of the matrix $\mA$.                                \\
13 | $\det(\mA)$             & Determinant of the matrix $\mA$.                          \\
14 | $\eig(\mA)$             & Eigenvalues of the matrix $\mA$.                          \\
15 | $\mA^H$                 & Conjugate transpose of the matrix $\mA$.                  \\
16 | $\mA^T$                 & Transpose of the matrix $\mA$.                            \\
17 | $\mA\pinv$              & Pseudoinverse of the matrix $\mA$.                        \\
18 | $\vx\in\sRn$            & The entries of the $n$-vector $\vx$ are all real numbers. \\
19 | $\mA\in\sRmn$           & The entries of the matrix $\mA$ with $m$ rows and $n$ columns are all real numbers. \\
20 | $\mA\in\sSn$            & The matrix $\mA$ is symmetric and has $n$ rows and $n$ columns. \\
21 | & \\
22 | $\mI_n$                 & Identity matrix with $n$ rows and $n$ columns.            \\
23 | & \\
24 | $\{0\}$                 & The empty set       \\
25 | $\sR$                   & The real numbers    \\
26 | $\sC$                   & The complex numbers
27 | \end{tabular}


--------------------------------------------------------------------------------
/src/eigenvalues.tex:
--------------------------------------------------------------------------------
 1 | \chapter{Eigenvalue Properties}
 2 | 
 3 | $\lambda\in\mathbb{C}$ is an eigenvalue of $\mA\in\sRnn$ and $u\in\mathbb{C}^n$ is a corresponding eigenvector if $\mA\vu=\lambda\vu$ and $\vu\ne0$. Equivalantly, $(\lambda \mI_n-\mA)\vu=0$ and $\vu\ne0$. Eigenvalues satisfy the equation $\det(\lambda\mI_n-\mA)=0$.
 4 | 
 5 | Any matrix $\mA\in\sRnn$ has $n$ eigenvalues, though some may be repeated. $\lambda_1$ is the largest eigenvalue and $\lambda_n$ the smallest.
 6 | 
 7 | If $\lambda$ is an eigenvalue of $\mA$, $\lambda^2$ is an eigenvalue of $\mA^2$.
 8 | 
 9 | \begin{equation}
10 | \eig(\mA\mA^T)=\eig(\mA^T\mA)
11 | \end{equation}
12 | (Note that the number of entries in $\mA\mA^T$ and $\mA^T\mA$ may differ significantly leading to different compute times.)
13 | 
14 | \begin{equation}
15 | \eig(\mA^T\mA)\ge0
16 | \end{equation}
17 | 
18 | \begin{equation}
19 | \lambda_\textrm{min}(\mA)\le \frac{\vx^T \mA \vx}{\vx^T\vx} \le \lambda_\textrm{max}(\mA)~~\vx\ne0
20 | \end{equation}
21 | 
22 | \section{Weyl's Inequality}
23 | If $\mM,\mH,\mP\in\sRnn$ are Hermitian matrices and $\mM=\mH+\mP$ ($\mH$ is perturbed by $\mP$) and $\mM$ has eigenvalues $\mu_1\ge\cdots\ge\mu_n$, $\mH$ has eigenvalues $\nu_1\ge\cdots\ge\nu_n$, and $\mP$ has eigenvalues $\rho_1\ge\cdots\ge\rho_n$, then
24 | \begin{equation}
25 | \nu_i+\rho_n\le \mu_i \le \nu_i + \rho_1~\forall i
26 | \end{equation}
27 | If $j+k-n\ge i \ge r+s-1$, then
28 | \begin{equation}
29 | \nu_j+\rho_k\le\mu_i\le\nu_r+\rho_s
30 | \end{equation}
31 | If $\mP\ispsd0$, then $\mu_i>\nu_i~\forall i$.
32 | 
33 | %TODO
34 | % \section*{Computation}
35 | % TODO: eigsh, small eigen value extraction, top-k
36 | 
37 | \section{Estimating Eigenvalues}
38 | \subsection{Gershgorin circle theorem}
39 | For $\mA\in\sCnn$ with entries $a_{ij}$ let $R_i=\sum_{j\ne i} |a_{ij}|$ be the sum of the absolute values of the non-diagonal entries of the $i$-th row. Let $D(a_{ii},R_i)\subseteq\sC$ be a closed disc (a circle containing its boundary) centered at $a_{ii}$ with radius $R_i$. This is the Gershgorin disc.
40 | 
41 | Every eigenvalue of $\mA$ lies within at least one of the $D(a_{ii},R_i)$. Further, if the union of $k$ such discs is disjoint from the union of the other $n-k$ discs then the former union contains exactly $k$ and the latter $n-k$ of the eigenvalues of $\mA$.


--------------------------------------------------------------------------------
/src/z_math_commands.tex:
--------------------------------------------------------------------------------
  1 | % This file contains all of the mathematical commands used in the book
  2 | 
  3 | \renewcommand*{\pd}[3][]{\ensuremath{\frac{\partial^{#1} #2}{\partial #3}}}
  4 | 
  5 | \newcommand{\mA}{\mathbf{A}}
  6 | \newcommand{\mB}{\mathbf{B}}
  7 | \newcommand{\mC}{\mathbf{C}}
  8 | \newcommand{\mD}{\mathbf{D}}
  9 | \newcommand{\mE}{\mathbf{E}}
 10 | \newcommand{\mF}{\mathbf{F}}
 11 | \newcommand{\mH}{\mathbf{H}}
 12 | \newcommand{\mI}{\mathbf{I}}
 13 | \newcommand{\mJ}{\mathbf{J}}
 14 | \newcommand{\mL}{\mathbf{L}}
 15 | \newcommand{\mM}{\mathbf{M}}
 16 | \newcommand{\mP}{\mathbf{P}}
 17 | \newcommand{\mQ}{\mathbf{Q}}
 18 | \newcommand{\mR}{\mathbf{R}}
 19 | \newcommand{\mS}{\mathbf{S}}
 20 | \newcommand{\mU}{\mathbf{U}}
 21 | \newcommand{\mV}{\mathbf{V}}
 22 | \newcommand{\mX}{\mathbf{X}}
 23 | \newcommand{\mY}{\mathbf{Y}}
 24 | 
 25 | \newcommand{\mAi}{\mathbf{A}^{-1}}
 26 | \newcommand{\mBi}{\mathbf{B}^{-1}}
 27 | \newcommand{\mCi}{\mathbf{C}^{-1}}
 28 | \newcommand{\mPi}{\mathbf{P}^{-1}}
 29 | \newcommand{\mRi}{\mathbf{R}^{-1}}
 30 | \newcommand{\mXi}{\mathbf{X}^{-1}}
 31 | \newcommand{\mYi}{\mathbf{Y}^{-1}}
 32 | 
 33 | \newcommand{\mXp}{\mathbf{X}^{+}}
 34 | 
 35 | 
 36 | %%%%% TRANSPOSES
 37 | \newcommand{\T}{^\mathsf{T}}
 38 | \newcommand{\mAT}{\mathbf{A}^{\mathsf{T}}}
 39 | \newcommand{\mBT}{\mathbf{A}^{\mathsf{T}}}
 40 | \newcommand{\mCT}{\mathbf{A}^{\mathsf{T}}}
 41 | \newcommand{\mDT}{\mathbf{A}^{\mathsf{T}}}
 42 | \newcommand{\mET}{\mathbf{A}^{\mathsf{T}}}
 43 | \newcommand{\mXT}{\mathbf{X}^{\mathsf{T}}}
 44 | 
 45 | \newcommand{\mXiT}{\mathbf{X}^{-\mathsf{T}}}
 46 | 
 47 | \newcommand{\va}{\mathbf{a}}
 48 | \newcommand{\vb}{\mathbf{b}}
 49 | \newcommand{\vc}{\mathbf{c}}
 50 | \newcommand{\vd}{\mathbf{d}}
 51 | \newcommand{\ve}{\mathbf{e}}
 52 | \newcommand{\vf}{\mathbf{f}}
 53 | \newcommand{\vg}{\mathbf{g}}
 54 | \newcommand{\vp}{\mathbf{p}}
 55 | \newcommand{\vq}{\mathbf{q}}
 56 | \newcommand{\vu}{\mathbf{u}}
 57 | \newcommand{\vv}{\mathbf{v}}
 58 | \newcommand{\vw}{\mathbf{w}}
 59 | \newcommand{\vx}{\mathbf{x}}
 60 | \newcommand{\vy}{\mathbf{y}}
 61 | \newcommand{\vz}{\mathbf{z}}
 62 | 
 63 | \newcommand{\vaT}{\mathbf{a}^\mathsf{T}}
 64 | \newcommand{\vbT}{\mathbf{b}^\mathsf{T}}
 65 | \newcommand{\vuT}{\mathbf{u}^\mathsf{T}}
 66 | \newcommand{\vvT}{\mathbf{v}^\mathsf{T}}
 67 | \newcommand{\vxT}{\mathbf{x}^\mathsf{T}}
 68 | 
 69 | \newcommand{\vzero}{\mathbf{0}}
 70 | \DeclareMathOperator{\diag}{diag}
 71 | \DeclareMathOperator{\eig}{eig}
 72 | \DeclareMathOperator{\trace}{tr}
 73 | \DeclareMathOperator{\rank}{rank}
 74 | \DeclareMathOperator{\nnz}{nnz}
 75 | \newcommand{\sPSD}{\mathbb{S}^n_+}
 76 | \newcommand{\sC}{\mathbb{C}}
 77 | \newcommand{\sCmn}{\mathbb{C}^{m,n}}
 78 | \newcommand{\sCnn}{\mathbb{C}^{n,n}}
 79 | \newcommand{\sR}{\mathbb{R}}
 80 | \newcommand{\sRm}{\mathbb{R}^{m}}
 81 | \newcommand{\sRn}{\mathbb{R}^{n}}
 82 | \newcommand{\sRp}{\mathbb{R}^{p}}
 83 | \newcommand{\sRkk}{\mathbb{R}^{k,k}}
 84 | \newcommand{\sRkn}{\mathbb{R}^{k,n}}
 85 | \newcommand{\sRnm}{\mathbb{R}^{n,m}}
 86 | \newcommand{\sRmn}{\mathbb{R}^{m,n}}
 87 | \newcommand{\sRnn}{\mathbb{R}^{n,n}}
 88 | \newcommand{\sRnk}{\mathbb{R}^{n,k}}
 89 | \newcommand{\sRnp}{\mathbb{R}^{n,p}}
 90 | \newcommand{\sRnr}{\mathbb{R}^{n,r}}
 91 | \newcommand{\sRmm}{\mathbb{R}^{m,m}}
 92 | \newcommand{\sSn}{\mathbb{S}^{n}}
 93 | \newcommand{\ispsd}{\succeq}
 94 | \newcommand{\ispd}{\succ}
 95 | \newcommand{\pinv}{\!^+}
 96 | \newcommand{\ns}{\mathcal{N}}
 97 | \newcommand{\range}{\mathcal{R}}
 98 | \newcommand{\bs}{\setminus}
 99 | \newcommand{\kp}{\otimes}       %Kronecker product
100 | \newcommand{\hp}{\circ}         %Hadamard product
101 | \newcommand{\grad}{\nabla}      %Gradient operator


--------------------------------------------------------------------------------
/src/norms.tex:
--------------------------------------------------------------------------------
  1 | \chapter{Norms}
  2 | 
  3 | \section{General Properties}
  4 | Matrix norms satisfy some properties:
  5 | \begin{align}
  6 | f(\mA)    &\ge 0             \\
  7 | f(\mA)    &=   0  \iff \mA=0 \\
  8 | f(c\mA)   &=   |c|f(\mA)     \\
  9 | f(\mA+\mB)&\le f(\mA)+f(\mB)
 10 | \end{align}
 11 | Many popular norms also satisfy ``sub-multiplicativity": $f(\mA\mB)\le f(\mA)f(\mB)$.
 12 | 
 13 | \section{Matrices}
 14 | 
 15 | \subsection{Frobenius norm}
 16 | \begin{align}
 17 | \norm{\mA}_F &= \sqrt{\trace\mA\mA^H}                           \\
 18 |              &= \sqrt{\sum_{i=1}^m \sum_{j=1}^n |\mA_{ij}|^2 }  \\
 19 |              &= \sqrt{\sum_{i=1}^m \eig(A^H A)_i }
 20 | \end{align}
 21 | 
 22 | \subsubsection{Special Properties}
 23 | \begin{align}
 24 | \norm{\mA\vx}_2         &\le \norm{\mA}_F \norm{\vx}_2~~~\vx\in\sRn \\
 25 | \norm{\mA\mB}_F         &\le \norm{\mA}_F \norm{\mB}_F \\
 26 | \norm{\mC-\vx\vx^T}_F^2 &= \norm{\mC}_F^2+\norm{\vx}_2^4-2 \vx^T \mC \vx
 27 | \end{align}
 28 | 
 29 | \subsection{Operator Norms}
 30 | For $p=1,2,\infty$ or other values, an operator norm indicates the maximum input-output gain of the matrix.
 31 | \begin{equation}
 32 | \norm{\mA}_p=\max_{\norm{\vu}_p=1} \norm{\mA\vu}_p
 33 | \end{equation}
 34 | 
 35 | \begin{align}
 36 | \norm{\mA}_1
 37 |   &=\max_{\norm{\vu}_1=1} \norm{\mA\vu}_1       \\
 38 |   &=\max_{j=1,\ldots,n} \sum_{i=1}^m |\mA_{ij}| \\
 39 |   &=\textrm{Largest absolute column sum}
 40 | \end{align}
 41 | 
 42 | \begin{align}
 43 | \norm{\mA}_\infty
 44 |   &=\max_{\norm{\vu}_\infty=1} \norm{\mA\vu}_\infty  \\
 45 |   &=\max_{j=1,\ldots,m} \sum_{i=1}^n |\mA_{ij}| \\
 46 |   &=\textrm{Largest absolute row sum}
 47 | \end{align}
 48 | 
 49 | \begin{align}
 50 | \norm{\mA}_2
 51 |   &=\textrm{``spectral norm"}                   \\
 52 |   &=\max_{\norm{\vu}_2=1} \norm{\mA\vu}_2       \\
 53 |   &=\sqrt{\max(\eig(\mA^T\mA))} \\
 54 |   &=\textrm{Square root of largest eigenvalue of~}\mA^T\mA
 55 | \end{align}
 56 | 
 57 | 
 58 | 
 59 | \subsubsection{Special Properties}
 60 | \begin{align}
 61 | \norm{\mA\vu}_p &\le \norm{\mA}_p \norm{\vu}_p \\
 62 | \norm{\mA\mB}_p &\le \norm{\mA}_p \norm{\mB}_p 
 63 | \end{align}
 64 | 
 65 | \subsection{Spectral Radius}
 66 | Not a proper norm.
 67 | \begin{equation}
 68 | \rho(\mA)=\textrm{spectral radius}(\mA)=\max_{i=1,\ldots,n} | \eig(\mA)_i |
 69 | \end{equation}
 70 | 
 71 | \subsubsection{Special Properties}
 72 | \begin{align}
 73 | \rho(\mA) &\le \norm{\mA}_p \\
 74 | \rho(\mA) &\le \min(~\norm{\mA}_1, \norm{\mA}_\infty)
 75 | \end{align}
 76 | 
 77 | 
 78 | \section{Vectors}
 79 | 
 80 | \begin{align}
 81 | \norm{\vx}_1      &= \sum_i |\vx_i|           & \textrm{L1-norm\index{L1-norm}} \\
 82 | \norm{\vx}_p      &= (\sum_i |\vx_i|^p)^{1/p} & \textrm{P-norm\index{P-norm}}   \\
 83 | \norm{\vx}_\infty &= \max_i |\vx_i|           & \textrm{L$\infty$-norm\index{L$\infty$-norm}, L-infinity norm}
 84 | \end{align}
 85 | 
 86 | \subsection{Identities}
 87 | 
 88 | \begin{align}
 89 | 2\norm{\vu}_2^2+2\norm{\vv}_2^2 &= \norm{\vu+\vv}_2^2 + \norm{\vu-\vv}_2^2                                                      & \textrm{Polarization Identity} \\
 90 | <\vx,\vy>                       &= \frac{1}{4}\left(\norm{\vx+\vy}_2^2-\norm{\vx-\vy}_2^2\right)~~\forall \vx,\vy\in\mathcal{V} & \textrm{Polarization Identity} \\
 91 | \norm{u}_2^2+\norm{v}_2^2&=\norm{\begin{bmatrix} u \\ v\end{bmatrix}}_2^2
 92 | \end{align}
 93 | 
 94 | 
 95 | \subsection{Bounds}
 96 | 
 97 | \begin{align}
 98 | |\vx^T \vy| &\le \norm{\vx}_2 \norm{\vy}_2 & \textrm{Cauchy-Schwartz Inequality} \\
 99 | |\vx^T \vy| &\le \sum_{k=1}^n |\vx_k \vy_k| \le \norm{\vx}_p \norm{\vx}_q~~~\forall p,q\ge1: 1/p+1/q=1 & \textrm{H\"older Inequality}
100 | \end{align}
101 | 
102 | For $\vx\in\mathbb{R}^n$
103 | \begin{equation}
104 | \frac{1}{\sqrt{n}}\norm{\vx}_2
105 | \le\norm{\vx}_\infty
106 | \le\norm{\vx}_2
107 | \le\norm{\vx}_1
108 | \le\sqrt{\textrm{card}(\vx)}\norm{\vx}_2
109 | \le\sqrt{n}\norm{\vx}_2
110 | \le n \norm{\vx}_\infty
111 | \end{equation}
112 | 
113 | For any $0<p<q$ we have that $\norm{\vx}_q\le\norm{\vx}_p$.


--------------------------------------------------------------------------------
/src/plot_gen.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | 
  3 | import matplotlib
  4 | matplotlib.use('Agg')
  5 | import matplotlib.pyplot as plt
  6 | import numpy as np
  7 | import os
  8 | import scipy as sp
  9 | from scipy.stats import ortho_group
 10 | 
 11 | def SaveIfNew(fig, filename, *args, **kwargs):
 12 |   if os.path.exists(filename):
 13 |     return
 14 |   fig.savefig(filename, *args, **kwargs)
 15 | 
 16 | def MakeMatrixFig(filename, mat):
 17 |   fig = plt.figure(figsize=(2,2), dpi=200)
 18 |   ax  = plt.subplot(111)
 19 |   ax.matshow(mat, cmap='gray')
 20 |   plt.xticks([])
 21 |   plt.yticks([])
 22 |   SaveIfNew(fig, filename, bbox_inches='tight', pad_inches=0.0)  
 23 |   plt.close()
 24 | 
 25 | 
 26 | 
 27 | #Symmetric Matrix Example
 28 | np.random.seed(123456789)
 29 | N = 10
 30 | a = np.random.rand(N, N)
 31 | m = np.tril(a) + np.tril(a, -1).T
 32 | MakeMatrixFig("imgs/rg_symmetric_matrix.pdf", m)
 33 | 
 34 | np.random.seed(123456789)
 35 | N = 10
 36 | a = np.random.rand(N, N)
 37 | m = np.tril(a)
 38 | MakeMatrixFig("imgs/rg_lower_triangular.pdf", m)
 39 | 
 40 | np.random.seed(123456789)
 41 | N = 10
 42 | a = np.random.rand(N, N)
 43 | m = np.triu(a)
 44 | MakeMatrixFig("imgs/rg_upper_triangular.pdf", m)
 45 | 
 46 | np.random.seed(123456789)
 47 | N = 10
 48 | a = np.random.rand(N)
 49 | m = np.diag(a)
 50 | MakeMatrixFig("imgs/rg_diagonal.pdf", m)
 51 | 
 52 | np.random.seed(123456789)
 53 | N = 10
 54 | m = sp.sparse.diags(
 55 |       [np.random.random(N-1),np.random.random(N),np.random.random(N-1)],
 56 |       [-1,0,1],
 57 |       shape=(N,N)
 58 |     ).todense()
 59 | MakeMatrixFig("imgs/rg_tridiagonal.pdf", m)
 60 | 
 61 | np.random.seed(123456789)
 62 | a     = np.random.rand(10,4)
 63 | u,s,v = np.linalg.svd(a, full_matrices=True)
 64 | s     = np.diag(s)
 65 | s     = np.pad(s, ((0,6),(0,6)), 'constant', constant_values=0)
 66 | v     = np.pad(v, ((0,6),(0,6)), 'constant', constant_values=0)
 67 | MakeMatrixFig("imgs/decomp_svd_a.pdf", a)
 68 | MakeMatrixFig("imgs/decomp_svd_u.pdf", u)
 69 | MakeMatrixFig("imgs/decomp_svd_s.pdf", s)
 70 | MakeMatrixFig("imgs/decomp_svd_v.pdf", np.transpose(v))
 71 | 
 72 | np.random.seed(123456789)
 73 | a     = np.random.rand(10,4)
 74 | u,s,v = np.linalg.svd(a, full_matrices=False)
 75 | s     = np.diag(s)
 76 | MakeMatrixFig("imgs/decomp_svd_a_compact.pdf", a)
 77 | MakeMatrixFig("imgs/decomp_svd_u_compact.pdf", u)
 78 | MakeMatrixFig("imgs/decomp_svd_s_compact.pdf", s)
 79 | MakeMatrixFig("imgs/decomp_svd_v_compact.pdf", np.transpose(v))
 80 | 
 81 | np.random.seed(123456789)
 82 | a    = np.random.rand(6,1)
 83 | b    = np.random.rand(10,1)
 84 | dyad = a@np.transpose(b)
 85 | MakeMatrixFig("imgs/rg_dyad.pdf", dyad)
 86 | 
 87 | np.random.seed(123456789)
 88 | m = ortho_group.rvs(dim=10)
 89 | MakeMatrixFig("imgs/rg_orthogonal.pdf", m)
 90 | 
 91 | 
 92 | np.random.seed(123456787)
 93 | cols = np.random.rand(9)
 94 | rows = np.random.rand(10)
 95 | m    = sp.linalg.toeplitz(cols,rows)
 96 | MakeMatrixFig("imgs/rg_toeplitz.pdf", m)
 97 | 
 98 | 
 99 | np.random.seed(123456787)
100 | h         = np.random.rand(5)
101 | h         = h/np.linalg.norm(h)
102 | padding   = np.zeros(h.shape[0] - 1, h.dtype)
103 | first_col = np.r_[h, padding]
104 | first_row = np.r_[h[0], padding]
105 | H         = sp.linalg.toeplitz(first_col, first_row)
106 | MakeMatrixFig("imgs/rg_toeplitz_1d_conv.pdf", H)
107 | 
108 | 
109 | a  = np.array(range(10))
110 | a  = np.random.permutation(a)
111 | pm = np.zeros((10,10))
112 | pm[np.arange(10), a] = 1
113 | MakeMatrixFig("imgs/rg_permutation_matrix.pdf", pm)
114 | 
115 | 
116 | np.random.seed(123456789)
117 | a   = np.random.rand(10,10)
118 | q,r = np.linalg.qr(a, mode='complete')
119 | MakeMatrixFig("imgs/decomp_qr_a.pdf", a)
120 | MakeMatrixFig("imgs/decomp_qr_q.pdf", q)
121 | MakeMatrixFig("imgs/decomp_qr_r.pdf", r)
122 | 
123 | 
124 | 
125 | np.random.seed(123456789)
126 | a = np.random.rand(10,10)
127 | a = a@np.transpose(a)
128 | L = np.linalg.cholesky(a)
129 | MakeMatrixFig("imgs/decomp_cholesky_a.pdf", a)
130 | MakeMatrixFig("imgs/decomp_cholesky_L.pdf", L)
131 | MakeMatrixFig("imgs/decomp_cholesky_LT.pdf", np.transpose(L))
132 | 
133 | 
134 | 
135 | np.random.seed(123456789)
136 | a           = np.random.rand(10,10)
137 | a           = a@np.transpose(a)
138 | lu, d, perm = sp.linalg.ldl(a)
139 | MakeMatrixFig("imgs/decomp_ldlt_a.pdf", a)
140 | MakeMatrixFig("imgs/decomp_ldlt_L.pdf", lu)
141 | MakeMatrixFig("imgs/decomp_ldlt_D.pdf", d)
142 | MakeMatrixFig("imgs/decomp_ldlt_LT.pdf", np.transpose(lu))


--------------------------------------------------------------------------------
/src/updates.tex:
--------------------------------------------------------------------------------
  1 | \chapter{Updates}
  2 | 
  3 | %TODO
  4 | % \section{Low-Rank Updates to $Ax=b$}
  5 | 
  6 | % Given nonsingular $\mA\in\sRnn$ and $\vu,\vv\in\sRn$ with $1+\vv^T\mA^{-1}\vu\ne0$, if we have solved $\mA\vx=\vb$, then we can quickly find $(\mA+\vu\vv^T)\bar \vx = \vb$. Namely,
  7 | 
  8 | \section{Woodbury Identity (rank-$k$ update to inverse)}
  9 | \label{sec:update:woodbury}
 10 | 
 11 | %TODO: Add material from "ON DERIVING THE INVERSE OF A SUM OF MATRICES"
 12 | %TODO: Add material from "Generalization of the matrix inversion lemma"
 13 | 
 14 | The inverse of a rank-$k$ update of some matrix $\mA$ can be computed by doing a rank-$k$ update of $\mAi$.
 15 | 
 16 | \begin{align}
 17 | %(\mA + \mC\mB\mC^T)^{-1} = \mAi - \mAi\mC(\mBi + \mC^T\mAi\mC)^{-1}\mC^T\mAi %TODO: What is this good for?
 18 | (\mA+\mU\mC\mV)^{-1} &= \mAi - \mAi\mU(\mBi+\mV\mAi\mU)^{-1}\mV\mAi \\
 19 | \end{align} %TODO
 20 | where $\mA\in\sRnn$, $\mC\in\sRkk$, $\mU\in\sRnk$, $\mV\in\sRkn$, and $\mA$ and $\mC$ non-singular.
 21 | 
 22 | If $\mU$ and $\mV$ are vectors, then the Woodbury Identity reduces to the Sherman--Morrison formula (\autoref{sec:update:sherman}).
 23 | 
 24 | If $\mP,\mR$ are positive definite and $\mP\in\sRnn$, $\mR\in\sRkk$, and $\mB\in\sRkn$, then
 25 | \begin{align}
 26 | (\mPi + \mB^T\mRi\mB)^{-1} &= \mP - \mP\mB^T(\mB\mP\mB^T+\mR)^{-1}\mB\mP  \eqcite{WellingXXXX} \\
 27 | (\mPi + \mB^T\mRi\mB)^{-1} \mB^T\mRi &= \mP\mB^T(\mB\mP\mB^T+\mR)^{-1}    \eqcite{WellingXXXX}
 28 | \end{align}
 29 | 
 30 | \section{Sherman--Morrison Formula (rank-1 update to inverse)}
 31 | \label{sec:update:sherman}
 32 | The inverse of a rank-1 update of some matrix $\mA$ can be computed by doing a rank-1 update of $\mAi$.
 33 | \begin{equation}
 34 | (\mA + \vu\vv^T)^{-1}=\mAi-\frac{\mAi\vu\vv^T\mAi}{1+\vv^T\mAi\vu}
 35 | \end{equation}
 36 | This is a special case of the Woodbury Identity (\autoref{sec:update:woodbury}).
 37 | 
 38 | 
 39 | \section{Removing a row from $\mA^T\mA$ ($\mA^T\mA\rightarrow \mA_{\bs i}^T\mA_{\bs i}$)}
 40 | 
 41 | \textbf{Plain English:} Matrix times its transpose after eliminating row $i$ from the matrix
 42 | 
 43 | \textbf{Inputs:} $\mA\in\sR^{k,m},\vu\in\sRm,\vv\in\sRn$ and $i$, the row to remove from $\mA$
 44 | 
 45 | \textbf{Reduces to:} $\mC\in\sR^{k,l}$
 46 | 
 47 | \textbf{Algorithm:}
 48 | 
 49 | % Recall that
 50 | % \begin{equation}
 51 | % (\mA\mB)_{kl} = \sum_m \mA_{km}\mB_{ml}~~~\mA\in\sR^{k,m},\mB\in\sR^{m,l}
 52 | % \end{equation}
 53 | % then we have that
 54 | % \begin{equation}
 55 | % (\mA^T\mA)_{kl} = \sum_{m} \mA_{mk}\mA_{ml}=\sum_{m\ne i} \mA_{mk}\mA_{ml} + \mA_{jk}\mA_{jl}=\sum_{m\ne i} \mA_{mk}\mA_{ml} + (\mA_{k*})_{j} (\mA_{l*})_{j}
 56 | % \end{equation}
 57 | % where $(\mA_k*)_{j}$ is the $j$th element of the $k$th column of $\mA$. %TODO
 58 | 
 59 | % Thus,
 60 | \begin{align}
 61 | \mA_{\bs i}^T\mA_{\bs i} &= \mA^T\mA-\mA_{*i}\mA_{*i}^T \\
 62 | \noalign{Similarly:}
 63 | \mA_{\bs i}^T  y_{\bs i} &= \mA^Ty  -\mA_{*i}y_i^T
 64 | \end{align}
 65 | 
 66 | 
 67 | \section{$\mathbf{1}_r^T \mA \mathbf{1}_c$}
 68 | 
 69 | \textbf{Plain English:} The sum of the elements of the matrix.
 70 | 
 71 | \textbf{Reduces to:} Scalar
 72 | 
 73 | \textbf{Notation:} For $\mA \in \mathbb{R}^{r\times c}$, $\mathbf{1}_r$ is in $\mathbb{R}^{r \times 1}$ and $\mathbf{1}_c$ is in $\mathbb{R}^{c \times 1}$.
 74 | 
 75 | \textbf{Algorithm:} Traverse all the element of the matrix in order keeping track of the sum. For applications where accuracy is important and the matrices have a large dynamic range, Kahan summation or a similar technique should be used.
 76 | 
 77 | \textbf{Update Algorithm:} If an entry changes, subtract its old value from the sum and add its new value to the sum.
 78 | 
 79 | \section{$\mathbf{e}_i \mA \mathbf{e}_j$}
 80 | 
 81 | \textbf{Plain English:} Extract element $\mA_{ij}$ from the matrix
 82 | 
 83 | \textbf{Reduces to:} Scalar
 84 | 
 85 | \textbf{Notation:} TODO
 86 | 
 87 | \textbf{Algorithm:} TODO
 88 | 
 89 | \textbf{Update Algorithm:} TODO
 90 | 
 91 | 
 92 | \section{$\vx^T \mA \vx$}
 93 | 
 94 | \textbf{Plain English:} TODO
 95 | 
 96 | \textbf{Reduces to}: Scalar
 97 | 
 98 | \textbf{Notation:} $\mA$ must be in $\mathbb{R}^{i\times i}$. $\vx$ is in $\mathbb{R}^{i \times 1}$.
 99 | 
100 | \textbf{Algorithm:} TODO
101 | 
102 | \textbf{Update Algorithm:} We make use of the identity $(\vx^T \mA \vx)=\sum_{i,j}\big((\vx \vx^T)\circ\mA\big)$. If an entry $\mA_{i,j}$ in the matrix changes subtract its old value $\vx_i \vx_j \mA_{ij}$ and add the new value $\vx_i \vx_j \mA_{ij}'$. If an entry $\vx_i$ changes TODO.
103 | 


--------------------------------------------------------------------------------
/src/algorithmics.tex:
--------------------------------------------------------------------------------
  1 | \chapter{Algorithmics}
  2 | 
  3 | \section{Time Complexities}
  4 | \begin{center}
  5 | {\footnotesize\renewcommand{\arraystretch}{1.2}
  6 | \begin{tabular}{p{1.5cm}p{3cm}p{3cm}p{4cm}p{1cm}}
  7 | \textbf{Operation}             & \textbf{Input}                   & \textbf{Output}    & \textbf{Algorithm}                     & \textbf{Time}   \\ \hline
  8 | Matmult
  9 |     & $\mA,\mB\in n\times n$
 10 |     & $n \times n$
 11 |     & Schoolbook
 12 |     & $O(n^3)$
 13 |     \\ \hline
 14 | 
 15 |     &
 16 |     &
 17 |     & Strassen~\citep{Strassen1969}
 18 |     & $O(n^{2.807})$
 19 |     \\ \hline
 20 | 
 21 |     &
 22 |     &
 23 |     & Best
 24 |     & $O(n^\omega)$
 25 |     \\ \hline
 26 | Matmult
 27 |     & $\mA\in n\times m, \mB\in m\times p$
 28 |     & $n \times p$
 29 |     & Schoolbook
 30 |     & $O(nmp)$
 31 |     \\ \hline
 32 | Inversion
 33 |     & $\mA\in n\times n$
 34 |     & $n \times n$
 35 |     & Gauss--Jordan elimination
 36 |     & $O(n^3)$
 37 |     \\ \hline
 38 | 
 39 |     &
 40 |     &
 41 |     & Strassen~\citep{Strassen1969}
 42 |     & $O(n^{2.807})$
 43 |     \\ \hline
 44 | 
 45 |     &
 46 |     &
 47 |     & Best
 48 |     & $O(n^\omega)$
 49 |     \\ \hline
 50 | SVD
 51 |     & $\mA\in m\times n$
 52 |     & $m\times m, m\times n, n\times n$ \newline $m\times r, r\times r, n\times r$
 53 |     &
 54 |     & $O(mn^2)$ \newline \hbox{$(m\ge n)$} \\ \hline
 55 | Determinant
 56 |     & $\mA\in n\times n$
 57 |     & Scalar
 58 |     & Laplace expansion
 59 |     & $O(n!)$         \\ \hline
 60 | 
 61 |     &
 62 |     &
 63 |     & Division-free~\citep{Rote2001}
 64 |     & $O(n!)$
 65 |     \\ \hline
 66 | 
 67 |     &
 68 |     &
 69 |     & LU decomposition
 70 |     & $O(n^3)$
 71 |     \\ \hline
 72 | 
 73 |     &
 74 |     &
 75 |     & Integer preserving~\citep{Bareiss1968}
 76 |     & $O(n^3)$
 77 |     \\ \hline
 78 | Back \newline substitution
 79 |     & $\mA$ triangular
 80 |     & $n$ solutions
 81 |     & Back substitution
 82 |     & $O(n^2)$
 83 |     \\ \hline
 84 | \end{tabular}
 85 | }
 86 | \end{center}
 87 | 
 88 | \section{The $\omega$ Exponent}
 89 | 
 90 | The lower bound on matmult time complexity is $O(n^\omega)$, where $\omega$ is an unknown constant bounded by $2\le\omega\le2.3728596$ (\autoref{tbl:omega-vals} lists the known upper bound on $\omega$ over time). Algorithms achieving lower values of $\omega$ tend to be less efficient in practice for all but the largest matrices. Of the algorithms with times of less than $O(n^3)$, only the Strassen algorithm~\citep{Strassen1969} has seen serious attempts at optimized implementation. Most matmult implementations use highly optimized variants of the standard $O(n^3)$ algorithm. At this point, memory and bus speeds dominate the performance of implementations, so simple Big-O notation cannot be used to reliably compare matmult performances.
 91 | 
 92 | The time complexity for solving sparse linear systems was bounded by $\omega$ until recently, when randomized methods were used to obtain a bound of $O(n^{2.331645})$~\citep{Peng2021}.
 93 | 
 94 | \begin{table}
 95 | \centering
 96 | \begin{tabular}{lll}
 97 | \textbf{Name}           & \textbf{Year} & $\omega$  \\ \hline
 98 | Standard                & -             & 3         \\
 99 | \citet{Strassen1969}    & 1969          & 2.807     \\
100 | \citet{Pan1978}         & 1978          & 2.796     \\
101 | \citet{Bini1979}        & 1979          & 2.78      \\
102 | \citet{Schonhage1981}   & 1981          & 2.548     \\
103 | \citet{Schonhage1981}   & 1981          & 2.522     \\
104 | \citet{Romani1982}      & 1982          & 2.517     \\
105 | \citet{Coppersmith1982} & 1982          & 2.496     \\
106 | \citet{Strassen1986}    & 1986          & 2.479     \\
107 | \citet{Copper1990}      & 1990          & 2.376     \\
108 | \citet{Williams2012}    & 2012          & 2.37293   \\
109 | \citet{Williams2012}    & 2012          & 2.37287\footnote{The original conference paper gave a bound of $\omega<2.3727$, but omitted some necessary constraints. This corrected value appears in the final paper.}   \\
110 | \citet{LeGall2014}      & 2014          & 2.3728642 \\
111 | \citet{LeGall2014}      & 2014          & 2.3728640 \\
112 | \citet{LeGall2014}      & 2014          & 2.3728639 \\
113 | \citet{Alman2020}       & 2020          & 2.3728596
114 | \end{tabular}
115 | \caption{Upper bounds on the value of $\omega$ over time \label{tbl:omega-vals}}
116 | \end{table}
117 | 
118 | 
119 | %\section{Gram-Schmidt}
120 | %TODO
121 | % Consider the [[Gram–Schmidt process]] applied to the columns of the full column rank matrix <math>A=[\mathbf{a}_1, \cdots, \mathbf{a}_n]</math>, with [[inner product]] <math>\langle\mathbf{v},\mathbf{w}\rangle = \mathbf{v}^\top \mathbf{w}</math> (or <math>\langle\mathbf{v},\mathbf{w}\rangle = \mathbf{v}^* \mathbf{w}</math> for the complex case).
122 | 
123 | % Define the [[Vector projection|projection]]:
124 | % :<math>\mathrm{proj}_{\mathbf{u}}\mathbf{a}
125 | % = \frac{\left\langle\mathbf{u},\mathbf{a}\right\rangle}{\left\langle\mathbf{u},\mathbf{u}\right\rangle}{\mathbf{u}}
126 | % </math>
127 | % then:
128 | % :<math>
129 | % \begin{align}
130 | %  \mathbf{u}_1 &= \mathbf{a}_1,
131 | %   & \mathbf{e}_1 &= {\mathbf{u}_1 \over \|\mathbf{u}_1\|} \\
132 | %  \mathbf{u}_2 &= \mathbf{a}_2-\mathrm{proj}_{\mathbf{u}_1}\,\mathbf{a}_2,
133 | %   & \mathbf{e}_2 &= {\mathbf{u}_2 \over \|\mathbf{u}_2\|} \\
134 | %  \mathbf{u}_3 &= \mathbf{a}_3-\mathrm{proj}_{\mathbf{u}_1}\,\mathbf{a}_3-\mathrm{proj}_{\mathbf{u}_2}\,\mathbf{a}_3,
135 | %   & \mathbf{e}_3 &= {\mathbf{u}_3 \over \|\mathbf{u}_3\|} \\
136 | %  & \vdots &&\vdots \\
137 | %  \mathbf{u}_k &= \mathbf{a}_k-\sum_{j=1}^{k-1}\mathrm{proj}_{\mathbf{u}_j}\,\mathbf{a}_k,
138 | %   &\mathbf{e}_k &= {\mathbf{u}_k\over\|\mathbf{u}_k\|}
139 | % \end{align}
140 | % </math>
141 | 
142 | % We can now express the <math>\mathbf{a}_i</math>s over our newly computed orthonormal basis:
143 | 
144 | % :<math>
145 | % \begin{align}
146 | %  \mathbf{a}_1 &= \langle\mathbf{e}_1,\mathbf{a}_1 \rangle \mathbf{e}_1  \\
147 | %  \mathbf{a}_2 &= \langle\mathbf{e}_1,\mathbf{a}_2 \rangle \mathbf{e}_1
148 | %   + \langle\mathbf{e}_2,\mathbf{a}_2 \rangle \mathbf{e}_2 \\
149 | %  \mathbf{a}_3 &= \langle\mathbf{e}_1,\mathbf{a}_3 \rangle \mathbf{e}_1
150 | %   + \langle\mathbf{e}_2,\mathbf{a}_3 \rangle \mathbf{e}_2
151 | %   + \langle\mathbf{e}_3,\mathbf{a}_3 \rangle \mathbf{e}_3 \\
152 | %  &\vdots \\
153 | %  \mathbf{a}_k &= \sum_{j=1}^{k} \langle \mathbf{e}_j, \mathbf{a}_k \rangle \mathbf{e}_j
154 | % \end{align}
155 | % </math>
156 | % where <math>\langle\mathbf{e}_i,\mathbf{a}_i \rangle = \|\mathbf{u}_i\|</math>. This can be written in matrix form:
157 | % :<math> A = Q R </math>
158 | % where:
159 | % :<math>Q = \left[ \mathbf{e}_1, \cdots, \mathbf{e}_n\right] </math>
160 | % and
161 | % :<math>
162 | % R = \begin{pmatrix}
163 | % \langle\mathbf{e}_1,\mathbf{a}_1\rangle & \langle\mathbf{e}_1,\mathbf{a}_2\rangle &  \langle\mathbf{e}_1,\mathbf{a}_3\rangle  & \ldots \\
164 | % 0                & \langle\mathbf{e}_2,\mathbf{a}_2\rangle                        &  \langle\mathbf{e}_2,\mathbf{a}_3\rangle  & \ldots \\
165 | % 0                & 0                                       & \langle\mathbf{e}_3,\mathbf{a}_3\rangle                          & \ldots \\
166 | % \vdots           & \vdots                                  & \vdots                                    & \ddots \end{pmatrix}.</math>
167 | 


--------------------------------------------------------------------------------
/src/matrix_forensics.tex:
--------------------------------------------------------------------------------
  1 | \documentclass{book}
  2 | 
  3 | %Post to: https://stats.stackexchange.com/questions/21346/reference-book-for-linear-algebra-applied-to-statistics
  4 | 
  5 | \usepackage[top=1in, bottom=1.25in, left=1.25in, right=1.25in]{geometry}
  6 | 
  7 | \usepackage{amsfonts, amsmath}
  8 | \usepackage{commath}
  9 | \usepackage[yyyymmdd,hhmmss]{datetime}
 10 | \usepackage{graphbox}
 11 | \usepackage[hidelinks]{hyperref}
 12 | \usepackage{marginnote}
 13 | \usepackage{mathtools}
 14 | \usepackage{parskip}
 15 | \usepackage{titlesec}
 16 | \usepackage{xcolor}
 17 | \usepackage{optidef}
 18 | 
 19 | \usepackage{cellspace}%
 20 | \setlength\cellspacetoplimit{3pt}
 21 | \setlength\cellspacebottomlimit{3pt}
 22 | 
 23 | \usepackage{makeidx}
 24 | \makeindex
 25 | 
 26 | \usepackage[numbers,sort&compress]{natbib}
 27 | \bibliographystyle{unsrtnat}
 28 | 
 29 | %Make equations be numbered continuously through book
 30 | \usepackage{chngcntr}
 31 | \counterwithout{equation}{chapter}
 32 | 
 33 | \renewcommand{\sectionautorefname}{\textsection}
 34 | \renewcommand{\subsectionautorefname}{\textsection}
 35 | \renewcommand{\subsubsectionautorefname}{\textsection}
 36 | 
 37 | \input{z_math_commands}
 38 | 
 39 | \hypersetup{
 40 |   pdfauthor={Richard Barnes (ORCID: 0000-0002-0204-6040)},%
 41 |   pdftitle={Matrix Forensics},%
 42 | %            pdfsubject={Whatever},%
 43 |   pdfkeywords = {matrix algebra, matrix relations, matrix identities, linear algebra},%
 44 |   pdfproducer = {LaTeX},%
 45 |   pdfcreator  = {pdfLaTeX}
 46 | }
 47 | 
 48 | 
 49 | \usepackage{fancyhdr}
 50 | % \renewcommand{\chaptermark}[1]{\markboth{#1}{#1}}
 51 | \setlength{\headheight}{15.2pt}
 52 | \pagestyle{fancy}
 53 | 
 54 | \lhead[\thepage]{\leftmark}
 55 | % \chead[]{<odd output>}
 56 | \rhead[\leftmark]{\thepage}
 57 | 
 58 | \renewcommand{\footrulewidth}{0.4pt}% default is 0pt
 59 | \lfoot[\footnotesize{Richard Barnes. Matrix Forensics. \today-\currenttime. \href{https://github.com/r-barnes/MatrixForensics}{github.com/r-barnes/MatrixForensics}}. \input{/tmp/matrix_forensics_version.info}\!\!.]{\footnotesize{Richard Barnes. Matrix Forensics. \today-\currenttime. \href{https://github.com/r-barnes/MatrixForensics}{github.com/r-barnes/MatrixForensics}}. \input{/tmp/matrix_forensics_version.info}\!\!.} %  [<even output>]{<odd output>}
 60 | \cfoot[]{}
 61 | \rfoot[]{}
 62 | 
 63 | 
 64 | \newcommand{\eqcite}[1]{\marginnote{\citep{#1}}}
 65 | 
 66 | %Adjust chapter formatting
 67 | \newcommand{\hsp}{\hspace{20pt}}
 68 | \definecolor{gray75}{gray}{0.75}
 69 | \titleformat{\chapter}[hang]{\Huge\bfseries}{\thechapter\hsp\textcolor{gray75}{$|$}\hsp}{0pt}{\Huge\bfseries}
 70 | \titlespacing*{\chapter}{0pt}{0pt}{20pt} %? BEFORE AFTER
 71 | 
 72 | %Ensure chapters start on the same page
 73 | \usepackage{etoolbox}
 74 | \makeatletter
 75 | \patchcmd{\chapter}{\if@openright\cleardoublepage\else\clearpage\fi}{\clearpage}{}{}
 76 | \makeatother
 77 | 
 78 | 
 79 | \begin{document}
 80 | 
 81 | \input{title}
 82 | 
 83 | 
 84 | \tableofcontents
 85 | 
 86 | \input{introduction}
 87 | 
 88 | \input{nomenclature}
 89 | 
 90 | \input{basics}
 91 | 
 92 | \input{derivatives}
 93 | 
 94 | \input{rogue_gallery}
 95 | 
 96 | \input{decompositions}
 97 | 
 98 | \input{eigenvalues}
 99 | 
100 | \input{norms}
101 | 
102 | 
103 | 
104 | \chapter{Bounds} %TODO: Reorganize
105 | 
106 | \section{Matrix Gain}
107 | \begin{equation}
108 | \lambda_\textrm{min}(\mA^T\mA)\le \frac{\norm{\mA\vx}_2^2}{\norm{\vx}_2^2}\le\lambda_\textrm{max}(\mA^T\mA)
109 | \end{equation}
110 | 
111 | \begin{equation}
112 | \max_{\vx\ne0} \frac{\norm{\mA\vx}_2}{\norm{\vx}_2}=\norm{\mA}_2=\sqrt{\lambda_\textrm{max}(\mA^T\mA)}\implies\vx=u_1
113 | \end{equation}
114 | 
115 | \begin{equation}
116 | \min_{\vx\ne0} \frac{\norm{\mA\vx}_2}{\norm{\vx}_2}=\sqrt{\lambda_\textrm{min}(\mA^T\mA)}\implies\vx=u_n
117 | \end{equation}
118 | 
119 | \section{Rayleigh quotients}
120 | The Rayleigh quotient of $\mA\in\sSn$ is given by
121 | \begin{equation}
122 | \frac{\vx^T \mA \vx}{\vx^T\vx}~~\vx\ne0
123 | \end{equation}
124 | 
125 | \begin{equation}
126 | \lambda_\textrm{min}(\mA)\le \frac{\vx^T \mA \vx}{\vx^T\vx} \le \lambda_\textrm{max}(\mA)~~\vx\ne0
127 | \end{equation}
128 | 
129 | \begin{align}
130 | \lambda_\textrm{max}(A)&=\max_{\vx: \norm{\vx}_2=1} \vx^T\mA\vx=u_1 \\
131 | \lambda_\textrm{min}(A)&=\min_{\vx: \norm{\vx}_2=1} \vx^T\mA\vx=u_n
132 | \end{align}
133 | where $u_1$ and $u_n$ are the eigenvectors associated with $\lambda_\textrm{max}$ and $\lambda_\textrm{min}$, respectively.
134 | 
135 | 
136 | 
137 | 
138 | 
139 | 
140 | 
141 | \chapter{Equations}
142 | 
143 | \section{Linear Equations}
144 | The linear equation $\mA\vx=\vy$ with $\mA\in\sRmn$ admits a solution iff $\rank([\mA \vy])=\rank(\mA)$. If this is satisfied, the set of all solutions is an affine set $\mathcal{S}=\{\vx=\bar \vx+z: z\in\ns(\mA)\}$ where $\bar \vx$ is any vector such that $\mA\bar\vx=\vy$. The solution is unique if $\ns(\mA)=\{0\}$.
145 | 
146 | $\mA\vx=\vy$ is \textit{overdetermined} if it is tall/skinny ($m>n$); that is, if there are more equations than unknowns. If $\rank(\mA)=n$ then $\dim\ns(\mA)=0$, so there is either no solution or one solution. Overdetermined systems often have no solution ($\vy\notin\range(\mA)$), so an approximate solution is necessary. See \autoref{sec:least-squares}.
147 | 
148 | $\mA\vx=\vy$ is \textit{underdetermined} if it is short/wide ($n>m$); that is, if has more unknowns than equations. If $\rank(\mA)=m$ then $\range(\mA)=\sRm$, so $\dim\ns(\mA)=n-m>0$, so the set of solutions is infinite. Therefore, finding a single solution that optimizes some quantity is of interest.
149 | 
150 | $\mA\vx=\vy$ is \textit{square} if $n=m$. If $\mA$ is invertible, then the equations have the unique solution $\vx=\mA^{-1}\vy$. See \autoref{sec:minimum-norm}.
151 | 
152 | \section{Least-Squares}
153 | \label{sec:least-squares}
154 | For an overdetermined system we wish to find:
155 | \begin{equation}
156 | \min_\vx \norm{\mA\vx-\vy}_2^2
157 | \end{equation}
158 | Since $\mA\vx\in\range(\mA)$, we need a point $\tilde \vy = \mA\vx^*\in\range(\mA)$ closest to $\vy$. This point lies in the nullspace of $\mA^T$, so we have $\mA^T(\vy-\mA\vx^*)=0$. There is always a solution to this problem and, if $\rank(\mA)=n$, it is unique~\citep[p.\ 161]{Calafiore2014}
159 | \begin{equation}
160 | \vx^*=(\mA^T\mA)^{-1}\mA^T\vy
161 | \end{equation} %TODO: Check
162 | 
163 | \subsection{Regularized least-squares with low-rank data}
164 | 
165 | For $\mA\in\sRmn$, $\vy\in\sRm$, $\lambda\ge0$, the regularized least-squares problem
166 | \begin{equation}
167 | \textrm{argmin}_\vx \norm{\mA\vx-\vy}_2^2 + \lambda\norm{\vx}_2^2
168 | \end{equation}
169 | has a closed form solution
170 | \begin{equation}
171 | \label{equ:regularized_least_squares}
172 | \vx = (\mA^T\mA   + \lambda \mI)^{-1}\mA^T\vy
173 | \end{equation}
174 | However, if $\mA$ has a $\rank{r}\ll\min(n,m)$ and a known low-rank decomposition $\mA=\mL\mR^T$ with $\mL\in\mathbb{R}^{m,r}$ and $\mR\in\mathbb{R}^{n,r}$, then we can rewrite \autoref{equ:regularized_least_squares} as
175 | \begin{equation}
176 | \vx = (\mR^T \mR\mL^T \mL   + \lambda \mI)^{-1}\mL^T\vy
177 | \end{equation}
178 | This decreases the time complexity from $O(mn^2 + n^\omega)$ to $O(nr^2+mr^2)$.
179 | 
180 | \section{Minimum Norm Solutions}
181 | \label{sec:minimum-norm}
182 | For undertermined systems in which $\mA\in\sRmn$ with $m<n$. We wish to find
183 | \begin{equation}
184 | \min_{\vx: \mA\vx=\vy} \norm{\vx}_2
185 | \end{equation}
186 | The solution $\vx^*$ must be orthogonal to $\ns(\mA)$, so $\vx^*\in\range(\mA^T)$, so $\vx^*=\mA^Tc$ for some $c$. Substituting into $\mA\vx=\vy$ gives $\mA \mA^T c=\vy$, therefore~\citep[p.\ 162]{Calafiore2014}:
187 | \begin{equation}
188 | \vx^*=\mA^T(\mA\mA^T)^{-1}\vy
189 | \end{equation}
190 | 
191 | \section{The Sylvester Equation: $\mA\mX+\mX^T\mB=\mC$}
192 | The equation
193 | \begin{equation}
194 | \mA\mX+\mX^T\mB=\mC
195 | \end{equation}
196 | is called a T-Sylvester equation, or *-Sylvester equation in the complex case. It can be solved using methods from, e.g.:~\citet{Teran2011,Teran2019,Dopico2016}.
197 | 
198 | 
199 | \input{updates}
200 | 
201 | \input{optimization}
202 | 
203 | \input{algorithmics}
204 | 
205 | 
206 | \bibliography{refs}
207 | 
208 | \printindex
209 | 
210 | \end{document}
211 | 
212 | TODO:
213 | Orthogonal matrix = all eigenvalues are 1 or -1
214 | 
215 | Centering matrix
216 | Distance matrix
217 | 
218 | 
219 | 
220 | For two vectors $b$ and $x$, $p=\frac{b^Tx}{b^Tb}b$ is the projection of $x$ onto $b$.
221 | 
222 | TODO: Gilber Strang (2016, p563: Matrix Factorizations)
223 | 
224 | TODO: Strang 2016, p. 583, List of symbols and computer codes
225 | 
226 | 
227 | TODO: Add Gram-Schmidt procedure
228 | TODO: Add computational efficiency notes for QR
229 | 
230 | 
231 | Highlighting matrix example: http://www.texample.net/tikz/examples/highlighting-matrix/


--------------------------------------------------------------------------------
/src/decompositions.tex:
--------------------------------------------------------------------------------
  1 | \chapter{Matrix Decompositions}
  2 | 
  3 | \section{LLT/UTU: Cholesky Decomposition} %TODO: Add UU to TOC
  4 | 
  5 | \begin{center}
  6 | \includegraphics[align=c,height=1in]{imgs/decomp_cholesky_a.pdf}\textbf{\large =}
  7 | \includegraphics[align=c,height=1in]{imgs/decomp_cholesky_L.pdf}\textbf{\large *}
  8 | \includegraphics[align=c,height=1in]{imgs/decomp_cholesky_LT.pdf}
  9 | \end{center}
 10 | 
 11 | If $\mA$ is symmetric, positive definite, square, then
 12 | \begin{equation}
 13 | \mA=\mU^T\mU=\mL\mL^T
 14 | \end{equation}
 15 | where $\mU$ is a unique upper triangular matrix and $\mL$ is a unique lower-triangular matrix.
 16 | 
 17 | \section{LDL Decomposition}
 18 | 
 19 | \begin{center}
 20 | \includegraphics[align=c,height=1in]{imgs/decomp_ldlt_a.pdf}\textbf{\large =}
 21 | \includegraphics[align=c,height=1in]{imgs/decomp_ldlt_L.pdf}\textbf{\large *}
 22 | \includegraphics[align=c,height=1in]{imgs/decomp_ldlt_D.pdf}\textbf{\large *}
 23 | \includegraphics[align=c,height=1in]{imgs/decomp_ldlt_LT.pdf}
 24 | \end{center}
 25 | 
 26 | This is a special case of the LDM decomposition.\footnote{TODO: Crossreference}
 27 | If $\mA$ is a non-singular symmetric definite square matrix, then
 28 | \begin{equation}
 29 | \mA=\mL\mD\mL^T=\mL^T\mD\mL
 30 | \end{equation}
 31 | where $\mL$ is a unit lower triangular matrix and $\mD$ is a diagonal matrix. If $\mA\ispd0$, then $\mD_{ii}>0$.
 32 | 
 33 | % For $\mA\in\sRnn$,
 34 | % \begin{align}
 35 | % \mA&\ispsd0\iff \exists\mB\ispsd0: \mA=\mB^2 \\
 36 | % \mA&\ispd0 \iff \exists\mB\ispd 0: \mA=\mB^2
 37 | % \end{align}
 38 | % where $\mB$ is called the ``matrix square-root" of $\mA$.
 39 | 
 40 | % For $\mA\ispsd0$, we can use the spectral factorization $\mA=\mU\mD\mU^T$ and take $\mD^{1/2}=\diag(\sqrt{\lambda_1},\ldots,\sqrt{\lambda_n})$ to get $\mB=\mU\mD^{1/2}\mU^T$.
 41 | 
 42 | 
 43 | \section{PCA: Principle Components Analysis}
 44 | Find normalized directions in data space such that the variance of the projections of the centered data points is maximal. For centered data $\tilde \mX$, the mean-square variation of data along a vector $\vx$ is $\vx^T \tilde \mX \tilde \mX^T \vx$.
 45 | \begin{equation}
 46 | \max_{\vx\in\sRn,~\norm{\vx}_2=1} \vx^T \tilde \mX \tilde \mX^T \vx
 47 | \end{equation}
 48 | Taking an SVD of $\tilde \mX \tilde \mX^T$ gives $H=\mU_r\mD^2\mU^T$, which is maximized by taking $\vx=\vu_1$. By repeatedly removing the first principal components and recalculating, all the principal axes can be found.
 49 | 
 50 | 
 51 | 
 52 | 
 53 | \section{QR: Orthogonal-triangular}
 54 | 
 55 | \begin{center}
 56 | \includegraphics[align=c,height=1in]{imgs/decomp_qr_a.pdf}\textbf{\large =}
 57 | \includegraphics[align=c,height=1in]{imgs/decomp_qr_q.pdf}\textbf{\large *}
 58 | \includegraphics[align=c,height=1in]{imgs/decomp_qr_r.pdf}
 59 | \end{center}
 60 | 
 61 | For $\mA\in\sRnn$, $\mA=\mQ\mR$ where $\mQ$ is orthogonal and $\mR$ is an upper triangular matrix. If $\mA$ is non-singular, then $\mQ$ and $\mR$ are uniquely defined if $\diag(\mR)$ are imposed to be positive.
 62 | 
 63 | \subsection*{Algorithms}
 64 | 
 65 | Gram-Schmidt.
 66 | 
 67 | 
 68 | 
 69 | 
 70 | \section{SVD: Singular Value Decomposition}
 71 | 
 72 | \begin{center}
 73 | \includegraphics[align=c,height=1in]{imgs/decomp_svd_a.pdf}\textbf{\large =}
 74 | \includegraphics[align=c,height=1in]{imgs/decomp_svd_u.pdf}\textbf{\large *}
 75 | \includegraphics[align=c,height=1in]{imgs/decomp_svd_s.pdf}\textbf{\large *}
 76 | \includegraphics[align=c,height=1in]{imgs/decomp_svd_v.pdf}
 77 | \end{center}
 78 | 
 79 | \begin{center}
 80 | \includegraphics[align=c,width=0.5in]{imgs/decomp_svd_a_compact.pdf}\textbf{\large =}
 81 | \includegraphics[align=c,width=0.5in]{imgs/decomp_svd_u_compact.pdf}\textbf{\large *}
 82 | \includegraphics[align=c,width=0.5in]{imgs/decomp_svd_s_compact.pdf}\textbf{\large *}
 83 | \includegraphics[align=c,width=0.5in]{imgs/decomp_svd_v_compact.pdf}
 84 | \end{center}
 85 | 
 86 | Any matrix $\mA\in\sRmn$ can be written as
 87 | \begin{equation}
 88 | \mA=\mU \mD \mV^T=\sum_{i=1}^r \sigma_i u_i v_i^T
 89 | \end{equation}
 90 | where
 91 | \begin{align}
 92 | \mU&=\textrm{eigenvectors of~}\mA\mA^T & \sRmm \\
 93 | \mD&=\diag(\sigma_i)=\sqrt{\diag(\eig(\mA\mA^T))}      & \sRmn \\
 94 | \mV&=\textrm{eigenvectors of~}\mA^T\mA & \sRnn
 95 | \end{align}
 96 | Let $\sigma_i$ be the non-zero singular values for $i=1,\ldots,r$ where $r$ is the rank of $\mA$; $\sigma_1\ge\ldots\ge\sigma_r$.
 97 | 
 98 | We also have that
 99 | \begin{align}
100 | \mA   \vv_i &= \sigma_i \vu_i \\
101 | \mA^T \vu_i &= \sigma_i \vv_i \\
102 | \mU^T\mU &= \mI \\
103 | \mV^T\mV &= \mI
104 | \end{align}
105 | 
106 | $\mD$ can be written in an expanded form:
107 | \begin{equation}
108 | \tilde \mD=
109 | \begin{bmatrix}
110 | \mD       & 0_{r,n-r}   \\
111 | 0_{m-r,r} & 0_{m-r,n-r}
112 | \end{bmatrix}
113 | \end{equation}
114 | The final $n-r$ columns of $\mV$ give an orthonormal basis spanning $\ns(\mA)$. An orthonormal basis spanning the range of $\mA$ is given by the first $r$ columns of $\mU$.
115 | 
116 | \begin{align}
117 | \norm{\mA}^2_F&=\textrm{Frobenius norm} =\trace(\mA^T\mA)=\sum_{i=1}^r \sigma_i^2 \\
118 | \norm{\mA}^2_2&=\sigma_1^2 \\
119 | \norm{\mA}_* &= \textrm{nuclear norm}=\sum_{i=1}^r \sigma_i
120 | \end{align}
121 | 
122 | The \textbf{condition number} $\kappa$ of an invertible matrix $\mA\in\sRnn$ is the ratio of the largest and smallest singular value. Matrices with large condition numbers are closer to being singular and more sensitive to changes.
123 | \begin{equation}
124 | \kappa(\mA)=\frac{\sigma_1}{\sigma_n}=\norm{A}_2 \cdot \norm{A^{-1}}_2
125 | \end{equation}
126 | 
127 | \subsection*{Low-Rank Approximation}
128 | Approximating $\mA\in\sRmn$ by a matrix $\mA_k$ of rank $k>0$ can be formulated as the optimization probem
129 | \begin{equation}
130 | \min_{\mA_k\in\sRmn} \norm{\mA-\mA_k}_F^2: \rank{\mA_k}=k, 1\le k \le \rank(\mA)
131 | \end{equation}
132 | The optimal solution of this problem is given by
133 | \begin{equation}
134 | \mA_k=\sum_{i=1}^k \sigma_i \vu_i \vv_i^T
135 | \end{equation}
136 | where
137 | \begin{align}
138 | \frac{\norm{\mA_k}_F^2}{\norm{\mA}_F^2}&=\frac{\sigma_1^2+\ldots+\sigma_k^2}{\sigma_1^2+\ldots+\sigma_r^2} \\
139 | 1-\frac{\norm{\mA_k}_F^2}{\norm{\mA}_F^2}&=\frac{\sigma_{k+1}^2+\ldots+\sigma_r^2}{\sigma_1^2+\ldots+\sigma_r^2}
140 | \end{align}
141 | is the fraction of the total variance in $\mA$ explained by the approximation $\mA_k$.
142 | 
143 | \subsection*{Range and Nullspace}
144 | \begin{align}
145 | \ns(\mA) &= \range(\mV_{nr})                      \\
146 | \ns(\mA)^\perp \equiv\range(\mA^T)&=\range(\mV_r) \\
147 | \range(\mA)&=\range(\mU_r)                        \\
148 | \range(\mA)^\perp\equiv\ns(\mA^T)&=\range(\mU_{nr})
149 | \end{align}
150 | where $\mV_r$ is the first $r$ columns of $V$ and $V_nr$ are the last $[r+1,n]$ columns; similarly for $\mU$.
151 | 
152 | 
153 | \subsection*{Projectors}
154 | The projection of $\vx$ onto $\ns(\mA)$ is $(\mV_{nr}\mV_{nr}^T)\vx$. Since $\mI_n=\mV_r\mV_r^T+\mV_{nr}\mV_{nr}^T$, $(\mI_n-\mV_{r}\mV_{r}^T)\vx$ also works. The projection of $\vx$ onto $\range(\mA)$ is $(\mU_r\mU_r^T)\vx$.
155 | 
156 | If $\mA\in\sRmn$ is full row rank ($\mA\mA^T\ispd0$), then the minimum distance to an affine set $\{x:\mA\vx=\vb\}, \vb\in\sRm$ is given by $\vx^*=\mA^T(\mA\mA^T)^{-1}\vb$. %TODO
157 | 
158 | If $\mA\in\sRmn$ is full column rank ($\mA^T\mA\ispd0$), then the minimum distance to an affine set $\{x:\mA\vx=\vb\}, \vb\in\sRm$ is given by $\vx^*=\mA(\mA^T\mA)^{-1}\mA^T\vb$. %TODO
159 | 
160 | 
161 | \subsection*{Computational Notes}
162 | A \textit{numerical rank} can be estimated for the matrix as the largest $k$ such that $\sigma_k>\epsilon \sigma_1$ for $\epsilon\ge0$.
163 | 
164 | 
165 | 
166 | \section{Eigenvalue Decomposition for Diagonalizable Matrices}
167 | 
168 | For a square, diagonalizable matrix $\mA\in\mathbb{R}^{n,n}$
169 | \begin{equation}
170 | \mA=U\Lambda U^{-1}
171 | \end{equation}
172 | where $U\in\mathbb{C}^{n,n}$ is an invertible matrix whose columns are the eigenvectors of $\mA$ and $\Lambda$ is a diagonal matrix containing the eigenvalues $\lambda_1,\ldots,\lambda_n$ of $\mA$ in the diagonal.
173 | 
174 | The columns $\vu_1,\ldots,\vu_n$ satisfy
175 | \begin{equation}
176 | \mA \vu_i=\lambda_i \vu_i~~i=1,\ldots,n
177 | \end{equation}
178 | 
179 | \section{Eigenvalue (Spectral) Decomposition for Symmetric Matrices}
180 | 
181 | A symmetric matrix $\mA\in\mathbb{R}^{n,n}$ can be factored as
182 | \begin{equation}
183 | \mA=U\Lambda U^T=\sum_i^n \lambda_i \vu_i \vu_i^T
184 | \end{equation}
185 | where $U\in\mathbb{R}^{n,n}$ is an orthogonal matrix whose columns $\vu_i$ are the eigenvectors of $\mA$ and $\Lambda$ is a diagonal matrix containing the eigenvalues $\lambda_1\ge\ldots\ge\lambda_n$ of $\mA$ in the diagonal. These eigenvalues are always real. The eigenvectors can always be chosen to be real and to form an orthonormal basis.
186 | 
187 | The columns $\vu_1,\ldots,\vu_n$ satisfy
188 | \begin{equation}
189 | \mA \vu_i=\lambda_i \vu_i~~i=1,\ldots,n
190 | \end{equation}
191 | 
192 | 
193 | \section{Schur Complements}
194 | 
195 | For $\mA\in\sSn$, $\mB\in\sSn$, $\mX\in\sRnm$ with $\mB\ispd0$ and the block matrix
196 | \begin{equation}
197 | \mM=
198 | \begin{bmatrix}
199 | \mA & \mX \\
200 | \mX^T & \mB
201 | \end{bmatrix}
202 | \end{equation}
203 | and the Schur complement of $\mA$ in $\mM$
204 | \begin{equation}
205 | S=\mA-\mX\mB^{-1}\mX^T
206 | \end{equation}
207 | Then
208 | \begin{align}
209 | \mM\ispsd0&\iff S\ispsd0 \\
210 | \mM\ispd0 &\iff S\ispd0
211 | \end{align}
212 | 


--------------------------------------------------------------------------------
/src/optimization.tex:
--------------------------------------------------------------------------------
  1 | \chapter{Optimization}
  2 | 
  3 | \section{Standard Forms}
  4 | 
  5 | \textbf{Least Squares}
  6 | \begin{equation}
  7 | \min_{\vx\in\sRn} \norm{\vy-\mA\vx}_2
  8 | \end{equation}
  9 | 
 10 | \textbf{LASSO}
 11 | \begin{equation}
 12 | \min_{\vb\in\sRn} \left(\frac{1}{N}\norm{\vy-\mX\vb}_2^2+\lambda\norm{\vb}_1\right)
 13 | \end{equation}
 14 | 
 15 | \textbf{LP: Linear program}
 16 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
 17 | \addConstraint{\mA_\textrm{eq}\vx}{= \vb_\textrm{eq}}
 18 | \addConstraint{\mA\vx}{\le \vb}
 19 | \end{mini!}
 20 | 
 21 | \textbf{Linear Fractional Program}
 22 | \begin{maxi!}{\vx}{\frac{\vc^T\vx + a}{\vd^T \vx + b}}{}{}
 23 | \addConstraint{\mA\vx}{\le \vb}
 24 | \end{maxi!}
 25 | Additional constraints must ensure $\vd^T \vx + b$ has the same sign throughout the entire feasible region.
 26 | 
 27 | 
 28 | \textbf{QCQP: Quadratic Constrainted Quadratic Programs}
 29 | \begin{mini!}{\vx}{\vx^T\mH_0\vx+2\vc_0^T\vx + \vd_0}{}{}
 30 | \addConstraint{\vx^T\mH_i\vx+2\vc_i^T\vx + \vd_i}{\le 0}{~~i\in\mathcal{I}}
 31 | \addConstraint{\vx^T\mH_j\vx+2\vc_j^T\vx + \vd_j}{  = 0}{~~j\in\mathcal{E}}
 32 | \end{mini!}
 33 | If $\mH_i\ispsd 0~\forall i$, then the program is convex. In general, QCQPs are NP-Hard.
 34 | 
 35 | 
 36 | \textbf{QP: Quadratic Program}
 37 | \begin{mini!}{\vx}{\frac{1}{2}\vx^T\mH_0\vx+\vc_0^T\vx}{}{}
 38 | \addConstraint{\mA_\textrm{eq}\vx}{=\vb_\textrm{eq}}
 39 | \addConstraint{\mA\vx}{\le \vb}
 40 | \end{mini!}
 41 | If $\mH_0\ispd 0$, then the program is convex.
 42 | 
 43 | If only equality constraints are present, then the solution is the linear system:
 44 | \begin{equation}
 45 | \begin{bmatrix}
 46 | \mH_0 & \mA^T \\
 47 | \mA & 0
 48 | \end{bmatrix}
 49 | \begin{bmatrix} \vx \\ \lambda \end{bmatrix}
 50 | =\begin{bmatrix} -\vc_0 \\ \vb \end{bmatrix}
 51 | \end{equation}
 52 | where $\lambda$ is a set of Lagrange multipliers.
 53 | 
 54 | For $\mH_0\ispd 0$, the ellipsoid method solves the problem in polynomial time.~\citep{Kozlov1980} If, $\mH_0$ is indefinite, then the problem is NP-hard~\citep{Sahni1974}, even if $\mH_0$ has only one negative eigenvalue~\citep{Pardalos1991}.
 55 | 
 56 | \textbf{SOCP: Second Order Cone Program (Standard Form)}
 57 | \begin{align}
 58 | \min_{\vx}      ~& \vc^T \vx \\
 59 | \textrm{s.t.}   ~& \norm{\mA_i \vx+\vb_i}_2\le \vc_i^T \vx+\vd_i,~~i=1,\ldots,m
 60 | \end{align}
 61 | 
 62 | \textbf{SOCP: Second Order Cone Program (Conic Standard Form)}
 63 | \begin{align}
 64 | \min_{\vx}      ~& \vc^T \vx \\
 65 | \textrm{s.t.}   ~& (\mA_i \vx+\vb_i, \vc_i^T \vx+\vd_i)\in\mathcal{K}_{m_i} ~~i=1,\ldots,m
 66 | \end{align}
 67 | 
 68 | \section{Transformations}
 69 | 
 70 | \subsection{Linear-Fractional to Linear}
 71 | We transform a Linear-Fractional Program
 72 | \begin{maxi!}{\vx}{\frac{\vc^T\vx + a}{\vd^T \vx + b}}{}{}
 73 | \addConstraint{\mA\vx}{\le \vb}
 74 | \end{maxi!}
 75 | where $\vd^T \vx + b$ has the same sign throughout the entire feasible region to a linear program using the Charnes--Cooper transformation~\citep{Charnes1962} by defining
 76 | \begin{align}
 77 | \vy &= \frac{1}{\vd^T\vx+b}\cdot\vx \\
 78 | t   &= \frac{1}{\vd^T\vx+b}
 79 | \end{align}
 80 | to form the equivalent program
 81 | \begin{maxi!}{\vy,t}{\vc^T\vy + at}{}{}
 82 | \addConstraint{\mA\vy}{\le \vb t}
 83 | \addConstraint{\vd^T\vy+bt}{=1}
 84 | \addConstraint{t}{\ge0}
 85 | \end{maxi!}
 86 | We then have $\vx^*=\frac{1}{t}\vy$.
 87 | 
 88 | \subsection{LP as SOCP}
 89 | 
 90 | The linear program
 91 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
 92 | \addConstraint{\mA\vx}{\le \vb}
 93 | \end{mini!}
 94 | becomes can be cast as an SOCP:
 95 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
 96 | \addConstraint{\norm{\mC_i \vx+\vd_i}_2}{\le \vb_i - \va_i^T \vx}{\forall i}
 97 | \end{mini!}
 98 | where $\mC_i=0, d_i=0~\forall i$.
 99 | 
100 | \subsection{QCQP as SOCP}
101 | 
102 | The quadratic constrainted quadratic program
103 | \begin{mini!}{\vx}{\vx^T\mQ_0\vx+\va_0^T\vx}{}{}
104 | \addConstraint{\vx^T\mQ_i\vx+\va_i^T\vx}{\le b_i}{~~i=1,\ldots,m}
105 | \end{mini!}
106 | with $\mQ_i=\mQ_i^T\ispsd0$, $i=0,\ldots,m$ can be cast as an SOCP:
107 | \begin{mini!}{\vx,t}{\va_0^T\vx + t}{}{}
108 | \addConstraint{\norm{\begin{bmatrix} 2 \mQ_0^{1/2}\vx \\ t-1 \end{bmatrix}}_2}{\le t+1}
109 | \addConstraint{\norm{\begin{bmatrix} 2 \mQ_i^{1/2}\vx \\ b_i-\va_i^T\vx-1 \end{bmatrix}}_2}{\le b_i-\va_i^T\vx+1}{~~i=1,\ldots,m}
110 | \end{mini!}
111 | 
112 | 
113 | \subsection{QP as SOCP}
114 | 
115 | The quadratic program
116 | \begin{mini!}{\vx}{\frac{1}{2}\vx^T\mQ\vx+\vc^T\vx}{}{}
117 | \addConstraint{\va_i^T\vx}{\le \vb_i}
118 | \end{mini!}
119 | with $\mQ=\mQ^T\ispsd0$ can be cast as an SOCP:
120 | \begin{mini!}{\vx,\vy}{\vc^T \vx+y}{}{}
121 | \addConstraint{\norm{
122 | \begin{bmatrix} 2 \mQ^{1/2}\vx \\ y-1 \end{bmatrix}}_2}{\le y+1}
123 | \addConstraint{\va_i^T \vx}{\le \vb_i}{~~\forall i}
124 | \end{mini!}
125 | 
126 | \subsection{Sum of L2 Norms to SOCP}
127 | 
128 | \begin{mini!}{\vx}{\sum_{i=1}^p \norm{\mA_i\vx-\vb_i}_2}{}{}
129 | \end{mini!}
130 | becomes
131 | \begin{mini!}{\vx,y}{\sum_{i=1}^p y_i}{}{}
132 | \addConstraint{\norm{\mA_i\vx-\vb_i}_2}{\le y_i}{~~i=1,\ldots,p}
133 | \end{mini!}
134 | 
135 | \subsection{Minimax of L2 Norms to SOCP}
136 | 
137 | \begin{mini!}{\vx}{\max_{i=1,\ldots,p} \norm{\mA_i\vx-\vb_i}_2}{}{}
138 | \end{mini!}
139 | becomes
140 | \begin{mini!}{\vx,y}{y}{}{}
141 | \addConstraint{\norm{\mA_i\vx-\vb_i}_2}{\le y}{~~i=1,\ldots,p}
142 | \end{mini!}
143 | 
144 | \subsection{Hyperbolic Constraints to SOCP}
145 | 
146 | For scalar $w$, a constraint of the form
147 | \begin{equation}
148 | w^2\le xy, ~~x\ge0, ~~y\ge0
149 | \end{equation}
150 | can be transformed into the SOCP constraint
151 | \begin{equation}
152 | \norm{\begin{bmatrix} 2w \\ x-y \end{bmatrix}}_2 \le x+y \eqcite{Lobo1998}
153 | \end{equation}
154 | 
155 | For vector $\vw$, a constraint of the form
156 | \begin{equation}
157 | \vw^T\vw = \norm{\vw}_2^2 \le xy, ~~x\ge0, ~~y\ge0
158 | \end{equation}
159 | can be transformed into the SOCP constraint
160 | \begin{equation}
161 | \label{equ:hyperbolic_constraint_to_socp}
162 | \norm{\begin{bmatrix} 2\vw \\ x-y \end{bmatrix}}_2 \le x+y \eqcite{Lobo1998,Alizadeh2003}
163 | \end{equation}
164 | Note that this implies that
165 | \begin{equation}
166 | x^{-1}\le y \iff \norm{\begin{bmatrix} 2 \\ x-y \end{bmatrix}}_2 \le x+y %TODO: For x>0 ?
167 | \end{equation}
168 | 
169 | %TODO: From slides
170 | % A constraint of the form
171 | % \begin{equation}
172 | % \norm{x}_2^2\le 2yz, ~~y\ge0, ~~z\ge0
173 | % \end{equation}
174 | % can be transformed into the SOCP constraint
175 | % \begin{equation}
176 | % \norm{\begin{bmatrix} x \\ \frac{1}{\sqrt{2}}(y-z) \end{bmatrix}}_2 \le \frac{1}{\sqrt{2}}(y+z)
177 | % \end{equation}
178 | 
179 | %TODO Lobo1998 fractional constraints as SOCPs
180 | 
181 | \subsection{Matrix Fractional to SOCP}
182 | 
183 | The problem
184 | \begin{mini!}{\vx}{(\mF\vx+\vg)^T(\mP_0+\vx_1\mP+\ldots+\vx_p\mP_P)^{-1}(\mF\vx+\vg)}{}{}
185 | \addConstraint{\mP_0+\vx_1\mP+\ldots+\vx_p\mP_P}{>0}
186 | \addConstraint{\vx}{\ge0}
187 | \end{mini!}
188 | where $\mP_i=\mP_i^T\in\sRnn$, $\mF\in\sRnp$, $\vg\in\sRn$, and $\vx\in\sRp$ can be transformed into the SOCP where $t_i\in\sR, \vy_i\in\sRn$:
189 | \begin{mini!}{\vx,t}{t_0+\ldots+t_p}{}{}
190 | \addConstraint{\mP_0^{1/2}\vy_0+\ldots+\mP_p^{1/2}\vy_p}{=\mF\vx+\vg} \eqcite{Lobo1998}
191 | \addConstraint{\norm{\begin{bmatrix} 2 \vy_0 \\ t_0-1\end{bmatrix}}_2}{\le t_0+1}
192 | \addConstraint{\norm{\begin{bmatrix} 2 \vy_i \\ t_i-x_i \end{bmatrix}}_2}{\le t_i+x_i}{~~i=1,\ldots,p}
193 | \end{mini!}
194 | 
195 | \subsection{Fractional Objective to SOCP}
196 | 
197 | Convert
198 | \begin{mini!}{\vx}{\frac{f(x)^2}{g(x)}}{}{}
199 | \addConstraint{g(x)}{>0}
200 | \end{mini!}
201 | to
202 | \begin{mini!}{\vx,t}{t}{}{}
203 | \addConstraint{f(x)^2}{\le t g(y)}
204 | \addConstraint{g(y)}{>0}
205 | \addConstraint{t}{\ge 0}
206 | \end{mini!}
207 | and apply \autoref{equ:hyperbolic_constraint_to_socp}.
208 | 
209 | \subsection{Chance-Constrained LP to SOCP}
210 | 
211 | The problem
212 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
213 | \addConstraint{\textrm{Prob}\{\va_i^T\vx\le \vb_i\}}{\ge p_i}{~~i=1,\ldots,m}
214 | \end{mini!}
215 | where $p_i>0.5$ and all $\va_i$ are independent normal random vectors with expected values $\bar \va_i$ and covariance matrices $\Sigma_i\ispd0$, can be transformed into the SOCP:
216 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
217 | \addConstraint{\bar \va_i^T \vx \le b_i-\Phi^{-1}(p_i)\norm{\Sigma_i^{1/2}\vx}_2}{~~i=1,\ldots,m}
218 | \end{mini!}
219 | where $\Phi^{-1}(p)$ is the inverse cumulative probability distribution of a standard normal variable.
220 | 
221 | %https://stanford.edu/class/ee364a/lectures/chance_constr.pdf
222 | Likewise, the problem
223 | \begin{maxi!}{\vx}{\vc^T \vx}{}{}
224 |   \addConstraint{\textrm{Prob}\{\va_i^T\vx\le \vb_i\}}{\le p_i}{~~i=1,\ldots,m}
225 | \end{maxi!}
226 | transforms to
227 | \begin{maxi!}{\vx}{\vc^T \vx}{}{}
228 |   \addConstraint{\bar \va_i^T \vx \ge \Phi^{-1}(1-p_i)\norm{\Sigma_i^{1/2}\vx}_2}{~~i=1,\ldots,m}
229 | \end{maxi!}
230 | provided $p_i\le0.5$.
231 | 
232 | \subsection{Robust LP with Box Uncertainty as LP}
233 | 
234 | The problem
235 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
236 | \addConstraint{\va_i^T \vx}{\le b_i}{~~\forall \va_i\in\{\hat \va_i + \rho_i \vu : \norm{\vu}_\infty\le1\}}{~~i=1,\ldots,m}
237 | \end{mini!}
238 | is equivalent to
239 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
240 | \addConstraint{\hat \va_i^T \vx + \rho_i\norm{\vx}_1}{\le b_i}{~~i=1,\ldots,m}
241 | \end{mini!}
242 | which is equivalent to:
243 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
244 | \addConstraint{\hat \va_i^T \vx + \rho_i \sum_{j=1}^n \vu_j}{\le b_i}{~~i=1,\ldots,m}
245 | \addConstraint{-\vu_j}{\le \vx_j\le\vu_j}{~~j=1,\ldots,n}
246 | \end{mini!}
247 | 
248 | \subsection{Robust LP with Ellipsoidal Uncertainty as SOCP}
249 | 
250 | The problem
251 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
252 | \addConstraint{\va_i^T \vx}{\le b_i}{~~\forall \va_i\in\{\hat \va_i + \mR_i \vu : \norm{\vu}_2\le1\}}{~~i=1,\ldots,m}
253 | \end{mini!}
254 | is equivalent to
255 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
256 | \addConstraint{\hat \va_i^T \vx + \norm{\mR_i^T \vx}_2}{\le b_i}{~~i=1,\ldots,m}
257 | \end{mini!}
258 | 
259 | \subsection{Square Root as SOCP}
260 | \begin{equation}
261 | \sqrt{x}\ge t \iff x\ge t^2 \iff \norm{\begin{matrix} 1-x \\ 2t \end{matrix}}_2 \le 1+x %TODO: For x>=0?
262 | \end{equation}
263 | 
264 | The problem
265 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
266 | \addConstraint{\va_i^T \vx}{\le b_i}{~~\forall \va_i\in\{\hat \va_i + \mR_i \vu : \norm{\vu}_2\le1\}}{~~i=1,\ldots,m}
267 | \end{mini!}
268 | is equivalent to
269 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
270 | \addConstraint{\hat \va_i^T \vx + \norm{\mR_i^T \vx}_2}{\le b_i}{~~i=1,\ldots,m}
271 | \end{mini!}
272 | 
273 | 
274 | \section{Useful Problems}
275 | 
276 | \begin{align}
277 | \textrm{average}(\vv) &= \min_{x\in\sR} \norm{\vv-x\mathbf{1}}_2^2 \\
278 | \textrm{median}(\vv) &= \min_{x\in\sR} \norm{\vv-x\mathbf{1}}_1
279 | \end{align}


--------------------------------------------------------------------------------
/src/basics.tex:
--------------------------------------------------------------------------------
  1 | \chapter{Basics}
  2 | 
  3 | \section{Fundamental Theorem of Linear Algebra}
  4 | 
  5 | \begin{center}
  6 | \includegraphics[width=\textwidth]{imgs/fund_theorem_lin_alg1.png}
  7 | \includegraphics[width=\textwidth]{imgs/fund_theorem_lin_alg2.png}
  8 | \includegraphics[width=\textwidth]{imgs/fund_theorem_lin_alg3.png}
  9 | \includegraphics[width=\textwidth]{imgs/fund_theorem_lin_alg4.png}
 10 | \includegraphics[width=\textwidth]{imgs/fund_theorem_lin_alg5.png}
 11 | \end{center}
 12 | 
 13 | 
 14 | \section{Matrix Properties}
 15 | 
 16 | \begin{align}
 17 | \mA(\mB+\mC) &=   \mA\mB+\mA\mC &\textrm{(left distributivity)}   \\
 18 | (\mB+\mC)\mA &=   \mB\mA+\mC\mA &\textrm{(right distributivity)}  \\
 19 | \mA\mB       &\ne \mB\mA        &\textrm{(in general)}            \\
 20 | (\mA\mB)\mC  &=   \mA(\mB\mC)   &\textrm{(associativity)}
 21 | \end{align}
 22 | 
 23 | \section{Rank}
 24 | 
 25 | \begin{align}
 26 | \noalign{If $\mA\in\sRmn$ and $\mB\in\sRnr$, then}
 27 | \eqcite{Thome2016}
 28 | \rank(\mA)+\rank(\mB)-n\le \rank(\mA\mB)\le \min(\rank(\mA),\rank(\mB)) &&~~~~\textrm{Sylvester's Inequality} \\
 29 | \noalign{If $\mA\mB$, $\mA\mB\mC$, $\mB\mC$ are defined, then}
 30 | \eqcite{Thome2016}
 31 | \rank(\mA\mB)+\rank(\mB\mC)\le \rank(\mB)+\rank(\mA\mB\mC) && \textrm{Frobenius's inequality} \\
 32 | \noalign{If $\dim(\mA)=\dim(\mB)$, then}
 33 | \rank(\mA+\mB)\le\rank(\mA)+\rank(\mB) &&\textrm{Subadditivity}
 34 | \end{align}
 35 | If $\mA_1, \mA_2, \ldots, \mA_l$ have $n_1,n_2,\ldots,n_l$ columns, so that $\mA_1\mA_2\ldots\mA_l$ is well-defined, then
 36 | \begin{equation}
 37 | \eqcite{Thome2016}
 38 | \rank(\mA_1\mA_2\ldots\mA_l)
 39 | \ge \sum_{i=1}^{l-1}\rank(\mA_i\mA_{i+1})-\sum_{i=2}^{l-1}\rank(\mA_i)
 40 | \ge\sum_{i=1}^l\rank(\mA_i)-\sum_{i=1}^{l-1}n_i
 41 | \end{equation}
 42 | 
 43 | \section{Identities}
 44 | \begin{align}
 45 | \left(\sum_{i=1}^n \vz_i\right)^2 = \vz^T
 46 | \begin{bmatrix}
 47 | 1      & \hdots & 1      \\
 48 | \vdots & \ddots & \vdots \\
 49 | 1      & \hdots & 1
 50 | \end{bmatrix}
 51 | \vz
 52 | \end{align}
 53 | 
 54 | \section{Matrix Multiplication}
 55 | 
 56 | For $\mA\in\sR^{i,j}$ and $\mB\in\sR^{j,k}$ and $\mC\in\sR^{l,k}$
 57 | \begin{align}
 58 | [\mA\mB]_{ik} &= \sum_j \mA_{ij}\mB_{jk} \\
 59 | [\mA\mB\mC^T]_{il} &= \sum_j \mA_{ij}[\mB\mC^T]_{jl}=\sum_j \mA_{ij}\sum_k \mB_{jk}\mC_{lk}=\sum_j\sum_k \mA_{ij}\mB_{jk}\mC_{lk}
 60 | \end{align}
 61 | %TODO: Algorithms and orderings
 62 | 
 63 | 
 64 | 
 65 | \section{Transpose Properties}
 66 | 
 67 | \begin{align}
 68 | (c\mA)^T            &= c\mA^T                \\
 69 | (\mA\mB)^T          &= \mB^T\mA^T            \\
 70 | (\mA\mB\mC\ldots)^T &= \ldots\mC^T\mB^T\mA^T \\
 71 | (\mA+\mB)^T         &= \mA^T+\mB^T           \\
 72 | (\mA+\mB+\ldots)^T  &= \mA^T+\mB^T+\ldots^T  \\
 73 | (\mA^{-1})^T        &= (\mA^T)^{-1}
 74 | \end{align}
 75 | 
 76 | \section{Conjugate Tranpose}
 77 | 
 78 | \begin{align}
 79 | (\mA^H)^{-1}        &= (\mA^{-1})^H          \\
 80 | (\mA+\mB)^H         &= \mA^H+\mB^H           \\
 81 | (\mA+\mB+\ldots)^H  &= \mA^H+\mB^H+\ldots^H  \\
 82 | (\mA\mB)^H          &= \mB^H \mA^H           \\
 83 | (\mA\mB\mC\ldots)^H &= \ldots\mC^H\mB^H\mA^H
 84 | \end{align}
 85 | 
 86 | 
 87 | \section{Determinant Properties}
 88 | The determinant is only defined for square matrices; here we assume that $\mA\in\sRnn$.
 89 | 
 90 | \begin{align}
 91 | \det(\mI_n)        &= 1                                   \\
 92 | \det(\mA^T)        &= \det(\mA)                           \\
 93 | \det(\mA^H)        &= \det(\mA)^H                         \\
 94 | \det(\mA^{-1})     &= 1/\det(\mA)                         \\
 95 | \det(\mA\mB)       &= \det(\mB\mA)                        \\
 96 | \det(\mA\mB)       &= \det(\mA)\det(\mB)                  &\mB\in\sRnn \\
 97 | \det(c\mA)         &= c^n\det(\mA)                        \\
 98 | \det(\mA)          &= \prod \eig(\mA)                     \\
 99 | \det(\mA^n)        &= \det(\mA)^n                         \\
100 | \det(-\mA)         &= (-1)^n\det(\mA)                     \\
101 | \det(\mA^c)        &= \det(\mA)^c                         \\
102 | \det(\mI+\vu \vv^T)&= 1 + \vu^T \vv                       \\
103 | \det(\mB\mA\mB^{-1}) &= \det(\mA)                         \\
104 | \det(\mB\mA\mB^{-1}-c\mI) &= \det(\mA-c\mI)               \\
105 | \noalign{For n=2:}
106 | \det(\mI+\mA)      &= 1 + \det(\mA)+\trace(\mA) \\
107 | \det(\mA) &=\begin{vmatrix} a & b \\ c & d \end{vmatrix} = ad-bc \\
108 | \noalign{For n=3:}
109 | \det(\mI+\mA)      &= 1 + \det(\mA)+\trace(\mA) + \frac{1}{2}\trace(\mA)^2-\frac{1}{2}\trace(\mA^2) \\
110 | \det(\mA) &=\begin{vmatrix} a & b & c \\ d & e & f \\ g & h & i \end{vmatrix} =
111 |  a\begin{vmatrix} e & f \\ h & i \end{vmatrix}
112 | -b\begin{vmatrix} d & f \\ g & i \end{vmatrix}
113 | +c\begin{vmatrix} d & e \\ g & h \end{vmatrix} \\
114 | \noalign{For n=4:}
115 | \det(\mI+\mA)      &= 1 + \det(\mA)+\trace(\mA) + \frac{1}{2}\trace(\mA)^2-\frac{1}{2}\trace(\mA^2)  \\
116 |                    &    + \frac{1}{6}\trace(\mA)^3-\frac{1}{2}\trace(\mA)\trace(\mA^2)+\frac{1}{3}\trace(\mA^3) \\
117 | \noalign{For small $\epsilon$:}
118 | \det(\mI+\epsilon\mA) &\approx 1 + \det(\mA) + \epsilon\trace(\mA)+\frac{1}{2}\epsilon^2\trace(\mA)^2-\frac{1}{2}\epsilon^2\trace(\mA^2) \\ %TODO: Check from MatrixCookbook
119 | \det(\mI+\epsilon\mA) &\approx 1 + \epsilon\trace(\mA) + O(\epsilon^2) \\ %TODO: Check: From MathWorld
120 | \noalign{Sylvester's determinant identity, for $\mA\in\sRmn, \mB\in\sRnm$}
121 | \eqcite{Sylvester1851}
122 | \det(\mI_m+\mA\mB) &= \det(\mI_n+\mB\mA)                    \\
123 | \det(\mX+\mA\mB)   &= \det(\mX)\det(\mI_n + \mB\mX^{-1}\mA) \\
124 | \noalign{If $\mA$ is triangular}
125 | \det(\mA) &= \prod_i \mA_{i,i} = \prod_i \diag(\mA)_i \\
126 | \noalign{If all entries of $\mA\in\sCnn$ are in the unit disk}
127 | \det(\mA)\le n^{n/2} \eqcite{Hadamard1893} \\
128 | \noalign{Schur's determinant identities}
129 | \det(\mM) &= \det(\begin{bmatrix} \mA & \mB \\ \mC & \mD \end{bmatrix}) = \det(\mA) \det(\mD -\mC \mA^{-1}\mB) \\
130 | \det(\mM) &= \det(\begin{bmatrix} \mA & \mB \\ \mC & \mD \end{bmatrix}) = \det(\mD) \det(\mA -\mB \mD^{-1}\mC) \\
131 | \end{align}
132 | %TODO: Matix exponential identities det(A)=log(det(exp(A)))
133 | 
134 | Geometrically, if a unit volume is acted on by $\mA$, then $|\det(\mA)|$ indicates the volume after the transformation.
135 | 
136 | 
137 | \section{Trace Properties}
138 | The Trace is only defined for square matrices.
139 | \begin{align}
140 | \trace(\mA)      &=\sum_i \mA_{ii}               \\
141 | \trace(\mA)      &=\sum_i \eig(\mA)              \\
142 | \trace(\mA+\mB)  &=\trace(\mA)+\trace(\mB)       \\
143 | \trace(c\mA)     &=c\trace(\mA)                  \\
144 | \trace(\mA)      &=\trace(\mA^T)                 \\
145 | \trace(\mA\mB)   &=\trace(\mB\mA)                \\
146 | \trace(\mA^T\mB) &=\sum_{i,j} \mA_{ij}\mB_{ij}   \\  %TODO: For real matrices only?
147 | \trace(\mA^T\mB) &=\sum_{i,j} (\mA\circ\mB)_{ij} \\  %TODO: For real matrices only?
148 | \va^T \va        &=\trace(\va \va^T)
149 | \end{align}
150 | 
151 | For $\mA,\mB,\mC,\mD$ of compatible dimensions,
152 | 
153 | \begin{equation}
154 | \trace(\mA^T\mB)=\trace(\mA\mB^T)=\trace(\mB^T\mA)=\trace(\mB\mA^T)
155 | \end{equation}
156 | \begin{equation}
157 | \trace(\mA\mB\mC\mD)=\trace(\mB\mC\mD\mA)=\trace(\mC\mD\mA\mB)=\trace(\mD\mA\mB\mC)
158 | \end{equation}
159 | (Invariant under cyclic permutations)
160 | 
161 | 
162 | 
163 | \section{Inverse Properties}
164 | The inverse of $\mA\in\sCnn$ is denoted $\mA^{-1}$ and defined such that
165 | \begin{equation}
166 | \mA\mA^{-1}=\mA^{-1}\mA=\mI_n
167 | \end{equation}
168 | where $\mI_n$ is the $n \times n$ identity matrix. $\mA$ is nonsingular if $\mA^{-1}$ exists; otherwise, $\mA$ is singular.
169 | 
170 | 
171 | If individual inverses exist
172 | \begin{equation}
173 | (\mA\mB)^{-1}=\mB^{-1}\mA^{-1}
174 | \end{equation}
175 | more generally
176 | \begin{equation}
177 | (\mA\mB\mC\ldots)^{-1}=\ldots\mC^{-1}\mB^{-1}\mA^{-1}
178 | \end{equation}
179 | 
180 | \begin{equation}
181 | (\mA^{-1})^T=(\mA^T)^{-1}
182 | \end{equation}
183 | \begin{equation}
184 | (\mA^H)^{-1}=(\mA^{-1})^H
185 | \end{equation}
186 | 
187 | Hua's Identity:
188 | \begin{align}
189 | (\mA+\mB)^{-1} &= \mAi - (\mA+\mA\mBi\mA)^{-1} \\
190 | (\mA-\mB)^{-1} &= \sum_{k=0}^\infty (\mAi\mB)^k\mAi \\
191 | \end{align}
192 | 
193 | 
194 | 
195 | 
196 | \section{Moore--Penrose PseudoInverse}
197 | For $\mA\in\sRmn$, the Moore--Penrose pseudoinverse $\mA\pinv$ satisfies:
198 | \begin{align}
199 | \mA\mA\pinv\mA      &= \mA                            \\
200 | \mA\pinv\mA\mA\pinv &= \mA\pinv                       \\
201 | (\mA\mA\pinv)^T     &= \mA\mA\pinv\ \textrm{(symmetric)} \\
202 | (\mA\pinv\mA)^T     &= \mA\pinv\mA\ \textrm{(symmetric)}
203 | \end{align}
204 | If $\mA\pinv$ exists, it is unique. For complex matrices the symmetry condition is replaced by a requirement that the matrix be Hermitian.
205 | 
206 | If $\mA\in\sCmn$, then:
207 | \begin{align}
208 | (\mA\pinv)\pinv     &=   \mA                  \\
209 | (\mA^T)\pinv        &=   (\mA\pinv)^T         \\
210 | (\mA^H)\pinv        &=   (\mA\pinv)^H         \\
211 | (\mA^*)\pinv        &=   (\mA\pinv)^*         \\
212 | (\mA\pinv\mA)\mA^H  &=   \mA^H                \\
213 | (\mA\pinv\mA)\mA^T  &\ne \mA^T                \\
214 | (c\mA)\pinv         &=   (1/c)\mA\pinv        \\
215 | \mA\pinv            &=   (\mA^T\mA)\pinv\mA^T \\
216 | \mA\pinv            &=   \mA^T(\mA\mA^T)\pinv \\
217 | (\mA^T\mA)\pinv     &=   \mA\pinv(\mA^T)\pinv \\
218 | (\mA\mA^T)\pinv     &=   (\mA^T)\pinv\mA\pinv \\
219 | \mA\pinv            &=   (\mA^H\mA)\pinv\mA^H \\
220 | \mA\pinv            &=   \mA^H(\mA\mA^H)\pinv \\
221 | (\mA^H\mA)\pinv     &=   \mA\pinv(\mA^H)\pinv \\
222 | (\mA\mA^H)\pinv     &=   (\mA^H)\pinv\mA\pinv \\
223 | (\mA\mB)\pinv       &=   (\mA\pinv\mA\mB)\pinv(\mA\mB\mB\pinv)\pinv
224 | \end{align}
225 | 
226 | If $\mA$ is full-rank, then:
227 | \begin{align}
228 | (\mA\mA\pinv)(\mA\mA\pinv) &= \mA\mA\pinv                           \\
229 | (\mA\pinv\mA)(\mA\pinv\mA) &= \mA\pinv\mA                           \\
230 | \trace(\mA\mA\pinv)        &= \rank(\mA\mA\pinv) \eqcite{Seber2002} \\
231 | \trace(\mA\pinv\mA)        &= \rank(\mA\pinv\mA) \eqcite{Seber2002}
232 | \end{align}
233 | 
234 | \subsection*{Special Properties}
235 | \begin{itemize}
236 | \item $\mA\pinv=\mA^{-1}$ if $\mA\in\sRnn$ and $\mA$ is square and nonsingular.
237 | \item $\mA\pinv=(\mA^T\mA)^{-1}\mA^T$, if $\mA\in\sRmn$ is full column rank ($r=n\le m$). $\mA\pinv$ is a left inverse of $\mA$, so $\mA\pinv\mA=\mV_r\mV_r^T=\mV\mV^T=\mI_n$.
238 | \item $\mA\pinv=\mA^T(\mA\mA^T)^{-1}$, if $\mA\in\sRmn$ is full row rank ($r=m\le n$). $\mA\pinv$ is a right inverse of $\mA$, so $\mA\mA\pinv=\mU_r\mU_r^T=\mU\mU^T=\mI_m$.
239 | \end{itemize} %TODO: Check these
240 | 
241 | 
242 | %TODO
243 | % \subsection{Moore-Penrose Pseudoinverse}
244 | % \begin{equation}
245 | % \mA\pinv = \mV \mD^{-1} \mU^T
246 | % \end{equation}
247 | % where the foregoing comes from a singular-value decomposition and $\mD^{-1}=\diag(\frac{1}{\sigma_1},\ldots,\frac{1}{\sigma_r})$
248 | 
249 | 
250 | 
251 | \section{Hadamard Identities}
252 | 
253 | \begin{align}
254 | (\mA\circ\mB)_{ij}    &= A_{ij}B_{ij}~\forall~i,j                                     \\
255 | \mA\circ\mB           &= \mB\circ\mA                             \eqcite{million2007} \\
256 | \mA\circ(\mB\circ\mC) &= (\mA\circ\mB)\circ\mC                                        \\
257 | \mA\circ(\mB+\mC)     &= \mA\circ\mB+\mA\circ\mC                 \eqcite{million2007} \\
258 | a(\mA\circ\mB)        &= (a\mA)\circ\mB =\mA\circ(a\mB)          \eqcite{million2007} \\
259 | (\mA^T\circ\mB^T)     &= (\mA\circ\mB)^T                                              \\
260 | (\mA^T\circ\mB^T)     &= (\mA\circ\mB)^T                                              \\
261 | (\vx^T \mA \vx)       &= \sum_{i,j}\big((\vx \vx^T)\circ\mA\big)                      \\
262 | \vx^T(\mA\circ\mB)\vy &= \trace((\diag(\vx)\mA)^T \mB\diag(\vy))~~~\mA,\mB\in\sRmn \eqcite{Minka2000}   \\
263 | \trace(\mA^T\mB)      &= \mathbf{1}^T(\mA\circ\mB)\mathbf{1}                          \\
264 |                       &= \sum_{i,j} \mA_{ij}\mB_{ij}
265 | \end{align}
266 | 


--------------------------------------------------------------------------------
/src/derivatives.tex:
--------------------------------------------------------------------------------
  1 | %TODO: Include handy diagram from https://explained.ai/matrix-calculus/index.html
  2 | 
  3 | \chapter{Derivatives}
  4 | 
  5 | \section{Useful Rules for Derivatives}
  6 | For general $\mA$ and $\mX$ (no special structure):
  7 | \begin{align}
  8 | \partial\mA           &= 0~~\textrm{where $\mA$ is a constant} \\
  9 | \partial(c\mX)        &= c\partial\mX                          \\
 10 | \partial(\mX+\mY)     &= \partial\mX+\partial\mY               \\
 11 | \partial(\trace(\mX)) &= \trace(\partial(\mX))                 \\
 12 | \partial(\mX\mY)      &= (\partial\mX)\mY + \mX(\partial\mY)   \\
 13 | \partial(\mX\circ\mY) &= (\partial\mX)\circ\mY + \mX\circ(\partial\mY) \\
 14 | %TODO Kronecker x in circle equation 39 Matrix Cookbook
 15 | \partial(\mX^{-1})    &= -\mX^{-1}(\partial\mX)\mX^{-1}        \\
 16 | \partial(\det(\mX))   &= \trace(\textrm{adj}(\mX)\partial\mX)  \\
 17 | \partial(\det(\mX))   &= \det(\mX)\trace(\mX^{-1}\partial\mX)  \\
 18 | \partial(\ln(\det(\mX))) &= \trace(\mX^{-1}\partial\mX)        \\
 19 | \partial(\mXT)       &= (\partial\mX)\T                       \\
 20 | \partial(\mX^H)       &= (\partial\mX)^H
 21 | \end{align}
 22 | 
 23 | \section{Gradient Notation}
 24 | For a matrix $\mA\in\sRnm$, the gradient is defined as:
 25 | \begin{equation}
 26 | \grad_\mA f(\mA)=
 27 | \begin{bmatrix}
 28 | \pd{f(\mA)}{\mA_{11}} & \pd{f(\mA)}{\mA_{12}} & \ldots & \pd{f(\mA)}{\mA_{1n}} \\
 29 | \pd{f(\mA)}{\mA_{21}} & \pd{f(\mA)}{\mA_{22}} & \ldots & \pd{f(\mA)}{\mA_{2n}} \\
 30 | \vdots                & \vdots                & \ddots & \vdots                \\
 31 | \pd{f(\mA)}{\mA_{m1}} & \pd{f(\mA)}{\mA_{m2}} & \ldots & \pd{f(\mA)}{\mA_{mn}}
 32 | \end{bmatrix}
 33 | \end{equation}
 34 | i.e.
 35 | \begin{equation}
 36 | (\grad_\mA f(\mA))_{ij}=\pd{f(\mA)}{\mA_{ij}}
 37 | \end{equation}
 38 | Note that the size of the gradient is always the same size as the entity to which it is taken. Also note that the gradient of a function is only defined if the function is real-valued, that is, if it returns a scalar value.
 39 | 
 40 | \section{Derivatives of Matrices and Vectors}
 41 | 
 42 | \subsection{First-Order}
 43 | 
 44 | In the following, $\mJ$ is the Single-Entry Matrix (\autoref{sec:rogue_single_entry}).
 45 | \begin{align}
 46 | \pd{\vx\T \va}{\vx}     &= \pd{\vaT \vx}{\vx} = \va          \\
 47 | \pd{\va\T\mX\vb}{\mX}   &= \va\vbT                           \\
 48 | \pd{\va\T\mXT\vb}{\mX}  &= \vb\vaT                           \\
 49 | \pd{\va\T\mX\va}{\mX}   &= \pd{\vaT\mXT\va}{\mX} = \va\vaT \\
 50 | \pd{\mX}{\mX_{ij}}      &= \mJ^{ij}                              %TODO: What is this? (MCB 73)
 51 | %TODO: MCB 74, 75
 52 | \end{align}
 53 | 
 54 | \section{Derivatives of vector norms}
 55 | 
 56 | \begin{align}
 57 | \pd{}{\vx}\norm{\vx-\va}_2 &= \frac{\vx-\va}{\norm{\vx-\va}_2} \\
 58 | \pd{}{\vx}\frac{\vx-\va}{\norm{\vx-\va}_2} &= \frac{\mI}{\norm{\vx-\va}_2}-\frac{(\vx-\va)(\vx-\va)\T}{\norm{\vx-\va}_2^3} \\
 59 | \pd{\norm{\vx}_2^2}{\vx} &= \pd{\norm{\vxT\vx}_2}{\vx} = 2\vx
 60 | \end{align}
 61 | 
 62 | \section{Scalar by Vector}
 63 | \begin{center}
 64 | \begin{tabular}{l|Sc|Sc|Sc}
 65 | Qualifier                  & Expression                                    & Numerator layout                                   & Denominator layout                            \\
 66 |                            & $\pd{a}{x}$                                   & $\vzero\T$                                         & $\vzero$                                      \\
 67 |                            & $\pd{au(\vx)}{\vx}$                           & $a\pd{u}{\vx}$                                     & Same                                          \\
 68 |                            & $\pd{u(\vx)+v(\vx)}{\vx}$                     & $\pd{u}{\vx} + \pd{v}{\vx}$                        & Same                                          \\
 69 |                            & $\pd{u(\vx)v(\vx)}{\vx}$                      & $u\pd{v}{\vx} + v\pd{u}{\vx}$                      & Same                                          \\
 70 |                            & $\pd{g(u(\vx))}{\vx}$                         & $\pd{g(u)}{u}\pd{u}{\vx}$                          & Same                                          \\
 71 |                            & $\pd{f(g(u(\vx)))}{\vx}$                      & $\pd{f(g)}{g}\pd{g(u)}{u}\pd{u}{\vx}$              & Same                                          \\
 72 |                            & $\pd{\vu(\vx)\T\vv(\vx)}{\vx}$                & $\vuT\pd{\vv}{\vx}+\vvT\pd{\vu}{\vx}$              & $\pd{\vu}{\vx}\vv+\pd{\vv}{\vx}\vu$           \\
 73 |                            & $\pd{\vu(\vx)\T\mA\vv(\vx)}{\vx}$             & $\vuT\mA\pd{\vv}{\vx}+\vvT\mAT\pd{\vu}{\vx}$       & $\pd{\vu}{\vx}\mA\vv+\pd{\vv}{\vx}\mAT\vu$    \\
 74 |                            & $\md{f}{2}{\vx}{}{{\vxT}}{}$                  &                                                    & $\mH$, the Hessian matrix                     \\
 75 |                            & $\pd{\va\cdot\vx}{\vx}=\pd{\vx\cdot\va}{\vx}$ & $\vaT$                                             & $\va$                                         \\
 76 |                            & $\pd{\vbT\mA\vx}{\vx}$                        & $\vbT\mA$                                          & $\mAT\vb$                                     \\
 77 |                            & $\pd{\vxT\mA\vx}{\vx}$                        & $\vxT(\mA+\mAT)$                                   & $(\mA+\mAT)\vx$                               \\
 78 | $\mA$ symmetric            & $\pd{\vxT\mA\vx}{\vx}$                        & $2\vxT\mA$                                         & $2\mA\vx$                                     \\
 79 |                            & $\pd{\vxT\mA\vx}{\vx}$                        & $\mA+\mAT$                                         & Same                                          \\
 80 | $\mA$ symmetric            & $\pd{\vxT\mA\vx}{\vx}$                        & $\mA$                                              & Same                                          \\
 81 |                            & $\pd{\vxT\vx}{\vx}$                           & $2\vxT$                                            & $2\vx$                                        \\
 82 |                            & $\pd{\vaT\vu(\vx)}{\vx}$                      & $\vaT\pd{\vu}{\vx}$                                & $\pd{\vu}{\vx}\va$                            \\
 83 |                            & $\pd{\vaT\vx\vxT\vb}{\vx}$                    & $\vxT(\va\vbT+\vb\vaT)$                            & $(\va\vbT+\vb\vaT)\vx$                        \\
 84 |                            & $\pd{(\mA\vx+\vb)\T\mC(\mD\vx+\ve)}{\vx}$     & $(\mD\vx+\ve)\T\mCT\mA+(\mA\vx+\vb)\T\mC\mD$       & $\mDT\mCT(\mA\vx+\vb)+\mAT\mC(\mD\vx+\ve)$    \\
 85 |                            & $\pd{\norm{\vx-\va}}{\vx}$                    & $\frac{(\vx-\va)\T}{\norm{\vx-\va}}$               & $\frac{\vx-\va}{\norm{\vx-\va}}$              \\
 86 | \end{tabular}
 87 | \end{center}
 88 | 
 89 | \section{Vector by Vector}
 90 | \begin{center}
 91 | \begin{tabular}{l|Sc|Sc|Sc}
 92 | Qualifier                  & Expression                                    & Numerator layout                                   & Denominator layout                            \\
 93 |                            & $\pd{\va}{\vx}$                               & $\vzero$                                           & Same                                          \\ %TODO: really the same? shouldn't be transposed?
 94 |                            & $\pd{\vx}{\vx}$                               & $\mI$                                              & Same                                          \\ %TODO: Really the identity matrix? Which one?
 95 |                            & $\pd{\mA\vx}{\vx}$                            & $\mA$                                              & $\mAT$                                        \\
 96 |                            & $\pd{\vxT\mA}{\vx}$                           & $\mAT$                                             & $\mA$                                         \\
 97 |                            & $\pd{a\vu(\vx)}{\vx}$                         & $a\pd{\vu}{\vx}$                                   & Same                                          \\
 98 |                            & $\pd{a(\vx)\vu(\vx)}{\vx}$                    & $a\pd{\vu}{\vx}+\vu\pd{a}{\vx}$                    & $a\pd{\vu}{\vx}+\pd{a}{\vx}\vuT$              \\
 99 |                            & $\pd{\mA\vu(\vx)}{\vx}$                       & $\mA\pd{\vu}{\vx}$                                 & $\pd{\vu}{\vx}\mAT$                           \\
100 |                            & $\pd{(\vu(\vx)+\vv(\vx))}{\vx}$               & $\pd{\vu}{\vx}+\pd{\vv}{\vx}$                      & Same                                          \\
101 |                            & $\pd{\vg(\vu(\vx))}{\vx}$                     & $\pd{\vg(\vu)}{\vu}\pd{\vu}{\vx}$                  & $\pd{\vu}{\vx}\pd{\vg(\vu)}{\vu}$             \\
102 |                            & $\pd{\vf(\vg(\vu(\vx)))}{\vx}$                & $\pd{\vf(\vg)}{\vg(\vu)}\pd{\vg(\vu)}{\vu}\pd{\vu}{\vx}$& $\pd{\vu}{\vx}\pd{\vg(\vu)}{\vu}\pd{\vf(\vg)}{\vg}$
103 | \end{tabular}
104 | \end{center}
105 | 
106 | 
107 | 
108 | \section{Matrix by Scalar}
109 | \begin{center}
110 | \begin{tabular}{l|Sc|Sc}
111 | Qualifier                  & Expression                                     & Numerator layout                                       \\
112 |                            & $\pd{a\mU(x)}{x}$                              & $a\pd{\mU}{x}$                                         \\
113 |                            & $\pd{\mA\mU(x)\mB}{x}$                         & $\mA\pd{\mU}{x}\mB$                                    \\
114 |                            & $\pd{(\mU(x)+\mV(x))}{x}$                      & $\pd{\mU}{x}+\pd{\mV}{x}$                              \\
115 |                            & $\pd{(\mU(x)\mV(x))}{x}$                       & $\mU\pd{\mV}{x}+\pd{\mU}{x}\mV$                        \\
116 |                            & $\pd{(\mU(x)\kp\mV(x))}{x}$                    & $\mU\kp\pd{\mV}{x} + \pd{\mU}{x}\kp\mV$                \\
117 |                            & $\pd{(\mU(x)\hp\mV(x))}{x}$                    & $\mU\hp\pd{\mV}{x} + \pd{\mU}{x}\hp\mV$                \\
118 |                            & $\pd{\mU^{-1}(x)}{x}$                          & $-\mU^{-1} \pd{\mU}{x} \mU^{-1}$                       \\
119 |                            & $\md{\mU^{-1}}{2}{x}{}{y}{}$                   & $\mU^{-1}\left(\pd{\mU}{x}\mU^{-1}\pd{\mU}{y} - \md{\mU}{2}{x}{}{y}{} + \pd{\mU}{y}\mU^{-1}\pd{\mU}{x}\right)\mU^{-1}$ \\
120 |                            & $\pd{e^{x\mA}}{x}$                             & $\mA e^{x\mA}=e^{x\mA}\mA$
121 | \end{tabular}
122 | \end{center}
123 | 
124 | \section{Traces}
125 | \begin{align}
126 | \pd{}{\mX}\trace(\mX)             &=\mI            \\
127 | \pd{}{\mX}\trace(\mX\mA)          &=\mAT           \\
128 | \pd{}{\mX}\trace(\mA\mX)          &=\mAT           \\
129 | \pd{}{\mX}\trace(\mA\mX\mB)       &=\mAT\mBT       \\
130 | \pd{}{\mX}\trace(\mA\mXT\mB)      &=\mB\mA         \\
131 | \pd{}{\mX}\trace(\mXT\mA)         &=\mA            \\
132 | \pd{}{\mX}\trace(\mA\mXT)         &=\mA            \\
133 | \pd{}{\mX}\trace(\mA\kp\mX)       &=\trace(\mA)\mI
134 | \end{align}
135 | For traces with many instances of $\mX$ we can apply an analogue of the product rule. For example:
136 | \begin{equation}
137 | \pd{}{\mX}\trace(\mA\mX\mB\mX\mCT)=\pd{}{\mX}\trace(\mA\mX\mD)+\pd{}{\mX}\trace(\mE\mX\mCT)=\mAT\mDT+\mET\mC
138 | \end{equation}
139 | where $\mD=\mB\mX\mCT$ and $\mE=\mA\mX\mB$.
140 | 
141 | \section{Determinants}
142 | 
143 | \subsection{By Scalars}
144 | 
145 | If $\mX$ and $\mY$ are matrices with no special structure and $x$ is a scalar, then:
146 | 
147 | \begin{align}
148 | \pd{\det(\mY)}{x} &= \det(\mY) \trace\left(\mYi \pd{\mY}{x}\right) \\
149 | \sum_k \pd{\det(\mX)}{\mX_{ik}}\mX_{jk} &= \delta_{ij} \det(\mX)   \\
150 | \pd[2]{\mY}{x^2} &= \det(\mY) %TODO: Can this be simplified with squares?
151 |   \left(
152 |       \trace \left(\mYi \pd{\pd{\mY}{x}}{x}\right)
153 |     + \trace \left(\mYi\pd{\mY}{x}\right) \trace\left(\mYi\pd{\mY}{x}\right)
154 |     - \trace \left( \left(\mYi\pd{\mY}{x}\right) \left(\mYi\pd{\mY}{x}\right) \right)
155 |   \right)
156 | \end{align}
157 | 
158 | \subsection{Linear forms}
159 | 
160 | \begin{align}
161 | \pd{\det(\mX)}{\mX} &= \det(\mX)(\mXi)\T \\
162 | \sum_k \pd{\det(\mX)}{\mX_{ik}}\mX_{jk} &= \delta_{ij} \det(\mX)   \\
163 | \pd{\det(\mA\mX\mB)}{\mX} &= \det(\mA\mX\mB)(\mXi)\T \\
164 |                           &= \det(\mA\mX\mB)(\mXT)^{-1}
165 | \end{align}
166 | 
167 | \subsection{Square forms}
168 | 
169 | If $\mX$ is square and invertible:
170 | \begin{equation}
171 | \pd{\det(\mXT\mA\mX)}{\mX} = 2 \det(\mXT \mA \mX)\mXiT
172 | \end{equation}
173 | If $\mX$ is not square and $\mA$ is symmetric, then
174 | \begin{equation}
175 | \pd{\det(\mXT \mA \mX)}{\mX} = 2 \det(\mXT\mA\mX)\mA\mX(\mXT\mA\mX)^{-1}
176 | \end{equation}
177 | If $\mX$ is not square and $\mA$ is not symmetric, then
178 | \begin{equation}
179 | \pd{\det(\mXT\mA\mX)}{\mX} = \det(\mXT\mA\mX)\left(\mA\mX(\mXT\mA\mX)^{-1}+\mAT\mX(\mXT\mAT\mX)^{-1}\right)
180 | \end{equation}
181 | 
182 | \subsection{Nonlinear Forms}
183 | \begin{align}
184 | \pd{\ln\det(\mXT\mX)}{\mX}         &= 2(\mXp)\T \\
185 | \pd{\ln\det(\mXT\mX)}{\mXp}        &= -2\mXT    \\
186 | \pd{\ln\lvert\det(\mX)\rvert}{\mX} &= \mXiT     \\
187 | \pd{\det(\mX^k)}{\mX}              &= k \det(\mX^k) \mXiT
188 | \end{align}


--------------------------------------------------------------------------------
/src/refs.bib:
--------------------------------------------------------------------------------
  1 | @misc{million2007,
  2 |     title        = {The Hadamard Product},
  3 |     author       = {Elizabeth Million},
  4 |     year         = {2007},
  5 |     howpublished = {\url{http://buzzard.ups.edu/courses/2007spring/projects/million-paper.pdf}}
  6 | }
  7 | 
  8 | @book{Strang2016,
  9 |   author = {Gilbert Strang},
 10 |   title  = {Introduction to Linear Algebra},
 11 |   year   = {2016}
 12 | }
 13 | 
 14 | @incollection{Rote2001,
 15 |   title     = {Division-free algorithms for the determinant and the pfaffian: algebraic and combinatorial approaches},
 16 |   author    = {Rote, G{\"u}nter},
 17 |   booktitle = {Computational discrete mathematics},
 18 |   pages     = {119--135},
 19 |   year      = {2001},
 20 |   publisher = {Springer}
 21 | }
 22 | 
 23 | @article{Strassen1969,
 24 |   title     = {Gaussian elimination is not optimal},
 25 |   author    = {Strassen, Volker},
 26 |   journal   = {Numerische mathematik},
 27 |   volume    = {13},
 28 |   number    = {4},
 29 |   pages     = {354--356},
 30 |   year      = {1969},
 31 |   publisher = {Springer}
 32 | }
 33 | 
 34 | @article{Bareiss1968,
 35 |  ISSN      = {00255718, 10886842},
 36 |  URL       = {http://www.jstor.org/stable/2004533},
 37 |  abstract  = {A method is developed which permits integer-preserving elimination in systems of linear equations, AX = B, such that (a) the magnitudes of the coefficients in the transformed matrices are minimized, and (b) the computational efficiency is considerably increased in comparison with the corresponding ordinary (single-step) Gaussian elimination. The algorithms presented can also be used for the efficient evaluation of determinants and their leading minors. Explicit algorithms and flow charts are given for the two-step method. The method should also prove superior to the widely used fraction-producing Gaussian elimination when A is nearly singular.},
 38 |  author    = {Erwin H. Bareiss},
 39 |  journal   = {Mathematics of Computation},
 40 |  number    = {103},
 41 |  pages     = {565--578},
 42 |  publisher = {American Mathematical Society},
 43 |  title     = {Sylvester's Identity and Multistep Integer-Preserving Gaussian Elimination},
 44 |  volume    = {22},
 45 |  year      = {1968},
 46 |  doi       = {10.2307/2004533}
 47 | }
 48 | 
 49 | @article{Copper1990,
 50 | title    = "Matrix multiplication via arithmetic progressions",
 51 | journal  = "Journal of Symbolic Computation",
 52 | volume   = "9",
 53 | number   = "3",
 54 | pages    = "251 - 280",
 55 | year     = "1990",
 56 | note     = "Computational algebraic complexity editorial",
 57 | issn     = "0747-7171",
 58 | doi      = "10.1016/S0747-7171(08)80013-2",
 59 | url      = "http://www.sciencedirect.com/science/article/pii/S0747717108800132",
 60 | author   = "Don Coppersmith and Shmuel Winograd",
 61 | abstract = "We present a new method for accelerating matrix multiplication asymptotically. Thiswork builds on recent ideas of Volker Strassen, by using a basic trilinear form which is not a matrix product. We make novel use of the Salem-Spencer Theorem, which gives a fairly dense set of integers with no three-term arithmetic progression. Our resulting matrix exponent is 2.376."
 62 | }
 63 | 
 64 | @inproceedings{LeGall2014,
 65 |  author    = {Le Gall, Fran\c{c}ois},
 66 |  title     = {Powers of Tensors and Fast Matrix Multiplication},
 67 |  booktitle = {Proceedings of the 39th International Symposium on Symbolic and Algebraic Computation},
 68 |  series    = {ISSAC '14},
 69 |  year      = {2014},
 70 |  isbn      = {978-1-4503-2501-1},
 71 |  location  = {Kobe, Japan},
 72 |  pages     = {296--303},
 73 |  numpages  = {8},
 74 |  url       = {http://doi.acm.org/10.1145/2608628.2608664},
 75 |  doi       = {10.1145/2608628.2608664},
 76 |  acmid     = {2608664},
 77 |  publisher = {ACM},
 78 |  address   = {New York, NY, USA},
 79 |  keywords  = {algebraic complexity theory, matrix multiplication},
 80 | }
 81 | 
 82 | @inproceedings{Williams2012,
 83 |  author    = {Williams, Virginia Vassilevska},
 84 |  title     = {Multiplying Matrices Faster Than Coppersmith-Winograd},
 85 |  booktitle = {Proceedings of the Forty-fourth Annual ACM Symposium on Theory of Computing},
 86 |  series    = {STOC '12},
 87 |  year      = {2012},
 88 |  isbn      = {978-1-4503-1245-5},
 89 |  location  = {New York, New York, USA},
 90 |  pages     = {887--898},
 91 |  numpages  = {12},
 92 |  url       = {http://doi.acm.org/10.1145/2213977.2214056},
 93 |  doi       = {10.1145/2213977.2214056},
 94 |  acmid     = {2214056},
 95 |  publisher = {ACM},
 96 |  address   = {New York, NY, USA},
 97 |  keywords  = {matrix multiplication},
 98 | }
 99 | 
100 | @inproceedings{Pan1978,
101 |   title        = {Strassen's algorithm is not optimal trilinear technique of aggregating, uniting and canceling for constructing fast algorithms for matrix operations},
102 |   author       = {Pan, V Ya},
103 |   booktitle    = {Foundations of Computer Science, 1978., 19th Annual Symposium on},
104 |   pages        = {166--176},
105 |   year         = {1978},
106 |   organization = {IEEE},
107 |   doi          = {10.1109/SFCS.1978.34}
108 | }
109 | 
110 | @article{Bini1979,
111 |   title   = {$O(N^{2.7799})$ COMPLEXITY FOR N BY N APPROXIMATE MATRIX MULTIPLICATION},
112 |   author  = {Bini, DARIO ANDREA and Capovani, Milvio and Romani, Francesco and Lotti, Grazia},
113 |   journal = {Information processing letters},
114 |   volume  = {8},
115 |   number  = {5},
116 |   pages   = {234--235},
117 |   year    = {1979}
118 | }
119 | 
120 | @article{Schonhage1981,
121 |   title     = {Partial and total matrix multiplication},
122 |   author    = {Sch{\"o}nhage, Arnold},
123 |   journal   = {SIAM Journal on Computing},
124 |   volume    = {10},
125 |   number    = {3},
126 |   pages     = {434--455},
127 |   year      = {1981},
128 |   publisher = {SIAM}
129 | }
130 | 
131 | @article{Romani1982,
132 |   title     = {Some properties of disjoint sums of tensors related to matrix multiplication},
133 |   author    = {Romani, Francesco},
134 |   journal   = {SIAM Journal on Computing},
135 |   volume    = {11},
136 |   number    = {2},
137 |   pages     = {263--267},
138 |   year      = {1982},
139 |   publisher = {SIAM}
140 | }
141 | 
142 | @inproceedings{Strassen1986,
143 |   title        = {The asymptotic spectrum of tensors and the exponent of matrix multiplication},
144 |   author       = {Strassen, Volker},
145 |   booktitle    = {Foundations of Computer Science, 1986., 27th Annual Symposium on},
146 |   pages        = {49--54},
147 |   year         = {1986},
148 |   organization = {IEEE}
149 | }
150 | 
151 | @article{Coppersmith1982,
152 |   title     = {On the asymptotic complexity of matrix multiplication},
153 |   author    = {Coppersmith, Don and Winograd, Shmuel},
154 |   journal   = {SIAM Journal on Computing},
155 |   volume    = {11},
156 |   number    = {3},
157 |   pages     = {472--492},
158 |   year      = {1982},
159 |   publisher = {SIAM}
160 | }
161 | 
162 | @misc{Minka2000,
163 |   title        = {Old and new matrix algebra useful for statistics},
164 |   author       = {Minka, Thomas P},
165 |   howpublished = {\url{https://tminka.github.io/papers/matrix/minka-matrix.pdf}},
166 |   year         = {2000}
167 | }
168 | 
169 | @book{Calafiore2014,
170 |   title     = {Optimization models},
171 |   author    = {Calafiore, Giuseppe C and El Ghaoui, Laurent},
172 |   year      = {2014},
173 |   publisher = {Cambridge University Press},
174 |   isbn      = {978-1-107-05087-7}
175 | }
176 | 
177 | 
178 | 
179 | 
180 | 
181 | 
182 | 
183 | 
184 | 
185 | 
186 | 
187 | 
188 | 
189 | 
190 | 
191 | 
192 | 
193 | 
194 | 
195 | @article{Charnes1962,
196 | author  = {Charnes, A. and Cooper, W. W.},
197 | title   = {Programming with linear fractional functionals},
198 | journal = {Naval Research Logistics Quarterly},
199 | volume  = {9},
200 | year    = {1962},
201 | number  = {3‐4},
202 | pages   = {181-186},
203 | doi     = {10.1002/nav.3800090303},
204 | url     = {https://onlinelibrary.wiley.com/doi/abs/10.1002/nav.3800090303},
205 | eprint  = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/nav.3800090303}
206 | }
207 | 
208 | 
209 | 
210 | @article{Lobo1998,
211 |   title     = {Applications of second-order cone programming},
212 |   author    = {Lobo, Miguel Sousa and Vandenberghe, Lieven and Boyd, Stephen and Lebret, Herv{\'e}},
213 |   journal   = {Linear algebra and its applications},
214 |   volume    = {284},
215 |   number    = {1-3},
216 |   pages     = {193--228},
217 |   year      = {1998},
218 |   publisher = {Elsevier}
219 | }
220 | 
221 | 
222 | @article{Thome2016,
223 |   title     = {Inequalities and equalities for l=2 ({S}ylvester), l=3 ({F}robenius), and l>3 matrices},
224 |   author    = {Thome, N{\'e}stor},
225 |   journal   = {Aequationes mathematicae},
226 |   volume    = {90},
227 |   number    = {5},
228 |   pages     = {951--960},
229 |   year      = {2016},
230 |   publisher = {Springer}
231 | }
232 | 
233 | @article{Sylvester1851,
234 |   title     = {XXXVII. On the relation between the minor determinants of linearly equivalent quadratic functions},
235 |   author    = {Sylvester, James Joseph},
236 |   journal   = {The London, Edinburgh, and Dublin Philosophical Magazine and Journal of Science},
237 |   volume    = {1},
238 |   number    = {4},
239 |   pages     = {295--305},
240 |   year      = {1851},
241 |   publisher = {Taylor \& Francis}
242 | }
243 | 
244 | @article{Kozlov1980,
245 |   title     = {The polynomial solvability of convex quadratic programming},
246 |   author    = {Kozlov, Mikhail K and Tarasov, Sergei P and Khachiyan, Leonid G},
247 |   journal   = {USSR Computational Mathematics and Mathematical Physics},
248 |   volume    = {20},
249 |   number    = {5},
250 |   pages     = {223--228},
251 |   year      = {1980},
252 |   publisher = {Elsevier}
253 | }
254 | 
255 | 
256 | @article{Sahni1974,
257 |   author  = {Sahni, S.},
258 |   title   = {Computationally Related Problems},
259 |   journal = {SIAM Journal on Computing},
260 |   volume  = {3},
261 |   number  = {4},
262 |   pages   = {262-279},
263 |   year    = {1974},
264 |   doi     = {10.1137/0203021},
265 |   URL     = {https://doi.org/10.1137/0203021},
266 |   eprint  = {https://doi.org/10.1137/0203021}
267 | }
268 | 
269 | @article{Pardalos1991,
270 |   author   = "Pardalos, Panos M. and Vavasis, Stephen A.",
271 |   title    = "Quadratic programming with one negative eigenvalue is NP-hard",
272 |   journal  = "Journal of Global Optimization",
273 |   year     = "1991",
274 |   month    = "Mar",
275 |   day      = "01",
276 |   volume   = "1",
277 |   number   = "1",
278 |   pages    = "15--22",
279 |   abstract = "We show that the problem of minimizing a concave quadratic function with one concave direction is NP-hard. This result can be interpreted as an attempt to understand exactly what makes nonconvex quadratic programming problems hard. Sahni in 1974 [8] showed that quadratic programming with a negative definite quadratic term (n negative eigenvalues) is NP-hard, whereas Kozlov, Tarasov and Ha{\v{c}}ijan [2] showed in 1979 that the ellipsoid algorithm solves the convex quadratic problem (no negative eigenvalues) in polynomial time. This report shows that even one negative eigenvalue makes the problem NP-hard.",
280 |   issn     = "1573-2916",
281 |   doi      = "10.1007/BF00120662",
282 |   url      = "https://doi.org/10.1007/BF00120662"
283 | }
284 | 
285 | 
286 | @inproceedings{Spielman2010,
287 |   title     = {Algorithms, graph theory, and linear equations in {Laplacian} matrices},
288 |   volume    = {4},
289 |   url       = {http://www.cs.yale.edu/homes/spielman/PAPERS/icm10post.pdf},
290 |   urldate   = {2016-05-07},
291 |   booktitle = {Proceedings of the {International} {Congress} of {Mathematicians}},
292 |   author    = {Spielman, Daniel A.},
293 |   year      = {2010},
294 |   pages     = {2698--2722},
295 |   file      = {10.1.1.165.8870.pdf:/home/rick/Zotero/storage/XKEEGKW6/10.1.1.165.8870.pdf:application/pdf}
296 | }
297 | 
298 | @book{Higham2002,
299 |   author    = {Nicholas J. Higham},
300 |   title     = {Accuracy and Stability of Numerical Algorithms},
301 |   edition   = {Second},
302 |   publisher = {SIAM},
303 |   isbn      = {978-0-89871-802-7},
304 |   year      = {2002}
305 | }
306 | 
307 | @book{Quateroni2007,
308 |   author    = {Quarteroni, Alfio and Sacco, Riccardo and Saleri, Fausto},
309 |   year      = {2007},
310 |   title     = {Numerical Mathematics},
311 |   publisher = {Springer},
312 |   isbn      = {978-3-540-34658-6}
313 | }
314 | 
315 | @book{Gallopoulos2016,
316 |   author    = {Gallopoulos, E. and Philippe, B. and Sameh, A.H.},
317 |   year      = {2016},
318 |   title     = {Parallelism in Matrix Computations},
319 |   publisher = {Springer},
320 |   isbn      = {978-94-017-7188-7}
321 | }
322 | 
323 | ﻿@Article{Alizadeh2003,
324 | author  = {Alizadeh, F. and Goldfarb, D.},
325 | title   = {Second-order cone programming},
326 | journal = {Mathematical Programming},
327 | year    = {2003},
328 | month   = {Jan},
329 | day     = {01},
330 | volume  = {95},
331 | number  = {1},
332 | pages   = {3-51},
333 | issn    = {1436-4646},
334 | doi     = {10.1007/s10107-002-0339-5},
335 | url     = {https://doi.org/10.1007/s10107-002-0339-5}
336 | }
337 | 
338 | 
339 | @article{Teran2011,
340 |   title   = {Consistency and efficient solution of the Sylvester equation for*-congruence},
341 |   author  = {De Ter{\'a}n, Fernando and Dopico, Froilan},
342 |   journal = {The Electronic Journal of Linear Algebra},
343 |   volume  = {22},
344 |   year    = {2011}
345 | }
346 | 
347 | @article{Teran2019,
348 | author   = {De Terán, Fernando and Iannazzo, Bruno and Poloni, Federico and Robol, Leonardo},
349 | title    = {Nonsingular systems of generalized Sylvester equations: An algorithmic approach},
350 | journal  = {Numerical Linear Algebra with Applications},
351 | volume   = {26},
352 | number   = {5},
353 | pages    = {e2261},
354 | keywords = {formal matrix product, matrix pencils, periodic QR/QZ algorithm, periodic Schur decomposition, Sylvester and ⋆-Sylvester equations, systems of linear matrix equations},
355 | doi      = {https://doi.org/10.1002/nla.2261},
356 | url      = {https://onlinelibrary.wiley.com/doi/abs/10.1002/nla.2261},
357 | eprint   = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/nla.2261},
358 | note     = {e2261 nla.2261},
359 | abstract = {Summary We consider the uniqueness of solution (i.e., nonsingularity) of systems of r generalized Sylvester and ⋆-Sylvester equations with n×n coefficients. After several reductions, we show that it is sufficient to analyze periodic systems having, at most, one generalized ⋆-Sylvester equation. We provide characterizations for the nonsingularity in terms of spectral properties of either matrix pencils or formal matrix products, both constructed from the coefficients of the system. The proposed approach uses the periodic Schur decomposition and leads to a backward stable O(n3r) algorithm for computing the (unique) solution.},
360 | year     = {2019}
361 | }
362 | 
363 | @article{Dopico2016,
364 |   title   = {Projection methods for large-scale T-Sylvester equations},
365 |   author  = {Dopico, Froil{\'a}n and Gonz{\'a}lez, Javier and Kressner, Daniel and Simoncini, Valeria},
366 |   journal = {Mathematics of Computation},
367 |   volume  = {85},
368 |   number  = {301},
369 |   pages   = {2427--2455},
370 |   year    = {2016}
371 | }
372 | 
373 | @article{Hadamard1893,
374 |   author = {Hadamard, J.},
375 |   title = {Résolution d'une question relative aux déterminants.},
376 |   journal = {Bull. Sci. Math},
377 |   volume = {17},
378 |   pages = {30--31},
379 |   year={1893}
380 | }
381 | 
382 | @book{Seber2002,
383 |   author    = {Seber, G. and Lee, A.},
384 |   title     = {Linear Regression Analysis},
385 |   publisher = {John Wiley and Sons},
386 |   year      = {2002}
387 | }
388 | 
389 | @misc{Alman2020,
390 |       title={A Refined Laser Method and Faster Matrix Multiplication},
391 |       author={Josh Alman and Virginia Vassilevska Williams},
392 |       year={2020},
393 |       eprint={2010.05846},
394 |       archivePrefix={arXiv},
395 |       primaryClass={cs.DS}
396 | }
397 | 
398 | @misc{Peng2021,
399 |       title={Solving Sparse Linear Systems Faster than Matrix Multiplication},
400 |       author={Richard Peng and Santosh Vempala},
401 |       year={2021},
402 |       eprint={2007.10254},
403 |       archivePrefix={arXiv},
404 |       primaryClass={cs.DS}
405 | }
406 | 
407 | @misc{WellingXXXX,
408 |   author = {Max Welling},
409 |   title = {The Kalman Filter},
410 |   howpublished = {Lecture Note},
411 | }


--------------------------------------------------------------------------------
/src/rogue_gallery.tex:
--------------------------------------------------------------------------------
  1 | \chapter{Matrix Rogue Gallery}
  2 | 
  3 | \section{Non-Singular vs.\ Singular Matrices}
  4 | For $\mA\in\sRnn$ (initially drawn from \citep[p.\ 574]{Strang2016}):
  5 | \begin{center}
  6 | \begin{tabular}{ll}
  7 | \textbf{Non-Singular}                           & \textbf{Singular}                        \\
  8 | $\mA$ is invertible                             & $\mA$ is not invertible                  \\
  9 | The columns are independent                     & The columns are dependent                \\
 10 | The rows are independent                        & The rows are dependent                   \\
 11 | $\det(\mA)\ne0$                                 & $\det(\mA)=0$                            \\
 12 | $\mA\vx=0$ has one solution: $\vx=0$            & $\mA\vx=0$ has infinitely many solutions \\
 13 | $\mA\vx=\vb$ has one solution: $\vx=\mA^{-1}\vb$& $\mA\vx=\vb$ has either no or infinitely many solutions \\
 14 | $\mA$ has $n$ nonzero pivots                    & $\mA$ has $r<n$ pivots                   \\
 15 | $\mA$ has full rank $r=n$                       & $\mA$ has rank $r<n$                     \\
 16 | The reduced row echelon form is $\mR=\mI$       & $\mR$ has at least one zero row          \\
 17 | The column space is all of $\sRn$               & The column space has dimension $r<n$     \\
 18 | The row space is all of $\sRn$                  & The row space has dimension $r<n$        \\
 19 | All eigenvalues are nonzero                     & Zero is an eigenvalue of $\mA$           \\
 20 | $\mA^T\mA$ is symmetric positive definite       & $\mA^T\mA$ is only semidefinite          \\
 21 | $\mA$ has $n$ positive singular values          & $\mA$ has $r<n$ singular values
 22 | \end{tabular}
 23 | \end{center}
 24 | 
 25 | \section{2x2 Matrix}
 26 | %TODO: Image
 27 | \begin{equation}
 28 | \mA=
 29 | \begin{bmatrix}
 30 | \mA_{11} & \mA_{12} \\
 31 | \mA_{21} & \mA_{22}
 32 | \end{bmatrix}
 33 | \end{equation}
 34 | 
 35 | \begin{align}
 36 | \det(\mA)   &= \mA_{1,1} \mA_{2,2} - \mA_{1,2} \mA_{2,1} \\
 37 | \trace(\mA) &= \mA_{1,1} + \mA_{2,2} \\
 38 | \mA^{-1}    &= \frac{1}{\det(\mA)}\begin{bmatrix} \mA_{2,2} & -\mA_{1,2} \\ -\mA_{2,1} & \mA_{1,1}\end{bmatrix}
 39 | \end{align}
 40 | \subsection{Eigenvalues}
 41 | \begin{align}
 42 | \lambda &= \frac{\trace(\mA)\pm\sqrt{\trace(\mA)^2 - 4\det(\mA)}}{2} \\
 43 | 0           &= \lambda^2 - \lambda \trace(\mA) + \det(\mA) \\
 44 | \trace(\mA) &= \lambda_1 + \lambda_2            \\
 45 | \det(\mA)   &= \lambda_1\lambda_2
 46 | \end{align}
 47 | \subsection{Eigenvectors}
 48 | \begin{align}
 49 | \vv_1 \propto \begin{bmatrix}\mA_{12} \\ \lambda_1 - \mA_{11}\end{bmatrix} \\
 50 | \vv_2 \propto \begin{bmatrix}\mA_{12} \\ \lambda_2 - \mA_{11}\end{bmatrix} \\
 51 | \end{align}
 52 | 
 53 | 
 54 | 
 55 | \section{Diagonal Matrix}
 56 | 
 57 | \begin{center}
 58 | \includegraphics[width=1.5in]{imgs/rg_diagonal.pdf}
 59 | \end{center}
 60 | 
 61 | \begin{equation}
 62 | A=\diag(a_1,\ldots,a_n)=
 63 | \begin{bmatrix}
 64 | a_1  &        &  \\
 65 |      & \ddots &  \\
 66 |      &        & a_n
 67 | \end{bmatrix}
 68 | \end{equation}
 69 | 
 70 | Square matrix. Entries above diagonal are equal to entries below diagonal.
 71 | 
 72 | Number of ``free entries": $\frac{n(n+1)}{2}$.
 73 | 
 74 | \subsection*{Special Properties}
 75 | 
 76 | \begin{align}
 77 | \eig(A) &= {a_1,\ldots,a_n}           \\
 78 | \det(A) &= \prod_i a_i                \\
 79 | A^{-1}  &=
 80 | \begin{bmatrix}
 81 | \frac{1}{a_1} &        &               \\
 82 |               & \ddots &               \\
 83 |               &        & \frac{1}{a_n}
 84 | \end{bmatrix} \\
 85 | \vx^T \mA \vx &= \sum_i a_i \vx_i^2
 86 | \end{align}
 87 | 
 88 | \section{Doubly stochastic matrix}
 89 | A square matrix of nonnegative real numbers whose rows and columns each sum to 1.
 90 | 
 91 | 
 92 | \section{Dyads}
 93 | 
 94 | \begin{center}
 95 | \includegraphics[width=2in]{imgs/rg_dyad.pdf}
 96 | \end{center}
 97 | 
 98 | $\mA\in\sRmn$ is a dyad if it can be written as
 99 | \begin{equation}
100 | \mA=\vu\vv^T~~~\vu\in\sRm, \vv\in\sRn
101 | \end{equation}
102 | 
103 | \subsection*{Special Properties}
104 | \begin{itemize}
105 | \item The columns of $\mA$ are copies of $\vu$ scaled by the values of $\vv$.
106 | \item The rows of $\mA$ are copies of $\vu^T$ scaled by the values of $\vv$.
107 | \item If $\mA$ is a dyad, it acts on a vector $\vx$ as $\mA\vx=(\vu\vv^T)\vx=(\vv^T\vu)\vx$.
108 | \item $\mA\vx=c\vu$ ($\mA$ scales $\vx$ and points it along $\vu$).
109 | \item $\mA_{ij}=\vu_i\vv_j$.
110 | \item If $\vu,\vv\ne0$, then $\rank(\mA)=1$.
111 | \item If $m=n$, $\mA$ has one eigenvalue $\lambda=\vv^T\vu$ and eigenvector $\vu$.
112 | \item A dyad can always be written in a normalized form $c\tilde\vu\tilde\vv^T$.
113 | \end{itemize}
114 | %TODO: Dyad eigenvalues
115 | 
116 | 
117 | 
118 | \section{Hermitian Matrix}
119 | $\mH\in\sCmn$ is Hermitian iff
120 | \begin{equation}
121 | \mH=\mH^H
122 | \end{equation}
123 | where $\mH^H$ is the conjugate transpose of $\mH$.
124 | 
125 | For $\mH\in\sRmn$, Hermitian and symmetric matrices are equivalent.
126 | 
127 | \subsection*{Special Properties}
128 | \begin{align}
129 | \mH_{ii} &\in \sR      \\
130 | \mH\mH^H &=   \mH^H\mH \\
131 | \vx^H\mH\vx &\in \sR~~\forall\vx\in\sC \\
132 | \mH_1+\mH_2 &= \textrm{Hermitian} \\
133 | \mH^{-1}    &= \textrm{Hermitian} \\
134 | \mA+\mA^H   &= \textrm{Hermitian} \\
135 | \mA-\mA^H   &= \textrm{Skew-Hermitian} \\
136 | \mA\mB      &= \textrm{Hermitian iff $\mA\mB=\mB\mA$} \\
137 | \det(\mH)   &\in \sR \\
138 | \eig(\mH)   &\in \sR
139 | \end{align}
140 | 
141 | 
142 | \section{Hurwitz matrix}
143 | TODO
144 | 
145 | 
146 | \section{Idempotent Matrix}
147 | A matrix $\mA$ is idempotent iff
148 | \begin{equation}
149 | \mA\mA=\mA
150 | \end{equation}
151 | 
152 | \subsection*{Special Properties}
153 | \begin{align}
154 | \mA^n        &=\mA~~\forall n\ge1        \\
155 | \mI-\mA      &~~\textrm{is idempotent}   \\
156 | \mA^H        &~~\textrm{is idempotent}   \\
157 | \mI-\mA^H    &~~\textrm{is idempotent}   \\
158 | \rank(\mA)   &= \trace(\mA)              \\
159 | \mA(\mI-\mA)   &= 0                      \\
160 | \mA\pinv     &= \mA                      \\
161 | f(s\mI+t\mA) &= (\mI-\mA)f(s)+\mA f(s+t) \\
162 | \mA\mB=\mB\mA&\implies \mA\mB~\textrm{is idempotent} \\
163 | \eig(\mA)_i  &\in \{0,1\} \\
164 | \mA & \textrm{~is always diagonalizable}
165 | \end{align}
166 | $\mA-\mI$ may not be idempotent.
167 | 
168 | See also: nilpotent (\autoref{sec:rogue:nilpotent}), unipotent (\autoref{sec:rogue:unipotent}).
169 | 
170 | 
171 | \section{Laplacian Matrix of a Graph}
172 | Let $\mL$ be the Laplacian matrix of a graph $G$ with neither multiple edges nor loops defined by $(V,E,w)$ where $V$ is the set of vertices, $E$ the set of edges, and $w$ is a weight function. Is is also the case that $L=D-A$ where $D$ is the degree matrix and $A$ is the adjaceny matrix. In the case of directed graphs either the indegree or outdegree might be used.
173 | 
174 | The elements of $\mL$ are given by
175 | \begin{equation}
176 | \mL_{i,j} = \begin{cases}
177 |   \deg(v_i) & \textrm{if $i=j$} \\
178 |   -1        & \textrm{if $i\ne j$ and $v_i$ is adjacent to $v_j$} \\
179 |    0        & \textrm{otherwise}
180 | \end{cases}
181 | \end{equation}
182 | 
183 | If $G$ is weighted, the elements of its Laplacian $\mL$ are given by
184 | \begin{equation}
185 | \mL_{i,j} = \begin{cases}
186 |   \sum_{j,j\ne i} w(i,j) & \textrm{if $i=j$} \\
187 |   -w(i,j)   & \textrm{if $i\ne j$ and $v_i$ is adjacent to $v_j$ with weight $w(i,j)$} \\
188 |    0        & \textrm{otherwise}
189 | \end{cases}
190 | \end{equation}
191 | 
192 | For an undirected graph $G$ and its Laplacian $\mL$:
193 | \begin{itemize}
194 | \item $\mL$ is symmetric
195 | \item $L\ispsd 0$
196 | \item The row sum and column sums of $\mL$ are both zero.
197 | \item $\mL$ is singular
198 | \item The number of connected components in $G$ is the dimension of $\ns(L)$ and the algebraic multiplicity of the 0 eigenvalue.
199 | \item The smallest non-zero eigenvalue of $\mL$ is called the spectral gap.
200 | \item The second smallest eigenvalue of $\mL$ (could be zero) is the algebraic connectivity (Fiedler value) of $G$ and approximates the sparest cut of $G$.
201 | \item For a graph with multiple connected components, $\mL$ is a block diagonal matrix.
202 | \item Using preconditioners, the linear equaitons of any Laplacian matrix $\mL\in\sRnn$ can be solved to accuracy $\epsilon$ in time $O((\nnz(\mL) + n\log n (\log \log n)^2) \log \epsilon^{-1})$. The best balance between preconditioners and solving linear equations yields an algorithm of complexity $O(\nnz(\mL) \log^c n \log \epsilon^{-1})$.~\citep{Spielman2010}
203 | \end{itemize}
204 | 
205 | \begin{align}
206 | \vx^T \mL \vx = \sum_{(u,v)\in E} w(u,v) \left(\vx(u)-\vx(v)\right)^2~~~~\vx\in\sR^{V} \label{equ:laplace_quad}
207 | \end{align}
208 | 
209 | \autoref{equ:laplace_quad} provides a measure of the ``smoothness" of $\vx$ over the edges of $G$. The more $\vx$ jumps over an edge, the larger the quadratic form becomes.
210 | 
211 | 
212 | \section{Metzler matrix}
213 | TODO
214 | 
215 | \section{Nilpotent}
216 | \label{sec:rogue:nilpotent}
217 | A matrix $\mA$ is nilpotent iff
218 | \begin{equation}
219 | \mA^2=0
220 | \end{equation}
221 | 
222 | \subsection*{Special Properties}
223 | \begin{align}
224 |   f(s\mI+t\mA) = \mI f(s) + t \mA f'(s)
225 | \end{align}
226 | 
227 | 
228 | 
229 | \section{Orthogonal Matrix}
230 | 
231 | \begin{center}
232 | \includegraphics[width=1.5in]{imgs/rg_orthogonal.pdf}
233 | 
234 | (Not much visible structure)
235 | \end{center}
236 | 
237 | 
238 | \begin{equation}
239 | U=
240 | \begin{bmatrix}
241 | 1 & 0 & 0 & 0 & 0 & 0 \\
242 | 0 & 0 & 0 & 0 & 1 & 0 \\
243 | 0 & 0 & 1 & 0 & 0 & 0 \\
244 | 0 & 1 & 0 & 0 & 0 & 0 \\
245 | 0 & 0 & 0 & 0 & 0 & 1 \\
246 | 0 & 0 & 0 & 1 & 0 & 0 \\
247 | \end{bmatrix}
248 | \end{equation}
249 | 
250 | A square matrix $\mU\in\sRnn$ is orthogonal iff:
251 | 
252 | \begin{equation}
253 | \mU^T \mU = \mU \mU^T = \mI
254 | \end{equation}
255 | 
256 | The columns form an orthonormal basis of $\mathbb{R}^n$.
257 | 
258 | 
259 | 
260 | \subsection*{Special Properties}
261 | 
262 | \begin{itemize}
263 | \item The eigenvalues of $\mU$ are placed on the unit circle.
264 | \item The eigenvectors of $\mU$ are unitary (have length one).
265 | \item $\mU^{-1}$ is orthogonal.
266 | \item The product of two orthogonal matrices is itself orthogonal.
267 | \end{itemize}
268 | 
269 | \begin{align}
270 | \mU^T     &= \mU^{-1} \\
271 | \mU^{-T}  &= \mU      \\
272 | \mU^T\mU  &= \mI      \\
273 | \mU\mU^T  &= \mI      \\
274 | \det(\mU) &= \pm1     \\
275 | \norm{\mU \vx}^2_2&=(\mU\vx)^T(\mU\vx)=\vx^T\mU^T\mU\vx=\vx^T\vx=\norm{\vx}^2_2~~\forall \vx \\
276 | \norm{\mU \mA \mV}_F&=\norm{\mA}_F~~\forall \mA,\mU,\mV~\textrm{with}~\mU,\mV \textrm{orthogonal}
277 | \end{align}
278 | 
279 | Orthogonal matrices preserve the lengths and angles of the vectors they operator on. The converse is true: any matrix which preserves lengths and angles is orthogonal.
280 | 
281 | \section{Permutation Matrix}
282 | \begin{center}
283 | \includegraphics[width=1.5in]{imgs/rg_permutation_matrix.pdf}
284 | \end{center}
285 | 
286 | TODO
287 | 
288 | 
289 | 
290 | \section{Positive Definite}
291 | 
292 | $\mP\in\sSn$ is positive definite (denoted $\mP\ispd0$) if any of the following are true:
293 | \begin{itemize}
294 | \item $\vx^T\mP\vx>0,\forall\vx\in\sRn$.
295 | \item $\eig(\mP)>0$
296 | \item There exists a unique matrix $\mU\in\sRnn$, such that $\mA=\mU\mU^T$ (Cholesky Decomposition). %TODO
297 | \end{itemize}
298 | 
299 | 
300 | \subsection*{Special Properties}
301 | 
302 | \begin{itemize}
303 | \item $\mP^{-1}\ispd0$
304 | \item $c\mP\ispd0$
305 | \item $\mA_{ii}\in\sR$
306 | \item $\mA_{ii}>0$
307 | \item $\trace(\mP)\ge0$. %TODO: Shouldn't this be >0?
308 | \item $\det(\mP)>0$
309 | \item The eigenvalues of $\mP^{-1}$ are the inverses of the eigenvalues of $\mP$.
310 | \item For $\mP\in\sRmn$, $\mP^T\mP\ispd0\iff \mP$ is full-column rank ($\rank(\mP)=n$)
311 | \item For $\mP\in\sRmn$, $\mP\mP^T\ispd0\iff \mP$ is full-row rank ($\rank(\mP)=m$)
312 | \end{itemize}
313 | 
314 | \subsubsection{Ellipsoids}
315 | $\mP\ispd0$ defines a full-dimensional, bounded ellipsoid defined by the set
316 | \begin{equation}
317 | \mathcal{E}=\{\vx\in\sRn: (\vx-\vz)^T\mP^{-1}(\vx-\vz)\le \beta\}
318 | \end{equation}
319 | The eigenvectors of $\mP$ define the directions of the semi-axes of the ellipsoid; the lengths of these axes are given by $\sqrt{\beta\lambda_i}$ where $\lambda_i$ are the eigenvalues of $\mP$. The ellipsoid is centered at $\vz$. Since $\mP\ispd 0 \implies \mP^{-1}\ispd 0$, the Cholesky decomposition says that $\mP^{-1}=\mA^T\mA$; therefore, an equivalent definition of the ellipsoid is $\mathcal{E}=\{\vx\in\sRn: \norm{\mA\vx}_2\le1\}$.
320 | 
321 | \section{Positive Semi-Definite}
322 | 
323 | $\mA$ is positive semi-definite (denoted $\mA\ispsd0$) if any of the following are true:
324 | \begin{itemize}
325 | \item $\vx^T\mA\vx\ge0,\forall\vx\in\sRn$.
326 | \item $\eig(\mA)\ge0$
327 | \item There exists a non-unique matrix $\mU\in\sRnn$, such that $\mA=\mU\mU^T$ (Cholesky Decomposition). %TODO
328 | \end{itemize}
329 | 
330 | \subsection*{Special Properties}
331 | \begin{itemize}
332 | \item For $\mA\in\sRmn$, $\mA^T\mA\ispsd0$
333 | \item For $\mA\in\sRmn$, $\mA\mA^T\ispsd0$
334 | \item $\diag(\mA)_i\ge0$
335 | \item $\sum_{ij} \mA_{ij}\ge0$
336 | \item $\trace(\mA)\ge0$
337 | \item For $\mA,\mB\ispsd0$, $\trace(\mA\mB)\ge0$
338 | \item For $\mA,\mB\ispsd0$, $\trace(\mA\mB)=0\iff \mA\mB=0$
339 | \item The positive semi-definite matrices $\sPSD$ form a convex cone. For any two PSD matrices $\mA,\mB\in\sPSD$ and some $\alpha\in[0,1]$:
340 | \begin{equation}
341 | \vx^T(\alpha\mA+(1-\alpha)\mB)\vx=\alpha \vx^T\mA\vx+(1-\alpha)\vx^T\mB\vx\ge0~~\forall\vx
342 | \end{equation}
343 | \begin{equation}
344 | \alpha\mA+(1-\alpha)\mB\in\sPSD
345 | \end{equation}
346 | \item For $\mA\in\sPSD$ and $\alpha\ge0$, $\alpha\mA\ispsd0$, so $\sPSD$ is a cone.
347 | \item $\mA\ispsd 0$ if and only if there is a PSD matrix $\mS^{1/2}$ such that $\mS^{1/2}\mS^{1/2}=\mA$. This $\mS$ is unique.
348 | \end{itemize}
349 | 
350 | \subsection{Loewner order}
351 | If $\mA-\mB\ispsd 0$, then we say $\mA\ispsd \mB$. A sufficient condition for this is that $\lambda_n(\mA)\ge\lambda_1(\mB)$.
352 | 
353 | 
354 | 
355 | \section{Projection Matrix}
356 | A square matrix $\mP$ is a projection matrix that projects onto a vector space $\mathcal{S}$ iff
357 | \begin{align}
358 | \mP&~\textrm{is idempotent} \\
359 | \mP\vx&\in\mathcal{S}~~\forall\vx \\
360 | \mP\vz&=\vz~~\forall\vz\in\mathcal{S}
361 | \end{align}
362 | 
363 | 
364 | \section{Single-Entry Matrix}
365 | \label{sec:rogue_single_entry}
366 | \begin{equation}
367 | \mJ^{2,3} =
368 | \begin{bmatrix}
369 | 0 & 0 & 0 & 0 \\
370 | 0 & 0 & 1 & 0 \\
371 | 0 & 0 & 0 & 0 \\
372 | 0 & 0 & 0 & 0
373 | \end{bmatrix}
374 | \end{equation}
375 | 
376 | The single-entry matrix $\mJ^{iJ}\in\sRnn$ is defined as the matrix which is zero everywhere except for the entry $(i,j)$, which is $1$.
377 | 
378 | 
379 | %TODO: Much material from MCB
380 | 
381 | 
382 | 
383 | \section{Singular Matrix}
384 | A square matrix that is not invertible.
385 | 
386 | $\mA\in\sRnn$ is singular iff $\det \mA=0$ iff $\mathcal{N}(A)\ne\{0\}$.
387 | 
388 | 
389 | \section{Symmetric Matrix}
390 | 
391 | \begin{center}
392 | \includegraphics[width=1.5in]{imgs/rg_symmetric_matrix.pdf}
393 | \end{center}
394 | 
395 | $\mA\in\sSn$ is a symmetric matrix if $\mA=\mA^T$ (entries above diagonal are equal to entries below diagonal).
396 | 
397 | \begin{equation}
398 | \begin{bmatrix}
399 | a & b & c & d & e & f \\
400 | b & g & l & m & o & p \\
401 | c & l & h & n & q & r \\
402 | d & m & n & i & s & t \\
403 | e & o & q & s & j & u \\
404 | f & p & r & t & u & k \\
405 | \end{bmatrix}
406 | \end{equation}
407 | 
408 | 
409 | \subsection*{Special Properties}
410 | 
411 | \begin{align}
412 | \mA                                &=   \mA^T \\
413 | \eig(A)                            &\in \sRn  \\
414 | \textrm{Number of ``free entries"} &=    \frac{n(n+1)}{2}
415 | \end{align}
416 | 
417 | If $\mA$ is real, it can be decomposed into $\mA=\mQ^T\mD\mQ$ where $\mQ$ is a real orthogonal matrix (the columns of which are eigenvectors of $\mA$) and $\mD$ is real and diagonal containing the eigenvalues of $\mA$.
418 | 
419 | For a real, symmetric matrix with non-negative eignevalues, the eigenvalues and singular values coincide.
420 | 
421 | 
422 | 
423 | \section{Skew-Hermitian}
424 | A matrix $\mH\in\sCmn$ is Skew-Hermitian iff
425 | \begin{equation}
426 | \mH=-\mH^H
427 | \end{equation}
428 | 
429 | 
430 | 
431 | \section{Toeplitz Matrix, General Form}
432 | 
433 | \begin{center}
434 | \includegraphics[width=1.5in]{imgs/rg_toeplitz.pdf}
435 | \end{center}
436 | Constant values on descending diagonals.
437 | \begin{equation}
438 | \begin{bmatrix}
439 |   a_{0} & a_{-1} & a_{-2} & \ldots  & \ldots & a_{-(n-1)}  \\
440 |   a_{1} & a_0    & a_{-1} & \ddots  &        & \vdots \\
441 |   a_{2} & a_{1}  & \ddots & \ddots  & \ddots & \vdots \\
442 |  \vdots & \ddots & \ddots & \ddots  & a_{-1} & a_{-2}\\
443 |  \vdots &        & \ddots & a_{1}   & a_{0}  & a_{-1} \\
444 | a_{n-1} & \ldots & \ldots & a_{2}   & a_{1}  & a_{0}
445 | \end{bmatrix}
446 | \end{equation}
447 | 
448 | 
449 | \section{Toeplitz Matrix, Discrete Convolution}
450 | 
451 | \begin{center}
452 | \includegraphics[width=1.5in]{imgs/rg_toeplitz_1d_conv.pdf}
453 | \end{center}
454 | 
455 | Constant values on main and subdiagonals.
456 | 
457 | \begin{equation}
458 | \begin{bmatrix}
459 |   h_m &   0 &   0 &      \hdots &   0 &   0 \\
460 |   \vdots & h_m &   0 &   \hdots &   0 &   0 \\
461 |   h_1 & \vdots & h_m &   \hdots &   0 &   0 \\
462 |     0 & h_1 & \ddots & \ddots &   0 &   0 \\
463 |     0 &   0 & h_1 &    \ddots & h_m &   0 \\
464 |     0 &   0 &   0 &    \ddots & \vdots & h_m \\
465 |     0 &   0 &   0 &      \hdots & h_1 & \vdots \\
466 |     0 &   0 &   0 &      \hdots &   0 & h_1
467 | \end{bmatrix}
468 | \end{equation}
469 | 
470 | 
471 | \section{Triangular Matrix}
472 | 
473 | \begin{center}
474 | \includegraphics[width=1.5in]{imgs/rg_lower_triangular.pdf}~\includegraphics[width=1.5in]{imgs/rg_upper_triangular.pdf}
475 | \end{center}
476 | 
477 | \begin{equation}
478 | \begin{bmatrix}
479 | a & b & c & d & e & f \\
480 |   & g & h & i & j & k \\
481 |   &   & l & m & n & o \\
482 |   &   &   & p & q & r \\
483 |   &   &   &   & s & t \\
484 |   &   &   &   &   & u \\
485 | \end{bmatrix}
486 | ~
487 | ~
488 | \begin{bmatrix}
489 | a &   &   &   &   &   \\
490 | b & g &   &   &   &   \\
491 | c & h & l &   &   &   \\
492 | d & i & m & p &   &   \\
493 | e & j & n & q & s &   \\
494 | f & k & o & r & t & u \\
495 | \end{bmatrix}
496 | \end{equation}
497 | 
498 | Square matrices in which all elements either above or below the main diagonal are zero. An upper (left) and a lower (right) triangular matrix are shown above.
499 | 
500 | For an upper triangular matrix $A_{ij}=0$ whenever $i>j$; for a lower triangular matrix $A_{ij}=0$ whenever $i<j$.
501 | 
502 | 
503 | \subsection*{Special Properties}
504 | 
505 | \begin{align}
506 | \eig(A) &= \diag(A)             \\
507 | \det(A) &= \prod_i \diag(A)_i
508 | \end{align}
509 | 
510 | The product of two upper (lower) triangular matrices is still upper (lower) triangular.
511 | 
512 | The inverse of a nonsingular upper (lower) triangular matrix is still upper (lower) triangular.
513 | 
514 | 
515 | \section{Tridiagonal Matrix}
516 | 
517 | \begin{center}
518 | \includegraphics[width=1.5in]{imgs/rg_tridiagonal.pdf}
519 | \end{center}
520 | 
521 | \begin{equation}
522 | \begin{bmatrix}
523 | b_1 & c_1 &     &        &        &         \\
524 | a_2 & b_2 & c_2 &        &        &         \\
525 |     & a_3 & b_3 & c_3    &        &         \\
526 |     &     & a_4 & b_4    & \ddots &         \\
527 |     &     &     & \ddots & \ddots & c_{n-1} \\
528 |     &     &     &        & a_n    & b_n     \\
529 | \end{bmatrix}
530 | \end{equation}
531 | 
532 | A tridiagonal matrix has values on its main diagonal as well as the diagonals abutting the main, with zeros elsewhere.
533 | 
534 | A system of $n$ unknowns which can be written as
535 | \begin{align}
536 | a_i x_{i-1}+b_i x_i + c_i x_{i+1} &= d_i \\
537 | a_1 &=0                                  \\
538 | c_n &=0
539 | \end{align}
540 | can be rewritten as
541 | \begin{equation}
542 | \begin{bmatrix}
543 | b_1 & c_1 &     &        &        &         \\
544 | a_2 & b_2 & c_2 &        &        &         \\
545 |     & a_3 & b_3 & c_3    &        &         \\
546 |     &     & a_4 & b_4    & \ddots &         \\
547 |     &     &     & \ddots & \ddots & c_{n-1} \\
548 |     &     &     &        & a_n    & b_n     \\
549 | \end{bmatrix}
550 | \begin{bmatrix}
551 | x_1 \\ x_2 \\ x_3 \\ \vdots \\ x_n
552 | \end{bmatrix}
553 | =
554 | \begin{bmatrix}
555 | d_1 \\ d_2 \\ d_3 \\ \vdots \\ d_n
556 | \end{bmatrix}
557 | \end{equation}
558 | This system can be solved in $O(n)$ time using the tridiagonal matrix algorithm (aka the Thomas Algorithm). The algorithm is not unconditionally stable; however, it is stable when the matrix is diagonally dominant or symmetric positive definite. A matix is diagonally dominant if for every row of the matrix the agnitude of the diagoanl entry is greater than or equal to the sum of the magnitudes of all the other non-diagonal entries in that row ($|a_{ii}|\ge\sum_{j\ne i} |a_{ij}|~\forall i$). If uncondonitional stability is grequired, Gaussian elimination with partial pivoting is an alternative, if slower, solution method. See \citep[Theorem 9.12]{Higham2002} for full stability details.
559 | 
560 | A modified system can be solved for situations involving periodic boundary conditions, e.g.:
561 | \begin{align}
562 | a_1 x_n + b_1 x_1 + c_1 x_2 &= d_1 \\
563 | a_i x_{i-1} + b_i x_i + c_i x_{i+1} &= d_i~~\forall i=2,\ldots,n-1 \\
564 | a_n x_{n-1}+b_n x_n + c_n x_1 &= d_n
565 | \end{align}
566 | 
567 | Modified algorithms are also available for block tridiagonal matrices~\citep[\textsection3.8]{Quateroni2007}. See \citep[\textsection5.5]{Gallopoulos2016} for a discussion of parallel solvers.
568 | 
569 | 
570 | 
571 | \section{Unipotent}
572 | \label{sec:rogue:unipotent}
573 | A matrix $\mA$ is unipotent iff
574 | \begin{equation}
575 | \mA\mA = \mI
576 | \end{equation}
577 | 
578 | \subsection*{Special Properties}
579 | \begin{align}
580 |   f(s\mI+t\mA) = \frac{1}{2}\left((\mI+\mA)f(s+t) + (\mI-\mA)f(s-t)\right)
581 | \end{align}
582 | 
583 | 
584 | 
585 | \section{Vandermonde Matrix}
586 | \begin{equation}
587 | V=
588 | \begin{bmatrix}
589 | 1      & \alpha_1 & \alpha_1^2 & \dots  & \alpha_1^{n-1} \\
590 | 1      & \alpha_2 & \alpha_2^2 & \dots  & \alpha_2^{n-1} \\
591 | 1      & \alpha_3 & \alpha_3^2 & \dots  & \alpha_3^{n-1} \\
592 | \vdots & \vdots   & \vdots     & \ddots & \vdots         \\
593 | 1      & \alpha_m & \alpha_m^2 & \dots  & \alpha_m^{n-1}
594 | \end{bmatrix}
595 | \end{equation} %TODO: Check m vs n
596 | Alternatively,
597 | \begin{equation}
598 | \mV_{i,j} = \alpha_i^{j-1}
599 | \end{equation}
600 | 
601 | \subsection*{Uses}
602 | Polynomial interpolation of data.
603 | 
604 | \subsection*{Special Properties}
605 | $\mV^T$ is also a Vandermone matrix.
606 | \begin{align}
607 | \det(\mV)&=\prod_{1\le i < j \le n} (x_j-x_i)
608 | \end{align} %TODO: What does this mean?


--------------------------------------------------------------------------------