├── Makefile
├── matrix_forensics.pdf
├── src
│   ├── imgs
│   │   ├── fund_theorem_lin_alg1.png
│   │   ├── fund_theorem_lin_alg2.png
│   │   ├── fund_theorem_lin_alg3.png
│   │   ├── fund_theorem_lin_alg4.png
│   │   └── fund_theorem_lin_alg5.png
│   ├── Makefile
│   ├── introduction.tex
│   ├── title.tex
│   ├── README.md
│   ├── nomenclature.tex
│   ├── eigenvalues.tex
│   ├── z_math_commands.tex
│   ├── norms.tex
│   ├── plot_gen.py
│   ├── updates.tex
│   ├── algorithmics.tex
│   ├── matrix_forensics.tex
│   ├── decompositions.tex
│   ├── optimization.tex
│   ├── basics.tex
│   ├── derivatives.tex
│   ├── refs.bib
│   └── rogue_gallery.tex
├── .gitignore
└── README.md
/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | 	$(MAKE) -C src/
3 | clean:
4 | 	$(MAKE) -C src/ clean
--------------------------------------------------------------------------------
/matrix_forensics.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-barnes/MatrixForensics/HEAD/matrix_forensics.pdf
--------------------------------------------------------------------------------
/src/imgs/fund_theorem_lin_alg1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-barnes/MatrixForensics/HEAD/src/imgs/fund_theorem_lin_alg1.png
--------------------------------------------------------------------------------
/src/imgs/fund_theorem_lin_alg2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-barnes/MatrixForensics/HEAD/src/imgs/fund_theorem_lin_alg2.png
--------------------------------------------------------------------------------
/src/imgs/fund_theorem_lin_alg3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-barnes/MatrixForensics/HEAD/src/imgs/fund_theorem_lin_alg3.png
--------------------------------------------------------------------------------
/src/imgs/fund_theorem_lin_alg4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-barnes/MatrixForensics/HEAD/src/imgs/fund_theorem_lin_alg4.png
--------------------------------------------------------------------------------
/src/imgs/fund_theorem_lin_alg5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-barnes/MatrixForensics/HEAD/src/imgs/fund_theorem_lin_alg5.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | matrix_forensics.aux
2 | matrix_forensics.bbl
3 | matrix_forensics.blg
4 | matrix_forensics.log
5 | matrix_forensics.out
6 | matrix_forensics.toc
7 | src/matrix_forensics.pdf
8 | imgs/
9 | refs/
10 | *.idx
11 | *.ilg
12 | *.ind
13 | texput.log
14 |
15 |
--------------------------------------------------------------------------------
/src/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | 	#./plot_gen.py
3 | 	pdflatex --shell-escape matrix_forensics.tex
4 | 	bibtex matrix_forensics.aux
5 | 	makeindex matrix_forensics.idx
6 | 	pdflatex --shell-escape matrix_forensics.tex
7 | 	pdflatex --shell-escape matrix_forensics.tex
8 | 	cp matrix_forensics.pdf ../
9 |
10 | clean:
11 | 	rm -f *.bbl *.aux *.run.xml *.bcf *.blg *.out *.fff *.log *.lot *.ttt *.toc *.pyg *.idx *.ind *.ilg *.lof *.spl *-blx.bib *.dvi *.fls *.fdb_latexmk *.listing *.aux.blg
--------------------------------------------------------------------------------
/src/introduction.tex:
--------------------------------------------------------------------------------
1 | \chapter{Introduction}
2 |
3 | \textbf{Goals:}
4 | \begin{enumerate}
5 | \item The primary goal of \textit{Matrix Forensics} is to \textbf{solve crimes of matrix math}.
6 | That is, to make the sometimes mystifying manipulations of matrix math more understandable by cataloging useful identities, transformations, and facts.
7 |
8 | \item \textbf{To be a community-accessible project.} Anyone can contribute. The source code for the book is available on Github and has been thoughtfully arranged, with handy macros that help maintain an easy-to-use, aesthetic, and consistent notation and typography.
9 | \end{enumerate}
10 |
11 |
12 | \textbf{Contributing:}
13 | Please contribute on Github at \url{https://github.com/r-barnes/MatrixForensics} either by opening an issue or making a pull request. If you are not comfortable with this, please send your contribution to \url{rijard.barnes@gmail.com}.
14 |
15 |
16 | \textbf{Contributors:}
17 | Richard Barnes
18 |
19 | \textbf{Funding:}
20 |
21 | The Department of Energy Computational Science Graduate Fellowship (grant DE-FG02-97ER25308). %Richard Barnes
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Click [here](matrix_forensics.pdf) for the PDF
2 |
3 | Matrix Forensics
4 | ================
5 |
6 | Matrix Forensics is an extensive, yet to-the-point reference manual for matrix math. It is designed to be a quick reference for:
7 |
8 | * Identifying valid transformations
9 | * Reminding yourself of, e.g., matrix calculus identities
10 | * Finding appropriate computational shortcuts, such as perturbations
11 | * And more!
12 |
13 | The guide is freely available as a PDF [here](matrix_forensics.pdf).
14 |
15 |
16 |
17 | Building
18 | ----------------
19 |
20 | On a Linux-based system type `make` to build the book.
21 |
22 |
23 |
24 | Contributing
25 | ----------------
26 |
27 | You can contribute to Matrix Forensics either by opening an [Issue](https://github.com/r-barnes/MatrixForensics/issues) or submitting changes yourself via a [Pull Request](https://github.com/r-barnes/MatrixForensics/pulls).
28 |
29 | If your change involves significant reorganization, please open an [Issue](https://github.com/r-barnes/MatrixForensics/issues) first to discuss it.
30 |
31 | A [README](src/README.md) file is available discussing the structure of the book so you can more easily get started contributing to it.
32 |
--------------------------------------------------------------------------------
/src/title.tex:
--------------------------------------------------------------------------------
1 | \begin{titlepage} % Suppresses displaying the page number on the title page and the subsequent page counts as page 1
2 | \raggedleft % Right align the title page
3 |
4 | \rule{1pt}{\textheight} % Vertical line
5 | \hspace{0.05\textwidth} % Whitespace between the vertical line and title page text
6 | \parbox[b]{0.75\textwidth}{ % Paragraph box for holding the title page text, adjust the width to move the title page left or right on the page
7 |
8 | {\Huge\bfseries Matrix Forensics \\[\baselineskip]} % Title
9 | {\large\textit{Solving crimes of matrix math}}\\[4\baselineskip] % Subtitle or further description
10 | % {\large\textit{A brief guide to matrix math \\ and its efficient implementation}}\\[4\baselineskip] % Subtitle or further description
11 | {\Large\textsc{richard barnes}} % Author name, lower case for consistent small caps
12 | \\[4\baselineskip]
13 | \immediate\write18{ git rev-parse HEAD | cut -b 1-10 > /tmp/matrix_forensics_version.info }
14 |
15 | Git Hash: \input{/tmp/matrix_forensics_version.info} \\
16 | Compiled on: \today\ at \currenttime
17 | \\[2\baselineskip]
18 | \href{https://github.com/r-barnes/MatrixForensics}{github.com/r-barnes/MatrixForensics}
19 |
20 | \vspace{0.4\textheight} % Whitespace between the title block and the publisher
21 |
22 | %{\noindent The Publisher~~\plogo}\\[\baselineskip] % Publisher and logo
23 | }
24 |
25 | \end{titlepage}
--------------------------------------------------------------------------------
/src/README.md:
--------------------------------------------------------------------------------
1 | Contributors Guide
2 | ======================================
3 |
4 | Files
5 | --------------------------------------
6 |
7 | This subdirectory contains several files. Most are self-explanatory and correspond to sections of the book. A few we describe here:
8 |
9 | * [z_math_commands.tex](z_math_commands.tex) - All of the math commands used
10 | * [plot_gen.py](plot_gen.py) - Used to build the plots and diagrams in the book
11 | * [Makefile](Makefile) - Used to build the book
12 | * [refs.bib](refs.bib) - Bibliography
13 |
14 |
15 |
16 | Contributing Equations
17 | --------------------------------------
18 |
19 | Note that `z_math_commands.tex` contains extensive simplifying commands for writing equations.
20 |
21 | In general, equations should be typeset as follows:
22 | ```
23 | \begin{equation}
24 | \label{equ:equ_name} %Optional
25 | \eqcite{Thome2016}
26 | \mA = \mB * \mC
27 | \end{equation}
28 | ```
29 | Note that `\eqcite{Thome2016}` typesets a margin citation to `Thome2016`, which is an entry in [refs.bib](refs.bib).
30 |
31 | Multiple aligned equations can be typeset as follows. Note the careful alignment within the TeX source to improve readability.
32 | ```
33 | \begin{align}
34 | \label{equ:equ_name} %Optional
35 | \mA &= \mB * \mC \eqcite{Thome2016} \label{equ:a} \\
36 | \mA + \mB &= \mB + \mC + \mD \eqcite{Adam2013} \label{equ:b} \\
37 | \mA + \mB &= \mB * \mC + \mE \eqcite{Jane2020} \label{equ:c} \\
38 | \end{align}
39 | ```
40 |
--------------------------------------------------------------------------------
/src/nomenclature.tex:
--------------------------------------------------------------------------------
1 | \chapter{Nomenclature}
2 |
3 | \begin{tabular}{cl}
4 | $\mA$ & Matrix. \\
5 | $\va$ & (Column) vector. \\
6 | $a$ & Scalar. \\
7 | $\lambda$ & An eigenvalue of a matrix. \\
8 | & \\
9 | $\mA_{ij}$ & Entry of the matrix $\mA$ in the $i$th row and $j$th column. \\
10 | $\mA\circ \mB$ & Hadamard (element-wise) product of matrices A and B. \\
11 | $\ns(\mA)$ & Nullspace of the matrix $\mA$. \\
12 | $\range(\mA)$ & Range of the matrix $\mA$. \\
13 | $\det(\mA)$ & Determinant of the matrix $\mA$. \\
14 | $\eig(\mA)$ & Eigenvalues of the matrix $\mA$. \\
15 | $\mA^H$ & Conjugate transpose of the matrix $\mA$. \\
16 | $\mA^T$ & Transpose of the matrix $\mA$. \\
17 | $\mA\pinv$ & Pseudoinverse of the matrix $\mA$. \\
18 | $\vx\in\sRn$ & The entries of the $n$-vector $\vx$ are all real numbers. \\
19 | $\mA\in\sRmn$ & The entries of the matrix $\mA$ with $m$ rows and $n$ columns are all real numbers. \\
20 | $\mA\in\sSn$ & The matrix $\mA$ is symmetric and has $n$ rows and $n$ columns. \\
21 | & \\
22 | $\mI_n$ & Identity matrix with $n$ rows and $n$ columns. \\
23 | & \\
24 | $\{0\}$ & The set containing only the zero element (e.g., a trivial nullspace) \\
25 | $\sR$ & The real numbers \\
26 | $\sC$ & The complex numbers
27 | \end{tabular}
--------------------------------------------------------------------------------
/src/eigenvalues.tex:
--------------------------------------------------------------------------------
1 | \chapter{Eigenvalue Properties}
2 |
3 | $\lambda\in\mathbb{C}$ is an eigenvalue of $\mA\in\sRnn$ and $\vu\in\mathbb{C}^n$ is a corresponding eigenvector if $\mA\vu=\lambda\vu$ and $\vu\ne0$. Equivalently, $(\lambda \mI_n-\mA)\vu=0$ and $\vu\ne0$. Eigenvalues satisfy the equation $\det(\lambda\mI_n-\mA)=0$.
4 |
5 | Any matrix $\mA\in\sRnn$ has $n$ eigenvalues (counted with multiplicity), though some may be repeated. When the eigenvalues are real (e.g., when $\mA$ is symmetric), they are ordered so that $\lambda_1$ is the largest and $\lambda_n$ the smallest.
6 |
7 | If $\lambda$ is an eigenvalue of $\mA$, $\lambda^2$ is an eigenvalue of $\mA^2$.
8 |
9 | \begin{equation}
10 | \eig(\mA\mA^T)=\eig(\mA^T\mA)
11 | \end{equation}
12 | (The non-zero eigenvalues coincide. Note that the sizes of $\mA\mA^T$ and $\mA^T\mA$ may differ significantly, leading to different compute times.)
13 |
14 | \begin{equation}
15 | \eig(\mA^T\mA)\ge0
16 | \end{equation}
17 |
18 | \begin{equation}
19 | \lambda_\textrm{min}(\mA)\le \frac{\vx^T \mA \vx}{\vx^T\vx} \le \lambda_\textrm{max}(\mA)~~~~\mA\in\sSn,~\vx\ne0
20 | \end{equation}
21 |
22 | \section{Weyl's Inequality}
23 | If $\mM,\mH,\mP\in\sRnn$ are Hermitian matrices and $\mM=\mH+\mP$ ($\mH$ is perturbed by $\mP$) and $\mM$ has eigenvalues $\mu_1\ge\cdots\ge\mu_n$, $\mH$ has eigenvalues $\nu_1\ge\cdots\ge\nu_n$, and $\mP$ has eigenvalues $\rho_1\ge\cdots\ge\rho_n$, then
24 | \begin{equation}
25 | \nu_i+\rho_n\le \mu_i \le \nu_i + \rho_1~\forall i
26 | \end{equation}
27 | If $j+k-n\ge i \ge r+s-1$, then
28 | \begin{equation}
29 | \nu_j+\rho_k\le\mu_i\le\nu_r+\rho_s
30 | \end{equation}
31 | If $\mP\ispsd0$, then $\mu_i\ge\nu_i~\forall i$; if $\mP\ispd0$, then $\mu_i>\nu_i~\forall i$.
32 |
33 | %TODO
34 | % \section*{Computation}
35 | % TODO: eigsh, small eigen value extraction, top-k
36 |
37 | \section{Estimating Eigenvalues}
38 | \subsection{Gershgorin circle theorem}
39 | For $\mA\in\sCnn$ with entries $a_{ij}$ let $R_i=\sum_{j\ne i} |a_{ij}|$ be the sum of the absolute values of the non-diagonal entries of the $i$-th row. Let $D(a_{ii},R_i)\subseteq\sC$ be a closed disc (a circle containing its boundary) centered at $a_{ii}$ with radius $R_i$. This is the Gershgorin disc.
40 |
41 | Every eigenvalue of $\mA$ lies within at least one of the $D(a_{ii},R_i)$. Further, if the union of $k$ such discs is disjoint from the union of the other $n-k$ discs then the former union contains exactly $k$ and the latter $n-k$ of the eigenvalues of $\mA$.
--------------------------------------------------------------------------------
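An editor's numerical sketch (not part of the repository; numpy and the random data are assumptions): a small check of the Gershgorin disc bound and of the shared non-zero eigenvalues of A A^T and A^T A described in eigenvalues.tex above.

```python
# Hedged numeric sketch of two facts from eigenvalues.tex.
import numpy as np

rng = np.random.default_rng(0)
A = rng.normal(size=(4, 4))

# Gershgorin: every eigenvalue lies in some disc D(a_ii, R_i), where R_i is
# the sum of absolute off-diagonal entries of row i.
radii = np.sum(np.abs(A), axis=1) - np.abs(np.diag(A))
for lam in np.linalg.eigvals(A):
    assert any(abs(lam - A[i, i]) <= radii[i] + 1e-12 for i in range(A.shape[0]))

# The non-zero eigenvalues of A A^T and A^T A coincide for rectangular A.
B = rng.normal(size=(3, 5))
ev1 = np.sort(np.linalg.eigvalsh(B @ B.T))        # 3 eigenvalues
ev2 = np.sort(np.linalg.eigvalsh(B.T @ B))[-3:]   # top 3 of 5 (rest ~ 0)
assert np.allclose(ev1, ev2)
print("Gershgorin and eig(AA^T)/eig(A^TA) checks passed")
```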
/src/z_math_commands.tex:
--------------------------------------------------------------------------------
1 | % This file contains all of the mathematical commands used in the book
2 |
3 | \renewcommand*{\pd}[3][]{\ensuremath{\frac{\partial^{#1} #2}{\partial #3}}}
4 |
5 | \newcommand{\mA}{\mathbf{A}}
6 | \newcommand{\mB}{\mathbf{B}}
7 | \newcommand{\mC}{\mathbf{C}}
8 | \newcommand{\mD}{\mathbf{D}}
9 | \newcommand{\mE}{\mathbf{E}}
10 | \newcommand{\mF}{\mathbf{F}}
11 | \newcommand{\mH}{\mathbf{H}}
12 | \newcommand{\mI}{\mathbf{I}}
13 | \newcommand{\mJ}{\mathbf{J}}
14 | \newcommand{\mL}{\mathbf{L}}
15 | \newcommand{\mM}{\mathbf{M}}
16 | \newcommand{\mP}{\mathbf{P}}
17 | \newcommand{\mQ}{\mathbf{Q}}
18 | \newcommand{\mR}{\mathbf{R}}
19 | \newcommand{\mS}{\mathbf{S}}
20 | \newcommand{\mU}{\mathbf{U}}
21 | \newcommand{\mV}{\mathbf{V}}
22 | \newcommand{\mX}{\mathbf{X}}
23 | \newcommand{\mY}{\mathbf{Y}}
24 |
25 | \newcommand{\mAi}{\mathbf{A}^{-1}}
26 | \newcommand{\mBi}{\mathbf{B}^{-1}}
27 | \newcommand{\mCi}{\mathbf{C}^{-1}}
28 | \newcommand{\mPi}{\mathbf{P}^{-1}}
29 | \newcommand{\mRi}{\mathbf{R}^{-1}}
30 | \newcommand{\mXi}{\mathbf{X}^{-1}}
31 | \newcommand{\mYi}{\mathbf{Y}^{-1}}
32 |
33 | \newcommand{\mXp}{\mathbf{X}^{+}}
34 |
35 |
36 | %%%%% TRANSPOSES
37 | \newcommand{\T}{^\mathsf{T}}
38 | \newcommand{\mAT}{\mathbf{A}^{\mathsf{T}}}
39 | \newcommand{\mBT}{\mathbf{B}^{\mathsf{T}}}
40 | \newcommand{\mCT}{\mathbf{C}^{\mathsf{T}}}
41 | \newcommand{\mDT}{\mathbf{D}^{\mathsf{T}}}
42 | \newcommand{\mET}{\mathbf{E}^{\mathsf{T}}}
43 | \newcommand{\mXT}{\mathbf{X}^{\mathsf{T}}}
44 |
45 | \newcommand{\mXiT}{\mathbf{X}^{-\mathsf{T}}}
46 |
47 | \newcommand{\va}{\mathbf{a}}
48 | \newcommand{\vb}{\mathbf{b}}
49 | \newcommand{\vc}{\mathbf{c}}
50 | \newcommand{\vd}{\mathbf{d}}
51 | \newcommand{\ve}{\mathbf{e}}
52 | \newcommand{\vf}{\mathbf{f}}
53 | \newcommand{\vg}{\mathbf{g}}
54 | \newcommand{\vp}{\mathbf{p}}
55 | \newcommand{\vq}{\mathbf{q}}
56 | \newcommand{\vu}{\mathbf{u}}
57 | \newcommand{\vv}{\mathbf{v}}
58 | \newcommand{\vw}{\mathbf{w}}
59 | \newcommand{\vx}{\mathbf{x}}
60 | \newcommand{\vy}{\mathbf{y}}
61 | \newcommand{\vz}{\mathbf{z}}
62 |
63 | \newcommand{\vaT}{\mathbf{a}^\mathsf{T}}
64 | \newcommand{\vbT}{\mathbf{b}^\mathsf{T}}
65 | \newcommand{\vuT}{\mathbf{u}^\mathsf{T}}
66 | \newcommand{\vvT}{\mathbf{v}^\mathsf{T}}
67 | \newcommand{\vxT}{\mathbf{x}^\mathsf{T}}
68 |
69 | \newcommand{\vzero}{\mathbf{0}}
70 | \DeclareMathOperator{\diag}{diag}
71 | \DeclareMathOperator{\eig}{eig}
72 | \DeclareMathOperator{\trace}{tr}
73 | \DeclareMathOperator{\rank}{rank}
74 | \DeclareMathOperator{\nnz}{nnz}
75 | \newcommand{\sPSD}{\mathbb{S}^n_+}
76 | \newcommand{\sC}{\mathbb{C}}
77 | \newcommand{\sCmn}{\mathbb{C}^{m,n}}
78 | \newcommand{\sCnn}{\mathbb{C}^{n,n}}
79 | \newcommand{\sR}{\mathbb{R}}
80 | \newcommand{\sRm}{\mathbb{R}^{m}}
81 | \newcommand{\sRn}{\mathbb{R}^{n}}
82 | \newcommand{\sRp}{\mathbb{R}^{p}}
83 | \newcommand{\sRkk}{\mathbb{R}^{k,k}}
84 | \newcommand{\sRkn}{\mathbb{R}^{k,n}}
85 | \newcommand{\sRnm}{\mathbb{R}^{n,m}}
86 | \newcommand{\sRmn}{\mathbb{R}^{m,n}}
87 | \newcommand{\sRnn}{\mathbb{R}^{n,n}}
88 | \newcommand{\sRnk}{\mathbb{R}^{n,k}}
89 | \newcommand{\sRnp}{\mathbb{R}^{n,p}}
90 | \newcommand{\sRnr}{\mathbb{R}^{n,r}}
91 | \newcommand{\sRmm}{\mathbb{R}^{m,m}}
92 | \newcommand{\sSn}{\mathbb{S}^{n}}
93 | \newcommand{\ispsd}{\succeq}
94 | \newcommand{\ispd}{\succ}
95 | \newcommand{\pinv}{\!^+}
96 | \newcommand{\ns}{\mathcal{N}}
97 | \newcommand{\range}{\mathcal{R}}
98 | \newcommand{\bs}{\setminus}
99 | \newcommand{\kp}{\otimes} %Kronecker product
100 | \newcommand{\hp}{\circ} %Hadamard product
101 | \newcommand{\grad}{\nabla} %Gradient operator
--------------------------------------------------------------------------------
/src/norms.tex:
--------------------------------------------------------------------------------
1 | \chapter{Norms}
2 |
3 | \section{General Properties}
4 | Matrix norms satisfy some properties:
5 | \begin{align}
6 | f(\mA) &\ge 0 \\
7 | f(\mA) &= 0 \iff \mA=0 \\
8 | f(c\mA) &= |c|f(\mA) \\
9 | f(\mA+\mB)&\le f(\mA)+f(\mB)
10 | \end{align}
11 | Many popular norms also satisfy ``sub-multiplicativity'': $f(\mA\mB)\le f(\mA)f(\mB)$.
12 |
13 | \section{Matrices}
14 |
15 | \subsection{Frobenius norm}
16 | \begin{align}
17 | \norm{\mA}_F &= \sqrt{\trace(\mA\mA^H)} \\
18 | &= \sqrt{\sum_{i=1}^m \sum_{j=1}^n |\mA_{ij}|^2 } \\
19 | &= \sqrt{\sum_{i=1}^n \eig(\mA^H \mA)_i }
20 | \end{align}
21 |
22 | \subsubsection{Special Properties}
23 | \begin{align}
24 | \norm{\mA\vx}_2 &\le \norm{\mA}_F \norm{\vx}_2~~~\vx\in\sRn \\
25 | \norm{\mA\mB}_F &\le \norm{\mA}_F \norm{\mB}_F \\
26 | \norm{\mC-\vx\vx^T}_F^2 &= \norm{\mC}_F^2+\norm{\vx}_2^4-2 \vx^T \mC \vx
27 | \end{align}
28 |
29 | \subsection{Operator Norms}
30 | For $p=1,2,\infty$ or other values, an operator norm indicates the maximum input-output gain of the matrix.
31 | \begin{equation}
32 | \norm{\mA}_p=\max_{\norm{\vu}_p=1} \norm{\mA\vu}_p
33 | \end{equation}
34 |
35 | \begin{align}
36 | \norm{\mA}_1
37 | &=\max_{\norm{\vu}_1=1} \norm{\mA\vu}_1 \\
38 | &=\max_{j=1,\ldots,n} \sum_{i=1}^m |\mA_{ij}| \\
39 | &=\textrm{Largest absolute column sum}
40 | \end{align}
41 |
42 | \begin{align}
43 | \norm{\mA}_\infty
44 | &=\max_{\norm{\vu}_\infty=1} \norm{\mA\vu}_\infty \\
45 | &=\max_{i=1,\ldots,m} \sum_{j=1}^n |\mA_{ij}| \\
46 | &=\textrm{Largest absolute row sum}
47 | \end{align}
48 |
49 | \begin{align}
50 | \norm{\mA}_2
51 | &=\textrm{``spectral norm''} \\
52 | &=\max_{\norm{\vu}_2=1} \norm{\mA\vu}_2 \\
53 | &=\sqrt{\max(\eig(\mA^T\mA))} \\
54 | &=\textrm{Square root of largest eigenvalue of~}\mA^T\mA
55 | \end{align}
56 |
57 |
58 |
59 | \subsubsection{Special Properties}
60 | \begin{align}
61 | \norm{\mA\vu}_p &\le \norm{\mA}_p \norm{\vu}_p \\
62 | \norm{\mA\mB}_p &\le \norm{\mA}_p \norm{\mB}_p
63 | \end{align}
64 |
65 | \subsection{Spectral Radius}
66 | Not a proper norm.
67 | \begin{equation}
68 | \rho(\mA)=\textrm{spectral radius}(\mA)=\max_{i=1,\ldots,n} | \eig(\mA)_i |
69 | \end{equation}
70 |
71 | \subsubsection{Special Properties}
72 | \begin{align}
73 | \rho(\mA) &\le \norm{\mA}_p \\
74 | \rho(\mA) &\le \min(~\norm{\mA}_1, \norm{\mA}_\infty)
75 | \end{align}
76 |
77 |
78 | \section{Vectors}
79 |
80 | \begin{align}
81 | \norm{\vx}_1 &= \sum_i |\vx_i| & \textrm{L1-norm\index{L1-norm}} \\
82 | \norm{\vx}_p &= (\sum_i |\vx_i|^p)^{1/p} & \textrm{P-norm\index{P-norm}} \\
83 | \norm{\vx}_\infty &= \max_i |\vx_i| & \textrm{L$\infty$-norm\index{L$\infty$-norm}, L-infinity norm}
84 | \end{align}
85 |
86 | \subsection{Identities}
87 |
88 | \begin{align}
89 | 2\norm{\vu}_2^2+2\norm{\vv}_2^2 &= \norm{\vu+\vv}_2^2 + \norm{\vu-\vv}_2^2 & \textrm{Parallelogram Law} \\
90 | \langle\vx,\vy\rangle &= \frac{1}{4}\left(\norm{\vx+\vy}_2^2-\norm{\vx-\vy}_2^2\right)~~\forall \vx,\vy\in\mathcal{V} & \textrm{Polarization Identity} \\
91 | \norm{\vu}_2^2+\norm{\vv}_2^2&=\norm{\begin{bmatrix} \vu \\ \vv\end{bmatrix}}_2^2
92 | \end{align}
93 |
94 |
95 | \subsection{Bounds}
96 |
97 | \begin{align}
98 | |\vx^T \vy| &\le \norm{\vx}_2 \norm{\vy}_2 & \textrm{Cauchy--Schwarz Inequality} \\
99 | |\vx^T \vy| &\le \sum_{k=1}^n |\vx_k \vy_k| \le \norm{\vx}_p \norm{\vy}_q~~~\forall p,q\ge1: 1/p+1/q=1 & \textrm{H\"older Inequality}
100 | \end{align}
101 |
102 | For $\vx\in\mathbb{R}^n$
103 | \begin{equation}
104 | \frac{1}{\sqrt{n}}\norm{\vx}_2
105 | \le\norm{\vx}_\infty
106 | \le\norm{\vx}_2
107 | \le\norm{\vx}_1
108 | \le\sqrt{\textrm{card}(\vx)}\norm{\vx}_2
109 | \le\sqrt{n}\norm{\vx}_2
110 | \le n \norm{\vx}_\infty
111 | \end{equation}
112 |
113 | For any $0
--------------------------------------------------------------------------------
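An editor's numerical sketch (not part of the repository; numpy and the random data are assumptions): spot-checks of the operator-norm formulas and the vector-norm chain from norms.tex above.

```python
# Hedged numeric sketch of norm facts from norms.tex.
import numpy as np

rng = np.random.default_rng(1)
A = rng.normal(size=(4, 6))
x = rng.normal(size=6)

# ||A||_1 = largest absolute column sum, ||A||_inf = largest absolute row sum,
# ||A||_2 = sqrt of the largest eigenvalue of A^T A.
assert np.isclose(np.linalg.norm(A, 1),      np.abs(A).sum(axis=0).max())
assert np.isclose(np.linalg.norm(A, np.inf), np.abs(A).sum(axis=1).max())
assert np.isclose(np.linalg.norm(A, 2),
                  np.sqrt(np.linalg.eigvalsh(A.T @ A).max()))

# Vector norm chain: ||x||_2/sqrt(n) <= ||x||_inf <= ||x||_2 <= ||x||_1 <= sqrt(n)||x||_2.
n = x.size
n1, n2, ninf = np.linalg.norm(x, 1), np.linalg.norm(x, 2), np.linalg.norm(x, np.inf)
assert n2 / np.sqrt(n) <= ninf <= n2 <= n1 <= np.sqrt(n) * n2
print("norm checks passed")
```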
/src/matrix_forensics.tex:
--------------------------------------------------------------------------------
1 | \documentclass{book}
2 |
3 | %Post to: https://stats.stackexchange.com/questions/21346/reference-book-for-linear-algebra-applied-to-statistics
4 |
5 | \usepackage[top=1in, bottom=1.25in, left=1.25in, right=1.25in]{geometry}
6 |
7 | \usepackage{amsfonts, amsmath}
8 | \usepackage{commath}
9 | \usepackage[yyyymmdd,hhmmss]{datetime}
10 | \usepackage{graphbox}
11 | \usepackage[hidelinks]{hyperref}
12 | \usepackage{marginnote}
13 | \usepackage{mathtools}
14 | \usepackage{parskip}
15 | \usepackage{titlesec}
16 | \usepackage{xcolor}
17 | \usepackage{optidef}
18 |
19 | \usepackage{cellspace}%
20 | \setlength\cellspacetoplimit{3pt}
21 | \setlength\cellspacebottomlimit{3pt}
22 |
23 | \usepackage{makeidx}
24 | \makeindex
25 |
26 | \usepackage[numbers,sort&compress]{natbib}
27 | \bibliographystyle{unsrtnat}
28 |
29 | %Make equations be numbered continuously through book
30 | \usepackage{chngcntr}
31 | \counterwithout{equation}{chapter}
32 |
33 | \renewcommand{\sectionautorefname}{\textsection}
34 | \renewcommand{\subsectionautorefname}{\textsection}
35 | \renewcommand{\subsubsectionautorefname}{\textsection}
36 |
37 | \input{z_math_commands}
38 |
39 | \hypersetup{
40 | pdfauthor={Richard Barnes (ORCID: 0000-0002-0204-6040)},%
41 | pdftitle={Matrix Forensics},%
42 | % pdfsubject={Whatever},%
43 | pdfkeywords = {matrix algebra, matrix relations, matrix identities, linear algebra},%
44 | pdfproducer = {LaTeX},%
45 | pdfcreator = {pdfLaTeX}
46 | }
47 |
48 |
49 | \usepackage{fancyhdr}
50 | % \renewcommand{\chaptermark}[1]{\markboth{#1}{#1}}
51 | \setlength{\headheight}{15.2pt}
52 | \pagestyle{fancy}
53 |
54 | \lhead[\thepage]{\leftmark}
55 | % \chead[]{}
56 | \rhead[\leftmark]{\thepage}
57 |
58 | \renewcommand{\footrulewidth}{0.4pt}% default is 0pt
59 | \lfoot[\footnotesize{Richard Barnes. Matrix Forensics. \today-\currenttime. \href{https://github.com/r-barnes/MatrixForensics}{github.com/r-barnes/MatrixForensics}}. \input{/tmp/matrix_forensics_version.info}\!\!.]{\footnotesize{Richard Barnes. Matrix Forensics. \today-\currenttime. \href{https://github.com/r-barnes/MatrixForensics}{github.com/r-barnes/MatrixForensics}}. \input{/tmp/matrix_forensics_version.info}\!\!.} % []{}
60 | \cfoot[]{}
61 | \rfoot[]{}
62 |
63 |
64 | \newcommand{\eqcite}[1]{\marginnote{\citep{#1}}}
65 |
66 | %Adjust chapter formatting
67 | \newcommand{\hsp}{\hspace{20pt}}
68 | \definecolor{gray75}{gray}{0.75}
69 | \titleformat{\chapter}[hang]{\Huge\bfseries}{\thechapter\hsp\textcolor{gray75}{$|$}\hsp}{0pt}{\Huge\bfseries}
70 | \titlespacing*{\chapter}{0pt}{0pt}{20pt} %? BEFORE AFTER
71 |
72 | %Ensure chapters start on the same page
73 | \usepackage{etoolbox}
74 | \makeatletter
75 | \patchcmd{\chapter}{\if@openright\cleardoublepage\else\clearpage\fi}{\clearpage}{}{}
76 | \makeatother
77 |
78 |
79 | \begin{document}
80 |
81 | \input{title}
82 |
83 |
84 | \tableofcontents
85 |
86 | \input{introduction}
87 |
88 | \input{nomenclature}
89 |
90 | \input{basics}
91 |
92 | \input{derivatives}
93 |
94 | \input{rogue_gallery}
95 |
96 | \input{decompositions}
97 |
98 | \input{eigenvalues}
99 |
100 | \input{norms}
101 |
102 |
103 |
104 | \chapter{Bounds} %TODO: Reorganize
105 |
106 | \section{Matrix Gain}
107 | \begin{equation}
108 | \lambda_\textrm{min}(\mA^T\mA)\le \frac{\norm{\mA\vx}_2^2}{\norm{\vx}_2^2}\le\lambda_\textrm{max}(\mA^T\mA)
109 | \end{equation}
110 |
111 | \begin{equation}
112 | \max_{\vx\ne0} \frac{\norm{\mA\vx}_2}{\norm{\vx}_2}=\norm{\mA}_2=\sqrt{\lambda_\textrm{max}(\mA^T\mA)}\implies\vx=u_1
113 | \end{equation}
114 |
115 | \begin{equation}
116 | \min_{\vx\ne0} \frac{\norm{\mA\vx}_2}{\norm{\vx}_2}=\sqrt{\lambda_\textrm{min}(\mA^T\mA)}\implies\vx=u_n
117 | \end{equation}
118 |
119 | \section{Rayleigh quotients}
120 | The Rayleigh quotient of $\mA\in\sSn$ is given by
121 | \begin{equation}
122 | \frac{\vx^T \mA \vx}{\vx^T\vx}~~\vx\ne0
123 | \end{equation}
124 |
125 | \begin{equation}
126 | \lambda_\textrm{min}(\mA)\le \frac{\vx^T \mA \vx}{\vx^T\vx} \le \lambda_\textrm{max}(\mA)~~\vx\ne0
127 | \end{equation}
128 |
129 | \begin{align}
130 | \lambda_\textrm{max}(\mA)&=\max_{\vx: \norm{\vx}_2=1} \vx^T\mA\vx~~\textrm{achieved at~}\vx=\vu_1 \\
131 | \lambda_\textrm{min}(\mA)&=\min_{\vx: \norm{\vx}_2=1} \vx^T\mA\vx~~\textrm{achieved at~}\vx=\vu_n
132 | \end{align}
133 | where $u_1$ and $u_n$ are the eigenvectors associated with $\lambda_\textrm{max}$ and $\lambda_\textrm{min}$, respectively.
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 | \chapter{Equations}
142 |
143 | \section{Linear Equations}
144 | The linear equation $\mA\vx=\vy$ with $\mA\in\sRmn$ admits a solution iff $\rank([\mA \vy])=\rank(\mA)$. If this is satisfied, the set of all solutions is an affine set $\mathcal{S}=\{\vx=\bar \vx+z: z\in\ns(\mA)\}$ where $\bar \vx$ is any vector such that $\mA\bar\vx=\vy$. The solution is unique if $\ns(\mA)=\{0\}$.
145 |
146 | $\mA\vx=\vy$ is \textit{overdetermined} if it is tall/skinny ($m>n$); that is, if there are more equations than unknowns. If $\rank(\mA)=n$ then $\dim\ns(\mA)=0$, so there is either no solution or one solution. Overdetermined systems often have no solution ($\vy\notin\range(\mA)$), so an approximate solution is necessary. See \autoref{sec:least-squares}.
147 |
148 | $\mA\vx=\vy$ is \textit{underdetermined} if it is short/wide ($n>m$); that is, if it has more unknowns than equations. If $\rank(\mA)=m$ then $\range(\mA)=\sRm$ and $\dim\ns(\mA)=n-m>0$, so the set of solutions is infinite. Therefore, finding a single solution that optimizes some quantity is of interest. See \autoref{sec:minimum-norm}.
149 |
150 | $\mA\vx=\vy$ is \textit{square} if $n=m$. If $\mA$ is invertible, then the equations have the unique solution $\vx=\mA^{-1}\vy$.
151 |
152 | \section{Least-Squares}
153 | \label{sec:least-squares}
154 | For an overdetermined system we wish to find:
155 | \begin{equation}
156 | \min_\vx \norm{\mA\vx-\vy}_2^2
157 | \end{equation}
158 | Since $\mA\vx\in\range(\mA)$, we need the point $\tilde \vy = \mA\vx^*\in\range(\mA)$ closest to $\vy$. The residual $\vy-\tilde\vy$ is then orthogonal to $\range(\mA)$, i.e., it lies in the nullspace of $\mA^T$, so we have $\mA^T(\vy-\mA\vx^*)=0$. There is always a solution to this problem and, if $\rank(\mA)=n$, it is unique~\citep[p.\ 161]{Calafiore2014}
159 | \begin{equation}
160 | \vx^*=(\mA^T\mA)^{-1}\mA^T\vy
161 | \end{equation} %TODO: Check
162 |
163 | \subsection{Regularized least-squares with low-rank data}
164 |
165 | For $\mA\in\sRmn$, $\vy\in\sRm$, $\lambda\ge0$, the regularized least-squares problem
166 | \begin{equation}
167 | \textrm{argmin}_\vx \norm{\mA\vx-\vy}_2^2 + \lambda\norm{\vx}_2^2
168 | \end{equation}
169 | has a closed form solution
170 | \begin{equation}
171 | \label{equ:regularized_least_squares}
172 | \vx = (\mA^T\mA + \lambda \mI)^{-1}\mA^T\vy
173 | \end{equation}
174 | However, if $\mA$ has rank $r\ll\min(m,n)$ and a known low-rank decomposition $\mA=\mL\mR^T$ with $\mL\in\mathbb{R}^{m,r}$ and $\mR\in\mathbb{R}^{n,r}$, then we can rewrite \autoref{equ:regularized_least_squares} as
175 | \begin{equation}
176 | \vx = \mR(\mL^T\mL\mR^T\mR + \lambda \mI)^{-1}\mL^T\vy
177 | \end{equation}
178 | This decreases the time complexity from $O(mn^2 + n^\omega)$ to $O(nr^2+mr^2)$.
179 |
180 | \section{Minimum Norm Solutions}
181 | \label{sec:minimum-norm}
182 | For underdetermined systems in which $\mA\in\sRmn$ with $m<n$ and $\mA\mA^T\ispd0$ (full row rank), the solution of minimum Euclidean norm is $\vx^*=\mA^T(\mA\mA^T)^{-1}\vy$.
--------------------------------------------------------------------------------
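An editor's numerical sketch (not part of the repository; numpy and the synthetic data are assumptions): it checks the least-squares solution against `np.linalg.lstsq` and verifies the low-rank rewriting of the regularized least-squares solution shown above.

```python
# Hedged numeric sketch: least squares and ridge-regularized least squares
# with a known low-rank factorization A = L R^T.
import numpy as np

rng = np.random.default_rng(2)
m, n, r, lam = 50, 8, 3, 0.1

# Overdetermined least squares: x* = (A^T A)^{-1} A^T y matches np.linalg.lstsq.
A = rng.normal(size=(m, n))
y = rng.normal(size=m)
x_normal = np.linalg.solve(A.T @ A, A.T @ y)
x_lstsq, *_ = np.linalg.lstsq(A, y, rcond=None)
assert np.allclose(x_normal, x_lstsq)

# Regularized least squares when A = L R^T with L (m x r) and R (n x r):
# the n x n solve can be replaced by an r x r solve.
L, R = rng.normal(size=(m, r)), rng.normal(size=(n, r))
A = L @ R.T
x_direct  = np.linalg.solve(A.T @ A + lam * np.eye(n), A.T @ y)
x_lowrank = R @ np.linalg.solve(L.T @ L @ R.T @ R + lam * np.eye(r), L.T @ y)
assert np.allclose(x_direct, x_lowrank)
print("least-squares checks passed")
```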
/src/decompositions.tex:
--------------------------------------------------------------------------------
32 |
33 | % For $\mA\in\sRnn$,
34 | % \begin{align}
35 | % \mA&\ispsd0\iff \exists\mB\ispsd0: \mA=\mB^2 \\
36 | % \mA&\ispd0 \iff \exists\mB\ispd 0: \mA=\mB^2
37 | % \end{align}
38 | % where $\mB$ is called the ``matrix square-root" of $\mA$.
39 |
40 | % For $\mA\ispsd0$, we can use the spectral factorization $\mA=\mU\mD\mU^T$ and take $\mD^{1/2}=\diag(\sqrt{\lambda_1},\ldots,\sqrt{\lambda_n})$ to get $\mB=\mU\mD^{1/2}\mU^T$.
41 |
42 |
43 | \section{PCA: Principal Components Analysis}
44 | Find normalized directions in data space such that the variance of the projections of the centered data points is maximal. For centered data $\tilde \mX$, the mean-square variation of data along a vector $\vx$ is $\vx^T \tilde \mX \tilde \mX^T \vx$.
45 | \begin{equation}
46 | \max_{\vx\in\sRn,~\norm{\vx}_2=1} \vx^T \tilde \mX \tilde \mX^T \vx
47 | \end{equation}
48 | Taking an SVD of $\tilde \mX \tilde \mX^T=\mU\mD^2\mU^T$ shows that the objective is maximized by taking $\vx=\vu_1$. By repeatedly removing the first principal component and recalculating, all the principal axes can be found.
49 |
50 |
51 |
52 |
53 | \section{QR: Orthogonal-triangular}
54 |
55 | \begin{center}
56 | \includegraphics[align=c,height=1in]{imgs/decomp_qr_a.pdf}\textbf{\large =}
57 | \includegraphics[align=c,height=1in]{imgs/decomp_qr_q.pdf}\textbf{\large *}
58 | \includegraphics[align=c,height=1in]{imgs/decomp_qr_r.pdf}
59 | \end{center}
60 |
61 | For $\mA\in\sRnn$, $\mA=\mQ\mR$ where $\mQ$ is orthogonal and $\mR$ is an upper triangular matrix. If $\mA$ is non-singular, then $\mQ$ and $\mR$ are uniquely defined if the diagonal entries of $\mR$ are required to be positive.
62 |
63 | \subsection*{Algorithms}
64 |
65 | Gram-Schmidt.
66 |
67 |
68 |
69 |
70 | \section{SVD: Singular Value Decomposition}
71 |
72 | \begin{center}
73 | \includegraphics[align=c,height=1in]{imgs/decomp_svd_a.pdf}\textbf{\large =}
74 | \includegraphics[align=c,height=1in]{imgs/decomp_svd_u.pdf}\textbf{\large *}
75 | \includegraphics[align=c,height=1in]{imgs/decomp_svd_s.pdf}\textbf{\large *}
76 | \includegraphics[align=c,height=1in]{imgs/decomp_svd_v.pdf}
77 | \end{center}
78 |
79 | \begin{center}
80 | \includegraphics[align=c,width=0.5in]{imgs/decomp_svd_a_compact.pdf}\textbf{\large =}
81 | \includegraphics[align=c,width=0.5in]{imgs/decomp_svd_u_compact.pdf}\textbf{\large *}
82 | \includegraphics[align=c,width=0.5in]{imgs/decomp_svd_s_compact.pdf}\textbf{\large *}
83 | \includegraphics[align=c,width=0.5in]{imgs/decomp_svd_v_compact.pdf}
84 | \end{center}
85 |
86 | Any matrix $\mA\in\sRmn$ can be written as
87 | \begin{equation}
88 | \mA=\mU \mD \mV^T=\sum_{i=1}^r \sigma_i u_i v_i^T
89 | \end{equation}
90 | where
91 | \begin{align}
92 | \mU&=\textrm{eigenvectors of~}\mA\mA^T & \sRmm \\
93 | \mD&=\diag(\sigma_i)=\sqrt{\diag(\eig(\mA\mA^T))} & \sRmn \\
94 | \mV&=\textrm{eigenvectors of~}\mA^T\mA & \sRnn
95 | \end{align}
96 | Let $\sigma_i$ be the non-zero singular values for $i=1,\ldots,r$ where $r$ is the rank of $\mA$; $\sigma_1\ge\ldots\ge\sigma_r$.
97 |
98 | We also have that
99 | \begin{align}
100 | \mA \vv_i &= \sigma_i \vu_i \\
101 | \mA^T \vu_i &= \sigma_i \vv_i \\
102 | \mU^T\mU &= \mI \\
103 | \mV^T\mV &= \mI
104 | \end{align}
105 |
106 | $\mD$ can be written in an expanded form:
107 | \begin{equation}
108 | \tilde \mD=
109 | \begin{bmatrix}
110 | \mD & 0_{r,n-r} \\
111 | 0_{m-r,r} & 0_{m-r,n-r}
112 | \end{bmatrix}
113 | \end{equation}
114 | The final $n-r$ columns of $\mV$ give an orthonormal basis spanning $\ns(\mA)$. An orthonormal basis spanning the range of $\mA$ is given by the first $r$ columns of $\mU$.
115 |
116 | \begin{align}
117 | \norm{\mA}^2_F&=\textrm{Frobenius norm} =\trace(\mA^T\mA)=\sum_{i=1}^r \sigma_i^2 \\
118 | \norm{\mA}^2_2&=\sigma_1^2 \\
119 | \norm{\mA}_* &= \textrm{nuclear norm}=\sum_{i=1}^r \sigma_i
120 | \end{align}
121 |
122 | The \textbf{condition number} $\kappa$ of an invertible matrix $\mA\in\sRnn$ is the ratio of the largest and smallest singular value. Matrices with large condition numbers are closer to being singular and more sensitive to changes.
123 | \begin{equation}
124 | \kappa(\mA)=\frac{\sigma_1}{\sigma_n}=\norm{A}_2 \cdot \norm{A^{-1}}_2
125 | \end{equation}
126 |
127 | \subsection*{Low-Rank Approximation}
128 | Approximating $\mA\in\sRmn$ by a matrix $\mA_k$ of rank $k>0$ can be formulated as the optimization problem
129 | \begin{equation}
130 | \min_{\mA_k\in\sRmn} \norm{\mA-\mA_k}_F^2: \rank(\mA_k)=k, 1\le k \le \rank(\mA)
131 | \end{equation}
132 | The optimal solution of this problem is given by
133 | \begin{equation}
134 | \mA_k=\sum_{i=1}^k \sigma_i \vu_i \vv_i^T
135 | \end{equation}
136 | where
137 | \begin{align}
138 | \frac{\norm{\mA_k}_F^2}{\norm{\mA}_F^2}&=\frac{\sigma_1^2+\ldots+\sigma_k^2}{\sigma_1^2+\ldots+\sigma_r^2} \\
139 | 1-\frac{\norm{\mA_k}_F^2}{\norm{\mA}_F^2}&=\frac{\sigma_{k+1}^2+\ldots+\sigma_r^2}{\sigma_1^2+\ldots+\sigma_r^2}
140 | \end{align}
141 | is the fraction of the total variance in $\mA$ explained by the approximation $\mA_k$.
142 |
143 | \subsection*{Range and Nullspace}
144 | \begin{align}
145 | \ns(\mA) &= \range(\mV_{nr}) \\
146 | \ns(\mA)^\perp \equiv\range(\mA^T)&=\range(\mV_r) \\
147 | \range(\mA)&=\range(\mU_r) \\
148 | \range(\mA)^\perp\equiv\ns(\mA^T)&=\range(\mU_{nr})
149 | \end{align}
150 | where $\mV_r$ denotes the first $r$ columns of $\mV$ and $\mV_{nr}$ the remaining $n-r$ columns ($r+1,\ldots,n$); similarly for $\mU$.
151 |
152 |
153 | \subsection*{Projectors}
154 | The projection of $\vx$ onto $\ns(\mA)$ is $(\mV_{nr}\mV_{nr}^T)\vx$. Since $\mI_n=\mV_r\mV_r^T+\mV_{nr}\mV_{nr}^T$, $(\mI_n-\mV_{r}\mV_{r}^T)\vx$ also works. The projection of $\vx$ onto $\range(\mA)$ is $(\mU_r\mU_r^T)\vx$.
155 |
156 | If $\mA\in\sRmn$ is full row rank ($\mA\mA^T\ispd0$), then the minimum-norm point in the affine set $\{\vx:\mA\vx=\vb\}, \vb\in\sRm$ is given by $\vx^*=\mA^T(\mA\mA^T)^{-1}\vb$. %TODO
157 |
158 | If $\mA\in\sRmn$ is full column rank ($\mA^T\mA\ispd0$), then the projection of $\vb\in\sRm$ onto $\range(\mA)$ is given by $\mA(\mA^T\mA)^{-1}\mA^T\vb$. %TODO
159 |
160 |
161 | \subsection*{Computational Notes}
162 | A \textit{numerical rank} can be estimated for the matrix as the largest $k$ such that $\sigma_k>\epsilon \sigma_1$ for $\epsilon\ge0$.
163 |
164 |
165 |
166 | \section{Eigenvalue Decomposition for Diagonalizable Matrices}
167 |
168 | For a square, diagonalizable matrix $\mA\in\mathbb{R}^{n,n}$
169 | \begin{equation}
170 | \mA=U\Lambda U^{-1}
171 | \end{equation}
172 | where $U\in\mathbb{C}^{n,n}$ is an invertible matrix whose columns are the eigenvectors of $\mA$ and $\Lambda$ is a diagonal matrix containing the eigenvalues $\lambda_1,\ldots,\lambda_n$ of $\mA$ in the diagonal.
173 |
174 | The columns $\vu_1,\ldots,\vu_n$ satisfy
175 | \begin{equation}
176 | \mA \vu_i=\lambda_i \vu_i~~i=1,\ldots,n
177 | \end{equation}
178 |
179 | \section{Eigenvalue (Spectral) Decomposition for Symmetric Matrices}
180 |
181 | A symmetric matrix $\mA\in\mathbb{R}^{n,n}$ can be factored as
182 | \begin{equation}
183 | \mA=U\Lambda U^T=\sum_i^n \lambda_i \vu_i \vu_i^T
184 | \end{equation}
185 | where $U\in\mathbb{R}^{n,n}$ is an orthogonal matrix whose columns $\vu_i$ are the eigenvectors of $\mA$ and $\Lambda$ is a diagonal matrix containing the eigenvalues $\lambda_1\ge\ldots\ge\lambda_n$ of $\mA$ in the diagonal. These eigenvalues are always real. The eigenvectors can always be chosen to be real and to form an orthonormal basis.
186 |
187 | The columns $\vu_1,\ldots,\vu_n$ satisfy
188 | \begin{equation}
189 | \mA \vu_i=\lambda_i \vu_i~~i=1,\ldots,n
190 | \end{equation}
191 |
192 |
193 | \section{Schur Complements}
194 |
195 | For $\mA\in\sSn$, $\mB\in\mathbb{S}^{m}$, $\mX\in\sRnm$ with $\mB\ispd0$ and the block matrix
196 | \begin{equation}
197 | \mM=
198 | \begin{bmatrix}
199 | \mA & \mX \\
200 | \mX^T & \mB
201 | \end{bmatrix}
202 | \end{equation}
203 | and the Schur complement of $\mB$ in $\mM$
204 | \begin{equation}
205 | S=\mA-\mX\mB^{-1}\mX^T
206 | \end{equation}
207 | Then
208 | \begin{align}
209 | \mM\ispsd0&\iff S\ispsd0 \\
210 | \mM\ispd0 &\iff S\ispd0
211 | \end{align}
212 |
--------------------------------------------------------------------------------
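An editor's numerical sketch (not part of the repository; numpy and the random data are assumptions): spot-checks of several SVD facts from decompositions.tex above (reconstruction, norms from singular values, best rank-k approximation, condition number).

```python
# Hedged numeric sketch of SVD facts from decompositions.tex.
import numpy as np

rng = np.random.default_rng(3)
m, n, k = 6, 4, 2
A = rng.normal(size=(m, n))

U, s, Vt = np.linalg.svd(A, full_matrices=False)   # compact SVD
assert np.allclose(A, (U * s) @ Vt)                # A = U D V^T

# ||A||_F^2 = sum sigma_i^2, ||A||_2 = sigma_1, nuclear norm = sum sigma_i.
assert np.isclose(np.linalg.norm(A, 'fro') ** 2, np.sum(s ** 2))
assert np.isclose(np.linalg.norm(A, 2), s[0])
assert np.isclose(np.linalg.norm(A, 'nuc'), s.sum())

# Best rank-k approximation keeps the top k singular triplets; the squared
# Frobenius error is the sum of the discarded sigma_i^2.
A_k = (U[:, :k] * s[:k]) @ Vt[:k, :]
assert np.isclose(np.linalg.norm(A - A_k, 'fro') ** 2, np.sum(s[k:] ** 2))

# Condition number of an invertible square matrix: sigma_1 / sigma_n.
B = rng.normal(size=(n, n))
sb = np.linalg.svd(B, compute_uv=False)
assert np.isclose(np.linalg.cond(B), sb[0] / sb[-1])
print("SVD checks passed")
```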
/src/optimization.tex:
--------------------------------------------------------------------------------
1 | \chapter{Optimization}
2 |
3 | \section{Standard Forms}
4 |
5 | \textbf{Least Squares}
6 | \begin{equation}
7 | \min_{\vx\in\sRn} \norm{\vy-\mA\vx}_2
8 | \end{equation}
9 |
10 | \textbf{LASSO}
11 | \begin{equation}
12 | \min_{\vb\in\sRn} \left(\frac{1}{N}\norm{\vy-\mX\vb}_2^2+\lambda\norm{\vb}_1\right)
13 | \end{equation}
14 |
15 | \textbf{LP: Linear program}
16 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
17 | \addConstraint{\mA_\textrm{eq}\vx}{= \vb_\textrm{eq}}
18 | \addConstraint{\mA\vx}{\le \vb}
19 | \end{mini!}
20 |
21 | \textbf{Linear Fractional Program}
22 | \begin{maxi!}{\vx}{\frac{\vc^T\vx + a}{\vd^T \vx + b}}{}{}
23 | \addConstraint{\mA\vx}{\le \vb}
24 | \end{maxi!}
25 | Additional constraints must ensure $\vd^T \vx + b$ has the same sign throughout the entire feasible region.
26 |
27 |
28 | \textbf{QCQP: Quadratically Constrained Quadratic Program}
29 | \begin{mini!}{\vx}{\vx^T\mH_0\vx+2\vc_0^T\vx + \vd_0}{}{}
30 | \addConstraint{\vx^T\mH_i\vx+2\vc_i^T\vx + \vd_i}{\le 0}{~~i\in\mathcal{I}}
31 | \addConstraint{\vx^T\mH_j\vx+2\vc_j^T\vx + \vd_j}{ = 0}{~~j\in\mathcal{E}}
32 | \end{mini!}
33 | If $\mH_i\ispsd 0$ for all $i$ (including the objective's $\mH_0$), then the program is convex. In general, QCQPs are NP-hard.
34 |
35 |
36 | \textbf{QP: Quadratic Program}
37 | \begin{mini!}{\vx}{\frac{1}{2}\vx^T\mH_0\vx+\vc_0^T\vx}{}{}
38 | \addConstraint{\mA_\textrm{eq}\vx}{=\vb_\textrm{eq}}
39 | \addConstraint{\mA\vx}{\le \vb}
40 | \end{mini!}
41 | If $\mH_0\ispd 0$, then the program is convex.
42 |
43 | If only equality constraints are present, then the solution is the linear system:
44 | \begin{equation}
45 | \begin{bmatrix}
46 | \mH_0 & \mA^T \\
47 | \mA & 0
48 | \end{bmatrix}
49 | \begin{bmatrix} \vx \\ \lambda \end{bmatrix}
50 | =\begin{bmatrix} -\vc_0 \\ \vb \end{bmatrix}
51 | \end{equation}
52 | where $\lambda$ is a set of Lagrange multipliers.
53 |
54 | For $\mH_0\ispd 0$, the ellipsoid method solves the problem in polynomial time~\citep{Kozlov1980}. If $\mH_0$ is indefinite, then the problem is NP-hard~\citep{Sahni1974}, even if $\mH_0$ has only one negative eigenvalue~\citep{Pardalos1991}.
55 |
56 | \textbf{SOCP: Second Order Cone Program (Standard Form)}
57 | \begin{align}
58 | \min_{\vx} ~& \vc^T \vx \\
59 | \textrm{s.t.} ~& \norm{\mA_i \vx+\vb_i}_2\le \vc_i^T \vx+\vd_i,~~i=1,\ldots,m
60 | \end{align}
61 |
62 | \textbf{SOCP: Second Order Cone Program (Conic Standard Form)}
63 | \begin{align}
64 | \min_{\vx} ~& \vc^T \vx \\
65 | \textrm{s.t.} ~& (\mA_i \vx+\vb_i, \vc_i^T \vx+\vd_i)\in\mathcal{K}_{m_i} ~~i=1,\ldots,m
66 | \end{align}
67 |
68 | \section{Transformations}
69 |
70 | \subsection{Linear-Fractional to Linear}
71 | We transform a Linear-Fractional Program
72 | \begin{maxi!}{\vx}{\frac{\vc^T\vx + a}{\vd^T \vx + b}}{}{}
73 | \addConstraint{\mA\vx}{\le \vb}
74 | \end{maxi!}
75 | where $\vd^T \vx + b$ has the same sign throughout the entire feasible region to a linear program using the Charnes--Cooper transformation~\citep{Charnes1962} by defining
76 | \begin{align}
77 | \vy &= \frac{1}{\vd^T\vx+b}\cdot\vx \\
78 | t &= \frac{1}{\vd^T\vx+b}
79 | \end{align}
80 | to form the equivalent program
81 | \begin{maxi!}{\vy,t}{\vc^T\vy + at}{}{}
82 | \addConstraint{\mA\vy}{\le \vb t}
83 | \addConstraint{\vd^T\vy+bt}{=1}
84 | \addConstraint{t}{\ge0}
85 | \end{maxi!}
86 | We then have $\vx^*=\frac{1}{t}\vy$.
87 |
88 | \subsection{LP as SOCP}
89 |
90 | The linear program
91 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
92 | \addConstraint{\mA\vx}{\le \vb}
93 | \end{mini!}
94 | can be cast as an SOCP:
95 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
96 | \addConstraint{\norm{\mC_i \vx+\vd_i}_2}{\le \vb_i - \va_i^T \vx}{\forall i}
97 | \end{mini!}
98 | where $\mC_i=0$ and $\vd_i=0~\forall i$.
99 |
100 | \subsection{QCQP as SOCP}
101 |
102 | The quadratically constrained quadratic program
103 | \begin{mini!}{\vx}{\vx^T\mQ_0\vx+\va_0^T\vx}{}{}
104 | \addConstraint{\vx^T\mQ_i\vx+\va_i^T\vx}{\le b_i}{~~i=1,\ldots,m}
105 | \end{mini!}
106 | with $\mQ_i=\mQ_i^T\ispsd0$, $i=0,\ldots,m$ can be cast as an SOCP:
107 | \begin{mini!}{\vx,t}{\va_0^T\vx + t}{}{}
108 | \addConstraint{\norm{\begin{bmatrix} 2 \mQ_0^{1/2}\vx \\ t-1 \end{bmatrix}}_2}{\le t+1}
109 | \addConstraint{\norm{\begin{bmatrix} 2 \mQ_i^{1/2}\vx \\ b_i-\va_i^T\vx-1 \end{bmatrix}}_2}{\le b_i-\va_i^T\vx+1}{~~i=1,\ldots,m}
110 | \end{mini!}
111 |
112 |
113 | \subsection{QP as SOCP}
114 |
115 | The quadratic program
116 | \begin{mini!}{\vx}{\frac{1}{2}\vx^T\mQ\vx+\vc^T\vx}{}{}
117 | \addConstraint{\va_i^T\vx}{\le \vb_i}
118 | \end{mini!}
119 | with $\mQ=\mQ^T\ispsd0$ can be cast as an SOCP:
120 | \begin{mini!}{\vx,\vy}{\vc^T \vx+y}{}{}
121 | \addConstraint{\norm{
122 | \begin{bmatrix} 2 \mQ^{1/2}\vx \\ y-1 \end{bmatrix}}_2}{\le y+1}
123 | \addConstraint{\va_i^T \vx}{\le \vb_i}{~~\forall i}
124 | \end{mini!}
125 |
126 | \subsection{Sum of L2 Norms to SOCP}
127 |
128 | \begin{mini!}{\vx}{\sum_{i=1}^p \norm{\mA_i\vx-\vb_i}_2}{}{}
129 | \end{mini!}
130 | becomes
131 | \begin{mini!}{\vx,y}{\sum_{i=1}^p y_i}{}{}
132 | \addConstraint{\norm{\mA_i\vx-\vb_i}_2}{\le y_i}{~~i=1,\ldots,p}
133 | \end{mini!}
134 |
135 | \subsection{Minimax of L2 Norms to SOCP}
136 |
137 | \begin{mini!}{\vx}{\max_{i=1,\ldots,p} \norm{\mA_i\vx-\vb_i}_2}{}{}
138 | \end{mini!}
139 | becomes
140 | \begin{mini!}{\vx,y}{y}{}{}
141 | \addConstraint{\norm{\mA_i\vx-\vb_i}_2}{\le y}{~~i=1,\ldots,p}
142 | \end{mini!}
143 |
144 | \subsection{Hyperbolic Constraints to SOCP}
145 |
146 | For scalar $w$, a constraint of the form
147 | \begin{equation}
148 | w^2\le xy, ~~x\ge0, ~~y\ge0
149 | \end{equation}
150 | can be transformed into the SOCP constraint
151 | \begin{equation}
152 | \norm{\begin{bmatrix} 2w \\ x-y \end{bmatrix}}_2 \le x+y \eqcite{Lobo1998}
153 | \end{equation}
154 |
155 | For vector $\vw$, a constraint of the form
156 | \begin{equation}
157 | \vw^T\vw = \norm{\vw}_2^2 \le xy, ~~x\ge0, ~~y\ge0
158 | \end{equation}
159 | can be transformed into the SOCP constraint
160 | \begin{equation}
161 | \label{equ:hyperbolic_constraint_to_socp}
162 | \norm{\begin{bmatrix} 2\vw \\ x-y \end{bmatrix}}_2 \le x+y \eqcite{Lobo1998,Alizadeh2003}
163 | \end{equation}
164 | Note that this implies that
165 | \begin{equation}
166 | x^{-1}\le y \iff \norm{\begin{bmatrix} 2 \\ x-y \end{bmatrix}}_2 \le x+y %TODO: For x>0 ?
167 | \end{equation}
168 |
169 | %TODO: From slides
170 | % A constraint of the form
171 | % \begin{equation}
172 | % \norm{x}_2^2\le 2yz, ~~y\ge0, ~~z\ge0
173 | % \end{equation}
174 | % can be transformed into the SOCP constraint
175 | % \begin{equation}
176 | % \norm{\begin{bmatrix} x \\ \frac{1}{\sqrt{2}}(y-z) \end{bmatrix}}_2 \le \frac{1}{\sqrt{2}}(y+z)
177 | % \end{equation}
178 |
179 | %TODO Lobo1998 fractional constraints as SOCPs
180 |
181 | \subsection{Matrix Fractional to SOCP}
182 |
183 | The problem
184 | \begin{mini!}{\vx}{(\mF\vx+\vg)^T(\mP_0+\vx_1\mP_1+\ldots+\vx_p\mP_p)^{-1}(\mF\vx+\vg)}{}{}
185 | \addConstraint{\mP_0+\vx_1\mP_1+\ldots+\vx_p\mP_p}{>0}
186 | \addConstraint{\vx}{\ge0}
187 | \end{mini!}
188 | where $\mP_i=\mP_i^T\in\sRnn$, $\mF\in\sRnp$, $\vg\in\sRn$, and $\vx\in\sRp$ can be transformed into the SOCP where $t_i\in\sR, \vy_i\in\sRn$:
189 | \begin{mini!}{\vx,t}{t_0+\ldots+t_p}{}{}
190 | \addConstraint{\mP_0^{1/2}\vy_0+\ldots+\mP_p^{1/2}\vy_p}{=\mF\vx+\vg} \eqcite{Lobo1998}
191 | \addConstraint{\norm{\begin{bmatrix} 2 \vy_0 \\ t_0-1\end{bmatrix}}_2}{\le t_0+1}
192 | \addConstraint{\norm{\begin{bmatrix} 2 \vy_i \\ t_i-x_i \end{bmatrix}}_2}{\le t_i+x_i}{~~i=1,\ldots,p}
193 | \end{mini!}
194 |
195 | \subsection{Fractional Objective to SOCP}
196 |
197 | Convert
198 | \begin{mini!}{\vx}{\frac{f(x)^2}{g(x)}}{}{}
199 | \addConstraint{g(x)}{>0}
200 | \end{mini!}
201 | to
202 | \begin{mini!}{\vx,t}{t}{}{}
203 | \addConstraint{f(x)^2}{\le t g(y)}
204 | \addConstraint{g(y)}{>0}
205 | \addConstraint{t}{\ge 0}
206 | \end{mini!}
207 | and apply \autoref{equ:hyperbolic_constraint_to_socp}.
208 |
209 | \subsection{Chance-Constrained LP to SOCP}
210 |
211 | The problem
212 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
213 | \addConstraint{\textrm{Prob}\{\va_i^T\vx\le \vb_i\}}{\ge p_i}{~~i=1,\ldots,m}
214 | \end{mini!}
215 | where $p_i>0.5$ and all $\va_i$ are independent normal random vectors with expected values $\bar \va_i$ and covariance matrices $\Sigma_i\ispd0$, can be transformed into the SOCP:
216 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
217 | \addConstraint{\bar \va_i^T \vx \le b_i-\Phi^{-1}(p_i)\norm{\Sigma_i^{1/2}\vx}_2}{~~i=1,\ldots,m}
218 | \end{mini!}
219 | where $\Phi^{-1}(p)$ is the inverse cumulative probability distribution of a standard normal variable.
220 |
221 | %https://stanford.edu/class/ee364a/lectures/chance_constr.pdf
222 | Likewise, the problem
223 | \begin{maxi!}{\vx}{\vc^T \vx}{}{}
224 | \addConstraint{\textrm{Prob}\{\va_i^T\vx\le \vb_i\}}{\le p_i}{~~i=1,\ldots,m}
225 | \end{maxi!}
226 | transforms to
227 | \begin{maxi!}{\vx}{\vc^T \vx}{}{}
228 | \addConstraint{\bar \va_i^T \vx \ge \Phi^{-1}(1-p_i)\norm{\Sigma_i^{1/2}\vx}_2}{~~i=1,\ldots,m}
229 | \end{maxi!}
230 | provided $p_i\le0.5$.
231 |
232 | \subsection{Robust LP with Box Uncertainty as LP}
233 |
234 | The problem
235 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
236 | \addConstraint{\va_i^T \vx}{\le b_i}{~~\forall \va_i\in\{\hat \va_i + \rho_i \vu : \norm{\vu}_\infty\le1\}}{~~i=1,\ldots,m}
237 | \end{mini!}
238 | is equivalent to
239 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
240 | \addConstraint{\hat \va_i^T \vx + \rho_i\norm{\vx}_1}{\le b_i}{~~i=1,\ldots,m}
241 | \end{mini!}
242 | which is equivalent to:
243 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
244 | \addConstraint{\hat \va_i^T \vx + \rho_i \sum_{j=1}^n \vu_j}{\le b_i}{~~i=1,\ldots,m}
245 | \addConstraint{-\vu_j}{\le \vx_j\le\vu_j}{~~j=1,\ldots,n}
246 | \end{mini!}
247 |
248 | \subsection{Robust LP with Ellipsoidal Uncertainty as SOCP}
249 |
250 | The problem
251 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
252 | \addConstraint{\va_i^T \vx}{\le b_i}{~~\forall \va_i\in\{\hat \va_i + \mR_i \vu : \norm{\vu}_2\le1\}}{~~i=1,\ldots,m}
253 | \end{mini!}
254 | is equivalent to
255 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
256 | \addConstraint{\hat \va_i^T \vx + \norm{\mR_i^T \vx}_2}{\le b_i}{~~i=1,\ldots,m}
257 | \end{mini!}
258 |
259 | \subsection{Square Root as SOCP}
260 | \begin{equation}
261 | \sqrt{x}\ge t \iff x\ge t^2 \iff \norm{\begin{matrix} 1-x \\ 2t \end{matrix}}_2 \le 1+x %TODO: For x>=0?
262 | \end{equation}
263 |
264 | The problem
265 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
266 | \addConstraint{\va_i^T \vx}{\le b_i}{~~\forall \va_i\in\{\hat \va_i + \mR_i \vu : \norm{\vu}_2\le1\}}{~~i=1,\ldots,m}
267 | \end{mini!}
268 | is equivalent to
269 | \begin{mini!}{\vx}{\vc^T \vx}{}{}
270 | \addConstraint{\hat \va_i^T \vx + \norm{\mR_i^T \vx}_2}{\le b_i}{~~i=1,\ldots,m}
271 | \end{mini!}
272 |
273 |
274 | \section{Useful Problems}
275 |
276 | \begin{align}
277 | \textrm{average}(\vv) &= \textrm{argmin}_{x\in\sR} \norm{\vv-x\mathbf{1}}_2^2 \\
278 | \textrm{median}(\vv) &= \textrm{argmin}_{x\in\sR} \norm{\vv-x\mathbf{1}}_1
279 | \end{align}
--------------------------------------------------------------------------------
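An editor's numerical sketch (not part of the repository; numpy, the random data, and the grid search are assumptions): spot-checks of the hyperbolic-constraint/SOCP equivalence and the average/median characterizations from optimization.tex above.

```python
# Hedged numeric sketch of two facts from optimization.tex.
import numpy as np

rng = np.random.default_rng(4)

# w^2 <= x*y with x,y >= 0  <=>  ||[2w, x-y]||_2 <= x + y
for _ in range(1000):
    w = rng.normal()
    x, y = rng.uniform(0, 2, size=2)
    lhs = w ** 2 <= x * y
    rhs = np.hypot(2 * w, x - y) <= x + y
    assert lhs == rhs

# average(v) = argmin_x ||v - x*1||_2^2 and median(v) = argmin_x ||v - x*1||_1,
# verified here by a fine grid search over x.
v = rng.normal(size=201)
xs = np.linspace(v.min(), v.max(), 4001)
sq = ((v[None, :] - xs[:, None]) ** 2).sum(axis=1)
ab = np.abs(v[None, :] - xs[:, None]).sum(axis=1)
assert abs(xs[sq.argmin()] - v.mean()) < 1e-2
assert abs(xs[ab.argmin()] - np.median(v)) < 1e-2
print("optimization identity checks passed")
```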
/src/basics.tex:
--------------------------------------------------------------------------------
1 | \chapter{Basics}
2 |
3 | \section{Fundamental Theorem of Linear Algebra}
4 |
5 | \begin{center}
6 | \includegraphics[width=\textwidth]{imgs/fund_theorem_lin_alg1.png}
7 | \includegraphics[width=\textwidth]{imgs/fund_theorem_lin_alg2.png}
8 | \includegraphics[width=\textwidth]{imgs/fund_theorem_lin_alg3.png}
9 | \includegraphics[width=\textwidth]{imgs/fund_theorem_lin_alg4.png}
10 | \includegraphics[width=\textwidth]{imgs/fund_theorem_lin_alg5.png}
11 | \end{center}
12 |
13 |
14 | \section{Matrix Properties}
15 |
16 | \begin{align}
17 | \mA(\mB+\mC) &= \mA\mB+\mA\mC &\textrm{(left distributivity)} \\
18 | (\mB+\mC)\mA &= \mB\mA+\mC\mA &\textrm{(right distributivity)} \\
19 | \mA\mB &\ne \mB\mA &\textrm{(in general)} \\
20 | (\mA\mB)\mC &= \mA(\mB\mC) &\textrm{(associativity)}
21 | \end{align}
22 |
23 | \section{Rank}
24 |
25 | \begin{align}
26 | \noalign{If $\mA\in\sRmn$ and $\mB\in\sRnr$, then}
27 | \eqcite{Thome2016}
28 | \rank(\mA)+\rank(\mB)-n\le \rank(\mA\mB)\le \min(\rank(\mA),\rank(\mB)) &&~~~~\textrm{Sylvester's Inequality} \\
29 | \noalign{If $\mA\mB$, $\mA\mB\mC$, $\mB\mC$ are defined, then}
30 | \eqcite{Thome2016}
31 | \rank(\mA\mB)+\rank(\mB\mC)\le \rank(\mB)+\rank(\mA\mB\mC) && \textrm{Frobenius's inequality} \\
32 | \noalign{If $\dim(\mA)=\dim(\mB)$, then}
33 | \rank(\mA+\mB)\le\rank(\mA)+\rank(\mB) &&\textrm{Subadditivity}
34 | \end{align}
35 | If $\mA_1, \mA_2, \ldots, \mA_l$ have $n_1,n_2,\ldots,n_l$ columns, so that $\mA_1\mA_2\ldots\mA_l$ is well-defined, then
36 | \begin{equation}
37 | \eqcite{Thome2016}
38 | \rank(\mA_1\mA_2\ldots\mA_l)
39 | \ge \sum_{i=1}^{l-1}\rank(\mA_i\mA_{i+1})-\sum_{i=2}^{l-1}\rank(\mA_i)
40 | \ge\sum_{i=1}^l\rank(\mA_i)-\sum_{i=1}^{l-1}n_i
41 | \end{equation}
42 |
43 | \section{Identities}
44 | \begin{align}
45 | \left(\sum_{i=1}^n \vz_i\right)^2 = \vz^T
46 | \begin{bmatrix}
47 | 1 & \hdots & 1 \\
48 | \vdots & \ddots & \vdots \\
49 | 1 & \hdots & 1
50 | \end{bmatrix}
51 | \vz
52 | \end{align}
53 |
54 | \section{Matrix Multiplication}
55 |
56 | For $\mA\in\sR^{i,j}$ and $\mB\in\sR^{j,k}$ and $\mC\in\sR^{l,k}$
57 | \begin{align}
58 | [\mA\mB]_{ik} &= \sum_j \mA_{ij}\mB_{jk} \\
59 | [\mA\mB\mC^T]_{il} &= \sum_j \mA_{ij}[\mB\mC^T]_{jl}=\sum_j \mA_{ij}\sum_k \mB_{jk}\mC_{lk}=\sum_j\sum_k \mA_{ij}\mB_{jk}\mC_{lk}
60 | \end{align}
61 | %TODO: Algorithms and orderings
62 |
63 |
64 |
65 | \section{Transpose Properties}
66 |
67 | \begin{align}
68 | (c\mA)^T &= c\mA^T \\
69 | (\mA\mB)^T &= \mB^T\mA^T \\
70 | (\mA\mB\mC\ldots)^T &= \ldots\mC^T\mB^T\mA^T \\
71 | (\mA+\mB)^T &= \mA^T+\mB^T \\
72 | (\mA+\mB+\ldots)^T &= \mA^T+\mB^T+\ldots^T \\
73 | (\mA^{-1})^T &= (\mA^T)^{-1}
74 | \end{align}
75 |
76 | \section{Conjugate Transpose}
77 |
78 | \begin{align}
79 | (\mA^H)^{-1} &= (\mA^{-1})^H \\
80 | (\mA+\mB)^H &= \mA^H+\mB^H \\
81 | (\mA+\mB+\ldots)^H &= \mA^H+\mB^H+\ldots^H \\
82 | (\mA\mB)^H &= \mB^H \mA^H \\
83 | (\mA\mB\mC\ldots)^H &= \ldots\mC^H\mB^H\mA^H
84 | \end{align}
85 |
86 |
87 | \section{Determinant Properties}
88 | The determinant is only defined for square matrices; here we assume that $\mA\in\sRnn$.
89 |
90 | \begin{align}
91 | \det(\mI_n) &= 1 \\
92 | \det(\mA^T) &= \det(\mA) \\
93 | \det(\mA^H) &= \overline{\det(\mA)} \\
94 | \det(\mA^{-1}) &= 1/\det(\mA) \\
95 | \det(\mA\mB) &= \det(\mB\mA) \\
96 | \det(\mA\mB) &= \det(\mA)\det(\mB) &\mB\in\sRnn \\
97 | \det(c\mA) &= c^n\det(\mA) \\
98 | \det(\mA) &= \prod \eig(\mA) \\
99 | \det(\mA^n) &= \det(\mA)^n \\
100 | \det(-\mA) &= (-1)^n\det(\mA) \\
101 | \det(\mA^c) &= \det(\mA)^c \\
102 | \det(\mI+\vu \vv^T)&= 1 + \vu^T \vv \\
103 | \det(\mB\mA\mB^{-1}) &= \det(\mA) \\
104 | \det(\mB\mA\mB^{-1}-c\mI) &= \det(\mA-c\mI) \\
105 | \noalign{For n=2:}
106 | \det(\mI+\mA) &= 1 + \det(\mA)+\trace(\mA) \\
107 | \det(\mA) &=\begin{vmatrix} a & b \\ c & d \end{vmatrix} = ad-bc \\
108 | \noalign{For n=3:}
109 | \det(\mI+\mA) &= 1 + \det(\mA)+\trace(\mA) + \frac{1}{2}\trace(\mA)^2-\frac{1}{2}\trace(\mA^2) \\
110 | \det(\mA) &=\begin{vmatrix} a & b & c \\ d & e & f \\ g & h & i \end{vmatrix} =
111 | a\begin{vmatrix} e & f \\ h & i \end{vmatrix}
112 | -b\begin{vmatrix} d & f \\ g & i \end{vmatrix}
113 | +c\begin{vmatrix} d & e \\ g & h \end{vmatrix} \\
114 | \noalign{For n=4:}
115 | \det(\mI+\mA) &= 1 + \det(\mA)+\trace(\mA) + \frac{1}{2}\trace(\mA)^2-\frac{1}{2}\trace(\mA^2) \\
116 | & + \frac{1}{6}\trace(\mA)^3-\frac{1}{2}\trace(\mA)\trace(\mA^2)+\frac{1}{3}\trace(\mA^3) \\
117 | \noalign{For small $\epsilon$:}
118 | \det(\mI+\epsilon\mA) &\approx 1 + \epsilon\trace(\mA)+\frac{1}{2}\epsilon^2\trace(\mA)^2-\frac{1}{2}\epsilon^2\trace(\mA^2) \\ %TODO: Check from MatrixCookbook
119 | \det(\mI+\epsilon\mA) &\approx 1 + \epsilon\trace(\mA) + O(\epsilon^2) \\ %TODO: Check: From MathWorld
120 | \noalign{Sylvester's determinant identity, for $\mA\in\sRmn, \mB\in\sRnm$}
121 | \eqcite{Sylvester1851}
122 | \det(\mI_m+\mA\mB) &= \det(\mI_n+\mB\mA) \\
123 | \det(\mX+\mA\mB) &= \det(\mX)\det(\mI_n + \mB\mX^{-1}\mA) \\
124 | \noalign{If $\mA$ is triangular}
125 | \det(\mA) &= \prod_i \mA_{i,i} = \prod_i \diag(\mA)_i \\
126 | \noalign{If all entries of $\mA\in\sCnn$ are in the unit disk}
127 | |\det(\mA)| &\le n^{n/2} \eqcite{Hadamard1893} \\
128 | \noalign{Schur's determinant identities}
129 | \det(\mM) &= \det(\begin{bmatrix} \mA & \mB \\ \mC & \mD \end{bmatrix}) = \det(\mA) \det(\mD -\mC \mA^{-1}\mB) \\
130 | \det(\mM) &= \det(\begin{bmatrix} \mA & \mB \\ \mC & \mD \end{bmatrix}) = \det(\mD) \det(\mA -\mB \mD^{-1}\mC)
131 | \end{align}
132 | %TODO: Matix exponential identities det(A)=log(det(exp(A)))
133 |
134 | Geometrically, if a unit volume is acted on by $\mA$, then $|\det(\mA)|$ indicates the volume after the transformation.
135 |
136 |
137 | \section{Trace Properties}
138 | The Trace is only defined for square matrices.
139 | \begin{align}
140 | \trace(\mA) &=\sum_i \mA_{ii} \\
141 | \trace(\mA) &=\sum_i \eig(\mA) \\
142 | \trace(\mA+\mB) &=\trace(\mA)+\trace(\mB) \\
143 | \trace(c\mA) &=c\trace(\mA) \\
144 | \trace(\mA) &=\trace(\mA^T) \\
145 | \trace(\mA\mB) &=\trace(\mB\mA) \\
146 | \trace(\mA^T\mB) &=\sum_{i,j} \mA_{ij}\mB_{ij} \\ %TODO: For real matrices only?
147 | \trace(\mA^T\mB) &=\sum_{i,j} (\mA\circ\mB)_{ij} \\ %TODO: For real matrices only?
148 | \va^T \va &=\trace(\va \va^T)
149 | \end{align}
150 |
151 | For $\mA,\mB,\mC,\mD$ of compatible dimensions,
152 |
153 | \begin{equation}
154 | \trace(\mA^T\mB)=\trace(\mA\mB^T)=\trace(\mB^T\mA)=\trace(\mB\mA^T)
155 | \end{equation}
156 | \begin{equation}
157 | \trace(\mA\mB\mC\mD)=\trace(\mB\mC\mD\mA)=\trace(\mC\mD\mA\mB)=\trace(\mD\mA\mB\mC)
158 | \end{equation}
159 | (Invariant under cyclic permutations)
160 |
161 |
162 |
163 | \section{Inverse Properties}
164 | The inverse of $\mA\in\sCnn$ is denoted $\mA^{-1}$ and defined such that
165 | \begin{equation}
166 | \mA\mA^{-1}=\mA^{-1}\mA=\mI_n
167 | \end{equation}
168 | where $\mI_n$ is the $n \times n$ identity matrix. $\mA$ is nonsingular if $\mA^{-1}$ exists; otherwise, $\mA$ is singular.
169 |
170 |
171 | If individual inverses exist
172 | \begin{equation}
173 | (\mA\mB)^{-1}=\mB^{-1}\mA^{-1}
174 | \end{equation}
175 | more generally
176 | \begin{equation}
177 | (\mA\mB\mC\ldots)^{-1}=\ldots\mC^{-1}\mB^{-1}\mA^{-1}
178 | \end{equation}
179 |
180 | \begin{equation}
181 | (\mA^{-1})^T=(\mA^T)^{-1}
182 | \end{equation}
183 | \begin{equation}
184 | (\mA^H)^{-1}=(\mA^{-1})^H
185 | \end{equation}
186 |
187 | Hua's Identity:
188 | \begin{align}
189 | (\mA+\mB)^{-1} &= \mAi - (\mA+\mA\mBi\mA)^{-1} \\
190 | (\mA-\mB)^{-1} &= \sum_{k=0}^\infty (\mAi\mB)^k\mAi & \textrm{(Neumann series; valid if $\rho(\mAi\mB)<1$)}
191 | \end{align}
192 |
193 |
194 |
195 |
196 | \section{Moore--Penrose Pseudoinverse}
197 | For $\mA\in\sRmn$, the Moore--Penrose pseudoinverse $\mA\pinv$ satisfies:
198 | \begin{align}
199 | \mA\mA\pinv\mA &= \mA \\
200 | \mA\pinv\mA\mA\pinv &= \mA\pinv \\
201 | (\mA\mA\pinv)^T &= \mA\mA\pinv\ \textrm{(symmetric)} \\
202 | (\mA\pinv\mA)^T &= \mA\pinv\mA\ \textrm{(symmetric)}
203 | \end{align}
204 | $\mA\pinv$ always exists and is unique. For complex matrices, the symmetry conditions are replaced by the requirement that $\mA\mA\pinv$ and $\mA\pinv\mA$ be Hermitian.
205 |
206 | If $\mA\in\sCmn$, then:
207 | \begin{align}
208 | (\mA\pinv)\pinv &= \mA \\
209 | (\mA^T)\pinv &= (\mA\pinv)^T \\
210 | (\mA^H)\pinv &= (\mA\pinv)^H \\
211 | (\mA^*)\pinv &= (\mA\pinv)^* \\
212 | (\mA\pinv\mA)\mA^H &= \mA^H \\
213 | (\mA\pinv\mA)\mA^T &\ne \mA^T \\
214 | (c\mA)\pinv &= (1/c)\mA\pinv \\
215 | \mA\pinv &= (\mA^T\mA)\pinv\mA^T \\
216 | \mA\pinv &= \mA^T(\mA\mA^T)\pinv \\
217 | (\mA^T\mA)\pinv &= \mA\pinv(\mA^T)\pinv \\
218 | (\mA\mA^T)\pinv &= (\mA^T)\pinv\mA\pinv \\
219 | \mA\pinv &= (\mA^H\mA)\pinv\mA^H \\
220 | \mA\pinv &= \mA^H(\mA\mA^H)\pinv \\
221 | (\mA^H\mA)\pinv &= \mA\pinv(\mA^H)\pinv \\
222 | (\mA\mA^H)\pinv &= (\mA^H)\pinv\mA\pinv \\
223 | (\mA\mB)\pinv &= (\mA\pinv\mA\mB)\pinv(\mA\mB\mB\pinv)\pinv
224 | \end{align}
225 |
226 | If $\mA$ is full-rank, then:
227 | \begin{align}
228 | (\mA\mA\pinv)(\mA\mA\pinv) &= \mA\mA\pinv \\
229 | (\mA\pinv\mA)(\mA\pinv\mA) &= \mA\pinv\mA \\
230 | \trace(\mA\mA\pinv) &= \rank(\mA\mA\pinv) \eqcite{Seber2002} \\
231 | \trace(\mA\pinv\mA) &= \rank(\mA\pinv\mA) \eqcite{Seber2002}
232 | \end{align}
233 |
234 | \subsection*{Special Properties}
235 | \begin{itemize}
236 | \item $\mA\pinv=\mA^{-1}$ if $\mA\in\sRnn$ is nonsingular.
237 | \item $\mA\pinv=(\mA^T\mA)^{-1}\mA^T$, if $\mA\in\sRmn$ is full column rank ($r=n\le m$). $\mA\pinv$ is a left inverse of $\mA$, so $\mA\pinv\mA=\mV_r\mV_r^T=\mV\mV^T=\mI_n$.
238 | \item $\mA\pinv=\mA^T(\mA\mA^T)^{-1}$, if $\mA\in\sRmn$ is full row rank ($r=m\le n$). $\mA\pinv$ is a right inverse of $\mA$, so $\mA\mA\pinv=\mU_r\mU_r^T=\mU\mU^T=\mI_m$.
239 | \end{itemize} %TODO: Check these
240 |
241 |
242 | %TODO
243 | % \subsection{Moore-Penrose Pseudoinverse}
244 | % \begin{equation}
245 | % \mA\pinv = \mV \mD^{-1} \mU^T
246 | % \end{equation}
247 | % where the foregoing comes from a singular-value decomposition and $\mD^{-1}=\diag(\frac{1}{\sigma_1},\ldots,\frac{1}{\sigma_r})$
248 |
249 |
250 |
251 | \section{Hadamard Identities}
252 |
253 | \begin{align}
254 | (\mA\circ\mB)_{ij} &= \mA_{ij}\mB_{ij}~\forall~i,j \\
255 | \mA\circ\mB &= \mB\circ\mA \eqcite{million2007} \\
256 | \mA\circ(\mB\circ\mC) &= (\mA\circ\mB)\circ\mC \\
257 | \mA\circ(\mB+\mC) &= \mA\circ\mB+\mA\circ\mC \eqcite{million2007} \\
258 | a(\mA\circ\mB) &= (a\mA)\circ\mB =\mA\circ(a\mB) \eqcite{million2007} \\
259 | (\mA^T\circ\mB^T) &= (\mA\circ\mB)^T \\
261 | \vx^T \mA \vx &= \sum_{i,j}\big((\vx \vx^T)\circ\mA\big)_{ij} \\
262 | \vx^T(\mA\circ\mB)\vy &= \trace((\diag(\vx)\mA)^T \mB\diag(\vy))~~~\mA,\mB\in\sRmn \eqcite{Minka2000} \\
263 | \trace(\mA^T\mB) &= \mathbf{1}^T(\mA\circ\mB)\mathbf{1} \\
264 | &= \sum_{i,j} \mA_{ij}\mB_{ij}
265 | \end{align}
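As a quick numerical check of the last identity, take
\begin{equation}
\mA=\begin{bmatrix}1&2\\3&4\end{bmatrix},\quad
\mB=\begin{bmatrix}5&6\\7&8\end{bmatrix}
\quad\Rightarrow\quad
\mathbf{1}^T(\mA\circ\mB)\mathbf{1}=5+12+21+32=70=\trace(\mA^T\mB).
\end{equation}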
266 |
--------------------------------------------------------------------------------
/src/derivatives.tex:
--------------------------------------------------------------------------------
1 | %TODO: Include handy diagram from https://explained.ai/matrix-calculus/index.html
2 |
3 | \chapter{Derivatives}
4 |
5 | \section{Useful Rules for Derivatives}
6 | For general $\mA$ and $\mX$ (no special structure):
7 | \begin{align}
8 | \partial\mA &= 0~~\textrm{where $\mA$ is a constant} \\
9 | \partial(c\mX) &= c\partial\mX \\
10 | \partial(\mX+\mY) &= \partial\mX+\partial\mY \\
11 | \partial(\trace(\mX)) &= \trace(\partial(\mX)) \\
12 | \partial(\mX\mY) &= (\partial\mX)\mY + \mX(\partial\mY) \\
13 | \partial(\mX\circ\mY) &= (\partial\mX)\circ\mY + \mX\circ(\partial\mY) \\
14 | %TODO Kronecker x in circle equation 39 Matrix Cookbook
15 | \partial(\mX^{-1}) &= -\mX^{-1}(\partial\mX)\mX^{-1} \\
16 | \partial(\det(\mX)) &= \trace(\textrm{adj}(\mX)\partial\mX) \\
17 | \partial(\det(\mX)) &= \det(\mX)\trace(\mX^{-1}\partial\mX) \\
18 | \partial(\ln(\det(\mX))) &= \trace(\mX^{-1}\partial\mX) \\
19 | \partial(\mXT) &= (\partial\mX)\T \\
20 | \partial(\mX^H) &= (\partial\mX)^H
21 | \end{align}
22 |
23 | \section{Gradient Notation}
24 | For a matrix $\mA\in\sRmn$, the gradient is defined as:
25 | \begin{equation}
26 | \grad_\mA f(\mA)=
27 | \begin{bmatrix}
28 | \pd{f(\mA)}{\mA_{11}} & \pd{f(\mA)}{\mA_{12}} & \ldots & \pd{f(\mA)}{\mA_{1n}} \\
29 | \pd{f(\mA)}{\mA_{21}} & \pd{f(\mA)}{\mA_{22}} & \ldots & \pd{f(\mA)}{\mA_{2n}} \\
30 | \vdots & \vdots & \ddots & \vdots \\
31 | \pd{f(\mA)}{\mA_{m1}} & \pd{f(\mA)}{\mA_{m2}} & \ldots & \pd{f(\mA)}{\mA_{mn}}
32 | \end{bmatrix}
33 | \end{equation}
34 | i.e.
35 | \begin{equation}
36 | (\grad_\mA f(\mA))_{ij}=\pd{f(\mA)}{\mA_{ij}}
37 | \end{equation}
38 | Note that the gradient always has the same shape as the entity with respect to which it is taken. Also note that the gradient of a function is only defined if the function is real-valued, that is, if it returns a scalar.
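As a quick check of this convention, take $f(\mA)=\sum_{i,j}\mA_{ij}^2$; then
\begin{equation}
(\grad_\mA f(\mA))_{ij}=\pd{f(\mA)}{\mA_{ij}}=2\mA_{ij},
\qquad\textrm{i.e.}\qquad
\grad_\mA f(\mA)=2\mA,
\end{equation}
which has the same shape as $\mA$, as required.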
39 |
40 | \section{Derivatives of Matrices and Vectors}
41 |
42 | \subsection{First-Order}
43 |
44 | In the following, $\mJ$ is the Single-Entry Matrix (\autoref{sec:rogue_single_entry}).
45 | \begin{align}
46 | \pd{\vx\T \va}{\vx} &= \pd{\vaT \vx}{\vx} = \va \\
47 | \pd{\va\T\mX\vb}{\mX} &= \va\vbT \\
48 | \pd{\va\T\mXT\vb}{\mX} &= \vb\vaT \\
49 | \pd{\va\T\mX\va}{\mX} &= \pd{\vaT\mXT\va}{\mX} = \va\vaT \\
50 | \pd{\mX}{\mX_{ij}} &= \mJ^{ij} %TODO: What is this? (MCB 73)
51 | %TODO: MCB 74, 75
52 | \end{align}
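The second identity above, for instance, follows directly from the entrywise definition of the gradient:
\begin{equation}
\pd{\va\T\mX\vb}{\mX_{ij}}
=\pd{}{\mX_{ij}}\sum_{k,l}a_k\mX_{kl}b_l
=a_i b_j
=(\va\vbT)_{ij}.
\end{equation}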
53 |
54 | \section{Derivatives of Vector Norms}
55 |
56 | \begin{align}
57 | \pd{}{\vx}\norm{\vx-\va}_2 &= \frac{\vx-\va}{\norm{\vx-\va}_2} \\
58 | \pd{}{\vx}\frac{\vx-\va}{\norm{\vx-\va}_2} &= \frac{\mI}{\norm{\vx-\va}_2}-\frac{(\vx-\va)(\vx-\va)\T}{\norm{\vx-\va}_2^3} \\
59 | \pd{\norm{\vx}_2^2}{\vx} &= \pd{\vxT\vx}{\vx} = 2\vx
60 | \end{align}
61 |
62 | \section{Scalar by Vector}
63 | \begin{center}
64 | \begin{tabular}{l|Sc|Sc|Sc}
65 | Qualifier & Expression & Numerator layout & Denominator layout \\
66 | & $\pd{a}{x}$ & $\vzero\T$ & $\vzero$ \\
67 | & $\pd{au(\vx)}{\vx}$ & $a\pd{u}{\vx}$ & Same \\
68 | & $\pd{u(\vx)+v(\vx)}{\vx}$ & $\pd{u}{\vx} + \pd{v}{\vx}$ & Same \\
69 | & $\pd{u(\vx)v(\vx)}{\vx}$ & $u\pd{v}{\vx} + v\pd{u}{\vx}$ & Same \\
70 | & $\pd{g(u(\vx))}{\vx}$ & $\pd{g(u)}{u}\pd{u}{\vx}$ & Same \\
71 | & $\pd{f(g(u(\vx)))}{\vx}$ & $\pd{f(g)}{g}\pd{g(u)}{u}\pd{u}{\vx}$ & Same \\
72 | & $\pd{\vu(\vx)\T\vv(\vx)}{\vx}$ & $\vuT\pd{\vv}{\vx}+\vvT\pd{\vu}{\vx}$ & $\pd{\vu}{\vx}\vv+\pd{\vv}{\vx}\vu$ \\
73 | & $\pd{\vu(\vx)\T\mA\vv(\vx)}{\vx}$ & $\vuT\mA\pd{\vv}{\vx}+\vvT\mAT\pd{\vu}{\vx}$ & $\pd{\vu}{\vx}\mA\vv+\pd{\vv}{\vx}\mAT\vu$ \\
74 | & $\md{f}{2}{\vx}{}{{\vxT}}{}$ & & $\mH$, the Hessian matrix \\
75 | & $\pd{\va\cdot\vx}{\vx}=\pd{\vx\cdot\va}{\vx}$ & $\vaT$ & $\va$ \\
76 | & $\pd{\vbT\mA\vx}{\vx}$ & $\vbT\mA$ & $\mAT\vb$ \\
77 | & $\pd{\vxT\mA\vx}{\vx}$ & $\vxT(\mA+\mAT)$ & $(\mA+\mAT)\vx$ \\
78 | $\mA$ symmetric & $\pd{\vxT\mA\vx}{\vx}$ & $2\vxT\mA$ & $2\mA\vx$ \\
79 | & $\pd{\vxT\mA\vx}{\vx}$ & $\mA+\mAT$ & Same \\
80 | $\mA$ symmetric & $\pd{\vxT\mA\vx}{\vx}$ & $\mA$ & Same \\
81 | & $\pd{\vxT\vx}{\vx}$ & $2\vxT$ & $2\vx$ \\
82 | & $\pd{\vaT\vu(\vx)}{\vx}$ & $\vaT\pd{\vu}{\vx}$ & $\pd{\vu}{\vx}\va$ \\
83 | & $\pd{\vaT\vx\vxT\vb}{\vx}$ & $\vxT(\va\vbT+\vb\vaT)$ & $(\va\vbT+\vb\vaT)\vx$ \\
84 | & $\pd{(\mA\vx+\vb)\T\mC(\mD\vx+\ve)}{\vx}$ & $(\mD\vx+\ve)\T\mCT\mA+(\mA\vx+\vb)\T\mC\mD$ & $\mDT\mCT(\mA\vx+\vb)+\mAT\mC(\mD\vx+\ve)$ \\
85 | & $\pd{\norm{\vx-\va}}{\vx}$ & $\frac{(\vx-\va)\T}{\norm{\vx-\va}}$ & $\frac{\vx-\va}{\norm{\vx-\va}}$ \\
86 | \end{tabular}
87 | \end{center}
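The quadratic-form rows above can be derived entrywise: writing $\vxT\mA\vx=\sum_{i,j}x_i\mA_{ij}x_j$,
\begin{equation}
\pd{\vxT\mA\vx}{x_k}
=\sum_j \mA_{kj}x_j+\sum_i x_i\mA_{ik}
=\big((\mA+\mAT)\vx\big)_k,
\end{equation}
which is the denominator-layout result $(\mA+\mAT)\vx$; the numerator-layout result is its transpose, $\vxT(\mA+\mAT)$.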
88 |
89 | \section{Vector by Vector}
90 | \begin{center}
91 | \begin{tabular}{l|Sc|Sc|Sc}
92 | Qualifier & Expression & Numerator layout & Denominator layout \\
93 | & $\pd{\va}{\vx}$ & $\vzero$ & Same \\ %TODO: really the same? shouldn't be transposed?
94 | & $\pd{\vx}{\vx}$ & $\mI$ & Same \\ %TODO: Really the identity matrix? Which one?
95 | & $\pd{\mA\vx}{\vx}$ & $\mA$ & $\mAT$ \\
96 | & $\pd{\vxT\mA}{\vx}$ & $\mAT$ & $\mA$ \\
97 | & $\pd{a\vu(\vx)}{\vx}$ & $a\pd{\vu}{\vx}$ & Same \\
98 | & $\pd{a(\vx)\vu(\vx)}{\vx}$ & $a\pd{\vu}{\vx}+\vu\pd{a}{\vx}$ & $a\pd{\vu}{\vx}+\pd{a}{\vx}\vuT$ \\
99 | & $\pd{\mA\vu(\vx)}{\vx}$ & $\mA\pd{\vu}{\vx}$ & $\pd{\vu}{\vx}\mAT$ \\
100 | & $\pd{(\vu(\vx)+\vv(\vx))}{\vx}$ & $\pd{\vu}{\vx}+\pd{\vv}{\vx}$ & Same \\
101 | & $\pd{\vg(\vu(\vx))}{\vx}$ & $\pd{\vg(\vu)}{\vu}\pd{\vu}{\vx}$ & $\pd{\vu}{\vx}\pd{\vg(\vu)}{\vu}$ \\
102 | & $\pd{\vf(\vg(\vu(\vx)))}{\vx}$ & $\pd{\vf(\vg)}{\vg(\vu)}\pd{\vg(\vu)}{\vu}\pd{\vu}{\vx}$& $\pd{\vu}{\vx}\pd{\vg(\vu)}{\vu}\pd{\vf(\vg)}{\vg}$
103 | \end{tabular}
104 | \end{center}
105 |
106 |
107 |
108 | \section{Matrix by Scalar}
109 | \begin{center}
110 | \begin{tabular}{l|Sc|Sc}
111 | Qualifier & Expression & Numerator layout \\
112 | & $\pd{a\mU(x)}{x}$ & $a\pd{\mU}{x}$ \\
113 | & $\pd{\mA\mU(x)\mB}{x}$ & $\mA\pd{\mU}{x}\mB$ \\
114 | & $\pd{(\mU(x)+\mV(x))}{x}$ & $\pd{\mU}{x}+\pd{\mV}{x}$ \\
115 | & $\pd{(\mU(x)\mV(x))}{x}$ & $\mU\pd{\mV}{x}+\pd{\mU}{x}\mV$ \\
116 | & $\pd{(\mU(x)\kp\mV(x))}{x}$ & $\mU\kp\pd{\mV}{x} + \pd{\mU}{x}\kp\mV$ \\
117 | & $\pd{(\mU(x)\hp\mV(x))}{x}$ & $\mU\hp\pd{\mV}{x} + \pd{\mU}{x}\hp\mV$ \\
118 | & $\pd{\mU^{-1}(x)}{x}$ & $-\mU^{-1} \pd{\mU}{x} \mU^{-1}$ \\
119 | & $\md{\mU^{-1}}{2}{x}{}{y}{}$ & $\mU^{-1}\left(\pd{\mU}{x}\mU^{-1}\pd{\mU}{y} - \md{\mU}{2}{x}{}{y}{} + \pd{\mU}{y}\mU^{-1}\pd{\mU}{x}\right)\mU^{-1}$ \\
120 | & $\pd{e^{x\mA}}{x}$ & $\mA e^{x\mA}=e^{x\mA}\mA$
121 | \end{tabular}
122 | \end{center}
123 |
124 | \section{Traces}
125 | \begin{align}
126 | \pd{}{\mX}\trace(\mX) &=\mI \\
127 | \pd{}{\mX}\trace(\mX\mA) &=\mAT \\
128 | \pd{}{\mX}\trace(\mA\mX) &=\mAT \\
129 | \pd{}{\mX}\trace(\mA\mX\mB) &=\mAT\mBT \\
130 | \pd{}{\mX}\trace(\mA\mXT\mB) &=\mB\mA \\
131 | \pd{}{\mX}\trace(\mXT\mA) &=\mA \\
132 | \pd{}{\mX}\trace(\mA\mXT) &=\mA \\
133 | \pd{}{\mX}\trace(\mA\kp\mX) &=\trace(\mA)\mI
134 | \end{align}
135 | For traces with many instances of $\mX$ we can apply an analogue of the product rule. For example:
136 | \begin{equation}
137 | \pd{}{\mX}\trace(\mA\mX\mB\mX\mCT)=\pd{}{\mX}\trace(\mA\mX\mD)+\pd{}{\mX}\trace(\mE\mX\mCT)=\mAT\mDT+\mET\mC
138 | \end{equation}
139 | where $\mD=\mB\mX\mCT$ and $\mE=\mA\mX\mB$.
140 |
141 | \section{Determinants}
142 |
143 | \subsection{By Scalars}
144 |
145 | If $\mX$ and $\mY$ are matrices with no special structure and $x$ is a scalar, then:
146 |
147 | \begin{align}
148 | \pd{\det(\mY)}{x} &= \det(\mY) \trace\left(\mYi \pd{\mY}{x}\right) \\
149 | \sum_k \pd{\det(\mX)}{\mX_{ik}}\mX_{jk} &= \delta_{ij} \det(\mX) \\
150 | \pd[2]{\det(\mY)}{x^2} &= \det(\mY) %TODO: Can this be simplified with squares?
151 | \left(
152 | \trace \left(\mYi \pd{\pd{\mY}{x}}{x}\right)
153 | + \trace \left(\mYi\pd{\mY}{x}\right) \trace\left(\mYi\pd{\mY}{x}\right)
154 | - \trace \left( \left(\mYi\pd{\mY}{x}\right) \left(\mYi\pd{\mY}{x}\right) \right)
155 | \right)
156 | \end{align}
157 |
158 | \subsection{Linear Forms}
159 |
160 | \begin{align}
161 | \pd{\det(\mX)}{\mX} &= \det(\mX)(\mXi)\T \\
162 | \sum_k \pd{\det(\mX)}{\mX_{ik}}\mX_{jk} &= \delta_{ij} \det(\mX) \\
163 | \pd{\det(\mA\mX\mB)}{\mX} &= \det(\mA\mX\mB)(\mXi)\T \\
164 | &= \det(\mA\mX\mB)(\mXT)^{-1}
165 | \end{align}
166 |
167 | \subsection{Square Forms}
168 |
169 | If $\mX$ is square and invertible:
170 | \begin{equation}
171 | \pd{\det(\mXT\mA\mX)}{\mX} = 2 \det(\mXT \mA \mX)\mXiT
172 | \end{equation}
173 | If $\mX$ is not square and $\mA$ is symmetric, then
174 | \begin{equation}
175 | \pd{\det(\mXT \mA \mX)}{\mX} = 2 \det(\mXT\mA\mX)\mA\mX(\mXT\mA\mX)^{-1}
176 | \end{equation}
177 | If $\mX$ is not square and $\mA$ is not symmetric, then
178 | \begin{equation}
179 | \pd{\det(\mXT\mA\mX)}{\mX} = \det(\mXT\mA\mX)\left(\mA\mX(\mXT\mA\mX)^{-1}+\mAT\mX(\mXT\mAT\mX)^{-1}\right)
180 | \end{equation}
181 |
182 | \subsection{Nonlinear Forms}
183 | \begin{align}
184 | \pd{\ln\det(\mXT\mX)}{\mX} &= 2(\mXp)\T \\
185 | \pd{\ln\det(\mXT\mX)}{\mXp} &= -2\mXT \\
186 | \pd{\ln\lvert\det(\mX)\rvert}{\mX} &= \mXiT \\
187 | \pd{\det(\mX^k)}{\mX} &= k \det(\mX^k) \mXiT
188 | \end{align}
--------------------------------------------------------------------------------
/src/refs.bib:
--------------------------------------------------------------------------------
1 | @misc{million2007,
2 | title = {The Hadamard Product},
3 | author = {Elizabeth Million},
4 | year = {2007},
5 | howpublished = {\url{http://buzzard.ups.edu/courses/2007spring/projects/million-paper.pdf}}
6 | }
7 |
8 | @book{Strang2016,
9 | author = {Gilbert Strang},
10 | title = {Introduction to Linear Algebra},
11 | year = {2016}
12 | }
13 |
14 | @incollection{Rote2001,
15 | title = {Division-free algorithms for the determinant and the pfaffian: algebraic and combinatorial approaches},
16 | author = {Rote, G{\"u}nter},
17 | booktitle = {Computational discrete mathematics},
18 | pages = {119--135},
19 | year = {2001},
20 | publisher = {Springer}
21 | }
22 |
23 | @article{Strassen1969,
24 | title = {Gaussian elimination is not optimal},
25 | author = {Strassen, Volker},
26 | journal = {Numerische mathematik},
27 | volume = {13},
28 | number = {4},
29 | pages = {354--356},
30 | year = {1969},
31 | publisher = {Springer}
32 | }
33 |
34 | @article{Bareiss1968,
35 | ISSN = {00255718, 10886842},
36 | URL = {http://www.jstor.org/stable/2004533},
37 | abstract = {A method is developed which permits integer-preserving elimination in systems of linear equations, AX = B, such that (a) the magnitudes of the coefficients in the transformed matrices are minimized, and (b) the computational efficiency is considerably increased in comparison with the corresponding ordinary (single-step) Gaussian elimination. The algorithms presented can also be used for the efficient evaluation of determinants and their leading minors. Explicit algorithms and flow charts are given for the two-step method. The method should also prove superior to the widely used fraction-producing Gaussian elimination when A is nearly singular.},
38 | author = {Erwin H. Bareiss},
39 | journal = {Mathematics of Computation},
40 | number = {103},
41 | pages = {565--578},
42 | publisher = {American Mathematical Society},
43 | title = {Sylvester's Identity and Multistep Integer-Preserving Gaussian Elimination},
44 | volume = {22},
45 | year = {1968},
46 | doi = {10.2307/2004533}
47 | }
48 |
49 | @article{Copper1990,
50 | title = "Matrix multiplication via arithmetic progressions",
51 | journal = "Journal of Symbolic Computation",
52 | volume = "9",
53 | number = "3",
54 |   pages = "251--280",
55 | year = "1990",
56 | note = "Computational algebraic complexity editorial",
57 | issn = "0747-7171",
58 | doi = "10.1016/S0747-7171(08)80013-2",
59 | url = "http://www.sciencedirect.com/science/article/pii/S0747717108800132",
60 | author = "Don Coppersmith and Shmuel Winograd",
61 | abstract = "We present a new method for accelerating matrix multiplication asymptotically. Thiswork builds on recent ideas of Volker Strassen, by using a basic trilinear form which is not a matrix product. We make novel use of the Salem-Spencer Theorem, which gives a fairly dense set of integers with no three-term arithmetic progression. Our resulting matrix exponent is 2.376."
62 | }
63 |
64 | @inproceedings{LeGall2014,
65 | author = {Le Gall, Fran\c{c}ois},
66 | title = {Powers of Tensors and Fast Matrix Multiplication},
67 | booktitle = {Proceedings of the 39th International Symposium on Symbolic and Algebraic Computation},
68 | series = {ISSAC '14},
69 | year = {2014},
70 | isbn = {978-1-4503-2501-1},
71 | location = {Kobe, Japan},
72 | pages = {296--303},
73 | numpages = {8},
74 | url = {http://doi.acm.org/10.1145/2608628.2608664},
75 | doi = {10.1145/2608628.2608664},
76 | acmid = {2608664},
77 | publisher = {ACM},
78 | address = {New York, NY, USA},
79 | keywords = {algebraic complexity theory, matrix multiplication},
80 | }
81 |
82 | @inproceedings{Williams2012,
83 | author = {Williams, Virginia Vassilevska},
84 | title = {Multiplying Matrices Faster Than Coppersmith-Winograd},
85 | booktitle = {Proceedings of the Forty-fourth Annual ACM Symposium on Theory of Computing},
86 | series = {STOC '12},
87 | year = {2012},
88 | isbn = {978-1-4503-1245-5},
89 | location = {New York, New York, USA},
90 | pages = {887--898},
91 | numpages = {12},
92 | url = {http://doi.acm.org/10.1145/2213977.2214056},
93 | doi = {10.1145/2213977.2214056},
94 | acmid = {2214056},
95 | publisher = {ACM},
96 | address = {New York, NY, USA},
97 | keywords = {matrix multiplication},
98 | }
99 |
100 | @inproceedings{Pan1978,
101 | title = {Strassen's algorithm is not optimal trilinear technique of aggregating, uniting and canceling for constructing fast algorithms for matrix operations},
102 | author = {Pan, V Ya},
103 |   booktitle = {19th Annual Symposium on Foundations of Computer Science},
104 | pages = {166--176},
105 | year = {1978},
106 | organization = {IEEE},
107 | doi = {10.1109/SFCS.1978.34}
108 | }
109 |
110 | @article{Bini1979,
111 |   title = {{$O(n^{2.7799})$} complexity for $n \times n$ approximate matrix multiplication},
112 |   author = {Bini, Dario Andrea and Capovani, Milvio and Romani, Francesco and Lotti, Grazia},
113 | journal = {Information processing letters},
114 | volume = {8},
115 | number = {5},
116 | pages = {234--235},
117 | year = {1979}
118 | }
119 |
120 | @article{Schonhage1981,
121 | title = {Partial and total matrix multiplication},
122 | author = {Sch{\"o}nhage, Arnold},
123 | journal = {SIAM Journal on Computing},
124 | volume = {10},
125 | number = {3},
126 | pages = {434--455},
127 | year = {1981},
128 | publisher = {SIAM}
129 | }
130 |
131 | @article{Romani1982,
132 | title = {Some properties of disjoint sums of tensors related to matrix multiplication},
133 | author = {Romani, Francesco},
134 | journal = {SIAM Journal on Computing},
135 | volume = {11},
136 | number = {2},
137 | pages = {263--267},
138 | year = {1982},
139 | publisher = {SIAM}
140 | }
141 |
142 | @inproceedings{Strassen1986,
143 | title = {The asymptotic spectrum of tensors and the exponent of matrix multiplication},
144 | author = {Strassen, Volker},
145 |   booktitle = {27th Annual Symposium on Foundations of Computer Science},
146 | pages = {49--54},
147 | year = {1986},
148 | organization = {IEEE}
149 | }
150 |
151 | @article{Coppersmith1982,
152 | title = {On the asymptotic complexity of matrix multiplication},
153 | author = {Coppersmith, Don and Winograd, Shmuel},
154 | journal = {SIAM Journal on Computing},
155 | volume = {11},
156 | number = {3},
157 | pages = {472--492},
158 | year = {1982},
159 | publisher = {SIAM}
160 | }
161 |
162 | @misc{Minka2000,
163 | title = {Old and new matrix algebra useful for statistics},
164 | author = {Minka, Thomas P},
165 | howpublished = {\url{https://tminka.github.io/papers/matrix/minka-matrix.pdf}},
166 | year = {2000}
167 | }
168 |
169 | @book{Calafiore2014,
170 | title = {Optimization models},
171 | author = {Calafiore, Giuseppe C and El Ghaoui, Laurent},
172 | year = {2014},
173 | publisher = {Cambridge University Press},
174 | isbn = {978-1-107-05087-7}
175 | }
176 |
195 | @article{Charnes1962,
196 | author = {Charnes, A. and Cooper, W. W.},
197 | title = {Programming with linear fractional functionals},
198 | journal = {Naval Research Logistics Quarterly},
199 | volume = {9},
200 | year = {1962},
201 |   number = {3--4},
202 | pages = {181-186},
203 | doi = {10.1002/nav.3800090303},
204 | url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/nav.3800090303},
205 | eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/nav.3800090303}
206 | }
207 |
208 |
209 |
210 | @article{Lobo1998,
211 | title = {Applications of second-order cone programming},
212 | author = {Lobo, Miguel Sousa and Vandenberghe, Lieven and Boyd, Stephen and Lebret, Herv{\'e}},
213 | journal = {Linear algebra and its applications},
214 | volume = {284},
215 | number = {1-3},
216 | pages = {193--228},
217 | year = {1998},
218 | publisher = {Elsevier}
219 | }
220 |
221 |
222 | @article{Thome2016,
223 | title = {Inequalities and equalities for l=2 ({S}ylvester), l=3 ({F}robenius), and l>3 matrices},
224 | author = {Thome, N{\'e}stor},
225 | journal = {Aequationes mathematicae},
226 | volume = {90},
227 | number = {5},
228 | pages = {951--960},
229 | year = {2016},
230 | publisher = {Springer}
231 | }
232 |
233 | @article{Sylvester1851,
234 | title = {XXXVII. On the relation between the minor determinants of linearly equivalent quadratic functions},
235 | author = {Sylvester, James Joseph},
236 | journal = {The London, Edinburgh, and Dublin Philosophical Magazine and Journal of Science},
237 | volume = {1},
238 | number = {4},
239 | pages = {295--305},
240 | year = {1851},
241 | publisher = {Taylor \& Francis}
242 | }
243 |
244 | @article{Kozlov1980,
245 | title = {The polynomial solvability of convex quadratic programming},
246 | author = {Kozlov, Mikhail K and Tarasov, Sergei P and Khachiyan, Leonid G},
247 | journal = {USSR Computational Mathematics and Mathematical Physics},
248 | volume = {20},
249 | number = {5},
250 | pages = {223--228},
251 | year = {1980},
252 | publisher = {Elsevier}
253 | }
254 |
255 |
256 | @article{Sahni1974,
257 | author = {Sahni, S.},
258 | title = {Computationally Related Problems},
259 | journal = {SIAM Journal on Computing},
260 | volume = {3},
261 | number = {4},
262 | pages = {262-279},
263 | year = {1974},
264 | doi = {10.1137/0203021},
265 | URL = {https://doi.org/10.1137/0203021},
266 | eprint = {https://doi.org/10.1137/0203021}
267 | }
268 |
269 | @article{Pardalos1991,
270 | author = "Pardalos, Panos M. and Vavasis, Stephen A.",
271 | title = "Quadratic programming with one negative eigenvalue is NP-hard",
272 | journal = "Journal of Global Optimization",
273 | year = "1991",
274 | month = "Mar",
275 | day = "01",
276 | volume = "1",
277 | number = "1",
278 | pages = "15--22",
279 | abstract = "We show that the problem of minimizing a concave quadratic function with one concave direction is NP-hard. This result can be interpreted as an attempt to understand exactly what makes nonconvex quadratic programming problems hard. Sahni in 1974 [8] showed that quadratic programming with a negative definite quadratic term (n negative eigenvalues) is NP-hard, whereas Kozlov, Tarasov and Ha{\v{c}}ijan [2] showed in 1979 that the ellipsoid algorithm solves the convex quadratic problem (no negative eigenvalues) in polynomial time. This report shows that even one negative eigenvalue makes the problem NP-hard.",
280 | issn = "1573-2916",
281 | doi = "10.1007/BF00120662",
282 | url = "https://doi.org/10.1007/BF00120662"
283 | }
284 |
285 |
286 | @inproceedings{Spielman2010,
287 | title = {Algorithms, graph theory, and linear equations in {Laplacian} matrices},
288 | volume = {4},
289 | url = {http://www.cs.yale.edu/homes/spielman/PAPERS/icm10post.pdf},
290 | urldate = {2016-05-07},
291 | booktitle = {Proceedings of the {International} {Congress} of {Mathematicians}},
292 | author = {Spielman, Daniel A.},
293 | year = {2010},
294 | pages = {2698--2722},
295 | file = {10.1.1.165.8870.pdf:/home/rick/Zotero/storage/XKEEGKW6/10.1.1.165.8870.pdf:application/pdf}
296 | }
297 |
298 | @book{Higham2002,
299 | author = {Nicholas J. Higham},
300 | title = {Accuracy and Stability of Numerical Algorithms},
301 | edition = {Second},
302 | publisher = {SIAM},
303 | isbn = {978-0-89871-802-7},
304 | year = {2002}
305 | }
306 |
307 | @book{Quateroni2007,
308 | author = {Quarteroni, Alfio and Sacco, Riccardo and Saleri, Fausto},
309 | year = {2007},
310 | title = {Numerical Mathematics},
311 | publisher = {Springer},
312 | isbn = {978-3-540-34658-6}
313 | }
314 |
315 | @book{Gallopoulos2016,
316 | author = {Gallopoulos, E. and Philippe, B. and Sameh, A.H.},
317 | year = {2016},
318 | title = {Parallelism in Matrix Computations},
319 | publisher = {Springer},
320 | isbn = {978-94-017-7188-7}
321 | }
322 |
323 | @Article{Alizadeh2003,
324 | author = {Alizadeh, F. and Goldfarb, D.},
325 | title = {Second-order cone programming},
326 | journal = {Mathematical Programming},
327 | year = {2003},
328 | month = {Jan},
329 | day = {01},
330 | volume = {95},
331 | number = {1},
332 | pages = {3-51},
333 | issn = {1436-4646},
334 | doi = {10.1007/s10107-002-0339-5},
335 | url = {https://doi.org/10.1007/s10107-002-0339-5}
336 | }
337 |
338 |
339 | @article{Teran2011,
340 | title = {Consistency and efficient solution of the Sylvester equation for*-congruence},
341 | author = {De Ter{\'a}n, Fernando and Dopico, Froilan},
342 | journal = {The Electronic Journal of Linear Algebra},
343 | volume = {22},
344 | year = {2011}
345 | }
346 |
347 | @article{Teran2019,
348 | author = {De Terán, Fernando and Iannazzo, Bruno and Poloni, Federico and Robol, Leonardo},
349 | title = {Nonsingular systems of generalized Sylvester equations: An algorithmic approach},
350 | journal = {Numerical Linear Algebra with Applications},
351 | volume = {26},
352 | number = {5},
353 | pages = {e2261},
354 | keywords = {formal matrix product, matrix pencils, periodic QR/QZ algorithm, periodic Schur decomposition, Sylvester and ⋆-Sylvester equations, systems of linear matrix equations},
355 | doi = {https://doi.org/10.1002/nla.2261},
356 | url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/nla.2261},
357 | eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/nla.2261},
358 | note = {e2261 nla.2261},
359 | abstract = {Summary We consider the uniqueness of solution (i.e., nonsingularity) of systems of r generalized Sylvester and ⋆-Sylvester equations with n×n coefficients. After several reductions, we show that it is sufficient to analyze periodic systems having, at most, one generalized ⋆-Sylvester equation. We provide characterizations for the nonsingularity in terms of spectral properties of either matrix pencils or formal matrix products, both constructed from the coefficients of the system. The proposed approach uses the periodic Schur decomposition and leads to a backward stable O(n3r) algorithm for computing the (unique) solution.},
360 | year = {2019}
361 | }
362 |
363 | @article{Dopico2016,
364 | title = {Projection methods for large-scale T-Sylvester equations},
365 | author = {Dopico, Froil{\'a}n and Gonz{\'a}lez, Javier and Kressner, Daniel and Simoncini, Valeria},
366 | journal = {Mathematics of Computation},
367 | volume = {85},
368 | number = {301},
369 | pages = {2427--2455},
370 | year = {2016}
371 | }
372 |
373 | @article{Hadamard1893,
374 | author = {Hadamard, J.},
375 | title = {Résolution d'une question relative aux déterminants.},
376 | journal = {Bull. Sci. Math},
377 | volume = {17},
378 | pages = {30--31},
379 | year={1893}
380 | }
381 |
382 | @book{Seber2002,
383 | author = {Seber, G. and Lee, A.},
384 | title = {Linear Regression Analysis},
385 | publisher = {John Wiley and Sons},
386 | year = {2002}
387 | }
388 |
389 | @misc{Alman2020,
390 | title={A Refined Laser Method and Faster Matrix Multiplication},
391 | author={Josh Alman and Virginia Vassilevska Williams},
392 | year={2020},
393 | eprint={2010.05846},
394 | archivePrefix={arXiv},
395 | primaryClass={cs.DS}
396 | }
397 |
398 | @misc{Peng2021,
399 | title={Solving Sparse Linear Systems Faster than Matrix Multiplication},
400 | author={Richard Peng and Santosh Vempala},
401 | year={2021},
402 | eprint={2007.10254},
403 | archivePrefix={arXiv},
404 | primaryClass={cs.DS}
405 | }
406 |
407 | @misc{WellingXXXX,
408 | author = {Max Welling},
409 | title = {The Kalman Filter},
410 | howpublished = {Lecture Note},
411 | }
--------------------------------------------------------------------------------
/src/rogue_gallery.tex:
--------------------------------------------------------------------------------
1 | \chapter{Matrix Rogue Gallery}
2 |
3 | \section{Non-Singular vs.\ Singular Matrices}
4 | For $\mA\in\sRnn$ (initially drawn from \citep[p.\ 574]{Strang2016}):
5 | \begin{center}
6 | \begin{tabular}{ll}
7 | \textbf{Non-Singular} & \textbf{Singular} \\
8 | $\mA$ is invertible & $\mA$ is not invertible \\
9 | The columns are independent & The columns are dependent \\
10 | The rows are independent & The rows are dependent \\
11 | $\det(\mA)\ne0$ & $\det(\mA)=0$ \\
12 | $\mA\vx=0$ has one solution: $\vx=0$ & $\mA\vx=0$ has infinitely many solutions \\
13 | $\mA\vx=\vb$ has one solution: $\vx=\mA^{-1}\vb$& $\mA\vx=\vb$ has either no or infinitely many solutions \\
14 | $\mA$ has $n$ nonzero pivots & $\mA$ has $r<n$ pivots \\
15 | \end{tabular}
16 | \end{center}
17 | 
18 | \section{Positive Definite}
19 | $\mP$ is positive definite (denoted $\mP\ispd0$) if any of the following are true:
20 | \begin{itemize}
294 | \item $\vx^T\mP\vx>0,\forall\vx\in\sRn$.
295 | \item $\eig(\mP)>0$
296 | \item There exists a unique lower-triangular matrix $\mU\in\sRnn$ with positive diagonal entries such that $\mP=\mU\mU^T$ (Cholesky decomposition).
297 | \end{itemize}
298 |
299 |
300 | \subsection*{Special Properties}
301 |
302 | \begin{itemize}
303 | \item $\mP^{-1}\ispd0$
304 | \item $c\mP\ispd0$
305 | \item $\mP_{ii}\in\sR$
306 | \item $\mP_{ii}>0$
307 | \item $\trace(\mP)>0$
308 | \item $\det(\mP)>0$
309 | \item The eigenvalues of $\mP^{-1}$ are the inverses of the eigenvalues of $\mP$.
310 | \item For $\mP\in\sRmn$, $\mP^T\mP\ispd0\iff \mP$ is full-column rank ($\rank(\mP)=n$)
311 | \item For $\mP\in\sRmn$, $\mP\mP^T\ispd0\iff \mP$ is full-row rank ($\rank(\mP)=m$)
312 | \end{itemize}
313 |
314 | \subsubsection{Ellipsoids}
315 | $\mP\ispd0$ defines a full-dimensional, bounded ellipsoid given by the set
316 | \begin{equation}
317 | \mathcal{E}=\{\vx\in\sRn: (\vx-\vz)^T\mP^{-1}(\vx-\vz)\le \beta\}
318 | \end{equation}
319 | The eigenvectors of $\mP$ define the directions of the semi-axes of the ellipsoid; the lengths of these semi-axes are given by $\sqrt{\beta\lambda_i}$, where $\lambda_i$ are the eigenvalues of $\mP$. The ellipsoid is centered at $\vz$. Since $\mP\ispd 0 \implies \mP^{-1}\ispd 0$, the Cholesky decomposition gives $\mP^{-1}=\mA^T\mA$; therefore, an equivalent definition of the ellipsoid is $\mathcal{E}=\{\vx\in\sRn: \norm{\mA(\vx-\vz)}_2\le\sqrt{\beta}\}$.
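For instance, with $\vz=0$, $\beta=1$, and $\mP=\diag(4,1)$, the set is the ellipse
\begin{equation}
\mathcal{E}=\{\vx: x_1^2/4+x_2^2\le1\},
\end{equation}
whose semi-axes lie along the coordinate directions with lengths $\sqrt{\beta\lambda_i}=2$ and $1$.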
320 |
321 | \section{Positive Semi-Definite}
322 |
323 | $\mA$ is positive semi-definite (denoted $\mA\ispsd0$) if any of the following are true:
324 | \begin{itemize}
325 | \item $\vx^T\mA\vx\ge0,\forall\vx\in\sRn$.
326 | \item $\eig(\mA)\ge0$
327 | \item There exists a non-unique matrix $\mU\in\sRnn$, such that $\mA=\mU\mU^T$ (Cholesky Decomposition). %TODO
328 | \end{itemize}
329 |
330 | \subsection*{Special Properties}
331 | \begin{itemize}
332 | \item For $\mA\in\sRmn$, $\mA^T\mA\ispsd0$
333 | \item For $\mA\in\sRmn$, $\mA\mA^T\ispsd0$
334 | \item $\diag(\mA)_i\ge0$
335 | \item $\sum_{ij} \mA_{ij}\ge0$
336 | \item $\trace(\mA)\ge0$
337 | \item For $\mA,\mB\ispsd0$, $\trace(\mA\mB)\ge0$
338 | \item For $\mA,\mB\ispsd0$, $\trace(\mA\mB)=0\iff \mA\mB=0$
339 | \item The positive semi-definite matrices $\sPSD$ form a convex cone. For any two PSD matrices $\mA,\mB\in\sPSD$ and any $\alpha\in[0,1]$:
340 | \begin{equation}
341 | \vx^T(\alpha\mA+(1-\alpha)\mB)\vx=\alpha \vx^T\mA\vx+(1-\alpha)\vx^T\mB\vx\ge0~~\forall\vx
342 | \end{equation}
343 | \begin{equation}
344 | \alpha\mA+(1-\alpha)\mB\in\sPSD
345 | \end{equation}
346 | \item For $\mA\in\sPSD$ and $\alpha\ge0$, $\alpha\mA\ispsd0$, so $\sPSD$ is a cone.
347 | \item $\mA\ispsd 0$ if and only if there is a PSD matrix $\mA^{1/2}$ such that $\mA^{1/2}\mA^{1/2}=\mA$. This square root is unique.
348 | \end{itemize}
349 |
350 | \subsection{Loewner order}
351 | If $\mA-\mB\ispsd 0$, then we say $\mA\ispsd \mB$. A sufficient condition for this is that $\lambda_n(\mA)\ge\lambda_1(\mB)$.
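This follows because, for any $\vx$ (with eigenvalues ordered so that $\lambda_1\ge\cdots\ge\lambda_n$),
\begin{equation}
\vx^T(\mA-\mB)\vx\ge\lambda_n(\mA)\norm{\vx}_2^2-\lambda_1(\mB)\norm{\vx}_2^2\ge0.
\end{equation}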
352 |
353 |
354 |
355 | \section{Projection Matrix}
356 | A square matrix $\mP$ is a projection matrix that projects onto a vector space $\mathcal{S}$ iff
357 | \begin{align}
358 | \mP&~\textrm{is idempotent} \\
359 | \mP\vx&\in\mathcal{S}~~\forall\vx \\
360 | \mP\vz&=\vz~~\forall\vz\in\mathcal{S}
361 | \end{align}
362 |
363 |
364 | \section{Single-Entry Matrix}
365 | \label{sec:rogue_single_entry}
366 | \begin{equation}
367 | \mJ^{2,3} =
368 | \begin{bmatrix}
369 | 0 & 0 & 0 & 0 \\
370 | 0 & 0 & 1 & 0 \\
371 | 0 & 0 & 0 & 0 \\
372 | 0 & 0 & 0 & 0
373 | \end{bmatrix}
374 | \end{equation}
375 |
376 | The single-entry matrix $\mJ^{ij}\in\sRnn$ is defined as the matrix which is zero everywhere except for the entry $(i,j)$, which is $1$.
377 |
378 |
379 | %TODO: Much material from MCB
380 |
381 |
382 |
383 | \section{Singular Matrix}
384 | A square matrix that is not invertible.
385 |
386 | $\mA\in\sRnn$ is singular iff $\det(\mA)=0$ iff $\mathcal{N}(\mA)\ne\{0\}$.
387 |
388 |
389 | \section{Symmetric Matrix}
390 |
391 | \begin{center}
392 | \includegraphics[width=1.5in]{imgs/rg_symmetric_matrix.pdf}
393 | \end{center}
394 |
395 | $\mA\in\sSn$ is a symmetric matrix if $\mA=\mA^T$, that is, if $\mA_{ij}=\mA_{ji}$ for all $i,j$.
396 |
397 | \begin{equation}
398 | \begin{bmatrix}
399 | a & b & c & d & e & f \\
400 | b & g & l & m & o & p \\
401 | c & l & h & n & q & r \\
402 | d & m & n & i & s & t \\
403 | e & o & q & s & j & u \\
404 | f & p & r & t & u & k \\
405 | \end{bmatrix}
406 | \end{equation}
407 |
408 |
409 | \subsection*{Special Properties}
410 |
411 | \begin{align}
412 | \mA &= \mA^T \\
413 | \eig(\mA) &\in \sRn \\
414 | \textrm{Number of ``free entries''} &= \frac{n(n+1)}{2}
415 | \end{align}
416 |
417 | If $\mA$ is real, it can be decomposed into $\mA=\mQ\mD\mQ^T$, where $\mQ$ is a real orthogonal matrix whose columns are eigenvectors of $\mA$ and $\mD$ is a real diagonal matrix containing the eigenvalues of $\mA$.
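For example,
\begin{equation}
\begin{bmatrix}2&1\\1&2\end{bmatrix}
=\mQ\mD\mQ^T
\qquad\textrm{with}\qquad
\mQ=\frac{1}{\sqrt{2}}\begin{bmatrix}1&1\\1&-1\end{bmatrix},
\qquad
\mD=\begin{bmatrix}3&0\\0&1\end{bmatrix}.
\end{equation}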
418 |
419 | For a real, symmetric matrix with non-negative eigenvalues, the eigenvalues and singular values coincide.
420 |
421 |
422 |
423 | \section{Skew-Hermitian}
424 | A matrix $\mH\in\sCnn$ is skew-Hermitian iff
425 | \begin{equation}
426 | \mH=-\mH^H
427 | \end{equation}
428 |
429 |
430 |
431 | \section{Toeplitz Matrix, General Form}
432 |
433 | \begin{center}
434 | \includegraphics[width=1.5in]{imgs/rg_toeplitz.pdf}
435 | \end{center}
436 | Constant values on descending diagonals.
437 | \begin{equation}
438 | \begin{bmatrix}
439 | a_{0} & a_{-1} & a_{-2} & \ldots & \ldots & a_{-(n-1)} \\
440 | a_{1} & a_0 & a_{-1} & \ddots & & \vdots \\
441 | a_{2} & a_{1} & \ddots & \ddots & \ddots & \vdots \\
442 | \vdots & \ddots & \ddots & \ddots & a_{-1} & a_{-2}\\
443 | \vdots & & \ddots & a_{1} & a_{0} & a_{-1} \\
444 | a_{n-1} & \ldots & \ldots & a_{2} & a_{1} & a_{0}
445 | \end{bmatrix}
446 | \end{equation}
447 |
448 |
449 | \section{Toeplitz Matrix, Discrete Convolution}
450 |
451 | \begin{center}
452 | \includegraphics[width=1.5in]{imgs/rg_toeplitz_1d_conv.pdf}
453 | \end{center}
454 |
455 | Constant values on main and subdiagonals.
456 |
457 | \begin{equation}
458 | \begin{bmatrix}
459 | h_m & 0 & 0 & \hdots & 0 & 0 \\
460 | \vdots & h_m & 0 & \hdots & 0 & 0 \\
461 | h_1 & \vdots & h_m & \hdots & 0 & 0 \\
462 | 0 & h_1 & \ddots & \ddots & 0 & 0 \\
463 | 0 & 0 & h_1 & \ddots & h_m & 0 \\
464 | 0 & 0 & 0 & \ddots & \vdots & h_m \\
465 | 0 & 0 & 0 & \hdots & h_1 & \vdots \\
466 | 0 & 0 & 0 & \hdots & 0 & h_1
467 | \end{bmatrix}
468 | \end{equation}
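As a small sketch of why such a matrix implements a discrete convolution (here with $m=2$ filter taps and $n=3$ inputs, laid out as above),
\begin{equation}
\begin{bmatrix}
h_2 & 0 & 0\\
h_1 & h_2 & 0\\
0 & h_1 & h_2\\
0 & 0 & h_1
\end{bmatrix}
\begin{bmatrix}x_1\\x_2\\x_3\end{bmatrix}
=
\begin{bmatrix}
h_2 x_1\\
h_1 x_1+h_2 x_2\\
h_1 x_2+h_2 x_3\\
h_1 x_3
\end{bmatrix},
\end{equation}
so each output entry is a sliding weighted sum of adjacent inputs with the fixed filter taps $h_1,\ldots,h_m$ (the exact indexing depends on the chosen convolution convention).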
469 |
470 |
471 | \section{Triangular Matrix}
472 |
473 | \begin{center}
474 | \includegraphics[width=1.5in]{imgs/rg_lower_triangular.pdf}~\includegraphics[width=1.5in]{imgs/rg_upper_triangular.pdf}
475 | \end{center}
476 |
477 | \begin{equation}
478 | \begin{bmatrix}
479 | a & b & c & d & e & f \\
480 | & g & h & i & j & k \\
481 | & & l & m & n & o \\
482 | & & & p & q & r \\
483 | & & & & s & t \\
484 | & & & & & u \\
485 | \end{bmatrix}
486 | ~
487 | ~
488 | \begin{bmatrix}
489 | a & & & & & \\
490 | b & g & & & & \\
491 | c & h & l & & & \\
492 | d & i & m & p & & \\
493 | e & j & n & q & s & \\
494 | f & k & o & r & t & u \\
495 | \end{bmatrix}
496 | \end{equation}
497 |
498 | Square matrices in which all elements either above or below the main diagonal are zero. An upper (left) and a lower (right) triangular matrix are shown above.
499 |
500 | For an upper triangular matrix $\mA_{ij}=0$ whenever $i>j$; for a lower triangular matrix $\mA_{ij}=0$ whenever $i<j$.