├── .gitignore
├── README.md
├── lecture_pdfs
│   ├── Lecture_1.pdf
│   ├── Lecture_10.pdf
│   ├── Lecture_11.pdf
│   ├── Lecture_12.pdf
│   ├── Lecture_13.pdf
│   ├── Lecture_2.pdf
│   ├── Lecture_3.pdf
│   ├── Lecture_4.pdf
│   ├── Lecture_5.pdf
│   ├── Lecture_6.pdf
│   ├── Lecture_7.pdf
│   ├── Lecture_8.pdf
│   └── Lecture_9.pdf
└── source_code
    ├── Lecture_1.tex
    ├── Lecture_10.tex
    ├── Lecture_11.tex
    ├── Lecture_12.tex
    ├── Lecture_13.tex
    ├── Lecture_2.tex
    ├── Lecture_3.tex
    ├── Lecture_4.tex
    ├── Lecture_5.tex
    ├── Lecture_6.tex
    ├── Lecture_7.tex
    ├── Lecture_8.tex
    ├── Lecture_9.tex
    ├── et_newbib.bib
    ├── figs_code
    │   ├── additivity.pdf
    │   ├── additivity.pdf_t
    │   ├── alpha_eq.pdf
    │   ├── ar1_dynam.pdf
    │   ├── ar1_dynam_lec.pdf
    │   ├── ar1_dynam_lec1.pdf
    │   ├── ar1_dynam_lec2.pdf
    │   ├── ar1_dynam_lec3.pdf
    │   ├── arch_plot1.pdf
    │   ├── arch_plot2.pdf
    │   ├── arch_plot3.pdf
    │   ├── arch_plot4.pdf
    │   ├── beta_bayes.pdf
    │   ├── beta_cdfs.pdf
    │   ├── betahat_var.pdf
    │   ├── bivar_gaussian_3d.pdf
    │   ├── bootstrap_hist.pdf
    │   ├── c_alpha.pdf
    │   ├── cauchy_cdf.pdf
    │   ├── cauchy_cdfs.pdf
    │   ├── cauchy_densities.pdf
    │   ├── cauchy_quant.pdf
    │   ├── cauchy_samples.pdf
    │   ├── chisq_densities.pdf
    │   ├── clt3d_2.pdf
    │   ├── conv_in_prob.pdf
    │   ├── convolve.pdf
    │   ├── copula.pdf
    │   ├── cost_min_2.pdf
    │   ├── cvbest.pdf
    │   ├── discrete_cdf.pdf
    │   ├── ecdf_beta.pdf
    │   ├── ecdf_lim.pdf
    │   ├── eigenvecs.pdf
    │   ├── event_fails.pdf_t
    │   ├── event_occurs.pdf
    │   ├── event_occurs.pdf_t
    │   ├── fcca.pdf
    │   ├── firms_median.pdf
    │   ├── flat_plane.pdf
    │   ├── flat_plane_e_vecs.pdf
    │   ├── flat_plane_e_vecspdf.png
    │   ├── flat_plane_no_vecs.pdf
    │   ├── gaussian_example.pdf
    │   ├── gdp.pdf
    │   ├── glu.pdf
    │   ├── hghb.pdf
    │   ├── hsi.pdf
    │   ├── illus_clt.pdf
    │   ├── inverse_prob.fig
    │   ├── inverse_prob.fig.bak
    │   ├── inverse_prob.pdf
    │   ├── inverse_prob.pdf_t
    │   ├── iv_example.pdf
    │   ├── jointplot.pdf
    │   ├── ks_sim1.pdf
    │   ├── ks_sim2.pdf
    │   ├── lin_comb.pdf
    │   ├── linbijec.pdf
    │   ├── linear_after_proj.pdf
    │   ├── linear_after_proj3d.pdf
    │   ├── linfunc.pdf
    │   ├── lognorm_sample_mean.pdf
    │   ├── long_norm_den_seq.pdf
    │   ├── nikkei_ecdf.pdf
    │   ├── nikkei_hist.pdf
    │   ├── nonredundant1.pdf
    │   ├── nonredundant2.pdf
    │   ├── nonredundant3.pdf
    │   ├── norm_den_seq.pdf
    │   ├── normal_cdfs.pdf
    │   ├── normal_densities.pdf
    │   ├── not_in_span.pdf
    │   ├── not_multiple_of_one.pdf
    │   ├── npkde.pdf
    │   ├── nr.pdf
    │   ├── ofit1.pdf
    │   ├── ofit11.pdf
    │   ├── ofit14.pdf
    │   ├── ofit3.pdf
    │   ├── one_dim_kde.pdf
    │   ├── partial_reg_plot.pdf
    │   ├── power.pdf
    │   ├── qform_indef.pdf
    │   ├── qform_nd.pdf
    │   ├── qform_pd.pdf
    │   ├── ridge_plots
    │   │   ├── ridgeplot1.pdf
    │   │   ├── ridgeplot10.pdf
    │   │   ├── ridgeplot2.pdf
    │   │   ├── ridgeplot3.pdf
    │   │   ├── ridgeplot4.pdf
    │   │   ├── ridgeplot5.pdf
    │   │   ├── ridgeplot6.pdf
    │   │   ├── ridgeplot7.pdf
    │   │   ├── ridgeplot8.pdf
    │   │   └── ridgeplot9.pdf
    │   ├── ridge_risk.pdf
    │   ├── rotation_1.pdf
    │   ├── rotation_2.pdf
    │   ├── rvempr.pdf
    │   ├── rw_metropolis.pdf
    │   ├── sampling_distributions.pdf
    │   ├── simple_mkt.pdf
    │   ├── small_sim.R
    │   ├── span_of_one_vec.pdf
    │   ├── span_plane.pdf
    │   ├── stein.pdf
    │   ├── t_to_norm.pdf
    │   ├── tikreg.pdf
    │   ├── tikzfigs
    │   │   ├── bij_inv.tex
    │   │   ├── bijec.tex
    │   │   ├── composition.tex
    │   │   ├── diagonalize.tex
    │   │   ├── discrete_cdf.tex
    │   │   ├── fig3.3.tex
    │   │   ├── finite_rv_approx.pdf
    │   │   ├── finite_rv_approx.tex
    │   │   ├── func_nonfunc.tex
    │   │   ├── function.tex
    │   │   ├── function2.tex
    │   │   ├── orth_comp.tex
    │   │   ├── orth_proj2D.tex
    │   │   ├── orth_proj2D0.tex
    │   │   ├── orth_proj2Dp.tex
    │   │   ├── sim_fun.tex
    │   │   ├── vec_add.tex
    │   │   ├── vec_canon.tex
    │   │   ├── vec_canon_x.tex
    │   │   ├── vec_minus.tex
    │   │   ├── vec_noncanon.tex
    │   │   ├── vec_scalar.tex
    │   │   ├── xpS.tex
    │   │   └── xpz.tex
    │   ├── unempl_sim.pdf
    │   ├── vec.pdf
    │   ├── vec_add.pdf
    │   ├── vec_canon.pdf
    │   ├── vec_minus.pdf
    │   ├── vec_noncanon.pdf
    │   ├── vec_scalar.pdf
    │   ├── vec_scalar.pdf.pdf
    │   ├── vecs.pdf
    │   ├── vector_mean.pdf
    │   ├── xpS.pdf
    │   └── xpz.pdf
    ├── minted.sty
    └── preamb.tex

/.gitignore:
--------------------------------------------------------------------------------
*.log
*.aux
*.bcf
*.nav
*.out
*.toc
*.snm
*.run.xml
_minted*/

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Lecture Slides for A Primer in Econometric Theory

These lecture slides correspond to [John Stachurski](http://johnstachurski.net/index.html)'s
text [A Primer in Econometric Theory](http://johnstachurski.net/emet.html), published in 2016 by MIT Press.

They were written by [Akshay Shanker](https://github.com/mathuranand) using source LaTeX and figures from the text.

The slides are released under a BSD 3-Clause license: anyone can download, modify,
and use them as they wish. We ask only that the original authors be acknowledged.

If you notice errors and know how to use Git, you can submit a fix via a [pull request](https://github.com/jstac/econometric_theory_slides/pulls).

Alternatively, you can let us know via [the issue tracker](https://github.com/jstac/econometric_theory_slides/issues).

--------------------------------------------------------------------------------
/lecture_pdfs/Lecture_1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/lecture_pdfs/Lecture_1.pdf
--------------------------------------------------------------------------------
/lecture_pdfs/Lecture_10.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/lecture_pdfs/Lecture_10.pdf
--------------------------------------------------------------------------------
/lecture_pdfs/Lecture_11.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/lecture_pdfs/Lecture_11.pdf
--------------------------------------------------------------------------------
/lecture_pdfs/Lecture_12.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/lecture_pdfs/Lecture_12.pdf
--------------------------------------------------------------------------------
/lecture_pdfs/Lecture_13.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/lecture_pdfs/Lecture_13.pdf
--------------------------------------------------------------------------------
/lecture_pdfs/Lecture_2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/lecture_pdfs/Lecture_2.pdf
--------------------------------------------------------------------------------
/lecture_pdfs/Lecture_3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/lecture_pdfs/Lecture_3.pdf
-------------------------------------------------------------------------------- /lecture_pdfs/Lecture_4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/lecture_pdfs/Lecture_4.pdf -------------------------------------------------------------------------------- /lecture_pdfs/Lecture_5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/lecture_pdfs/Lecture_5.pdf -------------------------------------------------------------------------------- /lecture_pdfs/Lecture_6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/lecture_pdfs/Lecture_6.pdf -------------------------------------------------------------------------------- /lecture_pdfs/Lecture_7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/lecture_pdfs/Lecture_7.pdf -------------------------------------------------------------------------------- /lecture_pdfs/Lecture_8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/lecture_pdfs/Lecture_8.pdf -------------------------------------------------------------------------------- /lecture_pdfs/Lecture_9.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/lecture_pdfs/Lecture_9.pdf -------------------------------------------------------------------------------- /source_code/Lecture_10.tex: -------------------------------------------------------------------------------- 1 | 2 | \input{preamb.tex} 3 | 4 | 5 | 6 | 7 | \title{A Primer in Econometric Theory} 8 | 9 | \subtitle 10 | {Lecture 10: Regression} 11 | 12 | \author{John Stachurski \\ \tiny Lectures by Akshay Shanker} 13 | 14 | 15 | 16 | \begin{document} 17 | 18 | \begin{frame} 19 | \titlepage 20 | \end{frame} 21 | 22 | \section{Linear Regression} 23 | 24 | 25 | \begin{frame}\frametitle{Linear Regression} 26 | 27 | \vspace{2em} 28 | Start with the prediction problem discussed in \S\ref{ET-ss:erm} --- a system with vector input $\boldx_n \in \RR^K$ followed by scalar 29 | output $y_n$ 30 | 31 | \vspace{.7em} 32 | Examples: 33 | 34 | \begin{itemize} 35 | \item $\boldx_n$ is a description of a lottery (probabilities, 36 | possible outcomes, etc.) 37 | in a controlled experiment and $y_n$ is willingness to pay in order to 38 | participate 39 | \item $\boldx_n$ is a set of household characteristics (ethnicity, 40 | age, location, etc.) 
and $y_n$ is household wealth at some later date
\item $\boldx_n$ is price of electricity, prices of alternatives,
temperature, household income, and measurements of the regional income
distribution, while $y_n$ is regional electricity consumption
\end{itemize}

\end{frame}

\begin{frame}

\vspace{2em}
Suppose we have $N$ observations $\boldz_n := (\boldx_n,y_n)$,
all draws from fixed joint distribution $P$

Since $P$ is fixed, we are assuming the system is stationary across the set of draws

\vspace{.7em}
Our problem:
%
\small\begin{equation*}\label{eq:prp}
    \text{choose function } f \colon \RR^K \to \RR \text{ such that }
    f(\boldx) \text{ is a good predictor of } y
\end{equation*}

\end{frame}

\begin{frame}

\vspace{2em}
To define ``good predictor'' mathematically, we need a loss function

We use quadratic loss, and thus minimize
the prediction risk given by
%
\begin{equation}
    \label{eq:rf2}
    R(f) := \EEP (y - f(\boldx))^2
\end{equation}

\vspace{.7em}
Minimizer of \eqref{eq:rf2} over the
set of all $\bB$-measurable functions is the regression
function $f^*(\boldx) := \EEP [y \given \boldx]$

Recall we cannot compute the regression function because $P$ is not known

\end{frame}

\begin{frame}

\vspace{2em}
Instead we apply the principle of empirical risk minimization,
which leads to the problem
%
\begin{equation}
    \label{eq:ermm}
    \min_{f \in \hH} \Remp(f)
    \quad \text{where} \quad
    \Remp(f) := \frac{1}{N} \sum_{n=1}^N (y_n - f(\boldx_n))^2
\end{equation}

\vspace{.7em}
Here $\hH$ is the hypothesis space, a set of candidate functions
mapping $\RR^K$ into $\RR$

The problem \eqref{eq:ermm} is
called a \navy{least squares} problem

\end{frame}

\begin{frame}

\vspace{2em}
As discussed at length in \S\ref{ET-ss:ermls},
minimizing empirical risk is different from minimizing the prediction risk
$R(f)$ --- thus $\hH$ must be
restricted

\vspace{.7em}
Consider the case $\hH = \llL$, where $\llL$ is all
linear functions from $\RR^K$ to $\RR$

Recalling theorem~\ref{ET-t:lmaeq}, write
%
\begin{equation*}
    \label{eq:allaff2}
    \llL =
    \left\{ \, \text{all } f \colon \RR^K \to \RR \,
    \text{ such that }
    f(\boldx) = \boldx^\T \boldb \text{ for some } \boldb \in \RR^K
    \right\}
\end{equation*}


Problem \eqref{eq:ermm} reduces to
%
\begin{equation}
    \label{eq:olslq}
    \min_{\boldb \in \RR^K} \;
    \sum_{n=1}^N (y_n - \boldx_n^\T \boldb)^2
\end{equation}

\end{frame}

\begin{frame}

\vspace{2em}
Intuition: ``line of best fit'' to minimize in-sample prediction error

\vspace{.7em}
Good reasons to start with $\llL$, even where no linearity
assumptions are imposed:

\begin{enumerate}
    \item $\llL$ is a natural starting point when
    seeking a class of simple, well-behaved functions
    \item setting $\hH = \llL$ allows us to obtain an analytical expression for the minimizer,
    which simplifies both analysis and computation
    \item the technique has an extension from
    $\llL$ to broader classes of
functions 163 | \end{enumerate} 164 | 165 | \end{frame} 166 | 167 | \begin{frame}\frametitle{Least Squares Estimator} 168 | 169 | \vspace{2em} 170 | Now let's solve \eqref{eq:olslq}. Let 171 | % 172 | \small \begin{equation*} 173 | \label{eq:design0} 174 | \boldy := 175 | \left( 176 | \begin{array}{c} 177 | y_1 \\ 178 | y_2 \\ 179 | \vdots \\ 180 | y_N 181 | \end{array} 182 | \right),\, 183 | \boldx_n := 184 | \left( 185 | \begin{array}{c} 186 | x_{n1} \\ 187 | x_{n2} \\ 188 | \vdots \\ 189 | x_{nK} 190 | \end{array} 191 | \right) 192 | = \text{ $n$th observation of all regressors} 193 | \end{equation*} 194 | % 195 | and 196 | % 197 | \small \begin{equation*} 198 | \label{eq:design} 199 | \boldX := 200 | \left( 201 | \begin{array}{c} 202 | \boldx_1^\T \vspace{0.2em} \\ 203 | \boldx_2^\T \\ 204 | \vdots \\ 205 | \boldx_N^\T 206 | \end{array} 207 | \right) 208 | :=: 209 | \left( 210 | \begin{array}{cccc} 211 | x_{11} & x_{12} & \cdots & x_{1K} \\ 212 | x_{21} & x_{22} & \cdots & x_{2K} \\ 213 | \vdots & \vdots & & \vdots \\ 214 | x_{N1} & x_{N2} & \cdots & x_{NK} 215 | \end{array} 216 | \right) 217 | \end{equation*} 218 | 219 | \end{frame} 220 | 221 | \begin{frame} 222 | 223 | \vspace{2em} 224 | Sometimes $\boldX$ is called the \navy{design matrix} 225 | 226 | By construction, $\col_k \boldX =$ all observations on the $k$th regressor 227 | 228 | \vspace{.7em} 229 | Also, for any $\boldb \in \RR^K$, we have 230 | % 231 | \begin{equation*} 232 | \boldX \boldb = 233 | \left( 234 | \begin{array}{c} 235 | \boldx_1^\T \boldb \vspace{0.2em} \\ 236 | \boldx_2^\T \boldb\\ 237 | \vdots \\ 238 | \boldx_N^\T \boldb 239 | \end{array} 240 | \right) 241 | \end{equation*} 242 | % 243 | \end{frame} 244 | 245 | \begin{frame} 246 | 247 | \vspace{2em} 248 | The objective function in (\ref{eq:olslq}) can be written as 249 | % 250 | \begin{equation*} 251 | \sum_{n=1}^N (y_n - \boldx_n^\T \boldb)^2 252 | = \| \boldy - \boldX \boldb \|^2 253 | \end{equation*} 254 | % 255 | Since strictly increasing transforms preserve the set of minimizers 256 | % 257 | \begin{equation} 258 | \label{eq:olses} 259 | \argmin_{\boldb \in \RR^K} \| \boldy - \boldX \boldb \|^2 260 | = \argmin_{\boldb \in \RR^K} \| \boldy - \boldX \boldb \| 261 | \end{equation} 262 | 263 | Using the orthogonal projection theorem (recall theorem~\ref{ET-t:lssol} in ET), the 264 | solution is 265 | % 266 | \begin{equation} 267 | \label{eq:thelsqe} 268 | \hboldbeta := (\boldX^\T \boldX)^{-1} \boldX^\T \boldy 269 | \end{equation} 270 | \end{frame} 271 | 272 | \begin{frame} 273 | 274 | \vspace{2em} 275 | Traditionally, $\hboldbeta$ called the \navy{least 276 | squares estimator} 277 | 278 | Once we move to more classical assumptions it 279 | will be an estimator of a particular parameter vector 280 | 281 | \vspace{.7em} 282 | At this stage it just 283 | defines our answer to the problem posed in \eqref{eq:prp}. 
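\end{frame}

\begin{frame}[fragile]

\vspace{1em}
A quick numerical sketch of \eqref{eq:thelsqe} (illustrative R only --- the simulated data and coefficient values below are hypothetical, not from ET):

\begin{minted}{r}
# simulate a small data set
set.seed(42)
N <- 500; K <- 3
X <- matrix(rnorm(N * K), N, K)            # design matrix
y <- drop(X %*% c(1, -2, 0.5)) + rnorm(N)  # outputs

# hat(beta) = (X'X)^{-1} X'y, computed via a linear solve
betahat <- solve(crossprod(X), crossprod(X, y))

# agrees with R's built-in least squares fit (no intercept)
coef(lm(y ~ X - 1))
\end{minted}

\end{frame}

\begin{frame}

\vspace{2em}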
That is, 284 | % 285 | \begin{equation*} 286 | \text{given $\boldx \in \RR^K$, our prediction of $y$ is } 287 | f(\boldx) = \boldx^\T \hboldbeta 288 | \end{equation*} 289 | 290 | \end{frame} 291 | 292 | \begin{frame} 293 | 294 | \vspace{2em} 295 | In terms of geometric interpretation, since $\boldX \hboldbeta$ solves 296 | \eqref{eq:olses}, it is the closest point in $\colspace \boldX$ 297 | to $\boldy$: 298 | % 299 | \begin{equation*} 300 | \boldP \boldy = \boldX \hboldbeta 301 | \quad \text{when} \quad 302 | \boldP := \proj \, (\colspace \boldX ) 303 | \end{equation*} 304 | 305 | \vspace{.7em} 306 | In what follows, 307 | $\boldM$ is the residual projection, as defined in \eqref{ET-eq:ann0} in ET 308 | 309 | \end{frame} 310 | 311 | \begin{frame}\frametitle{Assumptions} 312 | 313 | \vspace{2em} 314 | \Ass\eqref{ET-a:fr} 315 | $\boldX$ has full column rank with probability one 316 | 317 | By theorem~\ref{ET-t:exth}, $N \geq K$ is a necessary 318 | condition for the assumption to hold 319 | 320 | (If $N < K$, then $\RR^N$, which 321 | is necessarily spanned by $N$ vectors, cannot contain $K$ linearly 322 | independent vectors) 323 | 324 | \vspace{.7em} 325 | If this assumption does not hold, then minimizer of \eqref{eq:olses} still exists 326 | but is no longer unique (see ex.~\ref{ET-ex:lsiosol}) 327 | 328 | \end{frame} 329 | 330 | \begin{frame} 331 | 332 | \vspace{2em} 333 | \Ass\eqref{ET-a:poprank} 334 | $P$ is such that all elements of $\EE_P [ \boldz_n \boldz_n^\T]$ are finite. 335 | Moreover 336 | % 337 | \begin{equation} 338 | \label{eq:smx} 339 | \Sigmax := \EE_P [ \boldx_n \boldx_n^\T ] \; \text{ is finite and positive definite} 340 | \end{equation} 341 | 342 | \vspace{.7em} 343 | Finite second moments imposed to evaluate expected squared errors 344 | 345 | Assumption cannot be weakened unless we are willing to work with a 346 | different loss function 347 | \end{frame} 348 | 349 | \begin{frame}\frametitle{Notation} 350 | 351 | \vspace{2em} 352 | The projection 353 | % 354 | \begin{equation*} 355 | \hboldy := \boldX \hboldbeta = \boldP \boldy 356 | \end{equation*} 357 | % 358 | is called the \navy{vector of fitted values} 359 | 360 | The $n$th fitted value $\hat 361 | y_n$ is the prediction $\boldx_n^\T \, \hboldbeta$ associated with least 362 | squares estimate 363 | and the $n$th observation $\boldx_n$ of the input vector 364 | 365 | \vspace{.7em} 366 | The vector $\boldM 367 | \boldy$ is often denoted $\hboldu$, and called the \navy{vector of residuals}: 368 | % 369 | \begin{equation*} 370 | \hboldu := \boldM \boldy = \boldy - \hboldy 371 | \end{equation*} 372 | % 373 | \end{frame} 374 | 375 | \begin{frame} 376 | 377 | \vspace{2em} 378 | The vector of residuals corresponds to the error that occurs when $\boldy$ is 379 | approximated by $\boldP \boldy$ 380 | 381 | From fact~\ref{ET-fa:opt3} 382 | % 383 | \begin{equation} 384 | \label{eq:orr} 385 | \boldM \boldy \perp \boldP \boldy 386 | \quad \text{and} \quad 387 | \boldy = \boldP \boldy + \boldM \boldy 388 | \end{equation} 389 | 390 | \vspace{.7em} 391 | In other words, $\boldy$ can be decomposed into two orthogonal vectors $\boldP 392 | \boldy$ and $\boldM \boldy$: 393 | \begin{itemize} 394 | \item first represents the best approximation 395 | to $\boldy$ in $\colspace \boldX$ 396 | \item second represents the residual 397 | \end{itemize} 398 | 399 | \end{frame} 400 | 401 | \begin{frame} 402 | 403 | \vspace{2em} 404 | Related to the fitted values and residuals, we have some standard definitions: 405 | % 406 | \begin{itemize} 
\item \navy{Total sum of squares} $:= \tss := \| \boldy \|^2$
\item \navy{Residual sum of squares} $ := \rss := \| \boldM \boldy \|^2$
\item \navy{Explained sum of squares} $ := \ess := \| \boldP \boldy \|^2$
\end{itemize}
%
By (\ref{eq:orr}) and the Pythagorean law
%
\begin{equation}
    \label{eq:bdtss}
    \tss = \ess + \rss
\end{equation}

\vspace{.7em}
When running regressions it is conventional to report the \navy{coefficient
of determination}, or $R^2$:
%
\begin{equation}
    \label{eq:rsd}
    R^2 := \frac{\ess}{\tss}
\end{equation}
%
\end{frame}


\begin{frame}\frametitle{Out of Sample Fit}

\vspace{2em}
How does linear least squares perform out-of-sample? Start with
a general observation:

\Thm
\eqref{ET-t:dclee}
If $\ell$ is the linear function $\ell (\boldx) = \boldx^\T \boldb$, then
%
\begin{multline*}
    R(\ell)
    = \EE (y - f^*(\boldx))^2
    \\ + \EE (f^*(\boldx) - \boldx^\T \boldb^*)^2
    + (\boldb^* - \boldb)^\T
    \Sigmax (\boldb^* - \boldb)
\end{multline*}
%

Here $f^*$ is the regression function and $\boldb^* = \Sigmax^{-1} \,
\EE[\boldx \, y]$ is the vector of coefficients in the best linear predictor

$R(f)$ is the prediction risk
of $f$ and expectations are taken under the unknown joint distribution $P$ of
the pairs $(\boldx, y)$

\end{frame}

\begin{frame}

\vspace{2em}
Theorem~\ref{ET-t:dclee} decomposes the prediction risk of an
arbitrary linear predictor $\ell (\boldx) = \boldx^\T \boldb$
into three terms:
%
\begin{enumerate}
    \label{enum:tr}
    \item The \navy{intrinsic risk} $\EE (y - f^*(\boldx))^2 $
    \item The \navy{approximation error} $\EE (f^*(\boldx) - \boldx^\T \boldb^*)^2 $
    \item The \navy{estimation error} $ (\boldb^* - \boldb)^\T
    \Sigmax (\boldb^* - \boldb)$
\end{enumerate}

\end{frame}

\begin{frame}

\vspace{2em}
The intrinsic risk is also called Bayes risk; it is the residual error after $y$
is approximated with the best possible predictor
\begin{itemize}
    \item large to the extent
    that $y$ is hard to predict using $\boldx$
\end{itemize}

\vspace{.7em}
The approximation error or \emph{bias} is the deviation between the best
predictor and the best linear predictor

\end{frame}

\begin{frame}

\vspace{2em}
The estimation error is caused by the deviation of our estimator
from the best linear predictor $\boldb^*$
\begin{itemize}
    \item deviation occurs because
    we are predicting using finite sample information on the joint
    distribution of $(\boldx, y)$
\end{itemize}

\end{frame}

\begin{frame}

\vspace{2em}
\Thm\eqref{ET-t:cwa}
Let assumptions~\ref{ET-a:poprank}--\ref{ET-a:fr} hold and let $\hboldbeta_N$ be
the least squares estimator given sample size $N$. If the observations
$\{\boldz_n\}$ are independent, then
%
\begin{equation}
    \label{eq:cwa}
    \hboldbeta_N \; \toprob \; \boldb^*
    \quad \text{as} \quad
    N \to \infty
\end{equation}
%

\vspace{.7em}
Independence required only for the LLN to function --- can weaken to ergodicity

\end{frame}


\begin{frame}\frametitle{Proofs}

\vspace{2em}
\Prf[Proof of theorem~\ref{ET-t:dclee}]
Fix $\boldb \in \RR^K$ and let
$\ell (\boldx) = \boldx^\T \boldb$

Recall from
\eqref{ET-eq:ebrf2} in ET that we can write
the prediction risk as
%
\begin{equation*}
    R(\ell)
    = \EE[ (y - f^*(\boldx))^2 ]
    + \EE[ (f^*(\boldx) - \boldx^\T \boldb)^2 ]
\end{equation*}
%
To establish the result, we show
%
\begin{multline}
    \label{eq:odbb}
    \EE[ (f^*(\boldx) - \boldx^\T \boldb)^2 ]
    \\ = \EE[ (f^*(\boldx) - \boldx^\T \boldb^*)^2 ]
    + \EE[ (\boldb^* - \boldb)^\T
    \boldx \boldx^\T (\boldb^* - \boldb) ]
\end{multline}

\end{frame}

\begin{frame}

\vspace{2em}
\Prf [Proof of theorem~\ref{ET-t:dclee}](cont.)

To see \eqref{eq:odbb} holds, observe
%
\begin{equation}
    \label{eq:odbb2}
    f^*(\boldx) - \boldx^\T \boldb
    = f^*(\boldx) - \boldx^\T \boldb^*
    + \boldx^\T ( \boldb^* - \boldb)
\end{equation}
%
The terms $f^*(\boldx) - \boldx^\T \boldb^*$ and
$\boldx^\T ( \boldb^* - \boldb)$ are orthogonal because:

\begin{itemize}
    \item $\boldx^\T \boldb^*$ is the orthogonal projection of $f^*(\boldx)$
    onto $S = \Span\{\boldx\}$, the linear subspace of $L_2$ spanned
    by all linear combinations of the form $\bolda^\T \boldx$
    \item as such, $f^*(\boldx) - \boldx^\T \boldb^*$ is
    orthogonal to every element of the target subspace $\Span\{\boldx\}$,
    including $\boldx^\T ( \boldb^* - \boldb)$
\end{itemize}


\end{frame}

\begin{frame}

\vspace{2em}
\Prf[Proof of theorem~\ref{ET-t:dclee}](cont.)

For any orthogonal elements $u$ and $v$ of
$L_2$ we have

$$\EE[(u + v)^2] = \EE[u^2] + \EE[v^2]$$
(This is the
Pythagorean law in $L_2$.)

\vspace{.7em}
Squaring both sides of
\eqref{eq:odbb2}, taking expectations and applying this law
gives \eqref{eq:odbb}
\qedsymbol

\end{frame}


\begin{frame}

\vspace{2em}
\Prf[Proof of theorem~\ref{ET-t:cwa}]

First we express $\hboldbeta_N$
in a slightly different way

Multiplying and dividing by $N$ in the
definition of $\hboldbeta_N$ and then
expanding out the matrix products
(see ex.~\ref{ET-ex:veseeo}) gives
%
\begin{multline}
    \label{eq:seeo0}
    \hboldbeta_N
    = \left[ \frac{1}{N} \, \boldX^\T\boldX \right]^{-1}
    \cdot \; \frac{1}{N} \, \boldX^\T \boldy
    \\ = \left[ \frac{1}{N} \sum_{n=1}^N \boldx_n \boldx_n^\T \right]^{-1}
    \cdot \; \frac{1}{N} \sum_{n=1}^N \boldx_n y_n
\end{multline}

\end{frame}

\begin{frame}

\vspace{2em}
\Prf[Proof of theorem~\ref{ET-t:cwa}](cont.)
638 | 639 | By the matrix LLN in fact~\ref{ET-fa:llnmat}, we have 640 | % 641 | \begin{equation*} 642 | \frac{1}{N} \sum_{n=1}^N \boldx_n \boldx_n^\T 643 | \toprob \Sigmax 644 | \quad \text{and} \quad 645 | \frac{1}{N} \sum_{n=1}^N \boldx_n y_n 646 | \toprob \EE[\boldx y] 647 | \quad \text{as} \quad 648 | N \to \infty 649 | \end{equation*} 650 | % 651 | By fact~\ref{ET-fa:cmtetcv1} on page~\pageref{ET-fa:cmtetcv1}, 652 | convergence in probability is 653 | preserved over the taking of inverses and products 654 | 655 | Hence $\hboldbeta_N \, \toprob \, \Sigmax^{-1} \EE[\boldx \, y] = \boldb^*$, 656 | as was to be shown 657 | \qedsymbol 658 | 659 | \end{frame} 660 | 661 | \begin{frame}\frametitle{In-Sample Fit} 662 | 663 | \vspace{2em} 664 | The difference between in-sample fit (empirical risk) and 665 | out-of-sample fit (risk) was discussed in \S\ref{ET-ss:ermls} 666 | 667 | In-sample fit of a regression is often measured with $R^2$ (see 668 | Equation \eqref{eq:rsd} above) 669 | 670 | \vspace{.7em} 671 | \Fact(11.1.1) 672 | $0 \leq R^2 \leq 1$ with $R^2 = 1$ if and only 673 | if $\boldy \in \colspace \boldX$ 674 | 675 | That $R^2 \leq 1$ is immediate from $\| \boldP \boldy \| 676 | \leq \| \boldy \|$ 677 | 678 | Exercise~\ref{ET-ex:yibx} asks you to prove the second claim 679 | 680 | More generally, a 681 | high $R^2$ indicates $\boldy$ is relatively close to $\colspace \boldX$ 682 | 683 | \end{frame} 684 | 685 | 686 | 687 | \begin{frame} 688 | 689 | \vspace{2em} 690 | We can increase $R^2$ at least weakly by 691 | adding regressors 692 | 693 | \vspace{.7em} 694 | \Fact\eqref{ET-fa:rsqe} 695 | Let $\boldX_a$ and $\boldX_b$ be two design matrices. 696 | If $R^2_a$ and $R^2_b$ are the respective 697 | coefficients of determination, then 698 | % 699 | \begin{equation*} 700 | \colspace \boldX_a \subset \colspace \boldX_b 701 | \; \implies \; 702 | R^2_a \leq R^2_b 703 | \end{equation*} 704 | % 705 | For a proof, see exercise~\ref{ET-ex:rsqe} 706 | 707 | \end{frame} 708 | 709 | \begin{frame} 710 | 711 | \vspace{.7em} 712 | Misleading to equate high $R^2$ with a successful regression 713 | 714 | Note 715 | % 716 | \begin{equation*} 717 | R^2 718 | = 1 - \frac{\rss}{ \tss } 719 | = 1 - N \frac{\Remp(\hat f) }{\tss} 720 | \end{equation*} 721 | % 722 | where $\Remp$ is as defined in \eqref{eq:ermm} and $\hat f$ is our linear 723 | predictor $\hat f(\boldx) = \boldx^\T \hboldbeta$ 724 | 725 | \vspace{.7em} 726 | High $R^2$ means low 727 | empirical risk and good in-sample fit 728 | 729 | But low empirical risk no guarantee 730 | of low prediction risk, as emphasized in \S\ref{ET-ss:ermls} 731 | 732 | \end{frame} 733 | 734 | \begin{frame} 735 | 736 | \vspace{2em} 737 | Let's link 738 | fact~\ref{ET-fa:rsqe} with 739 | fact~\ref{ET-fa:emprd} 740 | \begin{itemize} 741 | \item fact~\ref{ET-fa:emprd} says we can always decrease empirical risk by 742 | increasing the hypothesis space 743 | \end{itemize} 744 | 745 | \vspace{.7em} 746 | Suppose $\boldx$ lists a large number of possible regressors. 
Let the 747 | hypothesis space be 748 | % 749 | \begin{equation*} 750 | \hH_j := 751 | \left\{ \text{all } f \colon \RR^j \to \RR \st 752 | f(\boldx) = \boldx^\T \boldb \text{ for some } 753 | \boldb \in \RR^j 754 | \right\} 755 | \end{equation*} 756 | % 757 | where $1 \leq j \leq K$ 758 | 759 | Empirical risk minimization over $\hH_j$ equivalent to linear regression 760 | over the first $j$ regressors 761 | 762 | Empirical risk falls as $j$ increases by 763 | fact~\ref{ET-fa:emprd} --- 764 | hence $R^2$ increases; same conclusion as fact~\ref{ET-fa:rsqe} 765 | 766 | \end{frame} 767 | 768 | \section{Geometry of Least Squares} 769 | 770 | \begin{frame}\frametitle{Transformations and Basis Functions} 771 | 772 | \vspace{2em} 773 | In discussing the decision to set $\hH = \llL$, we 774 | mentioned we can use many of the same ideas when extending $\hH$ to a 775 | broader class of functions 776 | 777 | First transform the data using 778 | some arbitrary function $\boldphi \colon \RR^K \to \RR^J$ 779 | 780 | \vspace{.7em} 781 | The action of 782 | $\boldphi$ on $\boldx \in \RR^K$ 783 | % 784 | \begin{equation*} 785 | \boldx \mapsto \boldphi(\boldx) 786 | = 787 | \left( 788 | \begin{array}{c} 789 | \phi_1(\boldx) \\ 790 | \phi_2(\boldx) \\ 791 | \vdots \\ 792 | \phi_J(\boldx) 793 | \end{array} 794 | \right) \in \RR^J 795 | \end{equation*} 796 | 797 | \end{frame} 798 | 799 | \begin{frame} 800 | 801 | \vspace{2em} 802 | The individual functions $\phi_1, \ldots, \phi_J$ mapping 803 | $\RR^K$ into $\RR$ are sometimes called \navy{basis functions}. 804 | In machine learning texts, the range of $\boldphi$ is called \navy{feature space} 805 | 806 | \vspace{.7em} 807 | We solve the 808 | empirical risk minimization problem when 809 | the hypothesis space is 810 | % 811 | \begin{multline*} 812 | \hH_{\boldphi} := \{ \text{all functions } \ell \circ \boldphi, 813 | \\ \text{ where $\ell$ is a linear function from $\RR^J$ to $\RR$} \} 814 | \end{multline*} 815 | 816 | The empirical risk minimization problem is then 817 | % 818 | \begin{equation} 819 | \label{eq:olslqphi} 820 | \min_{\ell } \; 821 | \sum_{n=1}^N \{ y_n - \ell(\boldphi(\boldx_n)) \}^2 822 | = 823 | \min_{\boldgamma \in \RR^J} \; 824 | \sum_{n=1}^N (y_n - \boldgamma^\T \boldphi(\boldx_n))^2 825 | \end{equation} 826 | 827 | \end{frame} 828 | 829 | \begin{frame} 830 | 831 | \vspace{2em} 832 | Switching to matrix notation, if 833 | % 834 | \begin{equation} 835 | \label{eq:phidesign} 836 | \boldPhi := 837 | \left( 838 | \begin{array}{ccc} 839 | \phi_1(\boldx_1) & \cdots & \phi_J(\boldx_1) \\ 840 | \phi_1(\boldx_2) & \cdots & \phi_J(\boldx_2) \\ 841 | \vdots & \cdots & \vdots \\ 842 | \phi_1(\boldx_N) & \cdots & \phi_J(\boldx_N) \\ 843 | \end{array} 844 | \right) \in \RR^{N \times J} 845 | \end{equation} 846 | 847 | Then the objective in \eqref{eq:olslqphi} 848 | can be expressed as $\| \boldy - \boldPhi \boldgamma \|^2$. 849 | Since increasing functions don't affect minimizers, the problem becomes 850 | % 851 | \begin{equation} 852 | \label{eq:olsesphi} 853 | \argmin_{\boldgamma \in \RR^J} \| \boldy - \boldPhi \boldgamma \| 854 | \end{equation} 855 | 856 | Assuming that $\boldPhi$ is full column rank, the solution is 857 | % 858 | \begin{equation*} 859 | \hboldgamma := (\boldPhi^\T \boldPhi)^{-1} \boldPhi^\T \boldy 860 | \end{equation*} 861 | 862 | \end{frame} 863 | 864 | \begin{frame} 865 | 866 | \vspace{2em} 867 | \Eg 868 | Adding an intercept to a regression can be regarded as a transformation of the 869 | data. 
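\end{frame}

\begin{frame}[fragile]

\vspace{1em}
For instance (a minimal R sketch with hypothetical data, not from ET), fitting with R's built-in intercept coincides with least squares on the transformed data:

\begin{minted}{r}
set.seed(1)
N <- 200
x <- rnorm(N)
y <- 2 + 3 * x + rnorm(N)

Phi <- cbind(1, x)   # each row is phi(x_n)' = (1, x_n)
gammahat <- solve(crossprod(Phi), crossprod(Phi, y))

coef(lm(y ~ x))      # same intercept and slope
\end{minted}

\end{frame}

\begin{frame}

\vspace{2em}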
Indeed adding an intercept is equivalent to applying the transformation
%
\begin{equation*}
    \boldphi(\boldx)
    =
    \left(
    \begin{array}{c}
        1 \\
        \boldx
    \end{array}
    \right)
    =
    \left(
    \begin{array}{c}
        1 \\
        x_1 \\
        \vdots \\
        x_K
    \end{array}
    \right)
\end{equation*}


In practice, adding an intercept means fitting an extra parameter, and
this extra degree of freedom allows a more flexible fit in our regression

\end{frame}

\begin{frame}

\vspace{2em}
\Eg
Let $K = 1$, so that $x_n \in \RR$. Consider the monomial
basis functions $\phi_j(x) := x^{j-1}$, so that
%
\begin{equation}
    \label{eq:polymod}
    \boldgamma^\T \boldphi(x_n)
    = \boldgamma^\T
    \left(
    \begin{array}{c}
        x_n^0 \\
        x_n^1 \\
        \vdots \\
        x_n^{J-1}
    \end{array}
    \right)
    = \sum_{j = 1}^J \gamma_j x_n^{j-1}
\end{equation}
%
The monomial basis transformation applied to scalar $x$ corresponds to
univariate polynomial regression, as discussed in \S\ref{ET-ss:ermls} of ET

\vspace{.7em}
Under
this transformation, the matrix $\boldPhi$ in \eqref{eq:phidesign} is
called the \navy{Vandermonde matrix}

Weierstrass approximation
theorem: polynomials of sufficiently high order can effectively
approximate any one-dimensional continuous nonlinear relationship

\end{frame}

\begin{frame}

\vspace{2em}
\Eg
A common alternative is to use orthogonal polynomials such as Chebychev
polynomials or Hermite polynomials

Other
alternatives include wavelets and splines

\vspace{.7em}
In econometrics this procedure
is often referred to as nonparametric series regression

A key topic is the optimal number of basis functions

\end{frame}

\begin{frame}

\vspace{2em}
In this figure, it is clear that no linear function mapping $x$ to $y$
can produce small approximation error

\begin{figure}
    \centering
    \scalebox{.4}{\includegraphics[trim={2em 2em 2em 2em}, clip]{linear_after_proj.pdf}}
    \caption{ Nonlinear relationship between $x$ and $y$}
\end{figure}

\end{frame}

\begin{frame}

\vspace{2em}
Figure on following slide shows data after applying
the transformation $\RR \ni x \mapsto \boldphi(x) := (x, x^3)^\T \in \RR^2$

The
plane drawn in the figure represents a linear function $\ell \colon \RR^2 \to \RR$

\vspace{.7em}
The composition $\ell \circ \boldphi$ has low approximation
error

The two figures illustrate how nonlinear data can become linear when
projected into higher dimensions

\end{frame}

\begin{frame}

\begin{figure}
    \centering
    \scalebox{.54}{\includegraphics[trim={3em 2em 2em 5em}, clip]{linear_after_proj3d.pdf}}
    \caption{\label{f:lap3d} Approximate linearity after projecting the data to $\RR^2$}

\end{figure}

\end{frame}

\section{FWL Theorem}

\begin{frame}\frametitle{The Frisch--Waugh--Lovell Theorem}

\vspace{2em}
The Frisch--Waugh--Lovell (FWL) theorem yields an expression for an arbitrary
sub-vector of the least squares estimator $\hboldbeta$ obtained by regressing
$\boldy$ on $\boldX$

\vspace{.7em}
Continue with assumptions made already in the lecture

Let $\boldy$ and $\boldX$ be given and let $\hboldbeta$ be the least squares estimator,
as given by equation \eqref{eq:thelsqe} above

\end{frame}

\begin{frame}

\vspace{2em}
In addition, let $K_1$ be an integer with $1 \leq K_1 < K$, and let
%
\begin{itemize}
    \item $\boldX_1$ be a matrix consisting of the first $K_1$ columns of
    $\boldX$,
    \item $\boldX_2$ be a matrix consisting of the remaining $K_2 := K - K_1$
    columns,
    \item $\hboldbeta_1$ be the $K_1 \times 1$ vector consisting of the first
    $K_1$ elements of $\hboldbeta$,
    \item $\hboldbeta_2$ be the $K_2 \times 1$ vector consisting of the
    remaining $K_2$ elements of $\hboldbeta$,
    \item $\boldP_1 := \proj ( \colspace \boldX_1)$, and
    \item $\boldM_1 := \boldI - \boldP_1 =$ the corresponding residual
    projection
\end{itemize}

\end{frame}

\begin{frame}

\vspace{2em}
\Thm
[FWL theorem]
\eqref{ET-t:fwl}
The vector $\hboldbeta_2$ satisfies
%
\begin{equation*}
    \label{eq:fwle}
    \hboldbeta_2
    = (\boldX_2^\T \boldM_1 \boldX_2)^{-1} \boldX_2^\T \boldM_1 \boldy
\end{equation*}
%
For a proof, see ET page \pageref{ET-t:fwl}

\end{frame}

\begin{frame}

\vspace{2em}
The expression for $\hboldbeta_2$ in theorem~\ref{ET-t:fwl} can be rewritten as
%
\begin{equation}
    \label{eq:sxb2}
    \hboldbeta_2
    = [(\boldM_1\boldX_2)^\T \boldM_1 \boldX_2]^{-1} (\boldM_1 \boldX_2)^\T \boldM_1 \boldy
\end{equation}

(see exercise~\ref{ET-ex:cnxfwl})

\vspace{.7em}
The above formula gives us the following claim: there is
another way to obtain $\hboldbeta_2$ besides just regressing $\boldy$ on
$\boldX$ and then extracting the last $K_2$ elements

We can also regress
$\boldM_1 \boldy$ on $\boldM_1 \boldX_2$ to produce the same result

\end{frame}

\begin{frame}

\vspace{2em}
For intuition: consider the case where $\boldX_2$ is the single column
$\col_K \boldX$, containing the
observations on the $K$th regressor

Write $\boldX_1$ as $\boldX_{-K}$ to remind us
that it stands for all columns of $\boldX$ except the $K$th one, and
similarly for $\boldM_1$

\vspace{.7em}
The least squares estimate $\hat \beta_K$ can be found by regressing
%
\begin{equation*}
    \label{eq:resregy}
    \tilde \boldy
    := \boldM_{-K} \boldy = \text{residuals of regressing $\boldy$ on $\boldX_{-K}$}
\end{equation*}
%
on
%
\begin{equation*}
    \label{eq:resregx}
    \tilde \boldx_K
    := \boldM_{-K} \col_K \boldX
    = \text{residuals of regressing $\col_K \boldX$ on $\boldX_{-K}$}
\end{equation*}


\end{frame}

\begin{frame}

\vspace{2em}
The two residual terms $\tilde \boldy$ and $\tilde \boldx_K$ can be thought of as the parts of $\boldy$ and
$\col_K \boldX$ that are ``not explained by'' $\boldX_{-K}$

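\end{frame}

\begin{frame}[fragile]

\vspace{1em}
A minimal R check of \eqref{eq:sxb2} on simulated data (hypothetical values, not from ET):

\begin{minted}{r}
set.seed(123)
N <- 400
X1 <- cbind(1, rnorm(N))         # first K1 = 2 columns
X2 <- cbind(rnorm(N), rnorm(N))  # remaining K2 = 2 columns
X  <- cbind(X1, X2)
y  <- drop(X %*% c(1, 2, 3, 4)) + rnorm(N)

betahat <- solve(crossprod(X), crossprod(X, y))

# residual projection M1, then regress M1 y on M1 X2
M1 <- diag(N) - X1 %*% solve(crossprod(X1), t(X1))
Z  <- M1 %*% X2
b2 <- solve(crossprod(Z), crossprod(Z, M1 %*% y))

cbind(betahat[3:4], b2)          # the two columns agree
\end{minted}

\end{frame}

\begin{frame}

\vspace{2em}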
Intuitively, the process for obtaining the least squares estimate $\hat 1119 | \beta_K$ is: 1120 | % 1121 | \begin{enumerate} 1122 | \item remove effects of all other regressors from $\boldy$ and 1123 | $\col_K \boldX$, producing $\tilde \boldy$ and $\tilde 1124 | \boldx_K$ 1125 | \item regress $\tilde \boldy$ on $\tilde \boldx_K$ 1126 | \end{enumerate} 1127 | 1128 | \vspace{.7em} 1129 | Different from the process for obtaining the coefficient of 1130 | the vector $\col_K \boldX$ in a simple univariate regression: 1131 | % 1132 | \begin{enumerate} 1133 | \item regress $\boldy$ on $\col_K \boldX$ 1134 | \end{enumerate} 1135 | 1136 | \end{frame} 1137 | 1138 | \begin{frame} 1139 | 1140 | \vspace{2em} 1141 | Difference between the univariate least squares estimated coefficient of 1142 | the $K$th regressor and the multiple regression least squares coefficient: 1143 | \begin{itemize} 1144 | \item the 1145 | multiple regression coefficient $\hat \beta_K$ measures the \emph{isolated 1146 | relationship} between $x_K$ and $y$ 1147 | \item does not take into account indirect 1148 | channels involving other variables 1149 | \end{itemize} 1150 | % 1151 | \end{frame} 1152 | 1153 | \begin{frame} 1154 | 1155 | \vspace{2em} 1156 | We can illustrate further with simulation. Suppose 1157 | % 1158 | \begin{equation*} 1159 | y = x_1 + x_2 + u 1160 | \quad \text{where} \quad 1161 | u \iidsim \nN(0,1) 1162 | \end{equation*} 1163 | % 1164 | Generate $N$ independent observations from this model 1165 | 1166 | \vspace{.7em} 1167 | Regress $y$ 1168 | on the observations of $(x_1, x_2)$ 1169 | \begin{itemize} 1170 | \item coefficients for $x_1$ and $x_2$ will both be close to 1171 | unity, provided $N$ is sufficiently large 1172 | \end{itemize} 1173 | 1174 | Regress $y$ on $x_1$ alone 1175 | \begin{itemize} 1176 | \item coefficient for $x_1$ will depend on the relationship between $x_1$ and $x_2$ 1177 | \end{itemize} 1178 | 1179 | \end{frame} 1180 | 1181 | \begin{frame} 1182 | \vspace{2em} 1183 | For example: 1184 | % 1185 | \inputminted{r}{figs_code/small_sim.R} 1186 | 1187 | Here the coefficient for $x_1$ is much larger than unity 1188 | \begin{itemize} 1189 | \item an increase in $x_1$ tends to have a large positive effect on $x_2$, 1190 | which in turn increases $y$ 1191 | \end{itemize} 1192 | 1193 | \end{frame} 1194 | 1195 | \begin{frame}\frametitle{Simple Regression} 1196 | 1197 | \vspace{2em} 1198 | Application of FWL Theorem: derive expression for the slope coefficient 1199 | in simple linear regression from the multivariate expression 1200 | 1201 | \vspace{.7em} 1202 | Simple linear regression as special case of multivariate regression 1203 | \begin{itemize} 1204 | \item $\boldone$ is the first column of $\boldX$ and $K = 2$ 1205 | \end{itemize} 1206 | 1207 | The second column of $\boldX$ will be denoted by $\boldx$ 1208 | 1209 | The least squares estimates are 1210 | % 1211 | \begin{equation*} 1212 | \hat \beta_2 = \frac{\sum_{n=1}^N (x_n - \bar x)(y_n - \bar y)} 1213 | {\sum_{n=1}^N (x_n - \bar x)^2} 1214 | \quad \text{and} \quad 1215 | \hat \beta_1 = \bar y - \hat \beta_2 \bar x 1216 | \end{equation*} 1217 | 1218 | where $\bar x$ is the sample mean of $\boldx$ and $\bar y$ is 1219 | the sample mean of $\boldy$ 1220 | 1221 | \end{frame} 1222 | 1223 | \begin{frame} 1224 | 1225 | \vspace{2em} 1226 | We can rewrite the slope coefficient $\hat \beta_2$ 1227 | % 1228 | \begin{equation} 1229 | \label{eq:sre1} 1230 | \hat \beta_2 = 1231 | [(\boldx - \bar x \boldone)^\T(\boldx - \bar x \boldone)]^{-1} 1232 | 
(\boldx - \bar x \boldone)^\T(\boldy - \bar y \boldone)
\end{equation}
%
By the FWL theorem (equation~\ref{eq:sxb2})
%
\begin{equation}
    \label{eq:sre2}
    \hat \beta_2
    = [(\boldM_c\boldx)^\T \boldM_c \boldx]^{-1} (\boldM_c \boldx)^\T \boldM_c \boldy
\end{equation}

here $\boldM_c$ is the residual projection associated with the linear
subspace $S = \Span\{\boldone\}$

\vspace{.7em}
For this residual projection $\boldM_c$ and any $\boldz$,
we have $\boldM_c \boldz = \boldz - \bar z \boldone$ --- RHS of
(\ref{eq:sre1}) and (\ref{eq:sre2}) coincide

\end{frame}

\begin{frame}

\vspace{2em}
Generalize to the
case where there are multiple nonconstant regressors

Instead of one column $\boldx$ of observations on a
single nonconstant regressor, we have a matrix $\boldX_2$ containing multiple
columns, each a vector of observations on a nonconstant regressor

\vspace{.7em}
If the least squares estimate $\hboldbeta$ is partitioned into $(\hat \beta_1,
\hboldbeta_2)$, then
%
\begin{equation*}
    \boldX \hboldbeta = \boldone \beta_1 + \boldX_2 \hboldbeta_2
\end{equation*}


Applying the FWL theorem, we can write
$\hboldbeta_2$ as
%
\begin{equation*}
    \hboldbeta_2
    = [(\boldM_c\boldX_2)^\T \boldM_c \boldX_2]^{-1} (\boldM_c \boldX_2)^\T \boldM_c \boldy
\end{equation*}
%
where $\boldM_c$ is the residual projection (Equation \eqref{ET-eq:pczm} in ET)

\end{frame}

\begin{frame}

\vspace{2em}
$\boldM_c \boldy$ is $\boldy$ centered around its mean

$\boldM_c\boldX_2$ is a matrix formed by taking each column of
$\boldX_2$ and centering it around its mean

\vspace{1em}
Thus, in a least squares regression with an intercept, the
estimated coefficients of the nonconstant regressors
are equal to the estimated coefficients of a zero-intercept regression
performed after all variables have been centered around their mean

\end{frame}

\begin{frame}\frametitle{Centered $R^{2}$}

\vspace{2em}
Several versions of
$R^2$ reported in common regression packages

One of these is the so-called
centered $R^2$

\vspace{.7em}
The version we discussed so far will now be called
uncentered $R^2$

Why introduce an alternative to uncentered $R^2$?
\begin{itemize}
    \item it fails to be invariant to certain changes of units that involve
    addition or subtraction whenever
    $\boldX$ contains an intercept
    \item e.g., actual inflation versus inflation in excess of a
    certain level, income versus income over a certain threshold, etc.
\end{itemize}


\end{frame}

\begin{frame}

\vspace{2em}
Define centered $R^2$
%
\begin{equation*}
    \label{eq:crs}
    R_c^2
    := \frac{\| \boldP \boldM_c \boldy \|^2}{\| \boldM_c \boldy \|^2}
    = \frac{\| \boldM_c \boldP \boldy \|^2}{\| \boldM_c \boldy \|^2}
\end{equation*}
%
(See ex.~\ref{ET-ex:crs} to prove the equality)

\vspace{.7em}
Adding a constant to each element
of $\boldy$ will have no effect on $R^2_c$ because $\boldM_c$ maps constant
vectors to $\boldzero$ (see example~\ref{ET-eg:pvonepm})

\end{frame}

\begin{frame}

Rewrite $R^2$ (ex.~\ref{ET-ex:cecrs}) as
%
\begin{equation*}
    \label{eq:cecrs}
    R_c^2 = \frac{ \sum_{n=1}^N (\hat y_n - \bar y )^2 }
    { \sum_{n=1}^N (y_n - \bar y )^2 }
\end{equation*}

\vspace{.7em}
In the case of simple regression, $R_c^2$ is a measure of correlation

\begin{itemize}
    \item $R_c^2$ is equal to the square of the
    sample correlation between the regressor and regressand, as defined by
    Equation \ref{ET-eq:sampcorr} (shown in ex.~\eqref{ET-ex:crsicc})
\end{itemize}

\end{frame}



\end{document}
--------------------------------------------------------------------------------
/source_code/Lecture_12.tex:
--------------------------------------------------------------------------------

\input{preamb.tex}




\title{A Primer in Econometric Theory}

\subtitle
{Lecture 12: Large Samples and Dependence}

\author{John Stachurski \\ \tiny Lectures by Akshay Shanker}



\begin{document}

\begin{frame}
    \titlepage
\end{frame}

\section{Large Sample Least Squares}

\begin{frame}\frametitle{Large Sample Least Squares}

\vspace{2em}
Large samples allow us to drop the parametric assumptions on the error term that we made for finite sample inference

\vspace{.7em}
Theory developed below also
useful for cross-sectional environments with no correlation between
observations

\end{frame}

\begin{frame}

\vspace{2em}
Assume data $(y_1, \boldx_1), \ldots, (y_T, \boldx_T)$
generated by the linear model
%
\begin{equation}
    \label{eq:lrtsc}
    y_t = \boldx_t^\T \boldbeta + u_t,
    \qquad t = 1, \ldots, T
\end{equation}

\begin{itemize}
    \item $\boldbeta$ is a $K$-vector of unknown coefficients, and $u_t$ is an
    unobservable shock
    \item observations indexed by $t$ rather than $n$ to remind
    us that observations are dependent
    \item sample size will be denoted by $T$
\end{itemize}

\end{frame}

\begin{frame}

\vspace{2em}
Let:
\begin{itemize}
    \item $\boldy$ be the $T \times 1$ vector of observed outputs
    \item $y_t$ is the $t$th element of $\boldy$
    \item $\boldu$ is the vector of shocks
    \item $u_t$ is the $t$th element of $\boldu$
\end{itemize}

\vspace{.7em}
Let $\boldX$ be the $T \times K$ matrix
$\boldX := (x_{tk})$, where $1 \leq t \leq T$ and $1 \leq k \leq K$

\vspace{.7em}
Estimate the parameter vector $\boldbeta$ via least squares

\end{frame}

\begin{frame}

\vspace{2em}
The OLS estimate:
%
| \begin{equation*} 85 | \hboldbeta_T 86 | = \left[ \frac{1}{T} \sum_{t=1}^T \boldx_t \boldx_t^\T \right]^{-1} 87 | \cdot \; \frac{1}{T} \sum_{t=1}^T \boldx_t y_t 88 | \end{equation*} 89 | 90 | \vspace{.7em} 91 | Expression for the sampling error in \eqref{ET-eq:hg} can be 92 | expanded into sums to obtain 93 | % 94 | \begin{equation} 95 | \label{eq:seeo} 96 | \hboldbeta_T - \boldbeta 97 | = \left[ \frac{1}{T} \sum_{t=1}^T \boldx_t \boldx_t^\T \right]^{-1} 98 | \cdot \; \frac{1}{T} \sum_{t=1}^T \boldx_t u_t 99 | \end{equation} 100 | 101 | \end{frame} 102 | 103 | \begin{frame} 104 | 105 | \vspace{2em} 106 | Drop the exogeneity assumption 107 | $\EE[\boldu \given \boldX] = \boldzero$ 108 | 109 | \vspace{.7em} 110 | For example, 111 | exogeneity fails when we estimate AR(1) model $y_{t+1} = 112 | \beta y_t + u_{t+1}$ 113 | 114 | \vspace{.7em} 115 | Setting $x_t = y_{t-1}$ produces the 116 | regression model 117 | % 118 | \begin{equation*} 119 | \label{eq:ar1reg} 120 | y_t = \beta x_t + u_t, 121 | \qquad t=1,\ldots,T 122 | \end{equation*} 123 | 124 | \vspace{.7em} 125 | Regressor correlated with lagged values of the shock 126 | 127 | \end{frame} 128 | 129 | \begin{frame} 130 | 131 | \vspace{2em} 132 | \Ass\eqref{ET-a:rtsc} 133 | The matrix $\boldX$ is full column rank with probability one and the 134 | sequence $\{\boldx_t\}$ is stationary. Moreover 135 | % 136 | \begin{enumerate} 137 | \item $\Sigmax := \EE [ \boldx_t \boldx_t^\T ]$ exists and is 138 | positive definite, and 139 | \item the sequence $\{\boldx_t\}$ satisfies 140 | $\frac{1}{T} \sum_{t=1}^T \boldx_t \boldx_t^\T \toprob \Sigmax$ 141 | as $T \to \infty$. 142 | \end{enumerate} 143 | 144 | \end{frame} 145 | 146 | \begin{frame} 147 | 148 | \vspace{2em} 149 | \Eg 150 | Let $\{x_t\}$ be the Markov process 151 | in example~\ref{ET-eg:ztar} 152 | 153 | To repeat 154 | % 155 | \begin{align*} 156 | x_{t+1} = a |x_t| + (1 - a^2)^{1/2} w_{t+1} 157 | \quad \\ \text{with} \quad 158 | -1 < a < 1 159 | \quad \text{and} \quad 160 | \{w_t\} \iidsim \nN(0, 1) 161 | \end{align*} 162 | % 163 | The model has a unique, globally 164 | stable stationary distribution $\pi_\infty$ 165 | 166 | \vspace{.7em} 167 | If $\lL(x_0) = \pi_\infty$, 168 | then the process $\{x_t\}$ is stationary and all of the conditions in 169 | assumption~\ref{ET-a:rtsc} are satisfied (see ex.~\ref{ET-ex:chrtsc}) 170 | 171 | \end{frame} 172 | 173 | \begin{frame} 174 | 175 | \vspace{2em} 176 | \Ass 177 | \eqref{ET-a:stsc}[Weak exogeneity] 178 | 179 | The shocks $\{u_t\}$ are {\sc iid} 180 | 181 | \vspace{.7em} 182 | Moreover 183 | % 184 | \begin{enumerate} 185 | \item $\EE[u_t] = 0$ and $\EE[u_t^2] = \sigma^2$ for all $t$, and 186 | \item $u_t$ is independent of $\boldx_1, \boldx_2,\ldots,\boldx_t$ for 187 | all $t$ 188 | \end{enumerate} 189 | 190 | \end{frame} 191 | 192 | \begin{frame} 193 | 194 | \vspace{2em} 195 | \Eg 196 | \eqref{ET-eg:ar1lsols} 197 | In the AR(1) regression (\ref{eq:ar1reg}), assumption~\ref{ET-a:stsc} 198 | holds if shocks $\{u_t\}$ are {\sc iid} 199 | \begin{itemize} 200 | \item 201 | contemporaneous and lagged regressors $x_1, \ldots, x_t$ are equal to the 202 | lagged state variables $y_0, \ldots, y_{t-1}$ 203 | \item $y_0, \ldots, y_{t-1}$ are functions 204 | of only $y_0$ and $u_1,\ldots,u_{t-1}$, and therefore independent of $u_t$ 205 | \end{itemize} 206 | 207 | \end{frame} 208 | 209 | 210 | \begin{frame} 211 | 212 | \vspace{2em} 213 | A consequence of assumption~\ref{ET-a:stsc} 214 | % 215 | \begin{equation*} 216 | \label{eq:cstsc} 217 | \EE[ u_s u_t 
\given \boldx_1, \ldots,\boldx_t] 218 | = 219 | \begin{cases} 220 | & \sigma^2 \quad \text{if} \quad s = t \\ 221 | & 0 \quad \;\; \text{if} \quad s < t 222 | \end{cases} 223 | \end{equation*} 224 | 225 | \vspace{.7em} 226 | The proof is an exercise (ex.~\ref{ET-ex:cstsc}) 227 | 228 | \end{frame} 229 | 230 | \begin{frame} 231 | 232 | \vspace{2em} 233 | Implication of assumptions~\ref{ET-a:rtsc} and \ref{ET-a:stsc}: 234 | linear functions of $\{\boldx_t u_t\}$ form a martingale difference sequence 235 | ({\sc mds}) 236 | 237 | \vspace{.7em} 238 | \Lem 239 | \eqref{ET-l:xumds} 240 | if assumptions~\ref{ET-a:rtsc} and \ref{ET-a:stsc} both hold, then, for any 241 | constant vector $\bolda \in \RR^K$, the sequence $\{m_t\}$ defined by 242 | $m_t = \bolda^\T \boldx_t u_t$ is 243 | % 244 | \begin{enumerate} 245 | \item stationary with $\EE[m_t^2] = \sigma^2 \bolda^\T 246 | \Sigmax \bolda$ for all $t$, and 247 | \item an {\sc mds} with respect to the filtration 248 | defined by 249 | % 250 | \begin{equation*} 251 | \label{eq:deffil} 252 | \fF_t := \{\boldx_1,\ldots,\boldx_t, \boldx_{t+1}, u_1, \ldots, u_t\} 253 | \end{equation*} 254 | % 255 | \end{enumerate} 256 | 257 | \end{frame} 258 | 259 | \begin{frame} 260 | 261 | \vspace{2em} 262 | \Prf 263 | 264 | First let's check part 1. 265 | 266 | That $\{m_t\}$ is stationary 267 | follows from the assumption that $\{u_t\}$ and $\{\boldx_t\}$ are 268 | stationary 269 | 270 | \vspace{.7em} 271 | Regarding the second moment $\EE[m_1^2]$, we 272 | have 273 | % 274 | \begin{equation*} 275 | \EE[m_1^2] 276 | = \EE [ \EE[ u_1^2 (\bolda^\T \boldx_1)^2 \given \boldx_1]] 277 | = \EE [ (\bolda^\T \boldx_1)^2 \EE[ u_1^2 \given \boldx_1]] 278 | \end{equation*} 279 | % 280 | From independence of $u_1$ and $\boldx_1$, the inner expectation is 281 | $\sigma^2$ 282 | 283 | Moreover 284 | % 285 | \begin{equation*} 286 | (\bolda^\T \boldx_1)^2 = \bolda^\T \boldx_1 \bolda^\T \boldx_1 287 | = \bolda^\T \boldx_1 \boldx_1^\T \bolda 288 | \end{equation*} 289 | % 290 | \begin{equation*} 291 | \fore 292 | \EE[m_1^2] 293 | = \EE [ \bolda^\T \boldx_1 \boldx_1^\T \bolda \; \sigma^2 ] 294 | = \sigma^2 \bolda^\T \EE [ \boldx_1 \boldx_1^\T ] \bolda 295 | = \sigma^2 \bolda^\T \Sigmax \bolda 296 | \end{equation*} 297 | 298 | \end{frame} 299 | 300 | \begin{frame} 301 | 302 | \vspace{2em} 303 | To check part 2., note $\{m_t\}$ is adapted to $\{\fF_t\}$, since 304 | $m_t := u_t \bolda^\T \boldx_t$ is a function of variables in $\fF_t$ 305 | 306 | Moreover we have 307 | % 308 | \begin{align*} 309 | \EE[ m_{t+1} \given \fF_t ] 310 | = \EE[ u_{t+1} \bolda^\T \boldx_{t+1} \given \fF_t ] 311 | = \bolda^\T \boldx_{t+1} \EE[ u_{t+1} \given \fF_t ] 312 | \\ = \bolda^\T \boldx_{t+1} \EE[ u_{t+1} ] 313 | = 0 314 | \end{align*} 315 | 316 | This confirms $\{m_t\}$ is an {\sc mds} 317 | with respect to $\{\fF_t\}$ 318 | 319 | \end{frame} 320 | 321 | \begin{frame}\frametitle{Consistency} 322 | 323 | \vspace{2em} 324 | Under the conditions of \S\ref{ET-ss:sua}, the OLS estimator 325 | $\hboldbeta_T$ is consistent for $\boldbeta$: 326 | 327 | \vspace{.7em} 328 | \Thm 329 | \eqref{ET-t:cofols} 330 | If assumptions~\ref{ET-a:rtsc} and \ref{ET-a:stsc} hold, then 331 | % 332 | \begin{equation*} 333 | \hboldbeta_T \toprob \boldbeta \quad \text{as} \quad T \to \infty 334 | \end{equation*} 335 | 336 | 337 | \end{frame} 338 | 339 | \begin{frame} 340 | 341 | \vspace{2em} 342 | \Prf 343 | Recall equation \eqref{ET-eq:seeo}: 344 | % 345 | \begin{equation*} 346 | \hboldbeta_T - \boldbeta 347 | = \left[ \frac{1}{T} 
= \left[ \frac{1}{T} \sum_{t=1}^T \boldx_t \boldx_t^\T \right]^{-1}
\cdot \; \frac{1}{T} \sum_{t=1}^T \boldx_t u_t
\end{equation*}
%
We show the expression on the right-hand side converges in probability to $\boldzero$

First,
let's show
$\frac{1}{T} \sum_{t=1}^T \boldx_t u_t \toprob \boldzero$. In view of fact~\ref{ET-fa:reconpro}, it
suffices to show that, for any $\bolda \in \RR^K$,
%
\begin{equation}
\label{eq:amw}
\bolda^\T \left[ \frac{1}{T} \sum_{t=1}^T \boldx_t u_t \right]
\toprob \bolda^\T \boldzero = 0
\end{equation}
%
Define $m_t := \bolda^\T \boldx_t u_t$. The left-hand side of
(\ref{eq:amw}) can be written as $T^{-1} \sum_{t=1}^T m_t$

\end{frame}


\begin{frame}

\vspace{2em}
\Prf (cont.) Since
$\{m_t\}$ is a stationary {\sc mds} (lemma~\ref{ET-l:xumds}), the convergence $T^{-1} \sum_{t=1}^T m_t
\toprob 0$ follows from theorem~\ref{ET-t:mdclt}

Return to the expression on the right-hand side of
(\ref{eq:seeo})

By assumption~\ref{ET-a:rtsc} and fact~\ref{ET-fa:cmtetcv1}, we see that
%
\begin{equation}
\label{eq:cttii}
\left[ \frac{1}{T} \sum_{t=1}^T \boldx_t \boldx_t^\T \right]^{-1} \toprob \Sigmax^{-1}
\quad \text{as} \quad
T \to \infty
\end{equation}

Appealing to fact~\ref{ET-fa:cmtetcv1} once more, we obtain
%
\begin{equation*}
\hboldbeta_T - \boldbeta
= \left[ \frac{1}{T} \sum_{t=1}^T \boldx_t \boldx_t^\T \right]^{-1}
\cdot \; \frac{1}{T} \sum_{t=1}^T u_t \boldx_t
\toprob \Sigmax^{-1} \, \boldzero = \boldzero
\qedhere
\end{equation*}

\end{frame}
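\begin{frame}[fragile]

\vspace{2em}
A quick numerical illustration (an addition to these slides, not from ET; assumes NumPy) of $\hboldbeta_T \toprob \boldbeta$ in the AR(1) regression \eqref{eq:ar1reg} with $\beta = 0.5$:

\begin{pythoncode}
import numpy as np

np.random.seed(1)
beta = 0.5
for T in (10**2, 10**4, 10**6):
    u = np.random.randn(T + 1)   # iid N(0, 1) shocks
    y = np.zeros(T + 1)
    for t in range(T):
        y[t+1] = beta * y[t] + u[t+1]
    x, z = y[:-1], y[1:]         # regressor y_{t-1}, regressand y_t
    print(T, (x @ z) / (x @ x))  # OLS estimate of beta
\end{pythoncode}

The printed estimates should settle down near $0.5$ as $T$ grows, as the theorem predicts

\end{frame}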
\begin{frame}

\vspace{2em}
\Thm
\eqref{ET-t:cofhs2}
If assumptions~\ref{ET-a:rtsc} and \ref{ET-a:stsc} hold, then
%
\begin{equation*}
\hat \sigma^2_T \toprob \sigma^2
\quad \text{as} \quad T \to \infty
\end{equation*}


\Prf
By the definition of $\hat \sigma_T^2$ and the linear model assumption
\ref{ET-eq:lrtsc},
%
\begin{equation*}
\label{eq:ndhs2}
\hat \sigma_T^2
= \frac{1}{T} \sum_{t=1}^T (y_t - \boldx_t^\T \, \hboldbeta_T)^2
= \frac{1}{T} \sum_{t=1}^T
\left[ u_t + \boldx_t^\T \, (\boldbeta - \hboldbeta_T) \right]^2
\end{equation*}

\end{frame}

\begin{frame}

\vspace{2em}
\Prf (cont.)
Expand out the square
%
\begin{multline*}
\hat \sigma_T^2
= \frac{1}{T} \sum_{t=1}^T u_t^2
+ 2 (\boldbeta - \hboldbeta_T)^\T \frac{1}{T} \sum_{t=1}^T \boldx_t u_t
\\ + (\boldbeta - \hboldbeta_T)^\T
\left[ \frac{1}{T} \sum_{t=1}^T \boldx_t \boldx_t^\T \right]
(\boldbeta - \hboldbeta_T)
\end{multline*}
%
By assumption~\ref{ET-a:stsc} and the law of large numbers, the first term on
the right-hand side converges in probability to $\sigma^2$

\vspace{.7em}
Show the second and third terms converge in probability
to zero as $T \to \infty$ --- an exercise using convergence results we have already established (refer to fact~\ref{ET-fa:cmtetcv1})

\end{frame}

\section{Asymptotic Normality}

\begin{frame}\frametitle{Asymptotic Normality}

\vspace{2em}
\Thm\eqref{ET-t:cltols}
If assumptions~\ref{ET-a:rtsc} and \ref{ET-a:stsc} hold, then
%
\begin{equation*}
\sqrt{T} (\hboldbeta_T - \boldbeta) \tod
\nN \left(\boldzero, \sigma^2 \Sigmaxinv \right)
\quad \text{as} \quad T \to \infty
\end{equation*}
%

\Prf
Expression (\ref{eq:seeo}) gives
%
\begin{equation*}
\label{eq:seeo2}
\sqrt{T}(\hboldbeta_T - \boldbeta)
= \left[ \frac{1}{T} \sum_{t=1}^T \boldx_t \boldx_t^\T \right]^{-1}
\cdot \; T^{-1/2} \sum_{t=1}^T u_t \boldx_t
\end{equation*}
%
Let $\boldz$ be a random vector satisfying $\lL(\boldz) = \nN(\boldzero,
\sigma^2 \Sigmax)$

\end{frame}

\begin{frame}

\vspace{2em}
\Prf (cont.)

Suppose we can show
%
\begin{equation}
\label{eq:cux}
T^{-1/2} \sum_{t=1}^T u_t \boldx_t \tod \boldz
\quad \text{as} \quad T \to \infty
\end{equation}

If (\ref{eq:cux}) is valid, then, applying assumption~\ref{ET-a:rtsc} along with
fact~\ref{ET-fa:cmtetcv2}, we obtain
%
\begin{equation*}
\sqrt{T}(\hboldbeta_T - \boldbeta)
= \left[ \frac{1}{T} \sum_{t=1}^T \boldx_t \boldx_t^\T \right]^{-1}
\cdot \; T^{-1/2} \sum_{t=1}^T u_t \boldx_t
\,\, \tod \,\, \Sigmaxinv \boldz
\end{equation*}

\end{frame}

\begin{frame}

\vspace{2em}
\Prf (cont.)

Clearly $\Sigmaxinv \boldz$ is Gaussian with zero mean

By symmetry of $\Sigmaxinv$ (since $\Sigmax$ is symmetric) the
variance of $\Sigmaxinv \boldz$ is
%
\begin{equation*}
\Sigmaxinv \, \var[\boldz] \, \Sigmaxinv
=
\Sigmaxinv \, \sigma^2 \, \Sigmax \,\Sigmaxinv
=
\sigma^2 \Sigmaxinv
\end{equation*}
%
This completes the proof of theorem~\ref{ET-t:cltols}, conditional on
(\ref{eq:cux})

Let's now check that (\ref{eq:cux}) is valid

By the Cram\'er--Wold device (fact~\ref{ET-fa:cmtetc}),
it suffices to show that for any $\bolda \in \RR^K$, we have
%
\begin{equation}
\label{eq:cux2}
\bolda^\T \left[ T^{-1/2} \sum_{t=1}^T u_t \boldx_t \right] \tod \bolda^\T \boldz
\end{equation}

\end{frame}

\begin{frame}

\vspace{2em}
\Prf(cont.)
543 | Fix $\bolda$ and let $m_t := u_t \bolda^\T \boldx_t$; the expression 544 | on the left of (\ref{eq:cux2}) can be rewritten as 545 | $$T^{-1/2} \sum_{t=1}^T m_t$$ 546 | Since $\lL(\boldz) = \nN(\boldzero, \sigma^2 \Sigmax)$, 547 | to establish (\ref{eq:cux2}) we need to show 548 | % 549 | \begin{equation} 550 | \label{eq:cux3} 551 | T^{-1/2} \sum_{t=1}^T m_t 552 | \tod \nN(0, \sigma^2 \bolda^\T \Sigmax \bolda) 553 | \end{equation} 554 | 555 | From lemma~\ref{ET-l:xumds}, we know $\{m_t\}$ is stationary 556 | with $\EE[m_t^2] = \sigma^2 \bolda^\T \Sigmax \bolda$ and 557 | an {\sc mds} with respect to the filtration 558 | given in \eqref{eq:deffil} 559 | 560 | 561 | \end{frame} 562 | 563 | \begin{frame} 564 | 565 | \vspace{2em} 566 | \Prf (cont.) 567 | 568 | By the martingale difference CLT, (\ref{eq:cux3}) holds whenever 569 | % 570 | \begin{equation} 571 | \label{eq:laon} 572 | \frac{1}{T} \sum_{t=1}^T \EE[m_t^2 \given \fF_{t-1} ] 573 | \toprob \sigma^2 \bolda^\T \Sigmax \bolda 574 | \quad \text{as } T \to \infty 575 | \end{equation} 576 | % 577 | Since $\boldx_t \in \fF_{t-1}$, we have 578 | % 579 | \begin{multline*} 580 | \EE[m_t^2 \given \fF_{t-1} ] 581 | = \EE[ u_t^2 (\bolda^\T \boldx_t)^2 \given \fF_{t-1}] 582 | \\ = (\bolda^\T \boldx_t)^2 \EE[ u_t^2 \given \fF_{t-1}] 583 | = \sigma^2 (\bolda^\T \boldx_t)^2 584 | \end{multline*} 585 | % 586 | \end{frame} 587 | 588 | \begin{frame} 589 | 590 | \vspace{2em} 591 | \Prf (cont.) 592 | 593 | Another way to write the last expression is $\sigma^2 \bolda^\T \boldx_t 594 | \boldx_t^\T \bolda$ 595 | 596 | The left-hand side of (\ref{eq:laon}) is therefore 597 | % 598 | \begin{equation*} 599 | \frac{1}{T} \sum_{t=1}^T \EE[m_t^2 \given \fF_{t-1} ] 600 | = \frac{1}{T} \sum_{t=1}^T (\sigma^2 \bolda^\T \boldx_t \boldx_t^\T \bolda) 601 | = \sigma^2 \bolda^\T 602 | \left[ 603 | \frac{1}{T} \sum_{t=1}^T \boldx_t \boldx_t^\T 604 | \right] \bolda 605 | \end{equation*} 606 | % 607 | which converges in probability to $\sigma^2 \bolda^\T \Sigmax 608 | \bolda$ by assumption~\ref{ET-a:rtsc} and fact~\ref{ET-fa:cmtetcv1} 609 | 610 | \vspace{.7em} 611 | This 612 | verifies (\ref{eq:laon}), completing the proof \qedsymbol 613 | 614 | \end{frame} 615 | 616 | \begin{frame} 617 | 618 | \vspace{.7em} 619 | \Eg 620 | Consider again the scalar linear Gaussian AR(1) model $x_{t+1} = a x_t 621 | + w_{t+1}$ with $|a| < 1$ and $\{w_t\}$ {\sc iid} and standard normal 622 | 623 | Let $\{x_t\}$ be stationary 624 | 625 | As discussed in \S\ref{ET-ss:eb}, the OLS estimator of $a$ is 626 | % 627 | \begin{equation*} 628 | \hat a_T := \frac{\boldx^\T \boldy} {\boldx^\T \boldx} 629 | \quad \text{where} \quad 630 | \boldy := (x_1,\ldots,x_T) \text{ and } \boldx := (x_0,\ldots,x_{T-1}) 631 | \end{equation*} 632 | 633 | Both assumption~\ref{ET-a:rtsc} and assumption~\ref{ET-a:stsc} are satisfied, 634 | so $\sqrt{T} (\hat a_T - a)$ converges in distribution to 635 | $\nN(0, \sigma^2 \Sigmaxinv)$ 636 | 637 | \end{frame} 638 | 639 | \begin{frame} 640 | 641 | \vspace{2em} 642 | \Eg(cont.) 
In this case, $\sigma^2 = 1$ because the shocks are standard normal

Furthermore $\Sigmaxinv$ reduces to $1/\EE[x_1^2]$, where the expectation
is under the stationary distribution

The stationary distribution is $\nN(0,
1/(1-a^2))$ (recall our discussion in chapter 7 of ET, particularly surrounding Equation \eqref{ET-eq:sdsg})

Hence the inverse of $\EE[x_1^2]$ is $1 - a^2$, and
%
\begin{equation}
\label{eq:avarrols}
\sqrt{T} (\hat a_T - a) \tod \nN(0, 1 - a^2)
\end{equation}

\end{frame}


\begin{frame}\frametitle{Large Sample Tests}

\vspace{2em}
In the large sample
setting, the hypothesis to be tested is:
%
\begin{equation*}
H_0 \colon \beta_k = \beta_k^0
\end{equation*}

\vspace{.7em}
Recall that if the error
terms are normally distributed, then the expression $(\hat \beta_k - \beta_k)
/ \se(\hat \beta_k)$ is $t$-distributed with $N-K$ degrees of freedom
\begin{itemize}
\item in
the large sample case, we can use the CLT to show the
same statistic is asymptotically normal
\end{itemize}

\end{frame}

\begin{frame}

\vspace{2em}
\Thm
\eqref{ET-t:ttest2}
Let assumptions~\ref{ET-a:rtsc} and \ref{ET-a:stsc} hold, and let
%
\begin{equation*}
\se(\hat \beta_k^T)
:= \sqrt{ \hat \sigma^2_T v_k(\boldX) }
\end{equation*}

\vspace{.7em}
Under the null hypothesis $H_0$, we have
%
\begin{equation}
\label{eq:ttest2}
z_k^T := \frac{\hat \beta_k^T - \beta_k^0}{\se(\hat \beta_k^T)}
\tod \nN(0,1)
\quad \text{as} \quad
T \to \infty
\end{equation}

\end{frame}

\begin{frame}

\vspace{2em}
\Prf
Recall from theorem~\ref{ET-t:cltols} that
$\sqrt{T} (\hboldbeta_T - \boldbeta) \tod \boldz$, where $\boldz$ is a random vector with distribution
$\nN(\boldzero, \sigma^2 \Sigmaxinv)$
and $\boldbeta$ is the true parameter vector

\vspace{.7em}
Hence
%
\begin{equation*}
\sqrt{T} (\hat \beta_k^T - \beta_k)
= \bolde_k^\T [\sqrt{T} (\hboldbeta_T - \boldbeta)] \tod
\bolde_k^\T \boldz
\end{equation*}

\vspace{.7em}
The distribution of $\bolde_k^\T \boldz$ is
$\nN(0, \bolde_k^\T \var[\boldz] \bolde_k)
= \nN(0, \sigma^2 \bolde_k^\T \Sigmaxinv \bolde_k)$, so
%
\begin{equation}
\label{eq:tscno}
\frac{\sqrt{T} (\hat \beta_k^T - \beta_k)}
{\sqrt{\sigma^2 \bolde_k^\T \Sigmaxinv \bolde_k}}
\tod \nN(0, 1)
\end{equation}

\end{frame}

\begin{frame}

\vspace{2em}
\Prf (cont.)
Recall from \eqref{eq:cttii} that
\begin{equation*}
\left[ \frac{1}{T} \sum_{t=1}^T \boldx_t \boldx_t^\T \right]^{-1} \toprob \Sigmax^{-1}
\quad \text{as} \quad
T \to \infty
\end{equation*}

Now refer to our rules for convergence of random matrices, in particular, 5. of fact~\ref{ET-fa:cmtetcv1}.
We have
%
\begin{equation*}
T v_k (\boldX)
=
T \bolde_k^\T
(\boldX^\T\boldX)^{-1}
\bolde_k
=
\bolde_k^\T
\left[ \frac{1}{T} \sum_{t=1}^T \boldx_t \boldx_t^\T \right]^{-1}
\bolde_k
\toprob \bolde_k^\T \Sigmax^{-1} \bolde_k
\end{equation*}
%
By theorem~\ref{ET-t:cofhs2} we have $\hat \sigma_T^2 \toprob \sigma^2$, and
hence
%
\begin{equation*}
\sqrt{ \hat \sigma_T^2 \, T v_k(\boldX) }
\toprob
\sqrt{ \sigma^2 \bolde_k^\T \Sigmax^{-1} \bolde_k }
\end{equation*}
%

\end{frame}

\begin{frame}

\vspace{2em}
\Prf (cont.)
Combine the above with \eqref{eq:tscno} to arrive at
%
\begin{equation*}
\frac{\sqrt{T} (\hat \beta_k^T - \beta_k)}
{\sqrt{\hat \sigma_T^2 \, T v_k(\boldX)}}
\tod
\nN(0, 1)
\end{equation*}

\vspace{.7em}
Assuming $H_0$ and canceling $\sqrt{T}$ gives (\ref{eq:ttest2})\qedsymbol

\end{frame}

\section{MLE for Markov Processes}

\begin{frame}\frametitle{MLE for Markov Processes}

\vspace{2em}
Now turn to nonlinear estimation in a time series setting, using
maximum likelihood

\vspace{.7em}
Consider a Markov process. Suppose:
\begin{itemize}
\item transition
density $p_{\boldtheta}$ depends on some unknown parameter vector $\boldtheta
\in \Theta$
\item process has a unique stationary density
$\pi_{\infty}^{\boldtheta}$ for all $\boldtheta$, and that $\boldx_1$ is a draw from
this stationary density
\end{itemize}

\end{frame}

\begin{frame}

\vspace{2em}
Log-likelihood function
%
\begin{equation*}
\ell(\boldtheta) = \ln \pi_{\infty}^{\boldtheta}(\boldx_1) +
\sum_{t=1}^{T-1} \ln p_{\boldtheta}(\boldx_{t+1} \given \boldx_t)
\end{equation*}
%
In practice drop the first term in this expression
\begin{itemize}
\item influence of a single
element is likely to be negligible
\end{itemize}

\vspace{.7em}
Abusing notation slightly, write
%
\begin{equation}
\label{eq:likemark}
\ell(\boldtheta) = \sum_{t=1}^{T-1} \ln p_{\boldtheta}(\boldx_{t+1} \given \boldx_t)
\end{equation}

\end{frame}

\begin{frame}\frametitle{The ARCH Case}

\vspace{2em}
Recall the ARCH model

Suppose
$x_t =
\sigma_t w_t$ where $\sigma_{t+1}^2 =
\alpha_0 + \alpha_1 x_t^2$

\vspace{.7em}
Combining these equations:
%
\begin{equation}
\label{eq:arch}
x_{t+1} = (\alpha_0 + \alpha_1 x_t^2)^{1/2} w_{t+1}
\quad \text{with} \quad \{ w_t \} \iidsim \nN(0,1)
\end{equation}
%
where $\alpha_0 > 0$, $\alpha_1 \geq 0$

\vspace{.7em}
By \eqref{eq:likemark}, the log-likelihood function (writing $a := \alpha_0$ and $b := \alpha_1$) is
%
\begin{equation}
\label{eq:likearch}
\ell(a, b) = \sum_{t=1}^{T-1}
\left\{
- \frac{1}{2} \ln(2 \pi (a + b x_t^2)) - \frac{x_{t+1}^2}{2(a + b x_t^2)}
\right\}
\end{equation}

\end{frame}

\begin{frame}

\vspace{2em}
Rearranging, dropping terms that don't depend on $a$ or $b$, and multiplying
by 2 (an increasing transformation), rewrite as
%
\begin{equation}
\label{eq:likearch2}
\ell(a, b) = - \sum_{t=1}^{T-1}
\left\{ \ln z_t + \frac{x_{t+1}^2}{z_t} \right\}
\quad \text{where} \quad
z_t := a + b x_t^2
\end{equation}

\vspace{.7em}
Solution method:
\begin{itemize}
\item no analytical expressions for the
MLEs
\item need to use numerical routines, such as \R{}'s inbuilt optimization routines
\end{itemize}

\end{frame}

\begin{frame}[fragile]

Suppose the sequence of observations
$x_1,\ldots,x_T$ is stored in a vector \mintinline{r}{xdata}

The function
\mintinline{r}{arch_like} can then be optimized numerically via the commands:


\begin{rcode}
start_theta <- c(0.65, 0.35) # An initial guess of (a,b)
neg_like <- function(theta) {
    return(-arch_like(theta, xdata))
}
opt <- optim(start_theta, neg_like, method="BFGS")
\end{rcode}

Code to define the function \mintinline{r}{arch_like} and simulate observations is on the following slide

\end{frame}

\begin{frame}[fragile, allowframebreaks]


\small\begin{rcode}
# Log likelihood of the ARCH model at theta = (a, b)
arch_like <- function(theta, data) {
    Y <- data[-1]             # All but first element
    X <- data[-length(data)]  # All but last element
    Z <- theta[1] + theta[2] * X^2
    return(-sum(log(Z) + Y^2 / Z))
}

# Simulate n observations from the ARCH process
sim_data <- function(a, b, n=500) {
    x <- numeric(n)
    x[1] <- 0
    w <- rnorm(n)
    for (t in 1:(n-1)) {
        x[t+1] <- sqrt(a + b * x[t]^2) * w[t+1]
    }
    return(x)
}

xdata <- sim_data(0.5, 0.5) # True parameters
\end{rcode}

\end{frame}

\section{The Newton--Raphson Algorithm}

\begin{frame}\frametitle{The Newton--Raphson Algorithm}

\vspace{2em}
The Newton--Raphson algorithm is a \emph{root-finding} algorithm
\begin{itemize}
\item given a function $g \colon \RR \to \RR$, the algorithm searches for
points $\bar s \in \RR$ such that $g(\bar s) = 0$
\end{itemize}

\vspace{.7em}

It can also be used to optimize differentiable functions

\begin{itemize}
\item for differentiable
functions, interior optimizers are always roots of the
objective function's first derivative
\end{itemize}


\end{frame}

\begin{frame}

\vspace{2em}
Let
\begin{itemize}
\item $g \colon \RR \to \RR$
\item $s_0$ be some initial point in $\RR$ that we think (hope) is somewhere near a root
\end{itemize}

\vspace{.7em}
We know how to move to the root of the
function that forms the \emph{tangent line} to $g$ at $s_0$

Replace $g$ with its linear approximation around $s_0$, given by
%
\begin{equation*}
\tilde g(s) := g(s_0) + g'(s_0)(s - s_0)
\qquad (s \in \RR)
\end{equation*}
and solve for the root of $\tilde g$

\end{frame}
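\begin{frame}

\vspace{2em}
\Eg
A worked first step (an illustration added to these slides): solving $\tilde g(s) = 0$ gives $s = s_0 - g(s_0)/g'(s_0)$

\vspace{.7em}
With $g(s) = s^3/10$ and $s_0 = 2$, as in the next figure, we get $g(s_0) = 0.8$ and $g'(s_0) = 3 s_0^2 / 10 = 1.2$, so the new guess is
%
\begin{equation*}
s_1 = 2 - \frac{0.8}{1.2} = \frac{4}{3}
\end{equation*}
%
One step already moves us much closer to the true root $\bar s = 0$

\end{frame}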
\begin{frame}

\begin{figure}
\begin{center}

\begin{tikzpicture}[scale=1]

\def\xmin{-4}
\def\xmax{4}
\def\szero{2}
\def\sone{-2}
\def\coef{0.1}

\draw[<->, thick] (\xmin,0) -- (\xmax,0);

\node at (\szero,0) [below] {$s_0$};
\node at (0,0) [below] {$\bar s$};

\draw[color=blue, samples=20, domain=-2:3] plot[id=tikzqfnr]
function{\coef * x**3} node[right] {$g$};

\draw[color=black, samples=20, domain=-0.2:3] plot[id=tikzqfnra]
function{ \coef * \szero**3 + \coef * 3 * (\szero * \szero) * (x - \szero)}
node[right] {$\tilde g$};

\draw[dashed] (\szero, 0) -- (\szero, \coef * \szero * \szero * \szero);

\node at (\szero - \szero / 3 + .1, 0) [below] {$s_1$} ;

\end{tikzpicture}

\caption{\label{f:nr1d} First step of the Newton--Raphson algorithm}
\end{center}
\end{figure}

\end{frame}

\begin{frame}

\vspace{2em}
Next guess of the root $s_1 := s_0 -
g(s_0)/g'(s_0)$

\vspace{.7em}
Procedure is repeated, taking the tangent of $g$ at $s_1$

\vspace{.7em}
Generates a sequence of points $\{s_k\}$ satisfying
%
\begin{equation*}
s_{k+1} = s_k - \frac{g(s_k)}{g'(s_k)}
\end{equation*}

\end{frame}

\begin{frame}

\vspace{2em}
Various results tell us that when $g$ is suitably well-behaved
and $s_0$ is sufficiently close to a given root $\bar s$, the sequence
$\{s_k\}$ will converge to $\bar s$

\vspace{.7em}
In practical situations we often have no
way of knowing whether the conditions are satisfied, and there have been many
attempts to make the procedure more robust

\vspace{.7em}
The \R{} optimization routine described above is a child of this process

\end{frame}

\begin{frame}\frametitle{Optimization}

\vspace{.7em}
Suppose $g \colon \RR \to \RR$ is a twice differentiable
function we wish to maximize

\vspace{.7em}
If $s^*$ is a maximizer of
$g$, then $g'(s^*) = 0$

\vspace{.7em}
Apply the Newton--Raphson algorithm to $g'$, giving the sequence
%
\begin{equation*}
\label{eq:1dnr}
s_{k+1} = s_k - \frac{g'(s_k)}{g''(s_k)}
\end{equation*}

\end{frame}

\begin{frame}

\vspace{2em}
Multivariate case: suppose $g$ is
twice differentiable and $g \colon \RR^2 \to \RR$

\vspace{.7em}
The \navy{gradient vector}
and \navy{Hessian} of $g$ at $(x,y) \in \RR^2$ are defined as
%
\begin{equation*}
\nabla g(x,y) :=
\left(
\begin{array}{c}
g'_1 (x,y) \\
g'_2 (x,y)
\end{array}
\right)
\end{equation*}
%
and
%
\begin{equation*}
\nabla^2 g(x,y) :=
\left(
\begin{array}{cc}
g''_{11} (x,y)
& g''_{12} (x,y)
\\
g''_{21} (x,y)
& g''_{22} (x,y)
\end{array}
\right)
\end{equation*}
%
Here $g'_i$ is the first partial of $g$ with respect to its $i$th argument,
$g''_{ij}$ is the second cross-partial, and so on

\end{frame}

\begin{frame}

\vspace{2em}
Newton--Raphson algorithm generates the sequence $\{(x_k, y_k)\}$
defined by
%
\begin{equation*}
\label{eq:2dnr}
(x_{k+1}, y_{k+1}) = (x_k, y_k)
- [\nabla^2 g(x_k, y_k)]^{-1} \nabla g(x_k, y_k)
\end{equation*}
%
from some initial guess $(x_0, y_0)$

(Assuming the
Hessian matrix is nonsingular)

\end{frame}
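\begin{frame}[fragile]

\vspace{1em}
A minimal Python sketch of this iteration (an addition to these slides; the helper name \mintinline{python}{newton_max} and the quadratic test objective are illustrative assumptions):

\begin{pythoncode}
import numpy as np

def newton_max(grad, hess, x0, tol=1e-10, max_iter=50):
    # Iterate x <- x - [Hessian(x)]^{-1} gradient(x)
    x = np.asarray(x0, dtype=float)
    for _ in range(max_iter):
        step = np.linalg.solve(hess(x), grad(x))
        x = x - step
        if np.max(np.abs(step)) < tol:
            break
    return x

# Maximize g(x, y) = -(x - 1)^2 - 2 (y + 3)^2
grad = lambda v: np.array([-2 * (v[0] - 1), -4 * (v[1] + 3)])
hess = lambda v: np.diag([-2.0, -4.0])
print(newton_max(grad, hess, [0.0, 0.0]))  # -> [1.0, -3.0]
\end{pythoncode}

For a concave quadratic the Hessian is constant, so a single Newton step lands exactly on the maximizer

\end{frame}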
\begin{frame}

\vspace{2em}
Consider maximization of the log-likelihood function
for the ARCH model --- Equation \eqref{eq:likearch2} above

Let $z_t$ be as defined in \eqref{eq:likearch2}

\vspace{.7em}
The first partials are
%
\begin{equation*}
\frac{\partial \ell}{\partial a} (a,b)
=
\sum_{t=1}^{T-1}
\left[
\frac{x_{t+1}^2}{z_t^2} - \frac{1}{z_t}
\right],
\quad
\frac{\partial \ell}{\partial b} (a,b)
=
\sum_{t=1}^{T-1}
x_t^2 \left[
\frac{x_{t+1}^2}{z_t^2} - \frac{1}{z_t}
\right]
\end{equation*}

\end{frame}

\begin{frame}

\vspace{2em}
The second partials are
%
\begin{equation*}
\frac{\partial^2 \ell}{\partial a^2} (a,b) =
\sum_{t=1}^{T-1}
\left[
\frac{1}{z_t^2} - 2 \frac{x_{t+1}^2}{z_t^3}
\right]
\end{equation*}
%
and
%
\begin{equation*}
\frac{\partial^2 \ell}{\partial b^2} (a,b)
=
\sum_{t=1}^{T-1} x_t^4
\left[
\frac{1}{z_t^2} - 2 \frac{x_{t+1}^2}{z_t^3}
\right]
\end{equation*}

\vspace{.7em}
The cross-partial is
%
\begin{equation*}
\frac{\partial^2 \ell}{\partial a \partial b} (a,b)
=
\sum_{t=1}^{T-1} x_t^2
\left[
\frac{1}{z_t^2} - 2 \frac{x_{t+1}^2}{z_t^3}
\right]
\end{equation*}
%
\end{frame}

\begin{frame}

\begin{figure}
\begin{center}
\scalebox{.42}{\includegraphics[trim={0em 1em 0em 3em}, clip]{nr.pdf}}
\caption{\label{f:nr} Newton--Raphson iterates}
\end{center}
\end{figure}

\end{frame}

\end{document}
--------------------------------------------------------------------------------
/source_code/Lecture_5.tex:
--------------------------------------------------------------------------------

\input{preamb.tex}
\newcounter{saveenumi}
\newcommand{\seti}{\setcounter{saveenumi}{\value{enumi}}}
\newcommand{\conti}{\setcounter{enumi}{\value{saveenumi}}}

\resetcounteronoverlays{saveenumi}
\usepackage[export]{adjustbox}




\title{A Primer in Econometric Theory}

\subtitle
{Lecture 5: Asymptotics}

\author{John Stachurski \\ \tiny Lectures by Akshay Shanker}




\begin{document}

\begin{frame}
\titlepage
\end{frame}

\section{LLN and CLT}

\begin{frame}\frametitle{Convergence of Random Vectors}

\vspace{2em}
The law of large numbers and central limit theorem are pillars of econometrics and statistics

\vspace{1em}
In this lecture, we review both theorems

\begin{itemize}
\item we start with the necessary concepts of convergence in probability and
convergence in distribution
\end{itemize}


\end{frame}

\begin{frame}\frametitle{Convergence in Probability}

\vspace{2em}
A sequence of random vectors $\{\boldx_n\}$ is said to
\navy{converge in probability} to a random vector $\boldx$ if,
%
\begin{equation}
\label{eq:coninp}
\text{for all $\delta > 0$},
\quad
\PP \{ \|\boldx_n - \boldx\| > \delta \} \to 0
\quad \text{as} \quad
n \to \infty
\end{equation}

\vspace{1em}
In symbols, we write \navy{$\boldx_n \toprob \boldx$}.
In the scalar case 64 | $\|\boldx_n - \boldx\|$ reduces to $|x_n - x|$ 65 | 66 | \end{frame} 67 | 68 | 69 | \begin{frame} 70 | 71 | \vspace{2em} 72 | \Eg 73 | If $\lL(\boldx_n) = \nN(\boldzero,\sigma_n\boldI)$ and $\sigma_n \to 0$, 74 | then $\boldx_n \toprob \boldzero$ as $n\to \infty$. 75 | 76 | The variance is $\sigma_n = 1/n$ 77 | 78 | With fixed $\delta > 0$, the probability $\PP \{ |x_n | > \delta \}$ is 79 | shown for different values of $n$. This probability collapses to zero 80 | as $n \to \infty$ 81 | 82 | If we now fix $\delta$ at a smaller positive value, 83 | $\PP \{ |x_n | > \delta \}$ can again be made arbitrarily small by 84 | increasing $n$, thus \eqref{eq:coninp} holds 85 | 86 | \end{frame} 87 | 88 | \begin{frame} 89 | 90 | \vspace{2em} 91 | \begin{figure} 92 | \centering 93 | \scalebox{.4}{\includegraphics[trim={4em 4em 4em 4em}, clip]{conv_in_prob.pdf}} 94 | \caption{\label{f:conv_prob} $\PP \{ |x_n | > \delta \} \to 0$ when 95 | $\lL(x_n) = \nN(0, 1/n)$} 96 | \end{figure} 97 | 98 | \end{frame} 99 | 100 | \begin{frame} 101 | 102 | \vspace{2em} 103 | \Fact\eqref{ET-fa:reconpro} 104 | The following statements are true: 105 | % 106 | \begin{enumerate} 107 | \item $\boldx_n \toprob \boldx \iff \| \boldx_n - \boldx \| \toprob 108 | 0$ 109 | \item $\boldx_n \toprob \boldx \implies g(\boldx_n) \toprob g(\boldx)$ 110 | whenever $g$ is continuous at $\boldx$ 111 | \item $\boldx_n \toprob \boldx$ and $\boldy_n \toprob \boldy$ 112 | $\implies$ $\boldx_n + \boldy_n \toprob 113 | \boldx + \boldy$ and $\boldx_n^\T \boldy_n \toprob \boldx^\T 114 | \boldy$ 115 | \item $\boldx_n \toprob \boldx$ and $\bolda_n \to \bolda$ $\implies$ 116 | $\boldx_n + \bolda_n \toprob 117 | \boldx + \bolda$ and $\boldx_n^\T \bolda_n \toprob \boldx^\T 118 | \bolda$ 119 | \item $\boldx_n \toprob \boldx$ $\iff$ $\bolda^\T \boldx_n \toprob \bolda^\T \boldx$ for any $\bolda \in \RR^K$ 120 | \end{enumerate} 121 | 122 | \end{frame} 123 | 124 | \begin{frame}\frametitle{Convergence in mean square} 125 | 126 | \vspace{2em} 127 | The scalar sequence $\{x_n\}$ is said to converge to $x$ \navy{in mean square} 128 | if 129 | % 130 | \begin{equation} 131 | \label{eq:cims} 132 | \EE (x_n - x)^2 \to 0 133 | \quad \text{as} \quad n \to \infty 134 | \end{equation} 135 | % 136 | and we write \navy{$x_n \toms x$} 137 | 138 | \vspace{1em} 139 | Unlike convergence in probability, for 140 | convergence in mean square to be defined we require our variables to have 141 | finite second moments 142 | 143 | \end{frame} 144 | 145 | \begin{frame} 146 | 147 | \vspace{2em} 148 | \Fact\eqref{ET-fa:ffci} 149 | Let $\{x_n\}$ and $x$ have finite second moments and let $\alpha$ be any 150 | constant. The following statements are true: 151 | % 152 | \begin{enumerate} 153 | \item $x_n \toms x \implies x_n \toprob x$. 154 | \item $x_n \toms \alpha$ $\iff$ $\EE x_n \to \alpha$ and $\var[x_n] 155 | \to 0$. 156 | \end{enumerate} 157 | 158 | \vspace{1em} 159 | Part 1. follows from Chebyshev's inequality --- $\PP\{|x| \geq \delta \} \leq \frac{\EE x^2}{\delta^2}$ 160 | 161 | In particular, from monotonicity of $\PP$: 162 | % 163 | \begin{equation*} 164 | \PP\{|x_n - x| > \delta \} 165 | \leq \PP\{|x_n - x| \geq \delta \} 166 | \leq \frac{\EE (x_n - x)^2}{\delta^2} 167 | \end{equation*} 168 | % 169 | \end{frame} 170 | 171 | \begin{frame} 172 | 173 | \vspace{2em} 174 | Part 2. 
of the above is implied by: 175 | 176 | \Fact\eqref{ET-fa:dmse0} 177 | For any $x \in L_2$ and any constant $\alpha$ we have 178 | % 179 | \begin{equation} 180 | \label{eq:dmse0} 181 | \EE [ (x - \alpha)^2 ] = \var[x] + (\EE[x] - \alpha)^2 182 | \end{equation} 183 | 184 | Proof is an exercise. 185 | 186 | 187 | \end{frame} 188 | 189 | \begin{frame} 190 | 191 | \vspace{2em} 192 | As a prelude to the Law of Large Numbers (LLN), let's consider the 193 | effects of averaging over independent random quantities 194 | 195 | Let 196 | % 197 | \begin{itemize} 198 | \item $x_n$ be the payoff from holding one dollar of asset $n$, 199 | \item $\EE x_n = \mu$ and $\var[x_n] = \sigma^2$ for all $n$, and 200 | \item $\cov[x_j, x_k] = 0$ when $j \not= k$. 201 | \end{itemize} 202 | 203 | \vspace{1em} 204 | If we hold just asset 1, then the payoff is $x_1$, the expected payoff is $\mu$ 205 | and the variance is $\sigma^2$ 206 | 207 | \end{frame} 208 | 209 | 210 | \begin{frame} 211 | 212 | \vspace{2em} 213 | If we diversify by 214 | spreading one dollar evenly over $N$ of these assets, our payoff is 215 | % 216 | \begin{equation*} 217 | \bar x_N := \frac{1}{N} \sum_{n=1}^N x_n 218 | \end{equation*} 219 | 220 | \vspace{1em} 221 | The expected payoff is unchanged at 222 | % 223 | \begin{equation*} 224 | \label{eq:meanub} 225 | \EE \bar x_N 226 | = \EE \left[ \frac{1}{N} \sum_{n=1}^N x_n \right] 227 | = \frac{1}{N} \sum_{n=1}^N \EE x_n 228 | = \mu 229 | \end{equation*} 230 | % 231 | 232 | \end{frame} 233 | 234 | 235 | \begin{frame} 236 | 237 | \vspace{2em} 238 | But the variance declines at rate $\frac{1}{N}$ because 239 | % 240 | \begin{align*} 241 | \EE [(\bar x_N - \mu)^2 ] 242 | & = \EE \left\{ \left[ \frac{1}{N} \sum_{i=1}^N (x_i - \mu) \right]^2 \right\} 243 | \\ 244 | & = \frac{1}{N^2} \sum_{i=1}^N \sum_{j=1}^N \EE (x_i - \mu)(x_j - \mu) 245 | \\& = \frac{1}{N^2} \sum_{i=1}^N \EE (x_i - \mu)^2 246 | = \frac{\sigma^2}{N} 247 | \end{align*} 248 | 249 | The important equality here is the third one, which holds because of the zero 250 | covariance between assets 251 | 252 | \end{frame} 253 | 254 | \begin{frame} 255 | 256 | \vspace{2em} 257 | To summarize, 258 | % 259 | \begin{equation} 260 | \label{eq:sosm} 261 | \EE \bar x_N = \mu 262 | \quad \text{and} \quad 263 | \var[ \bar x_N ] = \frac{\sigma^2}{N} 264 | \quad \text{for all } N 265 | \end{equation} 266 | % 267 | By taking $N \to 268 | \infty$ and combining \eqref{eq:sosm} with fact~\ref{ET-fa:ffci} above we obtain a proof of 269 | the \navy{law of large numbers}: 270 | 271 | \vspace{1em} 272 | \Thm 273 | \eqref{ET-t:lln0} 274 | Let $\{x_n\}$ be {\sc iid} copies of $x$. 
If $x$ is integrable, then
%
\begin{equation}
\label{eq:lln0}
\frac{1}{N} \sum_{n=1}^N x_n \toprob \EE x
\quad \text{ as } \quad N \to \infty
\end{equation}


Our proof sketch assumed a finite second moment; see ET page 164 for references on proofs of the LLN without this assumption

\end{frame}

\begin{frame}

\vspace{2em}
We can extend \eqref{eq:lln0} to
arbitrary functions of random variables and random vectors:

\vspace{1em}
If
$\boldx$ is any random vector, $\{\boldx_n\}$ are {\sc iid} copies and
$h \colon \RR^N \to \RR$ is any $\bB$-measurable function such that
$h(\boldx)$ is integrable, then
%
\begin{equation*}
\label{eq:lln0g}
\frac{1}{N} \sum_{n=1}^N h(\boldx_n) \toprob \EE h(\boldx)
\quad \text{ as } \quad N \to \infty
\end{equation*}

The proof follows from theorem~\ref{ET-t:lln0} (exercise, or see page 164 of ET)

\end{frame}

\begin{frame}

\vspace{2em}
The law of large numbers applies to probabilities as well as
expectations

\vspace{1em}
Fix $B \in \bB(\RR^N)$ and let $h(\bolds) = \1_B(\bolds) = \1\{\bolds \in B\}$;
we have
%
\begin{equation*}
\EE h(\boldx) = \EE \1\{\boldx \in B\} = \PP \{\boldx \in B\}
\end{equation*}
%
Combining this equality with the LLN,
if $\{\boldx_n\}$ is {\sc iid} with distribution $P$, then
%
\begin{equation*}
\label{eq:lln0lg}
\frac{1}{N} \sum_{n=1}^N \1\{\boldx_n \in B\} \toprob P(B)
\end{equation*}
%
The fraction of the sample that falls in $B$
converges to the probability that the distribution assigns to $B$

\end{frame}

\begin{frame}

\vspace{2em}
To illustrate the law of large numbers, consider flipping a coin until
10 heads have occurred

\begin{itemize}
\item probability of heads is
0.4
\end{itemize}

\vspace{1em}
Let $x$ be the number of tails observed in the process
\begin{itemize}
\item this random variable is known to have the \navy{negative binomial
distribution} with $\EE x =15$
\end{itemize}

The LLN predicts that
if we simulate a large number of observations of $x$ and take the average,
we get a value close to 15

\end{frame}

\begin{frame}[fragile]

\vspace{2em}
Julia code to illustrate LLN:
\begin{juliacode}
using Statistics  # provides mean on current Julia

num_reps = 10^6
outcomes = zeros(num_reps)

for i in 1:num_reps
    num_tails = num_heads = 0
    while num_heads < 10
        b = rand()
        num_heads = num_heads + (b < 0.4)
        num_tails = num_tails + (b >= 0.4)
    end
    outcomes[i] = num_tails
end

println(mean(outcomes))
\end{juliacode}

\end{frame}

\begin{frame}

\vspace{2em}
What happens when the finite first
moment condition in the LLN is not enforced?
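\end{frame}

\begin{frame}[fragile]

\vspace{2em}
A quick numerical check (an addition to these slides, not from ET; assumes NumPy): Cauchy draws have no first moment, and the sample mean fails to settle down

\begin{pythoncode}
import numpy as np

np.random.seed(42)
x = np.random.standard_cauchy(10**6)
for n in (10**2, 10**4, 10**6):
    print(n, x[:n].mean())  # sample mean of first n draws
\end{pythoncode}

Unlike the negative binomial experiment above, the printed means wander as $n$ grows --- see the next figure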
388 | 389 | \end{frame} 390 | 391 | \begin{frame} 392 | 393 | \vspace{2em} 394 | \begin{figure} 395 | \centering 396 | \scalebox{.44}{\includegraphics[trim={5em 5em 5em 5em}, clip]{cauchy_samples.pdf}} 397 | \caption{\label{f:cauchy_samples} Samples from the Cauchy distribution and sample mean} 398 | 399 | \end{figure} 400 | 401 | \end{frame} 402 | 403 | \begin{frame}\frametitle{Convergence in Distribution} 404 | 405 | \vspace{2em} 406 | The common notion of convergence of distributions, which is 407 | called weak convergence, requires $P_n(B) \to P(B)$ for all 408 | ``continuity sets" in $\RR^K$ 409 | 410 | \vspace{1em} 411 | Equivalently: $\{P_n\}$ \navy{converges weakly} to $P$ if 412 | % 413 | \begin{align*} 414 | \int h(\bolds) P_n(\diff \bolds) \to \int h(\bolds) P(\diff \bolds) 415 | \quad 416 | \\ \text{$\forall$ continuous bounded $h \colon \RR^K \to \RR$} 417 | \end{align*} 418 | % 419 | and we write \navy{$P_n \tow P$} 420 | 421 | \end{frame} 422 | 423 | \begin{frame} 424 | 425 | \vspace{2em} 426 | \Fact(6.1.4) 427 | Let $F_n$ be the {\sc cdf} of $P_n$ and let $F$ be the {\sc cdf} of $P$. 428 | In the univariate case ($K=1$) we have 429 | 430 | \begin{align*} 431 | P_n \tow P 432 | \quad \iff \quad 433 | F_n(s) \to F(s) 434 | \\ \; \text{ for all $s$ at which $F$ is continuous} 435 | \end{align*} 436 | 437 | \vspace{1em} 438 | \Eg 439 | It can be shown that the $t$-distribution with $k$ degrees of freedom 440 | converges weakly to the standard normal distribution as $k \to \infty$ 441 | 442 | \end{frame} 443 | 444 | \vspace{2em} 445 | \begin{frame} 446 | \begin{figure} 447 | \begin{center} 448 | \scalebox{.4}{\includegraphics{t_to_norm.pdf}} 449 | \caption{\label{f:t_to_norm} $t$-Distribution with $k$ df converges to $\nN(0,1)$ as $k \to \infty$} 450 | \end{center} 451 | \end{figure} 452 | 453 | \end{frame} 454 | 455 | \begin{frame} 456 | 457 | \vspace{2em} 458 | \Fact\eqref{ET-fa:cdicd} 459 | Let $\{P_n\}$ and $P$ be absolutely continuous probability measures on 460 | $\RR^K$, with densities $p_n$ and $p$ 461 | 462 | If $p_n(\bolds) \to p(\bolds)$ for all 463 | $\bolds \in \RR^K$, then $P_n \tow P$ 464 | 465 | \end{frame} 466 | 467 | \begin{frame} 468 | 469 | \vspace{2em} 470 | Let $\{\boldx_n\}$ and $\boldx$ be random vectors 471 | 472 | We say $\boldx_n \to \boldx$ \navy{in 473 | distribution} if their respective distributions converge weakly 474 | 475 | The 476 | convergence is symbolized by $\boldx_n \tod \boldx$ 477 | 478 | Thus 479 | % 480 | \begin{equation*} 481 | \boldx_n \tod \boldx 482 | \; \iff \; 483 | \lL(\boldx_n) \tow \lL(\boldx) 484 | \end{equation*} 485 | % 486 | Equivalent to: 487 | % 488 | \begin{equation*} 489 | \EE[ h(\boldx_n)] \to \EE[ h(\boldx) ] 490 | \quad 491 | \text{for all continuous bounded $h \colon \RR^K \to \RR$} 492 | \end{equation*} 493 | % 494 | (Why?) 495 | 496 | \end{frame} 497 | 498 | \begin{frame} 499 | 500 | \vspace{2em} 501 | \Fact\eqref{ET-fa:cmtetc} 502 | The following statements are true: 503 | % 504 | \begin{enumerate} 505 | \item If $g \colon \RR^K \to \RR^J$ is continuous and 506 | $\boldx_n \tod \boldx$, then $g(\boldx_n) \tod g(\boldx)$. 507 | \item If $\bolda^\T \boldx_n \tod \bolda^\T \boldx$ for any $\bolda \in \RR^K$, 508 | then $\boldx_n \tod \boldx$. 509 | \item $\boldx_n \toprob \boldx \implies \boldx_n \tod \boldx$. 510 | \item If $\bolda$ is a constant vector and $\boldx_n \tod \bolda$, 511 | then $\boldx_n \toprob \bolda$ 512 | \end{enumerate} 513 | 514 | Part 1. 
is called the \navy{continuous mapping
theorem}

Part 2. is called the
Cram\'er--Wold theorem, or the \navy{Cram\'er--Wold device}

\end{frame}

\begin{frame}

\vspace{2em}
\Fact\eqref{ET-fa:slut}
If $\alpha$ is constant, $x_n \toprob \alpha$ and $y_n \tod y$, then
$x_n + y_n \tod \alpha + y$ and $x_n y_n \tod \alpha y$

\vspace{1em}
An immediate but useful consequence is the following:

\Fact\eqref{ET-fa:sluti}
$x_n \toprob 0$ and $y_n \tod y$ $\implies$ $x_n y_n \toprob 0$

\end{frame}


\begin{frame}\frametitle{The Central Limit Theorem}

\vspace{2em}
The \navy{central limit theorem}
is among the most striking and important results in all of
mathematics

\vspace{1em}
\Thm\eqref{ET-t:clt0}
Let $x$ have finite second
moment and let $\{x_n\}$ be {\sc iid} copies of $x$. If
$\mu := \EE x$ and $\sigma^2 := \var x$, then
%
\begin{equation*}
\label{eq:clt0}
\sqrt{N} (\bar x_N - \mu)
\tod \nN(0, \sigma^2)
\quad \text{ as } \quad
N \to \infty
\end{equation*}

\end{frame}

\begin{frame}

\vspace{2em}
On one hand, $(\bar x_N - \mu) \toprob 0$ by the LLN;
on the other hand, $\sqrt{N} \to \infty$

If we take the product, these two
competing terms just balance

\vspace{1em}
The distribution of the product
approaches a zero-mean Gaussian as $N \to \infty$, regardless of the
distribution of $x$

\end{frame}

\begin{frame}

\vspace{2em}
Consider simulating the CLT

Let $Q_N :=$ the distribution of $\sqrt{N} (\bar x_N - \mu)$
for $N=1,\ldots,5$

Initial distribution $Q=Q_1$ is multi-modal,
constructed as a convex combination of three beta distributions

\end{frame}

\begin{frame}

\vspace{2em}
\begin{figure}
\centering
\scalebox{.46}{\includegraphics[trim={5em 2em 10em 2em}, clip, center]{clt3d_2.pdf}}
\caption{\label{f:clt3d} CLT in action, starting from a beta mixture}
\end{figure}

\end{frame}

\begin{frame}

\vspace{2em}
Another common statement of the central limit theorem: if all the conditions of the CLT are satisfied, then

\begin{equation}
\label{eq:clt1}
z_N := \sqrt{N} \left\{ \frac{\bar x_N - \mu}{\sigma} \right\}
\tod \nN(0,1)
\quad \text{ as } \quad
N \to \infty
\end{equation}

\end{frame}

\begin{frame}[fragile]

\vspace{2em}
Python code to illustrate CLT:
\begin{pythoncode}
import numpy as np
import scipy.stats as st

num_reps = 5000
outcomes = np.empty(num_reps)
N, k = 1000, 5 # k = degrees of freedom
chi = st.chi2(k)

for i in range(num_reps):
    xvec = chi.rvs(N)
    outcomes[i] = np.sqrt(N / (2 * k))\
        *(xvec.mean() - k)

\end{pythoncode}

\end{frame}

\begin{frame}

\vspace{2em}
The listing generates 5,000 observations of

\begin{equation*}
z_N := \sqrt{N} \left\{ \frac{\bar x_N - \mu}{\sigma} \right\}
\tod \nN(0,1)
\quad \text{ as } \quad
N \to \infty
\end{equation*}

Each $x_n$ is $\chi^2(5)$
\begin{itemize}
\item mean of this
distribution is 5, and the variance is $2 653 | \times 5 = 10$ 654 | \end{itemize} 655 | 656 | \vspace{1em} 657 | The observations of $z_N$ are stored in the vector 658 | \mintinline{python}{outcomes} 659 | 660 | \end{frame} 661 | 662 | \begin{frame} 663 | 664 | \begin{figure} 665 | \begin{center} 666 | \scalebox{.44}{\includegraphics{illus_clt.pdf}} 667 | \caption{\label{f:illus_clt} Observations of $z_N$ in \eqref{eq:clt1} when the underlying distribution is $\chi^2(5)$} 668 | \end{center} 669 | \end{figure} 670 | 671 | \end{frame} 672 | 673 | \section{Extensions of CLT and LLN} 674 | 675 | \begin{frame}\frametitle{Convergence of Random Matrices} 676 | 677 | \vspace{2em} 678 | Let $\{\boldX_n\}_{n=1}^{\infty}$ be a sequence of random 679 | $N \times K$ matrices. We say that $\boldX_n$ converges to a random $N 680 | \times K$ matrix $\boldX$ \navy{in probability} and write $\boldX_n \toprob 681 | \boldX$ if 682 | % 683 | \begin{equation*} 684 | \| \boldX_n - \boldX \| \toprob 0 685 | \quad \text{as} \quad n \to \infty 686 | \end{equation*} 687 | % 688 | where $\| \cdot \|$ is the matrix norm defined in \S\ref{ET-ss:mn} 689 | 690 | \end{frame} 691 | 692 | \begin{frame} 693 | 694 | \vspace{2em} 695 | \Fact\eqref{ET-fa:cmtetcv1} 696 | Assuming conformability, the following statements are true: 697 | % 698 | \begin{enumerate} 699 | \item If $\boldX_n \toprob \boldX$ and $\boldX_n$ and $\boldX$ are 700 | nonsingular, then $\boldX_n^{-1} \toprob \boldX^{-1}$. 701 | \item If $\boldX_n \toprob \boldX$ and $\boldY_n \toprob \boldY$, then 702 | % 703 | \begin{equation*} 704 | \boldX_n + \boldY_n \toprob \boldX + \boldY, 705 | \quad 706 | \boldX_n \boldY_n \toprob \boldX \boldY, 707 | \quad \text{and} \quad 708 | \boldY_n \boldX_n \toprob \boldY \boldX 709 | \end{equation*} 710 | % 711 | \item If $\boldX_n \toprob \boldX$ and $\boldA_n \to \boldA$, then 712 | % 713 | \begin{equation*} 714 | \boldX_n + \boldA_n \toprob \boldX + \boldA, 715 | \quad 716 | \boldX_n \boldA_n \toprob \boldX \boldA, 717 | \quad \text{and} \quad 718 | \boldA_n \boldX_n \toprob \boldA \boldX 719 | \end{equation*} 720 | \seti % 721 | \end{enumerate} 722 | 723 | \end{frame} 724 | 725 | \begin{frame} 726 | 727 | \vspace{2em} 728 | \begin{enumerate} 729 | \conti 730 | \item $\boldX_n \toprob \boldX$ if and only if $\boldX_n \bolda \toprob 731 | \boldX \bolda$ for any conformable vector $\bolda$ 732 | \item $\bolda^\T \boldX_n \bolda \toprob \bolda^\T \boldX \bolda$ 733 | whenever $\bolda$ is a conformable constant vector and 734 | $\boldX_n \toprob \boldX$ 735 | \end{enumerate} 736 | 737 | \end{frame} 738 | 739 | \begin{frame} 740 | 741 | \vspace{2em} 742 | In econometrics we often use the vector version of Slutsky's theorem: 743 | 744 | \vspace{1em} 745 | \Fact\eqref{ET-fa:cmtetcv2} 746 | Let $\boldx_n$ and $\boldx$ be random vectors in $\RR^K$, let 747 | $\boldY_n$ be random matrices, and let $\boldC$ be a constant matrix. 748 | Assuming conformability, we have 749 | % 750 | \begin{align*} 751 | \boldY_n \toprob \boldC \text{ and } \boldx_n \tod \boldx 752 | \quad \implies \quad 753 | \boldY_n \boldx_n \tod \boldC \boldx 754 | \quad \\ \text{and} \quad 755 | \boldY_n + \boldx_n \tod \boldC + \boldx 756 | \end{align*} 757 | 758 | \end{frame} 759 | 760 | \begin{frame} 761 | 762 | \vspace{2em} 763 | The scalar LLN and CLT extend to the vector case: 764 | 765 | \Thm\eqref{ET-t:vllnclt} 766 | Let $\boldx$ be a random vector in $\RR^K$ and let $\{\boldx_n\}$ be {\sc 767 | iid} copies of $\boldx$. 
If $\boldmu := \EE \boldx$ is finite, then
%
\begin{equation}
\label{eq:vlln}
\bar \boldx_N :=
\frac{1}{N} \sum_{n=1}^N \boldx_n \toprob \boldmu
\quad \text{ as } \quad N \to \infty
\end{equation}
%
If, in addition, $\EE \|\boldx\|^2 < \infty$, then
%
\begin{equation}
\label{eq:vclt}
\sqrt{N} \left( \bar \boldx_N - \boldmu \right)
\tod \nN(\boldzero, \Sigma)
\quad \text{where } \;
\Sigma := \var \boldx
\end{equation}
%
Here $\frac{1}{N} \sum_{n=1}^N \boldx_n$ should
be understood in terms of vector addition and scalar multiplication

\end{frame}

\begin{frame}

\vspace{2em}
\begin{figure}
\begin{center}
\scalebox{.7}{\includegraphics{vector_mean.pdf}}
\caption{\label{f:vector_mean} LLN, vector case}
\end{center}
\end{figure}

\end{frame}

\begin{frame}

\vspace{2em}
The vector LLN in theorem~\ref{ET-t:vllnclt} follows from the scalar LLN

\begin{itemize}
\item let $\{\boldx_n\}$ be {\sc
iid} copies of $\boldx$
\item let $\bolda$ be any
constant vector in $\RR^K$
\item define $y_n := \bolda^\T \boldx_n$
\item define $y := \bolda^\T \boldx$
\end{itemize}

The sequence $\{y_n\}$ is {\sc iid}
(see fact~\ref{ET-fa:rviifi} on page~\pageref{ET-fa:rviifi})
with the same distribution as $y$

By the scalar LLN
%
\begin{equation*}
\frac{1}{N} \sum_{n=1}^N y_n \toprob \EE y
= \EE[\bolda^\T \boldx]
= \bolda^\T\EE[ \boldx]
= \bolda^\T \boldmu
\end{equation*}

\end{frame}

\begin{frame}

\vspace{2em}
At the same time:
%
\begin{equation*}
\frac{1}{N} \sum_{n=1}^N y_n
= \frac{1}{N} \sum_{n=1}^N \bolda^\T \boldx_n
= \bolda^\T \left[ \frac{1}{N} \sum_{n=1}^N \boldx_n \right]
= \bolda^\T \bar \boldx_N
\end{equation*}

\vspace{2em}
Thus
%
\begin{equation*}
\bolda^\T \bar \boldx_N \toprob \bolda^\T \boldmu \; \text{ for any }\;
\bolda \in \RR^K
\end{equation*}
%

The claim $\bar \boldx_N \toprob \boldmu$ now follows (recall part 5 of fact~\ref{ET-fa:reconpro} above)

\end{frame}

\begin{frame}

\vspace{2em}
\Fact\eqref{ET-fa:llnmat}
Let $\boldX$ be a random matrix and let $\{\boldX_n\}$ be
{\sc iid} copies of $\boldX$.
If $\EE \| \boldX \|< \infty$, 863 | then 864 | % 865 | \begin{equation} 866 | \frac{1}{N} \sum_{n=1}^N \boldX_n \toprob \EE \boldX 867 | \quad \text{ as } \quad N \to \infty 868 | \end{equation} 869 | 870 | 871 | \Prf 872 | Since $\boldX_n \bolda$ is a vector with expectation $\EE [ \boldX ] \bolda $, the following 873 | % 874 | $$\frac{1}{N} \sum_{n=1}^N \boldX_n \bolda \toprob \EE [ \boldX] \bolda $$ 875 | % 876 | for any conformable vector $\bolda$, is immediate from the vector LLN (theorem~\ref{ET-t:vllnclt}) 877 | 878 | 879 | \end{frame} 880 | 881 | \begin{frame} 882 | 883 | \vspace{2em} 884 | The proof for Fact \ref{ET-fa:llnmat} is then complete by recalling the following from fact \ref{ET-fa:reconpro} 885 | 886 | \[\boldx_n \toprob \boldx \quad \iff \quad \bolda^\T \boldx_n \toprob \bolda^\T \boldx \quad \text{for any}\quad \bolda \in \RR^K\] 887 | 888 | \end{frame} 889 | 890 | \begin{frame}\frametitle{The Delta Method} 891 | 892 | \vspace{2em} 893 | We showed the asymptotic normality result in the central limit theorem is preserved 894 | under linear transformations (fact~\ref{ET-fa:cmtetcv2}) 895 | 896 | The result also holds for functions that are locally 897 | almost linear --- for differentiable functions 898 | 899 | \vspace{1em} 900 | \Thm\eqref{ET-t:dm} 901 | Let $g \colon \RR^K \to \RR$, let $\boldtheta$ be a point in the domain 902 | of $g$, and let $\{\boldt_n\}$ be a sequence of 903 | random vectors in $\RR^K$. If 904 | % 905 | \begin{enumerate} 906 | \item $\sqrt{n} (\boldt_n - \boldtheta) \tod \nN(0, \boldSigma)$ for 907 | some positive definite $\boldSigma$ and 908 | \item $\nabla g(\boldtheta)$ exists, is continuous, and each element is nonzero 909 | \end{enumerate} 910 | % 911 | then 912 | % 913 | \begin{equation} 914 | \label{eq:dmmv} 915 | \sqrt{n} \{ g(\boldt_n) - g(\boldtheta) \} 916 | \tod \nN(0, \nabla g(\boldtheta)^\T \boldSigma \nabla g(\boldtheta)) 917 | \quad \text{ as } \quad 918 | n \to \infty 919 | \end{equation} 920 | 921 | \end{frame} 922 | 923 | 924 | 925 | \begin{frame} 926 | 927 | \vspace{2em} 928 | The term $\nabla g(\boldtheta)$ is the \navy{gradient vector} 929 | of $g$ at $\boldtheta$: 930 | % 931 | \begin{equation*} 932 | \nabla g(\boldtheta) 933 | := 934 | \begin{pmatrix} 935 | g'_1(\boldtheta) 936 | \\ 937 | \vdots 938 | \\ 939 | g'_K(\boldtheta) 940 | \end{pmatrix} 941 | \quad \text{where} \quad 942 | g'_k (\boldtheta) := \frac{\partial g(\boldtheta)} {\partial \theta_k} 943 | \end{equation*} 944 | 945 | \vspace{1em} 946 | In the scalar case, \eqref{eq:dmmv} translates to 947 | % 948 | \begin{equation*} 949 | \label{eq:dm0} 950 | \sqrt{n} \{ g(t_n) - g(\theta) \} 951 | \tod \nN(0, g'(\theta)^2 \sigma^2) 952 | \quad \text{ as } \quad 953 | n \to \infty 954 | \end{equation*} 955 | 956 | \end{frame} 957 | 958 | \end{document} 959 | -------------------------------------------------------------------------------- /source_code/figs_code/additivity.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/additivity.pdf -------------------------------------------------------------------------------- /source_code/figs_code/additivity.pdf_t: -------------------------------------------------------------------------------- 1 | \begin{picture}(0,0)% 2 | \includegraphics{additivity.pdf}% 3 | \end{picture}% 4 | \setlength{\unitlength}{4144sp}% 5 | % 6 | 
\begingroup\makeatletter\ifx\SetFigFont\undefined% 7 | \gdef\SetFigFont#1#2#3#4#5{% 8 | \reset@font\fontsize{#1}{#2pt}% 9 | \fontfamily{#3}\fontseries{#4}\fontshape{#5}% 10 | \selectfont}% 11 | \fi\endgroup% 12 | \begin{picture}(10515,8750)(301,-8608) 13 | \put(5671,-1051){\makebox(0,0)[lb]{\smash{{\SetFigFont{25}{30.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}$\Omega$}% 14 | }}}} 15 | \put(3061,-4561){\makebox(0,0)[lb]{\smash{{\SetFigFont{20}{24.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}$C$}% 16 | }}}} 17 | \put(3961,-2401){\makebox(0,0)[lb]{\smash{{\SetFigFont{25}{30.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}$A$}% 18 | }}}} 19 | \put(8011,-4021){\makebox(0,0)[lb]{\smash{{\SetFigFont{25}{30.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}$B$}% 20 | }}}} 21 | \put(10801,-4606){\makebox(0,0)[lb]{\smash{{\SetFigFont{25}{30.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}$\PP(B) = \frac{4}{N}$}% 22 | }}}} 23 | \put(451,-781){\makebox(0,0)[lb]{\smash{{\SetFigFont{25}{30.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}$\PP(A) = \frac{2}{N}$}% 24 | }}}} 25 | \put(316,-7441){\makebox(0,0)[lb]{\smash{{\SetFigFont{25}{30.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}$\PP(C) = \frac{3}{N}$}% 26 | }}}} 27 | \end{picture}% 28 | -------------------------------------------------------------------------------- /source_code/figs_code/alpha_eq.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/alpha_eq.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ar1_dynam.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ar1_dynam.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ar1_dynam_lec.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ar1_dynam_lec.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ar1_dynam_lec1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ar1_dynam_lec1.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ar1_dynam_lec2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ar1_dynam_lec2.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ar1_dynam_lec3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ar1_dynam_lec3.pdf -------------------------------------------------------------------------------- /source_code/figs_code/arch_plot1.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/arch_plot1.pdf -------------------------------------------------------------------------------- /source_code/figs_code/arch_plot2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/arch_plot2.pdf -------------------------------------------------------------------------------- /source_code/figs_code/arch_plot3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/arch_plot3.pdf -------------------------------------------------------------------------------- /source_code/figs_code/arch_plot4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/arch_plot4.pdf -------------------------------------------------------------------------------- /source_code/figs_code/beta_bayes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/beta_bayes.pdf -------------------------------------------------------------------------------- /source_code/figs_code/beta_cdfs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/beta_cdfs.pdf -------------------------------------------------------------------------------- /source_code/figs_code/betahat_var.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/betahat_var.pdf -------------------------------------------------------------------------------- /source_code/figs_code/bivar_gaussian_3d.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/bivar_gaussian_3d.pdf -------------------------------------------------------------------------------- /source_code/figs_code/bootstrap_hist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/bootstrap_hist.pdf -------------------------------------------------------------------------------- /source_code/figs_code/c_alpha.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/c_alpha.pdf -------------------------------------------------------------------------------- /source_code/figs_code/cauchy_cdf.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/cauchy_cdf.pdf -------------------------------------------------------------------------------- /source_code/figs_code/cauchy_cdfs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/cauchy_cdfs.pdf -------------------------------------------------------------------------------- /source_code/figs_code/cauchy_densities.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/cauchy_densities.pdf -------------------------------------------------------------------------------- /source_code/figs_code/cauchy_quant.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/cauchy_quant.pdf -------------------------------------------------------------------------------- /source_code/figs_code/cauchy_samples.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/cauchy_samples.pdf -------------------------------------------------------------------------------- /source_code/figs_code/chisq_densities.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/chisq_densities.pdf -------------------------------------------------------------------------------- /source_code/figs_code/clt3d_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/clt3d_2.pdf -------------------------------------------------------------------------------- /source_code/figs_code/conv_in_prob.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/conv_in_prob.pdf -------------------------------------------------------------------------------- /source_code/figs_code/convolve.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/convolve.pdf -------------------------------------------------------------------------------- /source_code/figs_code/copula.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/copula.pdf -------------------------------------------------------------------------------- /source_code/figs_code/cost_min_2.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/cost_min_2.pdf -------------------------------------------------------------------------------- /source_code/figs_code/cvbest.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/cvbest.pdf -------------------------------------------------------------------------------- /source_code/figs_code/discrete_cdf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/discrete_cdf.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ecdf_beta.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ecdf_beta.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ecdf_lim.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ecdf_lim.pdf -------------------------------------------------------------------------------- /source_code/figs_code/eigenvecs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/eigenvecs.pdf -------------------------------------------------------------------------------- /source_code/figs_code/event_fails.pdf_t: -------------------------------------------------------------------------------- 1 | \begin{picture}(0,0)% 2 | \includegraphics{event_fails.pdf}% 3 | \end{picture}% 4 | \setlength{\unitlength}{4144sp}% 5 | % 6 | \begingroup\makeatletter\ifx\SetFigFont\undefined% 7 | \gdef\SetFigFont#1#2#3#4#5{% 8 | \reset@font\fontsize{#1}{#2pt}% 9 | \fontfamily{#3}\fontseries{#4}\fontshape{#5}% 10 | \selectfont}% 11 | \fi\endgroup% 12 | \begin{picture}(6310,7389)(3461,-8338) 13 | \put(5896,-2131){\makebox(0,0)[lb]{\smash{{\SetFigFont{20}{24.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}$\Omega$}% 14 | }}}} 15 | \put(7381,-4606){\makebox(0,0)[lb]{\smash{{\SetFigFont{20}{24.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}$A$}% 16 | }}}} 17 | \put(8911,-1096){\makebox(0,0)[lb]{\smash{{\SetFigFont{20}{24.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}this $\omega$ is the outcome}% 18 | }}}} 19 | \put(7606,-3211){\makebox(0,0)[lb]{\smash{{\SetFigFont{20}{24.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}$\omega$}% 20 | }}}} 21 | \end{picture}% 22 | -------------------------------------------------------------------------------- /source_code/figs_code/event_occurs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/event_occurs.pdf -------------------------------------------------------------------------------- /source_code/figs_code/event_occurs.pdf_t: 
-------------------------------------------------------------------------------- 1 | \begin{picture}(0,0)% 2 | \includegraphics{event_occurs.pdf}% 3 | \end{picture}% 4 | \setlength{\unitlength}{4144sp}% 5 | % 6 | \begingroup\makeatletter\ifx\SetFigFont\undefined% 7 | \gdef\SetFigFont#1#2#3#4#5{% 8 | \reset@font\fontsize{#1}{#2pt}% 9 | \fontfamily{#3}\fontseries{#4}\fontshape{#5}% 10 | \selectfont}% 11 | \fi\endgroup% 12 | \begin{picture}(12233,6927)(798,-8068) 13 | \put(3691,-2491){\makebox(0,0)[lb]{\smash{{\SetFigFont{20}{24.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}$A$}% 14 | }}}} 15 | \put(1711,-5281){\makebox(0,0)[lb]{\smash{{\SetFigFont{20}{24.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}$\Omega$}% 16 | }}}} 17 | \put(10936,-2536){\makebox(0,0)[lb]{\smash{{\SetFigFont{20}{24.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}$A$}% 18 | }}}} 19 | \put(8866,-3526){\makebox(0,0)[lb]{\smash{{\SetFigFont{20}{24.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}$\omega$}% 20 | }}}} 21 | \put(6121,-1276){\makebox(0,0)[lb]{\smash{{\SetFigFont{25}{30.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}outcome}% 22 | }}}} 23 | \put(9316,-5326){\makebox(0,0)[lb]{\smash{{\SetFigFont{20}{24.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}$\Omega$}% 24 | }}}} 25 | \put(3286,-3121){\makebox(0,0)[lb]{\smash{{\SetFigFont{20}{24.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}$\omega$}% 26 | }}}} 27 | \put(6166,-6271){\makebox(0,0)[lb]{\smash{{\SetFigFont{25}{30.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}outcome}% 28 | }}}} 29 | \put(2071,-7531){\makebox(0,0)[lb]{\smash{{\SetFigFont{25}{30.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}event $A$ occurs}% 30 | }}}} 31 | \put(9496,-7441){\makebox(0,0)[lb]{\smash{{\SetFigFont{25}{30.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}$A$ does not occur}% 32 | }}}} 33 | \end{picture}% 34 | -------------------------------------------------------------------------------- /source_code/figs_code/fcca.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/fcca.pdf -------------------------------------------------------------------------------- /source_code/figs_code/firms_median.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/firms_median.pdf -------------------------------------------------------------------------------- /source_code/figs_code/flat_plane.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/flat_plane.pdf -------------------------------------------------------------------------------- /source_code/figs_code/flat_plane_e_vecs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/flat_plane_e_vecs.pdf -------------------------------------------------------------------------------- /source_code/figs_code/flat_plane_e_vecspdf.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/flat_plane_e_vecspdf.png -------------------------------------------------------------------------------- /source_code/figs_code/flat_plane_no_vecs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/flat_plane_no_vecs.pdf -------------------------------------------------------------------------------- /source_code/figs_code/gaussian_example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/gaussian_example.pdf -------------------------------------------------------------------------------- /source_code/figs_code/gdp.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/gdp.pdf -------------------------------------------------------------------------------- /source_code/figs_code/glu.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/glu.pdf -------------------------------------------------------------------------------- /source_code/figs_code/hghb.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/hghb.pdf -------------------------------------------------------------------------------- /source_code/figs_code/hsi.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/hsi.pdf -------------------------------------------------------------------------------- /source_code/figs_code/illus_clt.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/illus_clt.pdf -------------------------------------------------------------------------------- /source_code/figs_code/inverse_prob.fig: -------------------------------------------------------------------------------- 1 | #FIG 3.2 Produced by xfig version 3.2.5c 2 | Landscape 3 | Center 4 | Metric 5 | A4 6 | 100.00 7 | Single 8 | -2 9 | 1200 2 10 | 5 1 0 1 0 7 50 -1 -1 0.000 0 0 1 0 2731.184 3993.553 2520 3420 3105 3510 3285 3735 11 | 0 0 1.00 60.00 120.00 12 | 5 1 0 1 0 7 50 -1 -1 0.000 0 0 1 0 4211.591 3810.682 4005 4410 3780 4275 3645 4095 13 | 0 0 1.00 60.00 120.00 14 | 5 1 0 1 0 7 50 -1 -1 0.000 0 1 1 0 5091.136 3820.227 5355 3240 4635 3375 4455 3780 15 | 0 0 1.00 60.00 120.00 16 | 4 0 0 50 -1 0 24 0.0000 6 180 900 3240 3960 $F(x) = y$\001 17 | 4 0 0 50 -1 0 24 0.0000 6 120 630 5535 3240 outcome\001 18 | 4 0 0 50 -1 0 24 0.0000 6 135 450 1530 3420 model\001 19 | 4 0 0 50 -1 0 24 0.0000 6 165 2520 4230 4545 what $x$ led to outcome $y$?\001 20 | 
-------------------------------------------------------------------------------- /source_code/figs_code/inverse_prob.fig.bak: -------------------------------------------------------------------------------- 1 | #FIG 3.2 Produced by xfig version 3.2.5c 2 | Landscape 3 | Center 4 | Metric 5 | A4 6 | 100.00 7 | Single 8 | -2 9 | 1200 2 10 | 5 1 0 1 0 7 50 -1 -1 0.000 0 0 1 0 2731.184 3993.553 2520 3420 3105 3510 3285 3735 11 | 0 0 1.00 60.00 120.00 12 | 5 1 0 1 0 7 50 -1 -1 0.000 0 0 1 0 4211.591 3810.682 4005 4410 3780 4275 3645 4095 13 | 0 0 1.00 60.00 120.00 14 | 5 1 0 1 0 7 50 -1 -1 0.000 0 1 1 0 5091.136 3820.227 5355 3240 4635 3375 4455 3780 15 | 0 0 1.00 60.00 120.00 16 | 4 0 0 50 -1 0 24 0.0000 6 180 900 3240 3960 $F(x) = y$\001 17 | 4 0 0 50 -1 0 24 0.0000 6 120 630 5535 3240 outcome\001 18 | 4 0 0 50 -1 0 24 0.0000 6 135 450 1530 3420 model\001 19 | 4 0 0 50 -1 0 24 0.0000 6 165 2520 4230 4545 what $x$ led to outcome $y$?\001 20 | -------------------------------------------------------------------------------- /source_code/figs_code/inverse_prob.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/inverse_prob.pdf -------------------------------------------------------------------------------- /source_code/figs_code/inverse_prob.pdf_t: -------------------------------------------------------------------------------- 1 | \begin{picture}(0,0)% 2 | \includegraphics{inverse_prob.pdf}% 3 | \end{picture}% 4 | \setlength{\unitlength}{4144sp}% 5 | % 6 | \begingroup\makeatletter\ifx\SetFigFont\undefined% 7 | \gdef\SetFigFont#1#2#3#4#5{% 8 | \reset@font\fontsize{#1}{#2pt}% 9 | \fontfamily{#3}\fontseries{#4}\fontshape{#5}% 10 | \selectfont}% 11 | \fi\endgroup% 12 | \begin{picture}(4681,1504)(1516,-3770) 13 | \put(3241,-3121){\makebox(0,0)[lb]{\smash{{\SetFigFont{20}{24.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}$F(x) = y$}% 14 | }}}} 15 | \put(5536,-2401){\makebox(0,0)[lb]{\smash{{\SetFigFont{20}{24.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}outcome}% 16 | }}}} 17 | \put(1531,-2581){\makebox(0,0)[lb]{\smash{{\SetFigFont{20}{24.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}model}% 18 | }}}} 19 | \put(4231,-3706){\makebox(0,0)[lb]{\smash{{\SetFigFont{20}{24.0}{\rmdefault}{\mddefault}{\updefault}{\color[rgb]{0,0,0}what $x$ led to outcome $y$?}% 20 | }}}} 21 | \end{picture}% 22 | -------------------------------------------------------------------------------- /source_code/figs_code/iv_example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/iv_example.pdf -------------------------------------------------------------------------------- /source_code/figs_code/jointplot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/jointplot.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ks_sim1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ks_sim1.pdf
-------------------------------------------------------------------------------- /source_code/figs_code/ks_sim2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ks_sim2.pdf -------------------------------------------------------------------------------- /source_code/figs_code/lin_comb.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/lin_comb.pdf -------------------------------------------------------------------------------- /source_code/figs_code/linbijec.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/linbijec.pdf -------------------------------------------------------------------------------- /source_code/figs_code/linear_after_proj.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/linear_after_proj.pdf -------------------------------------------------------------------------------- /source_code/figs_code/linear_after_proj3d.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/linear_after_proj3d.pdf -------------------------------------------------------------------------------- /source_code/figs_code/linfunc.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/linfunc.pdf -------------------------------------------------------------------------------- /source_code/figs_code/lognorm_sample_mean.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/lognorm_sample_mean.pdf -------------------------------------------------------------------------------- /source_code/figs_code/long_norm_den_seq.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/long_norm_den_seq.pdf -------------------------------------------------------------------------------- /source_code/figs_code/nikkei_ecdf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/nikkei_ecdf.pdf -------------------------------------------------------------------------------- /source_code/figs_code/nikkei_hist.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/nikkei_hist.pdf 
-------------------------------------------------------------------------------- /source_code/figs_code/nonredundant1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/nonredundant1.pdf -------------------------------------------------------------------------------- /source_code/figs_code/nonredundant2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/nonredundant2.pdf -------------------------------------------------------------------------------- /source_code/figs_code/nonredundant3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/nonredundant3.pdf -------------------------------------------------------------------------------- /source_code/figs_code/norm_den_seq.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/norm_den_seq.pdf -------------------------------------------------------------------------------- /source_code/figs_code/normal_cdfs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/normal_cdfs.pdf -------------------------------------------------------------------------------- /source_code/figs_code/normal_densities.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/normal_densities.pdf -------------------------------------------------------------------------------- /source_code/figs_code/not_in_span.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/not_in_span.pdf -------------------------------------------------------------------------------- /source_code/figs_code/not_multiple_of_one.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/not_multiple_of_one.pdf -------------------------------------------------------------------------------- /source_code/figs_code/npkde.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/npkde.pdf -------------------------------------------------------------------------------- /source_code/figs_code/nr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/nr.pdf 
-------------------------------------------------------------------------------- /source_code/figs_code/ofit1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ofit1.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ofit11.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ofit11.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ofit14.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ofit14.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ofit3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ofit3.pdf -------------------------------------------------------------------------------- /source_code/figs_code/one_dim_kde.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/one_dim_kde.pdf -------------------------------------------------------------------------------- /source_code/figs_code/partial_reg_plot.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/partial_reg_plot.pdf -------------------------------------------------------------------------------- /source_code/figs_code/power.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/power.pdf -------------------------------------------------------------------------------- /source_code/figs_code/qform_indef.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/qform_indef.pdf -------------------------------------------------------------------------------- /source_code/figs_code/qform_nd.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/qform_nd.pdf -------------------------------------------------------------------------------- /source_code/figs_code/qform_pd.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/qform_pd.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ridge_plots/ridgeplot1.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ridge_plots/ridgeplot1.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ridge_plots/ridgeplot10.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ridge_plots/ridgeplot10.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ridge_plots/ridgeplot2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ridge_plots/ridgeplot2.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ridge_plots/ridgeplot3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ridge_plots/ridgeplot3.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ridge_plots/ridgeplot4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ridge_plots/ridgeplot4.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ridge_plots/ridgeplot5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ridge_plots/ridgeplot5.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ridge_plots/ridgeplot6.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ridge_plots/ridgeplot6.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ridge_plots/ridgeplot7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ridge_plots/ridgeplot7.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ridge_plots/ridgeplot8.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ridge_plots/ridgeplot8.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ridge_plots/ridgeplot9.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ridge_plots/ridgeplot9.pdf -------------------------------------------------------------------------------- /source_code/figs_code/ridge_risk.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/ridge_risk.pdf -------------------------------------------------------------------------------- /source_code/figs_code/rotation_1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/rotation_1.pdf -------------------------------------------------------------------------------- /source_code/figs_code/rotation_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/rotation_2.pdf -------------------------------------------------------------------------------- /source_code/figs_code/rvempr.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/rvempr.pdf -------------------------------------------------------------------------------- /source_code/figs_code/rw_metropolis.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/rw_metropolis.pdf -------------------------------------------------------------------------------- /source_code/figs_code/sampling_distributions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/sampling_distributions.pdf -------------------------------------------------------------------------------- /source_code/figs_code/simple_mkt.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/simple_mkt.pdf -------------------------------------------------------------------------------- /source_code/figs_code/small_sim.R: -------------------------------------------------------------------------------- 1 | > N <- 1000 2 | > x1 <- runif(N) 3 | > x2 <- 10 * exp(x1) + rnorm(N)  # x2 is itself driven by x1 4 | > y <- x1 + x2 + rnorm(N)  # true direct coefficient on x1 is 1 5 | > results <- lm(y ~ 0 + x1)  # regress y on x1 alone (no intercept), omitting x2 6 | > results$coefficients  # estimate is badly inflated by the omitted x2 7 | x1 8 | 30.83076 9 | -------------------------------------------------------------------------------- /source_code/figs_code/span_of_one_vec.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/span_of_one_vec.pdf -------------------------------------------------------------------------------- /source_code/figs_code/span_plane.pdf: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/span_plane.pdf -------------------------------------------------------------------------------- /source_code/figs_code/stein.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/stein.pdf -------------------------------------------------------------------------------- /source_code/figs_code/t_to_norm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/t_to_norm.pdf -------------------------------------------------------------------------------- /source_code/figs_code/tikreg.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/tikreg.pdf -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/bij_inv.tex: -------------------------------------------------------------------------------- 1 | 2 | \begin{tikzpicture}[scale=0.4, 3 | axis/.style={->, >=stealth'}, 4 | important line/.style={thick}, 5 | dashed line/.style={dashed, thin}, 6 | every node/.style={color=black}, 7 | decoration={brace,amplitude=7pt}, 8 | ] 9 | %% draw circles 10 | \draw[name path=ellipse,very thick] 11 | (0,0) circle[x radius = 1.5 cm, y radius = 3 cm]; 12 | 13 | \draw[name path=ellipse,very thick] 14 | (8,0) circle[x radius = 1.5 cm, y radius = 3 cm]; 15 | 16 | \draw[name path=ellipse,very thick] 17 | (-8,0) circle[x radius = 1.5 cm, y radius = 3 cm]; 18 | 19 | %% paths 20 | \draw[-latex, very thin] (-6.3,1) to [out=20,in=160] node[above] {$f$} (-1.7,1); 21 | \draw[-latex, very thin] (-1.7,-1) to [out=200,in=-20] node[below] {$f^{-1}$} (-6.3,-1); 22 | 23 | \draw[-latex, very thin] (1.7,1) to [out=20,in=160] node[above] {$g$} (6.3,1); 24 | \draw[-latex, very thin] (6.3,-1) to [out=200,in=-20] node[below] {$g^{-1}$} (1.7,-1); 25 | 26 | 27 | %% A & B & C 28 | \node at (-8,4) {$A$}; 29 | \node at (0,4) {$B$}; 30 | \node at (8,4) {$C$}; 31 | 32 | \end{tikzpicture} 33 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/bijec.tex: -------------------------------------------------------------------------------- 1 | 2 | \begin{tikzpicture}[scale=0.4, 3 | axis/.style={->, >=stealth'}, 4 | important line/.style={thick}, 5 | dashed line/.style={dashed, thin}, 6 | every node/.style={color=black}, 7 | decoration={brace,amplitude=7pt}, 8 | ] 9 | %% draw circles 10 | \draw[name path=ellipse,very thick] 11 | (-5,0) circle[x radius = 2 cm, y radius = 6 cm]; 12 | 13 | \draw[name path=ellipse,very thick] 14 | (5,0) circle[x radius = 2 cm, y radius = 6 cm]; 15 | 16 | %% draw nodes 17 | \coordinate (a1) at (-5,3); \coordinate (aa1) at (-4.8,2.8); 18 | \coordinate (a2) at (-5,-3); \coordinate (aa2) at (-4.8,-3.2); 19 | \coordinate (b1) at (5,3); \coordinate (bb1) at (4.8,3.2); 20 | \coordinate (b2) at (5,-3); \coordinate (bb2) at (4.8,-2.8); 21 | 22 | \node[fill=black,circle,scale=0.37] at (a1){}; 23 | \node[fill=black,circle,scale=0.37] at (a2){}; 24 | \node[fill=black,circle,scale=0.37] at (b1){}; 25 | 
\node[fill=black,circle,scale=0.37] at (b2){}; 26 | 27 | %% paths 28 | \draw[-latex, very thin] (a1) to [out=20,in=160] node[above] {$f$} (bb1); 29 | \draw[-latex, very thin] (b1) to [out=200,in=-20] node[below] {$f^{-1}$} (aa1); 30 | 31 | \draw[-latex, very thin] (a2) to [out=20,in=160] node[above] {$f$} (bb2); 32 | \draw[-latex, very thin] (b2) to [out=200,in=-20] node[below] {$f^{-1}$} (aa2); 33 | 34 | 35 | %% A & B & C 36 | \node at (-5,7) {$A$}; 37 | \node at (5,7) {$B$}; 38 | 39 | 40 | \end{tikzpicture} 41 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/composition.tex: -------------------------------------------------------------------------------- 1 | 2 | 3 | \begin{tikzpicture}[scale=0.4, 4 | axis/.style={->, >=stealth'}, 5 | important line/.style={thick}, 6 | dashed line/.style={dashed, thin}, 7 | every node/.style={color=black}, 8 | decoration={brace,amplitude=7pt}, 9 | ] 10 | %% draw circles 11 | \draw[name path=ellipse,very thick] 12 | (-5,0) circle[x radius = 2 cm, y radius = 4 cm]; 13 | \draw[name path=ellipse,very thick] 14 | (5,0) circle[x radius = 2 cm, y radius = 4 cm]; 15 | \draw[name path=ellipse,very thick] 16 | (15,0) circle[x radius = 2 cm, y radius = 4 cm]; 17 | 18 | %% draw nodes 19 | \coordinate (a1) at (-5,0); 20 | \coordinate (aa1) at (-4.8, -0.2); 21 | \coordinate (b1) at (5,0); 22 | \coordinate (bb1) at (4.8, 0.2); 23 | \coordinate (c1) at (15,0); 24 | \coordinate (cc1) at (14.8, 0.2); 25 | 26 | \node[fill=black,circle,scale=0.37] at (a1) {}; 27 | %\node[fill=black,circle,scale=0.37] at (a2){}; 28 | \node[fill=black,circle,scale=0.37] at (b1){}; 29 | %\node[fill=black,circle,scale=0.37] at (b2){}; 30 | \node[fill=black,circle,scale=0.37] at (c1){}; 31 | 32 | \node[below] at (a1) {$a$}; 33 | \node[below] at (b1) {$b$}; 34 | \node[below] at (c1) {$c$}; 35 | 36 | %% paths 37 | \draw[-latex, very thin] (a1) to [out=20,in=160] node[above] {$f$} (bb1); 38 | \draw[-latex, very thin] (b1) to [out=20,in=160] node[above] {$g$} (cc1); 39 | 40 | %\draw[-latex, very thin] (a2) to [out=20,in=160] node[above] {$f$} (bb2); 41 | %\draw[-latex, very thin] (b2) to [out=200,in=-20] node[below] {$f^{-1}$} (aa2); 42 | 43 | 44 | %% A & B & C 45 | \node at (-5, 5) {$A$}; 46 | \node at (5, 5) {$B$}; 47 | \node at (15, 5) {$C$}; 48 | 49 | 50 | \end{tikzpicture} 51 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/diagonalize.tex: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[scale=1, 2 | axis/.style={->, >=stealth'}, 3 | important line/.style={thick}, 4 | dashed line/.style={dashed, thin}, 5 | every node/.style={color=black}, 6 | decoration={brace,amplitude=7pt}, 7 | ] 8 | 9 | %% draw nodes 10 | \coordinate (x1) at (-2,2); \coordinate (x11) at (-2,1.8); \coordinate (x12) at (-1.8,2); 11 | \coordinate (x2) at (-2,0); \coordinate (x21) at (-1.8,0); \coordinate (x22) at (-2,0.2); 12 | \coordinate (x3) at (2,2); \coordinate (x31) at (1.8,2); \coordinate (x32) at (2,1.8); 13 | \coordinate (x4) at (2,0); \coordinate (x41) at (1.8,0); \coordinate (x42) at (2,0.2); 14 | 15 | \node[fill=black,circle,scale=0.37] at (x1){}; 16 | \node[fill=black,circle,scale=0.37] at (x2){}; 17 | \node[fill=black,circle,scale=0.37] at (x3){}; 18 | \node[fill=black,circle,scale=0.37] at (x4){}; 19 | 20 | %% label x & Ax 21 | \draw (x2) node[below=5pt] {$\boldx$}; 22 | \draw (x4) node[below=5pt] {$\boldA\boldx$}; 23 | 24 | %% 
draw arrows 25 | \draw[important line, ->] (x21) -- node[above=5pt] {$\boldA$} (x41); 26 | \draw[important line, ->] (x22) -- node[left=5pt] {$\boldP^{-1}$} (x11); 27 | \draw[important line, ->] (x12) -- node[above=5pt] {$\boldB$} (x31); 28 | \draw[important line, ->] (x32) -- node[right=5pt] {$\boldP$} (x42); 29 | 30 | \end{tikzpicture} 31 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/discrete_cdf.tex: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[ 2 | scale=5, 3 | axis/.style={->, >=stealth'}, 4 | important line/.style={thick}, 5 | dashed line/.style={dashed, thin}, 6 | every node/.style={color=black}, 7 | decoration={brace,amplitude=7pt}, 8 | ] 9 | 10 | % define simple function 11 | \coordinate(O) at (0,0); 12 | \coordinate (s1) at (0.25,0); 13 | \coordinate (s2) at (0.65,0); 14 | \coordinate (s3) at (-0.15,0); 15 | \coordinate (s4) at (0.25,0.4); 16 | \coordinate (s5) at (0.65,0.4); 17 | \coordinate (s6) at (0.65,0.8); 18 | \coordinate (s7) at (1.1,0.8); 19 | % define curly bracket location 20 | \coordinate (c1) at (0.23,0.05); \coordinate (c2) at (0.23,0.35); 21 | \coordinate (c3) at (0.63,0.75); \coordinate (c4) at (0.63,0.45); 22 | % axis 23 | \draw[axis] (-0.15,0) -- (1.1,0) node(xline)[below] {}; 24 | \draw[axis] (0,0) -- (0,1.0) node(yline)[above] {}; 25 | % drawing simple function 26 | \draw[important line,blue] (s3) -- (s1); 27 | \draw[important line,blue] (s4) -- (s5); 28 | \draw[important line,blue] (s6) -- (s7); 29 | % dashed line 30 | \draw[dashed line] (s1) node[below] {$s_1$} -- (s4); 31 | \draw[dashed line] (s2) node[below] {$s_2$} -- (s6); 32 | % label y axis 33 | \foreach \y/\ytext in {0.8} 34 | \draw (0.0pt,\y cm) -- (-0.4pt,\y cm) node[anchor=east] {$1$}; 35 | %curly bracket 36 | \draw [decorate,very thick] (c1) -- (c2) 37 | node [midway,anchor=east,inner sep=5pt, outer sep=5pt]{$p_1$}; 38 | \draw [decorate,very thick] (c4) -- (c3) 39 | node [midway,anchor=east,inner sep=5pt, outer sep=5pt]{$p_2$}; 40 | %circles 41 | \node[circle, draw,thin,blue,fill=white!10, scale=0.25] at (s1){}; 42 | \node[circle, draw,thin,blue,fill=white!10, scale=0.25] at (s5){}; 43 | \node[fill=blue,circle,scale=0.25] at (s4){}; 44 | \node[fill=blue,circle,scale=0.25] at (s6){}; 45 | \end{tikzpicture} 46 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/fig3.3.tex: -------------------------------------------------------------------------------- 1 | % Figure 2.3 Vector Discrete cdf 2 | \documentclass[11pt]{article} 3 | \input{defs.tex} 4 | \usepackage{amsmath,amsthm,amssymb} 5 | \usepackage{pgf,tikz} 6 | \usepackage{pgfplots} 7 | \usetikzlibrary{decorations} 8 | \usetikzlibrary{arrows} 9 | \begin{document} 10 | \begin{figure} 11 | \centering 12 | \begin{tikzpicture}[ 13 | scale=5, 14 | axis/.style={->, >=stealth'}, 15 | important line/.style={thick}, 16 | dashed line/.style={dashed, thin}, 17 | every node/.style={color=black}, 18 | decoration={brace,amplitude=7pt}, 19 | ] 20 | 21 | % define simple function 22 | \coordinate(O) at (0,0); 23 | \coordinate (s1) at (0.25,0); 24 | \coordinate (s2) at (0.65,0); 25 | \coordinate (s3) at (-0.15,0); 26 | \coordinate (s4) at (0.25,0.4); 27 | \coordinate (s5) at (0.65,0.4); 28 | \coordinate (s6) at (0.65,0.8); 29 | \coordinate (s7) at (1.1,0.8); 30 | \coordinate (s8) at (0,0.7); 31 | % define curly bracket location 32 | \coordinate (c1) at (0.23,0.05); \coordinate 
(c2) at (0.23,0.35); 33 | \coordinate (c3) at (0.63,0.75); \coordinate (c4) at (0.63,0.45); 34 | % axis 35 | \draw[axis] (-0.15,0) -- (1.1,0) node(xline)[below] {}; 36 | \draw[axis] (0,0) -- (0,1.1) node(yline)[above] {}; 37 | % drawing simple function 38 | \draw[important line,blue] (s3) -- (s1); 39 | \draw[important line,blue] (s4) -- (s5); 40 | \draw[important line,blue] (s6) -- (s7); 41 | % dashed line 42 | \draw[dashed line] (s1) node[below] {$s_1$} -- (s4); 43 | \draw[dashed line] (s2) node[below] {$s_2$} -- (s6); 44 | % label y axis 45 | \foreach \y/\ytext in {0.8} 46 | \draw (0.0pt,\y cm) -- (-0.4pt,\y cm) node[anchor=east] {$\boldone$}; 47 | %curly bracket 48 | \draw [decorate,very thick] (c1) -- (c2) 49 | node [midway,anchor=east,inner sep=5pt, outer sep=5pt]{$p_1$}; 50 | \draw [decorate,very thick] (c4) -- (c3) 51 | node [midway,anchor=east,inner sep=5pt, outer sep=5pt]{$p_2$}; 52 | %circles 53 | \node[circle, draw,thin,blue,fill=white!10, scale=0.25] at (s1){}; 54 | \node[circle, draw,thin,blue,fill=white!10, scale=0.25] at (s5){}; 55 | \node[fill=blue,circle,scale=0.25] at (s4){}; 56 | \node[fill=blue,circle,scale=0.25] at (s6){}; 57 | \end{tikzpicture} 58 | \end{figure} 59 | \end{document} 60 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/finite_rv_approx.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/tikzfigs/finite_rv_approx.pdf -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/finite_rv_approx.tex: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[ 2 | scale=5, 3 | axis/.style={->, >=stealth'}, 4 | important line/.style={thick}, 5 | dashed line/.style={dashed, thin}, 6 | every node/.style={color=black} 7 | ] 8 | % define simple function 9 | \coordinate(O) at (0,0); 10 | \coordinate (s1) at (0.25,0); 11 | \coordinate (s2) at (0.65,0); 12 | \coordinate (s3) at (-0.15,0); 13 | \coordinate (s4) at (0.25,0.4); 14 | \coordinate (s5) at (0.65,0.4); 15 | \coordinate (s6) at (0.65,0.8); 16 | \coordinate (s7) at (1.1,0.8); 17 | \coordinate (s8) at (0,0.7); 18 | \coordinate (s9) at (1.1,0.95); 19 | % axis 20 | \draw[axis] (O) -- (1.1,0) node(xline)[below, xshift=-0.8cm] {$\Omega$}; 21 | \draw[axis] (0,0) -- (0,1.1) node(yline)[above] {}; 22 | % drawing simple function 23 | \draw[important line,blue] (O) -- (s1); 24 | \draw[important line,blue] (s4) -- (s5); 25 | \draw[important line,blue] (s6) -- (s7) node[right] {$x_n$}; 26 | % continuous rv 27 | \draw[thick, xshift=0cm] plot [smooth, tension=1] coordinates { (O) (s4) (s6) (s9)} node[right] {$x$}; 28 | % dashed line 29 | \draw[dashed line] (s1) -- (s4); 30 | \draw[dashed line] (s5) -- (s6); 31 | %circles 32 | \node[circle, draw,thin,blue,fill=white!10, scale=0.25] at (s1){}; 33 | \node[circle, draw,thin,blue,fill=white!10, scale=0.25] at (s5){}; 34 | \node[fill=blue,circle,scale=0.25] at (s4){}; 35 | \node[fill=blue,circle,scale=0.25] at (s6){}; 36 | \end{tikzpicture} 37 | 38 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/func_nonfunc.tex: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[scale=0.4, 2 | axis/.style={->, >=stealth'}, 3 | important 
line/.style={thick}, 4 | dashed line/.style={dashed, thin}, 5 | every node/.style={color=black}, 6 | decoration={brace,amplitude=7pt}, 7 | ] 8 | %% draw circles 9 | \draw[name path=ellipse,very thick] 10 | (-8,5) circle[x radius = 1.3 cm, y radius = 2 cm]; 11 | \draw[name path=ellipse,very thick] 12 | (-3,5) circle[x radius = 1.3 cm, y radius = 2 cm]; 13 | 14 | \draw[name path=ellipse,very thick] 15 | (3,5) circle[x radius = 1.3 cm, y radius = 2 cm]; 16 | \draw[name path=ellipse,very thick] 17 | (8,5) circle[x radius = 1.3 cm, y radius = 2 cm]; 18 | 19 | \draw[name path=ellipse,very thick] 20 | (-8,-5) circle[x radius = 1.3 cm, y radius = 2 cm]; 21 | \draw[name path=ellipse,very thick] 22 | (-3,-5) circle[x radius = 1.3 cm, y radius = 2 cm]; 23 | 24 | \draw[name path=ellipse,very thick] 25 | (3,-5) circle[x radius = 1.3 cm, y radius = 2 cm]; 26 | \draw[name path=ellipse,very thick] 27 | (8,-5) circle[x radius = 1.3 cm, y radius = 2 cm]; 28 | 29 | 30 | %% draw nodes 31 | \coordinate (a1) at (-8,5); 32 | \coordinate (a2) at (-8,6); 33 | \coordinate (a3) at (-8,4); 34 | \coordinate (a4) at (-3,5.5); \coordinate (aa4) at (-3.2,5.5); 35 | \coordinate (a5) at (-3,4.5); \coordinate (a51) at (-3.2,4.4); \coordinate (a52) at (-3.2,4.6); 36 | 37 | \coordinate (b1) at (3,5); 38 | \coordinate (b2) at (8,5.5); 39 | \coordinate (b3) at (8,4.5); \coordinate (bb3) at (7.8,4.5); 40 | 41 | \coordinate (c1) at (-8,-5); 42 | \coordinate (c2) at (-8,-6); 43 | \coordinate (c3) at (-8,-4); 44 | \coordinate (c4) at (-3,-5.5); \coordinate (c41) at (-3.2,-5.4); \coordinate (c42) at (-3.2,-5.6); 45 | \coordinate (c5) at (-3,-4.5); \coordinate (c51) at (-3.2,-4.4); \coordinate (c52) at (-3.2,-4.6); 46 | 47 | \coordinate (d1) at (3,-5); 48 | \coordinate (d2) at (3,-6); 49 | \coordinate (d3) at (3,-4); 50 | \coordinate (d4) at (8,-5); \coordinate (d41) at (7.8,-5.1); \coordinate (d42) at (7.8,-4.9); 51 | 52 | \node[fill=black,circle,scale=0.37] at (a1){}; 53 | \node[fill=black,circle,scale=0.37] at (a2){}; 54 | \node[fill=black,circle,scale=0.37] at (a3){}; 55 | \node[fill=black,circle,scale=0.37] at (a4){}; 56 | \node[fill=black,circle,scale=0.37] at (a5){}; 57 | \node[fill=black,circle,scale=0.37] at (b1){}; 58 | \node[fill=black,circle,scale=0.37] at (b2){}; 59 | \node[fill=black,circle,scale=0.37] at (b3){}; 60 | \node[fill=black,circle,scale=0.37] at (c1){}; 61 | \node[fill=black,circle,scale=0.37] at (c2){}; 62 | \node[fill=black,circle,scale=0.37] at (c3){}; 63 | \node[fill=black,circle,scale=0.37] at (c4){}; 64 | \node[fill=black,circle,scale=0.37] at (c5){}; 65 | \node[fill=black,circle,scale=0.37] at (d1){}; 66 | \node[fill=black,circle,scale=0.37] at (d2){}; 67 | \node[fill=black,circle,scale=0.37] at (d3){}; 68 | \node[fill=black,circle,scale=0.37] at (d4){}; 69 | 70 | 71 | %% draw arrows 72 | \draw[-latex, very thin] (a2) -- (aa4); 73 | \draw[-latex, very thin] (a1) to (a52); 74 | \draw[-latex, very thin] (a3) to (a51); 75 | 76 | \draw[-latex, very thin] (b1) -- (bb3); 77 | 78 | \draw[-latex, very thin] (c1) -- (c52); 79 | \draw[-latex, very thin] (c3) to (c51); 80 | \draw[-latex, very thin] (c1) to (c41); 81 | \draw[-latex, very thin] (c2) to (c42); 82 | 83 | \draw[-latex, very thin] (d2) to (d41); 84 | \draw[-latex, very thin] (d1) to (d42); 85 | 86 | %% A function & Not a function 87 | \node at (-5.5,1.5) {function}; 88 | \node at (-5.5,-8.5) {not a function}; 89 | \node at (5.5,-8.5) {not a function}; 90 | \node at (5.5,1.5) {function}; 91 | 92 | %% A & B 93 | \node at (-8,8) {$A$}; 94 | \node at 
(-3,8) {$B$}; 95 | 96 | \node at (3,8) {$A$}; 97 | \node at (8,8) {$B$}; 98 | 99 | \node at (-8,-2) {$A$}; 100 | \node at (-3,-2) {$B$}; 101 | 102 | \node at (3,-2) {$A$}; 103 | \node at (8,-2) {$B$}; 104 | 105 | \end{tikzpicture} 106 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/function.tex: -------------------------------------------------------------------------------- 1 | 2 | \begin{tikzpicture}[scale=0.4, 3 | axis/.style={->, >=stealth'}, 4 | important line/.style={thick}, 5 | dashed line/.style={dashed, thin}, 6 | every node/.style={color=black}, 7 | decoration={brace,amplitude=7pt}, 8 | ] 9 | %% draw circles 10 | \draw[name path=ellipse,very thick] 11 | (-5,0) circle[x radius = 2 cm, y radius = 4 cm]; 12 | 13 | \draw[name path=ellipse,very thick] 14 | (5,0) circle[x radius = 2 cm, y radius = 4 cm]; 15 | 16 | %% draw nodes 17 | \coordinate (a1) at (-5, 1); 18 | \coordinate (a2) at (-5, -1); 19 | \coordinate (b1) at (5, 1); 20 | \coordinate (bb1) at (4.8, 1); 21 | \coordinate (b2) at (5, -1); 22 | \coordinate (bb2) at (4.8, -1); 23 | 24 | \node[fill=black,circle,scale=0.37] at (a1) {}; 25 | \node[fill=black,circle,scale=0.37] at (a2){}; 26 | \node[fill=black,circle,scale=0.37] at (b1){}; 27 | \node[fill=black,circle,scale=0.37] at (b2){}; 28 | 29 | \node[right] at (b1) {$b_1$}; 30 | \node[left] at (a1) {$a_1$}; 31 | \node[right] at (b2) {$b_2$}; 32 | \node[left] at (a2) {$a_2$}; 33 | 34 | %% paths 35 | \draw[-latex, very thin] (a1) to [out=20,in=160] node[above] {$f$} (bb1); 36 | \draw[-latex, very thin] (a2) to [out=-20,in=-160] node[above] {$f$} (bb2); 37 | 38 | 39 | %% A & B & C 40 | \node at (-5, 5) {$A$}; 41 | \node at (5, 5) {$B$}; 42 | 43 | 44 | \end{tikzpicture} 45 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/function2.tex: -------------------------------------------------------------------------------- 1 | 2 | \begin{tikzpicture}[scale=0.4, 3 | axis/.style={->, >=stealth'}, 4 | important line/.style={thick}, 5 | dashed line/.style={dashed, thin}, 6 | every node/.style={color=black}, 7 | decoration={brace,amplitude=7pt}, 8 | ] 9 | %% draw circles 10 | \draw[name path=ellipse,very thick] 11 | (-5,0) circle[x radius = 2 cm, y radius = 4 cm]; 12 | 13 | \draw[name path=ellipse,very thick] 14 | (5,0) circle[x radius = 2 cm, y radius = 4 cm]; 15 | 16 | %% draw nodes 17 | \coordinate (a1) at (-5, 1); 18 | \coordinate (a2) at (-5, -1); 19 | \coordinate (b1) at (5, 1); 20 | \coordinate (bb1) at (4.8, 1); 21 | \coordinate (b2) at (5, -1); 22 | \coordinate (bb2) at (4.8, -1); 23 | 24 | \node[fill=black,circle,scale=0.37] at (a1) {}; 25 | \node[fill=black,circle,scale=0.37] at (a2){}; 26 | \node[fill=black,circle,scale=0.37] at (b1){}; 27 | \node[fill=black,circle,scale=0.37] at (b2){}; 28 | 29 | \node[right] at (b1) {$y_1$}; 30 | \node[left] at (a1) {$x_1$}; 31 | \node[right] at (b2) {$y_2$}; 32 | \node[left] at (a2) {$x_2$}; 33 | 34 | %% paths 35 | \draw[-latex, very thin] (a1) to [out=20,in=160] node[above] {$f$} (bb1); 36 | \draw[-latex, very thin] (a2) to [out=-20,in=-160] node[above] {$f$} (bb2); 37 | 38 | 39 | %% A & B & C 40 | \node at (-5, 5) {$A$}; 41 | \node at (5, 5) {$B$}; 42 | 43 | 44 | \end{tikzpicture} 45 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/orth_comp.tex: -------------------------------------------------------------------------------- 1 | 
\begin{tikzpicture}[ 2 | scale=5, 3 | axis/.style={<->, >=stealth'}, 4 | important line/.style={thick}, 5 | dotted line/.style={dotted, thick,red}, 6 | dashed line/.style={dashed, thin}, 7 | every node/.style={color=black} 8 | ] 9 | 10 | % define x,z 11 | \coordinate(O) at (0,0); 12 | \coordinate (S1) at (-0.4,-0.2); 13 | \coordinate (S2) at (0.8,0.4); 14 | \coordinate (S3) at (-0.25,0.5); 15 | \coordinate (S4) at (0.12,-0.24); 16 | % axis 17 | \draw[axis] (-0.5,0) -- (0.9,0) node(xline)[right] {}; 18 | \draw[axis] (0,-0.3) -- (0,0.7) node(yline)[above] {}; 19 | % x, z 20 | \draw[important line, thick] (S1) -- (S2) node[right] {$S$}; 21 | \draw[important line, thick] (S4) -- (S3) node[left] {$S^{\perp}$}; 22 | % label angle 23 | \draw[dotted line] (-0.03,0.06) -- (0.03,0.09); 24 | \draw[dotted line] (0.06,0.03) -- (0.03,0.09); 25 | 26 | \end{tikzpicture} 27 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/orth_proj2D.tex: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[ 2 | scale=5, 3 | axis/.style={<->, >=stealth'}, 4 | important line/.style={thick}, 5 | dotted line/.style={dotted, thick,red}, 6 | dashed line/.style={dashed, thin}, 7 | every node/.style={color=black} 8 | ] 9 | 10 | % define x,z 11 | \coordinate(O) at (0,0); 12 | \coordinate (uhat) at (-0.2,0.4); 13 | \coordinate (yhat) at (0.6,0.3); 14 | \coordinate (y) at (0.4,0.7); 15 | \coordinate (S1) at (-0.4,-0.2); 16 | \coordinate (S2) at (0.8,0.4); 17 | \coordinate (S3) at (-0.3,0.6); 18 | \coordinate (S4) at (0.12,-0.24); 19 | % axis 20 | \draw[axis] (-0.5,0) -- (0.9,0) node(xline)[right] {}; 21 | \draw[axis] (0,-0.3) -- (0,0.7) node(yline)[above] {}; 22 | % x, z 23 | \draw[important line,blue,thick, ->] (O) -- (yhat) node[anchor = north west, text width=4em] {$\boldP \boldy$}; 24 | \draw[important line,blue, ->] (O) -- (uhat) node[anchor = north east, text width=4em] {$\boldM \boldy$}; 25 | \draw[important line,thick] (uhat) -- (S3) node [anchor = south east, text width=0.5em] {$S^{\perp}$}; 26 | \draw[important line,thick] (O) -- (S4); 27 | \draw[important line, thick] (S1) -- (O) node[right] {}; 28 | \draw[important line, thick] (yhat) -- (S2) node[right] {$S$}; 29 | \draw[important line, blue,->] (O) -- (y) node[right] {$\boldy$}; 30 | % label angle 31 | \draw[dotted line] (-0.03,0.06) -- (0.03,0.09); 32 | \draw[dotted line] (0.06,0.03) -- (0.03,0.09); 33 | \draw[dotted line] (0.54,0.27) -- (0.51,0.33); 34 | \draw[dotted line] (0.57,0.36) -- (0.51,0.33); 35 | \draw[dotted line] (-0.17,0.34) -- (-0.11,0.37); 36 | \draw[dotted line] (-0.14,0.43) -- (-0.11,0.37); 37 | 38 | \draw[dashed line, black] (y) -- (yhat); 39 | \draw[dashed line, black] (y) -- (uhat); 40 | 41 | \end{tikzpicture} 42 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/orth_proj2D0.tex: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[ 2 | scale=5, 3 | axis/.style={<->, >=stealth'}, 4 | important line/.style={thick}, 5 | dotted line/.style={dotted, thick,red}, 6 | dashed line/.style={dashed, thin}, 7 | every node/.style={color=black} 8 | ] 9 | 10 | % define x,z 11 | \coordinate(O) at (0,0); 12 | \coordinate (y-yhat) at (-0.2,0.4); 13 | \coordinate (yhat) at (0.6,0.3); 14 | \coordinate (y) at (0.4,0.7); 15 | \coordinate (Z1) at (-0.4,-0.2); 16 | \coordinate (Z2) at (0.8,0.4); 17 | % axis 18 | \draw[axis] (-0.5,0) -- 
(0.9,0) node(xline)[right] {}; 19 | \draw[axis] (0,-0.3) -- (0,0.7) node(yline)[above] {}; 20 | % x, z 21 | \draw[important line,blue,thick, ->] (O) -- (yhat) node[below] {$\hboldy$}; 22 | \draw[important line,blue, ->] (O) -- (y-yhat) node[left] {$\boldy - \hboldy$}; 23 | \draw[important line, thick] (Z1) -- (O) node[right] {}; 24 | \draw[important line, thick] (yhat) -- (Z2) node[right] {$S$}; 25 | \draw[important line, blue,->] (O) -- (y) node[right] {$\boldy$}; 26 | % label angle 27 | \draw[dotted line] (-0.03,0.06) -- (0.03,0.09); 28 | \draw[dotted line] (0.06,0.03) -- (0.03,0.09); 29 | \draw[dotted line] (0.54,0.27) -- (0.51,0.33); 30 | \draw[dotted line] (0.57,0.36) -- (0.51,0.33); 31 | \draw[dashed line, black] (y) -- (yhat); 32 | % path 33 | \draw[-latex, very thin] (0.5,0.4) to [out=210,in=50] (-0.1,0.2); 34 | \end{tikzpicture} 35 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/orth_proj2Dp.tex: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[ 2 | scale=5, 3 | axis/.style={<->, >=stealth'}, 4 | important line/.style={thick}, 5 | dotted line/.style={dotted, thick,red}, 6 | dashed line/.style={dashed, thin}, 7 | every node/.style={color=black} 8 | ] 9 | 10 | % define x,z 11 | \coordinate(O) at (0,0); 12 | \coordinate (y') at (-0.4,0.1); 13 | \coordinate (Py) at (0.6,0.3); 14 | \coordinate (y) at (0.4,0.7); 15 | \coordinate (Z1) at (-0.4,-0.2); 16 | \coordinate (Z2) at (0.8,0.4); 17 | \coordinate (Py') at (-0.28,-0.14); 18 | % axis 19 | \draw[axis] (-0.5,0) -- (0.9,0) node(xline)[right] {}; 20 | \draw[axis] (0,-0.3) -- (0,0.7) node(yline)[above] {}; 21 | % x, z 22 | \draw[important line,blue,thick, ->] (O) -- (Py) node[anchor = north west, text width=2em] {$\boldP \boldy$}; 23 | \draw[important line,blue, ->] (O) -- (y') node[left] {$\boldy'$}; 24 | \draw[important line, thick] (Z1) -- (O) node[right] {}; 25 | \draw[important line, thick] (Py) -- (Z2) node[right] {$S$}; 26 | \draw[important line, blue,->] (O) -- (y) node[right] {$\boldy$}; 27 | % label angle 28 | \draw[dotted line] (0.54,0.27) -- (0.51,0.33); 29 | \draw[dotted line] (0.57,0.36) -- (0.51,0.33); 30 | \draw[dotted line] (-0.22,-0.11) -- (-0.25,-0.05); 31 | \draw[dotted line] (-0.31,-0.08) -- (-0.25,-0.05); 32 | \draw[dashed line, black] (y) -- (Py); 33 | \draw[dashed line, black] (y') -- (Py') node[anchor = north west, text width=5em] {$\boldP \boldy'$}; 34 | 35 | \end{tikzpicture} 36 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/sim_fun.tex: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[ 2 | scale=5, 3 | axis/.style={->, >=stealth',thin}, 4 | important line/.style={thick}, 5 | dashed line/.style={dashed, thin}, 6 | every node/.style={color=black} 7 | ] 8 | 9 | % define simple function 10 | \coordinate(O) at (0,0); \coordinate (r1) at (0.25,0); 11 | \coordinate (r2) at (0.25,0.4); \coordinate(r3) at (0.45,0.4); 12 | \coordinate (r4) at (0.45,0.6); \coordinate (r5) at (0.7,0.6); 13 | \coordinate (r6) at (0.7,0); \coordinate (r7) at (1.07,0); 14 | % define dashed line 15 | \coordinate (d1) at (0,0.4); 16 | \coordinate (d2) at (0,0.6); 17 | \coordinate (d3) at (0.45,0); 18 | % axis 19 | \draw[axis] (-0.1,0) -- (1.1,0) node(xline)[below] {$\Omega$}; 20 | \draw[axis] (0,-0.1) -- (0,1.1) node(yline)[above] {}; 21 | % drawing simple function 22 | \draw[important line,blue] (O) 
-- (r1); 23 | \draw[important line,blue] (r2) -- (r3); 24 | \draw[important line,blue] (r4) -- (r5); 25 | \draw[important line,blue] (r6) -- (r7); 26 | % dashed line 27 | \draw[dashed line] (r1) -- (r2); 28 | \draw[dashed line] (r5) -- (r6); 29 | \draw[dashed line] (d1) node[left, text width=0.5em] {$s$} -- (r2); 30 | \draw[dashed line] (d2) node[left, text width=0.5em] {$t$} -- (r4); 31 | \draw[dashed line] (d3) -- (r4); 32 | % label A,B 33 | \draw[important line] (r1) -- node[below] {$A$} (d3); 34 | \draw[important line] (d3) -- node[below] {$B$} (r6); 35 | 36 | \end{tikzpicture} 37 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/vec_add.tex: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[ 2 | scale=5, 3 | axis/.style={<->, >=stealth'}, 4 | important line/.style={thick}, 5 | dashed line/.style={dashed, thin}, 6 | every node/.style={color=black} 7 | ] 8 | 9 | % define x,y 10 | \coordinate(O) at (0,0); 11 | \coordinate (X) at (0.6,0.1); 12 | \coordinate (Y) at (0.4,0.7); 13 | \coordinate (X+Y) at (1,0.8); 14 | % axis 15 | \draw[axis] (-0.1,0) -- (1.1,0) node(xline)[right] {}; 16 | \draw[axis] (0,-0.1) -- (0,1.1) node(yline)[above] {}; 17 | % x, y, x+y 18 | \draw[important line, ->] (O) -- (X) node[right, text width=5em] {$\boldx$}; 19 | \draw[important line, ->] (O) -- (Y) node[left, text width=0.5em] {$\boldy$}; 20 | \draw[important line, ->,red] (O) -- (X+Y) node[right, text width=5em] 21 | {$\boldx + \boldy$}; 22 | % dashed line 23 | \draw[dashed line] (X) -- (X+Y); 24 | \draw[dashed line] (Y) -- (X+Y); 25 | \end{tikzpicture} 26 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/vec_canon.tex: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[ 2 | scale=5, 3 | axis/.style={<->, >=stealth'}, 4 | important line/.style={thick}, 5 | dashed line/.style={dashed, thin}, 6 | every node/.style={color=black} 7 | ] 8 | 9 | % define x,y 10 | \coordinate(O) at (0,0); 11 | \coordinate (e1) at (0.5,0); 12 | \coordinate (e2) at (0,0.5); 13 | % axis 14 | \draw[axis] (-0.3,0) -- (0.9,0) node(xline)[right] {}; 15 | \draw[axis] (0,-0.2) -- (0,0.7) node(yline)[above] {}; 16 | % x, y, x+y 17 | \draw[important line, ->] (O) -- (e1) node[above, text width=5em] {$\bolde_1 = (1,0)$}; 18 | \draw[important line, ->] (O) -- (e2) node[right, text width=5em] {$\bolde_2 = (0,1)$}; 19 | \end{tikzpicture} 20 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/vec_canon_x.tex: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[ 2 | scale=5, 3 | axis/.style={<->, >=stealth'}, 4 | important line/.style={thick}, 5 | dashed line/.style={dashed, thin}, 6 | every node/.style={color=black} 7 | ] 8 | 9 | % define x,y 10 | \coordinate(O) at (0,0); 11 | \coordinate (X) at (0.7,0.4); 12 | \coordinate (e1) at (0.5,0); 13 | \coordinate (e2) at (0,0.5); 14 | % axis 15 | \draw[axis] (-0.3,0) -- (0.9,0) node(xline)[right] {}; 16 | \draw[axis] (0,-0.2) -- (0,0.7) node(yline)[above] {}; 17 | % x, y, x+y 18 | \draw[important line, ->] (O) -- (e1) node[above, text width=5em] {$\bolde_1 = (1,0)$}; 19 | \draw[important line, ->] (O) -- (e2) node[right, text width=5em] {$\bolde_2 = (0,1)$}; 20 | \draw[important line, red, ->] (O) -- (X) node[right] {$\boldy = y_1 \bolde_1 + y_2 \bolde_2$}; 
21 | \end{tikzpicture} 22 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/vec_minus.tex: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[ 2 | scale=5, 3 | axis/.style={<->, >=stealth'}, 4 | important line/.style={thick}, 5 | line/.style={thin}, 6 | dashed line/.style={dashed, thin}, 7 | every node/.style={color=black}, 8 | ] 9 | 10 | % define x,y 11 | \coordinate(O) at (0,0); 12 | \coordinate (X) at (0.6,0.1); 13 | \coordinate (Y) at (0.4,0.55); 14 | \coordinate (X-Y) at (0.2,-0.45); 15 | % axis 16 | \draw[axis] (-0.3,0) -- (0.9,0) node(xline)[right] {}; 17 | \draw[axis] (0,-0.5) -- (0,0.7) node(yline)[above] {}; 18 | % x, y, x+y 19 | \draw[important line, ->] (O) -- (X) node[right, text width=5em] {$\boldx$}; 20 | \draw[important line, ->] (O) -- (Y) node[right, text width=5em] {$\boldy$}; 21 | \draw[important line, ->,red] (O) -- (X-Y) node[right, text width=5em] {$\boldx - \boldy$}; 22 | % dashed line 23 | \draw[dashed line, ->,red] (Y) -- (X); 24 | %path 25 | \draw[-latex, very thin] (0.5,0.25) to [out=180,in=0] (0.1,-0.2); 26 | \end{tikzpicture} 27 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/vec_noncanon.tex: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[ 2 | scale=5, 3 | axis/.style={<->, >=stealth'}, 4 | important line/.style={thick,color=blue}, 5 | dashed line/.style={dashed, thin}, 6 | every node/.style={color=black} 7 | ] 8 | 9 | % define x,y 10 | \coordinate(O) at (0,0); 11 | \coordinate (e1) at (0.2,0); 12 | \coordinate (x) at (-0.4,0); 13 | % axis 14 | \draw[axis] (-1,0) -- (1,0) node(xline)[right] {}; 15 | \draw[axis] (0,-0.2) -- (0,0.2) node(yline)[above] {}; 16 | % x, y, x+y 17 | \draw[important line, ->] (O) -- (e1) node[anchor = south west, text width=5em] {$\boldx_1 = (1,0)$}; 18 | \draw[important line, ->] (O) -- (x) node[below, text width=6em] {$\boldx_2 = (-2,0)$}; 19 | \end{tikzpicture} 20 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/vec_scalar.tex: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[ 2 | scale=5, 3 | axis/.style={<->, >=stealth'}, 4 | important line/.style={thick}, 5 | dashed line/.style={dashed, thin}, 6 | every node/.style={color=black} 7 | ] 8 | 9 | % define x,-2x 10 | \coordinate(O) at (0,0); 11 | \coordinate (X) at (0.2,0.2); 12 | \coordinate (-2X) at (-0.4,-0.4); 13 | % axis 14 | \draw[axis] (-0.6,0) -- (0.6,0) node(xline)[right] {}; 15 | \draw[axis] (0,-0.6) -- (0,0.6) node(yline)[above] {}; 16 | % x, y, x+y 17 | \draw[important line, ->] (O) -- (X) node[right, text width=5em] {$\boldx$}; 18 | \draw[important line, ->, red] (O) -- (-2X) node[right, text width=5em] {$-2\boldx$}; 19 | \end{tikzpicture} 20 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/xpS.tex: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[ 2 | scale=5, 3 | axis/.style={<->, >=stealth'}, 4 | important line/.style={thick}, 5 | dotted line/.style={dotted, thick,red}, 6 | every node/.style={color=black} 7 | ] 8 | 9 | % define x,z 10 | \coordinate(O) at (0,0); 11 | \coordinate (X) at (-0.2,0.3); 12 | \coordinate (Z1) at (-0.3,-0.15); 13 | \coordinate (Z2) at (0.8,0.4); 14 | % axis 15 | 
\draw[axis] (-0.4,0) -- (0.9,0) node(xline)[right] {}; 16 | \draw[axis] (0,-0.3) -- (0,0.7) node(yline)[above] {}; 17 | % x, z 18 | \draw[important line,blue, ->] (O) -- (X) node[left] {$\boldx$}; 19 | \draw[important line] (Z1) -- (Z2) node[right] {$S$}; 20 | % label angle 21 | \draw[dotted line] (-0.03,0.045) -- (0.03,0.075); 22 | \draw[dotted line] (0.06,0.03) -- (0.03,0.075); 23 | \end{tikzpicture} 24 | -------------------------------------------------------------------------------- /source_code/figs_code/tikzfigs/xpz.tex: -------------------------------------------------------------------------------- 1 | \begin{tikzpicture}[ 2 | scale=5, 3 | axis/.style={<->, >=stealth'}, 4 | important line/.style={thick}, 5 | dotted line/.style={dotted, thick,red}, 6 | every node/.style={color=black} 7 | ] 8 | 9 | % define x,z 10 | \coordinate(O) at (0,0); 11 | \coordinate (X) at (-0.2,0.3); 12 | \coordinate (Z) at (0.6,0.3); 13 | % axis 14 | \draw[axis] (-0.4,0) -- (0.9,0) node(xline)[right] {}; 15 | \draw[axis] (0,-0.3) -- (0,0.7) node(yline)[above] {}; 16 | % x, z 17 | \draw[important line,blue, ->] (O) -- (X) node[left] {$\boldx$}; 18 | \draw[important line,blue, ->] (O) -- (Z) node[right] {$\boldz$}; 19 | % label angle 20 | \draw[dotted line] (-0.03,0.045) -- (0.03,0.075); 21 | \draw[dotted line] (0.06,0.03) -- (0.03,0.075); 22 | \end{tikzpicture} 23 | -------------------------------------------------------------------------------- /source_code/figs_code/unempl_sim.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/unempl_sim.pdf -------------------------------------------------------------------------------- /source_code/figs_code/vec.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/vec.pdf -------------------------------------------------------------------------------- /source_code/figs_code/vec_add.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/vec_add.pdf -------------------------------------------------------------------------------- /source_code/figs_code/vec_canon.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/vec_canon.pdf -------------------------------------------------------------------------------- /source_code/figs_code/vec_minus.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/vec_minus.pdf -------------------------------------------------------------------------------- /source_code/figs_code/vec_noncanon.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/vec_noncanon.pdf -------------------------------------------------------------------------------- /source_code/figs_code/vec_scalar.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/vec_scalar.pdf -------------------------------------------------------------------------------- /source_code/figs_code/vec_scalar.pdf.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/vec_scalar.pdf.pdf -------------------------------------------------------------------------------- /source_code/figs_code/vecs.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/vecs.pdf -------------------------------------------------------------------------------- /source_code/figs_code/vector_mean.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/vector_mean.pdf -------------------------------------------------------------------------------- /source_code/figs_code/xpS.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/xpS.pdf -------------------------------------------------------------------------------- /source_code/figs_code/xpz.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jstac/econometric_theory_slides/f9c8e10ab17cce078e887164bfbd500562523350/source_code/figs_code/xpz.pdf -------------------------------------------------------------------------------- /source_code/preamb.tex: -------------------------------------------------------------------------------- 1 | \documentclass[xcolor=dvipsnames]{beamer} % for hardcopy add 'trans' 2 | 3 | \mode<presentation> 4 | { 5 | \usetheme{Singapore} 6 | % or ... 7 | \setbeamercovered{transparent} 8 | % or whatever (possibly just delete it) 9 | } 10 | 11 | \usefonttheme{professionalfonts} 12 | %\usepackage[english]{babel} 13 | % or whatever 14 | %\usepackage[latin1]{inputenc} 15 | % or whatever 16 | %\usepackage{times} 17 | %\usepackage[T1]{fontenc} 18 | % Or whatever. Note that the encoding and the font should match. If T1 19 | % does not look nice, try deleting the line with the fontenc.
20 | 21 | %%%%%%%%%%%%%%%%%%%%%% start my preamble %%%%%%%%%%%%%%%%%%%%%% 22 | 23 | 24 | \addtobeamertemplate{navigation symbols}{}{% 25 | \usebeamerfont{footline}% 26 | \usebeamercolor[fg]{footline}% 27 | \hspace{1em}% 28 | \insertframenumber/\inserttotalframenumber 29 | } 30 | 31 | \setbeamercolor{footline}{fg=blue} 32 | \setbeamerfont{footline}{series=\bfseries} 33 | 34 | 35 | %\usepackage{epsfig} 36 | \usepackage{graphicx} 37 | \graphicspath{{./figs_code/}} 38 | 39 | \usepackage{amsmath, amssymb, amsthm} 40 | 41 | \usepackage{fancyvrb} 42 | 43 | \usepackage{tikz} 44 | \usetikzlibrary{arrows} 45 | \usetikzlibrary{calc} 46 | \usetikzlibrary{intersections} 47 | \usetikzlibrary{decorations} 48 | \usepackage{pgf} 49 | \usepackage{pgfplots} 50 | \pgfplotsset{compat=1.13} 51 | 52 | \usepackage{graphviz} 53 | 54 | \usepackage{verbatim} 55 | 56 | 57 | \usepackage{algorithmicx,algpseudocode} 58 | 59 | 60 | %font 61 | \usepackage{mathpazo} 62 | %\usepackage[usenames, dvipsnames]{color} 63 | 64 | %\usepackage[linesnumbered, ruled, lined]{algorithm2e} 65 | 66 | \usepackage{xr} 67 | \externaldocument[ET-]{et} 68 | 69 | 70 | \newcommand*{\theorembreak}{\usebeamertemplate{theorem end}\framebreak\usebeamertemplate{theorem begin}} 71 | 72 | \newcommand{\newtopic}[1]{\textcolor{Green}{\Large \bf #1}} 73 | \newcommand{\navy}[1]{\textcolor{Blue}{\bf #1}} 74 | \newcommand{\navymth}[1]{\textcolor{Blue}{#1}} 75 | \newcommand{\red}[1]{\textcolor{red}{#1}} 76 | 77 | 78 | \definecolor{pale}{RGB}{235, 235, 235} 79 | \definecolor{pale2}{RGB}{175,238,238} 80 | \definecolor{turquois4}{RGB}{0,134,139} 81 | 82 | % Typesetting code 83 | \definecolor{bg}{rgb}{0.95,0.95,0.95} 84 | \usepackage{minted} 85 | \usemintedstyle{friendly} 86 | \newminted{python}{mathescape,frame=lines,framesep=4mm,bgcolor=bg} 87 | \newminted{ipython}{mathescape,frame=lines,framesep=4mm,bgcolor=bg} 88 | \newminted{julia}{mathescape,frame=lines,framesep=4mm,bgcolor=bg} 89 | \newminted{c}{mathescape,linenos=true} 90 | \newminted{r}{mathescape, frame=none, baselinestretch=1, framesep=2mm} 91 | \renewcommand{\theFancyVerbLine}{\sffamily 92 | \textcolor[rgb]{0.5,0.5,1.0}{\scriptsize {\arabic{FancyVerbLine}}}} 93 | 94 | 95 | \usepackage{stmaryrd} 96 | 97 | \newcommand{\Fact}{\textcolor{Brown}{\bf Fact. }} 98 | \newcommand{\Facts}{\textcolor{Brown}{\bf Facts }} 99 | \newcommand{\keya}{\textcolor{turquois4}{\bf Key Idea. }} 100 | \newcommand{\Factnodot}{\textcolor{Brown}{\bf Fact }} 101 | \newcommand{\Eg}{\textcolor{ForestGreen}{Example. }} 102 | \newcommand{\Egs}{\textcolor{ForestGreen}{Examples. }} 103 | \newcommand{\Ex}{{\bf Ex. }} 104 | \newcommand{\Thm}{\textcolor{Brown}{\bf Theorem. }} 105 | \newcommand{\Prf}{\textcolor{turquois4}{\bf Proof.}} 106 | \newcommand{\Ass}{\textcolor{turquois4}{\bf Assumption.}} 107 | \newcommand{\Lem}{\textcolor{Brown}{\bf Lemma. 
}} 108 | 109 | %source code 110 | 111 | 112 | 113 | % cali 114 | \usepackage{mathrsfs} 115 | \usepackage{bbm} 116 | \usepackage{subfigure} 117 | 118 | \newcommand{\argmax}{\operatornamewithlimits{argmax}} 119 | \newcommand{\argmin}{\operatornamewithlimits{argmin}} 120 | 121 | \newcommand\T{{\mathpalette\raiseT\intercal}} 122 | \newcommand\raiseT[2]{\raisebox{0.25ex}{$#1#2$}} 123 | 124 | \DeclareMathOperator{\cl}{cl} 125 | %\DeclareMathOperator{\argmax}{argmax} 126 | \DeclareMathOperator{\interior}{int} 127 | \DeclareMathOperator{\Prob}{Prob} 128 | \DeclareMathOperator{\kernel}{ker} 129 | \DeclareMathOperator{\diag}{diag} 130 | \DeclareMathOperator{\sgn}{sgn} 131 | \DeclareMathOperator{\determinant}{det} 132 | \DeclareMathOperator{\trace}{trace} 133 | \DeclareMathOperator{\Span}{span} 134 | \DeclareMathOperator{\rank}{rank} 135 | \DeclareMathOperator{\cov}{cov} 136 | \DeclareMathOperator{\corr}{corr} 137 | \DeclareMathOperator{\range}{rng} 138 | \DeclareMathOperator{\var}{var} 139 | \DeclareMathOperator{\mse}{mse} 140 | \DeclareMathOperator{\se}{se} 141 | \DeclareMathOperator{\row}{row} 142 | \DeclareMathOperator{\col}{col} 143 | \DeclareMathOperator{\dimension}{dim} 144 | \DeclareMathOperator{\fracpart}{frac} 145 | \DeclareMathOperator{\proj}{proj} 146 | \DeclareMathOperator{\colspace}{colspace} 147 | 148 | \providecommand{\inner}[1]{\left\langle{#1}\right\rangle} 149 | 150 | % misc short cuts and symbols 151 | 152 | \newcommand{\st}{\ensuremath{\ \mathrm{s.t.}\ }} 153 | \newcommand{\setntn}[2]{ \{ #1 : #2 \} } 154 | \newcommand{\cf}[1]{ \lstinline|#1| } % NB: \lstinline comes from the listings package, which this preamble does not load 155 | \newcommand{\otms}[1]{ \leftidx{^\circ}{#1}} % NB: \leftidx comes from the leftidx package, which this preamble does not load 156 | 157 | \newcommand{\fore}{\therefore \quad} 158 | \newcommand{\tod}{\stackrel { d } {\to} } 159 | \newcommand{\tow}{\stackrel { w } {\to} } 160 | \newcommand{\toprob}{\stackrel { p } {\to} } 161 | \newcommand{\toms}{\stackrel { ms } {\to} } 162 | \newcommand{\eqdist}{\stackrel {\textrm{ \scriptsize{d} }} {=} } 163 | \newcommand{\iidsim}{\stackrel {\textrm{ {\sc iid }}} {\sim} } 164 | \newcommand{\1}{\mathbbm 1} 165 | \newcommand{\dee}{\,{\rm d}} 166 | \newcommand{\given}{\, | \,} 167 | \newcommand{\la}{\langle} 168 | \newcommand{\ra}{\rangle} 169 | 170 | \renewcommand{\rho}{\varrho} 171 | 172 | \newcommand{\htau}{ \hat \tau } 173 | \newcommand{\hgamma}{ \hat \gamma } 174 | 175 | \newcommand{\boldx}{ {\mathbf x} } 176 | \newcommand{\boldu}{ {\mathbf u} } 177 | \newcommand{\boldv}{ {\mathbf v} } 178 | \newcommand{\boldw}{ {\mathbf w} } 179 | \newcommand{\boldy}{ {\mathbf y} } 180 | \newcommand{\boldb}{ {\mathbf b} } 181 | \newcommand{\bolda}{ {\mathbf a} } 182 | \newcommand{\boldc}{ {\mathbf c} } 183 | \newcommand{\boldi}{ {\mathbf i} } 184 | \newcommand{\bolde}{ {\mathbf e} } 185 | \newcommand{\boldp}{ {\mathbf p} } 186 | \newcommand{\boldq}{ {\mathbf q} } 187 | \newcommand{\bolds}{ {\mathbf s} } 188 | \newcommand{\boldt}{ {\mathbf t} } 189 | \newcommand{\boldz}{ {\mathbf z} } 190 | 191 | \newcommand{\boldzero}{ {\mathbf 0} } 192 | \newcommand{\boldone}{ {\mathbf 1} } 193 | 194 | \newcommand{\boldalpha}{ {\boldsymbol \alpha} } 195 | \newcommand{\boldbeta}{ {\boldsymbol \beta} } 196 | \newcommand{\boldgamma}{ {\boldsymbol \gamma} } 197 | \newcommand{\boldtheta}{ {\boldsymbol \theta} } 198 | \newcommand{\boldxi}{ {\boldsymbol \xi} } 199 | \newcommand{\boldtau}{ {\boldsymbol \tau} } 200 | \newcommand{\boldepsilon}{ {\boldsymbol \epsilon} } 201 | \newcommand{\boldmu}{ {\boldsymbol \mu} } 202 | \newcommand{\boldSigma}{ {\boldsymbol \Sigma} } 203 |
\newcommand{\boldOmega}{ {\boldsymbol \Omega} } 204 | \newcommand{\boldPhi}{ {\boldsymbol \Phi} } 205 | \newcommand{\boldLambda}{ {\boldsymbol \Lambda} } 206 | \newcommand{\boldphi}{ {\boldsymbol \phi} } 207 | 208 | \newcommand{\Sigmax}{ {\boldsymbol \Sigma_{\boldx}}} 209 | \newcommand{\Sigmau}{ {\boldsymbol \Sigma_{\boldu}}} 210 | \newcommand{\Sigmaxinv}{ {\boldsymbol \Sigma_{\boldx}^{-1}}} 211 | \newcommand{\Sigmav}{ {\boldsymbol \Sigma_{\boldv \boldv}}} 212 | 213 | \newcommand{\hboldx}{ \hat {\mathbf x} } 214 | \newcommand{\hboldy}{ \hat {\mathbf y} } 215 | \newcommand{\hboldb}{ \hat {\mathbf b} } 216 | \newcommand{\hboldu}{ \hat {\mathbf u} } 217 | \newcommand{\hboldtheta}{ \hat {\boldsymbol \theta} } 218 | \newcommand{\hboldtau}{ \hat {\boldsymbol \tau} } 219 | \newcommand{\hboldmu}{ \hat {\boldsymbol \mu} } 220 | \newcommand{\hboldbeta}{ \hat {\boldsymbol \beta} } 221 | \newcommand{\hboldgamma}{ \hat {\boldsymbol \gamma} } 222 | \newcommand{\hboldSigma}{ \hat {\boldsymbol \Sigma} } 223 | 224 | \newcommand{\boldA}{\mathbf A} 225 | \newcommand{\boldB}{\mathbf B} 226 | \newcommand{\boldC}{\mathbf C} 227 | \newcommand{\boldD}{\mathbf D} 228 | \newcommand{\boldI}{\mathbf I} 229 | \newcommand{\boldL}{\mathbf L} 230 | \newcommand{\boldM}{\mathbf M} 231 | \newcommand{\boldP}{\mathbf P} 232 | \newcommand{\boldQ}{\mathbf Q} 233 | \newcommand{\boldR}{\mathbf R} 234 | \newcommand{\boldX}{\mathbf X} 235 | \newcommand{\boldU}{\mathbf U} 236 | \newcommand{\boldV}{\mathbf V} 237 | \newcommand{\boldW}{\mathbf W} 238 | \newcommand{\boldY}{\mathbf Y} 239 | \newcommand{\boldZ}{\mathbf Z} 240 | 241 | \newcommand{\bSigmaX}{ {\boldsymbol \Sigma_{\hboldbeta}} } 242 | \newcommand{\hbSigmaX}{ \mathbf{\hat \Sigma_{\hboldbeta}} } 243 | 244 | \newcommand{\RR}{\mathbbm R} 245 | \newcommand{\CC}{\mathbbm C} 246 | \newcommand{\NN}{\mathbbm N} 247 | \newcommand{\PP}{\mathbbm P} 248 | \newcommand{\EE}{\mathbbm E \nobreak\hspace{.1em}} 249 | \newcommand{\EEP}{\mathbbm E_P \nobreak\hspace{.1em}} 250 | \newcommand{\ZZ}{\mathbbm Z} 251 | \newcommand{\QQ}{\mathbbm Q} 252 | 253 | 254 | \newcommand{\XX}{\mathcal X} 255 | 256 | \newcommand{\aA}{\mathcal A} 257 | \newcommand{\fF}{\mathscr F} 258 | \newcommand{\bB}{\mathscr B} 259 | \newcommand{\iI}{\mathscr I} 260 | \newcommand{\rR}{\mathscr R} 261 | \newcommand{\dD}{\mathcal D} 262 | \newcommand{\lL}{\mathcal L} 263 | \newcommand{\llL}{\mathcal{H}_{\ell}} 264 | \newcommand{\gG}{\mathcal G} 265 | \newcommand{\hH}{\mathcal H} 266 | \newcommand{\nN}{\textrm{\sc n}} 267 | \newcommand{\lN}{\textrm{\sc ln}} 268 | \newcommand{\pP}{\mathscr P} 269 | \newcommand{\qQ}{\mathscr Q} 270 | \newcommand{\xX}{\mathcal X} 271 | 272 | \newcommand{\ddD}{\mathscr D} 273 | 274 | 275 | \newcommand{\R}{{\texttt R}} 276 | \newcommand{\risk}{\mathcal R} 277 | \newcommand{\Remp}{R_{{\rm emp}}} 278 | 279 | \newcommand*\diff{\mathop{}\!\mathrm{d}} 280 | \newcommand{\ess}{ \textrm{{\sc ess}} } 281 | \newcommand{\tss}{ \textrm{{\sc tss}} } 282 | \newcommand{\rss}{ \textrm{{\sc rss}} } 283 | \newcommand{\rssr}{ \textrm{{\sc rssr}} } 284 | \newcommand{\ussr}{ \textrm{{\sc ussr}} } 285 | \newcommand{\zdata}{\mathbf{z}_{\mathcal D}} 286 | \newcommand{\Pdata}{P_{\mathcal D}} 287 | \newcommand{\Pdatatheta}{P^{\mathcal D}_{\theta}} 288 | \newcommand{\Zdata}{Z_{\mathcal D}} 289 | 290 | 291 | \newcommand{\e}[1]{\mathbbm{E}[{#1}]} 292 | \newcommand{\p}[1]{\mathbbm{P}({#1})} 293 | 294 | %\theoremstyle{plain} 295 | %\newtheorem{axiom}{Axiom}[section] 296 | %\newtheorem{theorem}{Theorem}[section] 297 | 
%\newtheorem{corollary}{Corollary}[section] 298 | %\newtheorem{lemma}{Lemma}[section] 299 | %\newtheorem{proposition}{Proposition}[section] 300 | % 301 | %\theoremstyle{definition} 302 | %\newtheorem{definition}{Definition}[section] 303 | %\newtheorem{example}{Example}[section] 304 | %\newtheorem{remark}{Remark}[section] 305 | %\newtheorem{notation}{Notation}[section] 306 | %\newtheorem{assumption}{Assumption}[section] 307 | %\newtheorem{condition}{Condition}[section] 308 | %\newtheorem{exercise}{Ex.}[section] 309 | %\newtheorem{fact}{Fact}[section] 310 | 311 | % Bibliography 312 | \usepackage[authordate,uniquename=false,firstinits,backend=biber,maxcitenames=2]{biblatex-chicago} 313 | \DeclareFieldFormat[article]{title}{#1} 314 | \DeclareFieldFormat[inproceedings]{title}{#1} 315 | \addbibresource{et_newbib.bib} 316 | \renewcommand{\cite}{\textcite} 317 | 318 | 319 | 320 | \setlength{\parskip}{1.5ex plus0.5ex minus0.5ex} 321 | 322 | 323 | \setlength{\jot}{12pt} 324 | 325 | 326 | 327 | 328 | --------------------------------------------------------------------------------
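
Usage sketch (illustrative only; the file below is not part of the repository). The macros defined in preamb.tex are consumed by the Lecture_N.tex sources; a minimal slide built on this preamble might look as follows, assuming the preamble is pulled in with \input{preamb} and the document is compiled with pdflatex -shell-escape so that minted can invoke Pygments. The frame title, the displayed formula, and the Python snippet are invented for illustration; pythoncode is the environment generated by the \newminted{python}{...} line in the preamble.

% illustrative_slide.tex -- hypothetical example, not a repository file
\input{preamb}  % preamb.tex supplies \documentclass and all macros used below

\begin{document}

% frames containing minted/verbatim material must be marked fragile
\begin{frame}[fragile]
    \frametitle{The OLS estimator}  % invented title

    \navy{Least squares.} The estimator solves the normal equations:
    %
    \begin{equation*}
        \hboldbeta = (\boldX^\T \boldX)^{-1} \boldX^\T \boldy
    \end{equation*}

    \Fact If $\boldX$ has full column rank, then $\boldX^\T \boldX$ is invertible

    \begin{pythoncode}
import numpy as np

# invented data, purely for illustration: solve X'X b = X'y
X = np.random.randn(50, 3)
y = np.random.randn(50)
b = np.linalg.solve(X.T @ X, X.T @ y)
    \end{pythoncode}
\end{frame}

\end{document}

Here \navy, \Fact, and the bold-vector macros rely on the dvipsnames colors enabled in the \documentclass options, and bgcolor=bg in the minted environments refers to the bg color defined in the typesetting-code block of the preamble.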