├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── additional-cheatsheet ├── additional-cheatsheet-en.pdf ├── additional-cheatsheet-en.tex ├── additional-cheatsheet-es.pdf └── additional-cheatsheet-es.tex ├── econometrics-cheatsheet ├── econometrics-cheatsheet-en.pdf ├── econometrics-cheatsheet-en.tex ├── econometrics-cheatsheet-es.pdf └── econometrics-cheatsheet-es.tex └── time-series-cheatsheet ├── time-series-cheatsheet-en.pdf ├── time-series-cheatsheet-en.tex ├── time-series-cheatsheet-es.pdf └── time-series-cheatsheet-es.tex /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Core latex/pdflatex auxiliary files: 2 | *.aux 3 | *.lof 4 | *.log 5 | *.lot 6 | *.fls 7 | *.out 8 | *.toc 9 | *.fmt 10 | *.fot 11 | *.cb 12 | *.cb2 13 | .*.lb 14 | 15 | ## Intermediate documents: 16 | *.dvi 17 | *.xdv 18 | *-converted-to.* 19 | # these rules might exclude image files for figures etc. 
20 | # *.ps 21 | # *.eps 22 | # *.pdf 23 | 24 | ## Generated if empty string is given at "Please type another file name for output:" 25 | .pdf 26 | 27 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 28 | *.bbl 29 | *.bcf 30 | *.blg 31 | *-blx.aux 32 | *-blx.bib 33 | *.run.xml 34 | 35 | ## Build tool auxiliary files: 36 | *.fdb_latexmk 37 | *.synctex 38 | *.synctex(busy) 39 | *.synctex.gz 40 | *.synctex.gz(busy) 41 | *.pdfsync 42 | 43 | ## Build tool directories for auxiliary files 44 | # latexrun 45 | latex.out/ 46 | 47 | ## Auxiliary and intermediate files from other packages: 48 | # algorithms 49 | *.alg 50 | *.loa 51 | 52 | # achemso 53 | acs-*.bib 54 | 55 | # amsthm 56 | *.thm 57 | 58 | # beamer 59 | *.nav 60 | *.pre 61 | *.snm 62 | *.vrb 63 | 64 | # changes 65 | *.soc 66 | 67 | # comment 68 | *.cut 69 | 70 | # cprotect 71 | *.cpt 72 | 73 | # elsarticle (documentclass of Elsevier journals) 74 | *.spl 75 | 76 | # endnotes 77 | *.ent 78 | 79 | # fixme 80 | *.lox 81 | 82 | # feynmf/feynmp 83 | *.mf 84 | *.mp 85 | *.t[1-9] 86 | *.t[1-9][0-9] 87 | *.tfm 88 | 89 | #(r)(e)ledmac/(r)(e)ledpar 90 | *.end 91 | *.?end 92 | *.[1-9] 93 | *.[1-9][0-9] 94 | *.[1-9][0-9][0-9] 95 | *.[1-9]R 96 | *.[1-9][0-9]R 97 | *.[1-9][0-9][0-9]R 98 | *.eledsec[1-9] 99 | *.eledsec[1-9]R 100 | *.eledsec[1-9][0-9] 101 | *.eledsec[1-9][0-9]R 102 | *.eledsec[1-9][0-9][0-9] 103 | *.eledsec[1-9][0-9][0-9]R 104 | 105 | # glossaries 106 | *.acn 107 | *.acr 108 | *.glg 109 | *.glo 110 | *.gls 111 | *.glsdefs 112 | *.lzo 113 | *.lzs 114 | *.slg 115 | *.slo 116 | *.sls 117 | 118 | # uncomment this for glossaries-extra (will ignore makeindex's style files!) 
119 | # *.ist 120 | 121 | # gnuplot 122 | *.gnuplot 123 | *.table 124 | 125 | # gnuplottex 126 | *-gnuplottex-* 127 | 128 | # gregoriotex 129 | *.gaux 130 | *.glog 131 | *.gtex 132 | 133 | # htlatex 134 | *.4ct 135 | *.4tc 136 | *.idv 137 | *.lg 138 | *.trc 139 | *.xref 140 | 141 | # hyperref 142 | *.brf 143 | 144 | # knitr 145 | *-concordance.tex 146 | # TODO Uncomment the next line if you use knitr and want to ignore its generated tikz files 147 | # *.tikz 148 | *-tikzDictionary 149 | 150 | # listings 151 | *.lol 152 | 153 | # luatexja-ruby 154 | *.ltjruby 155 | 156 | # makeidx 157 | *.idx 158 | *.ilg 159 | *.ind 160 | 161 | # minitoc 162 | *.maf 163 | *.mlf 164 | *.mlt 165 | *.mtc[0-9]* 166 | *.slf[0-9]* 167 | *.slt[0-9]* 168 | *.stc[0-9]* 169 | 170 | # minted 171 | _minted* 172 | *.pyg 173 | 174 | # morewrites 175 | *.mw 176 | 177 | # newpax 178 | *.newpax 179 | 180 | # nomencl 181 | *.nlg 182 | *.nlo 183 | *.nls 184 | 185 | # pax 186 | *.pax 187 | 188 | # pdfpcnotes 189 | *.pdfpc 190 | 191 | # sagetex 192 | *.sagetex.sage 193 | *.sagetex.py 194 | *.sagetex.scmd 195 | 196 | # scrwfile 197 | *.wrt 198 | 199 | # svg 200 | svg-inkscape/ 201 | 202 | # sympy 203 | *.sout 204 | *.sympy 205 | sympy-plots-for-*.tex/ 206 | 207 | # pdfcomment 208 | *.upa 209 | *.upb 210 | 211 | # pythontex 212 | *.pytxcode 213 | pythontex-files-*/ 214 | 215 | # tcolorbox 216 | *.listing 217 | 218 | # thmtools 219 | *.loe 220 | 221 | # TikZ & PGF 222 | *.dpth 223 | *.md5 224 | *.auxlock 225 | 226 | # titletoc 227 | *.ptc 228 | 229 | # todonotes 230 | *.tdo 231 | 232 | # vhistory 233 | *.hst 234 | *.ver 235 | 236 | # easy-todo 237 | *.lod 238 | 239 | # xcolor 240 | *.xcp 241 | 242 | # xmpincl 243 | *.xmpi 244 | 245 | # xindy 246 | *.xdy 247 | 248 | # xypic precompiled matrices and outlines 249 | *.xyc 250 | *.xyd 251 | 252 | # endfloat 253 | *.ttt 254 | *.fff 255 | 256 | # Latexian 257 | TSWLatexianTemp* 258 | 259 | ## Editors: 260 | # WinEdt 261 | *.bak 262 | *.sav 263 | 264 | # Texpad 
265 | .texpadtmp 266 | 267 | # LyX 268 | *.lyx~ 269 | 270 | # Kile 271 | *.backup 272 | 273 | # gummi 274 | .*.swp 275 | 276 | # KBibTeX 277 | *~[0-9]* 278 | 279 | # TeXnicCenter 280 | *.tps 281 | 282 | # auto folder when using emacs and auctex 283 | ./auto/* 284 | *.el 285 | 286 | # expex forward references with \gathertags 287 | *-tags.tex 288 | 289 | # standalone packages 290 | *.sta 291 | 292 | # Makeindex log files 293 | *.lpz 294 | 295 | # xwatermark package 296 | *.xwm 297 | 298 | # REVTeX puts footnotes in the bibliography by default, unless the nofootinbib 299 | # option is specified. Footnotes are then stored in a file with suffix Notes.bib. 300 | # Uncomment the next line to have this generated file ignored. 301 | #*Notes.bib 302 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Attribution 4.0 International 2 | 3 | ======================================================================= 4 | 5 | Creative Commons Corporation ("Creative Commons") is not a law firm and 6 | does not provide legal services or legal advice. Distribution of 7 | Creative Commons public licenses does not create a lawyer-client or 8 | other relationship. Creative Commons makes its licenses and related 9 | information available on an "as-is" basis. Creative Commons gives no 10 | warranties regarding its licenses, any material licensed under their 11 | terms and conditions, or any related information. Creative Commons 12 | disclaims all liability for damages resulting from their use to the 13 | fullest extent possible. 14 | 15 | Using Creative Commons Public Licenses 16 | 17 | Creative Commons public licenses provide a standard set of terms and 18 | conditions that creators and other rights holders may use to share 19 | original works of authorship and other material subject to copyright 20 | and certain other rights specified in the public license below. 
The 21 | following considerations are for informational purposes only, are not 22 | exhaustive, and do not form part of our licenses. 23 | 24 | Considerations for licensors: Our public licenses are 25 | intended for use by those authorized to give the public 26 | permission to use material in ways otherwise restricted by 27 | copyright and certain other rights. Our licenses are 28 | irrevocable. Licensors should read and understand the terms 29 | and conditions of the license they choose before applying it. 30 | Licensors should also secure all rights necessary before 31 | applying our licenses so that the public can reuse the 32 | material as expected. Licensors should clearly mark any 33 | material not subject to the license. This includes other CC- 34 | licensed material, or material used under an exception or 35 | limitation to copyright. More considerations for licensors: 36 | wiki.creativecommons.org/Considerations_for_licensors 37 | 38 | Considerations for the public: By using one of our public 39 | licenses, a licensor grants the public permission to use the 40 | licensed material under specified terms and conditions. If 41 | the licensor's permission is not necessary for any reason--for 42 | example, because of any applicable exception or limitation to 43 | copyright--then that use is not regulated by the license. Our 44 | licenses grant only permissions under copyright and certain 45 | other rights that a licensor has authority to grant. Use of 46 | the licensed material may still be restricted for other 47 | reasons, including because others have copyright or other 48 | rights in the material. A licensor may make special requests, 49 | such as asking that all changes be marked or described. 50 | Although not required by our licenses, you are encouraged to 51 | respect those requests where reasonable. 
More considerations 52 | for the public: 53 | wiki.creativecommons.org/Considerations_for_licensees 54 | 55 | ======================================================================= 56 | 57 | Creative Commons Attribution 4.0 International Public License 58 | 59 | By exercising the Licensed Rights (defined below), You accept and agree 60 | to be bound by the terms and conditions of this Creative Commons 61 | Attribution 4.0 International Public License ("Public License"). To the 62 | extent this Public License may be interpreted as a contract, You are 63 | granted the Licensed Rights in consideration of Your acceptance of 64 | these terms and conditions, and the Licensor grants You such rights in 65 | consideration of benefits the Licensor receives from making the 66 | Licensed Material available under these terms and conditions. 67 | 68 | 69 | Section 1 -- Definitions. 70 | 71 | a. Adapted Material means material subject to Copyright and Similar 72 | Rights that is derived from or based upon the Licensed Material 73 | and in which the Licensed Material is translated, altered, 74 | arranged, transformed, or otherwise modified in a manner requiring 75 | permission under the Copyright and Similar Rights held by the 76 | Licensor. For purposes of this Public License, where the Licensed 77 | Material is a musical work, performance, or sound recording, 78 | Adapted Material is always produced where the Licensed Material is 79 | synched in timed relation with a moving image. 80 | 81 | b. Adapter's License means the license You apply to Your Copyright 82 | and Similar Rights in Your contributions to Adapted Material in 83 | accordance with the terms and conditions of this Public License. 84 | 85 | c. 
Copyright and Similar Rights means copyright and/or similar rights 86 | closely related to copyright including, without limitation, 87 | performance, broadcast, sound recording, and Sui Generis Database 88 | Rights, without regard to how the rights are labeled or 89 | categorized. For purposes of this Public License, the rights 90 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 91 | Rights. 92 | 93 | d. Effective Technological Measures means those measures that, in the 94 | absence of proper authority, may not be circumvented under laws 95 | fulfilling obligations under Article 11 of the WIPO Copyright 96 | Treaty adopted on December 20, 1996, and/or similar international 97 | agreements. 98 | 99 | e. Exceptions and Limitations means fair use, fair dealing, and/or 100 | any other exception or limitation to Copyright and Similar Rights 101 | that applies to Your use of the Licensed Material. 102 | 103 | f. Licensed Material means the artistic or literary work, database, 104 | or other material to which the Licensor applied this Public 105 | License. 106 | 107 | g. Licensed Rights means the rights granted to You subject to the 108 | terms and conditions of this Public License, which are limited to 109 | all Copyright and Similar Rights that apply to Your use of the 110 | Licensed Material and that the Licensor has authority to license. 111 | 112 | h. Licensor means the individual(s) or entity(ies) granting rights 113 | under this Public License. 114 | 115 | i. Share means to provide material to the public by any means or 116 | process that requires permission under the Licensed Rights, such 117 | as reproduction, public display, public performance, distribution, 118 | dissemination, communication, or importation, and to make material 119 | available to the public including in ways that members of the 120 | public may access the material from a place and at a time 121 | individually chosen by them. 122 | 123 | j. 
Sui Generis Database Rights means rights other than copyright 124 | resulting from Directive 96/9/EC of the European Parliament and of 125 | the Council of 11 March 1996 on the legal protection of databases, 126 | as amended and/or succeeded, as well as other essentially 127 | equivalent rights anywhere in the world. 128 | 129 | k. You means the individual or entity exercising the Licensed Rights 130 | under this Public License. Your has a corresponding meaning. 131 | 132 | 133 | Section 2 -- Scope. 134 | 135 | a. License grant. 136 | 137 | 1. Subject to the terms and conditions of this Public License, 138 | the Licensor hereby grants You a worldwide, royalty-free, 139 | non-sublicensable, non-exclusive, irrevocable license to 140 | exercise the Licensed Rights in the Licensed Material to: 141 | 142 | a. reproduce and Share the Licensed Material, in whole or 143 | in part; and 144 | 145 | b. produce, reproduce, and Share Adapted Material. 146 | 147 | 2. Exceptions and Limitations. For the avoidance of doubt, where 148 | Exceptions and Limitations apply to Your use, this Public 149 | License does not apply, and You do not need to comply with 150 | its terms and conditions. 151 | 152 | 3. Term. The term of this Public License is specified in Section 153 | 6(a). 154 | 155 | 4. Media and formats; technical modifications allowed. The 156 | Licensor authorizes You to exercise the Licensed Rights in 157 | all media and formats whether now known or hereafter created, 158 | and to make technical modifications necessary to do so. The 159 | Licensor waives and/or agrees not to assert any right or 160 | authority to forbid You from making technical modifications 161 | necessary to exercise the Licensed Rights, including 162 | technical modifications necessary to circumvent Effective 163 | Technological Measures. For purposes of this Public License, 164 | simply making modifications authorized by this Section 2(a) 165 | (4) never produces Adapted Material. 166 | 167 | 5. 
Downstream recipients. 168 | 169 | a. Offer from the Licensor -- Licensed Material. Every 170 | recipient of the Licensed Material automatically 171 | receives an offer from the Licensor to exercise the 172 | Licensed Rights under the terms and conditions of this 173 | Public License. 174 | 175 | b. No downstream restrictions. You may not offer or impose 176 | any additional or different terms or conditions on, or 177 | apply any Effective Technological Measures to, the 178 | Licensed Material if doing so restricts exercise of the 179 | Licensed Rights by any recipient of the Licensed 180 | Material. 181 | 182 | 6. No endorsement. Nothing in this Public License constitutes or 183 | may be construed as permission to assert or imply that You 184 | are, or that Your use of the Licensed Material is, connected 185 | with, or sponsored, endorsed, or granted official status by, 186 | the Licensor or others designated to receive attribution as 187 | provided in Section 3(a)(1)(A)(i). 188 | 189 | b. Other rights. 190 | 191 | 1. Moral rights, such as the right of integrity, are not 192 | licensed under this Public License, nor are publicity, 193 | privacy, and/or other similar personality rights; however, to 194 | the extent possible, the Licensor waives and/or agrees not to 195 | assert any such rights held by the Licensor to the limited 196 | extent necessary to allow You to exercise the Licensed 197 | Rights, but not otherwise. 198 | 199 | 2. Patent and trademark rights are not licensed under this 200 | Public License. 201 | 202 | 3. To the extent possible, the Licensor waives any right to 203 | collect royalties from You for the exercise of the Licensed 204 | Rights, whether directly or through a collecting society 205 | under any voluntary or waivable statutory or compulsory 206 | licensing scheme. In all other cases the Licensor expressly 207 | reserves any right to collect such royalties. 208 | 209 | 210 | Section 3 -- License Conditions. 
211 | 212 | Your exercise of the Licensed Rights is expressly made subject to the 213 | following conditions. 214 | 215 | a. Attribution. 216 | 217 | 1. If You Share the Licensed Material (including in modified 218 | form), You must: 219 | 220 | a. retain the following if it is supplied by the Licensor 221 | with the Licensed Material: 222 | 223 | i. identification of the creator(s) of the Licensed 224 | Material and any others designated to receive 225 | attribution, in any reasonable manner requested by 226 | the Licensor (including by pseudonym if 227 | designated); 228 | 229 | ii. a copyright notice; 230 | 231 | iii. a notice that refers to this Public License; 232 | 233 | iv. a notice that refers to the disclaimer of 234 | warranties; 235 | 236 | v. a URI or hyperlink to the Licensed Material to the 237 | extent reasonably practicable; 238 | 239 | b. indicate if You modified the Licensed Material and 240 | retain an indication of any previous modifications; and 241 | 242 | c. indicate the Licensed Material is licensed under this 243 | Public License, and include the text of, or the URI or 244 | hyperlink to, this Public License. 245 | 246 | 2. You may satisfy the conditions in Section 3(a)(1) in any 247 | reasonable manner based on the medium, means, and context in 248 | which You Share the Licensed Material. For example, it may be 249 | reasonable to satisfy the conditions by providing a URI or 250 | hyperlink to a resource that includes the required 251 | information. 252 | 253 | 3. If requested by the Licensor, You must remove any of the 254 | information required by Section 3(a)(1)(A) to the extent 255 | reasonably practicable. 256 | 257 | 4. If You Share Adapted Material You produce, the Adapter's 258 | License You apply must not prevent recipients of the Adapted 259 | Material from complying with this Public License. 260 | 261 | 262 | Section 4 -- Sui Generis Database Rights. 
263 | 264 | Where the Licensed Rights include Sui Generis Database Rights that 265 | apply to Your use of the Licensed Material: 266 | 267 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 268 | to extract, reuse, reproduce, and Share all or a substantial 269 | portion of the contents of the database; 270 | 271 | b. if You include all or a substantial portion of the database 272 | contents in a database in which You have Sui Generis Database 273 | Rights, then the database in which You have Sui Generis Database 274 | Rights (but not its individual contents) is Adapted Material; and 275 | 276 | c. You must comply with the conditions in Section 3(a) if You Share 277 | all or a substantial portion of the contents of the database. 278 | 279 | For the avoidance of doubt, this Section 4 supplements and does not 280 | replace Your obligations under this Public License where the Licensed 281 | Rights include other Copyright and Similar Rights. 282 | 283 | 284 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 285 | 286 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 287 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 288 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 289 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 290 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 291 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 292 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 293 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 294 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 295 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 296 | 297 | b. 
TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 298 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 299 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 300 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 301 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 302 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 303 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 304 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 305 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 306 | 307 | c. The disclaimer of warranties and limitation of liability provided 308 | above shall be interpreted in a manner that, to the extent 309 | possible, most closely approximates an absolute disclaimer and 310 | waiver of all liability. 311 | 312 | 313 | Section 6 -- Term and Termination. 314 | 315 | a. This Public License applies for the term of the Copyright and 316 | Similar Rights licensed here. However, if You fail to comply with 317 | this Public License, then Your rights under this Public License 318 | terminate automatically. 319 | 320 | b. Where Your right to use the Licensed Material has terminated under 321 | Section 6(a), it reinstates: 322 | 323 | 1. automatically as of the date the violation is cured, provided 324 | it is cured within 30 days of Your discovery of the 325 | violation; or 326 | 327 | 2. upon express reinstatement by the Licensor. 328 | 329 | For the avoidance of doubt, this Section 6(b) does not affect any 330 | right the Licensor may have to seek remedies for Your violations 331 | of this Public License. 332 | 333 | c. For the avoidance of doubt, the Licensor may also offer the 334 | Licensed Material under separate terms or conditions or stop 335 | distributing the Licensed Material at any time; however, doing so 336 | will not terminate this Public License. 337 | 338 | d. 
Sections 1, 5, 6, 7, and 8 survive termination of this Public 339 | License. 340 | 341 | 342 | Section 7 -- Other Terms and Conditions. 343 | 344 | a. The Licensor shall not be bound by any additional or different 345 | terms or conditions communicated by You unless expressly agreed. 346 | 347 | b. Any arrangements, understandings, or agreements regarding the 348 | Licensed Material not stated herein are separate from and 349 | independent of the terms and conditions of this Public License. 350 | 351 | 352 | Section 8 -- Interpretation. 353 | 354 | a. For the avoidance of doubt, this Public License does not, and 355 | shall not be interpreted to, reduce, limit, restrict, or impose 356 | conditions on any use of the Licensed Material that could lawfully 357 | be made without permission under this Public License. 358 | 359 | b. To the extent possible, if any provision of this Public License is 360 | deemed unenforceable, it shall be automatically reformed to the 361 | minimum extent necessary to make it enforceable. If the provision 362 | cannot be reformed, it shall be severed from this Public License 363 | without affecting the enforceability of the remaining terms and 364 | conditions. 365 | 366 | c. No term or condition of this Public License will be waived and no 367 | failure to comply consented to unless expressly agreed to by the 368 | Licensor. 369 | 370 | d. Nothing in this Public License constitutes or may be interpreted 371 | as a limitation upon, or waiver of, any privileges and immunities 372 | that apply to the Licensor or You, including from the legal 373 | processes of any jurisdiction or authority. 374 | 375 | 376 | ======================================================================= 377 | 378 | Creative Commons is not a party to its public licenses. 
379 | Notwithstanding, Creative Commons may elect to apply one of its public 380 | licenses to material it publishes and in those instances will be 381 | considered the “Licensor.” The text of the Creative Commons public 382 | licenses is dedicated to the public domain under the CC0 Public Domain 383 | Dedication. Except for the limited purpose of indicating that material 384 | is shared under a Creative Commons public license or as otherwise 385 | permitted by the Creative Commons policies published at 386 | creativecommons.org/policies, Creative Commons does not authorize the 387 | use of the trademark "Creative Commons" or any other trademark or logo 388 | of Creative Commons without its prior written consent including, 389 | without limitation, in connection with any unauthorized modifications 390 | to any of its public licenses or any other arrangements, 391 | understandings, or agreements concerning use of licensed material. For 392 | the avoidance of doubt, this paragraph does not form part of the public 393 | licenses. 394 | 395 | Creative Commons may be contacted at creativecommons.org. 396 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # The Econometrics Cheat Sheet Project 2 | 3 | Econometrics cheat sheets created using $\LaTeX$: 4 | 5 | * **Econometrics Cheat Sheet**: Basic concepts, OLS assumptions, properties, interpretation, error measurements, hypothesis testing, confidence intervals, dummy variables, structural change, popular problems, and more! 6 | * **Time Series Cheat Sheet**: Series components, seasonality, autocorrelation, stationarity, cointegration, and heteroskedasticity in time series. 
7 | * **Additional Cheat Sheet**: OLS matrix notation, variable omission, proxy and instrumental variables, TSLS, information criteria, hypothesis testing (+), functional form, logistic regression, statistical definitions, VAR, SVAR, VECM. 8 | 9 | :bulb: I am currently pursuing a PhD at Universidad Rey Juan Carlos (Madrid, Spain). Also, I am a [researcher and professor](https://gestion2.urjc.es/pdi/ver/marcelo.moreno) at the same institution. Collaboration proposals and academic stay offers at national/international universities are welcome! :rocket: 11 | 11 | :triangular_flag_on_post: [LinkedIn](https://www.linkedin.com/in/marcelomorenop/). Please send me a message when connecting, or I will ignore the request. 12 | 13 | ## Download links 14 | 15 | | Econometrics | PDF | TeX | 16 | | :---: | :---: | :---: | 17 | | English :uk: | [`CS-25.08.1`](https://raw.githubusercontent.com/marcelomijas/econometrics-cheatsheet/main/econometrics-cheatsheet/econometrics-cheatsheet-en.pdf) | [`CS-25.08.1`](econometrics-cheatsheet/econometrics-cheatsheet-en.tex) 18 | | Spanish :es: | [`CS-25.08.1`](https://raw.githubusercontent.com/marcelomijas/econometrics-cheatsheet/main/econometrics-cheatsheet/econometrics-cheatsheet-es.pdf) | [`CS-25.08.1`](econometrics-cheatsheet/econometrics-cheatsheet-es.tex) 19 | 20 | | Time Series | PDF | TeX | 21 | | :---: | :---: | :---: | 22 | | English :uk: | [`TS-25.10`](https://raw.githubusercontent.com/marcelomijas/econometrics-cheatsheet/main/time-series-cheatsheet/time-series-cheatsheet-en.pdf) | [`TS-25.10`](time-series-cheatsheet/time-series-cheatsheet-en.tex) 23 | | Spanish :es: | [`TS-25.10`](https://raw.githubusercontent.com/marcelomijas/econometrics-cheatsheet/main/time-series-cheatsheet/time-series-cheatsheet-es.pdf) | [`TS-25.10`](time-series-cheatsheet/time-series-cheatsheet-es.tex) 24 | 25 | | Additional | PDF | TeX | 26 | | :---: | :---: | :---: | 27 | | English :uk: | 
[`ADD-25.08.1`](https://raw.githubusercontent.com/marcelomijas/econometrics-cheatsheet/main/additional-cheatsheet/additional-cheatsheet-en.pdf) | [`ADD-25.08.1`](additional-cheatsheet/additional-cheatsheet-en.tex) 28 | | Spanish :es: | [`ADD-25.08.1`](https://raw.githubusercontent.com/marcelomijas/econometrics-cheatsheet/main/additional-cheatsheet/additional-cheatsheet-es.pdf) | [`ADD-25.08.1`](additional-cheatsheet/additional-cheatsheet-es.tex) 29 | 30 | Complete set (PDF and TeX, all languages): [`ZIP`](https://github.com/marcelomijas/econometrics-cheatsheet/archive/refs/heads/main.zip) 31 | 32 | :printer: If you want to print them two-sided, select the *Flip on Short Edge* option on your printer. 33 | 34 | :globe_with_meridians: Do you want to translate any of these cheat sheets into your language? Open an issue and I will provide instructions. 35 | 36 | :computer: Do you use R? Check out [*Applied Econometrics with R*](https://www.zeileis.org/teaching/AER/) by Christian Kleiber and Achim Zeileis! 37 | 38 | ## Frequently Asked Questions (FAQ) 39 | 40 | ### What does $\text{resid}$ $x_{j}$ mean? 41 | 42 | Those are the residuals from an OLS regression of $x_{j}$ on all the other $x$'s. 43 | 44 | ### Why is $\beta_{0}$ the constant term? My reference manual/professor's definition of the econometric model is different. 45 | 46 | There is some debate about the correct way to name the coefficients, their sub-index and the sub-index of the variables of a model. The naming can have an impact on how some statistics, like the adjusted R-squared, and some tests, like the F test, are written. 47 | 48 | For example, while some econometricians write the multiple regression model with a constant term like this: 49 | 50 | $$y_{i} = \beta_{0} + \beta_{1} x_{1i} + ... + \beta_{k} x_{ki} + u_{i} \quad (1)$$ 51 | 52 | Others refer to that same econometric model as: 53 | 54 | $$y_{i} = \beta_{1} + \beta_{2} x_{2i} + ... 
+ \beta_{K} x_{Ki} + u_{i} \quad (2)$$ 55 | 56 | And others refer to it as: 57 | 58 | $$y_{i} = \alpha + \beta_1 x_{1i} + ... + \beta_{k} x_{ki} + u_{i} \quad (3)$$ 59 | 60 | All the above are equally valid representations of the multiple regression model. In specification $(1)$, $\beta_{0}$ represents the constant term, while in specifications $(2)$ and $(3)$, it is represented by $\beta_{1}$ and $\alpha$, respectively. 61 | 62 | In this project, the main specification used is the first $(1)$, so we can say that there are $k$ independent variables and $k + 1$ coefficients (including the constant term). The same can be said of specification $(3)$, which is occasionally used in the project. There are no differences in the definition of the statistics and test formulas between specifications $(1)$ and $(3)$. 63 | 64 | Specification $(2)$ is different from the rest, since $K \neq k$. In this specification, it could be said that there are $K - 1$ independent variables and $K$ coefficients (including the constant term). 65 | 66 | For specification $(2)$ users, not everything is lost. There is a relation between these three specifications: $K = k + 1$, so $k = K - 1$. This way, users can "translate" formulas between representations. For example, the adjusted R-squared: 67 | 68 | $$(1, 3) \quad \overline{R}^{2} = 1 - \frac{n - 1}{n - k - 1} \cdot (1 - R^{2})$$ 69 | 70 | $$(2) \quad \overline{R}^{2} = 1 - \frac{n - 1}{n - (K - 1) - 1} \cdot (1 - R^{2}) =$$ 71 | 72 | $$= 1 - \frac{n - 1}{n - K} \cdot (1 - R^{2})$$ 73 | 74 | ### Where is the nonmatrix version of the standard error of the $\hat{\beta}$'s? 75 | 76 | For space reasons, the version included in the cheat sheets is the matrix one. It is perfectly valid and equal to the nonmatrix version. 
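The two versions can be checked against each other numerically. Below is a minimal sketch, assuming NumPy is available and using simulated data (all variable names are illustrative, not from the cheat sheets): it computes the matrix-form standard errors from $\hat{\sigma}_{u}^{2} (X'X)^{-1}$ and the nonmatrix formula given below, and also makes explicit what $\text{resid}$ $x_{j}$ from the first FAQ is.

```python
import numpy as np

rng = np.random.default_rng(0)
n, k = 200, 3
X = rng.normal(size=(n, k))
y = 1.0 + X @ np.array([0.5, -1.0, 2.0]) + rng.normal(size=n)

# Specification (1): k regressors plus a constant term
Z = np.column_stack([np.ones(n), X])
beta_hat = np.linalg.solve(Z.T @ Z, Z.T @ y)
u_hat = y - Z @ beta_hat
sigma2_hat = u_hat @ u_hat / (n - k - 1)

# Matrix version: square root of the diagonal of sigma2_hat * (Z'Z)^-1
se_matrix = np.sqrt(sigma2_hat * np.diag(np.linalg.inv(Z.T @ Z)))

# Nonmatrix version for j = 1: regress x_1 on the other x's (and the
# constant) and keep the residuals -- this is "resid x_j" from the first FAQ
x1 = X[:, 0]
others = np.column_stack([np.ones(n), X[:, 1:]])
gamma_hat = np.linalg.lstsq(others, x1, rcond=None)[0]
resid_x1 = x1 - others @ gamma_hat
SST_1 = np.sum((x1 - x1.mean()) ** 2)
R2_1 = 1.0 - (resid_x1 @ resid_x1) / SST_1
se_nonmatrix = np.sqrt(sigma2_hat / (SST_1 * (1.0 - R2_1)))

print(np.isclose(se_matrix[1], se_nonmatrix))  # True: both versions coincide
```

The equality holds because $\text{SST}_{j} \cdot (1 - R_{j}^{2})$ is exactly the sum of squared residuals from regressing $x_{j}$ on the other regressors, which is the quantity sitting behind the $(j, j)$ entry of $(X'X)^{-1}$. The same check works for any $j$ by residualising $x_{j}$ on the remaining regressors.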
77 | 78 | The nonmatrix version: 79 | 80 | $$\mathrm{se}(\hat{\beta}_{j}) = \sqrt{\frac{\hat{\sigma}_{u}^{2}}{\text{SST}_{j} \cdot (1 - R_{j}^{2})}} \quad , \quad j = 1, ..., k$$ 81 | 82 | ### Why don't they have 2 or 4 pages each? 83 | 84 | Each of them is 3 pages long, and there are no plans to change that for the time being. 85 | 86 | I understand that if you print them two-sided, the second page ends up with an empty side. Consider using this space for your notes. 87 | 88 | Currently, I am very comfortable with the content and font size used. Also, all cheat sheets are made in a way that you can combine any page of one with any page of another, or skip them. 89 | 90 | ## Resources 91 | 92 | In addition to the notes taken from the [Degree in Economics](https://www.urjc.es/universidad/calidad/560-economia) and [Master in Modern Economic Analysis](https://www.urjc.es/estudios/master/786-analisis-economico-moderno) by Universidad Rey Juan Carlos, and the [Master in Applied Statistics](https://www.maximaformacion.es/masters/master-de-estadistica-aplicada-con-r-software/) by Máxima Formación and Universidad Nebrija, the books used are: 93 | 94 | [1] Baltagi, B. H. (2011). *Econometrics*. New York: Springer. 95 | 96 | [2] Gujarati, D. N., Porter, D. C., & Gunasekar, S. (2012). *Basic econometrics*. Tata McGraw-Hill Education. 97 | 98 | [3] James, G., Witten, D., Hastie, T., & Tibshirani, R. (2013). *An introduction to statistical learning*. New York: Springer. 99 | 100 | [4] Lütkepohl, H., & Krätzig, M. (Eds.). (2004). *Applied Time Series Econometrics*. Cambridge: Cambridge University Press. 101 | 102 | [5] Oracle Crystal Ball Reference and Examples Guide. *Theil's U*. https://docs.oracle.com/cd/E57185_01/CBREG/ch06s02s03s04.html. Accessed May 18, 2025. 103 | 104 | [6] Pfaff, B. (2011). *Analysis of integrated and cointegrated time series with R*. New York: Springer. 105 | 106 | [7] Ruiz-Maya, L., & Pliego, F. J. M. (2004). *Fundamentos de inferencia estadística*. 
AC. 107 | 108 | [8] Stock, J. H., & Watson, M. W. (2012). *Introduction to econometrics*. New York: Pearson. 109 | 110 | [9] Tilburg Science Hub. *Panel Data*. https://tilburgsciencehub.com/topics/analyze/causal-inference/panel-data/#. Accessed May 18, 2025. 111 | 112 | [10] Wooldridge, J. M. (2015). *Introductory econometrics: A modern approach*. Cengage Learning. 113 | 114 | ## Contributions 115 | 116 | * Reddit user \_bheg_ - Pointed out the importance of including strong and weak exogeneity and their consequences for the bias and consistency properties of OLS. 117 | 118 | * [Matteo Girelli](https://www.linkedin.com/in/matteo-girelli/) - Contributed a revision and some ideas, pointed out the importance of considering ergodicity, and reviewed a new time-series version that included the changes. 119 | 120 | ## Support the project 121 | 122 | The first way to help the project is to **directly support the authors of the manuals that are included in the resources section** (for example, by buying their works). Every one of the authors of the manuals is a wonderful mind who contributes a lot to econometrics and statistics. Another great way to support the project is by sharing it and giving it a :star:!
123 | 124 | A 2024 snapshot of this project is also published on the Universidad Rey Juan Carlos Institutional Repository: https://hdl.handle.net/10115/44997 125 | -------------------------------------------------------------------------------- /additional-cheatsheet/additional-cheatsheet-en.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marcelomijas/econometrics-cheatsheet/1f49cd08d8232e3eb9c3357fa47327fd46dddae5/additional-cheatsheet/additional-cheatsheet-en.pdf -------------------------------------------------------------------------------- /additional-cheatsheet/additional-cheatsheet-en.tex: -------------------------------------------------------------------------------- 1 | % !TeX spellcheck = en_GB 2 | % !TeX encoding = UTF-8 3 | \documentclass[10pt, a4paper, landscape]{article} 4 | 5 | % ----- packages ----- 6 | \usepackage{amsmath} % AMS mathematical facilities for LaTeX 7 | \usepackage{amssymb} 8 | \usepackage{enumitem} % Control layout of itemize, enumerate, description 9 | \usepackage{fancyhdr} % Extensive control of page headers and footers in LaTeX2 10 | \usepackage{geometry} % Flexible and complete interface to document dimensions 11 | \usepackage{graphicx} % Enhanced support for graphics 12 | \usepackage{hyperref} % Extensive support for hypertext in LaTeX 13 | \usepackage{multicol} % Intermix single and multiple columns 14 | \usepackage{parskip} % Layout with zero \parindent, non-zero \parskip 15 | \usepackage{tikz} % Create PostScript and PDF graphics in TeX 16 | \usepackage{titlesec} % Select alternative section titles 17 | 18 | % ----- pdf metadata ----- 19 | \hypersetup{ 20 | pdftitle={Additional Cheat Sheet}, 21 | pdfsubject={The Econometrics Cheat Sheet Project - marcelomijas - CC-BY-4.0}, 22 | pdfauthor={Marcelo Moreno Porras}, 23 | pdfkeywords={statistics, latex, economics, cheatsheet, econometrics, ols-regression, economic-modelling}, 24 |
pdfduplex={DuplexFlipShortEdge} 25 | } 26 | 27 | % ----- random seed ----- 28 | \pgfmathsetseed{10} 29 | 30 | % ----- custom commands ----- 31 | \DeclareMathOperator{\E}{E} 32 | \DeclareMathOperator{\Var}{Var} 33 | \DeclareMathOperator{\se}{se} 34 | \DeclareMathOperator{\Cov}{Cov} 35 | \DeclareMathOperator{\Corr}{Corr} 36 | \DeclareMathOperator{\rk}{rk} 37 | \DeclareMathOperator{\Cr}{Cr} 38 | \DeclareMathOperator{\AIC}{AIC} 39 | \DeclareMathOperator{\HQ}{HQ} 40 | \DeclareMathOperator{\BIC}{BIC} 41 | \newcommand{\SSR}{\text{SSR}} 42 | \newcommand{\SSE}{\text{SSE}} 43 | \newcommand{\SST}{\text{SST}} 44 | \newcommand{\trend}{\text{Trend}_{t}} 45 | \newcommand{\const}{\text{const}} 46 | 47 | % ----- page customization ----- 48 | \geometry{margin=1cm} % margins config 49 | \pagenumbering{gobble} % remove page numeration 50 | \setlength{\parskip}{0cm} % paragraph spacing 51 | % title spacing 52 | \titlespacing{\section}{0pt}{2ex}{1ex} 53 | \titlespacing{\subsection}{0pt}{1ex}{0ex} 54 | \titlespacing{\subsubsection}{0pt}{0.5ex}{0ex} 55 | 56 | % ----- footer ----- 57 | \pagestyle{fancy} 58 | \renewcommand{\headrulewidth}{0pt} 59 | \cfoot{\href{https://github.com/marcelomijas/econometrics-cheatsheet}{\normalfont \footnotesize ADD-25.08.1-EN - github.com/marcelomijas/econometrics-cheatsheet - CC-BY-4.0 license}} 60 | \setlength{\footskip}{12pt} 61 | 62 | % ----- document ----- 63 | \begin{document} 64 | 65 | \begin{multicols}{3} 66 | 67 | \begin{center} 68 | \textbf{\LARGE \href{https://github.com/marcelomijas/econometrics-cheatsheet}{Additional Cheat Sheet}} 69 | 70 | {\footnotesize By Marcelo Moreno Porras - Universidad Rey Juan Carlos} 71 | 72 | {\footnotesize The Econometrics Cheat Sheet Project} 73 | \end{center} 74 | 75 | \section*{OLS matrix notation} 76 | 77 | The general econometric model: 78 | 79 | \begin{center} 80 | \( y_{i} = \beta_{0} + \beta_{1} x_{1i} + \cdots + \beta_{k} x_{ki} + u_{i} \) 81 | \end{center} 82 | 83 | Can be written in matrix notation as: 84 | 
85 | \begin{center} 86 | \( y = X \beta + u \) 87 | \end{center} 88 | 89 | Let's call \( \hat{u} \) the vector of estimated residuals \( (\hat{u} \neq u) \): 90 | 91 | \begin{center} 92 | \( \hat{u} = y - X \hat{\beta} \) 93 | \end{center} 94 | 95 | The \textbf{objective} of OLS is to \textbf{minimise} the \( \SSR \): 96 | 97 | \begin{center} 98 | \( \min \SSR = \min \sum_{i = 1}^{n} \hat{u}_{i}^{2} = \min \hat{u}^{\top} \hat{u} \) 99 | \end{center} 100 | 101 | \begin{itemize}[leftmargin=*] 102 | \item Defining \( \hat{u}^{\top} \hat{u} \): 103 | \begin{center} 104 | \( \hat{u}^{\top} \hat{u} = (y - X \hat{\beta})^{\top} (y - X \hat{\beta}) \) 105 | 106 | \( = y^{\top} y - 2 \hat{\beta}^{\top} X^{\top} y + \hat{\beta}^{\top} X^{\top} X \hat{\beta} \) 107 | \end{center} 108 | \item Minimising \( \hat{u}^{\top} \hat{u} \): 109 | \begin{center} 110 | \( \frac{\partial \hat{u}^{\top} \hat{u}}{\partial \hat{\beta}} = -2 X^{\top} y + 2 X^{\top} X \hat{\beta} = 0 \) 111 | 112 | \( \hat{\beta} = (X^{\top} X)^{-1} (X^{\top} y) \) 113 | 114 | \scalebox{0.85}{ 115 | \( 116 | \begin{bmatrix} 117 | \hat{\beta}_{0} \\ 118 | \hat{\beta}_{1} \\ 119 | \vdots \\ 120 | \hat{\beta}_{k} 121 | \end{bmatrix} 122 | = 123 | \begin{bmatrix} 124 | n & \sum x_{1} & \hdots & \sum x_{k} \\ 125 | \sum x_{1} & \sum x_{1}^{2} & \hdots & \sum x_{1} x_{k} \\ 126 | \vdots & \vdots & \ddots & \vdots \\ 127 | \sum x_{k} & \sum x_{k} x_{1} & \hdots & \sum x_{k}^{2} 128 | \end{bmatrix}^{-1}\cdot 129 | \begin{bmatrix} 130 | \sum y \\ 131 | \sum y x_{1} \\ 132 | \vdots \\ 133 | \sum y x_{k} 134 | \end{bmatrix} 135 | \) 136 | } 137 | \end{center} 138 | The second derivative \( \frac{\partial^{2} \hat{u}^{\top} \hat{u}}{\partial \hat{\beta}^{2}} = X^{\top} X > 0 \) (is a min.)
139 | \end{itemize} 140 | 141 | \section*{Variance-covariance matrix of \( \hat{\beta} \)} 142 | 143 | Has the following shape: 144 | 145 | \begin{center} 146 | \( \Var(\hat{\beta}) = \hat{\sigma}_{u}^{2} \cdot (X^{\top} X)^{-1} \) 147 | \end{center} 148 | 149 | \begin{center} 150 | \scalebox{0.85}{ 151 | \( = 152 | \begin{bmatrix} 153 | \Var(\hat{\beta}_{0}) & \Cov(\hat{\beta}_{0}, \hat{\beta}_{1}) & \hdots & \Cov(\hat{\beta}_{0}, \hat{\beta}_{k}) \\ 154 | \Cov(\hat{\beta}_{1}, \hat{\beta}_{0}) & \Var(\hat{\beta}_{1}) & \hdots & \Cov(\hat{\beta}_{1}, \hat{\beta}_{k}) \\ 155 | \vdots & \vdots & \ddots & \vdots \\ 156 | \Cov(\hat{\beta}_{k}, \hat{\beta}_{0}) & \Cov(\hat{\beta}_{k}, \hat{\beta}_{1}) & \hdots & \Var(\hat{\beta}_{k}) 157 | \end{bmatrix} 158 | \) 159 | } 160 | \end{center} 161 | 162 | \quad where: \( \hat{\sigma}_{u}^{2} = \frac{\hat{u}^{\top} \hat{u}}{n - k - 1} \) 163 | 164 | The standard errors are on the diagonal of: 165 | 166 | \begin{center} 167 | \( \se(\hat{\beta}) = \sqrt{\Var(\hat{\beta})} \) 168 | \end{center} 169 | 170 | \section*{Error measurements} 171 | 172 | \begin{itemize}[leftmargin=*] 173 | \item \( \SSR = \hat{u}^{\top} \hat{u}= y^{\top} y - \hat{\beta}^{\top} X^{\top} y = \sum(y_{i} - \hat{y}_{i})^{2} \) 174 | \item \( \SSE = \hat{\beta}^{\top} X^{\top} y - n \overline{y}^{2} = \sum(\hat{y}_{i} - \overline{y})^{2} \) 175 | \item \( \SST = \SSR + \SSE = y^{\top} y - n \overline{y}^{2} = \sum(y_{i} - \overline{y})^{2} \) 176 | \end{itemize} 177 | 178 | \columnbreak 179 | 180 | \section*{Variance-covariance matrix of \( u \)} 181 | 182 | Has the following shape: 183 | 184 | \begin{center} 185 | \( \Var(u) = \) 186 | \scalebox{0.85}{ 187 | \( 188 | \begin{bmatrix} 189 | \Var(u_{1}) & \Cov(u_{1}, u_{2}) & \hdots & \Cov(u_{1}, u_{n}) \\ 190 | \Cov(u_{2}, u_{1}) & \Var(u_{2}) & \hdots & \Cov(u_{2}, u_{n}) \\ 191 | \vdots & \vdots & \ddots & \vdots \\ 192 | \Cov(u_{n}, u_{1}) & \Cov(u_{n}, u_{2}) & \hdots & \Var(u_{n}) 193 | \end{bmatrix} 
194 | \) 195 | } 196 | \end{center} 197 | 198 | Under no heteroscedasticity and no autocorrelation, the variance-covariance matrix: 199 | 200 | \begin{center} 201 | \( \Var(u) = \sigma_{u}^{2} \cdot I_{n} = \) 202 | \scalebox{0.85}{ 203 | \( 204 | \begin{bmatrix} 205 | \sigma_{u}^{2} & 0 & \hdots & 0 \\ 206 | 0 & \sigma_{u}^{2} & \hdots & 0 \\ 207 | \vdots & \vdots & \ddots & \vdots \\ 208 | 0 & 0 & \hdots & \sigma_{u}^{2} 209 | \end{bmatrix} 210 | \) 211 | } 212 | \end{center} 213 | 214 | \quad where \( I_{n} \) is an identity matrix of \( n \times n \) elements. 215 | 216 | Under \textcolor{cyan}{\textbf{heteroscedasticity}} and \textcolor{magenta}{\textbf{autocorrelation}}, the variance-covariance matrix: 217 | 218 | \begin{center} 219 | \( \Var(u) = \sigma_{u}^{2} \cdot \Omega = \) 220 | \scalebox{0.85}{ 221 | \( 222 | \begin{bmatrix} 223 | \textcolor{cyan}{\sigma_{u_{1}}^2} & \textcolor{magenta}{\sigma_{u_{12}}} & \hdots & \textcolor{magenta}{\sigma_{u_{1n}}} \\ 224 | \textcolor{magenta}{\sigma_{u_{21}}} & \textcolor{cyan}{\sigma_{u_{2}}^2} & \hdots & \textcolor{magenta}{\sigma_{u_{2n}}} \\ 225 | \vdots & \vdots & \ddots & \vdots \\ 226 | \textcolor{magenta}{\sigma_{u_{n1}}} & \textcolor{magenta}{\sigma_{u_{n2}}} & \hdots & \textcolor{cyan}{\sigma_{u_{n}}^2} 227 | \end{bmatrix} 228 | \) 229 | } 230 | \end{center} 231 | 232 | \quad where \( \Omega \neq I_{n} \). 233 | 234 | \begin{itemize}[leftmargin=*] 235 | \item Heteroscedasticity: \( \Var(u_{i}) = \sigma_{u_{i}}^{2} \neq \sigma_{u}^{2} \) 236 | \item Autocorrelation: \( \Cov(u_{i}, u_{j}) = \sigma_{u_{ij}} \neq 0, \; \forall i \neq j \) 237 | \end{itemize} 238 | 239 | \section*{Variable omission} 240 | 241 | Most of the time, it is hard to get all relevant variables for an analysis.
For example, a true model with all variables: 242 | 243 | \begin{center} 244 | \( y = \beta_{0} + \beta_{1} x_{1} + \beta_{2} x_{2} + v \) 245 | \end{center} 246 | 247 | \quad where \( \beta_{2} \neq 0 \), \( v \) is the error term and \( \E(v \mid x_{1}, x_{2}) = 0 \). 248 | 249 | The model with the available variables: 250 | 251 | \begin{center} 252 | \( y = \alpha_{0} + \alpha_{1} x_{1} + u \) 253 | \end{center} 254 | 255 | \quad where \( u = v + \beta_{2} x_{2} \). 256 | 257 | Relevant variable omission can cause OLS estimators to be \textbf{biased} and \textbf{inconsistent}, because there is no weak exogeneity, \( \Cov(x_{1}, u) \neq 0 \). Depending on the \( \Corr(x_{1}, x_{2}) \) and the sign of \( \beta_{2} \), the bias on \( \hat{\alpha}_{1} \) could be: 258 | 259 | \begin{center} 260 | \begin{tabular}{ c | c c } 261 | & \( \Corr(x_{1}, x_{2}) > 0 \) & \( \Corr(x_{1}, x_{2}) < 0 \) \\ \hline 262 | \( \beta_{2} > 0 \) & \( (+) \) bias & \( (-) \) bias \\ 263 | \( \beta_{2} < 0 \) & \( (-) \) bias & \( (+) \) bias 264 | \end{tabular} 265 | \end{center} 266 | 267 | \begin{itemize}[leftmargin=*] 268 | \item \( (+) \) bias: \( \hat{\alpha}_{1} \) will be higher than it should be (it includes the effect of \( x_{2} \)) \( \rightarrow \hat{\alpha}_{1} > \beta_{1} \) 269 | \item \( (-) \) bias: \( \hat{\alpha}_{1} \) will be lower than it should be (it includes the effect of \( x_{2} \)) \( \rightarrow \hat{\alpha}_{1} < \beta_{1} \) 270 | \end{itemize} 271 | 272 | If \( \Corr(x_{1}, x_{2}) = 0 \), there is no bias on \( \hat{\alpha}_{1} \), because the effect of \( x_{2} \) will be fully captured by the error term, \( u \). 273 | 274 | \columnbreak 275 | 276 | \subsection*{Variable omission correction} 277 | 278 | \subsubsection*{Proxy variables} 279 | 280 | This is the approach used when a relevant variable is not available because it is non-observable, and no data for it is available.
281 | 282 | \begin{itemize}[leftmargin=*] 283 | \item A \textbf{proxy variable} is an observable variable, with data available, that is \textbf{related} to the non-observable variable. 284 | \end{itemize} 285 | 286 | For example, GDP per capita is a proxy variable for quality of life (non-observable). 287 | 288 | \subsubsection*{Instrumental variables} 289 | 290 | When the variable of interest \( (x) \) is observable but \textbf{endogenous}, the proxy variables approach is no longer valid. 291 | 292 | \begin{itemize}[leftmargin=*] 293 | \item An \textbf{instrumental variable} (IV) \textbf{is an observable variable} \( (z) \) that is \textbf{related} to the endogenous variable of interest \( (x) \), and meets the \textbf{requirements}: 294 | \begin{center} 295 | \( \Cov(z, u) = 0 \rightarrow \) instrument exogeneity 296 | 297 | \( \Cov(z, x) \neq 0 \rightarrow \) instrument relevance 298 | \end{center} 299 | \end{itemize} 300 | 301 | Instrumental variables leave the omitted variable in the error term, but instead of estimating the model by OLS, they use an estimation method that recognises the presence of the omitted variable. They can also solve measurement error problems. 302 | 303 | \begin{itemize}[leftmargin=*] 304 | \item \textbf{Two-Stage Least Squares} (TSLS) is a method to estimate a model with multiple instrumental variables. The \( \Cov(z, u) = 0 \) requirement can be relaxed, but a minimum number of instruments must satisfy it.
305 | 306 | The TSLS \textbf{estimation procedure} is as follows: 307 | \begin{enumerate}[leftmargin=*] 308 | \item Estimate a model regressing \( x \) on \( z \) using OLS, obtaining \( \hat{x} \): 309 | \begin{center} 310 | \( \hat{x} = \hat{\pi}_{0} + \hat{\pi}_{1} z \) 311 | \end{center} 312 | \item Replace \( x \) by \( \hat{x} \) in the final model and estimate it by OLS: 313 | \begin{center} 314 | \( y = \beta_{0} + \beta_{1} \hat{x} + u \) 315 | \end{center} 316 | \end{enumerate} 317 | There are some \underline{important} things to know about TSLS: 318 | \begin{itemize}[leftmargin=*] 319 | \item TSLS estimators are less efficient than OLS when the explanatory variables are exogenous; the \textbf{Hausman test} can be used to check this: 320 | \begin{center} 321 | \( H_{0} \): OLS estimators are consistent. 322 | \end{center} 323 | If \( H_{0} \) is not rejected, the OLS estimators are better than TSLS and vice versa. 324 | \item When more instruments than endogenous variables are used, the model is over-identified; the \textbf{Sargan test} can be used to check the validity of the instruments: 325 | \begin{center} 326 | \( H_{0} \): All instruments are valid. 327 | \end{center} 328 | \end{itemize} 329 | \end{itemize} 330 | 331 | \columnbreak 332 | 333 | \section*{Information criterion} 334 | 335 | Compare models with different numbers of parameters \( (p) \). The general formula: 336 | 337 | \begin{center} 338 | \( \Cr(p) = \log(\frac{\SSR}{n}) + c_{n} \varphi(p) \) 339 | \end{center} 340 | 341 | where: 342 | 343 | \begin{itemize}[leftmargin=*] 344 | \item \( \SSR \) is from a model of order \( p \). 345 | \item \( c_{n} \) is a sequence indexed by the sample size. 346 | \item \( \varphi(p) \) is a function that penalises large \( p \) orders. 347 | \end{itemize} 348 | 349 | It is interpreted as the relative amount of information lost by the model. The order \( p \) that minimises the criterion is chosen.
350 | 351 | There are different \( c_{n} \varphi(p) \) functions: 352 | 353 | \begin{itemize}[leftmargin=*] 354 | \item Akaike: \( \AIC(p) = \log(\frac{\SSR}{n}) + \frac{2}{n} p \) 355 | \item Hannan-Quinn: \( \HQ(p) = \log(\frac{\SSR}{n}) + \frac{2 \log(\log(n))}{n} p \) 356 | \item Schwarz / Bayesian: \( \BIC(p) = \log(\frac{\SSR}{n}) + \frac{\log(n)}{n} p \) 357 | \end{itemize} 358 | 359 | The selected orders satisfy \( \hat{p}(\BIC) \leq \hat{p}(\HQ) \leq \hat{p}(\AIC) \) for \( n \geq 16 \). 360 | 361 | \section*{The non-restricted hypothesis test} 362 | 363 | An alternative to the F test when there are few hypotheses to test on the parameters. Let \( \beta_{i}, \beta_{j} \) be parameters and \( a, b, c \in \mathbb{R} \) be constants. 364 | 365 | \begin{itemize}[leftmargin=*] 366 | \item \( H_{0}: a \beta_{i} + b \beta_{j} = c \) 367 | \item \( H_{1}: a \beta_{i} + b \beta_{j} \neq c \) 368 | \end{itemize} 369 | 370 | \begin{center} 371 | Under \( H_{0} \): \quad 372 | \( t = \dfrac{a \hat{\beta}_{i} + b \hat{\beta}_{j} - c}{\se(a \hat{\beta}_{i} + b \hat{\beta}_{j})} \) 373 | 374 | \( = \dfrac{a \hat{\beta}_{i} + b \hat{\beta}_{j} - c}{\sqrt{a^{2} \cdot \Var(\hat{\beta}_{i}) + b^{2} \cdot \Var(\hat{\beta}_{j}) + 2 a b \cdot \Cov(\hat{\beta}_{i}, \hat{\beta}_{j})}} \) 375 | \end{center} 376 | 377 | If \( \lvert t \rvert > \lvert t_{n - k - 1, \alpha / 2} \rvert \), there is evidence to reject \( H_{0} \). 378 | 379 | \section*{ANOVA} 380 | 381 | Decompose \( \SST \): 382 | 383 | \begin{center} 384 | \scalebox{0.90}{ 385 | \begin{tabular}{ c c c c } 386 | Variation origin & Sum Sq. & df & Sum Sq. Avg.
\\ \hline 387 | Regression & \( \SSE \) & \( k \) & \( \SSE / k \) \\ 388 | Residuals & \( \SSR \) & \( n - k - 1 \) & \( \SSR / (n - k - 1) \) \\ 389 | Total & \( \SST \) & \( n - 1 \) & 390 | \end{tabular} 391 | } 392 | \end{center} 393 | 394 | \begin{itemize}[leftmargin=*] 395 | \item \( H_{0}: \beta_{1} = \beta_{2} = \cdots = \beta_{k} = 0 \) 396 | \item \( H_{1}: \beta_{1} \neq 0 \) and/or \( \beta_{2} \neq 0 \ldots \) and/or \( \beta_{k} \neq 0 \) 397 | \end{itemize} 398 | 399 | Under \( H_{0} \): 400 | 401 | \begin{center} 402 | \( F = \dfrac{\text{SSA of \SSE}}{\text{SSA of \SSR}} = \dfrac{\SSE}{\SSR} \cdot \dfrac{n - k - 1}{k} \sim F_{k, n - k - 1} \) 403 | \end{center} 404 | 405 | If \( F > F_{k, n - k - 1} \), there is evidence to reject \( H_{0} \). 406 | 407 | \columnbreak 408 | 409 | \section*{Panel data} 410 | 411 | Observations on \( n \) entities over \( T \) periods. 412 | 413 | \begin{center} 414 | \( y_{it} = X_{it} \beta + \alpha_{i} + u_{it} \) 415 | \end{center} 416 | 417 | \( \alpha_{i} \) represents the time-invariant unobserved heterogeneity. 418 | 419 | \textbf{Pooled OLS model} 420 | 421 | \begin{itemize}[leftmargin=*] 422 | \item Apply OLS to the data directly. 423 | \item Assumption: \( \alpha_{i} \) is constant. 424 | \end{itemize} 425 | 426 | \textbf{Fixed effects model} (within estimator) 427 | 428 | \begin{center} 429 | \( y_{it} - \overline{y}_{i} = (X_{it} - \overline{X}_{i}) \beta + (\alpha_{i} - \overline{\alpha}_{i}) + (u_{it} - \overline{u}_{i}) \) 430 | \end{center} 431 | 432 | \begin{itemize}[leftmargin=*] 433 | \item Demeaning is performed to remove \( \alpha_{i} \). 434 | \item Controls for unobserved entity-specific effects. 435 | \item Allows \( \Corr(X_{it}, \alpha_i) \neq 0 \). 436 | \end{itemize} 437 | 438 | \textbf{Least square dummy variable model} (LSDV) 439 | 440 | Dummy variables are added for each entity and/or time period to capture the fixed effects.
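As a side note, the within transformation can be illustrated with a small numerical sketch (Python, outside the cheat sheet; all numbers and the DGP are made up). Pooled OLS is biased when the entity effects are correlated with the regressor, while OLS on entity-demeaned data recovers \( \beta \):

```python
import numpy as np

rng = np.random.default_rng(0)
n, T, beta = 50, 10, 2.0

# Entity effects correlated with the regressor: Corr(x_it, alpha_i) != 0
alpha = rng.normal(size=n)
x = alpha[:, None] + rng.normal(size=(n, T))
y = beta * x + alpha[:, None] + rng.normal(size=(n, T))

# Pooled OLS (constant + x): biased, since alpha_i ends up in the error term
X = np.column_stack([np.ones(n * T), x.ravel()])
beta_pooled = np.linalg.lstsq(X, y.ravel(), rcond=None)[0][1]

# Within (fixed effects) estimator: demean by entity to remove alpha_i
x_w = x - x.mean(axis=1, keepdims=True)
y_w = y - y.mean(axis=1, keepdims=True)
beta_fe = (x_w * y_w).sum() / (x_w ** 2).sum()
```

Here `beta_fe` lands close to the true value 2, while `beta_pooled` is pushed upwards because \( \Corr(x_{it}, \alpha_{i}) > 0 \) in this simulated design.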
441 | 442 | \textbf{First difference model} 443 | 444 | \begin{center} 445 | \( y_{it} - y_{i, t - 1} = (X_{it} - X_{i, t - 1}) \beta + (\alpha_{i} - \alpha_{i}) + (u_{it} - u_{i, t - 1}) \) 446 | \end{center} 447 | 448 | \begin{itemize}[leftmargin=*] 449 | \item First differences are performed to remove \( \alpha_{i} \). 450 | \item Assumption: \( \Corr(u_{it} - u_{i, t - 1}, X_{it} - X_{i, t - 1}) = 0 \). 451 | \end{itemize} 452 | 453 | \textbf{Random effects model} 454 | 455 | \begin{center} 456 | \( y_{it} = X_{it} \beta + \alpha_{i} + \epsilon_{it} \) where \( u_{it} = \alpha_{i} + \epsilon_{it} \) 457 | \end{center} 458 | 459 | \begin{itemize}[leftmargin=*] 460 | \item Assumption: \( \Corr(X_{it}, \alpha_i) = 0 \). 461 | \end{itemize} 462 | 463 | \section*{Logistic regression} 464 | 465 | Binary (0, 1) dependent variable. \textbf{Logit model}: 466 | 467 | \begin{center} 468 | \( P_{i} = \dfrac{1}{1 + e^{-(\beta_{0} + \beta_{1} x_{i} + u_{i})}}= \dfrac{e^{\beta_{0} + \beta_{1} x_{i} + u_{i}}}{1 + e^{\beta_{0} + \beta_{1} x_{i} + u_{i}}} \) 469 | \end{center} 470 | 471 | where \( P_{i} = P(y_{i} = 1 \mid x_{i}) \) and \( (1 - P_{i}) = P(y_{i} = 0 \mid x_{i}) \) 472 | 473 | The \textbf{odds ratio} (in favour of \( y_{i} = 1 \)): 474 | 475 | \begin{center} 476 | \( \dfrac{P_{i}}{1 - P_{i}} = \dfrac{1 + e^{\beta_{0} + \beta_{1} x_{i} + u_{i}}}{1 + e^{-(\beta_{0} + \beta_{1} x_{i} + u_{i})}} = e^{\beta_{0} + \beta_{1} x_{i} + u_{i}} \) 477 | \end{center} 478 | 479 | Taking the natural logarithm of the odds ratio, the \textbf{logit}: 480 | 481 | \begin{center} 482 | \( L_{i} = \ln \left( \dfrac{P_i}{1 - P_i}\right) = \beta_{0} + \beta_{1} x_{i} + u_{i} \) 483 | \end{center} 484 | 485 | \setlength{\multicolsep}{6pt} 486 | \begin{multicols}{2} 487 | 488 | \( P_{i} \) is between 0 and 1, but \( L_{i} \) goes from \( -\infty \) to \( +\infty \).
\\ 489 | 490 | If \( L_{i} \) is positive, it means that when \( x_{i} \) increases, the probability of \( y_{i} = 1 \) increases, and vice versa. 491 | 492 | \columnbreak 493 | 494 | \begin{tikzpicture}[scale=0.15] 495 | \draw [thick, |->] (-12, 10) node [anchor=west] {\( P \)} -- (-12, -10) -- (12, -10) node [anchor=north] {\( x \)}; 496 | \draw [red, thick, smooth] plot [domain=-12:12] (\x, {(1 / (1 + exp(-0.8*\x)))*19.5 - 9.75}); 497 | \draw plot [only marks, mark=*, mark size=6, domain=-8:8, samples=15] ({12.5*rnd - 0.5}, 9.6); 498 | \draw plot [only marks, mark=*, mark size=6, domain=-8:8, samples=15] ({-12.5*rnd + 0.5}, -9.6); 499 | \draw (-15, -9.5) node [anchor=west] {0}; 500 | \draw (-15, 9.5) node [anchor=west] {1}; 501 | \end{tikzpicture} 502 | 503 | \end{multicols} 504 | 505 | \columnbreak 506 | 507 | \section*{Incorrect functional form} 508 | 509 | \textbf{Ramsey's RESET} (Regression Specification Error Test). 510 | 511 | \begin{center} 512 | \( H_{0} \): The model is correctly specified. 513 | \end{center} 514 | 515 | \begin{enumerate}[leftmargin=*] 516 | \item Estimate the original model and obtain \( \hat{y} \) and \( R^{2} \): 517 | \begin{center} 518 | \( \hat{y} = \hat{\beta}_{0} + \hat{\beta}_{1} x_{1} + \cdots + \hat{\beta}_{k} x_{k} \) 519 | \end{center} 520 | \item Estimate a model adding powers of \( \hat{y} \) and obtain \( R_{\text{new}}^{2} \): 521 | \begin{center} 522 | \( \tilde{y} = \hat{y} + \tilde{\gamma}_{2} \hat{y}^{2} + \cdots + \tilde{\gamma}_{l} \hat{y}^{l} \) 523 | \end{center} 524 | \item Test statistic, under \( \gamma_{2} = \cdots = \gamma_{l} = 0 \) as \( H_{0} \): 525 | \begin{center} 526 | \( F = \frac{R_{\text{new}}^{2} - R^{2}}{1 - R_{\text{new}}^{2}} \cdot \frac{n - (k + 1) - l}{l} \sim F_{l, n - (k + 1) - l} \) 527 | \end{center} 528 | \end{enumerate} 529 | 530 | If \( F > F_{l, n - (k + 1) - l} \), there is evidence to reject \( H_{0} \). 
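As a side note, the RESET steps can be sketched in a short simulation (Python, outside the cheat sheet; the data-generating process and all numbers are invented, and \( l \) here counts the added powers of \( \hat{y} \)). A linear fit to a quadratic DGP, augmented with \( \hat{y}^{2} \) and \( \hat{y}^{3} \), produces a large \( F \) statistic:

```python
import numpy as np

rng = np.random.default_rng(1)
n, k, l = 200, 1, 2  # k regressors; l added powers (y_hat^2, y_hat^3)

x = rng.normal(size=n)
y = 1 + 2 * x + 1.5 * x ** 2 + rng.normal(size=n)  # true DGP is quadratic

def r2(X, y):
    """R-squared of an OLS fit of y on X (X already includes the constant)."""
    b = np.linalg.lstsq(X, y, rcond=None)[0]
    e = y - X @ b
    return 1 - (e @ e) / ((y - y.mean()) @ (y - y.mean()))

# 1) original (misspecified) linear model
X0 = np.column_stack([np.ones(n), x])
y_hat = X0 @ np.linalg.lstsq(X0, y, rcond=None)[0]
R2 = r2(X0, y)

# 2) augmented model with powers of y_hat
X1 = np.column_stack([X0, y_hat ** 2, y_hat ** 3])
R2_new = r2(X1, y)

# 3) F statistic as in the formula above; a large value rejects H0
F = (R2_new - R2) / (1 - R2_new) * (n - (k + 1) - l) / l
```

Since the omitted \( x^{2} \) term is strong by construction, \( F \) comes out far above any reasonable critical value, so the (correct) conclusion is that the linear specification is rejected.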
531 | 532 | \section*{Statistical definitions} 533 | 534 | Let \( \xi, \eta \) be random variables, \( a, b \in \mathbb{R} \) be constants, and let \( P \) denote probability. 535 | 536 | \textbf{Mean} \quad \( \E(\xi) = \sum_{i = 1}^{n} \xi_{i} \cdot P[\xi = \xi_{i}] \) 537 | 538 | Sample mean: \quad \( \E(\xi) = \dfrac{1}{n} \sum_{i = 1}^{n} \xi_{i} \) 539 | 540 | Properties of the mean: 541 | 542 | \begin{itemize}[leftmargin=*] 543 | \item \( \E(a) = a \) 544 | \item \( \E(\xi + a) = \E(\xi) + a \) 545 | \item \( \E(a \cdot \xi) = a \cdot \E(\xi) \) 546 | \item \( \E(\xi \pm \eta) = \E(\xi) \pm \E(\eta) \) 547 | \item \( \E(\xi \cdot \eta) = \E(\xi) \cdot \E(\eta) \) \quad only if \( \xi \) and \( \eta \) are independent. 548 | \item \( \E(\xi - \E(\xi)) = 0 \) 549 | \item \( \E(a \cdot \xi + b \cdot \eta) = a \cdot \E(\xi) + b \cdot \E(\eta) \) 550 | \end{itemize} 551 | 552 | \textbf{Variance} \quad \( \Var(\xi) = \E \left[ (\xi - \E(\xi))^{2} \right] \) 553 | 554 | Sample variance: \quad \( \Var(\xi) = \dfrac{\sum_{i = 1}^{n} (\xi_{i} - \E(\xi))^2}{n - 1} \) 555 | 556 | Properties of the variance: 557 | 558 | \begin{itemize}[leftmargin=*] 559 | \item \( \Var(a) = 0 \) 560 | \item \( \Var(\xi + a) = \Var(\xi) \) 561 | \item \( \Var(a \cdot \xi) = a^{2} \cdot \Var(\xi) \) 562 | \item \( \Var(\xi \pm \eta) = \Var(\xi) + \Var(\eta) \pm 2 \cdot \Cov(\xi, \eta) \) 563 | \item \( \Var(a \cdot \xi \pm b \cdot \eta) = a^{2} \cdot \Var(\xi) + b^{2} \cdot \Var(\eta) \pm 2 a b \cdot \Cov(\xi, \eta) \) 564 | \end{itemize} 565 | 566 | \textbf{Covariance} \quad \( \Cov(\xi, \eta) = \E \left[ (\xi - \E(\xi)) \cdot (\eta - \E(\eta)) \right] \) 567 | 568 | Sample covariance: \quad \( \dfrac{\sum_{i = 1}^{n} (\xi_{i} - \E(\xi)) \cdot (\eta_{i} - \E(\eta))}{n - 1} \) 569 | 570 | Properties of the covariance: 571 | 572 | \begin{itemize}[leftmargin=*] 573 | \item \( \Cov(\xi, a) = 0 \) 574 | \item \( \Cov(\xi + a, \eta + b) = \Cov(\xi, \eta) \) 575 | \item \( \Cov(a \cdot \xi, b \cdot
\eta) = a b \cdot \Cov(\xi, \eta) \) 576 | \item \( \Cov(\xi, \xi) = \Var(\xi) \) 577 | \item \( \Cov(\xi, \eta) = \Cov(\eta, \xi) \) 578 | \end{itemize} 579 | 580 | \columnbreak 581 | 582 | \section*{Hypothesis testing} 583 | 584 | \begin{center} 585 | \begin{tabular}{ c | c | c } 586 | & \( H_{0} \) true & \( H_{0} \) false \\ \hline 587 | Reject \( H_{0} \) & False positive & True positive \\ 588 | & Type I Error \( (\alpha) \) & \( (1 - \beta) \) \\ \hline 589 | Not reject \( H_{0} \) & True negative & False negative \\ 590 | & \( (1 - \alpha) \) & Type II Error \( (\beta) \) 591 | \end{tabular} 592 | \end{center} 593 | 594 | \columnbreak 595 | 596 | Typical one-tail test: 597 | 598 | \begin{center} 599 | \begin{tikzpicture}[scale=0.108] 600 | \fill [magenta] (4, 0) -- plot [domain=4:16, smooth] (\x, {cos(\x*7 + 70)*6 + 6}); 601 | \fill [cyan] (4, 0) -- plot [domain=-16:4, smooth] (\x, {cos(\x*7 - 70)*6 + 6}); 602 | \draw [thick, cyan] plot [domain=-16:36, smooth] (\x, {cos(\x*7 - 70)*6 + 6}); 603 | \draw [thick, magenta] plot [domain=-36:16, smooth] (\x, {cos(\x*7 + 70)*6 + 6}); 604 | \draw [thick, <->] (-40, 0) -- (40, 0); 605 | \draw [thick, dashed] (4, 0) -- (4, 11); 606 | \node at (-20, 15) {\( H_{0} \) distribution}; 607 | \node at (20, 15) {\( H_{1} \) distribution}; 608 | \node at (-10, 7) {\( 1 - \alpha \)}; 609 | \node at (10, 7) {\( 1 - \beta \)}; 610 | \node at (6, 2) {\( \alpha \)}; 611 | \node at (-2, 2) {\( \beta \)}; 612 | \node at (4, 13) {\( C \)}; 613 | \end{tikzpicture} 614 | \end{center} 615 | 616 | where \( (1 - \alpha) \) is the confidence level, \( \alpha \) is the significance level, \( C \) is the critical value, \( (1 - \beta) \) is the statistical power. 617 | 618 | \section*{Bootstrapping} 619 | 620 | \textbf{Problem} - Asymptotic approximations to the distributions of test statistics do not work on small samples. 621 | 622 | \textbf{Solution} - Bootstrap is sampling with replacement. 
The observed data is treated like a population, and multiple samples are drawn from it to recalculate an estimator or test statistic many times (improving accuracy). 623 | 624 | \end{multicols} 625 | 626 | \begin{multicols}{2} 627 | 628 | \section*{VAR (Vector Autoregressive)} 629 | 630 | A VAR model captures \textbf{dynamic interactions} between time series. The \( \text{VAR}(p) \): 631 | 632 | \begin{center} 633 | \( y_{t} = A_{1} y_{t - 1} + \cdots + A_{p} y_{t - p} + B x_{t} + CD_{t} + u_{t} \) 634 | \end{center} 635 | 636 | where: 637 | 638 | \begin{itemize}[leftmargin=*] 639 | \item \( y_{t} = (y_{1t}, \ldots, y_{Kt})^{\top} \) is a vector of \( K \) observable endogenous time series. 640 | \item \( A_{i} \)'s are \( K \times K \) coefficient matrices. 641 | \item \( x_{t} = (x_{1t}, \ldots, x_{Mt})^{\top} \) is a vector of \( M \) observable exogenous time series. 642 | \item \( B \) is a \( K \times M \) coefficient matrix. 643 | \item \( D_{t} \) is a vector that contains all deterministic terms: a constant, a linear trend, seasonal dummies, and/or any other user-specified dummy variables. 644 | \item \( C \) is a coefficient matrix of suitable dimension. 645 | \item \( u_{t} = (u_{1t}, \ldots, u_{Kt})^{\top} \) is a vector of \( K \) white noise series. 646 | \end{itemize} 647 | 648 | \textbf{Stability condition}: 649 | 650 | \begin{center} 651 | \( \det(I_{K} - A_{1} z - \cdots - A_{p} z^{p}) \neq 0 \quad \text{for}\quad \lvert z \rvert \leq 1 \) 652 | \end{center} 653 | 654 | \quad that is, there are \textbf{no roots} in and on the complex unit circle.
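As a side note, the stability condition can be checked numerically through the companion (stacked) form of the VAR: the process is stable exactly when all eigenvalues of the companion matrix lie strictly inside the unit circle, which is equivalent to the determinant condition on the roots \( z \). A Python sketch with made-up \( K = 2 \), \( p = 2 \) coefficient matrices:

```python
import numpy as np

# Hypothetical VAR(2) coefficient matrices (K = 2, p = 2); values invented
A1 = np.array([[0.5, 0.1],
               [0.4, 0.5]])
A2 = np.array([[0.0, 0.0],
               [0.25, 0.0]])

# Companion form: [[A1, A2], [I, 0]]; its eigenvalues are the inverses of
# the roots z of det(I - A1 z - A2 z^2), so |eigenvalue| < 1 <=> |z| > 1
K, p = 2, 2
companion = np.zeros((K * p, K * p))
companion[:K, :K] = A1
companion[:K, K:] = A2
companion[K:, :K] = np.eye(K)

moduli = np.abs(np.linalg.eigvals(companion))
stable = bool(np.all(moduli < 1))  # True: this VAR(2) is stable
```

For these particular matrices all eigenvalue moduli are below one, so the sketched process satisfies the stability condition.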
655 | 656 | For example, a VAR model with two endogenous variables \( (K = 2) \), two lags \( (p = 2) \), an exogenous contemporaneous variable \( (M = 1) \), a constant \( (\const) \) and a trend \( (\trend) \): 657 | 658 | \begin{center} 659 | \scalebox{0.80}{ 660 | \( 661 | \begin{bmatrix} 662 | y_{1t} \\ 663 | y_{2t} 664 | \end{bmatrix} 665 | = 666 | \begin{bmatrix} 667 | a_{11, 1} & a_{12, 1} \\ 668 | a_{21, 1} & a_{22, 1} 669 | \end{bmatrix} 670 | \cdot 671 | \begin{bmatrix} 672 | y_{1, t - 1} \\ 673 | y_{2, t - 1} 674 | \end{bmatrix} 675 | + 676 | \begin{bmatrix} 677 | a_{11, 2} & a_{12, 2} \\ 678 | a_{21, 2} & a_{22, 2} 679 | \end{bmatrix} 680 | \cdot 681 | \begin{bmatrix} 682 | y_{1, t - 2} \\ 683 | y_{2, t - 2} 684 | \end{bmatrix} 685 | + 686 | \begin{bmatrix} 687 | b_{11} \\ 688 | b_{21} 689 | \end{bmatrix} 690 | \cdot 691 | \begin{bmatrix} 692 | x_{t} 693 | \end{bmatrix} 694 | + 695 | \begin{bmatrix} 696 | c_{11} & c_{12} \\ 697 | c_{21} & c_{22} 698 | \end{bmatrix} 699 | \cdot 700 | \begin{bmatrix} 701 | \const \\ 702 | \trend 703 | \end{bmatrix} 704 | + 705 | \begin{bmatrix} 706 | u_{1t} \\ 707 | u_{2t} 708 | \end{bmatrix} 709 | \) 710 | } 711 | \end{center} 712 | 713 | Visualising the separate equations: 714 | 715 | \begin{center} 716 | \( y_{1t} = a_{11, 1} y_{1, t - 1} + a_{12, 1} y_{2, t - 1} + a_{11, 2} y_{1, t - 2} + a_{12, 2} y_{2, t - 2} + b_{11} x_{t} + c_{11} + c_{12} \trend + u_{1t} \) 717 | 718 | \( y_{2t} = a_{21, 1} y_{1, t - 1} + a_{22, 1} y_{2, t - 1} + a_{21, 2} y_{1, t - 2} + a_{22, 2} y_{2, t - 2} + b_{21} x_{t} + c_{21} + c_{22} \trend + u_{2t} \) 719 | \end{center} 720 | 721 | If there is a unit root, the determinant is zero for \( z = 1 \); then some or all variables are integrated, and a VAR model is no longer appropriate (it becomes unstable). 722 | 723 | \subsection*{SVAR (Structural VAR)} 724 | 725 | In a VAR model, causal interpretation is not explicit, and results are sensitive to variable ordering.
A SVAR extends VAR by imposing theory-based restrictions on the \( \mathsf{A} \) and/or \( \mathsf{B} \) matrices. This can enable causal interpretation and shock analysis without reliance on arbitrary ordering. 726 | 727 | For example, a basic \( \text{SVAR}(p) \) model: 728 | 729 | \begin{center} 730 | \( \mathsf{A} y_t = \mathsf{A} [A_1, \ldots, A_p] y_{t - 1} + \mathsf{B} \varepsilon_t \) 731 | \end{center} 732 | 733 | where: 734 | 735 | \begin{itemize}[leftmargin=*] 736 | \item \( u_t = \mathsf{A}^{-1} \mathsf{B} \varepsilon_t \) 737 | \item \( \mathsf{A} \), \( \mathsf{B} \) are \( (K \times K) \) matrices. 738 | \end{itemize} 739 | 740 | \columnbreak 741 | 742 | \section*{VECM (Vector Error Correction Model)} 743 | 744 | If \textbf{cointegrating relations} are present in a system of variables, the VAR form is not the most convenient. It is better to use a VECM, that is, the levels VAR, subtracting \( y_{t - 1} \) from both sides. The \( \text{VECM}(p - 1) \): 745 | 746 | \begin{center} 747 | \( \Delta y_{t} = \Pi y_{t - 1} + \sum_{i = 1}^{p - 1} \Gamma_{i} \Delta y_{t - i} + B x_{t} + CD_{t} + u_{t} \) 748 | \end{center} 749 | 750 | where: 751 | 752 | \begin{itemize}[leftmargin=*] 753 | \item \( \Delta y_{t} = (\Delta y_{1t}, \ldots, \Delta y_{Kt})^{\top} \) is a vector of \( K \) observable endogenous time series. 754 | \item \( \Pi y_{t - 1} \) is the \textbf{long-term} part. 755 | \begin{itemize}[leftmargin=*, label={\( \diamond \)}] 756 | \item \( \Pi = - (I_{K} - A_{1} - \cdots - A_{p}) \) 757 | \item \( \Pi = \alpha \beta^{\top} \) 758 | \item \( \alpha \) is the \textbf{loading matrix} \( (K \times r) \). It represents the speed of adjustment. 759 | \item \( \beta \) is the \textbf{cointegration matrix} \( (K \times r) \). 760 | \item \( \beta^{\top} y_{t - 1} \) is the \textbf{cointegrating equation}. It represents the long-run equilibrium.
761 | \item \( \rk(\Pi) = \rk(\alpha) = \rk(\beta) = r \) is the \textbf{cointegrating rank}. 762 | \end{itemize} 763 | \item \( \Gamma_{i} = - (A_{i + 1} + \cdots + A_{p}) \) for \( i = 1, \ldots, p - 1 \) are the \textbf{short-term} parameters. 764 | \item \( x_{t} \), \( B \), \( C \), \( D_{t} \) and \( u_{t} \) are as in VAR. 765 | \end{itemize} 766 | 767 | For example, a VECM with three endogenous variables \( (K = 3) \), two lags \( (p = 2) \) and two cointegrating relations \( (r = 2) \): 768 | 769 | \begin{center} 770 | \( \Delta y_{t} = \Pi y_{t - 1} + \Gamma_{1} \Delta y_{t - 1} + u_{t} \) 771 | \end{center} 772 | 773 | \quad where: 774 | 775 | \begin{center} 776 | \scalebox{0.95}{ 777 | \( 778 | \Pi y_{t - 1} = \alpha \beta^{\top} y_{t - 1} = 779 | \begin{bmatrix} 780 | \alpha_{11} & \alpha_{12} \\ 781 | \alpha_{21} & \alpha_{22} \\ 782 | \alpha_{31} & \alpha_{32} 783 | \end{bmatrix} 784 | \begin{bmatrix} 785 | \beta_{11} & \beta_{21} & \beta_{31} \\ 786 | \beta_{12} & \beta_{22} & \beta_{32} 787 | \end{bmatrix} 788 | \begin{bmatrix} 789 | y_{1, t - 1} \\ 790 | y_{2, t - 1} \\ 791 | y_{3, t - 1} 792 | \end{bmatrix} 793 | = 794 | \begin{bmatrix} 795 | \alpha_{11} ec_{1, t - 1} + \alpha_{12} ec_{2, t - 1} \\ 796 | \alpha_{21} ec_{1, t - 1} + \alpha_{22} ec_{2, t - 1} \\ 797 | \alpha_{31} ec_{1, t - 1} + \alpha_{32} ec_{2, t - 1} 798 | \end{bmatrix} 799 | \) 800 | } 801 | \end{center} 802 | 803 | \vspace*{0.2cm} 804 | 805 | \begin{center} 806 | \( ec_{1, t - 1} = \beta_{11} y_{1, t - 1} + \beta_{21} y_{2, t - 1} + \beta_{31} y_{3, t - 1} \) 807 | 808 | \( ec_{2, t - 1} = \beta_{12} y_{1, t - 1} + \beta_{22} y_{2, t - 1} + \beta_{32} y_{3, t - 1} \) 809 | \end{center} 810 | 811 | \quad and 812 | 813 | \begin{center} 814 | \scalebox{0.95}{ 815 | \( 816 | \Gamma_{1} \Delta y_{t - 1} = 817 | \begin{bmatrix} 818 | \gamma_{11} & \gamma_{12} & \gamma_{13} \\ 819 | \gamma_{21} & \gamma_{22} & \gamma_{23} \\ 820 | \gamma_{31} & \gamma_{32} & \gamma_{33} 821 | 
\end{bmatrix} 822 | \begin{bmatrix} 823 | \Delta y_{1, t - 1} \\ 824 | \Delta y_{2, t - 1} \\ 825 | \Delta y_{3, t - 1} 826 | \end{bmatrix} 827 | \quad 828 | u_t = 829 | \begin{bmatrix} 830 | u_{1t} \\ 831 | u_{2t} \\ 832 | u_{3t} 833 | \end{bmatrix} 834 | \) 835 | } 836 | \end{center} 837 | 838 | Visualizing the separate equations: 839 | 840 | \begin{center} 841 | \( \Delta y_{1t} = \alpha_{11} ec_{1, t - 1} + \alpha_{12} ec_{2, t - 1} + \gamma_{11} \Delta y_{1, t - 1} + \gamma_{12} \Delta y_{2, t - 1} + \gamma_{13} \Delta y_{3, t - 1} + u_{1t} \) 842 | 843 | \( \Delta y_{2t} = \alpha_{21} ec_{1, t - 1} + \alpha_{22} ec_{2, t - 1} + \gamma_{21} \Delta y_{1, t - 1} + \gamma_{22} \Delta y_{2, t - 1} + \gamma_{23} \Delta y_{3, t - 1} + u_{2t} \) 844 | 845 | \( \Delta y_{3t} = \alpha_{31} ec_{1, t - 1} + \alpha_{32} ec_{2, t - 1} + \gamma_{31} \Delta y_{1, t - 1} + \gamma_{32} \Delta y_{2, t - 1} + \gamma_{33} \Delta y_{3, t - 1} + u_{3t} \) 846 | \end{center} 847 | 848 | \end{multicols} 849 | 850 | \end{document} -------------------------------------------------------------------------------- /additional-cheatsheet/additional-cheatsheet-es.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marcelomijas/econometrics-cheatsheet/1f49cd08d8232e3eb9c3357fa47327fd46dddae5/additional-cheatsheet/additional-cheatsheet-es.pdf -------------------------------------------------------------------------------- /additional-cheatsheet/additional-cheatsheet-es.tex: -------------------------------------------------------------------------------- 1 | % !TeX spellcheck = es_ES 2 | % !TeX encoding = UTF-8 3 | \documentclass[10pt, a4paper, landscape]{article} 4 | 5 | % ----- packages ----- 6 | \usepackage{amsmath} % AMS mathematical facilities for LaTeX 7 | \usepackage{amssymb} 8 | \usepackage{enumitem} % Control layout of itemize, enumerate, description 9 | \usepackage{fancyhdr} % Extensive control of page headers and
footers in LaTeX2e 10 | \usepackage{geometry} % Flexible and complete interface to document dimensions 11 | \usepackage{graphicx} % Enhanced support for graphics 12 | \usepackage{hyperref} % Extensive support for hypertext in LaTeX 13 | \usepackage{multicol} % Intermix single and multiple columns 14 | \usepackage{parskip} % Layout with zero \parindent, non-zero \parskip 15 | \usepackage{tikz} % Create PostScript and PDF graphics in TeX 16 | \usepackage{titlesec} % Select alternative section titles 17 | 18 | % ----- pdf metadata ----- 19 | \hypersetup{ 20 | pdftitle={Hoja de Referencia Adicional}, 21 | pdfsubject={The Econometrics Cheat Sheet Project - marcelomijas - CC-BY-4.0}, 22 | pdfauthor={Marcelo Moreno Porras}, 23 | pdfkeywords={statistics, latex, economics, cheatsheet, econometrics, ols-regression, economic-modelling}, 24 | pdfduplex={DuplexFlipShortEdge} 25 | } 26 | 27 | % ----- random seed ----- 28 | \pgfmathsetseed{10} 29 | 30 | % ----- custom commands ----- 31 | \DeclareMathOperator{\E}{E} 32 | \DeclareMathOperator{\Var}{Var} 33 | \DeclareMathOperator{\se}{ee} 34 | \DeclareMathOperator{\Cov}{Cov} 35 | \DeclareMathOperator{\Corr}{Corr} 36 | \DeclareMathOperator{\rk}{rg} 37 | \DeclareMathOperator{\Cr}{Cr} 38 | \DeclareMathOperator{\AIC}{AIC} 39 | \DeclareMathOperator{\HQ}{HQ} 40 | \DeclareMathOperator{\BIC}{BIC} 41 | \newcommand{\SSR}{\text{SRC}} 42 | \newcommand{\SSE}{\text{SEC}} 43 | \newcommand{\SST}{\text{STC}} 44 | \newcommand{\trend}{\text{Tend}_{t}} 45 | \newcommand{\const}{\text{const}} 46 | 47 | % ----- page customization ----- 48 | \geometry{margin=1cm} % margins config 49 | \pagenumbering{gobble} % remove page numeration 50 | \setlength{\parskip}{0cm} % paragraph spacing 51 | % title spacing 52 | \titlespacing{\section}{0pt}{2ex}{1ex} 53 | \titlespacing{\subsection}{0pt}{1ex}{0ex} 54 | \titlespacing{\subsubsection}{0pt}{0.5ex}{0ex} 55 | 56 | % ----- footer ----- 57 | \pagestyle{fancy} 58 | \renewcommand{\headrulewidth}{0pt} 59 |
\cfoot{\href{https://github.com/marcelomijas/econometrics-cheatsheet}{\normalfont \footnotesize ADD-25.08.1-ES - github.com/marcelomijas/econometrics-cheatsheet - Licencia CC-BY-4.0}} 60 | \setlength{\footskip}{12pt} 61 | 62 | % ----- document ----- 63 | \begin{document} 64 | 65 | \begin{multicols}{3} 66 | 67 | \begin{center} 68 | \textbf{\LARGE \href{https://github.com/marcelomijas/econometrics-cheatsheet}{Hoja de Referencia Adicional}} 69 | 70 | {\footnotesize Por Marcelo Moreno Porras - Universidad Rey Juan Carlos} 71 | 72 | {\footnotesize The Econometrics Cheat Sheet Project} 73 | \end{center} 74 | 75 | \section*{Notación matricial MCO} 76 | 77 | El modelo econométrico general: 78 | 79 | \begin{center} 80 | \( y_{i} = \beta_{0} + \beta_{1} x_{1i} + \cdots + \beta_{k} x_{ki} + u_{i} \) 81 | \end{center} 82 | 83 | Puede ser escrito en notación matricial como: 84 | 85 | \begin{center} 86 | \( y = X \beta + u \) 87 | \end{center} 88 | 89 | Llamemos \( \hat{u} \) al vector de residuos estimados \( (\hat{u} \neq u) \): 90 | 91 | \begin{center} 92 | \( \hat{u} = y - X \hat{\beta} \) 93 | \end{center} 94 | 95 | El \textbf{objetivo} de MCO es \textbf{minimizar} la \( \SSR \): 96 | 97 | \begin{center} 98 | \( \min \SSR = \min \sum_{i = 1}^{n} \hat{u}_{i}^{2} = \min \hat{u}^{\top} \hat{u} \) 99 | \end{center} 100 | 101 | \begin{itemize}[leftmargin=*] 102 | \item Definiendo \( \hat{u}^{\top} \hat{u} \): 103 | \begin{center} 104 | \( \hat{u}^{\top} \hat{u} = (y - X \hat{\beta})^{\top} (y - X \hat{\beta}) \) 105 | 106 | \( = y^{\top} y - 2 \hat{\beta}^{\top} X^{\top} y + \hat{\beta}^{\top} X^{\top} X \hat{\beta} \) 107 | \end{center} 108 | \item Minimizando \( \hat{u}^{\top} \hat{u} \): 109 | \begin{center} 110 | \( \frac{\partial \hat{u}^{\top} \hat{u}}{\partial \hat{\beta}} = -2 X^{\top} y + 2 X^{\top} X \hat{\beta} = 0 \) 111 | 112 | \( \hat{\beta} = (X^{\top} X)^{-1} (X^{\top} y) \) 113 | 114 | \scalebox{0.85}{ 115 | \( 116 | \begin{bmatrix} 117 | \beta_{0} \\ 118 | 
\beta_{1} \\ 119 | \vdots \\ 120 | \beta_{k} 121 | \end{bmatrix} 122 | = 123 | \begin{bmatrix} 124 | n & \sum x_{1} & \hdots & \sum x_{k} \\ 125 | \sum x_{1} & \sum x_{1}^{2} & \hdots & \sum x_{1} x_{k} \\ 126 | \vdots & \vdots & \ddots & \vdots \\ 127 | \sum x_{k} & \sum x_{k} x_{1} & \hdots & \sum x_{k}^{2} 128 | \end{bmatrix}^{-1}\cdot 129 | \begin{bmatrix} 130 | \sum y \\ 131 | \sum y x_{1} \\ 132 | \vdots \\ 133 | \sum y x_{k} 134 | \end{bmatrix} 135 | \) 136 | } 137 | \end{center} 138 | La segunda derivada \( \frac{\partial^{2} \hat{u}^{\top} \hat{u}}{\partial \hat{\beta}^{2}} = X^{\top} X > 0 \) (es un mín.) 139 | \end{itemize} 140 | 141 | \section*{Matriz de varianzas-covarianzas de \( \hat{\beta} \)} 142 | 143 | Tiene la siguiente forma: 144 | 145 | \begin{center} 146 | \( \Var(\hat{\beta}) = \hat{\sigma}_{u}^{2} \cdot (X^{\top} X)^{-1} \) 147 | \end{center} 148 | 149 | \begin{center} 150 | \scalebox{0.85}{ 151 | \( = 152 | \begin{bmatrix} 153 | \Var(\hat{\beta}_{0}) & \Cov(\hat{\beta}_{0}, \hat{\beta}_{1}) & \hdots & \Cov(\hat{\beta}_{0}, \hat{\beta}_{k}) \\ 154 | \Cov(\hat{\beta}_{1}, \hat{\beta}_{0}) & \Var(\hat{\beta}_{1}) & \hdots & \Cov(\hat{\beta}_{1}, \hat{\beta}_{k}) \\ 155 | \vdots & \vdots & \ddots & \vdots \\ 156 | \Cov(\hat{\beta}_{k}, \hat{\beta}_{0}) & \Cov(\hat{\beta}_{k}, \hat{\beta}_{1}) & \hdots & \Var(\hat{\beta}_{k}) 157 | \end{bmatrix} 158 | \) 159 | } 160 | \end{center} 161 | 162 | \quad donde: \( \hat{\sigma}_{u}^{2} = \frac{\hat{u}^{\top} \hat{u}}{n - k - 1} \) 163 | 164 | Los errores estándar están en la diagonal de: 165 | 166 | \begin{center} 167 | \( \se(\hat{\beta}) = \sqrt{\Var(\hat{\beta})} \) 168 | \end{center} 169 | 170 | \section*{Medidas de error} 171 | 172 | \begin{itemize}[leftmargin=*] 173 | \item \( \SSR = \hat{u}^{\top} \hat{u}= y^{\top} y - \hat{\beta}^{\top} X^{\top} y = \sum(y_{i} - \hat{y}_{i})^{2} \) 174 | \item \( \SSE = \hat{\beta}^{\top} X^{\top} y - n \overline{y}^{2} = \sum(\hat{y}_{i} - \overline{y})^{2} 
\) 175 | \item \( \SST = \SSR + \SSE = y^{\top} y - n \overline{y}^{2} = \sum(y_{i} - \overline{y})^{2} \) 176 | \end{itemize} 177 | 178 | \columnbreak 179 | 180 | \section*{Matriz de varianzas-covarianzas de \( u \)} 181 | 182 | Tiene la siguiente forma: 183 | 184 | \begin{center} 185 | \( \Var(u) = \) 186 | \scalebox{0.85}{ 187 | \( 188 | \begin{bmatrix} 189 | \Var(u_{1}) & \Cov(u_{1}, u_{2}) & \hdots & \Cov(u_{1}, u_{n}) \\ 190 | \Cov(u_{2}, u_{1}) & \Var(u_{2}) & \hdots & \Cov(u_{2}, u_{n}) \\ 191 | \vdots & \vdots & \ddots & \vdots \\ 192 | \Cov(u_{n}, u_{1}) & \Cov(u_{n}, u_{2}) & \hdots & \Var(u_{n}) 193 | \end{bmatrix} 194 | \) 195 | } 196 | \end{center} 197 | 198 | Bajo homocedasticidad y no autocorrelación, la matriz de varianzas-covarianzas: 199 | 200 | \begin{center} 201 | \( \Var(u) = \sigma_{u}^{2} \cdot I_{n} = \) 202 | \scalebox{0.85}{ 203 | \( 204 | \begin{bmatrix} 205 | \sigma_{u}^{2} & 0 & \hdots & 0 \\ 206 | 0 & \sigma_{u}^{2} & \hdots & 0 \\ 207 | \vdots & \vdots & \ddots & \vdots \\ 208 | 0 & 0 & \hdots & \sigma_{u}^{2} 209 | \end{bmatrix} 210 | \) 211 | } 212 | \end{center} 213 | 214 | \quad donde \( I_{n} \) es la matriz identidad de dimensión \( n \times n \).
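Como esbozo numérico (datos simulados con numpy; los valores de \( \beta \) son supuestos ilustrativos, no parte de la hoja), las fórmulas matriciales anteriores — \( \hat{\beta} = (X^{\top} X)^{-1} X^{\top} y \), \( \hat{\sigma}_{u}^{2} \), \( \se(\hat{\beta}) \) y la descomposición STC = SEC + SRC — pueden comprobarse así:

```python
import numpy as np

rng = np.random.default_rng(0)
n, k = 200, 2
# X: columna de unos (constante) + k regresores simulados
X = np.column_stack([np.ones(n), rng.normal(size=(n, k))])
beta = np.array([1.0, 2.0, -0.5])        # valores ilustrativos
y = X @ beta + rng.normal(size=n)        # u ~ N(0, 1)

beta_hat = np.linalg.solve(X.T @ X, X.T @ y)        # (X'X)^{-1} X'y
u_hat = y - X @ beta_hat                            # residuos estimados
sigma2_hat = (u_hat @ u_hat) / (n - k - 1)          # sigma^2_u estimada
var_beta = sigma2_hat * np.linalg.inv(X.T @ X)      # matriz var-cov de beta_hat
se_beta = np.sqrt(np.diag(var_beta))                # errores estandar (diagonal)

SRC = u_hat @ u_hat                                 # y'y - beta'X'y
SEC = beta_hat @ X.T @ y - n * y.mean() ** 2        # beta'X'y - n*ybar^2
STC = y @ y - n * y.mean() ** 2                     # = SRC + SEC
```

Con una constante en el modelo, `SRC + SEC` reproduce `STC` salvo error de redondeo, y `beta_hat` se aproxima a los valores simulados.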
215 | 216 | Bajo \textcolor{cyan}{\textbf{heterocedasticidad}} y \textcolor{magenta}{\textbf{autocorrelación}}, la matriz de varianzas-covarianzas: 217 | 218 | \begin{center} 219 | \( \Var(u) = \sigma_{u}^{2} \cdot \Omega = \) 220 | \scalebox{0.85}{ 221 | \( 222 | \begin{bmatrix} 223 | \textcolor{cyan}{\sigma_{u_{1}}^2} & \textcolor{magenta}{\sigma_{u_{12}}} & \hdots & \textcolor{magenta}{\sigma_{u_{1n}}} \\ 224 | \textcolor{magenta}{\sigma_{u_{21}}} & \textcolor{cyan}{\sigma_{u_{2}}^2} & \hdots & \textcolor{magenta}{\sigma_{u_{2n}}} \\ 225 | \vdots & \vdots & \ddots & \vdots \\ 226 | \textcolor{magenta}{\sigma_{u_{n1}}} & \textcolor{magenta}{\sigma_{u_{n2}}} & \hdots & \textcolor{cyan}{\sigma_{u_{n}}^2} 227 | \end{bmatrix} 228 | \) 229 | } 230 | \end{center} 231 | 232 | \quad donde \( \Omega \neq I_{n} \). 233 | 234 | \begin{itemize}[leftmargin=*] 235 | \item Heterocedasticidad: \( \Var(u_{i}) = \sigma_{u_{i}}^{2} \neq \sigma_{u}^{2} \) 236 | \item Autocorrelación: \( \Cov(u_{i}, u_{j}) = \sigma_{u_{ij}} \neq 0, \; \forall i \neq j \) 237 | \end{itemize} 238 | 239 | \section*{Omisión de variables} 240 | 241 | Casi siempre es difícil disponer de todas las variables relevantes. Por ejemplo, un modelo con todas las variables: 242 | 243 | \begin{center} 244 | \( y = \beta_{0} + \beta_{1} x_{1} + \beta_{2} x_{2} + v \) 245 | \end{center} 246 | 247 | \quad donde \( \beta_{2} \neq 0 \), \( v \) es el término de error y \( \E(v \mid x_{1}, x_{2}) = 0 \). 248 | 249 | El modelo con las variables disponibles: 250 | 251 | \begin{center} 252 | \( y = \alpha_{0} + \alpha_{1} x_{1} + u \) 253 | \end{center} 254 | 255 | \quad donde \( u = v + \beta_{2} x_{2} \). 256 | 257 | La omisión de variables relevantes puede causar que los estimadores MCO sean \textbf{sesgados} e \textbf{inconsistentes}, porque no hay exogeneidad estricta, \( \Cov(x_{1}, u) \neq 0 \).
Dependiendo de \( \Corr(x_{1}, x_{2}) \) y el signo de \( \beta_{2} \), el sesgo en \( \hat{\alpha}_{1} \) puede ser: 258 | 259 | \begin{center} 260 | \begin{tabular}{ c | c c } 261 | & \( \Corr(x_{1}, x_{2}) > 0 \) & \( \Corr(x_{1}, x_{2}) < 0 \) \\ \hline 262 | \( \beta_{2} > 0 \) & sesgo \( (+) \) & sesgo \( (-) \) \\ 263 | \( \beta_{2} < 0 \) & sesgo \( (-) \) & sesgo \( (+) \) 264 | \end{tabular} 265 | \end{center} 266 | 267 | \begin{itemize}[leftmargin=*] 268 | \item Sesgo \( (+) \): \( \hat{\alpha}_{1} \) será más alto de lo que debería (incluye el efecto de \( x_{2} \)) \( \rightarrow \hat{\alpha}_{1} > \beta_{1} \) 269 | \item Sesgo \( (-) \): \( \hat{\alpha}_{1} \) será más bajo de lo que debería (incluye el efecto de \( x_{2} \)) \( \rightarrow \hat{\alpha}_{1} < \beta_{1} \) 270 | \end{itemize} 271 | 272 | Si \( \Corr(x_{1}, x_{2}) = 0 \), no hay sesgo en \( \hat{\alpha}_{1} \), porque el efecto de \( x_{2} \) será totalmente recogido por el término de error, \( u \). 273 | 274 | \columnbreak 275 | 276 | \subsection*{Corrección de omisión de variables} 277 | 278 | \subsubsection*{Variables proxy} 279 | 280 | Es el camino cuando la variable relevante no está disponible porque no es observable y no existen datos de ella. 281 | 282 | \begin{itemize}[leftmargin=*] 283 | \item Una \textbf{variable proxy} es algo \textbf{relacionado} con la variable no observable que tiene datos disponibles. 284 | \end{itemize} 285 | 286 | Por ejemplo, el PIB per cápita es una variable proxy para la calidad de vida (no observable). 287 | 288 | \subsubsection*{Variables instrumentales} 289 | 290 | Cuando una variable de interés \( (x) \) es observable pero \textbf{endógena}, el camino de variables proxy ya no es válido.
291 | 292 | \begin{itemize}[leftmargin=*] 293 | \item Una \textbf{variable instrumental} (VI) \textbf{es una variable observable} \( (z) \) que está \textbf{relacionada} con la variable de interés que es endógena \( (x) \), y cumple los \textbf{requisitos}: 294 | \begin{center} 295 | \( \Cov(z, u) = 0 \rightarrow \) exogeneidad del instrumento 296 | 297 | \( \Cov(z, x) \neq 0 \rightarrow \) relevancia del instrumento 298 | \end{center} 299 | \end{itemize} 300 | 301 | El método de variables instrumentales deja la variable omitida en el término de error, pero en vez de estimar el modelo por MCO, utiliza un método que reconoce la omisión de la variable. También puede corregir errores de medida. 302 | 303 | \begin{itemize}[leftmargin=*] 304 | \item \textbf{Mínimos Cuadrados en Dos Etapas} (MC2E) es un método para estimar un modelo con múltiples variables instrumentales. El requisito \( \Cov(z, u) = 0 \) puede relajarse, pero debe haber un mínimo de variables que lo satisfagan. 305 | 306 | El \textbf{procedimiento de estimación} de MC2E: 307 | \begin{enumerate}[leftmargin=*] 308 | \item Estimar un modelo regresando \( x \) sobre \( z \) usando MCO, obteniendo \( \hat{x} \): 309 | \begin{center} 310 | \( \hat{x} = \hat{\pi}_{0} + \hat{\pi}_{1} z \) 311 | \end{center} 312 | \item Reemplazar \( x \) por \( \hat{x} \) en el modelo final y estimarlo por MCO: 313 | \begin{center} 314 | \( y = \beta_{0} + \beta_{1} \hat{x} + u \) 315 | \end{center} 316 | \end{enumerate} 317 | Hay algunas cosas \underline{importantes} sobre MC2E: 318 | \begin{itemize}[leftmargin=*] 319 | \item Los estimadores MC2E son menos eficientes que los MCO cuando las variables explicativas son exógenas; el \textbf{contraste de Hausman} puede usarse para comprobarlo: 320 | \begin{center} 321 | \( H_{0} \): los estimadores MCO son consistentes. 322 | \end{center} 323 | Si \( H_{0} \) no es rechazada, los estimadores MCO son mejores que MC2E y viceversa.
324 | \item Cuando se usan más instrumentos que variables endógenas, el modelo puede estar sobreidentificado; el \textbf{contraste de Sargan} puede usarse para comprobarlo: 325 | \begin{center} 326 | \( H_{0} \): todos los instrumentos son válidos. 327 | \end{center} 328 | \end{itemize} 329 | \end{itemize} 330 | 331 | \columnbreak 332 | 333 | \section*{Criterio de información} 334 | 335 | Permite comparar modelos con diferente número de parámetros \( (p) \). La fórmula general: 336 | 337 | \begin{center} 338 | \( \Cr(p) = \log(\frac{\SSR}{n}) + c_{n} \varphi(p) \) 339 | \end{center} 340 | 341 | donde: 342 | 343 | \begin{itemize}[leftmargin=*] 344 | \item \( \SSR \) es la del modelo de orden \( p \). 345 | \item \( c_{n} \) es una secuencia indexada por el tamaño muestral. 346 | \item \( \varphi(p) \) es una función que penaliza órdenes grandes de \( p \). 347 | \end{itemize} 348 | 349 | Se interpreta como el tamaño relativo de la información perdida por el modelo. Se elige el orden \( p \) que minimiza el criterio. 350 | 351 | Hay diferentes funciones \( c_{n} \varphi(p) \): 352 | 353 | \begin{itemize}[leftmargin=*] 354 | \item Akaike: \( \AIC(p) = \log(\frac{\SSR}{n}) + \frac{2}{n} p \) 355 | \item Hannan-Quinn: \( \HQ(p) = \log(\frac{\SSR}{n}) + \frac{2 \log(\log(n))}{n} p \) 356 | \item Schwarz / Bayesian: \( \BIC(p) = \log(\frac{\SSR}{n}) + \frac{\log(n)}{n} p \) 357 | \end{itemize} 358 | 359 | Los órdenes seleccionados cumplen: \( \hat{p}(\BIC) \leq \hat{p}(\HQ) \leq \hat{p}(\AIC) \) 360 | 361 | \section*{Contraste de hipótesis no restringido} 362 | 363 | Alternativa al contraste F cuando hay pocas hipótesis a probar sobre los parámetros. Sean \( \beta_{i}, \beta_{j} \) parámetros, \( a, b, c \in \mathbb{R} \) constantes.
364 | 365 | \begin{itemize}[leftmargin=*] 366 | \item \( H_{0}: a \beta_{i} + b \beta_{j} = c \) 367 | \item \( H_{1}: a \beta_{i} + b \beta_{j} \neq c \) 368 | \end{itemize} 369 | 370 | \begin{center} 371 | Bajo \( H_{0} \): \quad 372 | \( t = \dfrac{a \hat{\beta}_{i} + b \hat{\beta}_{j} - c}{\se(a \hat{\beta}_{i} + b \hat{\beta}_{j})} \) 373 | 374 | \( = \dfrac{a \hat{\beta}_{i} + b \hat{\beta}_{j} - c}{\sqrt{a^{2} \Var(\hat{\beta}_{i}) + b^{2} \Var(\hat{\beta}_{j}) + 2 a b \Cov(\hat{\beta}_{i}, \hat{\beta}_{j})}} \) 375 | \end{center} 376 | 377 | Si \( \lvert t \rvert > \lvert t_{n - k - 1, \alpha / 2}\rvert \), existe evidencia para rechazar \( H_{0} \). 378 | 379 | \section*{ANOVA} 380 | 381 | Descomponer \( \SST \): 382 | 383 | \begin{center} 384 | \scalebox{0.90}{ 385 | \begin{tabular}{ c c c c } 386 | Origen var. & Suma Cuad. & gl & Suma Cuad. Media \\ \hline 387 | Regresión & \( \SSE \) & \( k \) & \( \SSE / k \) \\ 388 | Residuos & \( \SSR \) & \( n - k - 1 \) & \( \SSR / (n - k - 1) \) \\ 389 | Total & \( \SST \) & \( n - 1 \) & 390 | \end{tabular} 391 | } 392 | \end{center} 393 | 394 | \begin{itemize}[leftmargin=*] 395 | \item \( H_{0}: \beta_{1} = \beta_{2} = \cdots = \beta_{k} = 0 \) 396 | \item \( H_{1}: \beta_{1} \neq 0 \) y/o \( \beta_{2} \neq 0 \ldots \) y/o \( \beta_{k} \neq 0 \) 397 | \end{itemize} 398 | 399 | Bajo \( H_{0} \): 400 | 401 | \begin{center} 402 | \( F = \dfrac{\text{SCP de \SSE}}{\text{SCP de \SSR}} = \dfrac{\SSE}{\SSR} \cdot \dfrac{n - k - 1}{k} \sim F_{k, n - k - 1} \) 403 | \end{center} 404 | 405 | Si \( F > F_{k, n - k - 1} \), existe evidencia para rechazar \( H_{0} \). 406 | 407 | \columnbreak 408 | 409 | \section*{Datos de panel} 410 | 411 | Observaciones de \( n \) entidades durante \( T \) períodos. 412 | 413 | \begin{center} 414 | \( y_{it} = X_{it} \beta + \alpha_{i} + u_{it} \) 415 | \end{center} 416 | 417 | \( \alpha_{i} \) es la heterogeneidad no observada invariante en el tiempo.
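El modelo de panel anterior puede simularse para ver el papel de \( \alpha_{i} \). Esbozo con numpy (datos y valor de \( \beta \) simulados como supuestos ilustrativos): cuando \( \Corr(X_{it}, \alpha_{i}) \neq 0 \), aplicar MCO directamente sesga el estimador, mientras que el centrado por entidad elimina \( \alpha_{i} \):

```python
import numpy as np

rng = np.random.default_rng(1)
n, T = 200, 5
beta = 1.5                                   # valor ilustrativo
alpha = rng.normal(size=(n, 1))              # alpha_i, invariante en t
x = alpha + rng.normal(size=(n, T))          # Corr(x, alpha) != 0 (endogeneidad)
y = beta * x + alpha + rng.normal(size=(n, T))

# MCO sobre los datos apilados (con constante): sesgado por alpha_i
Z = np.column_stack([np.ones(n * T), x.ravel()])
b_pooled = np.linalg.solve(Z.T @ Z, Z.T @ y.ravel())[1]

# Centrado por entidad (transformacion within): elimina alpha_i
xw = x - x.mean(axis=1, keepdims=True)
yw = y - y.mean(axis=1, keepdims=True)
b_within = (xw * yw).sum() / (xw * xw).sum()
```

Con esta simulación `b_pooled` queda por encima de \( \beta \) (sesgo positivo, pues \( \Cov(x, \alpha) > 0 \)), mientras que `b_within` se aproxima a \( \beta \).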
418 | 419 | \textbf{Pooled OLS} 420 | 421 | \begin{itemize}[leftmargin=*] 422 | \item Aplicar MCO a los datos directamente. 423 | \item Supuesto: \( \alpha_{i} \) es constante. 424 | \end{itemize} 425 | 426 | \textbf{Modelo de efectos fijos} (within estimator) 427 | 428 | \begin{center} 429 | \( y_{it} - \overline{y}_{i} = (X_{it} - \overline{X}_{i}) \beta + (\alpha_{i} - \overline{\alpha}_{i}) + (u_{it} - \overline{u}_{i}) \) 430 | \end{center} 431 | 432 | \begin{itemize}[leftmargin=*] 433 | \item Se realiza centrado para eliminar \( \alpha_{i} \). 434 | \item Controla efectos específicos de la entidad no observados. 435 | \item Supuesto: \( \Corr(X_{it}, \alpha_i) \neq 0 \). 436 | \end{itemize} 437 | 438 | \textbf{Modelo de variable ficticia de mín. cuad.} (LSDV) 439 | 440 | Se agregan variables ficticias para cada entidad y/o período de tiempo para capturar los efectos fijos. 441 | 442 | \textbf{Modelo de primeras diferencias} 443 | 444 | \begin{center} 445 | \( y_{it} - y_{i, t - 1} = (X_{it} - X_{i, t - 1}) \beta + (\alpha_{i} - \alpha_{i}) + (u_{it} - u_{i, t - 1}) \) 446 | \end{center} 447 | 448 | \begin{itemize}[leftmargin=*] 449 | \item Se realizan primeras diferencias para eliminar \( \alpha_{i} \). 450 | \item Supuesto: \( \Corr(u_{it} - u_{i, t - 1}, X_{it} - X_{i, t - 1}) = 0 \). 451 | \end{itemize} 452 | 453 | \textbf{Modelo de efectos aleatorios} 454 | 455 | \begin{center} 456 | \( y_{it} = X_{it} \beta + \alpha_{i} + \epsilon_{it} \) donde \( u_{it} = \alpha_{i} + \epsilon_{it} \) 457 | \end{center} 458 | 459 | \begin{itemize}[leftmargin=*] 460 | \item Supuesto: \( \Corr(X_{it}, \alpha_i) = 0 \). 461 | \end{itemize} 462 | 463 | \section*{Regresión logística} 464 | 465 | Variable dependiente binaria (0, 1).
\textbf{Modelo logit}: 466 | 467 | \begin{center} 468 | \( P_{i} = \dfrac{1}{1 + e^{-(\beta_{0} + \beta_{1} x_{i} + u_{i})}} = \dfrac{e^{\beta_{0} + \beta_{1} x_{i} + u_{i}}}{1 + e^{\beta_{0} + \beta_{1} x_{i} + u_{i}}} \) 469 | \end{center} 470 | 471 | donde \( P_{i} = P(y_{i} = 1 \mid x_{i}) \) y \( (1 - P_{i}) = P(y_{i} = 0 \mid x_{i}) \) 472 | 473 | La \textbf{razón de probabilidades} (a favor de \( y_{i} = 1 \)): 474 | 475 | \begin{center} 476 | \( \dfrac{P_{i}}{1 - P_{i}} = \dfrac{1 + e^{\beta_{0} + \beta_{1} x_{i} + u_{i}}}{1 + e^{-(\beta_{0} + \beta_{1} x_{i} + u_{i})}} = e^{\beta_{0} + \beta_{1} x_{i} + u_{i}} \) 477 | \end{center} 478 | 479 | Tomando el logaritmo natural de la razón, el \textbf{logit}: 480 | 481 | \begin{center} 482 | \( L_{i} = \ln \left( \dfrac{P_i}{1 - P_i}\right) = \beta_{0} + \beta_{1} x_{i} + u_{i} \) 483 | \end{center} 484 | 485 | \setlength{\multicolsep}{6pt} 486 | \begin{multicols}{2} 487 | 488 | \( P_{i} \) se encuentra entre 0 y 1, pero \( L_{i} \) va desde \( -\infty \) a \( +\infty \). \\ 489 | 490 | Si \( L_{i} \) es positivo, significa que cuando \( x_{i} \) aumenta, la probabilidad de que \( y_{i} = 1 \) aumenta, y viceversa. 491 | 492 | \columnbreak 493 | 494 | \begin{tikzpicture}[scale=0.15] 495 | \draw [thick, |->] (-12, 10) node [anchor=west] {\( P \)} -- (-12, -10) -- (12, -10) node [anchor=north] {\( x \)}; 496 | \draw [red, thick, smooth] plot [domain=-12:12] (\x, {(1 / (1 + exp(-0.8*\x)))*19.5 - 9.75}); 497 | \draw plot [only marks, mark=*, mark size=6, domain=-8:8, samples=15] ({12.5*rnd - 0.5}, 9.6); 498 | \draw plot [only marks, mark=*, mark size=6, domain=-8:8, samples=15] ({-12.5*rnd + 0.5}, -9.6); 499 | \draw (-15, -9.5) node [anchor=west] {0}; 500 | \draw (-15, 9.5) node [anchor=west] {1}; 501 | \end{tikzpicture} 502 | 503 | \end{multicols} 504 | 505 | \columnbreak 506 | 507 | \section*{Forma funcional incorrecta} 508 | 509 | \textbf{RESET de Ramsey} (Regression Specification Error Test).
510 | 511 | \begin{center} 512 | \( H_{0} \): el modelo está correctamente especificado. 513 | \end{center} 514 | 515 | \begin{enumerate}[leftmargin=*] 516 | \item Estimar el modelo original y obtener \( \hat{y} \) y \( R^{2} \): 517 | \begin{center} 518 | \( \hat{y} = \hat{\beta}_{0} + \hat{\beta}_{1} x_{1} + \cdots + \hat{\beta}_{k} x_{k} \) 519 | \end{center} 520 | \item Estimar el modelo con potencias de \( \hat{y} \) y obtener \( R_{\text{new}}^{2} \): 521 | \begin{center} 522 | \( \tilde{y} = \hat{y} + \tilde{\gamma}_{2} \hat{y}^{2} + \cdots + \tilde{\gamma}_{l} \hat{y}^{l} \) 523 | \end{center} 524 | \item Estadístico, bajo \( \gamma_{2} = \cdots = \gamma_{l} = 0 \) como \( H_{0} \): 525 | \begin{center} 526 | \( F = \frac{R_{\text{new}}^{2} - R^{2}}{1 - R_{\text{new}}^{2}} \cdot \frac{n - (k + 1) - l}{l} \sim F_{l, n - (k + 1) - l} \) 527 | \end{center} 528 | \end{enumerate} 529 | 530 | Si \( F > F_{l, n - (k + 1) - l} \), hay evidencia para rechazar \( H_{0} \). 531 | 532 | \section*{Definiciones estadísticas} 533 | 534 | Sean \( \xi, \eta \) variables aleatorias, \( a, b \in \mathbb{R} \) constantes, y \( P \) denota probabilidad. 535 | 536 | \textbf{Media} \quad \( \E(\xi) = \sum_{i = 1}^{n} \xi_{i} \cdot P[\xi = \xi_{i}] \) 537 | 538 | Media muestral: \quad \( \E(\xi) = \dfrac{1}{n} \sum_{i = 1}^{n} \xi_{i} \) 539 | 540 | Propiedades de la media: 541 | 542 | \begin{itemize}[leftmargin=*] 543 | \item \( \E(a) = a \) 544 | \item \( \E(\xi + a) = \E(\xi) + a \) 545 | \item \( \E(a \cdot \xi) = a \cdot \E(\xi) \) 546 | \item \( \E(\xi \pm \eta) = \E(\xi) \pm \E(\eta) \) 547 | \item \( \E(\xi \cdot \eta) = \E(\xi) \cdot \E(\eta) \) \quad sólo si \( \xi \) y \( \eta \) son independientes.
548 | \item \( \E(\xi - \E(\xi)) = 0 \) 549 | \item \( \E(a \cdot \xi + b \cdot \eta) = a \cdot \E(\xi) + b \cdot \E(\eta) \) 550 | \end{itemize} 551 | 552 | \textbf{Varianza} \quad \( \Var(\xi) = \E \left[ (\xi - \E(\xi))^{2} \right] \) 553 | 554 | Varianza muestral: \quad \( \Var(\xi) = \dfrac{\sum_{i = 1}^{n} (\xi_{i} - \E(\xi))^2}{n - 1} \) 555 | 556 | Propiedades de la varianza: 557 | 558 | \begin{itemize}[leftmargin=*] 559 | \item \( \Var(a) = 0 \) 560 | \item \( \Var(\xi + a) = \Var(\xi) \) 561 | \item \( \Var(a \cdot \xi) = a^{2} \cdot \Var(\xi) \) 562 | \item \( \Var(\xi \pm \eta) = \Var(\xi) + \Var(\eta) \pm 2 \cdot \Cov(\xi, \eta) \) 563 | \item \( \Var(a \cdot \xi \pm b \cdot \eta) = a^{2} \cdot \Var(\xi) + b^{2} \cdot \Var(\eta) \pm 2 a b \cdot \Cov(\xi, \eta) \) 564 | \end{itemize} 565 | 566 | \textbf{Covarianza} \quad \( \Cov(\xi, \eta) = \E \left[ (\xi - \E(\xi)) \cdot (\eta - \E(\eta)) \right] \) 567 | 568 | Covarianza muestral: \quad \( \dfrac{\sum_{i = 1}^{n} (\xi_{i} - \E(\xi)) \cdot (\eta_{i} - \E(\eta))}{n - 1} \) 569 | 570 | Propiedades de la covarianza: 571 | 572 | \begin{itemize}[leftmargin=*] 573 | \item \( \Cov(\xi, a) = 0 \) 574 | \item \( \Cov(\xi + a, \eta + b) = \Cov(\xi, \eta) \) 575 | \item \( \Cov(a \cdot \xi, b \cdot \eta) = a b \cdot \Cov(\xi, \eta) \) 576 | \item \( \Cov(\xi, \xi) = \Var(\xi) \) 577 | \item \( \Cov(\xi, \eta) = \Cov(\eta, \xi) \) 578 | \end{itemize} 579 | 580 | \columnbreak 581 | 582 | \section*{Contraste de hipótesis} 583 | 584 | \begin{center} 585 | \begin{tabular}{ c | c | c } 586 | & \( H_{0} \) verdadera & \( H_{0} \) falsa \\ \hline 587 | Rechazar \( H_{0} \) & Falso positivo & Verdadero pos. \\ 588 | & Error Tipo I \( (\alpha) \) & \( (1 - \beta) \) \\ \hline 589 | No rechazar \( H_{0} \) & Verdadero neg.
& Falso negativo \\ 590 | & \( (1 - \alpha) \) & Error Tipo II \( (\beta) \) 591 | \end{tabular} 592 | \end{center} 593 | 594 | \columnbreak 595 | 596 | Típico contraste de una cola: 597 | 598 | \begin{center} 599 | \begin{tikzpicture}[scale=0.108] 600 | \fill [magenta] (4, 0) -- plot [domain=4:16, smooth] (\x, {cos(\x*7 + 70)*6 + 6}); 601 | \fill [cyan] (4, 0) -- plot [domain=-16:4, smooth] (\x, {cos(\x*7 - 70)*6 + 6}); 602 | \draw [thick, cyan] plot [domain=-16:36, smooth] (\x, {cos(\x*7 - 70)*6 + 6}); 603 | \draw [thick, magenta] plot [domain=-36:16, smooth] (\x, {cos(\x*7 + 70)*6 + 6}); 604 | \draw [thick, <->] (-40, 0) -- (40, 0); 605 | \draw [thick, dashed] (4, 0) -- (4, 11); 606 | \node at (-20, 15) {Distribución \( H_{0} \)}; 607 | \node at (20, 15) {Distribución \( H_{1} \)}; 608 | \node at (-10, 7) {\( 1 - \alpha \)}; 609 | \node at (10, 7) {\( 1 - \beta \)}; 610 | \node at (6, 2) {\( \alpha \)}; 611 | \node at (-2, 2) {\( \beta \)}; 612 | \node at (4, 13) {\( C \)}; 613 | \end{tikzpicture} 614 | \end{center} 615 | 616 | donde \( (1 - \alpha) \) es nivel de confianza, \( \alpha \) es nivel de significación, \( C \) es valor crítico, \( (1 - \beta) \) es potencia estadística. 617 | 618 | \section*{Bootstrapping} 619 | 620 | \textbf{Problema} - Las aprox. asint. de las distribuciones de los estadísticos de contraste no funcionan bien en muestras pequeñas. 621 | 622 | \textbf{Solución} - Bootstrap es muestreo con reemplazo. Los datos observados se tratan como una población y se extraen varias muestras para recalcular un estimador o estadístico varias veces (mejora la precisión). 623 | 624 | \end{multicols} 625 | 626 | \begin{multicols}{2} 627 | 628 | \section*{VAR (Vector Autoregressive)} 629 | 630 | Un modelo VAR captura \textbf{interacciones dinámicas} entre series temporales.
El \( \text{VAR}(p) \): 631 | 632 | \begin{center} 633 | \( y_{t} = A_{1} y_{t - 1} + \cdots + A_{p} y_{t - p} + B x_{t} + CD_{t} + u_{t} \) 634 | \end{center} 635 | 636 | donde: 637 | 638 | \begin{itemize}[leftmargin=*] 639 | \item \( y_{t} = (y_{1t}, \ldots, y_{Kt})^{\top} \) es un vector de \( K \) series temporales observables endógenas. 640 | \item Las \( A_{i} \) son matrices de coeficientes \( K \times K \). 641 | \item \( x_{t} = (x_{1t}, \ldots, x_{Mt})^{\top} \) es un vector de \( M \) series temporales observables exógenas. 642 | \item \( B \) es una matriz de coeficientes \( K \times M \). 643 | \item \( D_{t} \) es un vector que contiene los términos deterministas: una constante, tendencia lineal, variables estacionales binarias, y/o cualquier otra variable ficticia especificada. 644 | \item \( C \) es una matriz de coeficientes de dimensión apropiada. 645 | \item \( u_{t} = (u_{1t}, \ldots, u_{Kt})^{\top} \) es un vector de \( K \) series de ruido blanco. 646 | \end{itemize} 647 | 648 | \textbf{Condición de estabilidad}: 649 | 650 | \begin{center} 651 | \( \det(I_{K} - A_{1} z - \cdots - A_{p} z^{p}) \neq 0 \quad \text{para} \quad \lvert z \rvert \leq 1 \) 652 | \end{center} 653 | 654 | \quad esto es, \textbf{no hay raíces} dentro ni sobre el círculo unitario complejo.
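La condición de estabilidad puede comprobarse numéricamente con la matriz compañera del VAR: el modelo es estable si todos sus autovalores tienen módulo menor que 1 (equivalente a que no haya raíces de \( \det(I_{K} - A_{1} z - \cdots - A_{p} z^{p}) \) con \( \lvert z \rvert \leq 1 \)). Esbozo con numpy; las matrices \( A_{1}, A_{2} \) son valores ilustrativos, no parte de la hoja:

```python
import numpy as np

# VAR(2) con K = 2; coeficientes ilustrativos
A1 = np.array([[0.5, 0.1],
               [0.2, 0.3]])
A2 = np.array([[0.1, 0.0],
               [0.0, 0.1]])
K, p = 2, 2

# Matriz companera del VAR(2): [[A1, A2], [I, 0]]
companion = np.zeros((K * p, K * p))
companion[:K, :K] = A1
companion[:K, K:] = A2
companion[K:, :K] = np.eye(K)

# Estable si todos los autovalores tienen modulo < 1
eigvals = np.linalg.eigvals(companion)
estable = bool(np.all(np.abs(eigvals) < 1))
```

Con estos coeficientes el mayor módulo de los autovalores queda por debajo de 1, así que el VAR es estable; si alguna raíz cayera sobre el círculo unitario, indicaría una raíz unitaria en el sistema.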
655 | 656 | Por ejemplo, un modelo VAR con dos variables endógenas \( (K = 2) \), dos retardos \( (p = 2) \), una variable exógena contemporánea \( (M = 1) \), constante \( (\const) \) y tendencia \( (\trend) \): 657 | 658 | \begin{center} 659 | \scalebox{0.80}{ 660 | \( 661 | \begin{bmatrix} 662 | y_{1t} \\ 663 | y_{2t} 664 | \end{bmatrix} 665 | = 666 | \begin{bmatrix} 667 | a_{11, 1} & a_{12, 1} \\ 668 | a_{21, 1} & a_{22, 1} 669 | \end{bmatrix} 670 | \cdot 671 | \begin{bmatrix} 672 | y_{1, t - 1} \\ 673 | y_{2, t - 1} 674 | \end{bmatrix} 675 | + 676 | \begin{bmatrix} 677 | a_{11, 2} & a_{12, 2} \\ 678 | a_{21, 2} & a_{22, 2} 679 | \end{bmatrix} 680 | \cdot 681 | \begin{bmatrix} 682 | y_{1, t - 2} \\ 683 | y_{2, t - 2} 684 | \end{bmatrix} 685 | + 686 | \begin{bmatrix} 687 | b_{11} \\ 688 | b_{21} 689 | \end{bmatrix} 690 | \cdot 691 | \begin{bmatrix} 692 | x_{t} 693 | \end{bmatrix} 694 | + 695 | \begin{bmatrix} 696 | c_{11} & c_{12} \\ 697 | c_{21} & c_{22} 698 | \end{bmatrix} 699 | \cdot 700 | \begin{bmatrix} 701 | \const \\ 702 | \trend 703 | \end{bmatrix} 704 | + 705 | \begin{bmatrix} 706 | u_{1t} \\ 707 | u_{2t} 708 | \end{bmatrix} 709 | \) 710 | } 711 | \end{center} 712 | 713 | Visualizando las ecuaciones por separado: 714 | 715 | \begin{center} 716 | \( y_{1t} = a_{11, 1} y_{1, t - 1} + a_{12, 1} y_{2, t - 1} + a_{11, 2} y_{1, t - 2} + a_{12, 2} y_{2, t - 2} + b_{11} x_{t} + c_{11} + c_{12} \trend + u_{1t} \) 717 | 718 | \( y_{2t} = a_{21, 1} y_{1, t - 1} + a_{22, 1} y_{2, t - 1} + a_{21, 2} y_{1, t - 2} + a_{22, 2} y_{2, t - 2} + b_{21} x_{t} + c_{21} + c_{22} \trend + u_{2t} \) 719 | \end{center} 720 | 721 | Si hay una raíz unitaria, el determinante es cero para \( z = 1 \); entonces algunas o todas las variables están integradas, y el modelo VAR ya no es apropiado (es inestable).
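Cada ecuación de un VAR como las anteriores puede estimarse por MCO. Esbozo con numpy sobre un VAR(1) bivariante con constante, por simplicidad (la matriz \( A_{1} \) y la constante son valores ilustrativos, no parte de la hoja):

```python
import numpy as np

rng = np.random.default_rng(2)
A1 = np.array([[0.5, 0.1],
               [0.2, 0.3]])        # VAR(1) estable, coeficientes ilustrativos
c = np.array([1.0, -0.5])          # constante ilustrativa
T = 2000

# Simular y_t = c + A1 y_{t-1} + u_t, con u_t ruido blanco
y = np.zeros((T, 2))
for t in range(1, T):
    y[t] = c + A1 @ y[t - 1] + rng.normal(size=2)

# MCO ecuacion por ecuacion: regresar cada y_{kt} sobre (const, y_{t-1})
Z = np.column_stack([np.ones(T - 1), y[:-1]])
B = np.linalg.solve(Z.T @ Z, Z.T @ y[1:])   # fila 0: constantes; filas 1-2: A1'
c_hat = B[0]
A1_hat = B[1:].T
```

Con una muestra larga, `c_hat` y `A1_hat` se aproximan a los valores simulados, ilustrando que el VAR sin restricciones se estima de forma consistente con MCO por ecuaciones.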
722 | 723 | \subsection*{SVAR (Structural VAR)} 724 | 725 | En un modelo VAR, las interpretaciones de causalidad no son explícitas, y los resultados pueden variar según el orden de las variables. Un SVAR extiende el VAR al imponer restricciones sobre las matrices \( \mathsf{A} \) y/o \( \mathsf{B} \). Esto permite una interpretación causal y un análisis de shocks sin necesidad de depender de un orden arbitrario. 726 | 727 | Por ejemplo, un modelo \( \text{SVAR}(p) \) básico: 728 | 729 | \begin{center} 730 | \( \mathsf{A} y_t = \mathsf{A} [A_1, \ldots, A_p] y_{t - 1} + \mathsf{B} \varepsilon_t \) 731 | \end{center} 732 | 733 | donde: 734 | 735 | \begin{itemize}[leftmargin=*] 736 | \item \( u_t = \mathsf{A}^{-1} \mathsf{B} \varepsilon_t \) 737 | \item \( \mathsf{A} \), \( \mathsf{B} \) son matrices \( (K \times K) \). 738 | \end{itemize} 739 | 740 | \columnbreak 741 | 742 | \section*{VECM (Vector Error Correction Model)} 743 | 744 | Si existen \textbf{relaciones cointegradoras} en un sistema de variables, la forma VAR no es la más conveniente. Es mejor usar un VECM, esto es, el VAR en niveles, sustrayendo \( y_{t - 1} \) de ambos lados. El \( \text{VECM}(p - 1) \): 745 | 746 | \begin{center} 747 | \( \Delta y_{t} = \Pi y_{t - 1} + \sum_{i = 1}^{p - 1} \Gamma_{i} \Delta y_{t - i} + B x_{t} + CD_{t} + u_{t} \) 748 | \end{center} 749 | 750 | donde: 751 | 752 | \begin{itemize}[leftmargin=*] 753 | \item \( \Delta y_{t} = (\Delta y_{1t}, \ldots, \Delta y_{Kt})^{\top} \) es un vector de \( K \) series temporales observables endógenas. 754 | \item \( \Pi y_{t - 1} \) es la parte de \textbf{largo plazo}. 755 | \begin{itemize}[leftmargin=*, label={\( \diamond \)}] 756 | \item \( \Pi = - (I_{K} - A_{1} - \cdots - A_{p}) \) 757 | \item \( \Pi = \alpha \beta^{\top} \) 758 | \item \( \alpha \) es la \textbf{matriz de carga} \( (K \times r) \). Representa la velocidad de ajuste.
759 | \item \( \beta \) es la \textbf{matriz de cointegración} \( (K \times r) \). 760 | \item \( \beta^{\top} y_{t - 1} \) es la \textbf{ecuación de cointegración}. Representa el equilibrio a largo plazo. 761 | \item \( \rk(\Pi) = \rk(\alpha) = \rk(\beta) = r \) es el \textbf{rango cointegrador}. 762 | \end{itemize} 763 | \item \( \Gamma_{i} = - (A_{i + 1} + \cdots + A_{p}) \) para \( i = 1, \ldots, p - 1 \) son los parámetros a \textbf{corto plazo}. 764 | \item \( x_{t} \), \( B \), \( C \), \( D_{t} \) y \( u_{t} \) son como en VAR. 765 | \end{itemize} 766 | 767 | Por ejemplo, un VECM con tres variables endógenas \( (K = 3) \), dos retardos \( (p = 2) \) y dos relaciones cointegradoras \( (r = 2) \): 768 | 769 | \begin{center} 770 | \( \Delta y_{t} = \Pi y_{t - 1} + \Gamma_{1} \Delta y_{t - 1} + u_{t} \) 771 | \end{center} 772 | 773 | \quad donde: 774 | 775 | \begin{center} 776 | \scalebox{0.95}{ 777 | \( 778 | \Pi y_{t - 1} = \alpha \beta^{\top} y_{t - 1} = 779 | \begin{bmatrix} 780 | \alpha_{11} & \alpha_{12} \\ 781 | \alpha_{21} & \alpha_{22} \\ 782 | \alpha_{31} & \alpha_{32} 783 | \end{bmatrix} 784 | \begin{bmatrix} 785 | \beta_{11} & \beta_{21} & \beta_{31} \\ 786 | \beta_{12} & \beta_{22} & \beta_{32} 787 | \end{bmatrix} 788 | \begin{bmatrix} 789 | y_{1, t - 1} \\ 790 | y_{2, t - 1} \\ 791 | y_{3, t - 1} 792 | \end{bmatrix} 793 | = 794 | \begin{bmatrix} 795 | \alpha_{11} ec_{1, t - 1} + \alpha_{12} ec_{2, t - 1} \\ 796 | \alpha_{21} ec_{1, t - 1} + \alpha_{22} ec_{2, t - 1} \\ 797 | \alpha_{31} ec_{1, t - 1} + \alpha_{32} ec_{2, t - 1} 798 | \end{bmatrix} 799 | \) 800 | } 801 | \end{center} 802 | 803 | \vspace*{0.2cm} 804 | 805 | \begin{center} 806 | \( ec_{1, t - 1} = \beta_{11} y_{1, t - 1} + \beta_{21} y_{2, t - 1} + \beta_{31} y_{3, t - 1} \) 807 | 808 | \( ec_{2, t - 1} = \beta_{12} y_{1, t - 1} + \beta_{22} y_{2, t - 1} + \beta_{32} y_{3, t - 1} \) 809 | \end{center} 810 | 811 | \quad y 812 | 813 | \begin{center} 814 | \scalebox{0.95}{ 815 | \( 816 
| \Gamma_{1} \Delta y_{t - 1} = 817 | \begin{bmatrix} 818 | \gamma_{11} & \gamma_{12} & \gamma_{13} \\ 819 | \gamma_{21} & \gamma_{22} & \gamma_{23} \\ 820 | \gamma_{31} & \gamma_{32} & \gamma_{33} 821 | \end{bmatrix} 822 | \begin{bmatrix} 823 | \Delta y_{1, t - 1} \\ 824 | \Delta y_{2, t - 1} \\ 825 | \Delta y_{3, t - 1} 826 | \end{bmatrix} 827 | \quad 828 | u_t = 829 | \begin{bmatrix} 830 | u_{1} \\ 831 | u_{2} \\ 832 | u_{3} 833 | \end{bmatrix} 834 | \) 835 | } 836 | \end{center} 837 | 838 | Visualizando las ecuaciones por separado: 839 | 840 | \begin{center} 841 | \( \Delta y_{1t} = \alpha_{11} ec_{1, t - 1} + \alpha_{12} ec_{2, t - 1} + \gamma_{11} \Delta y_{1, t - 1} + \gamma_{12} \Delta y_{2, t - 1} + \gamma_{13} \Delta y_{3, t - 1} + u_{1t} \) 842 | 843 | \( \Delta y_{2t} = \alpha_{21} ec_{1, t - 1} + \alpha_{22} ec_{2, t - 1} + \gamma_{21} \Delta y_{1, t - 1} + \gamma_{22} \Delta y_{2, t - 1} + \gamma_{23} \Delta y_{3, t - 1} + u_{2t} \) 844 | 845 | \( \Delta y_{3t} = \alpha_{31} ec_{1, t - 1} + \alpha_{32} ec_{2, t - 1} + \gamma_{31} \Delta y_{1, t - 1} + \gamma_{32} \Delta y_{2, t - 1} + \gamma_{33} \Delta y_{3, t - 1} + u_{3t} \) 846 | \end{center} 847 | 848 | \end{multicols} 849 | 850 | \end{document} -------------------------------------------------------------------------------- /econometrics-cheatsheet/econometrics-cheatsheet-en.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marcelomijas/econometrics-cheatsheet/1f49cd08d8232e3eb9c3357fa47327fd46dddae5/econometrics-cheatsheet/econometrics-cheatsheet-en.pdf -------------------------------------------------------------------------------- /econometrics-cheatsheet/econometrics-cheatsheet-en.tex: -------------------------------------------------------------------------------- 1 | % !TeX spellcheck = en_GB 2 | % !TeX encoding = UTF-8 3 | \documentclass[10pt, a4paper, landscape]{article} 4 | 5 | % ----- packages ----- 6 | 
\usepackage{amsmath} % AMS mathematical facilities for LaTeX 7 | \usepackage{enumitem} % Control layout of itemize, enumerate, description 8 | \usepackage{fancyhdr} % Extensive control of page headers and footers in LaTeX2e 9 | \usepackage{geometry} % Flexible and complete interface to document dimensions 10 | \usepackage{graphicx} % Enhanced support for graphics 11 | \usepackage{hyperref} % Extensive support for hypertext in LaTeX 12 | \usepackage{multicol} % Intermix single and multiple columns 13 | \usepackage{parskip} % Layout with zero \parindent, non-zero \parskip 14 | \usepackage{tikz} % Create PostScript and PDF graphics in TeX 15 | \usepackage{titlesec} % Select alternative section titles 16 | 17 | % ----- pdf metadata ----- 18 | \hypersetup{ 19 | pdftitle={Econometrics Cheat Sheet}, 20 | pdfsubject={The Econometrics Cheat Sheet Project - marcelomijas - CC-BY-4.0}, 21 | pdfauthor={Marcelo Moreno Porras}, 22 | pdfkeywords={statistics, latex, economics, cheatsheet, econometrics, ols-regression, economic-modelling}, 23 | pdfduplex={DuplexFlipShortEdge} 24 | } 25 | 26 | % ----- random seed ----- 27 | \pgfmathsetseed{12} 28 | 29 | % ----- custom commands ----- 30 | \DeclareMathOperator{\E}{E} 31 | \DeclareMathOperator{\Var}{Var} 32 | \DeclareMathOperator{\se}{se} 33 | \DeclareMathOperator{\Cov}{Cov} 34 | \DeclareMathOperator{\Corr}{Corr} 35 | \DeclareMathOperator{\resid}{resid} 36 | \newcommand{\SSR}{\text{SSR}} 37 | \newcommand{\SSE}{\text{SSE}} 38 | \newcommand{\SST}{\text{SST}} 39 | 40 | % ----- page customization ----- 41 | \geometry{margin=1cm} % margins config 42 | \pagenumbering{gobble} % remove page numeration 43 | \setlength{\parskip}{0cm} % paragraph spacing 44 | % title spacing 45 | \titlespacing{\section}{0pt}{2ex}{1ex} 46 | \titlespacing{\subsection}{0pt}{1ex}{0ex} 47 | \titlespacing{\subsubsection}{0pt}{0.5ex}{0ex} 48 | 49 | % ----- footer ----- 50 | \pagestyle{fancy} 51 | \renewcommand{\headrulewidth}{0pt} 52 |
\cfoot{\href{https://github.com/marcelomijas/econometrics-cheatsheet}{\normalfont \footnotesize CS-25.08.1-EN - github.com/marcelomijas/econometrics-cheatsheet - CC-BY-4.0 license}} 53 | \setlength{\footskip}{12pt} 54 | 55 | % ----- document ----- 56 | \begin{document} 57 | 58 | \begin{multicols}{3} 59 | 60 | \begin{center} 61 | \textbf{\LARGE \href{https://github.com/marcelomijas/econometrics-cheatsheet}{Econometrics Cheat Sheet}} 62 | 63 | {\footnotesize By Marcelo Moreno Porras - Universidad Rey Juan Carlos} 64 | 65 | {\footnotesize The Econometrics Cheat Sheet Project} 66 | \end{center} 67 | 68 | \section*{Basic concepts} 69 | 70 | \subsection*{Definitions} 71 | 72 | \textbf{Econometrics} - is a social science discipline with the objective of quantifying the relationships between economic agents, testing economic theories, and evaluating and implementing government and business policies. 73 | 74 | \textbf{Econometric model} - is a simplified representation of reality to explain economic phenomena. 75 | 76 | \textbf{\textsl{Ceteris paribus}} - if all the other relevant factors remain constant. 77 | 78 | \subsection*{Data structures} 79 | 80 | \textbf{Cross-section} - sample taken at a given point in time, a static \textsl{photo}. Order does not matter. 81 | 82 | \textbf{Time series} - observations over time. Order does matter. 83 | 84 | \textbf{Panel data} - a time series for each observation of a cross-section. 85 | 86 | \textbf{Pooled cross-sections} - cross-sections from different time periods. 87 | 88 | \subsection*{Phases of an econometric model} 89 | 90 | \begin{enumerate}[leftmargin=*] 91 | \setlength{\multicolsep}{0pt} 92 | \begin{multicols}{2} 93 | \item Specification. 94 | \item Estimation. 95 | \columnbreak 96 | \item Validation. 97 | \item Utilization.
98 | \end{multicols} 99 | \end{enumerate} 100 | 101 | \subsection*{Regression analysis} 102 | 103 | Study and predict the mean value of a variable (dependent variable, \( y \)) on the basis of fixed values of other variables (independent variables, \( x \)'s). In econometrics, it is common to use Ordinary Least Squares (OLS) for regression analysis. 104 | 105 | \subsection*{Correlation analysis} 106 | 107 | Correlation analysis does not distinguish between dependent and independent variables. 108 | 109 | \begin{itemize}[leftmargin=*] 110 | \item Simple correlation measures the degree of linear association between two variables. 111 | \begin{center} 112 | \( r = \frac{\Cov(x, y)}{\sigma_{x} \cdot \sigma_{y}} = \frac{\sum_{i = 1}^{n} \left( (x_{i} - \overline{x}) \cdot (y_{i} - \overline{y}) \right)}{\sqrt{\sum_{i = 1}^{n} (x_{i} - \overline{x})^{2} \cdot \sum_{i = 1}^{n} (y_{i} - \overline{y})^{2}}} \) 113 | \end{center} 114 | \item Partial correlation measures the degree of linear association between two variables while controlling for a third. 115 | \end{itemize} 116 | 117 | \columnbreak 118 | 119 | \section*{Assumptions and properties} 120 | 121 | \subsection*{Econometric model assumptions} 122 | 123 | Under these assumptions, the OLS estimator presents good properties. \textbf{Gauss-Markov} assumptions: 124 | 125 | \begin{enumerate}[leftmargin=*] 126 | \item \textbf{Parameters linearity} (and weak dependence in time series). \( y \) must be a linear function of the \( \beta \)'s. 127 | \item \textbf{Random sampling}. The sample from the population has been randomly taken. (Only for cross-sections) 128 | \item \textbf{No perfect collinearity}. 129 | \begin{itemize}[leftmargin=*] 130 | \item There are no independent variables that are constant: \( \Var(x_{j}) \neq 0, \; \forall j = 1, \ldots, k \) 131 | \item There is no exact linear relation between independent variables. 132 | \end{itemize} 133 | \item \textbf{Conditional mean zero and correlation zero}.
134 | \begin{enumerate}[leftmargin=*, label=\alph*.] 135 | \item There are no systematic errors: \( \E(u \mid x_{1}, \ldots, x_{k}) = \E(u) = 0 \rightarrow \) \textbf{strong exogeneity} (a implies b). 136 | \item There are no relevant variables left out of the model: \( \Cov(x_{j}, u) = 0, \; \forall j = 1, \ldots, k \rightarrow \) \textbf{weak exogeneity}. 137 | \end{enumerate} 138 | \item \textbf{Homoscedasticity}. The variability of the residuals is the same for all levels of \( x \): \\ \( \Var(u \mid x_{1}, \ldots, x_{k}) = \sigma_{u}^{2} \) 139 | \item \textbf{No autocorrelation}. Residuals do not contain information about any other residuals: \\ \( \Corr(u_{t}, u_{s} \mid x_{1}, \ldots, x_{k}) = 0, \; \forall t \neq s \) 140 | \item \textbf{Normality}. Residuals are independent and identically distributed: \( u \sim \mathcal{N} (0, \sigma_{u}^{2}) \) 141 | \item \textbf{Data size}. The number of observations available must be greater than the \( (k + 1) \) parameters to estimate. (It is automatically satisfied in asymptotic settings) 142 | \end{enumerate} 143 | 144 | \subsection*{Asymptotic properties of OLS} 145 | 146 | Under the econometric model assumptions and the Central Limit Theorem (CLT): 147 | 148 | \begin{itemize}[leftmargin=*] 149 | \item Hold 1 to 4a: OLS is \textbf{unbiased}. \( \E(\hat{\beta}_{j}) = \beta_{j} \) 150 | \item Hold 1 to 4: OLS is \textbf{consistent}. \( \operatorname{plim}(\hat{\beta}_{j}) = \beta_{j} \) (under 4b alone, without 4a, OLS is biased but consistent) 151 | \item Hold 1 to 5: \textbf{Asymptotic normality} of OLS (then, 7 is necessarily satisfied): \( u \underset{a}{\sim} \mathcal{N} (0, \sigma_{u}^{2}) \) 152 | \item Hold 1 to 6: \textbf{Unbiased estimate} of \( \sigma_{u}^{2} \). \( \E(\hat{\sigma}_{u}^{2}) = \sigma_{u}^{2} \) 153 | \item Hold 1 to 6: OLS is \textcolor{blue}{BLUE} (Best Linear Unbiased Estimator) or \textbf{efficient}.
154 | \item Hold 1 to 7: Hypothesis testing and confidence intervals can be done reliably. 155 | \end{itemize} 156 | 157 | \columnbreak 158 | 159 | \section*{Ordinary Least Squares} 160 | 161 | \textbf{Objective} - minimise the Sum of Squared Residuals (SSR): 162 | 163 | \begin{center} 164 | \( \min \sum_{i = 1}^{n} \hat{u}_{i}^{2} \), where \( \hat{u}_{i} = y_{i} - \hat{y}_{i} \) 165 | \end{center} 166 | 167 | \subsection*{Simple regression model} 168 | 169 | \setlength{\multicolsep}{2pt} 170 | \setlength{\columnsep}{-40pt} 171 | \begin{multicols}{2} 172 | 173 | \begin{tikzpicture}[scale=0.15] 174 | \draw [thick, <->] (0, 20) node [anchor=south] {\( y \)} -- (0, 0) -- (20, 0) node [anchor=south] {\( x \)}; 175 | \draw [red, thick] plot [domain=0:20] (\x, {5 + 0.5*\x}); 176 | \draw plot [only marks, mark=*, mark size=6, domain=2:18, samples=20] (\x, {rnd*5 + 2.5 + 0.5*\x}); 177 | \draw (0.5, 0.5) -- (0.5, 4.5) node [anchor=north west] {\( \beta_{0} \)}; 178 | \draw (8, 9) -- (12, 9) arc (0:25:4); 179 | \draw (13.5, 9) node {\( \beta_{1} \)}; 180 | \end{tikzpicture} 181 | 182 | \columnbreak 183 | 184 | Equation: 185 | 186 | \begin{center} 187 | \( y_{i} = \beta_{0} + \beta_{1} x_{i} + u_{i} \) 188 | \end{center} 189 | 190 | Estimation: 191 | 192 | \begin{center} 193 | \( \hat{y}_{i} = \hat{\beta}_{0} + \hat{\beta}_{1} x_{i} \) 194 | \end{center} 195 | 196 | where: 197 | 198 | \begin{center} 199 | \( \hat{\beta}_{0} = \overline{y} - \hat{\beta}_{1} \overline{x} \) 200 | 201 | \( \hat{\beta}_{1} = \frac{\Cov(y, x)}{\Var(x)} \) 202 | \end{center} 203 | 204 | \end{multicols} 205 | 206 | \subsection*{Multiple regression model} 207 | 208 | \setlength{\multicolsep}{2pt} 209 | \setlength{\columnsep}{-40pt} 210 | \begin{multicols}{2} 211 | 212 | \begin{tikzpicture}[scale=0.15] 213 | \draw [thick, ->] (0, 0) -- (7, 6) node [anchor=north west] {\( x_{2} \)}; 214 | \draw [thick, <->] (0, 20) node [anchor=south] {\( y \)} -- (0, 0) -- (20, 0) node [anchor=south] {\( x_{1} \)}; 
215 | \draw [red, thick] (0, 6) -- (5, 13); 216 | \draw [red, thick] (3, 7) -- (8, 14); 217 | \draw [red, thick] (6, 8) -- (11, 15); 218 | \draw [red, thick] (9, 9) -- (14, 16); 219 | \draw [red, thick] (12, 10) -- (17, 17); 220 | \draw [red, thick] (15, 11) -- (20, 18); 221 | \draw [red, thick] (0, 6) -- (15, 11); 222 | \draw [red, thick] (1.25, 7.75) -- (16.25, 12.75); 223 | \draw [red, thick] (2.5, 9.5) -- (17.5, 14.5); 224 | \draw [red, thick] (3.75, 11.25) -- (18.75, 16.25); 225 | \draw [red, thick] (5, 13) -- (20, 18); 226 | \draw plot [only marks, mark=*, mark size=6, domain=2:18, samples=20] (\x, {rnd*6 + 4 + 0.5*\x}); 227 | \draw (0.5, 1) -- (0.5, 5.5) node [anchor=north west] {\( \beta_{0} \)}; 228 | \end{tikzpicture} 229 | 230 | \columnbreak 231 | 232 | Equation: 233 | 234 | \begin{center} 235 | \( y_{i} = \beta_{0} + \beta_{1} x_{1i} + \cdots + \beta_{k} x_{ki} + u_{i} \) 236 | \end{center} 237 | 238 | Estimation: 239 | 240 | \begin{center} 241 | \( \hat{y}_{i} = \hat{\beta}_{0} + \hat{\beta}_{1} x_{1i} + \cdots + \hat{\beta}_{k} x_{ki} \) 242 | \end{center} 243 | 244 | where: 245 | 246 | \begin{center} 247 | \( \hat{\beta}_{0} = \overline{y} - \hat{\beta}_{1} \overline{x}_{1} - \cdots - \hat{\beta}_{k} \overline{x}_{k} \) 248 | 249 | \( \hat{\beta}_{j} = \frac{\Cov(y, \resid x_{j})}{\Var(\resid x_{j})} \) 250 | \end{center} 251 | 252 | Matrix: \( \hat{\beta} = (X^{\top} X)^{-1}(X^{\top} y) \) 253 | 254 | \end{multicols} 255 | 256 | \subsection*{Interpretation of coefficients} 257 | 258 | \begin{center} 259 | \scalebox{0.85}{ 260 | \begin{tabular}{ c c c c } 261 | Model & Dependent & Independent & \( \beta_{1} \) interpretation \\ \hline 262 | Level-level & \( y \) & \( x \) & \( \Delta y = \beta_{1} \Delta x \) \\ 263 | Level-log & \( y \) & \( \log(x) \) & \( \Delta y \approx (\beta_{1} / 100) (\% \Delta x) \) \\ 264 | Log-level & \( \log(y) \) & \( x \) & \( \% \Delta y \approx (100 \beta_{1}) \Delta x \) \\ 265 | Log-log & \( \log(y) \) & \( \log(x) 
\) & \( \% \Delta y \approx \beta_{1} (\% \Delta x) \) \\ 266 | Quadratic & \( y \) & \( x + x^{2} \) & \( \Delta y = (\beta_{1} + 2 \beta_{2} x) \Delta x \) 267 | \end{tabular} 268 | } 269 | \end{center} 270 | 271 | \subsection*{Error measurements} 272 | 273 | Sum of Sq. Residuals: \hfill \( \SSR = \sum_{i = 1}^{n} \hat{u}_{i}^{2} = \sum_{i = 1}^{n} (y_{i} - \hat{y}_{i})^{2} \) 274 | 275 | Explained Sum of Squares: \hfill \( \SSE = \sum_{i = 1}^{n} (\hat{y}_{i} - \overline{y})^{2} \) 276 | 277 | Total Sum of Sq.: \hfill \( \SST = \SSE + \SSR = \sum_{i = 1}^{n} (y_{i} - \overline{y})^{2} \) 278 | 279 | Standard Error of the Regression: \hfill \( \hat{\sigma}_{u} = \sqrt{\frac{\SSR}{n - k - 1}} \) 280 | 281 | Standard Error of the \( \hat{\beta} \)'s: \hfill \( \se(\hat{\beta}) = \sqrt{\hat{\sigma}_{u}^{2} \cdot (X^{\top} X)^{-1}} \) 282 | 283 | Root Mean Squared Error: \hfill \( \text{RMSE} = \sqrt{\frac{\sum_{i = 1}^{n} (y_{i} - \hat{y}_{i})^{2}}{n}} \) 284 | 285 | Absolute Mean Error: \hfill \( \text{AME} = \frac{\sum_{i = 1}^{n} \lvert y_{i} - \hat{y}_{i} \rvert}{n} \) 286 | 287 | Mean Percentage Error: \hfill \( \text{MPE} = \frac{\sum_{i = 1}^{n} \lvert \hat{u}_{i} / y_{i} \rvert}{n} \cdot 100 \) 288 | 289 | \columnbreak 290 | 291 | \section*{R-squared} 292 | 293 | It is a measure of the \textbf{goodness of fit}, i.e., how well the regression fits the data: 294 | 295 | \begin{center} 296 | \( R^{2} = \frac{\SSE}{\SST} = 1 - \frac{\SSR}{\SST} \) 297 | \end{center} 298 | 299 | \begin{itemize}[leftmargin=*] 300 | \item Measures the \textbf{percentage of variation} of \( y \) that is linearly \textbf{explained} by the variations of the \( x \)'s. 301 | \item Takes values \textbf{between 0} (no linear explanation) \textbf{and 1} (total explanation). 302 | \end{itemize} 303 | 304 | When the number of regressors increases, the value of the R-squared also increases, whether or not the new variables are relevant.
To solve this problem, there is an \textbf{adjusted R-squared} by degrees of freedom (or corrected): 305 | 306 | \begin{center} 307 | \( \overline{R}^{2} = 1 - \frac{n - 1}{n - k - 1} \cdot \frac{\SSR}{\SST} = 1 - \frac{n - 1}{n - k - 1} \cdot (1 - R^{2}) \) 308 | \end{center} 309 | 310 | For large sample sizes: \( \overline{R}^{2} \approx R^{2} \) 311 | 312 | \section*{Hypothesis testing} 313 | 314 | \subsection*{Definitions} 315 | 316 | It is a rule designed to determine, from a sample, whether there is \textbf{evidence or not to reject a hypothesis} made about one or more population parameters. 317 | 318 | Elements of a hypothesis test: 319 | 320 | \begin{itemize}[leftmargin=*] 321 | \item \textbf{Null hypothesis} \( (H_{0}) \) - is the hypothesis to be tested. 322 | \item \textbf{Alternative hypothesis} \( (H_{1}) \) - is the hypothesis that cannot be rejected when \( H_{0} \) is rejected. 323 | \item \textbf{Test statistic} - is a random variable whose probability distribution is known under \( H_{0} \). 324 | \item \textbf{Critical value} \( (C) \) - is the value against which the test statistic is compared to determine if \( H_{0} \) is rejected or not. It sets the frontier between the regions of acceptance and rejection of \( H_{0} \). 325 | \item \textbf{Significance level} \( (\alpha) \) - is the probability of rejecting \( H_{0} \) when it is true (Type I Error). It is chosen by those who conduct the test. It is commonly 10\%, 5\% or 1\%. 326 | \item \textbf{p-value} - is the highest level of significance at which \( H_{0} \) cannot be rejected. 327 | \end{itemize} 328 | 329 | \setlength{\multicolsep}{0pt} 330 | \setlength{\columnsep}{20pt} 331 | \begin{multicols}{2} 332 | 333 | \begin{tikzpicture}[scale=0.10] 334 | \node at (0, 15) {Two-tailed test.
\( H_{0} \) dist.}; 335 | \fill [red] (12, 0) -- plot [domain=12:18, smooth] (\x, {cos(\x*10)*6 + 6}); 336 | \fill [red] (-12, 0) -- plot [domain=-18:-12, smooth] (\x, {cos(\x*10)*6 + 6}); 337 | \draw [thick] plot [domain=-18:18, smooth] (\x, {cos(\x*10)*6 + 6}); 338 | \draw [thick, <->] (-20, 0) -- (20, 0); 339 | \draw [thick, dashed] (12, 0) -- (12, 7); 340 | \draw [thick, dashed] (-12, 0) -- (-12, 7); 341 | \node at (0, 2) {Accept. region}; 342 | \node at (0, 7) {\( 1 - \alpha \)}; 343 | \node [red] at (-16, 4) {\( \alpha /\ 2 \)}; 344 | \node [red] at (16, 4) {\( \alpha /\ 2 \)}; 345 | \node at (12, 9) {\( C \)}; 346 | \node at (-13, 9) {\( -C \)}; 347 | \end{tikzpicture} 348 | 349 | \columnbreak 350 | 351 | \begin{tikzpicture}[scale=0.10] 352 | \node at (0, 15) {One-tailed test. \( H_{0} \) dist.}; 353 | \fill [red] (9, 0) -- plot [domain=9:18, smooth] (\x, {cos(\x*10)*6 + 6}); 354 | \draw [thick] plot [domain=-18:18, smooth] (\x, {cos(\x*10)*6 + 6}); 355 | \draw [thick, <->] (-20, 0) -- (20, 0); 356 | \draw [thick, dashed] (9, 0) -- (9, 7); 357 | \node at (-1, 2) {Accept. reg.}; 358 | \node at (0, 7) {\( 1 - \alpha \)}; 359 | \node [red] at (14, 4) {\( \alpha \)}; 360 | \node at (9, 9) {\( C \)}; 361 | \end{tikzpicture} 362 | 363 | \end{multicols} 364 | 365 | \textbf{The rule is}: if p-value \( < \alpha \) holds, there is evidence to reject \( H_{0} \), thus, there is evidence to accept \( H_{1} \). 366 | 367 | \columnbreak 368 | 369 | \subsection*{Individual tests} 370 | 371 | Tests if a parameter is significantly different from a given value, \( \vartheta \). 
372 | 373 | \begin{itemize}[leftmargin=*] 374 | \item \( H_{0}: \beta_{j} = \vartheta \) 375 | \item \( H_{1}: \beta_{j} \neq \vartheta \) 376 | \end{itemize} 377 | 378 | \begin{center} 379 | Under \( H_{0} \): \quad \( t = \frac{\hat{\beta}_{j} - \vartheta}{\se(\hat{\beta}_{j})} \sim t_{n - k - 1, \alpha / 2} \) 380 | \end{center} 381 | 382 | If \( \lvert t \rvert > \lvert t_{n - k - 1, \alpha / 2} \rvert \), there is evidence to reject \( H_{0} \). 383 | 384 | \textbf{Individual significance test} - tests if a parameter is significantly \textbf{different from zero}. 385 | 386 | \begin{itemize}[leftmargin=*] 387 | \item \( H_{0}: \beta_{j} = 0 \) 388 | \item \( H_{1}: \beta_{j} \neq 0 \) 389 | \end{itemize} 390 | 391 | \begin{center} 392 | Under \( H_{0} \): \quad \( t = \frac{\hat{\beta}_{j}}{\se(\hat{\beta}_{j})} \sim t_{n - k - 1, \alpha / 2} \) 393 | \end{center} 394 | 395 | If \( \lvert t \rvert > \lvert t_{n - k - 1, \alpha / 2} \rvert \), there is evidence to reject \( H_{0} \). 396 | 397 | \subsection*{The F test} 398 | 399 | Simultaneously tests multiple (linear) hypotheses about the parameters. It makes use of a non-restricted model and a restricted model: 400 | 401 | \begin{itemize}[leftmargin=*] 402 | \item \textbf{Non-restricted model} - is the model on which we want to test the hypothesis. 403 | \item \textbf{Restricted model} - is the model on which the hypothesis that we want to test has been imposed. 404 | \end{itemize} 405 | 406 | Then, looking at the errors, there are: 407 | 408 | \begin{itemize}[leftmargin=*] 409 | \item \textbf{\( \SSR_{\text{UR}} \)} - is the \( \SSR \) of the non-restricted model. 410 | \item \textbf{\( \SSR_{\text{R}} \)} - is the \( \SSR \) of the restricted model.
411 | \end{itemize} 412 | 413 | \begin{center} 414 | Under \( H_{0} \): \quad \( F = \frac{\SSR_{\text{R}} - \SSR_{\text{UR}}}{\SSR_{\text{UR}}} \cdot \frac{n - k - 1}{q} \sim F_{q, n - k - 1} \) 415 | \end{center} 416 | 417 | where \( k \) is the number of parameters of the non-restricted model and \( q \) is the number of linear hypotheses tested. 418 | 419 | If \( F > F_{q, n - k - 1} \), there is evidence to reject \( H_{0} \). 420 | 421 | \textbf{Global significance test} - tests if all the parameters associated with the \( x \)'s are \textbf{simultaneously equal to zero}. 422 | 423 | \begin{itemize}[leftmargin=*] 424 | \item \( H_{0}: \beta_{1} = \beta_{2} = \cdots = \beta_{k} = 0 \) 425 | \item \( H_{1}: \beta_{1} \neq 0 \) and/or \( \beta_{2} \neq 0 \ldots \) and/or \( \beta_{k} \neq 0 \) 426 | \end{itemize} 427 | 428 | We can simplify the formula for the \( F \) statistic: 429 | 430 | \begin{center} 431 | Under \( H_{0} \): \quad \( F = \frac{R^{2}}{1 - R^{2}} \cdot \frac{n - k - 1}{k} \sim F_{k, n - k - 1} \) 432 | \end{center} 433 | 434 | If \( F > F_{k, n - k - 1} \), there is evidence to reject \( H_{0} \). 435 | 436 | \section*{Confidence intervals} 437 | 438 | The confidence intervals at the \( (1 - \alpha) \) confidence level can be calculated as: 439 | 440 | \begin{center} 441 | \( \hat{\beta}_{j} \mp t_{n - k - 1, \alpha / 2} \cdot \se(\hat{\beta}_{j}) \) 442 | \end{center} 443 | 444 | \columnbreak 445 | 446 | \section*{Dummy variables} 447 | 448 | Dummy (or binary) variables are used for qualitative information like sex, civil status, country, etc. 449 | 450 | \begin{itemize}[leftmargin=*] 451 | \item They take the \textbf{value 1} in a given category and \textbf{0 in the rest}. 452 | \item They are used to analyse and model \textbf{structural changes} in the parameters. 453 | \end{itemize} 454 | 455 | If a qualitative variable has \( m \) categories, only \( (m - 1) \) dummy variables must be included in the model.
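The \( (m - 1) \) rule can be checked numerically: with an intercept, including all \( m \) dummies makes the design matrix rank-deficient (the dummy columns sum to the intercept column), while \( (m - 1) \) dummies keep it full rank. A minimal sketch with numpy (the category data are hypothetical):

```python
import numpy as np

# Hypothetical qualitative variable with m = 3 categories
status = np.array(["single", "married", "divorced", "married", "single", "divorced"])
cats = ["single", "married", "divorced"]

def dummies(values, categories):
    """One 0/1 column per listed category."""
    return np.column_stack([(values == c).astype(float) for c in categories])

n = len(status)
# Correct: intercept + (m - 1) dummies ("single" is the base category)
X_ok = np.column_stack([np.ones(n), dummies(status, cats[1:])])
# Dummy trap: intercept + all m dummies (columns sum to the intercept)
X_trap = np.column_stack([np.ones(n), dummies(status, cats)])

print(np.linalg.matrix_rank(X_ok))    # 3 -> full column rank
print(np.linalg.matrix_rank(X_trap))  # 3 < 4 columns -> perfect collinearity
```

The rank-deficient matrix is exactly the perfect-collinearity case ruled out by assumption 3.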
456 | 457 | \subsection*{Structural change} 458 | 459 | Structural change refers to changes in the values of the parameters of the econometric model produced by the effect of different sub-populations. Structural change can be included in the model through dummy variables. 460 | 461 | The location of the dummy variables \( (D) \) matters: 462 | 463 | \begin{itemize}[leftmargin=*] 464 | \item \textbf{On the intercept} (additive effect) - represents the mean difference between the values produced by the structural change. 465 | \begin{center} 466 | \( y = \beta_{0} + \delta_{1} D + \beta_{1} x_{1} + u \) 467 | \end{center} 468 | \item \textbf{On the slope} (multiplicative effect) - represents the effect (slope) difference between the values produced by the structural change. 469 | \begin{center} 470 | \( y = \beta_{0} + \beta_{1} x_{1} + \delta_{1} D \cdot x_{1} + u \) 471 | \end{center} 472 | \end{itemize} 473 | 474 | \textbf{Chow's structural test} - analyses the existence of structural changes in all the model parameters. It is a particular case of the F test, where \( H_{0} \): no structural change (all \( \delta = 0 \)).
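Chow's test can be computed with the general F statistic: the restricted model pools the sub-populations, and the unrestricted model adds the intercept and slope dummies. A sketch on simulated data (break sizes, seed and sample size are all hypothetical):

```python
import numpy as np

rng = np.random.default_rng(1)

# Simulated data with a structural change in intercept and slope (illustrative)
n = 200
x = rng.normal(size=n)
D = (np.arange(n) >= n // 2).astype(float)   # sub-population dummy
y = 1.0 + 0.5 * x + 2.0 * D + 1.0 * D * x + rng.normal(size=n)

def ssr(X, y):
    """Sum of squared residuals of an OLS fit."""
    beta, *_ = np.linalg.lstsq(X, y, rcond=None)
    u = y - X @ beta
    return u @ u

ones = np.ones(n)
SSR_r = ssr(np.column_stack([ones, x]), y)             # restricted: no change
SSR_ur = ssr(np.column_stack([ones, x, D, D * x]), y)  # unrestricted: with dummies
q, k = 2, 3                    # 2 restrictions; k = 3 regressors in the UR model
F = (SSR_r - SSR_ur) / SSR_ur * (n - k - 1) / q
# Here F is far above any usual critical value of F(2, 196): reject H0
```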
475 | 476 | \section*{Changes of scale} 477 | 478 | Changes in the \textbf{measurement units} of the variables: 479 | 480 | \begin{itemize}[leftmargin=*] 481 | \item In the \textbf{endogenous} variable, \( y^{*} = y \cdot \lambda \) - affects all model parameters, \( \beta_{j}^{*} = \beta_{j} \cdot \lambda, \; \forall j = 0, 1, \ldots, k \) 482 | \item In an \textbf{exogenous} variable, \( x_{j}^{*} = x_{j} \cdot \lambda \) - only affects the parameter linked to said exogenous variable, \( \beta_{j}^{*} = \beta_{j} / \lambda \) 483 | \item Same scale change on the endogenous and the exogenous variables - only affects the intercept, \( \beta_{0}^{*} = \beta_{0} \cdot \lambda \) 484 | \end{itemize} 485 | 486 | \section*{Changes of origin} 487 | 488 | Changes in the \textbf{measurement origin} of the variables - only affect the model's intercept: \( \beta_{0}^{*} = \beta_{0} + \lambda \) for a change \( y^{*} = y + \lambda \) in the endogenous variable, and \( \beta_{0}^{*} = \beta_{0} - \beta_{j} \lambda \) for a change \( x_{j}^{*} = x_{j} + \lambda \) in an exogenous variable. 489 | 490 | \columnbreak 491 | 492 | \section*{Multicollinearity} 493 | 494 | \begin{itemize}[leftmargin=*] 495 | \item \textbf{Perfect multicollinearity} - there are independent variables that are constant and/or there is an exact linear relation between independent variables. It is the \textbf{breaking of the third (3) econometric} model \textbf{assumption}. 496 | \item \textbf{Approximate multicollinearity} - there are independent variables that are approximately constant and/or there is an approximately linear relation between independent variables. It \textbf{does not break any econometric} model \textbf{assumption}, but it affects OLS. 497 | \end{itemize} 498 | 499 | \subsection*{Consequences} 500 | 501 | \begin{itemize}[leftmargin=*] 502 | \item \textbf{Perfect multicollinearity} - the equation system of OLS cannot be solved due to infinite solutions. 503 | \item \textbf{Approximate multicollinearity} 504 | \begin{itemize}[leftmargin=*] 505 | \item Small sample variations can induce big variations in the OLS estimations.
506 | \item The variance of the OLS estimators of the collinear \( x \)'s increases, so the inference of the parameters is affected. The estimation of the parameters is very imprecise (big confidence intervals). 507 | \end{itemize} 508 | \end{itemize} 509 | 510 | \subsection*{Detection} 511 | 512 | \begin{itemize}[leftmargin=*] 513 | \item \textbf{Correlation analysis} - look for high correlations between independent variables, \( \lvert r \rvert > 0.7 \). 514 | \item \textbf{Variance Inflation Factor (VIF)} - indicates the increment of \( \Var(\hat{\beta}_{j}) \) because of the multicollinearity. 515 | \begin{center} 516 | \( \operatorname{VIF}(\hat{\beta}_{j}) = \frac{1}{1 - R_{j}^{2}} \) 517 | \end{center} 518 | where \( R_{j}^{2} \) denotes the R-squared from a regression between \( x_{j} \) and all the other \( x \)'s. 519 | \begin{itemize}[leftmargin=*] 520 | \item Values between 4 and 10 - there might be multicollinearity problems. 521 | \item Values \( > 10 \) - there are multicollinearity problems. 522 | \end{itemize} 523 | \end{itemize} 524 | 525 | One typical characteristic of multicollinearity is that the regression coefficients of the model are not individually different from zero (due to high variances), but jointly they are different from zero. 526 | 527 | \subsection*{Correction} 528 | 529 | \begin{itemize}[leftmargin=*] 530 | \item Delete one of the collinear variables. 531 | \item Perform factor analysis (or any other dimension reduction technique) on the collinear variables. 532 | \item Interpret coefficients with multicollinearity jointly.
533 | \end{itemize} 534 | 535 | \columnbreak 536 | 537 | \section*{Heteroscedasticity} 538 | 539 | The residuals \( u_{i} \) of the population regression function do not have the same variance \( \sigma_{u}^{2} \): 540 | 541 | \begin{center} 542 | \( \Var(u \mid x_{1}, \ldots, x_{k}) = \Var(u) \neq \sigma_{u}^{2} \) 543 | \end{center} 544 | 545 | It is the \textbf{breaking of the fifth (5) econometric} model \textbf{assumption}. 546 | 547 | \subsection*{Consequences} 548 | 549 | \begin{itemize}[leftmargin=*] 550 | \item OLS estimators are still unbiased. 551 | \item OLS estimators are still consistent. 552 | \item OLS is \textbf{no longer efficient}, but still a LUE (Linear Unbiased Estimator). 553 | \item \textbf{Variance estimations} of the estimators are \textbf{biased}: the construction of confidence intervals and the hypothesis testing are not reliable. 554 | \end{itemize} 555 | 556 | \subsection*{Detection} 557 | 558 | \begin{itemize}[leftmargin=*] 559 | \setlength{\multicolsep}{0pt} 560 | \setlength{\columnsep}{20pt} 561 | \begin{multicols}{3} 562 | \item \textbf{Graphs} - look for scatter patterns on \( x \) vs. \( u \) or \( x \) vs. \( y \) plots.
563 | \columnbreak 564 | \begin{tikzpicture}[scale=0.108] 565 | \draw [thick, ->] (0, 10) -- (20, 10) node [anchor=north] {\( x \)}; 566 | \draw [thick, -] (0, 0) -- (0, 20) node [anchor=west] {\( u \)}; 567 | \draw plot [only marks, mark=*, mark size=6, domain=2:18, samples=50] (\x, {-0.5*rand*\x + 10}); 568 | \draw [thick, dashed, red, -latex] plot [domain=1:18] (\x, {-0.5*\x + 9.5}); 569 | \draw [thick, dashed, red, -latex] plot [domain=1:18] (\x, {0.5*\x + 10.5}); 570 | \end{tikzpicture} 571 | \columnbreak 572 | \begin{tikzpicture}[scale=0.108] 573 | \draw [thick, <->] (0, 20) node [anchor=west] {\( y \)} -- (0, 0) -- (20, 0) node [anchor=south] {\( x \)}; 574 | \draw plot [only marks, mark=*, mark size=6, domain=2:16, samples=50] (\x, {0.5*\x*(rand + 1) + 2}); 575 | \draw [thick, dashed, red, -latex] plot [domain=1:16] (\x, {1.5}); 576 | \draw [thick, dashed, red, -latex] plot [domain=1:16] (\x, {1*\x + 2.5}); 577 | \end{tikzpicture} 578 | \end{multicols} 579 | \item \textbf{Formal tests} - White, Bartlett, Breusch-Pagan, etc. Commonly, \( H_{0} \): No heteroscedasticity. 580 | \end{itemize} 581 | 582 | \subsection*{Correction} 583 | 584 | \begin{itemize}[leftmargin=*] 585 | \item Use OLS with a variance-covariance matrix estimator robust to heteroscedasticity (HC), for example, the one proposed by White. 586 | \item If the variance structure is known, make use of Weighted Least Squares (WLS) or Generalized Least Squares (GLS): 587 | \begin{itemize}[leftmargin=*] 588 | \item Supposing that \( \Var(u) = \sigma_{u}^{2} \cdot x_{i} \), divide the model variables by the square root of \( x_{i} \) and apply OLS. 589 | \item Supposing that \( \Var(u) = \sigma_{u}^{2} \cdot x_{i}^{2} \), divide the model variables by \( x_{i} \) (the square root of \( x_{i}^{2} \)) and apply OLS. 
590 | \end{itemize} 591 | \item If the variance structure is not known, make use of Feasible Weighted Least Squares (FWLS), which estimates a possible variance, divides the model variables by it, and then applies OLS. 592 | \item Use a new model specification, for example, a logarithmic transformation (lowers the variance). 593 | \end{itemize} 594 | 595 | \columnbreak 596 | 597 | \section*{Autocorrelation} 598 | 599 | The residual of any observation, \( u_{t} \), is correlated with the residual of any other observation. The observations are not independent. 600 | 601 | \begin{center} 602 | \( \Corr(u_{t}, u_{s} \mid x_{1}, \ldots, x_{k}) = \Corr(u_{t}, u_{s}) \neq 0, \quad \forall t \neq s \) 603 | \end{center} 604 | 605 | The ``natural'' context of this phenomenon is time series. It is the \textbf{violation of the sixth (6) econometric} model \textbf{assumption}. 606 | 607 | \subsection*{Consequences} 608 | 609 | \begin{itemize}[leftmargin=*] 610 | \item OLS estimators are still unbiased. 611 | \item OLS estimators are still consistent. 612 | \item OLS is \textbf{not efficient} anymore, but it is still a LUE (Linear Unbiased Estimator). 613 | \item \textbf{Variance estimates} of the estimators are \textbf{biased}: confidence intervals and hypothesis tests are not reliable. 614 | \end{itemize} 615 | 616 | \subsection*{Detection} 617 | 618 | \begin{itemize}[leftmargin=*] 619 | \item \textbf{Graphs} - look for scatter patterns on \( u_{t - 1} \) vs. \( u_{t} \) or make use of a correlogram.
620 | 621 | \setlength{\multicolsep}{0pt} 622 | \setlength{\columnsep}{6pt} 623 | \begin{multicols}{3} 624 | \begin{center} 625 | \begin{tikzpicture}[scale=0.11] 626 | \node at (10, 23) {\textbf{Ac.}}; 627 | \draw [thick, ->] (0, 10) -- (20, 10) node [anchor=south] {\( u_{t - 1} \)}; 628 | \draw [thick, -] (0, 0) -- (0, 20) node [anchor=west] {\( u_{t} \)}; 629 | \draw plot [only marks, mark=*, mark size=6, domain=2:18, samples=50] (\x, {-0.2*(\x - 10)^2 + 13 + 6*rnd}); 630 | \draw [thick, dashed, red, -latex] plot [domain=2:18] (\x, {-0.2*(\x - 10)^2 + 16}); 631 | \end{tikzpicture} 632 | \end{center} 633 | \columnbreak 634 | \begin{center} 635 | \begin{tikzpicture}[scale=0.11] 636 | \node at (10, 23) {\textbf{Ac. \( + \)}}; 637 | \draw [thick, ->] (0, 10) -- (20, 10) node [anchor=north] {\( u_{t - 1} \)}; 638 | \draw [thick, -] (0, 0) -- (0, 20) node [anchor=west] {\( u_{t} \)}; 639 | \draw plot [only marks, mark=*, mark size=6, domain=2:18, samples=20] (\x, {5*rnd + 2.5 + 0.5*\x}); 640 | \draw [thick, dashed, red, -latex] plot [domain=2:18] (\x, {5 + 0.5*\x}); 641 | \end{tikzpicture} 642 | \end{center} 643 | \columnbreak 644 | \begin{center} 645 | \begin{tikzpicture}[scale=0.11] 646 | \node at (10, 23) {\textbf{Ac. \( - \)}}; 647 | \draw [thick, ->] (0, 10) -- (20, 10) node [anchor=south] {\( u_{t - 1} \)}; 648 | \draw [thick, -] (0, 0) -- (0, 20) node [anchor=west] {\( u_{t} \)}; 649 | \draw plot [only marks, mark=*, mark size=6, domain=2:18, samples=20] (\x, {5*rnd + 12.5 - 0.5*\x}); 650 | \draw [thick, dashed, red, -latex] plot [domain=2:18] (\x, {15 - 0.5*\x}); 651 | \end{tikzpicture} 652 | \end{center} 653 | \end{multicols} 654 | \item \textbf{Formal tests} - Durbin-Watson, Breusch-Godfrey, etc. Commonly, \( H_{0} \): No autocorrelation. 
655 | \end{itemize} 656 | 657 | \subsection*{Correction} 658 | 659 | \begin{itemize}[leftmargin=*] 660 | \item Use OLS with a variance-covariance matrix estimator robust to heteroscedasticity and autocorrelation (HAC), for example, the one proposed by Newey-West. 661 | \item Use Generalized Least Squares. Supposing \( y_{t} = \beta_{0} + \beta_{1} x_{t} + u_{t} \), with \( u_{t} = \rho u_{t - 1} + \varepsilon_{t} \), where \( \lvert \rho \rvert < 1 \) and \( \varepsilon_{t} \) is white noise. 662 | \begin{itemize}[leftmargin=*] 663 | \item If \( \rho \) is known, create a quasi-differenced model whose error term is white noise and estimate it by OLS. 664 | \item If \( \rho \) is not known, estimate it by, for example, the Cochrane-Orcutt method, then create a quasi-differenced model whose error term is white noise and estimate it by OLS. 665 | \end{itemize} 666 | \end{itemize} 667 | 668 | \end{multicols} 669 | 670 | \end{document} -------------------------------------------------------------------------------- /econometrics-cheatsheet/econometrics-cheatsheet-es.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marcelomijas/econometrics-cheatsheet/1f49cd08d8232e3eb9c3357fa47327fd46dddae5/econometrics-cheatsheet/econometrics-cheatsheet-es.pdf -------------------------------------------------------------------------------- /econometrics-cheatsheet/econometrics-cheatsheet-es.tex: -------------------------------------------------------------------------------- 1 | % !TeX spellcheck = es_ES 2 | % !TeX encoding = UTF-8 3 | \documentclass[10pt, a4paper, landscape]{article} 4 | 5 | % ----- packages ----- 6 | \usepackage{amsmath} % AMS mathematical facilities for LaTeX 7 | \usepackage{enumitem} % Control layout of itemize, enumerate, description 8 | \usepackage{fancyhdr} % Extensive control of page headers and footers in LaTeX2 9 | \usepackage{geometry} % Flexible and complete interface to document
dimensions 10 | \usepackage{graphicx} % Enhanced support for graphics 11 | \usepackage{hyperref} % Extensive support for hypertext in LaTeX 12 | \usepackage{multicol} % Intermix single and multiple columns 13 | \usepackage{parskip} % Layout with zero \parindent, non-zero \parskip 14 | \usepackage{tikz} % Create PostScript and PDF graphics in TeX 15 | \usepackage{titlesec} % Select alternative section titles 16 | 17 | % ----- pdf metadata ----- 18 | \hypersetup{ 19 | pdftitle={Hoja de Referencia Econometría}, 20 | pdfsubject={The Econometrics Cheat Sheet Project - marcelomijas - CC-BY-4.0}, 21 | pdfauthor={Marcelo Moreno Porras}, 22 | pdfkeywords={statistics, latex, economics, cheatsheet, econometrics, ols-regression, economic-modelling}, 23 | pdfduplex={DuplexFlipShortEdge} 24 | } 25 | 26 | % ----- random seed ----- 27 | \pgfmathsetseed{12} 28 | 29 | % ----- custom commands ----- 30 | \DeclareMathOperator{\E}{E} 31 | \DeclareMathOperator{\Var}{Var} 32 | \DeclareMathOperator{\se}{ee} 33 | \DeclareMathOperator{\Cov}{Cov} 34 | \DeclareMathOperator{\Corr}{Corr} 35 | \DeclareMathOperator{\resid}{resid} 36 | \newcommand{\SSR}{\text{SRC}} 37 | \newcommand{\SSE}{\text{SEC}} 38 | \newcommand{\SST}{\text{STC}} 39 | 40 | % ----- page customization ----- 41 | \geometry{margin=1cm} % margins config 42 | \pagenumbering{gobble} % remove page numeration 43 | \setlength{\parskip}{0cm} % paragraph spacing 44 | % title spacing 45 | \titlespacing{\section}{0pt}{2ex}{1ex} 46 | \titlespacing{\subsection}{0pt}{1ex}{0ex} 47 | \titlespacing{\subsubsection}{0pt}{0.5ex}{0ex} 48 | 49 | % ----- footer ----- 50 | \pagestyle{fancy} 51 | \renewcommand{\headrulewidth}{0pt} 52 | \cfoot{\href{https://github.com/marcelomijas/econometrics-cheatsheet}{\normalfont \footnotesize CS-25.08.1-ES - github.com/marcelomijas/econometrics-cheatsheet - CC-BY-4.0 license}} 53 | \setlength{\footskip}{12pt} 54 | 55 | % ----- document ----- 56 | \begin{document} 57 | 58 | \begin{multicols}{3} 59 | 60 | \begin{center}
61 | \textbf{\LARGE \href{https://github.com/marcelomijas/econometrics-cheatsheet}{Hoja de Referencia Econometría}} 62 | 63 | {\footnotesize Por Marcelo Moreno Porras - Universidad Rey Juan Carlos} 64 | 65 | {\footnotesize The Econometrics Cheat Sheet Project} 66 | \end{center} 67 | 68 | \section*{Conceptos básicos} 69 | 70 | \subsection*{Definiciones} 71 | 72 | \textbf{Econometría} - es una disciplina de las ciencias sociales que tiene como objetivo cuantificar las relaciones entre agentes económicos, contrastar teorías económicas y evaluar e implementar políticas públicas y privadas. 73 | 74 | \textbf{Modelo econométrico} - es una representación simplificada de la realidad para explicar fenómenos económicos. 75 | 76 | \textbf{\textsl{Ceteris paribus}} - si todos los demás factores relevantes permanecen constantes. 77 | 78 | \subsection*{Estructuras de datos} 79 | 80 | \textbf{Sección cruzada} - muestra recogida en un momento dado en el tiempo, una \textsl{foto} estática. El orden no importa. 81 | 82 | \textbf{Series temporales} - observaciones a lo largo del tiempo. El orden sí importa. 83 | 84 | \textbf{Datos de panel} - una serie temporal por cada observación de una sección cruzada. 85 | 86 | \textbf{Secciones transversales agrupadas} - secciones cruzadas de diferentes periodos temporales. 87 | 88 | \subsection*{Fases de un modelo econométrico} 89 | 90 | \begin{enumerate}[leftmargin=*] 91 | \setlength{\multicolsep}{0pt} 92 | \begin{multicols}{2} 93 | \item Especificación. 94 | \item Estimación. 95 | \columnbreak 96 | \item Validación. 97 | \item Utilización. 98 | \end{multicols} 99 | \end{enumerate} 100 | 101 | \subsection*{Análisis de regresión} 102 | 103 | Estudiar y predecir el valor medio de una variable (dependiente, \( y \)) respecto a unos valores fijos de otras variables (variables independientes, \( x \)). En econometría, es común usar Mínimos Cuadrados Ordinarios (MCO) para análisis de regresión. 
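A modo de ilustración (esbozo mínimo con datos simulados, no procedentes del documento), la estimación por MCO de una regresión simple puede calcularse con numpy:

```python
import numpy as np

# Datos hipotéticos: y = 2 + 0.5*x + u, con u ~ N(0, 1)
rng = np.random.default_rng(0)
x = rng.uniform(0, 10, 100)
y = 2.0 + 0.5 * x + rng.normal(0, 1, 100)

# Matriz de diseño con columna de unos (término constante)
X = np.column_stack([np.ones_like(x), x])

# Estimador MCO: beta = (X'X)^{-1} X'y
beta_hat, *_ = np.linalg.lstsq(X, y, rcond=None)
print(beta_hat)  # cercano a [2, 0.5]
```

El cálculo explícito con \( (X^{\top} X)^{-1} X^{\top} y \) da el mismo resultado; `lstsq` es simplemente más estable numéricamente.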
104 | 105 | \subsection*{Análisis de correlación} 106 | 107 | El análisis de correlación no distingue entre variables dependientes e independientes. 108 | 109 | \begin{itemize}[leftmargin=*] 110 | \item La correlación simple mide el grado de asociación lineal entre dos variables. 111 | \begin{center} 112 | \( r = \frac{\Cov(x, y)}{\sigma_{x} \cdot \sigma_{y}} = \frac{\sum_{i = 1}^{n} \left( (x_{i} - \overline{x}) \cdot (y_{i} - \overline{y}) \right)}{\sqrt{\sum_{i = 1}^{n} (x_{i} - \overline{x})^{2} \cdot \sum_{i = 1}^{n} (y_{i} - \overline{y})^{2}}} \) 113 | \end{center} 114 | \item La correlación parcial mide el grado de asociación lineal entre dos variables controlando una tercera. 115 | \end{itemize} 116 | 117 | \columnbreak 118 | 119 | \section*{Supuestos y propiedades} 120 | 121 | \subsection*{Supuestos del modelo econométrico} 122 | 123 | Bajo estos supuestos, el estimador de MCO presentará buenas propiedades. Supuestos \textbf{Gauss-Markov}: 124 | 125 | \begin{enumerate}[leftmargin=*] 126 | \item \textbf{Linealidad en parámetros} (y dependencia débil en series temporales). \( y \) debe ser una función lineal de \( \beta \). 127 | \item \textbf{Muestreo aleatorio}. La muestra de la población se ha tomado de forma aleatoria. (Sólo sección cruzada) 128 | \item \textbf{No colinealidad perfecta}. 129 | \begin{itemize}[leftmargin=*] 130 | \item No hay variables independientes que sean constantes: \( \Var(x_{j}) \neq 0, \; \forall j = 1, \ldots, k \) 131 | \item No hay una relación lineal exacta entre variables independientes. 132 | \end{itemize} 133 | \item \textbf{Media condicional cero y correlación cero}. 134 | \begin{enumerate}[leftmargin=*, label=\alph{*}.] 135 | \item No hay errores sistemáticos: \( \E(u \mid x_{1}, \ldots, x_{k}) = \E(u) = 0 \rightarrow \) \textbf{exogeneidad fuerte} (a implica b). 136 | \item No hay variables relevantes fuera del modelo: \( \Cov(x_{j}, u) = 0, \; \forall j = 1, \ldots, k \rightarrow \) \textbf{exogeneidad débil}.
137 | \end{enumerate} 138 | \item \textbf{Homocedasticidad}. La variabilidad de los residuos es igual para todos los niveles de \( x \): \\ \( \Var(u \mid x_{1}, \ldots, x_{k}) = \sigma_{u}^{2} \) 139 | \item \textbf{No autocorrelación}. Los residuos no contienen información sobre otros residuos: \\ \( \Corr(u_{t}, u_{s} \mid x_{1}, \ldots, x_{k}) = 0, \; \forall t \neq s \) 140 | \item \textbf{Normalidad}. Los residuos son independientes e idénticamente distribuidos: \( u \sim \mathcal{N} (0, \sigma_{u}^{2}) \) 141 | \item \textbf{Tamaño de datos}. El número de observaciones disponibles debe ser mayor a \( (k + 1) \) parámetros a estimar. (Ya satisfecho bajo situaciones asintóticas) 142 | \end{enumerate} 143 | 144 | \subsection*{Propiedades asintóticas de MCO} 145 | 146 | Bajo los supuestos del modelo econométrico y el Teorema Central del Límite (TCL): 147 | 148 | \begin{itemize}[leftmargin=*] 149 | \item De 1 a 4a: MCO es \textbf{insesgado}. \( \E(\hat{\beta}_j) = \beta_{j} \) 150 | \item De 1 a 4: MCO es \textbf{consistente}. \( \operatorname{plim}(\hat{\beta}_{j}) = \beta_{j} \) (hasta 4b sin 4a, exogeneidad débil: sesgado pero consistente). 151 | \item De 1 a 5: \textbf{normalidad asintótica} de MCO (entonces, 7 es necesariamente satisfecho): \( u \underset{a}{\sim} \mathcal{N} (0, \sigma_{u}^{2}) \) 152 | \item De 1 a 6: \textbf{estimador insesgado} de \( \sigma_{u}^{2} \). \( \E(\hat{\sigma}_{u}^{2}) = \sigma_{u}^{2} \) 153 | \item De 1 a 6: MCO es MELI (Mejor Estimador Lineal Insesgado, \textcolor{blue}{BLUE} en inglés) o \textbf{eficiente}. 154 | \item De 1 a 7: contrastes de hipótesis e intervalos de confianza son fiables. 155 | \end{itemize} 156 | 157 | \columnbreak 158 | 159 | \section*{Mínimos Cuadrados Ordinarios} 160 | 161 | \textbf{Objetivo} - minimizar Suma de Resid.
Cuadrados (SRC): 162 | 163 | \begin{center} 164 | \( \min \sum_{i = 1}^{n} \hat{u}_{i}^{2} \), donde \( \hat{u}_{i} = y_{i} - \hat{y}_{i} \) 165 | \end{center} 166 | 167 | \subsection*{Modelo de regresión simple} 168 | 169 | \setlength{\multicolsep}{2pt} 170 | \setlength{\columnsep}{-40pt} 171 | \begin{multicols}{2} 172 | 173 | \begin{tikzpicture}[scale=0.15] 174 | \draw [thick, <->] (0, 20) node [anchor=south] {\( y \)} -- (0, 0) -- (20, 0) node [anchor=south] {\( x \)}; 175 | \draw [red, thick] plot [domain=0:20] (\x, {5 + 0.5*\x}); 176 | \draw plot [only marks, mark=*, mark size=6, domain=2:18, samples=20] (\x, {rnd*5 + 2.5 + 0.5*\x}); 177 | \draw (0.5, 0.5) -- (0.5, 4.5) node [anchor=north west] {\( \beta_{0} \)}; 178 | \draw (8, 9) -- (12, 9) arc (0:25:4); 179 | \draw (13.5, 9) node {\( \beta_{1} \)}; 180 | \end{tikzpicture} 181 | 182 | \columnbreak 183 | 184 | Ecuación: 185 | 186 | \begin{center} 187 | \( y_{i} = \beta_{0} + \beta_{1} x_{i} + u_{i} \) 188 | \end{center} 189 | 190 | Estimación: 191 | 192 | \begin{center} 193 | \( \hat{y}_{i} = \hat{\beta}_{0} + \hat{\beta}_{1} x_{i} \) 194 | \end{center} 195 | 196 | donde: 197 | 198 | \begin{center} 199 | \( \hat{\beta}_{0} = \overline{y} - \hat{\beta}_{1} \overline{x} \) 200 | 201 | \( \hat{\beta}_{1} = \frac{\Cov(y, x)}{\Var(x)} \) 202 | \end{center} 203 | 204 | \end{multicols} 205 | 206 | \subsection*{Modelo de regresión múltiple} 207 | 208 | \setlength{\multicolsep}{2pt} 209 | \setlength{\columnsep}{-40pt} 210 | \begin{multicols}{2} 211 | 212 | \begin{tikzpicture}[scale=0.15] 213 | \draw [thick, ->] (0, 0) -- (7, 6) node [anchor=north west] {\( x_{2} \)}; 214 | \draw [thick, <->] (0, 20) node [anchor=south] {\( y \)} -- (0, 0) -- (20, 0) node [anchor=south] {\( x_{1} \)}; 215 | \draw [red, thick] (0, 6) -- (5, 13); 216 | \draw [red, thick] (3, 7) -- (8, 14); 217 | \draw [red, thick] (6, 8) -- (11, 15); 218 | \draw [red, thick] (9, 9) -- (14, 16); 219 | \draw [red, thick] (12, 10) -- (17, 17); 220 | \draw 
[red, thick] (15, 11) -- (20, 18); 221 | \draw [red, thick] (0, 6) -- (15, 11); 222 | \draw [red, thick] (1.25, 7.75) -- (16.25, 12.75); 223 | \draw [red, thick] (2.5, 9.5) -- (17.5, 14.5); 224 | \draw [red, thick] (3.75, 11.25) -- (18.75, 16.25); 225 | \draw [red, thick] (5, 13) -- (20, 18); 226 | \draw plot [only marks, mark=*, mark size=6, domain=2:18, samples=20] (\x, {rnd*6 + 4 + 0.5*\x}); 227 | \draw (0.5, 1) -- (0.5, 5.5) node [anchor=north west] {\( \beta_{0} \)}; 228 | \end{tikzpicture} 229 | 230 | \columnbreak 231 | 232 | Ecuación: 233 | 234 | \begin{center} 235 | \( y_{i} = \beta_{0} + \beta_{1} x_{1i} + \cdots + \beta_{k} x_{ki} + u_{i} \) 236 | \end{center} 237 | 238 | Estimación: 239 | 240 | \begin{center} 241 | \( \hat{y}_{i} = \hat{\beta}_{0} + \hat{\beta}_{1} x_{1i} + \cdots + \hat{\beta}_{k} x_{ki} \) 242 | \end{center} 243 | 244 | donde: 245 | 246 | \begin{center} 247 | \( \hat{\beta}_{0} = \overline{y} - \hat{\beta}_{1} \overline{x}_{1} - \cdots - \hat{\beta}_{k} \overline{x}_{k} \) 248 | 249 | \( \hat{\beta}_{j} = \frac{\Cov(y, \resid x_{j})}{\Var(\resid x_{j})} \) 250 | \end{center} 251 | 252 | Matriz: \( \hat{\beta} = (X^{\top} X)^{-1}(X^{\top} y) \) 253 | 254 | \end{multicols} 255 | 256 | \subsection*{Interpretación de coeficientes} 257 | 258 | \begin{center} 259 | \scalebox{0.85}{ 260 | \begin{tabular}{ c c c c } 261 | Modelo & Dependiente & Independ. 
& Interpretación \( \beta_{1} \) \\ \hline 262 | Nivel-nivel & \( y \) & \( x \) & \( \Delta y = \beta_{1} \Delta x \) \\ 263 | Nivel-log & \( y \) & \( \log(x) \) & \( \Delta y \approx (\beta_{1} / 100) (\% \Delta x) \) \\ 264 | Log-nivel & \( \log(y) \) & \( x \) & \( \% \Delta y \approx (100 \beta_{1}) \Delta x \) \\ 265 | Log-log & \( \log(y) \) & \( \log(x) \) & \( \% \Delta y \approx \beta_{1} (\% \Delta x) \) \\ 266 | Cuadrático & \( y \) & \( x + x^{2} \) & \( \Delta y = (\beta_{1} + 2 \beta_{2} x) \Delta x \) 267 | \end{tabular} 268 | } 269 | \end{center} 270 | 271 | \subsection*{Medidas de error} 272 | 273 | Suma de Resid. Cuad.: \hfill \( \SSR = \sum_{i = 1}^{n} \hat{u}_{i}^{2} = \sum_{i = 1}^{n} (y_{i} - \hat{y}_{i})^{2} \) 274 | 275 | Suma Explicada de Cuadrados: \hfill \( \SSE = \sum_{i = 1}^{n} (\hat{y}_{i} - \overline{y})^{2} \) 276 | 277 | Suma Tot. de Cuad.: \hfill \( \SST = \SSE + \SSR = \sum_{i = 1}^{n} (y_{i} - \overline{y})^{2} \) 278 | 279 | Error Estándar de la Regresión: \hfill \( \hat{\sigma}_{u} = \sqrt{\frac{\SSR}{n - k - 1}} \) 280 | 281 | Error Estándar de \( \hat{\beta} \): \hfill \( \se(\hat{\beta}) = \sqrt{\hat{\sigma}_{u}^{2} \cdot (X^{\top} X)^{-1}} \) 282 | 283 | Raíz del Error Cuadrático Medio: \hfill \( \text{RECM} = \sqrt{\frac{\sum_{i = 1}^{n} (y_{i} - \hat{y}_{i})^{2}}{n}} \) 284 | 285 | Error Medio Absoluto: \hfill \( \text{EMA} = \frac{\sum_{i = 1}^{n} \lvert y_{i} - \hat{y}_{i} \rvert}{n} \) 286 | 287 | Porcentaje Medio de Error: \hfill \( \text{PME} = \frac{\sum_{i = 1}^{n} \lvert \hat{u}_{i} / y_{i} \rvert}{n} \cdot 100 \) 288 | 289 | \columnbreak 290 | 291 | \section*{R-cuadrado} 292 | 293 | Es una medida de la \textbf{bondad del ajuste}, cómo la regresión se ajusta a los datos: 294 | 295 | \begin{center} 296 | \( R^{2} = \frac{\SSE}{\SST} = 1 - \frac{\SSR}{\SST} \) 297 | \end{center} 298 | 299 | \begin{itemize}[leftmargin=*] 300 | \item Mide el \textbf{porcentaje de variación} en \( y \) que es linealmente 
\textbf{explicado} por variaciones de las \( x \). 301 | \item Toma valores \textbf{entre 0} (no hay explicación lineal) \textbf{y 1} (explicación total). 302 | \end{itemize} 303 | 304 | Cuando el número de regresores aumenta, el R-cuadrado también lo hace, independientemente de si las nuevas variables son relevantes o no. Para resolver este problema, hay un \textbf{R-cuadrado ajustado} por grados de libertad (o corregido): 305 | 306 | \begin{center} 307 | \( \overline{R}^{2} = 1 - \frac{n - 1}{n - k - 1} \cdot \frac{\SSR}{\SST} = 1 - \frac{n - 1}{n - k - 1} \cdot (1 - R^{2}) \) 308 | \end{center} 309 | 310 | Para muestras grandes: \( \overline{R}^{2} \approx R^{2} \) 311 | 312 | \section*{Contrastes de hipótesis} 313 | 314 | \subsection*{Definiciones} 315 | 316 | Es una regla diseñada para, a partir de una muestra, determinar si existe \textbf{evidencia para rechazar (o no) una hipótesis} sobre uno o más parámetros poblacionales. 317 | 318 | Elementos de un contraste de hipótesis: 319 | 320 | \begin{itemize}[leftmargin=*] 321 | \item \textbf{Hipótesis nula} \( (H_{0}) \) - es la hipótesis a ser probada. 322 | \item \textbf{Hipótesis alternativa} \( (H_{1}) \) - es la hipótesis que no puede rechazarse si \( H_{0} \) es rechazada. 323 | \item \textbf{Estadístico de contraste} - es una variable aleatoria cuya distribución de probabilidad es conocida bajo \( H_{0} \). 324 | \item \textbf{Valor crítico} \( (C) \) - es el valor contra el cual se compara el estadístico de contraste para determinar si se rechaza o no \( H_{0} \). Determina la frontera entre la región de aceptación y la de rechazo de \( H_{0} \). 325 | \item \textbf{Nivel de significación} \( (\alpha) \) - es la probabilidad de rechazar \( H_{0} \) siendo cierta (Error Tipo I). Es elegido por quien conduce el contraste. Usualmente 10\%, 5\% o 1\%. 326 | \item \textbf{p-valor} - es el nivel de significación máximo por el cual \( H_{0} \) no puede ser rechazada.
327 | \end{itemize} 328 | 329 | \setlength{\multicolsep}{0pt} 330 | \setlength{\columnsep}{20pt} 331 | \begin{multicols}{2} 332 | 333 | \begin{tikzpicture}[scale=0.10] 334 | \node at (0, 15) {Dos colas. Distrib. \( H_{0} \)}; 335 | \fill [red] (12, 0) -- plot [domain=12:18, smooth] (\x, {cos(\x*10)*6 + 6}); 336 | \fill [red] (-12, 0) -- plot [domain=-18:-12, smooth] (\x, {cos(\x*10)*6 + 6}); 337 | \draw [thick] plot [domain=-18:18, smooth] (\x, {cos(\x*10)*6 + 6}); 338 | \draw [thick, <->] (-20, 0) -- (20, 0); 339 | \draw [thick, dashed] (12, 0) -- (12, 7); 340 | \draw [thick, dashed] (-12, 0) -- (-12, 7); 341 | \node at (0, 2) {Región acept.}; 342 | \node at (0, 7) {\( 1 - \alpha \)}; 343 | \node [red] at (-16, 4) {\( \alpha /\ 2 \)}; 344 | \node [red] at (16, 4) {\( \alpha /\ 2 \)}; 345 | \node at (12, 9) {\( C \)}; 346 | \node at (-13, 9) {\( -C \)}; 347 | \end{tikzpicture} 348 | 349 | \columnbreak 350 | 351 | \begin{tikzpicture}[scale=0.10] 352 | \node at (0, 15) {Una cola. Distrib. \( H_{0} \)}; 353 | \fill [red] (9, 0) -- plot [domain=9:18, smooth] (\x, {cos(\x*10)*6 + 6}); 354 | \draw [thick] plot [domain=-18:18, smooth] (\x, {cos(\x*10)*6 + 6}); 355 | \draw [thick, <->] (-20, 0) -- (20, 0); 356 | \draw [thick, dashed] (9, 0) -- (9, 7); 357 | \node at (-1, 2) {Región acept.}; 358 | \node at (0, 7) {\( 1 - \alpha \)}; 359 | \node [red] at (14, 4) {\( \alpha \)}; 360 | \node at (9, 9) {\( C \)}; 361 | \end{tikzpicture} 362 | 363 | \end{multicols} 364 | 365 | \textbf{Regla general}: si p-valor \( < \alpha \), existe evidencia para rechazar \( H_{0} \), es decir, existe evidencia para aceptar \( H_{1} \). 366 | 367 | \columnbreak 368 | 369 | \subsection*{Contrastes individuales} 370 | 371 | Prueba si un parámetro es significativamente diferente de un cierto valor, \( \vartheta \). 
372 | 373 | \begin{itemize}[leftmargin=*] 374 | \item \( H_{0}: \beta_{j} = \vartheta \) 375 | \item \( H_{1}: \beta_{j} \neq \vartheta \) 376 | \end{itemize} 377 | 378 | \begin{center} 379 | Bajo \( H_{0} \): \quad \( t = \frac{\hat{\beta}_{j} - \vartheta}{\se(\hat{\beta}_{j})} \sim t_{n - k - 1, \alpha / 2} \) 380 | \end{center} 381 | 382 | Si \( \lvert t \rvert > \lvert t_{n - k - 1, \alpha / 2} \rvert \), existe evidencia para rechazar \( H_{0} \). 383 | 384 | \textbf{Contraste de significación individual} - prueba si un parámetro es \textbf{significativamente distinto de cero}. 385 | 386 | \begin{itemize}[leftmargin=*] 387 | \item \( H_{0}: \beta_{j} = 0 \) 388 | \item \( H_{1}: \beta_{j} \neq 0 \) 389 | \end{itemize} 390 | 391 | \begin{center} 392 | Bajo \( H_{0} \): \quad \( t = \frac{\hat{\beta}_{j}}{\se(\hat{\beta}_{j})} \sim t_{n - k - 1, \alpha / 2} \) 393 | \end{center} 394 | 395 | Si \( \lvert t \rvert > \lvert t_{n - k - 1, \alpha / 2} \rvert \), existe evidencia para rechazar \( H_{0} \). 396 | 397 | \subsection*{Contraste F} 398 | 399 | Prueba simultáneamente múltiples hipótesis (lineales) sobre los parámetros. Hace uso de un modelo no restringido y uno restringido: 400 | 401 | \begin{itemize}[leftmargin=*] 402 | \item \textbf{Modelo no restringido} - es el modelo donde se quiere probar la hipótesis. 403 | \item \textbf{Modelo restringido} - es el modelo donde se ha impuesto la hipótesis que se quiere probar. 404 | \end{itemize} 405 | 406 | Entonces, viendo los errores, hay: 407 | 408 | \begin{itemize}[leftmargin=*] 409 | \item \textbf{\( \SSR_{\text{UR}} \)} - es la \( \SSR \) del modelo no restringido. 410 | \item \textbf{\( \SSR_{\text{R}} \)} - es la \( \SSR \) del modelo restringido. 
411 | \end{itemize} 412 | 413 | \begin{center} 414 | Bajo \( H_{0} \): \quad \( F = \frac{\SSR_{\text{R}} - \SSR_{\text{UR}}}{\SSR_{\text{UR}}} \cdot \frac{n - k - 1}{q} \sim F_{q, n - k - 1} \) 415 | \end{center} 416 | 417 | donde \( k \) es el número de parámetros del modelo no restringido y \( q \) es el número de hipótesis lineales a probar. 418 | 419 | Si \( F > F_{q, n - k - 1} \), existe evidencia para rechazar \( H_{0} \). 420 | 421 | \textbf{Contraste de significación global} - prueba si todos los parámetros asociados a las \( x \) son \textbf{simultáneamente cero}. 422 | 423 | \begin{itemize}[leftmargin=*] 424 | \item \( H_{0}: \beta_{1} = \beta_{2} = \cdots = \beta_{k} = 0 \) 425 | \item \( H_{1}: \beta_{1} \neq 0 \) y/o \( \beta_{2} \neq 0 \ldots \) y/o \( \beta_{k} \neq 0 \) 426 | \end{itemize} 427 | 428 | Podemos simplificar la fórmula para el estadístico \( F \): 429 | 430 | \begin{center} 431 | Bajo \( H_{0} \): \quad \( F = \frac{R^{2}}{1 - R^{2}} \cdot \frac{n - k - 1}{k} \sim F_{k, n - k - 1} \) 432 | \end{center} 433 | 434 | Si \( F > F_{k, n - k - 1} \), existe evidencia para rechazar \( H_{0} \). 435 | 436 | \section*{Intervalos de confianza} 437 | 438 | Los intervalos de confianza al nivel de confianza \( (1 - \alpha) \) se pueden calcular: 439 | 440 | \begin{center} 441 | \( \hat{\beta}_{j} \mp t_{n - k - 1, \alpha / 2} \cdot \se(\hat{\beta}_{j}) \) 442 | \end{center} 443 | 444 | \columnbreak 445 | 446 | \section*{Variables ficticias} 447 | 448 | Las variables ficticias (o binarias) son usadas para recoger información cualitativa: sexo, estado civil, país, etc. 449 | 450 | \begin{itemize}[leftmargin=*] 451 | \item Toman \textbf{valor 1} en una categoría dada y \textbf{0 en el resto}. 452 | \item Se usan para analizar y modelar \textbf{cambios estructurales} en los parámetros. 453 | \end{itemize} 454 | 455 | Si una variable cualitativa tiene \( m \) categorías, sólo hay que incluir \( (m - 1) \) variables ficticias en el modelo.
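La regla de las \( m - 1 \) ficticias puede esbozarse así (ejemplo hipotético con numpy; los datos y nombres son ilustrativos):

```python
import numpy as np

# Variable cualitativa hipotética con m = 3 categorías
pais = np.array(["ES", "FR", "ES", "IT", "FR", "IT", "ES"])
categorias = np.unique(pais)  # ["ES", "FR", "IT"]

# Solo (m - 1) ficticias: la categoría omitida ("ES") actúa como
# referencia y queda recogida por el término constante del modelo
D = np.column_stack([(pais == c).astype(float) for c in categorias[1:]])
print(D.shape)  # (7, 2): n observaciones, m - 1 = 2 columnas
```

Incluir las \( m \) ficticias junto con la constante produciría colinealidad perfecta (la llamada trampa de las variables ficticias).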
456 | 457 | \subsection*{Cambio estructural} 458 | 459 | El cambio estructural se refiere a los cambios en los valores de los parámetros del modelo producidos por el efecto de diferentes sub-poblaciones. El cambio estructural se puede incluir en el modelo a través de variables ficticias. 460 | 461 | La ubicación de las variables ficticias \( (D) \) es importante: 462 | 463 | \begin{itemize}[leftmargin=*] 464 | \item \textbf{En la constante} (efecto aditivo) - representa la diferencia media entre los valores producidos por el cambio estructural. 465 | \begin{center} 466 | \( y = \beta_{0} + \delta_{1} D + \beta_{1} x_{1} + u \) 467 | \end{center} 468 | \item \textbf{En la pendiente} (efecto multiplicativo) - representa la diferencia en el efecto (pendiente) entre los valores producidos por el cambio estructural. 469 | \begin{center} 470 | \( y = \beta_{0} + \beta_{1} x_{1} + \delta_{1} D \cdot x_{1} + u \) 471 | \end{center} 472 | \end{itemize} 473 | 474 | \textbf{Contraste de Chow para cambio estructural} - analiza la existencia de cambio estructural en todos los parámetros del modelo, es una expresión particular del contraste F, donde \( H_{0} \): No hay cambio estructural (todos \( \delta = 0 \)). 
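Como esbozo del contraste de Chow (datos simulados con cambio estructural en la pendiente; las cifras son ilustrativas, no del documento):

```python
import numpy as np

def src(X, y):
    # Suma de Residuos Cuadrados (SRC) de una regresión MCO
    beta, *_ = np.linalg.lstsq(X, y, rcond=None)
    u = y - X @ beta
    return u @ u

rng = np.random.default_rng(1)
n = 60
x = rng.uniform(0, 10, n)
g = np.arange(n) < n // 2  # dos sub-poblaciones
# Cambio estructural simulado: pendiente 0.5 en un grupo, 1.5 en el otro
y = 1.0 + np.where(g, 0.5, 1.5) * x + rng.normal(0, 1, n)

X = np.column_stack([np.ones(n), x])
p = X.shape[1]  # parámetros por submodelo (constante y pendiente)

# F de Chow: modelo restringido (muestra conjunta) frente a
# modelo no restringido (una regresión por sub-población)
src_r = src(X, y)
src_ur = src(X[g], y[g]) + src(X[~g], y[~g])
F = ((src_r - src_ur) / p) / (src_ur / (n - 2 * p))
print(F)  # un valor grande indica cambio estructural
```

Bajo \( H_{0} \) (sin cambio estructural), \( F \sim F_{p,\, n - 2p} \); aquí el estadístico resulta grande y se rechaza \( H_{0} \).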
475 | 476 | \section*{Cambios de escala} 477 | 478 | Cambios en las \textbf{unidades de medida} de las variables: 479 | 480 | \begin{itemize}[leftmargin=*] 481 | \item Sobre la variable \textbf{endógena}, \( y^{*} = y \cdot \lambda \) - afecta a todos los parámetros del modelo, \( \beta_{j}^{*} = \beta_{j} \cdot \lambda, \; \forall j = 1, \ldots, k \) 482 | \item Sobre una variable \textbf{exógena}, \( x_{j}^{*} = x_{j} \cdot \lambda \) - sólo afecta al parámetro ligado a dicha variable exógena, \( \beta_{j}^{*} = \beta_{j} \cdot \lambda \) 483 | \item Mismo cambio de escala sobre endógena y exógena - sólo afecta al término constante, \( \beta_{0}^{*} = \beta_{0} \cdot \lambda \) 484 | \end{itemize} 485 | 486 | \section*{Cambios de origen} 487 | 488 | Cambios en el \textbf{origen de medida} de las variables (endógenas o exógenas), \( y^{*} = y + \lambda \) - sólo afectan al término constante del modelo, \( \beta_{0}^{*} = \beta_{0} + \lambda \) 489 | 490 | \columnbreak 491 | 492 | \section*{Multicolinealidad} 493 | 494 | \begin{itemize}[leftmargin=*] 495 | \item \textbf{Multicolinealidad perfecta} - hay variables independientes que son constantes y/o hay una relación lineal exacta entre variables independientes. Es el \textbf{incumplimiento del tercer (3) supuesto} del modelo. 496 | \item \textbf{Multicolinealidad aproximada} - hay variables independientes que son aproximadamente constantes y/o hay una relación lineal aproximada entre variables independientes. \textbf{No implica el incumplimiento de algún supuesto} del modelo, pero afecta a MCO. 497 | \end{itemize} 498 | 499 | \subsection*{Consecuencias} 500 | 501 | \begin{itemize}[leftmargin=*] 502 | \item \textbf{Multicolinealidad perfecta} - el sistema de ecuaciones de MCO no puede resolverse (infinitas soluciones). 503 | \item \textbf{Multicolinealidad aproximada} 504 | \begin{itemize}[leftmargin=*] 505 | \item Pequeñas variaciones en la muestra producen grandes variaciones en las estimaciones de MCO. 
506 | \item La varianza de los estimadores MCO de las \( x \) colineales aumenta, por lo que la inferencia sobre los parámetros se ve afectada: la estimación es muy imprecisa (intervalos de confianza grandes). 507 | \end{itemize} 508 | \end{itemize} 509 | 510 | \subsection*{Detección} 511 | 512 | \begin{itemize}[leftmargin=*] 513 | \item \textbf{Análisis de correlación} - buscar altas correlaciones entre variables independientes, \( \lvert r \rvert > 0.7 \). 514 | \item \textbf{Factor de Inflación de la Varianza (FIV o VIF)} - indica el incremento en \( \Var(\hat{\beta}_{j}) \) debido a la multicolinealidad. 515 | \begin{center} 516 | \( \operatorname{VIF}(\hat{\beta}_{j}) = \frac{1}{1 - R_{j}^{2}} \) 517 | \end{center} 518 | donde \( R_{j}^{2} \) denota el R-cuadrado de una regresión entre \( x_{j} \) y todas las otras \( x \). 519 | \begin{itemize}[leftmargin=*] 520 | \item Valores entre 4 y 10 - pueden existir problemas de multicolinealidad. 521 | \item Valores \( > 10 \) - existen problemas de multicolinealidad. 522 | \end{itemize} 523 | \end{itemize} 524 | 525 | Una característica típica de la multicolinealidad es que los coeficientes de regresión del modelo no son individualmente significativos (por las altas varianzas), pero sí que son conjuntamente significativos. 526 | 527 | \subsection*{Corrección} 528 | 529 | \begin{itemize}[leftmargin=*] 530 | \item Eliminar una de las variables colineales. 531 | \item Realizar análisis factorial (u otra técnica de reducción de dimensiones) en las variables colineales. 532 | \item Interpretar los coeficientes con multicolinealidad conjuntamente. 533 | \end{itemize} 534 | 535 | \columnbreak 536 | 537 | \section*{Heterocedasticidad} 538 | 539 | Los residuos \( u_{i} \) de la función de regresión poblacional no tienen una varianza constante \( \sigma_{u}^{2} \): 540 | 541 | \begin{center} 542 | \( \Var(u \mid x_{1}, \ldots, x_{k}) = \Var(u) \neq \sigma_{u}^{2} \) 543 | \end{center} 544 | 545 | Es el \textbf{incumplimiento del quinto (5) supuesto} del modelo.
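Una ilustración con datos simulados (esbozo informal, en el espíritu del contraste de Goldfeld-Quandt; los números son hipotéticos): si \( \Var(u) = \sigma_{u}^{2} \cdot x_{i}^{2} \), la dispersión de los residuos crece con \( x \):

```python
import numpy as np

rng = np.random.default_rng(2)
n = 200
x = np.sort(rng.uniform(1, 10, n))
# Heterocedasticidad simulada: Var(u) = sigma^2 * x_i^2
y = 2.0 + 0.5 * x + rng.normal(0, 1, n) * x

# Residuos de MCO sobre el modelo en niveles
X = np.column_stack([np.ones(n), x])
beta, *_ = np.linalg.lstsq(X, y, rcond=None)
u = y - X @ beta

# La varianza residual de la mitad con x grande supera con creces
# a la de la mitad con x pequeño: señal de heterocedasticidad
var_baja, var_alta = u[: n // 2].var(), u[n // 2 :].var()
print(var_alta > var_baja)  # True
```

Con homocedasticidad, ambas varianzas muestrales serían similares salvo error de muestreo.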
546 | 547 | \subsection*{Consecuencias} 548 | 549 | \begin{itemize}[leftmargin=*] 550 | \item Estimadores MCO son insesgados. 551 | \item Estimadores MCO son consistentes. 552 | \item MCO ya \textbf{no es eficiente}, pero sigue siendo ELI (Estimador Lineal Insesgado). 553 | \item La \textbf{estimación de la varianza} de los estimadores es \textbf{sesgada}: la construcción de intervalos de confianza y contraste de hipótesis no son fiables. 554 | \end{itemize} 555 | 556 | \subsection*{Detección} 557 | 558 | \begin{itemize}[leftmargin=*] 559 | \setlength{\multicolsep}{0pt} 560 | \setlength{\columnsep}{20pt} 561 | \begin{multicols}{3} 562 | \item \textbf{Gráficos} - buscar patrones de dispersión en gráficos \( x \) vs. \( u \) ó \( x \) vs. \( y \). 563 | \columnbreak 564 | \begin{tikzpicture}[scale=0.108] 565 | \draw [thick, ->] (0, 10) -- (20, 10) node [anchor=north] {\( x \)}; 566 | \draw [thick, -] (0, 0) -- (0, 20) node [anchor=west] {\( u \)}; 567 | \draw plot [only marks, mark=*, mark size=6, domain=2:18, samples=50] (\x, {-0.5*rand*\x + 10}); 568 | \draw [thick, dashed, red, -latex] plot [domain=1:18] (\x, {-0.5*\x + 9.5}); 569 | \draw [thick, dashed, red, -latex] plot [domain=1:18] (\x, {0.5*\x + 10.5}); 570 | \end{tikzpicture} 571 | \columnbreak 572 | \begin{tikzpicture}[scale=0.108] 573 | \draw [thick, <->] (0, 20) node [anchor=west] {\( y \)} -- (0, 0) -- (20, 0) node [anchor=south] {\( x \)}; 574 | \draw plot [only marks, mark=*, mark size=6, domain=2:16, samples=50] (\x, {0.5*\x*(rand + 1) + 2}); 575 | \draw [thick, dashed, red, -latex] plot [domain=1:16] (\x, {1.5}); 576 | \draw [thick, dashed, red, -latex] plot [domain=1:16] (\x, {1*\x + 2.5}); 577 | \end{tikzpicture} 578 | \end{multicols} 579 | \item \textbf{Contrastes} - White, Bartlett, Breusch-Pagan, etc. Generalmente, \( H_{0} \): No heterocedasticidad. 
580 | \end{itemize} 581 | 582 | \subsection*{Corrección} 583 | 584 | \begin{itemize}[leftmargin=*] 585 | \item Usar MCO con un estimador de la matriz de varianzas-covarianzas robusto a la heterocedasticidad (HC), por ejemplo, la propuesta de White. 586 | \item Si la estructura de la varianza es conocida, usar Mínimos Cuadrados Ponderados (MCP) o Mínimos Cuadrados Generalizados (MCG): 587 | \begin{itemize}[leftmargin=*] 588 | \item Suponiendo que \( \Var(u) = \sigma_{u}^{2} \cdot x_{i} \), dividir las variables del modelo entre la raíz cuadrada de \( x_{i} \) y aplicar MCO. 589 | \item Suponiendo que \( \Var(u) = \sigma_{u}^{2} \cdot x_{i}^{2} \), dividir las variables del modelo entre \( x_{i} \) (la raíz cuadrada de \( x_{i}^{2} \)) y aplicar MCO. 590 | \end{itemize} 591 | \item Si la estructura de la varianza es desconocida, hacer uso de Mínimos Cuadrados Ponderados Factibles (MCPF), que estima una posible estructura de la varianza, divide las variables del modelo entre su raíz cuadrada y entonces aplica MCO. 592 | \item Nueva especificación del modelo, por ejemplo, transformación logarítmica (reduce la varianza). 593 | \end{itemize} 594 | 595 | \columnbreak 596 | 597 | \section*{Autocorrelación} 598 | 599 | El residuo de cualquier observación, \( u_{t} \), está correlacionado con el residuo de cualquier otra observación. Las observaciones no son independientes. 600 | 601 | \begin{center} 602 | \( \Corr(u_{t}, u_{s} \mid x_{1}, \ldots, x_{k}) = \Corr(u_{t}, u_{s}) \neq 0, \quad \forall t \neq s \) 603 | \end{center} 604 | 605 | El contexto ``natural'' de este fenómeno son las series temporales. Es el \textbf{incumplimiento del sexto (6) supuesto} del modelo. 606 | 607 | \subsection*{Consecuencias} 608 | 609 | \begin{itemize}[leftmargin=*] 610 | \item Estimadores MCO son insesgados. 611 | \item Estimadores MCO son consistentes. 612 | \item MCO ya \textbf{no es eficiente}, pero sigue siendo ELI (Estimador Lineal Insesgado).
613 | \item La \textbf{estimación de la varianza} de los estimadores es \textbf{sesgada}: la construcción de intervalos de confianza y contraste de hipótesis no son fiables. 614 | \end{itemize} 615 | 616 | \subsection*{Detección} 617 | 618 | \begin{itemize}[leftmargin=*] 619 | \item \textbf{Gráficos} - buscar patrones de dispersión en gráficos \( u_{t - 1} \) vs. \( u_{t} \) o hacer uso del correlograma. 620 | 621 | \setlength{\multicolsep}{0pt} 622 | \setlength{\columnsep}{6pt} 623 | \begin{multicols}{3} 624 | \begin{center} 625 | \begin{tikzpicture}[scale=0.11] 626 | \node at (10, 23) {\textbf{Ac.}}; 627 | \draw [thick, ->] (0, 10) -- (20, 10) node [anchor=south] {\( u_{t - 1} \)}; 628 | \draw [thick, -] (0, 0) -- (0, 20) node [anchor=west] {\( u_{t} \)}; 629 | \draw plot [only marks, mark=*, mark size=6, domain=2:18, samples=50] (\x, {-0.2*(\x - 10)^2 + 13 + 6*rnd}); 630 | \draw [thick, dashed, red, -latex] plot [domain=2:18] (\x, {-0.2*(\x - 10)^2 + 16}); 631 | \end{tikzpicture} 632 | \end{center} 633 | \columnbreak 634 | \begin{center} 635 | \begin{tikzpicture}[scale=0.11] 636 | \node at (10, 23) {\textbf{Ac. \( + \)}}; 637 | \draw [thick, ->] (0, 10) -- (20, 10) node [anchor=north] {\( u_{t - 1} \)}; 638 | \draw [thick, -] (0, 0) -- (0, 20) node [anchor=west] {\( u_{t} \)}; 639 | \draw plot [only marks, mark=*, mark size=6, domain=2:18, samples=20] (\x, {5*rnd + 2.5 + 0.5*\x}); 640 | \draw [thick, dashed, red, -latex] plot [domain=2:18] (\x, {5 + 0.5*\x}); 641 | \end{tikzpicture} 642 | \end{center} 643 | \columnbreak 644 | \begin{center} 645 | \begin{tikzpicture}[scale=0.11] 646 | \node at (10, 23) {\textbf{Ac. 
\( - \)}}; 647 | \draw [thick, ->] (0, 10) -- (20, 10) node [anchor=south] {\( u_{t - 1} \)}; 648 | \draw [thick, -] (0, 0) -- (0, 20) node [anchor=west] {\( u_{t} \)}; 649 | \draw plot [only marks, mark=*, mark size=6, domain=2:18, samples=20] (\x, {5*rnd + 12.5 - 0.5*\x}); 650 | \draw [thick, dashed, red, -latex] plot [domain=2:18] (\x, {15 - 0.5*\x}); 651 | \end{tikzpicture} 652 | \end{center} 653 | \end{multicols} 654 | \item \textbf{Contrastes} - Durbin-Watson, Breusch-Godfrey, etc. Generalmente, \( H_{0} \): No autocorrelación. 655 | \end{itemize} 656 | 657 | \subsection*{Corrección} 658 | 659 | \begin{itemize}[leftmargin=*] 660 | \item Usar MCO con un estimador de la matriz de varianzas-covarianzas robusto a la heterocedasticidad y autocorrelación (HAC), por ejemplo, la propuesta de Newey-West. 661 | \item Usar Mínimos Cuadrados Generalizados. Suponiendo \( y_{t} = \beta_{0} + \beta_{1} x_{t} + u_{t} \), con \( u_{t} = \rho u_{t - 1} + \varepsilon_{t} \), donde \( \lvert \rho \rvert < 1 \) y \( \varepsilon_{t} \) es ruido blanco. 662 | \begin{itemize}[leftmargin=*] 663 | \item Si \( \rho \) es conocido, crear un modelo cuasi-diferenciado donde \( u_{t} \) es ruido blanco y estimarlo por MCO. 664 | \item Si \( \rho \) es desconocido, estimarlo -por ejemplo- por el método de Cochrane-Orcutt, crear un modelo cuasi-diferenciado donde \( u_{t} \) es ruido blanco y estimarlo por MCO. 
665 | \end{itemize} 666 | \end{itemize} 667 | 668 | \end{multicols} 669 | 670 | \end{document} -------------------------------------------------------------------------------- /time-series-cheatsheet/time-series-cheatsheet-en.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marcelomijas/econometrics-cheatsheet/1f49cd08d8232e3eb9c3357fa47327fd46dddae5/time-series-cheatsheet/time-series-cheatsheet-en.pdf -------------------------------------------------------------------------------- /time-series-cheatsheet/time-series-cheatsheet-en.tex: -------------------------------------------------------------------------------- 1 | % !TeX spellcheck = en_GB 2 | % !TeX encoding = UTF-8 3 | \documentclass[10pt, a4paper, landscape]{article} 4 | 5 | % ----- packages ----- 6 | \usepackage{amsmath} % AMS mathematical facilities for LaTeX 7 | \usepackage{enumitem} % Control layout of itemize, enumerate, description 8 | \usepackage{fancyhdr} % Extensive control of page headers and footers in LaTeX2 9 | \usepackage{geometry} % Flexible and complete interface to document dimensions 10 | \usepackage{graphicx} % Enhanced support for graphics 11 | \usepackage{hyperref} % Extensive support for hypertext in LaTeX 12 | \usepackage{multicol} % Intermix single and multiple columns 13 | \usepackage{parskip} % Layout with zero \parindent, non-zero \parskip 14 | \usepackage{tikz} % Create PostScript and PDF graphics in TeX 15 | \usepackage{titlesec} % Select alternative section titles 16 | 17 | % ----- pdf metadata ----- 18 | \hypersetup{ 19 | pdftitle={Time Series Cheat Sheet}, 20 | pdfsubject={The Econometrics Cheat Sheet Project - marcelomijas - CC-BY-4.0}, 21 | pdfauthor={Marcelo Moreno Porras}, 22 | pdfkeywords={statistics, latex, economics, cheatsheet, econometrics, ols-regression, economic-modelling}, 23 | pdfduplex={DuplexFlipShortEdge} 24 | } 25 | 26 | % ----- random seed ----- 27 | \pgfmathsetseed{12} 28 | 29 | % ----- 
custom commands ----- 30 | \DeclareMathOperator{\E}{E} 31 | \DeclareMathOperator{\Var}{Var} 32 | \DeclareMathOperator{\se}{se} 33 | \DeclareMathOperator{\Cov}{Cov} 34 | \DeclareMathOperator{\Corr}{Corr} 35 | 36 | % ----- page customization ----- 37 | \geometry{margin=1cm} % margins config 38 | \pagenumbering{gobble} % remove page numeration 39 | \setlength{\parskip}{0cm} % paragraph spacing 40 | % title spacing 41 | \titlespacing{\section}{0pt}{2ex}{1ex} 42 | \titlespacing{\subsection}{0pt}{1ex}{0ex} 43 | \titlespacing{\subsubsection}{0pt}{0.5ex}{0ex} 44 | 45 | % ----- footer ----- 46 | \pagestyle{fancy} 47 | \renewcommand{\headrulewidth}{0pt} 48 | \cfoot{\href{https://github.com/marcelomijas/econometrics-cheatsheet}{\normalfont \footnotesize TS-25.10-EN - github.com/marcelomijas/econometrics-cheatsheet - CC-BY-4.0 license}} 49 | \setlength{\footskip}{12pt} 50 | 51 | % ----- document ----- 52 | \begin{document} 53 | 54 | \begin{multicols}{3} 55 | 56 | \begin{center} 57 | \textbf{\LARGE \href{https://github.com/marcelomijas/econometrics-cheatsheet}{Time Series Cheat Sheet}} 58 | 59 | {\footnotesize By Marcelo Moreno Porras - Universidad Rey Juan Carlos} 60 | 61 | {\footnotesize The Econometrics Cheat Sheet Project} 62 | \end{center} 63 | 64 | \section*{Basic concepts} 65 | 66 | \subsection*{Definitions} 67 | 68 | \textbf{Time series} - succession of observations ordered in time with a fixed frequency. 69 | 70 | Given the format of a time series: 71 | 72 | \begin{itemize}[leftmargin=*] 73 | \item \textbf{Point-in-time (stock)} - a single value is recorded for each period. 74 | \item \textbf{Aggregated (flow)} - values represent totals or averages over the period. 75 | \item \textbf{Range/interval (OHLC)} - each period records multiple summary statistics, such as min, max, open, close. 76 | \end{itemize} 77 | 78 | \textbf{Stochastic process} - a sequence of random variables that are indexed in time. 
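The time series formats above can be illustrated with a short sketch (Python with NumPy assumed here; the variable names are illustrative and not part of the cheat sheet): from one simulated daily stochastic process, each 30-day period is summarised as a stock, a flow, and an OHLC record.

```python
import numpy as np

# Illustrative sketch (assumed Python/NumPy): one year of daily data,
# summarised per 30-day "month" in the three formats.
rng = np.random.default_rng(1)
daily = rng.normal(100, 5, 360)          # a stochastic process indexed by day
months = daily.reshape(12, 30)           # 12 periods of 30 observations each

stock = months[:, -1]                    # point-in-time: last value of period
flow = months.sum(axis=1)                # aggregated: total over the period
ohlc = np.column_stack([months[:, 0],    # range format: open,
                        months.max(1),   # high,
                        months.min(1),   # low,
                        months[:, -1]])  # close
print(stock.shape, flow.shape, ohlc.shape)
```

The same underlying process yields one value per period in the stock and flow formats, and four summary statistics per period in the OHLC format.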
79 | 80 | \subsection*{Components of a time series} 81 | 82 | \begin{itemize}[leftmargin=*] 83 | \item \textbf{Trend} - the long-term general movement of a series. 84 | \item \textbf{Seasonal variations} - periodic oscillations with a period equal to or shorter than a year, which can be easily identified across different years (usually the result of weather conditions). 85 | \item \textbf{Cyclical variations} - periodic oscillations with a period greater than a year (the result of the economic cycle). 86 | \item \textbf{Residual variations} - movements that do not follow a recognizable periodic oscillation (irregular events). 87 | \end{itemize} 88 | 89 | \subsection*{Types of time series models} 90 | 91 | \begin{itemize}[leftmargin=*] 92 | \item \textbf{Static models} - the relation between \( y \) and \( x \) is contemporary. Conceptually: 93 | \begin{center} 94 | \( y_{t} = \beta_{0} + \beta_{1} x_{t} + u_{t} \) 95 | \end{center} 96 | \item \textbf{Distributed-lag models} - the relation between \( y \) and \( x \) is not contemporary. Conceptually: 97 | \begin{center} 98 | \( y_{t} = \beta_{0} + \beta_{1} x_{t} + \beta_{2} x_{t - 1} + \cdots + \beta_{s} x_{t - (s - 1)} + u_{t} \) 99 | \end{center} 100 | The cumulative long-term effect on \( y \) of a unit change in \( x \) is: 101 | \begin{center} 102 | \( \beta_{1} + \beta_{2} + \cdots + \beta_{s} \) 103 | \end{center} 104 | \item \textbf{Dynamic models} - the model includes lags of the dependent variable (endogeneity). Conceptually: 105 | \begin{center} 106 | \( y_{t} = \beta_{0} + \beta_{1} y_{t - 1} + \cdots + \beta_{s} y_{t - s} + u_{t} \) 107 | \end{center} 108 | \item Combinations of the above, like the rational distributed-lag models (distributed-lag + dynamic). 109 | \end{itemize} 110 | 111 | \columnbreak 112 | 113 | \section*{Assumptions and properties} 114 | 115 | \subsection*{OLS model assumptions under time series} 116 | 117 | Under these assumptions, the OLS estimator will present good properties.
\textbf{Gauss-Markov assumptions} extended for time series: 118 | 119 | \begin{enumerate}[leftmargin=*, label=t\arabic{*}.] 120 | \item \textbf{Parameters linearity and weak dependence}. 121 | \begin{enumerate}[leftmargin=*, label=\alph{*}.] 122 | \item \( y_{t} \) must be a linear function of the \( \beta \)'s. 123 | \item The stochastic process \( \lbrace (x_{t}, y_{t}) : t = 1, 2, \ldots, T \rbrace \) is stationary and weakly dependent. 124 | \end{enumerate} 125 | \item \textbf{No perfect collinearity}. 126 | \begin{itemize}[leftmargin=*] 127 | \item There are no independent variables that are constant: \( \Var(x_{j}) \neq 0, \; \forall j = 1, \ldots, k \) 128 | \item There is no exact linear relation between independent variables. 129 | \end{itemize} 130 | \item \textbf{Conditional mean zero and correlation zero}. 131 | \begin{enumerate}[leftmargin=*, label=\alph{*}.] 132 | \item There are no systematic errors: \( \E(u \mid x_{1}, \ldots, x_{k}) = \E(u) = 0 \rightarrow \) \textbf{strong exogeneity} (a implies b). 133 | \item There are no relevant variables left out of the model: \( \Cov(x_{j} , u) = 0, \; \forall j = 1, \ldots, k \rightarrow \) \textbf{weak exogeneity}. 134 | \end{enumerate} 135 | \item \textbf{Homoscedasticity}. The variability of the residuals is the same for any \( x \): \( \Var(u \mid x_{1}, \ldots, x_{k}) = \sigma_{u}^{2} \) 136 | \item \textbf{No autocorrelation}. Residuals do not contain information about any other residuals: \\ \( \Corr(u_{t}, u_{s} \mid x_{1}, \ldots, x_{k}) = 0, \; \forall t \neq s \) 137 | \item \textbf{Normality}. Residuals are independent and identically distributed (\textbf{i.i.d.}): \( u \sim \mathcal{N} (0, \sigma_{u}^{2}) \) 138 | \item \textbf{Data size}. The number of observations available must be greater than \( (k + 1) \) parameters to estimate.
(It is already satisfied under asymptotic situations) 140 | \end{enumerate} 141 | 142 | \subsection*{Asymptotic properties of OLS} 143 | 144 | Under the econometric model assumptions and the Central Limit Theorem: 145 | 146 | \begin{itemize}[leftmargin=*] 147 | \item Hold t1 to t3a: OLS is \textbf{unbiased}. \( \E(\hat{\beta}_{j}) = \beta_{j} \) 148 | \item Hold t1 to t3: OLS is \textbf{consistent}. \( \operatorname{plim}(\hat{\beta}_{j}) = \beta_{j} \) (if t3b holds but t3a does not, that is, under only weak exogeneity, OLS is biased but consistent) 149 | \item Hold t1 to t5: \textbf{Asymptotic normality} of OLS (then, t6 is necessarily satisfied): \( u \underset{a}{\sim} \mathcal{N} (0, \sigma_{u}^{2}) \) 150 | \item Hold t1 to t5: \textbf{Unbiased estimate} of \( \sigma_{u}^{2} \). \( \E(\hat{\sigma}_{u}^{2}) = \sigma^{2}_{u} \) 151 | \item Hold t1 to t5: OLS is \textcolor{blue}{BLUE} (Best Linear Unbiased Estimator) or \textbf{efficient}. 152 | \item Hold t1 to t6: Hypothesis testing and confidence intervals can be done reliably. 153 | \end{itemize} 154 | 155 | \columnbreak 156 | 157 | \section*{Trends and seasonality} 158 | 159 | \textbf{Spurious regression} - occurs when the relation between \( y \) and \( x \) is due to factors that affect \( y \) and have a correlation with \( x \), \( \Corr(x_{j}, u) \neq 0 \). It is the \textbf{non-fulfilment of t3}. 160 | 161 | \subsection*{Trends} 162 | 163 | Two time series can have the same (or contrary) trend, which should lead to a high level of correlation. This can provoke a false appearance of causality; the problem is \textbf{spurious regression}.
Given the model: 164 | 165 | \begin{center} 166 | \( y_{t} = \beta_{0} + \beta_{1} x_{t} + u_{t} \) 167 | \end{center} 168 | 169 | where: 170 | 171 | \begin{center} 172 | \( y_{t} = \alpha_{0} + \alpha_{1} \text{Trend} + v_{t} \) 173 | 174 | \( x_{t} = \gamma_{0} + \gamma_{1} \text{Trend} + w_{t} \) 175 | \end{center} 176 | 177 | Adding a trend to the model can solve the problem: 178 | 179 | \begin{center} 180 | \( y_{t} = \beta_{0} + \beta_{1} x_{t} + \beta_{2} \text{Trend} + u_{t} \) 181 | \end{center} 182 | 183 | The trend can be linear or non-linear (quadratic, cubic, exponential, etc.). 184 | 185 | Another way is to make use of the \textbf{Hodrick-Prescott filter} to extract the trend and the cyclical component. 186 | 187 | \subsection*{Seasonality} 188 | 189 | \setlength{\multicolsep}{0pt} 190 | \begin{multicols}{2} 191 | 192 | A time series can manifest seasonality. That is, the series is subject to seasonal variations or patterns, usually related to weather conditions.
193 | 194 | \columnbreak 195 | 196 | \begin{tikzpicture}[scale=0.20] 197 | \draw [thick, <->] (0, 12) node [anchor=west] {\( y \)} -- (0, 0) -- (20, 0) node [anchor=south] {\( t \)}; 198 | \draw [thick, black, rounded corners] 199 | (0.0, 2.79) -- (0.8, 4.81) -- 200 | (1.6, 2.50) -- (2.4, 7.61) -- 201 | (3.2, 6.03) -- (4.0, 8.84) -- 202 | (4.8, 5.42) -- (5.6, 10.85) -- 203 | (6.4, 8.47) -- (7.2, 9.69) -- 204 | (8.0, 5.48) -- (8.8, 9.51) -- 205 | (9.6, 7.68) -- (10.4, 9.57) -- 206 | (11.2, 5.78) -- (12.0, 10.36) -- 207 | (12.8, 8.29) -- (13.6, 9.45) -- 208 | (14.4, 5.60) -- (15.2, 10.09) -- 209 | (16.0, 8.96) -- (16.8, 11.28) -- 210 | (17.6, 7.13) -- (18.4, 11.70); 211 | \draw [thick, red, densely dashed, line join=round] 212 | (0.0, 3.79) -- (0.8, 3.99) -- 213 | (1.6, 3.90) -- (2.4, 4.91) -- 214 | (3.2, 6.09) -- (4.0, 6.93) -- 215 | (4.8, 6.99) -- (5.6, 7.54) -- 216 | (6.4, 7.47) -- (7.2, 7.69) -- 217 | (8.0, 7.48) -- (8.8, 7.51) -- 218 | (9.6, 7.67) -- (10.4, 7.57) -- 219 | (11.2, 7.78) -- (12.0, 8.33) -- 220 | (12.8, 7.99) -- (13.6, 8.15) -- 221 | (14.4, 8.60) -- (15.2, 8.90) -- 222 | (16.0, 8.96) -- (16.8, 8.71) -- 223 | (17.6, 8.99) -- (18.4, 9.19); 224 | \end{tikzpicture} 225 | 226 | \end{multicols} 227 | 228 | For example, GDP is usually higher in summer and lower in winter (seasonally adjusted series in {\color{red} dashed red}). 229 | 230 | \begin{itemize}[leftmargin=*] 231 | \item Regressing time series that present seasonality can lead to \textbf{spurious results}. 232 | \end{itemize} 233 | 234 | There are different \textbf{seasonal adjustment} methods: 235 | 236 | \begin{enumerate}[leftmargin=*, label=\alph{*}.] 237 | \item Include seasonal binary variables in the model. 
For example, for quarterly series (\( Sq_{t} \) are binary variables): 238 | \begin{center} 239 | \( y_{t} = \beta_{0} + \beta_{1} S2_{t} + \beta_{2} S3_{t} + \beta_{3} S4_{t} + \beta_{4} x_{1t} + \cdots + \beta_{k} x_{kt} + u_{t} \) 240 | \end{center} 241 | \item Seasonally adjust the variables and then perform the regression with the adjusted variables. 242 | \item Take seasonal differences; this can also help remove the trend (\( s \) is the seasonal period): 243 | \begin{center} 244 | \( \nabla_{s} y_{t} = y_{t} - y_{t - s} \) 245 | \end{center} 246 | \item Apply \textbf{X-13ARIMA-SEATS} (a better but more complicated method than the previous ones). 247 | \end{enumerate} 248 | 249 | \columnbreak 250 | 251 | \section*{Autocorrelation} 252 | 253 | The residual of any observation, \( u_{t} \), is correlated with the residual of any other observation. The observations are not independent. It is the \textbf{non-fulfilment} of \textbf{t5}. 254 | 255 | \begin{center} 256 | \( \Corr(u_{t}, u_{s} \mid x_{1}, \ldots, x_{k}) = \Corr(u_{t}, u_{s}) \neq 0, \; \forall t \neq s \) 257 | \end{center} 258 | 259 | \subsection*{Consequences} 260 | 261 | \begin{itemize}[leftmargin=*] 262 | \item OLS estimators are still unbiased. 263 | \item OLS estimators are still consistent. 264 | \item OLS is \textbf{not efficient} any more, but still a LUE (Linear Unbiased Estimator). 265 | \item \textbf{Variance estimations} of the estimators are \textbf{biased}: the construction of confidence intervals and the hypothesis testing is not reliable. 266 | \end{itemize} 267 | 268 | \subsection*{Detection} 269 | 270 | \textbf{Scatter plots} - look for scatter patterns on \( u_{t - 1} \) vs. \( u_{t} \).
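As a numeric complement to a lag scatter plot, a minimal sketch (Python with NumPy assumed here, not part of the cheat sheet): simulate AR(1) residuals and estimate the lag-1 correlation; the Durbin-Watson statistic computed on the same residuals is approximately \( 2 (1 - \hat{\rho}_{1}) \).

```python
import numpy as np

# Sketch (assumed Python/NumPy): residuals u_t = 0.7 u_{t-1} + e_t,
# with e_t white noise, show positive first-order autocorrelation.
rng = np.random.default_rng(2)
T = 500
e = rng.normal(0, 1, T)
u = np.zeros(T)
for t in range(1, T):
    u[t] = 0.7 * u[t - 1] + e[t]

rho1 = np.corrcoef(u[1:], u[:-1])[0, 1]        # sample Corr(u_t, u_{t-1})
dw = np.sum(np.diff(u) ** 2) / np.sum(u ** 2)  # Durbin-Watson statistic
print(rho1, dw)                                # dw is roughly 2 * (1 - rho1)
```

With independent residuals, `rho1` would be close to 0 and `dw` close to 2.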
271 | 272 | \setlength{\multicolsep}{0pt} 273 | \setlength{\columnsep}{6pt} 274 | \begin{multicols}{3} 275 | 276 | \begin{center} 277 | \begin{tikzpicture}[scale=0.11] 278 | \node at (16, 20) {\textbf{Ac.}}; 279 | \draw [thick, ->] (0, 10) -- (20, 10) node [anchor=south] {\( u_{t - 1} \)}; 280 | \draw [thick, -] (0, 0) -- (0, 20) node [anchor=west] {\( u_{t} \)}; 281 | \draw plot [only marks, mark=*, mark size=6, domain=2:18, samples=50] (\x, {-0.2*(\x - 10)^2 + 13 + 6*rnd}); 282 | \draw [thick, dashed, red, -latex] plot [domain=2:18] (\x, {-0.2*(\x - 10)^2 + 16}); 283 | \end{tikzpicture} 284 | \end{center} 285 | 286 | \columnbreak 287 | 288 | \begin{center} 289 | \begin{tikzpicture}[scale=0.11] 290 | \node at (16, 20) {\textbf{Ac. \( + \)}}; 291 | \draw [thick, ->] (0, 10) -- (20, 10) node [anchor=north] {\( u_{t - 1} \)}; 292 | \draw [thick, -] (0, 0) -- (0, 20) node [anchor=west] {\( u_{t} \)}; 293 | \draw plot [only marks, mark=*, mark size=6, domain=2:18, samples=20] (\x, {5*rnd + 2.5 + 0.5*\x}); 294 | \draw [thick, dashed, red, -latex] plot [domain=2:18] (\x, {5 + 0.5*\x}); 295 | \end{tikzpicture} 296 | \end{center} 297 | 298 | \columnbreak 299 | 300 | \begin{center} 301 | \begin{tikzpicture}[scale=0.11] 302 | \node at (16, 20) {\textbf{Ac. \( - \)}}; 303 | \draw [thick, ->] (0, 10) -- (20, 10) node [anchor=south] {\( u_{t - 1} \)}; 304 | \draw [thick, -] (0, 0) -- (0, 20) node [anchor=west] {\( u_{t} \)}; 305 | \draw plot [only marks, mark=*, mark size=6, domain=2:18, samples=20] (\x, {5*rnd + 12.5 - 0.5*\x}); 306 | \draw [thick, dashed, red, -latex] plot [domain=2:18] (\x, {15 - 0.5*\x}); 307 | \end{tikzpicture} 308 | \end{center} 309 | 310 | \end{multicols} 311 | 312 | \begin{multicols}{2} 313 | 314 | \textbf{Correlogram} - autocorrelation function (ACF) and partial ACF (PACF). 315 | 316 | \columnbreak 317 | 318 | \begin{itemize}[leftmargin=*] 319 | \item Y axis: correlation. 320 | \item X axis: lag number. 
321 | \item Grey area: \( \pm 1.96 / T^{0.5} \) 322 | \end{itemize} 323 | 324 | \end{multicols} 325 | 326 | \begin{center} 327 | \begin{tikzpicture}[scale=0.25] 328 | % acf plot 329 | \node at (-2.5, 14) {\small \rotatebox{90}{\textbf{ACF}}}; 330 | \node at (-1, 17.5) {\small 1}; 331 | \node at (-1, 14) {\small 0}; 332 | \node at (-1, 10.5) {\small -1}; 333 | \fill [lightgray] (0, 13) rectangle (30.5, 15); 334 | \draw [dashed, thin] (0, 14) -- (30.5, 14); 335 | \draw [thick, |->] (0, 18) -- (0, 10) -- (30.5, 10); 336 | \fill [red] (2, 14) rectangle (2.5, 17.95); 337 | \fill [red] (5, 14) rectangle (5.5, 16.96); 338 | \fill [red] (8, 14) rectangle (8.5, 16.22); 339 | \fill [red] (11, 14) rectangle (11.5, 15.67); 340 | \fill [red] (14, 14) rectangle (14.5, 15.25); 341 | \fill [red] (17, 14) rectangle (17.5, 14.94); 342 | \fill [red] (20, 14) rectangle (20.5, 14.70); 343 | \fill [red] (23, 14) rectangle (23.5, 14.53); 344 | \fill [red] (26, 14) rectangle (26.5, 14.40); 345 | \fill [red] (29, 14) rectangle (29.5, 14.30); 346 | % pacf plot 347 | \node at (-2.5, 4) {\small \rotatebox{90}{\textbf{PACF}}}; 348 | \node at (-1, 7.5) {\small 1}; 349 | \node at (-1, 4) {\small 0}; 350 | \node at (-1, 0.5) {\small -1}; 351 | \fill [lightgray] (0, 3) rectangle (30.5, 5); 352 | \draw [dashed, thin] (0, 4) -- (30.5, 4); 353 | \draw [thick, |->] (0, 8) -- (0, 0) -- (30.5, 0); 354 | \fill [red] (2, 4) rectangle (2.5, 7.90); 355 | \fill [red] (5, 4) rectangle (5.5, 7.00); 356 | \fill [red] (8, 4) rectangle (8.5, 3.47); 357 | \fill [red] (11, 4) rectangle (11.5, 4.24); 358 | \fill [red] (14, 4) rectangle (14.5, 4.43); 359 | \fill [red] (17, 4) rectangle (17.5, 4.89); 360 | \fill [red] (20, 4) rectangle (20.5, 3.09); 361 | \fill [red] (23, 4) rectangle (23.5, 3.58); 362 | \fill [red] (26, 4) rectangle (26.5, 4.46); 363 | \fill [red] (29, 4) rectangle (29.5, 4.86); 364 | \end{tikzpicture} 365 | \end{center} 366 | 367 | \begin{itemize}[leftmargin=*] 368 | \item \textbf{\( \text{MA}(q) \) 
process}. \underline{ACF}: only the first \( q \) coefficients are significant, the remaining are abruptly cancelled. \underline{PACF}: attenuated exponential fast decay or sine waves. 369 | \item \textbf{\( \text{AR}(p) \) process}. \underline{ACF}: attenuated exponential fast decay or sine waves. \underline{PACF}: only the first \( p \) coefficients are significant, the remaining are abruptly cancelled. 370 | \end{itemize} 371 | 372 | \columnbreak 373 | 374 | \begin{itemize}[leftmargin=*] 375 | \item \textbf{\( \text{ARMA}(p, q) \) process}. \underline{ACF} and \underline{PACF}: the coefficients are not abruptly cancelled and present a fast decay. 376 | \end{itemize} 377 | 378 | If the ACF coefficients do not decay rapidly, there is a clear indicator of a lack of stationarity in mean. 379 | 380 | \textbf{Formal tests} - Generally, \( H_{0} \): No autocorrelation. 381 | 382 | Supposing that \( u_{t} \) follows an AR(1) process: 383 | 384 | \begin{center} 385 | \( u_{t} = \rho_{1} u_{t - 1} + \varepsilon_{t} \) 386 | \end{center} 387 | 388 | where \( \varepsilon_{t} \) is white noise. 389 | 390 | \begin{itemize}[leftmargin=*] 391 | \item \textbf{AR(1) t test} (exogenous regressors): 392 | \begin{center} 393 | \( t = \dfrac{\hat{\rho}_{1}}{\se(\hat{\rho}_{1})} \sim t_{T - k - 1, \alpha / 2} \) 394 | \end{center} 395 | \( H_{1} \): Autocorrelation of order one, AR(1). 396 | \end{itemize} 397 | 398 | \begin{itemize}[leftmargin=*] 399 | \item \textbf{Durbin-Watson statistic} (exogenous regressors and residual normality): 400 | \begin{center} 401 | \( d = \dfrac{\sum_{t = 2}^{n} (\hat{u}_{t} - \hat{u}_{t - 1})^{2}}{\sum_{t = 1}^{n} \hat{u}_{t}^{2}} \approx 2 \cdot (1 - \hat{\rho}_{1}) \) 402 | \end{center} 403 | Where \( 0 \leq d \leq 4 \) 404 | 405 | \( H_{1} \): Autocorrelation of order one, AR(1). 
406 | \end{itemize} 407 | 408 | \begin{center} 409 | \begin{tabular}{ c | c | c | c } 410 | \( d = \) & 0 & 2 & 4 \\ \hline 411 | \( \rho \approx \) & 1 & 0 & -1 412 | \end{tabular} 413 | 414 | \begin{tikzpicture}[scale=0.3] 415 | \fill [lightgray] (5, 0) rectangle (9, 6); 416 | \draw (5, 0) -- (5, 6); 417 | \draw (9, 0) -- (9, 6); 418 | \fill [lightgray] (16, 0) rectangle (20, 6); 419 | \draw (16, 0) -- (16, 6); 420 | \draw (20, 0) -- (20, 6); 421 | \draw [thick] (0, 6) -- (0, 0) -- (25, 0); 422 | \draw [dashed] (12.5, 0) -- (12.5, 6); 423 | \node at (-0.5, 6.5) {\small \( f(d) \)}; 424 | \node at (0, -0.6) {\small 0}; 425 | \node at (5, -0.6) {\small \( d_{L} \)}; 426 | \node at (9, -0.6) {\small \( d_{U} \)}; 427 | \node at (12.5, -0.6) {\small 2}; 428 | \node at (16.7, -0.6) {\tiny \( (4 - d_{U}) \)}; 429 | \node at (20.7, -0.6) {\tiny \( (4 - d_{L}) \)}; 430 | \node at (25, -0.6) {\small 4}; 431 | \node at (2.5, 3.5) {\small Rej. \( H_{0} \)}; 432 | \node at (2.5, 2.5) {\small AR \( + \)}; 433 | \node [text=red] at (7, 3) {\textbf{?}}; 434 | \node at (12.5, 3.5) {\small Not rej. \( H_{0} \)}; 435 | \node at (12.5, 2.5) {\small No AR}; 436 | \node [text=red] at (18, 3) {\textbf{?}}; 437 | \node at (22.5, 3.5) {\small Rej. \( H_{0} \)}; 438 | \node at (22.5, 2.5) {\small AR \( - \)}; 439 | \end{tikzpicture} 440 | \end{center} 441 | 442 | \begin{itemize}[leftmargin=*] 443 | \item \textbf{Durbin's h} (endogenous regressors): 444 | \begin{center} 445 | \( h = \hat{\rho} \cdot \sqrt{\dfrac{T}{1 - T \cdot \upsilon}} \) 446 | \end{center} 447 | where \( \upsilon \) is the estimated variance of the coefficient associated with the endogenous variable. 448 | 449 | \( H_{1} \): Autocorrelation of order one, AR(1). 450 | \end{itemize} 451 | 452 | \begin{itemize}[leftmargin=*] 453 | \item \textbf{Breusch-Godfrey test} (endogenous regressors): it can detect \( \text{MA}(q) \) and \( \text{AR}(p) \) processes (\( \varepsilon_{t} \) is w. 
noise): 454 | \begin{itemize}[leftmargin=*] 455 | \item \( \text{MA}(q) \): \( u_{t} = \varepsilon_{t} - m_{1} u_{t - 1} - \cdots - m_{q} u_{t - q} \) 456 | \item \( \text{AR}(p) \): \( u_{t} = \rho_{1} u_{t - 1} + \cdots + \rho_{p} u_{t - p}+ \varepsilon_{t} \) 457 | \end{itemize} 458 | Under \( H_{0} \): No autocorrelation: 459 | \begin{center} 460 | \( \hfill T \cdot R_{\hat{u}_t}^{2} \underset{a}{\sim} \chi_{q}^{2} \hfill \text{or} \hfill T \cdot R_{\hat{u}_t}^{2} \underset{a}{\sim} \chi_{p}^{2} \hfill \) 461 | \end{center} 462 | \( H_{1} \): Autocorrelation of order \( q \) (or \( p \)). 463 | \end{itemize} 464 | 465 | \begin{itemize}[leftmargin=*] 466 | \item \textbf{Ljung-Box Q test}: 467 | 468 | \( H_{1} \): Autocorrelation up to lag \( h \). 469 | \end{itemize} 470 | 471 | \columnbreak 472 | 473 | \subsection*{Correction} 474 | 475 | \begin{itemize}[leftmargin=*] 476 | \item Use OLS with a variance-covariance matrix estimator that is \textbf{robust to heteroscedasticity and autocorrelation} (HAC), for example, the one proposed by \textbf{Newey-West}. 477 | \item Use \textbf{Generalized Least Squares} (GLS). Supposing \( y_{t} = \beta_{0} + \beta_{1} x_{t} + u_{t} \), with \( u_{t} = \rho u_{t - 1} + \varepsilon_{t} \), where \( \lvert \rho \rvert < 1 \) and \( \varepsilon_{t} \) is \underline{white noise}. 478 | \begin{itemize}[leftmargin=*] 479 | \item If \( \rho \) is \textbf{known}, use a \textbf{quasi-differentiated model}: 480 | \begin{center} 481 | \( y_{t} - \rho y_{t - 1}= \beta_{0} (1 - \rho) + \beta_{1} (x_{t} - \rho x_{t - 1}) + u_{t} - \rho u_{t - 1} \) 482 | 483 | \( y_{t}^{*} = \beta_{0}^{*} + \beta_{1}' x_{t}^{*} + \varepsilon_{t} \) 484 | \end{center} 485 | where \( \beta_{1}' = \beta_{1} \); and estimate it by OLS. 
486 | \item If \( \rho \) is \textbf{not known}, estimate it, for example, by the \textbf{Cochrane-Orcutt iterative method} (Prais-Winsten's method is also good): 487 | \begin{enumerate}[leftmargin=*] 488 | \item Obtain \( \hat{u}_{t} \) from the original model. 489 | \item Estimate \( \hat{u}_{t} = \rho \hat{u}_{t - 1} + \varepsilon_{t} \) and obtain \( \hat{\rho} \). 490 | \item Create a quasi-differentiated model: 491 | \begin{center} 492 | \( y_{t} - \hat{\rho}y_{t - 1} = \beta_{0} (1 - \hat{\rho}) + \beta_{1} (x_{t} - \hat{\rho} x_{t - 1}) + u_{t} - \hat{\rho}u_{t - 1} \) 493 | 494 | \( y_{t}^{*} = \beta_{0}^{*} + \beta_{1}' x_{t}^{*} + \varepsilon_{t} \) 495 | \end{center} 496 | where \( \beta_{1}' = \beta_{1} \); and estimate it by OLS. 497 | \item Obtain \( \hat{u}_{t}^{*} = y_{t} - (\hat{\beta}_{0}^{*} + \hat{\beta}_{1}' x_{t}) \neq y_{t} - (\hat{\beta}_{0}^{*} + \hat{\beta}_{1}' x_{t}^{*}) \). 498 | \item Repeat from step 2. The algorithm ends when the estimated parameters vary very little between iterations. 499 | \end{enumerate} 500 | \end{itemize} 501 | \item If not solved, look for \textbf{high dependence} in the series. 502 | \end{itemize} 503 | 504 | \section*{Exponential smoothing} 505 | 506 | Given \( \{ y_{t} \} \), the smoothed series \( \{ f_{t} \} \): 507 | 508 | \begin{center} 509 | \( f_{t} = \alpha y_{t} + (1 - \alpha) f_{t - 1} \) 510 | \end{center} 511 | 512 | where \( 0 < \alpha < 1 \) is the smoothing factor and \( f_{0} = y_{0} \). 513 | 514 | \section*{Forecasts} 515 | 516 | Two types of forecasts: 517 | 518 | \begin{itemize}[leftmargin=*] 519 | \item Of the mean value of \( y \) for a specific value of \( x \). 520 | \item Of an individual value of \( y \) for a specific value of \( x \). 521 | \end{itemize} 522 | 523 | \textbf{Theil's U statistic} - compares the forecast results with those of a naïve forecast that uses minimal historical data (\( \hat{y}_{t + 1} = y_{t} \)).
524 | 525 | \begin{center} 526 | \( U = \sqrt{\frac{\sum_{t = 1}^{T - 1} \left( \frac{\hat{y}_{t + 1} - y_{t + 1}}{y_{t}} \right)^{2}}{\sum_{t = 1}^{T - 1} \left( \frac{y_{t + 1} - y_{t}}{y_{t}} \right)^{2}}} \) 527 | \end{center} 528 | 529 | \begin{itemize}[leftmargin=*] 530 | \item \( < 1 \): The forecast is better than the naive forecast. 531 | \item \( = 1 \): The forecast is about as good as the naive forecast. 532 | \item \( > 1 \): The forecast is worse than the naive forecast. 533 | \end{itemize} 534 | 535 | \columnbreak 536 | 537 | \section*{Stationarity} 538 | 539 | Stationarity makes it possible to correctly identify relations between variables that remain stable over time. 540 | 541 | \begin{itemize}[leftmargin=*] 542 | \item \textbf{Stationary process} (strict stationarity) - the joint probability distribution of the process remains unchanged when shifted \( h \) periods. 543 | \item \textbf{Non-stationary process} - for example, a series with a trend, where at least the mean changes with time. 544 | \item \textbf{Covariance stationary process} - a weaker form of stationarity: 545 | \begin{itemize}[leftmargin=*] 546 | \begin{multicols}{2} 547 | \item \( \E(x_{t}) \) is constant. 548 | \columnbreak 549 | \item \( \Var(x_{t}) \) is constant. 550 | \end{multicols} 551 | \item For any \( t, h \geq 1 \), \( \Cov(x_{t}, x_{t + h}) \) depends only on \( h \), not on \( t \). 552 | \end{itemize} 553 | \end{itemize} 554 | 555 | \section*{Weak dependence} 556 | 557 | Weak dependence replaces the random sampling assumption for time series. 558 | 559 | \begin{itemize}[leftmargin=*] 560 | \item A stationary process \( \{ x_{t} \} \) is \textbf{weakly dependent} when \( x_{t} \) and \( x_{t + h} \) become almost independent as \( h \) increases without bound. 561 | \item A covariance stationary process is \textbf{weakly dependent} if the correlation between \( x_{t} \) and \( x_{t + h} \) tends to 0 fast enough as \( h \rightarrow \infty \) (they are asymptotically uncorrelated).
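As an illustration of this decay: for a stationary AR(1), the correlation between \( x_{t} \) and \( x_{t + h} \) is \( \rho^{h} \), which vanishes as \( h \) grows. A minimal simulation sketch, assuming NumPy (parameter values are illustrative):

```python
import numpy as np

rng = np.random.default_rng(2)

# Simulate a stationary AR(1): x_t = rho * x_{t-1} + e_t.
# Its autocorrelation at lag h is rho**h, which decays toward zero,
# illustrating weak dependence.
rho, T = 0.5, 100_000
e = rng.standard_normal(T)
x = np.zeros(T)
for t in range(1, T):
    x[t] = rho * x[t - 1] + e[t]

for h in (1, 5, 10):
    corr = np.corrcoef(x[:-h], x[h:])[0, 1]
    print(h, round(corr, 3))  # approximately rho**h
```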
562 | \end{itemize} 563 | 564 | Weakly dependent processes are known as \textbf{integrated of order zero}, I(0). Some examples: 565 | 566 | \begin{itemize}[leftmargin=*] 567 | \item \textbf{Moving average} - \( \{ x_{t} \} \) is a moving average of order \( q \), \( \text{MA}(q) \): 568 | \begin{center} 569 | \( x_{t} = e_{t} + m_{1} e_{t - 1} + \cdots + m_{q} e_{t - q} \) 570 | \end{center} 571 | where \( \{ e_{t} : t = 0, 1, \ldots, T \} \) is an \textsl{i.i.d.} sequence with zero mean and \( \sigma_{e}^{2} \) variance. 572 | \item \textbf{Autoregressive process} - \( \{ x_{t} \} \) is an autoregressive process of order \( p \), \( \text{AR}(p) \): 573 | \begin{center} 574 | \( x_{t} = \rho_{1} x_{t - 1} + \cdots + \rho_{p} x_{t - p} + e_{t} \) 575 | \end{center} 576 | where \( \{ e_{t} : t = 1, 2, \ldots, T \} \) is an \textsl{i.i.d.} sequence with zero mean and \( \sigma_{e}^{2} \) variance. 577 | 578 | \textbf{Stability condition}: if every root \( z \) of \( 1 - \rho_{1} z - \cdots - \rho_{p} z^{p} = 0 \) satisfies \( \lvert z \rvert > 1 \), then \( \{ x_{t} \} \) is a stable \( \text{AR}(p) \) process that is weakly dependent. For an AR(1), the condition is \( \lvert \rho_{1} \rvert < 1 \). 579 | 580 | \item \textbf{ARMA process} - a combination of the two above; \( \{ x_{t} \} \) is an \( \text{ARMA}(p, q) \): 581 | \begin{center} 582 | \( x_{t} = e_{t} + m_{1} e_{t - 1} + \cdots + m_{q} e_{t - q} + \rho_{1} x_{t - 1} + \cdots + \rho_{p} x_{t - p} \) 583 | \end{center} 584 | \end{itemize} 585 | 586 | \columnbreak 587 | 588 | \section*{Unit roots} 589 | 590 | A process is integrated of order \( d \), \( \text{I}(d) \), if it must be differenced \( d \) times to become stationary. 591 | 592 | When \( d \geq 1 \), the process is said to have a \textbf{unit root}: the stability condition is not met (there is a root on the unit circle). 593 | 594 | \subsection*{Strong dependence} 595 | 596 | Generally, economic series are highly persistent in time.
Some examples of \textbf{unit root}, I(1), processes: 597 | 598 | \begin{itemize}[leftmargin=*] 599 | \item \textbf{Random walk} - an AR(1) process with \( \rho_{1} = 1 \). 600 | \begin{center} 601 | \( y_{t} = y_{t - 1} + e_{t} \) 602 | \end{center} 603 | where \( \{ e_{t} : t = 1, 2, \ldots, T \} \) is an \textsl{i.i.d.} sequence with zero mean and \( \sigma_{e}^{2} \) variance. 604 | \item \textbf{Random walk with drift} - an AR(1) process with \( \rho_{1} = 1 \) and a constant. 605 | \begin{center} 606 | \( y_{t} = \beta_{0} + y_{t - 1} + e_{t} \) 607 | \end{center} 608 | where \( \{ e_{t} : t = 1, 2, \ldots, T \} \) is an \textsl{i.i.d.} sequence with zero mean and \( \sigma_{e}^{2} \) variance. 609 | \end{itemize} 610 | 611 | \subsection*{Unit root tests} 612 | 613 | \begin{center} 614 | \begin{tabular}{ c | c | c } 615 | Test & \( H_{0} \) & Reject \( H_{0} \) if \\ \hline 616 | ADF & I(1) & tau \textless \, Critical value \\ \hline 617 | KPSS & I(0) level & mu \textgreater \, Critical value \\ 618 | & I(0) trend & tau \textgreater \, Critical value \\ \hline 619 | Phillips-Perron & I(1) & Z-tau \textless \, Critical value \\ \hline 620 | Zivot-Andrews & I(1) & tau \textless \, Critical value 621 | \end{tabular} 622 | \end{center} 623 | 624 | \subsection*{From unit root to weak dependence} 625 | 626 | Integrated of \textbf{order one}, I(1), means that \textbf{the first difference} of the process is \textbf{weakly dependent} or I(0) (and usually stationary). Let \( \{ y_{t} \} \) be a random walk: 627 | 628 | \begin{multicols}{2} 629 | 630 | \begin{center} 631 | \( \Delta y_{t} = y_{t} - y_{t - 1} = e_{t} \) 632 | \end{center} 633 | 634 | where \( \{ e_{t} \} = \{ \Delta y_{t} \} \) is \textsl{i.i.d.} 635 | 636 | Note: 637 | 638 | \begin{itemize}[leftmargin=*] 639 | \item The {\color{red} first difference} of a series removes its trend. 640 | \item Taking logarithms of a series stabilizes its variance.
641 | \end{itemize} 642 | 643 | \columnbreak 644 | 645 | \begin{tikzpicture}[scale=0.18] 646 | \draw [thick, <->] (0, 15) node [anchor=west] {\( y, {\color{red} \Delta y} \)} -- (0, 0) -- (20, 0) node [anchor=south] {\( t \)}; 647 | \draw [thick, black] 648 | (0.0, 2.000) -- (0.8, 2.459) -- 649 | (1.6, 2.716) -- (3.2, 3.205) -- 650 | (4.0, 3.571) -- (4.8, 3.952) -- 651 | (5.6, 4.047) -- (6.4, 4.514) -- 652 | (7.2, 4.719) -- (8.0, 5.160) -- 653 | (8.8, 5.674) -- (9.6, 5.987) -- 654 | (10.4, 6.242) -- (11.2, 6.471) -- 655 | (12.0, 6.944) -- (12.8, 7.104) -- 656 | (13.6, 7.584) -- (14.4, 8.087) -- 657 | (15.2, 8.112) -- (16.0, 8.834) -- 658 | (16.8, 9.470) -- (17.6, 9.718) -- 659 | (18.4, 10.032); 660 | \draw [thick, red, densely dashed, line join=round] 661 | (0.8, 10.28) -- (1.6, 6.20) -- 662 | (3.2, 10.88) -- (4.0, 8.40) -- 663 | (4.8, 8.70) -- (5.6, 2.92) -- 664 | (6.4, 10.45) -- (7.2, 5.13) -- 665 | (8.0, 9.92) -- (8.8, 11.39) -- 666 | (9.6, 7.34) -- (10.4, 6.15) -- 667 | (11.2, 5.62) -- (12.0, 10.57) -- 668 | (12.8, 4.23) -- (13.6, 10.70) -- 669 | (14.4, 11.18) -- (15.2, 1.51) -- 670 | (16.0, 15.60) -- (16.8, 13.86) -- 671 | (17.6, 6.01) -- (18.4, 7.36); 672 | \end{tikzpicture} 673 | 674 | \end{multicols} 675 | 676 | \subsubsection*{From unit root to percentage change} 677 | 678 | When an I(1) series is strictly positive, logs are often used before differencing to approximate percentage changes: 679 | 680 | \begin{center} 681 | \( \Delta \log(y_{t}) = \log(y_{t}) - \log(y_{t - 1}) \approx \dfrac{y_t - y_{t - 1}} {y_{t - 1}} \) 682 | \end{center} 683 | 684 | \columnbreak 685 | 686 | \subsection*{Ergodicity} 687 | 688 | A strictly stationary process \( \{ y_{t} \} \) is \textbf{ergodic} if time averages converge to their ensemble averages (expectations). This is often ensured by \textbf{strong mixing}, which implies asymptotic independence of distant events. 
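The convergence stated next can be checked by simulation: for a stationary AR(1) with drift, the time average of one long realization approaches the population mean. A minimal sketch, assuming NumPy (parameter values are illustrative):

```python
import numpy as np

rng = np.random.default_rng(0)

# A stationary, ergodic AR(1) with drift: y_t = c + rho*y_{t-1} + e_t,
# whose population mean is E(y_t) = c / (1 - rho) = 2.
c, rho, T = 1.0, 0.5, 100_000
e = rng.standard_normal(T)
y = np.full(T, c / (1 - rho))  # start at the population mean
for t in range(1, T):
    y[t] = c + rho * y[t - 1] + e[t]

# The time average of a single long realization approaches E(y_t).
print(y.mean())  # close to 2
```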
689 | 690 | \begin{center} 691 | \( \frac{1}{T} \sum_{t = 1}^{T} y_{t} \underset{a}{\rightarrow} \E(y_{t}) \) 692 | \end{center} 693 | 694 | Without ergodicity, sample moments may not reflect population moments, and estimators based on them can be inconsistent. 695 | 696 | \section*{Cointegration} 697 | 698 | Two I(1) series are \textbf{cointegrated} if a linear combination of them is I(0). In that case, a regression between them is not spurious, but reflects a valid \textbf{long-run} relationship. Cointegrated variables share a common stochastic trend. 699 | 700 | For example, \( \{ x_{t} \} \) and \( \{ y_{t} \} \) are I(1), but \( y_{t} - \beta x_{t} = u_{t} \), where \( \{ u_{t} \} \) is I(0) (\( \beta \) is the cointegrating parameter). 701 | 702 | \subsection*{Cointegration test} 703 | 704 | \begin{enumerate}[leftmargin=*] 705 | \item Estimate \( y_{t} = \alpha + \beta x_{t} + \varepsilon_{t} \) and obtain \( \hat{\varepsilon}_{t} \). 706 | \item Perform an ADF test on \( \hat{\varepsilon}_{t} \) with a modified distribution. 707 | The result of this test is equivalent to: 708 | \begin{itemize}[leftmargin=*] 709 | \item \( H_{0} \): \( \beta = 0 \) (no cointegration) 710 | \item \( H_{1} \): \( \beta \neq 0 \) (cointegration) 711 | \end{itemize} 712 | If the test statistic exceeds the critical value (in absolute value), reject \( H_{0} \). 713 | \end{enumerate} 714 | 715 | \section*{Heteroscedasticity in time series} 716 | 717 | The \textbf{assumption} affected is \textbf{t4}, which makes \textbf{OLS inefficient}. 718 | 719 | Use tests like Breusch-Pagan or White's, where \( H_{0} \): No heteroscedasticity. For these tests to work, there should be \textbf{no autocorrelation}. 720 | 721 | \subsection*{ARCH} 722 | 723 | An autoregressive conditional heteroscedasticity (ARCH) model is used to analyse a form of dynamic heteroscedasticity, where the error variance follows an \( \text{AR}(p) \) process.
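Such errors can be illustrated by simulating an ARCH(1) process, whose conditional variance depends on the last squared error. A minimal sketch, assuming NumPy (parameter values are illustrative):

```python
import numpy as np

rng = np.random.default_rng(1)

# Simulate ARCH(1) errors: u_t = sigma_t * eps_t with
# sigma_t^2 = alpha0 + alpha1 * u_{t-1}^2 and eps_t ~ N(0, 1).
alpha0, alpha1, T = 1.0, 0.5, 50_000
u = np.zeros(T)
for t in range(1, T):
    sigma2 = alpha0 + alpha1 * u[t - 1] ** 2
    u[t] = np.sqrt(sigma2) * rng.standard_normal()

# With alpha1 < 1, the unconditional variance is alpha0 / (1 - alpha1) = 2,
# even though the conditional variance changes every period.
print(u.var())  # close to 2
```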
724 | 725 | Given the model \( y_{t} = \beta_{0} + \beta_{1} z_{t} + u_{t} \), the error follows an ARCH(1) process when its conditional variance depends on the previous squared error: 726 | 727 | \begin{center} 728 | \( \E(u_{t}^{2} \mid u_{t - 1}) = \alpha_{0} + \alpha_{1} u_{t - 1}^{2} \) 729 | \end{center} 730 | 731 | \subsection*{GARCH} 732 | 733 | A generalized ARCH (GARCH) model is similar to ARCH, but the error variance follows an \( \text{ARMA}(p, q) \) process. 734 | 735 | \end{multicols} 736 | 737 | \end{document} -------------------------------------------------------------------------------- /time-series-cheatsheet/time-series-cheatsheet-es.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/marcelomijas/econometrics-cheatsheet/1f49cd08d8232e3eb9c3357fa47327fd46dddae5/time-series-cheatsheet/time-series-cheatsheet-es.pdf --------------------------------------------------------------------------------