├── .github ├── .gitignore └── workflows │ └── check-standard.yaml ├── vignettes ├── .gitignore └── documentation.Rmd ├── LICENSE ├── tests ├── testthat │ ├── input │ │ ├── .gitignore │ │ ├── pvalues.txt │ │ ├── df_test_base_STATA_allzero.csv │ │ ├── df_test_AddCI.csv │ │ ├── make.log │ │ ├── df_test.csv │ │ ├── df_test_base_STATA.csv │ │ ├── readme.MD │ │ ├── make.py │ │ ├── df_test_base_STATA_FHS.csv │ │ └── code │ │ │ ├── build_OLS.do │ │ │ └── build_FHS.do │ ├── test-AddSmPath.R │ ├── test-ComputeFirstDifferences.R │ ├── test-AddMeans.R │ ├── test-AddSuptBand.R │ ├── test-PrepareModelFormula.R │ ├── test-TestLinear.R │ ├── test-ComputeShifts.R │ ├── test-AddCIs.R │ ├── test-PreparePlottingData.R │ ├── test-EventStudyPlot.R │ ├── test-EventStudyOLS.R │ └── test-EventStudyFHS.R └── testthat.R ├── R ├── .gitattributes ├── sysdata.rda ├── data.R ├── AddMeans.R ├── AddCIs.R ├── ComputeFirstDifferences.R ├── AddSuptBand.R ├── ComputeShifts.R ├── AddSmPath.R ├── PrepareModelFormula.R ├── EventStudyOLS.R ├── TestLinear.R ├── EventStudyFHS.R ├── PreparePlottingData.R ├── SmPathHelpers.R ├── EventStudyPlot.R └── EventStudy.R ├── data ├── .gitattributes └── example_data.rda ├── man ├── figures │ ├── logo.png │ └── README-example-1.png ├── example_data.Rd ├── TestLinear.Rd ├── EventStudyPlot.Rd └── EventStudy.Rd ├── examples └── source │ └── raw │ └── eventstudy_illustration_data │ ├── orig │ ├── simulation_data_static.dta │ └── simulation_data_dynamic.dta │ └── README.md ├── .Rbuildignore ├── data-raw ├── example_data.R └── save_internal_data.R ├── .gitattributes ├── .gitignore ├── inst └── CITATION ├── cran-comments.md ├── EventStudyR.Rproj ├── NAMESPACE ├── LICENSE.md ├── LICENSE.note ├── DESCRIPTION ├── README.md └── README.Rmd /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2023 2 | COPYRIGHT HOLDER: JMSLab 3 | -------------------------------------------------------------------------------- /tests/testthat/input/.gitignore: -------------------------------------------------------------------------------- 1 | !df_test*.csv 2 | 3 | -------------------------------------------------------------------------------- /R/.gitattributes: -------------------------------------------------------------------------------- 1 | *.rda -filter=lfs -diff=lfs -merge=lfs -text 2 | -------------------------------------------------------------------------------- /data/.gitattributes: -------------------------------------------------------------------------------- 1 | *.rda -filter=lfs -diff=lfs -merge=lfs -text 2 | -------------------------------------------------------------------------------- /tests/testthat/input/pvalues.txt: -------------------------------------------------------------------------------- 1 | .8270326624995562 2 | .6923175802538313 3 | -------------------------------------------------------------------------------- /R/sysdata.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JMSLab/eventstudyr/HEAD/R/sysdata.rda 
-------------------------------------------------------------------------------- /data/example_data.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JMSLab/eventstudyr/HEAD/data/example_data.rda -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(eventstudyr) 3 | 4 | test_check("eventstudyr") 5 | -------------------------------------------------------------------------------- /tests/testthat/input/df_test_base_STATA_allzero.csv: -------------------------------------------------------------------------------- 1 | "","coef","std_error" 2 | "z",".1045735",".0575732" 3 | -------------------------------------------------------------------------------- /tests/testthat/input/df_test_AddCI.csv: -------------------------------------------------------------------------------- 1 | term,estimate,std.error 2 | z_fd,1,1 3 | z_fd_lag1,-2,1 4 | z_fd_lead1,2,1 5 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8500ef3719d6f69e213f7f85dd45ec8af8fe3c8e977b1b51e389ccffc1fbf020 3 | size 21970 4 | -------------------------------------------------------------------------------- /man/figures/README-example-1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:249d7897c569857a28cef3387690dbf5caafa7ae89b3abb19d776ec19eb5c0c4 3 | size 4389 4 | -------------------------------------------------------------------------------- /tests/testthat/input/make.log: -------------------------------------------------------------------------------- 1 | This file was created by make.py 2 | Current date: 2023-03-19 -- 11:41:34 3 | 4 | Execution of code\build_FHS.do completed 5 | Execution of code\build_OLS.do completed 6 | -------------------------------------------------------------------------------- /examples/source/raw/eventstudy_illustration_data/orig/simulation_data_static.dta: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:c19a432e3e1967bf53b6d521ef2a46a45c420fa04cf0cf29aa09238ab3eaf867 3 | size 45415 4 | -------------------------------------------------------------------------------- /examples/source/raw/eventstudy_illustration_data/orig/simulation_data_dynamic.dta: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d42e65f17cb4ef6a7c46874db4db3c9450f71e6a396aa5993f738d99724c271b 3 | size 105895 4 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^eventstudyr\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.github$ 4 | ^LICENSE\.md$ 5 | ^issue[0-9]+$ 6 | ^examples$ 7 | ^data-raw$ 8 | ^vignettes/eventstudyr.Rmd$ 9 | ^cran-comments\.md$ 10 | ^README\.Rmd$ 11 | ^revdep$ 12 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' Sample dataset obtained from the replication archive 
for Freyaldenhoven et al. (2021) 2 | #' 3 | #' @source Dataset in .dta format can be found in the .zip archive in \url{https://data.nber.org/data-appendix/w29170/} 4 | "example_data" 5 | 6 | -------------------------------------------------------------------------------- /data-raw/example_data.R: -------------------------------------------------------------------------------- 1 | 2 | library(haven) 3 | 4 | indir <- 'examples/source/raw/eventstudy_illustration_data/orig' 5 | 6 | example_data <- read_dta(sprintf('%s/simulation_data_dynamic.dta', indir)) 7 | usethis::use_data(example_data, overwrite = TRUE, version = 3) 8 | -------------------------------------------------------------------------------- /tests/testthat/input/df_test.csv: -------------------------------------------------------------------------------- 1 | country,values,periods 2 | A,0.7838108576834202,1 3 | A,0.1423108058515936,2 4 | A,0.8868071401957422,3 5 | A,0.9385397769510746,4 6 | B,0.921952681383118,1 7 | B,0.7619687209371477,2 8 | B,0.8936178737785667,3 9 | B,0.6012312234379351,4 10 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.dta filter=lfs diff=lfs merge=lfs -text 2 | *.RData filter=lfs diff=lfs merge=lfs -text 3 | *.rda filter=lfs diff=lfs merge=lfs -text 4 | *.rds filter=lfs diff=lfs merge=lfs -text 5 | *.pdf filter=lfs diff=lfs merge=lfs -text 6 | *.png filter=lfs diff=lfs merge=lfs -text 7 | *.xlsx filter=lfs diff=lfs merge=lfs -text 8 | -------------------------------------------------------------------------------- /tests/testthat/input/df_test_base_STATA.csv: -------------------------------------------------------------------------------- 1 | "","coef","std_error" 2 | "zfd",".0774304",".1290923" 3 | "x_r","-.0522901",".02916" 4 | "L.zfd",".3167293",".1759148" 5 | "L2.zfd",".2994021",".1529249" 6 | "F2.zfd",".0770573",".1660454" 7 | "F3.zfd",".1157483",".1619981" 8 | "F3.z","-.1641421",".1424652" 9 | "L3.z",".2121165",".1309685" 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /drive 2 | /datastore 3 | /temp 4 | *.pyc 5 | *.lyx~ 6 | *.lyx# 7 | *.RDS 8 | *.RData 9 | *.Rhistory 10 | *.Rapp.history 11 | *.DS_Store 12 | *.aux 13 | *.fls 14 | *.lof 15 | *.lot 16 | *.nav 17 | *.snm 18 | *.toc 19 | *.out 20 | *.svn 21 | *.m~ 22 | .Rproj.user 23 | !data/*.RData 24 | !data/*.rda 25 | *.csv 26 | inst/doc 27 | revdep/ 28 | *.Rproj 29 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite eventstudyr in publications use:") 2 | 3 | bibentry( 4 | bibtype = "Misc", 5 | title = "eventstudyr package", 6 | author = "Simon Freyaldenhoven, Christian Hansen, Jorge Pérez Pérez, Jesse M. Shapiro, Veli M. Andirin, Richard Calvo, Santiago Hermo, Nathan Schor, Emily Wang", 7 | url = "https://github.com/JMSLab/eventstudyr", 8 | year = 2023 9 | ) 10 | -------------------------------------------------------------------------------- /tests/testthat/input/readme.MD: -------------------------------------------------------------------------------- 1 | ## Overview 2 | 3 | This directory contains input data for unit tests. 4 | 5 | - The `/code/` folder stores `.do` files that construct inputs for unit tests. 
6 | - The code uses data from the `/examples/` folder. 7 | 8 | To build the input folder run `make.py`. 9 | 10 | ### Requirements 11 | 12 | To run the code you need: 13 | 14 | - STATA compiler 15 | - Python compiler 16 | - `estout` package for STATA 17 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Notes 2 | 3 | Re-submission of the package with patch update. 4 | 5 | ## R CMD check results 6 | 7 | 0 errors | 0 warnings | 0 notes 8 | 9 | ## revdepcheck results 10 | 11 | We checked 0 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. 12 | 13 | * We saw 0 new problems 14 | * We failed to check 0 packages 15 | 16 | ## Package changes 17 | 18 | * Implemented patch updates for compatibility with `ggplot2` 3.6.0 19 | -------------------------------------------------------------------------------- /EventStudyR.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 4 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /examples/source/raw/eventstudy_illustration_data/README.md: -------------------------------------------------------------------------------- 1 | ### Overview 2 | This directory includes sample datasets downloaded from the replication archive for [Freyaldenhoven et al. (2021)](https://www.nber.org/papers/w29170) 3 | 4 | ### Source 5 | NBER 6 | 7 | ### When/where obtained & original form of files 8 | 9 | Zip archive with datasets was downloaded from [this page](https://data.nber.org/data-appendix/w29170/) on February 2, 2022 by Veli Murat Andirin. 10 | 11 | ### Description 12 | ./orig: Sample datasets as `.dta` files. 13 | 14 | ./docs: Screenshot of the download page. 15 | 16 | -------------------------------------------------------------------------------- /man/example_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{example_data} 5 | \alias{example_data} 6 | \title{Sample dataset obtained from the replication archive for Freyaldenhoven et al. (2021)} 7 | \format{ 8 | An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 2000 rows and 12 columns. 9 | } 10 | \source{ 11 | Dataset in .dta format can be found in the .zip archive in \url{https://data.nber.org/data-appendix/w29170/} 12 | } 13 | \usage{ 14 | example_data 15 | } 16 | \description{ 17 | Sample dataset obtained from the replication archive for Freyaldenhoven et al. 
(2021) 18 | } 19 | \keyword{datasets} 20 | -------------------------------------------------------------------------------- /tests/testthat/input/make.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import time 4 | 5 | with open('make.log', 'w') as f: 6 | f.write('This file was created by make.py\n') 7 | f.write(f'Current date: {time.strftime("%Y-%m-%d -- %H:%M:%S")}\n\n') 8 | 9 | do_files = glob.glob('code/*.do') 10 | 11 | stata = 'StataSE-64' 12 | 13 | log_files = glob.glob('log/*.log') 14 | for ff in log_files: 15 | os.remove(ff) 16 | 17 | for ff in do_files: 18 | os.system(f'{stata} /e {ff}') 19 | 20 | with open('make.log', 'a') as f: 21 | f.write(f'Execution of {ff} completed\n') 22 | 23 | ff_base = os.path.basename(ff) 24 | ff_log = ff_base.replace('.do', '.log') 25 | 26 | os.rename(f'{ff_log}', f'log/{ff_log}') 27 | 28 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(EventStudy) 4 | export(EventStudyPlot) 5 | export(TestLinear) 6 | import(dplyr) 7 | import(estimatr) 8 | import(ggplot2) 9 | import(stringr) 10 | importFrom(MASS,mvrnorm) 11 | importFrom(car,linearHypothesis) 12 | importFrom(data.table,":=") 13 | importFrom(data.table,.SD) 14 | importFrom(data.table,CJ) 15 | importFrom(data.table,as.data.table) 16 | importFrom(data.table,copy) 17 | importFrom(data.table,is.data.table) 18 | importFrom(data.table,setnames) 19 | importFrom(data.table,setorder) 20 | importFrom(data.table,setorderv) 21 | importFrom(data.table,shift) 22 | importFrom(pracma,inv) 23 | importFrom(pracma,pinv) 24 | importFrom(rlang,.data) 25 | importFrom(stats,as.formula) 26 | importFrom(stats,pnorm) 27 | importFrom(stats,qchisq) 28 | importFrom(stats,qnorm) 29 | importFrom(stats,reformulate) 30 | importFrom(stats,setNames) 31 | -------------------------------------------------------------------------------- /tests/testthat/input/df_test_base_STATA_FHS.csv: -------------------------------------------------------------------------------- 1 | "","coef","std_error","coef","std_error","coef","std_error","coef","std_error","coef","std_error","coef","std_error" 2 | "eta_m","1.669717","2.2977","1.948522","2.130648","1.634839","2.282194","1.634839","2.172034","1.94113","2.235042","1.94113","2.076632" 3 | "x_r","-.0500838",".028236","-.0533449",".0277989","-.0488262",".0323695","-.0488262",".0295303","-.0524275",".0322331","-.0524275",".0286475" 4 | "z_lead3",".0456388",".2185155",".0980764",".2460766",".0703236",".2541796",".0703236",".2390053",".1105664",".2566083",".1105664",".2469213" 5 | "z_fd_lead2",".020128",".1406527",".0128612",".1433497",".0163729",".1297879",".0163729",".1410474",".0091793",".1303523",".0091793",".1437379" 6 | "z_fd",".4703923",".6147443",".4809733",".5654271",".4654961",".6314258",".4654961",".5886608",".4814239",".6164001",".4814239",".553323" 7 | "z_fd_lag1","1.039846","1.085427","1.183739","1.008607","1.033747","1.082208","1.033747","1.037399","1.187588","1.060234","1.187588",".9880124" 8 | "z_fd_lag2","1.355806","1.53003","1.547324","1.410691","1.343855","1.536298","1.343855","1.457308","1.549578","1.504329","1.549578","1.376885" 9 | "z_lag3","1.889111","2.380558","2.202123","2.2346","1.870236","2.412257","1.870236","2.299706","2.195865","2.363999","2.195865","2.19404" 10 | 
-------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2023 JMSLab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | # Licenses for incorporated dependencies 24 | 25 | * [suptCriticalValue](https://github.com/ryanedmundkessler/suptCriticalValue/blob/c1dd6b2e9aaeaa7990efa2c857e76806d23bea9e/LICENSE) 26 | -------------------------------------------------------------------------------- /LICENSE.note: -------------------------------------------------------------------------------- 1 | The eventstudyr package as a whole is distributed under MIT. The eventstudyr package includes one other open source software component that is distributed under MIT. The component is a GitHub repository called suptCriticalValue 2 | 3 | A copy of the license agreements used by this component is included below. 4 | 5 | MIT License 6 | 7 | Copyright (c) 2021 Ryan Kessler 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 
26 | -------------------------------------------------------------------------------- /tests/testthat/input/code/build_OLS.do: -------------------------------------------------------------------------------- 1 | clear all 2 | version 15 3 | set more off 4 | 5 | program main 6 | local indir "../../../examples/source/raw/eventstudy_illustration_data/orig" 7 | local outdir "." 8 | 9 | use "`indir'/simulation_data_dynamic.dta", clear 10 | 11 | 12 | local G 1 13 | local LG 2 14 | local M 1 15 | local LM 2 16 | local furthest_lead = `G' + `LG' 17 | local furthest_lag = `M' + `LM' 18 | local M_minus1 = `M' - 1 19 | local furthest_lag_minus1 = `furthest_lag' - 1 20 | 21 | xtset id t 22 | gen zfd = d.z 23 | 24 | local leads 25 | forvalues i = 2/`furthest_lead' { 26 | local leads `leads' f`i'.zfd 27 | } 28 | display "`leads'" 29 | local lags 30 | forvalues i = 1/`furthest_lag_minus1' { 31 | local lags `lags' l`i'.zfd 32 | } 33 | 34 | eststo: reg y_base zfd x_r `lags' `leads' f`furthest_lead'.z l`furthest_lag'.z i.t i.id, vce(cluster id) 35 | 36 | esttab * using "`outdir'/df_test_base_STATA.csv", csv plain wide noobs /// 37 | keep(x_r zfd L.zfd L2.zfd F2.zfd F3.zfd F3.z L3.z) /// 38 | replace se nomtitles collabels("coef" "std_error") 39 | 40 | eststo clear 41 | eststo: reg y_base z i.t i.id, vce(cluster id) 42 | 43 | esttab * using "`outdir'/df_test_base_STATA_allzero.csv", csv plain wide noobs /// 44 | keep(z) /// 45 | replace se nomtitles collabels("coef" "std_error") 46 | 47 | end 48 | 49 | 50 | main 51 | -------------------------------------------------------------------------------- /.github/workflows/check-standard.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macos-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'oldrel-1'} 26 | 27 | env: 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | R_KEEP_PKG_SOURCE: yes 30 | 31 | steps: 32 | - uses: actions/checkout@v3 33 | 34 | - uses: r-lib/actions/setup-pandoc@v2 35 | 36 | - uses: r-lib/actions/setup-r@v2 37 | with: 38 | r-version: ${{ matrix.config.r }} 39 | http-user-agent: ${{ matrix.config.http-user-agent }} 40 | use-public-rspm: true 41 | 42 | - uses: r-lib/actions/setup-r-dependencies@v2 43 | with: 44 | extra-packages: any::rcmdcheck 45 | needs: check 46 | 47 | - uses: r-lib/actions/check-r-package@v2 48 | with: 49 | upload-snapshots: true 50 | -------------------------------------------------------------------------------- /tests/testthat/test-AddSmPath.R: -------------------------------------------------------------------------------- 1 | 2 | test_that("AddZerosCovar correctly adds zeros where normalized coefficient should be", { 3 | 4 | vcov_matrix_all <- diag(rep(3,10)) 5 | eventstudy_coeffs <- c(paste0("a", 1:2), paste0("a", 4:9)) 6 | rownames(vcov_matrix_all) <- c(eventstudy_coeffs, "a10", "a11") 7 | colnames(vcov_matrix_all) <- c(eventstudy_coeffs, "a10", "a11") 8 | 9 | norm_column <- "a3" 10 | coeffs_oder <- paste0("a", 1:9) 11 | 12 | expected_matrix <- diag(c(rep(3,2), 0, rep(3,6))) 13 | rownames(expected_matrix) <- paste0("a", 1:9) 14 | colnames(expected_matrix) <- paste0("a", 1:9) 15 | 16 | covar <- AddZerosCovar(vcov_matrix_all, eventstudy_coeffs, norm_column, coeffs_oder) 17 | 18 | expect_equal(covar, expected_matrix) 19 | }) 20 | 21 | test_that("AddZerosCovar correctly adds zeros where normalized coefficient should be with 2 normalizations", { 22 | 23 | vcov_matrix_all <- diag(rep(3,9)) 24 | eventstudy_coeffs <- c(paste0("a", 1:2), paste0("a", 4), paste0("a", 6:10)) 25 | rownames(vcov_matrix_all) <- c(eventstudy_coeffs, "a11") 26 | colnames(vcov_matrix_all) <- c(eventstudy_coeffs, "a11") 27 | 28 | norm_column <- c("a3", "a5") 29 | coeffs_oder <- paste0("a", 1:10) 30 | 31 | expected_matrix <- diag(c(rep(3,2), 0, 3, 0, rep(3,5))) 32 | rownames(expected_matrix) <- paste0("a", 1:10) 33 | colnames(expected_matrix) <- paste0("a", 1:10) 34 | 35 | covar <- AddZerosCovar(vcov_matrix_all, eventstudy_coeffs, norm_column, coeffs_oder) 36 | 37 | expect_equal(covar, expected_matrix) 38 | }) 39 | -------------------------------------------------------------------------------- /R/AddMeans.R: -------------------------------------------------------------------------------- 1 | #' Adds the mean of the outcome variable for unit-time pairs where the variable whose coefficient is normalized is non-zero. 2 | #' 3 | #' @param df Data frame containing the additional variables created for estimation. 4 | #' @param normalization_column Character indicating variable whose coefficient will be normalized. 5 | #' @param policyvar Character indicating column of policy variable z. 6 | #' @param outcomevar Character indicating column of outcome variable y. 
7 | #' @param custom_scalar Scalar to be displayed along y = 0 line if the user doesn't want the mean computed by the package. 8 | #' @keywords internal 9 | #' @noRd 10 | 11 | AddMeans <- function(df, normalization_column, policyvar, outcomevar, 12 | custom_scalar = NULL) { 13 | 14 | if (! is.data.frame(df)) {stop("df should be a data frame.")} 15 | if (! is.character(outcomevar)) {stop("outcomevar should be a character.")} 16 | if (! is.character(policyvar)) {stop("policyvar should be a character.")} 17 | if (! is.character(normalization_column)) {stop("normalization_column should be a character.")} 18 | if (! (is.numeric(custom_scalar) | is.null(custom_scalar))) {stop("custom_scalar should be numeric.")} 19 | 20 | if (is.null(custom_scalar)) { 21 | if (grepl(paste0(policyvar, "_lead"), normalization_column)) { 22 | df_change <- df[get(normalization_column) == 0 & !is.na(get(normalization_column))] 23 | } else { 24 | df_change <- df[get(normalization_column) != 0 & !is.na(get(normalization_column))] 25 | } 26 | mean <- df_change[, mean(get(outcomevar), na.rm = TRUE)] 27 | } else { 28 | mean <- custom_scalar 29 | } 30 | 31 | return(mean) 32 | } 33 | 34 | -------------------------------------------------------------------------------- /man/TestLinear.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/TestLinear.R 3 | \name{TestLinear} 4 | \alias{TestLinear} 5 | \title{Perform Tests of Linear Hypotheses} 6 | \usage{ 7 | TestLinear( 8 | estimates, 9 | test = NA, 10 | test_name = "User Test", 11 | pretrends = TRUE, 12 | leveling_off = TRUE 13 | ) 14 | } 15 | \arguments{ 16 | \item{estimates}{A list of length 2 containing estimation results and model information. 17 | Should be an output of \code{\link[=EventStudy]{EventStudy()}}.} 18 | 19 | \item{test}{The hypothesis to be estimated. Accepts inputs that can be passed to 20 | hypothesis.matrix argument in \code{car::linearHypothesis()}.} 21 | 22 | \item{test_name}{Name for test input by user. Defaults to "User Test."} 23 | 24 | \item{pretrends}{If TRUE, uses pre and overidpre from estimates to test for pre-trends. 25 | Defaults to TRUE.} 26 | 27 | \item{leveling_off}{If TRUE, uses post and overidpost from estimates to test for leveling-off. 28 | Defaults to TRUE.} 29 | } 30 | \value{ 31 | A data frame containing the F-statistic and p-value for the specified test(s). 32 | } 33 | \description{ 34 | \code{TestLinear} tests linear restrictions on coefficients. 
35 | } 36 | \examples{ 37 | estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 38 | policyvar = "z", idvar = "id", timevar = "t", 39 | controls = "x_r", FE = TRUE, TFE = TRUE, 40 | post = 3, pre = 2, overidpre = 4, overidpost = 5, 41 | normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 42 | 43 | TestLinear( 44 | estimates, 45 | test = "z_fd_lag1 = z_fd", 46 | test_name = "Hypothesis Test", 47 | pretrends = TRUE, 48 | leveling_off = TRUE 49 | ) 50 | 51 | } 52 | -------------------------------------------------------------------------------- /tests/testthat/test-ComputeFirstDifferences.R: -------------------------------------------------------------------------------- 1 | library(data.table) 2 | 3 | test_that("correctly recognizes wrong variable type for grouping variable", { 4 | dt_test <- data.table::fread("./input/df_test.csv") 5 | 6 | expect_error(ComputeFirstDifferences(dt_test, country, "periods", "values")) 7 | }) 8 | 9 | test_that("correctly recognizes wrong variable type for diffvar variable", { 10 | dt_test <- data.table::fread("./input/df_test.csv") 11 | 12 | expect_error(ComputeFirstDifferences(dt_test, "country", "periods", values)) 13 | }) 14 | 15 | test_that("a column with _fd suffix is added", { 16 | dt_test <- data.table::fread("./input/df_test.csv") 17 | 18 | df_diff <- ComputeFirstDifferences(dt_test, "country", "periods", "values") 19 | 20 | expect_true("values_fd" %in% colnames(df_diff)) 21 | }) 22 | 23 | test_that("correctly computes differences with a balanced dataset", { 24 | df <- data.table( 25 | id = rep(c("A", "B"), each = 4), 26 | time = rep(1:4, times = 2), 27 | policy = c(10, 12, 11, 13, 8, 9, 10, 11), 28 | policy_fd_expected = c(NA, 2, -1, 2, NA, 1, 1, 1) 29 | ) 30 | 31 | df_out <- ComputeFirstDifferences(df, "id", "time", "policy", timevar_holes = FALSE) 32 | 33 | expect_identical(df_out$policy_fd, df$policy_fd_expected) 34 | }) 35 | 36 | test_that("correctly computes differences with a dataset that has holes in time var", { 37 | df <- data.table( 38 | id = c(rep("A", 4), rep("B", 2), rep("C", 3)), 39 | time = c(1, 2, 4, 5, 2, 3, 2, 3, 4), 40 | policy = c(10, 12, 13, 14, 8, 9, 10, 11, 12), 41 | policy_fd_expected = c(NA, 2, NA, 1, NA, 1, NA, 1, 1) 42 | ) 43 | 44 | df_out <- ComputeFirstDifferences(df, "id", "time", "policy", 45 | timevar_holes = TRUE) 46 | 47 | expect_identical(df_out$policy_fd, df$policy_fd_expected) 48 | }) 49 | -------------------------------------------------------------------------------- /tests/testthat/test-AddMeans.R: -------------------------------------------------------------------------------- 1 | test_that("means are computed correctly when a first differenced variable is normalized", { 2 | suppressWarnings( 3 | sample_estimation <- EventStudy(estimator = "OLS", data = example_data, 4 | outcomevar = "y_base", policyvar = "z", 5 | idvar = "id", timevar = "t", FE = TRUE, 6 | TFE = TRUE, post = 2, pre = 2, overidpre = 2, 7 | overidpost = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = TRUE) 8 | ) 9 | 10 | df_estimation <- sample_estimation[[2]]$data 11 | 12 | mean_function <- AddMeans(df_estimation, "z_fd_lead1", 13 | sample_estimation[[2]]$policyvar, 14 | sample_estimation[[2]]$outcomevar) 15 | 16 | mean_manual <- df_estimation[z_fd_lead1 != 0, mean(y_base, na.rm = TRUE)] 17 | 18 | expect_equal(mean_function, mean_manual) 19 | 20 | }) 21 | 22 | test_that("means are computed correctly when the furthest lead is normalized", { 23 | sample_estimation 
<- EventStudy(estimator = "OLS", data = example_data, 24 | outcomevar = "y_base", policyvar = "z", 25 | idvar = "id", timevar = "t", FE = TRUE, 26 | TFE = TRUE, post = 2, pre = 2, overidpre = 2, 27 | overidpost = 2, normalize = -5, cluster = TRUE, anticipation_effects_normalization = TRUE) 28 | 29 | df_estimation <- sample_estimation[[2]]$data 30 | 31 | mean_function <- AddMeans(df_estimation, "z_lead4", 32 | sample_estimation[[2]]$policyvar, 33 | sample_estimation[[2]]$outcomevar) 34 | 35 | mean_manual <- df_estimation[z_lead4 == 0, mean(y_base, na.rm = TRUE)] 36 | 37 | expect_equal(mean_function, mean_manual) 38 | }) 39 | 40 | 41 | -------------------------------------------------------------------------------- /data-raw/save_internal_data.R: -------------------------------------------------------------------------------- 1 | 2 | library(dplyr) 3 | library(haven) 4 | 5 | indir <- 'examples/source/raw/eventstudy_illustration_data/orig' 6 | 7 | example_data <- read_dta(sprintf('%s/simulation_data_dynamic.dta', indir)) 8 | 9 | policyvar <- "z" 10 | idvar <- "id" 11 | timevar <- "t" 12 | 13 | post <- 1 14 | pre <- 1 15 | overidpost <- 2 16 | overidpre <- 2 17 | 18 | df_first_diff <- GetFirstDifferences(df = example_data, timevar = "t", groupvar="id", diffvar = "z") 19 | 20 | num_fd_lag_periods <- post + overidpost - 1 21 | num_fd_lead_periods <- pre + overidpre 22 | 23 | furthest_lag_period <- num_fd_lag_periods + 1 24 | 25 | df_first_diff_leads <- PrepareLeads(df_first_diff, groupvar = idvar, timevar, 26 | leadvar = paste0(policyvar, "_fd"), leads = 1:num_fd_lead_periods) 27 | df_first_diff_leads_lags <- PrepareLags(df_first_diff_leads, groupvar = idvar, timevar, 28 | lagvar = paste0(policyvar, "_fd"), lags = 1:num_fd_lag_periods) 29 | 30 | df_lag <- PrepareLags(df_first_diff_leads_lags, groupvar = idvar, timevar, 31 | lagvar = policyvar, lags = furthest_lag_period) 32 | df_lag_lead <- PrepareLeads(df_lag, groupvar = idvar, timevar, 33 | leadvar = policyvar, leads = num_fd_lead_periods) 34 | 35 | column_subtract_1 <- paste0(policyvar, "_lead", num_fd_lead_periods) 36 | df_lag_lead[column_subtract_1] <- 1 - df_lag_lead[column_subtract_1] 37 | 38 | v_OLS_variables <- c("id", "t", "z", "y_base", "x_r", "z_fd", 39 | "z_fd_lead1", "z_fd_lead2", "z_fd_lead3", 40 | "z_fd_lag1", "z_fd_lag2", "z_lag3","z_lead3") 41 | 42 | v_FHS_variables <- c(v_OLS_variables, "eta_m") 43 | 44 | df_EventStudyOLS_example <- df_lag_lead[v_OLS_variables] 45 | df_EventStudyFHS_example <- df_lag_lead[v_FHS_variables] 46 | df_sample_static <- read_dta(sprintf('%s/simulation_data_static.dta', indir)) 47 | 48 | usethis::use_data(df_EventStudyOLS_example, df_EventStudyFHS_example, df_sample_static, overwrite = TRUE, version = 3, internal = TRUE) 49 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: eventstudyr 2 | Title: Estimation and Visualization of Linear Panel Event Studies 3 | Version: 1.1.5 4 | Authors@R: 5 | c(person(given = "Simon", 6 | family = "Freyaldenhoven", 7 | role = c("aut")), 8 | person(given = "Christian", 9 | family = "Hansen", 10 | role = c("aut")), 11 | person(given = "Jorge", 12 | family = "Pérez Pérez", 13 | role = c("aut")), 14 | person(given = "Jesse", 15 | family = "Shapiro", 16 | role = c("aut")), 17 | person(given = "Veli", 18 | family = "Andirin", 19 | role = c("aut")), 20 | person(given = "Richard", 21 | family = "Calvo", 22 | role = c("aut")), 23 | 
person(given = "Santiago", 24 | family = "Hermo", 25 | email = "santiago.hermo@monash.edu", 26 | role = c("aut", "cre")), 27 | person(given = "Nathan", 28 | family = "Schor", 29 | role = c("aut")), 30 | person(given = "Emily", 31 | family = "Wang", 32 | role = c("aut")), 33 | person(given = "JMSLab", 34 | role = c("cph")), 35 | person(given = "Ryan", 36 | family = "Kessler", 37 | role = c("cph"))) 38 | Description: Estimates linear panel event study models. Plots coefficients following the recommendations in Freyaldenhoven et al. (2021) . Includes sup-t bands, testing for key hypotheses, least wiggly path through the Wald region. Allows instrumental variables estimation following Freyaldenhoven et al. (2019) . 39 | License: MIT + file LICENSE 40 | Encoding: UTF-8 41 | Imports: 42 | car, 43 | data.table, 44 | dplyr, 45 | estimatr, 46 | ggplot2, 47 | MASS, 48 | rlang, 49 | pracma, 50 | stats, 51 | stringr 52 | VignetteBuilder: 53 | knitr 54 | LazyData: true 55 | Roxygen: list(markdown = TRUE) 56 | RoxygenNote: 7.3.2 57 | Suggests: 58 | rmarkdown, 59 | knitr, 60 | testthat (>= 3.0.0) 61 | Config/testthat/edition: 3 62 | Depends: 63 | R (>= 3.5.0) 64 | URL: https://github.com/JMSLab/eventstudyr 65 | BugReports: https://github.com/JMSLab/eventstudyr/issues 66 | -------------------------------------------------------------------------------- /tests/testthat/test-AddSuptBand.R: -------------------------------------------------------------------------------- 1 | # OLS ----------------------------------- 2 | 3 | test_that("check that the correct part of vcov matrix is used", { 4 | suppressWarnings( 5 | sample_estimation <- EventStudy(estimator = "OLS", data = example_data, 6 | outcomevar = "y_base", policyvar = "z", 7 | idvar = "id", timevar = "t", FE = TRUE, control = "x_r", 8 | TFE = TRUE, post = 2, pre = 2, overidpre = 2, 9 | overidpost = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = TRUE) 10 | ) 11 | 12 | eventstudy_coefficients <- sample_estimation[[2]]$eventstudy_coefficients 13 | vcov_matrix_all <- sample_estimation[[1]]$vcov 14 | v_terms_to_keep <- colnames(vcov_matrix_all) %in% eventstudy_coefficients 15 | vcov_matrix <- vcov_matrix_all[v_terms_to_keep, v_terms_to_keep] 16 | 17 | expect_true(isSymmetric(vcov_matrix)) 18 | expect_true(all(eventstudy_coefficients %in% colnames(vcov_matrix))) 19 | expect_true(! "x_r" %in% colnames(vcov_matrix)) 20 | }) 21 | 22 | # FHS ----------------------------------- 23 | 24 | test_that("check that the correct part of vcov matrix is used", { 25 | data <- example_data[, c("y_base", "z", "id", "t", "x_r", "eta_m")] 26 | suppressWarnings( 27 | sample_estimation <- EventStudy(estimator = "FHS", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", 28 | timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, 29 | overidpost = 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = TRUE) 30 | ) 31 | 32 | eventstudy_coefficients <- sample_estimation[[2]]$eventstudy_coefficients 33 | vcov_matrix_all <- sample_estimation[[1]]$vcov 34 | v_terms_to_keep <- colnames(vcov_matrix_all) %in% eventstudy_coefficients 35 | vcov_matrix <- vcov_matrix_all[v_terms_to_keep, v_terms_to_keep] 36 | 37 | expect_true(isSymmetric(vcov_matrix, tol = sqrt(.Machine$double.eps))) 38 | expect_true(all(eventstudy_coefficients %in% colnames(vcov_matrix))) 39 | expect_true(! 
"x_r" %in% colnames(vcov_matrix)) 40 | }) 41 | 42 | -------------------------------------------------------------------------------- /tests/testthat/input/code/build_FHS.do: -------------------------------------------------------------------------------- 1 | clear all 2 | version 15 3 | set more off 4 | 5 | program main 6 | local indir "../../../examples/source/raw/eventstudy_illustration_data/orig" 7 | local outdir "." 8 | 9 | use "`indir'/simulation_data_dynamic.dta", clear 10 | 11 | local G 1 12 | local LG 2 13 | local M 1 14 | local LM 2 15 | local furthest_lead = `G' + `LG' 16 | local furthest_lag = `M' + `LM' 17 | local M_minus1 = `M' - 1 18 | local furthest_lag_minus1 = `furthest_lag' - 1 19 | 20 | xtset id t 21 | gen z_fd = d.z 22 | 23 | forvalues i = 2/`furthest_lead' { 24 | gen z_fd_lead`i' = f`i'.z_fd 25 | } 26 | forvalues i = 1/`furthest_lag_minus1' { 27 | gen z_fd_lag`i' = l`i'.z_fd 28 | } 29 | gen z_lead`furthest_lead' = f`furthest_lead'.z 30 | gen z_lag`furthest_lag' = l`furthest_lag'.z 31 | 32 | run_regressions 33 | 34 | esttab * using "`outdir'/df_test_base_STATA_FHS.csv", csv plain wide noobs /// 35 | keep(z_lead3 z_fd_lead2 z_fd z_fd_lag1 z_fd_lag2 z_lag3 x_r eta_m) /// 36 | replace se nomtitles collabels("coef" "std_error") 37 | end 38 | 39 | program run_regressions 40 | 41 | * Case 2: FE=TRUE , TFE=TRUE , cluster=TRUE 42 | eststo: xtivreg y_base x_r (eta_m = z_fd_lead3) z_lead3 z_fd_lead2 z_fd z_fd_lag1 z_fd_lag2 z_lag3 i.t, fe vce(cluster id) 43 | 44 | * Case 4: FE=TRUE , TFE=FALSE, cluster=TRUE 45 | eststo: xtivreg y_base x_r (eta_m = z_fd_lead3) z_lead3 z_fd_lead2 z_fd z_fd_lag1 z_fd_lag2 z_lag3, fe vce(cluster id) 46 | 47 | * Case 5: FE=FALSE, TFE=TRUE , cluster=FALSE 48 | eststo: ivregress 2sls y_base x_r (eta_m = z_fd_lead3) z_lead3 z_fd_lead2 z_fd z_fd_lag1 z_fd_lag2 z_lag3 i.t, small vce(robust) 49 | 50 | * Case 6: FE=FALSE, TFE=TRUE , cluster=TRUE 51 | eststo: ivregress 2sls y_base x_r (eta_m = z_fd_lead3) z_lead3 z_fd_lead2 z_fd z_fd_lag1 z_fd_lag2 z_lag3 i.t, small vce(cluster id) 52 | 53 | * Case 7: FE=FALSE, TFE=FALSE, cluster=FALSE 54 | eststo: ivregress 2sls y_base x_r (eta_m = z_fd_lead3) z_lead3 z_fd_lead2 z_fd z_fd_lag1 z_fd_lag2 z_lag3, small vce(robust) 55 | 56 | * Case 8: FE=FALSE, TFE=FALSE, cluster=TRUE 57 | eststo: ivregress 2sls y_base x_r (eta_m = z_fd_lead3) z_lead3 z_fd_lead2 z_fd z_fd_lag1 z_fd_lag2 z_lag3, small vce(cluster id) 58 | 59 | end 60 | 61 | 62 | main 63 | -------------------------------------------------------------------------------- /R/AddCIs.R: -------------------------------------------------------------------------------- 1 | #' Adds columns to data frame containing confidence intervals around provided estimates. 2 | #' 3 | #' @param df_estimates, A data frame with columns for term, estimate, and standard error. 4 | #' @param eventstudy_coefficients, A list specifying the names of the columns that were not normalized in EventStudy(). 5 | #' @param conf_level, Confidence level used for confidence interval 6 | #' expressed as a real number between 0 and 1, inclusively. Defaults to 0.95. 
7 | #' 8 | #' 9 | #' @import dplyr 10 | #' @importFrom rlang .data 11 | #' @importFrom stats qnorm 12 | #' @import estimatr 13 | #' @keywords internal 14 | #' @noRd 15 | #' 16 | #' @examples 17 | #' estimates <- EventStudy( 18 | #' estimator = "OLS", 19 | #' data = example_data, 20 | #' outcomevar = "y_base", 21 | #' policyvar = "z", 22 | #' idvar = "id", 23 | #' timevar = "t", 24 | #' controls = "x_r", 25 | #' FE = TRUE, 26 | #' TFE = TRUE, 27 | #' post = 3, 28 | #' pre = 2, 29 | #' overidpre = 4, 30 | #' overidpost = 5, 31 | #' normalize = - 3, 32 | #' cluster = TRUE, 33 | #' anticipation_effects_normalization = TRUE 34 | #' ) 35 | #' 36 | #' df_estimates_tidy <- estimatr::tidy(estimates$output) 37 | #' 38 | #' df_estimates_tidy <- AddCIs( 39 | #' df_estimates_tidy, 40 | #' eventstudy_coefficients = estimates$arguments$eventstudy_coefficients, 41 | #' conf_level = 0.95 42 | #') 43 | #' 44 | #' 45 | 46 | AddCIs <- function(df_estimates, eventstudy_coefficients, conf_level = 0.95) { 47 | if (! inherits(df_estimates, "data.frame")) {stop("df_estimates should be a data frame")} 48 | if (!all(c("term", "estimate", "std.error") %in% colnames(df_estimates))) { 49 | stop("df_estimates should include columns 'term', 'estimate', and 'std.error'.") 50 | } 51 | if (! is.character(eventstudy_coefficients)) {stop("eventstudy_coefficients should be a character vector.")} 52 | if (! is.numeric(conf_level) | conf_level < 0 | conf_level > 1) {stop("conf_level should be a real number between 0 and 1, inclusive.")} 53 | 54 | terms <- eventstudy_coefficients 55 | 56 | percentile <- conf_level + ((1 - conf_level)/2) 57 | 58 | df_CI <- dplyr::filter(df_estimates, .data$term %in% terms) 59 | df_CI <- dplyr::mutate(df_CI, ci_lower = .data$estimate - .data$std.error * stats::qnorm(percentile), 60 | ci_upper = .data$estimate + .data$std.error * stats::qnorm(percentile)) 61 | df_CI <- dplyr::select(df_CI, c("term", "ci_lower", "ci_upper")) 62 | df_estimates <- dplyr::left_join(df_estimates, df_CI, by = "term") 63 | 64 | return(df_estimates) 65 | } 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # eventstudyr 5 | 6 | 7 | 8 | [![CRAN 9 | status](https://www.r-pkg.org/badges/version/eventstudyr)](https://CRAN.R-project.org/package=eventstudyr) 10 | [![CRAN RStudio mirror 11 | downloads](https://cranlogs.r-pkg.org/badges/grand-total/eventstudyr)](https://r-pkg.org/pkg/eventstudyr) 12 | 13 | 14 | ## Overview 15 | 16 | The **eventstudyr** package implements tools for estimating linear panel 17 | event study models, following the recommendations in [Freyaldenhoven et 18 | al. (2021)](https://www.nber.org/papers/w29170). Includes sup-t bands, 19 | testing for key hypotheses, least wiggly path through the Wald region. 20 | Allows instrumental variables estimation following [Freyaldenhoven et 21 | al. (2019)](https://www.aeaweb.org/articles?id=10.1257/aer.20180609). 22 | 23 | ## Installation 24 | 25 | ``` r 26 | # Install from CRAN 27 | install.packages("eventstudyr") 28 | 29 | # Install latest version from GitHub 30 | install.packages("devtools") 31 | devtools::install_github("JMSLab/eventstudyr") 32 | ``` 33 | 34 | ## Usage 35 | 36 | Find a minimal example below. 
For more examples see the package 37 | [documentation](https://cran.r-project.org/package=eventstudyr/eventstudyr.pdf) 38 | and 39 | [vignette](https://cran.r-project.org/package=eventstudyr/vignettes/documentation.html). 40 | 41 | ``` r 42 | library(eventstudyr) 43 | set.seed(10) # for reproducibility of sup-t bands 44 | 45 | estimates_ols <- EventStudy( 46 | estimator = "OLS", 47 | data = example_data, # Use package sample data 48 | outcomevar = "y_smooth_m", 49 | policyvar = "z", 50 | idvar = "id", 51 | timevar = "t", 52 | controls = "x_r", 53 | pre = 0, post = 4 54 | ) 55 | 56 | plt <- EventStudyPlot(estimates = estimates_ols) 57 | plt 58 | ``` 59 | 60 | Event-study plot using the example data of the package
61 |  and default plotting options. 62 | 63 | ## Citation 64 | 65 | Simon Freyaldenhoven, Christian Hansen, Jorge Pérez Pérez, and Jesse M. 66 | Shapiro. “Visualization, Identification, and Estimation in the Panel 67 | Event-Study Design.” [NBER Working Paper 68 | No. 29170](https://www.nber.org/papers/w29170), August 2021. 69 | 70 | Simon Freyaldenhoven, Christian Hansen, Jorge Pérez Pérez, Jesse M. 71 | Shapiro, Veli M. Andirin, Richard Calvo, Santiago Hermo, Nathan Schor, 72 | Emily Wang. “`eventstudyr` package.” Code and data repository at 73 | , March 2023. 74 | 75 | ## Acknowledgments 76 | 77 | Thank you to Eliana Sena Sarmiento and Melissa Wu for their excellent 78 | work testing and reviewing `eventstudyr` prior to its public release. 79 | -------------------------------------------------------------------------------- /R/ComputeFirstDifferences.R: -------------------------------------------------------------------------------- 1 | #' Adds first differences of a variable, robustly to gaps in time variable, as new columns in a panel dataset 2 | #' 3 | #' @param dt Data frame that will be modified. 4 | #' @param idvar Character indicating column of units. 5 | #' @param timevar Character indicating column of time periods. 6 | #' @param diffvar Character indicating column of variable whose first difference will be taken. 7 | #' @param timevar_holes Logical indicating whether the panel contains gaps in the time variable. Defaults to FALSE. 8 | #' 9 | #' @return The passed dataset augmented with columns that reflect the desired first differences 10 | #' 11 | #' @seealso [data.table::shift()] 12 | #' 13 | #' @examples 14 | #' ComputeFirstDifferences( 15 | #' example_data, 16 | #' idvar = "id", 17 | #' timevar = "t", 18 | #' diffvar = "z" 19 | #' ) 20 | #' 21 | #' @importFrom data.table setorderv setnames shift := CJ .SD 22 | #' 23 | #' @keywords internal 24 | #' @noRd 25 | 26 | ComputeFirstDifferences <- function(dt, idvar, timevar, diffvar, 27 | timevar_holes = FALSE) { 28 | if (! data.table::is.data.table(dt)) { 29 | stop("Input data should be a data.table.") 30 | } 31 | for (var in c(idvar, timevar, diffvar)) { 32 | if ((! is.character(var))) { 33 | stop(paste0(var, " should be a character.")) 34 | } 35 | if (! var %in% colnames(dt)) { 36 | stop(paste0(var, " should be the name of a variable in the dataset.")) 37 | } 38 | } 39 | if (! 
is.logical(timevar_holes)) { 40 | stop("timevar_holes should be logical.") 41 | } 42 | 43 | data.table::setorderv(dt, cols = c(idvar, timevar)) 44 | 45 | if (!timevar_holes) { 46 | dt[, paste0(diffvar, "_fd") := get(diffvar) - data.table::shift((get(diffvar))), 47 | by = idvar] 48 | } else { 49 | ## Create dataset with all combinations to compute first differences 50 | all_combinations <- data.table::CJ(unique(dt[[idvar]]), 51 | min(dt[[timevar]]):max(dt[[timevar]])) 52 | data.table::setnames(all_combinations, new = c(idvar, timevar)) 53 | 54 | dt_all <- data.table::merge.data.table(dt, all_combinations, 55 | by = c(idvar, timevar), all = TRUE) 56 | 57 | dt_all[, paste0(diffvar, "_fd") := get(diffvar) - data.table::shift((get(diffvar))), 58 | by = idvar] 59 | 60 | ## Bring first differences back to the original dataset 61 | vars_to_keep <- c(idvar, timevar, paste0(diffvar, "_fd")) 62 | 63 | dt <- data.table::merge.data.table(dt, dt_all[, .SD, .SDcols = vars_to_keep], 64 | by = c(idvar, timevar), all.x = TRUE) 65 | } 66 | 67 | return(dt) 68 | } 69 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, echo = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-" 12 | ) 13 | ``` 14 | 15 | # eventstudyr 16 | 17 | 18 | [![CRAN status](https://www.r-pkg.org/badges/version/eventstudyr)](https://CRAN.R-project.org/package=eventstudyr) 19 | [![CRAN RStudio mirror downloads](https://cranlogs.r-pkg.org/badges/grand-total/eventstudyr)](https://r-pkg.org/pkg/eventstudyr) 20 | 21 | 22 | 23 | ## Overview 24 | 25 | The **eventstudyr** package implements tools for estimating linear panel event study models, following the recommendations in [Freyaldenhoven et al. (2021)](https://www.nber.org/papers/w29170). 26 | Includes sup-t bands, testing for key hypotheses, least wiggly path through the Wald region. 27 | Allows instrumental variables estimation following [Freyaldenhoven et al. (2019)](https://www.aeaweb.org/articles?id=10.1257/aer.20180609). 28 | 29 | ## Installation 30 | 31 | ```{r, eval = FALSE} 32 | # Install from CRAN 33 | install.packages("eventstudyr") 34 | 35 | # Install latest version from GitHub 36 | install.packages("devtools") 37 | devtools::install_github("JMSLab/eventstudyr") 38 | ``` 39 | 40 | ## Usage 41 | 42 | Find a minimal example below. 43 | For more examples see the package [documentation](https://cran.r-project.org/package=eventstudyr/eventstudyr.pdf) and [vignette](https://cran.r-project.org/package=eventstudyr/vignettes/documentation.html). 44 | 45 | ```{r example} 46 | #| fig.alt = "Event-study plot using the example data of the package 47 | #| and default plotting options." 48 | library(eventstudyr) 49 | set.seed(10) # for reproducibility of sup-t bands 50 | 51 | estimates_ols <- EventStudy( 52 | estimator = "OLS", 53 | data = example_data, # Use package sample data 54 | outcomevar = "y_smooth_m", 55 | policyvar = "z", 56 | idvar = "id", 57 | timevar = "t", 58 | controls = "x_r", 59 | pre = 0, post = 4 60 | ) 61 | 62 | plt <- EventStudyPlot(estimates = estimates_ols) 63 | plt 64 | ``` 65 | 66 | ## Citation 67 | 68 | Simon Freyaldenhoven, Christian Hansen, Jorge Pérez Pérez, and Jesse M. Shapiro. "Visualization, Identification, and Estimation in the Panel Event-Study Design." [NBER Working Paper No. 
29170](https://www.nber.org/papers/w29170), 69 | August 2021. 70 | 71 | Simon Freyaldenhoven, Christian Hansen, Jorge Pérez Pérez, Jesse M. Shapiro, Veli M. Andirin, Richard Calvo, Santiago Hermo, Nathan Schor, Emily Wang. "`eventstudyr` package." Code and data repository at https://github.com/JMSLab/eventstudyr, March 2023. 72 | 73 | ## Acknowledgments 74 | 75 | Thank you to Eliana Sena Sarmiento and Melissa Wu for their excellent work testing and reviewing `eventstudyr` prior to its public release. 76 | -------------------------------------------------------------------------------- /tests/testthat/test-PrepareModelFormula.R: -------------------------------------------------------------------------------- 1 | 2 | test_that("outcomevar is the dependent variable for OLS", { 3 | 4 | estimator <- "OLS" 5 | outcomevar <- "y_base" 6 | str_policy_vars <- c("z_lead2", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_lag2") 7 | 8 | fmla <- PrepareModelFormula(estimator, outcomevar, str_policy_vars) 9 | expect_equal(all.vars(fmla)[1], outcomevar) 10 | }) 11 | 12 | test_that("formula does not have an intercept for OLS", { 13 | 14 | estimator <- "OLS" 15 | outcomevar <- "y_base" 16 | str_policy_vars <- c("z_lead2", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_lag2") 17 | 18 | fmla <- PrepareModelFormula(estimator, outcomevar, str_policy_vars) 19 | expect_equal(attr(terms(fmla), "intercept"), 0) 20 | }) 21 | 22 | test_that("vars in str_policy_vars argument are incorporated into formula for OLS", { 23 | 24 | estimator <- "OLS" 25 | outcomevar <- "y_base" 26 | str_policy_vars <- c("z_lead2", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_lag2") 27 | 28 | fmla <- PrepareModelFormula(estimator, outcomevar, str_policy_vars) 29 | 30 | z_vars_mask <- grepl("z_fd|z_lead|z_lag", attr(terms(fmla), "term.labels")) 31 | expect_equal(sum(z_vars_mask), length(str_policy_vars)) 32 | }) 33 | 34 | test_that("controls argument is incorporated into formula", { 35 | 36 | estimator <- "OLS" 37 | outcomevar <- "y_base" 38 | str_policy_vars <- c("z_lead2", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_lag2") 39 | ctrls <- c("x1", "x2") 40 | 41 | fmla <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 42 | controls = ctrls) 43 | controls_in_model <- attr(terms(fmla), "term.labels") %in% ctrls 44 | expect_equal(sum(controls_in_model), length(ctrls)) 45 | }) 46 | 47 | test_that("formula for IV regression is correct", { 48 | estimator <- "FHS" 49 | outcomevar <- "y_base" 50 | str_policy_fd <- c("z_fd", "z_fd_lead2", "z_fd_lead3", "z_fd_lag1", "z_fd_lag2", "z_fd_lag3") 51 | str_policy_lead <- "z_lead3" 52 | str_policy_lag <- "z_lag4" 53 | controls <- "x_r" 54 | proxy <- "eta_m" 55 | proxyIV <- "z_fd_lead3" 56 | 57 | fmla <- PrepareModelFormula(estimator, outcomevar, 58 | str_policy_vars = c(str_policy_fd, str_policy_lead, str_policy_lag), 59 | static = FALSE, 60 | controls, proxy, proxyIV) 61 | 62 | expect_equal(class(fmla), "formula") 63 | expect_equal(deparse(fmla[[2]]), "y_base") 64 | expect_equal(sort(all.vars(fmla[[c(3,2)]])), 65 | sort(c("z_fd", "z_fd_lead2", "z_fd_lag1", "z_fd_lag2", "z_fd_lag3", "z_lead3", "z_lag4", "x_r", "eta_m"))) 66 | expect_equal(sort(all.vars(fmla[[c(3,3)]])), 67 | sort(c("z_fd", "z_fd_lead2", "z_fd_lead3", "z_fd_lag1", "z_fd_lag2", "z_fd_lag3", "z_lead3", "z_lag4", "x_r"))) 68 | }) 69 | 70 | test_that("formula for static model is correct", { 71 | estimator <- "OLS" 72 | outcomevar <- "y_base" 73 | str_policy_vars <- "z" 74 | 75 | fmla <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, static = TRUE) 76 | 77 
| expect_equal(class(fmla), "formula") 78 | expect_equal(deparse(fmla[[2]]), "y_base") 79 | expect_equal(all.vars(fmla[[3]]), "z") 80 | }) 81 | -------------------------------------------------------------------------------- /R/AddSuptBand.R: -------------------------------------------------------------------------------- 1 | 2 | #' Produces sup-t bands for the event-study coefficients. 3 | #' 4 | #' @description Source code from Ryan Kessler (2022). [suptCriticalValue](https://github.com/ryanedmundkessler/suptCriticalValue): Computes critical values 5 | #' underlying simultaneous sup-t confidence bands. R package version 6 | #' 0.1.0. 7 | #' 8 | #' @param estimates The first element extracted from [EventStudy()]. Should be a list. 9 | #' @param num_sim The number of simulations used in generating the sup-t bands. 10 | #' Should be a natural number. Defaults to 1000. 11 | #' @param conf_level The confidence level used for obtaining the sup-t bands critical value. 12 | #' Should be a real number between 0 and 1, inclusive. Defaults to .95. 13 | #' @param eventstudy_coefficients The names of the event-study coefficients. This vector is 14 | #' outputted in the second element of the [EventStudy()] function. Should be a vector of strings. 15 | #' 16 | #' @return A data.frame that contains the upper and lower sup-t band values 17 | #' for each event-study coefficient. 18 | #' @import estimatr 19 | #' @importFrom MASS mvrnorm 20 | #' @keywords internal 21 | #' @noRd 22 | #' 23 | #' @examples 24 | #' eventstudy_estimates <- EventStudy( 25 | #' estimator = "OLS", 26 | #' data = example_data, 27 | #' outcomevar = "y_base", 28 | #' policyvar = "z", 29 | #' idvar = "id", 30 | #' timevar = "t", 31 | #' controls = "x_r", 32 | #' FE = TRUE, 33 | #' TFE = TRUE, 34 | #' post = 3, 35 | #' pre = 2, 36 | #' overidpre = 4, 37 | #' overidpost = 5, 38 | #' normalize = - 3, 39 | #' cluster = TRUE, 40 | #' anticipation_effects_normalization = TRUE 41 | #' ) 42 | #' 43 | #' AddSuptBand( 44 | #' estimates = eventstudy_estimates$output, 45 | #' num_sim = 100, 46 | #' conf_level = .95, 47 | #' eventstudy_coefficients = eventstudy_estimates$arguments$eventstudy_coefficients 48 | #') 49 | 50 | AddSuptBand <- function(estimates, num_sim = 1000, conf_level = .95, eventstudy_coefficients) { 51 | 52 | if (! class(estimates) %in% c("lm_robust", "iv_robust")) { 53 | stop("estimates is not a data frame with coefficient estimates and standard errors") 54 | } 55 | if (! is.numeric(num_sim) | num_sim %% 1 != 0 | num_sim <= 0) {stop("num_sim should be a natural number.")} 56 | if (! is.numeric(conf_level) | conf_level < 0 | conf_level > 1) {stop("conf_level should be a real number between 0 and 1, inclusive.")} 57 | if (! 
is.character(eventstudy_coefficients)) {stop("eventstudy_coefficients should be a character.")} 58 | 59 | vcov_matrix_all <- estimates$vcov 60 | v_terms_to_keep <- colnames(vcov_matrix_all) %in% eventstudy_coefficients 61 | vcov_matrix <- vcov_matrix_all[v_terms_to_keep, v_terms_to_keep] 62 | 63 | v_std_errors <- t(sqrt(diag(vcov_matrix))) 64 | draws <- MASS::mvrnorm(n = num_sim, mu = rep(0, nrow(vcov_matrix)), Sigma = vcov_matrix) 65 | t <- draws / (v_std_errors %x% matrix(rep(1, num_sim))) 66 | 67 | t <- apply(abs(t), 1, FUN = max) 68 | t <- sort(t) 69 | 70 | conf_level_num_sim <- conf_level * num_sim 71 | 72 | if (round(conf_level_num_sim) == conf_level_num_sim) { 73 | critical_value = (t[conf_level_num_sim] + t[conf_level_num_sim + 1]) / 2 74 | } else { 75 | critical_value = t[floor(conf_level_num_sim) + 1] 76 | } 77 | 78 | df_estimates_tidy <- estimatr::tidy(estimates) 79 | 80 | df_estimates_tidy["suptband_lower"] <- df_estimates_tidy$estimate - (critical_value * df_estimates_tidy$std.error) 81 | df_estimates_tidy["suptband_upper"] <- df_estimates_tidy$estimate + (critical_value * df_estimates_tidy$std.error) 82 | 83 | 84 | return(df_estimates_tidy) 85 | 86 | } 87 | -------------------------------------------------------------------------------- /R/ComputeShifts.R: -------------------------------------------------------------------------------- 1 | #' Adds leads or lags of a variable, robustly to gaps in time variable, as new columns in a panel dataset 2 | #' 3 | #' @param df Data frame that will be modified. 4 | #' @param idvar Character indicating column of units. 5 | #' @param timevar Character indicating column of time periods. 6 | #' @param shiftvar Character indicating column of variable that will be shifted. 7 | #' @param shiftvalues Numeric vector specifying the leads/lags to be computed. For example, c(-1, 0, 1) will compute the lead, current, and lag values. 8 | #' @param timevar_holes Logical indicating whether the panel contains gaps in the time variable. Defaults to FALSE. 9 | #' 10 | #' @return The passed dataset augmented with columns that reflect the desired shift values 11 | 12 | #' @seealso [data.table::shift()] 13 | #' 14 | #' @examples 15 | #' ComputeShifts( 16 | #' example_data, 17 | #' idvar = "id", 18 | #' timevar = "t", 19 | #' shiftvar = "z", 20 | #' shiftvalues = -2:2 21 | #' ) 22 | #' 23 | #' 24 | #' @importFrom data.table setorderv shift := CJ .SD 25 | #' @keywords internal 26 | #' @noRd 27 | 28 | ComputeShifts <- function(df, idvar, timevar, shiftvar, shiftvalues, 29 | timevar_holes = FALSE) { 30 | if (! data.table::is.data.table(df)) { 31 | stop("df should be a data.table.") 32 | } 33 | for (var in c(idvar, timevar, shiftvar)) { 34 | if ((! is.character(var))) { 35 | stop(paste0(var, " should be a character.")) 36 | } 37 | if (! var %in% colnames(df)) { 38 | stop(paste0(var, " should be the name of a variable in the dataset.")) 39 | } 40 | } 41 | if (! is.numeric(shiftvalues)) { 42 | stop("shiftvalues should be numeric.") 43 | } 44 | if (0 %in% shiftvalues & length(shiftvalues) == 1) { 45 | stop("shiftvalues must be different than 0 to compute leads/lags.") 46 | } 47 | if (! 
is.logical(timevar_holes)) { 48 | stop("timevar_holes should be logical.") 49 | } 50 | 51 | data.table::setorderv(df, cols = c(idvar, timevar)) 52 | 53 | lags <- shiftvalues[shiftvalues > 0] 54 | leads <- -1*shiftvalues[shiftvalues <= 0] 55 | 56 | compute_shifts <- function(dat, idvar, shiftvar, shiftvalues, lags, leads) { 57 | 58 | if (any(shiftvalues > 0)) { 59 | dat[, paste0(shiftvar, "_lag", lags) := 60 | data.table::shift(get(shiftvar), lags, type = "lag"), 61 | by = c(idvar)] 62 | } 63 | if (any(shiftvalues <= 0)) { 64 | dat[, paste0(shiftvar, "_lead", leads) := 65 | data.table::shift(get(shiftvar), leads, type = "lead"), 66 | by = c(idvar)] 67 | } 68 | return(dat) 69 | } 70 | 71 | if (!timevar_holes) { 72 | df = compute_shifts(df, idvar, shiftvar, shiftvalues, lags, leads) 73 | } else { 74 | ## Create dataset with all combinations to compute shifts 75 | all_combinations <- CJ(unique(df[[idvar]]), 76 | min(df[[timevar]]):max(df[[timevar]])) 77 | setnames(all_combinations, new = c(idvar, timevar)) 78 | 79 | vars_to_keep <- c(idvar, timevar, shiftvar) 80 | 81 | df_all <- merge(df[, .SD, .SDcols = vars_to_keep], all_combinations, 82 | by = c(idvar, timevar), all = TRUE) 83 | 84 | df_all = compute_shifts(df_all, idvar, shiftvar, shiftvalues, lags, leads) 85 | 86 | ## Bring shifts back to the original dataset 87 | df <- merge(df, df_all, 88 | by = c(idvar, timevar, shiftvar), all.x = TRUE) 89 | } 90 | return(df) 91 | } 92 | -------------------------------------------------------------------------------- /R/AddSmPath.R: -------------------------------------------------------------------------------- 1 | #' Calculates smoothest path 2 | #' 3 | #' @description Function to add smoothest path to dataframe with coefficients 4 | #' 5 | #' @param df Dataset with coefficients prepared for plotting (must include coefficients normalized in estimation). 6 | #' @param coefficients Event-study coefficients (must include coefficients normalized in estimation). 7 | #' @param inv_covar Inverse of covariance matrix of coefficients (must include row and column of zeros for normalized coefficients). 8 | #' @param conf_level Confidence level to define critical value of Wald region. Should be a real number between 0 and 1, inclusive. Defaults to 0.95. 9 | #' @param maxorder Sets a maximum polynomial order that will be used when calculating lowest possible polynomial order. Should be a whole number. Defaults to 10. 10 | #' @param maxiter_solver Sets the maximum number of iterations when searching for the smoothest path with minimum squared term in highest order coefficient. Should be a positive whole number. Defaults to 2e6. 
11 | #' 12 | #' @return df with smoothest path added as a new column 13 | #' @importFrom pracma inv pinv 14 | #' @importFrom stats qchisq 15 | #' @keywords internal 16 | #' @noRd 17 | 18 | AddSmPath <- function(df, coefficients, inv_covar, 19 | conf_level = 0.95, maxorder = 10, maxiter_solver = 2e6){ 20 | 21 | if (!is.numeric(conf_level) | conf_level < 0 | conf_level > 1) { 22 | stop("Argument 'conf_level' should be a real number between 0 and 1, inclusive.") 23 | } 24 | if (!(maxorder%%1 == 0) | maxorder < 0 | maxorder > 10) { 25 | stop("Argument 'maxorder' should be an integer between 0 and 10.") 26 | } 27 | if (!is.data.frame(df)) { 28 | stop("Argument 'df' should be a dataframe.") 29 | } 30 | if (!is.matrix(inv_covar)) { 31 | stop("Argument 'inv_covar' should be a matrix.") 32 | } 33 | if (!(maxiter_solver%%1 == 0) | maxiter_solver < 0) { 34 | stop("Argument 'maxiter_solver' should be a positive integer.") 35 | } 36 | unselect_message <- "Please change the 'Smpath' argument in 'EventStudyPlot' to FALSE." 37 | 38 | coeff_length <- length(coefficients) 39 | norm_idxs <- which(coefficients == 0) 40 | Wcritic <- stats::qchisq(conf_level, coeff_length - length(norm_idxs)) 41 | pN <- length(norm_idxs) 42 | 43 | # First step: Find lowest possible polynomial order 44 | res_order <- FindOrder(coefficients, inv_covar, Wcritic, maxorder) 45 | order <- res_order$order 46 | res_order <- res_order$results 47 | 48 | cat(paste0("Smoothest path note: ", 49 | "The lowest order such that a polynomial is in confidence region is ", 50 | order+1, ".\n")) 51 | 52 | # Second step: Find minimum coefficient on highest-order term 53 | if (order == 0) { 54 | 55 | Fmat <- GetFmat(coeff_length, 0) 56 | vstar <- matrix(0) 57 | } else if (order == maxorder) { 58 | 59 | stop(paste0("Search for smoothest path reached the maximum order of ", maxorder,". ", unselect_message)) 60 | } else { 61 | 62 | Fmat <- GetFmat(coeff_length, order) 63 | 64 | if (pN < order) { 65 | vstar <- FindCoeffs(res_order, coefficients, inv_covar, Wcritic, pN, order, norm_idxs, Fmat, maxiter_solver) 66 | 67 | } else if (pN == order) { 68 | vstar <- FindCoeffsEq(res_order, coefficients, inv_covar, Wcritic, pN, order, norm_idxs, Fmat, maxiter_solver) 69 | 70 | } else { 71 | stop(paste0("The smoothest path cannot be found because the number of normalized coefficients is larger than the minimum order. ", 72 | unselect_message)) 73 | } 74 | } 75 | 76 | sm_path = Fmat %*% vstar 77 | Woptim = (t(sm_path - coefficients)%*%inv_covar)%*%(sm_path - coefficients) 78 | 79 | df["smoothest_path"] = sm_path 80 | return(df) 81 | } 82 | -------------------------------------------------------------------------------- /R/PrepareModelFormula.R: -------------------------------------------------------------------------------- 1 | 2 | #' Prepares a formula object for use in [EventStudyOLS()] or [EventStudyFHS()] 3 | 4 | #' @param estimator Accepts one of "OLS" or "FHS". If "FHS" is specified, implements IV estimator in Freyaldenhoven et al. 2019. 5 | #' @param outcomevar Character indicating column of outcome variable. 6 | #' @param str_policy_vars Character vector indicating event-study variables. 7 | #' @param controls Character indicating optional vector of controls. 8 | #' @param proxy Character indicating column of variable that is thought to be affected by the confound but not by the policy. 9 | #' Should be specified if and only if estimator is specified as "FHS". 10 | #' @param proxyIV Character of column to be used as an instrument. 
Should be specified if and only if estimator is specified as "FHS". 11 | #' If NULL, defaults to the strongest lead of the policy variable based on the first stage. 12 | #' @param static Indicates whether the model to be estimated is static. Defaults to FALSE. 13 | #' @return A formula object to be passed to EventStudy 14 | #' 15 | #' @importFrom stats reformulate as.formula 16 | #' @keywords internal 17 | #' @noRd 18 | #' 19 | #' @examples 20 | #' # For a static model: 21 | #' PrepareModelFormula(estimator = "OLS", outcomevar = "y_base", 22 | #' str_policy_vars = "z", static = T) 23 | #' 24 | #' # For a dynamic model: 25 | #' PrepareModelFormula(estimator = "OLS", outcomevar = "y_base", 26 | #' str_policy_vars = c("z_lead3", "z_fd_lead3", "z_fd_lead2", 27 | #' "z_fd_lag1", "z_fd_lag2", "z_lag3"), 28 | #' controls = "x_r") 29 | #' 30 | #' # If you would like to use IV regression: 31 | #' PrepareModelFormula(estimator = "FHS", 32 | #' outcomevar = "y_base", 33 | #' str_policy_vars = c("z_lead3", "z_fd_lead3", "z_fd_lead2", 34 | #' "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3"), 35 | #' controls = "x_r", 36 | #' proxy = "eta_m", 37 | #' proxyIV = "z_fd_lead3") 38 | #' 39 | 40 | PrepareModelFormula <- function(estimator, outcomevar, 41 | str_policy_vars, static = FALSE, 42 | controls = NULL, proxy = NULL, proxyIV = NULL) { 43 | 44 | if (! estimator %in% c("OLS", "FHS")) {stop("estimator should be either 'OLS' or 'FHS'.")} 45 | if (! is.character(outcomevar)) {stop("outcomevar should be a character.")} 46 | if (! is.character(str_policy_vars)) {stop("str_policy_vars should be a character.")} 47 | if (! (is.null(controls) | is.character(controls))) {stop("controls should be either NULL or a character.")} 48 | if (is.null(proxyIV) & estimator == "FHS") {stop("proxyIV must be specified when estimator is FHS.")} 49 | if (is.null(proxy) & estimator == "FHS") {stop("proxy must be specified when estimator is FHS.")} 50 | 51 | if (! is.logical(static) ) {stop("static should be a logical.")} 52 | if ( static & length(str_policy_vars) > 1) {stop("str_policy_vars must have one variable with static = TRUE.")} 53 | if (! static & length(str_policy_vars) <= 1) {stop("str_policy_vars must have more than one variable with static = FALSE.")} 54 | if ( static & !is.null(proxyIV)) {stop("static model is not compatible with FHS estimator.")} 55 | 56 | if (estimator == "OLS") { 57 | reg_formula <- stats::reformulate( 58 | termlabels = c(str_policy_vars, controls), 59 | response = outcomevar, 60 | intercept = FALSE 61 | ) 62 | } 63 | 64 | if (estimator == "FHS") { 65 | exogenous <- c(str_policy_vars, controls) 66 | exogenous <- exogenous[exogenous != proxy] 67 | exogenous <- exogenous[exogenous != proxyIV] 68 | 69 | reg_formula <- stats::as.formula( 70 | paste(outcomevar, "~", 71 | paste(c(exogenous, proxy), collapse="+"), 72 | "|", 73 | paste(c(exogenous, proxyIV), collapse="+")) 74 | ) 75 | } 76 | 77 | return(reg_formula) 78 | } 79 | -------------------------------------------------------------------------------- /man/EventStudyPlot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EventStudyPlot.R 3 | \name{EventStudyPlot} 4 | \alias{EventStudyPlot} 5 | \title{Creates an Event-Study Plot Following the Suggestions in Freyaldenhoven et al. 
(2021)} 6 | \usage{ 7 | EventStudyPlot( 8 | estimates, 9 | xtitle = "Event time", 10 | ytitle = "Coefficient", 11 | ybreaks = NULL, 12 | conf_level = 0.95, 13 | supt = 0.95, 14 | num_sim = 1000, 15 | add_mean = FALSE, 16 | pre_event_coeffs = TRUE, 17 | post_event_coeffs = TRUE, 18 | add_zero_line = TRUE, 19 | smpath = FALSE 20 | ) 21 | } 22 | \arguments{ 23 | \item{estimates}{The output from calling \code{\link[=EventStudy]{EventStudy()}}. Should be a list of length 2.} 24 | 25 | \item{xtitle}{The title for the x-axis. Should be a string. Defaults to "Event time".} 26 | 27 | \item{ytitle}{The title for the y-axis. Should be a string. Defaults to "Coefficient".} 28 | 29 | \item{ybreaks}{A vector containing the desired breaks for the y-axis. 30 | Defaults to NULL, which means the breaks are computed automatically. 31 | If custom breaks are selected with the \code{add_mean} argument set to TRUE, then the breaks must include zero.} 32 | 33 | \item{conf_level}{Confidence level used for confidence interval 34 | expressed as a real number between 0 and 1, inclusive. Defaults to 0.95.} 35 | 36 | \item{supt}{The confidence level used for obtaining the sup-t bands critical value. 37 | Should be a real number between 0 and 1, inclusive. Defaults to .95. Sup-t bands are simulation-based, 38 | so you must set a seed if you would like your sup-t band results to be reproducible (see examples).} 39 | 40 | \item{num_sim}{The number of simulations used in generating the sup-t bands. 41 | Should be a natural number. Defaults to 1000.} 42 | 43 | \item{add_mean}{Adds the mean of the dependent variable in the period used for normalization. 44 | Should be TRUE or FALSE. Defaults to FALSE.} 45 | 46 | \item{pre_event_coeffs}{If TRUE, uses pre and overidpre from estimates to test for pre-trends. 47 | Should be TRUE or FALSE. Defaults to TRUE.} 48 | 49 | \item{post_event_coeffs}{If TRUE, uses post and overidpost from estimates to test for leveling-off. 50 | Should be TRUE or FALSE. Defaults to TRUE.} 51 | 52 | \item{add_zero_line}{Whether or not to plot a dashed horizontal line at y = 0. 53 | Should be TRUE or FALSE. Defaults to TRUE, meaning the line is plotted.} 54 | 55 | \item{smpath}{Plot smoothest path of confounder that rationalizes event study coefficients. 56 | Should be TRUE or FALSE. Defaults to FALSE.} 57 | } 58 | \value{ 59 | The Event-Study plot as a ggplot2 object. 60 | } 61 | \description{ 62 | \code{EventStudyPlot} takes the output from \code{\link[=EventStudy]{EventStudy()}} and combines it with additional optional arguments to facilitate constructing an Event-Study Plot. 
63 | } 64 | \examples{ 65 | 66 | # 67 | 68 | # Minimal examples 69 | ### OLS 70 | 71 | estimates_ols <- EventStudy( 72 | estimator = "OLS", 73 | data = example_data, 74 | outcomevar = "y_smooth_m", 75 | policyvar = "z", 76 | idvar = "id", 77 | timevar = "t", 78 | controls = "x_r", 79 | FE = TRUE, TFE = TRUE, 80 | post = 3, overidpost = 5, 81 | pre = 2, overidpre = 4, 82 | normalize = - 3 83 | ) 84 | 85 | plt_ols <- EventStudyPlot(estimates = estimates_ols) 86 | plt_ols 87 | 88 | ### IV 89 | 90 | estimates_fhs <- EventStudy( 91 | estimator = "FHS", 92 | data = example_data, 93 | outcomevar = "y_smooth_m", 94 | policyvar = "z", 95 | idvar = "id", 96 | timevar = "t", 97 | proxy = "x_r", 98 | post = 2, overidpost = 1, 99 | pre = 0, overidpre = 3, 100 | normalize = -1 101 | ) 102 | 103 | plt_fhs <- EventStudyPlot(estimates = estimates_fhs) 104 | plt_fhs 105 | 106 | # Optional arguments 107 | 108 | ### Change x- and y-axis titles and set ybreaks 109 | EventStudyPlot(estimates = estimates_ols, 110 | xtitle = "Relative time", ytitle = "", 111 | ybreaks = seq(-2, 1, 0.5)) 112 | 113 | ### Add smoothest path 114 | EventStudyPlot(estimates = estimates_ols, smpath = TRUE) 115 | 116 | ### Add y-mean to y-axis and line y = 0 117 | EventStudyPlot(estimates = estimates_ols, add_mean = TRUE, 118 | add_zero_line = TRUE) 119 | 120 | ### Do not plot supt bands 121 | EventStudyPlot(estimates = estimates_ols, supt = NULL) 122 | 123 | ### Setting seed prior to plotting sup-t bands 124 | set.seed(1234) 125 | EventStudyPlot(estimates = estimates_ols) 126 | 127 | # Modify plots using ggplot2 functions 128 | library(ggplot2) 129 | 130 | ### Change color of dots, horizontal line, and theme 131 | plt_ols + 132 | geom_point(color = "red") + 133 | geom_hline(color = "gray", yintercept = 0) + 134 | theme_light() + 135 | theme(panel.grid.minor.x = element_blank()) 136 | 137 | } 138 | -------------------------------------------------------------------------------- /R/EventStudyOLS.R: -------------------------------------------------------------------------------- 1 | #' Runs Ordinary Least Squares (OLS) with optional fixed effects and clustering 2 | #' 3 | #' @param prepared_model_formula A formula object created in [PrepareModelFormula()] that is passed to [EventStudy()]. 4 | #' @param prepared_data Data frame containing all of the parameters required for [EventStudy()] plus leads and 5 | #' lags of the first differenced policy variable and leads and lags of the policy variable. 6 | #' @param idvar Character indicating column of units. 7 | #' @param timevar Character indicating column of time periods. 8 | #' @param FE Specifies if unit fixed-effects should be included. Defaults to TRUE. 9 | #' @param TFE Specifies if time fixed-effects should be included. Defaults to TRUE. 10 | #' @param cluster Specifies whether to use clustered errors by units. If FALSE, will use unclustered 11 | #' heteroskedasticity-robust standard errors. Defaults to TRUE. Must be TRUE if FE is TRUE. 12 | #' 13 | #' @return A data.frame that contains the estimates for the event study coefficients. 
14 | #' @import estimatr 15 | #' @keywords internal 16 | #' @noRd 17 | #' 18 | #' @examples 19 | #' model_formula <- PrepareModelFormula( 20 | #' estimator = "OLS", 21 | #' outcomevar = "y_base", 22 | #' str_policy_fd = c("z_fd", "z_fd_lead2", "z_fd_lead3", "z_fd_lag1", "z_fd_lag2"), 23 | #' str_policy_lead = "z_lead3", 24 | #' str_policy_lag = "z_lag3", 25 | #' controls = "x_r" 26 | #' ) 27 | #' 28 | #' EventStudyOLS( 29 | #' prepared_model_formula = model_formula, 30 | #' prepared_data = df_EventStudyOLS_example, 31 | #' idvar = "id", 32 | #' timevar = "t", 33 | #' FE = TRUE, 34 | #' TFE = TRUE, 35 | #' cluster = TRUE 36 | #') 37 | 38 | EventStudyOLS <- function(prepared_model_formula, prepared_data, 39 | idvar, timevar, FE, TFE, cluster) { 40 | 41 | if (! inherits(prepared_model_formula, "formula")) {stop("prepared_model_formula should be a formula")} 42 | if (! is.data.frame(prepared_data)) {stop("data should be a data frame.")} 43 | if (! is.character(idvar)) {stop("idvar should be a character.")} 44 | if (! is.character(timevar)) {stop("timevar should be a character.")} 45 | if (! is.logical(FE)) {stop("FE should be either TRUE or FALSE.")} 46 | if (! is.logical(TFE)) {stop("TFE should be either TRUE or FALSE.")} 47 | if (! is.logical(cluster)) {stop("cluster should be either TRUE or FALSE.")} 48 | 49 | if (FE & TFE & cluster) { 50 | 51 | ols_output <- estimatr::lm_robust( 52 | formula = prepared_model_formula, 53 | data = prepared_data, 54 | fixed_effects = ~ get(idvar) + get(timevar), 55 | clusters = get(idvar), 56 | se_type = "stata" 57 | ) 58 | 59 | } else if ((!FE) & TFE & cluster) { 60 | 61 | ols_output <- estimatr::lm_robust( 62 | formula = prepared_model_formula, 63 | data = prepared_data, 64 | fixed_effects = ~ get(timevar), 65 | clusters = get(idvar), 66 | se_type = "stata" 67 | ) 68 | 69 | } else if (FE & (!TFE) & cluster) { 70 | 71 | ols_output <- estimatr::lm_robust( 72 | formula = prepared_model_formula, 73 | data = prepared_data, 74 | fixed_effects = ~ get(idvar), 75 | clusters = get(idvar), 76 | se_type = "stata" 77 | ) 78 | 79 | } else if ((!FE) & (!TFE) & cluster) { 80 | 81 | ols_output <- estimatr::lm_robust( 82 | formula = prepared_model_formula, 83 | data = prepared_data, 84 | clusters = get(idvar), 85 | se_type = "stata" 86 | ) 87 | 88 | } else if (FE & TFE & (!cluster)) { 89 | 90 | ols_output <- estimatr::lm_robust( 91 | formula = prepared_model_formula, 92 | data = prepared_data, 93 | fixed_effects = ~ get(idvar) + get(timevar), 94 | se_type = "stata" 95 | ) 96 | 97 | } else if ((!FE) & TFE & (!cluster)) { 98 | 99 | ols_output <- estimatr::lm_robust( 100 | formula = prepared_model_formula, 101 | data = prepared_data, 102 | fixed_effects = ~ get(timevar), 103 | se_type = "stata" 104 | ) 105 | 106 | } else if (FE & (!TFE) & (!cluster)) { 107 | 108 | ols_output <- estimatr::lm_robust( 109 | formula = prepared_model_formula, 110 | data = prepared_data, 111 | fixed_effects = ~ get(idvar), 112 | se_type = "stata" 113 | ) 114 | 115 | } else if ((!FE) & (!TFE) & (!cluster)) { 116 | 117 | ols_output <- estimatr::lm_robust( 118 | formula = prepared_model_formula, 119 | data = prepared_data, 120 | se_type = "stata" 121 | ) 122 | } 123 | 124 | return(ols_output) 125 | } 126 | -------------------------------------------------------------------------------- /tests/testthat/test-TestLinear.R: -------------------------------------------------------------------------------- 1 | test_that("correctly recognizes wrong variable type for estimate argument", { 2 | estimate <- 
EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 3 | policyvar = "z", idvar = "id", timevar = "t", 4 | controls = "x_r", FE = TRUE, TFE = TRUE, 5 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 6 | 7 | df_estimate <- estimatr::tidy(estimate[[1]]) 8 | 9 | test = "z_fd_lag1 = z_fd" 10 | 11 | expect_error(TestLinear(df_estimate, test, pretrends = T, leveling_off = T)) 12 | expect_error(TestLinear(estimate[[1]], test, pretrends = T, leveling_off = T)) 13 | expect_error(TestLinear(estimate[[2]], test, pretrends = T, leveling_off = T)) 14 | }) 15 | 16 | test_that("correctly recognizes wrong variable type for pretrends", { 17 | estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 18 | policyvar = "z", idvar = "id", timevar = "t", 19 | controls = "x_r", FE = TRUE, TFE = TRUE, 20 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 21 | 22 | expect_error(TestLinear(estimate, pretrends = "pretrends")) 23 | expect_error(TestLinear(estimate, pretrends = 1)) 24 | }) 25 | 26 | test_that("correctly recognizes wrong variable type for leveling_off", { 27 | estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 28 | policyvar = "z", idvar = "id", timevar = "t", 29 | controls = "x_r", FE = TRUE, TFE = TRUE, 30 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 31 | 32 | expect_error(TestLinear(estimate, leveling_off = "leveling_off")) 33 | expect_error(TestLinear(estimate, leveling_off = 1)) 34 | }) 35 | 36 | test_that("produces only functions that are specified", { 37 | estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 38 | policyvar = "z", idvar = "id", timevar = "t", 39 | controls = "x_r", FE = TRUE, TFE = TRUE, 40 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 41 | 42 | test = "z_fd_lag1 = z_fd" 43 | 44 | df <- TestLinear(estimate, pretrends = F, leveling_off = F) 45 | expect_equal(length(df$Test), 0) 46 | 47 | df <- TestLinear(estimate, test, pretrends = F, leveling_off = F) 48 | expect_equal(length(df$Test), 1) 49 | expect_equal(df$Test, "User Test") 50 | 51 | df <- TestLinear(estimate, pretrends = T, leveling_off = F) 52 | expect_equal(length(df$Test), 1) 53 | expect_equal(df$Test, "Pre-Trends") 54 | 55 | df <- TestLinear(estimate, pretrends = F, leveling_off = T) 56 | expect_equal(length(df$Test), 1) 57 | expect_equal(df$Test, "Leveling-Off") 58 | 59 | df <- TestLinear(estimate, test, pretrends = T, leveling_off = F) 60 | expect_equal(length(df$Test), 2) 61 | tests <- c("User Test", "Pre-Trends") 62 | expect_equal(df$Test, tests) 63 | 64 | df <- TestLinear(estimate, test, pretrends = F, leveling_off = T) 65 | expect_equal(length(df$Test), 2) 66 | tests <- c("User Test", "Leveling-Off") 67 | expect_equal(df$Test, tests) 68 | 69 | df <- TestLinear(estimate, pretrends = T, leveling_off = T) 70 | expect_equal(length(df$Test), 2) 71 | tests <- c("Pre-Trends", "Leveling-Off") 72 | expect_equal(df$Test, tests) 73 | 74 | df <- TestLinear(estimate, test, pretrends = T, leveling_off = T) 75 | expect_equal(length(df$Test), 3) 76 | tests <- c("User Test", "Pre-Trends", "Leveling-Off") 77 | expect_equal(df$Test, tests) 78 | }) 79 | 80 | 81 | test_that("checks equality
with STATA", { 82 | estimate <- suppressWarnings( 83 | EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 84 | policyvar = "z", idvar = "id", timevar = "t", 85 | FE = TRUE, TFE = TRUE, 86 | post = 2, pre = 2, overidpre = 2, 87 | overidpost = 2, normalize = - 1, 88 | cluster = TRUE, anticipation_effects_normalization = TRUE) 89 | ) 90 | 91 | codes <- read.csv('input/pvalues.txt', header = F) 92 | 93 | pretrends_stata_p <- codes[1,1] 94 | leveloff_stata_p <- codes[2,1] 95 | 96 | df_test_linear <- TestLinear(estimate, pretrends = T, leveling_off = T) 97 | pretrends_p_value <- df_test_linear[df_test_linear["Test"] == "Pre-Trends", "p.value"] 98 | levelingoff_p_value <- df_test_linear[df_test_linear["Test"] == "Leveling-Off", "p.value"] 99 | 100 | expect_true(abs(pretrends_stata_p - pretrends_p_value) < 0.0001) 101 | expect_true(abs(leveloff_stata_p - levelingoff_p_value) < 0.0001) 102 | }) 103 | -------------------------------------------------------------------------------- /tests/testthat/test-ComputeShifts.R: -------------------------------------------------------------------------------- 1 | test_that("correctly recognizes wrong variable types", { 2 | df_test <- read.csv("./input/df_test.csv") 3 | 4 | # Character variables 5 | expect_error(ComputeShifts(df_test, 1, "periods", "values", 3, F, T)) 6 | expect_error(ComputeShifts(df_test, "country", 1, "values", 3, F, T)) 7 | expect_error(ComputeShifts(df_test, "country", "periods", 1, 3, F, T)) 8 | expect_error(ComputeShifts(df_test, "country", "periods", "policy", 3, F, T)) 9 | 10 | # Logical variables 11 | expect_error(ComputeShifts(df_test, "country", "periods", "values", 3, "T", F)) 12 | expect_error(ComputeShifts(df_test, "country", "periods", "values", 3, T, "T")) 13 | 14 | # Numeric variables 15 | expect_error(ComputeShifts(df_test, "country", "periods", "values", "2", F, T)) 16 | }) 17 | 18 | test_that("correctly recognizes variables missing from dataset", { 19 | df_test <- read.csv("./input/df_test.csv") 20 | 21 | # Character variables 22 | expect_error(ComputeShifts(df_test, "country", "periods", "policy", 3, F, T)) 23 | expect_error(ComputeShifts(df_test, "state", "periods", "values", 3, F, T)) 24 | }) 25 | 26 | test_that("correctly adds the desired column(s) of shifts", { 27 | df_test <- read.csv("./input/df_test.csv") 28 | cols_original <- colnames(df_test) 29 | 30 | # Add 1 lead/lag 31 | dt_test <- data.table::as.data.table(df_test) 32 | cols_new <- colnames(ComputeShifts(dt_test, idvar = "country", timevar = "periods", 33 | shiftvar = "values", 34 | shiftvalues = 2)) 35 | expect_equal(cols_new, c(cols_original, "values_lag2")) 36 | 37 | dt_test <- data.table::as.data.table(df_test) 38 | cols_new <- colnames(ComputeShifts(dt_test, idvar = "country", timevar = "periods", 39 | shiftvar = "values", 40 | shiftvalues = -2)) 41 | expect_equal(cols_new, c(cols_original, "values_lead2")) 42 | 43 | # Add multiple leads/lags 44 | dt_test <- data.table::as.data.table(df_test) 45 | cols_new <- colnames(ComputeShifts(dt_test, idvar = "country", timevar = "periods", 46 | shiftvar = "values", 47 | shiftvalues = 1:2)) 48 | expect_equal(cols_new, c(cols_original, "values_lag1", "values_lag2")) 49 | 50 | dt_test <- data.table::as.data.table(df_test) 51 | cols_new <- colnames(ComputeShifts(dt_test, idvar = "country", timevar = "periods", 52 | shiftvar = "values", 53 | shiftvalues = -2:-1)) 54 | expect_equal(cols_new, c(cols_original, "values_lead2", "values_lead1")) 55 | }) 56 | 57 | test_that("the columns added have correct 
suffixes", { 58 | df_test <- data.table::fread("./input/df_test.csv") 59 | 60 | df_lags <- ComputeShifts(df_test, idvar = "country", timevar = "periods", 61 | shiftvar = "values", 62 | shiftvalues = 1:2) 63 | 64 | v_newvars <- setdiff(colnames(df_lags), colnames(df_test)) 65 | 66 | expect_true(all(grepl("_lag", v_newvars))) 67 | 68 | df_leads <- ComputeShifts(df_test, idvar = "country", timevar = "periods", 69 | shiftvar = "values", 70 | shiftvalues = -2:-1) 71 | 72 | v_newvars <- setdiff(colnames(df_leads), colnames(df_test)) 73 | 74 | expect_true(all(grepl("_lead", v_newvars))) 75 | }) 76 | 77 | test_that("correctly shifts variable when there are no holes in timevar", { 78 | df <- data.table::data.table( 79 | id = c(rep("A", 4), rep("B", 2), rep("C", 3)), 80 | time = c(1, 2, 3, 4, 1, 2, 1, 2, 3), 81 | z = c(10, 12, 13, 14, 8, 9, 10, 11, 12), 82 | z_lag1 = c(NA, 10, 12, 13, NA, 8, NA, 10, 11), 83 | z_lead1 = c(12, 13, 14, NA, 9, NA, 11, 12, NA) 84 | ) 85 | 86 | df_shifts <- ComputeShifts(df[, c("id", "time", "z")], 87 | idvar = "id", timevar = "time", 88 | shiftvar = "z", 89 | shiftvalues = c(-1, 1), 90 | timevar_holes = FALSE) 91 | 92 | expect_equal(df$z_lead1, df_shifts$z_lead1) 93 | expect_equal(df$z_lag1, df_shifts$z_lag1) 94 | 95 | expect_true(all(c("id", "time", "z") %in% colnames(df_shifts))) 96 | }) 97 | 98 | test_that("correctly shifts variable when there are holes in timevar", { 99 | df <- data.table::data.table( 100 | id = c(rep("A", 4), rep("B", 2), rep("C", 3)), 101 | time = c(1, 2, 4, 5, 2, 3, 2, 3, 4), 102 | z = c(10, 12, 13, 14, 8, 9, 10, 11, 12), 103 | z_lag1 = c(NA, 10, NA, 13, NA, 8, NA, 10, 11), 104 | z_lead1 = c(12, NA, 14, NA, 9, NA, 11, 12, NA) 105 | ) 106 | 107 | df_shifts <- ComputeShifts(df[, c("id", "time", "z")], 108 | idvar = "id", timevar = "time", 109 | shiftvar = "z", 110 | shiftvalues = c(-1, 1), 111 | timevar_holes = TRUE) 112 | 113 | expect_equal(df$z_lead1, df_shifts$z_lead1) 114 | expect_equal(df$z_lag1, df_shifts$z_lag1) 115 | 116 | expect_true(all(c("id", "time", "z") %in% colnames(df_shifts))) 117 | }) 118 | -------------------------------------------------------------------------------- /R/TestLinear.R: -------------------------------------------------------------------------------- 1 | #' Perform Tests of Linear Hypotheses 2 | #' 3 | #' @description `TestLinear` tests linear restrictions on coefficients. 4 | #' 5 | #' @param estimates A list of length 2 containing estimation results and model information. 6 | #' Should be an output of [EventStudy()]. 7 | #' @param test The hypothesis to be estimated. Accepts inputs that can be passed to 8 | #' hypothesis.matrix argument in `car::linearHypothesis()`. 9 | #' @param test_name Name for test input by user. Defaults to "User Test." 10 | #' @param pretrends If TRUE, uses pre and overidpre from estimates to test for pre-trends. 11 | #' Defaults to TRUE. 12 | #' @param leveling_off If TRUE, uses post and overidpost from estimates to test for leveling-off. 13 | #' Defaults to TRUE. 14 | #' 15 | #' @return A data frame containing the F-statistic and p-value for the specified test(s). 
16 | #' @importFrom car linearHypothesis 17 | #' @export 18 | #' 19 | #' @examples 20 | #' estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 21 | #' policyvar = "z", idvar = "id", timevar = "t", 22 | #' controls = "x_r", FE = TRUE, TFE = TRUE, 23 | #' post = 3, pre = 2, overidpre = 4, overidpost = 5, 24 | #' normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 25 | #' 26 | #' TestLinear( 27 | #' estimates, 28 | #' test = "z_fd_lag1 = z_fd", 29 | #' test_name = "Hypothesis Test", 30 | #' pretrends = TRUE, 31 | #' leveling_off = TRUE 32 | #' ) 33 | #' 34 | 35 | 36 | TestLinear <- function(estimates, test = NA, test_name = "User Test", pretrends = TRUE, leveling_off = TRUE){ 37 | if (! is.list(estimates) | length(estimates) != 2){ 38 | stop("estimates should be a list of length two, an output of EventStudy()")} 39 | if ((! class(estimates$output) %in% c("lm_robust", "iv_robust")) | ! is.list(estimates$output)) { 40 | stop("The first element of estimates should be a list of class 'lm_robust' with coefficient estimates and standard errors") 41 | } 42 | if (! is.list(estimates$arguments) | ! is.list(estimates$arguments)) { 43 | stop("The second element of estimates should be a list with argument definitions, an output of EventStudy().") 44 | } 45 | if (! is.character(test_name)) {stop("test_name should be of class character. Defaults to 'User Test'.")} 46 | if (! is.logical(pretrends)) {stop("pretrends should be a logical. Default value is TRUE")} 47 | if (! is.logical(leveling_off)) {stop("leveling_off should be a logical. Default value is TRUE")} 48 | 49 | if(estimates$arguments$cluster == TRUE){ 50 | 51 | estimates$output$df.residual <- estimates$output$nclusters - 1 52 | 53 | } 54 | 55 | coefficients <- estimates$arguments$eventstudy_coefficients 56 | 57 | if (!is.null(estimates$arguments$proxyIV)){ 58 | coefficients <- coefficients[coefficients != estimates$arguments$proxyIV] 59 | } 60 | 61 | test_results <- data.frame(row.names = c("Test", "F.statistic", "p.value")) 62 | 63 | if (!is.na(test)){ 64 | user_results <- car::linearHypothesis(estimates$output, test, test = "F") 65 | 66 | temp <- data.frame("Test" = test_name, 67 | "F" = user_results[2, ]$F, 68 | "p.value" = user_results[2, ]$`Pr(>F)`) 69 | test_results <- rbind(test_results, temp) 70 | } 71 | 72 | if (pretrends == TRUE){ 73 | 74 | G <- estimates$arguments$pre 75 | L_G <- estimates$arguments$overidpre 76 | k <- as.character(seq.int(G+1, (G+L_G))) 77 | 78 | suffix <- paste0("_lead",k) 79 | 80 | suffix_len <- str_length(suffix)[1] 81 | 82 | delta_k <- coefficients[str_sub(coefficients, start= -suffix_len) %in% suffix] 83 | 84 | pretrends_hyp <- paste0(delta_k, "=0") 85 | 86 | pretrends_results <- car::linearHypothesis(estimates$output, pretrends_hyp, test = "F") 87 | 88 | temp <- data.frame("Test" = "Pre-Trends", 89 | "F" = pretrends_results[2, ]$F, 90 | "p.value" = pretrends_results[2, ]$`Pr(>F)`) 91 | 92 | test_results <- rbind(test_results, temp) 93 | } 94 | 95 | if (leveling_off == TRUE){ 96 | 97 | M <- estimates$arguments$post 98 | L_M <- estimates$arguments$overidpost 99 | k <- as.character(seq.int(M+1, M+L_M)) 100 | 101 | suffix_M <- paste0("_lag",as.character(M)) 102 | suffix_Mk <- paste0("_lag",k) 103 | 104 | suffix_M_len <- str_length(suffix_M) 105 | suffix_Mk_len <- str_length(suffix_Mk)[1] 106 | 107 | delta_M <- coefficients[str_sub(coefficients, start= -suffix_M_len) %in% suffix_M] 108 | delta_Mk <- coefficients[str_sub(coefficients, start= -suffix_Mk_len) 
%in% suffix_Mk] 109 | 110 | leveling_off_hyp <- paste0(delta_Mk, "=", delta_M) 111 | 112 | leveling_results <- car::linearHypothesis(estimates$output, leveling_off_hyp, test = "F") 113 | 114 | temp <- data.frame("Test" = "Leveling-Off", 115 | "F" = leveling_results[2, ]$F, 116 | "p.value" = leveling_results[2, ]$`Pr(>F)`) 117 | 118 | test_results <- rbind(test_results, temp) 119 | } 120 | 121 | return(test_results) 122 | } 123 | 124 | -------------------------------------------------------------------------------- /vignettes/documentation.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "documentation" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{documentation} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>" 14 | ) 15 | ``` 16 | 17 | ```{r setup} 18 | library(eventstudyr) 19 | ``` 20 | 21 | ## Event Studies 22 | 23 | Linear panel models, and the event-study plots that often accompany them, are popular tools for learning about policy effects. 24 | 25 | ## eventstudyr 26 | 27 | `eventstudyr` facilitates estimating linear panel event-study models and constructing event-study plots following the suggestions in [Freyaldenhoven _et al._ (2021)](https://www.nber.org/system/files/working_papers/w29170/w29170.pdf). In particular, it provides functionality for: 28 | 29 | * depicting cumulative estimated effects of a policy relative to a user-controlled normalized period 30 | * adding confidence intervals and sup-t confidence bands 31 | * testing for the presence of "pre-trends" and stable dynamics post-event 32 | * plotting the least "wiggly" confound consistent with the estimates 33 | 34 | ## Data 35 | 36 | We will use the `example_data` dataset to demonstrate the basic functionality of `eventstudyr`. These sample data are from the [replication archive](https://data.nber.org/data-appendix/w29170/) for Freyaldenhoven et al. (2021). The documentation can be accessed using `?example_data`. 37 | 38 | ```{r Data preview} 39 | 40 | dim(example_data) 41 | head(example_data) 42 | ``` 43 | 44 | 45 | ## EventStudy() 46 | 47 | `EventStudy()` estimates the regression model from Equation (2) in Freyaldenhoven et al. (2021) and returns a list object that stores the estimation results (as an `lm_robust` object) together with the arguments given in the function call. It requires the names of the outcome, policy, unit (ID), and time variables. One must also specify `post`, the number of periods after which past values of the policy are no longer supposed to affect the outcome, and `pre`, the number of periods beyond which future values of the policy are not supposed to affect the outcome today. The function optionally accepts the controls, the event-time window, whether fixed effects should be included, and the period to be used for normalization.
48 | 49 | Here is an example using the sample data: 50 | 51 | ```{r Basic Eventstudy Example - Show Code, eval = FALSE} 52 | results <- EventStudy(estimator = "OLS", 53 | data = example_data, 54 | outcomevar = "y_jump_m", 55 | policyvar = "z", 56 | idvar = "id", 57 | timevar = "t", 58 | post = 3, 59 | pre = 0) 60 | ``` 61 | ```{r Basic Eventstudy Example - Run Code, echo = FALSE} 62 | results <- EventStudy(estimator = "OLS", 63 | data = example_data, 64 | outcomevar = "y_jump_m", 65 | policyvar = "z", 66 | idvar = "id", 67 | timevar = "t", 68 | post = 3, 69 | pre = 0) 70 | ``` 71 | ```{r Basic Eventstudy Example - Show Results 1, echo=TRUE, eval=TRUE} 72 | summary(results$output) 73 | ``` 74 |
75 | Click for `results$arguments` 76 | ```{r} 77 | ## Estimator 78 | results$arguments$estimator 79 | 80 | ## Data 81 | results$arguments$data[1:5,] 82 | 83 | ## Variables 84 | results$arguments$outcomevar 85 | results$arguments$outcomevar 86 | results$arguments$policyvar 87 | results$arguments$idvar 88 | results$arguments$timevar 89 | results$arguments$controls 90 | 91 | ## Proxies 92 | results$arguments$proxy 93 | results$arguments$proxyIV 94 | 95 | ## Fixed effects 96 | results$arguments$FE 97 | results$arguments$TFE 98 | 99 | ## Periods 100 | results$arguments$post 101 | results$arguments$overidpost 102 | results$arguments$pre 103 | results$arguments$overidpre 104 | 105 | ## Normalization 106 | results$arguments$normalize 107 | results$arguments$normalization_column 108 | 109 | ## Cluster 110 | results$arguments$cluster 111 | 112 | ## Eventstudy coefficients 113 | results$arguments$eventstudy_coefficients 114 | ``` 115 |
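The fitted model stored under `output` also works with standard post-estimation tools. As a minimal sketch (mirroring the examples in `?EventStudy`), `estimatr::tidy()` turns the estimates into a data frame of event-study coefficients and standard errors:

```{r Tidy estimates sketch}
# Event-study coefficients with standard errors and confidence intervals,
# returned as a data.frame (uses `results` from the EventStudy() call above)
estimatr::tidy(results$output)
```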
116 | 117 | ## EventStudyPlot() 118 | `EventStudyPlot()` prepares an event-study plot based on the suggestions in Freyaldenhoven et al. (2021). 119 | 120 | This function is designed to use the output of `EventStudy()` and returns a ggplot object. Here is an example of using the function with some default settings: 121 | 122 | ```{r EventStudyPlot example 1, fig.dim = c(7, 5)} 123 | eventstudy_estimates_ols <- EventStudy(estimator = "OLS", 124 | data = example_data, 125 | outcomevar = "y_jump_m", 126 | policyvar = "z", 127 | idvar = "id", 128 | timevar = "t", 129 | post = 3, 130 | pre = 0) 131 | 132 | EventStudyPlot(estimates = eventstudy_estimates_ols, 133 | xtitle = "Event time", 134 | ytitle = "Coefficient") 135 | ``` 136 | 137 | ## References 138 | 139 | Freyaldenhoven, S., Hansen, C., Pérez, J.P. and Shapiro, J.M., 2021. Visualization, identification, and estimation in the linear panel event-study design (No. w29170). National Bureau of Economic Research. 140 | -------------------------------------------------------------------------------- /R/EventStudyFHS.R: -------------------------------------------------------------------------------- 1 | #' Implements proxy Instrumental Variables (IV) estimator proposed in [Freyaldenhoven Hansen and Shapiro (FHS, 2019)](https://www.aeaweb.org/articles?id=10.1257/aer.20180609) 2 | #' 3 | #' @param prepared_model_formula A formula object created in [PrepareModelFormula()] that is passed to [EventStudy()]. 4 | #' @param prepared_data Data frame containing all of the parameters required for [EventStudy()] plus leads and 5 | #' lags of the first differenced policy variable and leads and lags of the policy variable. 6 | #' @param idvar Character indicating column of units. 7 | #' @param timevar Character indicating column of time periods. 8 | #' @param FE Specifies if unit fixed-effects should be included. Defaults to TRUE. 9 | #' @param TFE Specifies if time fixed-effects should be included. Defaults to TRUE. 10 | #' @param cluster Specifies whether to use clustered errors by units. If FALSE, will use unclustered 11 | #' heteroskedasticity-robust standard errors. Defaults to TRUE. Must be TRUE if FE is TRUE. 12 | #' 13 | #' @return A data.frame that contains the estimates for the event study coefficients. 14 | #' @import estimatr 15 | #' @importFrom stats qnorm pnorm 16 | #' @keywords internal 17 | #' @noRd 18 | #' 19 | #' @examples 20 | #' model_formula <- PrepareModelFormula( 21 | #' estimator = "FHS", 22 | #' outcomevar = "y_base", 23 | #' str_policy_vars = c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", 24 | #' "z_fd_lag1", "z_fd_lag2", "z_lag3"), 25 | #' static = FALSE, 26 | #' controls = "x_r", 27 | #' proxy = "eta_m", 28 | #' proxyIV = "z_fd_lead3" 29 | #') 30 | #' 31 | #' EventStudyFHS( 32 | #' prepared_model_formula = model_formula, 33 | #' prepared_data = df_EventStudyFHS_example, 34 | #' idvar = "id", 35 | #' timevar = "t", 36 | #' FE = TRUE, 37 | #' TFE = TRUE, 38 | #' cluster = TRUE 39 | #') 40 | 41 | EventStudyFHS <- function(prepared_model_formula, prepared_data, 42 | idvar, timevar, FE, TFE, cluster) { 43 | 44 | if (! inherits(prepared_model_formula, "formula")) {stop("prepared_model_formula should be a formula")} 45 | if (! is.data.frame(prepared_data)) {stop("data should be a data frame.")} 46 | if (! is.character(idvar)) {stop("idvar should be a character.")} 47 | if (! is.character(timevar)) {stop("timevar should be a character.")} 48 | if (! is.logical(FE)) {stop("FE should be either TRUE or FALSE.")} 49 | if (!
is.logical(TFE)) {stop("TFE should be either TRUE or FALSE.")} 50 | if (! is.logical(cluster)) {stop("cluster should be either TRUE or FALSE.")} 51 | if (FE & !cluster) {stop("cluster=TRUE required when FE=TRUE.")} 52 | 53 | 54 | if (FE & TFE & cluster) { 55 | 56 | fhs_output <- estimatr::iv_robust( 57 | formula = prepared_model_formula, 58 | data = prepared_data, 59 | clusters = get(idvar), 60 | fixed_effects = ~ get(idvar) + get(timevar), 61 | se_type="stata") 62 | 63 | N <- fhs_output$nobs 64 | n <- fhs_output$nclusters 65 | K <- length(fhs_output$felevels$`get(timevar)`) + fhs_output$rank 66 | 67 | fhs_output$std.error <- fhs_output$std.error / sqrt((N - K) / (N - n - K + 1)) 68 | fhs_output$statistic <- fhs_output$coefficients / fhs_output$std.error 69 | fhs_output$p.value <- 2*stats::pnorm(abs(fhs_output$statistic), lower.tail = FALSE) 70 | fhs_output$conf.low <- fhs_output$coefficients - stats::qnorm(0.975) * fhs_output$std.error 71 | fhs_output$conf.high <- fhs_output$coefficients + stats::qnorm(0.975) * fhs_output$std.error 72 | fhs_output$vcov <- fhs_output$vcov / ((N - K) / (N - n - K + 1)) 73 | 74 | } else if (FE & (!TFE) & cluster) { 75 | 76 | fhs_output <- estimatr::iv_robust( 77 | formula = prepared_model_formula, 78 | data = prepared_data, 79 | clusters = get(idvar), 80 | fixed_effects = ~ get(idvar), 81 | se_type="stata") 82 | 83 | N <- fhs_output$nobs 84 | n <- fhs_output$nclusters 85 | K <- 1 + fhs_output$rank 86 | 87 | fhs_output$std.error <- fhs_output$std.error / sqrt((N - K)/(N - n - K + 1)) 88 | fhs_output$statistic <- fhs_output$coefficients / fhs_output$std.error 89 | fhs_output$p.value <- 2*stats::pnorm(abs(fhs_output$statistic), lower.tail = FALSE) 90 | fhs_output$conf.low <- fhs_output$coefficients - stats::qnorm(0.975) * fhs_output$std.error 91 | fhs_output$conf.high <- fhs_output$coefficients + stats::qnorm(0.975) * fhs_output$std.error 92 | fhs_output$vcov <- fhs_output$vcov / ((N - K) / (N - n - K + 1)) 93 | 94 | } else if ((!FE) & TFE & (!cluster)) { 95 | 96 | fhs_output <- estimatr::iv_robust( 97 | formula = prepared_model_formula, 98 | data = prepared_data, 99 | fixed_effects = ~ get(timevar), 100 | se_type="stata") 101 | 102 | } else if ((!FE) & TFE & cluster) { 103 | 104 | fhs_output <- estimatr::iv_robust( 105 | formula = prepared_model_formula, 106 | data = prepared_data, 107 | clusters = get(idvar), 108 | fixed_effects = ~ get(timevar), 109 | se_type="stata") 110 | 111 | } else if ((!FE) & (!TFE) & (!cluster)) { 112 | 113 | fhs_output <- estimatr::iv_robust( 114 | formula = prepared_model_formula, 115 | data = prepared_data, 116 | se_type = "stata") 117 | 118 | } else if ((!FE) & (!TFE) & cluster) { 119 | 120 | fhs_output <- estimatr::iv_robust( 121 | formula = prepared_model_formula, 122 | data = prepared_data, 123 | clusters = get(idvar), 124 | se_type = "stata") 125 | } 126 | 127 | return(fhs_output) 128 | } 129 | 130 | -------------------------------------------------------------------------------- /tests/testthat/test-AddCIs.R: -------------------------------------------------------------------------------- 1 | test_that("correctly calculates conf_level at 0.95", { 2 | df_test <- data.table::fread("./input/df_test_AddCI.csv") 3 | 4 | policyvar <- "z" 5 | eventstudy_coefficients <- c("z_fd_lag1", "z_fd_lead1") 6 | 7 | df_test_CI <- AddCIs(df_test, eventstudy_coefficients, 0.95) 8 | 9 | expected_lower <- 2 - 1.959964 10 | expected_upper <- 2 + 1.959964 11 | 12 | expect_equal(df_test_CI$ci_lower[df_test_CI$term == "z_fd_lead1"], expected_lower, 
tolerance = 1e-6) 13 | expect_equal(df_test_CI$ci_upper[df_test_CI$term == "z_fd_lead1"], expected_upper, tolerance = 1e-6) 14 | }) 15 | 16 | # OLS ------------------------------------------ 17 | 18 | test_that("correctly recognizes wrong class for estimate argument", { 19 | estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 20 | policyvar = "z", idvar = "id", timevar = "t", 21 | controls = "x_r", FE = TRUE, TFE = TRUE, 22 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 23 | 24 | policyvar <- "z" 25 | eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients 26 | 27 | expect_error(AddCIs(df_test, eventstudy_coefficients, 0.95)) 28 | }) 29 | 30 | test_that("correctly recognizes missing columns in estimates argument", { 31 | estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 32 | policyvar = "z", idvar = "id", timevar = "t", 33 | controls = "x_r", FE = TRUE, TFE = TRUE, 34 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 35 | 36 | df_test <- estimatr::tidy(estimates$output) 37 | 38 | eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients 39 | 40 | df_test_noterm <- df_test %>% rename(wrongname = term) 41 | df_test_noest <- df_test %>% rename(wrongname = estimate) 42 | df_test_nostd <- df_test %>% rename(wrongname = std.error) 43 | 44 | expect_error(AddCIs(df_test_noterm, eventstudy_coefficients, 0.95)) 45 | expect_error(AddCIs(df_test_noest , eventstudy_coefficients, 0.95)) 46 | expect_error(AddCIs(df_test_nostd , eventstudy_coefficients, 0.95)) 47 | }) 48 | 49 | test_that("correctly recognizes wrong inputs for conf_level argument", { 50 | estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 51 | policyvar = "z", idvar = "id", timevar = "t", 52 | controls = "x_r", FE = TRUE, TFE = TRUE, 53 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 54 | 55 | df_test <- estimatr::tidy(estimates$output) 56 | 57 | eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients 58 | 59 | expect_error(AddCIs(df_test, eventstudy_coefficients, "95")) 60 | expect_error(AddCIs(df_test, eventstudy_coefficients, 95)) 61 | expect_error(AddCIs(df_test, eventstudy_coefficients,-.95)) 62 | }) 63 | 64 | # FHS ------------------------------------------ 65 | 66 | test_that("correctly recognizes wrong class for estimate argument", { 67 | data <- example_data[, c("y_base", "z", "id", "t", "x_r", "eta_m")] 68 | estimates <- EventStudy(estimator = "FHS", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", 69 | timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, 70 | overidpost = 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = FALSE) 71 | 72 | eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients 73 | 74 | expect_error(AddCIs(df_test, eventstudy_coefficients, 0.95)) 75 | }) 76 | 77 | test_that("correctly recognizes missing columns in estimates argument", { 78 | data <- example_data[, c("y_base", "z", "id", "t", "x_r", "eta_m")] 79 | estimates <- EventStudy(estimator = "FHS", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", 80 | timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, 81 | overidpost 
= 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = FALSE) 82 | 83 | df_test <- estimatr::tidy(estimates$output) 84 | 85 | eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients 86 | 87 | df_test_noterm <- df_test %>% rename(wrongname = term) 88 | df_test_noest <- df_test %>% rename(wrongname = estimate) 89 | df_test_nostd <- df_test %>% rename(wrongname = std.error) 90 | 91 | expect_error(AddCIs(df_test_noterm, eventstudy_coefficients, 0.95)) 92 | expect_error(AddCIs(df_test_noest , eventstudy_coefficients, 0.95)) 93 | expect_error(AddCIs(df_test_nostd , eventstudy_coefficients, 0.95)) 94 | }) 95 | 96 | test_that("correctly recognizes wrong inputs for conf_level argument", { 97 | data <- example_data[, c("y_base", "z", "id", "t", "x_r", "eta_m")] 98 | estimates <- EventStudy(estimator = "FHS", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", 99 | timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, 100 | overidpost = 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = FALSE) 101 | 102 | df_test <- estimatr::tidy(estimates$output) 103 | 104 | eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients 105 | 106 | expect_error(AddCIs(df_test, eventstudy_coefficients, "95")) 107 | expect_error(AddCIs(df_test, eventstudy_coefficients, 95)) 108 | expect_error(AddCIs(df_test, eventstudy_coefficients,-.95)) 109 | }) 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /man/EventStudy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EventStudy.R 3 | \name{EventStudy} 4 | \alias{EventStudy} 5 | \title{Estimates Equation (2) in Freyaldenhoven et al. (2021)} 6 | \usage{ 7 | EventStudy( 8 | estimator, 9 | data, 10 | outcomevar, 11 | policyvar, 12 | idvar, 13 | timevar, 14 | controls = NULL, 15 | proxy = NULL, 16 | proxyIV = NULL, 17 | FE = TRUE, 18 | TFE = TRUE, 19 | post, 20 | overidpost = 1, 21 | pre, 22 | overidpre = post + pre, 23 | normalize = -1 * (pre + 1), 24 | cluster = TRUE, 25 | anticipation_effects_normalization = TRUE, 26 | allow_duplicate_id = FALSE, 27 | avoid_internal_copy = FALSE 28 | ) 29 | } 30 | \arguments{ 31 | \item{estimator}{Accepts one of "OLS" or "FHS". If "OLS" is specified, implements Ordinary Least Squares. If "FHS" is specified, implements Instrumental Variables (IV) estimator proposed in \href{https://www.aeaweb.org/articles?id=10.1257/aer.20180609}{Freyaldenhoven Hansen Shapiro (FHS, 2019)}.} 32 | 33 | \item{data}{Data frame containing the variables of interest.} 34 | 35 | \item{outcomevar}{Character indicating column of outcome variable y.} 36 | 37 | \item{policyvar}{Character indicating column of policy variable z.} 38 | 39 | \item{idvar}{Character indicating column of units.} 40 | 41 | \item{timevar}{Character indicating column of time periods.} 42 | 43 | \item{controls}{Optional character vector indicating a set of control variables q.} 44 | 45 | \item{proxy}{Character indicating column of variable that is thought to be affected by the confound but not by the policy. 46 | Should be specified if and only if estimator is specified as "FHS".} 47 | 48 | \item{proxyIV}{Character of column to be used as an instrument. Should be specified if and only if estimator is specified as "FHS". 
49 | If NULL, defaults to the strongest lead of the policy variable based on the first stage.} 50 | 51 | \item{FE}{Logical indicating whether unit fixed-effects should be included. Defaults to TRUE.} 52 | 53 | \item{TFE}{Logical indicating whether time fixed-effects should be included. Defaults to TRUE.} 54 | 55 | \item{post}{Whole number indicating the number of periods in the past before which the past values of the policy 56 | are not supposed to affect the value of the outcome. Corresponds to M in equation (2) of 57 | \href{https://www.nber.org/papers/w29170}{Freyaldenhoven et al. (2021)}.} 58 | 59 | \item{overidpost}{Optional whole number indicating the number of event times after "post" to be included in estimation. 60 | Defaults to 1. 61 | Corresponds to L_M in equation (2) of \href{https://www.nber.org/papers/w29170}{Freyaldenhoven et al. (2021)}.} 62 | 63 | \item{pre}{Whole number indicating the number of periods in the future after which the future values of the policy are 64 | not supposed to affect the value of the outcome today. Corresponds to G in equation (2) of 65 | \href{https://www.nber.org/papers/w29170}{Freyaldenhoven et al. (2021)}.} 66 | 67 | \item{overidpre}{Optional whole number indicating the number of event times earlier than -"pre" to be included in estimation. 68 | Defaults to "post" + "pre". 69 | Corresponds to L_G in equation (2) of \href{https://www.nber.org/papers/w29170}{Freyaldenhoven et al. (2021)}.} 70 | 71 | \item{normalize}{Specifies the event-time coefficient to be normalized. Defaults to - pre - 1.} 72 | 73 | \item{cluster}{Logical indicating whether to use clustered errors by units. If FALSE, will use unclustered heteroskedasticity-robust standard errors. 74 | Defaults to TRUE. Must be TRUE if FE is TRUE.} 75 | 76 | \item{anticipation_effects_normalization}{If set to TRUE, runs the default process and switches coefficient to be normalized to 0 77 | when there are anticipation effects. If set to FALSE, does not make the switch. Defaults to TRUE.} 78 | 79 | \item{allow_duplicate_id}{If TRUE, the function estimates a regression where duplicated ID-time rows are weighted by their duplication count. If FALSE, the function raises an error if duplicate unit-time keys exist in the input data. Default is FALSE.} 80 | 81 | \item{avoid_internal_copy}{If TRUE, the function avoids making an internal deep copy of the input data, and instead directly modifies the input data.table. Default is FALSE.} 82 | } 83 | \value{ 84 | A list that contains, under "output", the estimation output as an lm_robust object, and under "arguments", the arguments passed to the function. 85 | } 86 | \description{ 87 | \code{EventStudy} uses regression methods to estimate the effect of a policy on a given outcome. 
88 | } 89 | \examples{ 90 | 91 | # A minimal example 92 | eventstudy_model <- 93 | EventStudy( 94 | estimator = "OLS", 95 | data = example_data, 96 | outcomevar = "y_base", 97 | policyvar = "z", 98 | idvar = "id", 99 | timevar = "t", 100 | pre = 0, post = 3, 101 | normalize = -1 102 | ) 103 | 104 | ### Access estimated model 105 | eventstudy_model$output 106 | 107 | summary(eventstudy_model$output) 108 | 109 | ### data.frame of estimates 110 | estimatr::tidy(eventstudy_model$output) 111 | 112 | ### Access arguments 113 | eventstudy_model$arguments 114 | 115 | # A dynamic OLS model with anticipation effects and controls 116 | eventstudy_model_dyn <- 117 | EventStudy( 118 | estimator = "OLS", 119 | data = example_data, 120 | outcomevar = "y_base", 121 | policyvar = "z", 122 | idvar = "id", 123 | timevar = "t", 124 | controls = "x_r", 125 | FE = TRUE, TFE = TRUE, 126 | post = 3, overidpost = 5, 127 | pre = 2, overidpre = 4, 128 | normalize = - 3, 129 | cluster = TRUE, 130 | anticipation_effects_normalization = TRUE 131 | ) 132 | 133 | summary(eventstudy_model_dyn$output) 134 | 135 | # A static model 136 | eventstudy_model_static <- 137 | EventStudy( 138 | estimator = "OLS", 139 | data = example_data, 140 | outcomevar = "y_jump_m", 141 | policyvar = "z", 142 | idvar = "id", 143 | timevar = "t", 144 | FE = TRUE, TFE = TRUE, 145 | post = 0, overidpost = 0, 146 | pre = 0, overidpre = 0, 147 | cluster = TRUE 148 | ) 149 | 150 | summary(eventstudy_model_static$output) 151 | 152 | # A dynamic model with an unbalanced panel 153 | data_unbal <- example_data[1:(nrow(example_data)-1),] # drop last row to make unbalanced 154 | 155 | eventstudy_model_unbal <- 156 | EventStudy( 157 | estimator = "OLS", 158 | data = data_unbal, 159 | outcomevar = "y_base", 160 | policyvar = "z", 161 | idvar = "id", 162 | timevar = "t", 163 | pre = 0, post = 3, 164 | normalize = -1 165 | ) 166 | 167 | summary(eventstudy_model_unbal$output) 168 | 169 | # A dynamic model estimated using IV 170 | eventstudy_model_iv <- 171 | EventStudy( 172 | estimator = "FHS", 173 | data = example_data, 174 | outcomevar = "y_base", 175 | policyvar = "z", 176 | idvar = "id", 177 | timevar = "t", 178 | proxy = "x_r", 179 | FE = TRUE, TFE = TRUE, 180 | post = 2, overidpost = 1, 181 | pre = 0, overidpre = 3, 182 | normalize = -1, 183 | cluster = TRUE 184 | ) 185 | 186 | summary(eventstudy_model_iv$output) 187 | 188 | } 189 | -------------------------------------------------------------------------------- /R/PreparePlottingData.R: -------------------------------------------------------------------------------- 1 | 2 | #' Orders the eventstudy coefficients and generates the x-axis labels 3 | #' 4 | #' @param df_tidy_estimates A data.frame created from applying \link[estimatr]{tidy} 5 | #' to the estimation output from [EventStudy()]. 6 | #' At a minimum, it contains a column called "term" with the name for the coefficient and a 7 | #' column called "estimate" that contains the corresponding estimate. Should be a data.frame. 8 | #' @param policyvar Character indicating column of policy variable z. 9 | #' @param post Whole number indicating the number of periods in the past before which the past values of the policy 10 | #' are not supposed to affect the value of the outcome. Corresponds to M in equation (2) of 11 | #' [Freyaldenhoven et al. (2021)](https://www.nber.org/system/files/working_papers/w29170/w29170.pdf). 12 | #' @param overidpost Optional whole number indicating the number of event times after "post" to be included in estimation. 
Defaults to 1. 13 | #' Corresponds to L_M in equation (2) of [Freyaldenhoven et al. (2021)](https://www.nber.org/system/files/working_papers/w29170/w29170.pdf). 14 | #' @param pre Whole number indicating the number of periods in the future after which the future values of the policy are 15 | #' not supposed to affect the value of the outcome today. Corresponds to G in equation (2) of 16 | #' [Freyaldenhoven et al. (2021)](https://www.nber.org/system/files/working_papers/w29170/w29170.pdf). 17 | #' @param overidpre Optional whole number indicating the number of event times earlier than -"pre" to be included in estimation. Defaults to "post" + "pre". 18 | #' Corresponds to L_G in equation (2) of [Freyaldenhoven et al. (2021)](https://www.nber.org/system/files/working_papers/w29170/w29170.pdf). 19 | #' @param normalization_column The name of the column containing the coefficient that will 20 | #' be set to 0 in the eventstudy plot. Should be a character. 21 | #' @param proxyIV Character of column to be used as an instrument. Should be specified if and only if estimator is specified as "FHS". 22 | #' If NULL, defaults to the strongest lead of the policy variable based on the first stage. 23 | #' 24 | #' @return A data.frame that contains the x-axis labels, y-axis estimates, 25 | #' and optional plot aesthetics to be used in creating the eventstudy plot 26 | #' @import stringr 27 | #' @importFrom stats setNames 28 | #' @import estimatr 29 | #' 30 | #' @keywords internal 31 | #' @noRd 32 | #' 33 | #' @examples 34 | #' 35 | #' tidy_eventstudy_estimates <- estimatr::tidy(EventStudy(estimator = "OLS", 36 | #' data = example_data, 37 | #' outcomevar = "y_base", 38 | #' policyvar = "z", 39 | #' idvar = "id", 40 | #' timevar = "t", 41 | #' controls = "x_r", 42 | #' FE = TRUE, 43 | #' TFE = TRUE, 44 | #' post = 3, overidpost = 5, 45 | #' pre = 2, overidpre = 4, 46 | #' normalize = - 3, 47 | #' anticipation_effects_normalization = TRUE)$output) 48 | #' 49 | #' PreparePlottingData(df_tidy_estimates = tidy_eventstudy_estimates, 50 | #' policyvar = "z", 51 | #' post = 3, overidpost = 5, 52 | #' pre = 2, overidpre = 4, 53 | #' normalization_column = "z_fd_lead3", 54 | #' proxyIV = NULL) 55 | #' 56 | #' # If you would like to use IV regression: 57 | #' data <- example_data[, c("y_base", "z", "id", "t", "x_r", "eta_m")] 58 | #' 59 | #' tidy_eventstudy_estimates <- estimatr::tidy(EventStudy(estimator = "FHS", 60 | #' data = data, 61 | #' outcomevar = "y_base", 62 | #' policyvar = "z", 63 | #' idvar = "id", 64 | #' timevar = "t", 65 | #' controls = "x_r", 66 | #' proxy = "eta_m", 67 | #' FE = TRUE, 68 | #' TFE = TRUE, 69 | #' post = 1, overidpost = 2, 70 | #' pre = 1, overidpre = 2, 71 | #' normalize = -1, 72 | #' anticipation_effects_normalization = TRUE)$output) 73 | #' 74 | #' PreparePlottingData(df_tidy_estimates = tidy_eventstudy_estimates, 75 | #' policyvar = "z", 76 | #' post = 1, overidpost = 2, 77 | #' pre = 1, overidpre = 2, 78 | #' normalization_column = "z_fd_lead2", 79 | #' proxyIV = "z_fd_lead3") 80 | #' 81 | 82 | PreparePlottingData <- function(df_tidy_estimates, policyvar, 83 | post, overidpost, pre, overidpre, normalization_column, 84 | proxyIV = NULL) { 85 | 86 | if (! is.data.frame(df_tidy_estimates)) {stop("data should be a data frame.")} 87 | if (! is.character(policyvar)) {stop("policyvar should be a character.")} 88 | if (! (is.numeric(post) & post >= 0 & post %% 1 == 0)) {stop("post should be a whole number.")} 89 | if (! 
(is.numeric(overidpost) & overidpost >= 0 & overidpost %% 1 == 0)) {stop("overidpost should be a whole number.")} 90 | if (! (is.numeric(pre) & pre >= 0 & pre %% 1 == 0)) {stop("pre should be a whole number.")} 91 | if (! (is.numeric(overidpre) & overidpre >= 0 & overidpre %% 1 == 0)) {stop("overidpre should be a whole number.")} 92 | if (! is.character(normalization_column)) {stop("normalization_column should be a character.")} 93 | if (normalization_column %in% df_tidy_estimates$term) {stop("normalization_column should not be one of the strings in the 'term' column.")} 94 | if (! (is.null(proxyIV) | is.character(proxyIV))) {stop("proxyIV should be either a character or NULL.")} 95 | 96 | largest_lead <- pre + overidpre 97 | largest_lag <- post + overidpost - 1 98 | 99 | integer_regex <- "[0-9]+$" 100 | 101 | first_lead <- paste0(policyvar, "_lead", largest_lead) 102 | first_lead_integer <- as.integer(stringr::str_extract(first_lead, integer_regex)) + 1 103 | first_lead_label <- paste0("-", first_lead_integer, "+") 104 | 105 | v_leads <- paste0(policyvar, "_fd_lead", largest_lead:1) 106 | v_leads_integer <- stringr::str_extract(v_leads, integer_regex) 107 | v_leads_label <- paste0("-", v_leads_integer) 108 | 109 | t_0_term <- paste0(policyvar, "_fd") 110 | t_0_term_label <- "0" 111 | 112 | v_lags <- paste0(policyvar, "_fd_lag", 1:largest_lag) 113 | v_lags_label <- stringr::str_extract(v_lags, integer_regex) 114 | 115 | last_lag <- paste0(policyvar, "_lag", largest_lag + 1) 116 | last_lag_integer <- stringr::str_extract(last_lag, integer_regex) 117 | last_lag_label <- paste0(last_lag_integer, "+") 118 | 119 | v_terms_to_plot_ordered <- c(first_lead, v_leads, t_0_term, v_lags, last_lag) 120 | v_terms_to_plot_labels <- c(first_lead_label, v_leads_label, t_0_term_label, v_lags_label, last_lag_label) 121 | 122 | v_all_terms <- df_tidy_estimates[["term"]] 123 | v_plotting_terms <- v_all_terms %in% v_terms_to_plot_ordered 124 | 125 | df_plotting <- df_tidy_estimates[v_plotting_terms, ] 126 | 127 | v_zeroes <- rep(0, ncol(df_plotting) - 2) 128 | v_names_for_zeroes <- names(df_plotting)[3:ncol(df_plotting)] 129 | v_normalization_other <- stats::setNames(v_zeroes, v_names_for_zeroes) 130 | 131 | supt_or_ci_present <- v_names_for_zeroes %in% c("suptband_lower", "suptband_upper", "ci_lower", "ci_upper") 132 | 133 | if (sum(supt_or_ci_present) > 0) { 134 | 135 | v_normalization_other[which(supt_or_ci_present)] <- NA 136 | 137 | } 138 | 139 | df_normalization_column <- data.frame( 140 | "term" = normalization_column, 141 | "estimate" = 0, 142 | as.list(v_normalization_other) 143 | ) 144 | 145 | df_plotting <- rbind(df_plotting, df_normalization_column) 146 | 147 | if (!is.null(proxyIV)) { 148 | 149 | proxyIV_integer <- stringr::str_extract(proxyIV, integer_regex) 150 | proxyIV_lead_or_lag <- stringr::str_extract(proxyIV, "lead|lag") 151 | 152 | proxyIV_sign <- switch (proxyIV_lead_or_lag, 153 | "lead" = "-", 154 | "lag" = "+" 155 | ) 156 | 157 | df_proxyIV_column <- data.frame( 158 | "term" = proxyIV, 159 | "estimate" = 0, 160 | as.list(v_normalization_other) 161 | ) 162 | 163 | proxyIV_label <- paste0(proxyIV_integer, proxyIV_sign) 164 | 165 | df_plotting <- rbind(df_plotting, df_proxyIV_column) 166 | 167 | } 168 | 169 | 170 | df_plotting["label"] <- factor(df_plotting$term, levels = v_terms_to_plot_ordered, labels = v_terms_to_plot_labels) 171 | 172 | return(df_plotting) 173 | 174 | } 175 | -------------------------------------------------------------------------------- /R/SmPathHelpers.R: 
-------------------------------------------------------------------------------- 1 | # Add zero where normalized coefficient(s) should be in covar matrix 2 | AddZerosCovar <- function(vcov_matrix_all, eventstudy_coeffs, norm_column, 3 | coeffs_order) { 4 | 5 | v_terms_to_keep <- colnames(vcov_matrix_all) %in% eventstudy_coeffs 6 | covar <- vcov_matrix_all[v_terms_to_keep, v_terms_to_keep] 7 | 8 | n_coefs = length(coeffs_order) 9 | needed_zeros = length(norm_column) 10 | 11 | # Add row and col of zeros at the end 12 | ZerosRight = matrix(0, ncol = needed_zeros, nrow = nrow(covar)) 13 | ZerosBottom = matrix(0, ncol = n_coefs, nrow = needed_zeros) 14 | covar <- rbind(cbind(covar, ZerosRight), 15 | ZerosBottom) 16 | rownames(covar) <- c(eventstudy_coeffs, norm_column) 17 | colnames(covar) <- c(eventstudy_coeffs, norm_column) 18 | 19 | # Sort matrix 20 | covar <- covar[coeffs_order, coeffs_order] 21 | 22 | return(covar) 23 | } 24 | 25 | # Computes F matrix using coeff_length and poly_order as arguments 26 | GetFmat <- function(coeff_length, poly_order) { 27 | 28 | k = seq(0, coeff_length-1)/(coeff_length-1) 29 | Fmat = sapply(seq(1, poly_order+1), 30 | function(j) {k^(j-1)}) 31 | 32 | return(Fmat) 33 | } 34 | 35 | # Find minimum order of polynomial such that the constraint is satisfied 36 | FindOrder <- function(coeffs, inv_covar, Wcritic, maxorder) { 37 | 38 | norm_index <- which(coeffs == 0) 39 | 40 | Wvalue = 1e6 41 | poly_order = 0 42 | 43 | # Compute Wald value for polynomials of increasing order until Wald Value < Critical Value 44 | while (poly_order <= maxorder & Wvalue >= Wcritic) { 45 | 46 | min_results <- MinimizeWald(coeffs, inv_covar, norm_index, poly_order) 47 | Wvalue = min_results$W 48 | poly_order = poly_order + 1 49 | } 50 | 51 | return(list(order = poly_order - 1, 52 | results = min_results)) 53 | } 54 | 55 | # Minimize Wald objective given coefficients and inverse covariance matrix 56 | MinimizeWald <- function(coeffs, inv_covar, norm_index, poly_order) { 57 | 58 | coeff_length = length(coeffs) 59 | 60 | if (poly_order == 0) { 61 | trfit = rep(0, coeff_length) 62 | W = (t(coeffs)%*%inv_covar)%*%coeffs 63 | vhat = 0 64 | 65 | } else { 66 | Fmat <- GetFmat(coeff_length, poly_order) 67 | Anorm <- Fmat[norm_index, , drop = F] 68 | 69 | FtinvVd = (t(Fmat)%*%inv_covar)%*%matrix(coeffs) 70 | invFtinvVF = pracma::inv((t(Fmat)%*%inv_covar)%*%Fmat) 71 | AtFtinvVFA = (Anorm%*%invFtinvVF)%*%t(Anorm) 72 | multiple = (t(Anorm)%*%pracma::inv(AtFtinvVFA))%*%Anorm 73 | 74 | difference = FtinvVd - (multiple%*%invFtinvVF)%*%FtinvVd 75 | vhat = invFtinvVF%*%difference 76 | 77 | trfit <- Fmat%*%vhat 78 | W <- (t(trfit-coeffs)%*%inv_covar)%*%(trfit-coeffs) 79 | } 80 | 81 | return(list("trfit" = trfit, 82 | "W" = W, 83 | "vhat" = vhat)) 84 | } 85 | 86 | # Find coefficients such that square of highest order term is minimized 87 | # Num normalized coefficients less than order of polynomial 88 | FindCoeffs <- function(res_order, coeffs, inv_covar, Wcritic, pN, order, norm_idxs, Fmat, 89 | maxiter_solver = 2e6) { 90 | 91 | if (is.null(dim(Fmat))) { # If one-dimensional make sure it's also a matrix object 92 | Fmat <- matrix(Fmat) 93 | } 94 | 95 | # Prevent conversion to vector with drop = F 96 | Anorm <- Fmat[norm_idxs, , drop = F] 97 | 98 | stopifnot(ncol(Anorm) == ncol(Fmat)) 99 | 100 | colindex_b = 1:(ncol(Anorm)-pN-1) 101 | colindex_1 = (ncol(Anorm)-pN):(ncol(Anorm)-1) 102 | colindex_2 = ncol(Anorm) 103 | 104 | Ab <- Anorm[, colindex_b, drop = F] 105 | A1 <- Anorm[, colindex_1, drop = F] 106 | A2 
<- Anorm[, colindex_2, drop = F] 107 | 108 | Fb <- Fmat[, colindex_b, drop = F] 109 | F1 <- Fmat[, colindex_1, drop = F] 110 | F2 <- Fmat[, colindex_2, drop = F] 111 | 112 | x0 = res_order$vhat[1:ncol(Fb)] 113 | 114 | optim_pos <- stats::optim(par = x0, 115 | fn = Objective, 116 | method = "Nelder-Mead", 117 | control = list("maxit" = maxiter_solver, "reltol" = 1e-4), 118 | d = coeffs, inv_covar = inv_covar, 119 | Fb = Fb, F1 = F1, F2 = F2, Ab = Ab, A1 = A1, A2 = A2, 120 | Wcritic = Wcritic, positive = T) 121 | optim_neg <- stats::optim(par = x0, 122 | fn = Objective, 123 | method = "Nelder-Mead", 124 | control = list("maxit" = maxiter_solver, "reltol" = 1e-4), 125 | d = coeffs, inv_covar = inv_covar, 126 | Fb = Fb, F1 = F1, F2 = F2, Ab = Ab, A1 = A1, A2 = A2, 127 | Wcritic = Wcritic, positive = F) 128 | 129 | if (optim_pos$convergence != 0 | optim_neg$convergence != 0) { 130 | stop("Numerical optimization failed when searching for the smoothest path. Please set 'Smpath' to FALSE.") 131 | } 132 | 133 | vb_pos <- optim_pos$par 134 | vb_neg <- optim_neg$par 135 | 136 | d0_ = d0(coeffs, inv_covar, F1, F2, A1, A2) 137 | d1_pos = d1(vb_pos, coeffs, inv_covar, Fb, F1, F2, Ab, A1, A2) 138 | d2_pos = d2(vb_pos, coeffs, inv_covar, Fb, F1, Ab, A1, Wcritic) 139 | d1_neg = d1(vb_neg, coeffs, inv_covar, Fb, F1, F2, Ab, A1, A2) 140 | d2_neg = d2(vb_neg, coeffs, inv_covar, Fb, F1, Ab, A1, Wcritic) 141 | 142 | discriminat_pos = d1_pos^2 - 4*d0_*d2_pos 143 | discriminat_neg = d1_neg^2 - 4*d0_*d2_neg 144 | 145 | v2_pos = (-d1_pos + sqrt(discriminat_pos))/(2*d0_) 146 | v2_neg = (-d1_neg - sqrt(discriminat_neg))/(2*d0_) 147 | 148 | if (abs(v2_pos) < abs(v2_neg)) { 149 | vb = vb_pos 150 | v2 = v2_pos 151 | } else { 152 | vb = vb_neg 153 | v2 = v2_neg 154 | } 155 | v1 = inv(A1)%*%(-Ab%*%vb - A2%*%v2) 156 | 157 | return(c(vb, v1, v2)) 158 | } 159 | 160 | d0 <- function(d, inv_covar, F1, F2, A1, A2) { 161 | single_factor = F2 - F1%*%inv(A1)%*%A2 162 | 163 | return(t(single_factor)%*%inv_covar%*%single_factor) 164 | } 165 | 166 | d1 <- function(vb, d, inv_covar, Fb, F1, F2, Ab, A1, A2) { 167 | pre_factor = (Fb - F1%*%(inv(A1)%*%Ab))%*%vb - d 168 | post_factor = F2 - F1%*%inv(A1)%*%A2 169 | 170 | return(2*t(pre_factor)%*%inv_covar%*%post_factor) 171 | } 172 | 173 | d2 <- function(vb, d, inv_covar, Fb, F1, Ab, A1, Wcritic) { 174 | single_factor = (Fb - F1%*%(inv(A1)%*%Ab))%*%vb - d 175 | 176 | return(t(single_factor)%*%inv_covar%*%single_factor - Wcritic) 177 | } 178 | 179 | Objective <- function(vb, d, inv_covar, Fb, F1, F2, Ab, A1, A2, Wcritic, 180 | positive = T) { 181 | 182 | vb = matrix(vb) 183 | 184 | d0_ = d0( d, inv_covar, F1, F2, A1, A2) 185 | d1_ = d1(vb, d, inv_covar, Fb, F1, F2, Ab, A1, A2) 186 | d2_ = d2(vb, d, inv_covar, Fb, F1, Ab, A1, Wcritic) 187 | 188 | discriminant = d1_^2 - 4*d0_*d2_ 189 | 190 | if (discriminant < 0) { 191 | return(Inf) 192 | } 193 | 194 | if (positive) { 195 | return(( (-d1_ + sqrt(discriminant))/(2*d0_) )^2) 196 | } else { 197 | return(( (-d1_ - sqrt(discriminant))/(2*d0_) )^2) 198 | } 199 | } 200 | 201 | 202 | # Find coeffs such that square of highest order term is minimized 203 | # Num normalized coefficients equals order of polynomial 204 | FindCoeffsEq <- function(res_order, coeffs, inv_covar, Wcritic, pN, order, norm_idxs, Fmat, 205 | maxiter_solver = 1e6) { 206 | 207 | if (is.null(dim(Fmat))) { # If one-dimensional make sure it's also a matrix object 208 | Fmat <- matrix(Fmat) 209 | } 210 | 211 | # Prevent conversion to vector with drop = F 212 | Anorm <- Fmat[norm_idxs, 
, drop = F] 213 | 214 | stopifnot(ncol(Anorm) == ncol(Fmat)) 215 | 216 | colindex_1 = (ncol(Anorm)-pN):(ncol(Anorm)-1) 217 | colindex_2 = ncol(Anorm) 218 | 219 | A1 <- Anorm[, colindex_1, drop = F] 220 | A2 <- Anorm[, colindex_2, drop = F] 221 | 222 | F1 <- Fmat[, colindex_1, drop = F] 223 | F2 <- Fmat[, colindex_2, drop = F] 224 | 225 | d0_ = d0Eq(coeffs, inv_covar, F1, F2, A1, A2) 226 | d1_ = d1Eq(coeffs, inv_covar, F1, F2, A1, A2) 227 | d2_ = d2Eq(coeffs, inv_covar, Wcritic) 228 | 229 | discriminant = d1_^2 - 4*d0_*d2_ 230 | 231 | v2_pos <- (-d1_ + sqrt(discriminant))/(2*d0_) 232 | v2_neg <- (-d1_ - sqrt(discriminant))/(2*d0_) 233 | 234 | if (abs(v2_pos) < abs(v2_neg)) { 235 | v2 = v2_pos 236 | } else { 237 | v2 = v2_neg 238 | } 239 | v1 = -inv(A1)%*%(A2%*%v2) 240 | 241 | return(c(v1, v2)) 242 | } 243 | 244 | d0Eq <- function(d, inv_covar, F1, F2, A1, A2) { 245 | single_factor = F2 - F1%*%inv(A1)%*%A2 246 | 247 | return(t(single_factor)%*%inv_covar%*%single_factor) 248 | } 249 | 250 | d1Eq <- function(d, inv_covar, F1, F2, A1, A2) { 251 | pre_factor = F2 - F1%*%inv(A1)%*%A2 252 | 253 | return(-2*t(pre_factor)%*%inv_covar%*%d) 254 | } 255 | 256 | d2Eq <- function(d, inv_covar, Wcritic) { 257 | 258 | return(t(d)%*%inv_covar%*%d - Wcritic) 259 | } 260 | -------------------------------------------------------------------------------- /tests/testthat/test-PreparePlottingData.R: -------------------------------------------------------------------------------- 1 | 2 | # OLS --------------------------------------------------------------------- 3 | 4 | test_that("labels are unique", { 5 | 6 | 7 | list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 8 | policyvar = "z", idvar = "id", timevar = "t", 9 | controls = "x_r", FE = TRUE, TFE = TRUE, 10 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 11 | 12 | df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) 13 | 14 | policyvar <- list_EventStudy[[2]]$policyvar 15 | post <- list_EventStudy[[2]]$post 16 | overidpost <- list_EventStudy[[2]]$overidpost 17 | pre <- list_EventStudy[[2]]$pre 18 | overidpre <- list_EventStudy[[2]]$overidpre 19 | normalization_column <- list_EventStudy[[2]]$normalization_column 20 | proxyIV <- list_EventStudy[[2]]$proxyIV 21 | 22 | df_PreparePlottingData <- PreparePlottingData(df_tidy_estimates, policyvar, 23 | post, overidpost, pre, overidpre, normalization_column, proxyIV) 24 | 25 | labels_actual <- as.character(df_PreparePlottingData$label) 26 | num_labels <- length(labels_actual) 27 | num_unique_labels <- length(unique(labels_actual)) 28 | 29 | expect_equal(num_unique_labels, num_labels) 30 | }) 31 | 32 | test_that("the correct labels are created", { 33 | 34 | 35 | list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 36 | policyvar = "z", idvar = "id", timevar = "t", 37 | controls = "x_r", FE = TRUE, TFE = TRUE, 38 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 39 | 40 | df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) 41 | 42 | policyvar <- list_EventStudy[[2]]$policyvar 43 | post <- list_EventStudy[[2]]$post 44 | overidpost <- list_EventStudy[[2]]$overidpost 45 | pre <- list_EventStudy[[2]]$pre 46 | overidpre <- list_EventStudy[[2]]$overidpre 47 | normalization_column <- list_EventStudy[[2]]$normalization_column 48 | proxyIV <- list_EventStudy[[2]]$proxyIV 49 | 50 | 
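# Note (added for clarity, not in the original test file): list_EventStudy[[2]] is the
# "arguments" element of the list returned by EventStudy(); list_EventStudy$arguments
# would be an equivalent, more explicit way to extract policyvar, post, pre, etc.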
df_PreparePlottingData <- PreparePlottingData(df_tidy_estimates, policyvar, 51 | post, overidpost, pre, overidpre, normalization_column, proxyIV) 52 | 53 | labels_actual <- as.character(df_PreparePlottingData$label) 54 | 55 | expect_equal(labels_actual, c("-7+", "-6", "-5", "-4", "-2", "-1", "0", "1", "2", "3", "4", "5", "6", "7", "8+", "-3")) 56 | 57 | }) 58 | 59 | test_that("the labels are ordered correctly", { 60 | 61 | 62 | list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 63 | policyvar = "z", idvar = "id", timevar = "t", 64 | controls = "x_r", FE = TRUE, TFE = TRUE, 65 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 66 | 67 | df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) 68 | 69 | policyvar <- list_EventStudy[[2]]$policyvar 70 | post <- list_EventStudy[[2]]$post 71 | overidpost <- list_EventStudy[[2]]$overidpost 72 | pre <- list_EventStudy[[2]]$pre 73 | overidpre <- list_EventStudy[[2]]$overidpre 74 | normalization_column <- list_EventStudy[[2]]$normalization_column 75 | proxyIV <- list_EventStudy[[2]]$proxyIV 76 | 77 | df_PreparePlottingData <- PreparePlottingData(df_tidy_estimates, policyvar, post, overidpost, pre, overidpre, normalization_column, proxyIV) 78 | 79 | levels_coefficients <- stringr::str_remove(levels(df_PreparePlottingData$label), "\\+") 80 | actual_levels_as_integer <- as.integer(levels_coefficients) 81 | 82 | expect_equal(actual_levels_as_integer, (-pre - overidpre - 1):(post + overidpost)) 83 | 84 | }) 85 | 86 | test_that("the control variable is removed", { 87 | 88 | 89 | list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 90 | policyvar = "z", idvar = "id", timevar = "t", 91 | controls = "x_r", FE = TRUE, TFE = TRUE, 92 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 93 | 94 | df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) 95 | 96 | policyvar <- list_EventStudy[[2]]$policyvar 97 | post <- list_EventStudy[[2]]$post 98 | overidpost <- list_EventStudy[[2]]$overidpost 99 | pre <- list_EventStudy[[2]]$pre 100 | overidpre <- list_EventStudy[[2]]$overidpre 101 | normalization_column <- list_EventStudy[[2]]$normalization_column 102 | controls <- list_EventStudy[[2]]$controls 103 | proxyIV <- list_EventStudy[[2]]$proxyIV 104 | 105 | df_PreparePlottingData <- PreparePlottingData(df_tidy_estimates, policyvar, 106 | post, overidpost, pre, overidpre, normalization_column, proxyIV) 107 | 108 | terms_actual <- df_PreparePlottingData$term 109 | 110 | expect_true(!controls %in% terms_actual) 111 | }) 112 | 113 | test_that("the largest lag label is correctly created", { 114 | 115 | 116 | list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 117 | policyvar = "z", idvar = "id", timevar = "t", 118 | controls = "x_r", FE = TRUE, TFE = TRUE, 119 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 120 | 121 | df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) 122 | 123 | policyvar <- list_EventStudy[[2]]$policyvar 124 | post <- list_EventStudy[[2]]$post 125 | overidpost <- list_EventStudy[[2]]$overidpost 126 | pre <- list_EventStudy[[2]]$pre 127 | overidpre <- list_EventStudy[[2]]$overidpre 128 | normalization_column <- list_EventStudy[[2]]$normalization_column 129 | controls <- 
list_EventStudy[[2]]$controls 130 | proxyIV <- list_EventStudy[[2]]$proxyIV 131 | 132 | df_PreparePlottingData <- PreparePlottingData(df_tidy_estimates, policyvar, 133 | post, overidpost, pre, overidpre, normalization_column, proxyIV) 134 | 135 | term_actual <- as.character(df_PreparePlottingData[df_PreparePlottingData["term"] == paste0(policyvar, "_lag", (post + overidpost)), ][["label"]]) 136 | 137 | expect_equal(term_actual, "8+") 138 | 139 | 140 | }) 141 | 142 | test_that("the largest lead label is correctly created", { 143 | 144 | 145 | list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 146 | policyvar = "z", idvar = "id", timevar = "t", 147 | controls = "x_r", FE = TRUE, TFE = TRUE, 148 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 149 | 150 | df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) 151 | 152 | policyvar <- list_EventStudy[[2]]$policyvar 153 | post <- list_EventStudy[[2]]$post 154 | overidpost <- list_EventStudy[[2]]$overidpost 155 | pre <- list_EventStudy[[2]]$pre 156 | overidpre <- list_EventStudy[[2]]$overidpre 157 | normalization_column <- list_EventStudy[[2]]$normalization_column 158 | controls <- list_EventStudy[[2]]$controls 159 | proxyIV <- list_EventStudy[[2]]$proxyIV 160 | 161 | df_PreparePlottingData <- PreparePlottingData(df_tidy_estimates, policyvar, 162 | post, overidpost, pre, overidpre, normalization_column, proxyIV) 163 | 164 | term_actual <- as.character(df_PreparePlottingData[df_PreparePlottingData["term"] == paste0(policyvar, "_lead", (pre + overidpre)), ][["label"]]) 165 | 166 | expect_equal(term_actual, "-7+") 167 | 168 | 169 | }) 170 | 171 | test_that("all columns besides 'term' and 'label' are 0 for the normalization column", { 172 | 173 | 174 | list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 175 | policyvar = "z", idvar = "id", timevar = "t", 176 | controls = "x_r", FE = TRUE, TFE = TRUE, 177 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 178 | 179 | df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) 180 | 181 | policyvar <- list_EventStudy[[2]]$policyvar 182 | post <- list_EventStudy[[2]]$post 183 | overidpost <- list_EventStudy[[2]]$overidpost 184 | pre <- list_EventStudy[[2]]$pre 185 | overidpre <- list_EventStudy[[2]]$overidpre 186 | normalization_column <- list_EventStudy[[2]]$normalization_column 187 | controls <- list_EventStudy[[2]]$controls 188 | proxyIV <- list_EventStudy[[2]]$proxyIV 189 | 190 | df_PreparePlottingData <- PreparePlottingData(df_tidy_estimates, policyvar, 191 | post, overidpost, pre, overidpre, normalization_column, proxyIV) 192 | 193 | all_columns <- names(df_PreparePlottingData) 194 | v_zero_column_names <- all_columns[all_columns != c("term", "label")] 195 | 196 | normalized_row <- df_PreparePlottingData[df_PreparePlottingData["term"] == normalization_column, v_zero_column_names] 197 | actual_row_total <- apply(normalized_row, MARGIN = 1, function(x) sum(as.integer(x))) 198 | 199 | expect_equal(unname(actual_row_total), 0) 200 | 201 | 202 | }) 203 | 204 | # FHS --------------------------------------------------------------------- 205 | 206 | test_that("all columns besides 'term' and 'label' are 0 for the proxyIV column", { 207 | 208 | 209 | list_EventStudy <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", 210 | policyvar = "z", 
idvar = "id", timevar = "t", 211 | controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", 212 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 213 | 214 | df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) 215 | 216 | policyvar <- list_EventStudy[[2]]$policyvar 217 | post <- list_EventStudy[[2]]$post 218 | overidpost <- list_EventStudy[[2]]$overidpost 219 | pre <- list_EventStudy[[2]]$pre 220 | overidpre <- list_EventStudy[[2]]$overidpre 221 | normalization_column <- list_EventStudy[[2]]$normalization_column 222 | proxyIV <- list_EventStudy[[2]]$proxyIV 223 | 224 | df_PreparePlottingData <- PreparePlottingData(df_tidy_estimates, policyvar, post, overidpost, pre, overidpre, normalization_column, proxyIV) 225 | 226 | all_columns <- names(df_PreparePlottingData) 227 | v_zero_column_names <- all_columns[all_columns != c("term", "label")] 228 | 229 | proxyIV_row <- df_PreparePlottingData[df_PreparePlottingData["term"] == proxyIV, v_zero_column_names] 230 | actual_row_total <- apply(proxyIV_row, MARGIN = 1, function(x) sum(as.integer(x))) 231 | 232 | expect_equal(unname(actual_row_total), 0) 233 | }) 234 | -------------------------------------------------------------------------------- /tests/testthat/test-EventStudyPlot.R: -------------------------------------------------------------------------------- 1 | 2 | get_labs <- function(x) x$labels 3 | if ("get_labs" %in% getNamespaceExports("ggplot2")) { 4 | get_labs <- ggplot2::get_labs 5 | } 6 | 7 | test_that("Dimension of OLS and FHS estimation output is the same", { 8 | 9 | estimates_ols <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_smooth_m", 10 | policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", 11 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 12 | 13 | estimates_fhs <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_smooth_m", 14 | policyvar = "z", idvar = "id", timevar = "t", proxy = "eta_r", controls = "x_r", 15 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 16 | 17 | expect_equal(length(estimates_ols), length(estimates_fhs)) 18 | 19 | # Compare first element of list 20 | coeffs_ols <- names(estimates_ols[[1]]$coefficients) 21 | coeffs_fhs <- names(estimates_fhs[[1]]$coefficients) 22 | 23 | expect_true(all(coeffs_fhs %in% c(coeffs_ols, "eta_r"))) 24 | expect_equal(length(coeffs_ols), length(coeffs_fhs)) # FHS: Norm coeff removes one coeff and proxy adds one 25 | 26 | expect_true(all(names(estimates_ols[[1]]) %in% names(estimates_fhs[[1]]))) 27 | 28 | # Compare second element of list 29 | expect_true(all(names(estimates_ols[[2]]) %in% names(estimates_fhs[[2]]))) 30 | expect_true(all(names(estimates_fhs[[2]]) %in% names(estimates_ols[[2]]))) 31 | }) 32 | 33 | 34 | test_that("correctly changes x-axis and y-axis labels", { 35 | 36 | estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 37 | policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", 38 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 39 | 40 | p_labels <- EventStudyPlot(estimates = estimates, 41 | conf_level = .95, 42 | xtitle = "Event Time", 43 | ytitle = "Event-study Coefficients",) 44 | 45 | labels <- get_labs(p_labels) 46 | expect_equal(labels$x, "Event Time") 47 | expect_equal(labels$y, "Event-study Coefficients") 48 | 49 | }) 50 | 51 | test_that("x- and y-axis breaks and limits are correct", { 52 | 53 | estimates = 
EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 54 | policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", 55 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 56 | 57 | p_Addmean <- EventStudyPlot(estimates = estimates, 58 | ybreaks = c(-1.5, -.5, 0, .5, 1.5), 59 | add_mean = TRUE) 60 | 61 | p_noAddmean <- EventStudyPlot(estimates = estimates, 62 | ybreaks = c(-1.5, -.5, 0, .5, 1.5), 63 | add_mean = FALSE) 64 | 65 | v_limits_addmeans <- p_Addmean$scales$scales[[2]]$limits 66 | v_limits_no_addmeans <- p_noAddmean$scales$scales[[2]]$limits 67 | v_breaks_addmeans <- p_Addmean$scales$scales[[2]]$breaks 68 | v_breaks_no_addmeans <- p_noAddmean$scales$scales[[2]]$breaks 69 | 70 | expect_equal(v_limits_addmeans, c(-1.5, 1.5)) 71 | expect_equal(v_limits_no_addmeans, c(-1.5, 1.5)) 72 | 73 | expect_equal(v_breaks_addmeans, c(-1.5, -.5, 0, .5, 1.5)) 74 | expect_equal(v_breaks_no_addmeans, c(-1.5, -.5, 0, .5, 1.5)) 75 | }) 76 | 77 | test_that("correctly adds mean of outcome var", { 78 | 79 | estimates = EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 80 | policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", 81 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 82 | 83 | p_Addmean <- EventStudyPlot(estimates = estimates, 84 | ybreaks = c(-1.5, -.5, 0, .5, 1.5), 85 | add_mean = TRUE) 86 | 87 | p_noAddmean <- EventStudyPlot(estimates = estimates, 88 | ybreaks = c(-1.5, -.5, 0, .5, 1.5), 89 | add_mean = FALSE) 90 | 91 | y_mean <- AddMeans(estimates$arguments$data, estimates$arguments$normalization_column, 92 | "z", "y_base") 93 | y_mean <- round(y_mean, 2) 94 | 95 | v_labels_addmeans <- p_Addmean$scales$scales[[2]]$labels 96 | v_labels_no_addmeans <- p_noAddmean$scales$scales[[2]]$labels 97 | 98 | expect_equal(v_labels_addmeans, c("-1.5", "-0.5", sprintf("0 (%s)", y_mean), "0.5", "1.5")) 99 | expect_equal(v_labels_no_addmeans, c(-1.5, -.5, 0, .5, 1.5)) 100 | }) 101 | 102 | test_that("sup-t bands are appropriately present or absent", { 103 | 104 | estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 105 | policyvar = "z", idvar = "id", timevar = "t", 106 | controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 107 | 108 | p_supt <- get_labs(EventStudyPlot(estimates = estimates, 109 | supt = .95)) 110 | 111 | p_no_supt <- get_labs(EventStudyPlot(estimates = estimates, 112 | supt = NULL)) 113 | 114 | expect_true(p_supt$ymin == "suptband_lower") 115 | expect_true(p_no_supt$ymin != "suptband_lower") 116 | 117 | expect_true(p_supt$ymax == "suptband_upper") 118 | expect_true(p_no_supt$ymax != "suptband_upper") 119 | }) 120 | 121 | test_that("confidence intervals are appropriately present or absent", { 122 | 123 | estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 124 | policyvar = "z", idvar = "id", timevar = "t", 125 | controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 126 | 127 | p_ci <- get_labs(EventStudyPlot(estimates = estimates, 128 | conf_level = .95, supt = NULL)) 129 | 130 | p_no_ci <- get_labs(EventStudyPlot(estimates = estimates, 131 | conf_level = NULL, supt = NULL)) 132 | 133 | expect_equal(p_ci$ymin, "ci_lower") 134 | expect_equal(p_ci$ymax, "ci_upper") 135 | expect_null(p_no_ci$ymin) 136 | expect_null(p_no_ci$ymax) 137 | }) 138 | 139 | test_that("Preevent Coeffs and Postevent Coeffs are appropriately present or absent", { 140 | 141 | estimates <- 
EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 142 | policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", 143 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 144 | 145 | p_pre_post_caption <- get_labs(EventStudyPlot(estimates = estimates, 146 | ybreaks = c(-1.5, -.5, 0, .5, 1.5), 147 | pre_event_coeffs = TRUE, 148 | post_event_coeffs = TRUE))$caption 149 | 150 | p_pre_caption <- get_labs(EventStudyPlot(estimates = estimates, 151 | ybreaks = c(-1.5, -.5, 0, .5, 1.5), 152 | pre_event_coeffs = TRUE, 153 | post_event_coeffs = FALSE))$caption 154 | 155 | p_post_caption <- get_labs(EventStudyPlot(estimates = estimates, 156 | ybreaks = c(-1.5, -.5, 0, .5, 1.5), 157 | pre_event_coeffs = FALSE, 158 | post_event_coeffs = TRUE))$caption 159 | 160 | p_neither_caption <- get_labs(EventStudyPlot(estimates = estimates, 161 | ybreaks = c(-1.5, -.5, 0, .5, 1.5), 162 | pre_event_coeffs = FALSE, 163 | post_event_coeffs = FALSE))$caption 164 | 165 | regex_for_p_value <- "1\\.0*$|0\\.\\d+" # 1 followed by . and then zero or more 0's or 0 then . then any number 166 | regex_pretrends <- "Pretrends p-value = " 167 | regex_posttrends <- "Leveling off p-value = " 168 | 169 | expect_true( 170 | stringr::str_detect(p_pre_post_caption, regex_for_p_value) & 171 | stringr::str_detect(p_pre_post_caption, regex_pretrends) & 172 | stringr::str_detect(p_pre_post_caption, regex_posttrends) 173 | ) 174 | 175 | expect_true( 176 | stringr::str_detect(p_pre_caption, regex_for_p_value) & 177 | stringr::str_detect(p_pre_caption, regex_pretrends) 178 | ) 179 | 180 | expect_false( 181 | stringr::str_detect(p_pre_caption, regex_for_p_value) & 182 | stringr::str_detect(p_pre_caption, regex_posttrends) 183 | ) 184 | 185 | expect_false( 186 | stringr::str_detect(p_post_caption, regex_for_p_value) & 187 | stringr::str_detect(p_post_caption, regex_pretrends) 188 | ) 189 | 190 | expect_true( 191 | stringr::str_detect(p_post_caption, regex_for_p_value) & 192 | stringr::str_detect(p_post_caption, regex_posttrends) 193 | ) 194 | 195 | expect_null(p_neither_caption) 196 | }) 197 | 198 | test_that("Sup-t bands are wider than confidence intervals", { 199 | 200 | estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 201 | policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", 202 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 203 | 204 | p <- EventStudyPlot(estimates = estimates, 205 | conf_level = .95, 206 | supt = .95) 207 | 208 | ci_lower <- na.omit(p$data$ci_lower) 209 | ci_upper <- na.omit(p$data$ci_upper) 210 | suptband_lower <- na.omit(p$data$suptband_lower) 211 | suptband_upper <- na.omit(p$data$suptband_upper) 212 | num_terms <- nrow(na.omit(p$data)) 213 | 214 | v_lower_comparison <- (suptband_lower <= ci_lower) 215 | v_upper_comparison <- (suptband_upper >= ci_upper) 216 | 217 | expect_equal(num_terms, sum(v_lower_comparison)) 218 | expect_equal(num_terms, sum(v_upper_comparison)) 219 | }) 220 | 221 | test_that("computed smoothest path for examples is within expectations", { 222 | 223 | estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 224 | policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", 225 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 226 | 227 | p <- EventStudyPlot(estimates = estimates, 228 | smpath = T) 229 | 230 | expect_equal(p$data$smoothest_path, matrix(rep(0, nrow(p$data)))) 231 | 232 | estimates <- EventStudy(estimator = "OLS", 
data = example_data, outcomevar = "y_smooth_m", 233 | policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", 234 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = -3) 235 | 236 | p <- EventStudyPlot(estimates = estimates, 237 | smpath = T) 238 | 239 | normalized_index <- which(p$data$estimate == 0) 240 | normalized_smpath <- p$data$smoothest_path[normalized_index] 241 | 242 | # Expect normalized_smpath to be almost equal to zero 243 | expect_true(all(abs(normalized_smpath) < 1e-10)) 244 | 245 | max_smpath <- max(p$data$smoothest_path) 246 | min_smpath <- min(p$data$smoothest_path) 247 | 248 | max_suptband <- max(p$data$suptband_upper, na.rm = T) 249 | min_suptband <- min(p$data$suptband_lower, na.rm = T) 250 | 251 | # Expect smpath to be contained in suptband 252 | expect_true(max_smpath < max_suptband) 253 | expect_true(min_smpath > min_suptband) 254 | }) 255 | 256 | test_that("computed smoothest path for FHS has at least two coefficients almost equal to zero", { 257 | 258 | estimates <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_jump_m", 259 | policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", proxy = "eta_r", 260 | post = 3, pre = 0, overidpre = 3, overidpost = 1, normalize = -1, proxyIV = "z_fd_lead3") 261 | 262 | p <- EventStudyPlot(estimates = estimates, 263 | smpath = T) 264 | 265 | normalized_index <- which(p$data$estimate == 0) 266 | normalized_smpath <- p$data$smoothest_path[normalized_index] 267 | 268 | expect_true(length(normalized_index) >= 2) 269 | expect_true(all(abs(normalized_smpath) < 1e-10)) 270 | }) 271 | -------------------------------------------------------------------------------- /R/EventStudyPlot.R: -------------------------------------------------------------------------------- 1 | #' Creates an Event-Study Plot Following the Suggestions in Freyaldenhoven et al. (2021) 2 | #' 3 | #' @description `EventStudyPlot` takes the output from [EventStudy()] and combines it with additional optional arguments to facilitate constructing an Event-Study Plot. 4 | #' 5 | #' @param estimates The output from calling [EventStudy()]. Should be a list of length 2. 6 | #' @param xtitle The title for the x-axis. Should be a string. Defaults to "Event time". 7 | #' @param ytitle The title for the y-axis. Should be a string. Defaults to "Coefficient". 8 | #' @param ybreaks A vector containing the desired breaks for the y-axis. 9 | #' Defaults to NULL, which means the breaks are computed automatically. 10 | #' If custom breaks are selected with the `add_mean` argument set to TRUE, then the breaks must include zero. 11 | #' @param conf_level Confidence level used for confidence interval 12 | #' expressed as a real number between 0 and 1, inclusive. Defaults to 0.95. 13 | #' @param supt The confidence level used for obtaining the sup-t bands critical value. 14 | #' Should be a real number between 0 and 1, inclusive. Defaults to .95. Sup-t bands are simulation-based, 15 | #' so you must set a seed if you would like your sup-t band results to be reproducible (see examples). 16 | #' @param num_sim The number of simulations used in generating the sup-t bands. 17 | #' Should be a natural number. Defaults to 1000. 18 | #' @param add_mean Adds the mean of the dependent variable in the period used for normalization. 19 | #' Should be TRUE or FALSE. Defaults to FALSE. 20 | #' @param pre_event_coeffs If TRUE, uses pre and overidpre from estimates to test for pre-trends. 21 | #' Should be TRUE or FALSE. Defaults to TRUE. 
22 | #' @param post_event_coeffs If TRUE, uses post and overidpost from estimates to test for leveling-off. 23 | #' Should be TRUE or FALSE. Defaults to TRUE. 24 | #' @param add_zero_line Whether or not to plot a dashed horizontal line at y = 0. 25 | #' Should be TRUE or FALSE. Defaults to TRUE, meaning the line is plotted. 26 | #' @param smpath Plot smoothest path of confounder that rationalizes event study coefficients. 27 | #' Should be TRUE or FALSE. Defaults to FALSE. 28 | #' 29 | #' @return The Event-Study plot as a ggplot2 object. 30 | #' @import ggplot2 dplyr 31 | #' @import estimatr 32 | #' @importFrom rlang .data 33 | #' @importFrom data.table setorder 34 | #' @export 35 | #' 36 | #' @examples 37 | #' 38 | #' # 39 | #' 40 | #' # Minimal examples 41 | #' ### OLS 42 | #' 43 | #' estimates_ols <- EventStudy( 44 | #' estimator = "OLS", 45 | #' data = example_data, 46 | #' outcomevar = "y_smooth_m", 47 | #' policyvar = "z", 48 | #' idvar = "id", 49 | #' timevar = "t", 50 | #' controls = "x_r", 51 | #' FE = TRUE, TFE = TRUE, 52 | #' post = 3, overidpost = 5, 53 | #' pre = 2, overidpre = 4, 54 | #' normalize = - 3 55 | #' ) 56 | #' 57 | #' plt_ols <- EventStudyPlot(estimates = estimates_ols) 58 | #' plt_ols 59 | #' 60 | #' ### IV 61 | #' 62 | #' estimates_fhs <- EventStudy( 63 | #' estimator = "FHS", 64 | #' data = example_data, 65 | #' outcomevar = "y_smooth_m", 66 | #' policyvar = "z", 67 | #' idvar = "id", 68 | #' timevar = "t", 69 | #' proxy = "x_r", 70 | #' post = 2, overidpost = 1, 71 | #' pre = 0, overidpre = 3, 72 | #' normalize = -1 73 | #' ) 74 | #' 75 | #' plt_fhs <- EventStudyPlot(estimates = estimates_fhs) 76 | #' plt_fhs 77 | #' 78 | #' # Optional arguments 79 | #' 80 | #' ### Change x- and y-axis titles and set ybreaks 81 | #' EventStudyPlot(estimates = estimates_ols, 82 | #' xtitle = "Relative time", ytitle = "", 83 | #' ybreaks = seq(-2, 1, 0.5)) 84 | #' 85 | #' ### Add smoothest path 86 | #' EventStudyPlot(estimates = estimates_ols, smpath = TRUE) 87 | #' 88 | #' ### Add y-mean to y-axis and line y = 0 89 | #' EventStudyPlot(estimates = estimates_ols, add_mean = TRUE, 90 | #' add_zero_line = TRUE) 91 | #' 92 | #' ### Do not plot supt bands 93 | #' EventStudyPlot(estimates = estimates_ols, supt = NULL) 94 | #' 95 | #' ### Setting seed prior to plotting sup-t bands 96 | #' set.seed(1234) 97 | #' EventStudyPlot(estimates = estimates_ols) 98 | #' 99 | #' # Modify plots using ggplot2 functions 100 | #' library(ggplot2) 101 | #' 102 | #' ### Change color of dots, horizontal line, and theme 103 | #' plt_ols + 104 | #' geom_point(color = "red") + 105 | #' geom_hline(color = "gray", yintercept = 0) + 106 | #' theme_light() + 107 | #' theme(panel.grid.minor.x = element_blank()) 108 | #' 109 | 110 | EventStudyPlot <- function(estimates, 111 | xtitle = "Event time", ytitle = "Coefficient", ybreaks = NULL, 112 | conf_level = .95, supt = .95, num_sim = 1000, add_mean = FALSE, 113 | pre_event_coeffs = TRUE, post_event_coeffs = TRUE, 114 | add_zero_line = TRUE, smpath = FALSE) { 115 | 116 | if (!is.character(xtitle)) {stop("Argument 'xtitle' should be a character.")} 117 | if (!is.character(ytitle)) {stop("Argument 'ytitle' should be a character.")} 118 | if (!is.logical(add_zero_line)) {stop("Argument 'add_zero_line' should be either TRUE or FALSE.")} 119 | if (!is.null(ybreaks) & 120 | !is.numeric(ybreaks)) {stop("Argument 'ybreaks' should be NULL or a numeric vector.")} 121 | 122 | # Estimation Elements ----------------------------------------------------- 123 | 124 | df_estimates 
<- estimates$output 125 | df_estimates_tidy <- estimatr::tidy(estimates$output) 126 | 127 | static_model <- nrow(df_estimates_tidy) == 1 128 | if (static_model) { 129 | stop("EventStudyPlot() does not support static models.") 130 | } 131 | 132 | df_data <- estimates$arguments$data 133 | outcomevar <- estimates$arguments$outcomevar 134 | policyvar <- estimates$arguments$policyvar 135 | post <- estimates$arguments$post 136 | overidpost <- estimates$arguments$overidpost 137 | pre <- estimates$arguments$pre 138 | overidpre <- estimates$arguments$overidpre 139 | normalize <- estimates$arguments$normalize 140 | normalization_column <- estimates$arguments$normalization_column 141 | eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients 142 | proxyIV <- estimates$arguments$proxyIV 143 | 144 | # Optionally Add Suptbands/Confidence Intervals --------------------------- 145 | 146 | plot_supt <- if(!is.null(supt)) TRUE else FALSE 147 | 148 | if (plot_supt) { 149 | df_estimates_tidy <- AddSuptBand(df_estimates, num_sim = 1000, conf_level = supt, 150 | eventstudy_coefficients = eventstudy_coefficients) 151 | } 152 | 153 | plot_CI <- if(!is.null(conf_level)) TRUE else FALSE 154 | 155 | if (plot_CI) { 156 | 157 | df_estimates_tidy <- AddCIs(df_estimates_tidy, eventstudy_coefficients, conf_level) 158 | } 159 | 160 | # Optionally Test For Pretrends/Levelling-Off ----------------------------- 161 | 162 | df_test_linear <- TestLinear(estimates = estimates, pretrends = pre_event_coeffs, leveling_off = post_event_coeffs) 163 | 164 | if ((pre_event_coeffs | post_event_coeffs)) { 165 | pretrends_p_value <- df_test_linear[df_test_linear["Test"] == "Pre-Trends", "p.value"] 166 | levelingoff_p_value <- df_test_linear[df_test_linear["Test"] == "Leveling-Off", "p.value"] 167 | 168 | text_pretrends <- paste0("Pretrends p-value = ", round(pretrends_p_value, 2)) 169 | text_levelingoff <- paste0("Leveling off p-value = ", round(levelingoff_p_value, 2)) 170 | 171 | 172 | if (pre_event_coeffs & post_event_coeffs) { 173 | text_caption <- paste0(text_pretrends, " -- ", text_levelingoff) 174 | 175 | } else if (pre_event_coeffs & !post_event_coeffs) { 176 | text_caption <- text_pretrends 177 | 178 | } else if (!pre_event_coeffs & post_event_coeffs) { 179 | text_caption <- text_levelingoff 180 | 181 | } 182 | } else { 183 | text_caption <- NULL 184 | } 185 | 186 | 187 | df_plt <- PreparePlottingData(df_estimates_tidy, policyvar, 188 | post, overidpost, pre, overidpre, normalization_column, proxyIV) 189 | 190 | # Construct y breaks ------------------------------------------------------ 191 | 192 | if (!is.null(ybreaks)) { 193 | if (!(0 %in% ybreaks) & add_mean) { 194 | stop("If you want to add the mean of y in the y-axis then 'ybreaks' must include 0.") 195 | } 196 | 197 | ylabels <- ybreaks 198 | ylims <- c(min(ybreaks), max(ybreaks)) 199 | } else { 200 | min_value <- min(c(df_plt$estimate, df_plt$ci_lower, df_plt$suptband_lower), na.rm = T) 201 | max_value <- max(c(df_plt$estimate, df_plt$ci_upper, df_plt$suptband_upper), na.rm = T) 202 | max_abs <- max(abs(min_value), abs(max_value)) 203 | 204 | magnitude <- 10^floor(log10(max_abs)) 205 | 206 | # Determine step depending on how far the endpoints are from the magnitude 207 | mean_ratio <- mean(c(abs(min_value)/magnitude, max_value/magnitude)) 208 | if (mean_ratio > 6.67) { 209 | step = 3*magnitude 210 | } else if (mean_ratio > 3.33) { 211 | step = 2*magnitude 212 | } else { 213 | step = magnitude 214 | } 215 | 216 | # Pick multiples of step to ensure zero is 
included 217 | close_to_min <- floor(min_value/step)*step 218 | close_to_max <- ceiling(max_value/step)*step 219 | 220 | ybreaks <- seq(close_to_min, close_to_max, step) 221 | ylims <- c(min(ybreaks), max(ybreaks)) 222 | 223 | if (length(ybreaks) >= 9) { 224 | # Too many breaks, double step size 225 | step <- step*2 226 | close_to_min <- floor(min_value/step)*step 227 | close_to_max <- ceiling(max_value/step)*step 228 | 229 | ybreaks <- seq(close_to_min, close_to_max, step) 230 | } else if (length(ybreaks) <= 3) { 231 | # Too few breaks, halve step size 232 | step <- step/2 233 | close_to_min <- floor(min_value/step)*step 234 | close_to_max <- ceiling(max_value/step)*step 235 | 236 | ybreaks <- seq(close_to_min, close_to_max, step) 237 | } 238 | ylabels <- ybreaks 239 | } 240 | 241 | # Optionally Adds Mean ---------------------------------------------------- 242 | 243 | if (add_mean) { 244 | 245 | y_mean <- AddMeans(df_data, normalization_column, policyvar, outcomevar) 246 | 247 | index_zero <- which(ybreaks == 0) 248 | ylabels[index_zero] <- paste0(ylabels[index_zero], " (", round(y_mean, 2), ")") 249 | } 250 | 251 | # Optionally Add smooth path ---------------------------------------------- 252 | 253 | # Order coefficients 254 | label_var = "label" 255 | data.table::setorderv(df_plt, c(label_var)) 256 | ordered_labels <- df_plt$label 257 | 258 | if (smpath) { 259 | 260 | unselect_message <- "Please change the 'Smpath' argument in 'EventStudyPlot' to FALSE." 261 | 262 | if (!is.null(proxyIV)) { 263 | if (sum(df_plt$estimate == 0) > 2) { 264 | stop(paste0("The smoothest path is not supported for the FHS estimator with more than one instrument.", 265 | unselect_message)) 266 | } 267 | } 268 | 269 | coefficients <- df_plt$estimate 270 | 271 | # Add column and row in matrix of coefficients in index of norm columns 272 | covar <- AddZerosCovar(estimates$output$vcov, 273 | eventstudy_coefficients, 274 | df_plt[df_plt$estimate==0, ]$term, 275 | df_plt$term) 276 | 277 | inv_covar <- pracma::pinv(covar) 278 | 279 | df_plt <- AddSmPath(df_plt, coefficients, inv_covar) 280 | } 281 | 282 | # Construct Plot ---------------------------------------------------------- 283 | 284 | df_plt$label_num <- as.numeric(gsub("+", "", df_plt$label, fixed = T)) 285 | 286 | plt <- ggplot(df_plt, 287 | aes(x = .data$label_num, y = .data$estimate)) 288 | 289 | if (add_zero_line) { 290 | plt <- plt + 291 | geom_hline(yintercept = 0, 292 | color = "green", linetype = "dashed") 293 | } 294 | if (plot_supt) { 295 | plt <- plt + 296 | geom_linerange(aes(ymin = .data$suptband_lower, 297 | ymax = .data$suptband_upper), 298 | data = df_plt[df_plt$estimate != 0,]) 299 | } 300 | if (plot_CI) { 301 | plt <- plt + 302 | geom_errorbar(aes(ymin = .data$ci_lower, 303 | ymax = .data$ci_upper), 304 | data = df_plt[df_plt$estimate != 0,], 305 | width = .2) 306 | } 307 | if (smpath) { 308 | plt <- plt + 309 | geom_line(aes(y = .data$smoothest_path, group = 1), 310 | color = "black") 311 | } 312 | 313 | plt <- plt + 314 | geom_point(color = "#006600") + 315 | scale_x_continuous(breaks = min(df_plt$label_num):max(df_plt$label_num), 316 | labels = ordered_labels) + 317 | scale_y_continuous(breaks = ybreaks, 318 | labels = ylabels, 319 | limits = ylims) + 320 | labs(x = xtitle, y = ytitle, 321 | caption = text_caption) + 322 | theme_bw() + 323 | theme(panel.grid = element_blank(), 324 | plot.caption = element_text(hjust = 0)) 325 | 326 | return(plt) 327 | } 328 | 
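# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the package sources above): a stand-alone
# re-implementation of the automatic y-break heuristic that EventStudyPlot()
# applies when 'ybreaks = NULL', shown only to clarify how the step size is
# chosen. The function name 'SketchYBreaks' is hypothetical.
SketchYBreaks <- function(min_value, max_value) {
  max_abs   <- max(abs(min_value), abs(max_value))
  magnitude <- 10^floor(log10(max_abs))   # order of magnitude of the widest endpoint

  # Step size depends on how far the endpoints are, on average, from that magnitude
  mean_ratio <- mean(c(abs(min_value) / magnitude, max_value / magnitude))
  step <- if (mean_ratio > 6.67) {
    3 * magnitude
  } else if (mean_ratio > 3.33) {
    2 * magnitude
  } else {
    magnitude
  }

  # Breaks are multiples of 'step', which guarantees that zero is among them
  seq(floor(min_value / step) * step, ceiling(max_value / step) * step, step)
}

# Example: endpoints spanning the plotted estimates and their bands
SketchYBreaks(-0.42, 1.07)   # -1 0 1 2
# EventStudyPlot() additionally doubles (halves) the step when this first pass
# yields nine or more (three or fewer) breaks.
# ---------------------------------------------------------------------------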
-------------------------------------------------------------------------------- /tests/testthat/test-EventStudyOLS.R: -------------------------------------------------------------------------------- 1 | 2 | test_that("FE = TRUE, 3 | TFE = TRUE, 4 | cluster = TRUE works", { 5 | 6 | df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") 7 | 8 | estimator <- "OLS" 9 | outcomevar <- "y_base" 10 | str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 11 | controls <- "x_r" 12 | 13 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 14 | static = F, controls = controls) 15 | 16 | idvar <- "id" 17 | timevar <- "t" 18 | 19 | FE <- TRUE 20 | TFE <- TRUE 21 | cluster <- TRUE 22 | 23 | if (FE & TFE & cluster) { 24 | 25 | reg <- EventStudyOLS( 26 | prepared_model_formula = event_study_formula, 27 | prepared_data = df_test_EventStudyOLS, 28 | idvar = idvar, 29 | timevar = timevar, 30 | FE = FE, 31 | TFE = TFE, 32 | cluster = cluster 33 | ) 34 | } 35 | 36 | expect_true(all.equal(reg$felevels$`get(idvar)`, as.character(unique(df_test_EventStudyOLS$id)))) 37 | expect_equal(reg$nclusters, length(unique(df_test_EventStudyOLS$id))) 38 | expect_true(reg$se_type == "stata") 39 | 40 | }) 41 | 42 | test_that("FE = FALSE, 43 | TFE = TRUE, 44 | cluster = TRUE works", { 45 | 46 | df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") 47 | 48 | estimator <- "OLS" 49 | outcomevar <- "y_base" 50 | str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 51 | controls <- "x_r" 52 | 53 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 54 | static = F, controls = controls) 55 | 56 | idvar <- "id" 57 | timevar <- "t" 58 | 59 | FE <- FALSE 60 | TFE <- TRUE 61 | cluster <- TRUE 62 | 63 | if ((!FE) & TFE & cluster) { 64 | 65 | reg <- EventStudyOLS( 66 | prepared_model_formula = event_study_formula, 67 | prepared_data = df_test_EventStudyOLS, 68 | idvar = idvar, 69 | timevar = timevar, 70 | FE = FE, 71 | TFE = TFE, 72 | cluster = cluster 73 | ) 74 | } 75 | 76 | expect_equal(is.null(reg$felevels$`get(idvar)`), TRUE) 77 | expect_equal(reg$nclusters, length(unique(df_test_EventStudyOLS$id))) 78 | expect_true(reg$se_type == "stata") 79 | 80 | }) 81 | 82 | test_that("FE = TRUE, 83 | TFE = FALSE, 84 | cluster = TRUE works", { 85 | 86 | df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") 87 | 88 | estimator <- "OLS" 89 | outcomevar <- "y_base" 90 | str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 91 | controls <- "x_r" 92 | 93 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 94 | static = F, controls = controls) 95 | 96 | idvar <- "id" 97 | timevar <- "t" 98 | 99 | FE <- TRUE 100 | TFE <- FALSE 101 | cluster <- TRUE 102 | 103 | if (FE & (!TFE) & cluster) { 104 | 105 | reg <- EventStudyOLS( 106 | prepared_model_formula = event_study_formula, 107 | prepared_data = df_test_EventStudyOLS, 108 | idvar = idvar, 109 | timevar = timevar, 110 | FE = FE, 111 | TFE = TFE, 112 | cluster = cluster 113 | ) 114 | } 115 | 116 | expect_true(all.equal(reg$felevels$V1, as.character(unique(df_test_EventStudyOLS$id)))) 117 | expect_true(is.null(reg$felevels$`get(timevar)`), TRUE) 118 | expect_equal(reg$nclusters, length(unique(df_test_EventStudyOLS$id))) 119 | expect_true(reg$se_type == "stata") 120 | 121 | }) 122 | 123 | test_that("FE = FALSE, 124 | 
TFE = FALSE, 125 | cluster = TRUE works", { 126 | 127 | df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") 128 | 129 | estimator <- "OLS" 130 | outcomevar <- "y_base" 131 | str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 132 | controls <- "x_r" 133 | 134 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 135 | static = F, controls = controls) 136 | 137 | idvar <- "id" 138 | timevar <- "t" 139 | 140 | FE <- FALSE 141 | TFE <- FALSE 142 | cluster <- TRUE 143 | 144 | if ((!FE) & (!TFE) & cluster) { 145 | 146 | reg <- EventStudyOLS( 147 | prepared_model_formula = event_study_formula, 148 | prepared_data = df_test_EventStudyOLS, 149 | idvar = idvar, 150 | timevar = timevar, 151 | FE = FE, 152 | TFE = TFE, 153 | cluster = cluster 154 | ) 155 | } 156 | 157 | expect_true(is.null(reg$felevels$`get(idvar)`), TRUE) 158 | expect_true(is.null(reg$felevels$`get(timevar)`), TRUE) 159 | expect_equal(reg$nclusters, length(unique(df_test_EventStudyOLS$id))) 160 | expect_true(reg$se_type == "stata") 161 | 162 | }) 163 | 164 | test_that("FE = TRUE, 165 | TFE = TRUE, 166 | cluster = FALSE works", { 167 | 168 | df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") 169 | 170 | estimator <- "OLS" 171 | outcomevar <- "y_base" 172 | str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 173 | controls <- "x_r" 174 | 175 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 176 | static = F, controls = controls) 177 | 178 | idvar <- "id" 179 | timevar <- "t" 180 | 181 | FE <- TRUE 182 | TFE <- TRUE 183 | cluster <- FALSE 184 | 185 | if (FE & TFE & (!cluster)) { 186 | 187 | reg <- EventStudyOLS( 188 | prepared_model_formula = event_study_formula, 189 | prepared_data = df_test_EventStudyOLS, 190 | idvar = idvar, 191 | timevar = timevar, 192 | FE = FE, 193 | TFE = TFE, 194 | cluster = cluster 195 | ) 196 | } 197 | 198 | expect_true(all.equal(reg$felevels$`get(idvar)`, as.character(unique(df_test_EventStudyOLS$id)))) 199 | expect_true(is.null(reg$nclusters), TRUE) 200 | expect_true(reg$se_type %in% c("stata", "HC1")) 201 | 202 | }) 203 | 204 | test_that("FE = FALSE, 205 | TFE = TRUE, 206 | cluster = FALSE works", { 207 | 208 | df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") 209 | 210 | estimator <- "OLS" 211 | outcomevar <- "y_base" 212 | str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 213 | controls <- "x_r" 214 | 215 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 216 | static = F, controls = controls) 217 | 218 | idvar <- "id" 219 | timevar <- "t" 220 | 221 | FE <- FALSE 222 | TFE <- TRUE 223 | cluster <- FALSE 224 | 225 | if ((!FE) & TFE & (!cluster)) { 226 | 227 | reg <- EventStudyOLS( 228 | prepared_model_formula = event_study_formula, 229 | prepared_data = df_test_EventStudyOLS, 230 | idvar = idvar, 231 | timevar = timevar, 232 | FE = FE, 233 | TFE = TFE, 234 | cluster = cluster 235 | ) 236 | } 237 | 238 | expect_true(is.null(reg$felevels$`get(idvar)`), TRUE) 239 | expect_true(is.null(reg$nclusters), TRUE) 240 | expect_true(reg$se_type %in% c("stata", "HC1")) 241 | 242 | }) 243 | 244 | test_that("FE = TRUE, 245 | TFE = FALSE, 246 | cluster = FALSE works", { 247 | 248 | df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") 249 | 250 | estimator <- "OLS" 251 | outcomevar <- 
"y_base" 252 | str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 253 | controls <- "x_r" 254 | 255 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 256 | static = F, controls = controls) 257 | 258 | idvar <- "id" 259 | timevar <- "t" 260 | 261 | FE <- TRUE 262 | TFE <- FALSE 263 | cluster <- FALSE 264 | 265 | if (FE & (!TFE) & (!cluster)) { 266 | 267 | reg <- EventStudyOLS( 268 | prepared_model_formula = event_study_formula, 269 | prepared_data = df_test_EventStudyOLS, 270 | idvar = idvar, 271 | timevar = timevar, 272 | FE = FE, 273 | TFE = TFE, 274 | cluster = cluster 275 | ) 276 | } 277 | 278 | expect_true(all.equal(reg$felevels$V1, as.character(unique(df_test_EventStudyOLS$id)))) 279 | expect_true(is.null(reg$felevels$`get(timevar)`), TRUE) 280 | expect_true(is.null(reg$nclusters), TRUE) 281 | expect_true(reg$se_type %in% c("stata", "HC1")) 282 | 283 | }) 284 | 285 | test_that("FE = FALSE, 286 | TFE = FALSE, 287 | cluster = FALSE works", { 288 | 289 | df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") 290 | 291 | estimator <- "OLS" 292 | outcomevar <- "y_base" 293 | str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 294 | controls <- "x_r" 295 | 296 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 297 | static = F, controls = controls) 298 | 299 | idvar <- "id" 300 | timevar <- "t" 301 | 302 | FE <- FALSE 303 | TFE <- FALSE 304 | cluster <- FALSE 305 | 306 | if ((!FE) & (!TFE) & (!cluster)) { 307 | 308 | reg <- EventStudyOLS( 309 | prepared_model_formula = event_study_formula, 310 | prepared_data = df_test_EventStudyOLS, 311 | idvar = idvar, 312 | timevar = timevar, 313 | FE = FE, 314 | TFE = TFE, 315 | cluster = cluster 316 | ) 317 | } 318 | 319 | expect_true(is.null(reg$felevels$`get(idvar)`), TRUE) 320 | expect_true(is.null(reg$felevels$`get(timevar)`), TRUE) 321 | expect_true(is.null(reg$nclusters), TRUE) 322 | expect_true(reg$se_type %in% c("stata", "HC1")) 323 | 324 | }) 325 | 326 | 327 | test_that("Coefficients and Standard Errors agree with base STATA", { 328 | 329 | df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") 330 | 331 | estimator <- "OLS" 332 | outcomevar <- "y_base" 333 | str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 334 | controls <- "x_r" 335 | 336 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 337 | static = F, controls = controls) 338 | 339 | idvar <- "id" 340 | timevar <- "t" 341 | 342 | FE <- TRUE 343 | TFE <- TRUE 344 | cluster <- TRUE 345 | 346 | reg <- EventStudyOLS( 347 | prepared_model_formula = event_study_formula, 348 | prepared_data = df_test_EventStudyOLS, 349 | idvar = idvar, 350 | timevar = timevar, 351 | FE = FE, 352 | TFE = TFE, 353 | cluster = cluster 354 | ) 355 | 356 | df_test_STATA <- read.csv("./input/df_test_base_STATA.csv", col.names = c("term", "coef", "std_error")) 357 | 358 | epsilon <- 10e-7 359 | 360 | expect_equal(unname(reg$coefficients["z_fd"]), df_test_STATA[df_test_STATA["term"] == "zfd",][[2]], tolerance = epsilon) 361 | expect_equal(unname(reg$coefficients["z_fd_lead2"]), df_test_STATA[df_test_STATA["term"] == "F2.zfd",][[2]], tolerance = epsilon) 362 | expect_equal(unname(reg$coefficients["z_fd_lead3"]), df_test_STATA[df_test_STATA["term"] == "F3.zfd",][["coef"]][1], tolerance = epsilon) 363 | 
expect_equal(unname(reg$coefficients["z_fd_lag1"]), df_test_STATA[df_test_STATA["term"] == "L.zfd",][[2]], tolerance = epsilon) 364 | expect_equal(unname(reg$coefficients["z_fd_lag2"]), df_test_STATA[df_test_STATA["term"] == "L2.zfd",][[2]], tolerance = epsilon) 365 | expect_equal(unname(reg$coefficients["z_lead3"]), -1 * df_test_STATA[df_test_STATA["term"] == "F3.z",][["coef"]], tolerance = epsilon) 366 | expect_equal(unname(reg$coefficients["z_lag3"]), df_test_STATA[df_test_STATA["term"] == "L3.z",][[2]], tolerance = epsilon) 367 | expect_equal(unname(reg$coefficients["x_r"]), df_test_STATA[df_test_STATA["term"] == "x_r",][[2]], tolerance = epsilon) 368 | 369 | expect_equal(unname(reg$std.error["z_fd"]), df_test_STATA[df_test_STATA["term"] == "zfd",][[3]], tolerance = epsilon) 370 | expect_equal(unname(reg$std.error["z_fd_lead2"]), df_test_STATA[df_test_STATA["term"] == "F2.zfd",][[3]], tolerance = epsilon) 371 | expect_equal(unname(reg$std.error["z_fd_lead3"]), df_test_STATA[df_test_STATA["term"] == "F3.zfd",][["std_error"]][1], tolerance = epsilon) 372 | expect_equal(unname(reg$std.error["z_fd_lag1"]), df_test_STATA[df_test_STATA["term"] == "L.zfd",][[3]], tolerance = epsilon) 373 | expect_equal(unname(reg$std.error["z_fd_lag2"]),df_test_STATA[df_test_STATA["term"] == "L2.zfd",][[3]], tolerance = epsilon) 374 | expect_equal(unname(reg$std.error["z_lead3"]), df_test_STATA[df_test_STATA["term"] == "F3.z",][["std_error"]], tolerance = epsilon) 375 | expect_equal(unname(reg$std.error["z_lag3"]),df_test_STATA[df_test_STATA["term"] == "L3.z",][[3]], tolerance = epsilon) 376 | expect_equal(unname(reg$std.error["x_r"]), df_test_STATA[df_test_STATA["term"] == "x_r",][[3]], tolerance = epsilon * 10) 377 | }) 378 | -------------------------------------------------------------------------------- /tests/testthat/test-EventStudyFHS.R: -------------------------------------------------------------------------------- 1 | 2 | test_that("FE = TRUE, 3 | TFE = TRUE, 4 | cluster = TRUE works", { 5 | 6 | estimator <- "FHS" 7 | outcomevar <- "y_base" 8 | str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 9 | controls <- "x_r" 10 | proxy <- "eta_m" 11 | proxyIV <- "z_fd_lead3" 12 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, 13 | str_policy_vars, FALSE, controls, proxy, proxyIV) 14 | 15 | idvar <- "id" 16 | timevar <- "t" 17 | 18 | FE <- TRUE 19 | TFE <- TRUE 20 | cluster <- TRUE 21 | 22 | if (FE & TFE & cluster) { 23 | 24 | reg <- EventStudyFHS( 25 | prepared_model_formula = event_study_formula, 26 | prepared_data = df_EventStudyFHS_example, 27 | idvar = idvar, 28 | timevar = timevar, 29 | FE = FE, 30 | TFE = TFE, 31 | cluster = cluster 32 | ) 33 | } 34 | 35 | expect_true(all.equal(reg$felevels$`get(idvar)`, as.character(unique(df_EventStudyFHS_example$id)))) 36 | expect_equal(reg$nclusters, length(unique(df_EventStudyFHS_example$id))) 37 | expect_true(reg$se_type == "stata") 38 | 39 | }) 40 | 41 | test_that("FE = FALSE, 42 | TFE = TRUE, 43 | cluster = TRUE works", { 44 | 45 | estimator <- "FHS" 46 | outcomevar <- "y_base" 47 | str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 48 | controls <- "x_r" 49 | proxy <- "eta_m" 50 | proxyIV <- "z_fd_lead3" 51 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, 52 | str_policy_vars, FALSE, controls, proxy, proxyIV) 53 | 54 | idvar <- "id" 55 | timevar <- "t" 56 | 57 | FE <- FALSE 58 | TFE <- TRUE 59 | cluster <- TRUE 60 | 61 | if ((!FE) & TFE & 
cluster) { 62 | 63 | reg <- EventStudyFHS( 64 | prepared_model_formula = event_study_formula, 65 | prepared_data = df_EventStudyFHS_example, 66 | idvar = idvar, 67 | timevar = timevar, 68 | FE = FE, 69 | TFE = TFE, 70 | cluster = cluster 71 | ) 72 | } 73 | 74 | expect_equal(is.null(reg$felevels$`get(idvar)`), TRUE) 75 | expect_equal(reg$nclusters, length(unique(df_EventStudyFHS_example$id))) 76 | expect_true(reg$se_type == "stata") 77 | 78 | }) 79 | 80 | test_that("FE = TRUE, 81 | TFE = FALSE, 82 | cluster = TRUE works", { 83 | 84 | estimator <- "FHS" 85 | outcomevar <- "y_base" 86 | str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 87 | controls <- "x_r" 88 | proxy <- "eta_m" 89 | proxyIV <- "z_fd_lead3" 90 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, 91 | str_policy_vars, FALSE, controls, proxy, proxyIV) 92 | 93 | idvar <- "id" 94 | timevar <- "t" 95 | 96 | FE <- TRUE 97 | TFE <- FALSE 98 | cluster <- TRUE 99 | 100 | if (FE & (!TFE) & cluster) { 101 | 102 | reg <- EventStudyFHS( 103 | prepared_model_formula = event_study_formula, 104 | prepared_data = df_EventStudyFHS_example, 105 | idvar = idvar, 106 | timevar = timevar, 107 | FE = FE, 108 | TFE = TFE, 109 | cluster = cluster 110 | ) 111 | } 112 | 113 | expect_true(all.equal(reg$felevels$V1, as.character(unique(df_EventStudyFHS_example$id)))) 114 | expect_true(is.null(reg$felevels$`get(timevar)`), TRUE) 115 | expect_equal(reg$nclusters, length(unique(df_EventStudyFHS_example$id))) 116 | expect_true(reg$se_type == "stata") 117 | 118 | }) 119 | 120 | test_that("FE = FALSE, 121 | TFE = FALSE, 122 | cluster = TRUE works", { 123 | 124 | estimator <- "FHS" 125 | outcomevar <- "y_base" 126 | str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 127 | controls <- "x_r" 128 | proxy <- "eta_m" 129 | proxyIV <- "z_fd_lead3" 130 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, 131 | str_policy_vars, FALSE, controls, proxy, proxyIV) 132 | 133 | idvar <- "id" 134 | timevar <- "t" 135 | 136 | FE <- FALSE 137 | TFE <- FALSE 138 | cluster <- TRUE 139 | 140 | if ((!FE) & (!TFE) & cluster) { 141 | 142 | reg <- EventStudyFHS( 143 | prepared_model_formula = event_study_formula, 144 | prepared_data = df_EventStudyFHS_example, 145 | idvar = idvar, 146 | timevar = timevar, 147 | FE = FE, 148 | TFE = TFE, 149 | cluster = cluster 150 | ) 151 | } 152 | 153 | expect_true(is.null(reg$felevels$`get(idvar)`), TRUE) 154 | expect_true(is.null(reg$felevels$`get(timevar)`), TRUE) 155 | expect_equal(reg$nclusters, length(unique(df_EventStudyFHS_example$id))) 156 | expect_true(reg$se_type == "stata") 157 | 158 | }) 159 | 160 | test_that("FE = TRUE, 161 | TFE = TRUE, 162 | cluster = FALSE does not work", { 163 | 164 | estimator <- "FHS" 165 | outcomevar <- "y_base" 166 | str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 167 | controls <- "x_r" 168 | proxy <- "eta_m" 169 | proxyIV <- "z_fd_lead3" 170 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, 171 | str_policy_vars, FALSE, controls, proxy, proxyIV) 172 | 173 | idvar <- "id" 174 | timevar <- "t" 175 | 176 | FE <- TRUE 177 | TFE <- TRUE 178 | cluster <- FALSE 179 | 180 | expect_error( 181 | if (FE & TFE & (!cluster)) { 182 | 183 | reg <- EventStudyFHS( 184 | prepared_model_formula = event_study_formula, 185 | prepared_data = df_EventStudyFHS_example, 186 | idvar = idvar, 187 | timevar = timevar, 188 | FE = FE, 189 | TFE = TFE, 190 | cluster = 
cluster 191 | ) 192 | }, 193 | "cluster=TRUE required when FE=TRUE." 194 | ) 195 | }) 196 | 197 | test_that("FE = FALSE, 198 | TFE = TRUE, 199 | cluster = FALSE works", { 200 | 201 | estimator <- "FHS" 202 | outcomevar <- "y_base" 203 | str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 204 | controls <- "x_r" 205 | proxy <- "eta_m" 206 | proxyIV <- "z_fd_lead3" 207 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, 208 | str_policy_vars, FALSE, controls, proxy, proxyIV) 209 | 210 | idvar <- "id" 211 | timevar <- "t" 212 | 213 | FE <- FALSE 214 | TFE <- TRUE 215 | cluster <- FALSE 216 | 217 | if ((!FE) & TFE & (!cluster)) { 218 | 219 | reg <- EventStudyFHS( 220 | prepared_model_formula = event_study_formula, 221 | prepared_data = df_EventStudyFHS_example, 222 | idvar = idvar, 223 | timevar = timevar, 224 | FE = FE, 225 | TFE = TFE, 226 | cluster = cluster 227 | ) 228 | } 229 | 230 | expect_true(is.null(reg$felevels$`get(idvar)`), TRUE) 231 | expect_true(is.null(reg$nclusters), TRUE) 232 | expect_true(reg$se_type %in% c("stata", "HC1")) 233 | 234 | }) 235 | 236 | test_that("FE = TRUE, 237 | TFE = FALSE, 238 | cluster = FALSE does not work", { 239 | 240 | estimator <- "FHS" 241 | outcomevar <- "y_base" 242 | str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 243 | controls <- "x_r" 244 | proxy <- "eta_m" 245 | proxyIV <- "z_fd_lead3" 246 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, 247 | str_policy_vars, FALSE, controls, proxy, proxyIV) 248 | 249 | idvar <- "id" 250 | timevar <- "t" 251 | 252 | FE <- TRUE 253 | TFE <- FALSE 254 | cluster <- FALSE 255 | 256 | expect_error( 257 | if (FE & (!TFE) & (!cluster)) { 258 | 259 | reg <- EventStudyFHS( 260 | prepared_model_formula = event_study_formula, 261 | prepared_data = df_EventStudyFHS_example, 262 | idvar = idvar, 263 | timevar = timevar, 264 | FE = FE, 265 | TFE = TFE, 266 | cluster = cluster 267 | ) 268 | }, 269 | "cluster=TRUE required when FE=TRUE." 
270 | ) 271 | }) 272 | 273 | test_that("FE = FALSE, 274 | TFE = FALSE, 275 | cluster = FALSE works", { 276 | 277 | estimator <- "FHS" 278 | outcomevar <- "y_base" 279 | str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 280 | controls <- "x_r" 281 | proxy <- "eta_m" 282 | proxyIV <- "z_fd_lead3" 283 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, 284 | str_policy_vars, FALSE, controls, proxy, proxyIV) 285 | 286 | idvar <- "id" 287 | timevar <- "t" 288 | 289 | FE <- FALSE 290 | TFE <- FALSE 291 | cluster <- FALSE 292 | 293 | if ((!FE) & (!TFE) & (!cluster)) { 294 | 295 | reg <- EventStudyFHS( 296 | prepared_model_formula = event_study_formula, 297 | prepared_data = df_EventStudyFHS_example, 298 | idvar = idvar, 299 | timevar = timevar, 300 | FE = FE, 301 | TFE = TFE, 302 | cluster = cluster 303 | ) 304 | } 305 | 306 | expect_true(is.null(reg$felevels$`get(idvar)`), TRUE) 307 | expect_true(is.null(reg$felevels$`get(timevar)`), TRUE) 308 | expect_true(is.null(reg$nclusters), TRUE) 309 | expect_true(reg$se_type %in% c("stata", "HC1")) 310 | 311 | }) 312 | 313 | 314 | test_that("Coefficients and Standard Errors agree with base STATA", { 315 | 316 | bools <- c("TTT", "TFT", "FTF", "FTT", "FFF", "FFT") 317 | 318 | for (i in seq_along(bools)) { 319 | bool <- bools[i] 320 | estimator <- "FHS" 321 | outcomevar <- "y_base" 322 | str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 323 | controls <- "x_r" 324 | proxy <- "eta_m" 325 | proxyIV <- "z_fd_lead3" 326 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, 327 | str_policy_vars, FALSE, controls, proxy, proxyIV) 328 | 329 | idvar <- "id" 330 | timevar <- "t" 331 | 332 | FE <- as.logical(substring(bool, 1, 1)) 333 | TFE <- as.logical(substring(bool, 2, 2)) 334 | cluster <- as.logical(substring(bool, 3, 3)) 335 | 336 | reg <- EventStudyFHS( 337 | prepared_model_formula = event_study_formula, 338 | prepared_data = df_EventStudyFHS_example, 339 | idvar = idvar, 340 | timevar = timevar, 341 | FE = FE, 342 | TFE = TFE, 343 | cluster = cluster 344 | ) 345 | 346 | df_test_STATA <- read.csv("./input/df_test_base_STATA_FHS.csv") 347 | epsilon <- 10e-6 348 | 349 | expect_equal(unname(reg$coefficients["z_fd"]), df_test_STATA[df_test_STATA[1] == "z_fd",][[2*i]], tolerance = epsilon) 350 | expect_equal(unname(reg$coefficients["z_fd_lead2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lead2",][[2*i]], tolerance = epsilon) 351 | expect_equal(unname(reg$coefficients["eta_m"]), df_test_STATA[df_test_STATA[1] == "eta_m",][[2*i]], tolerance = epsilon) 352 | expect_equal(unname(reg$coefficients["z_fd_lag1"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag1",][[2*i]], tolerance = epsilon) 353 | expect_equal(unname(reg$coefficients["z_fd_lag2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag2",][[2*i]], tolerance = epsilon) 354 | expect_equal(unname(reg$coefficients["z_lead3"]), df_test_STATA[df_test_STATA[1] == "z_lead3",][[2*i]]*(-1), tolerance = epsilon) 355 | expect_equal(unname(reg$coefficients["z_lag3"]), df_test_STATA[df_test_STATA[1] == "z_lag3",][[2*i]], tolerance = epsilon) 356 | expect_equal(unname(reg$coefficients["x_r"]), df_test_STATA[df_test_STATA[1] == "x_r",][[2*i]], tolerance = epsilon) 357 | 358 | expect_equal(unname(reg$std.error["z_fd"]), df_test_STATA[df_test_STATA[1] == "z_fd",][[2*i+1]], tolerance = epsilon) 359 | expect_equal(unname(reg$std.error["z_fd_lead2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lead2",][[2*i+1]], tolerance = epsilon) 360
| expect_equal(unname(reg$std.error["eta_m"]), df_test_STATA[df_test_STATA[1] == "eta_m",][[2*i+1]], tolerance = epsilon) 361 | expect_equal(unname(reg$std.error["z_fd_lag1"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag1",][[2*i+1]], tolerance = epsilon) 362 | expect_equal(unname(reg$std.error["z_fd_lag2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag2",][[2*i+1]], tolerance = epsilon) 363 | expect_equal(unname(reg$std.error["z_lead3"]), df_test_STATA[df_test_STATA[1] == "z_lead3",][[2*i+1]], tolerance = epsilon) 364 | expect_equal(unname(reg$std.error["z_lag3"]), df_test_STATA[df_test_STATA[1] == "z_lag3",][[2*i+1]], tolerance = epsilon) 365 | expect_equal(unname(reg$std.error["x_r"]), df_test_STATA[df_test_STATA[1] == "x_r",][[2*i+1]], tolerance = epsilon) 366 | } 367 | }) 368 | 369 | -------------------------------------------------------------------------------- /R/EventStudy.R: -------------------------------------------------------------------------------- 1 | #' Estimates Equation (2) in Freyaldenhoven et al. (2021) 2 | #' 3 | #' @description `EventStudy` uses regression methods to estimate the effect of a policy on a given outcome. 4 | #' 5 | #' @param estimator Accepts one of "OLS" or "FHS". If "OLS" is specified, implements Ordinary Least Squares. If "FHS" is specified, implements Instrumental Variables (IV) estimator proposed in [Freyaldenhoven Hansen Shapiro (FHS, 2019)](https://www.aeaweb.org/articles?id=10.1257/aer.20180609). 6 | #' @param data Data frame containing the variables of interest. 7 | #' @param outcomevar Character indicating column of outcome variable y. 8 | #' @param policyvar Character indicating column of policy variable z. 9 | #' @param idvar Character indicating column of units. 10 | #' @param timevar Character indicating column of time periods. 11 | #' @param controls Optional character vector indicating a set of control variables q. 12 | #' @param proxy Character indicating column of variable that is thought to be affected by the confound but not by the policy. 13 | #' Should be specified if and only if estimator is specified as "FHS". 14 | #' @param proxyIV Character of column to be used as an instrument. Should be specified if and only if estimator is specified as "FHS". 15 | #' If NULL, defaults to the strongest lead of the policy variable based on the first stage. 16 | #' @param FE Logical indicating whether unit fixed-effects should be included. Defaults to TRUE. 17 | #' @param TFE Logical indicating whether time fixed-effects should be included. Defaults to TRUE. 18 | #' @param cluster Logical indicating whether to use clustered errors by units. If FALSE, will use unclustered heteroskedasticity-robust standard errors. 19 | #' Defaults to TRUE. Must be TRUE if FE is TRUE. 20 | #' @param post Whole number indicating the number of periods in the past before which the past values of the policy 21 | #' are not supposed to affect the value of the outcome. Corresponds to M in equation (2) of 22 | #' [Freyaldenhoven et al. (2021)](https://www.nber.org/papers/w29170). 23 | #' @param overidpost Optional whole number indicating the number of event times after "post" to be included in estimation. 24 | #' Defaults to 1. 25 | #' Corresponds to L_M in equation (2) of [Freyaldenhoven et al. (2021)](https://www.nber.org/papers/w29170). 26 | #' @param pre Whole number indicating the number of periods in the future after which the future values of the policy are 27 | #' not supposed to affect the value of the outcome today. 
Corresponds to G in equation (2) of 28 | #' [Freyaldenhoven et al. (2021)](https://www.nber.org/papers/w29170). 29 | #' @param overidpre Optional whole number indicating the number of event times earlier than -"pre" to be included in estimation. 30 | #' Defaults to "post" + "pre". 31 | #' Corresponds to L_G in equation (2) of [Freyaldenhoven et al. (2021)](https://www.nber.org/papers/w29170). 32 | #' @param normalize Specifies the event-time coefficient to be normalized. Defaults to - pre - 1. 33 | #' @param anticipation_effects_normalization If set to TRUE, runs the default process and switches coefficient to be normalized to 0 34 | #' when there are anticipation effects. If set to FALSE, does not make the switch. Defaults to TRUE. 35 | #' @param allow_duplicate_id If TRUE, the function estimates a regression where duplicated ID-time rows are weighted by their duplication count. If FALSE, the function raises an error if duplicate unit-time keys exist in the input data. Default is FALSE. 36 | #' @param avoid_internal_copy If TRUE, the function avoids making an internal deep copy of the input data, and instead directly modifies the input data.table. Default is FALSE. 37 | #' 38 | #' @return A list that contains, under "output", the estimation output as an lm_robust object, and under "arguments", the arguments passed to the function. 39 | #' @import dplyr 40 | #' @import estimatr 41 | #' @importFrom stats reformulate 42 | #' @importFrom data.table setorderv as.data.table is.data.table .SD copy 43 | #' @export 44 | #' 45 | #' @examples 46 | #' 47 | #' # A minimal example 48 | #' eventstudy_model <- 49 | #' EventStudy( 50 | #' estimator = "OLS", 51 | #' data = example_data, 52 | #' outcomevar = "y_base", 53 | #' policyvar = "z", 54 | #' idvar = "id", 55 | #' timevar = "t", 56 | #' pre = 0, post = 3, 57 | #' normalize = -1 58 | #' ) 59 | #' 60 | #' ### Access estimated model 61 | #' eventstudy_model$output 62 | #' 63 | #' summary(eventstudy_model$output) 64 | #' 65 | #' ### data.frame of estimates 66 | #' estimatr::tidy(eventstudy_model$output) 67 | #' 68 | #' ### Access arguments 69 | #' eventstudy_model$arguments 70 | #' 71 | #' # A dynamic OLS model with anticipation effects and controls 72 | #' eventstudy_model_dyn <- 73 | #' EventStudy( 74 | #' estimator = "OLS", 75 | #' data = example_data, 76 | #' outcomevar = "y_base", 77 | #' policyvar = "z", 78 | #' idvar = "id", 79 | #' timevar = "t", 80 | #' controls = "x_r", 81 | #' FE = TRUE, TFE = TRUE, 82 | #' post = 3, overidpost = 5, 83 | #' pre = 2, overidpre = 4, 84 | #' normalize = - 3, 85 | #' cluster = TRUE, 86 | #' anticipation_effects_normalization = TRUE 87 | #' ) 88 | #' 89 | #' summary(eventstudy_model_dyn$output) 90 | #' 91 | #' # A static model 92 | #' eventstudy_model_static <- 93 | #' EventStudy( 94 | #' estimator = "OLS", 95 | #' data = example_data, 96 | #' outcomevar = "y_jump_m", 97 | #' policyvar = "z", 98 | #' idvar = "id", 99 | #' timevar = "t", 100 | #' FE = TRUE, TFE = TRUE, 101 | #' post = 0, overidpost = 0, 102 | #' pre = 0, overidpre = 0, 103 | #' cluster = TRUE 104 | #' ) 105 | #' 106 | #' summary(eventstudy_model_static$output) 107 | #' 108 | #' # A dynamic model with an unbalanced panel 109 | #' data_unbal <- example_data[1:(nrow(example_data)-1),] # drop last row to make unbalanced 110 | #' 111 | #' eventstudy_model_unbal <- 112 | #' EventStudy( 113 | #' estimator = "OLS", 114 | #' data = data_unbal, 115 | #' outcomevar = "y_base", 116 | #' policyvar = "z", 117 | #' idvar = "id", 118 | #' timevar = "t", 119 | #' 
pre = 0, post = 3, 120 | #' normalize = -1 121 | #' ) 122 | #' 123 | #' summary(eventstudy_model_unbal$output) 124 | #' 125 | #' # A dynamic model estimated using IV 126 | #' eventstudy_model_iv <- 127 | #' EventStudy( 128 | #' estimator = "FHS", 129 | #' data = example_data, 130 | #' outcomevar = "y_base", 131 | #' policyvar = "z", 132 | #' idvar = "id", 133 | #' timevar = "t", 134 | #' proxy = "x_r", 135 | #' FE = TRUE, TFE = TRUE, 136 | #' post = 2, overidpost = 1, 137 | #' pre = 0, overidpre = 3, 138 | #' normalize = -1, 139 | #' cluster = TRUE 140 | #' ) 141 | #' 142 | #' summary(eventstudy_model_iv$output) 143 | #' 144 | 145 | EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, controls = NULL, 146 | proxy = NULL, proxyIV = NULL, FE = TRUE, TFE = TRUE, post, overidpost = 1, pre, overidpre = post + pre, 147 | normalize = -1 * (pre + 1), cluster = TRUE, anticipation_effects_normalization = TRUE, 148 | allow_duplicate_id = FALSE, avoid_internal_copy = FALSE) { 149 | 150 | # Check for errors in arguments 151 | if (! estimator %in% c("OLS", "FHS")) {stop("estimator should be either 'OLS' or 'FHS'.")} 152 | if (! is.data.frame(data)) {stop("data should be a data frame.")} 153 | for (var in c(idvar, timevar, outcomevar, policyvar)) { 154 | if ((! is.character(var))) { 155 | stop(paste0(var, " should be a character.")) 156 | } 157 | if (! var %in% colnames(data)) { 158 | stop(paste0(var, " should be the name of a variable in the dataset.")) 159 | } 160 | } 161 | if (! (is.null(controls) | is.character(controls))) {stop("controls should be either NULL or a character.")} 162 | 163 | if ((estimator == "OLS" & ! is.null(proxy))) {stop("proxy should only be specified when estimator = 'FHS'.")} 164 | if ((estimator == "FHS" & ! is.character(proxy))) {stop("proxy should be a character.")} 165 | if ((estimator == "OLS" & ! is.null(proxyIV))) {stop("proxyIV should only be specified when estimator = 'FHS'.")} 166 | if ((estimator == "FHS" & 167 | ! is.null(proxyIV) & ! is.character(proxyIV))) {stop("proxyIV should be a character.")} 168 | if (estimator == "FHS" & pre == 0 & overidpre == 0 & is.null(proxyIV)) { 169 | stop("When estimator is 'FHS' and there are no leads in the model, proxyIV must be specified explicitly.") 170 | } 171 | 172 | for (var in c(FE, TFE, cluster, anticipation_effects_normalization, allow_duplicate_id, avoid_internal_copy)) { 173 | if (! is.logical(var)) { 174 | stop(paste0(var, " should be either TRUE or FALSE.")) 175 | } 176 | } 177 | 178 | if (FE & !cluster) {stop("cluster=TRUE is required when FE=TRUE.")} 179 | 180 | if (! (is.numeric(post) & post >= 0 & post %% 1 == 0)) {stop("post should be a whole number.")} 181 | if (! (is.numeric(overidpost) & overidpost >= 0 & overidpost %% 1 == 0)) {stop("overidpost should be a whole number.")} 182 | if (! (is.numeric(pre) & pre >= 0 & pre %% 1 == 0)) {stop("pre should be a whole number.")} 183 | if (! (is.numeric(overidpre) & overidpre >= 0 & overidpre %% 1 == 0)) {stop("overidpre should be a whole number.")} 184 | if (normalize == 0 & post == 0 & overidpost == 0 & pre == 0 & overidpre == 0) {stop("normalize cannot be zero when post = overidpost = pre = overidpre = 0.")} 185 | if (! (is.numeric(normalize) & normalize %% 1 == 0 186 | & normalize >= -(pre + overidpre + 1) & normalize <= post + overidpost)) { 187 | stop("normalize should be an integer between -(pre + overidpre + 1) and (post + overidpost).") 188 | } 189 | if (avoid_internal_copy & ! 
data.table::is.data.table(data)) { 190 | warning("`avoid_internal_copy` has no effect because dataset passed to `data` is not a `data.table`.") 191 | } 192 | 193 | # Check for errors in data 194 | if (! is.numeric(data[[timevar]])) {stop("timevar column in dataset should be numeric.")} 195 | if (! all(data[[timevar]] %% 1 == 0)) { 196 | stop("timevar column in dataset should be a vector of integers.") 197 | } 198 | 199 | if (data.table::is.data.table(data)) { 200 | if (!avoid_internal_copy) { 201 | data <- data.table::copy(data) 202 | } 203 | } else { 204 | data <- data.table::as.data.table(data) 205 | } 206 | data.table::setorderv(data, c(idvar, timevar)) 207 | data_ids <- data[, .SD, .SDcols = c(idvar, timevar)] 208 | 209 | # Check panel balance and unique keys 210 | n_units <- length(base::unique(data[[idvar]])) 211 | n_periods <- length(base::unique(data[[timevar]])) 212 | n_unique_rows <- nrow(data[!base::duplicated(data_ids),]) 213 | if (n_unique_rows != n_units*n_periods) { 214 | warning("Dataset is unbalanced.") 215 | unbalanced <- TRUE 216 | } else { 217 | unbalanced <- FALSE 218 | } 219 | if (n_unique_rows != nrow(data)) { 220 | if (allow_duplicate_id == TRUE) { 221 | warning("idvar-timevar pairs do not uniquely identify all rows in the data.") 222 | } else if (allow_duplicate_id == FALSE) { 223 | stop("idvar-timevar pairs do not uniquely identify all rows in the data. Turn on allow_duplicate_id if you want to proceed with weighted duplicated rows.") 224 | } 225 | } 226 | 227 | detect_holes <- function(dt, idvar, timevar) { 228 | holes_per_id <- dt[, .SD[!is.na(base::get(timevar))], by = c(idvar) 229 | ][, list(holes = any(base::diff(base::get(timevar)) != 1)), 230 | by = c(idvar)] 231 | 232 | return(any(holes_per_id$holes)) 233 | } 234 | 235 | if (detect_holes(data, idvar, timevar)) { 236 | warning(paste0("Note: gaps of more than one unit in the time variable '", timevar, "' were detected. ", 237 | "Treating these as gaps in the panel dimension.")) 238 | timevar_holes <- TRUE 239 | } else { 240 | timevar_holes <- FALSE 241 | } 242 | 243 | if (post == 0 & overidpost == 0 & pre == 0 & overidpre == 0) { 244 | static <- TRUE 245 | } else { 246 | static <- FALSE 247 | } 248 | 249 | num_evenstudy_coeffs <- overidpre + pre + post + overidpost 250 | num_periods <- max(data[[timevar]], na.rm = T) - min(data[[timevar]], na.rm = T) 251 | if (num_evenstudy_coeffs > num_periods - 1) {stop("overidpre + pre + post + overidpost cannot exceed the data window.")} 252 | 253 | for (tag in c("_fd", "_lead", "_lag")) { 254 | if (sum(grepl(paste0(policyvar, tag), colnames(data))) > 0) { 255 | warning(paste0("Variables starting with ", policyvar, tag, 256 | " should be reserved for usage by eventstudyr.")) 257 | } 258 | } 259 | 260 | # Compute shifts in policy variable 261 | num_fd_lags <- post + overidpost - 1 262 | num_fd_leads <- pre + overidpre 263 | 264 | furthest_lag_period <- num_fd_lags + 1 265 | 266 | if (static) { 267 | message("post, overidpost, pre, and overidpre are set to 0. 
A static model will be estimated.") 268 | } else { 269 | data <- ComputeFirstDifferences(data, idvar, timevar, policyvar, timevar_holes) 270 | 271 | if ((post + overidpost - 1 >= 1) & (pre + overidpre >= 1)) { 272 | shift_values = c(-num_fd_leads:-1, 1:num_fd_lags) 273 | } else if (pre + overidpre < 1) { 274 | shift_values = 1:num_fd_lags 275 | } else if (post + overidpost - 1 < 1) { 276 | shift_values = -num_fd_leads:-1 277 | } 278 | 279 | data <- ComputeShifts(data, idvar, timevar, 280 | shiftvar = paste0(policyvar, "_fd"), 281 | shiftvalues = shift_values, 282 | timevar_holes = timevar_holes) 283 | } 284 | 285 | if (!static) { 286 | data <- ComputeShifts(data, idvar, timevar, 287 | shiftvar = policyvar, 288 | shiftvalues = c(-num_fd_leads, furthest_lag_period), 289 | timevar_holes = timevar_holes) 290 | 291 | lead_endpoint_var <- paste0(policyvar, "_lead", num_fd_leads) 292 | data[, (lead_endpoint_var) := 1 - get(lead_endpoint_var)] 293 | } 294 | 295 | if (pre != 0 & normalize == -1 & anticipation_effects_normalization) { 296 | normalize <- -pre - 1 297 | warning(paste("You allowed for anticipation effects", pre, 298 | "periods before the event, so the coefficient at", normalize, 299 | "was selected to be normalized to zero.", 300 | "To override this, change anticipation_effects_normalization to FALSE.")) 301 | } 302 | 303 | if (normalize < 0) { 304 | if (normalize == -(pre + overidpre + 1)) { 305 | normalization_column <- paste0(policyvar, "_lead", (-1 * (normalize + 1))) 306 | } else { 307 | normalization_column <- paste0(policyvar, "_fd_lead", (-1 * normalize)) 308 | } 309 | } else if (normalize == 0){ 310 | if (normalize == post + overidpost) { 311 | normalization_column <- paste0(policyvar, "_lag", (normalize)) 312 | } else { 313 | normalization_column <- paste0(policyvar, "_fd") 314 | } 315 | } else { 316 | if (normalize == post + overidpost) { 317 | normalization_column <- paste0(policyvar, "_lag", (normalize)) 318 | } else { 319 | normalization_column <- paste0(policyvar, "_fd_lag", (normalize)) 320 | } 321 | } 322 | 323 | if (static) { 324 | str_policy_vars = policyvar 325 | } else { 326 | all_vars <- names(data)[grepl(policyvar, names(data))] 327 | 328 | lead_endpoint_var <- all_vars[grepl(paste0("^", policyvar, "_lead"), all_vars)] 329 | lead_fd_vars <- all_vars[grepl(paste0("^", policyvar, "_fd_lead"), all_vars)] 330 | fd_var <- paste0(policyvar, "_fd") 331 | lag_fd_vars <- all_vars[grepl(paste0("^", policyvar, "_fd_lag"), all_vars)] 332 | lag_endpoint_var <- all_vars[grepl(paste0("^", policyvar, "_lag"), all_vars)] 333 | 334 | str_policy_vars <- c(lead_endpoint_var, lead_fd_vars, fd_var, lag_fd_vars, lag_endpoint_var) 335 | str_policy_vars <- str_policy_vars[!(str_policy_vars %in% normalization_column)] 336 | } 337 | 338 | if (estimator == "OLS") { 339 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 340 | static, controls, proxy, proxyIV) 341 | 342 | output <- EventStudyOLS(event_study_formula, data, idvar, timevar, FE, TFE, cluster) 343 | coefficients <- str_policy_vars 344 | } 345 | if (estimator == "FHS") { 346 | 347 | if (is.null(proxyIV)) { 348 | Fstart <- 0 349 | str_fd_leads <- str_policy_vars[grepl(paste0("^", policyvar, "_fd_lead"), str_policy_vars)] 350 | 351 | for (var in str_fd_leads) { 352 | lm <- lm(data = data, formula = stats::reformulate(termlabels = var, response = proxy)) 353 | Floop <- summary(lm)$fstatistic["value"] 354 | if (Floop > Fstart) { 355 | Fstart <- Floop 356 | proxyIV <- var 357 | } 358 | } 359 | message(paste0("Defaulting to
strongest lead of differenced policy variable: proxyIV = ", proxyIV, 360 | ". To specify a different proxyIV use the proxyIV argument.")) 361 | } 362 | 363 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 364 | static, controls, proxy, proxyIV) 365 | 366 | output <- EventStudyFHS(event_study_formula, data, idvar, timevar, FE, TFE, cluster) 367 | coefficients <- dplyr::setdiff(str_policy_vars, proxyIV) 368 | } 369 | 370 | event_study_args <- list("estimator" = estimator, 371 | "data" = data, 372 | "outcomevar" = outcomevar, 373 | "policyvar" = policyvar, 374 | "idvar" = idvar, 375 | "timevar" = timevar, 376 | "controls" = controls, 377 | "proxy" = proxy, 378 | "proxyIV" = proxyIV, 379 | "FE" = FE, 380 | "TFE" = TFE, 381 | "post" = post, 382 | "overidpost" = overidpost, 383 | "pre" = pre, 384 | "overidpre" = overidpre, 385 | "normalize" = normalize, 386 | "normalization_column" = normalization_column, 387 | "cluster" = cluster, 388 | "eventstudy_coefficients" = coefficients) 389 | 390 | return(list("output" = output, 391 | "arguments" = event_study_args)) 392 | } 393 | --------------------------------------------------------------------------------
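
Note on the normalize argument: the normalization block in R/EventStudy.R maps normalize to the name of the event-time coefficient that is dropped from the regression, with separate cases for the endpoint levels and the first-difference leads and lags. The minimal sketch below restates that mapping so the naming scheme can be checked interactively; NormalizationColumnSketch is a hypothetical standalone helper written for illustration and is not a function exported by eventstudyr.

NormalizationColumnSketch <- function(policyvar, pre, overidpre, post, overidpost, normalize) {
    if (normalize < 0) {
        if (normalize == -(pre + overidpre + 1)) {
            paste0(policyvar, "_lead", -1 * (normalize + 1))  # left endpoint coefficient
        } else {
            paste0(policyvar, "_fd_lead", -1 * normalize)     # a lead of the first difference
        }
    } else if (normalize == 0) {
        if (normalize == post + overidpost) {
            paste0(policyvar, "_lag", normalize)              # right endpoint in the degenerate case
        } else {
            paste0(policyvar, "_fd")                          # contemporaneous first difference
        }
    } else {
        if (normalize == post + overidpost) {
            paste0(policyvar, "_lag", normalize)              # right endpoint coefficient
        } else {
            paste0(policyvar, "_fd_lag", normalize)           # a lag of the first difference
        }
    }
}

# With an event window like the one in the test files above (three leads, three lags),
# the default normalize = -1 drops z_fd_lead1, consistent with the str_policy_vars
# vectors in those tests, which omit z_fd_lead1; normalizing on the left endpoint
# (normalize = -4) drops z_lead3 instead.
NormalizationColumnSketch("z", pre = 0, overidpre = 3, post = 3, overidpost = 0, normalize = -1)
#> [1] "z_fd_lead1"
NormalizationColumnSketch("z", pre = 0, overidpre = 3, post = 3, overidpost = 0, normalize = -4)
#> [1] "z_lead3"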