├── .github ├── .gitignore └── workflows │ └── check-standard.yaml ├── vignettes ├── .gitignore └── documentation.Rmd ├── LICENSE ├── tests ├── testthat │ ├── input │ │ ├── .gitignore │ │ ├── pvalues.txt │ │ ├── df_test_base_STATA_allzero.csv │ │ ├── df_test_AddCI.csv │ │ ├── make.log │ │ ├── df_test.csv │ │ ├── df_test_base_STATA.csv │ │ ├── readme.MD │ │ ├── make.py │ │ ├── df_test_base_STATA_FHS.csv │ │ └── code │ │ │ ├── build_OLS.do │ │ │ └── build_FHS.do │ ├── test-AddSmPath.R │ ├── test-ComputeFirstDifferences.R │ ├── test-AddMeans.R │ ├── test-AddSuptBand.R │ ├── test-PrepareModelFormula.R │ ├── test-TestLinear.R │ ├── test-ComputeShifts.R │ ├── test-AddCIs.R │ ├── test-PreparePlottingData.R │ ├── test-EventStudyPlot.R │ ├── test-EventStudyOLS.R │ └── test-EventStudyFHS.R └── testthat.R ├── R ├── .gitattributes ├── sysdata.rda ├── data.R ├── AddMeans.R ├── AddCIs.R ├── ComputeFirstDifferences.R ├── AddSuptBand.R ├── ComputeShifts.R ├── AddSmPath.R ├── PrepareModelFormula.R ├── EventStudyOLS.R ├── TestLinear.R ├── EventStudyFHS.R ├── PreparePlottingData.R ├── SmPathHelpers.R ├── EventStudyPlot.R └── EventStudy.R ├── data ├── .gitattributes └── example_data.rda ├── man ├── figures │ ├── logo.png │ └── README-example-1.png ├── example_data.Rd ├── TestLinear.Rd ├── EventStudyPlot.Rd └── EventStudy.Rd ├── examples └── source │ └── raw │ └── eventstudy_illustration_data │ ├── orig │ ├── simulation_data_static.dta │ └── simulation_data_dynamic.dta │ └── README.md ├── .Rbuildignore ├── data-raw ├── example_data.R └── save_internal_data.R ├── .gitattributes ├── .gitignore ├── inst └── CITATION ├── cran-comments.md ├── EventStudyR.Rproj ├── NAMESPACE ├── LICENSE.md ├── LICENSE.note ├── DESCRIPTION ├── README.md └── README.Rmd /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2023 2 | COPYRIGHT HOLDER: JMSLab 3 | -------------------------------------------------------------------------------- /tests/testthat/input/.gitignore: -------------------------------------------------------------------------------- 1 | !df_test*.csv 2 | 3 | -------------------------------------------------------------------------------- /R/.gitattributes: -------------------------------------------------------------------------------- 1 | *.rda -filter=lfs -diff=lfs -merge=lfs -text 2 | -------------------------------------------------------------------------------- /data/.gitattributes: -------------------------------------------------------------------------------- 1 | *.rda -filter=lfs -diff=lfs -merge=lfs -text 2 | -------------------------------------------------------------------------------- /tests/testthat/input/pvalues.txt: -------------------------------------------------------------------------------- 1 | .8270326624995562 2 | .6923175802538313 3 | -------------------------------------------------------------------------------- /R/sysdata.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JMSLab/eventstudyr/HEAD/R/sysdata.rda 
-------------------------------------------------------------------------------- /data/example_data.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JMSLab/eventstudyr/HEAD/data/example_data.rda -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(eventstudyr) 3 | 4 | test_check("eventstudyr") 5 | -------------------------------------------------------------------------------- /tests/testthat/input/df_test_base_STATA_allzero.csv: -------------------------------------------------------------------------------- 1 | "","coef","std_error" 2 | "z",".1045735",".0575732" 3 | -------------------------------------------------------------------------------- /tests/testthat/input/df_test_AddCI.csv: -------------------------------------------------------------------------------- 1 | term,estimate,std.error 2 | z_fd,1,1 3 | z_fd_lag1,-2,1 4 | z_fd_lead1,2,1 5 | -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8500ef3719d6f69e213f7f85dd45ec8af8fe3c8e977b1b51e389ccffc1fbf020 3 | size 21970 4 | -------------------------------------------------------------------------------- /man/figures/README-example-1.png: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:249d7897c569857a28cef3387690dbf5caafa7ae89b3abb19d776ec19eb5c0c4 3 | size 4389 4 | -------------------------------------------------------------------------------- /tests/testthat/input/make.log: -------------------------------------------------------------------------------- 1 | This file was created by make.py 2 | Current date: 2023-03-19 -- 11:41:34 3 | 4 | Execution of code\build_FHS.do completed 5 | Execution of code\build_OLS.do completed 6 | -------------------------------------------------------------------------------- /examples/source/raw/eventstudy_illustration_data/orig/simulation_data_static.dta: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:c19a432e3e1967bf53b6d521ef2a46a45c420fa04cf0cf29aa09238ab3eaf867 3 | size 45415 4 | -------------------------------------------------------------------------------- /examples/source/raw/eventstudy_illustration_data/orig/simulation_data_dynamic.dta: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:d42e65f17cb4ef6a7c46874db4db3c9450f71e6a396aa5993f738d99724c271b 3 | size 105895 4 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^eventstudyr\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.github$ 4 | ^LICENSE\.md$ 5 | ^issue[0-9]+$ 6 | ^examples$ 7 | ^data-raw$ 8 | ^vignettes/eventstudyr.Rmd$ 9 | ^cran-comments\.md$ 10 | ^README\.Rmd$ 11 | ^revdep$ 12 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' Sample dataset obtained from the replication archive 
for Freyaldenhoven et al. (2021) 2 | #' 3 | #' @source Dataset in .dta format can be found in the .zip archive in \url{https://data.nber.org/data-appendix/w29170/} 4 | "example_data" 5 | 6 | -------------------------------------------------------------------------------- /data-raw/example_data.R: -------------------------------------------------------------------------------- 1 | 2 | library(haven) 3 | 4 | indir <- 'examples/source/raw/eventstudy_illustration_data/orig' 5 | 6 | example_data <- read_dta(sprintf('%s/simulation_data_dynamic.dta', indir)) 7 | usethis::use_data(example_data, overwrite = TRUE, version = 3) 8 | -------------------------------------------------------------------------------- /tests/testthat/input/df_test.csv: -------------------------------------------------------------------------------- 1 | country,values,periods 2 | A,0.7838108576834202,1 3 | A,0.1423108058515936,2 4 | A,0.8868071401957422,3 5 | A,0.9385397769510746,4 6 | B,0.921952681383118,1 7 | B,0.7619687209371477,2 8 | B,0.8936178737785667,3 9 | B,0.6012312234379351,4 10 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.dta filter=lfs diff=lfs merge=lfs -text 2 | *.RData filter=lfs diff=lfs merge=lfs -text 3 | *.rda filter=lfs diff=lfs merge=lfs -text 4 | *.rds filter=lfs diff=lfs merge=lfs -text 5 | *.pdf filter=lfs diff=lfs merge=lfs -text 6 | *.png filter=lfs diff=lfs merge=lfs -text 7 | *.xlsx filter=lfs diff=lfs merge=lfs -text 8 | -------------------------------------------------------------------------------- /tests/testthat/input/df_test_base_STATA.csv: -------------------------------------------------------------------------------- 1 | "","coef","std_error" 2 | "zfd",".0774304",".1290923" 3 | "x_r","-.0522901",".02916" 4 | "L.zfd",".3167293",".1759148" 5 | "L2.zfd",".2994021",".1529249" 6 | "F2.zfd",".0770573",".1660454" 7 | "F3.zfd",".1157483",".1619981" 8 | "F3.z","-.1641421",".1424652" 9 | "L3.z",".2121165",".1309685" 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /drive 2 | /datastore 3 | /temp 4 | *.pyc 5 | *.lyx~ 6 | *.lyx# 7 | *.RDS 8 | *.RData 9 | *.Rhistory 10 | *.Rapp.history 11 | *.DS_Store 12 | *.aux 13 | *.fls 14 | *.lof 15 | *.lot 16 | *.nav 17 | *.snm 18 | *.toc 19 | *.out 20 | *.svn 21 | *.m~ 22 | .Rproj.user 23 | !data/*.RData 24 | !data/*.rda 25 | *.csv 26 | inst/doc 27 | revdep/ 28 | *.Rproj 29 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite eventstudyr in publications use:") 2 | 3 | bibentry( 4 | bibtype = "Misc", 5 | title = "eventstudyr package", 6 | author = "Simon Freyaldenhoven, Christian Hansen, Jorge Pérez Pérez, Jesse M. Shapiro, Veli M. Andirin, Richard Calvo, Santiago Hermo, Nathan Schor, Emily Wang", 7 | url = "https://github.com/JMSLab/eventstudyr", 8 | year = 2023 9 | ) 10 | -------------------------------------------------------------------------------- /tests/testthat/input/readme.MD: -------------------------------------------------------------------------------- 1 | ## Overview 2 | 3 | This directory contains input data for unit tests. 4 | 5 | - The `/code/` folder stores `.do` files that construct inputs for unit tests. 
6 | - The code uses data from the `/examples/` folder. 7 | 8 | To build the input folder run `make.py`. 9 | 10 | ### Requirements 11 | 12 | To run the code you need: 13 | 14 | - STATA compiler 15 | - Python compiler 16 | - `estout` package for STATA 17 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Notes 2 | 3 | Re-submission of the package with patch update. 4 | 5 | ## R CMD check results 6 | 7 | 0 errors | 0 warnings | 0 notes 8 | 9 | ## revdepcheck results 10 | 11 | We checked 0 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package. 12 | 13 | * We saw 0 new problems 14 | * We failed to check 0 packages 15 | 16 | ## Package changes 17 | 18 | * Implemented patch updates for compatibility with `ggplot2` 3.6.0 19 | -------------------------------------------------------------------------------- /EventStudyR.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 4 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /examples/source/raw/eventstudy_illustration_data/README.md: -------------------------------------------------------------------------------- 1 | ### Overview 2 | This directory includes sample datasets downloaded from the replication archive for [Freyaldenhoven et al. (2021)](https://www.nber.org/papers/w29170) 3 | 4 | ### Source 5 | NBER 6 | 7 | ### When/where obtained & original form of files 8 | 9 | Zip archive with datasets was downloaded from [this page](https://data.nber.org/data-appendix/w29170/) on February 2, 2022 by Veli Murat Andirin. 10 | 11 | ### Description 12 | ./orig: Sample datasets as `.dta` files. 13 | 14 | ./docs: Screenshot of the download page. 15 | 16 | -------------------------------------------------------------------------------- /man/example_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{example_data} 5 | \alias{example_data} 6 | \title{Sample dataset obtained from the replication archive for Freyaldenhoven et al. (2021)} 7 | \format{ 8 | An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 2000 rows and 12 columns. 9 | } 10 | \source{ 11 | Dataset in .dta format can be found in the .zip archive in \url{https://data.nber.org/data-appendix/w29170/} 12 | } 13 | \usage{ 14 | example_data 15 | } 16 | \description{ 17 | Sample dataset obtained from the replication archive for Freyaldenhoven et al. 
(2021) 18 | } 19 | \keyword{datasets} 20 | -------------------------------------------------------------------------------- /tests/testthat/input/make.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import time 4 | 5 | with open('make.log', 'w') as f: 6 | f.write('This file was created by make.py\n') 7 | f.write(f'Current date: {time.strftime("%Y-%m-%d -- %H:%M:%S")}\n\n') 8 | 9 | do_files = glob.glob('code/*.do') 10 | 11 | stata = 'StataSE-64' 12 | 13 | log_files = glob.glob('log/*.log') 14 | for ff in log_files: 15 | os.remove(ff) 16 | 17 | for ff in do_files: 18 | os.system(f'{stata} /e {ff}') 19 | 20 | with open('make.log', 'a') as f: 21 | f.write(f'Execution of {ff} completed\n') 22 | 23 | ff_base = os.path.basename(ff) 24 | ff_log = ff_base.replace('.do', '.log') 25 | 26 | os.rename(f'{ff_log}', f'log/{ff_log}') 27 | 28 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(EventStudy) 4 | export(EventStudyPlot) 5 | export(TestLinear) 6 | import(dplyr) 7 | import(estimatr) 8 | import(ggplot2) 9 | import(stringr) 10 | importFrom(MASS,mvrnorm) 11 | importFrom(car,linearHypothesis) 12 | importFrom(data.table,":=") 13 | importFrom(data.table,.SD) 14 | importFrom(data.table,CJ) 15 | importFrom(data.table,as.data.table) 16 | importFrom(data.table,copy) 17 | importFrom(data.table,is.data.table) 18 | importFrom(data.table,setnames) 19 | importFrom(data.table,setorder) 20 | importFrom(data.table,setorderv) 21 | importFrom(data.table,shift) 22 | importFrom(pracma,inv) 23 | importFrom(pracma,pinv) 24 | importFrom(rlang,.data) 25 | importFrom(stats,as.formula) 26 | importFrom(stats,pnorm) 27 | importFrom(stats,qchisq) 28 | importFrom(stats,qnorm) 29 | importFrom(stats,reformulate) 30 | importFrom(stats,setNames) 31 | -------------------------------------------------------------------------------- /tests/testthat/input/df_test_base_STATA_FHS.csv: -------------------------------------------------------------------------------- 1 | "","coef","std_error","coef","std_error","coef","std_error","coef","std_error","coef","std_error","coef","std_error" 2 | "eta_m","1.669717","2.2977","1.948522","2.130648","1.634839","2.282194","1.634839","2.172034","1.94113","2.235042","1.94113","2.076632" 3 | "x_r","-.0500838",".028236","-.0533449",".0277989","-.0488262",".0323695","-.0488262",".0295303","-.0524275",".0322331","-.0524275",".0286475" 4 | "z_lead3",".0456388",".2185155",".0980764",".2460766",".0703236",".2541796",".0703236",".2390053",".1105664",".2566083",".1105664",".2469213" 5 | "z_fd_lead2",".020128",".1406527",".0128612",".1433497",".0163729",".1297879",".0163729",".1410474",".0091793",".1303523",".0091793",".1437379" 6 | "z_fd",".4703923",".6147443",".4809733",".5654271",".4654961",".6314258",".4654961",".5886608",".4814239",".6164001",".4814239",".553323" 7 | "z_fd_lag1","1.039846","1.085427","1.183739","1.008607","1.033747","1.082208","1.033747","1.037399","1.187588","1.060234","1.187588",".9880124" 8 | "z_fd_lag2","1.355806","1.53003","1.547324","1.410691","1.343855","1.536298","1.343855","1.457308","1.549578","1.504329","1.549578","1.376885" 9 | "z_lag3","1.889111","2.380558","2.202123","2.2346","1.870236","2.412257","1.870236","2.299706","2.195865","2.363999","2.195865","2.19404" 10 | 
-------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2023 JMSLab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | # Licenses for incorporated dependencies 24 | 25 | * [suptCriticalValue](https://github.com/ryanedmundkessler/suptCriticalValue/blob/c1dd6b2e9aaeaa7990efa2c857e76806d23bea9e/LICENSE) 26 | -------------------------------------------------------------------------------- /LICENSE.note: -------------------------------------------------------------------------------- 1 | The eventstudyr package as a whole is distributed under MIT. The eventstudyr package includes one other open source software component that is distributed under MIT. The component is a GitHub repository called suptCriticalValue 2 | 3 | A copy of the license agreements used by this component is included below. 4 | 5 | MIT License 6 | 7 | Copyright (c) 2021 Ryan Kessler 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 
26 | -------------------------------------------------------------------------------- /tests/testthat/input/code/build_OLS.do: -------------------------------------------------------------------------------- 1 | clear all 2 | version 15 3 | set more off 4 | 5 | program main 6 | local indir "../../../examples/source/raw/eventstudy_illustration_data/orig" 7 | local outdir "." 8 | 9 | use "`indir'/simulation_data_dynamic.dta", clear 10 | 11 | 12 | local G 1 13 | local LG 2 14 | local M 1 15 | local LM 2 16 | local furthest_lead = `G' + `LG' 17 | local furthest_lag = `M' + `LM' 18 | local M_minus1 = `M' - 1 19 | local furthest_lag_minus1 = `furthest_lag' - 1 20 | 21 | xtset id t 22 | gen zfd = d.z 23 | 24 | local leads 25 | forvalues i = 2/`furthest_lead' { 26 | local leads `leads' f`i'.zfd 27 | } 28 | display "`leads'" 29 | local lags 30 | forvalues i = 1/`furthest_lag_minus1' { 31 | local lags `lags' l`i'.zfd 32 | } 33 | 34 | eststo: reg y_base zfd x_r `lags' `leads' f`furthest_lead'.z l`furthest_lag'.z i.t i.id, vce(cluster id) 35 | 36 | esttab * using "`outdir'/df_test_base_STATA.csv", csv plain wide noobs /// 37 | keep(x_r zfd L.zfd L2.zfd F2.zfd F3.zfd F3.z L3.z) /// 38 | replace se nomtitles collabels("coef" "std_error") 39 | 40 | eststo clear 41 | eststo: reg y_base z i.t i.id, vce(cluster id) 42 | 43 | esttab * using "`outdir'/df_test_base_STATA_allzero.csv", csv plain wide noobs /// 44 | keep(z) /// 45 | replace se nomtitles collabels("coef" "std_error") 46 | 47 | end 48 | 49 | 50 | main 51 | -------------------------------------------------------------------------------- /.github/workflows/check-standard.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macos-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'oldrel-1'} 26 | 27 | env: 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | R_KEEP_PKG_SOURCE: yes 30 | 31 | steps: 32 | - uses: actions/checkout@v3 33 | 34 | - uses: r-lib/actions/setup-pandoc@v2 35 | 36 | - uses: r-lib/actions/setup-r@v2 37 | with: 38 | r-version: ${{ matrix.config.r }} 39 | http-user-agent: ${{ matrix.config.http-user-agent }} 40 | use-public-rspm: true 41 | 42 | - uses: r-lib/actions/setup-r-dependencies@v2 43 | with: 44 | extra-packages: any::rcmdcheck 45 | needs: check 46 | 47 | - uses: r-lib/actions/check-r-package@v2 48 | with: 49 | upload-snapshots: true 50 | -------------------------------------------------------------------------------- /tests/testthat/test-AddSmPath.R: -------------------------------------------------------------------------------- 1 | 2 | test_that("AddZerosCovar correctly adds zeros where normalized coefficient should be", { 3 | 4 | vcov_matrix_all <- diag(rep(3,10)) 5 | eventstudy_coeffs <- c(paste0("a", 1:2), paste0("a", 4:9)) 6 | rownames(vcov_matrix_all) <- c(eventstudy_coeffs, "a10", "a11") 7 | colnames(vcov_matrix_all) <- c(eventstudy_coeffs, "a10", "a11") 8 | 9 | norm_column <- "a3" 10 | coeffs_oder <- paste0("a", 1:9) 11 | 12 | expected_matrix <- diag(c(rep(3,2), 0, rep(3,6))) 13 | rownames(expected_matrix) <- paste0("a", 1:9) 14 | colnames(expected_matrix) <- paste0("a", 1:9) 15 | 16 | covar <- AddZerosCovar(vcov_matrix_all, eventstudy_coeffs, norm_column, coeffs_oder) 17 | 18 | expect_equal(covar, expected_matrix) 19 | }) 20 | 21 | test_that("AddZerosCovar correctly adds zeros where normalized coefficient should be with 2 normalizations", { 22 | 23 | vcov_matrix_all <- diag(rep(3,9)) 24 | eventstudy_coeffs <- c(paste0("a", 1:2), paste0("a", 4), paste0("a", 6:10)) 25 | rownames(vcov_matrix_all) <- c(eventstudy_coeffs, "a11") 26 | colnames(vcov_matrix_all) <- c(eventstudy_coeffs, "a11") 27 | 28 | norm_column <- c("a3", "a5") 29 | coeffs_oder <- paste0("a", 1:10) 30 | 31 | expected_matrix <- diag(c(rep(3,2), 0, 3, 0, rep(3,5))) 32 | rownames(expected_matrix) <- paste0("a", 1:10) 33 | colnames(expected_matrix) <- paste0("a", 1:10) 34 | 35 | covar <- AddZerosCovar(vcov_matrix_all, eventstudy_coeffs, norm_column, coeffs_oder) 36 | 37 | expect_equal(covar, expected_matrix) 38 | }) 39 | -------------------------------------------------------------------------------- /R/AddMeans.R: -------------------------------------------------------------------------------- 1 | #' Adds the mean of the outcome variable for unit-time pairs where the variable whose coefficient is normalized is non-zero. 2 | #' 3 | #' @param df Data frame containing the additional variables created for estimation. 4 | #' @param normalization_column Character indicating variable whose coefficient will be normalized. 5 | #' @param policyvar Character indicating column of policy variable z. 6 | #' @param outcomevar Character indicating column of outcome variable y. 
7 | #' @param custom_scalar Scalar to be displayed along y = 0 line if the user doesn't want the mean computed by the package. 8 | #' @keywords internal 9 | #' @noRd 10 | 11 | AddMeans <- function(df, normalization_column, policyvar, outcomevar, 12 | custom_scalar = NULL) { 13 | 14 | if (! is.data.frame(df)) {stop("df should be a data frame.")} 15 | if (! is.character(outcomevar)) {stop("outcomevar should be a character.")} 16 | if (! is.character(policyvar)) {stop("policyvar should be a character.")} 17 | if (! is.character(normalization_column)) {stop("normalization_column should be a character.")} 18 | if (! (is.numeric(custom_scalar) | is.null(custom_scalar))) {stop("custom_scalar should be numeric.")} 19 | 20 | if (is.null(custom_scalar)) { 21 | if (grepl(paste0(policyvar, "_lead"), normalization_column)) { 22 | df_change <- df[get(normalization_column) == 0 & !is.na(get(normalization_column))] 23 | } else { 24 | df_change <- df[get(normalization_column) != 0 & !is.na(get(normalization_column))] 25 | } 26 | mean <- df_change[, mean(get(outcomevar), na.rm = TRUE)] 27 | } else { 28 | mean <- custom_scalar 29 | } 30 | 31 | return(mean) 32 | } 33 | 34 | -------------------------------------------------------------------------------- /man/TestLinear.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/TestLinear.R 3 | \name{TestLinear} 4 | \alias{TestLinear} 5 | \title{Perform Tests of Linear Hypotheses} 6 | \usage{ 7 | TestLinear( 8 | estimates, 9 | test = NA, 10 | test_name = "User Test", 11 | pretrends = TRUE, 12 | leveling_off = TRUE 13 | ) 14 | } 15 | \arguments{ 16 | \item{estimates}{A list of length 2 containing estimation results and model information. 17 | Should be an output of \code{\link[=EventStudy]{EventStudy()}}.} 18 | 19 | \item{test}{The hypothesis to be estimated. Accepts inputs that can be passed to 20 | hypothesis.matrix argument in \code{car::linearHypothesis()}.} 21 | 22 | \item{test_name}{Name for test input by user. Defaults to "User Test."} 23 | 24 | \item{pretrends}{If TRUE, uses pre and overidpre from estimates to test for pre-trends. 25 | Defaults to TRUE.} 26 | 27 | \item{leveling_off}{If TRUE, uses post and overidpost from estimates to test for leveling-off. 28 | Defaults to TRUE.} 29 | } 30 | \value{ 31 | A data frame containing the F-statistic and p-value for the specified test(s). 32 | } 33 | \description{ 34 | \code{TestLinear} tests linear restrictions on coefficients. 
35 | } 36 | \examples{ 37 | estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 38 | policyvar = "z", idvar = "id", timevar = "t", 39 | controls = "x_r", FE = TRUE, TFE = TRUE, 40 | post = 3, pre = 2, overidpre = 4, overidpost = 5, 41 | normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 42 | 43 | TestLinear( 44 | estimates, 45 | test = "z_fd_lag1 = z_fd", 46 | test_name = "Hypothesis Test", 47 | pretrends = TRUE, 48 | leveling_off = TRUE 49 | ) 50 | 51 | } 52 | -------------------------------------------------------------------------------- /tests/testthat/test-ComputeFirstDifferences.R: -------------------------------------------------------------------------------- 1 | library(data.table) 2 | 3 | test_that("correctly recognizes wrong variable type for grouping variable", { 4 | dt_test <- data.table::fread("./input/df_test.csv") 5 | 6 | expect_error(ComputeFirstDifferences(dt_test, country, "periods", "values")) 7 | }) 8 | 9 | test_that("correctly recognizes wrong variable type for diffvar variable", { 10 | dt_test <- data.table::fread("./input/df_test.csv") 11 | 12 | expect_error(ComputeFirstDifferences(dt_test, "country", "periods", values)) 13 | }) 14 | 15 | test_that("a column with _fd suffix is added", { 16 | dt_test <- data.table::fread("./input/df_test.csv") 17 | 18 | df_diff <- ComputeFirstDifferences(dt_test, "country", "periods", "values") 19 | 20 | expect_true("values_fd" %in% colnames(df_diff)) 21 | }) 22 | 23 | test_that("correctly computes differences with a balanced dataset", { 24 | df <- data.table( 25 | id = rep(c("A", "B"), each = 4), 26 | time = rep(1:4, times = 2), 27 | policy = c(10, 12, 11, 13, 8, 9, 10, 11), 28 | policy_fd_expected = c(NA, 2, -1, 2, NA, 1, 1, 1) 29 | ) 30 | 31 | df_out <- ComputeFirstDifferences(df, "id", "time", "policy", timevar_holes = FALSE) 32 | 33 | expect_identical(df_out$policy_fd, df$policy_fd_expected) 34 | }) 35 | 36 | test_that("correctly computes differences with a dataset that has holes in time var", { 37 | df <- data.table( 38 | id = c(rep("A", 4), rep("B", 2), rep("C", 3)), 39 | time = c(1, 2, 4, 5, 2, 3, 2, 3, 4), 40 | policy = c(10, 12, 13, 14, 8, 9, 10, 11, 12), 41 | policy_fd_expected = c(NA, 2, NA, 1, NA, 1, NA, 1, 1) 42 | ) 43 | 44 | df_out <- ComputeFirstDifferences(df, "id", "time", "policy", 45 | timevar_holes = TRUE) 46 | 47 | expect_identical(df_out$policy_fd, df$policy_fd_expected) 48 | }) 49 | -------------------------------------------------------------------------------- /tests/testthat/test-AddMeans.R: -------------------------------------------------------------------------------- 1 | test_that("means are computed correctly when a first differenced variable is normalized", { 2 | suppressWarnings( 3 | sample_estimation <- EventStudy(estimator = "OLS", data = example_data, 4 | outcomevar = "y_base", policyvar = "z", 5 | idvar = "id", timevar = "t", FE = TRUE, 6 | TFE = TRUE, post = 2, pre = 2, overidpre = 2, 7 | overidpost = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = TRUE) 8 | ) 9 | 10 | df_estimation <- sample_estimation[[2]]$data 11 | 12 | mean_function <- AddMeans(df_estimation, "z_fd_lead1", 13 | sample_estimation[[2]]$policyvar, 14 | sample_estimation[[2]]$outcomevar) 15 | 16 | mean_manual <- df_estimation[z_fd_lead1 != 0, mean(y_base, na.rm = TRUE)] 17 | 18 | expect_equal(mean_function, mean_manual) 19 | 20 | }) 21 | 22 | test_that("means are computed correctly when the furthest lead is normalized", { 23 | sample_estimation 
<- EventStudy(estimator = "OLS", data = example_data, 24 | outcomevar = "y_base", policyvar = "z", 25 | idvar = "id", timevar = "t", FE = TRUE, 26 | TFE = TRUE, post = 2, pre = 2, overidpre = 2, 27 | overidpost = 2, normalize = -5, cluster = TRUE, anticipation_effects_normalization = TRUE) 28 | 29 | df_estimation <- sample_estimation[[2]]$data 30 | 31 | mean_function <- AddMeans(df_estimation, "z_lead4", 32 | sample_estimation[[2]]$policyvar, 33 | sample_estimation[[2]]$outcomevar) 34 | 35 | mean_manual <- df_estimation[z_lead4 == 0, mean(y_base, na.rm = TRUE)] 36 | 37 | expect_equal(mean_function, mean_manual) 38 | }) 39 | 40 | 41 | -------------------------------------------------------------------------------- /data-raw/save_internal_data.R: -------------------------------------------------------------------------------- 1 | 2 | library(dplyr) 3 | library(haven) 4 | 5 | indir <- 'examples/source/raw/eventstudy_illustration_data/orig' 6 | 7 | example_data <- read_dta(sprintf('%s/simulation_data_dynamic.dta', indir)) 8 | 9 | policyvar <- "z" 10 | idvar <- "id" 11 | timevar <- "t" 12 | 13 | post <- 1 14 | pre <- 1 15 | overidpost <- 2 16 | overidpre <- 2 17 | 18 | df_first_diff <- GetFirstDifferences(df = example_data, timevar = "t", groupvar="id", diffvar = "z") 19 | 20 | num_fd_lag_periods <- post + overidpost - 1 21 | num_fd_lead_periods <- pre + overidpre 22 | 23 | furthest_lag_period <- num_fd_lag_periods + 1 24 | 25 | df_first_diff_leads <- PrepareLeads(df_first_diff, groupvar = idvar, timevar, 26 | leadvar = paste0(policyvar, "_fd"), leads = 1:num_fd_lead_periods) 27 | df_first_diff_leads_lags <- PrepareLags(df_first_diff_leads, groupvar = idvar, timevar, 28 | lagvar = paste0(policyvar, "_fd"), lags = 1:num_fd_lag_periods) 29 | 30 | df_lag <- PrepareLags(df_first_diff_leads_lags, groupvar = idvar, timevar, 31 | lagvar = policyvar, lags = furthest_lag_period) 32 | df_lag_lead <- PrepareLeads(df_lag, groupvar = idvar, timevar, 33 | leadvar = policyvar, leads = num_fd_lead_periods) 34 | 35 | column_subtract_1 <- paste0(policyvar, "_lead", num_fd_lead_periods) 36 | df_lag_lead[column_subtract_1] <- 1 - df_lag_lead[column_subtract_1] 37 | 38 | v_OLS_variables <- c("id", "t", "z", "y_base", "x_r", "z_fd", 39 | "z_fd_lead1", "z_fd_lead2", "z_fd_lead3", 40 | "z_fd_lag1", "z_fd_lag2", "z_lag3","z_lead3") 41 | 42 | v_FHS_variables <- c(v_OLS_variables, "eta_m") 43 | 44 | df_EventStudyOLS_example <- df_lag_lead[v_OLS_variables] 45 | df_EventStudyFHS_example <- df_lag_lead[v_FHS_variables] 46 | df_sample_static <- read_dta(sprintf('%s/simulation_data_static.dta', indir)) 47 | 48 | usethis::use_data(df_EventStudyOLS_example, df_EventStudyFHS_example, df_sample_static, overwrite = TRUE, version = 3, internal = TRUE) 49 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: eventstudyr 2 | Title: Estimation and Visualization of Linear Panel Event Studies 3 | Version: 1.1.5 4 | Authors@R: 5 | c(person(given = "Simon", 6 | family = "Freyaldenhoven", 7 | role = c("aut")), 8 | person(given = "Christian", 9 | family = "Hansen", 10 | role = c("aut")), 11 | person(given = "Jorge", 12 | family = "Pérez Pérez", 13 | role = c("aut")), 14 | person(given = "Jesse", 15 | family = "Shapiro", 16 | role = c("aut")), 17 | person(given = "Veli", 18 | family = "Andirin", 19 | role = c("aut")), 20 | person(given = "Richard", 21 | family = "Calvo", 22 | role = c("aut")), 23 | 
person(given = "Santiago", 24 | family = "Hermo", 25 | email = "santiago.hermo@monash.edu", 26 | role = c("aut", "cre")), 27 | person(given = "Nathan", 28 | family = "Schor", 29 | role = c("aut")), 30 | person(given = "Emily", 31 | family = "Wang", 32 | role = c("aut")), 33 | person(given = "JMSLab", 34 | role = c("cph")), 35 | person(given = "Ryan", 36 | family = "Kessler", 37 | role = c("cph"))) 38 | Description: Estimates linear panel event study models. Plots coefficients following the recommendations in Freyaldenhoven et al. (2021) . Includes sup-t bands, testing for key hypotheses, least wiggly path through the Wald region. Allows instrumental variables estimation following Freyaldenhoven et al. (2019) . 39 | License: MIT + file LICENSE 40 | Encoding: UTF-8 41 | Imports: 42 | car, 43 | data.table, 44 | dplyr, 45 | estimatr, 46 | ggplot2, 47 | MASS, 48 | rlang, 49 | pracma, 50 | stats, 51 | stringr 52 | VignetteBuilder: 53 | knitr 54 | LazyData: true 55 | Roxygen: list(markdown = TRUE) 56 | RoxygenNote: 7.3.2 57 | Suggests: 58 | rmarkdown, 59 | knitr, 60 | testthat (>= 3.0.0) 61 | Config/testthat/edition: 3 62 | Depends: 63 | R (>= 3.5.0) 64 | URL: https://github.com/JMSLab/eventstudyr 65 | BugReports: https://github.com/JMSLab/eventstudyr/issues 66 | -------------------------------------------------------------------------------- /tests/testthat/test-AddSuptBand.R: -------------------------------------------------------------------------------- 1 | # OLS ----------------------------------- 2 | 3 | test_that("check that the correct part of vcov matrix is used", { 4 | suppressWarnings( 5 | sample_estimation <- EventStudy(estimator = "OLS", data = example_data, 6 | outcomevar = "y_base", policyvar = "z", 7 | idvar = "id", timevar = "t", FE = TRUE, control = "x_r", 8 | TFE = TRUE, post = 2, pre = 2, overidpre = 2, 9 | overidpost = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = TRUE) 10 | ) 11 | 12 | eventstudy_coefficients <- sample_estimation[[2]]$eventstudy_coefficients 13 | vcov_matrix_all <- sample_estimation[[1]]$vcov 14 | v_terms_to_keep <- colnames(vcov_matrix_all) %in% eventstudy_coefficients 15 | vcov_matrix <- vcov_matrix_all[v_terms_to_keep, v_terms_to_keep] 16 | 17 | expect_true(isSymmetric(vcov_matrix)) 18 | expect_true(all(eventstudy_coefficients %in% colnames(vcov_matrix))) 19 | expect_true(! "x_r" %in% colnames(vcov_matrix)) 20 | }) 21 | 22 | # FHS ----------------------------------- 23 | 24 | test_that("check that the correct part of vcov matrix is used", { 25 | data <- example_data[, c("y_base", "z", "id", "t", "x_r", "eta_m")] 26 | suppressWarnings( 27 | sample_estimation <- EventStudy(estimator = "FHS", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", 28 | timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, 29 | overidpost = 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = TRUE) 30 | ) 31 | 32 | eventstudy_coefficients <- sample_estimation[[2]]$eventstudy_coefficients 33 | vcov_matrix_all <- sample_estimation[[1]]$vcov 34 | v_terms_to_keep <- colnames(vcov_matrix_all) %in% eventstudy_coefficients 35 | vcov_matrix <- vcov_matrix_all[v_terms_to_keep, v_terms_to_keep] 36 | 37 | expect_true(isSymmetric(vcov_matrix, tol = sqrt(.Machine$double.eps))) 38 | expect_true(all(eventstudy_coefficients %in% colnames(vcov_matrix))) 39 | expect_true(! 
"x_r" %in% colnames(vcov_matrix)) 40 | }) 41 | 42 | -------------------------------------------------------------------------------- /tests/testthat/input/code/build_FHS.do: -------------------------------------------------------------------------------- 1 | clear all 2 | version 15 3 | set more off 4 | 5 | program main 6 | local indir "../../../examples/source/raw/eventstudy_illustration_data/orig" 7 | local outdir "." 8 | 9 | use "`indir'/simulation_data_dynamic.dta", clear 10 | 11 | local G 1 12 | local LG 2 13 | local M 1 14 | local LM 2 15 | local furthest_lead = `G' + `LG' 16 | local furthest_lag = `M' + `LM' 17 | local M_minus1 = `M' - 1 18 | local furthest_lag_minus1 = `furthest_lag' - 1 19 | 20 | xtset id t 21 | gen z_fd = d.z 22 | 23 | forvalues i = 2/`furthest_lead' { 24 | gen z_fd_lead`i' = f`i'.z_fd 25 | } 26 | forvalues i = 1/`furthest_lag_minus1' { 27 | gen z_fd_lag`i' = l`i'.z_fd 28 | } 29 | gen z_lead`furthest_lead' = f`furthest_lead'.z 30 | gen z_lag`furthest_lag' = l`furthest_lag'.z 31 | 32 | run_regressions 33 | 34 | esttab * using "`outdir'/df_test_base_STATA_FHS.csv", csv plain wide noobs /// 35 | keep(z_lead3 z_fd_lead2 z_fd z_fd_lag1 z_fd_lag2 z_lag3 x_r eta_m) /// 36 | replace se nomtitles collabels("coef" "std_error") 37 | end 38 | 39 | program run_regressions 40 | 41 | * Case 2: FE=TRUE , TFE=TRUE , cluster=TRUE 42 | eststo: xtivreg y_base x_r (eta_m = z_fd_lead3) z_lead3 z_fd_lead2 z_fd z_fd_lag1 z_fd_lag2 z_lag3 i.t, fe vce(cluster id) 43 | 44 | * Case 4: FE=TRUE , TFE=FALSE, cluster=TRUE 45 | eststo: xtivreg y_base x_r (eta_m = z_fd_lead3) z_lead3 z_fd_lead2 z_fd z_fd_lag1 z_fd_lag2 z_lag3, fe vce(cluster id) 46 | 47 | * Case 5: FE=FALSE, TFE=TRUE , cluster=FALSE 48 | eststo: ivregress 2sls y_base x_r (eta_m = z_fd_lead3) z_lead3 z_fd_lead2 z_fd z_fd_lag1 z_fd_lag2 z_lag3 i.t, small vce(robust) 49 | 50 | * Case 6: FE=FALSE, TFE=TRUE , cluster=TRUE 51 | eststo: ivregress 2sls y_base x_r (eta_m = z_fd_lead3) z_lead3 z_fd_lead2 z_fd z_fd_lag1 z_fd_lag2 z_lag3 i.t, small vce(cluster id) 52 | 53 | * Case 7: FE=FALSE, TFE=FALSE, cluster=FALSE 54 | eststo: ivregress 2sls y_base x_r (eta_m = z_fd_lead3) z_lead3 z_fd_lead2 z_fd z_fd_lag1 z_fd_lag2 z_lag3, small vce(robust) 55 | 56 | * Case 8: FE=FALSE, TFE=FALSE, cluster=TRUE 57 | eststo: ivregress 2sls y_base x_r (eta_m = z_fd_lead3) z_lead3 z_fd_lead2 z_fd z_fd_lag1 z_fd_lag2 z_lag3, small vce(cluster id) 58 | 59 | end 60 | 61 | 62 | main 63 | -------------------------------------------------------------------------------- /R/AddCIs.R: -------------------------------------------------------------------------------- 1 | #' Adds columns to data frame containing confidence intervals around provided estimates. 2 | #' 3 | #' @param df_estimates, A data frame with columns for term, estimate, and standard error. 4 | #' @param eventstudy_coefficients, A list specifying the names of the columns that were not normalized in EventStudy(). 5 | #' @param conf_level, Confidence level used for confidence interval 6 | #' expressed as a real number between 0 and 1, inclusively. Defaults to 0.95. 
7 | #' 8 | #' 9 | #' @import dplyr 10 | #' @importFrom rlang .data 11 | #' @importFrom stats qnorm 12 | #' @import estimatr 13 | #' @keywords internal 14 | #' @noRd 15 | #' 16 | #' @examples 17 | #' estimates <- EventStudy( 18 | #' estimator = "OLS", 19 | #' data = example_data, 20 | #' outcomevar = "y_base", 21 | #' policyvar = "z", 22 | #' idvar = "id", 23 | #' timevar = "t", 24 | #' controls = "x_r", 25 | #' FE = TRUE, 26 | #' TFE = TRUE, 27 | #' post = 3, 28 | #' pre = 2, 29 | #' overidpre = 4, 30 | #' overidpost = 5, 31 | #' normalize = - 3, 32 | #' cluster = TRUE, 33 | #' anticipation_effects_normalization = TRUE 34 | #' ) 35 | #' 36 | #' df_estimates_tidy <- estimatr::tidy(estimates$output) 37 | #' 38 | #' df_estimates_tidy <- AddCIs( 39 | #' df_estimates_tidy, 40 | #' eventstudy_coefficients = estimates$arguments$eventstudy_coefficients, 41 | #' conf_level = 0.95 42 | #') 43 | #' 44 | #' 45 | 46 | AddCIs <- function(df_estimates, eventstudy_coefficients, conf_level = 0.95) { 47 | if (! inherits(df_estimates, "data.frame")) {stop("df_estimates should be a data frame")} 48 | if (!all(c("term", "estimate", "std.error") %in% colnames(df_estimates))) { 49 | stop("df_estimates should include columns 'term', 'estimate', and 'std.error'.") 50 | } 51 | if (! is.character(eventstudy_coefficients)) {stop("eventstudy_coefficients should be a character vector.")} 52 | if (! is.numeric(conf_level) | conf_level < 0 | conf_level > 1) {stop("conf_level should be a real number between 0 and 1, inclusive.")} 53 | 54 | terms <- eventstudy_coefficients 55 | 56 | percentile <- conf_level + ((1 - conf_level)/2) 57 | 58 | df_CI <- dplyr::filter(df_estimates, .data$term %in% terms) 59 | df_CI <- dplyr::mutate(df_CI, ci_lower = .data$estimate - .data$std.error * stats::qnorm(percentile), 60 | ci_upper = .data$estimate + .data$std.error * stats::qnorm(percentile)) 61 | df_CI <- dplyr::select(df_CI, c("term", "ci_lower", "ci_upper")) 62 | df_estimates <- dplyr::left_join(df_estimates, df_CI, by = "term") 63 | 64 | return(df_estimates) 65 | } 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # eventstudyr 5 | 6 | 7 | 8 | [![CRAN 9 | status](https://www.r-pkg.org/badges/version/eventstudyr)](https://CRAN.R-project.org/package=eventstudyr) 10 | [![CRAN RStudio mirror 11 | downloads](https://cranlogs.r-pkg.org/badges/grand-total/eventstudyr)](https://r-pkg.org/pkg/eventstudyr) 12 | 13 | 14 | ## Overview 15 | 16 | The **eventstudyr** package implements tools for estimating linear panel 17 | event study models, following the recommendations in [Freyaldenhoven et 18 | al. (2021)](https://www.nber.org/papers/w29170). Includes sup-t bands, 19 | testing for key hypotheses, least wiggly path through the Wald region. 20 | Allows instrumental variables estimation following [Freyaldenhoven et 21 | al. (2019)](https://www.aeaweb.org/articles?id=10.1257/aer.20180609). 22 | 23 | ## Installation 24 | 25 | ``` r 26 | # Install from CRAN 27 | install.packages("eventstudyr") 28 | 29 | # Install latest version from GitHub 30 | install.packages("devtools") 31 | devtools::install_github("JMSLab/eventstudyr") 32 | ``` 33 | 34 | ## Usage 35 | 36 | Find a minimal example below. 
For more examples see the package 37 | [documentation](https://cran.r-project.org/package=eventstudyr/eventstudyr.pdf) 38 | and 39 | [vignette](https://cran.r-project.org/package=eventstudyr/vignettes/documentation.html). 40 | 41 | ``` r 42 | library(eventstudyr) 43 | set.seed(10) # for reproducibility of sup-t bands 44 | 45 | estimates_ols <- EventStudy( 46 | estimator = "OLS", 47 | data = example_data, # Use package sample data 48 | outcomevar = "y_smooth_m", 49 | policyvar = "z", 50 | idvar = "id", 51 | timevar = "t", 52 | controls = "x_r", 53 | pre = 0, post = 4 54 | ) 55 | 56 | plt <- EventStudyPlot(estimates = estimates_ols) 57 | plt 58 | ``` 59 | 60 | Event-study plot using the example data of the package
61 |  and default plotting options. 62 | 63 | ## Citation 64 | 65 | Simon Freyaldenhoven, Christian Hansen, Jorge Pérez Pérez, and Jesse M. 66 | Shapiro. “Visualization, Identification, and Estimation in the Panel 67 | Event-Study Design.” [NBER Working Paper 68 | No. 29170](https://www.nber.org/papers/w29170), August 2021. 69 | 70 | Simon Freyaldenhoven, Christian Hansen, Jorge Pérez Pérez, Jesse M. 71 | Shapiro, Veli M. Andirin, Richard Calvo, Santiago Hermo, Nathan Schor, 72 | Emily Wang. “`eventstudyr` package.” Code and data repository at 73 | , March 2023. 74 | 75 | ## Acknowledgments 76 | 77 | Thank you to Eliana Sena Sarmiento and Melissa Wu for their excellent 78 | work testing and reviewing `eventstudyr` prior to its public release. 79 | -------------------------------------------------------------------------------- /R/ComputeFirstDifferences.R: -------------------------------------------------------------------------------- 1 | #' Adds first differences of a variable, robustly to gaps in time variable, as new columns in a panel dataset 2 | #' 3 | #' @param dt Data frame that will be modified. 4 | #' @param idvar Character indicating column of units. 5 | #' @param timevar Character indicating column of time periods. 6 | #' @param diffvar Character indicating column of variable whose first difference will be taken. 7 | #' @param timevar_holes Logical indicating whether the panel contains gaps in the time variable. Defaults to FALSE. 8 | #' 9 | #' @return The passed dataset augmented with columns that reflect the desired first differences 10 | #' 11 | #' @seealso [data.table::shift()] 12 | #' 13 | #' @examples 14 | #' ComputeFirstDifferences( 15 | #' example_data, 16 | #' idvar = "id", 17 | #' timevar = "t", 18 | #' diffvar = "z" 19 | #' ) 20 | #' 21 | #' @importFrom data.table setorderv setnames shift := CJ .SD 22 | #' 23 | #' @keywords internal 24 | #' @noRd 25 | 26 | ComputeFirstDifferences <- function(dt, idvar, timevar, diffvar, 27 | timevar_holes = FALSE) { 28 | if (! data.table::is.data.table(dt)) { 29 | stop("Input data should be a data.table.") 30 | } 31 | for (var in c(idvar, timevar, diffvar)) { 32 | if ((! is.character(var))) { 33 | stop(paste0(var, " should be a character.")) 34 | } 35 | if (! var %in% colnames(dt)) { 36 | stop(paste0(var, " should be the name of a variable in the dataset.")) 37 | } 38 | } 39 | if (! 
is.logical(timevar_holes)) { 40 | stop("timevar_holes should be logical.") 41 | } 42 | 43 | data.table::setorderv(dt, cols = c(idvar, timevar)) 44 | 45 | if (!timevar_holes) { 46 | dt[, paste0(diffvar, "_fd") := get(diffvar) - data.table::shift((get(diffvar))), 47 | by = idvar] 48 | } else { 49 | ## Create dataset with all combinations to compute first differences 50 | all_combinations <- data.table::CJ(unique(dt[[idvar]]), 51 | min(dt[[timevar]]):max(dt[[timevar]])) 52 | data.table::setnames(all_combinations, new = c(idvar, timevar)) 53 | 54 | dt_all <- data.table::merge.data.table(dt, all_combinations, 55 | by = c(idvar, timevar), all = TRUE) 56 | 57 | dt_all[, paste0(diffvar, "_fd") := get(diffvar) - data.table::shift((get(diffvar))), 58 | by = idvar] 59 | 60 | ## Bring first differences back to the original dataset 61 | vars_to_keep <- c(idvar, timevar, paste0(diffvar, "_fd")) 62 | 63 | dt <- data.table::merge.data.table(dt, dt_all[, .SD, .SDcols = vars_to_keep], 64 | by = c(idvar, timevar), all.x = TRUE) 65 | } 66 | 67 | return(dt) 68 | } 69 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, echo = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-" 12 | ) 13 | ``` 14 | 15 | # eventstudyr 16 | 17 | 18 | [![CRAN status](https://www.r-pkg.org/badges/version/eventstudyr)](https://CRAN.R-project.org/package=eventstudyr) 19 | [![CRAN RStudio mirror downloads](https://cranlogs.r-pkg.org/badges/grand-total/eventstudyr)](https://r-pkg.org/pkg/eventstudyr) 20 | 21 | 22 | 23 | ## Overview 24 | 25 | The **eventstudyr** package implements tools for estimating linear panel event study models, following the recommendations in [Freyaldenhoven et al. (2021)](https://www.nber.org/papers/w29170). 26 | Includes sup-t bands, testing for key hypotheses, least wiggly path through the Wald region. 27 | Allows instrumental variables estimation following [Freyaldenhoven et al. (2019)](https://www.aeaweb.org/articles?id=10.1257/aer.20180609). 28 | 29 | ## Installation 30 | 31 | ```{r, eval = FALSE} 32 | # Install from CRAN 33 | install.packages("eventstudyr") 34 | 35 | # Install latest version from GitHub 36 | install.packages("devtools") 37 | devtools::install_github("JMSLab/eventstudyr") 38 | ``` 39 | 40 | ## Usage 41 | 42 | Find a minimal example below. 43 | For more examples see the package [documentation](https://cran.r-project.org/package=eventstudyr/eventstudyr.pdf) and [vignette](https://cran.r-project.org/package=eventstudyr/vignettes/documentation.html). 44 | 45 | ```{r example} 46 | #| fig.alt = "Event-study plot using the example data of the package 47 | #| and default plotting options." 48 | library(eventstudyr) 49 | set.seed(10) # for reproducibility of sup-t bands 50 | 51 | estimates_ols <- EventStudy( 52 | estimator = "OLS", 53 | data = example_data, # Use package sample data 54 | outcomevar = "y_smooth_m", 55 | policyvar = "z", 56 | idvar = "id", 57 | timevar = "t", 58 | controls = "x_r", 59 | pre = 0, post = 4 60 | ) 61 | 62 | plt <- EventStudyPlot(estimates = estimates_ols) 63 | plt 64 | ``` 65 | 66 | ## Citation 67 | 68 | Simon Freyaldenhoven, Christian Hansen, Jorge Pérez Pérez, and Jesse M. Shapiro. "Visualization, Identification, and Estimation in the Panel Event-Study Design." [NBER Working Paper No. 
29170](https://www.nber.org/papers/w29170), 69 | August 2021. 70 | 71 | Simon Freyaldenhoven, Christian Hansen, Jorge Pérez Pérez, Jesse M. Shapiro, Veli M. Andirin, Richard Calvo, Santiago Hermo, Nathan Schor, Emily Wang. "`eventstudyr` package." Code and data repository at https://github.com/JMSLab/eventstudyr, March 2023. 72 | 73 | ## Acknowledgments 74 | 75 | Thank you to Eliana Sena Sarmiento and Melissa Wu for their excellent work testing and reviewing `eventstudyr` prior to its public release. 76 | -------------------------------------------------------------------------------- /tests/testthat/test-PrepareModelFormula.R: -------------------------------------------------------------------------------- 1 | 2 | test_that("outcomevar is the dependent variable for OLS", { 3 | 4 | estimator <- "OLS" 5 | outcomevar <- "y_base" 6 | str_policy_vars <- c("z_lead2", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_lag2") 7 | 8 | fmla <- PrepareModelFormula(estimator, outcomevar, str_policy_vars) 9 | expect_equal(all.vars(fmla)[1], outcomevar) 10 | }) 11 | 12 | test_that("formula does not have an intercept for OLS", { 13 | 14 | estimator <- "OLS" 15 | outcomevar <- "y_base" 16 | str_policy_vars <- c("z_lead2", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_lag2") 17 | 18 | fmla <- PrepareModelFormula(estimator, outcomevar, str_policy_vars) 19 | expect_equal(attr(terms(fmla), "intercept"), 0) 20 | }) 21 | 22 | test_that("vars in str_policy_vars argument are incorporated into formula for OLS", { 23 | 24 | estimator <- "OLS" 25 | outcomevar <- "y_base" 26 | str_policy_vars <- c("z_lead2", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_lag2") 27 | 28 | fmla <- PrepareModelFormula(estimator, outcomevar, str_policy_vars) 29 | 30 | z_vars_mask <- grepl("z_fd|z_lead|z_lag", attr(terms(fmla), "term.labels")) 31 | expect_equal(sum(z_vars_mask), length(str_policy_vars)) 32 | }) 33 | 34 | test_that("controls argument is incorporated into formula", { 35 | 36 | estimator <- "OLS" 37 | outcomevar <- "y_base" 38 | str_policy_vars <- c("z_lead2", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_lag2") 39 | ctrls <- c("x1", "x2") 40 | 41 | fmla <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 42 | controls = ctrls) 43 | controls_in_model <- attr(terms(fmla), "term.labels") %in% ctrls 44 | expect_equal(sum(controls_in_model), length(ctrls)) 45 | }) 46 | 47 | test_that("formula for IV regression is correct", { 48 | estimator <- "FHS" 49 | outcomevar <- "y_base" 50 | str_policy_fd <- c("z_fd", "z_fd_lead2", "z_fd_lead3", "z_fd_lag1", "z_fd_lag2", "z_fd_lag3") 51 | str_policy_lead <- "z_lead3" 52 | str_policy_lag <- "z_lag4" 53 | controls <- "x_r" 54 | proxy <- "eta_m" 55 | proxyIV <- "z_fd_lead3" 56 | 57 | fmla <- PrepareModelFormula(estimator, outcomevar, 58 | str_policy_vars = c(str_policy_fd, str_policy_lead, str_policy_lag), 59 | static = FALSE, 60 | controls, proxy, proxyIV) 61 | 62 | expect_equal(class(fmla), "formula") 63 | expect_equal(deparse(fmla[[2]]), "y_base") 64 | expect_equal(sort(all.vars(fmla[[c(3,2)]])), 65 | sort(c("z_fd", "z_fd_lead2", "z_fd_lag1", "z_fd_lag2", "z_fd_lag3", "z_lead3", "z_lag4", "x_r", "eta_m"))) 66 | expect_equal(sort(all.vars(fmla[[c(3,3)]])), 67 | sort(c("z_fd", "z_fd_lead2", "z_fd_lead3", "z_fd_lag1", "z_fd_lag2", "z_fd_lag3", "z_lead3", "z_lag4", "x_r"))) 68 | }) 69 | 70 | test_that("formula for static model is correct", { 71 | estimator <- "OLS" 72 | outcomevar <- "y_base" 73 | str_policy_vars <- "z" 74 | 75 | fmla <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, static = TRUE) 76 | 77 
| expect_equal(class(fmla), "formula") 78 | expect_equal(deparse(fmla[[2]]), "y_base") 79 | expect_equal(all.vars(fmla[[3]]), "z") 80 | }) 81 | -------------------------------------------------------------------------------- /R/AddSuptBand.R: -------------------------------------------------------------------------------- 1 | 2 | #' Produces sup-t bands for the event-study coefficients. 3 | #' 4 | #' @description Source code from Ryan Kessler (2022). [suptCriticalValue](https://github.com/ryanedmundkessler/suptCriticalValue): Computes critical values 5 | #' underlying simultaneous sup-t confidence bands. R package version 6 | #' 0.1.0. 7 | #' 8 | #' @param estimates The first element extracted from [EventStudy()]. Should be a list. 9 | #' @param num_sim The number of simulations used in generating the sup-t bands. 10 | #' Should be a natural number. Defaults to 1000. 11 | #' @param conf_level The confidence level used for obtaining the sup-t bands critical value. 12 | #' Should be a real number between 0 and 1, inclusive. Defaults to .95. 13 | #' @param eventstudy_coefficients The names of the event-study coefficients. This vector is 14 | #' outputted in the second element of the [EventStudy()] function. Should be a vector of strings. 15 | #' 16 | #' @return A data.frame that contains the upper and lower sup-t band values 17 | #' for each event-study coefficient. 18 | #' @import estimatr 19 | #' @importFrom MASS mvrnorm 20 | #' @keywords internal 21 | #' @noRd 22 | #' 23 | #' @examples 24 | #' eventstudy_estimates <- EventStudy( 25 | #' estimator = "OLS", 26 | #' data = example_data, 27 | #' outcomevar = "y_base", 28 | #' policyvar = "z", 29 | #' idvar = "id", 30 | #' timevar = "t", 31 | #' controls = "x_r", 32 | #' FE = TRUE, 33 | #' TFE = TRUE, 34 | #' post = 3, 35 | #' pre = 2, 36 | #' overidpre = 4, 37 | #' overidpost = 5, 38 | #' normalize = - 3, 39 | #' cluster = TRUE, 40 | #' anticipation_effects_normalization = TRUE 41 | #' ) 42 | #' 43 | #' AddSuptBand( 44 | #' estimates = eventstudy_estimates$output, 45 | #' num_sim = 100, 46 | #' conf_level = .95, 47 | #' eventstudy_coefficients = eventstudy_estimates$arguments$eventstudy_coefficients 48 | #') 49 | 50 | AddSuptBand <- function(estimates, num_sim = 1000, conf_level = .95, eventstudy_coefficients) { 51 | 52 | if (! class(estimates) %in% c("lm_robust", "iv_robust")) { 53 | stop("estimates is not a data frame with coefficient estimates and standard errors") 54 | } 55 | if (! is.numeric(num_sim) | num_sim %% 1 != 0 | num_sim <= 0) {stop("num_sim should be a natural number.")} 56 | if (! is.numeric(conf_level) | conf_level < 0 | conf_level > 1) {stop("conf_level should be a real number between 0 and 1, inclusive.")} 57 | if (! 
is.character(eventstudy_coefficients)) {stop("eventstudy_coefficients should be a character.")} 58 | 59 | vcov_matrix_all <- estimates$vcov 60 | v_terms_to_keep <- colnames(vcov_matrix_all) %in% eventstudy_coefficients 61 | vcov_matrix <- vcov_matrix_all[v_terms_to_keep, v_terms_to_keep] 62 | 63 | v_std_errors <- t(sqrt(diag(vcov_matrix))) 64 | draws <- MASS::mvrnorm(n = num_sim, mu = rep(0, nrow(vcov_matrix)), Sigma = vcov_matrix) 65 | t <- draws / (v_std_errors %x% matrix(rep(1, num_sim))) 66 | 67 | t <- apply(abs(t), 1, FUN = max) 68 | t <- sort(t) 69 | 70 | conf_level_num_sim <- conf_level * num_sim 71 | 72 | if (round(conf_level_num_sim) == conf_level_num_sim) { 73 | critical_value = (t[conf_level_num_sim] + t[conf_level_num_sim + 1]) / 2 74 | } else { 75 | critical_value = t[floor(conf_level_num_sim) + 1] 76 | } 77 | 78 | df_estimates_tidy <- estimatr::tidy(estimates) 79 | 80 | df_estimates_tidy["suptband_lower"] <- df_estimates_tidy$estimate - (critical_value * df_estimates_tidy$std.error) 81 | df_estimates_tidy["suptband_upper"] <- df_estimates_tidy$estimate + (critical_value * df_estimates_tidy$std.error) 82 | 83 | 84 | return(df_estimates_tidy) 85 | 86 | } 87 | -------------------------------------------------------------------------------- /R/ComputeShifts.R: -------------------------------------------------------------------------------- 1 | #' Adds leads or lags of a variable, robustly to gaps in time variable, as new columns in a panel dataset 2 | #' 3 | #' @param df Data frame that will be modified. 4 | #' @param idvar Character indicating column of units. 5 | #' @param timevar Character indicating column of time periods. 6 | #' @param shiftvar Character indicating column of variable that will be shifted. 7 | #' @param shiftvalues Numeric vector specifying the leads/lags to be computed. For example, c(-1, 0, 1) will compute the lead, current, and lag values. 8 | #' @param timevar_holes Logical indicating whether the panel contains gaps in the time variable. Defaults to FALSE. 9 | #' 10 | #' @return The passed dataset augmented with columns that reflect the desired shift values 11 | 12 | #' @seealso [data.table::shift()] 13 | #' 14 | #' @examples 15 | #' ComputeShifts( 16 | #' example_data, 17 | #' idvar = "id", 18 | #' timevar = "t", 19 | #' shiftvar = "z", 20 | #' shiftvalues = -2:2 21 | #' ) 22 | #' 23 | #' 24 | #' @importFrom data.table setorderv shift := CJ .SD 25 | #' @keywords internal 26 | #' @noRd 27 | 28 | ComputeShifts <- function(df, idvar, timevar, shiftvar, shiftvalues, 29 | timevar_holes = FALSE) { 30 | if (! data.table::is.data.table(df)) { 31 | stop("df should be a data.table.") 32 | } 33 | for (var in c(idvar, timevar, shiftvar)) { 34 | if ((! is.character(var))) { 35 | stop(paste0(var, " should be a character.")) 36 | } 37 | if (! var %in% colnames(df)) { 38 | stop(paste0(var, " should be the name of a variable in the dataset.")) 39 | } 40 | } 41 | if (! is.numeric(shiftvalues)) { 42 | stop("shiftvalues should be numeric.") 43 | } 44 | if (0 %in% shiftvalues & length(shiftvalues) == 1) { 45 | stop("shiftvalues must be different than 0 to compute leads/lags.") 46 | } 47 | if (! 
is.logical(timevar_holes)) { 48 | stop("timevar_holes should be logical.") 49 | } 50 | 51 | data.table::setorderv(df, cols = c(idvar, timevar)) 52 | 53 | lags <- shiftvalues[shiftvalues > 0] 54 | leads <- -1*shiftvalues[shiftvalues <= 0] 55 | 56 | compute_shifts <- function(dat, idvar, shiftvar, shiftvalues, lags, leads) { 57 | 58 | if (any(shiftvalues > 0)) { 59 | dat[, paste0(shiftvar, "_lag", lags) := 60 | data.table::shift(get(shiftvar), lags, type = "lag"), 61 | by = c(idvar)] 62 | } 63 | if (any(shiftvalues <= 0)) { 64 | dat[, paste0(shiftvar, "_lead", leads) := 65 | data.table::shift(get(shiftvar), leads, type = "lead"), 66 | by = c(idvar)] 67 | } 68 | return(dat) 69 | } 70 | 71 | if (!timevar_holes) { 72 | df = compute_shifts(df, idvar, shiftvar, shiftvalues, lags, leads) 73 | } else { 74 | ## Create dataset with all combinations to compute shifts 75 | all_combinations <- CJ(unique(df[[idvar]]), 76 | min(df[[timevar]]):max(df[[timevar]])) 77 | setnames(all_combinations, new = c(idvar, timevar)) 78 | 79 | vars_to_keep <- c(idvar, timevar, shiftvar) 80 | 81 | df_all <- merge(df[, .SD, .SDcols = vars_to_keep], all_combinations, 82 | by = c(idvar, timevar), all = TRUE) 83 | 84 | df_all = compute_shifts(df_all, idvar, shiftvar, shiftvalues, lags, leads) 85 | 86 | ## Bring shifts back to the original dataset 87 | df <- merge(df, df_all, 88 | by = c(idvar, timevar, shiftvar), all.x = TRUE) 89 | } 90 | return(df) 91 | } 92 | -------------------------------------------------------------------------------- /R/AddSmPath.R: -------------------------------------------------------------------------------- 1 | #' Calculates smoothest path 2 | #' 3 | #' @description Function to add smoothest path to dataframe with coefficients 4 | #' 5 | #' @param df Dataset with coefficients prepared for plotting (must include coefficients normalized in estimation). 6 | #' @param coefficients Event-study coefficients (must include coefficients normalized in estimation). 7 | #' @param inv_covar Inverse of covariance matrix of coefficients (must include row and column of zeros for normalized coefficients). 8 | #' @param conf_level Confidence level to define critical value of Wald region. Should be a real number between 0 and 1, inclusive. Defaults to 0.95. 9 | #' @param maxorder Sets a maximum polynomial order that will be used when calculating lowest possible polynomial order. Should be a whole number. Defaults to 10. 10 | #' @param maxiter_solver Sets the maximum number of iterations when searching for the smoothest path with minimum squared term in highest order coefficient. Should be a positive whole number. Defaults to 2e6. 
11 | #' 12 | #' @return df with smoothest path added as a new column 13 | #' @importFrom pracma inv pinv 14 | #' @importFrom stats qchisq 15 | #' @keywords internal 16 | #' @noRd 17 | 18 | AddSmPath <- function(df, coefficients, inv_covar, 19 | conf_level = 0.95, maxorder = 10, maxiter_solver = 2e6){ 20 | 21 | if (!is.numeric(conf_level) | conf_level < 0 | conf_level > 1) { 22 | stop("Argument 'conf_level' should be a real number between 0 and 1, inclusive.") 23 | } 24 | if (!(maxorder%%1 == 0) | maxorder < 0 | maxorder > 10) { 25 | stop("Argument 'maxorder' should be an integer between 0 and 10.") 26 | } 27 | if (!is.data.frame(df)) { 28 | stop("Argument 'df' should be a dataframe.") 29 | } 30 | if (!is.matrix(inv_covar)) { 31 | stop("Argument 'inv_covar' should be a matrix.") 32 | } 33 | if (!(maxiter_solver%%1 == 0) | maxiter_solver < 0) { 34 | stop("Argument 'maxiter_solver' should be a positive integer.") 35 | } 36 | unselect_message <- "Please change the 'Smpath' argument in 'EventStudyPlot' to FALSE." 37 | 38 | coeff_length <- length(coefficients) 39 | norm_idxs <- which(coefficients == 0) 40 | Wcritic <- stats::qchisq(conf_level, coeff_length - length(norm_idxs)) 41 | pN <- length(norm_idxs) 42 | 43 | # First step: Find lowest possible polynomial order 44 | res_order <- FindOrder(coefficients, inv_covar, Wcritic, maxorder) 45 | order <- res_order$order 46 | res_order <- res_order$results 47 | 48 | cat(paste0("Smoothest path note: ", 49 | "The lowest order such that a polynomial is in confidence region is ", 50 | order+1, ".\n")) 51 | 52 | # Second step: Find minimum coefficient on highest-order term 53 | if (order == 0) { 54 | 55 | Fmat <- GetFmat(coeff_length, 0) 56 | vstar <- matrix(0) 57 | } else if (order == maxorder) { 58 | 59 | stop(paste0("Search for smoothest path reached the maximum order of ", maxorder,". ", unselect_message)) 60 | } else { 61 | 62 | Fmat <- GetFmat(coeff_length, order) 63 | 64 | if (pN < order) { 65 | vstar <- FindCoeffs(res_order, coefficients, inv_covar, Wcritic, pN, order, norm_idxs, Fmat, maxiter_solver) 66 | 67 | } else if (pN == order) { 68 | vstar <- FindCoeffsEq(res_order, coefficients, inv_covar, Wcritic, pN, order, norm_idxs, Fmat, maxiter_solver) 69 | 70 | } else { 71 | stop(paste0("The smoothest path cannot be found because the number of normalized coefficients is larger than the minimum order. ", 72 | unselect_message)) 73 | } 74 | } 75 | 76 | sm_path = Fmat %*% vstar 77 | Woptim = (t(sm_path - coefficients)%*%inv_covar)%*%(sm_path - coefficients) 78 | 79 | df["smoothest_path"] = sm_path 80 | return(df) 81 | } 82 | -------------------------------------------------------------------------------- /R/PrepareModelFormula.R: -------------------------------------------------------------------------------- 1 | 2 | #' Prepares a formula object for use in [EventStudyOLS()] or [EventStudyFHS()] 3 | 4 | #' @param estimator Accepts one of "OLS" or "FHS". If "FHS" is specified, implements IV estimator in Freyaldenhoven et al. 2019. 5 | #' @param outcomevar Character indicating column of outcome variable. 6 | #' @param str_policy_vars Character vector indicating event-study variables. 7 | #' @param controls Character indicating optional vector of controls. 8 | #' @param proxy Character indicating column of variable that is thought to be affected by the confound but not by the policy. 9 | #' Should be specified if and only if estimator is specified as "FHS". 10 | #' @param proxyIV Character of column to be used as an instrument. 
Should be specified if and only if estimator is specified as "FHS". 11 | #' If NULL, defaults to the strongest lead of the policy variable based on the first stage. 12 | #' @param static Indicates whether the model to be estimated is static. Defaults to FALSE. 13 | #' @return A formula object to be passed to EventStudy 14 | #' 15 | #' @importFrom stats reformulate as.formula 16 | #' @keywords internal 17 | #' @noRd 18 | #' 19 | #' @examples 20 | #' # For a static model: 21 | #' PrepareModelFormula(estimator = "OLS", outcomevar = "y_base", 22 | #' str_policy_vars = "z", static = T) 23 | #' 24 | #' # For a dynamic model: 25 | #' PrepareModelFormula(estimator = "OLS", outcomevar = "y_base", 26 | #' str_policy_vars = c("z_lead3", "z_fd_lead3", "z_fd_lead2", 27 | #' "z_fd_lag1", "z_fd_lag2", "z_lag3"), 28 | #' controls = "x_r") 29 | #' 30 | #' # If you would like to use IV regression: 31 | #' PrepareModelFormula(estimator = "FHS", 32 | #' outcomevar = "y_base", 33 | #' str_policy_vars = c("z_lead3", "z_fd_lead3", "z_fd_lead2", 34 | #' "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3"), 35 | #' controls = "x_r", 36 | #' proxy = "eta_m", 37 | #' proxyIV = "z_fd_lead3") 38 | #' 39 | 40 | PrepareModelFormula <- function(estimator, outcomevar, 41 | str_policy_vars, static = FALSE, 42 | controls = NULL, proxy = NULL, proxyIV = NULL) { 43 | 44 | if (! estimator %in% c("OLS", "FHS")) {stop("estimator should be either 'OLS' or 'FHS'.")} 45 | if (! is.character(outcomevar)) {stop("outcomevar should be a character.")} 46 | if (! is.character(str_policy_vars)) {stop("str_policy_vars should be a character.")} 47 | if (! (is.null(controls) | is.character(controls))) {stop("controls should be either NULL or a character.")} 48 | if (is.null(proxyIV) & estimator == "FHS") {stop("proxyIV must be specified when estimator is FHS.")} 49 | if (is.null(proxy) & estimator == "FHS") {stop("proxy must be specified when estimator is FHS.")} 50 | 51 | if (! is.logical(static) ) {stop("static should be a logical.")} 52 | if ( static & length(str_policy_vars) > 1) {stop("str_policy_vars must have one variable with static = TRUE.")} 53 | if (! static & length(str_policy_vars) <= 1) {stop("str_policy_vars must have more than one variable with static = FALSE.")} 54 | if ( static & !is.null(proxyIV)) {stop("static model is not compatible with FHS estimator.")} 55 | 56 | if (estimator == "OLS") { 57 | reg_formula <- stats::reformulate( 58 | termlabels = c(str_policy_vars, controls), 59 | response = outcomevar, 60 | intercept = FALSE 61 | ) 62 | } 63 | 64 | if (estimator == "FHS") { 65 | exogenous <- c(str_policy_vars, controls) 66 | exogenous <- exogenous[exogenous != proxy] 67 | exogenous <- exogenous[exogenous != proxyIV] 68 | 69 | reg_formula <- stats::as.formula( 70 | paste(outcomevar, "~", 71 | paste(c(exogenous, proxy), collapse="+"), 72 | "|", 73 | paste(c(exogenous, proxyIV), collapse="+")) 74 | ) 75 | } 76 | 77 | return(reg_formula) 78 | } 79 | -------------------------------------------------------------------------------- /man/EventStudyPlot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EventStudyPlot.R 3 | \name{EventStudyPlot} 4 | \alias{EventStudyPlot} 5 | \title{Creates an Event-Study Plot Following the Suggestions in Freyaldenhoven et al. 
(2021)} 6 | \usage{ 7 | EventStudyPlot( 8 | estimates, 9 | xtitle = "Event time", 10 | ytitle = "Coefficient", 11 | ybreaks = NULL, 12 | conf_level = 0.95, 13 | supt = 0.95, 14 | num_sim = 1000, 15 | add_mean = FALSE, 16 | pre_event_coeffs = TRUE, 17 | post_event_coeffs = TRUE, 18 | add_zero_line = TRUE, 19 | smpath = FALSE 20 | ) 21 | } 22 | \arguments{ 23 | \item{estimates}{The output from calling \code{\link[=EventStudy]{EventStudy()}}. Should be a list of length 2.} 24 | 25 | \item{xtitle}{The title for the x-axis. Should be a string. Defaults to "Event time".} 26 | 27 | \item{ytitle}{The title for the y-axis. Should be a string. Defaults to "Coefficient".} 28 | 29 | \item{ybreaks}{A vector containing the desired breaks for the y-axis. 30 | Defaults to NULL, which means the breaks are computed automatically. 31 | If custom breaks are selected with the \code{add_mean} argument set to TRUE, then the breaks must include zero.} 32 | 33 | \item{conf_level}{Confidence level used for confidence interval 34 | expressed as a real number between 0 and 1, inclusive. Defaults to 0.95.} 35 | 36 | \item{supt}{The confidence level used for obtaining the sup-t bands critical value. 37 | Should be a real number between 0 and 1, inclusive. Defaults to .95. Sup-t bands are simulation-based, 38 | so you must set a seed if you would like your sup-t band results to be reproducible (see examples).} 39 | 40 | \item{num_sim}{The number of simulations used in generating the sup-t bands. 41 | Should be a natural number. Defaults to 1000.} 42 | 43 | \item{add_mean}{Adds the mean of the dependent variable in the period used for normalization. 44 | Should be TRUE or FALSE. Defaults to FALSE.} 45 | 46 | \item{pre_event_coeffs}{If TRUE, uses pre and overidpre from estimates to test for pre-trends. 47 | Should be TRUE or FALSE. Defaults to TRUE.} 48 | 49 | \item{post_event_coeffs}{If TRUE, uses post and overidpost from estimates to test for leveling-off. 50 | Should be TRUE or FALSE. Defaults to TRUE.} 51 | 52 | \item{add_zero_line}{Whether or not to plot a dashed horizontal line at y = 0. 53 | Should be TRUE or FALSE. Defaults to TRUE, meaning the line is plotted.} 54 | 55 | \item{smpath}{Plot smoothest path of confounder that rationalizes event study coefficients. 56 | Should be TRUE or FALSE. Defaults to FALSE.} 57 | } 58 | \value{ 59 | The Event-Study plot as a ggplot2 object. 60 | } 61 | \description{ 62 | \code{EventStudyPlot} takes the output from \code{\link[=EventStudy]{EventStudy()}} and combines it with additional optional arguments to facilitate constructing an Event-Study Plot. 
63 | } 64 | \examples{ 65 | 66 | # 67 | 68 | # Minimal examples 69 | ### OLS 70 | 71 | estimates_ols <- EventStudy( 72 | estimator = "OLS", 73 | data = example_data, 74 | outcomevar = "y_smooth_m", 75 | policyvar = "z", 76 | idvar = "id", 77 | timevar = "t", 78 | controls = "x_r", 79 | FE = TRUE, TFE = TRUE, 80 | post = 3, overidpost = 5, 81 | pre = 2, overidpre = 4, 82 | normalize = - 3 83 | ) 84 | 85 | plt_ols <- EventStudyPlot(estimates = estimates_ols) 86 | plt_ols 87 | 88 | ### IV 89 | 90 | estimates_fhs <- EventStudy( 91 | estimator = "FHS", 92 | data = example_data, 93 | outcomevar = "y_smooth_m", 94 | policyvar = "z", 95 | idvar = "id", 96 | timevar = "t", 97 | proxy = "x_r", 98 | post = 2, overidpost = 1, 99 | pre = 0, overidpre = 3, 100 | normalize = -1 101 | ) 102 | 103 | plt_fhs <- EventStudyPlot(estimates = estimates_fhs) 104 | plt_fhs 105 | 106 | # Optional arguments 107 | 108 | ### Change x- and y-axis titles and set ybreaks 109 | EventStudyPlot(estimates = estimates_ols, 110 | xtitle = "Relative time", ytitle = "", 111 | ybreaks = seq(-2, 1, 0.5)) 112 | 113 | ### Add smoothest path 114 | EventStudyPlot(estimates = estimates_ols, smpath = TRUE) 115 | 116 | ### Add y-mean to y-axis and line y = 0 117 | EventStudyPlot(estimates = estimates_ols, add_mean = TRUE, 118 | add_zero_line = TRUE) 119 | 120 | ### Do not plot supt bands 121 | EventStudyPlot(estimates = estimates_ols, supt = NULL) 122 | 123 | ### Setting seed prior to plotting sup-t bands 124 | set.seed(1234) 125 | EventStudyPlot(estimates = estimates_ols) 126 | 127 | # Modify plots using ggplot2 functions 128 | library(ggplot2) 129 | 130 | ### Change color of dots, horizontal line, and theme 131 | plt_ols + 132 | geom_point(color = "red") + 133 | geom_hline(color = "gray", yintercept = 0) + 134 | theme_light() + 135 | theme(panel.grid.minor.x = element_blank()) 136 | 137 | } 138 | -------------------------------------------------------------------------------- /R/EventStudyOLS.R: -------------------------------------------------------------------------------- 1 | #' Runs Ordinary Least Squares (OLS) with optional fixed effects and clustering 2 | #' 3 | #' @param prepared_model_formula A formula object created in [PrepareModelFormula()] that is passed to [EventStudy()]. 4 | #' @param prepared_data Data frame containing all of the parameters required for [EventStudy()] plus leads and 5 | #' lags of the first differenced policy variable and leads and lags of the policy variable. 6 | #' @param idvar Character indicating column of units. 7 | #' @param timevar Character indicating column of time periods. 8 | #' @param FE Specifies if unit fixed-effects should be included. Defaults to TRUE. 9 | #' @param TFE Specifies if time fixed-effects should be included. Defaults to TRUE. 10 | #' @param cluster Specifies whether to use clustered errors by units. If FALSE, will use unclustered 11 | #' heteroskedasticity-robust standard errors. Defaults to TRUE. Must be TRUE if FE is TRUE. 12 | #' 13 | #' @return A data.frame that contains the estimates for the event study coefficients. 
14 | #' @import estimatr 15 | #' @keywords internal 16 | #' @noRd 17 | #' 18 | #' @examples 19 | #' model_formula <- PrepareModelFormula( 20 | #' estimator = "OLS", 21 | #' outcomevar = "y_base", 22 | #' str_policy_fd = c("z_fd", "z_fd_lead2", "z_fd_lead3", "z_fd_lag1", "z_fd_lag2"), 23 | #' str_policy_lead = "z_lead3", 24 | #' str_policy_lag = "z_lag3", 25 | #' controls = "x_r" 26 | #' ) 27 | #' 28 | #' EventStudyOLS( 29 | #' prepared_model_formula = model_formula, 30 | #' prepared_data = df_EventStudyOLS_example, 31 | #' idvar = "id", 32 | #' timevar = "t", 33 | #' FE = TRUE, 34 | #' TFE = TRUE, 35 | #' cluster = TRUE 36 | #') 37 | 38 | EventStudyOLS <- function(prepared_model_formula, prepared_data, 39 | idvar, timevar, FE, TFE, cluster) { 40 | 41 | if (! inherits(prepared_model_formula, "formula")) {stop("prepared_model_formula should be a formula")} 42 | if (! is.data.frame(prepared_data)) {stop("data should be a data frame.")} 43 | if (! is.character(idvar)) {stop("idvar should be a character.")} 44 | if (! is.character(timevar)) {stop("timevar should be a character.")} 45 | if (! is.logical(FE)) {stop("FE should be either TRUE or FALSE.")} 46 | if (! is.logical(TFE)) {stop("TFE should be either TRUE or FALSE.")} 47 | if (! is.logical(cluster)) {stop("cluster should be either TRUE or FALSE.")} 48 | 49 | if (FE & TFE & cluster) { 50 | 51 | ols_output <- estimatr::lm_robust( 52 | formula = prepared_model_formula, 53 | data = prepared_data, 54 | fixed_effects = ~ get(idvar) + get(timevar), 55 | clusters = get(idvar), 56 | se_type = "stata" 57 | ) 58 | 59 | } else if ((!FE) & TFE & cluster) { 60 | 61 | ols_output <- estimatr::lm_robust( 62 | formula = prepared_model_formula, 63 | data = prepared_data, 64 | fixed_effects = ~ get(timevar), 65 | clusters = get(idvar), 66 | se_type = "stata" 67 | ) 68 | 69 | } else if (FE & (!TFE) & cluster) { 70 | 71 | ols_output <- estimatr::lm_robust( 72 | formula = prepared_model_formula, 73 | data = prepared_data, 74 | fixed_effects = ~ get(idvar), 75 | clusters = get(idvar), 76 | se_type = "stata" 77 | ) 78 | 79 | } else if ((!FE) & (!TFE) & cluster) { 80 | 81 | ols_output <- estimatr::lm_robust( 82 | formula = prepared_model_formula, 83 | data = prepared_data, 84 | clusters = get(idvar), 85 | se_type = "stata" 86 | ) 87 | 88 | } else if (FE & TFE & (!cluster)) { 89 | 90 | ols_output <- estimatr::lm_robust( 91 | formula = prepared_model_formula, 92 | data = prepared_data, 93 | fixed_effects = ~ get(idvar) + get(timevar), 94 | se_type = "stata" 95 | ) 96 | 97 | } else if ((!FE) & TFE & (!cluster)) { 98 | 99 | ols_output <- estimatr::lm_robust( 100 | formula = prepared_model_formula, 101 | data = prepared_data, 102 | fixed_effects = ~ get(timevar), 103 | se_type = "stata" 104 | ) 105 | 106 | } else if (FE & (!TFE) & (!cluster)) { 107 | 108 | ols_output <- estimatr::lm_robust( 109 | formula = prepared_model_formula, 110 | data = prepared_data, 111 | fixed_effects = ~ get(idvar), 112 | se_type = "stata" 113 | ) 114 | 115 | } else if ((!FE) & (!TFE) & (!cluster)) { 116 | 117 | ols_output <- estimatr::lm_robust( 118 | formula = prepared_model_formula, 119 | data = prepared_data, 120 | se_type = "stata" 121 | ) 122 | } 123 | 124 | return(ols_output) 125 | } 126 | -------------------------------------------------------------------------------- /tests/testthat/test-TestLinear.R: -------------------------------------------------------------------------------- 1 | test_that("correctly recognizes wrong variable type for estimate argument", { 2 | estimate <- 
EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 3 | policyvar = "z", idvar = "id", timevar = "t", 4 | controls = "x_r", FE = TRUE, TFE = TRUE, 5 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 6 | 7 | df_estimate <- estimatr::tidy(estimate[[1]]) 8 | 9 | test = "z_fd_lag1 = z_fd" 10 | 11 | expect_error(TestLinear(df_estimate, test, pretrends = T, leveling_off = T)) 12 | expect_error(TestLinear(estimate[[1]], test, pretrends = T, leveling_off = T)) 13 | expect_error(TestLinear(estimate[[2]], test, pretrends = T, leveling_off = T)) 14 | }) 15 | 16 | test_that("correctly recognizes wrong variable type for pretrends", { 17 | estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 18 | policyvar = "z", idvar = "id", timevar = "t", 19 | controls = "x_r", FE = TRUE, TFE = TRUE, 20 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 21 | 22 | expect_error(TestLinear(estimate, pretrends = "pretrends")) 23 | expect_error(TestLinear(estimate, pretrends = 1)) 24 | }) 25 | 26 | test_that("correctly recognizes wrong variable type for leveling_off", { 27 | estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 28 | policyvar = "z", idvar = "id", timevar = "t", 29 | controls = "x_r", FE = TRUE, TFE = TRUE, 30 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 31 | 32 | expect_error(TestLinear(estimate, leveling_off = "leveling_off")) 33 | expect_error(TestLinear(estimate, leveling_off = 1)) 34 | }) 35 | 36 | test_that("produces only functions that are specified", { 37 | estimate <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 38 | policyvar = "z", idvar = "id", timevar = "t", 39 | controls = "x_r", FE = TRUE, TFE = TRUE, 40 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 41 | 42 | test = "z_fd_lag1 = z_fd" 43 | 44 | df <- TestLinear(estimate, pretrends = F, leveling_off = F) 45 | expect_equal(length(df$Test), 0) 46 | 47 | df <- TestLinear(estimate, test, pretrends = F, leveling_off = F) 48 | expect_equal(length(df$Test), 1) 49 | expect_equal(df$Test, "User Test") 50 | 51 | df <- TestLinear(estimate, pretrends = T, leveling_off = F) 52 | expect_equal(length(df$Test), 1) 53 | expect_equal(df$Test, "Pre-Trends") 54 | 55 | df <- TestLinear(estimate, pretrends = F, leveling_off = T) 56 | expect_equal(length(df$Test), 1) 57 | expect_equal(df$Test, "Leveling-Off") 58 | 59 | df <- TestLinear(estimate, test, pretrends = T, leveling_off = F) 60 | expect_equal(length(df$Test), 2) 61 | tests <- c("User Test", "Pre-Trends") 62 | expect_equal(df$Test, tests) 63 | 64 | df <- TestLinear(estimate, test, pretrends = F, leveling_off = T) 65 | expect_equal(length(df$Test), 2) 66 | tests <- c("User Test", "Leveling-Off") 67 | expect_equal(df$Test, tests) 68 | 69 | df <- TestLinear(estimate, pretrends = T, leveling_off = T) 70 | expect_equal(length(df$Test), 2) 71 | tests <- c("Pre-Trends", "Leveling-Off") 72 | expect_equal(df$Test, tests) 73 | 74 | df <- TestLinear(estimate, test, pretrends = T, leveling_off = T) 75 | expect_equal(length(df$Test), 3) 76 | tests <- c("User Test", "Pre-Trends", "Leveling-Off") 77 | expect_equal(df$Test, tests) 78 | }) 79 | 80 | 81 | test_that("checks equality
with STATA", { 82 | estimate <- suppressWarnings( 83 | EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 84 | policyvar = "z", idvar = "id", timevar = "t", 85 | FE = TRUE, TFE = TRUE, 86 | post = 2, pre = 2, overidpre = 2, 87 | overidpost = 2, normalize = - 1, 88 | cluster = TRUE, anticipation_effects_normalization = TRUE) 89 | ) 90 | 91 | codes <- read.csv('input/pvalues.txt', header = F) 92 | 93 | pretrends_stata_p <- codes[1,1] 94 | leveloff_stata_p <- codes[2,1] 95 | 96 | df_test_linear <- TestLinear(estimate, pretrends = T, leveling_off = T) 97 | pretrends_p_value <- df_test_linear[df_test_linear["Test"] == "Pre-Trends", "p.value"] 98 | levelingoff_p_value <- df_test_linear[df_test_linear["Test"] == "Leveling-Off", "p.value"] 99 | 100 | expect_true(abs(pretrends_stata_p - pretrends_p_value) < 0.0001) 101 | expect_true(abs(leveloff_stata_p - levelingoff_p_value) < 0.0001) 102 | }) 103 | -------------------------------------------------------------------------------- /tests/testthat/test-ComputeShifts.R: -------------------------------------------------------------------------------- 1 | test_that("correctly recognizes wrong variable types", { 2 | df_test <- read.csv("./input/df_test.csv") 3 | 4 | # Character variables 5 | expect_error(ComputeShifts(df_test, 1, "periods", "values", 3, F, T)) 6 | expect_error(ComputeShifts(df_test, "country", 1, "values", 3, F, T)) 7 | expect_error(ComputeShifts(df_test, "country", "periods", 1, 3, F, T)) 8 | expect_error(ComputeShifts(df_test, "country", "periods", "policy", 3, F, T)) 9 | 10 | # Logical variables 11 | expect_error(ComputeShifts(df_test, "country", "periods", "values", 3, "T", F)) 12 | expect_error(ComputeShifts(df_test, "country", "periods", "values", 3, T, "T")) 13 | 14 | # Numeric variables 15 | expect_error(ComputeShifts(df_test, "country", "periods", "values", "2", F, T)) 16 | }) 17 | 18 | test_that("correctly recognizes variables missing from dataset", { 19 | df_test <- read.csv("./input/df_test.csv") 20 | 21 | # Character variables 22 | expect_error(ComputeShifts(df_test, "country", "periods", "policy", 3, F, T)) 23 | expect_error(ComputeShifts(df_test, "state", "periods", "values", 3, F, T)) 24 | }) 25 | 26 | test_that("correctly adds the desired column(s) of shifts", { 27 | df_test <- read.csv("./input/df_test.csv") 28 | cols_original <- colnames(df_test) 29 | 30 | # Add 1 lead/lag 31 | dt_test <- data.table::as.data.table(df_test) 32 | cols_new <- colnames(ComputeShifts(dt_test, idvar = "country", timevar = "periods", 33 | shiftvar = "values", 34 | shiftvalues = 2)) 35 | expect_equal(cols_new, c(cols_original, "values_lag2")) 36 | 37 | dt_test <- data.table::as.data.table(df_test) 38 | cols_new <- colnames(ComputeShifts(dt_test, idvar = "country", timevar = "periods", 39 | shiftvar = "values", 40 | shiftvalues = -2)) 41 | expect_equal(cols_new, c(cols_original, "values_lead2")) 42 | 43 | # Add multiple leads/lags 44 | dt_test <- data.table::as.data.table(df_test) 45 | cols_new <- colnames(ComputeShifts(dt_test, idvar = "country", timevar = "periods", 46 | shiftvar = "values", 47 | shiftvalues = 1:2)) 48 | expect_equal(cols_new, c(cols_original, "values_lag1", "values_lag2")) 49 | 50 | dt_test <- data.table::as.data.table(df_test) 51 | cols_new <- colnames(ComputeShifts(dt_test, idvar = "country", timevar = "periods", 52 | shiftvar = "values", 53 | shiftvalues = -2:-1)) 54 | expect_equal(cols_new, c(cols_original, "values_lead2", "values_lead1")) 55 | }) 56 | 57 | test_that("the columns added have correct 
suffixes", { 58 | df_test <- data.table::fread("./input/df_test.csv") 59 | 60 | df_lags <- ComputeShifts(df_test, idvar = "country", timevar = "periods", 61 | shiftvar = "values", 62 | shiftvalues = 1:2) 63 | 64 | v_newvars <- setdiff(colnames(df_lags), colnames(df_test)) 65 | 66 | expect_true(all(grepl("_lag", v_newvars))) 67 | 68 | df_leads <- ComputeShifts(df_test, idvar = "country", timevar = "periods", 69 | shiftvar = "values", 70 | shiftvalues = -2:-1) 71 | 72 | v_newvars <- setdiff(colnames(df_leads), colnames(df_test)) 73 | 74 | expect_true(all(grepl("_lead", v_newvars))) 75 | }) 76 | 77 | test_that("correctly shifts variable when there are no holes in timevar", { 78 | df <- data.table::data.table( 79 | id = c(rep("A", 4), rep("B", 2), rep("C", 3)), 80 | time = c(1, 2, 3, 4, 1, 2, 1, 2, 3), 81 | z = c(10, 12, 13, 14, 8, 9, 10, 11, 12), 82 | z_lag1 = c(NA, 10, 12, 13, NA, 8, NA, 10, 11), 83 | z_lead1 = c(12, 13, 14, NA, 9, NA, 11, 12, NA) 84 | ) 85 | 86 | df_shifts <- ComputeShifts(df[, c("id", "time", "z")], 87 | idvar = "id", timevar = "time", 88 | shiftvar = "z", 89 | shiftvalues = c(-1, 1), 90 | timevar_holes = FALSE) 91 | 92 | expect_equal(df$z_lead1, df_shifts$z_lead1) 93 | expect_equal(df$z_lag1, df_shifts$z_lag1) 94 | 95 | expect_true(all(c("id", "time", "z") %in% colnames(df_shifts))) 96 | }) 97 | 98 | test_that("correctly shifts variable when there are holes in timevar", { 99 | df <- data.table::data.table( 100 | id = c(rep("A", 4), rep("B", 2), rep("C", 3)), 101 | time = c(1, 2, 4, 5, 2, 3, 2, 3, 4), 102 | z = c(10, 12, 13, 14, 8, 9, 10, 11, 12), 103 | z_lag1 = c(NA, 10, NA, 13, NA, 8, NA, 10, 11), 104 | z_lead1 = c(12, NA, 14, NA, 9, NA, 11, 12, NA) 105 | ) 106 | 107 | df_shifts <- ComputeShifts(df[, c("id", "time", "z")], 108 | idvar = "id", timevar = "time", 109 | shiftvar = "z", 110 | shiftvalues = c(-1, 1), 111 | timevar_holes = TRUE) 112 | 113 | expect_equal(df$z_lead1, df_shifts$z_lead1) 114 | expect_equal(df$z_lag1, df_shifts$z_lag1) 115 | 116 | expect_true(all(c("id", "time", "z") %in% colnames(df_shifts))) 117 | }) 118 | -------------------------------------------------------------------------------- /R/TestLinear.R: -------------------------------------------------------------------------------- 1 | #' Perform Tests of Linear Hypotheses 2 | #' 3 | #' @description `TestLinear` tests linear restrictions on coefficients. 4 | #' 5 | #' @param estimates A list of length 2 containing estimation results and model information. 6 | #' Should be an output of [EventStudy()]. 7 | #' @param test The hypothesis to be estimated. Accepts inputs that can be passed to 8 | #' hypothesis.matrix argument in `car::linearHypothesis()`. 9 | #' @param test_name Name for test input by user. Defaults to "User Test." 10 | #' @param pretrends If TRUE, uses pre and overidpre from estimates to test for pre-trends. 11 | #' Defaults to TRUE. 12 | #' @param leveling_off If TRUE, uses post and overidpost from estimates to test for leveling-off. 13 | #' Defaults to TRUE. 14 | #' 15 | #' @return A data frame containing the F-statistic and p-value for the specified test(s). 
16 | #' @importFrom car linearHypothesis 17 | #' @export 18 | #' 19 | #' @examples 20 | #' estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 21 | #' policyvar = "z", idvar = "id", timevar = "t", 22 | #' controls = "x_r", FE = TRUE, TFE = TRUE, 23 | #' post = 3, pre = 2, overidpre = 4, overidpost = 5, 24 | #' normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 25 | #' 26 | #' TestLinear( 27 | #' estimates, 28 | #' test = "z_fd_lag1 = z_fd", 29 | #' test_name = "Hypothesis Test", 30 | #' pretrends = TRUE, 31 | #' leveling_off = TRUE 32 | #' ) 33 | #' 34 | 35 | 36 | TestLinear <- function(estimates, test = NA, test_name = "User Test", pretrends = TRUE, leveling_off = TRUE){ 37 | if (! is.list(estimates) | length(estimates) != 2){ 38 | stop("estimates should be a list of length two, an output of EventStudy()")} 39 | if ((! class(estimates$output) %in% c("lm_robust", "iv_robust")) | ! is.list(estimates$output)) { 40 | stop("The first element of estimates should be a list of class 'lm_robust' with coefficient estimates and standard errors") 41 | } 42 | if (! is.list(estimates$arguments) | ! is.list(estimates$arguments)) { 43 | stop("The second element of estimates should be a list with argument definitions, an output of EventStudy().") 44 | } 45 | if (! is.character(test_name)) {stop("test_name should be of class character. Defaults to 'User Test'.")} 46 | if (! is.logical(pretrends)) {stop("pretrends should be a logical. Default value is TRUE")} 47 | if (! is.logical(leveling_off)) {stop("leveling_off should be a logical. Default value is TRUE")} 48 | 49 | if(estimates$arguments$cluster == TRUE){ 50 | 51 | estimates$output$df.residual <- estimates$output$nclusters - 1 52 | 53 | } 54 | 55 | coefficients <- estimates$arguments$eventstudy_coefficients 56 | 57 | if (!is.null(estimates$arguments$proxyIV)){ 58 | coefficients <- coefficients[coefficients != estimates$arguments$proxyIV] 59 | } 60 | 61 | test_results <- data.frame(row.names = c("Test", "F.statistic", "p.value")) 62 | 63 | if (!is.na(test)){ 64 | user_results <- car::linearHypothesis(estimates$output, test, test = "F") 65 | 66 | temp <- data.frame("Test" = test_name, 67 | "F" = user_results[2, ]$F, 68 | "p.value" = user_results[2, ]$`Pr(>F)`) 69 | test_results <- rbind(test_results, temp) 70 | } 71 | 72 | if (pretrends == TRUE){ 73 | 74 | G <- estimates$arguments$pre 75 | L_G <- estimates$arguments$overidpre 76 | k <- as.character(seq.int(G+1, (G+L_G))) 77 | 78 | suffix <- paste0("_lead",k) 79 | 80 | suffix_len <- str_length(suffix)[1] 81 | 82 | delta_k <- coefficients[str_sub(coefficients, start= -suffix_len) %in% suffix] 83 | 84 | pretrends_hyp <- paste0(delta_k, "=0") 85 | 86 | pretrends_results <- car::linearHypothesis(estimates$output, pretrends_hyp, test = "F") 87 | 88 | temp <- data.frame("Test" = "Pre-Trends", 89 | "F" = pretrends_results[2, ]$F, 90 | "p.value" = pretrends_results[2, ]$`Pr(>F)`) 91 | 92 | test_results <- rbind(test_results, temp) 93 | } 94 | 95 | if (leveling_off == TRUE){ 96 | 97 | M <- estimates$arguments$post 98 | L_M <- estimates$arguments$overidpost 99 | k <- as.character(seq.int(M+1, M+L_M)) 100 | 101 | suffix_M <- paste0("_lag",as.character(M)) 102 | suffix_Mk <- paste0("_lag",k) 103 | 104 | suffix_M_len <- str_length(suffix_M) 105 | suffix_Mk_len <- str_length(suffix_Mk)[1] 106 | 107 | delta_M <- coefficients[str_sub(coefficients, start= -suffix_M_len) %in% suffix_M] 108 | delta_Mk <- coefficients[str_sub(coefficients, start= -suffix_Mk_len) 
%in% suffix_Mk] 109 | 110 | leveling_off_hyp <- paste0(delta_Mk, "=", delta_M) 111 | 112 | leveling_results <- car::linearHypothesis(estimates$output, leveling_off_hyp, test = "F") 113 | 114 | temp <- data.frame("Test" = "Leveling-Off", 115 | "F" = leveling_results[2, ]$F, 116 | "p.value" = leveling_results[2, ]$`Pr(>F)`) 117 | 118 | test_results <- rbind(test_results, temp) 119 | } 120 | 121 | return(test_results) 122 | } 123 | 124 | -------------------------------------------------------------------------------- /vignettes/documentation.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "documentation" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{documentation} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>" 14 | ) 15 | ``` 16 | 17 | ```{r setup} 18 | library(eventstudyr) 19 | ``` 20 | 21 | ## Event Studies 22 | 23 | Linear panel models, and the event-study plots that often accompany them, are popular tools for learning about policy effects. 24 | 25 | ## eventstudyr 26 | 27 | `eventstudyr` facilitates estimating linear panel event-study models and constructing event-study plots following the suggestions in [Freyaldenhoven _et al._ (2021)](https://www.nber.org/system/files/working_papers/w29170/w29170.pdf). In particular, it provides functionality for: 28 | 29 | * depicting cumulative estimated effects of a policy relative to a user-controlled normalized period 30 | * adding confidence intervals and sup-t confidence bands 31 | * testing for the presence of "pre-trends" and stable dynamics post-event 32 | * plotting the least "wiggly" confound consistent with the estimates 33 | 34 | ## Data 35 | 36 | We will use the `example_data` dataset to demonstrate the basic functionality of `eventstudyr`. These sample data are from the [replication archive](https://data.nber.org/data-appendix/w29170/) for Freyaldenhoven et al. (2021). The documentation can be accessed using `?example_data`. 37 | 38 | ```{r Data preview} 39 | 40 | dim(example_data) 41 | head(example_data) 42 | ``` 43 | 44 | 45 | ## EventStudy() 46 | 47 | `EventStudy()` estimates the regression model from Equation (2) in Freyaldenhoven et al. (2021) and returns a list object that stores the estimation results (as an `lm_robust` object) together with the arguments given in the function call. It requires the names of the outcome, policy, unit (ID), and time variables. One must also specify `post`, the number of periods after which past values of the policy are no longer supposed to affect the outcome, and `pre`, the number of periods beyond which future values of the policy are not supposed to affect the outcome today. The function optionally accepts the controls, the event-time window, whether fixed effects should be included, and the period to be used for normalization.
48 | 49 | Here is an example using the sample data: 50 | 51 | ```{r Basic Eventstudy Example - Show Code, eval = FALSE} 52 | results <- EventStudy(estimator = "OLS", 53 | data = example_data, 54 | outcomevar = "y_jump_m", 55 | policyvar = "z", 56 | idvar = "id", 57 | timevar = "t", 58 | post = 3, 59 | pre = 0) 60 | ``` 61 | ```{r Basic Eventstudy Example - Run Code, echo = FALSE} 62 | results <- EventStudy(estimator = "OLS", 63 | data = example_data, 64 | outcomevar = "y_jump_m", 65 | policyvar = "z", 66 | idvar = "id", 67 | timevar = "t", 68 | post = 3, 69 | pre = 0) 70 | ``` 71 | ```{r Basic Eventstudy Example - Show Results 1, echo=TRUE, eval=TRUE} 72 | summary(results$output) 73 | ``` 74 |
75 | Click for `results$arguments` 76 | ```{r} 77 | ## Estimator 78 | results$arguments$estimator 79 | 80 | ## Data 81 | results$arguments$data[1:5,] 82 | 83 | ## Variables 84 | results$arguments$outcomevar 85 | results$arguments$outcomevar 86 | results$arguments$policyvar 87 | results$arguments$idvar 88 | results$arguments$timevar 89 | results$arguments$controls 90 | 91 | ## Proxies 92 | results$arguments$proxy 93 | results$arguments$proxyIV 94 | 95 | ## Fixed effects 96 | results$arguments$FE 97 | results$arguments$TFE 98 | 99 | ## Periods 100 | results$arguments$post 101 | results$arguments$overidpost 102 | results$arguments$pre 103 | results$arguments$overidpre 104 | 105 | ## Normalization 106 | results$arguments$normalize 107 | results$arguments$normalization_column 108 | 109 | ## Cluster 110 | results$arguments$cluster 111 | 112 | ## Eventstudy coefficients 113 | results$arguments$eventstudy_coefficients 114 | ``` 115 |
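The fitted model stored under `output` also works with standard post-estimation tools. As a minimal sketch (mirroring the examples in `?EventStudy`), `estimatr::tidy()` turns the estimates into a data frame of event-study coefficients and standard errors:

```{r Tidy estimates sketch}
# Event-study coefficients with standard errors and confidence intervals,
# returned as a data.frame (uses `results` from the EventStudy() call above)
estimatr::tidy(results$output)
```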
116 | 117 | ## EventStudyPlot() 118 | `EventStudyPlot()` prepares an event-study plot based on the suggestions in Freyaldenhoven et al. (2021). 119 | 120 | This function is designed to use the output of `EventStudy()` and returns a ggplot object. Here is an example of using the function with some default settings: 121 | 122 | ```{r EventStudyPlot example 1, fig.dim = c(7, 5)} 123 | eventstudy_estimates_ols <- EventStudy(estimator = "OLS", 124 | data = example_data, 125 | outcomevar = "y_jump_m", 126 | policyvar = "z", 127 | idvar = "id", 128 | timevar = "t", 129 | post = 3, 130 | pre = 0) 131 | 132 | EventStudyPlot(estimates = eventstudy_estimates_ols, 133 | xtitle = "Event time", 134 | ytitle = "Coefficient") 135 | ``` 136 | 137 | ## References 138 | 139 | Freyaldenhoven, S., Hansen, C., Pérez, J.P. and Shapiro, J.M., 2021. Visualization, identification, and estimation in the linear panel event-study design (No. w29170). National Bureau of Economic Research. 140 | -------------------------------------------------------------------------------- /R/EventStudyFHS.R: -------------------------------------------------------------------------------- 1 | #' Implements proxy Instrumental Variables (IV) estimator proposed in [Freyaldenhoven Hansen and Shapiro (FHS, 2019)](https://www.aeaweb.org/articles?id=10.1257/aer.20180609) 2 | #' 3 | #' @param prepared_model_formula A formula object created in [PrepareModelFormula()] that is passed to [EventStudy()]. 4 | #' @param prepared_data Data frame containing all of the parameters required for [EventStudy()] plus leads and 5 | #' lags of the first differenced policy variable and leads and lags of the policy variable. 6 | #' @param idvar Character indicating column of units. 7 | #' @param timevar Character indicating column of time periods. 8 | #' @param FE Specifies if unit fixed-effects should be included. Defaults to TRUE. 9 | #' @param TFE Specifies if time fixed-effects should be included. Defaults to TRUE. 10 | #' @param cluster Specifies whether to use clustered errors by units. If FALSE, will use unclustered 11 | #' heteroskedasticity-robust standard errors. Defaults to TRUE. Must be TRUE if FE is TRUE. 12 | #' 13 | #' @return A data.frame that contains the estimates for the event study coefficients. 14 | #' @import estimatr 15 | #' @importFrom stats qnorm pnorm 16 | #' @keywords internal 17 | #' @noRd 18 | #' 19 | #' @examples 20 | #' model_formula <- PrepareModelFormula( 21 | #' estimator = "FHS", 22 | #' outcomevar = "y_base", 23 | #' str_policy_vars = c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", 24 | #' "z_fd_lag1", "z_fd_lag2", "z_lag3"), 25 | #' static = FALSE, 26 | #' controls = "x_r", 27 | #' proxy = "eta_m", 28 | #' proxyIV = "z_fd_lead3" 29 | #') 30 | #' 31 | #' EventStudyFHS( 32 | #' prepared_model_formula = model_formula, 33 | #' prepared_data = df_EventStudyFHS_example, 34 | #' idvar = "id", 35 | #' timevar = "t", 36 | #' FE = TRUE, 37 | #' TFE = TRUE, 38 | #' cluster = TRUE 39 | #') 40 | 41 | EventStudyFHS <- function(prepared_model_formula, prepared_data, 42 | idvar, timevar, FE, TFE, cluster) { 43 | 44 | if (! inherits(prepared_model_formula, "formula")) {stop("prepared_model_formula should be a formula")} 45 | if (! is.data.frame(prepared_data)) {stop("data should be a data frame.")} 46 | if (! is.character(idvar)) {stop("idvar should be a character.")} 47 | if (! is.character(timevar)) {stop("timevar should be a character.")} 48 | if (! is.logical(FE)) {stop("FE should be either TRUE or FALSE.")} 49 | if (!
is.logical(TFE)) {stop("TFE should be either TRUE or FALSE.")} 50 | if (! is.logical(cluster)) {stop("cluster should be either TRUE or FALSE.")} 51 | if (FE & !cluster) {stop("cluster=TRUE required when FE=TRUE.")} 52 | 53 | 54 | if (FE & TFE & cluster) { 55 | 56 | fhs_output <- estimatr::iv_robust( 57 | formula = prepared_model_formula, 58 | data = prepared_data, 59 | clusters = get(idvar), 60 | fixed_effects = ~ get(idvar) + get(timevar), 61 | se_type="stata") 62 | 63 | N <- fhs_output$nobs 64 | n <- fhs_output$nclusters 65 | K <- length(fhs_output$felevels$`get(timevar)`) + fhs_output$rank 66 | 67 | fhs_output$std.error <- fhs_output$std.error / sqrt((N - K) / (N - n - K + 1)) 68 | fhs_output$statistic <- fhs_output$coefficients / fhs_output$std.error 69 | fhs_output$p.value <- 2*stats::pnorm(abs(fhs_output$statistic), lower.tail = FALSE) 70 | fhs_output$conf.low <- fhs_output$coefficients - stats::qnorm(0.975) * fhs_output$std.error 71 | fhs_output$conf.high <- fhs_output$coefficients + stats::qnorm(0.975) * fhs_output$std.error 72 | fhs_output$vcov <- fhs_output$vcov / ((N - K) / (N - n - K + 1)) 73 | 74 | } else if (FE & (!TFE) & cluster) { 75 | 76 | fhs_output <- estimatr::iv_robust( 77 | formula = prepared_model_formula, 78 | data = prepared_data, 79 | clusters = get(idvar), 80 | fixed_effects = ~ get(idvar), 81 | se_type="stata") 82 | 83 | N <- fhs_output$nobs 84 | n <- fhs_output$nclusters 85 | K <- 1 + fhs_output$rank 86 | 87 | fhs_output$std.error <- fhs_output$std.error / sqrt((N - K)/(N - n - K + 1)) 88 | fhs_output$statistic <- fhs_output$coefficients / fhs_output$std.error 89 | fhs_output$p.value <- 2*stats::pnorm(abs(fhs_output$statistic), lower.tail = FALSE) 90 | fhs_output$conf.low <- fhs_output$coefficients - stats::qnorm(0.975) * fhs_output$std.error 91 | fhs_output$conf.high <- fhs_output$coefficients + stats::qnorm(0.975) * fhs_output$std.error 92 | fhs_output$vcov <- fhs_output$vcov / ((N - K) / (N - n - K + 1)) 93 | 94 | } else if ((!FE) & TFE & (!cluster)) { 95 | 96 | fhs_output <- estimatr::iv_robust( 97 | formula = prepared_model_formula, 98 | data = prepared_data, 99 | fixed_effects = ~ get(timevar), 100 | se_type="stata") 101 | 102 | } else if ((!FE) & TFE & cluster) { 103 | 104 | fhs_output <- estimatr::iv_robust( 105 | formula = prepared_model_formula, 106 | data = prepared_data, 107 | clusters = get(idvar), 108 | fixed_effects = ~ get(timevar), 109 | se_type="stata") 110 | 111 | } else if ((!FE) & (!TFE) & (!cluster)) { 112 | 113 | fhs_output <- estimatr::iv_robust( 114 | formula = prepared_model_formula, 115 | data = prepared_data, 116 | se_type = "stata") 117 | 118 | } else if ((!FE) & (!TFE) & cluster) { 119 | 120 | fhs_output <- estimatr::iv_robust( 121 | formula = prepared_model_formula, 122 | data = prepared_data, 123 | clusters = get(idvar), 124 | se_type = "stata") 125 | } 126 | 127 | return(fhs_output) 128 | } 129 | 130 | -------------------------------------------------------------------------------- /tests/testthat/test-AddCIs.R: -------------------------------------------------------------------------------- 1 | test_that("correctly calculates conf_level at 0.95", { 2 | df_test <- data.table::fread("./input/df_test_AddCI.csv") 3 | 4 | policyvar <- "z" 5 | eventstudy_coefficients <- c("z_fd_lag1", "z_fd_lead1") 6 | 7 | df_test_CI <- AddCIs(df_test, eventstudy_coefficients, 0.95) 8 | 9 | expected_lower <- 2 - 1.959964 10 | expected_upper <- 2 + 1.959964 11 | 12 | expect_equal(df_test_CI$ci_lower[df_test_CI$term == "z_fd_lead1"], expected_lower, 
tolerance = 1e-6) 13 | expect_equal(df_test_CI$ci_upper[df_test_CI$term == "z_fd_lead1"], expected_upper, tolerance = 1e-6) 14 | }) 15 | 16 | # OLS ------------------------------------------ 17 | 18 | test_that("correctly recognizes wrong class for estimate argument", { 19 | estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 20 | policyvar = "z", idvar = "id", timevar = "t", 21 | controls = "x_r", FE = TRUE, TFE = TRUE, 22 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 23 | 24 | policyvar <- "z" 25 | eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients 26 | 27 | expect_error(AddCIs(df_test, eventstudy_coefficients, 0.95)) 28 | }) 29 | 30 | test_that("correctly recognizes missing columns in estimates argument", { 31 | estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 32 | policyvar = "z", idvar = "id", timevar = "t", 33 | controls = "x_r", FE = TRUE, TFE = TRUE, 34 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 35 | 36 | df_test <- estimatr::tidy(estimates$output) 37 | 38 | eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients 39 | 40 | df_test_noterm <- df_test %>% rename(wrongname = term) 41 | df_test_noest <- df_test %>% rename(wrongname = estimate) 42 | df_test_nostd <- df_test %>% rename(wrongname = std.error) 43 | 44 | expect_error(AddCIs(df_test_noterm, eventstudy_coefficients, 0.95)) 45 | expect_error(AddCIs(df_test_noest , eventstudy_coefficients, 0.95)) 46 | expect_error(AddCIs(df_test_nostd , eventstudy_coefficients, 0.95)) 47 | }) 48 | 49 | test_that("correctly recognizes wrong inputs for conf_level argument", { 50 | estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 51 | policyvar = "z", idvar = "id", timevar = "t", 52 | controls = "x_r", FE = TRUE, TFE = TRUE, 53 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 54 | 55 | df_test <- estimatr::tidy(estimates$output) 56 | 57 | eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients 58 | 59 | expect_error(AddCIs(df_test, eventstudy_coefficients, "95")) 60 | expect_error(AddCIs(df_test, eventstudy_coefficients, 95)) 61 | expect_error(AddCIs(df_test, eventstudy_coefficients,-.95)) 62 | }) 63 | 64 | # FHS ------------------------------------------ 65 | 66 | test_that("correctly recognizes wrong class for estimate argument", { 67 | data <- example_data[, c("y_base", "z", "id", "t", "x_r", "eta_m")] 68 | estimates <- EventStudy(estimator = "FHS", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", 69 | timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, 70 | overidpost = 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = FALSE) 71 | 72 | eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients 73 | 74 | expect_error(AddCIs(df_test, eventstudy_coefficients, 0.95)) 75 | }) 76 | 77 | test_that("correctly recognizes missing columns in estimates argument", { 78 | data <- example_data[, c("y_base", "z", "id", "t", "x_r", "eta_m")] 79 | estimates <- EventStudy(estimator = "FHS", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", 80 | timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, 81 | overidpost 
= 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = FALSE) 82 | 83 | df_test <- estimatr::tidy(estimates$output) 84 | 85 | eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients 86 | 87 | df_test_noterm <- df_test %>% rename(wrongname = term) 88 | df_test_noest <- df_test %>% rename(wrongname = estimate) 89 | df_test_nostd <- df_test %>% rename(wrongname = std.error) 90 | 91 | expect_error(AddCIs(df_test_noterm, eventstudy_coefficients, 0.95)) 92 | expect_error(AddCIs(df_test_noest , eventstudy_coefficients, 0.95)) 93 | expect_error(AddCIs(df_test_nostd , eventstudy_coefficients, 0.95)) 94 | }) 95 | 96 | test_that("correctly recognizes wrong inputs for conf_level argument", { 97 | data <- example_data[, c("y_base", "z", "id", "t", "x_r", "eta_m")] 98 | estimates <- EventStudy(estimator = "FHS", data = data, outcomevar = "y_base", policyvar = "z", idvar = "id", 99 | timevar = "t", controls = "x_r", proxy = "eta_m", FE = TRUE, TFE = TRUE, post = 1, 100 | overidpost = 2, pre = 1, overidpre = 2, normalize = -1, cluster = TRUE, anticipation_effects_normalization = FALSE) 101 | 102 | df_test <- estimatr::tidy(estimates$output) 103 | 104 | eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients 105 | 106 | expect_error(AddCIs(df_test, eventstudy_coefficients, "95")) 107 | expect_error(AddCIs(df_test, eventstudy_coefficients, 95)) 108 | expect_error(AddCIs(df_test, eventstudy_coefficients,-.95)) 109 | }) 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /man/EventStudy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EventStudy.R 3 | \name{EventStudy} 4 | \alias{EventStudy} 5 | \title{Estimates Equation (2) in Freyaldenhoven et al. (2021)} 6 | \usage{ 7 | EventStudy( 8 | estimator, 9 | data, 10 | outcomevar, 11 | policyvar, 12 | idvar, 13 | timevar, 14 | controls = NULL, 15 | proxy = NULL, 16 | proxyIV = NULL, 17 | FE = TRUE, 18 | TFE = TRUE, 19 | post, 20 | overidpost = 1, 21 | pre, 22 | overidpre = post + pre, 23 | normalize = -1 * (pre + 1), 24 | cluster = TRUE, 25 | anticipation_effects_normalization = TRUE, 26 | allow_duplicate_id = FALSE, 27 | avoid_internal_copy = FALSE 28 | ) 29 | } 30 | \arguments{ 31 | \item{estimator}{Accepts one of "OLS" or "FHS". If "OLS" is specified, implements Ordinary Least Squares. If "FHS" is specified, implements Instrumental Variables (IV) estimator proposed in \href{https://www.aeaweb.org/articles?id=10.1257/aer.20180609}{Freyaldenhoven Hansen Shapiro (FHS, 2019)}.} 32 | 33 | \item{data}{Data frame containing the variables of interest.} 34 | 35 | \item{outcomevar}{Character indicating column of outcome variable y.} 36 | 37 | \item{policyvar}{Character indicating column of policy variable z.} 38 | 39 | \item{idvar}{Character indicating column of units.} 40 | 41 | \item{timevar}{Character indicating column of time periods.} 42 | 43 | \item{controls}{Optional character vector indicating a set of control variables q.} 44 | 45 | \item{proxy}{Character indicating column of variable that is thought to be affected by the confound but not by the policy. 46 | Should be specified if and only if estimator is specified as "FHS".} 47 | 48 | \item{proxyIV}{Character of column to be used as an instrument. Should be specified if and only if estimator is specified as "FHS". 
49 | If NULL, defaults to the strongest lead of the policy variable based on the first stage.} 50 | 51 | \item{FE}{Logical indicating whether unit fixed-effects should be included. Defaults to TRUE.} 52 | 53 | \item{TFE}{Logical indicating whether time fixed-effects should be included. Defaults to TRUE.} 54 | 55 | \item{post}{Whole number indicating the number of periods in the past before which the past values of the policy 56 | are not supposed to affect the value of the outcome. Corresponds to M in equation (2) of 57 | \href{https://www.nber.org/papers/w29170}{Freyaldenhoven et al. (2021)}.} 58 | 59 | \item{overidpost}{Optional whole number indicating the number of event times after "post" to be included in estimation. 60 | Defaults to 1. 61 | Corresponds to L_M in equation (2) of \href{https://www.nber.org/papers/w29170}{Freyaldenhoven et al. (2021)}.} 62 | 63 | \item{pre}{Whole number indicating the number of periods in the future after which the future values of the policy are 64 | not supposed to affect the value of the outcome today. Corresponds to G in equation (2) of 65 | \href{https://www.nber.org/papers/w29170}{Freyaldenhoven et al. (2021)}.} 66 | 67 | \item{overidpre}{Optional whole number indicating the number of event times earlier than -"pre" to be included in estimation. 68 | Defaults to "post" + "pre". 69 | Corresponds to L_G in equation (2) of \href{https://www.nber.org/papers/w29170}{Freyaldenhoven et al. (2021)}.} 70 | 71 | \item{normalize}{Specifies the event-time coefficient to be normalized. Defaults to - pre - 1.} 72 | 73 | \item{cluster}{Logical indicating whether to use clustered errors by units. If FALSE, will use unclustered heteroskedasticity-robust standard errors. 74 | Defaults to TRUE. Must be TRUE if FE is TRUE.} 75 | 76 | \item{anticipation_effects_normalization}{If set to TRUE, runs the default process and switches coefficient to be normalized to 0 77 | when there are anticipation effects. If set to FALSE, does not make the switch. Defaults to TRUE.} 78 | 79 | \item{allow_duplicate_id}{If TRUE, the function estimates a regression where duplicated ID-time rows are weighted by their duplication count. If FALSE, the function raises an error if duplicate unit-time keys exist in the input data. Default is FALSE.} 80 | 81 | \item{avoid_internal_copy}{If TRUE, the function avoids making an internal deep copy of the input data, and instead directly modifies the input data.table. Default is FALSE.} 82 | } 83 | \value{ 84 | A list that contains, under "output", the estimation output as an lm_robust object, and under "arguments", the arguments passed to the function. 85 | } 86 | \description{ 87 | \code{EventStudy} uses regression methods to estimate the effect of a policy on a given outcome. 
88 | } 89 | \examples{ 90 | 91 | # A minimal example 92 | eventstudy_model <- 93 | EventStudy( 94 | estimator = "OLS", 95 | data = example_data, 96 | outcomevar = "y_base", 97 | policyvar = "z", 98 | idvar = "id", 99 | timevar = "t", 100 | pre = 0, post = 3, 101 | normalize = -1 102 | ) 103 | 104 | ### Access estimated model 105 | eventstudy_model$output 106 | 107 | summary(eventstudy_model$output) 108 | 109 | ### data.frame of estimates 110 | estimatr::tidy(eventstudy_model$output) 111 | 112 | ### Access arguments 113 | eventstudy_model$arguments 114 | 115 | # A dynamic OLS model with anticipation effects and controls 116 | eventstudy_model_dyn <- 117 | EventStudy( 118 | estimator = "OLS", 119 | data = example_data, 120 | outcomevar = "y_base", 121 | policyvar = "z", 122 | idvar = "id", 123 | timevar = "t", 124 | controls = "x_r", 125 | FE = TRUE, TFE = TRUE, 126 | post = 3, overidpost = 5, 127 | pre = 2, overidpre = 4, 128 | normalize = - 3, 129 | cluster = TRUE, 130 | anticipation_effects_normalization = TRUE 131 | ) 132 | 133 | summary(eventstudy_model_dyn$output) 134 | 135 | # A static model 136 | eventstudy_model_static <- 137 | EventStudy( 138 | estimator = "OLS", 139 | data = example_data, 140 | outcomevar = "y_jump_m", 141 | policyvar = "z", 142 | idvar = "id", 143 | timevar = "t", 144 | FE = TRUE, TFE = TRUE, 145 | post = 0, overidpost = 0, 146 | pre = 0, overidpre = 0, 147 | cluster = TRUE 148 | ) 149 | 150 | summary(eventstudy_model_static$output) 151 | 152 | # A dynamic model with an unbalanced panel 153 | data_unbal <- example_data[1:(nrow(example_data)-1),] # drop last row to make unbalanced 154 | 155 | eventstudy_model_unbal <- 156 | EventStudy( 157 | estimator = "OLS", 158 | data = data_unbal, 159 | outcomevar = "y_base", 160 | policyvar = "z", 161 | idvar = "id", 162 | timevar = "t", 163 | pre = 0, post = 3, 164 | normalize = -1 165 | ) 166 | 167 | summary(eventstudy_model_unbal$output) 168 | 169 | # A dynamic model estimated using IV 170 | eventstudy_model_iv <- 171 | EventStudy( 172 | estimator = "FHS", 173 | data = example_data, 174 | outcomevar = "y_base", 175 | policyvar = "z", 176 | idvar = "id", 177 | timevar = "t", 178 | proxy = "x_r", 179 | FE = TRUE, TFE = TRUE, 180 | post = 2, overidpost = 1, 181 | pre = 0, overidpre = 3, 182 | normalize = -1, 183 | cluster = TRUE 184 | ) 185 | 186 | summary(eventstudy_model_iv$output) 187 | 188 | } 189 | -------------------------------------------------------------------------------- /R/PreparePlottingData.R: -------------------------------------------------------------------------------- 1 | 2 | #' Orders the eventstudy coefficients and generates the x-axis labels 3 | #' 4 | #' @param df_tidy_estimates A data.frame created from applying \link[estimatr]{tidy} 5 | #' to the estimation output from [EventStudy()]. 6 | #' At a minimum, it contains a column called "term" with the name for the coefficient and a 7 | #' column called "estimate" that contains the corresponding estimate. Should be a data.frame. 8 | #' @param policyvar Character indicating column of policy variable z. 9 | #' @param post Whole number indicating the number of periods in the past before which the past values of the policy 10 | #' are not supposed to affect the value of the outcome. Corresponds to M in equation (2) of 11 | #' [Freyaldenhoven et al. (2021)](https://www.nber.org/system/files/working_papers/w29170/w29170.pdf). 12 | #' @param overidpost Optional whole number indicating the number of event times after "post" to be included in estimation. 
Defaults to 1. 13 | #' Corresponds to L_M in equation (2) of [Freyaldenhoven et al. (2021)](https://www.nber.org/system/files/working_papers/w29170/w29170.pdf). 14 | #' @param pre Whole number indicating the number of periods in the future after which the future values of the policy are 15 | #' not supposed to affect the value of the outcome today. Corresponds to G in equation (2) of 16 | #' [Freyaldenhoven et al. (2021)](https://www.nber.org/system/files/working_papers/w29170/w29170.pdf). 17 | #' @param overidpre Optional whole number indicating the number of event times earlier than -"pre" to be included in estimation. Defaults to "post" + "pre". 18 | #' Corresponds to L_G in equation (2) of [Freyaldenhoven et al. (2021)](https://www.nber.org/system/files/working_papers/w29170/w29170.pdf). 19 | #' @param normalization_column The name of the column containing the coefficient that will 20 | #' be set to 0 in the eventstudy plot. Should be a character. 21 | #' @param proxyIV Character of column to be used as an instrument. Should be specified if and only if estimator is specified as "FHS". 22 | #' If NULL, defaults to the strongest lead of the policy variable based on the first stage. 23 | #' 24 | #' @return A data.frame that contains the x-axis labels, y-axis estimates, 25 | #' and optional plot aesthetics to be used in creating the eventstudy plot 26 | #' @import stringr 27 | #' @importFrom stats setNames 28 | #' @import estimatr 29 | #' 30 | #' @keywords internal 31 | #' @noRd 32 | #' 33 | #' @examples 34 | #' 35 | #' tidy_eventstudy_estimates <- estimatr::tidy(EventStudy(estimator = "OLS", 36 | #' data = example_data, 37 | #' outcomevar = "y_base", 38 | #' policyvar = "z", 39 | #' idvar = "id", 40 | #' timevar = "t", 41 | #' controls = "x_r", 42 | #' FE = TRUE, 43 | #' TFE = TRUE, 44 | #' post = 3, overidpost = 5, 45 | #' pre = 2, overidpre = 4, 46 | #' normalize = - 3, 47 | #' anticipation_effects_normalization = TRUE)$output) 48 | #' 49 | #' PreparePlottingData(df_tidy_estimates = tidy_eventstudy_estimates, 50 | #' policyvar = "z", 51 | #' post = 3, overidpost = 5, 52 | #' pre = 2, overidpre = 4, 53 | #' normalization_column = "z_fd_lead3", 54 | #' proxyIV = NULL) 55 | #' 56 | #' # If you would like to use IV regression: 57 | #' data <- example_data[, c("y_base", "z", "id", "t", "x_r", "eta_m")] 58 | #' 59 | #' tidy_eventstudy_estimates <- estimatr::tidy(EventStudy(estimator = "FHS", 60 | #' data = data, 61 | #' outcomevar = "y_base", 62 | #' policyvar = "z", 63 | #' idvar = "id", 64 | #' timevar = "t", 65 | #' controls = "x_r", 66 | #' proxy = "eta_m", 67 | #' FE = TRUE, 68 | #' TFE = TRUE, 69 | #' post = 1, overidpost = 2, 70 | #' pre = 1, overidpre = 2, 71 | #' normalize = -1, 72 | #' anticipation_effects_normalization = TRUE)$output) 73 | #' 74 | #' PreparePlottingData(df_tidy_estimates = tidy_eventstudy_estimates, 75 | #' policyvar = "z", 76 | #' post = 1, overidpost = 2, 77 | #' pre = 1, overidpre = 2, 78 | #' normalization_column = "z_fd_lead2", 79 | #' proxyIV = "z_fd_lead3") 80 | #' 81 | 82 | PreparePlottingData <- function(df_tidy_estimates, policyvar, 83 | post, overidpost, pre, overidpre, normalization_column, 84 | proxyIV = NULL) { 85 | 86 | if (! is.data.frame(df_tidy_estimates)) {stop("data should be a data frame.")} 87 | if (! is.character(policyvar)) {stop("policyvar should be a character.")} 88 | if (! (is.numeric(post) & post >= 0 & post %% 1 == 0)) {stop("post should be a whole number.")} 89 | if (! 
(is.numeric(overidpost) & overidpost >= 0 & overidpost %% 1 == 0)) {stop("overidpost should be a whole number.")} 90 | if (! (is.numeric(pre) & pre >= 0 & pre %% 1 == 0)) {stop("pre should be a whole number.")} 91 | if (! (is.numeric(overidpre) & overidpre >= 0 & overidpre %% 1 == 0)) {stop("overidpre should be a whole number.")} 92 | if (! is.character(normalization_column)) {stop("normalization_column should be a character.")} 93 | if (normalization_column %in% df_tidy_estimates$term) {stop("normalization_column should not be one of the strings in the 'term' column.")} 94 | if (! (is.null(proxyIV) | is.character(proxyIV))) {stop("proxyIV should be either a character or NULL.")} 95 | 96 | largest_lead <- pre + overidpre 97 | largest_lag <- post + overidpost - 1 98 | 99 | integer_regex <- "[0-9]+$" 100 | 101 | first_lead <- paste0(policyvar, "_lead", largest_lead) 102 | first_lead_integer <- as.integer(stringr::str_extract(first_lead, integer_regex)) + 1 103 | first_lead_label <- paste0("-", first_lead_integer, "+") 104 | 105 | v_leads <- paste0(policyvar, "_fd_lead", largest_lead:1) 106 | v_leads_integer <- stringr::str_extract(v_leads, integer_regex) 107 | v_leads_label <- paste0("-", v_leads_integer) 108 | 109 | t_0_term <- paste0(policyvar, "_fd") 110 | t_0_term_label <- "0" 111 | 112 | v_lags <- paste0(policyvar, "_fd_lag", 1:largest_lag) 113 | v_lags_label <- stringr::str_extract(v_lags, integer_regex) 114 | 115 | last_lag <- paste0(policyvar, "_lag", largest_lag + 1) 116 | last_lag_integer <- stringr::str_extract(last_lag, integer_regex) 117 | last_lag_label <- paste0(last_lag_integer, "+") 118 | 119 | v_terms_to_plot_ordered <- c(first_lead, v_leads, t_0_term, v_lags, last_lag) 120 | v_terms_to_plot_labels <- c(first_lead_label, v_leads_label, t_0_term_label, v_lags_label, last_lag_label) 121 | 122 | v_all_terms <- df_tidy_estimates[["term"]] 123 | v_plotting_terms <- v_all_terms %in% v_terms_to_plot_ordered 124 | 125 | df_plotting <- df_tidy_estimates[v_plotting_terms, ] 126 | 127 | v_zeroes <- rep(0, ncol(df_plotting) - 2) 128 | v_names_for_zeroes <- names(df_plotting)[3:ncol(df_plotting)] 129 | v_normalization_other <- stats::setNames(v_zeroes, v_names_for_zeroes) 130 | 131 | supt_or_ci_present <- v_names_for_zeroes %in% c("suptband_lower", "suptband_upper", "ci_lower", "ci_upper") 132 | 133 | if (sum(supt_or_ci_present) > 0) { 134 | 135 | v_normalization_other[which(supt_or_ci_present)] <- NA 136 | 137 | } 138 | 139 | df_normalization_column <- data.frame( 140 | "term" = normalization_column, 141 | "estimate" = 0, 142 | as.list(v_normalization_other) 143 | ) 144 | 145 | df_plotting <- rbind(df_plotting, df_normalization_column) 146 | 147 | if (!is.null(proxyIV)) { 148 | 149 | proxyIV_integer <- stringr::str_extract(proxyIV, integer_regex) 150 | proxyIV_lead_or_lag <- stringr::str_extract(proxyIV, "lead|lag") 151 | 152 | proxyIV_sign <- switch (proxyIV_lead_or_lag, 153 | "lead" = "-", 154 | "lag" = "+" 155 | ) 156 | 157 | df_proxyIV_column <- data.frame( 158 | "term" = proxyIV, 159 | "estimate" = 0, 160 | as.list(v_normalization_other) 161 | ) 162 | 163 | proxyIV_label <- paste0(proxyIV_integer, proxyIV_sign) 164 | 165 | df_plotting <- rbind(df_plotting, df_proxyIV_column) 166 | 167 | } 168 | 169 | 170 | df_plotting["label"] <- factor(df_plotting$term, levels = v_terms_to_plot_ordered, labels = v_terms_to_plot_labels) 171 | 172 | return(df_plotting) 173 | 174 | } 175 | -------------------------------------------------------------------------------- /R/SmPathHelpers.R: 
-------------------------------------------------------------------------------- 1 | # Add zero where normalized coefficient(s) should be in covar matrix 2 | AddZerosCovar <- function(vcov_matrix_all, eventstudy_coeffs, norm_column, 3 | coeffs_order) { 4 | 5 | v_terms_to_keep <- colnames(vcov_matrix_all) %in% eventstudy_coeffs 6 | covar <- vcov_matrix_all[v_terms_to_keep, v_terms_to_keep] 7 | 8 | n_coefs = length(coeffs_order) 9 | needed_zeros = length(norm_column) 10 | 11 | # Add row and col of zeros at the end 12 | ZerosRight = matrix(0, ncol = needed_zeros, nrow = nrow(covar)) 13 | ZerosBottom = matrix(0, ncol = n_coefs, nrow = needed_zeros) 14 | covar <- rbind(cbind(covar, ZerosRight), 15 | ZerosBottom) 16 | rownames(covar) <- c(eventstudy_coeffs, norm_column) 17 | colnames(covar) <- c(eventstudy_coeffs, norm_column) 18 | 19 | # Sort matrix 20 | covar <- covar[coeffs_order, coeffs_order] 21 | 22 | return(covar) 23 | } 24 | 25 | # Computes F matrix using coeff_length and poly_order as arguments 26 | GetFmat <- function(coeff_length, poly_order) { 27 | 28 | k = seq(0, coeff_length-1)/(coeff_length-1) 29 | Fmat = sapply(seq(1, poly_order+1), 30 | function(j) {k^(j-1)}) 31 | 32 | return(Fmat) 33 | } 34 | 35 | # Find minimum order of polynomial such that the constraint is satisfied 36 | FindOrder <- function(coeffs, inv_covar, Wcritic, maxorder) { 37 | 38 | norm_index <- which(coeffs == 0) 39 | 40 | Wvalue = 1e6 41 | poly_order = 0 42 | 43 | # Compute Wald value for polynomials of increasing order until Wald Value < Critical Value 44 | while (poly_order <= maxorder & Wvalue >= Wcritic) { 45 | 46 | min_results <- MinimizeWald(coeffs, inv_covar, norm_index, poly_order) 47 | Wvalue = min_results$W 48 | poly_order = poly_order + 1 49 | } 50 | 51 | return(list(order = poly_order - 1, 52 | results = min_results)) 53 | } 54 | 55 | # Minimize Wald objective given coefficients and inverse covariance matrix 56 | MinimizeWald <- function(coeffs, inv_covar, norm_index, poly_order) { 57 | 58 | coeff_length = length(coeffs) 59 | 60 | if (poly_order == 0) { 61 | trfit = rep(0, coeff_length) 62 | W = (t(coeffs)%*%inv_covar)%*%coeffs 63 | vhat = 0 64 | 65 | } else { 66 | Fmat <- GetFmat(coeff_length, poly_order) 67 | Anorm <- Fmat[norm_index, , drop = F] 68 | 69 | FtinvVd = (t(Fmat)%*%inv_covar)%*%matrix(coeffs) 70 | invFtinvVF = pracma::inv((t(Fmat)%*%inv_covar)%*%Fmat) 71 | AtFtinvVFA = (Anorm%*%invFtinvVF)%*%t(Anorm) 72 | multiple = (t(Anorm)%*%pracma::inv(AtFtinvVFA))%*%Anorm 73 | 74 | difference = FtinvVd - (multiple%*%invFtinvVF)%*%FtinvVd 75 | vhat = invFtinvVF%*%difference 76 | 77 | trfit <- Fmat%*%vhat 78 | W <- (t(trfit-coeffs)%*%inv_covar)%*%(trfit-coeffs) 79 | } 80 | 81 | return(list("trfit" = trfit, 82 | "W" = W, 83 | "vhat" = vhat)) 84 | } 85 | 86 | # Find coefficients such that square of highest order term is minimized 87 | # Num normalized coefficients less than order of polynomial 88 | FindCoeffs <- function(res_order, coeffs, inv_covar, Wcritic, pN, order, norm_idxs, Fmat, 89 | maxiter_solver = 2e6) { 90 | 91 | if (is.null(dim(Fmat))) { # If one-dimensional make sure it's also a matrix object 92 | Fmat <- matrix(Fmat) 93 | } 94 | 95 | # Prevent conversion to vector with drop = F 96 | Anorm <- Fmat[norm_idxs, , drop = F] 97 | 98 | stopifnot(ncol(Anorm) == ncol(Fmat)) 99 | 100 | colindex_b = 1:(ncol(Anorm)-pN-1) 101 | colindex_1 = (ncol(Anorm)-pN):(ncol(Anorm)-1) 102 | colindex_2 = ncol(Anorm) 103 | 104 | Ab <- Anorm[, colindex_b, drop = F] 105 | A1 <- Anorm[, colindex_1, drop = F] 106 | A2 
<- Anorm[, colindex_2, drop = F] 107 | 108 | Fb <- Fmat[, colindex_b, drop = F] 109 | F1 <- Fmat[, colindex_1, drop = F] 110 | F2 <- Fmat[, colindex_2, drop = F] 111 | 112 | x0 = res_order$vhat[1:ncol(Fb)] 113 | 114 | optim_pos <- stats::optim(par = x0, 115 | fn = Objective, 116 | method = "Nelder-Mead", 117 | control = list("maxit" = maxiter_solver, "reltol" = 1e-4), 118 | d = coeffs, inv_covar = inv_covar, 119 | Fb = Fb, F1 = F1, F2 = F2, Ab = Ab, A1 = A1, A2 = A2, 120 | Wcritic = Wcritic, positive = T) 121 | optim_neg <- stats::optim(par = x0, 122 | fn = Objective, 123 | method = "Nelder-Mead", 124 | control = list("maxit" = maxiter_solver, "reltol" = 1e-4), 125 | d = coeffs, inv_covar = inv_covar, 126 | Fb = Fb, F1 = F1, F2 = F2, Ab = Ab, A1 = A1, A2 = A2, 127 | Wcritic = Wcritic, positive = F) 128 | 129 | if (optim_pos$convergence != 0 | optim_neg$convergence != 0) { 130 | stop("Numerical optimization failed when searching for the smoothest path. Please set 'Smpath' to FALSE.") 131 | } 132 | 133 | vb_pos <- optim_pos$par 134 | vb_neg <- optim_neg$par 135 | 136 | d0_ = d0(coeffs, inv_covar, F1, F2, A1, A2) 137 | d1_pos = d1(vb_pos, coeffs, inv_covar, Fb, F1, F2, Ab, A1, A2) 138 | d2_pos = d2(vb_pos, coeffs, inv_covar, Fb, F1, Ab, A1, Wcritic) 139 | d1_neg = d1(vb_neg, coeffs, inv_covar, Fb, F1, F2, Ab, A1, A2) 140 | d2_neg = d2(vb_neg, coeffs, inv_covar, Fb, F1, Ab, A1, Wcritic) 141 | 142 | discriminat_pos = d1_pos^2 - 4*d0_*d2_pos 143 | discriminat_neg = d1_neg^2 - 4*d0_*d2_neg 144 | 145 | v2_pos = (-d1_pos + sqrt(discriminat_pos))/(2*d0_) 146 | v2_neg = (-d1_neg - sqrt(discriminat_neg))/(2*d0_) 147 | 148 | if (abs(v2_pos) < abs(v2_neg)) { 149 | vb = vb_pos 150 | v2 = v2_pos 151 | } else { 152 | vb = vb_neg 153 | v2 = v2_neg 154 | } 155 | v1 = inv(A1)%*%(-Ab%*%vb - A2%*%v2) 156 | 157 | return(c(vb, v1, v2)) 158 | } 159 | 160 | d0 <- function(d, inv_covar, F1, F2, A1, A2) { 161 | single_factor = F2 - F1%*%inv(A1)%*%A2 162 | 163 | return(t(single_factor)%*%inv_covar%*%single_factor) 164 | } 165 | 166 | d1 <- function(vb, d, inv_covar, Fb, F1, F2, Ab, A1, A2) { 167 | pre_factor = (Fb - F1%*%(inv(A1)%*%Ab))%*%vb - d 168 | post_factor = F2 - F1%*%inv(A1)%*%A2 169 | 170 | return(2*t(pre_factor)%*%inv_covar%*%post_factor) 171 | } 172 | 173 | d2 <- function(vb, d, inv_covar, Fb, F1, Ab, A1, Wcritic) { 174 | single_factor = (Fb - F1%*%(inv(A1)%*%Ab))%*%vb - d 175 | 176 | return(t(single_factor)%*%inv_covar%*%single_factor - Wcritic) 177 | } 178 | 179 | Objective <- function(vb, d, inv_covar, Fb, F1, F2, Ab, A1, A2, Wcritic, 180 | positive = T) { 181 | 182 | vb = matrix(vb) 183 | 184 | d0_ = d0( d, inv_covar, F1, F2, A1, A2) 185 | d1_ = d1(vb, d, inv_covar, Fb, F1, F2, Ab, A1, A2) 186 | d2_ = d2(vb, d, inv_covar, Fb, F1, Ab, A1, Wcritic) 187 | 188 | discriminant = d1_^2 - 4*d0_*d2_ 189 | 190 | if (discriminant < 0) { 191 | return(Inf) 192 | } 193 | 194 | if (positive) { 195 | return(( (-d1_ + sqrt(discriminant))/(2*d0_) )^2) 196 | } else { 197 | return(( (-d1_ - sqrt(discriminant))/(2*d0_) )^2) 198 | } 199 | } 200 | 201 | 202 | # Find coeffs such that square of highest order term is minimized 203 | # Num normalized coefficients equals order of polynomial 204 | FindCoeffsEq <- function(res_order, coeffs, inv_covar, Wcritic, pN, order, norm_idxs, Fmat, 205 | maxiter_solver = 1e6) { 206 | 207 | if (is.null(dim(Fmat))) { # If one-dimensional make sure it's also a matrix object 208 | Fmat <- matrix(Fmat) 209 | } 210 | 211 | # Prevent conversion to vector with drop = F 212 | Anorm <- Fmat[norm_idxs, 
, drop = F] 213 | 214 | stopifnot(ncol(Anorm) == ncol(Fmat)) 215 | 216 | colindex_1 = (ncol(Anorm)-pN):(ncol(Anorm)-1) 217 | colindex_2 = ncol(Anorm) 218 | 219 | A1 <- Anorm[, colindex_1, drop = F] 220 | A2 <- Anorm[, colindex_2, drop = F] 221 | 222 | F1 <- Fmat[, colindex_1, drop = F] 223 | F2 <- Fmat[, colindex_2, drop = F] 224 | 225 | d0_ = d0Eq(coeffs, inv_covar, F1, F2, A1, A2) 226 | d1_ = d1Eq(coeffs, inv_covar, F1, F2, A1, A2) 227 | d2_ = d2Eq(coeffs, inv_covar, Wcritic) 228 | 229 | discriminant = d1_^2 - 4*d0_*d2_ 230 | 231 | v2_pos <- (-d1_ + sqrt(discriminant))/(2*d0_) 232 | v2_neg <- (-d1_ - sqrt(discriminant))/(2*d0_) 233 | 234 | if (abs(v2_pos) < abs(v2_neg)) { 235 | v2 = v2_pos 236 | } else { 237 | v2 = v2_neg 238 | } 239 | v1 = -inv(A1)%*%(A2%*%v2) 240 | 241 | return(c(v1, v2)) 242 | } 243 | 244 | d0Eq <- function(d, inv_covar, F1, F2, A1, A2) { 245 | single_factor = F2 - F1%*%inv(A1)%*%A2 246 | 247 | return(t(single_factor)%*%inv_covar%*%single_factor) 248 | } 249 | 250 | d1Eq <- function(d, inv_covar, F1, F2, A1, A2) { 251 | pre_factor = F2 - F1%*%inv(A1)%*%A2 252 | 253 | return(-2*t(pre_factor)%*%inv_covar%*%d) 254 | } 255 | 256 | d2Eq <- function(d, inv_covar, Wcritic) { 257 | 258 | return(t(d)%*%inv_covar%*%d - Wcritic) 259 | } 260 | -------------------------------------------------------------------------------- /tests/testthat/test-PreparePlottingData.R: -------------------------------------------------------------------------------- 1 | 2 | # OLS --------------------------------------------------------------------- 3 | 4 | test_that("labels are unique", { 5 | 6 | 7 | list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 8 | policyvar = "z", idvar = "id", timevar = "t", 9 | controls = "x_r", FE = TRUE, TFE = TRUE, 10 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 11 | 12 | df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) 13 | 14 | policyvar <- list_EventStudy[[2]]$policyvar 15 | post <- list_EventStudy[[2]]$post 16 | overidpost <- list_EventStudy[[2]]$overidpost 17 | pre <- list_EventStudy[[2]]$pre 18 | overidpre <- list_EventStudy[[2]]$overidpre 19 | normalization_column <- list_EventStudy[[2]]$normalization_column 20 | proxyIV <- list_EventStudy[[2]]$proxyIV 21 | 22 | df_PreparePlottingData <- PreparePlottingData(df_tidy_estimates, policyvar, 23 | post, overidpost, pre, overidpre, normalization_column, proxyIV) 24 | 25 | labels_actual <- as.character(df_PreparePlottingData$label) 26 | num_labels <- length(labels_actual) 27 | num_unique_labels <- length(unique(labels_actual)) 28 | 29 | expect_equal(num_unique_labels, num_labels) 30 | }) 31 | 32 | test_that("the correct labels are created", { 33 | 34 | 35 | list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 36 | policyvar = "z", idvar = "id", timevar = "t", 37 | controls = "x_r", FE = TRUE, TFE = TRUE, 38 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 39 | 40 | df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) 41 | 42 | policyvar <- list_EventStudy[[2]]$policyvar 43 | post <- list_EventStudy[[2]]$post 44 | overidpost <- list_EventStudy[[2]]$overidpost 45 | pre <- list_EventStudy[[2]]$pre 46 | overidpre <- list_EventStudy[[2]]$overidpre 47 | normalization_column <- list_EventStudy[[2]]$normalization_column 48 | proxyIV <- list_EventStudy[[2]]$proxyIV 49 | 50 | 
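# Note (added for clarity, not in the original test file): list_EventStudy[[2]] is the
# "arguments" element of the list returned by EventStudy(); list_EventStudy$arguments
# would be an equivalent, more explicit way to extract policyvar, post, pre, etc.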
df_PreparePlottingData <- PreparePlottingData(df_tidy_estimates, policyvar, 51 | post, overidpost, pre, overidpre, normalization_column, proxyIV) 52 | 53 | labels_actual <- as.character(df_PreparePlottingData$label) 54 | 55 | expect_equal(labels_actual, c("-7+", "-6", "-5", "-4", "-2", "-1", "0", "1", "2", "3", "4", "5", "6", "7", "8+", "-3")) 56 | 57 | }) 58 | 59 | test_that("the labels are ordered correctly", { 60 | 61 | 62 | list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 63 | policyvar = "z", idvar = "id", timevar = "t", 64 | controls = "x_r", FE = TRUE, TFE = TRUE, 65 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 66 | 67 | df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) 68 | 69 | policyvar <- list_EventStudy[[2]]$policyvar 70 | post <- list_EventStudy[[2]]$post 71 | overidpost <- list_EventStudy[[2]]$overidpost 72 | pre <- list_EventStudy[[2]]$pre 73 | overidpre <- list_EventStudy[[2]]$overidpre 74 | normalization_column <- list_EventStudy[[2]]$normalization_column 75 | proxyIV <- list_EventStudy[[2]]$proxyIV 76 | 77 | df_PreparePlottingData <- PreparePlottingData(df_tidy_estimates, policyvar, post, overidpost, pre, overidpre, normalization_column, proxyIV) 78 | 79 | levels_coefficients <- stringr::str_remove(levels(df_PreparePlottingData$label), "\\+") 80 | actual_levels_as_integer <- as.integer(levels_coefficients) 81 | 82 | expect_equal(actual_levels_as_integer, (-pre - overidpre - 1):(post + overidpost)) 83 | 84 | }) 85 | 86 | test_that("the control variable is removed", { 87 | 88 | 89 | list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 90 | policyvar = "z", idvar = "id", timevar = "t", 91 | controls = "x_r", FE = TRUE, TFE = TRUE, 92 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 93 | 94 | df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) 95 | 96 | policyvar <- list_EventStudy[[2]]$policyvar 97 | post <- list_EventStudy[[2]]$post 98 | overidpost <- list_EventStudy[[2]]$overidpost 99 | pre <- list_EventStudy[[2]]$pre 100 | overidpre <- list_EventStudy[[2]]$overidpre 101 | normalization_column <- list_EventStudy[[2]]$normalization_column 102 | controls <- list_EventStudy[[2]]$controls 103 | proxyIV <- list_EventStudy[[2]]$proxyIV 104 | 105 | df_PreparePlottingData <- PreparePlottingData(df_tidy_estimates, policyvar, 106 | post, overidpost, pre, overidpre, normalization_column, proxyIV) 107 | 108 | terms_actual <- df_PreparePlottingData$term 109 | 110 | expect_true(!controls %in% terms_actual) 111 | }) 112 | 113 | test_that("the largest lag label is correctly created", { 114 | 115 | 116 | list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 117 | policyvar = "z", idvar = "id", timevar = "t", 118 | controls = "x_r", FE = TRUE, TFE = TRUE, 119 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 120 | 121 | df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) 122 | 123 | policyvar <- list_EventStudy[[2]]$policyvar 124 | post <- list_EventStudy[[2]]$post 125 | overidpost <- list_EventStudy[[2]]$overidpost 126 | pre <- list_EventStudy[[2]]$pre 127 | overidpre <- list_EventStudy[[2]]$overidpre 128 | normalization_column <- list_EventStudy[[2]]$normalization_column 129 | controls <- 
list_EventStudy[[2]]$controls 130 | proxyIV <- list_EventStudy[[2]]$proxyIV 131 | 132 | df_PreparePlottingData <- PreparePlottingData(df_tidy_estimates, policyvar, 133 | post, overidpost, pre, overidpre, normalization_column, proxyIV) 134 | 135 | term_actual <- as.character(df_PreparePlottingData[df_PreparePlottingData["term"] == paste0(policyvar, "_lag", (post + overidpost)), ][["label"]]) 136 | 137 | expect_equal(term_actual, "8+") 138 | 139 | 140 | }) 141 | 142 | test_that("the largest lead label is correctly created", { 143 | 144 | 145 | list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 146 | policyvar = "z", idvar = "id", timevar = "t", 147 | controls = "x_r", FE = TRUE, TFE = TRUE, 148 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 149 | 150 | df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) 151 | 152 | policyvar <- list_EventStudy[[2]]$policyvar 153 | post <- list_EventStudy[[2]]$post 154 | overidpost <- list_EventStudy[[2]]$overidpost 155 | pre <- list_EventStudy[[2]]$pre 156 | overidpre <- list_EventStudy[[2]]$overidpre 157 | normalization_column <- list_EventStudy[[2]]$normalization_column 158 | controls <- list_EventStudy[[2]]$controls 159 | proxyIV <- list_EventStudy[[2]]$proxyIV 160 | 161 | df_PreparePlottingData <- PreparePlottingData(df_tidy_estimates, policyvar, 162 | post, overidpost, pre, overidpre, normalization_column, proxyIV) 163 | 164 | term_actual <- as.character(df_PreparePlottingData[df_PreparePlottingData["term"] == paste0(policyvar, "_lead", (pre + overidpre)), ][["label"]]) 165 | 166 | expect_equal(term_actual, "-7+") 167 | 168 | 169 | }) 170 | 171 | test_that("all columns besides 'term' and 'label' are 0 for the normalization column", { 172 | 173 | 174 | list_EventStudy <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 175 | policyvar = "z", idvar = "id", timevar = "t", 176 | controls = "x_r", FE = TRUE, TFE = TRUE, 177 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 178 | 179 | df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) 180 | 181 | policyvar <- list_EventStudy[[2]]$policyvar 182 | post <- list_EventStudy[[2]]$post 183 | overidpost <- list_EventStudy[[2]]$overidpost 184 | pre <- list_EventStudy[[2]]$pre 185 | overidpre <- list_EventStudy[[2]]$overidpre 186 | normalization_column <- list_EventStudy[[2]]$normalization_column 187 | controls <- list_EventStudy[[2]]$controls 188 | proxyIV <- list_EventStudy[[2]]$proxyIV 189 | 190 | df_PreparePlottingData <- PreparePlottingData(df_tidy_estimates, policyvar, 191 | post, overidpost, pre, overidpre, normalization_column, proxyIV) 192 | 193 | all_columns <- names(df_PreparePlottingData) 194 | v_zero_column_names <- all_columns[all_columns != c("term", "label")] 195 | 196 | normalized_row <- df_PreparePlottingData[df_PreparePlottingData["term"] == normalization_column, v_zero_column_names] 197 | actual_row_total <- apply(normalized_row, MARGIN = 1, function(x) sum(as.integer(x))) 198 | 199 | expect_equal(unname(actual_row_total), 0) 200 | 201 | 202 | }) 203 | 204 | # FHS --------------------------------------------------------------------- 205 | 206 | test_that("all columns besides 'term' and 'label' are 0 for the proxyIV column", { 207 | 208 | 209 | list_EventStudy <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_base", 210 | policyvar = "z", 
idvar = "id", timevar = "t", 211 | controls = "x_r", FE = TRUE, TFE = TRUE, proxy = "eta_m", 212 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3, cluster = TRUE, anticipation_effects_normalization = TRUE) 213 | 214 | df_tidy_estimates <- estimatr::tidy(list_EventStudy$output) 215 | 216 | policyvar <- list_EventStudy[[2]]$policyvar 217 | post <- list_EventStudy[[2]]$post 218 | overidpost <- list_EventStudy[[2]]$overidpost 219 | pre <- list_EventStudy[[2]]$pre 220 | overidpre <- list_EventStudy[[2]]$overidpre 221 | normalization_column <- list_EventStudy[[2]]$normalization_column 222 | proxyIV <- list_EventStudy[[2]]$proxyIV 223 | 224 | df_PreparePlottingData <- PreparePlottingData(df_tidy_estimates, policyvar, post, overidpost, pre, overidpre, normalization_column, proxyIV) 225 | 226 | all_columns <- names(df_PreparePlottingData) 227 | v_zero_column_names <- all_columns[all_columns != c("term", "label")] 228 | 229 | proxyIV_row <- df_PreparePlottingData[df_PreparePlottingData["term"] == proxyIV, v_zero_column_names] 230 | actual_row_total <- apply(proxyIV_row, MARGIN = 1, function(x) sum(as.integer(x))) 231 | 232 | expect_equal(unname(actual_row_total), 0) 233 | }) 234 | -------------------------------------------------------------------------------- /tests/testthat/test-EventStudyPlot.R: -------------------------------------------------------------------------------- 1 | 2 | get_labs <- function(x) x$labels 3 | if ("get_labs" %in% getNamespaceExports("ggplot2")) { 4 | get_labs <- ggplot2::get_labs 5 | } 6 | 7 | test_that("Dimension of OLS and FHS estimation output is the same", { 8 | 9 | estimates_ols <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_smooth_m", 10 | policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", 11 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 12 | 13 | estimates_fhs <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_smooth_m", 14 | policyvar = "z", idvar = "id", timevar = "t", proxy = "eta_r", controls = "x_r", 15 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 16 | 17 | expect_equal(length(estimates_ols), length(estimates_fhs)) 18 | 19 | # Compare first element of list 20 | coeffs_ols <- names(estimates_ols[[1]]$coefficients) 21 | coeffs_fhs <- names(estimates_fhs[[1]]$coefficients) 22 | 23 | expect_true(all(coeffs_fhs %in% c(coeffs_ols, "eta_r"))) 24 | expect_equal(length(coeffs_ols), length(coeffs_fhs)) # FHS: Norm coeff removes one coeff and proxy adds one 25 | 26 | expect_true(all(names(estimates_ols[[1]]) %in% names(estimates_fhs[[1]]))) 27 | 28 | # Compare second element of list 29 | expect_true(all(names(estimates_ols[[2]]) %in% names(estimates_fhs[[2]]))) 30 | expect_true(all(names(estimates_fhs[[2]]) %in% names(estimates_ols[[2]]))) 31 | }) 32 | 33 | 34 | test_that("correctly changes x-axis and y-axis labels", { 35 | 36 | estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 37 | policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", 38 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 39 | 40 | p_labels <- EventStudyPlot(estimates = estimates, 41 | conf_level = .95, 42 | xtitle = "Event Time", 43 | ytitle = "Event-study Coefficients",) 44 | 45 | labels <- get_labs(p_labels) 46 | expect_equal(labels$x, "Event Time") 47 | expect_equal(labels$y, "Event-study Coefficients") 48 | 49 | }) 50 | 51 | test_that("x- and y-axis breaks and limits are correct", { 52 | 53 | estimates = 
EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 54 | policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", 55 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 56 | 57 | p_Addmean <- EventStudyPlot(estimates = estimates, 58 | ybreaks = c(-1.5, -.5, 0, .5, 1.5), 59 | add_mean = TRUE) 60 | 61 | p_noAddmean <- EventStudyPlot(estimates = estimates, 62 | ybreaks = c(-1.5, -.5, 0, .5, 1.5), 63 | add_mean = FALSE) 64 | 65 | v_limits_addmeans <- p_Addmean$scales$scales[[2]]$limits 66 | v_limits_no_addmeans <- p_noAddmean$scales$scales[[2]]$limits 67 | v_breaks_addmeans <- p_Addmean$scales$scales[[2]]$breaks 68 | v_breaks_no_addmeans <- p_noAddmean$scales$scales[[2]]$breaks 69 | 70 | expect_equal(v_limits_addmeans, c(-1.5, 1.5)) 71 | expect_equal(v_limits_no_addmeans, c(-1.5, 1.5)) 72 | 73 | expect_equal(v_breaks_addmeans, c(-1.5, -.5, 0, .5, 1.5)) 74 | expect_equal(v_breaks_no_addmeans, c(-1.5, -.5, 0, .5, 1.5)) 75 | }) 76 | 77 | test_that("correctly adds mean of outcome var", { 78 | 79 | estimates = EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 80 | policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", 81 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 82 | 83 | p_Addmean <- EventStudyPlot(estimates = estimates, 84 | ybreaks = c(-1.5, -.5, 0, .5, 1.5), 85 | add_mean = TRUE) 86 | 87 | p_noAddmean <- EventStudyPlot(estimates = estimates, 88 | ybreaks = c(-1.5, -.5, 0, .5, 1.5), 89 | add_mean = FALSE) 90 | 91 | y_mean <- AddMeans(estimates$arguments$data, estimates$arguments$normalization_column, 92 | "z", "y_base") 93 | y_mean <- round(y_mean, 2) 94 | 95 | v_labels_addmeans <- p_Addmean$scales$scales[[2]]$labels 96 | v_labels_no_addmeans <- p_noAddmean$scales$scales[[2]]$labels 97 | 98 | expect_equal(v_labels_addmeans, c("-1.5", "-0.5", sprintf("0 (%s)", y_mean), "0.5", "1.5")) 99 | expect_equal(v_labels_no_addmeans, c(-1.5, -.5, 0, .5, 1.5)) 100 | }) 101 | 102 | test_that("sup-t bands are appropriately present or absent", { 103 | 104 | estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 105 | policyvar = "z", idvar = "id", timevar = "t", 106 | controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 107 | 108 | p_supt <- get_labs(EventStudyPlot(estimates = estimates, 109 | supt = .95)) 110 | 111 | p_no_supt <- get_labs(EventStudyPlot(estimates = estimates, 112 | supt = NULL)) 113 | 114 | expect_true(p_supt$ymin == "suptband_lower") 115 | expect_true(p_no_supt$ymin != "suptband_lower") 116 | 117 | expect_true(p_supt$ymax == "suptband_upper") 118 | expect_true(p_no_supt$ymax != "suptband_upper") 119 | }) 120 | 121 | test_that("confidence intervals are appropriately present or absent", { 122 | 123 | estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 124 | policyvar = "z", idvar = "id", timevar = "t", 125 | controls = "x_r", post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 126 | 127 | p_ci <- get_labs(EventStudyPlot(estimates = estimates, 128 | conf_level = .95, supt = NULL)) 129 | 130 | p_no_ci <- get_labs(EventStudyPlot(estimates = estimates, 131 | conf_level = NULL, supt = NULL)) 132 | 133 | expect_equal(p_ci$ymin, "ci_lower") 134 | expect_equal(p_ci$ymax, "ci_upper") 135 | expect_null(p_no_ci$ymin) 136 | expect_null(p_no_ci$ymax) 137 | }) 138 | 139 | test_that("Preevent Coeffs and Postevent Coeffs are appropriately present or absent", { 140 | 141 | estimates <- 
EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 142 | policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", 143 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 144 | 145 | p_pre_post_caption <- get_labs(EventStudyPlot(estimates = estimates, 146 | ybreaks = c(-1.5, -.5, 0, .5, 1.5), 147 | pre_event_coeffs = TRUE, 148 | post_event_coeffs = TRUE))$caption 149 | 150 | p_pre_caption <- get_labs(EventStudyPlot(estimates = estimates, 151 | ybreaks = c(-1.5, -.5, 0, .5, 1.5), 152 | pre_event_coeffs = TRUE, 153 | post_event_coeffs = FALSE))$caption 154 | 155 | p_post_caption <- get_labs(EventStudyPlot(estimates = estimates, 156 | ybreaks = c(-1.5, -.5, 0, .5, 1.5), 157 | pre_event_coeffs = FALSE, 158 | post_event_coeffs = TRUE))$caption 159 | 160 | p_neither_caption <- get_labs(EventStudyPlot(estimates = estimates, 161 | ybreaks = c(-1.5, -.5, 0, .5, 1.5), 162 | pre_event_coeffs = FALSE, 163 | post_event_coeffs = FALSE))$caption 164 | 165 | regex_for_p_value <- "1\\.0*$|0\\.\\d+" # 1 followed by . and then zero or more 0's or 0 then . then any number 166 | regex_pretrends <- "Pretrends p-value = " 167 | regex_posttrends <- "Leveling off p-value = " 168 | 169 | expect_true( 170 | stringr::str_detect(p_pre_post_caption, regex_for_p_value) & 171 | stringr::str_detect(p_pre_post_caption, regex_pretrends) & 172 | stringr::str_detect(p_pre_post_caption, regex_posttrends) 173 | ) 174 | 175 | expect_true( 176 | stringr::str_detect(p_pre_caption, regex_for_p_value) & 177 | stringr::str_detect(p_pre_caption, regex_pretrends) 178 | ) 179 | 180 | expect_false( 181 | stringr::str_detect(p_pre_caption, regex_for_p_value) & 182 | stringr::str_detect(p_pre_caption, regex_posttrends) 183 | ) 184 | 185 | expect_false( 186 | stringr::str_detect(p_post_caption, regex_for_p_value) & 187 | stringr::str_detect(p_post_caption, regex_pretrends) 188 | ) 189 | 190 | expect_true( 191 | stringr::str_detect(p_post_caption, regex_for_p_value) & 192 | stringr::str_detect(p_post_caption, regex_posttrends) 193 | ) 194 | 195 | expect_null(p_neither_caption) 196 | }) 197 | 198 | test_that("Sup-t bands are wider than confidence intervals", { 199 | 200 | estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 201 | policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", 202 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 203 | 204 | p <- EventStudyPlot(estimates = estimates, 205 | conf_level = .95, 206 | supt = .95) 207 | 208 | ci_lower <- na.omit(p$data$ci_lower) 209 | ci_upper <- na.omit(p$data$ci_upper) 210 | suptband_lower <- na.omit(p$data$suptband_lower) 211 | suptband_upper <- na.omit(p$data$suptband_upper) 212 | num_terms <- nrow(na.omit(p$data)) 213 | 214 | v_lower_comparison <- (suptband_lower <= ci_lower) 215 | v_upper_comparison <- (suptband_upper >= ci_upper) 216 | 217 | expect_equal(num_terms, sum(v_lower_comparison)) 218 | expect_equal(num_terms, sum(v_upper_comparison)) 219 | }) 220 | 221 | test_that("computed smoothest path for examples is within expectations", { 222 | 223 | estimates <- EventStudy(estimator = "OLS", data = example_data, outcomevar = "y_base", 224 | policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", 225 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = - 3) 226 | 227 | p <- EventStudyPlot(estimates = estimates, 228 | smpath = T) 229 | 230 | expect_equal(p$data$smoothest_path, matrix(rep(0, nrow(p$data)))) 231 | 232 | estimates <- EventStudy(estimator = "OLS", 
data = example_data, outcomevar = "y_smooth_m", 233 | policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", 234 | post = 3, pre = 2, overidpre = 4, overidpost = 5, normalize = -3) 235 | 236 | p <- EventStudyPlot(estimates = estimates, 237 | smpath = T) 238 | 239 | normalized_index <- which(p$data$estimate == 0) 240 | normalized_smpath <- p$data$smoothest_path[normalized_index] 241 | 242 | # Expect normalized_smpath to be almost equal to zero 243 | expect_true(all(abs(normalized_smpath) < 1e-10)) 244 | 245 | max_smpath <- max(p$data$smoothest_path) 246 | min_smpath <- min(p$data$smoothest_path) 247 | 248 | max_suptband <- max(p$data$suptband_upper, na.rm = T) 249 | min_suptband <- min(p$data$suptband_lower, na.rm = T) 250 | 251 | # Expect smpath to be contained in suptband 252 | expect_true(max_smpath < max_suptband) 253 | expect_true(min_smpath > min_suptband) 254 | }) 255 | 256 | test_that("computed smoothest path for FHS has at least two coefficients almost equal to zero", { 257 | 258 | estimates <- EventStudy(estimator = "FHS", data = example_data, outcomevar = "y_jump_m", 259 | policyvar = "z", idvar = "id", timevar = "t", controls = "x_r", proxy = "eta_r", 260 | post = 3, pre = 0, overidpre = 3, overidpost = 1, normalize = -1, proxyIV = "z_fd_lead3") 261 | 262 | p <- EventStudyPlot(estimates = estimates, 263 | smpath = T) 264 | 265 | normalized_index <- which(p$data$estimate == 0) 266 | normalized_smpath <- p$data$smoothest_path[normalized_index] 267 | 268 | expect_true(length(normalized_index) >= 2) 269 | expect_true(all(abs(normalized_smpath) < 1e-10)) 270 | }) 271 | -------------------------------------------------------------------------------- /R/EventStudyPlot.R: -------------------------------------------------------------------------------- 1 | #' Creates an Event-Study Plot Following the Suggestions in Freyaldenhoven et al. (2021) 2 | #' 3 | #' @description `EventStudyPlot` takes the output from [EventStudy()] and combines it with additional optional arguments to facilitate constructing an Event-Study Plot. 4 | #' 5 | #' @param estimates The output from calling [EventStudy()]. Should be a list of length 2. 6 | #' @param xtitle The title for the x-axis. Should be a string. Defaults to "Event time". 7 | #' @param ytitle The title for the y-axis. Should be a string. Defaults to "Coefficient". 8 | #' @param ybreaks A vector containing the desired breaks for the y-axis. 9 | #' Defaults to NULL, which means the breaks are computed automatically. 10 | #' If custom breaks are selected with the `add_mean` argument set to TRUE, then the breaks must include zero. 11 | #' @param conf_level Confidence level used for confidence interval 12 | #' expressed as a real number between 0 and 1, inclusive. Defaults to 0.95. 13 | #' @param supt The confidence level used for obtaining the sup-t bands critical value. 14 | #' Should be a real number between 0 and 1, inclusive. Defaults to .95. Sup-t bands are simulation-based, 15 | #' so you must set a seed if you would like your sup-t band results to be reproducible (see examples). 16 | #' @param num_sim The number of simulations used in generating the sup-t bands. 17 | #' Should be a natural number. Defaults to 1000. 18 | #' @param add_mean Adds the mean of the dependent variable in the period used for normalization. 19 | #' Should be TRUE or FALSE. Defaults to FALSE. 20 | #' @param pre_event_coeffs If TRUE, uses pre and overidpre from estimates to test for pre-trends. 21 | #' Should be TRUE or FALSE. Defaults to TRUE. 
22 | #' @param post_event_coeffs If TRUE, uses post and overidpost from estimates to test for leveling-off. 23 | #' Should be TRUE or FALSE. Defaults to TRUE. 24 | #' @param add_zero_line Whether or not to plot a dashed horizontal line at y = 0. 25 | #' Should be TRUE or FALSE. Defaults to TRUE, meaning the line is plotted. 26 | #' @param smpath Plot smoothest path of confounder that rationalizes event study coefficients. 27 | #' Should be TRUE or FALSE. Defaults to FALSE. 28 | #' 29 | #' @return The Event-Study plot as a ggplot2 object. 30 | #' @import ggplot2 dplyr 31 | #' @import estimatr 32 | #' @importFrom rlang .data 33 | #' @importFrom data.table setorder 34 | #' @export 35 | #' 36 | #' @examples 37 | #' 38 | #' # 39 | #' 40 | #' # Minimal examples 41 | #' ### OLS 42 | #' 43 | #' estimates_ols <- EventStudy( 44 | #' estimator = "OLS", 45 | #' data = example_data, 46 | #' outcomevar = "y_smooth_m", 47 | #' policyvar = "z", 48 | #' idvar = "id", 49 | #' timevar = "t", 50 | #' controls = "x_r", 51 | #' FE = TRUE, TFE = TRUE, 52 | #' post = 3, overidpost = 5, 53 | #' pre = 2, overidpre = 4, 54 | #' normalize = - 3 55 | #' ) 56 | #' 57 | #' plt_ols <- EventStudyPlot(estimates = estimates_ols) 58 | #' plt_ols 59 | #' 60 | #' ### IV 61 | #' 62 | #' estimates_fhs <- EventStudy( 63 | #' estimator = "FHS", 64 | #' data = example_data, 65 | #' outcomevar = "y_smooth_m", 66 | #' policyvar = "z", 67 | #' idvar = "id", 68 | #' timevar = "t", 69 | #' proxy = "x_r", 70 | #' post = 2, overidpost = 1, 71 | #' pre = 0, overidpre = 3, 72 | #' normalize = -1 73 | #' ) 74 | #' 75 | #' plt_fhs <- EventStudyPlot(estimates = estimates_fhs) 76 | #' plt_fhs 77 | #' 78 | #' # Optional arguments 79 | #' 80 | #' ### Change x- and y-axis titles and set ybreaks 81 | #' EventStudyPlot(estimates = estimates_ols, 82 | #' xtitle = "Relative time", ytitle = "", 83 | #' ybreaks = seq(-2, 1, 0.5)) 84 | #' 85 | #' ### Add smoothest path 86 | #' EventStudyPlot(estimates = estimates_ols, smpath = TRUE) 87 | #' 88 | #' ### Add y-mean to y-axis and line y = 0 89 | #' EventStudyPlot(estimates = estimates_ols, add_mean = TRUE, 90 | #' add_zero_line = TRUE) 91 | #' 92 | #' ### Do not plot supt bands 93 | #' EventStudyPlot(estimates = estimates_ols, supt = NULL) 94 | #' 95 | #' ### Setting seed prior to plotting sup-t bands 96 | #' set.seed(1234) 97 | #' EventStudyPlot(estimates = estimates_ols) 98 | #' 99 | #' # Modify plots using ggplot2 functions 100 | #' library(ggplot2) 101 | #' 102 | #' ### Change color of dots, horizontal line, and theme 103 | #' plt_ols + 104 | #' geom_point(color = "red") + 105 | #' geom_hline(color = "gray", yintercept = 0) + 106 | #' theme_light() + 107 | #' theme(panel.grid.minor.x = element_blank()) 108 | #' 109 | 110 | EventStudyPlot <- function(estimates, 111 | xtitle = "Event time", ytitle = "Coefficient", ybreaks = NULL, 112 | conf_level = .95, supt = .95, num_sim = 1000, add_mean = FALSE, 113 | pre_event_coeffs = TRUE, post_event_coeffs = TRUE, 114 | add_zero_line = TRUE, smpath = FALSE) { 115 | 116 | if (!is.character(xtitle)) {stop("Argument 'xtitle' should be a character.")} 117 | if (!is.character(ytitle)) {stop("Argument 'ytitle' should be a character.")} 118 | if (!is.logical(add_zero_line)) {stop("Argument 'add_zero_line' should be either TRUE or FALSE.")} 119 | if (!is.null(ybreaks) & 120 | !is.numeric(ybreaks)) {stop("Argument 'ybreaks' should be NULL or a numeric vector.")} 121 | 122 | # Estimation Elements ----------------------------------------------------- 123 | 124 | df_estimates 
<- estimates$output 125 | df_estimates_tidy <- estimatr::tidy(estimates$output) 126 | 127 | static_model <- nrow(df_estimates_tidy) == 1 128 | if (static_model) { 129 | stop("EventStudyPlot() does not support static models.") 130 | } 131 | 132 | df_data <- estimates$arguments$data 133 | outcomevar <- estimates$arguments$outcomevar 134 | policyvar <- estimates$arguments$policyvar 135 | post <- estimates$arguments$post 136 | overidpost <- estimates$arguments$overidpost 137 | pre <- estimates$arguments$pre 138 | overidpre <- estimates$arguments$overidpre 139 | normalize <- estimates$arguments$normalize 140 | normalization_column <- estimates$arguments$normalization_column 141 | eventstudy_coefficients <- estimates$arguments$eventstudy_coefficients 142 | proxyIV <- estimates$arguments$proxyIV 143 | 144 | # Optionally Add Suptbands/Confidence Intervals --------------------------- 145 | 146 | plot_supt <- if(!is.null(supt)) TRUE else FALSE 147 | 148 | if (plot_supt) { 149 | df_estimates_tidy <- AddSuptBand(df_estimates, num_sim = 1000, conf_level = supt, 150 | eventstudy_coefficients = eventstudy_coefficients) 151 | } 152 | 153 | plot_CI <- if(!is.null(conf_level)) TRUE else FALSE 154 | 155 | if (plot_CI) { 156 | 157 | df_estimates_tidy <- AddCIs(df_estimates_tidy, eventstudy_coefficients, conf_level) 158 | } 159 | 160 | # Optionally Test For Pretrends/Levelling-Off ----------------------------- 161 | 162 | df_test_linear <- TestLinear(estimates = estimates, pretrends = pre_event_coeffs, leveling_off = post_event_coeffs) 163 | 164 | if ((pre_event_coeffs | post_event_coeffs)) { 165 | pretrends_p_value <- df_test_linear[df_test_linear["Test"] == "Pre-Trends", "p.value"] 166 | levelingoff_p_value <- df_test_linear[df_test_linear["Test"] == "Leveling-Off", "p.value"] 167 | 168 | text_pretrends <- paste0("Pretrends p-value = ", round(pretrends_p_value, 2)) 169 | text_levelingoff <- paste0("Leveling off p-value = ", round(levelingoff_p_value, 2)) 170 | 171 | 172 | if (pre_event_coeffs & post_event_coeffs) { 173 | text_caption <- paste0(text_pretrends, " -- ", text_levelingoff) 174 | 175 | } else if (pre_event_coeffs & !post_event_coeffs) { 176 | text_caption <- text_pretrends 177 | 178 | } else if (!pre_event_coeffs & post_event_coeffs) { 179 | text_caption <- text_levelingoff 180 | 181 | } 182 | } else { 183 | text_caption <- NULL 184 | } 185 | 186 | 187 | df_plt <- PreparePlottingData(df_estimates_tidy, policyvar, 188 | post, overidpost, pre, overidpre, normalization_column, proxyIV) 189 | 190 | # Construct y breaks ------------------------------------------------------ 191 | 192 | if (!is.null(ybreaks)) { 193 | if (!(0 %in% ybreaks) & add_mean) { 194 | stop("If you want to add the mean of y in the y-axis then 'ybreaks' must include 0.") 195 | } 196 | 197 | ylabels <- ybreaks 198 | ylims <- c(min(ybreaks), max(ybreaks)) 199 | } else { 200 | min_value <- min(c(df_plt$estimate, df_plt$ci_lower, df_plt$suptband_lower), na.rm = T) 201 | max_value <- max(c(df_plt$estimate, df_plt$ci_upper, df_plt$suptband_upper), na.rm = T) 202 | max_abs <- max(abs(min_value), abs(max_value)) 203 | 204 | magnitude <- 10^floor(log10(max_abs)) 205 | 206 | # Determine step depending on how far the endpoints are from the magnitude 207 | mean_ratio <- mean(c(abs(min_value)/magnitude, max_value/magnitude)) 208 | if (mean_ratio > 6.67) { 209 | step = 3*magnitude 210 | } else if (mean_ratio > 3.33) { 211 | step = 2*magnitude 212 | } else { 213 | step = magnitude 214 | } 215 | 216 | # Pick multiples of step to ensure zero is 
included 217 | close_to_min <- floor(min_value/step)*step 218 | close_to_max <- ceiling(max_value/step)*step 219 | 220 | ybreaks <- seq(close_to_min, close_to_max, step) 221 | ylims <- c(min(ybreaks), max(ybreaks)) 222 | 223 | if (length(ybreaks) >= 9) { 224 | # Too many breaks, double step size 225 | step <- step*2 226 | close_to_min <- floor(min_value/step)*step 227 | close_to_max <- ceiling(max_value/step)*step 228 | 229 | ybreaks <- seq(close_to_min, close_to_max, step) 230 | } else if (length(ybreaks) <= 3) { 231 | # Too few breaks, halve step size 232 | step <- step/2 233 | close_to_min <- floor(min_value/step)*step 234 | close_to_max <- ceiling(max_value/step)*step 235 | 236 | ybreaks <- seq(close_to_min, close_to_max, step) 237 | } 238 | ylabels <- ybreaks 239 | } 240 | 241 | # Optionally Adds Mean ---------------------------------------------------- 242 | 243 | if (add_mean) { 244 | 245 | y_mean <- AddMeans(df_data, normalization_column, policyvar, outcomevar) 246 | 247 | index_zero <- which(ybreaks == 0) 248 | ylabels[index_zero] <- paste0(ylabels[index_zero], " (", round(y_mean, 2), ")") 249 | } 250 | 251 | # Optionally Add smooth path ---------------------------------------------- 252 | 253 | # Order coefficients 254 | label_var = "label" 255 | data.table::setorderv(df_plt, c(label_var)) 256 | ordered_labels <- df_plt$label 257 | 258 | if (smpath) { 259 | 260 | unselect_message <- "Please change the 'Smpath' argument in 'EventStudyPlot' to FALSE." 261 | 262 | if (!is.null(proxyIV)) { 263 | if (sum(df_plt$estimate == 0) > 2) { 264 | stop(paste0("The smoothest path is not supported for the FHS estimator with more than one instrument.", 265 | unselect_message)) 266 | } 267 | } 268 | 269 | coefficients <- df_plt$estimate 270 | 271 | # Add column and row in matrix of coefficients in index of norm columns 272 | covar <- AddZerosCovar(estimates$output$vcov, 273 | eventstudy_coefficients, 274 | df_plt[df_plt$estimate==0, ]$term, 275 | df_plt$term) 276 | 277 | inv_covar <- pracma::pinv(covar) 278 | 279 | df_plt <- AddSmPath(df_plt, coefficients, inv_covar) 280 | } 281 | 282 | # Construct Plot ---------------------------------------------------------- 283 | 284 | df_plt$label_num <- as.numeric(gsub("+", "", df_plt$label, fixed = T)) 285 | 286 | plt <- ggplot(df_plt, 287 | aes(x = .data$label_num, y = .data$estimate)) 288 | 289 | if (add_zero_line) { 290 | plt <- plt + 291 | geom_hline(yintercept = 0, 292 | color = "green", linetype = "dashed") 293 | } 294 | if (plot_supt) { 295 | plt <- plt + 296 | geom_linerange(aes(ymin = .data$suptband_lower, 297 | ymax = .data$suptband_upper), 298 | data = df_plt[df_plt$estimate != 0,]) 299 | } 300 | if (plot_CI) { 301 | plt <- plt + 302 | geom_errorbar(aes(ymin = .data$ci_lower, 303 | ymax = .data$ci_upper), 304 | data = df_plt[df_plt$estimate != 0,], 305 | width = .2) 306 | } 307 | if (smpath) { 308 | plt <- plt + 309 | geom_line(aes(y = .data$smoothest_path, group = 1), 310 | color = "black") 311 | } 312 | 313 | plt <- plt + 314 | geom_point(color = "#006600") + 315 | scale_x_continuous(breaks = min(df_plt$label_num):max(df_plt$label_num), 316 | labels = ordered_labels) + 317 | scale_y_continuous(breaks = ybreaks, 318 | labels = ylabels, 319 | limits = ylims) + 320 | labs(x = xtitle, y = ytitle, 321 | caption = text_caption) + 322 | theme_bw() + 323 | theme(panel.grid = element_blank(), 324 | plot.caption = element_text(hjust = 0)) 325 | 326 | return(plt) 327 | } 328 | 
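# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the package sources above): a stand-alone
# re-implementation of the automatic y-break heuristic that EventStudyPlot()
# applies when 'ybreaks = NULL', shown only to clarify how the step size is
# chosen. The function name 'SketchYBreaks' is hypothetical.
SketchYBreaks <- function(min_value, max_value) {
  max_abs   <- max(abs(min_value), abs(max_value))
  magnitude <- 10^floor(log10(max_abs))   # order of magnitude of the widest endpoint

  # Step size depends on how far the endpoints are, on average, from that magnitude
  mean_ratio <- mean(c(abs(min_value) / magnitude, max_value / magnitude))
  step <- if (mean_ratio > 6.67) {
    3 * magnitude
  } else if (mean_ratio > 3.33) {
    2 * magnitude
  } else {
    magnitude
  }

  # Breaks are multiples of 'step', which guarantees that zero is among them
  seq(floor(min_value / step) * step, ceiling(max_value / step) * step, step)
}

# Example: endpoints spanning the plotted estimates and their bands
SketchYBreaks(-0.42, 1.07)   # -1 0 1 2
# EventStudyPlot() additionally doubles (halves) the step when this first pass
# yields nine or more (three or fewer) breaks.
# ---------------------------------------------------------------------------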
-------------------------------------------------------------------------------- /tests/testthat/test-EventStudyOLS.R: -------------------------------------------------------------------------------- 1 | 2 | test_that("FE = TRUE, 3 | TFE = TRUE, 4 | cluster = TRUE works", { 5 | 6 | df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") 7 | 8 | estimator <- "OLS" 9 | outcomevar <- "y_base" 10 | str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 11 | controls <- "x_r" 12 | 13 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 14 | static = F, controls = controls) 15 | 16 | idvar <- "id" 17 | timevar <- "t" 18 | 19 | FE <- TRUE 20 | TFE <- TRUE 21 | cluster <- TRUE 22 | 23 | if (FE & TFE & cluster) { 24 | 25 | reg <- EventStudyOLS( 26 | prepared_model_formula = event_study_formula, 27 | prepared_data = df_test_EventStudyOLS, 28 | idvar = idvar, 29 | timevar = timevar, 30 | FE = FE, 31 | TFE = TFE, 32 | cluster = cluster 33 | ) 34 | } 35 | 36 | expect_true(all.equal(reg$felevels$`get(idvar)`, as.character(unique(df_test_EventStudyOLS$id)))) 37 | expect_equal(reg$nclusters, length(unique(df_test_EventStudyOLS$id))) 38 | expect_true(reg$se_type == "stata") 39 | 40 | }) 41 | 42 | test_that("FE = FALSE, 43 | TFE = TRUE, 44 | cluster = TRUE works", { 45 | 46 | df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") 47 | 48 | estimator <- "OLS" 49 | outcomevar <- "y_base" 50 | str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 51 | controls <- "x_r" 52 | 53 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 54 | static = F, controls = controls) 55 | 56 | idvar <- "id" 57 | timevar <- "t" 58 | 59 | FE <- FALSE 60 | TFE <- TRUE 61 | cluster <- TRUE 62 | 63 | if ((!FE) & TFE & cluster) { 64 | 65 | reg <- EventStudyOLS( 66 | prepared_model_formula = event_study_formula, 67 | prepared_data = df_test_EventStudyOLS, 68 | idvar = idvar, 69 | timevar = timevar, 70 | FE = FE, 71 | TFE = TFE, 72 | cluster = cluster 73 | ) 74 | } 75 | 76 | expect_equal(is.null(reg$felevels$`get(idvar)`), TRUE) 77 | expect_equal(reg$nclusters, length(unique(df_test_EventStudyOLS$id))) 78 | expect_true(reg$se_type == "stata") 79 | 80 | }) 81 | 82 | test_that("FE = TRUE, 83 | TFE = FALSE, 84 | cluster = TRUE works", { 85 | 86 | df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") 87 | 88 | estimator <- "OLS" 89 | outcomevar <- "y_base" 90 | str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 91 | controls <- "x_r" 92 | 93 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 94 | static = F, controls = controls) 95 | 96 | idvar <- "id" 97 | timevar <- "t" 98 | 99 | FE <- TRUE 100 | TFE <- FALSE 101 | cluster <- TRUE 102 | 103 | if (FE & (!TFE) & cluster) { 104 | 105 | reg <- EventStudyOLS( 106 | prepared_model_formula = event_study_formula, 107 | prepared_data = df_test_EventStudyOLS, 108 | idvar = idvar, 109 | timevar = timevar, 110 | FE = FE, 111 | TFE = TFE, 112 | cluster = cluster 113 | ) 114 | } 115 | 116 | expect_true(all.equal(reg$felevels$V1, as.character(unique(df_test_EventStudyOLS$id)))) 117 | expect_true(is.null(reg$felevels$`get(timevar)`), TRUE) 118 | expect_equal(reg$nclusters, length(unique(df_test_EventStudyOLS$id))) 119 | expect_true(reg$se_type == "stata") 120 | 121 | }) 122 | 123 | test_that("FE = FALSE, 124 | 
TFE = FALSE, 125 | cluster = TRUE works", { 126 | 127 | df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") 128 | 129 | estimator <- "OLS" 130 | outcomevar <- "y_base" 131 | str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 132 | controls <- "x_r" 133 | 134 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 135 | static = F, controls = controls) 136 | 137 | idvar <- "id" 138 | timevar <- "t" 139 | 140 | FE <- FALSE 141 | TFE <- FALSE 142 | cluster <- TRUE 143 | 144 | if ((!FE) & (!TFE) & cluster) { 145 | 146 | reg <- EventStudyOLS( 147 | prepared_model_formula = event_study_formula, 148 | prepared_data = df_test_EventStudyOLS, 149 | idvar = idvar, 150 | timevar = timevar, 151 | FE = FE, 152 | TFE = TFE, 153 | cluster = cluster 154 | ) 155 | } 156 | 157 | expect_true(is.null(reg$felevels$`get(idvar)`), TRUE) 158 | expect_true(is.null(reg$felevels$`get(timevar)`), TRUE) 159 | expect_equal(reg$nclusters, length(unique(df_test_EventStudyOLS$id))) 160 | expect_true(reg$se_type == "stata") 161 | 162 | }) 163 | 164 | test_that("FE = TRUE, 165 | TFE = TRUE, 166 | cluster = FALSE works", { 167 | 168 | df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") 169 | 170 | estimator <- "OLS" 171 | outcomevar <- "y_base" 172 | str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 173 | controls <- "x_r" 174 | 175 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 176 | static = F, controls = controls) 177 | 178 | idvar <- "id" 179 | timevar <- "t" 180 | 181 | FE <- TRUE 182 | TFE <- TRUE 183 | cluster <- FALSE 184 | 185 | if (FE & TFE & (!cluster)) { 186 | 187 | reg <- EventStudyOLS( 188 | prepared_model_formula = event_study_formula, 189 | prepared_data = df_test_EventStudyOLS, 190 | idvar = idvar, 191 | timevar = timevar, 192 | FE = FE, 193 | TFE = TFE, 194 | cluster = cluster 195 | ) 196 | } 197 | 198 | expect_true(all.equal(reg$felevels$`get(idvar)`, as.character(unique(df_test_EventStudyOLS$id)))) 199 | expect_true(is.null(reg$nclusters), TRUE) 200 | expect_true(reg$se_type %in% c("stata", "HC1")) 201 | 202 | }) 203 | 204 | test_that("FE = FALSE, 205 | TFE = TRUE, 206 | cluster = FALSE works", { 207 | 208 | df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") 209 | 210 | estimator <- "OLS" 211 | outcomevar <- "y_base" 212 | str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 213 | controls <- "x_r" 214 | 215 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 216 | static = F, controls = controls) 217 | 218 | idvar <- "id" 219 | timevar <- "t" 220 | 221 | FE <- FALSE 222 | TFE <- TRUE 223 | cluster <- FALSE 224 | 225 | if ((!FE) & TFE & (!cluster)) { 226 | 227 | reg <- EventStudyOLS( 228 | prepared_model_formula = event_study_formula, 229 | prepared_data = df_test_EventStudyOLS, 230 | idvar = idvar, 231 | timevar = timevar, 232 | FE = FE, 233 | TFE = TFE, 234 | cluster = cluster 235 | ) 236 | } 237 | 238 | expect_true(is.null(reg$felevels$`get(idvar)`), TRUE) 239 | expect_true(is.null(reg$nclusters), TRUE) 240 | expect_true(reg$se_type %in% c("stata", "HC1")) 241 | 242 | }) 243 | 244 | test_that("FE = TRUE, 245 | TFE = FALSE, 246 | cluster = FALSE works", { 247 | 248 | df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") 249 | 250 | estimator <- "OLS" 251 | outcomevar <- 
"y_base" 252 | str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 253 | controls <- "x_r" 254 | 255 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 256 | static = F, controls = controls) 257 | 258 | idvar <- "id" 259 | timevar <- "t" 260 | 261 | FE <- TRUE 262 | TFE <- FALSE 263 | cluster <- FALSE 264 | 265 | if (FE & (!TFE) & (!cluster)) { 266 | 267 | reg <- EventStudyOLS( 268 | prepared_model_formula = event_study_formula, 269 | prepared_data = df_test_EventStudyOLS, 270 | idvar = idvar, 271 | timevar = timevar, 272 | FE = FE, 273 | TFE = TFE, 274 | cluster = cluster 275 | ) 276 | } 277 | 278 | expect_true(all.equal(reg$felevels$V1, as.character(unique(df_test_EventStudyOLS$id)))) 279 | expect_true(is.null(reg$felevels$`get(timevar)`), TRUE) 280 | expect_true(is.null(reg$nclusters), TRUE) 281 | expect_true(reg$se_type %in% c("stata", "HC1")) 282 | 283 | }) 284 | 285 | test_that("FE = FALSE, 286 | TFE = FALSE, 287 | cluster = FALSE works", { 288 | 289 | df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") 290 | 291 | estimator <- "OLS" 292 | outcomevar <- "y_base" 293 | str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 294 | controls <- "x_r" 295 | 296 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 297 | static = F, controls = controls) 298 | 299 | idvar <- "id" 300 | timevar <- "t" 301 | 302 | FE <- FALSE 303 | TFE <- FALSE 304 | cluster <- FALSE 305 | 306 | if ((!FE) & (!TFE) & (!cluster)) { 307 | 308 | reg <- EventStudyOLS( 309 | prepared_model_formula = event_study_formula, 310 | prepared_data = df_test_EventStudyOLS, 311 | idvar = idvar, 312 | timevar = timevar, 313 | FE = FE, 314 | TFE = TFE, 315 | cluster = cluster 316 | ) 317 | } 318 | 319 | expect_true(is.null(reg$felevels$`get(idvar)`), TRUE) 320 | expect_true(is.null(reg$felevels$`get(timevar)`), TRUE) 321 | expect_true(is.null(reg$nclusters), TRUE) 322 | expect_true(reg$se_type %in% c("stata", "HC1")) 323 | 324 | }) 325 | 326 | 327 | test_that("Coefficients and Standard Errors agree with base STATA", { 328 | 329 | df_test_EventStudyOLS <- read.csv("./input/df_test_EventStudyOLS.csv") 330 | 331 | estimator <- "OLS" 332 | outcomevar <- "y_base" 333 | str_policy_vars <- c("z_lead3", "z_fd_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 334 | controls <- "x_r" 335 | 336 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 337 | static = F, controls = controls) 338 | 339 | idvar <- "id" 340 | timevar <- "t" 341 | 342 | FE <- TRUE 343 | TFE <- TRUE 344 | cluster <- TRUE 345 | 346 | reg <- EventStudyOLS( 347 | prepared_model_formula = event_study_formula, 348 | prepared_data = df_test_EventStudyOLS, 349 | idvar = idvar, 350 | timevar = timevar, 351 | FE = FE, 352 | TFE = TFE, 353 | cluster = cluster 354 | ) 355 | 356 | df_test_STATA <- read.csv("./input/df_test_base_STATA.csv", col.names = c("term", "coef", "std_error")) 357 | 358 | epsilon <- 10e-7 359 | 360 | expect_equal(unname(reg$coefficients["z_fd"]), df_test_STATA[df_test_STATA["term"] == "zfd",][[2]], tolerance = epsilon) 361 | expect_equal(unname(reg$coefficients["z_fd_lead2"]), df_test_STATA[df_test_STATA["term"] == "F2.zfd",][[2]], tolerance = epsilon) 362 | expect_equal(unname(reg$coefficients["z_fd_lead3"]), df_test_STATA[df_test_STATA["term"] == "F3.zfd",][["coef"]][1], tolerance = epsilon) 363 | 
expect_equal(unname(reg$coefficients["z_fd_lag1"]), df_test_STATA[df_test_STATA["term"] == "L.zfd",][[2]], tolerance = epsilon) 364 | expect_equal(unname(reg$coefficients["z_fd_lag2"]), df_test_STATA[df_test_STATA["term"] == "L2.zfd",][[2]], tolerance = epsilon) 365 | expect_equal(unname(reg$coefficients["z_lead3"]), -1 * df_test_STATA[df_test_STATA["term"] == "F3.z",][["coef"]], tolerance = epsilon) 366 | expect_equal(unname(reg$coefficients["z_lag3"]), df_test_STATA[df_test_STATA["term"] == "L3.z",][[2]], tolerance = epsilon) 367 | expect_equal(unname(reg$coefficients["x_r"]), df_test_STATA[df_test_STATA["term"] == "x_r",][[2]], tolerance = epsilon) 368 | 369 | expect_equal(unname(reg$std.error["z_fd"]), df_test_STATA[df_test_STATA["term"] == "zfd",][[3]], tolerance = epsilon) 370 | expect_equal(unname(reg$std.error["z_fd_lead2"]), df_test_STATA[df_test_STATA["term"] == "F2.zfd",][[3]], tolerance = epsilon) 371 | expect_equal(unname(reg$std.error["z_fd_lead3"]), df_test_STATA[df_test_STATA["term"] == "F3.zfd",][["std_error"]][1], tolerance = epsilon) 372 | expect_equal(unname(reg$std.error["z_fd_lag1"]), df_test_STATA[df_test_STATA["term"] == "L.zfd",][[3]], tolerance = epsilon) 373 | expect_equal(unname(reg$std.error["z_fd_lag2"]),df_test_STATA[df_test_STATA["term"] == "L2.zfd",][[3]], tolerance = epsilon) 374 | expect_equal(unname(reg$std.error["z_lead3"]), df_test_STATA[df_test_STATA["term"] == "F3.z",][["std_error"]], tolerance = epsilon) 375 | expect_equal(unname(reg$std.error["z_lag3"]),df_test_STATA[df_test_STATA["term"] == "L3.z",][[3]], tolerance = epsilon) 376 | expect_equal(unname(reg$std.error["x_r"]), df_test_STATA[df_test_STATA["term"] == "x_r",][[3]], tolerance = epsilon * 10) 377 | }) 378 | -------------------------------------------------------------------------------- /tests/testthat/test-EventStudyFHS.R: -------------------------------------------------------------------------------- 1 | 2 | test_that("FE = TRUE, 3 | TFE = TRUE, 4 | cluster = TRUE works", { 5 | 6 | estimator <- "FHS" 7 | outcomevar <- "y_base" 8 | str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 9 | controls <- "x_r" 10 | proxy <- "eta_m" 11 | proxyIV <- "z_fd_lead3" 12 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, 13 | str_policy_vars, FALSE, controls, proxy, proxyIV) 14 | 15 | idvar <- "id" 16 | timevar <- "t" 17 | 18 | FE <- TRUE 19 | TFE <- TRUE 20 | cluster <- TRUE 21 | 22 | if (FE & TFE & cluster) { 23 | 24 | reg <- EventStudyFHS( 25 | prepared_model_formula = event_study_formula, 26 | prepared_data = df_EventStudyFHS_example, 27 | idvar = idvar, 28 | timevar = timevar, 29 | FE = FE, 30 | TFE = TFE, 31 | cluster = cluster 32 | ) 33 | } 34 | 35 | expect_true(all.equal(reg$felevels$`get(idvar)`, as.character(unique(df_EventStudyFHS_example$id)))) 36 | expect_equal(reg$nclusters, length(unique(df_EventStudyFHS_example$id))) 37 | expect_true(reg$se_type == "stata") 38 | 39 | }) 40 | 41 | test_that("FE = FALSE, 42 | TFE = TRUE, 43 | cluster = TRUE works", { 44 | 45 | estimator <- "FHS" 46 | outcomevar <- "y_base" 47 | str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 48 | controls <- "x_r" 49 | proxy <- "eta_m" 50 | proxyIV <- "z_fd_lead3" 51 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, 52 | str_policy_vars, FALSE, controls, proxy, proxyIV) 53 | 54 | idvar <- "id" 55 | timevar <- "t" 56 | 57 | FE <- FALSE 58 | TFE <- TRUE 59 | cluster <- TRUE 60 | 61 | if ((!FE) & TFE & 
cluster) { 62 | 63 | reg <- EventStudyFHS( 64 | prepared_model_formula = event_study_formula, 65 | prepared_data = df_EventStudyFHS_example, 66 | idvar = idvar, 67 | timevar = timevar, 68 | FE = FE, 69 | TFE = TFE, 70 | cluster = cluster 71 | ) 72 | } 73 | 74 | expect_equal(is.null(reg$felevels$`get(idvar)`), TRUE) 75 | expect_equal(reg$nclusters, length(unique(df_EventStudyFHS_example$id))) 76 | expect_true(reg$se_type == "stata") 77 | 78 | }) 79 | 80 | test_that("FE = TRUE, 81 | TFE = FALSE, 82 | cluster = TRUE works", { 83 | 84 | estimator <- "FHS" 85 | outcomevar <- "y_base" 86 | str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 87 | controls <- "x_r" 88 | proxy <- "eta_m" 89 | proxyIV <- "z_fd_lead3" 90 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, 91 | str_policy_vars, FALSE, controls, proxy, proxyIV) 92 | 93 | idvar <- "id" 94 | timevar <- "t" 95 | 96 | FE <- TRUE 97 | TFE <- FALSE 98 | cluster <- TRUE 99 | 100 | if (FE & (!TFE) & cluster) { 101 | 102 | reg <- EventStudyFHS( 103 | prepared_model_formula = event_study_formula, 104 | prepared_data = df_EventStudyFHS_example, 105 | idvar = idvar, 106 | timevar = timevar, 107 | FE = FE, 108 | TFE = TFE, 109 | cluster = cluster 110 | ) 111 | } 112 | 113 | expect_true(all.equal(reg$felevels$V1, as.character(unique(df_EventStudyFHS_example$id)))) 114 | expect_true(is.null(reg$felevels$`get(timevar)`), TRUE) 115 | expect_equal(reg$nclusters, length(unique(df_EventStudyFHS_example$id))) 116 | expect_true(reg$se_type == "stata") 117 | 118 | }) 119 | 120 | test_that("FE = FALSE, 121 | TFE = FALSE, 122 | cluster = TRUE works", { 123 | 124 | estimator <- "FHS" 125 | outcomevar <- "y_base" 126 | str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 127 | controls <- "x_r" 128 | proxy <- "eta_m" 129 | proxyIV <- "z_fd_lead3" 130 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, 131 | str_policy_vars, FALSE, controls, proxy, proxyIV) 132 | 133 | idvar <- "id" 134 | timevar <- "t" 135 | 136 | FE <- FALSE 137 | TFE <- FALSE 138 | cluster <- TRUE 139 | 140 | if ((!FE) & (!TFE) & cluster) { 141 | 142 | reg <- EventStudyFHS( 143 | prepared_model_formula = event_study_formula, 144 | prepared_data = df_EventStudyFHS_example, 145 | idvar = idvar, 146 | timevar = timevar, 147 | FE = FE, 148 | TFE = TFE, 149 | cluster = cluster 150 | ) 151 | } 152 | 153 | expect_true(is.null(reg$felevels$`get(idvar)`), TRUE) 154 | expect_true(is.null(reg$felevels$`get(timevar)`), TRUE) 155 | expect_equal(reg$nclusters, length(unique(df_EventStudyFHS_example$id))) 156 | expect_true(reg$se_type == "stata") 157 | 158 | }) 159 | 160 | test_that("FE = TRUE, 161 | TFE = TRUE, 162 | cluster = FALSE does not work", { 163 | 164 | estimator <- "FHS" 165 | outcomevar <- "y_base" 166 | str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 167 | controls <- "x_r" 168 | proxy <- "eta_m" 169 | proxyIV <- "z_fd_lead3" 170 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, 171 | str_policy_vars, FALSE, controls, proxy, proxyIV) 172 | 173 | idvar <- "id" 174 | timevar <- "t" 175 | 176 | FE <- TRUE 177 | TFE <- TRUE 178 | cluster <- FALSE 179 | 180 | expect_error( 181 | if (FE & TFE & (!cluster)) { 182 | 183 | reg <- EventStudyFHS( 184 | prepared_model_formula = event_study_formula, 185 | prepared_data = df_EventStudyFHS_example, 186 | idvar = idvar, 187 | timevar = timevar, 188 | FE = FE, 189 | TFE = TFE, 190 | cluster = 
cluster 191 | ) 192 | }, 193 | "cluster=TRUE required when FE=TRUE." 194 | ) 195 | }) 196 | 197 | test_that("FE = FALSE, 198 | TFE = TRUE, 199 | cluster = FALSE works", { 200 | 201 | estimator <- "FHS" 202 | outcomevar <- "y_base" 203 | str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 204 | controls <- "x_r" 205 | proxy <- "eta_m" 206 | proxyIV <- "z_fd_lead3" 207 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, 208 | str_policy_vars, FALSE, controls, proxy, proxyIV) 209 | 210 | idvar <- "id" 211 | timevar <- "t" 212 | 213 | FE <- FALSE 214 | TFE <- TRUE 215 | cluster <- FALSE 216 | 217 | if ((!FE) & TFE & (!cluster)) { 218 | 219 | reg <- EventStudyFHS( 220 | prepared_model_formula = event_study_formula, 221 | prepared_data = df_EventStudyFHS_example, 222 | idvar = idvar, 223 | timevar = timevar, 224 | FE = FE, 225 | TFE = TFE, 226 | cluster = cluster 227 | ) 228 | } 229 | 230 | expect_true(is.null(reg$felevels$`get(idvar)`), TRUE) 231 | expect_true(is.null(reg$nclusters), TRUE) 232 | expect_true(reg$se_type %in% c("stata", "HC1")) 233 | 234 | }) 235 | 236 | test_that("FE = TRUE, 237 | TFE = FALSE, 238 | cluster = FALSE does not work", { 239 | 240 | estimator <- "FHS" 241 | outcomevar <- "y_base" 242 | str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 243 | controls <- "x_r" 244 | proxy <- "eta_m" 245 | proxyIV <- "z_fd_lead3" 246 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, 247 | str_policy_vars, FALSE, controls, proxy, proxyIV) 248 | 249 | idvar <- "id" 250 | timevar <- "t" 251 | 252 | FE <- TRUE 253 | TFE <- FALSE 254 | cluster <- FALSE 255 | 256 | expect_error( 257 | if (FE & (!TFE) & (!cluster)) { 258 | 259 | reg <- EventStudyFHS( 260 | prepared_model_formula = event_study_formula, 261 | prepared_data = df_EventStudyFHS_example, 262 | idvar = idvar, 263 | timevar = timevar, 264 | FE = FE, 265 | TFE = TFE, 266 | cluster = cluster 267 | ) 268 | }, 269 | "cluster=TRUE required when FE=TRUE." 
270 | ) 271 | }) 272 | 273 | test_that("FE = FALSE, 274 | TFE = FALSE, 275 | cluster = FALSE works", { 276 | 277 | estimator <- "FHS" 278 | outcomevar <- "y_base" 279 | str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 280 | controls <- "x_r" 281 | proxy <- "eta_m" 282 | proxyIV <- "z_fd_lead3" 283 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, 284 | str_policy_vars, FALSE, controls, proxy, proxyIV) 285 | 286 | idvar <- "id" 287 | timevar <- "t" 288 | 289 | FE <- FALSE 290 | TFE <- FALSE 291 | cluster <- FALSE 292 | 293 | if ((!FE) & (!TFE) & (!cluster)) { 294 | 295 | reg <- EventStudyFHS( 296 | prepared_model_formula = event_study_formula, 297 | prepared_data = df_EventStudyFHS_example, 298 | idvar = idvar, 299 | timevar = timevar, 300 | FE = FE, 301 | TFE = TFE, 302 | cluster = cluster 303 | ) 304 | } 305 | 306 | expect_true(is.null(reg$felevels$`get(idvar)`), TRUE) 307 | expect_true(is.null(reg$felevels$`get(timevar)`), TRUE) 308 | expect_true(is.null(reg$nclusters), TRUE) 309 | expect_true(reg$se_type %in% c("stata", "HC1")) 310 | 311 | }) 312 | 313 | 314 | test_that("Coefficients and Standard Errors agree with base STATA", { 315 | 316 | bools <- c("TTT", "TFT", "FTF", "FTT", "FFF", "FFT") 317 | 318 | for (i in seq_along(bools)) { 319 | bool <- bools[i] 320 | estimator <- "FHS" 321 | outcomevar <- "y_base" 322 | str_policy_vars <- c("z_lead3", "z_fd_lead2", "z_fd", "z_fd_lag1", "z_fd_lag2", "z_lag3") 323 | controls <- "x_r" 324 | proxy <- "eta_m" 325 | proxyIV <- "z_fd_lead3" 326 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, 327 | str_policy_vars, FALSE, controls, proxy, proxyIV) 328 | 329 | idvar <- "id" 330 | timevar <- "t" 331 | 332 | FE <- as.logical(substring(bool, 1, 1)) 333 | TFE <- as.logical(substring(bool, 2, 2)) 334 | cluster <- as.logical(substring(bool, 3, 3)) 335 | 336 | reg <- EventStudyFHS( 337 | prepared_model_formula = event_study_formula, 338 | prepared_data = df_EventStudyFHS_example, 339 | idvar = idvar, 340 | timevar = timevar, 341 | FE = FE, 342 | TFE = TFE, 343 | cluster = cluster 344 | ) 345 | 346 | df_test_STATA <- read.csv("./input/df_test_base_STATA_FHS.csv") 347 | epsilon <- 10e-6 348 | 349 | expect_equal(unname(reg$coefficients["z_fd"]), df_test_STATA[df_test_STATA[1] == "z_fd",][[2*i]], tolerance = epsilon) 350 | expect_equal(unname(reg$coefficients["z_fd_lead2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lead2",][[2*i]], tolerance = epsilon) 351 | expect_equal(unname(reg$coefficients["eta_m"]), df_test_STATA[df_test_STATA[1] == "eta_m",][[2*i]], tolerance = epsilon) 352 | expect_equal(unname(reg$coefficients["z_fd_lag1"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag1",][[2*i]], tolerance = epsilon) 353 | expect_equal(unname(reg$coefficients["z_fd_lag2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag2",][[2*i]], tolerance = epsilon) 354 | expect_equal(unname(reg$coefficients["z_lead3"]), df_test_STATA[df_test_STATA[1] == "z_lead3",][[2*i]]*(-1), tolerance = epsilon) 355 | expect_equal(unname(reg$coefficients["z_lag3"]), df_test_STATA[df_test_STATA[1] == "z_lag3",][[2*i]], tolerance = epsilon) 356 | expect_equal(unname(reg$coefficients["x_r"]), df_test_STATA[df_test_STATA[1] == "x_r",][[2*i]], tolerance = epsilon) 357 | 358 | expect_equal(unname(reg$std.error["z_fd"]), df_test_STATA[df_test_STATA[1] == "z_fd",][[2*i+1]], tolerance = epsilon) 359 | expect_equal(unname(reg$std.error["z_fd_lead2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lead2",][[2*i+1]], tolerance = epsilon) 360
| expect_equal(unname(reg$std.error["eta_m"]), df_test_STATA[df_test_STATA[1] == "eta_m",][[2*i+1]], tolerance = epsilon) 361 | expect_equal(unname(reg$std.error["z_fd_lag1"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag1",][[2*i+1]], tolerance = epsilon) 362 | expect_equal(unname(reg$std.error["z_fd_lag2"]), df_test_STATA[df_test_STATA[1] == "z_fd_lag2",][[2*i+1]], tolerance = epsilon) 363 | expect_equal(unname(reg$std.error["z_lead3"]), df_test_STATA[df_test_STATA[1] == "z_lead3",][[2*i+1]], tolerance = epsilon) 364 | expect_equal(unname(reg$std.error["z_lag3"]), df_test_STATA[df_test_STATA[1] == "z_lag3",][[2*i+1]], tolerance = epsilon) 365 | expect_equal(unname(reg$std.error["x_r"]), df_test_STATA[df_test_STATA[1] == "x_r",][[2*i+1]], tolerance = epsilon) 366 | } 367 | }) 368 | 369 | -------------------------------------------------------------------------------- /R/EventStudy.R: -------------------------------------------------------------------------------- 1 | #' Estimates Equation (2) in Freyaldenhoven et al. (2021) 2 | #' 3 | #' @description `EventStudy` uses regression methods to estimate the effect of a policy on a given outcome. 4 | #' 5 | #' @param estimator Accepts one of "OLS" or "FHS". If "OLS" is specified, implements Ordinary Least Squares. If "FHS" is specified, implements Instrumental Variables (IV) estimator proposed in [Freyaldenhoven Hansen Shapiro (FHS, 2019)](https://www.aeaweb.org/articles?id=10.1257/aer.20180609). 6 | #' @param data Data frame containing the variables of interest. 7 | #' @param outcomevar Character indicating column of outcome variable y. 8 | #' @param policyvar Character indicating column of policy variable z. 9 | #' @param idvar Character indicating column of units. 10 | #' @param timevar Character indicating column of time periods. 11 | #' @param controls Optional character vector indicating a set of control variables q. 12 | #' @param proxy Character indicating column of variable that is thought to be affected by the confound but not by the policy. 13 | #' Should be specified if and only if estimator is specified as "FHS". 14 | #' @param proxyIV Character of column to be used as an instrument. Should be specified if and only if estimator is specified as "FHS". 15 | #' If NULL, defaults to the strongest lead of the policy variable based on the first stage. 16 | #' @param FE Logical indicating whether unit fixed-effects should be included. Defaults to TRUE. 17 | #' @param TFE Logical indicating whether time fixed-effects should be included. Defaults to TRUE. 18 | #' @param cluster Logical indicating whether to use clustered errors by units. If FALSE, will use unclustered heteroskedasticity-robust standard errors. 19 | #' Defaults to TRUE. Must be TRUE if FE is TRUE. 20 | #' @param post Whole number indicating the number of periods in the past before which the past values of the policy 21 | #' are not supposed to affect the value of the outcome. Corresponds to M in equation (2) of 22 | #' [Freyaldenhoven et al. (2021)](https://www.nber.org/papers/w29170). 23 | #' @param overidpost Optional whole number indicating the number of event times after "post" to be included in estimation. 24 | #' Defaults to 1. 25 | #' Corresponds to L_M in equation (2) of [Freyaldenhoven et al. (2021)](https://www.nber.org/papers/w29170). 26 | #' @param pre Whole number indicating the number of periods in the future after which the future values of the policy are 27 | #' not supposed to affect the value of the outcome today. 
Corresponds to G in equation (2) of 28 | #' [Freyaldenhoven et al. (2021)](https://www.nber.org/papers/w29170). 29 | #' @param overidpre Optional whole number indicating the number of event times earlier than -"pre" to be included in estimation. 30 | #' Defaults to "post" + "pre". 31 | #' Corresponds to L_G in equation (2) of [Freyaldenhoven et al. (2021)](https://www.nber.org/papers/w29170). 32 | #' @param normalize Specifies the event-time coefficient to be normalized. Defaults to - pre - 1. 33 | #' @param anticipation_effects_normalization If set to TRUE, runs the default process and switches coefficient to be normalized to 0 34 | #' when there are anticipation effects. If set to FALSE, does not make the switch. Defaults to TRUE. 35 | #' @param allow_duplicate_id If TRUE, the function estimates a regression where duplicated ID-time rows are weighted by their duplication count. If FALSE, the function raises an error if duplicate unit-time keys exist in the input data. Default is FALSE. 36 | #' @param avoid_internal_copy If TRUE, the function avoids making an internal deep copy of the input data, and instead directly modifies the input data.table. Default is FALSE. 37 | #' 38 | #' @return A list that contains, under "output", the estimation output as an lm_robust object, and under "arguments", the arguments passed to the function. 39 | #' @import dplyr 40 | #' @import estimatr 41 | #' @importFrom stats reformulate 42 | #' @importFrom data.table setorderv as.data.table is.data.table .SD copy 43 | #' @export 44 | #' 45 | #' @examples 46 | #' 47 | #' # A minimal example 48 | #' eventstudy_model <- 49 | #' EventStudy( 50 | #' estimator = "OLS", 51 | #' data = example_data, 52 | #' outcomevar = "y_base", 53 | #' policyvar = "z", 54 | #' idvar = "id", 55 | #' timevar = "t", 56 | #' pre = 0, post = 3, 57 | #' normalize = -1 58 | #' ) 59 | #' 60 | #' ### Access estimated model 61 | #' eventstudy_model$output 62 | #' 63 | #' summary(eventstudy_model$output) 64 | #' 65 | #' ### data.frame of estimates 66 | #' estimatr::tidy(eventstudy_model$output) 67 | #' 68 | #' ### Access arguments 69 | #' eventstudy_model$arguments 70 | #' 71 | #' # A dynamic OLS model with anticipation effects and controls 72 | #' eventstudy_model_dyn <- 73 | #' EventStudy( 74 | #' estimator = "OLS", 75 | #' data = example_data, 76 | #' outcomevar = "y_base", 77 | #' policyvar = "z", 78 | #' idvar = "id", 79 | #' timevar = "t", 80 | #' controls = "x_r", 81 | #' FE = TRUE, TFE = TRUE, 82 | #' post = 3, overidpost = 5, 83 | #' pre = 2, overidpre = 4, 84 | #' normalize = - 3, 85 | #' cluster = TRUE, 86 | #' anticipation_effects_normalization = TRUE 87 | #' ) 88 | #' 89 | #' summary(eventstudy_model_dyn$output) 90 | #' 91 | #' # A static model 92 | #' eventstudy_model_static <- 93 | #' EventStudy( 94 | #' estimator = "OLS", 95 | #' data = example_data, 96 | #' outcomevar = "y_jump_m", 97 | #' policyvar = "z", 98 | #' idvar = "id", 99 | #' timevar = "t", 100 | #' FE = TRUE, TFE = TRUE, 101 | #' post = 0, overidpost = 0, 102 | #' pre = 0, overidpre = 0, 103 | #' cluster = TRUE 104 | #' ) 105 | #' 106 | #' summary(eventstudy_model_static$output) 107 | #' 108 | #' # A dynamic model with an unbalanced panel 109 | #' data_unbal <- example_data[1:(nrow(example_data)-1),] # drop last row to make unbalanced 110 | #' 111 | #' eventstudy_model_unbal <- 112 | #' EventStudy( 113 | #' estimator = "OLS", 114 | #' data = data_unbal, 115 | #' outcomevar = "y_base", 116 | #' policyvar = "z", 117 | #' idvar = "id", 118 | #' timevar = "t", 119 | #' 
pre = 0, post = 3, 120 | #' normalize = -1 121 | #' ) 122 | #' 123 | #' summary(eventstudy_model_unbal$output) 124 | #' 125 | #' # A dynamic model estimated using IV 126 | #' eventstudy_model_iv <- 127 | #' EventStudy( 128 | #' estimator = "FHS", 129 | #' data = example_data, 130 | #' outcomevar = "y_base", 131 | #' policyvar = "z", 132 | #' idvar = "id", 133 | #' timevar = "t", 134 | #' proxy = "x_r", 135 | #' FE = TRUE, TFE = TRUE, 136 | #' post = 2, overidpost = 1, 137 | #' pre = 0, overidpre = 3, 138 | #' normalize = -1, 139 | #' cluster = TRUE 140 | #' ) 141 | #' 142 | #' summary(eventstudy_model_iv$output) 143 | #' 144 | 145 | EventStudy <- function(estimator, data, outcomevar, policyvar, idvar, timevar, controls = NULL, 146 | proxy = NULL, proxyIV = NULL, FE = TRUE, TFE = TRUE, post, overidpost = 1, pre, overidpre = post + pre, 147 | normalize = -1 * (pre + 1), cluster = TRUE, anticipation_effects_normalization = TRUE, 148 | allow_duplicate_id = FALSE, avoid_internal_copy = FALSE) { 149 | 150 | # Check for errors in arguments 151 | if (! estimator %in% c("OLS", "FHS")) {stop("estimator should be either 'OLS' or 'FHS'.")} 152 | if (! is.data.frame(data)) {stop("data should be a data frame.")} 153 | for (var in c(idvar, timevar, outcomevar, policyvar)) { 154 | if ((! is.character(var))) { 155 | stop(paste0(var, " should be a character.")) 156 | } 157 | if (! var %in% colnames(data)) { 158 | stop(paste0(var, " should be the name of a variable in the dataset.")) 159 | } 160 | } 161 | if (! (is.null(controls) | is.character(controls))) {stop("controls should be either NULL or a character.")} 162 | 163 | if ((estimator == "OLS" & ! is.null(proxy))) {stop("proxy should only be specified when estimator = 'FHS'.")} 164 | if ((estimator == "FHS" & ! is.character(proxy))) {stop("proxy should be a character.")} 165 | if ((estimator == "OLS" & ! is.null(proxyIV))) {stop("proxyIV should only be specified when estimator = 'FHS'.")} 166 | if ((estimator == "FHS" & 167 | ! is.null(proxyIV) & ! is.character(proxyIV))) {stop("proxyIV should be a character.")} 168 | if (estimator == "FHS" & pre == 0 & overidpre == 0 & is.null(proxyIV)) { 169 | stop("When estimator is 'FHS' and there are no leads in the model, proxyIV must be specified explicitly.") 170 | } 171 | 172 | for (var in c(FE, TFE, cluster, anticipation_effects_normalization, allow_duplicate_id, avoid_internal_copy)) { 173 | if (! is.logical(var)) { 174 | stop(paste0(var, " should be either TRUE or FALSE.")) 175 | } 176 | } 177 | 178 | if (FE & !cluster) {stop("cluster=TRUE is required when FE=TRUE.")} 179 | 180 | if (! (is.numeric(post) & post >= 0 & post %% 1 == 0)) {stop("post should be a whole number.")} 181 | if (! (is.numeric(overidpost) & overidpost >= 0 & overidpost %% 1 == 0)) {stop("overidpost should be a whole number.")} 182 | if (! (is.numeric(pre) & pre >= 0 & pre %% 1 == 0)) {stop("pre should be a whole number.")} 183 | if (! (is.numeric(overidpre) & overidpre >= 0 & overidpre %% 1 == 0)) {stop("overidpre should be a whole number.")} 184 | if (normalize == 0 & post == 0 & overidpost == 0 & pre == 0 & overidpre == 0) {stop("normalize cannot be zero when post = overidpost = pre = overidpre = 0.")} 185 | if (! (is.numeric(normalize) & normalize %% 1 == 0 186 | & normalize >= -(pre + overidpre + 1) & normalize <= post + overidpost)) { 187 | stop("normalize should be an integer between -(pre + overidpre + 1) and (post + overidpost).") 188 | } 189 | if (avoid_internal_copy & ! 
data.table::is.data.table(data)) { 190 | warning("`avoid_internal_copy` has no effect because dataset passed to `data` is not a `data.table`.") 191 | } 192 | 193 | # Check for errors in data 194 | if (! is.numeric(data[[timevar]])) {stop("timevar column in dataset should be numeric.")} 195 | if (! all(data[[timevar]] %% 1 == 0)) { 196 | stop("timevar column in dataset should be a vector of integers.") 197 | } 198 | 199 | if (data.table::is.data.table(data)) { 200 | if (!avoid_internal_copy) { 201 | data <- data.table::copy(data) 202 | } 203 | } else { 204 | data <- data.table::as.data.table(data) 205 | } 206 | data.table::setorderv(data, c(idvar, timevar)) 207 | data_ids <- data[, .SD, .SDcols = c(idvar, timevar)] 208 | 209 | # Check panel balance and unique keys 210 | n_units <- length(base::unique(data[[idvar]])) 211 | n_periods <- length(base::unique(data[[timevar]])) 212 | n_unique_rows <- nrow(data[!base::duplicated(data_ids),]) 213 | if (n_unique_rows != n_units*n_periods) { 214 | warning("Dataset is unbalanced.") 215 | unbalanced <- TRUE 216 | } else { 217 | unbalanced <- FALSE 218 | } 219 | if (n_unique_rows != nrow(data)) { 220 | if (allow_duplicate_id == TRUE) { 221 | warning("idvar-timevar pairs do not uniquely identify all rows in the data.") 222 | } else if (allow_duplicate_id == FALSE) { 223 | stop("idvar-timevar pairs do not uniquely identify all rows in the data. Turn on allow_duplicate_id if you want to proceed with weighted duplicated rows.") 224 | } 225 | } 226 | 227 | detect_holes <- function(dt, idvar, timevar) { 228 | holes_per_id <- dt[, .SD[!is.na(base::get(timevar))], by = c(idvar) 229 | ][, list(holes = any(base::diff(base::get(timevar)) != 1)), 230 | by = c(idvar)] 231 | 232 | return(any(holes_per_id$holes)) 233 | } 234 | 235 | if (detect_holes(data, idvar, timevar)) { 236 | warning(paste0("Note: gaps of more than one unit in the time variable '", timevar, "' were detected. ", 237 | "Treating these as gaps in the panel dimension.")) 238 | timevar_holes <- TRUE 239 | } else { 240 | timevar_holes <- FALSE 241 | } 242 | 243 | if (post == 0 & overidpost == 0 & pre == 0 & overidpre == 0) { 244 | static <- TRUE 245 | } else { 246 | static <- FALSE 247 | } 248 | 249 | num_evenstudy_coeffs <- overidpre + pre + post + overidpost 250 | num_periods <- max(data[[timevar]], na.rm = T) - min(data[[timevar]], na.rm = T) 251 | if (num_evenstudy_coeffs > num_periods - 1) {stop("overidpre + pre + post + overidpost cannot exceed the data window.")} 252 | 253 | for (tag in c("_fd", "_lead", "_lag")) { 254 | if (sum(grepl(paste0(policyvar, tag), colnames(data))) > 0) { 255 | warning(paste0("Variables starting with ", policyvar, tag, 256 | " should be reserved for usage by eventstudyr.")) 257 | } 258 | } 259 | 260 | # Compute shifts in policy variable 261 | num_fd_lags <- post + overidpost - 1 262 | num_fd_leads <- pre + overidpre 263 | 264 | furthest_lag_period <- num_fd_lags + 1 265 | 266 | if (static) { 267 | message("post, overidpost, pre, and overidpre are set to 0. 
A static model will be estimated.") 268 | } else { 269 | data <- ComputeFirstDifferences(data, idvar, timevar, policyvar, timevar_holes) 270 | 271 | if ((post + overidpost - 1 >= 1) & (pre + overidpre >= 1)) { 272 | shift_values = c(-num_fd_leads:-1, 1:num_fd_lags) 273 | } else if (pre + overidpre < 1) { 274 | shift_values = 1:num_fd_lags 275 | } else if (post + overidpost - 1 < 1) { 276 | shift_values = -num_fd_leads:-1 277 | } 278 | 279 | data <- ComputeShifts(data, idvar, timevar, 280 | shiftvar = paste0(policyvar, "_fd"), 281 | shiftvalues = shift_values, 282 | timevar_holes = timevar_holes) 283 | } 284 | 285 | if (!static) { 286 | data <- ComputeShifts(data, idvar, timevar, 287 | shiftvar = policyvar, 288 | shiftvalues = c(-num_fd_leads, furthest_lag_period), 289 | timevar_holes = timevar_holes) 290 | 291 | lead_endpoint_var <- paste0(policyvar, "_lead", num_fd_leads) 292 | data[, (lead_endpoint_var) := 1 - get(lead_endpoint_var)] 293 | } 294 | 295 | if (pre != 0 & normalize == -1 & anticipation_effects_normalization) { 296 | normalize <- -pre - 1 297 | warning(paste("You allowed for anticipation effects", pre, 298 | "periods before the event, so the coefficient at", normalize, 299 | "was selected to be normalized to zero.", 300 | "To override this, change anticipation_effects_normalization to FALSE.")) 301 | } 302 | 303 | if (normalize < 0) { 304 | if (normalize == -(pre + overidpre + 1)) { 305 | normalization_column <- paste0(policyvar, "_lead", (-1 * (normalize + 1))) 306 | } else { 307 | normalization_column <- paste0(policyvar, "_fd_lead", (-1 * normalize)) 308 | } 309 | } else if (normalize == 0){ 310 | if (normalize == post + overidpost) { 311 | normalization_column <- paste0(policyvar, "_lag", (normalize)) 312 | } else { 313 | normalization_column <- paste0(policyvar, "_fd") 314 | } 315 | } else { 316 | if (normalize == post + overidpost) { 317 | normalization_column <- paste0(policyvar, "_lag", (normalize)) 318 | } else { 319 | normalization_column <- paste0(policyvar, "_fd_lag", (normalize)) 320 | } 321 | } 322 | 323 | if (static) { 324 | str_policy_vars = policyvar 325 | } else { 326 | all_vars <- names(data)[grepl(policyvar, names(data))] 327 | 328 | lead_endpoint_var <- all_vars[grepl(paste0("^", policyvar, "_lead"), all_vars)] 329 | lead_fd_vars <- all_vars[grepl(paste0("^", policyvar, "_fd_lead"), all_vars)] 330 | fd_var <- paste0(policyvar, "_fd") 331 | lag_fd_vars <- all_vars[grepl(paste0("^", policyvar, "_fd_lag"), all_vars)] 332 | lag_endpoint_var <- all_vars[grepl(paste0("^", policyvar, "_lag"), all_vars)] 333 | 334 | str_policy_vars <- c(lead_endpoint_var, lead_fd_vars, fd_var, lag_fd_vars, lag_endpoint_var) 335 | str_policy_vars <- str_policy_vars[!(str_policy_vars %in% normalization_column)] 336 | } 337 | 338 | if (estimator == "OLS") { 339 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 340 | static, controls, proxy, proxyIV) 341 | 342 | output <- EventStudyOLS(event_study_formula, data, idvar, timevar, FE, TFE, cluster) 343 | coefficients <- str_policy_vars 344 | } 345 | if (estimator == "FHS") { 346 | 347 | if (is.null(proxyIV)) { 348 | Fstart <- 0 349 | str_fd_leads <- str_policy_vars[grepl(paste0("^", policyvar, "_fd_lead"), str_policy_vars)] 350 | 351 | for (var in str_fd_leads) { 352 | lm <- lm(data = data, formula = stats::reformulate(termlabels = var, response = proxy)) 353 | Floop <- summary(lm)$fstatistic["value"] 354 | if (Floop > Fstart) { 355 | Fstart <- Floop 356 | proxyIV <- var 357 | } 358 | } 359 | message(paste0("Defaulting to
strongest lead of differenced policy variable: proxyIV = ", proxyIV, 360 | ". To specify a different proxyIV use the proxyIV argument.")) 361 | } 362 | 363 | event_study_formula <- PrepareModelFormula(estimator, outcomevar, str_policy_vars, 364 | static, controls, proxy, proxyIV) 365 | 366 | output <- EventStudyFHS(event_study_formula, data, idvar, timevar, FE, TFE, cluster) 367 | coefficients <- dplyr::setdiff(str_policy_vars, proxyIV) 368 | } 369 | 370 | event_study_args <- list("estimator" = estimator, 371 | "data" = data, 372 | "outcomevar" = outcomevar, 373 | "policyvar" = policyvar, 374 | "idvar" = idvar, 375 | "timevar" = timevar, 376 | "controls" = controls, 377 | "proxy" = proxy, 378 | "proxyIV" = proxyIV, 379 | "FE" = FE, 380 | "TFE" = TFE, 381 | "post" = post, 382 | "overidpost" = overidpost, 383 | "pre" = pre, 384 | "overidpre" = overidpre, 385 | "normalize" = normalize, 386 | "normalization_column" = normalization_column, 387 | "cluster" = cluster, 388 | "eventstudy_coefficients" = coefficients) 389 | 390 | return(list("output" = output, 391 | "arguments" = event_study_args)) 392 | } 393 | --------------------------------------------------------------------------------
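
Note on the normalize argument: the normalization block in R/EventStudy.R maps normalize to the name of the event-time coefficient that is dropped from the regression, with separate cases for the endpoint levels and the first-difference leads and lags. The minimal sketch below restates that mapping so the naming scheme can be checked interactively; NormalizationColumnSketch is a hypothetical standalone helper written for illustration and is not a function exported by eventstudyr.

NormalizationColumnSketch <- function(policyvar, pre, overidpre, post, overidpost, normalize) {
    if (normalize < 0) {
        if (normalize == -(pre + overidpre + 1)) {
            paste0(policyvar, "_lead", -1 * (normalize + 1))  # left endpoint coefficient
        } else {
            paste0(policyvar, "_fd_lead", -1 * normalize)     # a lead of the first difference
        }
    } else if (normalize == 0) {
        if (normalize == post + overidpost) {
            paste0(policyvar, "_lag", normalize)              # right endpoint in the degenerate case
        } else {
            paste0(policyvar, "_fd")                          # contemporaneous first difference
        }
    } else {
        if (normalize == post + overidpost) {
            paste0(policyvar, "_lag", normalize)              # right endpoint coefficient
        } else {
            paste0(policyvar, "_fd_lag", normalize)           # a lag of the first difference
        }
    }
}

# With an event window like the one in the test files above (three leads, three lags),
# the default normalize = -1 drops z_fd_lead1, consistent with the str_policy_vars
# vectors in those tests, which omit z_fd_lead1; normalizing on the left endpoint
# (normalize = -4) drops z_lead3 instead.
NormalizationColumnSketch("z", pre = 0, overidpre = 3, post = 3, overidpost = 0, normalize = -1)
#> [1] "z_fd_lead1"
NormalizationColumnSketch("z", pre = 0, overidpre = 3, post = 3, overidpost = 0, normalize = -4)
#> [1] "z_lead3"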