├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── Changelog ├── DESCRIPTION ├── NAMESPACE ├── R ├── functions.R ├── onAttach.R ├── pwfe.R └── wfe.R ├── README.md ├── man ├── pwfe.Rd └── wfe.Rd └── src ├── Makevars ├── init.c ├── vector.c ├── vector.h ├── wfe.c └── wfe.h /.Rbuildignore: -------------------------------------------------------------------------------- 1 | README.md 2 | README.Rmd 3 | \.git 4 | \.gitignore 5 | \.travis\.yml 6 | \.DS_Store 7 | \.\.Rcheck 8 | builder.sh 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | 24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 25 | .httr-oauth 26 | 27 | # knitr and R markdown default cache directories 28 | /*_cache/ 29 | /cache/ 30 | 31 | # Temporary files created by R markdown 32 | *.utf8.md 33 | *.knit.md 34 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r 2 | 3 | language: R 4 | sudo: false 5 | cache: packages 6 | -------------------------------------------------------------------------------- /Changelog: -------------------------------------------------------------------------------- 1 | version date description 2 | 0.1 12/12/11 The first version on CRAN 3 | 1.0 07/17/12 First-difference and difference-in-differences estimator added 4 | 1.1 11/14/12 1) Standard errors with degrees of freedom adjustment, 2) missing data issue fixed, 3) issues with pdata.frame resolved 5 | 1.2 02/05/13 store.wdm option added 6 | 1.3 08/09/14 minor bug fixed 7 | 1.4 04/23/16 minor bug fixed 8 | 1.5 07/03/16 Implementation of Difference-in-Differences with nearest neighbor matching on pre-treatment outcome variables 9 | 1.6 07/18/17 standard error with degress of freedom correction (two-way) 10 | 1.7 11/25/18 standard error with degress of freedom correction (one-way) 11 | 1.8 01/09/19 minor update using cbind() and rbind() instead of cBind() and rBind() 12 | 1.9.1 02/13/19 White statistics degress of freedom correction and cleanup 13 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: wfe 2 | Type: Package 3 | Title: Weighted Linear Fixed Effects Regression Models for Causal Inference 4 | Version: 1.9.1 5 | Date: 2019-02-13 6 | Authors@R: c( 7 | person("In Song", "Kim", email = "insong@mit.edu", role = c("aut", "cre")), 8 | person("Kosuke", "Imai", email = "imai@Harvard.edu", role = c("aut")) 9 | ) 10 | Description: Provides a computationally efficient way of fitting 11 | weighted linear fixed effects estimators for causal 12 | inference with various weighting schemes. Weighted linear 13 | fixed effects estimators can be used to estimate the 14 | average treatment effects under different identification 15 | strategies. This includes stratified randomized 16 | experiments, matching and stratification for 17 | observational studies, first differencing, and 18 | difference-in-differences. The package implements methods 19 | described in Imai and Kim (2017) "When should We Use 20 | Linear Fixed Effects Regression Models for Causal 21 | Inference with Longitudinal Data?", available at 22 | . 23 | License: GPL(>= 2) 24 | Imports: 25 | utils, 26 | arm, 27 | Matrix, 28 | MASS, 29 | methods 30 | Depends: R (>= 3.2.0) 31 | Encoding: UTF-8 32 | LazyData: true 33 | BugReports: https://github.com/insongkim/wfe/issues 34 | 35 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | useDynLib(wfe, .registration = TRUE, .fixes = "C_") 2 | export(wfe, pwfe) 3 | 4 | ## importFrom(Rcpp, evalCpp) 5 | ## exportPattern("^[[:alpha:]]+") 6 | 7 | importFrom(arm, bayesglm) 8 | import(Matrix, MASS) 9 | 10 | S3method(summary, wfe) 11 | S3method(print, wfe) 12 | S3method(print, summary.wfe) 13 | 14 | S3method(summary, pwfe) 15 | S3method(print, pwfe) 16 | S3method(print, summary.pwfe) 17 | 18 | S3method(summary, wfedid) 19 | S3method(print, wfedid) 20 | S3method(print, summary.wfedid) 21 | 22 | 23 | importFrom("methods", "as") 24 | importFrom("stats", "as.formula", "binomial", "coef", "fitted", "lm", 25 | "model.frame", "model.matrix", "model.response", "na.omit", 26 | "pchisq", "printCoefmat", "pt", "resid", "vcov") 27 | importFrom("utils", "flush.console", "object.size", 28 | "packageDescription") 29 | -------------------------------------------------------------------------------- /R/functions.R: -------------------------------------------------------------------------------- 1 | 2 | Index <- function(index_name, uniq_index_name, len_u_index, len_data) { 3 | 4 | return(.C("Index", as.integer(index_name), as.integer(uniq_index_name), as.integer(len_u_index), 5 | as.integer(len_data), 6 | result = integer(len_data))$result) 7 | 8 | } 9 | 10 | 11 | 12 | VectorizeC <- function(W, time.index, dyad.index, n.row) { 13 | 14 | return(.C("VectorizeC", as.double(W), as.integer(nrow(W)), as.integer(ncol(W)), 15 | as.integer(time.index), as.integer(dyad.index), as.integer(n.row), 16 | results = double(n.row))$results) 17 | 18 | } 19 | 20 | 21 | 22 | Transform <- function(y, treat, pscore) { 23 | 24 | return(.C("Transform", as.double(y), as.integer(length(y)), as.integer(treat), 25 | as.double(pscore), ytrans = double(length(y)))$ytrans) 26 | 27 | } 28 | 29 | 30 | 31 | GenTime <- function(unit_index, len_data, len_u_index) { 32 | return(.C("GenTime", as.integer(unit_index), as.integer(len_data), 33 | as.integer(len_u_index), time_index = double(len_data))$time_index) 34 | } 35 | 36 | 37 | 38 | GenWeightsUnit <- function(unit_index, time_index, tr, C_it, len_data, len_u_index, len_t_index, ate, att, size, verbose) { 39 | return(.C("GenWeightsUnit", as.integer(unit_index), as.integer(time_index), as.integer(tr), as.integer(C_it), 40 | as.integer(len_data), as.integer(len_u_index), as.integer(len_t_index), 41 | as.integer(ate), as.integer(att), as.integer(verbose), 42 | weight = double(len_u_index*len_t_index))$weight) 43 | } 44 | 45 | 46 | GenWeightsTime <- function(time_index, unit_index, tr, C_it, len_data, len_t_index, len_u_index, ate, att, size, verbose) { 47 | return(.C("GenWeightsTime", as.integer(time_index), as.integer(unit_index), as.integer(tr), as.integer(C_it), 48 | as.integer(len_data), as.integer(len_t_index), as.integer(len_u_index), 49 | as.integer(ate), as.integer(att), as.integer(verbose), 50 | weight = double(len_u_index*len_t_index))$weight) 51 | } 52 | 53 | 54 | 55 | 56 | 57 | GenWeightsFD <- function(unit_index, time_index, tr, C_it, len_data, len_u_index, len_t_index, ate, att, verbose) { 58 | return(.C("GenWeightsFD", as.integer(unit_index), as.integer(time_index), as.integer(tr), as.integer(C_it), 59 | as.integer(len_data), as.integer(len_u_index), as.integer(len_t_index), 60 | as.integer(ate), as.integer(att), as.integer(verbose), 61 | weightfd = double(len_u_index*len_t_index))$weightfd) 62 | } 63 | 64 | 65 | CalDID <- function(unit_index, time_index, tr, C_it, y, len_data, len_u_index, len_t_index, ate, att, verbose) { 66 | return(.C("CalDID", as.integer(unit_index), as.integer(time_index), as.integer(tr), as.integer(C_it), 67 | as.double(y), as.integer(len_data), as.integer(len_u_index), as.integer(len_t_index), 68 | as.integer(ate), as.integer(att), as.integer(verbose), 69 | did = double(1))$did) 70 | } 71 | 72 | 73 | 74 | DemeanDID <- function(var_name, weight, unit_index, time_index, len_u_index, len_t_index, len_data) { 75 | return(.C("DemeanDID", as.double(var_name), as.double(weight), 76 | as.integer(unit_index), as.integer(time_index), 77 | as.integer(len_u_index), as.integer(len_t_index), as.integer(len_data), 78 | DemeanDID = double(len_data))$DemeanDID) 79 | } 80 | 81 | 82 | GenWeightsMDID <- function(unit_index, time_index, tr, C_it, y, maxdev.did, len_data, 83 | len_u_index, len_t_index, ate, att, verbose) { 84 | return(.C("GenWeightsMDID", as.integer(unit_index), as.integer(time_index), as.integer(tr), as.integer(C_it), 85 | as.double(y), as.double(maxdev.did), as.integer(len_data), as.integer(len_u_index), as.integer(len_t_index), 86 | as.integer(ate), as.integer(att), as.integer(verbose), 87 | weightmdid = double(len_u_index*len_t_index))$weightmdid) 88 | } 89 | 90 | 91 | 92 | GenWeightsDID <- function(unit_index, time_index, tr, C_it, len_data, len_u_index, len_t_index, ate, att, verbose) { 93 | return(.C("GenWeightsDID", as.integer(unit_index), as.integer(time_index), as.integer(tr), as.integer(C_it), 94 | as.integer(len_data), as.integer(len_u_index), as.integer(len_t_index), 95 | as.integer(ate), as.integer(att), as.integer(verbose), 96 | weightdid = double(len_u_index*len_t_index))$weightdid) 97 | } 98 | 99 | Demean <- function(var_name, index, len_index, len_data) { 100 | return(.C("Demean", as.double(var_name), as.integer(index), as.integer(len_index), 101 | as.integer(len_data), demean = double(len_data))$demean) 102 | } 103 | 104 | 105 | 106 | 107 | WDemean <- function(var_name, weight, index, len_index, len_data) { 108 | return(.C("WDemean", as.double(var_name), as.double(weight), as.integer(index), as.integer(len_index), 109 | as.integer(len_data), Wdemean = double(len_data))$Wdemean) 110 | } 111 | 112 | 113 | 114 | 115 | WWDemean <- function(var_name, weight, index, len_index, len_data) { 116 | return(.C("WWDemean", as.double(var_name), as.double(weight), as.integer(index), as.integer(len_index), 117 | as.integer(len_data), WWdemean = double(len_data))$WWdemean) 118 | } 119 | 120 | 121 | 122 | TwayDemean <- function(var_name, unit_index, time_index, len_u_index, len_t_index, len_data) { 123 | return(.C("TwayDemean", as.double(var_name), as.integer(unit_index), as.integer(time_index), 124 | as.integer(len_u_index), as.integer(len_t_index), as.integer(len_data), 125 | TwayDemean = double(len_data))$TwayDemean) 126 | } 127 | 128 | 129 | 130 | MDummy <- function(index, len_index, len_data) { 131 | return(.C("MDummy", as.integer(index), as.integer(len_index), as.integer(len_data), 132 | dummy = integer(len_data*len_index))$dummy) 133 | } 134 | 135 | 136 | 137 | 138 | XXiSum <- function(len_data, n_cov, unit_index, len_uniq_unit_index, X.tilde) { 139 | return(.C("XXiSum", as.integer(len_data), as.integer(n_cov), 140 | as.integer(unit_index), as.integer(len_uniq_unit_index), 141 | as.double(X.tilde), 142 | result = double(n_cov*n_cov))$result) 143 | } 144 | 145 | 146 | 147 | XWXiSum <- function(len_data, n_cov, unit_index, len_uniq_unit_index, X.tilde, weights) { 148 | return(.C("XWXiSum", as.integer(len_data), as.integer(n_cov), 149 | as.integer(unit_index), as.integer(len_uniq_unit_index), 150 | as.double(X.tilde), as.double(weights), 151 | result = double(n_cov*n_cov))$result) 152 | } 153 | 154 | 155 | OmegaHatHAC <- function(len_data, n_cov, unit_index, len_uniq_unit_index, X.tilde, u.tilde) { 156 | return(.C("OmegaHatHAC", as.integer(len_data), as.integer(n_cov), 157 | as.integer(unit_index), as.integer(len_uniq_unit_index), 158 | as.double(X.tilde), as.double(u.tilde), 159 | Omega_hat_HAC = double(n_cov*n_cov))$Omega_hat_HAC) 160 | } 161 | 162 | 163 | OmegaHatHC <- function(len_data, n_cov, unit_index, len_uniq_unit_index, X.tilde, u.tilde) { 164 | return(.C("OmegaHatHC", as.integer(len_data), as.integer(n_cov), 165 | as.integer(unit_index), as.integer(len_uniq_unit_index), 166 | as.double(X.tilde), as.double(u.tilde), 167 | Omega_hat_HC = double(n_cov*n_cov))$Omega_hat_HC) 168 | } 169 | 170 | 171 | 172 | 173 | OmegaDiDHAC <- function(len_data, n_cov, unit_index, len_uniq_unit_index, X.tilde, u.tilde, W) { 174 | return(.C("OmegaDiDHAC", as.integer(len_data), as.integer(n_cov), 175 | as.integer(unit_index), as.integer(len_uniq_unit_index), 176 | as.double(X.tilde), as.double(u.tilde), as.double(W), 177 | Omega_DiD_HAC = double(n_cov*n_cov))$Omega_DiD_HAC) 178 | } 179 | 180 | 181 | OmegaDiDHAC2 <- function(len_data, n_cov, unit_index, len_uniq_unit_index, X.tilde, u.tilde, W) { 182 | return(.C("OmegaDiDHAC2", as.integer(len_data), as.integer(n_cov), 183 | as.integer(unit_index), as.integer(len_uniq_unit_index), 184 | as.double(X.tilde), as.double(u.tilde), as.double(W), 185 | Omega_DiD_HAC = double(n_cov*n_cov))$Omega_DiD_HAC) 186 | } 187 | 188 | 189 | 190 | OmegaDiDHAC <- function(len_data, n_cov, unit_index, len_uniq_unit_index, X.tilde, u.tilde, W) { 191 | return(.C("OmegaDiDHAC", as.integer(len_data), as.integer(n_cov), 192 | as.integer(unit_index), as.integer(len_uniq_unit_index), 193 | as.double(X.tilde), as.double(u.tilde), as.double(W), 194 | Omega_DiD_HAC = double(n_cov*n_cov))$Omega_DiD_HAC) 195 | } 196 | 197 | 198 | 199 | 200 | ProjectionM <- function(Q_QQinv, Q, P1.first, P1.second, 201 | P1.third, P1.fourth, P1.fifth, 202 | Y, X, len_data, n_col, n_var, 203 | n_p1, n_p2, n_p3, n_p4){ 204 | return(.C("ProjectionM", as.complex(Q_QQinv), as.complex(Q), 205 | as.complex(P1.first), as.complex(P1.second), 206 | as.complex(P1.third), as.complex(P1.fourth), as.complex(P1.fifth), 207 | as.complex(Y), as.complex(X), 208 | as.integer(len_data), as.integer(n_col), 209 | as.integer(n_var), as.integer(n_p1), 210 | as.integer(n_p2), as.integer(n_p3), as.integer(n_p4), 211 | Projection = complex(len_data*n_var))$Projection) 212 | } 213 | 214 | 215 | 216 | comp_OmegaHAC <- function(X_1, u_1, X_2, u_2, len_data, 217 | n_cov, unit_index, len_unit){ 218 | return(.C("comp_OmegaHAC", as.complex(X_1), as.complex(u_1), 219 | as.complex(X_2), as.complex(u_2), as.integer(len_data), 220 | as.integer(n_cov), as.integer(unit_index), as.integer(len_unit), 221 | OmegaHAC = complex(n_cov*n_cov))$OmegaHAC) 222 | } 223 | 224 | 225 | 226 | comp_OmegaHC <- function(X_1, u_1, X_2, u_2, len_data, 227 | n_cov, unit_index, len_unit){ 228 | return(.C("comp_OmegaHC", as.complex(X_1), as.complex(u_1), 229 | as.complex(X_2), as.complex(u_2), as.integer(len_data), 230 | as.integer(n_cov), as.integer(unit_index), as.integer(len_unit), 231 | OmegaHC = complex(n_cov*n_cov))$OmegaHC) 232 | } 233 | 234 | 235 | 236 | LamdaDID1 <- function(len_Xtrow, len_Xhrow, Tunit_index, len_uniq_Tunit_index, 237 | Hunit_index, len_uniq_Hunit_index, 238 | X.tilde, len_Xtcol, u.tilde, 239 | X.hat, len_Xhcol, u.hat, W) { 240 | return(.C("LamdaDID1", as.integer(len_Xtrow), as.integer(len_Xhrow), 241 | as.integer(Tunit_index), as.integer(len_uniq_Tunit_index), 242 | as.integer(Hunit_index), as.integer(len_uniq_Hunit_index), 243 | as.double(X.tilde), as.integer(len_Xtcol), as.double(u.tilde), 244 | as.double(X.hat), as.integer(len_Xhcol), as.double(u.hat), as.double(W), 245 | LamdaDID1 = double(len_Xhcol*len_Xtcol))$LamdaDID1) 246 | } 247 | 248 | 249 | 250 | LamdaDID2 <- function(len_Xtrow, len_Xhrow, Tunit_index, len_uniq_Tunit_index, 251 | Hunit_index, len_uniq_Hunit_index, 252 | X.tilde, len_Xtcol, u.tilde, 253 | X.hat, len_Xhcol, u.hat, W) { 254 | return(.C("LamdaDID2", as.integer(len_Xtrow), as.integer(len_Xhrow), 255 | as.integer(Tunit_index), as.integer(len_uniq_Tunit_index), 256 | as.integer(Hunit_index), as.integer(len_uniq_Hunit_index), 257 | as.double(X.tilde), as.integer(len_Xtcol), as.double(u.tilde), 258 | as.double(X.hat), as.integer(len_Xhcol), as.double(u.hat), as.double(W), 259 | LamdaDID2 = double(len_Xtcol*len_Xhcol))$LamdaDID2) 260 | } 261 | 262 | 263 | 264 | 265 | 266 | Wdemean <- function (x, w, unit.index, time.index, data) { 267 | data$u.index <- Index(data[,unit.index]) 268 | data$t.index <- Index(data[,time.index]) 269 | uniq.unit <- unique(data$u.index) 270 | uniq.time <- unique(data$t.index) 271 | wdemean.x <- c() 272 | wdemean.x <- new.tr.x <- matrix(NA, nrow = length(uniq.time), ncol = length(uniq.unit)) 273 | for (i in 1:length(uniq.unit)){ 274 | sub.i <- data[data$u.index == uniq.unit[i], ] 275 | nr <- nrow(sub.i) 276 | denom <- as.numeric(sum(sub.i[,w])) 277 | x.star <- as.numeric((sub.i[,x]%*%sub.i[,w])/(denom)) 278 | tr.x <- as.vector(sub.i[,x] - rep(x.star, nr)) 279 | wdemean.x[,i] <- tr.x 280 | # sqrt(w)* (weighted demean) for SE 281 | for (j in 1:length(uniq.time)){ 282 | new.tr.x[j,i] <- sqrt(sub.i[,w][j])*(sub.i[,x][j] - x.star) 283 | } 284 | } 285 | w.demean <- as.vector(wdemean.x) 286 | w.tr.x <- as.vector(new.tr.x) 287 | list(w.demeaned = wdemean.x, new.tr.x = w.tr.x) 288 | } 289 | 290 | 291 | 292 | 293 | ## a function that checks memory usage 294 | 295 | .ls.objects <- function (pos = 1, pattern, order.by = "Size", decreasing=TRUE, head = TRUE, n = 10) { 296 | # based on postings by Petr Pikal and David Hinds to the r-help list in 2004 297 | # modified by: Dirk Eddelbuettel (http://stackoverflow.com/questions/1358003/tricks-to-manage-the-available-memory-in-an-r-session) 298 | # I then gave it a few tweaks (show size as megabytes and use defaults that I like) 299 | # a data frame of the objects and their associated storage needs. 300 | napply <- function(names, fn) sapply(names, function(x) 301 | fn(get(x, pos = pos))) 302 | names <- ls(pos = pos, pattern = pattern) 303 | obj.class <- napply(names, function(x) as.character(class(x))[1]) 304 | obj.mode <- napply(names, mode) 305 | obj.type <- ifelse(is.na(obj.class), obj.mode, obj.class) 306 | obj.size <- napply(names, object.size) / 10^6 # megabytes 307 | obj.dim <- t(napply(names, function(x) 308 | as.numeric(dim(x))[1:2])) 309 | vec <- is.na(obj.dim)[, 1] & (obj.type != "function") 310 | obj.dim[vec, 1] <- napply(names, length)[vec] 311 | out <- data.frame(obj.type, obj.size, obj.dim) 312 | names(out) <- c("Type", "Size", "Rows", "Columns") 313 | out <- out[order(out[[order.by]], decreasing=decreasing), ] 314 | if (head) 315 | out <- head(out, n) 316 | out 317 | } 318 | 319 | 320 | ## ### wfe objects for xtable 321 | 322 | ## xtable.wfe <- function(x,caption=NULL,label=NULL,align=NULL, 323 | ## digits=NULL,display=NULL,...) { 324 | ## return(xtable.summary.wfe(summary(x),caption=caption,label=label, 325 | ## align=align, digits=digits,display=display)) 326 | ## } 327 | 328 | ## xtable.summary.wfe <- function(x,caption=NULL,label=NULL,align=NULL, 329 | ## digits=NULL,display=NULL,...) { 330 | ## x <- data.frame(x$coef,check.names=FALSE) 331 | 332 | ## class(x) <- c("xtable","data.frame") 333 | ## caption(x) <- caption 334 | ## label(x) <- label 335 | ## align(x) <- switch(1+is.null(align),align,c("r","r","r","r","r")) 336 | ## digits(x) <- switch(1+is.null(digits),digits,c(0,4,4,2,4)) 337 | ## display(x) <- switch(1+is.null(display),display,c("s","f","f","f","f")) 338 | ## return(x) 339 | ## } 340 | 341 | -------------------------------------------------------------------------------- /R/onAttach.R: -------------------------------------------------------------------------------- 1 | ".onAttach" <- function(lib, pkg) { 2 | mylib <- dirname(system.file(package = pkg)) 3 | title <- packageDescription(pkg, lib.loc = mylib)$Title 4 | ver <- packageDescription(pkg, lib.loc = mylib)$Version 5 | packageStartupMessage(paste(pkg, ": ", title, "\nVersion: ", ver, "\n", sep="")) 6 | } 7 | -------------------------------------------------------------------------------- /R/pwfe.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | pwfe <- function (formula, treat = "treat.name", outcome, data, pscore = NULL, 4 | unit.index, time.index = NULL, method = "unit", within.unit = TRUE, 5 | qoi = c("ate", "att"), estimator = NULL, C.it = NULL, 6 | White = TRUE, White.alpha = 0.05, 7 | hetero.se = TRUE, auto.se = TRUE, unbiased.se = FALSE, 8 | verbose = TRUE) { 9 | 10 | 11 | pwfe.call <- match.call() 12 | tn.row <- nrow(data) # total number of rows in data 13 | 14 | 15 | 16 | ### Warnings 17 | ## Warning for missing unit & time index 18 | if (missing(unit.index)) 19 | stop("'unit.index' or index for strata should be provided") 20 | 21 | if (is.null(time.index) & method == "time") 22 | stop("'time.index' should be provided") 23 | 24 | ## Warning for methods 25 | if(method=="time" && !is.null(estimator) && estimator == "fd") 26 | stop("First Difference is not compatible with 'time' method") 27 | 28 | if(is.null(time.index) && !is.null(estimator) && estimator == "fd") 29 | stop("First Difference cannot calculate when 'time.index' is missing") 30 | 31 | if(is.null(time.index) && !is.null(estimator) && estimator == "did") 32 | stop("Difference-in-Differences is not compatible with pwfe") 33 | 34 | ## Warning for C.it 35 | if (!is.null(C.it)){ 36 | Cit <- data[,C.it] 37 | if (!is.numeric(Cit) && length(Cit)!= tn.row) 38 | stop("'C.it' must be a numeric vector with length equal to number of observations") 39 | if ( sum(Cit < 0) > 0 ) 40 | stop("'C.it' must be a non-negative numeric vector") 41 | } 42 | 43 | ## Waring for pscore (if propensity score is provided by researcher) 44 | p.score <- data[,pscore] 45 | if (!is.null(pscore) && !is.numeric(p.score) && length(p.score)!= tn.row) 46 | stop("'pscore' must be a numeric vector with length equal to number of observations") 47 | if (!is.null(pscore)) { 48 | if (!((0 < p.score ) && (p.score < 1))) 49 | stop("'pscore' must be a bounded away from zero and one") 50 | } 51 | if (!is.null(pscore) && !missing(formula)) 52 | stop("'formula' should not be provided when pscore is specified") 53 | 54 | 55 | 56 | 57 | ## cat("warnings done:\n") 58 | ## C.it 59 | ## Default for ATE 60 | if (is.null(C.it)){ 61 | data$C.it <- as.integer(rep(1, nrow(data))) 62 | } 63 | 64 | ## White.alpha 65 | if (is.null(White.alpha)){ 66 | White.alpha <- 0.05 67 | } else { 68 | White.alpha <- White.alpha 69 | } 70 | 71 | ## Warning for binary treatment 72 | 73 | ## treatment variable 74 | data$TR <- as.numeric(data[,treat]) 75 | 76 | if (length(unique(data$TR)) !=2 ){ 77 | stop("'treat' must be a binary vector") 78 | } 79 | if (sum(unique(data$TR)) !=1) { 80 | stop("'treat' must be either 0 or 1 where 1 indicates treated") 81 | } 82 | 83 | 84 | 85 | ### Unit and Time index 86 | ## Creating time index for strata fixed effect analysis 87 | 88 | 89 | ## unit index 90 | 91 | numeric.u.index <- as.numeric(as.factor(data[,unit.index])) 92 | numeric.u.index[is.na(numeric.u.index)] <- 0 93 | ## handling missing unit index 94 | uniq.u <- unique(na.omit(numeric.u.index)) 95 | uniq.u <- sort(uniq.u[!(uniq.u %in% 0)]) 96 | J.u <- length(uniq.u) 97 | 98 | 99 | data$u.index <- Index(numeric.u.index, uniq.u, J.u, tn.row) 100 | 101 | ## time index 102 | if (is.null(time.index)) { 103 | data$t.index <- GenTime(data$u.index, tn.row, length(uniq.u)) 104 | numeric.t.index <- as.numeric(as.factor(data$t.index)) 105 | } else { 106 | numeric.t.index <- as.numeric(as.factor(data[,time.index])) 107 | numeric.t.index[is.na(numeric.t.index)] <- 0 108 | ## handling missing time index 109 | uniq.t <- unique(na.omit(numeric.t.index)) 110 | uniq.t <- sort(uniq.t[!(uniq.t %in% 0)]) 111 | ## needs to sort for unbalnced panel, See Index() 112 | J.t <- length(uniq.t) 113 | data$t.index <- Index(numeric.t.index, uniq.t, J.t, tn.row) 114 | } 115 | uniq.t <- unique(data$t.index) 116 | 117 | 118 | 119 | ## unique unit number and sorting data 120 | if (method == "unit"){ 121 | unit.number <- length(uniq.u) 122 | } else if (method == "time"){ 123 | unit.number <- length(uniq.t) 124 | } else { 125 | stop("method should be either unit or time") 126 | } 127 | 128 | if (verbose) 129 | cat(" \nNumber of unique", method, "is", unit.number, "\n") 130 | 131 | 132 | ## order data 133 | data <- data[order(data$u.index, data$t.index),] 134 | 135 | ## saving unit index for each unit 136 | name.unit <- unique(data[,unit.index]) 137 | number.unit <- unique(numeric.u.index) 138 | 139 | ## saving time index for each time 140 | name.time <- unique(data[,time.index]) 141 | number.time <- unique(numeric.t.index) 142 | 143 | 144 | 145 | ## quantity of interest: qoi 146 | 147 | if (missing(qoi)){ 148 | causal <- "ate" 149 | ate.n <- 1 150 | } 151 | 152 | ate.n <- att.n <- 0 153 | if (qoi == "ate"){ 154 | causal <- "ATE (Average Treatment Effect)" 155 | ate.n <- 1 156 | } 157 | if (qoi == "att"){ 158 | causal <- "ATT (Average Treatment Effect for the Treated)" 159 | att.n <- 1 160 | } 161 | 162 | if (is.null(estimator)) { 163 | est <- "NULL" 164 | } 165 | if (!is.null(estimator) && estimator == "fd"){ 166 | est <- "FD (First-Difference)" 167 | } 168 | if (!is.null(estimator) && estimator == "did"){ 169 | est <- "DID (Difference-in-Differences)" 170 | } 171 | 172 | 173 | ### propensity score estimation 174 | 175 | if (!is.null(pscore)) { 176 | data$y.star <- Transform(data[,outcome], data[,treat], data[,pscore]) 177 | } 178 | if (is.null(pscore)) { 179 | 180 | ## formula for propensity score estimation 181 | f.left <- as.character(treat) 182 | f.right <- unlist(strsplit(as.character(formula), "~"))[2] 183 | c.formula <- paste(f.left,"~ -1 + ", f.right) 184 | 185 | 186 | ps.formula <- as.formula(c.formula) # formula for propensity score estimation 187 | 188 | 189 | data$index <- 1:nrow(data) # indexing for saving y.star 190 | 191 | if (verbose){ 192 | cat("\nPropensity Score estimation started based on the following model:\n", c.formula, "\n") 193 | } 194 | flush.console() 195 | ## transform Y_{it} 196 | if (method=="time" & within.unit == TRUE){ 197 | ## regression for propensity score estimation is run within unit 198 | Data.ps <- c() 199 | for (j in 1:length(uniq.t)){ 200 | ## print(j) 201 | if (verbose){ 202 | cat(".") 203 | flush.console() 204 | } 205 | 206 | pool <- data[data$t.index == uniq.t[j], ] # subset data by time j 207 | n.pool <- nrow(pool) # gives total number of units for the year j 208 | n.treat <- sum(pool[,treat]) # number of treated units in a given year t 209 | n.cont <- sum(1-pool[,treat]) # number of controlled units in a given year t 210 | ## estimate propensity score within each time t 211 | fit.ps <- bayesglm(ps.formula, family=binomial(link="logit"), 212 | data = pool) 213 | pool$fitted.ps <- fitted(fit.ps) # save the estimated propensity score 214 | pool.u.index <- unique(pool$u.index) 215 | pool$ystar <- Transform(pool[,outcome], pool[,treat], pool$fitted.ps) 216 | Data.ps <- rbind(Data.ps, pool) 217 | } 218 | data$y.star[Data.ps$index] <- Data.ps$ystar 219 | 220 | } 221 | 222 | if (method=="unit" & within.unit == TRUE) { 223 | ## regression for propensity score estimation is run within unit 224 | Data.ps <- c() 225 | for (i in 1:length(uniq.u)){ 226 | 227 | if (verbose){ 228 | if (i %% 100 == 0) 229 | cat(".") 230 | flush.console() 231 | } 232 | 233 | pool <- data[data$u.index == uniq.u[i], ] # subset data by unit i 234 | n.pool <- nrow(pool) # gives total number of times for the unit i 235 | n.treat <- sum(pool[,treat]) # number of treated times in a given unit i 236 | n.cont <- sum(1-pool[,treat]) # number of controlled times in a given unit i 237 | ## estimate propensity score within each time t 238 | fit.ps <- bayesglm(ps.formula, family=binomial(link="logit"), data 239 | = pool) 240 | pool$fitted.ps <- fitted(fit.ps) # save the estimated propensity score 241 | pool.t.index <- unique(pool$t.index) 242 | pool$ystar <- Transform(pool[,outcome], pool[,treat], pool$fitted.ps) 243 | Data.ps <- rbind(Data.ps, pool) 244 | } 245 | data$y.star[Data.ps$index] <- Data.ps$ystar 246 | 247 | } 248 | 249 | 250 | 251 | ## transform Y_{it} 252 | if (within.unit == FALSE){ 253 | ## regression for propensity score estimation is on entire data 254 | n.treat <- sum(data[,treat]) # number of treated units 255 | n.cont <- sum(1-data[,treat]) # number of controlled units 256 | ## estimate propensity score within each time t 257 | fit.ps <- bayesglm(ps.formula, family=binomial(link="logit"), 258 | data = data) 259 | data$fitted.ps <- fitted(fit.ps) # save the estimated propensity score 260 | data$y.star <- Transform(data[,outcome], data[,treat], data$fitted.ps) 261 | } 262 | 263 | } 264 | 265 | if (verbose) 266 | cat("\nPropensity Score estimation done \n") 267 | flush.console() 268 | 269 | 270 | 271 | if (verbose) 272 | cat("\nWeight calculation started ") 273 | flush.console() 274 | ## Unit fixed effects models 275 | if ( (method=="unit" & qoi=="ate" & is.null(estimator) ) | (method=="unit" & qoi=="att" & is.null(estimator)) ) { 276 | W <- GenWeightsUnit(data$u.index, data$t.index, data$TR, data$C.it, tn.row, length(uniq.u), length(uniq.t), ate.n, att.n, length(uniq.u)*length(uniq.t), verbose) 277 | W <- matrix(W, nrow=length(uniq.t), ncol=length(uniq.u), byrow=T) 278 | data$W.it <- VectorizeC(as.matrix(W), data$t.index, data$u.index, tn.row) 279 | } 280 | 281 | ## Time fixed effects models 282 | if ( (method=="time" & qoi=="ate" & is.null(estimator) ) | (method=="time" & qoi=="att" & is.null(estimator)) ) { 283 | W <- GenWeightsTime(data$t.index, data$u.index, data$TR, data$C.it, tn.row, length(uniq.t), length(uniq.u), ate.n, att.n, length(uniq.t)*length(uniq.u), verbose) 284 | W <- matrix(W, nrow=length(uniq.t), ncol=length(uniq.u), byrow=T) 285 | data$W.it <- VectorizeC(as.matrix(W), data$t.index, data$u.index, tn.row) 286 | } 287 | 288 | 289 | ## Within Unit First Difference 290 | if(( (method=="unit") && (qoi == "ate") && (!is.null(estimator) && estimator == "fd")) | ((method == "unit") && (qoi =="att") && (!is.null(estimator) && estimator == "fd"))) { 291 | W <- GenWeightsFD(data$u.index, data$t.index, data$TR, data$C.it, tn.row, length(uniq.u), length(uniq.t), ate.n, att.n, verbose) 292 | W <- matrix(W, nrow=length(uniq.t), ncol=length(uniq.u), byrow=T) 293 | data$W.it <- VectorizeC(as.matrix(W), data$t.index, data$u.index, tn.row) 294 | } 295 | if (verbose) 296 | cat(" Weight calculation done \n") 297 | ## print(Sys.time()) 298 | flush.console() 299 | ##cat("weights:", print(W), "\n") 300 | 301 | 302 | ## e <- environment() 303 | ## save(file = "temp.RData", list = ls(), env = e) 304 | 305 | 306 | Data.final <- as.data.frame(cbind(data$y.star, data$TR, data$W.it, data$u.index, data$t.index)) 307 | colnames(Data.final) <- c("y.star", "TR", "W.it", "u.index", "t.index") 308 | 309 | 310 | 311 | ## Creating the matrix for final analysis 312 | Data.dm <- as.data.frame(matrix(NA, ncol = 2, nrow = nrow(Data.final))) 313 | Data.wdm <- as.data.frame(matrix(NA, ncol = 2, nrow = nrow(Data.final))) 314 | 315 | 316 | ## column names 317 | ## print(colnames(Data.final)) 318 | colnames(Data.dm) <- colnames(Data.wdm) <- c("dep.var", "treat") 319 | 320 | ## de-meaning for fast calculation 321 | for (k in 1:2){ 322 | if (method == "unit"){ 323 | w.wdemean <- WWDemean(Data.final[,k], Data.final$W.it, Data.final$u.index, unit.number, tn.row) 324 | demean <- Demean(Data.final[,k], Data.final$u.index, unit.number, tn.row) 325 | } 326 | if (method == "time"){ 327 | w.wdemean <- WWDemean(Data.final[,k], Data.final$W.it, Data.final$t.index, unit.number, tn.row) 328 | demean <- Demean(Data.final[,k], Data.final$u.index, unit.number, tn.row) 329 | } 330 | Data.wdm[,k] <- as.vector(w.wdemean) 331 | Data.dm[,k] <- as.vector(demean) 332 | } 333 | 334 | Data.dm$unit <- Data.final$u.index 335 | Data.dm$time <- Data.final$t.index 336 | Data.wdm$unit <- Data.final$u.index 337 | Data.wdm$time <- Data.final$t.index 338 | 339 | ## Print de(weighted)-meaned data 340 | 341 | ## cat("#####", "\n","De-meaned Data:","\n") 342 | ## print(Data.dm) 343 | ## cat("#####", "\n","sqrt(W) x (weighted demeaned):","\n") 344 | ## print(Data.wdm) 345 | 346 | ## final regression on weighted demeaned data 347 | fit.final <- lm(dep.var ~ -1 + treat, data = Data.wdm) 348 | fit.ols <- lm(dep.var ~ -1 + treat, data = Data.dm) 349 | 350 | ## print(summary(fit.final)) 351 | 352 | ## set up data frame, with support for standard and modified responses 353 | 354 | mf.final <- model.frame(formula="dep.var ~ -1 + treat", data=Data.wdm) 355 | X <- model.matrix(attr(mf.final, "terms"), data=mf.final) 356 | Y <- model.response(mf.final, "numeric") 357 | p <- ncol(X) 358 | 359 | coef.wls <- fit.final$coef 360 | coef.ols <- fit.ols$coef 361 | d.f <- fit.final$df - unit.number 362 | 363 | sigma2 <- sum(resid(fit.final)^2)/d.f 364 | vcov.wls <- vcov(fit.final)*((fit.final$df)/(fit.final$df - unit.number)) 365 | var.cov <- vcov.wls 366 | vcov.ols <- vcov(fit.ols) 367 | 368 | ## e <- environment() 369 | ## save(file = "temp.RData", list = ls(), env = e) 370 | 371 | 372 | ## residual <- resid(fit.final)*1/sqrt(Data.final$W.it) 373 | residual <- c(data[,outcome] - data[,treat]%*%t(coef.wls)) 374 | resid.ols <- resid(fit.ols) 375 | 376 | ## save residuals 377 | Data.wdm$u.tilde <- sqrt(Data.final$W.it)*resid(fit.final) 378 | Data.dm$u.hat <- u.hat <- resid(fit.ols) 379 | 380 | 381 | 382 | ### Robust Standard Errors 383 | 384 | ## contructing de(weighted)-meaned X matrix 385 | X.tilde <- as.matrix(Data.wdm[,2]) # NT x 1 (where p is number of covariates) 386 | X.hat <- as.matrix(Data.dm[,2]) # NT x 1 (where p is number of covariates) 387 | 388 | ## residuals 389 | u.tilde <- as.matrix(Data.wdm$u.tilde) # NT x 1 390 | u.hat <- as.matrix(Data.dm$u.hat) # NT x 1 391 | 392 | ## unit vector (the length should be same as nrow(X.tilde) and length(u.tilde) 393 | wdm.unit <- as.vector(Data.wdm$unit) 394 | uniq.u.pw <- length(unique(wdm.unit)) # number of uniq units with positive weights 395 | 396 | 397 | ## cat("3: Robust error calculation started\n") 398 | ## print(Sys.time()) 399 | 400 | ginv.XX.tilde <- ginv(crossprod(X.tilde, X.tilde)) 401 | ginv.XX.hat <- ginv(crossprod(X.hat, X.hat)) 402 | 403 | diag.ee.tilde <- c(u.tilde^2) 404 | diag.ee.hat <- c(u.hat^2) 405 | 406 | 407 | if ((hetero.se == TRUE) & (auto.se == TRUE)) {# Default is Arellano 408 | std.error <- "Heteroscedastic / Autocorrelation Robust Standard Error" 409 | 410 | ## 1. arbitrary autocorrelation as well as heteroskedasticity (Eq 12) 411 | 412 | Omega.hat.HAC <- OmegaHatHAC(nrow(X.tilde), p, wdm.unit, J.u, X.tilde, u.tilde) 413 | Omega.hat.HAC <- matrix(Omega.hat.HAC, nrow = p, ncol = p) 414 | Omega.hat.HAC <- (1/(nrow(X.tilde)))* Omega.hat.HAC # without degree of freedom adjustment 415 | ## Omega.hat.HAC <- (1/(nrow(X.tilde) - J.u - p))* Omega.hat.HAC 416 | 417 | Omega.hat.fe.HAC <- OmegaHatHAC(nrow(X.hat), p, wdm.unit, J.u, X.hat, u.hat) 418 | Omega.hat.fe.HAC <- matrix(Omega.hat.fe.HAC, nrow = p, ncol = p) 419 | Omega.hat.fe.HAC <- (1/(nrow(X.hat))) * Omega.hat.fe.HAC # without degree of freedom adjustment 420 | ## Omega.hat.fe.HAC <- (1/(nrow(X.hat) - J.u - p)) * Omega.hat.fe.HAC 421 | 422 | Psi.hat.wfe <- (nrow(X.tilde) * ginv.XX.tilde) %*% Omega.hat.HAC %*% (nrow(X.tilde) * ginv.XX.tilde) 423 | Psi.hat.fe <- (nrow(X.hat) * ginv.XX.hat) %*% Omega.hat.fe.HAC %*% (nrow(X.hat) * ginv.XX.hat) 424 | 425 | } else if ( (hetero.se == TRUE) & (auto.se == FALSE) ) {# independence across observations but heteroskedasticity 426 | std.error <- "Heteroscedastic Robust Standard Error" 427 | 428 | ## 2. independence across observations but heteroskedasticity (Eq 11) 429 | 430 | ## Omega.hat.HC <- OmegaHatHC(nrow(X.tilde), p, wdm.unit, J.u, X.tilde, u.tilde) 431 | ## Omega.hat.HC <- matrix(Omega.hat.HC, nrow = p, ncol = p) 432 | ## ## same as the following matrix multiplication but slower 433 | ## ## Omega.hat.he <- t(X.tilde) %*% diag(c(u.tilde)^2, nrow=nrow(X.tilde)) %*% X.tilde 434 | ## Omega.hat.HC <- (1/(nrow(X.tilde) - J.u - p)) * Omega.hat.HC 435 | 436 | 437 | Omega.hat.HC <- (1/(nrow(X.tilde) - J.u - p))*(crossprod((X.tilde*diag.ee.tilde), X.tilde)) 438 | 439 | Omega.hat.fe.HC <- (1/(nrow(X.hat) - J.u - p))*(crossprod((X.hat*diag.ee.hat), X.hat)) 440 | 441 | 442 | ### Stock-Watson (Econometrica 2008: Eq(6)): Bias-asjusted for balance panel 443 | if (unbiased.se == TRUE) { 444 | ## check if panel is balanced 445 | if (sum(as.numeric(apply(matrix(table(wdm.unit)), 1, mean) != mean(matrix(table(wdm.unit))))) == 0 ){ 446 | std.error <- "Heteroskedastic Standard Error (Stock-Watson Biased Corrected)" 447 | 448 | B.hat <- matrix(0, nrow=dim(X.tilde)[2], ncol=dim(X.tilde)[2]) 449 | B2.hat <- matrix(0, nrow=dim(X.hat)[2], ncol=dim(X.hat)[2]) 450 | for (i in 1:J.u) { 451 | ## cat("unit", i, "\n") 452 | X.i <- X.tilde[Data.wdm$unit == i,] 453 | X2.i <- X.hat[Data.wdm$unit == i,] 454 | ## print(sum(as.numeric(Data.wdm$unit == i))) 455 | ## print(X.i) 456 | if (sum(as.numeric(Data.wdm$unit == i)) > 1) { 457 | u.i <- u.tilde[Data.wdm$unit == i] 458 | u2.i <- u.hat[Data.wdm$unit == i] 459 | ## print(length(u.i)) 460 | flush.console() 461 | XX.i <- crossprod(X.i, X.i) 462 | XX2.i <- crossprod(X2.i, X2.i) 463 | B.hat <- B.hat + ((1/J.t)* XX.i*(1/(J.t-1))*sum(u.i^2)) 464 | B2.hat <- B2.hat + ((1/J.t)* XX2.i*(1/(J.t-1))*sum(u2.i^2)) 465 | } 466 | } 467 | 468 | B.hat <- B.hat * (1/J.u) 469 | B2.hat <- B2.hat * (1/J.u) 470 | 471 | cat("time", J.t, "\n") 472 | Sigma_HRFE <- ((J.t-1)/(J.t-2))*(Omega.hat.HC - (1/(J.t-1))*B.hat) 473 | Psi.hat.wfe <- (nrow(X.tilde) * ginv.XX.tilde) %*% Sigma_HRFE %*% (nrow(X.tilde) * ginv.XX.tilde) 474 | 475 | Sigma2_HRFE <- ((J.t-1)/(J.t-2))*(Omega.hat.fe.HC - (1/(J.t-1))*B2.hat) 476 | Psi.hat.fe <- (nrow(X.hat) * ginv.XX.hat) %*% Sigma2_HRFE %*% (nrow(X.hat) * ginv.XX.hat) 477 | } else { 478 | stop ("unbiased.se == TRUE is allowed only when panel is balanced") 479 | } 480 | } 481 | 482 | Psi.hat.wfe <- (nrow(X.tilde) * ginv.XX.tilde) %*% Omega.hat.HC %*% (nrow(X.tilde) * ginv.XX.tilde) 483 | 484 | Psi.hat.fe <- (nrow(X.hat) * ginv.XX.hat) %*% Omega.hat.fe.HC %*% (nrow(X.hat) * ginv.XX.hat) 485 | 486 | 487 | } else if ( (hetero.se == FALSE) & (auto.se == FALSE) ) {# indepdence and homoskedasticity 488 | std.error <- "Homoskedastic Standard Error" 489 | 490 | Psi.hat.wfe <- nrow(X.tilde) * (sigma2 * ginv.XX.tilde) 491 | 492 | ## same as the following 493 | 494 | ## Psi.hat.wfe2 <- J.u * sigma2 * solve(XX.tilde) 495 | ## cat("compare homoskedasticity s.e\n") 496 | ## print(sqrt(diag(Psi.hat.wfe))) 497 | ## print(sqrt(diag(Psi.hat.wfe2))) 498 | 499 | Psi.hat.fe <- nrow(X.hat) * vcov(fit.ols) 500 | 501 | } else if ( (hetero.se == FALSE) & (auto.se == TRUE) ) {# Kiefer 502 | 503 | stop ("robust standard errors with autocorrelation and homoskedasiticy is not supported") 504 | 505 | } 506 | 507 | var.cov <- Psi.hat.wfe * (1/nrow(X.tilde)) 508 | var.cov.fe <- Psi.hat.fe *(1/nrow(X.hat)) 509 | 510 | 511 | 512 | ### White (1980) Test: Theorem 4 513 | 514 | if (White == TRUE){ 515 | 516 | diag.ee <- c(u.hat) * c(u.tilde) 517 | 518 | Lambda.hat1 <- 1/((nrow(X.hat)))* (crossprod((X.hat*diag.ee), X.tilde)) 519 | Lambda.hat2 <- 1/((nrow(X.tilde)))* (crossprod((X.tilde*diag.ee), X.hat)) 520 | 521 | Phi.hat <- Psi.hat.wfe + Psi.hat.fe - (nrow(X.hat)*ginv.XX.hat) %*% Lambda.hat1 %*% (nrow(X.tilde)*ginv.XX.tilde) - (nrow(X.tilde)*ginv.XX.tilde) %*% Lambda.hat2 %*% (nrow(X.hat)*ginv.XX.hat) 522 | 523 | ## White test: null hypothesis is ``no misspecification'' 524 | 525 | white.stat <- as.double(Re(nrow(X.hat) * t(coef.ols - coef.wls) %*% ginv(Phi.hat) %*% (coef.ols - coef.wls))) 526 | test.null <- pchisq(as.numeric(white.stat), df=p, lower.tail=F) < White.alpha 527 | white.p <- pchisq(as.numeric(white.stat), df=p, lower.tail=F) 528 | flush.console() 529 | 530 | ## if (verbose) { 531 | ## cat("\nWhite calculation done") 532 | ## flush.console() 533 | ## } 534 | 535 | } else { 536 | white.stat <- "NULL" 537 | test.null <- "NULL" 538 | white.p <- "NULL" 539 | } 540 | 541 | mf <- as.data.frame(cbind(data[,outcome], data[,treat])) 542 | colnames(mf) <- c(outcome, treat) 543 | y <- as.data.frame(data[,outcome]) 544 | colnames(y) <- outcome 545 | x <- as.data.frame(data[,treat]) 546 | colnames(x) <- treat 547 | 548 | 549 | ### Saving results 550 | z <- list(coefficients = coef.wls, 551 | x = X, 552 | y = y, 553 | mf = mf, 554 | call = pwfe.call, 555 | vcov = var.cov, 556 | se = sqrt(diag(var.cov)), 557 | sigma = sqrt(sigma2), 558 | df = d.f, 559 | residuals = residual, 560 | W = W, 561 | unit.name = name.unit, 562 | unit.index = number.unit, 563 | time.name = name.time, 564 | time.index = number.time, 565 | method = method, 566 | causal = causal, 567 | est = est, 568 | std.error = std.error, 569 | White.pvalue = white.p, 570 | White.alpha = White.alpha, 571 | White.stat = white.stat, 572 | White.test = test.null) 573 | class(z) <- "pwfe" 574 | z 575 | 576 | 577 | 578 | } 579 | 580 | 581 | print.pwfe <- function(x,...){ 582 | cat("Call:\n") 583 | print(x$call) 584 | cat("\nCoefficients:\n") 585 | print(x$coefficients) 586 | cat("\nStd.Err:\n") 587 | print(x$se) 588 | } 589 | 590 | 591 | 592 | summary.pwfe <- function(object, signif.stars = getOption("show.signif.stars"),...){ 593 | se <- object$se 594 | sigma <- object$sigma 595 | df <- object$df 596 | tval <- coef(object) / se 597 | TAB <- cbind(Estimate = coef(object), 598 | Std.Err = se, 599 | t.value = tval, 600 | p.value = 2*pt(-abs(tval), df = object$df)) 601 | res <- list(call = object$call, 602 | coefficients = TAB, 603 | sigma = object$sigma, 604 | df = object$df, 605 | W = object$weights, 606 | residuals = object$residuals, 607 | method = object$method, 608 | causal = object$causal, 609 | estimator = object$est, 610 | std.error = object$std.error, 611 | White.pvalue = object$White.pvalue, 612 | White.alpha = object$White.alpha, 613 | White.stat = object$White.stat, 614 | White.test = object$White.test) 615 | class(res) <- "summary.pwfe" 616 | res 617 | } 618 | 619 | 620 | print.summary.pwfe <- function(x, ...){ 621 | cat("\nMethod:", x$method, "Fixed Effects (Propensity Score)\n") 622 | cat("\nQuantity of Interest:", x$causal) 623 | cat("\nEstimator:", x$estimator) 624 | cat("\nStandard Error:", x$std.error) 625 | cat("\n") 626 | cat("\n") 627 | cat("Call:\n") 628 | print(x$call) 629 | cat("\n") 630 | printCoefmat(x$coefficients, P.values=TRUE, has.Pvalue=TRUE) 631 | cat("\nResidual standard error:", format(signif(x$sigma, 632 | 4)), "on", x$df, "degrees of freedom") 633 | cat("\nWhite statistics for functional misspecification:", x$White.stat, "with Pvalue=", x$White.pvalue) 634 | cat("\nReject the null of NO misspecification:", x$White.test) 635 | cat("\n") 636 | } 637 | -------------------------------------------------------------------------------- /R/wfe.R: -------------------------------------------------------------------------------- 1 | wfe <- function (formula, data, treat = "treat.name", 2 | unit.index, time.index = NULL, method = "unit", 3 | dyad1.index = NULL, dyad2.index = NULL, 4 | qoi = "ate", estimator = NULL, C.it = NULL, 5 | hetero.se = TRUE, auto.se = TRUE, 6 | dyad.se = FALSE, 7 | White = TRUE, White.alpha = 0.05, 8 | verbose = TRUE, unbiased.se = FALSE, unweighted = FALSE, 9 | store.wdm = FALSE, maxdev.did= NULL, 10 | tol = sqrt(.Machine$double.eps)){ 11 | 12 | 13 | wfe.call <- match.call() 14 | ## set up data frame, with support for standard and modified responses 15 | mf <- model.frame(formula=formula, data=data) 16 | x <- model.matrix(attr(mf, "terms"), data=mf) 17 | y <- model.response(mf, "numeric") 18 | tn.row <- nrow(mf) # total number of rows in data 19 | 20 | class(data) <- "data.frame" 21 | 22 | ## ## remove missing variables: removing rows with missing values in either y or treat 23 | remove.indices <- which(!rownames(data) %in% rownames(mf)) 24 | 25 | 26 | if (length(remove.indices) > 0){ 27 | data <- data[-remove.indices,] 28 | if (verbose) 29 | cat(" \nMissing values are removed\n") 30 | } 31 | 32 | data$y <- y 33 | 34 | ## Creating dummies variables for White test in the end 35 | X <- as.data.frame(x[,-1]) 36 | p <- ncol(X) 37 | 38 | ## e <- environment() 39 | ## save(file = "temp.RData", list = ls(), env = e) 40 | 41 | ### -------------------------------------------------------- 42 | ### Warnings 43 | ### -------------------------------------------------------- 44 | 45 | ## Warning for missing unit & time index 46 | if (missing(unit.index)) 47 | stop("'unit.index' or index for strata should be provided") 48 | 49 | if (is.null(time.index) & method == "time") 50 | stop("'time.index' should be provided") 51 | 52 | ## Warning for methods 53 | if(method=="time" && !is.null(estimator) && estimator == "fd") 54 | stop("First Difference is not compatible with 'time' method: set method == 'unit'") 55 | 56 | if(method=="time" && !is.null(estimator) && estimator == "did") 57 | stop("Difference-in-Differences is not compatible with 'time' method: set method == 'unit'") 58 | 59 | if(method=="time" && !is.null(estimator) && estimator == "Mdid") 60 | stop("Match-Difference-in-Differences is not compatible with 'time' method: set method == 'unit'") 61 | 62 | if(is.null(time.index) && !is.null(estimator) && estimator == "fd") 63 | stop("First Difference cannot calculate when 'time.index' is missing") 64 | 65 | ## Warning for C.it 66 | if (!is.null(C.it)){ 67 | Cit <- data[,C.it] 68 | if (!is.numeric(Cit) && length(Cit)!= tn.row) 69 | stop("'C.it' must be a numeric vector with length equal to number of observations") 70 | if ( sum(Cit < 0) > 0 ) 71 | stop("'C.it' must be a non-negative numeric vector") 72 | } 73 | 74 | ## cat("warnings done:\n") 75 | 76 | ## C.it 77 | ## Default for ATE 78 | if (is.null(C.it)){ 79 | data$C.it <- as.integer(rep(1, nrow(data))) 80 | } 81 | 82 | ## White.alpha 83 | if (is.null(White.alpha)){ 84 | White.alpha <- 0.05 85 | } else { 86 | White.alpha <- White.alpha 87 | } 88 | 89 | ## Warning for binary treatment 90 | ## treat should be 0,1 where 1 indicates treatment 91 | 92 | ## Warning for maxdev.did 93 | if(!is.null(maxdev.did) && maxdev.did < 0){ 94 | stop("Warning: maxdev.did should be a positive numeric value") 95 | } 96 | 97 | 98 | ## -------------------------------------------------------- 99 | ## Unit and Time index 100 | ## -------------------------------------------------------- 101 | ## Creating time index for strata fixed effect analysis 102 | 103 | ## storing original unit and time index 104 | orig.unit.idx <- as.character(data[,unit.index]) 105 | 106 | ## unit index 107 | numeric.u.index <- as.numeric(as.factor(data[,unit.index])) 108 | numeric.u.index[is.na(numeric.u.index)] <- 0 109 | ## handling missing unit index 110 | uniq.u <- unique(na.omit(numeric.u.index)) 111 | uniq.u <- sort(uniq.u[!(uniq.u %in% 0)]) 112 | J.u <- length(uniq.u) 113 | data$u.index <- Index(numeric.u.index, uniq.u, J.u, tn.row) 114 | 115 | ## for dyadic data: unit 1 and unit 2 that compose each dyad 116 | if(dyad.se == TRUE){ 117 | ## Warning 118 | if(is.null(dyad1.index) | is.null(dyad2.index)){ 119 | stop("Warning: For dyadic data, two separate unit indices for the members of each dyad -- dyad1.index, dyad2.indx -- should be provided") 120 | } 121 | 122 | data$dyad <- data[, unit.index] 123 | data$c1 <- data[, dyad1.index] 124 | data$c2 <- data[, dyad2.index] 125 | 126 | } 127 | 128 | ## time index 129 | if (is.null(time.index)) { 130 | data$t.index <- GenTime(data$u.index, tn.row, length(uniq.u)) 131 | numeric.t.index <- as.numeric(as.factor(data$t.index)) 132 | 133 | ## storing original unit and time index 134 | orig.time.idx <- as.character(data$t.index) 135 | } else { 136 | ## storing original unit and time index 137 | orig.time.idx <- as.character(data[,time.index]) 138 | 139 | numeric.t.index <- as.numeric(as.factor(data[,time.index])) 140 | numeric.t.index[is.na(numeric.t.index)] <- 0 141 | ## handling missing time index 142 | uniq.t <- unique(na.omit(numeric.t.index)) 143 | uniq.t <- sort(uniq.t[!(uniq.t %in% 0)]) 144 | ## needs to sort for unbalnced panel, See Index() 145 | J.t <- length(uniq.t) 146 | data$t.index <- Index(numeric.t.index, uniq.t, J.t, tn.row) 147 | } 148 | uniq.t <- unique(data$t.index) 149 | 150 | 151 | ## unique unit number and sorting data 152 | if (method == "unit"){ 153 | unit.number <- length(uniq.u) 154 | } else if (method == "time"){ 155 | unit.number <- length(uniq.t) 156 | } else { 157 | stop("method should be either unit or time") 158 | } 159 | 160 | if (verbose) 161 | cat(" \nNumber of unique", method, "is", unit.number, "\n") 162 | 163 | ## e <- environment() 164 | ## save(file = "temp.RData", list = ls(), env = e) 165 | ### -------------------------------------------------------- 166 | 167 | ## order data by unit index 168 | 169 | tmp <- cbind(X, data$u.index, data$t.index) 170 | tmp <- tmp[order(tmp[,(p+1)], tmp[,(p+2)]),] 171 | X <- as.data.frame(tmp[,-((p+1):(p+2))]) 172 | colnames(X) <- colnames(x)[-1] 173 | 174 | data <- data[order(data$u.index, data$t.index),] 175 | y <- data$y[order(data$u.index, data$t.index)] 176 | 177 | ## e <- environment() 178 | ## save(file = "temp.RData", list = ls(), env = e) 179 | 180 | ## saving unit index for each unit 181 | name.unit <- unique(data[,unit.index]) 182 | number.unit <- unique(numeric.u.index) 183 | units <- as.data.frame(cbind(number.unit,as.character(name.unit))) 184 | colnames(units) <- c("unit.index", "unit") 185 | rownames(units) <- seq(1:nrow(units)) 186 | 187 | ## saving time index for each time 188 | 189 | if (is.null(time.index)) { 190 | name.time <- unique(data$t.index) 191 | } else { 192 | name.time <- unique(data[,time.index]) 193 | } 194 | number.time <- unique(numeric.t.index) 195 | times <- cbind(number.time, name.time) 196 | times <- as.data.frame(times[order(times[,1]),]) 197 | colnames(times) <- c("time.index", "time") 198 | 199 | 200 | ## new model frame order by u.index 201 | mf.col <- colnames(mf) 202 | mf.sorted <- cbind(y,X) 203 | colnames(mf.sorted) <- mf.col 204 | 205 | ## treatment variable 206 | data$TR <- as.numeric(data[,treat]) 207 | 208 | if (length(unique(data$TR)) !=2) 209 | stop("'treat' must be a binary vector: there are more than two values of treatment") 210 | 211 | if (sum(unique(data$TR)) !=1) 212 | stop("'treat' must be a either zero or one where one indicates treatment") 213 | 214 | ### -------------------------------------------------------- 215 | ### Quantity of interest: qoi 216 | ### -------------------------------------------------------- 217 | 218 | if (missing(qoi)){ 219 | qoi <- "ate" 220 | causal <- "ate" 221 | ate.n <- 1 222 | } 223 | 224 | ate.n <- att.n <- 0 225 | if (qoi == "ate"){ 226 | causal <- "ATE (Average Treatment Effect)" 227 | ate.n <- 1 228 | } 229 | if (qoi == "att"){ 230 | causal <- "ATT (Average Treatment Effect for the Treated)" 231 | att.n <- 1 232 | } 233 | 234 | if (is.null(estimator)) { 235 | est <- "NULL" 236 | } 237 | if (!is.null(estimator) && estimator == "fd"){ 238 | est <- "FD (First-Difference)" 239 | } 240 | if (!is.null(estimator) && estimator == "did"){ 241 | est <- "DID (Difference-in-Differences)" 242 | } 243 | if (!is.null(estimator) && estimator == "Mdid"){ 244 | est <- "DID (Difference-in-Differences) with Matching on Pre-treatment Outcome" 245 | } 246 | 247 | if (unweighted == TRUE){ 248 | causal <- "Unweighted (Standard) Fixed Effect" 249 | } 250 | 251 | 252 | ### Weights calculation 253 | 254 | ## One-way Weighted FE 255 | if (is.null(estimator) || estimator=="fd") { 256 | 257 | if (verbose) { 258 | cat("\nWeight calculation started ") 259 | flush.console() 260 | } 261 | 262 | ## Standard Fixed effect 263 | if (unweighted == TRUE) { 264 | data$W.it <- rep(1, nrow(data)) 265 | W <- matrix(1,nrow=length(uniq.t), ncol=length(uniq.u)) 266 | } else { 267 | 268 | ## Unit fixed effects models 269 | if ( (method=="unit" & qoi=="ate" & is.null(estimator) ) | (method=="unit" & qoi=="att" & is.null(estimator)) ) { 270 | W <- GenWeightsUnit(data$u.index, data$t.index, data$TR, data$C.it, tn.row, length(uniq.u), length(uniq.t), ate.n, att.n, length(uniq.u)*length(uniq.t), verbose) 271 | W <- matrix(W, nrow=length(uniq.t), ncol=length(uniq.u), byrow=T) 272 | data$W.it <- VectorizeC(as.matrix(W), data$t.index, data$u.index, tn.row) 273 | } 274 | 275 | ## Time fixed effects models 276 | if ( (method=="time" & qoi=="ate" & is.null(estimator) ) | (method=="time" & qoi=="att" & is.null(estimator)) ) { 277 | W <- GenWeightsTime(data$t.index, data$u.index, data$TR, data$C.it, tn.row, length(uniq.t), length(uniq.u), ate.n, att.n, length(uniq.t)*length(uniq.u), verbose) 278 | W <- matrix(W, nrow=length(uniq.t), ncol=length(uniq.u), byrow=T) 279 | data$W.it <- VectorizeC(as.matrix(W), data$t.index, data$u.index, tn.row) 280 | } 281 | 282 | 283 | ## Within Unit First Difference 284 | if(( (method=="unit") && (qoi == "ate") && (!is.null(estimator) && estimator == "fd")) | ((method == "unit") && (qoi =="att") && (!is.null(estimator) && estimator == "fd"))) { 285 | W <- GenWeightsFD(data$u.index, data$t.index, data$TR, data$C.it, tn.row, length(uniq.u), length(uniq.t), ate.n, att.n, verbose) 286 | W <- matrix(W, nrow=length(uniq.t), ncol=length(uniq.u), byrow=T) 287 | data$W.it <- VectorizeC(as.matrix(W), data$t.index, data$u.index, tn.row) 288 | } 289 | } 290 | 291 | 292 | if (verbose) { 293 | cat("Weight calculation done \n") 294 | flush.console() 295 | } 296 | 297 | if(( (method=="unit") && (qoi == "ate") && (!is.null(estimator) && estimator == "fd")) | ((method == "unit") && (qoi =="att") && (!is.null(estimator) && estimator == "fd"))) { 298 | nz.obs <- sum(as.numeric(data$W.it !=0)) 299 | if (verbose) 300 | cat("\nTotal number of observations with non-zero weight:", nz.obs,"\n") 301 | flush.console() 302 | } 303 | 304 | 305 | 306 | 307 | ### Demean based on the weights 308 | 309 | wdm.Data <- mf.sorted 310 | 311 | 312 | ## Add Weight and unit index 313 | wdm.Data$W <- data$W.it 314 | wdm.Data$unit <- data$u.index 315 | wdm.Data$time <- data$t.index 316 | wdm.Data <- as.data.frame(wdm.Data) 317 | 318 | ## data for traditional fixed effect 319 | dm.Data <- wdm.Data 320 | 321 | ## excluding zero weights data for weighted fixed effect 322 | 323 | nc <- ncol(wdm.Data) 324 | 325 | ## unique number of units and time with positive weights 326 | if (method == "unit"){ 327 | unit.number.pw <- length(unique(wdm.Data$unit)) 328 | } else if (method == "time"){ 329 | unit.number.pw <- length(unique(wdm.Data$time)) 330 | } else { 331 | stop("method should be either unit or time") 332 | } 333 | 334 | 335 | ## ## Creating the matrix for final analysis 336 | ## Data.dm <- as.data.frame(matrix(NA, ncol = nc-3, nrow = nrow(wdm.Data))) 337 | ## Data.wdm <- as.data.frame(matrix(NA, ncol = nc-3, nrow = nrow(wdm.Data))) 338 | 339 | ## Creating the matrix for final analysis 340 | Data.dm <- dm.Data 341 | Data.wdm <- wdm.Data 342 | 343 | 344 | ## colume names 345 | colnames(Data.dm) <- colnames(wdm.Data)[1:nc] 346 | colnames(Data.wdm) <- colnames(wdm.Data)[1:nc] 347 | 348 | 349 | for (k in 1:(nc-3)) { 350 | ## in C 351 | if (method == "unit"){ 352 | w.wdemean <- WWDemean(wdm.Data[,k], wdm.Data$W, wdm.Data$unit, unit.number.pw, nrow(wdm.Data)) 353 | demean <- Demean(dm.Data[,k], dm.Data$unit, unit.number, nrow(dm.Data)) 354 | Data.wdm[,k] <- as.vector(w.wdemean) 355 | Data.dm[,k] <- as.vector(demean) 356 | } 357 | if (method == "time"){ 358 | w.wdemean <- WWDemean(wdm.Data[,k], wdm.Data$W, wdm.Data$time, unit.number.pw, nrow(wdm.Data)) 359 | demean <- Demean(dm.Data[,k], dm.Data$time, unit.number, nrow(dm.Data)) 360 | Data.wdm[,k] <- as.vector(w.wdemean) 361 | Data.dm[,k] <- as.vector(demean) 362 | } 363 | 364 | } 365 | 366 | ## save weighted demeaned dataframe 367 | if (store.wdm == TRUE){ 368 | Y.wdm <- Data.wdm[,1] 369 | X.wdm <- Data.wdm[,(2:(nc-3))] 370 | } else { 371 | Y.wdm <- NULL 372 | X.wdm <- NULL 373 | } 374 | 375 | 376 | 377 | ## change formula without intercept 378 | a <- unlist(strsplit(as.character(formula), "~")) 379 | formula.ni <- as.formula(paste(a[2], "~ -1 + ", a[3])) 380 | ## print(formula.ni) 381 | 382 | ## final regression on weighted demeaned data 383 | fit.final <- lm(formula.ni, data = Data.wdm) 384 | fit.ols <- lm(formula.ni, data = Data.dm) 385 | 386 | ## brute force matrix calculation 387 | ## V <- as.matrix(Data.wdm[,2:(nc-3)]) 388 | ## coef2 <- ginv(crossprod(V, V))%*% crossprod(V, Data.wdm[,1]) 389 | 390 | 391 | ## residuals 392 | 393 | ## u.tilde <- sqrt(wdm.Data$W)*resid(fit.final) # NT x 1 394 | u.tilde <- resid(fit.final) # NT x 1 395 | 396 | ## alternatively 397 | ## u.tilde <- sqrt(wdm.Data$W) * (Data.wdm[,1] - as.matrix(Data.wdm[,2:(nc-3)])%*%c(fit.final$coef)) 398 | u.hat <- as.matrix(resid(fit.ols)) # NT x 1 399 | 400 | ## saving results 401 | coef.wls <- fit.final$coef 402 | coef.ols <- fit.ols$coef 403 | d.f <- fit.final$df - J.u 404 | ## sigma2 <- (sum((sqrt(wdm.Data$W)*resid(fit.final))^2))/d.f 405 | sigma2 <- (sum((resid(fit.final))^2))/d.f 406 | 407 | ## alternatively 408 | ## sigma2.a <- sum(u.tilde^2)/d.f 409 | ## cat("compare:", sigma2, sigma2.a, "\n") # same... 410 | 411 | 412 | ### Robust Standard Errors 413 | 414 | ## contructing de(weighted)-meaned X matrix 415 | 416 | ## in case only one covariate 417 | if (p == 1){ # one covariate case 418 | X.tilde <- as.matrix(Data.wdm[,2]) 419 | X.hat <- as.matrix(Data.dm[,2]) 420 | } else { 421 | X.tilde <- as.matrix(Data.wdm[,2:(1+p)]) 422 | X.hat <- as.matrix(Data.dm[,2:(1+p)]) 423 | } 424 | 425 | 426 | ## unit vector (the length should be same as nrow(X.tilde) and length(u.tilde) 427 | wdm.unit <- as.vector(Data.wdm$unit) 428 | 429 | ### (Robust) standard errors 430 | 431 | ## cat("3: Robust error calculation started\n") 432 | 433 | 434 | ginv.XX.tilde <- ginv(crossprod(X.tilde, X.tilde)) 435 | ginv.XX.hat <- ginv(crossprod(X.hat, X.hat)) 436 | 437 | diag.ee.tilde <- c(u.tilde^2) 438 | diag.ee.hat <- c(u.hat^2) 439 | 440 | ## e <- environment() 441 | ## save(file = "dyadSE.RData", list = ls(), env = e) 442 | 443 | 444 | if(dyad.se == TRUE){ 445 | 446 | std.error <- "Robust Standard Error for Dyadic Data" 447 | 448 | OmegaDyad <- function(X.tilde, e.tilde, dyadID, c1, c2) { 449 | 450 | uniq.dyadID <- unique(dyadID) 451 | 452 | for(d in 1:length(uniq.dyadID)){ 453 | ## print(d) 454 | dyad.d <- uniq.dyadID[d] 455 | cty1 <- c1[which(dyadID == dyad.d)][1] 456 | cty2 <- c2[which(dyadID == dyad.d)][1] 457 | 458 | idx.d <- which(dyadID == dyad.d) 459 | x.d <- as.matrix(X.tilde[idx.d,]) 460 | e.d <- matrix(e.tilde[idx.d], ncol=1) 461 | 462 | ## consider all the other dyads in which country 1 or country 463 | ## 2 is the member 464 | idx.dprime <- which( (c1==cty1 | c2 == cty2) & dyadID!=dyad.d) 465 | uniq.dprime <- unique(dyadID[idx.dprime]) 466 | 467 | ## in case only one year is observed 468 | if(is.null(nrow(x.d))){ 469 | x.d <- t(matrix(x.d, ncol=1)) 470 | e.d <- as.matrix(as.numeric(e.d)) 471 | Omega.hat <- t(x.d) %*% e.d %*% t(e.d) %*% x.d 472 | } else { 473 | Omega.hat <- t(x.d) %*% e.d %*% t(e.d) %*% x.d 474 | } 475 | 476 | ## loop over all dyads that have one of the two members 477 | for(p in 1:length(uniq.dprime)){ 478 | ## print(p) 479 | dprime <- uniq.dprime[p] 480 | idx.dprime <- which(dyadID == dprime) 481 | x.dprime <- as.matrix(X.tilde[idx.dprime,]) 482 | e.dprime <- matrix(e.tilde[idx.dprime], ncol=1) 483 | 484 | if(is.null(nrow((x.dprime)))){ 485 | x.dprime <- t(matrix(x.dprime, ncol=1)) 486 | e.dprime <- as.matrix(as.numeric(e.dprime)) 487 | Odprime <- t(x.d) %*% e.d %*% t(e.dprime) %*% x.dprime 488 | } else { 489 | Odprime <- t(x.d) %*% e.d %*% t(e.dprime) %*% x.dprime 490 | } 491 | 492 | if(p==1){ 493 | Oprime <- Odprime 494 | } else { 495 | Oprime <- Oprime + Odprime 496 | } 497 | } 498 | 499 | ## storing results 500 | if(d==1){ 501 | Omega.hat.dyad <- Omega.hat + Oprime 502 | } else { 503 | Omega.hat.dyad <- Omega.hat.dyad + Omega.hat + Oprime 504 | } 505 | } 506 | return(Omega.hat.dyad) 507 | } 508 | 509 | Omega.hat.DYAD <- OmegaDyad(X.tilde, u.tilde, data$dyad, data$c1, data$c2) 510 | Omega.hat.fe.DYAD <- OmegaDyad(X.hat, u.hat, data$dyad, data$c1, data$c2) 511 | 512 | Psi.hat.wfe <- (ginv.XX.tilde) %*% Omega.hat.DYAD %*% (ginv.XX.tilde) 513 | Psi.hat.fe <- (ginv.XX.hat) %*% Omega.hat.fe.DYAD %*% (ginv.XX.hat) 514 | 515 | } else if ((hetero.se == TRUE) & (auto.se == TRUE)) {# Default is Arellano 516 | std.error <- "Heteroscedastic / Autocorrelation Robust Standard Error" 517 | 518 | ## 1. arbitrary autocorrelation as well as heteroskedasticity (Eq 12) 519 | 520 | Omega.hat.HAC <- OmegaHatHAC(nrow(X.tilde), p, wdm.unit, J.u, X.tilde, u.tilde) 521 | Omega.hat.HAC <- matrix(Omega.hat.HAC, nrow = p, ncol = p) 522 | ## Omega.hat.HAC <- (1/(nrow(X.tilde)))* Omega.hat.HAC # without degree of freedom adjustment 523 | Omega.hat.HAC <- 1/J.u * Omega.hat.HAC 524 | 525 | Omega.hat.fe.HAC <- OmegaHatHAC(nrow(X.hat), p, wdm.unit, J.u, X.hat, u.hat) 526 | Omega.hat.fe.HAC <- matrix(Omega.hat.fe.HAC, nrow = p, ncol = p) 527 | ## Omega.hat.fe.HAC <- (1/(nrow(X.hat))) * Omega.hat.fe.HAC # without degree of freedom adjustment 528 | Omega.hat.fe.HAC <- 1/J.u * Omega.hat.fe.HAC 529 | 530 | ## Psi.hat.wfe <- (nrow(X.tilde) * ginv.XX.tilde) %*% Omega.hat.HAC %*% (nrow(X.tilde) * ginv.XX.tilde) 531 | ## Psi.hat.fe <- (nrow(X.hat) * ginv.XX.hat) %*% Omega.hat.fe.HAC %*% (nrow(X.hat) * ginv.XX.hat) 532 | 533 | Psi.hat.wfe <- (J.u*ginv.XX.tilde) %*% Omega.hat.HAC %*% (J.u*ginv.XX.tilde) 534 | Psi.hat.fe <- (J.u*ginv.XX.hat) %*% Omega.hat.fe.HAC %*% (J.u*ginv.XX.hat) 535 | 536 | ## degrees of freedom adjustment 537 | Nnonzero <- length(which(data$W.it !=0)) 538 | K <- nc-3 539 | dfHAC <- (J.u/(J.u-1)) * (Nnonzero/(Nnonzero-K+1)) 540 | 541 | Psi.hat.wfe <- dfHAC * Psi.hat.wfe 542 | Psi.hat.fe <- dfHAC * Psi.hat.fe 543 | 544 | 545 | } else if ( (hetero.se == TRUE) & (auto.se == FALSE) ) {# independence across observations but heteroskedasticity 546 | std.error <- "Heteroscedastic Robust Standard Error" 547 | 548 | ## 2. independence across observations but heteroskedasticity (Eq 11) 549 | 550 | Omega.hat.HC <- (1/J.u)*(crossprod((X.tilde*diag.ee.tilde), X.tilde)) 551 | Omega.hat.fe.HC <- (1/J.u)*(crossprod((X.hat*diag.ee.hat), X.hat)) 552 | 553 | 554 | ### Stock-Watson (Econometrica 2008: Eq(6)): Bias-asjusted for balance panel 555 | if (unbiased.se == TRUE) { 556 | ## check if panel is balanced 557 | if (sum(as.numeric(apply(matrix(table(wdm.unit)), 1, mean) != mean(matrix(table(wdm.unit))))) == 0 ){ 558 | std.error <- "Heteroskedastic Standard Error (Stock-Watson Biased Corrected)" 559 | 560 | B.hat <- matrix(0, nrow=dim(X.tilde)[2], ncol=dim(X.tilde)[2]) 561 | B2.hat <- matrix(0, nrow=dim(X.hat)[2], ncol=dim(X.hat)[2]) 562 | for (i in 1:J.u) { 563 | ## cat("unit", i, "\n") 564 | X.i <- X.tilde[Data.wdm$unit == i,] 565 | X2.i <- X.hat[Data.wdm$unit == i,] 566 | ## print(sum(as.numeric(Data.wdm$unit == i))) 567 | ## print(X.i) 568 | if (sum(as.numeric(Data.wdm$unit == i)) > 1) { 569 | u.i <- u.tilde[Data.wdm$unit == i] 570 | u2.i <- u.hat[Data.wdm$unit == i] 571 | ## print(length(u.i)) 572 | flush.console() 573 | XX.i <- crossprod(X.i, X.i) 574 | XX2.i <- crossprod(X2.i, X2.i) 575 | B.hat <- B.hat + ((1/J.t)* XX.i*(1/(J.t-1))*sum(u.i^2)) 576 | B2.hat <- B2.hat + ((1/J.t)* XX2.i*(1/(J.t-1))*sum(u2.i^2)) 577 | } 578 | } 579 | 580 | B.hat <- B.hat * (1/J.u) 581 | B2.hat <- B2.hat * (1/J.u) 582 | 583 | cat("time", J.t, "\n") 584 | Sigma_HRFE <- ((J.t-1)/(J.t-2))*(Omega.hat.HC - (1/(J.t-1))*B.hat) 585 | ## Psi.hat.wfe <- (nrow(X.tilde) * ginv.XX.tilde) %*% Sigma_HRFE %*% (nrow(X.tilde) * ginv.XX.tilde) 586 | Psi.hat.wfe <- (J.u*ginv.XX.tilde) %*% Sigma_HRFE %*% (J.u*ginv.XX.tilde) 587 | 588 | Sigma2_HRFE <- ((J.t-1)/(J.t-2))*(Omega.hat.fe.HC - (1/(J.t-1))*B2.hat) 589 | ## Psi.hat.fe <- (nrow(X.hat) * ginv.XX.hat) %*% Sigma2_HRFE %*% (nrow(X.hat) * ginv.XX.hat) 590 | Psi.hat.fe <- (J.u*ginv.XX.hat) %*% Sigma2_HRFE %*% (J.u*ginv.XX.hat) 591 | 592 | 593 | } else { 594 | stop ("unbiased.se == TRUE is allowed only when panel is balanced") 595 | } 596 | } 597 | 598 | Psi.hat.wfe <- (J.u*ginv.XX.tilde) %*% Omega.hat.HC %*% (J.u*ginv.XX.tilde) 599 | Psi.hat.fe <- (J.u*ginv.XX.hat) %*% Omega.hat.fe.HC %*% (J.u*ginv.XX.hat) 600 | 601 | ## degrees of freedom adjustment: G / (G -1) * N / (N - K + 1) 602 | ## where G is the number of groups (number of fixed effects), 603 | ## N is the number of non-zero weights 604 | Nnonzero <- length(which(data$W.it !=0)) 605 | K <- nc-3 606 | dfHC <- (J.u/(J.u-1)) * (Nnonzero/(Nnonzero-K+1)) 607 | Psi.hat.wfe <- dfHC * Psi.hat.wfe 608 | Psi.hat.fe <- dfHC * Psi.hat.fe 609 | 610 | 611 | } else if ( (hetero.se == FALSE) & (auto.se == FALSE) ) {# indepdence and homoskedasticity 612 | 613 | stop("standard errors with independence and homoskedasticity is not supported") 614 | 615 | ## std.error <- "Homoskedastic Standard Error" 616 | 617 | ## ## Psi.hat.wfe <- nrow(X.tilde) * (sigma2 * ginv.XX.tilde) 618 | ## Psi.hat.wfe <- J.u * (sigma2 * ginv.XX.tilde) 619 | 620 | ## ## Psi.hat.fe <- nrow(X.hat) * vcov(fit.ols) 621 | ## Psi.hat.fe <- J.u * vcov(fit.ols) 622 | 623 | } else if ( (hetero.se == FALSE) & (auto.se == TRUE) ) {# Kiefer 624 | 625 | stop ("robust standard errors with autocorrelation and homoskedasiticy is not supported") 626 | 627 | } 628 | 629 | 630 | if(dyad.se == TRUE){ 631 | var.cov <- Psi.hat.wfe 632 | var.cov.fe <- Psi.hat.fe 633 | } else { 634 | var.cov <- Psi.hat.wfe * (1/J.u) 635 | var.cov.fe <- Psi.hat.fe *(1/J.u) 636 | } 637 | 638 | 639 | ### White (1980) Test: Theorem 4 640 | 641 | Nnonzero <- length(which(data$W.it !=0)) 642 | 643 | diag.ee <- c(u.hat) * c(u.tilde) 644 | 645 | Lambda.hat1 <- 1/((nrow(X.hat) - J.u - p))* (crossprod((X.hat*diag.ee), X.tilde)) 646 | Lambda.hat2 <- 1/((nrow(X.tilde) - J.u - p))* (crossprod((X.tilde*diag.ee), X.hat)) 647 | 648 | 649 | Phi.hat <- Psi.hat.wfe + Psi.hat.fe - (Nnonzero*ginv.XX.hat) %*% Lambda.hat1 %*% (Nnonzero*ginv.XX.tilde) - (Nnonzero*ginv.XX.tilde) %*% Lambda.hat2 %*% (Nnonzero*ginv.XX.hat) 650 | 651 | rm(Lambda.hat1, Lambda.hat2) 652 | gc() 653 | 654 | ## White test: null hypothesis is ``no misspecification'' 655 | ## white.stat <- nrow(X.hat) * t(coef.ols - coef.wls) %*% ginv(Phi.hat) %*% (coef.ols - coef.wls) 656 | white.stat <- Nnonzero * t(coef.ols - coef.wls) %*% ginv(Phi.hat) %*% (coef.ols - coef.wls) 657 | 658 | test.null <- pchisq(as.numeric(white.stat), df=p, lower.tail=F) < White.alpha 659 | white.p <- pchisq(as.numeric(white.stat), df=p, lower.tail=F) 660 | 661 | 662 | flush.console() 663 | 664 | 665 | ## ## compare with sandwich standard error (essentially same) 666 | 667 | ## ## cat("sandwich package regression se HC:", print(sqrt(diag((vcovHC(fit.traditional, type="HC")[1:p,1:p])))), "\n") 668 | ## ## cat("sandwich package regression se HC0:", print(sqrt(diag((vcovHC(fit.traditional, type="HC0")[1:p,1:p])))), "\n") 669 | ## ## cat("sandwich package regression se HC1:", print(sqrt(diag((vcovHC(fit.traditional, type="HC1")[1:p,1:p])))), "\n") 670 | ## ## cat("sandwich package regression se HC2:", print(sqrt(diag((vcovHC(fit.traditional, type="HC2")[1:p,1:p])))), "\n") 671 | ## ## cat("sandwich package regression se HC3:", print(sqrt(diag((vcovHC(fit.traditional, type="HC3")[1:p,1:p])))), "\n") 672 | ## ## cat("sandwich package regression se HC4:", print(sqrt(diag((vcovHC(fit.traditional, type="HC4")[1:p,1:p])))), "\n") 673 | ## ## cat("sandwich package regression se HC4m:", print(sqrt(diag((vcovHC(fit.traditional, type="HC4m")[1:p,1:p])))), "\n") 674 | ## ## cat("sandwich package regression se HC5:", print(sqrt(diag((vcovHC(fit.traditional, type="HC5")[1:p,1:p])))), "\n") 675 | 676 | 677 | ## Creating a weight verctor 678 | ## original index 679 | idx <- paste(orig.unit.idx, orig.time.idx, sep="_") 680 | a <- units 681 | b <- times 682 | Wv <- as.vector(W) # as vector 683 | a1 <- rep(as.character(a$unit), each=nrow(W)) 684 | b1 <- rep(as.character(b$time), ncol(W)) 685 | idxall <- paste(a1, b1, sep="_") 686 | idxall.sub <- idxall[which(idxall %in% idx)] 687 | W.it <- Wv[which(idxall %in% idx)] 688 | u.sub <- unlist(lapply(idxall.sub, 689 | function(x) strsplit(x, "_")[[1]][1])) 690 | t.sub <- unlist(lapply(idxall.sub, 691 | function(x) strsplit(x, "_")[[1]][2])) 692 | cmd <- paste("W1 <- data.frame(", unit.index, "= u.sub)", sep="") 693 | eval(parse(text=cmd)) 694 | if(is.null(time.index)){ 695 | W1$obs.idx <- t.sub 696 | } else { 697 | cmd2 <- paste("W1$", time.index, " <- t.sub", sep="") 698 | eval(parse(text=cmd2)) 699 | } 700 | W1$W.it <- W.it 701 | 702 | ## ensuring the order reflects the original idx 703 | mf$W.it <- W.it[match(idxall.sub, idx)] 704 | u.orig <- unlist(lapply(idx, 705 | function(x) strsplit(x, "_")[[1]][1])) 706 | t.orig <- unlist(lapply(idx, 707 | function(x) strsplit(x, "_")[[1]][2])) 708 | cmd <- paste("D <- data.frame(", unit.index, "= u.orig)", sep="") 709 | eval(parse(text=cmd)) 710 | if(is.null(time.index)){ 711 | D$obs.idx <- t.orig 712 | } else { 713 | cmd2 <- paste("D$", time.index, " <- t.orig", sep="") 714 | eval(parse(text=cmd2)) 715 | } 716 | 717 | mf <- cbind(D, mf) 718 | 719 | Num.nonzero <- length(which(!W1$weights==0)) 720 | 721 | ###Saving results 722 | 723 | 724 | z <- list(coefficients = coef.wls, 725 | x = x[,-1,drop=FALSE], 726 | y = y, 727 | mf = mf, 728 | call = wfe.call, 729 | vcov = var.cov, 730 | se = sqrt(diag(var.cov)), 731 | sigma = sqrt(sigma2), 732 | df = d.f, 733 | residuals = y - (x[,-1,drop=FALSE] %*% coef.wls), 734 | W = W1, 735 | Num.nonzero = Num.nonzero, 736 | uniq.n.units = J.u, 737 | units = units, 738 | times = times, 739 | method = method, 740 | causal = causal, 741 | est = est, 742 | std.error = std.error, 743 | White.pvalue = white.p, 744 | White.alpha = White.alpha, 745 | White.stat = white.stat, 746 | White.test = test.null, 747 | Y.wdm = Y.wdm, 748 | X.wdm = X.wdm) 749 | class(z) <- "wfe" 750 | z 751 | 752 | ### ******************************************************** 753 | ### Two-way Weighted Fixed Effects 754 | ### ******************************************************** 755 | } else { 756 | ## if (verbose) { 757 | ## did <- CalDID(data$u.index, data$t.index, data$TR, data$C.it, 758 | ## y, tn.row, length(uniq.u), length(uniq.t), ate.n, att.n, verbose) 759 | ## cat("\nMulti-period DID estimate with no covariate adjustments is", did ,"\n") 760 | ## flush.console() 761 | ## } 762 | 763 | ## Differences-in-difference 764 | if(( (method=="unit") & (qoi == "ate") & (!is.null(estimator) & estimator == "did")) | 765 | ( (method == "unit") & (qoi =="att") & (!is.null(estimator) & estimator == "did")) | 766 | ( (method=="unit") & (qoi == "ate") & (!is.null(estimator) & estimator == "Mdid")) | 767 | ( (method == "unit") & (qoi =="att") & (!is.null(estimator) & estimator == "Mdid")) 768 | ) { 769 | 770 | method <- "Weighted Two-way" 771 | ## Standard Fixed effect 772 | if (unweighted == TRUE) { 773 | data$W.it <- rep(1, nrow(data)) 774 | W <- matrix(1,nrow=length(uniq.t), ncol=length(uniq.u)) 775 | } else { 776 | if (verbose) { 777 | cat("\nWeight calculation started ") 778 | flush.console() 779 | 780 | } 781 | if(estimator == "Mdid"){ 782 | if(is.null(maxdev.did)){ 783 | maxdev.did <- -1 784 | if (verbose) { 785 | cat(": Nearest Neighbor Matching\n") 786 | flush.console() 787 | } 788 | 789 | } else { 790 | if (verbose) { 791 | cat(": Matching on Pre-Treatment Outcome Within Maximum Deviation", maxdev.did,"\n") 792 | flush.console() 793 | } 794 | 795 | maxdev.did <- as.numeric(maxdev.did) 796 | } 797 | WDiD <- GenWeightsMDID(data$u.index, data$t.index, data$TR, data$C.it, y, maxdev.did, 798 | tn.row, length(uniq.u), length(uniq.t), ate.n, att.n, verbose) 799 | 800 | } else { 801 | WDiD <- GenWeightsDID(data$u.index, data$t.index, data$TR, data$C.it, 802 | tn.row, length(uniq.u), length(uniq.t), ate.n, att.n, verbose) 803 | } 804 | 805 | W <- matrix(WDiD, nrow=length(uniq.t), ncol=length(uniq.u), byrow=T) 806 | data$W.it <- VectorizeC(as.matrix(W), data$t.index, data$u.index, tn.row) 807 | 808 | if (verbose) { 809 | cat("\nWeight calculation done \n") 810 | flush.console() 811 | } 812 | 813 | } 814 | ## e <- environment() 815 | ## save(file = "temp.RData", list = ls(), env = e) 816 | 817 | ## creating index for sparse dummy matrix 818 | 819 | u <- as.matrix(table(data$u.index)) 820 | 821 | Udummy.i <- seq(1:sum(u)) 822 | Udummy.j <- c() 823 | 824 | for (j in 1:length(uniq.u)) { 825 | Udummy.j <- c(Udummy.j, rep(j, u[j,1])) 826 | } 827 | Udummy <- sparseMatrix(x=1, i=Udummy.i, j=Udummy.j) 828 | 829 | t <- as.matrix(table(data$u.index, data$t.index)) 830 | 831 | ## checking panel structure 832 | if (verbose) 833 | if (length(which(t==0)) > 0){ 834 | cat("\nUnbalanced Panel Data\n") 835 | } 836 | if ( length(which(t>1)) > 0 ){ 837 | stop ("\nunit-time pair is not unique\n") 838 | } 839 | flush.console() 840 | 841 | ## this takes time: should be made more efficient 842 | Tdummy <- array(0,dim=c(0,length(uniq.t))) 843 | for (j in 1:nrow(t)) { 844 | Tdummy <- rbind(Tdummy, Diagonal(x = t[j,], n=length(uniq.t))) 845 | } 846 | 847 | ## when panel is unbalanced there will be rows of zeros 848 | zero <- which(apply(Tdummy, 1, mean) == 0) 849 | if(length(zero) > 0) { 850 | Tdummy <- Tdummy[-zero,] 851 | } 852 | 853 | ## if (verbose) 854 | ## cat("\n Dummy creation done \n") 855 | ## flush.console() 856 | 857 | 858 | ######################################################################### 859 | ### Projection for standard twoway FE 860 | 861 | ## if (verbose) 862 | ## cat("\n Standard FE Projection Started \n") 863 | ## flush.console() 864 | 865 | ## this step takes time 866 | P1 <- Udummy %*% tcrossprod(Diagonal(x=1/as.vector(table(data$u.index))), Udummy) 867 | 868 | ## e <- environment() 869 | ## save(file = "test.RData", list = ls(), env = e) 870 | 871 | Q1 <- Diagonal(x=1, n=nrow(Udummy)) - P1 872 | 873 | 874 | ## Q: not too sparse 875 | Q <- Q1 %*% Tdummy 876 | Q.QQginv <- Q %*% ginv(as.matrix(crossprod(Q))) 877 | 878 | X <- as.matrix(X) 879 | Y <- as.matrix(data$y) 880 | 881 | YX <- cbind(Y,X) 882 | 883 | Data.2wdm <- as.data.frame(as.matrix(YX - P1%*%YX - Q.QQginv %*% crossprod(Q,YX))) 884 | colnames(Data.2wdm) <- colnames(mf.sorted) 885 | 886 | rm(YX, P1, Q.QQginv, Q) 887 | gc() 888 | 889 | ## if (verbose) 890 | ## cat("\n Standard FE Projection done \n") 891 | ## flush.console() 892 | 893 | a <- unlist(strsplit(as.character(formula), "~")) 894 | formula.ni <- as.formula(paste(a[2], "~ -1 + ", a[3])) 895 | 896 | 897 | ## final regression on 2way demeaned data 898 | fit.ols <- lm(formula.ni, data = Data.2wdm) 899 | 900 | coef.ols <- fit.ols$coef 901 | resid.ols <- resid(fit.ols) 902 | 903 | u.hat <- as.matrix(resid.ols) 904 | X.hat <- as.matrix(Data.2wdm[,-1]) 905 | rm(Data.2wdm) 906 | gc() 907 | 908 | 909 | ############################################################ 910 | 911 | 912 | ## subset observations with non-zero weights 913 | if (White == TRUE){ 914 | nz.index <- seq(1,tn.row) 915 | } else { # cannot calculate White statistics 916 | ## exclude zero-weights observations for efficient calculation 917 | nz.index <- data$W.it !=0 918 | tn.row <- length(which(data$W.it !=0)) 919 | } 920 | 921 | nz.obs <- sum(as.numeric(data$W.it !=0)) 922 | 923 | X <- as.matrix(X)[nz.index,] 924 | Y <- as.matrix(data$y)[nz.index] 925 | 926 | 927 | ## removing zero weights rows 928 | Udummy <- Udummy[nz.index,] 929 | Tdummy <- Tdummy[nz.index,] 930 | 931 | 932 | ### removing zero columns for full-rank (after deleting zero weights observations) 933 | 934 | ## 1. Unit dummies 935 | 936 | u.zero <- try(which(as(apply(Udummy, 2, sum), "sparseVector") == 0), silent=TRUE) 937 | 938 | if (class(u.zero) == "try-error") { 939 | u.zero <- c() 940 | for (i in 1:ncol(Udummy)) { 941 | if (sum(Udummy[,i]) == 0){ 942 | temp <- i 943 | u.zero <- c(u.zero, temp) 944 | } 945 | } 946 | if (verbose) 947 | cat("\nReached Memory Limit: White == FALSE option is recommended\n") 948 | flush.console() 949 | 950 | if (length(u.zero) > 0) { 951 | Udummy <- Udummy[,-u.zero] 952 | gc() 953 | } 954 | } else { 955 | if (length(u.zero) > 0) { 956 | Udummy <- Udummy[,-u.zero] 957 | gc() 958 | } 959 | } 960 | 961 | ## if (verbose) 962 | ## cat("\n Udummy done\n") 963 | ## flush.console() 964 | 965 | 966 | ## 2. Time dummies 967 | 968 | if (length(which(as(apply(Tdummy, 2, sum), "sparseVector")==0)) > 0) { 969 | zero <- which(as(apply(Tdummy, 2, sum), "sparseVector") == 0) 970 | n.zero <- length(zero) 971 | Tdummy <- Tdummy[,-zero] 972 | ## delete last column 973 | ## last <- ncol(Tdummy) 974 | ## Tdummy <- Tdummy[,-last] 975 | ## adding a column of 1000's for numerical stability of ginv 976 | Tdummy <- cbind(Tdummy, rep(1000, nrow(Tdummy))) 977 | gc() 978 | } else { 979 | last <- ncol(Tdummy) 980 | Tdummy <- Tdummy[,-last] # for identification exclude the last year dummy 981 | ## adding a column of 1's for numerical stability of ginv 982 | Tdummy <- cbind(Tdummy, rep(1000, nrow(Tdummy))) 983 | gc() 984 | } 985 | 986 | ## if (verbose) 987 | ## cat("\n Tdummy done\n") 988 | ## flush.console() 989 | 990 | ## number of columns for Unit/Time dummy matrix 991 | n.Udummy <- ncol(Udummy) 992 | n.Tdummy <- ncol(Tdummy) 993 | 994 | 995 | ## combining unit dummy matrix and X matrix 996 | D <- Matrix(cbind(Udummy, Tdummy)) 997 | ## Note: Tdummy part does not have zero columns, but Udummy part has it 998 | ## will be addressed thie issue below by n.zero 999 | 1000 | ## final number of dummies 1001 | fn.dummies <- ncol(D) # final number of dummies 1002 | 1003 | 1004 | 1005 | ### Projection onto Complex-plane 1006 | 1007 | 1008 | ## if (verbose) 1009 | ## cat("\n Calculation for Projection Matrix Started \n") 1010 | ## flush.console() 1011 | 1012 | ## sqrt of weights (imaginary numbers) 1013 | Im.sqrt <- function(weights) { 1014 | if (weights >= 0) # real number 1015 | im.w <- complex(real=sqrt(weights), imaginary =0) 1016 | if (weights < 0) # imaginary number 1017 | im.w <- complex(real=0, imaginary = sqrt(-weights)) 1018 | invisible(im.w) 1019 | } 1020 | 1021 | ## vector of sqrt(W.it) 1022 | w.sqrt <- sapply(data$W.it[nz.index], Im.sqrt) 1023 | 1024 | 1025 | ######################################################################### 1026 | 1027 | 1028 | ### Matrix multiplication: two sparse matrix : A=R1+I1i, B=R2+I2i 1029 | 1030 | ## A%*%B: result is a list where [[1]] is real part [[2]] is imaginary part of complex matrix multiplication 1031 | Sparse_compMatrixMultiply <- function(R1,I1,R2,I2) { 1032 | result <- list() 1033 | result[[1]] <- drop0(R1%*%R2) - drop0(I1%*%I2) 1034 | result[[2]] <- drop0(R1%*%I2) + drop0(I1%*%R2) 1035 | result 1036 | } 1037 | 1038 | ## A %*% t(B): result is a list where [[1]] is real part [[2]] is imaginary part of complex matrix multiplication 1039 | Sparse_compMatrix_tcrossprod <- function(R1,I1,R2,I2) { 1040 | ## real part 1041 | result <- list() 1042 | result[[1]] <- drop0(tcrossprod(R1, R2)) - drop0(tcrossprod(I1, I2)) 1043 | result[[2]] <- drop0(tcrossprod(R1, I2)) + drop0(tcrossprod(I1, R2)) 1044 | result 1045 | } 1046 | 1047 | ## t(A) %*% B: result is a list where [[1]] is real part [[2]] is imaginary part of complex matrix multiplication 1048 | Sparse_compMatrix_crossprod <- function(R1,I1,R2,I2) { 1049 | ## real part 1050 | result <- list() 1051 | result[[1]] <- drop0(crossprod(R1, R2)) - drop0(crossprod(I1, I2)) 1052 | result[[2]] <- drop0(crossprod(R1, I2)) + drop0(crossprod(I1, R2)) 1053 | result 1054 | } 1055 | 1056 | 1057 | R1 <- Diagonal(x = Re(w.sqrt), n=tn.row) 1058 | I1 <- Diagonal(x = Im(w.sqrt), n=tn.row) 1059 | 1060 | yL <- list() 1061 | yL[[1]] <- Matrix(Y) 1062 | yL[[2]] <- Matrix(0, nrow=nrow(yL[[1]]), ncol=ncol(yL[[1]])) 1063 | y.starL <- Sparse_compMatrixMultiply(R1,I1, yL[[1]], yL[[2]]) 1064 | rm(yL) 1065 | gc() 1066 | 1067 | xL <- list() 1068 | xL[[1]] <- Matrix(X) 1069 | xL[[2]] <- Matrix(0, nrow=nrow(xL[[1]]), ncol=ncol(xL[[1]])) 1070 | x.starL <- Sparse_compMatrixMultiply(R1,I1, xL[[1]], xL[[2]]) 1071 | rm(xL) 1072 | gc() 1073 | 1074 | ######################################################################### 1075 | 1076 | ## create D1, and D2 1077 | R2 <- Matrix(D) 1078 | I2 <- Matrix(0, nrow=nrow(D), ncol=ncol(D)) 1079 | 1080 | D.starL <- Sparse_compMatrixMultiply(R1,I1,R2,I2) 1081 | 1082 | ## e <- environment() 1083 | ## save(file = "temp.RData", list = ls(), env = e) 1084 | 1085 | rm(R2, I2) 1086 | gc() 1087 | 1088 | 1089 | D1.starL <- D2.starL <- list() 1090 | 1091 | ## e <- environment() 1092 | ## save(file = "temp.RData", list = ls(), env = e, compress = TRUE) 1093 | 1094 | 1095 | D1.starL[[1]] <- drop0(D.starL[[1]][,1:n.Udummy]) 1096 | D1.starL[[2]] <- drop0(D.starL[[2]][,1:n.Udummy]) 1097 | D2.starL[[1]] <- drop0(D.starL[[1]][,((n.Udummy+1):(n.Udummy+n.Tdummy))]) 1098 | D2.starL[[2]] <- drop0(D.starL[[2]][,((n.Udummy+1):(n.Udummy+n.Tdummy))]) 1099 | ## cat("Number of columns for D1", n.Udummy, "\n") 1100 | ## cat("Number of columns for D2", n.Tdummy, "\n") 1101 | 1102 | rm(D.starL) 1103 | gc() 1104 | ######################################################################### 1105 | 1106 | ## sum of sqrt(W.it) across years for each dyad 1107 | general.inv <- function(weight, tol = tol){ 1108 | if(abs(weight) < tol) { 1109 | out <- 0 1110 | } else { 1111 | out <- 1/weight 1112 | } 1113 | out 1114 | } 1115 | 1116 | sum.sqrtW <- complex(real=apply(Sparse_compMatrixMultiply(R1,I1,D1.starL[[1]],D1.starL[[2]])[[1]], 2, sum), imaginary=rep(0, length(J.u))) 1117 | rm(R1, I1) 1118 | gc() 1119 | 1120 | 1121 | inv.weight <- sapply(sum.sqrtW, general.inv, tol) 1122 | 1123 | rm(sum.sqrtW) 1124 | gc() 1125 | 1126 | ## e <- environment() 1127 | ## save(file = "temp.RData", list = ls(), env = e) 1128 | 1129 | ginvW <- list() 1130 | ginvW[[1]] <- Diagonal(x = Re(inv.weight)) 1131 | ginvW[[2]] <- Diagonal(x = Im(inv.weight)) 1132 | 1133 | rm(inv.weight) 1134 | gc() 1135 | 1136 | Dginv <- Sparse_compMatrixMultiply(D1.starL[[1]], D1.starL[[2]], ginvW[[1]], ginvW[[2]]) 1137 | rm(ginvW) 1138 | gc() 1139 | 1140 | P1L <- Sparse_compMatrix_tcrossprod(Dginv[[1]], Dginv[[2]], D1.starL[[1]], D1.starL[[2]]) 1141 | 1142 | rm(Dginv, D1.starL) 1143 | gc() 1144 | 1145 | ## if (verbose) { 1146 | ## cat("\n P1 created\n") 1147 | ## flush.console() 1148 | ## } 1149 | 1150 | Q1L <- list() 1151 | Q1L[[1]] <- drop0(Diagonal(x=1, n=nrow(P1L[[1]])) - P1L[[1]]) 1152 | Q1L[[2]] <- drop0(Diagonal(x=0, n=nrow(P1L[[1]])) - P1L[[2]]) 1153 | 1154 | ## if (verbose) { 1155 | ## cat("\n Q1 created\n") 1156 | ## flush.console() 1157 | ## } 1158 | 1159 | Q <- try(Sparse_compMatrixMultiply(Q1L[[1]], Q1L[[2]], D2.starL[[1]], D2.starL[[2]]), silent = TRUE) 1160 | if ((class(Q) == "try-error") & (White == TRUE)) { 1161 | stop ("Insufficient memory. White = FALSE option is needed") 1162 | } 1163 | rm(Q1L, D2.starL) 1164 | gc() 1165 | 1166 | 1167 | Q.matrix <- matrix(complex(real=as.matrix(Q[[1]]), imaginary=as.matrix(Q[[2]])), nrow=nrow(Q[[1]])) 1168 | 1169 | QQ.inv <- list() 1170 | QQ.inv[[1]] <- drop0(Matrix(Re(ginv(crossprod(Q.matrix))))) 1171 | QQ.inv[[2]] <- drop0(Matrix(Im(ginv(crossprod(Q.matrix))))) 1172 | rm(Q.matrix) 1173 | gc() 1174 | 1175 | PL <- list() 1176 | 1177 | Q.QQinv <- Sparse_compMatrixMultiply(Q[[1]], Q[[2]], QQ.inv[[1]], QQ.inv[[2]]) 1178 | rm(QQ.inv) 1179 | gc() 1180 | 1181 | ## if (verbose) { 1182 | ## cat("\n Q.QQinv created\n") 1183 | ## flush.console() 1184 | ## } 1185 | 1186 | ######################################################################### 1187 | ### Fast Projection in R 1188 | 1189 | 1190 | YX.starL <- list() 1191 | YX.starL[[1]] <- cbind(y.starL[[1]], x.starL[[1]]) 1192 | YX.starL[[2]] <- cbind(y.starL[[2]], x.starL[[2]]) 1193 | rm(y.starL, x.starL) 1194 | gc() 1195 | 1196 | 1197 | P1.YX <- Sparse_compMatrixMultiply(P1L[[1]], P1L[[2]], YX.starL[[1]], YX.starL[[2]]) 1198 | rm(P1L) 1199 | gc() 1200 | 1201 | 1202 | Q.YX <- Sparse_compMatrix_crossprod(Q[[1]], Q[[2]], YX.starL[[1]], YX.starL[[2]]) 1203 | rm(Q) 1204 | gc() 1205 | 1206 | QQQQ.YX <- Sparse_compMatrixMultiply(Q.QQinv[[1]], Q.QQinv[[2]], Q.YX[[1]], Q.YX[[2]]) 1207 | 1208 | ## cat("dimension of P1.YX:", dim(P1.YX[[1]]), "\n") 1209 | ## cat("dimension of QQQQ.YX:", dim(QQQQ.YX[[1]]), "\n") 1210 | 1211 | ## TransformedL <- list() 1212 | ## TransformedL[[1]] <- YX.starL[[1]] - P1.YX[[1]] - P2.YX[[1]] 1213 | ## TransformedL[[2]] <- YX.starL[[2]] - P1.YX[[2]] - P2.YX[[2]] 1214 | 1215 | Transformed <- matrix(complex(real=as.vector(YX.starL[[1]] - P1.YX[[1]] - QQQQ.YX[[1]]), imaginary=as.vector(YX.starL[[2]] - P1.YX[[2]] - QQQQ.YX[[2]])), nrow=tn.row) 1216 | rm(YX.starL, P1.YX, QQQQ.YX) 1217 | 1218 | y.tilde <- Transformed[,1] 1219 | X.tilde <- as.matrix(Transformed[,-1]) 1220 | 1221 | ## save weighted demeaned dataframe 1222 | if (store.wdm == TRUE){ 1223 | Y.wdm <- y.tilde 1224 | X.wdm <- X.tilde 1225 | } else { 1226 | Y.wdm <- NULL 1227 | X.wdm <- NULL 1228 | } 1229 | 1230 | rm(Transformed) 1231 | gc() 1232 | 1233 | if (ncol(X.tilde) == 1) { 1234 | colnames(X.tilde) <- a[3] 1235 | } else { 1236 | colnames(X.tilde) <- colnames(X) 1237 | } 1238 | 1239 | ginv.XX.tilde <- ginv(crossprod(X.tilde)) 1240 | betaT <- ginv.XX.tilde%*% crossprod(X.tilde, y.tilde) 1241 | if (length(betaT) == 1) { 1242 | colnames(betaT) <- a[3] 1243 | } 1244 | ## print(betaT) 1245 | coef.wls <- matrix(as.double(Re(betaT))) 1246 | rownames(coef.wls) <- colnames(X.tilde) 1247 | 1248 | ## e <- environment() 1249 | ## save(file = "temp.RData", list = ls(), env = e) 1250 | 1251 | ## weighted residuals 1252 | e.tilde <- (y.tilde - X.tilde %*% betaT) 1253 | colnames(e.tilde) <- "e.tilde" 1254 | ## true residuals 1255 | resid <- try(1/w.sqrt * (y.tilde - X.tilde %*% betaT)) 1256 | 1257 | ## in case zero weights observations are not excluded 1258 | if (White == TRUE){ 1259 | if (sum(as.numeric(w.sqrt==0)) > 0) { 1260 | zero.index <- data$W.it ==0 1261 | resid[zero.index] <- 0 1262 | } 1263 | } 1264 | rm(w.sqrt) 1265 | gc() 1266 | 1267 | ## check residuals 1268 | ## print(cbind(as.matrix(true.resid), e.tilde, data$W.it)) 1269 | ## print(cbind(sum(true.resid^2), sum(e.tilde^2))) 1270 | 1271 | 1272 | ## diag.ee.tilde <- diag(tcrossprod(e.tilde,e.tilde)) 1273 | ## diag.resid <- as.vector(resid * resid) 1274 | diag.ee.tilde <- as.vector(e.tilde * e.tilde) 1275 | 1276 | ######################################################################### 1277 | 1278 | ## cat("dimension of X.tilde:", dim(X.tilde), "\n") 1279 | 1280 | ## XX.hat <- crossprod(X.hat, X.hat) 1281 | ginv.XX.hat <- ginv(crossprod(X.hat, X.hat)) 1282 | ## d.f <- length(y.tilde) - n.Udummy - n.Tdummy - dim(X.tilde)[2] 1283 | d.f <- length(y.tilde) 1284 | 1285 | ## cat("Sum of squared residuals:", sum(resid^2), "\n") 1286 | sigma2 <- as.double(Re(sum(resid^2)/d.f)) 1287 | 1288 | ## Remove observations with zero weights 1289 | ## data backup 1290 | data.zero <- data 1291 | zero.ind <- which(data$W.it==0) 1292 | if(length(zero.ind) > 0){ 1293 | data.nonzero <- data[-zero.ind, ] 1294 | } else { 1295 | data.nonzero <- data 1296 | } 1297 | n.units <- length(unique(data$u.index)) 1298 | n.times <- length(unique(data$t.index)) 1299 | 1300 | Mstar <- nrow(data.nonzero) 1301 | if (verbose) 1302 | cat("\nTotal number of observations with non-zero weight:", Mstar,"\n") 1303 | flush.console() 1304 | 1305 | if(unweighted == FALSE){ 1306 | n.nonzero.units <- length(unique(data.nonzero$u.index)) 1307 | n.nonzero.times <- length(unique(data.nonzero$t.index)) 1308 | } else { 1309 | n.nonzero.units <- n.units 1310 | n.nonzero.times <- n.times 1311 | } 1312 | 1313 | x.vars <- colnames(x) 1314 | x.vars <- x.vars[-grep("Intercept", x.vars)] 1315 | nK <- length(x.vars) 1316 | variables <- c("y", x.vars) 1317 | 1318 | ## e <- environment() 1319 | ## save(file = "temp.RData", list = ls(), env = e) 1320 | 1321 | 1322 | ## ####################################################################### 1323 | ## (Robust) standard errors (GMM asymptotic variance for wfe) 1324 | ## calculating GMM standard errors 1325 | ## ####################################################################### 1326 | 1327 | if ((hetero.se == TRUE) & (auto.se == TRUE)){ 1328 | 1329 | ## 1. arbitrary autocorrelation as well as heteroskedasticity (Eq 12) 1330 | std.error <- "Heteroscedastic / Autocorrelation Robust Standard Error" 1331 | ## stop ("Robust standard errors with autocorrelation is currently not supported") 1332 | 1333 | ## ## MeatHAC takes unit level data with T_i rows and 1334 | ## ## compute t(X_i) %*% e_i %*% t(e_i) %*% X_i 1335 | ## MeatHAC <- function(x){ 1336 | ## Xtilde <- as.matrix(x[, -c(1,2)]) # removing u.index, e.tilde 1337 | ## etilde <- x[,2] 1338 | ## Meat <- t(Xtilde) %*% etilde %*% t(etilde) %*% Xtilde 1339 | ## Meat <- matrix(Meat, ncol=ncol(Xtilde), nrow=ncol(Xtilde)) 1340 | ## return(Meat) 1341 | ## } 1342 | 1343 | ## ## prepare data for vcov calculation & remove units with zero weights 1344 | ## D.tilde <- data.frame(u.index = data[,c("u.index")], e.tilde, X.tilde) 1345 | ## colnames(D.tilde)[1] <- c("u.index") 1346 | 1347 | ## if(length(zero.ind)>0){ 1348 | ## D.tilde <- D.tilde[-zero.ind,] 1349 | ## e.tilde <- e.tilde[-zero.ind] 1350 | ## } else { 1351 | ## D.tilde <- D.tilde 1352 | ## e.tilde <- e.tilde 1353 | ## } 1354 | 1355 | 1356 | ## ----------------------------------------------------- 1357 | ## vcov matrix for WFE 1358 | ## ----------------------------------------------------- 1359 | 1360 | ## degrees of freedom adjustment 1361 | df_wfe2 <- (Mstar/(Mstar-1))*((Mstar-nK)/(Mstar- n.nonzero.units - n.nonzero.times - nK)) 1362 | 1363 | ## ## meat part 1364 | ## XeeX <- lapply(split(D.tilde,D.tilde$u.index), MeatHAC) 1365 | ## XeeX.wfe <- matrix(0, nrow=nK, ncol=nK) 1366 | ## ## add unit level vcov 1367 | ## for(g in 1:n.nonzero.units){ 1368 | ## XeeX.wfe <- XeeX.wfe + XeeX[[g]] 1369 | ## } 1370 | 1371 | ## ## sandwich estimator 1372 | ## Psi.hat.wfe2 <- df_wfe2 * (ginv.XX.tilde %*% XeeX.wfe %*% ginv.XX.tilde) 1373 | ## print(Psi.hat.wfe2) 1374 | 1375 | XeeX <- as.double(comp_OmegaHAC(c(X.tilde), e.tilde, c(X.tilde), e.tilde, 1376 | dim(X.tilde)[1], dim(X.tilde)[2], data$u.index, J.u)) 1377 | XeeX.wfe <- matrix(XeeX, nrow=ncol(X.tilde), ncol=ncol(X.tilde), byrow=T) 1378 | 1379 | Psi.hat.wfe <- df_wfe2*((ginv.XX.tilde %*% XeeX.wfe %*% ginv.XX.tilde)) 1380 | 1381 | ## print(Psi.hat.wfe) 1382 | 1383 | ## ----------------------------------------------------- 1384 | ## vcov matrix for FE for White statistics calculation 1385 | ## ----------------------------------------------------- 1386 | 1387 | if (White == TRUE){ 1388 | 1389 | ## degrees of freedom adjustment 1390 | df_fe2 <- (nrow(X.hat)/(nrow(X.hat)-1))*((nrow(X.hat)-nK)/(nrow(X.hat)-n.units-n.times-nK)) 1391 | 1392 | ## ## prepare data for vcov calculation 1393 | ## D.hat <- data.frame(u.index = data[,c("u.index")], u.hat, X.hat) 1394 | ## colnames(D.hat)[1] <- c("u.index") 1395 | 1396 | ## ## SE calcluation 1397 | ## XeeX <- lapply(split(D.hat,D.hat$u.index), MeatHAC) 1398 | ## XeeX.fe <- matrix(0, nrow=nK, ncol=nK) 1399 | ## ## add unit level vcov 1400 | ## for(g in 1:n.units){ 1401 | ## XeeX.fe <- XeeX.fe + XeeX[[g]] 1402 | ## } 1403 | 1404 | ## Psi.hat.fe <- df_fe2 * (ginv.XX.hat %*% XeeX.fe %*% ginv.XX.hat) 1405 | ## print(Psi.hat.fe) 1406 | 1407 | XeeX <- OmegaHatHAC(nrow(X.hat), ncol(X.hat), data$u.index, J.u, X.hat, u.hat) 1408 | XeeX.fe <- matrix(XeeX, nrow = ncol(X.hat), ncol = ncol(X.hat)) 1409 | 1410 | Psi.hat.fe <- df_fe2 * (ginv.XX.hat %*% XeeX.fe %*% ginv.XX.hat) 1411 | ## print(Psi.hat.fe) 1412 | 1413 | ## storing standard errors 1414 | var.cov.fe <- Psi.hat.fe 1415 | se.ols <- sqrt(diag(var.cov.fe)) 1416 | ## print(Psi.hat.fe) 1417 | 1418 | } 1419 | 1420 | 1421 | } else if ( (hetero.se == TRUE) & (auto.se == FALSE)) { 1422 | stop("Please set hetero.se == TRUE & auto.se == TRUE when you run two-way FE") 1423 | } else if ( (hetero.se == FALSE) & (auto.se == FALSE) ) {# indepdence and homoskedasticity 1424 | stop("Please set hetero.se == TRUE & auto.se == TRUE when you run two-way FE") 1425 | } else if ( (hetero.se == FALSE) & (auto.se == TRUE) ) {# Kiefer 1426 | stop ("Robust standard errors with autocorrelation and homoskedasiticy is not supported") 1427 | } 1428 | 1429 | ## storing standard errors 1430 | vcov.wfe <- Psi.hat.wfe 1431 | se.did <- as.double(Re(sqrt(diag(vcov.wfe)))) 1432 | 1433 | if (verbose) { 1434 | cat("\nStd.error calculation done\n") 1435 | flush.console() 1436 | } 1437 | 1438 | ### White (1980) Test: Theorem 4 1439 | 1440 | if (White == TRUE){ 1441 | 1442 | df.white <- (nrow(X.hat)-nK)/(Mstar- n.nonzero.units - n.nonzero.times - nK) 1443 | 1444 | ## ## MeatHAC_White takes unit level data with T_i rows and 1445 | ## ## compute t(X_i) %*% e1_i %*% t(e2_i) %*% X_i 1446 | ## MeatHAC_White <- function(x,y){ 1447 | ## X1 <- as.matrix(x[, -c(1,2)]) # removing u.index, e.tilde 1448 | ## X2 <- as.matrix(y[, -c(1,2)]) # removing u.index, e.tilde, e.hat 1449 | ## e1 <- x[,2] 1450 | ## e2 <- y[,2] 1451 | ## Meat <- t(X1) %*% e1 %*% t(e2) %*% X2 1452 | ## Meat <- matrix(Meat, ncol=ncol(X1), nrow=ncol(X1)) 1453 | ## ## print(Meat) 1454 | ## return(Meat) 1455 | ## } 1456 | 1457 | ## ## ----------------------------------------------------- 1458 | ## ## cov term esitmate for beta_fe2 - beta_wfe2 1459 | ## ## ----------------------------------------------------- 1460 | 1461 | ## ## meat part 1462 | ## XeeX1 <- mapply(MeatHAC_White, split(D.tilde,D.tilde$u.index), split(D.hat,D.hat$u.index)) 1463 | ## XeeX2 <- mapply(MeatHAC_White, split(D.hat,D.hat$u.index), split(D.tilde,D.tilde$u.index)) 1464 | 1465 | ## Meat1 <- matrix(0, nrow=nK, ncol=nK) 1466 | ## Meat2 <- matrix(0, nrow=nK, ncol=nK) 1467 | ## ## add unit level vcov 1468 | ## for(g in 1:n.nonzero.units){ 1469 | ## if(nK==1){ 1470 | ## Meat1 <- Meat1 + matrix(XeeX1[g], ncol=nK, nrow=nK) 1471 | ## Meat2 <- Meat2 + matrix(XeeX2[g], ncol=nK, nrow=nK) 1472 | ## } else { 1473 | ## Meat1 <- Meat1 + matrix(XeeX1[,g], ncol=nK, nrow=nK) 1474 | ## Meat2 <- Meat2 + matrix(XeeX2[,g], ncol=nK, nrow=nK) 1475 | ## } 1476 | ## } 1477 | 1478 | ## cov.term <- df.white*( (ginv.XX.tilde %*% Meat1 %*% ginv.XX.hat) + (ginv.XX.hat %*% Meat2 %*% ginv.XX.tilde)) 1479 | ## print(cov.term) 1480 | 1481 | meat1 <- as.double(comp_OmegaHAC(c(X.tilde), e.tilde, c(X.hat), u.hat, 1482 | dim(X.tilde)[1], dim(X.hat)[2], data$u.index, J.u)) 1483 | Meat1 <- matrix(meat1, nrow=ncol(X.tilde), ncol=ncol(X.tilde), byrow=T) 1484 | 1485 | meat2 <- as.double(comp_OmegaHAC(c(X.hat), u.hat, c(X.tilde), e.tilde, 1486 | dim(X.hat)[1], dim(X.tilde)[2], data$u.index, J.u)) 1487 | Meat2 <- matrix(meat2, nrow=ncol(X.tilde), ncol=ncol(X.tilde), byrow=T) 1488 | 1489 | cov.term <- df.white*( (ginv.XX.tilde %*% Meat1 %*% ginv.XX.hat) + (ginv.XX.hat %*% Meat2 %*% ginv.XX.tilde)) 1490 | ## print(cov.term) 1491 | 1492 | Phi.hat <- Psi.hat.wfe + Psi.hat.fe - cov.term 1493 | 1494 | ## ----------------------------------------------------- 1495 | ## White test: null hypothesis is ``no misspecification'' 1496 | ## ----------------------------------------------------- 1497 | 1498 | white.stat <- as.double(Re(t(coef.ols - coef.wls) %*% ginv(Phi.hat) %*% (coef.ols - coef.wls))) 1499 | 1500 | test.null <- pchisq(as.numeric(white.stat), df=nK, lower.tail=F) < White.alpha 1501 | white.p <- pchisq(as.numeric(white.stat), df=nK, lower.tail=F) 1502 | 1503 | ## e <- environment() 1504 | ## save(file = "temp.RData", list = ls(), env = e) 1505 | 1506 | flush.console() 1507 | 1508 | if (verbose) { 1509 | cat("\nWhite calculation done\n") 1510 | flush.console() 1511 | } 1512 | 1513 | 1514 | } else { 1515 | white.stat <- "NULL" 1516 | test.null <- "NULL" 1517 | white.p <- "NULL" 1518 | } 1519 | 1520 | 1521 | ## Creating a weight verctor 1522 | ## original index 1523 | idx <- paste(orig.unit.idx, orig.time.idx, sep="_") 1524 | a <- units 1525 | b <- times 1526 | Wv <- as.vector(W) # as vector 1527 | a1 <- rep(as.character(a$unit), each=nrow(W)) 1528 | b1 <- rep(as.character(b$time), ncol(W)) 1529 | idxall <- paste(a1, b1, sep="_") 1530 | idxall.sub <- idxall[which(idxall %in% idx)] 1531 | W.it <- Wv[which(idxall %in% idx)] 1532 | u.sub <- unlist(lapply(idxall.sub, 1533 | function(x) strsplit(x, "_")[[1]][1])) 1534 | t.sub <- unlist(lapply(idxall.sub, 1535 | function(x) strsplit(x, "_")[[1]][2])) 1536 | 1537 | cmd <- paste("W1 <- data.frame(", unit.index, "= u.sub)", sep="") 1538 | eval(parse(text=cmd)) 1539 | if(is.null(time.index)){ 1540 | W1$obs.idx <- t.sub 1541 | } else { 1542 | cmd2 <- paste("W1$", time.index, " <- t.sub", sep="") 1543 | eval(parse(text=cmd2)) 1544 | } 1545 | W1$W.it <- W.it 1546 | 1547 | ## ensuring the order reflects the original idx 1548 | mf$W.it <- W.it[match(idxall.sub, idx)] 1549 | u.orig <- unlist(lapply(idx, 1550 | function(x) strsplit(x, "_")[[1]][1])) 1551 | t.orig <- unlist(lapply(idx, 1552 | function(x) strsplit(x, "_")[[1]][2])) 1553 | 1554 | cmd <- paste("D <- data.frame(", unit.index, "= u.orig)", sep="") 1555 | eval(parse(text=cmd)) 1556 | if(is.null(time.index)){ 1557 | D$obs.idx <- t.orig 1558 | } else { 1559 | cmd2 <- paste("D$", time.index, " <- t.orig", sep="") 1560 | eval(parse(text=cmd2)) 1561 | } 1562 | 1563 | mf <- cbind(D, mf) 1564 | Num.nonzero <- length(which(!W1$weights==0)) 1565 | 1566 | 1567 | 1568 | ### Saving results 1569 | 1570 | z <- list(coefficients = coef.wls, 1571 | x = x[,-1,drop=FALSE], 1572 | y = y, 1573 | mf = mf, 1574 | call = wfe.call, 1575 | vcov = as.numeric(vcov.wfe), 1576 | se = se.did, 1577 | sigma = try(sqrt(sigma2)), 1578 | df = d.f, 1579 | residuals = y - (x[,-1,drop=FALSE] %*% coef.wls), 1580 | W = W1, 1581 | Num.nonzero = Num.nonzero, 1582 | units = units, 1583 | times = times, 1584 | method = method, 1585 | causal = causal, 1586 | est = est, 1587 | std.error = std.error, 1588 | White.pvalue = white.p, 1589 | White.alpha = White.alpha, 1590 | White.stat = white.stat, 1591 | White.test = test.null, 1592 | Y.wdm = Y.wdm, 1593 | X.wdm = X.wdm) 1594 | 1595 | class(z) <- "wfedid" 1596 | z 1597 | } 1598 | 1599 | } 1600 | } 1601 | 1602 | 1603 | 1604 | 1605 | ### print wfe class 1606 | 1607 | print.wfe <- function(x,...){ 1608 | cat("Call:\n") 1609 | print(x$call) 1610 | cat("\nCoefficients:\n") 1611 | print(x$coefficients) 1612 | cat("\nStd.Err:\n") 1613 | print(x$se) 1614 | } 1615 | 1616 | summary.wfe <- function(object, signif.stars = getOption("show.signif.stars"),...){ 1617 | se <- object$se 1618 | sigma <- object$sigma 1619 | df <- object$df 1620 | tval <- try(coef(object) / se) 1621 | TAB <- cbind(Estimate = coef(object), 1622 | Std.Error = se, 1623 | t.value = tval, 1624 | p.value = 2*pt(-abs(tval), df = object$df)) 1625 | res <- list(call = object$call, 1626 | coefficients = TAB, 1627 | sigma = object$sigma, 1628 | df = object$df, 1629 | W = object$W, 1630 | Num.nonzero = object$Num.nonzero, 1631 | units = object$units, 1632 | times = object$times, 1633 | residuals = object$residuals, 1634 | method = object$method, 1635 | causal = object$causal, 1636 | estimator = object$est, 1637 | std.error = object$std.error, 1638 | White.pvalue = object$White.pvalue, 1639 | White.alpha = object$White.alpha, 1640 | White.stat = object$White.stat, 1641 | White.test = object$White.test, 1642 | Y = object$y, 1643 | X = object$x, 1644 | Y.wdm = object$Y.wdm, 1645 | X.wdm = object$X.wdm 1646 | ) 1647 | class(res) <- "summary.wfe" 1648 | res 1649 | } 1650 | 1651 | print.summary.wfe <- function(x, ...){ 1652 | cat("\nMethod:", x$method, "Fixed Effects\n") 1653 | cat("\nQuantity of Interest:", x$causal) 1654 | cat("\nEstimator:", x$estimator) 1655 | cat("\nStandard Error:", x$std.error) 1656 | cat("\n") 1657 | cat("\n") 1658 | cat("Call:\n") 1659 | print(x$call) 1660 | cat("\n") 1661 | cat("Coefficients:\n") 1662 | printCoefmat(x$coefficients, P.values=TRUE, has.Pvalue=TRUE) 1663 | cat("\nResidual standard error:", format(signif(x$sigma, 1664 | 4)), "on", x$df, "degrees of freedom") 1665 | cat("\nWhite statistics for functional misspecification:", x$White.stat, "with Pvalue=", x$White.pvalue) 1666 | cat("\nReject the null of NO misspecification:", x$White.test) 1667 | cat("\n") 1668 | } 1669 | 1670 | 1671 | 1672 | ### print wfedid class 1673 | 1674 | print.wfedid <- function(x,...){ 1675 | cat("Call:\n") 1676 | print(x$call) 1677 | cat("\nCoefficients:\n") 1678 | print(x$coefficients) 1679 | cat("\nStd.Err:\n") 1680 | print(x$se) 1681 | } 1682 | 1683 | summary.wfedid <- function(object, signif.stars = getOption("show.signif.stars"),...){ 1684 | coef <- object$coefficients 1685 | se <- object$se 1686 | sigma <- object$sigma 1687 | df <- object$df 1688 | tval <- coef(object) / se 1689 | TAB <- cbind(Estimate = coef, 1690 | Std.Error = se, 1691 | t.value = tval, 1692 | p.value = 2*pt(-abs(tval), df = object$df)) 1693 | res <- list(call = object$call, 1694 | coefficients = TAB, 1695 | sigma = object$sigma, 1696 | df = object$df, 1697 | W = object$W, 1698 | Num.nonzero = object$Num.nonzero, 1699 | residuals = object$residuals, 1700 | method = object$method, 1701 | causal = object$causal, 1702 | estimator = object$est, 1703 | std.error = object$std.error, 1704 | White.pvalue = object$White.pvalue, 1705 | White.alpha = object$White.alpha, 1706 | White.stat = object$White.stat, 1707 | White.test = object$White.test, 1708 | Y = object$y, 1709 | X = object$x, 1710 | Y.wdm = object$Y.wdm, 1711 | X.wdm = object$X.wdm 1712 | ) 1713 | class(res) <- "summary.wfedid" 1714 | res 1715 | } 1716 | 1717 | print.summary.wfedid <- function(x, ...){ 1718 | cat("\nMethod:", x$method, "Fixed Effects\n") 1719 | cat("\nQuantity of Interest:", x$causal) 1720 | cat("\nEstimator:", x$estimator) 1721 | cat("\nStandard Error:", x$std.error) 1722 | cat("\n") 1723 | cat("\n") 1724 | cat("Call:\n") 1725 | cat("Coefficients:\n") 1726 | print(x$call) 1727 | cat("\n") 1728 | colnames(x$coefficients) <- c("Estimate", "Std.Err", "t value", "Pr(>|t|)") 1729 | printCoefmat(x$coefficients, P.values=TRUE, has.Pvalue=TRUE) 1730 | cat("\nResidual standard error:", format(signif(x$sigma, 1731 | 4)), "on", x$df, "degrees of freedom") 1732 | if (!is.null(x$White.stat)){ 1733 | cat("\nWhite statistics for functional misspecification:", x$White.stat, "with Pvalue=", x$White.pvalue) 1734 | cat("\nReject the null of NO misspecification:", x$White.test) 1735 | } 1736 | cat("\n") 1737 | } 1738 | 1739 | 1740 | 1741 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # wfe: Weighted Linear Fixed Effects Regression Models for Causal Inference [![Build Status](https://travis-ci.org/insongkim/wfe.svg?branch=master)](https://travis-ci.org/insongkim/wfe) [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/wfe)](https://cran.r-project.org/package=wfe) ![CRAN downloads](http://cranlogs.r-pkg.org/badges/grand-total/wfe) 2 | 3 | This R package provides a computationally efficient way of fitting 4 | weighted linear fixed effects estimators for causal inference with 5 | various weighting schemes. Imai and Kim (2016) show that weighted 6 | linear fixed effects estimators can be used to estimate the average 7 | treatment effects under different identification strategies. This 8 | includes stratified randomized experiments, matching and 9 | stratification for observational studies, first differencing, and 10 | difference-in-differences. The package also provides various robust 11 | standard errors and a specification test for standard linear fixed 12 | effects estimators. 13 | -------------------------------------------------------------------------------- /man/pwfe.Rd: -------------------------------------------------------------------------------- 1 | \name{pwfe} 2 | \alias{pwfe} 3 | \title{Fitting the Weighted Fixed Effects Model with Propensity Score Weighting} 4 | \description{ 5 | \code{pwfe} is used to fit weighted fixed effects model for causal 6 | inference after transforming outcome variable based on estimated 7 | propensity score. \code{pwfe} also derives the regression weights for 8 | different causal quantity of interest. 9 | } 10 | \usage{ 11 | pwfe(formula, treat = "treat.name", outcome, data, pscore = NULL, 12 | unit.index, time.index = NULL, method = "unit", within.unit = TRUE, 13 | qoi = c("ate", "att"), estimator = NULL, C.it = NULL, 14 | White = TRUE, White.alpha = 0.05, 15 | hetero.se = TRUE, auto.se = TRUE, unbiased.se = FALSE, 16 | verbose = TRUE) 17 | } 18 | 19 | \arguments{ 20 | 21 | \item{formula}{a symbolic description of the model for estimating 22 | propensity score. The formula should not include dummmies for 23 | fixed effects. The details of model specifications are given 24 | under `Details'. 25 | } 26 | 27 | \item{treat}{a character string indicating the name of treatment 28 | variable used in the models. The treatment should be binary 29 | indicator (integer with 0 for the control group and 1 for the 30 | treatment group). 31 | } 32 | 33 | \item{outcome}{a character string indicating the name of outcome 34 | variable. 35 | } 36 | 37 | \item{data}{data frame containing the variables in the model. 38 | } 39 | 40 | \item{pscore}{an optional character string indicating the name of 41 | estimated propensity score. Note that pre-specified propensity score 42 | should be bounded away from zero and one. 43 | } 44 | 45 | \item{unit.index}{a character string indicating the name of unit 46 | variable used in the models. The index of unit should be factor. 47 | } 48 | \item{time.index}{a character string indicating the name of time 49 | variable used in the models. The index of time should be factor. 50 | } 51 | \item{method}{method for weighted fixed effects regression, either 52 | \code{unit} for unit fixed effects; \code{time} for time fixed 53 | effects. The default is \code{unit}. 54 | } 55 | 56 | \item{within.unit}{a logical value indicating whether propensity score 57 | is estimated within unit. The default is \code{TRUE}. 58 | } 59 | 60 | \item{qoi}{one of \code{"ate"} or \code{"att"}. The default is 61 | \code{"ate"}. \code{"fd"} and \code{"did"} are not compatible with 62 | \code{pwfe}.} 63 | 64 | \item{estimator}{an optional character string \code{"fd"} indicating 65 | whether the first-difference estimator will be used.} 66 | 67 | \item{C.it}{an optional non-negative numeric vector specifying 68 | relative weights for each unit of analysis. 69 | } 70 | 71 | \item{White}{a logical value indicating whether White misspecification 72 | statistics should be calculated. The default is \code{TRUE}. 73 | } 74 | 75 | \item{White.alpha}{level of functional specification test. See White 76 | (1980) and Imai . The default is \code{0.05}. 77 | } 78 | 79 | \item{hetero.se}{a logical value indicating whether heteroskedasticity 80 | across units is allowed in calculating standard errors. The default 81 | is \code{TRUE}. 82 | } 83 | 84 | \item{auto.se}{a logical value indicating whether arbitrary 85 | autocorrelation is allowed in calculating standard errors. The 86 | default is \code{TRUE}. 87 | } 88 | 89 | \item{unbiased.se}{logical. If \code{TRUE}, bias-asjusted 90 | heteroskedasticity-robust standard errors are used. See Stock and 91 | Watson (2008). Should be used only for balanced panel. The default 92 | is \code{FALSE}. 93 | } 94 | 95 | \item{verbose}{logical. If \code{TRUE}, helpful messages along with 96 | a progress report of the weight calculation are printed 97 | on the screen. The default is \code{TRUE}. 98 | } 99 | 100 | } 101 | \value{ 102 | \code{pwfe} returns an object of class "pwfe", a list that contains the 103 | components listed below. 104 | 105 | 106 | The function \code{summary} (i.e., \code{summary.pwfe}) can be used to 107 | obtain a table of the results. 108 | 109 | \item{coefficients}{a named vector of coefficients} 110 | \item{residuals}{the residuals, that is respons minus fitted values} 111 | \item{df}{the degree of freedom} 112 | \item{W}{weight matrix calculated from the model. Row and column 113 | indices can be found from unit.name, time.name.} 114 | \item{call}{the matched call} 115 | \item{causal}{causal quantity of interest} 116 | \item{estimator}{the estimating method} 117 | \item{unit.name}{a vector containing unique unit names} 118 | \item{unit.index}{a vector containing unique unit index number} 119 | \item{time.name}{a vector containing unique time names} 120 | \item{time.index}{a vector containing unique time index number} 121 | \item{method}{call of the method used} 122 | \item{vcov}{the variance covariance matrix} 123 | \item{White.alpha}{the alpha level for White specification test} 124 | \item{White.pvalue}{the p-value for White specification test} 125 | \item{White.stat}{the White statistics} 126 | \item{x}{the design matrix} 127 | \item{y}{the response vector} 128 | \item{mf}{the model frame} 129 | 130 | } 131 | \details{ 132 | 133 | To fit the weighted unit (time) fixed effects model with propensity 134 | score weighting, use the syntax for the formula, \code{~ x1 + x2}, 135 | where \code{x1} and \code{x2} are unit (time) varying 136 | covariates. 137 | 138 | One can provide his/her own estimated \code{pscore} which can be used 139 | to transform the outcome varialbe. If so, one does not need to specify 140 | \code{formula}. 141 | 142 | If \code{pscore} is not provided, \code{bayesglm} will be used to 143 | estimate propensity scores. If \code{within.unit = TRUE}, propensity 144 | score will be separately estimated within time (unit) when 145 | \code{method} is \code{unit} (\code{time}). Otherwise, propensity 146 | score will be estimated on entire data at once. 147 | 148 | The estimated propensity scores will be used to transform the 149 | \code{outcome} variable as described in Imai and Kim (2018). 150 | 151 | \code{pwfe} calculates weights based on different underlying causal 152 | quantity of interest: Average Treatment Effect (\code{qoi = "ate"}) or 153 | Average Treatment Effect for the Treated (\code{qoi = "att"}). 154 | 155 | One can further set estimating methods: First-Difference 156 | (\code{estimator ="fd"}) or Difference-in-differences (\code{estimator 157 | = "did"}). 158 | 159 | To specify different ex-ante weights for each unit of analysis, use 160 | non-negative weights \code{C.it}. For instance, using the survey 161 | weights for \code{C.it} enables the estimation fo the average 162 | treatement effect for the target population. 163 | 164 | 165 | } 166 | 167 | \references{ Imai, Kosuke and In Song Kim. (2018) ``When Should We Use 168 | Unit Fixed Effects Regression Models for Causal Inference with 169 | Longitudinal Data?" American Journal of Political Science, 170 | Forthcoming. 171 | 172 | Stock, James and Mark Watson. (2008) ``Heteroskedasticity-Robust 173 | Standard Errors for Fixed Effect Panel Data Regression'' Econometrica, 174 | 76, 1. 175 | 176 | White, Halbert. (1980) `Using Least Squares to Approximate Unknown 177 | Regression Functions.'' International Economic Review, 21, 1, 178 | 149--170. 179 | 180 | } 181 | \seealso{ 182 | \code{wfe} for fitting weighted fixed effect models. 183 | } 184 | \examples{ 185 | ### NOTE: this example illustrates the use of wfe function with randomly 186 | ### generated panel data with arbitrary number of units and time. 187 | 188 | ## generate panel data with number of units = N, number of time = Time 189 | \dontrun{ 190 | 191 | N <- 10 # number of distinct units 192 | Time <- 15 # number of distinct time 193 | 194 | ## generate treatment variable 195 | treat <- matrix(rbinom(N*Time, size = 1, 0.25), ncol = N) 196 | ## make sure at least one observation is treated for each unit 197 | while ((sum(apply(treat, 2, mean) == 0) > 0) | (sum(apply(treat, 2, mean) == 1) > 0) | 198 | (sum(apply(treat, 1, mean) == 0) > 0) | (sum(apply(treat, 1, mean) == 1) > 0)) { 199 | treat <- matrix(rbinom(N*Time, size = 1, 0.25), ncol = N) 200 | } 201 | treat.vec <- c(treat) 202 | 203 | ## unit fixed effects 204 | alphai <- rnorm(N, mean = apply(treat, 2, mean)) 205 | 206 | ## geneate two random covariates 207 | x1 <- matrix(rnorm(N*Time, 0.5,1), ncol=N) 208 | x2 <- matrix(rbeta(N*Time, 5,1), ncol=N) 209 | pscore <- matrix(runif(N*Time, 0,1), ncol=N) 210 | x1.vec <- c(x1) 211 | x2.vec <- c(x2) 212 | pscore <- c(pscore) 213 | 214 | ## generate outcome variable 215 | y <- matrix(NA, ncol = N, nrow = Time) 216 | for (i in 1:N) { 217 | y[, i] <- alphai[i] + treat[, i] + x1[,i] + x2[,i] + rnorm(Time) 218 | } 219 | y.vec <- c(y) 220 | 221 | ## generate unit and time index 222 | unit.index <- rep(1:N, each = Time) 223 | time.index <- rep(1:Time, N) 224 | 225 | Data.str <- as.data.frame(cbind(y.vec, treat.vec, unit.index, x1.vec, x2.vec)) 226 | colnames(Data.str) <- c("y", "tr", "strata.id", "x1", "x2") 227 | 228 | Data.obs <- as.data.frame(cbind(y.vec, treat.vec, unit.index, time.index, x1.vec, x2.vec, pscore)) 229 | colnames(Data.obs) <- c("y", "tr", "unit", "time", "x1", "x2", "pscore") 230 | 231 | 232 | ############################################################ 233 | # Example 1: Stratified Randomized Experiments 234 | ############################################################ 235 | 236 | ## run the weighted fixed effect regression with strata fixed effect. 237 | ## Note: the quantity of interest is Average Treatment Effect ("ate") 238 | ## and the standard errors allow heteroskedasticity and arbitrary 239 | ## autocorrelation. 240 | 241 | 242 | ### Average Treatment Effect 243 | ps.ate <- pwfe(~ x1+x2, treat = "tr", outcome = "y", data = Data.str, 244 | unit.index = "strata.id", method = "unit", within.unit = TRUE, 245 | qoi = "ate", hetero.se=TRUE, auto.se=TRUE) 246 | ## summarize the results 247 | summary(ps.ate) 248 | 249 | ### Average Treatment Effect for the Treated 250 | ps.att <- pwfe(~ x1+x2, treat = "tr", outcome = "y", data = Data.str, 251 | unit.index = "strata.id", method = "unit", within.unit = TRUE, 252 | qoi = "att", hetero.se=TRUE, auto.se=TRUE) 253 | ## summarize the results 254 | summary(ps.att) 255 | 256 | 257 | ############################################################ 258 | # Example 2: Observational Studies with Unit Fixed-effects 259 | ############################################################ 260 | 261 | ## run the weighted fixed effect regression with unit fixed effect. 262 | ## Note: the quantity of interest is Average Treatment Effect ("ate") 263 | ## and the standard errors allow heteroskedasticity and arbitrary 264 | ## autocorrelation. 265 | 266 | ### Average Treatment Effect 267 | ps.obs <- pwfe(~ x1+x2, treat = "tr", outcome = "y", data = Data.obs, 268 | unit.index = "unit", time.index = "time", 269 | method = "unit", within.unit = TRUE, 270 | qoi = "ate", hetero.se=TRUE, auto.se=TRUE) 271 | 272 | ## summarize the results 273 | summary(ps.obs) 274 | 275 | ## extracting weigths 276 | summary(ps.obs)$Weights 277 | 278 | ### Average Treatment Effect with First-difference 279 | 280 | ps.fd <- pwfe(~ x1+x2, treat = "tr", outcome = "y", data = Data.obs, 281 | unit.index = "unit", time.index = "time", 282 | method = "unit", within.unit = TRUE, 283 | qoi = "ate", estimator = "fd", hetero.se=TRUE, auto.se=TRUE) 284 | 285 | ## summarize the results 286 | summary(ps.fd) 287 | 288 | 289 | ############################################################ 290 | # Example 3: Estimation with pre-specified propensity score 291 | ############################################################ 292 | 293 | ### Average Treatment Effect with Pre-specified Propensity Scores 294 | 295 | mod.ps <- pwfe(treat = "tr", outcome = "y", data = Data.obs, pscore = "pscore", 296 | unit.index = "unit", time.index = "time", 297 | method = "unit", within.unit = TRUE, 298 | qoi = "ate", hetero.se=TRUE, auto.se=TRUE) 299 | 300 | ## summarize the results 301 | summary(mod.ps) 302 | } 303 | 304 | } 305 | 306 | \author{In Song Kim, Massachusetts Institute of Technology, \email{insong@mit.edu} 307 | and Kosuke Imai, Princeton University, \email{imai@harvard.edu} } 308 | 309 | \keyword{regression} 310 | 311 | 312 | 313 | -------------------------------------------------------------------------------- /man/wfe.Rd: -------------------------------------------------------------------------------- 1 | \name{wfe} 2 | 3 | \alias{wfe} 4 | \title{Fitting the Weighted Fixed Effects Model for Causal Inference} 5 | 6 | \description{ 7 | \code{wfe} is used to fit weighted fixed effects model for causal 8 | inference. \code{wfe} also derives the regression weights for 9 | different causal quantity of interest. 10 | } 11 | 12 | \usage{ 13 | wfe(formula, data, treat = "treat.name", 14 | unit.index, time.index = NULL, method = "unit", 15 | dyad1.index = NULL, dyad2.index = NULL, 16 | qoi = "ate", estimator = NULL, C.it = NULL, 17 | hetero.se = TRUE, auto.se = TRUE, 18 | dyad.se = FALSE, 19 | White = TRUE, White.alpha = 0.05, 20 | verbose = TRUE, unbiased.se = FALSE, unweighted = FALSE, 21 | store.wdm = FALSE, maxdev.did = NULL, 22 | tol = sqrt(.Machine$double.eps)) 23 | } 24 | 25 | 26 | 27 | \arguments{ 28 | 29 | \item{formula}{a symbolic description of the model to be fitted. The 30 | formula should not include dummmies for fixed effects. The details 31 | of model specifications are given under `Details'. 32 | } 33 | \item{data}{data frame containing the variables in the model. 34 | } 35 | \item{treat}{a character string indicating the name of treatment 36 | variable used in the models. The treatment should be binary 37 | indicator (integer with 0 for the control group and 1 for the 38 | treatment group). 39 | } 40 | \item{unit.index}{a character string indicating the name of unit 41 | variable used in the models. The index of unit should be factor. 42 | } 43 | \item{time.index}{a character string indicating the name of time 44 | variable used in the models. The index of time should be factor. 45 | } 46 | 47 | \item{method}{method for weighted fixed effects regression, either 48 | \code{unit} for unit fixed effects; \code{time} for time fixed 49 | effects. The default is \code{unit}. For two-way weighted fixed 50 | effects regression models, set method to the default value 51 | \code{unit}.} 52 | 53 | \item{dyad1.index}{a character string indicating the variable name of first unit 54 | of a given dyad. The default is \code{NULL}. 55 | This is required to calculate robust standard errors with dyadic data. 56 | } 57 | 58 | \item{dyad2.index}{a character string indicating the variable name of second unit 59 | of a given dyad. The default is \code{NULL}. 60 | This is required to calculate robust standard errors with dyadic data. 61 | } 62 | 63 | \item{qoi}{one of \code{"ate"} or \code{"att"}. The default is 64 | \code{"ate"}. If set to \code{"att"} in implementing \code{"fd"} 65 | and \code{"did"} estimators, the comparison of the treated 66 | observation is restricted to the control observation from the 67 | previous time period but not with the control observation from the 68 | next time period.} 69 | 70 | \item{estimator}{an optional character string indicating the 71 | estimating method. One of \code{"fd"}, \code{"did"}, or 72 | \code{"Mdid"}. \code{"fd"} is for First-Difference 73 | Design. \code{"did"} is for multi-period Difference-in-Differences 74 | design. The default is \code{NULL}. Setting estimator to be 75 | \code{"Mdid"} implements the Difference-in-Differences design with 76 | Matching on the pretreatment outcome variables.} 77 | 78 | \item{C.it}{an optional non-negative numeric vector specifying 79 | relative weights for each unit of analysis. If not specified, the 80 | weights will be calculated based on the estimator and quantity of 81 | interest. } 82 | 83 | \item{hetero.se}{a logical value indicating whether heteroskedasticity 84 | across units is allowed in calculating standard errors. The default 85 | is \code{TRUE}. 86 | } 87 | 88 | \item{auto.se}{a logical value indicating whether arbitrary 89 | autocorrelation is allowed in calculating standard errors. The 90 | default is \code{TRUE}. 91 | } 92 | 93 | \item{dyad.se}{a logical value indicating whether correlations across dyads exist. The 94 | default is \code{FALSE}. 95 | } 96 | 97 | \item{White}{a logical value indicating whether White misspecification 98 | statistics should be calculated. The default is \code{TRUE}. 99 | } 100 | 101 | \item{White.alpha}{level of functional specification test. See White 102 | (1980) and Imai and Kim (2018). The default is \code{0.05}. 103 | } 104 | 105 | \item{verbose}{logical. If \code{TRUE}, helpful messages along with 106 | a progress report of the weight calculation are printed 107 | on the screen. The default is \code{TRUE}. 108 | } 109 | 110 | \item{unbiased.se}{logical. If \code{TRUE}, bias-asjusted 111 | heteroskedasticity-robust standard errors are used. See Stock and 112 | Watson (2008). Should be used only for balanced panel. The default 113 | is \code{FALSE}. 114 | } 115 | 116 | \item{unweighted}{logical. If \code{TRUE}, standard unweighted fixed 117 | effects model is estimated. The default is \code{FALSE}. Note: 118 | users do not need to specify \code{qoi} when 119 | unweighted=\code{TRUE}. For standard two-way fixed effects model 120 | (unit and time), set estimator=\code{"did"} and 121 | unweighted=\code{"TRUE"}.} 122 | 123 | \item{store.wdm}{logical. If \code{TRUE}, weighted demeaned 124 | dataframe will be stored. The default is \code{FALSE}.} 125 | 126 | \item{maxdev.did}{an optional positive numeric value specifying the 127 | maximum deviation in pre-treatment outcome when \code{"Mdid"} is 128 | implemented. The default is \code{NULL}, which implements 129 | nearest-neighbor matching.} 130 | 131 | \item{tol}{a relative tolerance to detect zero singular values for 132 | generalized inverse. The default is sqrt(.Machine$double.eps) 133 | } 134 | 135 | 136 | 137 | } 138 | 139 | 140 | \value{ 141 | 142 | \code{wfe} returns an object of class "wfe", a list that contains the 143 | components listed below. 144 | 145 | 146 | The function \code{summary} (i.e., \code{summary.wfe}) can be used to 147 | obtain a table of the results. 148 | 149 | \item{coefficients}{a named vector of coefficients} 150 | \item{residuals}{the residuals, that is respons minus fitted values} 151 | \item{df}{the degree of freedom} 152 | \item{W}{a dataframe containing unit and time indices along with the 153 | weights used for the observation. If method=\code{unit}, integer numbers 154 | corresponding to the order of input data will be used for generating time index.} 155 | \item{Num.nonzero}{Number of observations with non-zero weights} 156 | \item{call}{the matched call} 157 | \item{causal}{causal quantity of interest} 158 | \item{estimator}{the estimating method} 159 | \item{units}{a dataframe containing unit names used for \code{W}} 160 | \item{times}{a dataframe containing time names used for \code{W}} 161 | \item{method}{call of the method used} 162 | \item{vcov}{the variance covariance matrix} 163 | \item{White.alpha}{the alpha level for White specification test} 164 | \item{White.pvalue}{the p-value for White specification test} 165 | \item{White.stat}{the White statistics} 166 | \item{X}{the design matrix} 167 | \item{Y}{the response vector} 168 | \item{X.wdm}{the demeaned design matrix} 169 | \item{Y.wdm}{the demeaned response vector} 170 | \item{mf}{the model frame where the last column is the weights used for the analysis} 171 | } 172 | 173 | \details{ 174 | 175 | To fit the weighted unit (time) fixed effects model, use the syntax 176 | for the formula, \code{y ~ x1 + x2}, where \code{y} is a dependent 177 | variable and \code{x1} and \code{x2} are unit (time) varying 178 | covariates. 179 | 180 | \code{wfe} calculates weights based on different underlying causal 181 | quantity of interest: Average Treatment Effect (\code{qoi = "ate"}) or 182 | Average Treatment Effect for the Treated (\code{qoi = "att"}). 183 | 184 | One can further set estimating methods: First-Difference 185 | (\code{estimator ="fd"}) or Difference-in-differences (\code{estimator 186 | = "did"}). For the two-way fixed effects model, set \code{estimator 187 | = "did"} 188 | 189 | To specify different ex-ante weights for each unit of analysis, use 190 | non-negative weights \code{C.it}. For instance, using the survey 191 | weights for \code{C.it} enables the estimation fo the average 192 | treatement effect for the target population. 193 | 194 | An object of class "wfe" contains vectors of unique unit(time) names and 195 | unique unit(time) indices. 196 | 197 | } 198 | 199 | \references{Imai, Kosuke and In Song Kim. (2018) ``When Should We Use 200 | Unit Fixed Effects Regression Models for Causal Inference with 201 | Longitudinal Data?" American Journal of Political Science, 202 | Forthcoming. 203 | 204 | Aronow, Peter M., Cyrus Samii, and Valentina A. Assenova (2015) ``Cluster–robust 205 | Variance Estimation for Dyadic Data." Political Analysis 23, no. 4, 564--577. 206 | 207 | Stock, James and Mark Watson. (2008) ``Heteroskedasticity-Robust 208 | Standard Errors for Fixed Effect Panel Data Regression'' Econometrica, 209 | 76, 1. 210 | 211 | White, Halbert. (1980) ``Using Least Squares to Approximate Unknown 212 | Regression Functions.'' International Economic Review, 21, 1, 213 | 149--170. 214 | 215 | } 216 | \seealso{ 217 | \code{pwfe} for fitting weighted fixed effects models with propensity 218 | score weighting 219 | } 220 | \examples{ 221 | ### NOTE: this example illustrates the use of wfe function with randomly 222 | ### generated panel data with arbitrary number of units and time. 223 | 224 | ## generate panel data with number of units = N, number of time = Time 225 | N <- 10 # number of distinct units 226 | Time <- 15 # number of distinct time 227 | 228 | ## treatment effect 229 | beta <- 1 230 | 231 | ## generate treatment variable 232 | treat <- matrix(rbinom(N*Time, size = 1, 0.25), ncol = N) 233 | ## make sure at least one observation is treated for each unit 234 | while ((sum(apply(treat, 2, mean) == 0) > 0) | (sum(apply(treat, 2, mean) == 1) > 0) | 235 | (sum(apply(treat, 1, mean) == 0) > 0) | (sum(apply(treat, 1, mean) == 1) > 0)) { 236 | treat <- matrix(rbinom(N*Time, size = 1, 0.25), ncol = N) 237 | } 238 | treat.vec <- c(treat) 239 | 240 | ## unit fixed effects 241 | alphai <- rnorm(N, mean = apply(treat, 2, mean)) 242 | 243 | ## geneate two random covariates 244 | x1 <- matrix(rnorm(N*Time, 0.5,1), ncol=N) 245 | x2 <- matrix(rbeta(N*Time, 5,1), ncol=N) 246 | x1.vec <- c(x1) 247 | x2.vec <- c(x2) 248 | ## generate outcome variable 249 | y <- matrix(NA, ncol = N, nrow = Time) 250 | for (i in 1:N) { 251 | y[, i] <- alphai[i] + treat[, i] + x1[,i] + x2[,i] + rnorm(Time) 252 | } 253 | y.vec <- c(y) 254 | 255 | ## generate unit and time index 256 | unit.index <- rep(1:N, each = Time) 257 | time.index <- rep(1:Time, N) 258 | 259 | Data.str <- as.data.frame(cbind(y.vec, treat.vec, unit.index, x1.vec, x2.vec)) 260 | colnames(Data.str) <- c("y", "tr", "strata.id", "x1", "x2") 261 | 262 | Data.obs <- as.data.frame(cbind(y.vec, treat.vec, unit.index, time.index, x1.vec, x2.vec)) 263 | colnames(Data.obs) <- c("y", "tr", "unit", "time", "x1", "x2") 264 | 265 | ############################################################ 266 | # Example 1: Stratified Randomized Experiments 267 | ############################################################ 268 | 269 | ## run the weighted fixed effect regression with strata fixed effect. 270 | ## Note: the quantity of interest is Average Treatment Effect ("ate") 271 | ## and the standard errors allow heteroskedasticity and arbitrary 272 | ## autocorrelation. 273 | 274 | 275 | ### Average Treatment Effect 276 | mod.ate <- wfe(y~ tr+x1+x2, data = Data.str, treat = "tr", 277 | unit.index = "strata.id", method = "unit", 278 | qoi = "ate", hetero.se=TRUE, auto.se=TRUE) 279 | ## summarize the results 280 | summary(mod.ate) 281 | 282 | ### Average Treatment Effect for the Treated 283 | mod.att <- wfe(y~ tr+x1+x2, data = Data.str, treat = "tr", 284 | unit.index = "strata.id", method = "unit", 285 | qoi = "att", hetero.se=TRUE, auto.se=TRUE) 286 | ## summarize the results 287 | summary(mod.att) 288 | 289 | 290 | ############################################################ 291 | # Example 2: Observational Studies with Unit Fixed-effects 292 | ############################################################ 293 | 294 | ## run the weighted fixed effect regression with unit fixed effect. 295 | ## Note: the quantity of interest is Average Treatment Effect ("ate") 296 | ## and the standard errors allow heteroskedasticity and arbitrary 297 | ## autocorrelation. 298 | 299 | mod.obs <- wfe(y~ tr+x1+x2, data = Data.obs, treat = "tr", 300 | unit.index = "unit", time.index = "time", method = "unit", 301 | qoi = "ate", hetero.se=TRUE, auto.se=TRUE, 302 | White = TRUE, White.alpha = 0.05) 303 | 304 | ## summarize the results 305 | summary(mod.obs) 306 | 307 | ## extracting weigths 308 | summary(mod.obs)$W 309 | 310 | 311 | \dontrun{ 312 | ################################################################### 313 | # Example 3: Observational Studies with differences-in-differences 314 | ################################################################### 315 | 316 | ## run difference-in-differences estimator. 317 | ## Note: the quantity of interest is Average Treatment Effect ("ate") 318 | ## and the standard errors allow heteroskedasticity and arbitrary 319 | ## autocorrelation. 320 | 321 | mod.did <- wfe(y~ tr+x1+x2, data = Data.obs, treat = "tr", 322 | unit.index = "unit", time.index = "time", method = "unit", 323 | qoi = "ate", estimator ="did", hetero.se=TRUE, auto.se=TRUE, 324 | White = TRUE, White.alpha = 0.05, verbose = TRUE) 325 | 326 | ## summarize the results 327 | summary(mod.did) 328 | 329 | ## extracting weigths 330 | summary(mod.did)$W 331 | 332 | ######################################################################### 333 | # Example 4: DID with Matching on Pre-treatment Outcomes 334 | ######################################################################### 335 | 336 | ## implements matching on pre-treatment outcomes where the maximum 337 | ## deviation is specified as 0.5 338 | 339 | mod.Mdid <- wfe(y~ tr+x1+x2, data = Data.obs, treat = "tr", 340 | unit.index = "unit", time.index = "time", method = "unit", 341 | qoi = "ate", estimator ="Mdid", hetero.se=TRUE, auto.se=TRUE, 342 | White = TRUE, White.alpha = 0.05, maxdev.did = 0.5, verbose = TRUE) 343 | 344 | ## summarize the results 345 | summary(mod.Mdid) 346 | 347 | ## Note: setting the maximum deviation to infinity (or any value 348 | ## bigger than the maximum pair-wise difference in the outcome) will 349 | ## return the same result as Example 3. 350 | 351 | dev <- 1000+max(Data.obs$y)-min(Data.obs$y) 352 | mod.did2 <- wfe(y~ tr+x1+x2, data = Data.obs, treat = "tr", 353 | unit.index = "unit", time.index = "time", method = "unit", 354 | qoi = "ate", estimator ="Mdid", hetero.se=TRUE, auto.se=TRUE, 355 | White = TRUE, White.alpha = 0.05, maxdev.did = dev, verbose = TRUE) 356 | 357 | ## summarize the results 358 | summary(mod.did2) 359 | mod.did2$coef[1] == mod.did$coef[1] 360 | } 361 | 362 | } 363 | 364 | 365 | \author{In Song Kim, Massachusetts Institute of Technology, \email{insong@mit.edu} 366 | and Kosuke Imai, Princeton University, \email{imai@harvard.edu} } 367 | 368 | \keyword{regression} 369 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 2 | 3 | 4 | -------------------------------------------------------------------------------- /src/init.c: -------------------------------------------------------------------------------- 1 | #include // for NULL 2 | #include 3 | 4 | /* FIXME: 5 | Check these declarations against the C/Fortran source code. 6 | */ 7 | 8 | /* .C calls */ 9 | extern void CalDID(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 10 | extern void comp_OmegaHAC(void *, void *, void *, void *, void *, void *, void *, void *, void *); 11 | extern void comp_OmegaHC(void *, void *, void *, void *, void *, void *, void *, void *, void *); 12 | extern void Demean(void *, void *, void *, void *, void *); 13 | extern void DemeanDID(void *, void *, void *, void *, void *, void *, void *, void *); 14 | extern void GenTime(void *, void *, void *, void *); 15 | extern void GenWeightsDID(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 16 | extern void GenWeightsFD(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 17 | extern void GenWeightsMDID(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 18 | extern void GenWeightsTime(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 19 | extern void GenWeightsUnit(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 20 | extern void Index(void *, void *, void *, void *, void *); 21 | extern void LamdaDID1(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 22 | extern void LamdaDID2(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 23 | extern void MDummy(void *, void *, void *, void *); 24 | extern void OmegaDiDHAC(void *, void *, void *, void *, void *, void *, void *, void *); 25 | extern void OmegaDiDHAC2(void *, void *, void *, void *, void *, void *, void *, void *); 26 | extern void OmegaHatHAC(void *, void *, void *, void *, void *, void *, void *); 27 | extern void OmegaHatHC(void *, void *, void *, void *, void *, void *, void *); 28 | extern void ProjectionM(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 29 | extern void Transform(void *, void *, void *, void *, void *); 30 | extern void TwayDemean(void *, void *, void *, void *, void *, void *, void *); 31 | extern void VectorizeC(void *, void *, void *, void *, void *, void *, void *); 32 | extern void WDemean(void *, void *, void *, void *, void *, void *); 33 | extern void WWDemean(void *, void *, void *, void *, void *, void *); 34 | extern void XWXiSum(void *, void *, void *, void *, void *, void *, void *); 35 | extern void XXiSum(void *, void *, void *, void *, void *, void *); 36 | 37 | static const R_CMethodDef CEntries[] = { 38 | {"CalDID", (DL_FUNC) &CalDID, 12}, 39 | {"comp_OmegaHAC", (DL_FUNC) &comp_OmegaHAC, 9}, 40 | {"comp_OmegaHC", (DL_FUNC) &comp_OmegaHC, 9}, 41 | {"Demean", (DL_FUNC) &Demean, 5}, 42 | {"DemeanDID", (DL_FUNC) &DemeanDID, 8}, 43 | {"GenTime", (DL_FUNC) &GenTime, 4}, 44 | {"GenWeightsDID", (DL_FUNC) &GenWeightsDID, 11}, 45 | {"GenWeightsFD", (DL_FUNC) &GenWeightsFD, 11}, 46 | {"GenWeightsMDID", (DL_FUNC) &GenWeightsMDID, 13}, 47 | {"GenWeightsTime", (DL_FUNC) &GenWeightsTime, 11}, 48 | {"GenWeightsUnit", (DL_FUNC) &GenWeightsUnit, 11}, 49 | {"Index", (DL_FUNC) &Index, 5}, 50 | {"LamdaDID1", (DL_FUNC) &LamdaDID1, 14}, 51 | {"LamdaDID2", (DL_FUNC) &LamdaDID2, 14}, 52 | {"MDummy", (DL_FUNC) &MDummy, 4}, 53 | {"OmegaDiDHAC", (DL_FUNC) &OmegaDiDHAC, 8}, 54 | {"OmegaDiDHAC2", (DL_FUNC) &OmegaDiDHAC2, 8}, 55 | {"OmegaHatHAC", (DL_FUNC) &OmegaHatHAC, 7}, 56 | {"OmegaHatHC", (DL_FUNC) &OmegaHatHC, 7}, 57 | {"ProjectionM", (DL_FUNC) &ProjectionM, 17}, 58 | {"Transform", (DL_FUNC) &Transform, 5}, 59 | {"TwayDemean", (DL_FUNC) &TwayDemean, 7}, 60 | {"VectorizeC", (DL_FUNC) &VectorizeC, 7}, 61 | {"WDemean", (DL_FUNC) &WDemean, 6}, 62 | {"WWDemean", (DL_FUNC) &WWDemean, 6}, 63 | {"XWXiSum", (DL_FUNC) &XWXiSum, 7}, 64 | {"XXiSum", (DL_FUNC) &XXiSum, 6}, 65 | {NULL, NULL, 0} 66 | }; 67 | 68 | void R_init_wfe(DllInfo *dll) 69 | { 70 | R_registerRoutines(dll, CEntries, NULL, NULL, NULL); 71 | R_useDynamicSymbols(dll, FALSE); 72 | } 73 | -------------------------------------------------------------------------------- /src/vector.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "vector.h" 7 | #include 8 | #include 9 | 10 | Rcomplex* compArray(int num) { 11 | 12 | Rcomplex *cArray = (Rcomplex *)malloc(num * sizeof(Rcomplex)); 13 | if (!cArray) 14 | error("Out of memory error in compArray\n"); 15 | return cArray; 16 | } 17 | 18 | 19 | Rcomplex** compMatrix(int row, int col) { 20 | 21 | int i; 22 | Rcomplex **cMatrix = (Rcomplex **)malloc(row * sizeof(Rcomplex *)); 23 | if (!cMatrix) 24 | error("Out of memory error in compMatrix\n"); 25 | for (i = 0; i < row; i++) { 26 | cMatrix[i] = (Rcomplex *)calloc(col, sizeof(Rcomplex)); 27 | if (!cMatrix[i]) 28 | error("Out of memory error in compMatrix\n"); 29 | } 30 | return cMatrix; 31 | } 32 | 33 | void PcompMatrix(Rcomplex **cmatrix, int row, int col) { 34 | int i, j; 35 | for (i = 0; i < row; i++) { 36 | for (j = 0; j < col; j++) { 37 | if (cmatrix[i][j].i >= 0) { 38 | Rprintf("%6.4f+%6.4fi ", cmatrix[i][j].r, cmatrix[i][j].i); 39 | } 40 | if (cmatrix[i][j].i < 0) { 41 | Rprintf("%6.4f%6.4fi ", cmatrix[i][j].r, cmatrix[i][j].i); 42 | } 43 | } 44 | Rprintf("\n"); 45 | } 46 | } 47 | 48 | int* intArray(int num) { 49 | int *iArray = (int *)malloc(num * sizeof(int)); 50 | if (!iArray) 51 | error("Out of memory error in intArray\n"); 52 | return iArray; 53 | } 54 | 55 | void PintArray(int *ivector, int length) { 56 | int i; 57 | for (i = 0; i < length; i++) 58 | Rprintf("%5d\n", ivector[i]); 59 | } 60 | 61 | int** intMatrix(int row, int col) { 62 | int i; 63 | int **iMatrix = (int **)malloc(row * sizeof(int *)); 64 | if (!iMatrix) 65 | error("Out of memory error in intMatrix\n"); 66 | for (i = 0; i < row; i++) { 67 | iMatrix[i] = (int *)malloc(col * sizeof(int)); 68 | if (!iMatrix[i]) 69 | error("Out of memory error in intMatrix\n"); 70 | } 71 | return iMatrix; 72 | } 73 | 74 | void PintMatrix(int **imatrix, int row, int col) { 75 | int i, j; 76 | for (i = 0; i < row; i++) { 77 | for (j = 0; j < col; j++) 78 | Rprintf("%5d", imatrix[i][j]); 79 | Rprintf("\n"); 80 | } 81 | } 82 | 83 | 84 | double* doubleArray(int num) { 85 | double *dArray = (double *)malloc(num * sizeof(double)); 86 | if (!dArray) 87 | error("Out of memory error in doubleArray\n"); 88 | return dArray; 89 | } 90 | 91 | void PdoubleArray(double *dvector, int length) { 92 | int i; 93 | for (i = 0; i < length; i++) 94 | Rprintf("%14g\n", dvector[i]); 95 | } 96 | 97 | double** doubleMatrix(int row, int col) { 98 | int i; 99 | double **dMatrix = (double **)malloc((size_t)(row * sizeof(double *))); 100 | if (!dMatrix) 101 | error("Out of memory error in doubleMatrix\n"); 102 | for (i = 0; i < row; i++) { 103 | dMatrix[i] = (double *)malloc((size_t)(col * sizeof(double))); 104 | if (!dMatrix[i]) 105 | error("Out of memory error in doubleMatrix\n"); 106 | } 107 | return dMatrix; 108 | } 109 | 110 | void PdoubleMatrix(double **dmatrix, int row, int col) { 111 | int i, j; 112 | for (i = 0; i < row; i++) { 113 | for (j = 0; j < col; j++) 114 | Rprintf("%14g", dmatrix[i][j]); 115 | Rprintf("\n"); 116 | } 117 | } 118 | 119 | double*** doubleMatrix3D(int x, int y, int z) { 120 | int i; 121 | double ***dM3 = (double ***)malloc(x * sizeof(double **)); 122 | if (!dM3) 123 | error("Out of memory error in doubleMatrix3D\n"); 124 | for (i = 0; i < x; i++) 125 | dM3[i] = doubleMatrix(y, z); 126 | return dM3; 127 | } 128 | 129 | void PdoubleMatrix3D(double ***dmatrix3D, int x, int y, int z) { 130 | int i, j, k; 131 | for (i = 0; i < x; i++) { 132 | Rprintf("First dimension = %5d\n", i); 133 | for (j = 0; j < y; j++) { 134 | for (k = 0; k < z; k++) 135 | Rprintf("%14g", dmatrix3D[i][j][k]); 136 | Rprintf("\n"); 137 | } 138 | } 139 | } 140 | 141 | double**** doubleMatrix4D(int x, int y, int z, int zz) { 142 | int i; 143 | double ****dM4 = (double ****)malloc(x * sizeof(double ***)); 144 | if (!dM4) 145 | error("Out of memory error in doubleMatrix4D\n"); 146 | for (i = 0; i < x; i++) 147 | dM4[i] = doubleMatrix3D(y, z, zz); 148 | return dM4; 149 | } 150 | 151 | long* longArray(int num) { 152 | long *lArray = (long *)malloc(num * sizeof(long)); 153 | if (!lArray) 154 | error("Out of memory error in longArray\n"); 155 | return lArray; 156 | } 157 | 158 | void FreeMatrix(double **Matrix, int row) { 159 | int i; 160 | for (i = 0; i < row; i++) 161 | free(Matrix[i]); 162 | free(Matrix); 163 | } 164 | 165 | void FreeintMatrix(int **Matrix, int row) { 166 | int i; 167 | for (i = 0; i < row; i++) 168 | free(Matrix[i]); 169 | free(Matrix); 170 | } 171 | 172 | void FreecompMatrix(Rcomplex **Matrix, int row) { 173 | int i; 174 | for (i = 0; i < row; i++) 175 | free(Matrix[i]); 176 | free(Matrix); 177 | } 178 | 179 | 180 | void Free3DMatrix(double ***Matrix, int index, int row) { 181 | int i; 182 | for (i = 0; i < index; i++) 183 | FreeMatrix(Matrix[i], row); 184 | free(Matrix); 185 | } 186 | 187 | void Free4DMatrix(double ****Matrix, int index, int index1, int row) { 188 | int i; 189 | for (i = 0; i < index; i++) 190 | Free3DMatrix(Matrix[i], index1, row); 191 | free(Matrix); 192 | } 193 | 194 | 195 | 196 | 197 | -------------------------------------------------------------------------------- /src/vector.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | Rcomplex *compArray(int num); 5 | Rcomplex **compMatrix(int row, int col); 6 | 7 | int *intArray(int num); 8 | void PintArray(int *ivector, int length); 9 | int **intMatrix(int row, int col); 10 | void PintMatrix(int **imatrix, int row, int col); 11 | 12 | double *doubleArray(int num); 13 | void PdoubleArray(double *dvector, int length); 14 | double **doubleMatrix(int row, int col); 15 | void PdoubleMatrix(double **dmatrix, int row, int col); 16 | 17 | double ***doubleMatrix3D(int x, int y, int z); 18 | void PdoubleMatrix3D(double ***dmatrix3D, int x, int y, int z); 19 | 20 | double ****doubleMatrix4D(int x, int y, int z, int zz); 21 | 22 | long *longArray(int num); 23 | 24 | void FreeMatrix(double **Matrix, int row); 25 | void FreeintMatrix(int **Matrix, int row); 26 | void FreecompMatrix(Rcomplex **Matrix, int row); 27 | void Free3DMatrix(double ***Matrix, int index, int row); 28 | void Free4DMatrix(double ****Matrix, int index, int index1, int row); 29 | -------------------------------------------------------------------------------- /src/wfe.h: -------------------------------------------------------------------------------- 1 | /* This file was automatically generated. Do not edit! */ 2 | void LamdaDID2(int *len_Xtrow,int *len_Xhrow,int *Tunit_index,int *len_uniq_Tu_index,int *Hunit_index,int *len_uniq_Hu_index,double *Xtilde,int *len_Xtcol,double *utilde,double *Xhat,int *len_Xhcol,double *uhat,double *W,double *LamdaDID2); 3 | void LamdaDID1(int *len_Xtrow,int *len_Xhrow,int *Tunit_index,int *len_uniq_Tu_index,int *Hunit_index,int *len_uniq_Hu_index,double *Xtilde,int *len_Xtcol,double *utilde,double *Xhat,int *len_Xhcol,double *uhat,double *W,double *LamdaDID1); 4 | void OmegaDiDHC(int *len_data,int *n_cov,int *unit_index,int *len_uniq_u_index,double *Xtilde,double *utilde,double *W,double *Omega_DiD_HC); 5 | void OmegaDiDHAC(int *len_data,int *n_cov,int *unit_index,int *len_uniq_u_index,double *Xtilde,double *utilde,double *W,double *Omega_DiD_HAC); 6 | void LamdaHat(int *len_data,int *n_cov,int *unit_index,int *len_uniq_u_index,double *Xtilde,double *utilde,double *Xhat,double *uhat,double *Lamda_hat); 7 | void OmegaHatHC(int *len_data,int *n_cov,int *unit_index,int *len_uniq_u_index,double *Xtilde,double *utilde,double *Omega_hat_HC); 8 | void OmegaHatHAC(int *len_data,int *n_cov,int *unit_index,int *len_uniq_u_index,double *Xtilde,double *utilde,double *Omega_hat_HAC); 9 | void XWXiSum(int *len_data,int *n_cov,int *unit_index,int *len_uniq_u_index,double *Xtilde,double *weights,double *result); 10 | void XXiSum(int *len_data,int *n_cov,int *unit_index,int *len_uniq_u_index,double *Xtilde,double *result); 11 | void MDummy(int *index,int *len_index,int *len_data,int *dummy); 12 | void GenWeightsDID(int *unit_index,int *time_index,int *tr,int *C_it,int *len_data,int *len_u_index,int *len_t_index,int *ate,int *att,int *verbose,double *weightdid); 13 | void CalDID(int *unit_index,int *time_index,int *tr,int *C_it,double *y,int *len_data,int *len_u_index,int *len_t_index,int *ate,int *att,int *verbose,double *did); 14 | int t_t1_same(int *unit_index,int *time_index,int *len_u_index,int *len_t_index,int *len_data,int *tr,int **same); 15 | int is_t_t1_same(int *u_i,int *t_i,int i,int j,int *tr,int size); 16 | void GenWeightsFD(int *unit_index,int *time_index,int *tr,int *C_it,int *len_data,int *len_u_index,int *len_t_index,int *ate,int *att,int *verbose,double *weightfd); 17 | void GenWeightsTime(int *time_index,int *unit_index,int *tr,int *C_it,int *len_data,int *len_t_index,int *len_u_index,int *ate,int *att,int *verbose,double *weight); 18 | void GenWeightsUnit(int *unit_index,int *time_index,int *tr,int *C_it,int *len_data,int *len_u_index,int *len_t_index,int *ate,int *att,int *verbose,double *weight); 19 | int is_time_index_exist(int *u_i,int *t_i,int i,int j,int size); 20 | int is_index_exist(int *unit_index,int *time_index,int *len_u_index,int *len_t_index,int *len_data,int **exist); 21 | void TwayDemean(double *var,int *unit_index,int *time_index,int *len_u_index,int *len_t_index,int *len_data,double *TwayDemean); 22 | void WWDemean(double *var,double *weight,int *index,int *len_index,int *len_data,double *WWdemean); 23 | void WDemean(double *var,double *weight,int *index,int *len_index,int *len_data,double *Wdemean); 24 | void Demean(double *var,int *index,int *len_index,int *len_data,double *demean); 25 | void GenTime(int *unit_index,int *len_data,int *len_u_index,double *time_index); 26 | void Transform(double *y,int *n,int *treat,double *pscore,double *ytrans); 27 | void Vectorize(double *Wvec,int *nrow,int *ncol,int *time_index,int *dyad_index,int *n_obs,double *results); 28 | void Index(int *index,int *uniq_index,int *len_u_index,int *len_data,int *result); 29 | --------------------------------------------------------------------------------